diff --git a/.env.example b/.env.example index c4c684cdea..3cbc375b46 100644 --- a/.env.example +++ b/.env.example @@ -53,10 +53,6 @@ MINIMAX_CN_API_KEY= # Get at: https://firecrawl.dev/ FIRECRAWL_API_KEY= -# Nous Research API Key - Vision analysis and multi-model reasoning -# Get at: https://inference-api.nousresearch.com/ -NOUS_API_KEY= - # FAL.ai API Key - Image generation # Get at: https://fal.ai/ FAL_KEY= diff --git a/AGENTS.md b/AGENTS.md index cc66a5c7f3..7aef595a36 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,78 +1,60 @@ # Hermes Agent - Development Guide -Instructions for AI coding assistants (GitHub Copilot, Cursor, etc.) and human developers. - -Hermes Agent is an AI agent harness with tool-calling capabilities, interactive CLI, messaging integrations, and scheduled tasks. +Instructions for AI coding assistants and developers working on the hermes-agent codebase. ## Development Environment -**IMPORTANT**: Always use the virtual environment if it exists: ```bash -source venv/bin/activate # Before running any Python commands +source .venv/bin/activate # ALWAYS activate before running Python ``` ## Project Structure ``` hermes-agent/ -├── agent/ # Agent internals (extracted from run_agent.py) -│ ├── model_metadata.py # Model context lengths, token estimation +├── run_agent.py # AIAgent class — core conversation loop +├── model_tools.py # Tool orchestration, _discover_tools(), handle_function_call() +├── toolsets.py # Toolset definitions, _HERMES_CORE_TOOLS list +├── cli.py # HermesCLI class — interactive CLI orchestrator +├── hermes_state.py # SessionDB — SQLite session store (FTS5 search) +├── agent/ # Agent internals +│ ├── prompt_builder.py # System prompt assembly │ ├── context_compressor.py # Auto context compression │ ├── prompt_caching.py # Anthropic prompt caching -│ ├── prompt_builder.py # System prompt assembly (identity, skills index, context files) +│ ├── auxiliary_client.py # Auxiliary LLM client (vision, summarization) +│ ├── model_metadata.py 
# Model context lengths, token estimation │ ├── display.py # KawaiiSpinner, tool preview formatting +│ ├── skill_commands.py # Skill slash commands (shared CLI/gateway) │ └── trajectory.py # Trajectory saving helpers -├── hermes_cli/ # CLI implementation -│ ├── main.py # Entry point, command dispatcher -│ ├── banner.py # Welcome banner, ASCII art, skills summary -│ ├── commands.py # Slash command definitions + autocomplete -│ ├── callbacks.py # Interactive prompt callbacks (clarify, sudo, approval) -│ ├── setup.py # Interactive setup wizard -│ ├── config.py # Config management & migration -│ ├── status.py # Status display -│ ├── doctor.py # Diagnostics -│ ├── gateway.py # Gateway management -│ ├── uninstall.py # Uninstaller -│ ├── cron.py # Cron job management -│ └── skills_hub.py # Skills Hub CLI + /skills slash command -├── tools/ # Tool implementations -│ ├── registry.py # Central tool registry (schemas, handlers, dispatch) -│ ├── approval.py # Dangerous command detection + per-session approval -│ ├── environments/ # Terminal execution backends -│ │ ├── base.py # BaseEnvironment ABC -│ │ ├── local.py # Local execution with interrupt support -│ │ ├── docker.py # Docker container execution -│ │ ├── ssh.py # SSH remote execution -│ │ ├── singularity.py # Singularity/Apptainer + SIF management -│ │ ├── modal.py # Modal cloud execution -│ │ └── daytona.py # Daytona cloud sandboxes -│ ├── terminal_tool.py # Terminal orchestration (sudo, lifecycle, factory) -│ ├── todo_tool.py # Planning & task management -│ ├── process_registry.py # Background process management -│ └── ... # Other tool files -├── gateway/ # Messaging platform adapters -│ ├── platforms/ # Platform-specific adapters (telegram, discord, slack, whatsapp) -│ └── ... 
-├── cron/ # Scheduler implementation -├── environments/ # RL training environments (Atropos integration) -├── skills/ # Bundled skill sources -├── optional-skills/ # Official optional skills (not activated by default) -├── cli.py # Interactive CLI orchestrator (HermesCLI class) -├── run_agent.py # AIAgent class (core conversation loop) -├── model_tools.py # Tool orchestration (thin layer over tools/registry.py) -├── toolsets.py # Tool groupings -├── toolset_distributions.py # Probability-based tool selection +├── hermes_cli/ # CLI subcommands and setup +│ ├── main.py # Entry point — all `hermes` subcommands +│ ├── config.py # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration +│ ├── commands.py # Slash command definitions + SlashCommandCompleter +│ ├── callbacks.py # Terminal callbacks (clarify, sudo, approval) +│ └── setup.py # Interactive setup wizard +├── tools/ # Tool implementations (one file per tool) +│ ├── registry.py # Central tool registry (schemas, handlers, dispatch) +│ ├── approval.py # Dangerous command detection +│ ├── terminal_tool.py # Terminal orchestration +│ ├── process_registry.py # Background process management +│ ├── file_tools.py # File read/write/search/patch +│ ├── web_tools.py # Firecrawl search/extract +│ ├── browser_tool.py # Browserbase browser automation +│ ├── code_execution_tool.py # execute_code sandbox +│ ├── delegate_tool.py # Subagent delegation +│ ├── mcp_tool.py # MCP client (~1050 lines) +│ └── environments/ # Terminal backends (local, docker, ssh, modal, daytona, singularity) +├── gateway/ # Messaging platform gateway +│ ├── run.py # Main loop, slash commands, message dispatch +│ ├── session.py # SessionStore — conversation persistence +│ └── platforms/ # Adapters: telegram, discord, slack, whatsapp, homeassistant, signal +├── cron/ # Scheduler (jobs.py, scheduler.py) +├── environments/ # RL training environments (Atropos) +├── tests/ # Pytest suite (~2500+ tests) └── batch_runner.py # Parallel batch processing ``` -**User 
Configuration** (stored in `~/.hermes/`): -- `~/.hermes/config.yaml` - Settings (model, terminal, toolsets, etc.) -- `~/.hermes/.env` - API keys and secrets -- `~/.hermes/pairing/` - DM pairing data -- `~/.hermes/hooks/` - Custom event hooks -- `~/.hermes/image_cache/` - Cached user images -- `~/.hermes/audio_cache/` - Cached user voice messages -- `~/.hermes/sticker_cache.json` - Telegram sticker descriptions +**User config:** `~/.hermes/config.yaml` (settings), `~/.hermes/.env` (API keys) ## File Dependency Chain @@ -86,626 +68,175 @@ model_tools.py (imports tools/registry + triggers tool discovery) run_agent.py, cli.py, batch_runner.py, environments/ ``` -Each tool file co-locates its schema, handler, and registration. `model_tools.py` is a thin orchestration layer. - --- -## AIAgent Class - -The main agent is implemented in `run_agent.py`: +## AIAgent Class (run_agent.py) ```python class AIAgent: - def __init__( - self, - model: str = "anthropic/claude-sonnet-4.6", - api_key: str = None, - base_url: str = "https://openrouter.ai/api/v1", - max_iterations: int = 60, # Max tool-calling loops + def __init__(self, + model: str = "anthropic/claude-opus-4.6", + max_iterations: int = 90, enabled_toolsets: list = None, disabled_toolsets: list = None, - verbose_logging: bool = False, - quiet_mode: bool = False, # Suppress progress output - tool_progress_callback: callable = None, # Called on each tool use - ): - # Initialize OpenAI client, load tools based on toolsets - ... - - def chat(self, user_message: str, task_id: str = None) -> str: - # Main entry point - runs the agent loop - ... + quiet_mode: bool = False, + save_trajectories: bool = False, + platform: str = None, # "cli", "telegram", etc. + session_id: str = None, + skip_context_files: bool = False, + skip_memory: bool = False, + # ... plus provider, api_mode, callbacks, routing params + ): ... 
+ + def chat(self, message: str) -> str: + """Simple interface — returns final response string.""" + + def run_conversation(self, user_message: str, system_message: str = None, + conversation_history: list = None, task_id: str = None) -> dict: + """Full interface — returns dict with final_response + messages.""" ``` ### Agent Loop -The core loop in `_run_agent_loop()`: - -``` -1. Add user message to conversation -2. Call LLM with tools -3. If LLM returns tool calls: - - Execute each tool - - Add tool results to conversation - - Go to step 2 -4. If LLM returns text response: - - Return response to user -``` +The core loop is inside `run_conversation()` — entirely synchronous: ```python -while turns < max_turns: - response = client.chat.completions.create( - model=model, - messages=messages, - tools=tool_schemas, - ) - +while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0: + response = client.chat.completions.create(model=model, messages=messages, tools=tool_schemas) if response.tool_calls: for tool_call in response.tool_calls: - result = await execute_tool(tool_call) + result = handle_function_call(tool_call.name, tool_call.args, task_id) messages.append(tool_result_message(result)) - turns += 1 + api_call_count += 1 else: return response.content ``` -### Conversation Management - -Messages are stored as a list of dicts following OpenAI format: - -```python -messages = [ - {"role": "system", "content": "You are a helpful assistant..."}, - {"role": "user", "content": "Search for Python tutorials"}, - {"role": "assistant", "content": None, "tool_calls": [...]}, - {"role": "tool", "tool_call_id": "...", "content": "..."}, - {"role": "assistant", "content": "Here's what I found..."}, -] -``` - -### Reasoning Model Support - -For models that support chain-of-thought reasoning: -- Extract `reasoning_content` from API responses -- Store in `assistant_msg["reasoning"]` for trajectory export -- Pass back via `reasoning_content` field on 
subsequent turns +Messages follow OpenAI format: `{"role": "system/user/assistant/tool", ...}`. Reasoning content is stored in `assistant_msg["reasoning"]`. --- ## CLI Architecture (cli.py) -The interactive CLI uses: -- **Rich** - For the welcome banner and styled panels -- **prompt_toolkit** - For fixed input area with history, `patch_stdout`, slash command autocomplete, and floating completion menus -- **KawaiiSpinner** (in run_agent.py) - Animated kawaii faces during API calls; clean `┊` activity feed for tool execution results - -Key components: -- `HermesCLI` class - Main CLI controller with commands and conversation loop -- `SlashCommandCompleter` - Autocomplete dropdown for `/commands` (type `/` to see all) -- `agent/skill_commands.py` - Scans skills and builds invocation messages (shared with gateway) -- `load_cli_config()` - Loads config, sets environment variables for terminal -- `build_welcome_banner()` - Displays ASCII art logo, tools, and skills summary - -CLI UX notes: -- Thinking spinner (during LLM API call) shows animated kawaii face + verb (`(⌐■_■) deliberating...`) -- When LLM returns tool calls, the spinner clears silently (no "got it!" noise) -- Tool execution results appear as a clean activity feed: `┊ {emoji} {verb} {detail} {duration}` -- "got it!" only appears when the LLM returns a final text response (`⚕ ready`) -- The prompt shows `⚕ ❯` when the agent is working, `❯` when idle -- Pasting 5+ lines auto-saves to `~/.hermes/pastes/` and collapses to a reference -- Multi-line input via Alt+Enter or Ctrl+J -- `/commands` - Process user commands like `/help`, `/clear`, `/personality`, etc. -- `/skill-name` - Invoke installed skills directly (e.g., `/axolotl`, `/gif-search`) - -CLI uses `quiet_mode=True` when creating AIAgent to suppress verbose logging. - -### Skill Slash Commands - -Every installed skill in `~/.hermes/skills/` is automatically registered as a slash command. 
-The skill name (from frontmatter or folder name) becomes the command: `axolotl` → `/axolotl`. - -Implementation (`agent/skill_commands.py`, shared between CLI and gateway): -1. `scan_skill_commands()` scans all SKILL.md files at startup, filtering out skills incompatible with the current OS platform (via the `platforms` frontmatter field) -2. `build_skill_invocation_message()` loads the SKILL.md content and builds a user-turn message -3. The message includes the full skill content, a list of supporting files (not loaded), and the user's instruction -4. Supporting files can be loaded on demand via the `skill_view` tool -5. Injected as a **user message** (not system prompt) to preserve prompt caching +- **Rich** for banner/panels, **prompt_toolkit** for input with autocomplete +- **KawaiiSpinner** (`agent/display.py`) — animated faces during API calls, `┊` activity feed for tool results +- `load_cli_config()` in cli.py merges hardcoded defaults + user config YAML +- `process_command()` is a method on `HermesCLI` (not in commands.py) +- Skill slash commands: `agent/skill_commands.py` scans `~/.hermes/skills/`, injects as **user message** (not system prompt) to preserve prompt caching ### Adding CLI Commands -1. Add to `COMMANDS` dict with description -2. Add handler in `process_command()` method -3. 
For persistent settings, use `save_config_value()` to update config - ---- - -## Hermes CLI Commands - -The unified `hermes` command provides all functionality: - -| Command | Description | -|---------|-------------| -| `hermes` | Interactive chat (default) | -| `hermes chat -q "..."` | Single query mode | -| `hermes -w` / `hermes --worktree` | Start in isolated git worktree (for parallel agents) | -| `hermes setup` | Configure API keys and settings | -| `hermes config` | View current configuration | -| `hermes config edit` | Open config in editor | -| `hermes config set KEY VAL` | Set a specific value | -| `hermes config check` | Check for missing config | -| `hermes config migrate` | Prompt for missing config interactively | -| `hermes status` | Show configuration status | -| `hermes doctor` | Diagnose issues | -| `hermes update` | Update to latest (checks for new config) | -| `hermes uninstall` | Uninstall (can keep configs for reinstall) | -| `hermes gateway` | Start gateway (messaging + cron scheduler) | -| `hermes gateway setup` | Configure messaging platforms interactively | -| `hermes gateway install` | Install gateway as system service | -| `hermes cron list` | View scheduled jobs | -| `hermes cron status` | Check if cron scheduler is running | -| `hermes version` | Show version info | -| `hermes pairing list/approve/revoke` | Manage DM pairing codes | - ---- - -## Messaging Gateway - -The gateway connects Hermes to Telegram, Discord, Slack, and WhatsApp. - -### Setup - -The interactive setup wizard handles platform configuration: - -```bash -hermes gateway setup # Arrow-key menu of all platforms, configure tokens/allowlists/home channels -``` - -This is the recommended way to configure messaging. It shows which platforms are already set up, walks through each one interactively, and offers to start/restart the gateway service at the end. 
- -Platforms can also be configured manually in `~/.hermes/.env`: - -### Configuration (in `~/.hermes/.env`): - -```bash -# Telegram -TELEGRAM_BOT_TOKEN=123456:ABC-DEF... # From @BotFather -TELEGRAM_ALLOWED_USERS=123456789,987654 # Comma-separated user IDs (from @userinfobot) - -# Discord -DISCORD_BOT_TOKEN=MTIz... # From Developer Portal -DISCORD_ALLOWED_USERS=123456789012345678 # Comma-separated user IDs - -# Agent Behavior -HERMES_MAX_ITERATIONS=60 # Max tool-calling iterations -MESSAGING_CWD=/home/myuser # Terminal working directory for messaging - -# Tool progress is configured in config.yaml (display.tool_progress: off|new|all|verbose) -``` - -### Working Directory Behavior - -- **CLI (`hermes` command)**: Uses current directory (`.` → `os.getcwd()`) -- **Messaging (Telegram/Discord)**: Uses `MESSAGING_CWD` (default: home directory) - -This is intentional: CLI users are in a terminal and expect the agent to work in their current directory, while messaging users need a consistent starting location. - -### Security (User Allowlists): - -**IMPORTANT**: By default, the gateway denies all users who are not in an allowlist or paired via DM. - -The gateway checks `{PLATFORM}_ALLOWED_USERS` environment variables: -- If set: Only listed user IDs can interact with the bot -- If unset: All users are denied unless `GATEWAY_ALLOW_ALL_USERS=true` is set - -Users can find their IDs: -- **Telegram**: Message [@userinfobot](https://t.me/userinfobot) -- **Discord**: Enable Developer Mode, right-click name → Copy ID - -### DM Pairing System - -Instead of static allowlists, users can pair via one-time codes: -1. Unknown user DMs the bot → receives pairing code -2. Owner runs `hermes pairing approve ` -3. User is permanently authorized - -Security: 8-char codes, 1-hour expiry, rate-limited (1/10min/user), max 3 pending per platform, lockout after 5 failed attempts, `chmod 0600` on data files. 
- -Files: `gateway/pairing.py`, `hermes_cli/pairing.py` - -### Event Hooks - -Hooks fire at lifecycle points. Place hook directories in `~/.hermes/hooks/`: - -``` -~/.hermes/hooks/my-hook/ -├── HOOK.yaml # name, description, events list -└── handler.py # async def handle(event_type, context): ... -``` - -Events: `gateway:startup`, `session:start`, `session:reset`, `agent:start`, `agent:step`, `agent:end`, `command:*` - -The `agent:step` event fires each iteration of the tool-calling loop with tool names and results. - -Files: `gateway/hooks.py` - -### Tool Progress Notifications - -When `tool_progress` is enabled in `config.yaml`, the bot sends status messages as it works: -- `💻 \`ls -la\`...` (terminal commands show the actual command) -- `🔍 web_search...` -- `📄 web_extract...` -- `🐍 execute_code...` (programmatic tool calling sandbox) -- `🔀 delegate_task...` (subagent delegation) -- `❓ clarify...` (user question, CLI-only) - -Modes: -- `new`: Only when switching to a different tool (less spam) -- `all`: Every single tool call - -### Typing Indicator - -The gateway keeps the "typing..." indicator active throughout processing, refreshing every 4 seconds. This lets users know the bot is working even during long tool-calling sequences. - -### Platform Toolsets: - -Each platform has a dedicated toolset in `toolsets.py`: -- `hermes-telegram`: Full tools including terminal (with safety checks) -- `hermes-discord`: Full tools including terminal -- `hermes-whatsapp`: Full tools including terminal - ---- - -## Configuration System - -Configuration files are stored in `~/.hermes/` for easy user access: -- `~/.hermes/config.yaml` - All settings (model, terminal, compression, etc.) -- `~/.hermes/.env` - API keys and secrets - -### Adding New Configuration Options - -When adding new configuration variables, you MUST follow this process: - -#### For config.yaml options: - -1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py` -2. 
**CRITICAL**: Bump `_config_version` in `DEFAULT_CONFIG` when adding required fields -3. This triggers migration prompts for existing users on next `hermes update` or `hermes setup` - -Example: -```python -DEFAULT_CONFIG = { - # ... existing config ... - - "new_feature": { - "enabled": True, - "option": "default_value", - }, - - # BUMP THIS when adding required fields - "_config_version": 2, # Was 1, now 2 -} -``` - -#### For .env variables (API keys/secrets): - -1. Add to `REQUIRED_ENV_VARS` or `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` -2. Include metadata for the migration system: - -```python -OPTIONAL_ENV_VARS = { - # ... existing vars ... - "NEW_API_KEY": { - "description": "What this key is for", - "prompt": "Display name in prompts", - "url": "https://where-to-get-it.com/", - "tools": ["tools_it_enables"], # What tools need this - "password": True, # Mask input - }, -} -``` - -#### Update related files: - -- `hermes_cli/setup.py` - Add prompts in the setup wizard -- `cli-config.yaml.example` - Add example with comments -- Update README.md if user-facing - -### Config Version Migration - -The system uses `_config_version` to detect outdated configs: - -1. `check_for_missing_config()` compares user config to `DEFAULT_CONFIG` -2. `migrate_config()` interactively prompts for missing values -3. 
Called automatically by `hermes update` and optionally by `hermes setup` - ---- - -## Environment Variables - -API keys are loaded from `~/.hermes/.env`: -- `OPENROUTER_API_KEY` - Main LLM API access (primary provider) -- `FIRECRAWL_API_KEY` - Web search/extract tools -- `FIRECRAWL_API_URL` - Self-hosted Firecrawl endpoint (optional) -- `BROWSERBASE_API_KEY` / `BROWSERBASE_PROJECT_ID` - Browser automation -- `FAL_KEY` - Image generation (FLUX model) -- `NOUS_API_KEY` - Vision and Mixture-of-Agents tools - -Terminal tool configuration (in `~/.hermes/config.yaml`): -- `terminal.backend` - Backend: local, docker, singularity, modal, daytona, or ssh -- `terminal.cwd` - Working directory ("." = host CWD for local only; for remote backends set an absolute path inside the target, or omit to use the backend's default) -- `terminal.docker_image` - Image for Docker backend -- `terminal.singularity_image` - Image for Singularity backend -- `terminal.modal_image` - Image for Modal backend -- `terminal.daytona_image` - Image for Daytona backend -- `DAYTONA_API_KEY` - API key for Daytona backend (in .env) -- SSH: `TERMINAL_SSH_HOST`, `TERMINAL_SSH_USER`, `TERMINAL_SSH_KEY` in .env - -Agent behavior (in `~/.hermes/.env`): -- `HERMES_MAX_ITERATIONS` - Max tool-calling iterations (default: 60) -- `MESSAGING_CWD` - Working directory for messaging platforms (default: ~) -- `display.tool_progress` in config.yaml - Tool progress: `off`, `new`, `all`, `verbose` -- `OPENAI_API_KEY` - Voice transcription (Whisper STT) -- `SLACK_BOT_TOKEN` / `SLACK_APP_TOKEN` - Slack integration (Socket Mode) -- `SLACK_ALLOWED_USERS` - Comma-separated Slack user IDs -- `HERMES_HUMAN_DELAY_MODE` - Response pacing: off/natural/custom -- `HERMES_HUMAN_DELAY_MIN_MS` / `HERMES_HUMAN_DELAY_MAX_MS` - Custom delay range - -### Dangerous Command Approval - -The terminal tool includes safety checks for potentially destructive commands (e.g., `rm -rf`, `DROP TABLE`, `chmod 777`, etc.): - -**Behavior by Backend:** -- 
**Docker/Singularity/Modal**: Commands run unrestricted (isolated containers) -- **Local/SSH**: Dangerous commands trigger approval flow - -**Approval Flow (CLI):** -``` -⚠️ Potentially dangerous command detected: recursive delete - rm -rf /tmp/test - - [o]nce | [s]ession | [a]lways | [d]eny - Choice [o/s/a/D]: -``` - -**Approval Flow (Messaging):** -- Command is blocked with explanation -- Agent explains the command was blocked for safety -- User must add the pattern to their allowlist via `hermes config edit` or run the command directly on their machine - -**Configuration:** -- `command_allowlist` in `~/.hermes/config.yaml` stores permanently allowed patterns -- Add patterns via "always" approval or edit directly - -**Sudo Handling (Messaging):** -- If sudo fails over messaging, output includes tip to add `SUDO_PASSWORD` to `~/.hermes/.env` - ---- - -## Background Process Management - -The `process` tool works alongside `terminal` for managing long-running background processes: - -**Starting a background process:** -```python -terminal(command="pytest -v tests/", background=true) -# Returns: {"session_id": "proc_abc123", "pid": 12345, ...} -``` - -**Managing it with the process tool:** -- `process(action="list")` -- show all running/recent processes -- `process(action="poll", session_id="proc_abc123")` -- check status + new output -- `process(action="log", session_id="proc_abc123")` -- full output with pagination -- `process(action="wait", session_id="proc_abc123", timeout=600)` -- block until done -- `process(action="kill", session_id="proc_abc123")` -- terminate -- `process(action="write", session_id="proc_abc123", data="y")` -- send stdin -- `process(action="submit", session_id="proc_abc123", data="yes")` -- send + Enter - -**Key behaviors:** -- Background processes execute through the configured terminal backend (local/Docker/Modal/Daytona/SSH/Singularity) -- never directly on the host unless `TERMINAL_ENV=local` -- The `wait` action blocks the tool call 
until the process finishes, times out, or is interrupted by a new user message -- PTY mode (`pty=true` on terminal) enables interactive CLI tools (Codex, Claude Code) -- In RL training, background processes are auto-killed when the episode ends (`tool_context.cleanup()`) -- In the gateway, sessions with active background processes are exempt from idle reset -- The process registry checkpoints to `~/.hermes/processes.json` for crash recovery - -Files: `tools/process_registry.py` (registry + handler), `tools/terminal_tool.py` (spawn integration) +1. Add to `COMMANDS` dict in `hermes_cli/commands.py` +2. Add handler in `HermesCLI.process_command()` in `cli.py` +3. For persistent settings, use `save_config_value()` in `cli.py` --- ## Adding New Tools -Adding a tool requires changes in **2 files** (the tool file and `toolsets.py`): - -1. **Create `tools/your_tool.py`** with handler, schema, check function, and registry call: +Requires changes in **3 files**: +**1. Create `tools/your_tool.py`:** ```python -# tools/example_tool.py -import json -import os +import json, os from tools.registry import registry -def check_example_requirements() -> bool: - """Check if required API keys/dependencies are available.""" +def check_requirements() -> bool: return bool(os.getenv("EXAMPLE_API_KEY")) def example_tool(param: str, task_id: str = None) -> str: - """Execute the tool and return JSON string result.""" - try: - result = {"success": True, "data": "..."} - return json.dumps(result, ensure_ascii=False) - except Exception as e: - return json.dumps({"error": str(e)}, ensure_ascii=False) - -EXAMPLE_SCHEMA = { - "name": "example_tool", - "description": "Does something useful.", - "parameters": { - "type": "object", - "properties": { - "param": {"type": "string", "description": "The parameter"} - }, - "required": ["param"] - } -} + return json.dumps({"success": True, "data": "..."}) registry.register( name="example_tool", toolset="example", - schema=EXAMPLE_SCHEMA, - handler=lambda 
args, **kw: example_tool( - param=args.get("param", ""), task_id=kw.get("task_id")), - check_fn=check_example_requirements, + schema={"name": "example_tool", "description": "...", "parameters": {...}}, + handler=lambda args, **kw: example_tool(param=args.get("param", ""), task_id=kw.get("task_id")), + check_fn=check_requirements, requires_env=["EXAMPLE_API_KEY"], ) ``` -2. **Add to `toolsets.py`**: Add `"example_tool"` to `_HERMES_CORE_TOOLS` if it should be in all platform toolsets, or create a new toolset entry. +**2. Add import** in `model_tools.py` `_discover_tools()` list. -3. **Add discovery import** in `model_tools.py`'s `_discover_tools()` list: `"tools.example_tool"`. +**3. Add to `toolsets.py`** — either `_HERMES_CORE_TOOLS` (all platforms) or a new toolset. -That's it. The registry handles schema collection, dispatch, availability checking, and error wrapping automatically. No edits to `TOOLSET_REQUIREMENTS`, `handle_function_call()`, `get_all_tool_names()`, or any other data structure. +The registry handles schema collection, dispatch, availability checking, and error wrapping. All handlers MUST return a JSON string. -**Optional:** Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` for the setup wizard, and to `toolset_distributions.py` for batch processing. - -**Special case: tools that need agent-level state** (like `todo`, `memory`): -These are intercepted by `run_agent.py`'s tool dispatch loop *before* `handle_function_call()`. The registry still holds their schemas, but dispatch returns a stub error as a safety fallback. See `todo_tool.py` for the pattern. - -All tool handlers MUST return a JSON string. The registry's `dispatch()` wraps all exceptions in `{"error": "..."}` automatically. - -### Dynamic Tool Availability - -Tools declare their requirements at registration time via `check_fn` and `requires_env`. The registry checks `check_fn()` when building tool definitions -- tools whose check fails are silently excluded. 
- -### Stateful Tools - -Tools that maintain state (terminal, browser) require: -- `task_id` parameter for session isolation between concurrent tasks -- `cleanup_*()` function to release resources -- Cleanup is called automatically in run_agent.py after conversation completes +**Agent-level tools** (todo, memory): intercepted by `run_agent.py` before `handle_function_call()`. See `todo_tool.py` for the pattern. --- -## Trajectory Format +## Adding Configuration -Conversations are saved in ShareGPT format for training: -```json -{"from": "system", "value": "System prompt with ..."} -{"from": "human", "value": "User message"} -{"from": "gpt", "value": "reasoning\n{...}"} -{"from": "tool", "value": "{...}"} -{"from": "gpt", "value": "Final response"} -``` - -Tool calls use `` XML tags, responses use `` tags, reasoning uses `` tags. - -### Trajectory Export +### config.yaml options: +1. Add to `DEFAULT_CONFIG` in `hermes_cli/config.py` +2. Bump `_config_version` (currently 5) to trigger migration for existing users +### .env variables: +1. 
Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` with metadata: ```python -agent = AIAgent(save_trajectories=True) -agent.chat("Do something") -# Saves to trajectories/*.jsonl in ShareGPT format +"NEW_API_KEY": { + "description": "What it's for", + "prompt": "Display name", + "url": "https://...", + "password": True, + "category": "tool", # provider, tool, messaging, setting +}, ``` +### Config loaders (three separate paths): + +| Loader | Used by | Location | +|--------|---------|----------| +| `load_cli_config()` | CLI mode | `cli.py` | +| `load_config()` | `hermes tools`, `hermes setup` | `hermes_cli/config.py` | +| Direct YAML load | Gateway | `gateway/run.py` | + --- -## Batch Processing (batch_runner.py) +## Important Policies -For processing multiple prompts: -- Parallel execution with multiprocessing -- Content-based resume for fault tolerance (matches on prompt text, not indices) -- Toolset distributions control probabilistic tool availability per prompt -- Output: `data//trajectories.jsonl` (combined) + individual batch files +### Prompt Caching Must Not Break -```bash -python batch_runner.py \ - --dataset_file=prompts.jsonl \ - --batch_size=20 \ - --num_workers=4 \ - --run_name=my_run -``` +Hermes-Agent ensures caching remains valid throughout a conversation. **Do NOT implement changes that would:** +- Alter past context mid-conversation +- Change toolsets mid-conversation +- Reload memories or rebuild system prompts mid-conversation ---- +Cache-breaking forces dramatically higher costs. The ONLY time we alter context is during context compression. -## Skills System - -Skills are on-demand knowledge documents the agent can load. Compatible with the [agentskills.io](https://agentskills.io/specification) open standard.
- -``` -skills/ -├── mlops/ # Category folder -│ ├── axolotl/ # Skill folder -│ │ ├── SKILL.md # Main instructions (required) -│ │ ├── references/ # Additional docs, API specs -│ │ ├── templates/ # Output formats, configs -│ │ └── assets/ # Supplementary files (agentskills.io) -│ └── vllm/ -│ └── SKILL.md -├── .hub/ # Skills Hub state (gitignored) -│ ├── lock.json # Installed skill provenance -│ ├── quarantine/ # Pending security review -│ ├── audit.log # Security scan history -│ ├── taps.json # Custom source repos -│ └── index-cache/ # Cached remote indexes -``` - -**Progressive disclosure** (token-efficient): -1. `skills_categories()` - List category names (~50 tokens) -2. `skills_list(category)` - Name + description per skill (~3k tokens) -3. `skill_view(name)` - Full content + tags + linked files - -SKILL.md files use YAML frontmatter (agentskills.io format): -```yaml ---- -name: skill-name -description: Brief description for listing -version: 1.0.0 -platforms: [macos] # Optional — restrict to specific OS (macos/linux/windows) -metadata: - hermes: - tags: [tag1, tag2] - related_skills: [other-skill] ---- -# Skill Content... -``` - -**Platform filtering** — Skills with a `platforms` field are automatically excluded from the system prompt index, `skills_list()`, and slash commands on incompatible platforms. Skills without the field load everywhere (backward compatible). See `skills/apple/` for macOS-only examples (iMessage, Reminders, Notes, FindMy). - -**Skills Hub** — user-driven skill search/install from online registries and official optional skills. Sources: official optional skills (shipped with repo, labeled "official"), GitHub (openai/skills, anthropics/skills, custom taps), ClawHub, Claude marketplace, LobeHub. Not exposed as an agent tool — the model cannot search for or install skills. Users manage skills via `hermes skills browse/search/install` CLI commands or the `/skills` slash command in chat. 
- -Key files: -- `tools/skills_tool.py` — Agent-facing skill list/view (progressive disclosure) -- `tools/skills_guard.py` — Security scanner (regex + LLM audit, trust-aware install policy) -- `tools/skills_hub.py` — Source adapters (OptionalSkillSource, GitHub, ClawHub, Claude marketplace, LobeHub), lock file, auth -- `hermes_cli/skills_hub.py` — CLI subcommands + `/skills` slash command handler +### Working Directory Behavior +- **CLI**: Uses current directory (`.` → `os.getcwd()`) +- **Messaging**: Uses `MESSAGING_CWD` env var (default: home directory) --- ## Known Pitfalls ### DO NOT use `simple_term_menu` for interactive menus - -`simple_term_menu` has rendering bugs in tmux, iTerm2, and other non-standard terminals. When the user scrolls with arrow keys, previously highlighted items "ghost" — duplicating upward and corrupting the display. This happens because the library uses ANSI cursor-up codes to redraw in place, and tmux/iTerm miscalculate positions when the menu is near the bottom of the viewport. - -**Rule:** All interactive menus in `hermes_cli/` must use `curses` (Python stdlib) instead. See `tools_config.py` for the pattern — both `_prompt_choice()` (single-select) and `_prompt_toolset_checklist()` (multi-select with space toggle) use `curses.wrapper()`. The numbered-input fallback handles Windows where curses isn't available. +Rendering bugs in tmux/iTerm2 — ghosting on scroll. Use `curses` (stdlib) instead. See `hermes_cli/tools_config.py` for the pattern. ### DO NOT use `\033[K` (ANSI erase-to-EOL) in spinner/display code - -The ANSI escape `\033[K` leaks as literal `?[K` text when `prompt_toolkit`'s `patch_stdout` is active. Use space-padding instead to clear lines: `f"\r{line}{' ' * pad}"`. See `agent/display.py` `KawaiiSpinner`. +Leaks as literal `?[K` text under `prompt_toolkit`'s `patch_stdout`. Use space-padding: `f"\r{line}{' ' * pad}"`. 
### `_last_resolved_tool_names` is a process-global in `model_tools.py` - -The `execute_code` sandbox uses `_last_resolved_tool_names` (set by `get_tool_definitions()`) to decide which tool stubs to generate. When subagents run with restricted toolsets, they overwrite this global. After delegation returns to the parent, `execute_code` may see the child's restricted list instead of the parent's full list. This is a known bug — `execute_code` calls after delegation may fail with `ImportError: cannot import name 'patch' from 'hermes_tools'`. +When subagents overwrite this global, `execute_code` calls after delegation may fail with missing tool imports. Known bug. ### Tests must not write to `~/.hermes/` - -The `autouse` fixture `_isolate_hermes_home` in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Every test runs in isolation. If you add a test that creates `AIAgent` instances or writes session logs, the fixture handles cleanup automatically. Never hardcode `~/.hermes/` paths in tests. +The `_isolate_hermes_home` autouse fixture in `tests/conftest.py` redirects `HERMES_HOME` to a temp dir. Never hardcode `~/.hermes/` paths in tests. --- -## Testing Changes +## Testing -After making changes: +```bash +source .venv/bin/activate +python -m pytest tests/ -q # Full suite (~2500 tests, ~2 min) +python -m pytest tests/test_model_tools.py -q # Toolset resolution +python -m pytest tests/test_cli_init.py -q # CLI config loading +python -m pytest tests/gateway/ -q # Gateway tests +python -m pytest tests/tools/ -q # Tool-level tests +``` -1. Run `hermes doctor` to check setup -2. Run `hermes config check` to verify config -3. Test with `hermes chat -q "test message"` -4. For new config options, test fresh install: `rm -rf ~/.hermes && hermes setup` +Always run the full suite before pushing changes. 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 9679d79d1d..6ed6c833e4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -118,7 +118,7 @@ hermes-agent/ ├── cli.py # HermesCLI class — interactive TUI, prompt_toolkit integration ├── model_tools.py # Tool orchestration (thin layer over tools/registry.py) ├── toolsets.py # Tool groupings and presets (hermes-cli, hermes-telegram, etc.) -├── hermes_state.py # SQLite session database with FTS5 full-text search +├── hermes_state.py # SQLite session database with FTS5 full-text search, session titles ├── batch_runner.py # Parallel batch processing for trajectory generation │ ├── agent/ # Agent internals (extracted modules) @@ -218,7 +218,7 @@ User message → AIAgent._run_agent_loop() - **Self-registering tools**: Each tool file calls `registry.register()` at import time. `model_tools.py` triggers discovery by importing all tool modules. - **Toolset grouping**: Tools are grouped into toolsets (`web`, `terminal`, `file`, `browser`, etc.) that can be enabled/disabled per platform. -- **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search. JSON logs go to `~/.hermes/sessions/`. +- **Session persistence**: All conversations are stored in SQLite (`hermes_state.py`) with full-text search and unique session titles. JSON logs go to `~/.hermes/sessions/`. - **Ephemeral injection**: System prompts and prefill messages are injected at API call time, never persisted to the database or logs. - **Provider abstraction**: The agent works with any OpenAI-compatible API. Provider resolution happens at init time (Nous Portal OAuth, OpenRouter API key, or custom endpoint). - **Provider routing**: When using OpenRouter, `provider_routing` in config.yaml controls provider selection (sort by throughput/latency/price, allow/ignore specific providers, data retention policies). These are injected as `extra_body.provider` in API requests. 
diff --git a/README.md b/README.md index b172d13f28..aaa541d5d8 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Use any model you want — [Nous Portal](https://portal.nousresearch.com), [Open - + @@ -71,7 +71,7 @@ All documentation lives at **[hermes-agent.nousresearch.com/docs](https://hermes | [Quickstart](https://hermes-agent.nousresearch.com/docs/getting-started/quickstart) | Install → setup → first conversation in 2 minutes | | [CLI Usage](https://hermes-agent.nousresearch.com/docs/user-guide/cli) | Commands, keybindings, personalities, sessions | | [Configuration](https://hermes-agent.nousresearch.com/docs/user-guide/configuration) | Config file, providers, models, all options | -| [Messaging Gateway](https://hermes-agent.nousresearch.com/docs/user-guide/messaging) | Telegram, Discord, Slack, WhatsApp, Home Assistant | +| [Messaging Gateway](https://hermes-agent.nousresearch.com/docs/user-guide/messaging) | Telegram, Discord, Slack, WhatsApp, Signal, Home Assistant | | [Security](https://hermes-agent.nousresearch.com/docs/user-guide/security) | Command approval, DM pairing, container isolation | | [Tools & Toolsets](https://hermes-agent.nousresearch.com/docs/user-guide/features/tools) | 40+ tools, toolset system, terminal backends | | [Skills System](https://hermes-agent.nousresearch.com/docs/user-guide/features/skills) | Procedural memory, Skills Hub, creating skills | diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 841bb61663..a32e3a2937 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -4,7 +4,7 @@ Provides a single resolution chain so every consumer (context compression, session search, web extraction, vision analysis, browser vision) picks up the best available backend without duplicating fallback logic. -Resolution order for text tasks: +Resolution order for text tasks (auto mode): 1. OpenRouter (OPENROUTER_API_KEY) 2. Nous Portal (~/.hermes/auth.json active provider) 3. 
Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) @@ -14,10 +14,19 @@ Resolution order for text tasks: — checked via PROVIDER_REGISTRY entries with auth_type='api_key' 6. None -Resolution order for vision/multimodal tasks: +Resolution order for vision/multimodal tasks (auto mode): 1. OpenRouter 2. Nous Portal - 3. None (custom endpoints can't substitute for Gemini multimodal) + 3. Codex OAuth, then None (custom endpoints and API-key providers are skipped — they may not support multimodal) + +Per-task provider overrides (e.g. AUXILIARY_VISION_PROVIDER, +CONTEXT_COMPRESSION_PROVIDER) can force a specific provider for each task: +"openrouter", "nous", "codex", or "main" (= steps 3-5). +Default "auto" follows the chains above. + +Per-task model overrides (e.g. AUXILIARY_VISION_MODEL, +AUXILIARY_WEB_EXTRACT_MODEL) let callers use a different model slug +than the provider's default. """ import json @@ -73,6 +82,55 @@ _CODEX_AUX_BASE_URL = "https://chatgpt.com/backend-api/codex" # read response.choices[0].message.content. This adapter translates those # calls to the Codex Responses API so callers don't need any changes. + +def _convert_content_for_responses(content: Any) -> Any: + """Convert chat.completions content to Responses API format. + + chat.completions uses: + {"type": "text", "text": "..."} + {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}} + + Responses API uses: + {"type": "input_text", "text": "..."} + {"type": "input_image", "image_url": "data:image/png;base64,..."} + + If content is a plain string, it's returned as-is (the Responses API + accepts strings directly for text-only messages). 
+ """ + if isinstance(content, str): + return content + if not isinstance(content, list): + return str(content) if content else "" + + converted: List[Dict[str, Any]] = [] + for part in content: + if not isinstance(part, dict): + continue + ptype = part.get("type", "") + if ptype == "text": + converted.append({"type": "input_text", "text": part.get("text", "")}) + elif ptype == "image_url": + # chat.completions nests the URL: {"image_url": {"url": "..."}} + image_data = part.get("image_url", {}) + url = image_data.get("url", "") if isinstance(image_data, dict) else str(image_data) + entry: Dict[str, Any] = {"type": "input_image", "image_url": url} + # Preserve detail if specified + detail = image_data.get("detail") if isinstance(image_data, dict) else None + if detail: + entry["detail"] = detail + converted.append(entry) + elif ptype in ("input_text", "input_image"): + # Already in Responses format — pass through + converted.append(part) + else: + # Unknown content type — try to preserve as text + text = part.get("text", "") + if text: + converted.append({"type": "input_text", "text": text}) + + return converted or "" + + class _CodexCompletionsAdapter: """Drop-in shim that accepts chat.completions.create() kwargs and routes them through the Codex Responses streaming API.""" @@ -86,30 +144,31 @@ class _CodexCompletionsAdapter: model = kwargs.get("model", self._model) temperature = kwargs.get("temperature") - # Separate system/instructions from conversation messages + # Separate system/instructions from conversation messages. + # Convert chat.completions multimodal content blocks to Responses + # API format (input_text / input_image instead of text / image_url). instructions = "You are a helpful assistant." 
input_msgs: List[Dict[str, Any]] = [] for msg in messages: role = msg.get("role", "user") content = msg.get("content") or "" if role == "system": - instructions = content + instructions = content if isinstance(content, str) else str(content) else: - input_msgs.append({"role": role, "content": content}) + input_msgs.append({ + "role": role, + "content": _convert_content_for_responses(content), + }) resp_kwargs: Dict[str, Any] = { "model": model, "instructions": instructions, "input": input_msgs or [{"role": "user", "content": ""}], - "stream": True, "store": False, } - max_tokens = kwargs.get("max_output_tokens") or kwargs.get("max_completion_tokens") or kwargs.get("max_tokens") - if max_tokens is not None: - resp_kwargs["max_output_tokens"] = int(max_tokens) - if temperature is not None: - resp_kwargs["temperature"] = temperature + # Note: the Codex endpoint (chatgpt.com/backend-api/codex) does NOT + # support max_output_tokens or temperature — omit to avoid 400 errors. # Tools support for flush_memories and similar callers tools = kwargs.get("tools") @@ -337,59 +396,128 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: return None, None -# ── Public API ────────────────────────────────────────────────────────────── +# ── Provider resolution helpers ───────────────────────────────────────────── -def get_text_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: - """Return (client, model_slug) for text-only auxiliary tasks. +def _get_auxiliary_provider(task: str = "") -> str: + """Read the provider override for a specific auxiliary task. - Falls through OpenRouter -> Nous Portal -> custom endpoint -> Codex OAuth - -> direct API-key providers -> (None, None). + Checks AUXILIARY_{TASK}_PROVIDER first (e.g. AUXILIARY_VISION_PROVIDER), + then CONTEXT_{TASK}_PROVIDER (for the compression section's summary_provider), + then falls back to "auto". Returns one of: "auto", "openrouter", "nous", "main". """ - # 1. 
OpenRouter + if task: + for prefix in ("AUXILIARY_", "CONTEXT_"): + val = os.getenv(f"{prefix}{task.upper()}_PROVIDER", "").strip().lower() + if val and val != "auto": + return val + return "auto" + + +def _try_openrouter() -> Tuple[Optional[OpenAI], Optional[str]]: or_key = os.getenv("OPENROUTER_API_KEY") - if or_key: - logger.debug("Auxiliary text client: OpenRouter") - return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL, - default_headers=_OR_HEADERS), _OPENROUTER_MODEL + if not or_key: + return None, None + logger.debug("Auxiliary client: OpenRouter") + return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL, + default_headers=_OR_HEADERS), _OPENROUTER_MODEL - # 2. Nous Portal + +def _try_nous() -> Tuple[Optional[OpenAI], Optional[str]]: nous = _read_nous_auth() - if nous: - global auxiliary_is_nous - auxiliary_is_nous = True - logger.debug("Auxiliary text client: Nous Portal") - return ( - OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()), - _NOUS_MODEL, - ) + if not nous: + return None, None + global auxiliary_is_nous + auxiliary_is_nous = True + logger.debug("Auxiliary client: Nous Portal") + return ( + OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()), + _NOUS_MODEL, + ) - # 3. Custom endpoint (both base URL and key must be set) + +def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]: custom_base = os.getenv("OPENAI_BASE_URL") custom_key = os.getenv("OPENAI_API_KEY") - if custom_base and custom_key: - model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini" - logger.debug("Auxiliary text client: custom endpoint (%s)", model) - return OpenAI(api_key=custom_key, base_url=custom_base), model + if not custom_base or not custom_key: + return None, None + model = os.getenv("OPENAI_MODEL") or os.getenv("LLM_MODEL") or "gpt-4o-mini" + logger.debug("Auxiliary client: custom endpoint (%s)", model) + return OpenAI(api_key=custom_key, base_url=custom_base), model - # 4. 
Codex OAuth -- uses the Responses API (only endpoint the token - # can access), wrapped to look like a chat.completions client. + +def _try_codex() -> Tuple[Optional[Any], Optional[str]]: codex_token = _read_codex_access_token() - if codex_token: - logger.debug("Auxiliary text client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL) - real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL) - return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL + if not codex_token: + return None, None + logger.debug("Auxiliary client: Codex OAuth (%s via Responses API)", _CODEX_AUX_MODEL) + real_client = OpenAI(api_key=codex_token, base_url=_CODEX_AUX_BASE_URL) + return CodexAuxiliaryClient(real_client, _CODEX_AUX_MODEL), _CODEX_AUX_MODEL - # 5. Direct API-key providers (z.ai/GLM, Kimi/Moonshot, MiniMax, etc.) - api_client, api_model = _resolve_api_key_provider() - if api_client is not None: - return api_client, api_model - # 6. Nothing available - logger.debug("Auxiliary text client: none available") +def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[str]]: + """Resolve a specific forced provider. Returns (None, None) if creds missing.""" + if forced == "openrouter": + client, model = _try_openrouter() + if client is None: + logger.warning("auxiliary.provider=openrouter but OPENROUTER_API_KEY not set") + return client, model + + if forced == "nous": + client, model = _try_nous() + if client is None: + logger.warning("auxiliary.provider=nous but Nous Portal not configured (run: hermes login)") + return client, model + + if forced == "codex": + client, model = _try_codex() + if client is None: + logger.warning("auxiliary.provider=codex but no Codex OAuth token found (run: hermes model)") + return client, model + + if forced == "main": + # "main" = skip OpenRouter/Nous, use the main chat model's credentials. 
+ for try_fn in (_try_custom_endpoint, _try_codex, _resolve_api_key_provider): + client, model = try_fn() + if client is not None: + return client, model + logger.warning("auxiliary.provider=main but no main endpoint credentials found") + return None, None + + # Unknown provider name — fall through to auto + logger.warning("Unknown auxiliary.provider=%r, falling back to auto", forced) return None, None -def get_async_text_auxiliary_client(): +def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]: + """Full auto-detection chain: OpenRouter → Nous → custom → Codex → API-key → None.""" + for try_fn in (_try_openrouter, _try_nous, _try_custom_endpoint, + _try_codex, _resolve_api_key_provider): + client, model = try_fn() + if client is not None: + return client, model + logger.debug("Auxiliary client: none available") + return None, None + + +# ── Public API ────────────────────────────────────────────────────────────── + +def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]: + """Return (client, default_model_slug) for text-only auxiliary tasks. + + Args: + task: Optional task name ("compression", "web_extract") to check + for a task-specific provider override. + + Callers may override the returned model with a per-task env var + (e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL). + """ + forced = _get_auxiliary_provider(task) + if forced != "auto": + return _resolve_forced_provider(forced) + return _resolve_auto() + + +def get_async_text_auxiliary_client(task: str = ""): """Return (async_client, model_slug) for async consumers. For standard providers returns (AsyncOpenAI, model). 
For Codex returns @@ -398,7 +526,7 @@ def get_async_text_auxiliary_client(): """ from openai import AsyncOpenAI - sync_client, model = get_text_auxiliary_client() + sync_client, model = get_text_auxiliary_client(task) if sync_client is None: return None, None @@ -417,29 +545,27 @@ def get_async_text_auxiliary_client(): def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]: - """Return (client, model_slug) for vision/multimodal auxiliary tasks. + """Return (client, default_model_slug) for vision/multimodal auxiliary tasks. - Only OpenRouter and Nous Portal qualify — custom endpoints cannot - substitute for Gemini multimodal. + Checks AUXILIARY_VISION_PROVIDER for a forced provider, otherwise + auto-detects. Callers may override the returned model with + AUXILIARY_VISION_MODEL. + + In auto mode, only providers known to support multimodal are tried: + OpenRouter, Nous Portal, and Codex OAuth (gpt-5.3-codex supports + vision via the Responses API). Custom endpoints and API-key + providers are skipped — they may not handle vision input. To use + them, set AUXILIARY_VISION_PROVIDER explicitly. """ - # 1. OpenRouter - or_key = os.getenv("OPENROUTER_API_KEY") - if or_key: - logger.debug("Auxiliary vision client: OpenRouter") - return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL, - default_headers=_OR_HEADERS), _OPENROUTER_MODEL - - # 2. Nous Portal - nous = _read_nous_auth() - if nous: - logger.debug("Auxiliary vision client: Nous Portal") - return ( - OpenAI(api_key=_nous_api_key(nous), base_url=_nous_base_url()), - _NOUS_MODEL, - ) - - # 3. 
Nothing suitable - logger.debug("Auxiliary vision client: none available") + forced = _get_auxiliary_provider("vision") + if forced != "auto": + return _resolve_forced_provider(forced) + # Auto: only multimodal-capable providers + for try_fn in (_try_openrouter, _try_nous, _try_codex): + client, model = try_fn() + if client is not None: + return client, model + logger.debug("Auxiliary vision client: none available (auto only tries OpenRouter/Nous/Codex)") return None, None diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 35897cccd1..01aa2af804 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -53,7 +53,7 @@ class ContextCompressor: self.last_completion_tokens = 0 self.last_total_tokens = 0 - self.client, default_model = get_text_auxiliary_client() + self.client, default_model = get_text_auxiliary_client("compression") self.summary_model = summary_model_override or default_model def update_from_response(self, usage: Dict[str, Any]): @@ -342,7 +342,9 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.""" compressed.append(msg) if summary: - compressed.append({"role": "user", "content": summary}) + last_head_role = messages[compress_start - 1].get("role", "user") if compress_start > 0 else "user" + summary_role = "user" if last_head_role in ("assistant", "tool") else "assistant" + compressed.append({"role": summary_role, "content": summary}) else: if not self.quiet_mode: print(" ⚠️ No summary model available — middle turns dropped without summary") diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index c933ffe675..c2aedc19f1 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -122,6 +122,15 @@ PLATFORM_HINTS = { "attachments, audio as file attachments. You can also include image URLs " "in markdown format ![alt](url) and they will be uploaded as attachments." ), + "signal": ( + "You are on a text messaging communication platform, Signal. 
" + "Please do not use markdown as it does not render. " + "You can send media files natively: to deliver a file to the user, " + "include MEDIA:/absolute/path/to/file in your response. Images " + "(.png, .jpg, .webp) appear as photos, audio as attachments, and other " + "files arrive as downloadable documents. You can also include image " + "URLs in markdown format ![alt](url) and they will be sent as photos." + ), "cli": ( "You are a CLI AI Agent. Try not to use markdown but simple text " "renderable inside a terminal." diff --git a/agent/redact.py b/agent/redact.py index 22f1a547fb..02700c8327 100644 --- a/agent/redact.py +++ b/agent/redact.py @@ -8,6 +8,7 @@ the first 6 and last 4 characters for debuggability. """ import logging +import os import re from typing import Optional @@ -15,7 +16,7 @@ logger = logging.getLogger(__name__) # Known API key prefixes -- match the prefix + contiguous token chars _PREFIX_PATTERNS = [ - r"sk-[A-Za-z0-9_-]{10,}", # OpenAI / OpenRouter + r"sk-[A-Za-z0-9_-]{10,}", # OpenAI / OpenRouter / Anthropic (sk-ant-*) r"ghp_[A-Za-z0-9]{10,}", # GitHub PAT (classic) r"github_pat_[A-Za-z0-9_]{10,}", # GitHub PAT (fine-grained) r"xox[baprs]-[A-Za-z0-9-]{10,}", # Slack tokens @@ -25,6 +26,18 @@ _PREFIX_PATTERNS = [ r"fc-[A-Za-z0-9]{10,}", # Firecrawl r"bb_live_[A-Za-z0-9_-]{10,}", # BrowserBase r"gAAAA[A-Za-z0-9_=-]{20,}", # Codex encrypted tokens + r"AKIA[A-Z0-9]{16}", # AWS Access Key ID + r"sk_live_[A-Za-z0-9]{10,}", # Stripe secret key (live) + r"sk_test_[A-Za-z0-9]{10,}", # Stripe secret key (test) + r"rk_live_[A-Za-z0-9]{10,}", # Stripe restricted key + r"SG\.[A-Za-z0-9_-]{10,}", # SendGrid API key + r"hf_[A-Za-z0-9]{10,}", # HuggingFace token + r"r8_[A-Za-z0-9]{10,}", # Replicate API token + r"npm_[A-Za-z0-9]{10,}", # npm access token + r"pypi-[A-Za-z0-9_-]{10,}", # PyPI API token + r"dop_v1_[A-Za-z0-9]{10,}", # DigitalOcean PAT + r"doo_v1_[A-Za-z0-9]{10,}", # DigitalOcean OAuth + r"am_[A-Za-z0-9_-]{10,}", # AgentMail API key ] # ENV 
assignment patterns: KEY=value where KEY contains a secret-like name @@ -52,6 +65,22 @@ _TELEGRAM_RE = re.compile( r"(bot)?(\d{8,}):([-A-Za-z0-9_]{30,})", ) +# Private key blocks: -----BEGIN RSA PRIVATE KEY----- ... -----END RSA PRIVATE KEY----- +_PRIVATE_KEY_RE = re.compile( + r"-----BEGIN[A-Z ]*PRIVATE KEY-----[\s\S]*?-----END[A-Z ]*PRIVATE KEY-----" +) + +# Database connection strings: protocol://user:PASSWORD@host +# Catches postgres, mysql, mongodb, redis, amqp URLs and redacts the password +_DB_CONNSTR_RE = re.compile( + r"((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:]+:)([^@]+)(@)", + re.IGNORECASE, +) + +# E.164 phone numbers: +, 7-15 digits +# Negative lookahead prevents matching hex strings or identifiers +_SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])") + # Compile known prefix patterns into one alternation _PREFIX_RE = re.compile( r"(? str: """Apply all redaction patterns to a block of text. Safe to call on any string -- non-matching text passes through unchanged. + Disabled when security.redact_secrets is false in config.yaml. """ if not text: return text + if os.getenv("HERMES_REDACT_SECRETS", "").lower() in ("0", "false", "no", "off"): + return text # Known prefixes (sk-, ghp_, etc.) 
text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text) @@ -101,6 +133,20 @@ def redact_sensitive_text(text: str) -> str: return f"{prefix}{digits}:***" text = _TELEGRAM_RE.sub(_redact_telegram, text) + # Private key blocks + text = _PRIVATE_KEY_RE.sub("[REDACTED PRIVATE KEY]", text) + + # Database connection string passwords + text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text) + + # E.164 phone numbers (Signal, WhatsApp) + def _redact_phone(m): + phone = m.group(1) + if len(phone) <= 8: + return phone[:2] + "****" + phone[-2:] + return phone[:4] + "****" + phone[-4:] + text = _SIGNAL_PHONE_RE.sub(_redact_phone, text) + return text diff --git a/cli-config.yaml.example b/cli-config.yaml.example index dfbaeee6b3..ec7ccb6209 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -209,8 +209,58 @@ compression: threshold: 0.85 # Model to use for generating summaries (fast/cheap recommended) - # This model compresses the middle turns into a concise summary + # This model compresses the middle turns into a concise summary. + # IMPORTANT: it receives the full middle section of the conversation, so it + # MUST support a context length at least as large as your main model's. summary_model: "google/gemini-3-flash-preview" + + # Provider for the summary model (default: "auto") + # Options: "auto", "openrouter", "nous", "main" + # summary_provider: "auto" + +# ============================================================================= +# Auxiliary Models (Advanced — Experimental) +# ============================================================================= +# Hermes uses lightweight "auxiliary" models for side tasks: image analysis, +# browser screenshot analysis, web page summarization, and context compression. +# +# By default these use Gemini Flash via OpenRouter or Nous Portal and are +# auto-detected from your credentials. You do NOT need to change anything +# here for normal usage. 
+# +# WARNING: Overriding these with providers other than OpenRouter or Nous Portal +# is EXPERIMENTAL and may not work. Not all models/providers support vision, +# produce usable summaries, or accept the same API format. Change at your own +# risk — if things break, reset to "auto" / empty values. +# +# Each task has its own provider + model pair so you can mix providers. +# For example: OpenRouter for vision (needs multimodal), but your main +# local endpoint for compression (just needs text). +# +# Provider options: +# "auto" - Best available: OpenRouter → Nous Portal → main endpoint (default) +# "openrouter" - Force OpenRouter (requires OPENROUTER_API_KEY) +# "nous" - Force Nous Portal (requires: hermes login) +# "codex" - Force Codex OAuth (requires: hermes model → Codex). +# Uses gpt-5.3-codex which supports vision. +# "main" - Use your custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY). +# Works with OpenAI API, local models, or any OpenAI-compatible +# endpoint. Also falls back to Codex OAuth and API-key providers. +# +# Model: leave empty to use the provider's default. When empty, OpenRouter +# uses "google/gemini-3-flash-preview" and Nous uses "gemini-3-flash". +# Other providers pick a sensible default automatically. +# +# auxiliary: +# # Image analysis: vision_analyze tool + browser screenshots +# vision: +# provider: "auto" +# model: "" # e.g. "google/gemini-2.5-flash", "openai/gpt-4o" +# +# # Web page scraping / summarization + browser page text extraction +# web_extract: +# provider: "auto" +# model: "" # ============================================================================= # Persistent Memory @@ -585,3 +635,8 @@ display: # verbose: Full args, results, and debug logs (same as /verbose) # Toggle at runtime with /verbose in the CLI tool_progress: all + + # Play terminal bell when agent finishes a response. + # Useful for long-running tasks — your terminal will ding when the agent is done. + # Works over SSH. 
Most terminals can be configured to flash the taskbar or play a sound. + bell_on_complete: false diff --git a/cli.py b/cli.py index a326c93db2..a63e6053c8 100755 --- a/cli.py +++ b/cli.py @@ -161,6 +161,7 @@ def load_cli_config() -> Dict[str, Any]: }, "browser": { "inactivity_timeout": 120, # Auto-cleanup inactive browser sessions after 2 min + "record_sessions": False, # Auto-record browser sessions as WebM videos }, "compression": { "enabled": True, # Auto-compress when approaching context limit @@ -193,6 +194,7 @@ def load_cli_config() -> Dict[str, Any]: "toolsets": ["all"], "display": { "compact": False, + "resume_display": "full", }, "clarify": { "timeout": 120, # Seconds to wait for a clarify answer before auto-proceeding @@ -332,12 +334,43 @@ def load_cli_config() -> Dict[str, Any]: "enabled": "CONTEXT_COMPRESSION_ENABLED", "threshold": "CONTEXT_COMPRESSION_THRESHOLD", "summary_model": "CONTEXT_COMPRESSION_MODEL", + "summary_provider": "CONTEXT_COMPRESSION_PROVIDER", } for config_key, env_var in compression_env_mappings.items(): if config_key in compression_config: os.environ[env_var] = str(compression_config[config_key]) + # Apply auxiliary model overrides to environment variables. + # Vision and web_extract each have their own provider + model pair. + # (Compression is handled in the compression section above.) + # Only set env vars for non-empty / non-default values so auto-detection + # still works. 
+ auxiliary_config = defaults.get("auxiliary", {}) + auxiliary_task_env = { + # config key → (provider env var, model env var) + "vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"), + "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"), + } + + for task_key, (prov_env, model_env) in auxiliary_task_env.items(): + task_cfg = auxiliary_config.get(task_key, {}) + if not isinstance(task_cfg, dict): + continue + prov = str(task_cfg.get("provider", "")).strip() + model = str(task_cfg.get("model", "")).strip() + if prov and prov != "auto": + os.environ[prov_env] = prov + if model: + os.environ[model_env] = model + + # Security settings + security_config = defaults.get("security", {}) + if isinstance(security_config, dict): + redact = security_config.get("redact_secrets") + if redact is not None: + os.environ["HERMES_REDACT_SECRETS"] = str(redact).lower() + return defaults # Load configuration at module startup @@ -429,7 +462,8 @@ def _setup_worktree(repo_root: str = None) -> Optional[Dict[str, str]]: repo_root = repo_root or _git_repo_root() if not repo_root: - print("\033[33m⚠ --worktree: not inside a git repository, skipping.\033[0m") + print("\033[31m✗ --worktree requires being inside a git repository.\033[0m") + print(" cd into your project repo first, then run hermes -w") return None short_id = uuid.uuid4().hex[:8] @@ -1007,11 +1041,19 @@ class HermesCLI: self.compact = compact if compact is not None else CLI_CONFIG["display"].get("compact", False) # tool_progress: "off", "new", "all", "verbose" (from config.yaml display section) self.tool_progress_mode = CLI_CONFIG["display"].get("tool_progress", "all") + # resume_display: "full" (show history) | "minimal" (one-liner only) + self.resume_display = CLI_CONFIG["display"].get("resume_display", "full") + # bell_on_complete: play terminal bell (\a) when agent finishes a response + self.bell_on_complete = CLI_CONFIG["display"].get("bell_on_complete", False) self.verbose = verbose 
if verbose is not None else (self.tool_progress_mode == "verbose") # Configuration - priority: CLI args > env vars > config file # Model can come from: CLI arg, LLM_MODEL env, OPENAI_MODEL env (custom endpoint), or config self.model = model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or CLI_CONFIG["model"]["default"] + # Track whether model was explicitly chosen by the user or fell back + # to the global default. Provider-specific normalisation may override + # the default silently but should warn when overriding an explicit choice. + self._model_is_default = not (model or os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL")) self._explicit_api_key = api_key self._explicit_base_url = base_url @@ -1086,6 +1128,10 @@ class HermesCLI: self._provider_require_params = pr.get("require_parameters", False) self._provider_data_collection = pr.get("data_collection") + # Fallback model config — tried when primary provider fails after retries + fb = CLI_CONFIG.get("fallback_model") or {} + self._fallback_model = fb if fb.get("provider") and fb.get("model") else None + # Agent will be initialized on first use self.agent: Optional[AIAgent] = None self._app = None # prompt_toolkit Application (set in run()) @@ -1094,6 +1140,16 @@ class HermesCLI: self.conversation_history: List[Dict[str, Any]] = [] self.session_start = datetime.now() self._resumed = False + # Initialize SQLite session store early so /title works before first message + self._session_db = None + try: + from hermes_state import SessionDB + self._session_db = SessionDB() + except Exception: + pass + + # Deferred title: stored in memory until the session is created in the DB + self._pending_title: Optional[str] = None # Session ID: reuse existing one when resuming, otherwise generate fresh if resume: @@ -1116,6 +1172,60 @@ class HermesCLI: self._last_invalidate = now self._app.invalidate() + def _normalize_model_for_provider(self, resolved_provider: str) -> bool: + """Strip provider prefixes and swap the 
default model for Codex. + + When the resolved provider is ``openai-codex``: + + 1. Strip any ``provider/`` prefix (the Codex Responses API only + accepts bare model slugs like ``gpt-5.4``, not ``openai/gpt-5.4``). + 2. If the active model is still the *untouched default* (user never + explicitly chose a model), replace it with a Codex-compatible + default so the first session doesn't immediately error. + + If the user explicitly chose a model — *any* model — we trust them + and let the API be the judge. No allowlists, no slug checks. + + Returns True when the active model was changed. + """ + if resolved_provider != "openai-codex": + return False + + current_model = (self.model or "").strip() + changed = False + + # 1. Strip provider prefix ("openai/gpt-5.4" → "gpt-5.4") + if "/" in current_model: + slug = current_model.split("/", 1)[1] + if not self._model_is_default: + self.console.print( + f"[yellow]⚠️ Stripped provider prefix from '{current_model}'; " + f"using '{slug}' for OpenAI Codex.[/]" + ) + self.model = slug + current_model = slug + changed = True + + # 2. Replace untouched default with a Codex model + if self._model_is_default: + fallback_model = "gpt-5.3-codex" + try: + from hermes_cli.codex_models import get_codex_model_ids + + available = get_codex_model_ids( + access_token=self.api_key if self.api_key else None, + ) + if available: + fallback_model = available[0] + except Exception: + pass + + if current_model != fallback_model: + self.model = fallback_model + changed = True + + return changed + def _ensure_runtime_credentials(self) -> bool: """ Ensure runtime credentials are resolved before agent use. @@ -1161,8 +1271,13 @@ class HermesCLI: self.api_key = api_key self.base_url = base_url - # AIAgent/OpenAI client holds auth at init time, so rebuild if key rotated - if (credentials_changed or routing_changed) and self.agent is not None: + # Normalize model for the resolved provider (e.g. swap non-Codex + # models when provider is openai-codex). 
Fixes #651. + model_changed = self._normalize_model_for_provider(resolved_provider) + + # AIAgent/OpenAI client holds auth at init time, so rebuild if key, + # routing, or the effective model changed. + if (credentials_changed or routing_changed or model_changed) and self.agent is not None: self.agent = None return True @@ -1181,16 +1296,19 @@ class HermesCLI: if not self._ensure_runtime_credentials(): return False - # Initialize SQLite session store for CLI sessions - self._session_db = None - try: - from hermes_state import SessionDB - self._session_db = SessionDB() - except Exception as e: - logger.debug("SQLite session store not available: %s", e) + # Initialize SQLite session store for CLI sessions (if not already done in __init__) + if self._session_db is None: + try: + from hermes_state import SessionDB + self._session_db = SessionDB() + except Exception as e: + logger.debug("SQLite session store not available: %s", e) - # If resuming, validate the session exists and load its history - if self._resumed and self._session_db: + # If resuming, validate the session exists and load its history. + # _preload_resumed_session() may have already loaded it (called from + # run() for immediate display). In that case, conversation_history + # is non-empty and we skip the DB round-trip. 
+ if self._resumed and self._session_db and not self.conversation_history: session_meta = self._session_db.get_session(self.session_id) if not session_meta: _cprint(f"\033[1;31mSession not found: {self.session_id}{_RST}") @@ -1200,8 +1318,11 @@ class HermesCLI: if restored: self.conversation_history = restored msg_count = len([m for m in restored if m.get("role") == "user"]) + title_part = "" + if session_meta.get("title"): + title_part = f" \"{session_meta['title']}\"" _cprint( - f"{_GOLD}↻ Resumed session {_BOLD}{self.session_id}{_RST}{_GOLD} " + f"{_GOLD}↻ Resumed session {_BOLD}{self.session_id}{_RST}{_GOLD}{title_part} " f"({msg_count} user message{'s' if msg_count != 1 else ''}, " f"{len(restored)} total messages){_RST}" ) @@ -1242,7 +1363,17 @@ class HermesCLI: session_db=self._session_db, clarify_callback=self._clarify_callback, honcho_session_key=self.session_id, + fallback_model=self._fallback_model, ) + # Apply any pending title now that the session exists in the DB + if self._pending_title and self._session_db: + try: + self._session_db.set_session_title(self.session_id, self._pending_title) + _cprint(f" Session title applied: {self._pending_title}") + self._pending_title = None + except (ValueError, Exception) as e: + _cprint(f" Could not apply pending title: {e}") + self._pending_title = None return True except Exception as e: self.console.print(f"[bold red]Failed to initialize agent: {e}[/]") @@ -1282,7 +1413,202 @@ class HermesCLI: self._show_tool_availability_warnings() self.console.print() - + + def _preload_resumed_session(self) -> bool: + """Load a resumed session's history from the DB early (before first chat). + + Called from run() so the conversation history is available for display + before the user sends their first message. Sets + ``self.conversation_history`` and prints the one-liner status. Returns + True if history was loaded, False otherwise. 
+ + The corresponding block in ``_init_agent()`` checks whether history is + already populated and skips the DB round-trip. + """ + if not self._resumed or not self._session_db: + return False + + session_meta = self._session_db.get_session(self.session_id) + if not session_meta: + self.console.print( + f"[bold red]Session not found: {self.session_id}[/]" + ) + self.console.print( + "[dim]Use a session ID from a previous CLI run " + "(hermes sessions list).[/]" + ) + return False + + restored = self._session_db.get_messages_as_conversation(self.session_id) + if restored: + self.conversation_history = restored + msg_count = len([m for m in restored if m.get("role") == "user"]) + title_part = "" + if session_meta.get("title"): + title_part = f' "{session_meta["title"]}"' + self.console.print( + f"[#DAA520]↻ Resumed session [bold]{self.session_id}[/bold]" + f"{title_part} " + f"({msg_count} user message{'s' if msg_count != 1 else ''}, " + f"{len(restored)} total messages)[/]" + ) + else: + self.console.print( + f"[#DAA520]Session {self.session_id} found but has no " + f"messages. Starting fresh.[/]" + ) + return False + + # Re-open the session (clear ended_at so it's active again) + try: + self._session_db._conn.execute( + "UPDATE sessions SET ended_at = NULL, end_reason = NULL " + "WHERE id = ?", + (self.session_id,), + ) + self._session_db._conn.commit() + except Exception: + pass + + return True + + def _display_resumed_history(self): + """Render a compact recap of previous conversation messages. + + Uses Rich markup with dim/muted styling so the recap is visually + distinct from the active conversation. Caps the display at the + last ``MAX_DISPLAY_EXCHANGES`` user/assistant exchanges and shows + an indicator for earlier hidden messages. 
+ """ + if not self.conversation_history: + return + + # Check config: resume_display setting + if self.resume_display == "minimal": + return + + MAX_DISPLAY_EXCHANGES = 10 # max user+assistant pairs to show + MAX_USER_LEN = 300 # truncate user messages + MAX_ASST_LEN = 200 # truncate assistant text + MAX_ASST_LINES = 3 # max lines of assistant text + + def _strip_reasoning(text: str) -> str: + """Remove <REASONING_SCRATCHPAD>...</REASONING_SCRATCHPAD> blocks + from displayed text (reasoning model internal thoughts).""" + import re + cleaned = re.sub( + r"<REASONING_SCRATCHPAD>.*?</REASONING_SCRATCHPAD>\s*", + "", text, flags=re.DOTALL, + ) + # Also strip unclosed reasoning tags at the end + cleaned = re.sub( + r"<REASONING_SCRATCHPAD>.*$", + "", cleaned, flags=re.DOTALL, + ) + return cleaned.strip() + + # Collect displayable entries (skip system, tool-result messages) + entries = [] # list of (role, display_text) + for msg in self.conversation_history: + role = msg.get("role", "") + content = msg.get("content") + tool_calls = msg.get("tool_calls") or [] + + if role == "system": + continue + if role == "tool": + continue + + if role == "user": + text = "" if content is None else str(content) + # Handle multimodal content (list of dicts) + if isinstance(content, list): + parts = [] + for part in content: + if isinstance(part, dict) and part.get("type") == "text": + parts.append(part.get("text", "")) + elif isinstance(part, dict) and part.get("type") == "image_url": + parts.append("[image]") + text = " ".join(parts) + if len(text) > MAX_USER_LEN: + text = text[:MAX_USER_LEN] + "..." + entries.append(("user", text)) + + elif role == "assistant": + text = "" if content is None else str(content) + text = _strip_reasoning(text) + parts = [] + if text: + lines = text.splitlines() + if len(lines) > MAX_ASST_LINES: + text = "\n".join(lines[:MAX_ASST_LINES]) + " ..." + if len(text) > MAX_ASST_LEN: + text = text[:MAX_ASST_LEN] + "..."
+ parts.append(text) + if tool_calls: + tc_count = len(tool_calls) + # Extract tool names + names = [] + for tc in tool_calls: + fn = tc.get("function", {}) + name = fn.get("name", "unknown") if isinstance(fn, dict) else "unknown" + if name not in names: + names.append(name) + names_str = ", ".join(names[:4]) + if len(names) > 4: + names_str += ", ..." + noun = "call" if tc_count == 1 else "calls" + parts.append(f"[{tc_count} tool {noun}: {names_str}]") + if not parts: + # Skip pure-reasoning messages that have no visible output + continue + entries.append(("assistant", " ".join(parts))) + + if not entries: + return + + # Determine if we need to truncate + skipped = 0 + if len(entries) > MAX_DISPLAY_EXCHANGES * 2: + skipped = len(entries) - MAX_DISPLAY_EXCHANGES * 2 + entries = entries[skipped:] + + # Build the display using Rich + from rich.panel import Panel + from rich.text import Text + + lines = Text() + if skipped: + lines.append( + f" ... {skipped} earlier messages ...\n\n", + style="dim italic", + ) + + for i, (role, text) in enumerate(entries): + if role == "user": + lines.append(" ● You: ", style="dim bold #DAA520") + # Show first line inline, indent rest + msg_lines = text.splitlines() + lines.append(msg_lines[0] + "\n", style="dim") + for ml in msg_lines[1:]: + lines.append(f" {ml}\n", style="dim") + else: + lines.append(" ◆ Hermes: ", style="dim bold #8FBC8F") + msg_lines = text.splitlines() + lines.append(msg_lines[0] + "\n", style="dim") + for ml in msg_lines[1:]: + lines.append(f" {ml}\n", style="dim") + if i < len(entries) - 1: + lines.append("") # small gap + + panel = Panel( + lines, + title="[dim #DAA520]Previous Conversation[/]", + border_style="dim #8B8682", + padding=(0, 1), + ) + self.console.print(panel) + def _try_attach_clipboard_image(self) -> bool: """Check clipboard for an image and attach it if found. @@ -2091,6 +2417,55 @@ class HermesCLI: print(" ✨ (◕‿◕)✨ Fresh start! 
Screen cleared and conversation reset.\n") elif cmd_lower == "/history": self.show_history() + elif cmd_lower.startswith("/title"): + parts = cmd_original.split(maxsplit=1) + if len(parts) > 1: + raw_title = parts[1].strip() + if raw_title: + if self._session_db: + # Sanitize the title early so feedback matches what gets stored + try: + from hermes_state import SessionDB + new_title = SessionDB.sanitize_title(raw_title) + except ValueError as e: + _cprint(f" {e}") + new_title = None + if not new_title: + _cprint(" Title is empty after cleanup. Please use printable characters.") + elif self._session_db.get_session(self.session_id): + # Session exists in DB — set title directly + try: + if self._session_db.set_session_title(self.session_id, new_title): + _cprint(f" Session title set: {new_title}") + else: + _cprint(" Session not found in database.") + except ValueError as e: + _cprint(f" {e}") + else: + # Session not created yet — defer the title + # Check uniqueness proactively with the sanitized title + existing = self._session_db.get_session_by_title(new_title) + if existing: + _cprint(f" Title '{new_title}' is already in use by session {existing['id']}") + else: + self._pending_title = new_title + _cprint(f" Session title queued: {new_title} (will be saved on first message)") + else: + _cprint(" Session database not available.") + else: + _cprint(" Usage: /title ") + else: + # Show current title if no argument given + if self._session_db: + session = self._session_db.get_session(self.session_id) + if session and session.get("title"): + _cprint(f" Session title: {session['title']}") + elif self._pending_title: + _cprint(f" Session title (pending): {self._pending_title}") + else: + _cprint(f" No title set. Usage: /title ") + else: + _cprint(" Session database not available.") elif cmd_lower in ("/reset", "/new"): self.reset_conversation() elif cmd_lower.startswith("/model"): @@ -2760,6 +3135,12 @@ class HermesCLI: # nothing can interleave between the box borders. 
_cprint(f"\n{top}\n{response}\n\n{bot}") + # Play terminal bell when agent finishes (if enabled). + # Works over SSH — the bell propagates to the user's terminal. + if self.bell_on_complete: + sys.stdout.write("\a") + sys.stdout.flush() + # Combine all interrupt messages (user may have typed multiple while waiting) # and re-queue as one prompt for process_loop if pending_message and hasattr(self, '_pending_input'): @@ -2810,6 +3191,13 @@ class HermesCLI: def run(self): """Run the interactive CLI loop with persistent input at bottom.""" self.show_banner() + + # If resuming a session, load history and display it immediately + # so the user has context before typing their first message. + if self._resumed: + if self._preload_resumed_session(): + self._display_resumed_history() + self.console.print("[#FFF8DC]Welcome to Hermes Agent! Type your message or /help for commands.[/]") self.console.print() @@ -3673,6 +4061,10 @@ def main( _active_worktree = wt_info os.environ["TERMINAL_CWD"] = wt_info["path"] atexit.register(_cleanup_worktree, wt_info) + else: + # Worktree was explicitly requested but setup failed — + # don't silently run without isolation. + return else: wt_info = None diff --git a/cron/scheduler.py b/cron/scheduler.py index 4dfc91e097..1f96d6443b 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -98,6 +98,7 @@ def _deliver_result(job: dict, content: str) -> None: "discord": Platform.DISCORD, "slack": Platform.SLACK, "whatsapp": Platform.WHATSAPP, + "signal": Platform.SIGNAL, } platform = platform_map.get(platform_name.lower()) if not platform: diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index a134266b09..0000000000 --- a/docs/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# Documentation - -All documentation has moved to the website: - -**📖 [hermes-agent.nousresearch.com/docs](https://hermes-agent.nousresearch.com/docs/)** - -The documentation source files live in [`website/docs/`](../website/docs/). 
diff --git a/docs/send_file_integration_map.md b/docs/send_file_integration_map.md deleted file mode 100644 index e0b1ca769c..0000000000 --- a/docs/send_file_integration_map.md +++ /dev/null @@ -1,345 +0,0 @@ -# send_file Integration Map — Hermes Agent Codebase Deep Dive - -## 1. environments/tool_context.py — Base64 File Transfer Implementation - -### upload_file() (lines 153-205) -- Reads local file as raw bytes, base64-encodes to ASCII string -- Creates parent dirs in sandbox via `self.terminal(f"mkdir -p {parent}")` -- **Chunk size:** 60,000 chars (~60KB per shell command) -- **Small files (<=60KB b64):** Single `printf '%s' '{b64}' | base64 -d > {remote_path}` -- **Large files:** Writes chunks to `/tmp/_hermes_upload.b64` via `printf >> append`, then `base64 -d` to target -- **Error handling:** Checks local file exists; returns `{exit_code, output}` -- **Size limits:** No explicit limit, but shell arg limit ~2MB means chunking is necessary for files >~45KB raw -- **No theoretical max** — but very large files would be slow (many terminal round trips) - -### download_file() (lines 234-278) -- Runs `base64 {remote_path}` inside sandbox, captures stdout -- Strips output, base64-decodes to raw bytes -- Writes to host filesystem with parent dir creation -- **Error handling:** Checks exit code, empty output, decode errors -- Returns `{success: bool, bytes: int}` or `{success: false, error: str}` -- **Size limit:** Bounded by terminal output buffer (practical limit ~few MB via base64 terminal output) - -### Promotion potential: -- These methods work via `self.terminal()` — they're environment-agnostic -- Could be directly lifted into a new tool that operates on the agent's current sandbox -- For send_file, this `download_file()` pattern is the key: it extracts files from sandbox → host - -## 2. 
tools/environments/base.py — BaseEnvironment Interface - -### Current methods: -- `execute(command, cwd, timeout, stdin_data)` → `{output, returncode}` -- `cleanup()` — release resources -- `stop()` — alias for cleanup -- `_prepare_command()` — sudo transformation -- `_build_run_kwargs()` — subprocess kwargs -- `_timeout_result()` — standard timeout dict - -### What would need to be added for file transfer: -- **Nothing required at this level.** File transfer can be implemented via `execute()` (base64 over terminal, like ToolContext does) or via environment-specific methods. -- Optional: `upload_file(local_path, remote_path)` and `download_file(remote_path, local_path)` methods could be added to BaseEnvironment for optimized per-backend transfers, but the base64-over-terminal approach already works universally. - -## 3. tools/environments/docker.py — Docker Container Details - -### Container ID tracking: -- `self._container_id` stored at init from `self._inner.container_id` -- Inner is `minisweagent.environments.docker.DockerEnvironment` -- Container ID is a standard Docker container hash - -### docker cp feasibility: -- **YES**, `docker cp` could be used for optimized file transfer: - - `docker cp {container_id}:{remote_path} {local_path}` (download) - - `docker cp {local_path} {container_id}:{remote_path}` (upload) -- Much faster than base64-over-terminal for large files -- Container ID is directly accessible via `env._container_id` or `env._inner.container_id` - -### Volumes mounted: -- **Persistent mode:** Bind mounts at `~/.hermes/sandboxes/docker/{task_id}/workspace` → `/workspace` and `.../home` → `/root` -- **Ephemeral mode:** tmpfs at `/workspace` (10GB), `/home` (1GB), `/root` (1GB) -- **User volumes:** From `config.yaml docker_volumes` (arbitrary `-v` mounts) -- **Security tmpfs:** `/tmp` (512MB), `/var/tmp` (256MB), `/run` (64MB) - -### Direct host access for persistent mode: -- If persistent, files at `/workspace/foo.txt` are just 
`~/.hermes/sandboxes/docker/{task_id}/workspace/foo.txt` on host — no transfer needed! - -## 4. tools/environments/ssh.py — SSH Connection Management - -### Connection management: -- Uses SSH ControlMaster for persistent connection -- Control socket at `/tmp/hermes-ssh/{user}@{host}:{port}.sock` -- ControlPersist=300 (5 min keepalive) -- BatchMode=yes (non-interactive) -- Stores: `self.host`, `self.user`, `self.port`, `self.key_path` - -### SCP/SFTP feasibility: -- **YES**, SCP can piggyback on the ControlMaster socket: - - `scp -o ControlPath={socket} {user}@{host}:{remote} {local}` (download) - - `scp -o ControlPath={socket} {local} {user}@{host}:{remote}` (upload) -- Same SSH key and connection reuse — zero additional auth -- Would be much faster than base64-over-terminal for large files - -## 5. tools/environments/modal.py — Modal Sandbox Filesystem - -### Filesystem API exposure: -- **Not directly.** The inner `SwerexModalEnvironment` wraps Modal's sandbox -- The sandbox object is accessible at: `env._inner.deployment._sandbox` -- Modal's Python SDK exposes `sandbox.open()` for file I/O — but only via async API -- Currently only used for `snapshot_filesystem()` during cleanup -- **Could use:** `sandbox.open(path, "rb")` to read files or `sandbox.open(path, "wb")` to write -- **Alternative:** Base64-over-terminal already works via `execute()` — simpler, no SDK dependency - -## 6. 
gateway/platforms/base.py — MEDIA: Tag Flow (Complete) - -### extract_media() (lines 587-620): -- **Pattern:** `MEDIA:\S+` — extracts file paths after MEDIA: prefix -- **Voice flag:** `[[audio_as_voice]]` global directive sets `is_voice=True` for all media in message -- Returns `List[Tuple[str, bool]]` (path, is_voice) and cleaned content - -### _process_message_background() media routing (lines 752-786): -- After extracting MEDIA tags, routes by file extension: - - `.ogg .opus .mp3 .wav .m4a` → `send_voice()` - - `.mp4 .mov .avi .mkv .3gp` → `send_video()` - - `.jpg .jpeg .png .webp .gif` → `send_image_file()` - - **Everything else** → `send_document()` -- This routing already supports arbitrary files! - -### send_* method inventory (base class): -- `send(chat_id, content, reply_to, metadata)` — ABSTRACT, text -- `send_image(chat_id, image_url, caption, reply_to)` — URL-based images -- `send_animation(chat_id, animation_url, caption, reply_to)` — GIF animations -- `send_voice(chat_id, audio_path, caption, reply_to)` — voice messages -- `send_video(chat_id, video_path, caption, reply_to)` — video files -- `send_document(chat_id, file_path, caption, file_name, reply_to)` — generic files -- `send_image_file(chat_id, image_path, caption, reply_to)` — local image files -- `send_typing(chat_id)` — typing indicator -- `edit_message(chat_id, message_id, content)` — edit sent messages - -### What's missing: -- **Telegram:** No override for `send_document` — falls back to text! (`send_image_file` ✅ added) -- **Discord:** No override for `send_document` — falls back to text! (`send_image_file` ✅ added) -- **Slack:** No override for `send_document` — falls back to text! (`send_image_file` ✅ added) -- **WhatsApp:** Has `send_document` and `send_image_file` via bridge — COMPLETE. -- The base class defaults just send "📎 File: /path" as text — useless for actual file delivery. - -## 7. 
gateway/platforms/telegram.py — Send Method Analysis - -### Implemented send methods: -- `send()` — MarkdownV2 text with fallback to plain -- `send_voice()` — `.ogg`/`.opus` as `send_voice()`, others as `send_audio()` -- `send_image()` — URL-based via `send_photo()` -- `send_image_file()` — local file via `send_photo(photo=open(path, 'rb'))` ✅ -- `send_animation()` — GIF via `send_animation()` -- `send_typing()` — "typing" chat action -- `edit_message()` — edit text messages - -### MISSING: -- **`send_document()` NOT overridden** — Need to add `self._bot.send_document(chat_id, document=open(file_path, 'rb'), ...)` -- **`send_video()` NOT overridden** — Need to add `self._bot.send_video(...)` - -## 8. gateway/platforms/discord.py — Send Method Analysis - -### Implemented send methods: -- `send()` — text messages with chunking -- `send_voice()` — discord.File attachment -- `send_image()` — downloads URL, creates discord.File attachment -- `send_image_file()` — local file via discord.File attachment ✅ -- `send_typing()` — channel.typing() -- `edit_message()` — edit text messages - -### MISSING: -- **`send_document()` NOT overridden** — Need to add discord.File attachment -- **`send_video()` NOT overridden** — Need to add discord.File attachment - -## 9. gateway/run.py — User File Attachment Handling - -### Current attachment flow: -1. **Telegram photos** (line 509-529): Download via `photo.get_file()` → `cache_image_from_bytes()` → vision auto-analysis -2. **Telegram voice** (line 532-541): Download → `cache_audio_from_bytes()` → STT transcription -3. **Telegram audio** (line 542-551): Same pattern -4. **Telegram documents** (line 553-617): Extension validation against `SUPPORTED_DOCUMENT_TYPES`, 20MB limit, content injection for text files -5. **Discord attachments** (line 717-751): Content-type detection, image/audio caching, URL fallback for other types -6. 
**Gateway run.py** (lines 818-883): Auto-analyzes images with vision, transcribes audio, enriches document messages with context notes - -### Key insight: Files are always cached to host filesystem first, then processed. The agent sees local file paths. - -## 10. tools/terminal_tool.py — Terminal Tool & Environment Interaction - -### How it manages environments: -- Global dict `_active_environments: Dict[str, Any]` keyed by task_id -- Per-task creation locks prevent duplicate sandbox creation -- Auto-cleanup thread kills idle environments after `TERMINAL_LIFETIME_SECONDS` -- `_get_env_config()` reads all TERMINAL_* env vars for backend selection -- `_create_environment()` factory creates the right backend type - -### Could send_file piggyback? -- **YES.** send_file needs access to the same environment to extract files from sandboxes. -- It can reuse `_active_environments[task_id]` to get the environment, then: - - Docker: Use `docker cp` via `env._container_id` - - SSH: Use `scp` via `env.control_socket` - - Local: Just read the file directly - - Modal: Use base64-over-terminal via `env.execute()` -- The file_tools.py module already does this with `ShellFileOperations` — read_file/write_file/search/patch all share the same env instance. - -## 11. tools/tts_tool.py — Working Example of File Delivery - -### Flow: -1. Generate audio file to `~/.hermes/audio_cache/tts_TIMESTAMP.{ogg,mp3}` -2. Return JSON with `media_tag: "MEDIA:/path/to/file"` -3. For Telegram voice: prepend `[[audio_as_voice]]` directive -4. The LLM includes the MEDIA tag in its response text -5. `BasePlatformAdapter._process_message_background()` calls `extract_media()` to find the tag -6. Routes by extension → `send_voice()` for audio files -7. Platform adapter sends the file natively - -### Key pattern: Tool saves file to host → returns MEDIA: path → LLM echoes it → gateway extracts → platform delivers - -## 12. tools/image_generation_tool.py — Working Example of Image Delivery - -### Flow: -1. 
Call FAL.ai API → get image URL -2. Return JSON with `image: "https://fal.media/..."` URL -3. The LLM includes the URL in markdown: `![description](URL)` -4. `BasePlatformAdapter.extract_images()` finds `![alt](url)` patterns -5. Routes through `send_image()` (URL) or `send_animation()` (GIF) -6. Platform downloads and sends natively - -### Key difference from TTS: Images are URL-based, not local files. The gateway downloads at send time. - ---- - -# INTEGRATION MAP: Where send_file Hooks In - -## Architecture Decision: MEDIA: Tag Protocol vs. New Tool - -The MEDIA: tag protocol is already the established pattern for file delivery. Two options: - -### Option A: Pure MEDIA: Tag (Minimal Change) -- No new tool needed -- Agent downloads file from sandbox to host using terminal (base64) -- Saves to known location (e.g., `~/.hermes/file_cache/`) -- Includes `MEDIA:/path` in response text -- Existing routing in `_process_message_background()` handles delivery -- **Problem:** Agent has to manually do base64 dance + know about MEDIA: convention - -### Option B: Dedicated send_file Tool (Recommended) -- New tool that the agent calls with `(file_path, caption?)` -- Tool handles the sandbox → host extraction automatically -- Returns MEDIA: tag that gets routed through existing pipeline -- Much cleaner agent experience - -## Implementation Plan for Option B - -### Files to CREATE: - -1. **`tools/send_file_tool.py`** — The new tool - - Accepts: `file_path` (path in sandbox), `caption` (optional) - - Detects environment backend from `_active_environments` - - Extracts file from sandbox: - - **local:** `shutil.copy()` or direct path - - **docker:** `docker cp {container_id}:{path} {local_cache}/` - - **ssh:** `scp -o ControlPath=... 
{user}@{host}:{path} {local_cache}/` - - **modal:** base64-over-terminal via `env.execute("base64 {path}")` - - Saves to `~/.hermes/file_cache/{uuid}_{filename}` - - Returns: `MEDIA:/cached/path` in response for gateway to pick up - - Register with `registry.register(name="send_file", toolset="file", ...)` - -### Files to MODIFY: - -2. **`gateway/platforms/telegram.py`** — Add missing send methods: - ```python - async def send_document(self, chat_id, file_path, caption=None, file_name=None, reply_to=None): - with open(file_path, "rb") as f: - msg = await self._bot.send_document( - chat_id=int(chat_id), document=f, - caption=caption, filename=file_name or os.path.basename(file_path)) - return SendResult(success=True, message_id=str(msg.message_id)) - - async def send_image_file(self, chat_id, image_path, caption=None, reply_to=None): - with open(image_path, "rb") as f: - msg = await self._bot.send_photo(chat_id=int(chat_id), photo=f, caption=caption) - return SendResult(success=True, message_id=str(msg.message_id)) - - async def send_video(self, chat_id, video_path, caption=None, reply_to=None): - with open(video_path, "rb") as f: - msg = await self._bot.send_video(chat_id=int(chat_id), video=f, caption=caption) - return SendResult(success=True, message_id=str(msg.message_id)) - ``` - -3. 
**`gateway/platforms/discord.py`** — Add missing send methods: - ```python - async def send_document(self, chat_id, file_path, caption=None, file_name=None, reply_to=None): - channel = self._client.get_channel(int(chat_id)) or await self._client.fetch_channel(int(chat_id)) - with open(file_path, "rb") as f: - file = discord.File(io.BytesIO(f.read()), filename=file_name or os.path.basename(file_path)) - msg = await channel.send(content=caption, file=file) - return SendResult(success=True, message_id=str(msg.id)) - - async def send_image_file(self, chat_id, image_path, caption=None, reply_to=None): - # Same pattern as send_document with image filename - - async def send_video(self, chat_id, video_path, caption=None, reply_to=None): - # Same pattern, discord renders video attachments inline - ``` - -4. **`toolsets.py`** — Add `"send_file"` to `_HERMES_CORE_TOOLS` list - -5. **`agent/prompt_builder.py`** — Update platform hints to mention send_file tool - -### Code that can be REUSED (zero rewrite): - -- `BasePlatformAdapter.extract_media()` — Already extracts MEDIA: tags -- `BasePlatformAdapter._process_message_background()` — Already routes by extension -- `ToolContext.download_file()` — Base64-over-terminal extraction pattern -- `tools/terminal_tool.py` _active_environments dict — Environment access -- `tools/registry.py` — Tool registration infrastructure -- `gateway/platforms/base.py` send_document/send_image_file/send_video signatures — Already defined - -### Code that needs to be WRITTEN from scratch: - -1. `tools/send_file_tool.py` (~150 lines): - - File extraction from each environment backend type - - Local file cache management - - Registry registration - -2. Telegram `send_document` + `send_image_file` + `send_video` overrides (~40 lines) -3. 
Discord `send_document` + `send_image_file` + `send_video` overrides (~50 lines) - -### Total effort: ~240 lines of new code, ~5 lines of config changes - -## Key Environment-Specific Extract Strategies - -| Backend | Extract Method | Speed | Complexity | -|------------|-------------------------------|----------|------------| -| local | shutil.copy / direct path | Instant | None | -| docker | `docker cp container:path .` | Fast | Low | -| docker+vol | Direct host path access | Instant | None | -| ssh | `scp -o ControlPath=...` | Fast | Low | -| modal | base64-over-terminal | Moderate | Medium | -| singularity| Direct path (overlay mount) | Fast | Low | - -## Data Flow Summary - -``` -Agent calls send_file(file_path="/workspace/output.pdf", caption="Here's the report") - │ - ▼ -send_file_tool.py: - 1. Get environment from _active_environments[task_id] - 2. Detect backend type (docker/ssh/modal/local) - 3. Extract file to ~/.hermes/file_cache/{uuid}_{filename} - 4. Return: '{"success": true, "media_tag": "MEDIA:/home/user/.hermes/file_cache/abc123_output.pdf"}' - │ - ▼ -LLM includes MEDIA: tag in its response text - │ - ▼ -BasePlatformAdapter._process_message_background(): - 1. extract_media(response) → finds MEDIA:/path - 2. Checks extension: .pdf → send_document() - 3. 
Calls platform-specific send_document(chat_id, file_path, caption) - │ - ▼ -TelegramAdapter.send_document() / DiscordAdapter.send_document(): - Opens file, sends via platform API as native document attachment - User receives downloadable file in chat -``` diff --git a/gateway/channel_directory.py b/gateway/channel_directory.py index 622fed6bd9..31406a7dec 100644 --- a/gateway/channel_directory.py +++ b/gateway/channel_directory.py @@ -40,8 +40,8 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]: except Exception as e: logger.warning("Channel directory: failed to build %s: %s", platform.value, e) - # Telegram & WhatsApp can't enumerate chats -- pull from session history - for plat_name in ("telegram", "whatsapp"): + # Telegram, WhatsApp & Signal can't enumerate chats -- pull from session history + for plat_name in ("telegram", "whatsapp", "signal"): if plat_name not in platforms: platforms[plat_name] = _build_from_sessions(plat_name) @@ -52,7 +52,7 @@ def build_channel_directory(adapters: Dict[Any, Any]) -> Dict[str, Any]: try: DIRECTORY_PATH.parent.mkdir(parents=True, exist_ok=True) - with open(DIRECTORY_PATH, "w") as f: + with open(DIRECTORY_PATH, "w", encoding="utf-8") as f: json.dump(directory, f, indent=2, ensure_ascii=False) except Exception as e: logger.warning("Channel directory: failed to write: %s", e) @@ -115,7 +115,7 @@ def _build_from_sessions(platform_name: str) -> List[Dict[str, str]]: entries = [] try: - with open(sessions_path) as f: + with open(sessions_path, encoding="utf-8") as f: data = json.load(f) seen_ids = set() @@ -147,7 +147,7 @@ def load_directory() -> Dict[str, Any]: if not DIRECTORY_PATH.exists(): return {"updated_at": None, "platforms": {}} try: - with open(DIRECTORY_PATH) as f: + with open(DIRECTORY_PATH, encoding="utf-8") as f: return json.load(f) except Exception: return {"updated_at": None, "platforms": {}} diff --git a/gateway/config.py b/gateway/config.py index f441e2dd6b..9a517f81b2 100644 --- 
a/gateway/config.py +++ b/gateway/config.py @@ -26,6 +26,7 @@ class Platform(Enum): DISCORD = "discord" WHATSAPP = "whatsapp" SLACK = "slack" + SIGNAL = "signal" HOMEASSISTANT = "homeassistant" @@ -155,7 +156,16 @@ class GatewayConfig: """Return list of platforms that are enabled and configured.""" connected = [] for platform, config in self.platforms.items(): - if config.enabled and (config.token or config.api_key): + if not config.enabled: + continue + # Platforms that use token/api_key auth + if config.token or config.api_key: + connected.append(platform) + # WhatsApp uses enabled flag only (bridge handles auth) + elif platform == Platform.WHATSAPP: + connected.append(platform) + # Signal uses extra dict for config (http_url + account) + elif platform == Platform.SIGNAL and config.extra.get("http_url"): connected.append(platform) return connected @@ -379,6 +389,26 @@ def _apply_env_overrides(config: GatewayConfig) -> None: name=os.getenv("SLACK_HOME_CHANNEL_NAME", ""), ) + # Signal + signal_url = os.getenv("SIGNAL_HTTP_URL") + signal_account = os.getenv("SIGNAL_ACCOUNT") + if signal_url and signal_account: + if Platform.SIGNAL not in config.platforms: + config.platforms[Platform.SIGNAL] = PlatformConfig() + config.platforms[Platform.SIGNAL].enabled = True + config.platforms[Platform.SIGNAL].extra.update({ + "http_url": signal_url, + "account": signal_account, + "ignore_stories": os.getenv("SIGNAL_IGNORE_STORIES", "true").lower() in ("true", "1", "yes"), + }) + signal_home = os.getenv("SIGNAL_HOME_CHANNEL") + if signal_home: + config.platforms[Platform.SIGNAL].home_channel = HomeChannel( + platform=Platform.SIGNAL, + chat_id=signal_home, + name=os.getenv("SIGNAL_HOME_CHANNEL_NAME", "Home"), + ) + # Home Assistant hass_token = os.getenv("HASS_TOKEN") if hass_token: diff --git a/gateway/mirror.py b/gateway/mirror.py index 8c2f399838..527fc2c13c 100644 --- a/gateway/mirror.py +++ b/gateway/mirror.py @@ -73,7 +73,7 @@ def _find_session_id(platform: str, chat_id: str) 
-> Optional[str]: return None try: - with open(_SESSIONS_INDEX) as f: + with open(_SESSIONS_INDEX, encoding="utf-8") as f: data = json.load(f) except Exception: return None @@ -103,7 +103,7 @@ def _append_to_jsonl(session_id: str, message: dict) -> None: """Append a message to the JSONL transcript file.""" transcript_path = _SESSIONS_DIR / f"{session_id}.jsonl" try: - with open(transcript_path, "a") as f: + with open(transcript_path, "a", encoding="utf-8") as f: f.write(json.dumps(message, ensure_ascii=False) + "\n") except Exception as e: logger.debug("Mirror JSONL write failed: %s", e) diff --git a/gateway/platforms/ADDING_A_PLATFORM.md b/gateway/platforms/ADDING_A_PLATFORM.md new file mode 100644 index 0000000000..dadd9890d9 --- /dev/null +++ b/gateway/platforms/ADDING_A_PLATFORM.md @@ -0,0 +1,313 @@ +# Adding a New Messaging Platform + +Checklist for integrating a new messaging platform into the Hermes gateway. +Use this as a reference when building a new adapter — every item here is a +real integration point that exists in the codebase. Missing any of them will +cause broken functionality, missing features, or inconsistent behavior. + +--- + +## 1. Core Adapter (`gateway/platforms/.py`) + +The adapter is a subclass of `BasePlatformAdapter` from `gateway/platforms/base.py`. + +### Required methods + +| Method | Purpose | +|--------|---------| +| `__init__(self, config)` | Parse config, init state. Call `super().__init__(config, Platform.YOUR_PLATFORM)` | +| `connect() -> bool` | Connect to the platform, start listeners. Return True on success | +| `disconnect()` | Stop listeners, close connections, cancel tasks | +| `send(chat_id, text, ...) 
-> SendResult` | Send a text message | +| `send_typing(chat_id)` | Send typing indicator | +| `send_image(chat_id, image_url, caption) -> SendResult` | Send an image | +| `get_chat_info(chat_id) -> dict` | Return `{name, type, chat_id}` for a chat | + +### Optional methods (have default stubs in base) + +| Method | Purpose | +|--------|---------| +| `send_document(chat_id, path, caption)` | Send a file attachment | +| `send_voice(chat_id, path)` | Send a voice message | +| `send_video(chat_id, path, caption)` | Send a video | +| `send_animation(chat_id, path, caption)` | Send a GIF/animation | +| `send_image_file(chat_id, path, caption)` | Send image from local file | + +### Required function + +```python +def check_<platform>_requirements() -> bool: + """Check if this platform's dependencies are available.""" +``` + +### Key patterns to follow + +- Use `self.build_source(...)` to construct `SessionSource` objects +- Call `self.handle_message(event)` to dispatch inbound messages to the gateway +- Use `MessageEvent`, `MessageType`, `SendResult` from base +- Use `cache_image_from_bytes`, `cache_audio_from_bytes`, `cache_document_from_bytes` for attachments +- Filter self-messages (prevent reply loops) +- Filter sync/echo messages if the platform has them +- Redact sensitive identifiers (phone numbers, tokens) in all log output +- Implement reconnection with exponential backoff + jitter for streaming connections +- Set `MAX_MESSAGE_LENGTH` if the platform has message size limits + +--- + +## 2. Platform Enum (`gateway/config.py`) + +Add the platform to the `Platform` enum: + +```python +class Platform(Enum): + ... 
+ YOUR_PLATFORM = "your_platform" +``` + +Add env var loading in `_apply_env_overrides()`: + +```python +# Your Platform +your_token = os.getenv("YOUR_PLATFORM_TOKEN") +if your_token: + if Platform.YOUR_PLATFORM not in config.platforms: + config.platforms[Platform.YOUR_PLATFORM] = PlatformConfig() + config.platforms[Platform.YOUR_PLATFORM].enabled = True + config.platforms[Platform.YOUR_PLATFORM].token = your_token +``` + +Update `get_connected_platforms()` if your platform doesn't use token/api_key +(e.g., WhatsApp uses `enabled` flag, Signal uses `extra` dict). + +--- + +## 3. Adapter Factory (`gateway/run.py`) + +Add to `_create_adapter()`: + +```python +elif platform == Platform.YOUR_PLATFORM: + from gateway.platforms.your_platform import YourAdapter, check_your_requirements + if not check_your_requirements(): + logger.warning("Your Platform: dependencies not met") + return None + return YourAdapter(config) +``` + +--- + +## 4. Authorization Maps (`gateway/run.py`) + +Add to BOTH dicts in `_is_user_authorized()`: + +```python +platform_env_map = { + ... + Platform.YOUR_PLATFORM: "YOUR_PLATFORM_ALLOWED_USERS", +} +platform_allow_all_map = { + ... + Platform.YOUR_PLATFORM: "YOUR_PLATFORM_ALLOW_ALL_USERS", +} +``` + +--- + +## 5. Session Source (`gateway/session.py`) + +If your platform needs extra identity fields (e.g., Signal's UUID alongside +phone number), add them to the `SessionSource` dataclass with `Optional` defaults, +and update `to_dict()`, `from_dict()`, and `build_source()` in base.py. + +--- + +## 6. System Prompt Hints (`agent/prompt_builder.py`) + +Add a `PLATFORM_HINTS` entry so the agent knows what platform it's on: + +```python +PLATFORM_HINTS = { + ... + "your_platform": ( + "You are on Your Platform. " + "Describe formatting capabilities, media support, etc." + ), +} +``` + +Without this, the agent won't know it's on your platform and may use +inappropriate formatting (e.g., markdown on platforms that don't render it). + +--- + +## 7. 
Toolset (`toolsets.py`) + +Add a named toolset for your platform: + +```python +"hermes-your-platform": { + "description": "Your Platform bot toolset", + "tools": _HERMES_CORE_TOOLS, + "includes": [] +}, +``` + +And add it to the `hermes-gateway` composite: + +```python +"hermes-gateway": { + "includes": [..., "hermes-your-platform"] +} +``` + +--- + +## 8. Cron Delivery (`cron/scheduler.py`) + +Add to `platform_map` in `_deliver_result()`: + +```python +platform_map = { + ... + "your_platform": Platform.YOUR_PLATFORM, +} +``` + +Without this, `schedule_cronjob(deliver="your_platform")` silently fails. + +--- + +## 9. Send Message Tool (`tools/send_message_tool.py`) + +Add to `platform_map` in `send_message_tool()`: + +```python +platform_map = { + ... + "your_platform": Platform.YOUR_PLATFORM, +} +``` + +Add routing in `_send_to_platform()`: + +```python +elif platform == Platform.YOUR_PLATFORM: + return await _send_your_platform(pconfig, chat_id, message) +``` + +Implement `_send_your_platform()` — a standalone async function that sends +a single message without requiring the full adapter (for use by cron jobs +and the send_message tool outside the gateway process). + +Update the tool schema `target` description to include your platform example. + +--- + +## 10. Cronjob Tool Schema (`tools/cronjob_tools.py`) + +Update the `deliver` parameter description and docstring to mention your +platform as a delivery option. + +--- + +## 11. Channel Directory (`gateway/channel_directory.py`) + +If your platform can't enumerate chats (most can't), add it to the +session-based discovery list: + +```python +for plat_name in ("telegram", "whatsapp", "signal", "your_platform"): +``` + +--- + +## 12. Status Display (`hermes_cli/status.py`) + +Add to the `platforms` dict in the Messaging Platforms section: + +```python +platforms = { + ... + "Your Platform": ("YOUR_PLATFORM_TOKEN", "YOUR_PLATFORM_HOME_CHANNEL"), +} +``` + +--- + +## 13. 
Gateway Setup Wizard (`hermes_cli/gateway.py`) + +Add to the `_PLATFORMS` list: + +```python +{ + "key": "your_platform", + "label": "Your Platform", + "emoji": "📱", + "token_var": "YOUR_PLATFORM_TOKEN", + "setup_instructions": [...], + "vars": [...], +} +``` + +If your platform needs custom setup logic (connectivity testing, QR codes, +policy choices), add a `_setup_your_platform()` function and route to it +in the platform selection switch. + +Update `_platform_status()` if your platform's "configured" check differs +from the standard `bool(get_env_value(token_var))`. + +--- + +## 14. Phone/ID Redaction (`agent/redact.py`) + +If your platform uses sensitive identifiers (phone numbers, etc.), add a +regex pattern and redaction function to `agent/redact.py`. This ensures +identifiers are masked in ALL log output, not just your adapter's logs. + +--- + +## 15. Documentation + +| File | What to update | +|------|---------------| +| `README.md` | Platform list in feature table + documentation table | +| `AGENTS.md` | Gateway description + env var config section | +| `website/docs/user-guide/messaging/<platform>.md` | **NEW** — Full setup guide (see existing platform docs for template) | +| `website/docs/user-guide/messaging/index.md` | Architecture diagram, toolset table, security examples, Next Steps links | +| `website/docs/reference/environment-variables.md` | All env vars for the platform | + +--- + +## 16. 
Tests (`tests/gateway/test_.py`) + +Recommended test coverage: + +- Platform enum exists with correct value +- Config loading from env vars via `_apply_env_overrides` +- Adapter init (config parsing, allowlist handling, default values) +- Helper functions (redaction, parsing, file type detection) +- Session source round-trip (to_dict → from_dict) +- Authorization integration (platform in allowlist maps) +- Send message tool routing (platform in platform_map) + +Optional but valuable: +- Async tests for message handling flow (mock the platform API) +- SSE/WebSocket reconnection logic +- Attachment processing +- Group message filtering + +--- + +## Quick Verification + +After implementing everything, verify with: + +```bash +# All tests pass +python -m pytest tests/ -q + +# Grep for your platform name to find any missed integration points +grep -r "telegram\|discord\|whatsapp\|slack" gateway/ tools/ agent/ cron/ hermes_cli/ toolsets.py \ + --include="*.py" -l | sort -u +# Check each file in the output — if it mentions other platforms but not yours, you missed it +``` diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index d787cc939c..dc518843e4 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -838,6 +838,8 @@ class BasePlatformAdapter(ABC): user_name: Optional[str] = None, thread_id: Optional[str] = None, chat_topic: Optional[str] = None, + user_id_alt: Optional[str] = None, + chat_id_alt: Optional[str] = None, ) -> SessionSource: """Helper to build a SessionSource for this platform.""" # Normalize empty topic to None @@ -852,6 +854,8 @@ class BasePlatformAdapter(ABC): user_name=user_name, thread_id=str(thread_id) if thread_id else None, chat_topic=chat_topic.strip() if chat_topic else None, + user_id_alt=user_id_alt, + chat_id_alt=chat_id_alt, ) @abstractmethod diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 7e137047c8..905e20d6f4 100644 --- a/gateway/platforms/discord.py +++ 
b/gateway/platforms/discord.py @@ -592,6 +592,89 @@ class DiscordAdapter(BasePlatformAdapter): except Exception as e: logger.debug("Discord followup failed: %s", e) + @tree.command(name="compress", description="Compress conversation context") + async def slash_compress(interaction: discord.Interaction): + await interaction.response.defer(ephemeral=True) + event = self._build_slash_event(interaction, "/compress") + await self.handle_message(event) + try: + await interaction.followup.send("Done~", ephemeral=True) + except Exception as e: + logger.debug("Discord followup failed: %s", e) + + @tree.command(name="title", description="Set or show the session title") + @discord.app_commands.describe(name="Session title. Leave empty to show current.") + async def slash_title(interaction: discord.Interaction, name: str = ""): + await interaction.response.defer(ephemeral=True) + event = self._build_slash_event(interaction, f"/title {name}".strip()) + await self.handle_message(event) + try: + await interaction.followup.send("Done~", ephemeral=True) + except Exception as e: + logger.debug("Discord followup failed: %s", e) + + @tree.command(name="resume", description="Resume a previously-named session") + @discord.app_commands.describe(name="Session name to resume. 
Leave empty to list sessions.") + async def slash_resume(interaction: discord.Interaction, name: str = ""): + await interaction.response.defer(ephemeral=True) + event = self._build_slash_event(interaction, f"/resume {name}".strip()) + await self.handle_message(event) + try: + await interaction.followup.send("Done~", ephemeral=True) + except Exception as e: + logger.debug("Discord followup failed: %s", e) + + @tree.command(name="usage", description="Show token usage for this session") + async def slash_usage(interaction: discord.Interaction): + await interaction.response.defer(ephemeral=True) + event = self._build_slash_event(interaction, "/usage") + await self.handle_message(event) + try: + await interaction.followup.send("Done~", ephemeral=True) + except Exception as e: + logger.debug("Discord followup failed: %s", e) + + @tree.command(name="provider", description="Show available providers") + async def slash_provider(interaction: discord.Interaction): + await interaction.response.defer(ephemeral=True) + event = self._build_slash_event(interaction, "/provider") + await self.handle_message(event) + try: + await interaction.followup.send("Done~", ephemeral=True) + except Exception as e: + logger.debug("Discord followup failed: %s", e) + + @tree.command(name="help", description="Show available commands") + async def slash_help(interaction: discord.Interaction): + await interaction.response.defer(ephemeral=True) + event = self._build_slash_event(interaction, "/help") + await self.handle_message(event) + try: + await interaction.followup.send("Done~", ephemeral=True) + except Exception as e: + logger.debug("Discord followup failed: %s", e) + + @tree.command(name="insights", description="Show usage insights and analytics") + @discord.app_commands.describe(days="Number of days to analyze (default: 7)") + async def slash_insights(interaction: discord.Interaction, days: int = 7): + await interaction.response.defer(ephemeral=True) + event = 
self._build_slash_event(interaction, f"/insights {days}") + await self.handle_message(event) + try: + await interaction.followup.send("Done~", ephemeral=True) + except Exception as e: + logger.debug("Discord followup failed: %s", e) + + @tree.command(name="reload-mcp", description="Reload MCP servers from config") + async def slash_reload_mcp(interaction: discord.Interaction): + await interaction.response.defer(ephemeral=True) + event = self._build_slash_event(interaction, "/reload-mcp") + await self.handle_message(event) + try: + await interaction.followup.send("Done~", ephemeral=True) + except Exception as e: + logger.debug("Discord followup failed: %s", e) + @tree.command(name="update", description="Update Hermes Agent to the latest version") async def slash_update(interaction: discord.Interaction): await interaction.response.defer(ephemeral=True) diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py new file mode 100644 index 0000000000..62e7e4b63b --- /dev/null +++ b/gateway/platforms/signal.py @@ -0,0 +1,716 @@ +"""Signal messenger platform adapter. + +Connects to a signal-cli daemon running in HTTP mode. +Inbound messages arrive via SSE (Server-Sent Events) streaming. +Outbound messages and actions use JSON-RPC 2.0 over HTTP. + +Based on PR #268 by ibhagwan, rebuilt with bug fixes. 
+ +Requires: + - signal-cli installed and running: signal-cli daemon --http 127.0.0.1:8080 + - SIGNAL_HTTP_URL and SIGNAL_ACCOUNT environment variables set +""" + +import asyncio +import base64 +import json +import logging +import os +import random +import re +import time +from datetime import datetime, timezone +from pathlib import Path +from typing import Dict, List, Optional, Any +from urllib.parse import unquote + +import httpx + +from gateway.config import Platform, PlatformConfig +from gateway.platforms.base import ( + BasePlatformAdapter, + MessageEvent, + MessageType, + SendResult, + cache_image_from_bytes, + cache_audio_from_bytes, + cache_document_from_bytes, + cache_image_from_url, +) + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- +SIGNAL_MAX_ATTACHMENT_SIZE = 100 * 1024 * 1024 # 100 MB +MAX_MESSAGE_LENGTH = 8000 # Signal message size limit +TYPING_INTERVAL = 8.0 # seconds between typing indicator refreshes +SSE_RETRY_DELAY_INITIAL = 2.0 +SSE_RETRY_DELAY_MAX = 60.0 +HEALTH_CHECK_INTERVAL = 30.0 # seconds between health checks +HEALTH_CHECK_STALE_THRESHOLD = 120.0 # seconds without SSE activity before concern + +# E.164 phone number pattern for redaction +_PHONE_RE = re.compile(r"\+[1-9]\d{6,14}") + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _redact_phone(phone: str) -> str: + """Redact a phone number for logging: +15551234567 -> +155****4567.""" + if not phone: + return "" + if len(phone) <= 8: + return phone[:2] + "****" + phone[-2:] if len(phone) > 4 else "****" + return phone[:4] + "****" + phone[-4:] + + +def _parse_comma_list(value: str) -> List[str]: + """Split a comma-separated string into a list, stripping whitespace.""" + return [v.strip() for v in 
value.split(",") if v.strip()] + + +def _guess_extension(data: bytes) -> str: + """Guess file extension from magic bytes.""" + if data[:4] == b"\x89PNG": + return ".png" + if data[:2] == b"\xff\xd8": + return ".jpg" + if data[:4] == b"GIF8": + return ".gif" + if len(data) >= 12 and data[:4] == b"RIFF" and data[8:12] == b"WEBP": + return ".webp" + if data[:4] == b"%PDF": + return ".pdf" + if len(data) >= 8 and data[4:8] == b"ftyp": + return ".mp4" + if data[:4] == b"OggS": + return ".ogg" + if len(data) >= 2 and data[0] == 0xFF and (data[1] & 0xE0) == 0xE0: + return ".mp3" + if data[:2] == b"PK": + return ".zip" + return ".bin" + + +def _is_image_ext(ext: str) -> bool: + return ext.lower() in (".jpg", ".jpeg", ".png", ".gif", ".webp") + + +def _is_audio_ext(ext: str) -> bool: + return ext.lower() in (".mp3", ".wav", ".ogg", ".m4a", ".aac") + + +def _render_mentions(text: str, mentions: list) -> str: + """Replace Signal mention placeholders (\\uFFFC) with readable @identifiers. + + Signal encodes @mentions as the Unicode object replacement character + with out-of-band metadata containing the mentioned user's UUID/number. 
+ """ + if not mentions or "\uFFFC" not in text: + return text + # Sort mentions by start position (reverse) to replace from end to start + # so indices don't shift as we replace + sorted_mentions = sorted(mentions, key=lambda m: m.get("start", 0), reverse=True) + for mention in sorted_mentions: + start = mention.get("start", 0) + length = mention.get("length", 1) + # Use the mention's number or UUID as the replacement + identifier = mention.get("number") or mention.get("uuid") or "user" + replacement = f"@{identifier}" + text = text[:start] + replacement + text[start + length:] + return text + + +def check_signal_requirements() -> bool: + """Check if Signal is configured (has URL and account).""" + return bool(os.getenv("SIGNAL_HTTP_URL") and os.getenv("SIGNAL_ACCOUNT")) + + +# --------------------------------------------------------------------------- +# Signal Adapter +# --------------------------------------------------------------------------- + +class SignalAdapter(BasePlatformAdapter): + """Signal messenger adapter using signal-cli HTTP daemon.""" + + platform = Platform.SIGNAL + + def __init__(self, config: PlatformConfig): + super().__init__(config, Platform.SIGNAL) + + extra = config.extra or {} + self.http_url = extra.get("http_url", "http://127.0.0.1:8080").rstrip("/") + self.account = extra.get("account", "") + self.ignore_stories = extra.get("ignore_stories", True) + + # Parse allowlists — group policy is derived from presence of group allowlist + group_allowed_str = os.getenv("SIGNAL_GROUP_ALLOWED_USERS", "") + self.group_allow_from = set(_parse_comma_list(group_allowed_str)) + + # HTTP client + self.client: Optional[httpx.AsyncClient] = None + + # Background tasks + self._sse_task: Optional[asyncio.Task] = None + self._health_monitor_task: Optional[asyncio.Task] = None + self._typing_tasks: Dict[str, asyncio.Task] = {} + self._running = False + self._last_sse_activity = 0.0 + self._sse_response: Optional[httpx.Response] = None + + # Normalize 
account for self-message filtering + self._account_normalized = self.account.strip() + + logger.info("Signal adapter initialized: url=%s account=%s groups=%s", + self.http_url, _redact_phone(self.account), + "enabled" if self.group_allow_from else "disabled") + + # ------------------------------------------------------------------ + # Lifecycle + # ------------------------------------------------------------------ + + async def connect(self) -> bool: + """Connect to signal-cli daemon and start SSE listener.""" + if not self.http_url or not self.account: + logger.error("Signal: SIGNAL_HTTP_URL and SIGNAL_ACCOUNT are required") + return False + + self.client = httpx.AsyncClient(timeout=30.0) + + # Health check — verify signal-cli daemon is reachable + try: + resp = await self.client.get(f"{self.http_url}/api/v1/check", timeout=10.0) + if resp.status_code != 200: + logger.error("Signal: health check failed (status %d)", resp.status_code) + return False + except Exception as e: + logger.error("Signal: cannot reach signal-cli at %s: %s", self.http_url, e) + return False + + self._running = True + self._last_sse_activity = time.time() + self._sse_task = asyncio.create_task(self._sse_listener()) + self._health_monitor_task = asyncio.create_task(self._health_monitor()) + + logger.info("Signal: connected to %s", self.http_url) + return True + + async def disconnect(self) -> None: + """Stop SSE listener and clean up.""" + self._running = False + + if self._sse_task: + self._sse_task.cancel() + try: + await self._sse_task + except asyncio.CancelledError: + pass + + if self._health_monitor_task: + self._health_monitor_task.cancel() + try: + await self._health_monitor_task + except asyncio.CancelledError: + pass + + # Cancel all typing tasks + for task in self._typing_tasks.values(): + task.cancel() + self._typing_tasks.clear() + + if self.client: + await self.client.aclose() + self.client = None + + logger.info("Signal: disconnected") + + # 
------------------------------------------------------------------ + # SSE Streaming (inbound messages) + # ------------------------------------------------------------------ + + async def _sse_listener(self) -> None: + """Listen for SSE events from signal-cli daemon.""" + url = f"{self.http_url}/api/v1/events?account={self.account}" + backoff = SSE_RETRY_DELAY_INITIAL + + while self._running: + try: + logger.debug("Signal SSE: connecting to %s", url) + async with self.client.stream( + "GET", url, + headers={"Accept": "text/event-stream"}, + timeout=None, + ) as response: + self._sse_response = response + backoff = SSE_RETRY_DELAY_INITIAL # Reset on successful connection + self._last_sse_activity = time.time() + logger.info("Signal SSE: connected") + + buffer = "" + async for chunk in response.aiter_text(): + if not self._running: + break + buffer += chunk + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + line = line.strip() + if not line: + continue + # Parse SSE data lines + if line.startswith("data:"): + data_str = line[5:].strip() + if not data_str: + continue + self._last_sse_activity = time.time() + try: + data = json.loads(data_str) + await self._handle_envelope(data) + except json.JSONDecodeError: + logger.debug("Signal SSE: invalid JSON: %s", data_str[:100]) + except Exception: + logger.exception("Signal SSE: error handling event") + + except asyncio.CancelledError: + break + except httpx.HTTPError as e: + if self._running: + logger.warning("Signal SSE: HTTP error: %s (reconnecting in %.0fs)", e, backoff) + except Exception as e: + if self._running: + logger.warning("Signal SSE: error: %s (reconnecting in %.0fs)", e, backoff) + + if self._running: + # Add 20% jitter to prevent thundering herd on reconnection + jitter = backoff * 0.2 * random.random() + await asyncio.sleep(backoff + jitter) + backoff = min(backoff * 2, SSE_RETRY_DELAY_MAX) + + self._sse_response = None + + # ------------------------------------------------------------------ + 
# Health Monitor
# ------------------------------------------------------------------

async def _health_monitor(self) -> None:
    """Monitor SSE connection health and force a reconnect if it goes stale.

    Wakes every ``HEALTH_CHECK_INTERVAL`` seconds while ``self._running``
    is true. When ``self._last_sse_activity`` is older than
    ``HEALTH_CHECK_STALE_THRESHOLD`` seconds, the daemon's
    ``/api/v1/check`` endpoint is probed. Any outcome other than HTTP 200
    (including a request error) triggers ``_force_reconnect()``.
    """
    while self._running:
        await asyncio.sleep(HEALTH_CHECK_INTERVAL)
        if not self._running:
            break

        elapsed = time.time() - self._last_sse_activity
        if elapsed <= HEALTH_CHECK_STALE_THRESHOLD:
            continue

        logger.warning("Signal: SSE idle for %.0fs, checking daemon health", elapsed)
        try:
            resp = await self.client.get(
                f"{self.http_url}/api/v1/check", timeout=10.0
            )
        except Exception as e:
            logger.warning("Signal: health check error: %s, forcing reconnect", e)
            self._force_reconnect()
            continue

        if resp.status_code == 200:
            # Daemon is alive but SSE is idle — update activity to
            # avoid repeated warnings (connection may just be quiet)
            self._last_sse_activity = time.time()
            logger.debug("Signal: daemon healthy, SSE idle")
        else:
            logger.warning(
                "Signal: health check failed (%d), forcing reconnect",
                resp.status_code,
            )
            self._force_reconnect()


def _force_reconnect(self) -> None:
    """Force SSE reconnection by closing the current response.

    ``self._sse_response`` is always reset to ``None``. If the previous
    response exists and its stream has not been consumed, its ``aclose()``
    is scheduled on the running event loop.
    """
    response = self._sse_response
    self._sse_response = None
    if response is None or response.is_stream_consumed:
        return

    # Look the loop up *before* creating the coroutine, so a call made
    # outside a running loop does not leave a never-awaited coroutine behind.
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        logger.debug("Signal: no running event loop, skipping SSE close")
        return

    # The event loop keeps only weak references to tasks; hold a strong
    # one until the close finishes so it cannot be garbage-collected midway.
    pending = getattr(self, "_sse_close_tasks", None)
    if pending is None:
        pending = self._sse_close_tasks = set()

    def _reap(done) -> None:
        pending.discard(done)
        # Retrieve the exception so asyncio does not report it as unhandled.
        if not done.cancelled() and done.exception() is not None:
            logger.debug("Signal: error closing SSE response: %s", done.exception())

    task = loop.create_task(response.aclose())
    pending.add(task)
    task.add_done_callback(_reap)


# ------------------------------------------------------------------
# Message Handling
# ------------------------------------------------------------------

def _envelope_timestamp(self, ts_ms) -> datetime:
    """Convert a millisecond epoch value to an aware UTC datetime, falling back to now."""
    if ts_ms:
        try:
            return datetime.fromtimestamp(ts_ms / 1000, tz=timezone.utc)
        except (ValueError, OSError, OverflowError, TypeError):
            # Out-of-range or non-numeric timestamp: use the receive time.
            pass
    return datetime.now(tz=timezone.utc)


async def _handle_envelope(self, envelope: dict) -> None:
    """Process an incoming signal-cli envelope.

    Filters out sync messages, envelopes without a sender, messages from
    the adapter's own account, stories (when ``self.ignore_stories``) and
    groups not permitted by ``self.group_allow_from``. Otherwise the text
    (with mentions rendered) and any attachments are turned into a
    ``MessageEvent`` that is passed to ``self.handle_message``.

    Args:
        envelope: Envelope dict, optionally wrapped as ``{"envelope": {...}}``.
    """
    # Unwrap nested envelope if present
    envelope_data = envelope.get("envelope", envelope)

    # Filter syncMessage envelopes (sent transcripts, read receipts, etc.)
    # signal-cli may set syncMessage to null vs omitting it, so check key existence
    if "syncMessage" in envelope_data:
        return

    # Extract sender info
    sender = (
        envelope_data.get("sourceNumber")
        or envelope_data.get("sourceUuid")
        or envelope_data.get("source")
    )
    sender_name = envelope_data.get("sourceName", "")
    sender_uuid = envelope_data.get("sourceUuid", "")

    if not sender:
        logger.debug("Signal: ignoring envelope with no sender")
        return

    # Self-message filtering — prevent reply loops
    if self._account_normalized and sender == self._account_normalized:
        return

    # Filter stories
    if self.ignore_stories and envelope_data.get("storyMessage"):
        return

    # Get data message — also check editMessage (edited messages contain
    # their updated dataMessage inside editMessage.dataMessage)
    data_message = (
        envelope_data.get("dataMessage")
        or (envelope_data.get("editMessage") or {}).get("dataMessage")
    )
    if not data_message:
        return

    # Check for group message
    group_info = data_message.get("groupInfo")
    group_id = group_info.get("groupId") if group_info else None
    is_group = bool(group_id)

    # Group message filtering — derived from SIGNAL_GROUP_ALLOWED_USERS:
    #   - No env var set → groups disabled (default safe behavior)
    #   - Env var set with group IDs → only those groups allowed
    #   - Env var set with "*" → all groups allowed
    # DM auth is fully handled by run.py (_is_user_authorized)
    if is_group:
        if not self.group_allow_from:
            logger.debug("Signal: ignoring group message (no SIGNAL_GROUP_ALLOWED_USERS)")
            return
        if "*" not in self.group_allow_from and group_id not in self.group_allow_from:
            logger.debug("Signal: group %s not in allowlist", group_id[:8] if group_id else "?")
            return

    # Build chat info
    chat_id = sender if not is_group else f"group:{group_id}"
    chat_type = "group" if is_group else "dm"

    # Extract text and render mentions
    text = data_message.get("message", "")
    mentions = data_message.get("mentions", [])
    if text and mentions:
        text = _render_mentions(text, mentions)

    # Process attachments
    attachments_data = data_message.get("attachments") or []
    image_paths = []
    audio_path = None
    document_paths = []

    if attachments_data and not getattr(self, "ignore_attachments", False):
        for att in attachments_data:
            att_id = att.get("id")
            # "size" can be present but null; comparing None with an int
            # would raise TypeError and abort the whole envelope.
            att_size = att.get("size") or 0
            if not att_id:
                continue
            if att_size > SIGNAL_MAX_ATTACHMENT_SIZE:
                logger.warning("Signal: attachment too large (%d bytes), skipping", att_size)
                continue
            try:
                cached_path, ext = await self._fetch_attachment(att_id)
                if cached_path:
                    if _is_image_ext(ext):
                        image_paths.append(cached_path)
                    elif _is_audio_ext(ext):
                        audio_path = cached_path
                    else:
                        document_paths.append(cached_path)
            except Exception:
                logger.exception("Signal: failed to fetch attachment %s", att_id)

    # Build session source
    source = self.build_source(
        chat_id=chat_id,
        chat_name=group_info.get("groupName") if group_info else sender_name,
        chat_type=chat_type,
        user_id=sender,
        user_name=sender_name or sender,
        user_id_alt=sender_uuid if sender_uuid else None,
        chat_id_alt=group_id if is_group else None,
    )

    # Determine message type
    msg_type = MessageType.TEXT
    if audio_path:
        msg_type = MessageType.VOICE
    elif image_paths:
        msg_type = MessageType.IMAGE

    # Parse timestamp from envelope data (milliseconds since epoch)
    timestamp = self._envelope_timestamp(envelope_data.get("timestamp", 0))

    # Build and dispatch event
    event = MessageEvent(
        source=source,
        text=text or "",
        message_type=msg_type,
        image_paths=image_paths,
        audio_path=audio_path,
        document_paths=document_paths,
        timestamp=timestamp,
    )

    # For DMs chat_id *is* the sender identifier, so it must be redacted the
    # same way; only group chat ids are logged as a truncated raw value.
    redacted_sender = _redact_phone(sender)
    chat_label = chat_id[:20] if is_group else redacted_sender
    logger.debug("Signal: message from %s in %s: %s",
                 redacted_sender, chat_label, (text or "")[:50])

    await self.handle_message(event)


# ------------------------------------------------------------------
# Attachment Handling
# ------------------------------------------------------------------

async def _fetch_attachment(self, attachment_id: str) -> tuple:
    """Fetch an attachment via JSON-RPC and cache it.

    Args:
        attachment_id: Attachment identifier taken from the envelope.

    Returns:
        ``(path, ext)`` for the cached file, or ``(None, "")`` when the
        RPC returned nothing usable.
    """
    result = await self._rpc("getAttachment", {
        "account": self.account,
        "attachmentId": attachment_id,
    })

    # Accept both a bare base64 string and an object wrapping it as
    # {"data": "<base64>"}; a dict passed to b64decode would raise TypeError.
    if isinstance(result, dict):
        result = result.get("data")

    if not result:
        return None, ""

    # Result is base64-encoded file content
    raw_data = base64.b64decode(result)
    ext = _guess_extension(raw_data)

    if _is_image_ext(ext):
        path = cache_image_from_bytes(raw_data, ext)
    elif _is_audio_ext(ext):
        path = cache_audio_from_bytes(raw_data, ext)
    else:
        path = cache_document_from_bytes(raw_data, ext)

    return path, ext


# ------------------------------------------------------------------
# JSON-RPC Communication
# ------------------------------------------------------------------

async def _rpc(self, method: str, params: dict, rpc_id: Optional[str] = None) -> Any:
    """Send a JSON-RPC 2.0 request to the signal-cli daemon.

    Args:
        method: JSON-RPC method name.
        params: Parameters object for the call.
        rpc_id: Request id; defaults to ``"<method>_<epoch-ms>"``.

    Returns:
        The ``result`` member of the response, or ``None`` when there is
        no client, the daemon reports an error, or the request fails.
        Failures are logged and never raised.
    """
    if not self.client:
        logger.warning("Signal: RPC called but client not connected")
        return None

    if rpc_id is None:
        rpc_id = f"{method}_{int(time.time() * 1000)}"

    payload = {
        "jsonrpc": "2.0",
        "method": method,
        "params": params,
        "id": rpc_id,
    }

    try:
        resp = await self.client.post(
            f"{self.http_url}/api/v1/rpc",
            json=payload,
            timeout=30.0,
        )
        resp.raise_for_status()
        data = resp.json()

        if "error" in data:
            logger.warning("Signal RPC error (%s): %s", method, data["error"])
            return None

        return data.get("result")

    except Exception as e:
        # Deliberate best-effort: callers treat None as "RPC failed".
        logger.warning("Signal RPC %s failed: %s", method, e)
        return None


# ------------------------------------------------------------------
# Sending
# ------------------------------------------------------------------

def _recipient_params(self, chat_id: str) -> Dict[str, Any]:
    """Build the base JSON-RPC params addressing *chat_id* (group or direct chat)."""
    params: Dict[str, Any] = {"account": self.account}
    if chat_id.startswith("group:"):
        # Chat ids of the form "group:<id>" address a group; strip the prefix.
        params["groupId"] = chat_id[len("group:"):]
    else:
        params["recipient"] = [chat_id]
    return params


async def send(
    self,
    chat_id: str,
    text: str,
    reply_to_message_id: Optional[str] = None,
    **kwargs,
) -> SendResult:
    """Send a text message.

    Args:
        chat_id: Recipient identifier, or ``"group:<id>"`` for a group.
        text: Message body.
        reply_to_message_id: Accepted for interface compatibility; not
            forwarded to the RPC call.

    Returns:
        ``SendResult(success=True)`` when the RPC returned a result,
        otherwise a failed ``SendResult``.
    """
    await self._stop_typing_indicator(chat_id)

    params = self._recipient_params(chat_id)
    params["message"] = text

    result = await self._rpc("send", params)

    if result is not None:
        return SendResult(success=True)
    return SendResult(success=False, error="RPC send failed")


async def send_typing(self, chat_id: str) -> None:
    """Send a typing indicator to *chat_id* (direct chat or ``"group:<id>"``)."""
    await self._rpc("sendTyping", self._recipient_params(chat_id), rpc_id="typing")


async def send_image(
    self,
    chat_id: str,
    image_url: str,
    caption: Optional[str] = None,
    **kwargs,
) -> SendResult:
    """Send an image. Supports http(s):// and file:// URLs.

    ``file://`` URLs are resolved to a local path; anything else is
    downloaded through ``cache_image_from_url``. The file must exist and
    be no larger than ``SIGNAL_MAX_ATTACHMENT_SIZE`` bytes.

    Returns:
        A ``SendResult`` describing success or the reason for failure.
    """
    await self._stop_typing_indicator(chat_id)

    # Resolve image to local path
    if image_url.startswith("file://"):
        file_path = unquote(image_url[7:])
    else:
        # Download remote image to cache
        try:
            file_path = await cache_image_from_url(image_url)
        except Exception as e:
            logger.warning("Signal: failed to download image: %s", e)
            return SendResult(success=False, error=str(e))

    if not file_path or not Path(file_path).exists():
        return SendResult(success=False, error="Image file not found")

    # Validate size
    file_size = Path(file_path).stat().st_size
    if file_size > SIGNAL_MAX_ATTACHMENT_SIZE:
        return SendResult(success=False, error=f"Image too large ({file_size} bytes)")

    params = self._recipient_params(chat_id)
    params["message"] = caption or ""
    params["attachments"] = [file_path]

    result = await self._rpc("send", params)
    if result is not None:
        return SendResult(success=True)
    return SendResult(success=False, error="RPC send with attachment failed")


async def send_document(
    self,
    chat_id: str,
    file_path: str,
    caption: Optional[str] = None,
    filename: Optional[str] = None,
    **kwargs,
) -> SendResult:
    """Send a document/file attachment.

    Args:
        chat_id: Recipient identifier, or ``"group:<id>"`` for a group.
        file_path: Local path of the file to attach.
        caption: Optional message text sent with the attachment.
        filename: Accepted for interface compatibility; not forwarded to
            the RPC call.

    Returns:
        A ``SendResult`` describing success or the reason for failure.
    """
    await self._stop_typing_indicator(chat_id)

    if not Path(file_path).exists():
        return SendResult(success=False, error="File not found")

    # Apply the same size limit send_image enforces, so oversized files are
    # rejected locally instead of being handed to the daemon.
    file_size = Path(file_path).stat().st_size
    if file_size > SIGNAL_MAX_ATTACHMENT_SIZE:
        return SendResult(success=False, error=f"File too large ({file_size} bytes)")

    params = self._recipient_params(chat_id)
    params["message"] = caption or ""
    params["attachments"] = [file_path]

    result = await self._rpc("send", params)
    if result is not None:
        return SendResult(success=True)
    return SendResult(success=False, error="RPC send document failed")


# ------------------------------------------------------------------
# Typing
Indicators + # ------------------------------------------------------------------ + + async def _start_typing_indicator(self, chat_id: str) -> None: + """Start a typing indicator loop for a chat.""" + if chat_id in self._typing_tasks: + return # Already running + + async def _typing_loop(): + try: + while True: + await self.send_typing(chat_id) + await asyncio.sleep(TYPING_INTERVAL) + except asyncio.CancelledError: + pass + + self._typing_tasks[chat_id] = asyncio.create_task(_typing_loop()) + + async def _stop_typing_indicator(self, chat_id: str) -> None: + """Stop a typing indicator loop for a chat.""" + task = self._typing_tasks.pop(chat_id, None) + if task: + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + + # ------------------------------------------------------------------ + # Chat Info + # ------------------------------------------------------------------ + + async def get_chat_info(self, chat_id: str) -> Dict[str, Any]: + """Get information about a chat/contact.""" + if chat_id.startswith("group:"): + return { + "name": chat_id, + "type": "group", + "chat_id": chat_id, + } + + # Try to resolve contact name + result = await self._rpc("getContact", { + "account": self.account, + "contactAddress": chat_id, + }) + + name = chat_id + if result and isinstance(result, dict): + name = result.get("name") or result.get("profileName") or chat_id + + return { + "name": name, + "type": "dm", + "chat_id": chat_id, + } diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 1ea1971e32..c49155d0a9 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -155,6 +155,14 @@ class TelegramAdapter(BasePlatformAdapter): BotCommand("status", "Show session info"), BotCommand("stop", "Stop the running agent"), BotCommand("sethome", "Set this chat as the home channel"), + BotCommand("compress", "Compress conversation context"), + BotCommand("title", "Set or show the session title"), + BotCommand("resume", 
"Resume a previously-named session"), + BotCommand("usage", "Show token usage for this session"), + BotCommand("provider", "Show available providers"), + BotCommand("insights", "Show usage insights and analytics"), + BotCommand("update", "Update Hermes to the latest version"), + BotCommand("reload_mcp", "Reload MCP servers from config"), BotCommand("help", "Show available commands"), ]) except Exception as e: diff --git a/gateway/run.py b/gateway/run.py index 379c4ef1f6..2584521d12 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -86,10 +86,29 @@ if _config_path.exists(): "enabled": "CONTEXT_COMPRESSION_ENABLED", "threshold": "CONTEXT_COMPRESSION_THRESHOLD", "summary_model": "CONTEXT_COMPRESSION_MODEL", + "summary_provider": "CONTEXT_COMPRESSION_PROVIDER", } for _cfg_key, _env_var in _compression_env_map.items(): if _cfg_key in _compression_cfg: os.environ[_env_var] = str(_compression_cfg[_cfg_key]) + # Auxiliary model overrides (vision, web_extract). + # Each task has provider + model; bridge non-default values to env vars. 
+ _auxiliary_cfg = _cfg.get("auxiliary", {}) + if _auxiliary_cfg and isinstance(_auxiliary_cfg, dict): + _aux_task_env = { + "vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"), + "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"), + } + for _task_key, (_prov_env, _model_env) in _aux_task_env.items(): + _task_cfg = _auxiliary_cfg.get(_task_key, {}) + if not isinstance(_task_cfg, dict): + continue + _prov = str(_task_cfg.get("provider", "")).strip() + _model = str(_task_cfg.get("model", "")).strip() + if _prov and _prov != "auto": + os.environ[_prov_env] = _prov + if _model: + os.environ[_model_env] = _model _agent_cfg = _cfg.get("agent", {}) if _agent_cfg and isinstance(_agent_cfg, dict): if "max_turns" in _agent_cfg: @@ -99,6 +118,12 @@ if _config_path.exists(): _tz_cfg = _cfg.get("timezone", "") if _tz_cfg and isinstance(_tz_cfg, str) and "HERMES_TIMEZONE" not in os.environ: os.environ["HERMES_TIMEZONE"] = _tz_cfg.strip() + # Security settings + _security_cfg = _cfg.get("security", {}) + if isinstance(_security_cfg, dict): + _redact = _security_cfg.get("redact_secrets") + if _redact is not None: + os.environ["HERMES_REDACT_SECRETS"] = str(_redact).lower() except Exception: pass # Non-fatal; gateway can still run with .env values @@ -175,6 +200,7 @@ class GatewayRunner: self._ephemeral_system_prompt = self._load_ephemeral_system_prompt() self._reasoning_config = self._load_reasoning_config() self._provider_routing = self._load_provider_routing() + self._fallback_model = self._load_fallback_model() # Wire process registry into session store for reset protection from tools.process_registry import process_registry @@ -374,6 +400,26 @@ class GatewayRunner: pass return {} + @staticmethod + def _load_fallback_model() -> dict | None: + """Load fallback model config from config.yaml. + + Returns a dict with 'provider' and 'model' keys, or None if + not configured / both fields empty. 
+ """ + try: + import yaml as _y + cfg_path = _hermes_home / "config.yaml" + if cfg_path.exists(): + with open(cfg_path) as _f: + cfg = _y.safe_load(_f) or {} + fb = cfg.get("fallback_model", {}) or {} + if fb.get("provider") and fb.get("model"): + return fb + except Exception: + pass + return None + async def start(self) -> bool: """ Start the gateway and all configured platform adapters. @@ -572,6 +618,13 @@ class GatewayRunner: return None return SlackAdapter(config) + elif platform == Platform.SIGNAL: + from gateway.platforms.signal import SignalAdapter, check_signal_requirements + if not check_signal_requirements(): + logger.warning("Signal: SIGNAL_HTTP_URL or SIGNAL_ACCOUNT not configured") + return None + return SignalAdapter(config) + elif platform == Platform.HOMEASSISTANT: from gateway.platforms.homeassistant import HomeAssistantAdapter, check_ha_requirements if not check_ha_requirements(): @@ -607,12 +660,14 @@ class GatewayRunner: Platform.DISCORD: "DISCORD_ALLOWED_USERS", Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS", Platform.SLACK: "SLACK_ALLOWED_USERS", + Platform.SIGNAL: "SIGNAL_ALLOWED_USERS", } platform_allow_all_map = { Platform.TELEGRAM: "TELEGRAM_ALLOW_ALL_USERS", Platform.DISCORD: "DISCORD_ALLOW_ALL_USERS", Platform.WHATSAPP: "WHATSAPP_ALLOW_ALL_USERS", Platform.SLACK: "SLACK_ALLOW_ALL_USERS", + Platform.SIGNAL: "SIGNAL_ALLOW_ALL_USERS", } # Per-platform allow-all flag (e.g., DISCORD_ALLOW_ALL_USERS=true) @@ -710,7 +765,8 @@ class GatewayRunner: # Emit command:* hook for any recognized slash command _known_commands = {"new", "reset", "help", "status", "stop", "model", "personality", "retry", "undo", "sethome", "set-home", - "compress", "usage", "insights", "reload-mcp", "update"} + "compress", "usage", "insights", "reload-mcp", "reload_mcp", + "update", "title", "resume", "provider"} if command and command in _known_commands: await self.hooks.emit(f"command:{command}", { "platform": source.platform.value if source.platform else "", @@ -758,11 
+814,17 @@ class GatewayRunner: if command == "insights": return await self._handle_insights_command(event) - if command == "reload-mcp": + if command in ("reload-mcp", "reload_mcp"): return await self._handle_reload_mcp_command(event) if command == "update": return await self._handle_update_command(event) + + if command == "title": + return await self._handle_title_command(event) + + if command == "resume": + return await self._handle_resume_command(event) # Skill slash commands: /skill-name loads the skill and sends to agent if command: @@ -844,159 +906,187 @@ class GatewayRunner: # every new message rehydrates an oversized transcript, causing # repeated truncation/context failures. Detect this early and # compress proactively — before the agent even starts. (#628) + # + # Thresholds are derived from the SAME compression config the + # agent uses (compression.threshold × model context length) so + # CLI and messaging platforms behave identically. # ----------------------------------------------------------------- if history and len(history) >= 4: - from agent.model_metadata import estimate_messages_tokens_rough + from agent.model_metadata import ( + estimate_messages_tokens_rough, + get_model_context_length, + ) - # Read thresholds from config.yaml → session_hygiene section - _hygiene_cfg = {} + # Read model + compression config from config.yaml — same + # source of truth the agent itself uses. 
+ _hyg_model = "anthropic/claude-sonnet-4.6" + _hyg_threshold_pct = 0.85 + _hyg_compression_enabled = True try: _hyg_cfg_path = _hermes_home / "config.yaml" if _hyg_cfg_path.exists(): import yaml as _hyg_yaml with open(_hyg_cfg_path) as _hyg_f: _hyg_data = _hyg_yaml.safe_load(_hyg_f) or {} - _hygiene_cfg = _hyg_data.get("session_hygiene", {}) - if not isinstance(_hygiene_cfg, dict): - _hygiene_cfg = {} + + # Resolve model name (same logic as run_sync) + _model_cfg = _hyg_data.get("model", {}) + if isinstance(_model_cfg, str): + _hyg_model = _model_cfg + elif isinstance(_model_cfg, dict): + _hyg_model = _model_cfg.get("default", _hyg_model) + + # Read compression settings + _comp_cfg = _hyg_data.get("compression", {}) + if isinstance(_comp_cfg, dict): + _hyg_threshold_pct = float( + _comp_cfg.get("threshold", _hyg_threshold_pct) + ) + _hyg_compression_enabled = str( + _comp_cfg.get("enabled", True) + ).lower() in ("true", "1", "yes") except Exception: pass - _compress_token_threshold = int( - _hygiene_cfg.get("auto_compress_tokens", 100_000) - ) - _compress_msg_threshold = int( - _hygiene_cfg.get("auto_compress_messages", 200) - ) - _warn_token_threshold = int( - _hygiene_cfg.get("warn_tokens", 200_000) + # Also check env overrides (same as run_agent.py) + _hyg_threshold_pct = float( + os.getenv("CONTEXT_COMPRESSION_THRESHOLD", str(_hyg_threshold_pct)) ) + if os.getenv("CONTEXT_COMPRESSION_ENABLED", "").lower() in ("false", "0", "no"): + _hyg_compression_enabled = False - _msg_count = len(history) - _approx_tokens = estimate_messages_tokens_rough(history) - - _needs_compress = ( - _approx_tokens >= _compress_token_threshold - or _msg_count >= _compress_msg_threshold - ) - - if _needs_compress: - logger.info( - "Session hygiene: %s messages, ~%s tokens — auto-compressing " - "(thresholds: %s msgs / %s tokens)", - _msg_count, f"{_approx_tokens:,}", - _compress_msg_threshold, f"{_compress_token_threshold:,}", + if _hyg_compression_enabled: + _hyg_context_length = 
get_model_context_length(_hyg_model) + _compress_token_threshold = int( + _hyg_context_length * _hyg_threshold_pct ) + # Warn if still huge after compression (95% of context) + _warn_token_threshold = int(_hyg_context_length * 0.95) + + _msg_count = len(history) + _approx_tokens = estimate_messages_tokens_rough(history) + + _needs_compress = _approx_tokens >= _compress_token_threshold + + if _needs_compress: + logger.info( + "Session hygiene: %s messages, ~%s tokens — auto-compressing " + "(threshold: %s%% of %s = %s tokens)", + _msg_count, f"{_approx_tokens:,}", + int(_hyg_threshold_pct * 100), + f"{_hyg_context_length:,}", + f"{_compress_token_threshold:,}", + ) + + _hyg_adapter = self.adapters.get(source.platform) + if _hyg_adapter: + try: + await _hyg_adapter.send( + source.chat_id, + f"🗜️ Session is large ({_msg_count} messages, " + f"~{_approx_tokens:,} tokens). Auto-compressing..." + ) + except Exception: + pass - _hyg_adapter = self.adapters.get(source.platform) - if _hyg_adapter: try: - await _hyg_adapter.send( - source.chat_id, - f"🗜️ Session is large ({_msg_count} messages, " - f"~{_approx_tokens:,} tokens). Auto-compressing..." 
- ) - except Exception: - pass + from run_agent import AIAgent - try: - from run_agent import AIAgent + _hyg_runtime = _resolve_runtime_agent_kwargs() + if _hyg_runtime.get("api_key"): + _hyg_msgs = [ + {"role": m.get("role"), "content": m.get("content")} + for m in history + if m.get("role") in ("user", "assistant") + and m.get("content") + ] - _hyg_runtime = _resolve_runtime_agent_kwargs() - if _hyg_runtime.get("api_key"): - _hyg_msgs = [ - {"role": m.get("role"), "content": m.get("content")} - for m in history - if m.get("role") in ("user", "assistant") - and m.get("content") - ] - - if len(_hyg_msgs) >= 4: - _hyg_agent = AIAgent( - **_hyg_runtime, - max_iterations=4, - quiet_mode=True, - enabled_toolsets=["memory"], - session_id=session_entry.session_id, - ) - - loop = asyncio.get_event_loop() - _compressed, _ = await loop.run_in_executor( - None, - lambda: _hyg_agent._compress_context( - _hyg_msgs, "", - approx_tokens=_approx_tokens, - ), - ) - - self.session_store.rewrite_transcript( - session_entry.session_id, _compressed - ) - history = _compressed - _new_count = len(_compressed) - _new_tokens = estimate_messages_tokens_rough( - _compressed - ) - - logger.info( - "Session hygiene: compressed %s → %s msgs, " - "~%s → ~%s tokens", - _msg_count, _new_count, - f"{_approx_tokens:,}", f"{_new_tokens:,}", - ) - - if _hyg_adapter: - try: - await _hyg_adapter.send( - source.chat_id, - f"🗜️ Compressed: {_msg_count} → " - f"{_new_count} messages, " - f"~{_approx_tokens:,} → " - f"~{_new_tokens:,} tokens" - ) - except Exception: - pass - - # Still too large after compression — warn user - if _new_tokens >= _warn_token_threshold: - logger.warning( - "Session hygiene: still ~%s tokens after " - "compression — suggesting /reset", - f"{_new_tokens:,}", + if len(_hyg_msgs) >= 4: + _hyg_agent = AIAgent( + **_hyg_runtime, + max_iterations=4, + quiet_mode=True, + enabled_toolsets=["memory"], + session_id=session_entry.session_id, ) + + loop = asyncio.get_event_loop() + 
_compressed, _ = await loop.run_in_executor( + None, + lambda: _hyg_agent._compress_context( + _hyg_msgs, "", + approx_tokens=_approx_tokens, + ), + ) + + self.session_store.rewrite_transcript( + session_entry.session_id, _compressed + ) + history = _compressed + _new_count = len(_compressed) + _new_tokens = estimate_messages_tokens_rough( + _compressed + ) + + logger.info( + "Session hygiene: compressed %s → %s msgs, " + "~%s → ~%s tokens", + _msg_count, _new_count, + f"{_approx_tokens:,}", f"{_new_tokens:,}", + ) + if _hyg_adapter: try: await _hyg_adapter.send( source.chat_id, - "⚠️ Session is still very large " - "after compression " - f"(~{_new_tokens:,} tokens). " - "Consider using /reset to start " - "fresh if you experience issues." + f"🗜️ Compressed: {_msg_count} → " + f"{_new_count} messages, " + f"~{_approx_tokens:,} → " + f"~{_new_tokens:,} tokens" ) except Exception: pass - except Exception as e: - logger.warning( - "Session hygiene auto-compress failed: %s", e - ) - # Compression failed and session is dangerously large - if _approx_tokens >= _warn_token_threshold: - _hyg_adapter = self.adapters.get(source.platform) - if _hyg_adapter: - try: - await _hyg_adapter.send( - source.chat_id, - f"⚠️ Session is very large " - f"({_msg_count} messages, " - f"~{_approx_tokens:,} tokens) and " - "auto-compression failed. Consider " - "using /compress or /reset to avoid " - "issues." - ) - except Exception: - pass + # Still too large after compression — warn user + if _new_tokens >= _warn_token_threshold: + logger.warning( + "Session hygiene: still ~%s tokens after " + "compression — suggesting /reset", + f"{_new_tokens:,}", + ) + if _hyg_adapter: + try: + await _hyg_adapter.send( + source.chat_id, + "⚠️ Session is still very large " + "after compression " + f"(~{_new_tokens:,} tokens). " + "Consider using /reset to start " + "fresh if you experience issues." 
+ ) + except Exception: + pass + + except Exception as e: + logger.warning( + "Session hygiene auto-compress failed: %s", e + ) + # Compression failed and session is dangerously large + if _approx_tokens >= _warn_token_threshold: + _hyg_adapter = self.adapters.get(source.platform) + if _hyg_adapter: + try: + await _hyg_adapter.send( + source.chat_id, + f"⚠️ Session is very large " + f"({_msg_count} messages, " + f"~{_approx_tokens:,} tokens) and " + "auto-compression failed. Consider " + "using /compress or /reset to avoid " + "issues." + ) + except Exception: + pass # First-message onboarding -- only on the very first interaction ever if not history and not self.session_store.has_any_sessions(): @@ -1301,6 +1391,8 @@ class GatewayRunner: "`/undo` — Remove the last exchange", "`/sethome` — Set this chat as the home channel", "`/compress` — Compress conversation context", + "`/title [name]` — Set or show the session title", + "`/resume [name]` — Resume a previously-named session", "`/usage` — Show token usage for this session", "`/insights [days]` — Show usage insights and analytics", "`/reload-mcp` — Reload MCP servers from config", @@ -1357,6 +1449,11 @@ class GatewayRunner: except Exception: current_provider = "openrouter" + # Detect custom endpoint: provider resolved to openrouter but a custom + # base URL is configured — the user set up a custom endpoint. 
+ if current_provider == "openrouter" and os.getenv("OPENAI_BASE_URL", "").strip(): + current_provider = "custom" + if not args: provider_label = _PROVIDER_LABELS.get(current_provider, current_provider) lines = [ @@ -1483,6 +1580,10 @@ class GatewayRunner: except Exception: current_provider = "openrouter" + # Detect custom endpoint + if current_provider == "openrouter" and os.getenv("OPENAI_BASE_URL", "").strip(): + current_provider = "custom" + current_label = _PROVIDER_LABELS.get(current_provider, current_provider) lines = [ @@ -1691,6 +1792,113 @@ class GatewayRunner: logger.warning("Manual compress failed: %s", e) return f"Compression failed: {e}" + async def _handle_title_command(self, event: MessageEvent) -> str: + """Handle /title command — set or show the current session's title.""" + source = event.source + session_entry = self.session_store.get_or_create_session(source) + session_id = session_entry.session_id + + if not self._session_db: + return "Session database not available." + + title_arg = event.get_command_args().strip() + if title_arg: + # Sanitize the title before setting + try: + sanitized = self._session_db.sanitize_title(title_arg) + except ValueError as e: + return f"⚠️ {e}" + if not sanitized: + return "⚠️ Title is empty after cleanup. Please use printable characters." + # Set the title + try: + if self._session_db.set_session_title(session_id, sanitized): + return f"✏️ Session title set: **{sanitized}**" + else: + return "Session not found in database." + except ValueError as e: + return f"⚠️ {e}" + else: + # Show the current title + title = self._session_db.get_session_title(session_id) + if title: + return f"📌 Session title: **{title}**" + else: + return "No title set. Usage: `/title My Session Name`" + + async def _handle_resume_command(self, event: MessageEvent) -> str: + """Handle /resume command — switch to a previously-named session.""" + if not self._session_db: + return "Session database not available." 
+ + source = event.source + session_key = build_session_key(source) + name = event.get_command_args().strip() + + if not name: + # List recent titled sessions for this user/platform + try: + user_source = source.platform.value if source.platform else None + sessions = self._session_db.list_sessions_rich( + source=user_source, limit=10 + ) + titled = [s for s in sessions if s.get("title")] + if not titled: + return ( + "No named sessions found.\n" + "Use `/title My Session` to name your current session, " + "then `/resume My Session` to return to it later." + ) + lines = ["📋 **Named Sessions**\n"] + for s in titled[:10]: + title = s["title"] + preview = s.get("preview", "")[:40] + preview_part = f" — _{preview}_" if preview else "" + lines.append(f"• **{title}**{preview_part}") + lines.append("\nUsage: `/resume `") + return "\n".join(lines) + except Exception as e: + logger.debug("Failed to list titled sessions: %s", e) + return f"Could not list sessions: {e}" + + # Resolve the name to a session ID + target_id = self._session_db.resolve_session_by_title(name) + if not target_id: + return ( + f"No session found matching '**{name}**'.\n" + "Use `/resume` with no arguments to see available sessions." + ) + + # Check if already on that session + current_entry = self.session_store.get_or_create_session(source) + if current_entry.session_id == target_id: + return f"📌 Already on session **{name}**." + + # Flush memories for current session before switching + try: + asyncio.create_task(self._async_flush_memories(current_entry.session_id)) + except Exception as e: + logger.debug("Memory flush on resume failed: %s", e) + + # Clear any running agent for this session key + if session_key in self._running_agents: + del self._running_agents[session_key] + + # Switch the session entry to point at the old session + new_entry = self.session_store.switch_session(session_key, target_id) + if not new_entry: + return "Failed to switch session." 
+ + # Get the title for confirmation + title = self._session_db.get_session_title(target_id) or name + + # Count messages for context + history = self.session_store.load_transcript(target_id) + msg_count = len([m for m in history if m.get("role") == "user"]) if history else 0 + msg_part = f" ({msg_count} message{'s' if msg_count != 1 else ''})" if msg_count else "" + + return f"↻ Resumed session **{title}**{msg_part}. Conversation restored." + async def _handle_usage_command(self, event: MessageEvent) -> str: """Handle /usage command -- show token usage for the session's last agent run.""" source = event.source @@ -2488,6 +2696,7 @@ class GatewayRunner: platform=platform_key, honcho_session_key=session_key, session_db=self._session_db, + fallback_model=self._fallback_model, ) # Store agent reference for interrupt support diff --git a/gateway/session.py b/gateway/session.py index 4c2d9c208b..dfe3f12efd 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -45,6 +45,8 @@ class SessionSource: user_name: Optional[str] = None thread_id: Optional[str] = None # For forum topics, Discord threads, etc. 
chat_topic: Optional[str] = None # Channel topic/description (Discord, Slack) + user_id_alt: Optional[str] = None # Signal UUID (alternative to phone number) + chat_id_alt: Optional[str] = None # Signal group internal ID @property def description(self) -> str: @@ -68,7 +70,7 @@ class SessionSource: return ", ".join(parts) def to_dict(self) -> Dict[str, Any]: - return { + d = { "platform": self.platform.value, "chat_id": self.chat_id, "chat_name": self.chat_name, @@ -78,6 +80,11 @@ class SessionSource: "thread_id": self.thread_id, "chat_topic": self.chat_topic, } + if self.user_id_alt: + d["user_id_alt"] = self.user_id_alt + if self.chat_id_alt: + d["chat_id_alt"] = self.chat_id_alt + return d @classmethod def from_dict(cls, data: Dict[str, Any]) -> "SessionSource": @@ -90,6 +97,8 @@ class SessionSource: user_name=data.get("user_name"), thread_id=data.get("thread_id"), chat_topic=data.get("chat_topic"), + user_id_alt=data.get("user_id_alt"), + chat_id_alt=data.get("chat_id_alt"), ) @classmethod @@ -333,7 +342,7 @@ class SessionStore: if sessions_file.exists(): try: - with open(sessions_file, "r") as f: + with open(sessions_file, "r", encoding="utf-8") as f: data = json.load(f) for key, entry_data in data.items(): self._entries[key] = SessionEntry.from_dict(entry_data) @@ -348,7 +357,7 @@ class SessionStore: sessions_file = self.sessions_dir / "sessions.json" data = {key: entry.to_dict() for key, entry in self._entries.items()} - with open(sessions_file, "w") as f: + with open(sessions_file, "w", encoding="utf-8") as f: json.dump(data, f, indent=2) def _generate_session_key(self, source: SessionSource) -> str: @@ -593,7 +602,49 @@ class SessionStore: logger.debug("Session DB operation failed: %s", e) return new_entry - + + def switch_session(self, session_key: str, target_session_id: str) -> Optional[SessionEntry]: + """Switch a session key to point at an existing session ID. + + Used by ``/resume`` to restore a previously-named session. 
+ Ends the current session in SQLite (like reset), but instead of + generating a fresh session ID, re-uses ``target_session_id`` so the + old transcript is loaded on the next message. + """ + self._ensure_loaded() + + if session_key not in self._entries: + return None + + old_entry = self._entries[session_key] + + # Don't switch if already on that session + if old_entry.session_id == target_session_id: + return old_entry + + # End the current session in SQLite + if self._db: + try: + self._db.end_session(old_entry.session_id, "session_switch") + except Exception as e: + logger.debug("Session DB end_session failed: %s", e) + + now = datetime.now() + new_entry = SessionEntry( + session_key=session_key, + session_id=target_session_id, + created_at=now, + updated_at=now, + origin=old_entry.origin, + display_name=old_entry.display_name, + platform=old_entry.platform, + chat_type=old_entry.chat_type, + ) + + self._entries[session_key] = new_entry + self._save() + return new_entry + def list_sessions(self, active_minutes: Optional[int] = None) -> List[SessionEntry]: """List all sessions, optionally filtered by activity.""" self._ensure_loaded() @@ -630,7 +681,7 @@ class SessionStore: # Also write legacy JSONL (keeps existing tooling working during transition) transcript_path = self.get_transcript_path(session_id) - with open(transcript_path, "a") as f: + with open(transcript_path, "a", encoding="utf-8") as f: f.write(json.dumps(message, ensure_ascii=False) + "\n") def rewrite_transcript(self, session_id: str, messages: List[Dict[str, Any]]) -> None: @@ -657,7 +708,7 @@ class SessionStore: # JSONL: overwrite the file transcript_path = self.get_transcript_path(session_id) - with open(transcript_path, "w") as f: + with open(transcript_path, "w", encoding="utf-8") as f: for msg in messages: f.write(json.dumps(msg, ensure_ascii=False) + "\n") @@ -679,7 +730,7 @@ class SessionStore: return [] messages = [] - with open(transcript_path, "r") as f: + with open(transcript_path, 
"r", encoding="utf-8") as f: for line in f: line = line.strip() if line: diff --git a/hermes_cli/clipboard.py b/hermes_cli/clipboard.py index fa750d85c5..6373cfc8b3 100644 --- a/hermes_cli/clipboard.py +++ b/hermes_cli/clipboard.py @@ -285,8 +285,8 @@ def _convert_to_png(path: Path) -> bool: logger.debug("Pillow BMP→PNG conversion failed: %s", e) # Fall back to ImageMagick convert + tmp = path.with_suffix(".bmp") try: - tmp = path.with_suffix(".bmp") path.rename(tmp) r = subprocess.run( ["convert", str(tmp), "png:" + str(path)], @@ -297,8 +297,12 @@ def _convert_to_png(path: Path) -> bool: return True except FileNotFoundError: logger.debug("ImageMagick not installed — cannot convert BMP to PNG") + if tmp.exists() and not path.exists(): + tmp.rename(path) except Exception as e: logger.debug("ImageMagick BMP→PNG conversion failed: %s", e) + if tmp.exists() and not path.exists(): + tmp.rename(path) # Can't convert — BMP is still usable as-is for most APIs return path.exists() and path.stat().st_size > 0 diff --git a/hermes_cli/codex_models.py b/hermes_cli/codex_models.py index 416c76add5..8259b70643 100644 --- a/hermes_cli/codex_models.py +++ b/hermes_cli/codex_models.py @@ -94,8 +94,6 @@ def _read_cache_models(codex_home: Path) -> List[str]: if not isinstance(slug, str) or not slug.strip(): continue slug = slug.strip() - if "codex" not in slug.lower(): - continue if item.get("supported_in_api") is False: continue visibility = item.get("visibility") diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 61c5864fd6..20f01b1748 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -34,6 +34,7 @@ COMMANDS = { "/platforms": "Show gateway/messaging platform status", "/verbose": "Cycle tool progress display: off → new → all → verbose", "/compress": "Manually compress conversation context (flush memories + summarize)", + "/title": "Set a title for the current session (usage: /title My Session Name)", "/usage": "Show token usage for the current 
session", "/insights": "Show usage insights and analytics (last 30 days)", "/paste": "Check clipboard for an image and attach it", diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 0e6f51c1a3..7a31b551d4 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -81,17 +81,34 @@ DEFAULT_CONFIG = { "browser": { "inactivity_timeout": 120, + "record_sessions": False, # Auto-record browser sessions as WebM videos }, "compression": { "enabled": True, "threshold": 0.85, "summary_model": "google/gemini-3-flash-preview", + "summary_provider": "auto", + }, + + # Auxiliary model overrides (advanced). By default Hermes auto-selects + # the provider and model for each side task. Set these to override. + "auxiliary": { + "vision": { + "provider": "auto", # auto | openrouter | nous | main + "model": "", # e.g. "google/gemini-2.5-flash", "gpt-4o" + }, + "web_extract": { + "provider": "auto", + "model": "", + }, }, "display": { "compact": False, "personality": "kawaii", + "resume_display": "full", # "full" (show previous messages) | "minimal" (one-liner only) + "bell_on_complete": False, # Play terminal bell (\a) when agent finishes a response }, # Text-to-speech configuration @@ -422,7 +439,7 @@ OPTIONAL_ENV_VARS = { "category": "setting", }, "HERMES_MAX_ITERATIONS": { - "description": "Maximum tool-calling iterations per conversation (default: 60)", + "description": "Maximum tool-calling iterations per conversation (default: 90)", "prompt": "Max iterations", "url": None, "password": False, @@ -742,6 +759,36 @@ def load_config() -> Dict[str, Any]: return config +_COMMENTED_SECTIONS = """ +# ── Security ────────────────────────────────────────────────────────── +# API keys, tokens, and passwords are redacted from tool output by default. +# Set to false to see full values (useful for debugging auth issues). 
+# +# security: +# redact_secrets: false + +# ── Fallback Model ──────────────────────────────────────────────────── +# Automatic provider failover when primary is unavailable. +# Uncomment and configure to enable. Triggers on rate limits (429), +# overload (529), service errors (503), or connection failures. +# +# Supported providers: +# openrouter (OPENROUTER_API_KEY) — routes to any model +# openai-codex (OAuth — hermes login) — OpenAI Codex +# nous (OAuth — hermes login) — Nous Portal +# zai (ZAI_API_KEY) — Z.AI / GLM +# kimi-coding (KIMI_API_KEY) — Kimi / Moonshot +# minimax (MINIMAX_API_KEY) — MiniMax +# minimax-cn (MINIMAX_CN_API_KEY) — MiniMax (China) +# +# For custom OpenAI-compatible endpoints, add base_url and api_key_env. +# +# fallback_model: +# provider: openrouter +# model: anthropic/claude-sonnet-4 +""" + + def save_config(config: Dict[str, Any]): """Save configuration to ~/.hermes/config.yaml.""" ensure_hermes_home() @@ -749,6 +796,18 @@ def save_config(config: Dict[str, Any]): with open(config_path, 'w') as f: yaml.dump(config, f, default_flow_style=False, sort_keys=False) + # Append commented-out sections for features that are off by default + # or only relevant when explicitly configured. Skip sections the + # user has already uncommented and configured. 
+ sections = [] + sec = config.get("security", {}) + if not sec or sec.get("redact_secrets") is None: + sections.append("security") + fb = config.get("fallback_model", {}) + if not fb or not (fb.get("provider") and fb.get("model")): + sections.append("fallback") + if sections: + f.write(_COMMENTED_SECTIONS) def load_env() -> Dict[str, str]: @@ -912,6 +971,31 @@ def show_config(): if enabled: print(f" Threshold: {compression.get('threshold', 0.85) * 100:.0f}%") print(f" Model: {compression.get('summary_model', 'google/gemini-3-flash-preview')}") + comp_provider = compression.get('summary_provider', 'auto') + if comp_provider != 'auto': + print(f" Provider: {comp_provider}") + + # Auxiliary models + auxiliary = config.get('auxiliary', {}) + aux_tasks = { + "Vision": auxiliary.get('vision', {}), + "Web extract": auxiliary.get('web_extract', {}), + } + has_overrides = any( + t.get('provider', 'auto') != 'auto' or t.get('model', '') + for t in aux_tasks.values() + ) + if has_overrides: + print() + print(color("◆ Auxiliary Models (overrides)", Colors.CYAN, Colors.BOLD)) + for label, task_cfg in aux_tasks.items(): + prov = task_cfg.get('provider', 'auto') + mdl = task_cfg.get('model', '') + if prov != 'auto' or mdl: + parts = [f"provider={prov}"] + if mdl: + parts.append(f"model={mdl}") + print(f" {label:12s} {', '.join(parts)}") # Messaging print() @@ -969,7 +1053,7 @@ def set_config_value(key: str, value: str): 'FAL_KEY', 'TELEGRAM_BOT_TOKEN', 'DISCORD_BOT_TOKEN', 'TERMINAL_SSH_HOST', 'TERMINAL_SSH_USER', 'TERMINAL_SSH_KEY', 'SUDO_PASSWORD', 'SLACK_BOT_TOKEN', 'SLACK_APP_TOKEN', - 'GITHUB_TOKEN', 'HONCHO_API_KEY', 'NOUS_API_KEY', 'WANDB_API_KEY', + 'GITHUB_TOKEN', 'HONCHO_API_KEY', 'WANDB_API_KEY', 'TINKER_API_KEY', ] diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index b89db974c1..64fe551bef 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -507,6 +507,12 @@ _PLATFORMS = [ "emoji": "📲", "token_var": "WHATSAPP_ENABLED", }, + { + "key": 
"signal", + "label": "Signal", + "emoji": "📡", + "token_var": "SIGNAL_HTTP_URL", + }, ] @@ -525,6 +531,13 @@ def _platform_status(platform: dict) -> str: return "configured + paired" return "enabled, not paired" return "not configured" + if platform.get("key") == "signal": + account = get_env_value("SIGNAL_ACCOUNT") + if val and account: + return "configured" + if val or account: + return "partially configured" + return "not configured" if val: return "configured" return "not configured" @@ -650,6 +663,121 @@ def _is_service_running() -> bool: return len(find_gateway_pids()) > 0 +def _setup_signal(): + """Interactive setup for Signal messenger.""" + import shutil + + print() + print(color(" ─── 📡 Signal Setup ───", Colors.CYAN)) + + existing_url = get_env_value("SIGNAL_HTTP_URL") + existing_account = get_env_value("SIGNAL_ACCOUNT") + if existing_url and existing_account: + print() + print_success("Signal is already configured.") + if not prompt_yes_no(" Reconfigure Signal?", False): + return + + # Check if signal-cli is available + print() + if shutil.which("signal-cli"): + print_success("signal-cli found on PATH.") + else: + print_warning("signal-cli not found on PATH.") + print_info(" Signal requires signal-cli running as an HTTP daemon.") + print_info(" Install options:") + print_info(" Linux: sudo apt install signal-cli") + print_info(" or download from https://github.com/AsamK/signal-cli") + print_info(" macOS: brew install signal-cli") + print_info(" Docker: bbernhard/signal-cli-rest-api") + print() + print_info(" After installing, link your account and start the daemon:") + print_info(" signal-cli link -n \"HermesAgent\"") + print_info(" signal-cli --account +YOURNUMBER daemon --http 127.0.0.1:8080") + print() + + # HTTP URL + print() + print_info(" Enter the URL where signal-cli HTTP daemon is running.") + default_url = existing_url or "http://127.0.0.1:8080" + try: + url = input(f" HTTP URL [{default_url}]: ").strip() or default_url + except (EOFError, 
KeyboardInterrupt): + print("\n Setup cancelled.") + return + + # Test connectivity + print_info(" Testing connection...") + try: + import httpx + resp = httpx.get(f"{url.rstrip('/')}/api/v1/check", timeout=10.0) + if resp.status_code == 200: + print_success(" signal-cli daemon is reachable!") + else: + print_warning(f" signal-cli responded with status {resp.status_code}.") + if not prompt_yes_no(" Continue anyway?", False): + return + except Exception as e: + print_warning(f" Could not reach signal-cli at {url}: {e}") + if not prompt_yes_no(" Save this URL anyway? (you can start signal-cli later)", True): + return + + save_env_value("SIGNAL_HTTP_URL", url) + + # Account phone number + print() + print_info(" Enter your Signal account phone number in E.164 format.") + print_info(" Example: +15551234567") + default_account = existing_account or "" + try: + account = input(f" Account number{f' [{default_account}]' if default_account else ''}: ").strip() + if not account: + account = default_account + except (EOFError, KeyboardInterrupt): + print("\n Setup cancelled.") + return + + if not account: + print_error(" Account number is required.") + return + + save_env_value("SIGNAL_ACCOUNT", account) + + # Allowed users + print() + print_info(" The gateway DENIES all users by default for security.") + print_info(" Enter phone numbers or UUIDs of allowed users (comma-separated).") + existing_allowed = get_env_value("SIGNAL_ALLOWED_USERS") or "" + default_allowed = existing_allowed or account + try: + allowed = input(f" Allowed users [{default_allowed}]: ").strip() or default_allowed + except (EOFError, KeyboardInterrupt): + print("\n Setup cancelled.") + return + + save_env_value("SIGNAL_ALLOWED_USERS", allowed) + + # Group messaging + print() + if prompt_yes_no(" Enable group messaging? 
(disabled by default for security)", False): + print() + print_info(" Enter group IDs to allow, or * for all groups.") + existing_groups = get_env_value("SIGNAL_GROUP_ALLOWED_USERS") or "" + try: + groups = input(f" Group IDs [{existing_groups or '*'}]: ").strip() or existing_groups or "*" + except (EOFError, KeyboardInterrupt): + print("\n Setup cancelled.") + return + save_env_value("SIGNAL_GROUP_ALLOWED_USERS", groups) + + print() + print_success("Signal configured!") + print_info(f" URL: {url}") + print_info(f" Account: {account}") + print_info(f" DM auth: via SIGNAL_ALLOWED_USERS + DM pairing") + print_info(f" Groups: {'enabled' if get_env_value('SIGNAL_GROUP_ALLOWED_USERS') else 'disabled'}") + + def gateway_setup(): """Interactive setup for messaging platforms + gateway service.""" @@ -702,6 +830,8 @@ def gateway_setup(): if platform["key"] == "whatsapp": _setup_whatsapp() + elif platform["key"] == "signal": + _setup_signal() else: _setup_standard_platform(platform) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 20f33998ad..c2e8bed40a 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -21,6 +21,7 @@ Usage: hermes version # Show version hermes update # Update to latest version hermes uninstall # Uninstall Hermes Agent + hermes sessions browse # Interactive session picker with search """ import argparse @@ -106,6 +107,279 @@ def _has_any_provider_configured() -> bool: return False +def _session_browse_picker(sessions: list) -> Optional[str]: + """Interactive curses-based session browser with live search filtering. + + Returns the selected session ID, or None if cancelled. + Uses curses (not simple_term_menu) to avoid the ghost-duplication rendering + bug in tmux/iTerm when arrow keys are used. 
+ """ + if not sessions: + print("No sessions found.") + return None + + # Try curses-based picker first + try: + import curses + import time as _time + from datetime import datetime + + result_holder = [None] + + def _relative_time(ts): + if not ts: + return "?" + delta = _time.time() - ts + if delta < 60: + return "just now" + elif delta < 3600: + return f"{int(delta / 60)}m ago" + elif delta < 86400: + return f"{int(delta / 3600)}h ago" + elif delta < 172800: + return "yesterday" + elif delta < 604800: + return f"{int(delta / 86400)}d ago" + else: + return datetime.fromtimestamp(ts).strftime("%Y-%m-%d") + + def _format_row(s, max_x): + """Format a session row for display.""" + title = (s.get("title") or "").strip() + preview = (s.get("preview") or "").strip() + source = s.get("source", "")[:6] + last_active = _relative_time(s.get("last_active")) + sid = s["id"][:18] + + # Adaptive column widths based on terminal width + # Layout: [arrow 3] [title/preview flexible] [active 12] [src 6] [id 18] + fixed_cols = 3 + 12 + 6 + 18 + 6 # arrow + active + src + id + padding + name_width = max(20, max_x - fixed_cols) + + if title: + name = title[:name_width] + elif preview: + name = preview[:name_width] + else: + name = sid + + return f"{name:<{name_width}} {last_active:<10} {source:<5} {sid}" + + def _match(s, query): + """Check if a session matches the search query (case-insensitive).""" + q = query.lower() + return ( + q in (s.get("title") or "").lower() + or q in (s.get("preview") or "").lower() + or q in s.get("id", "").lower() + or q in (s.get("source") or "").lower() + ) + + def _curses_browse(stdscr): + curses.curs_set(0) + if curses.has_colors(): + curses.start_color() + curses.use_default_colors() + curses.init_pair(1, curses.COLOR_GREEN, -1) # selected + curses.init_pair(2, curses.COLOR_YELLOW, -1) # header + curses.init_pair(3, curses.COLOR_CYAN, -1) # search + curses.init_pair(4, 8, -1) # dim + + cursor = 0 + scroll_offset = 0 + search_text = "" + filtered = 
list(sessions) + + while True: + stdscr.clear() + max_y, max_x = stdscr.getmaxyx() + if max_y < 5 or max_x < 40: + # Terminal too small + try: + stdscr.addstr(0, 0, "Terminal too small") + except curses.error: + pass + stdscr.refresh() + stdscr.getch() + return + + # Header line + if search_text: + header = f" Browse sessions — filter: {search_text}█" + header_attr = curses.A_BOLD + if curses.has_colors(): + header_attr |= curses.color_pair(3) + else: + header = " Browse sessions — ↑↓ navigate Enter select Type to filter Esc quit" + header_attr = curses.A_BOLD + if curses.has_colors(): + header_attr |= curses.color_pair(2) + try: + stdscr.addnstr(0, 0, header, max_x - 1, header_attr) + except curses.error: + pass + + # Column header line + fixed_cols = 3 + 12 + 6 + 18 + 6 + name_width = max(20, max_x - fixed_cols) + col_header = f" {'Title / Preview':<{name_width}} {'Active':<10} {'Src':<5} {'ID'}" + try: + dim_attr = curses.color_pair(4) if curses.has_colors() else curses.A_DIM + stdscr.addnstr(1, 0, col_header, max_x - 1, dim_attr) + except curses.error: + pass + + # Compute visible area + visible_rows = max_y - 4 # header + col header + blank + footer + if visible_rows < 1: + visible_rows = 1 + + # Clamp cursor and scroll + if not filtered: + try: + msg = " No sessions match the filter." 
+ stdscr.addnstr(3, 0, msg, max_x - 1, curses.A_DIM) + except curses.error: + pass + else: + if cursor >= len(filtered): + cursor = len(filtered) - 1 + if cursor < 0: + cursor = 0 + if cursor < scroll_offset: + scroll_offset = cursor + elif cursor >= scroll_offset + visible_rows: + scroll_offset = cursor - visible_rows + 1 + + for draw_i, i in enumerate(range( + scroll_offset, + min(len(filtered), scroll_offset + visible_rows) + )): + y = draw_i + 3 + if y >= max_y - 1: + break + s = filtered[i] + arrow = " → " if i == cursor else " " + row = arrow + _format_row(s, max_x - 3) + attr = curses.A_NORMAL + if i == cursor: + attr = curses.A_BOLD + if curses.has_colors(): + attr |= curses.color_pair(1) + try: + stdscr.addnstr(y, 0, row, max_x - 1, attr) + except curses.error: + pass + + # Footer + footer_y = max_y - 1 + if filtered: + footer = f" {cursor + 1}/{len(filtered)} sessions" + if len(filtered) < len(sessions): + footer += f" (filtered from {len(sessions)})" + else: + footer = f" 0/{len(sessions)} sessions" + try: + stdscr.addnstr(footer_y, 0, footer, max_x - 1, + curses.color_pair(4) if curses.has_colors() else curses.A_DIM) + except curses.error: + pass + + stdscr.refresh() + key = stdscr.getch() + + if key in (curses.KEY_UP, ): + if filtered: + cursor = (cursor - 1) % len(filtered) + elif key in (curses.KEY_DOWN, ): + if filtered: + cursor = (cursor + 1) % len(filtered) + elif key in (curses.KEY_ENTER, 10, 13): + if filtered: + result_holder[0] = filtered[cursor]["id"] + return + elif key == 27: # Esc + if search_text: + # First Esc clears the search + search_text = "" + filtered = list(sessions) + cursor = 0 + scroll_offset = 0 + else: + # Second Esc exits + return + elif key in (curses.KEY_BACKSPACE, 127, 8): + if search_text: + search_text = search_text[:-1] + if search_text: + filtered = [s for s in sessions if _match(s, search_text)] + else: + filtered = list(sessions) + cursor = 0 + scroll_offset = 0 + elif key == ord('q') and not search_text: + return 
+ elif 32 <= key <= 126: + # Printable character → add to search filter + search_text += chr(key) + filtered = [s for s in sessions if _match(s, search_text)] + cursor = 0 + scroll_offset = 0 + + curses.wrapper(_curses_browse) + return result_holder[0] + + except Exception: + pass + + # Fallback: numbered list (Windows without curses, etc.) + import time as _time + from datetime import datetime + + def _relative_time_fb(ts): + if not ts: + return "?" + delta = _time.time() - ts + if delta < 60: + return "just now" + elif delta < 3600: + return f"{int(delta / 60)}m ago" + elif delta < 86400: + return f"{int(delta / 3600)}h ago" + elif delta < 172800: + return "yesterday" + elif delta < 604800: + return f"{int(delta / 86400)}d ago" + else: + return datetime.fromtimestamp(ts).strftime("%Y-%m-%d") + + print("\n Browse sessions (enter number to resume, q to cancel)\n") + for i, s in enumerate(sessions): + title = (s.get("title") or "").strip() + preview = (s.get("preview") or "").strip() + label = title or preview or s["id"] + if len(label) > 50: + label = label[:47] + "..." + last_active = _relative_time_fb(s.get("last_active")) + src = s.get("source", "")[:6] + print(f" {i + 1:>3}. {label:<50} {last_active:<10} {src}") + + while True: + try: + val = input(f"\n Select [1-{len(sessions)}]: ").strip() + if not val or val.lower() in ("q", "quit", "exit"): + return None + idx = int(val) - 1 + if 0 <= idx < len(sessions): + return sessions[idx]["id"] + print(f" Invalid selection. Enter 1-{len(sessions)} or q to cancel.") + except ValueError: + print(f" Invalid input. Enter a number or q to cancel.") + except (KeyboardInterrupt, EOFError): + print() + return None + + def _resolve_last_cli_session() -> Optional[str]: """Look up the most recent CLI session ID from SQLite. 
Returns None if unavailable.""" try: @@ -120,16 +394,63 @@ def _resolve_last_cli_session() -> Optional[str]: return None +def _resolve_session_by_name_or_id(name_or_id: str) -> Optional[str]: + """Resolve a session name (title) or ID to a session ID. + + - If it looks like a session ID (contains underscore + hex), try direct lookup first. + - Otherwise, treat it as a title and use resolve_session_by_title (auto-latest). + - Falls back to the other method if the first doesn't match. + """ + try: + from hermes_state import SessionDB + db = SessionDB() + + # Try as exact session ID first + session = db.get_session(name_or_id) + if session: + db.close() + return session["id"] + + # Try as title (with auto-latest for lineage) + session_id = db.resolve_session_by_title(name_or_id) + db.close() + return session_id + except Exception: + pass + return None + + def cmd_chat(args): """Run interactive chat CLI.""" - # Resolve --continue into --resume with the latest CLI session - if getattr(args, "continue_last", False) and not getattr(args, "resume", None): - last_id = _resolve_last_cli_session() - if last_id: - args.resume = last_id + # Resolve --continue into --resume with the latest CLI session or by name + continue_val = getattr(args, "continue_last", None) + if continue_val and not getattr(args, "resume", None): + if isinstance(continue_val, str): + # -c "session name" — resolve by title or ID + resolved = _resolve_session_by_name_or_id(continue_val) + if resolved: + args.resume = resolved + else: + print(f"No session found matching '{continue_val}'.") + print("Use 'hermes sessions list' to see available sessions.") + sys.exit(1) else: - print("No previous CLI session found to continue.") - sys.exit(1) + # -c with no argument — continue the most recent session + last_id = _resolve_last_cli_session() + if last_id: + args.resume = last_id + else: + print("No previous CLI session found to continue.") + sys.exit(1) + + # Resolve --resume by title if it's not a direct session 
ID + resume_val = getattr(args, "resume", None) + if resume_val: + resolved = _resolve_session_by_name_or_id(resume_val) + if resolved: + args.resume = resolved + # If resolution fails, keep the original value — _init_agent will + # report "Session not found" with the original input # First-run guard: check if any provider is configured before launching if not _has_any_provider_configured(): @@ -729,7 +1050,7 @@ def _model_flow_custom(config): cfg = load_config() model = cfg.get("model") if isinstance(model, dict): - model["provider"] = "auto" + model["provider"] = "custom" model["base_url"] = effective_url save_config(cfg) deactivate_provider() @@ -1209,8 +1530,9 @@ def main(): Examples: hermes Start interactive chat hermes chat -q "Hello" Single query mode - hermes --continue Resume the most recent session - hermes --resume Resume a specific session + hermes -c Resume the most recent session + hermes -c "my project" Resume a session by name (latest in lineage) + hermes --resume Resume a specific session by ID hermes setup Run setup wizard hermes logout Clear stored authentication hermes model Select default model @@ -1221,6 +1543,8 @@ Examples: hermes -w Start in isolated git worktree hermes gateway install Install as system service hermes sessions list List past sessions + hermes sessions browse Interactive session picker + hermes sessions rename ID T Rename/title a session hermes update Update to latest version For more help on a command: @@ -1235,16 +1559,18 @@ For more help on a command: ) parser.add_argument( "--resume", "-r", - metavar="SESSION_ID", + metavar="SESSION", default=None, - help="Resume a previous session by ID (shortcut for: hermes chat --resume ID)" + help="Resume a previous session by ID or title" ) parser.add_argument( "--continue", "-c", dest="continue_last", - action="store_true", - default=False, - help="Resume the most recent CLI session" + nargs="?", + const=True, + default=None, + metavar="SESSION_NAME", + help="Resume a session by 
name, or the most recent if no name given" ) parser.add_argument( "--worktree", "-w", @@ -1294,9 +1620,11 @@ For more help on a command: chat_parser.add_argument( "--continue", "-c", dest="continue_last", - action="store_true", - default=False, - help="Resume the most recent CLI session" + nargs="?", + const=True, + default=None, + metavar="SESSION_NAME", + help="Resume a session by name, or the most recent if no name given" ) chat_parser.add_argument( "--worktree", "-w", @@ -1671,7 +1999,7 @@ For more help on a command: # ========================================================================= sessions_parser = subparsers.add_parser( "sessions", - help="Manage session history (list, export, prune, delete)", + help="Manage session history (list, rename, export, prune, delete)", description="View and manage the SQLite session store" ) sessions_subparsers = sessions_parser.add_subparsers(dest="sessions_action") @@ -1696,6 +2024,17 @@ For more help on a command: sessions_stats = sessions_subparsers.add_parser("stats", help="Show session store statistics") + sessions_rename = sessions_subparsers.add_parser("rename", help="Set or change a session's title") + sessions_rename.add_argument("session_id", help="Session ID to rename") + sessions_rename.add_argument("title", nargs="+", help="New title for the session") + + sessions_browse = sessions_subparsers.add_parser( + "browse", + help="Interactive session picker — browse, search, and resume sessions", + ) + sessions_browse.add_argument("--source", help="Filter by source (cli, telegram, discord, etc.)") + sessions_browse.add_argument("--limit", type=int, default=50, help="Max sessions to load (default: 50)") + def cmd_sessions(args): import json as _json try: @@ -1708,18 +2047,51 @@ For more help on a command: action = args.sessions_action if action == "list": - sessions = db.search_sessions(source=args.source, limit=args.limit) + sessions = db.list_sessions_rich(source=args.source, limit=args.limit) if not sessions: 
print("No sessions found.") return - print(f"{'ID':<30} {'Source':<12} {'Model':<30} {'Messages':>8} {'Started'}") - print("─" * 100) from datetime import datetime + import time as _time + + def _relative_time(ts): + """Format a timestamp as relative time (e.g., '2h ago', 'yesterday').""" + if not ts: + return "?" + delta = _time.time() - ts + if delta < 60: + return "just now" + elif delta < 3600: + mins = int(delta / 60) + return f"{mins}m ago" + elif delta < 86400: + hours = int(delta / 3600) + return f"{hours}h ago" + elif delta < 172800: + return "yesterday" + elif delta < 604800: + days = int(delta / 86400) + return f"{days}d ago" + else: + return datetime.fromtimestamp(ts).strftime("%Y-%m-%d") + + has_titles = any(s.get("title") for s in sessions) + if has_titles: + print(f"{'Title':<22} {'Preview':<40} {'Last Active':<13} {'ID'}") + print("─" * 100) + else: + print(f"{'Preview':<50} {'Last Active':<13} {'Src':<6} {'ID'}") + print("─" * 90) for s in sessions: - started = datetime.fromtimestamp(s["started_at"]).strftime("%Y-%m-%d %H:%M") if s["started_at"] else "?" 
- model = (s.get("model") or "?")[:28] - ended = " (ended)" if s.get("ended_at") else "" - print(f"{s['id']:<30} {s['source']:<12} {model:<30} {s['message_count']:>8} {started}{ended}") + last_active = _relative_time(s.get("last_active")) + preview = s.get("preview", "")[:38] if has_titles else s.get("preview", "")[:48] + if has_titles: + title = (s.get("title") or "—")[:20] + sid = s["id"][:20] + print(f"{title:<22} {preview:<40} {last_active:<13} {sid}") + else: + sid = s["id"][:20] + print(f"{preview:<50} {last_active:<13} {s['source']:<6} {sid}") elif action == "export": if args.session_id: @@ -1759,6 +2131,44 @@ For more help on a command: count = db.prune_sessions(older_than_days=days, source=args.source) print(f"Pruned {count} session(s).") + elif action == "rename": + title = " ".join(args.title) + try: + if db.set_session_title(args.session_id, title): + print(f"Session '{args.session_id}' renamed to: {title}") + else: + print(f"Session '{args.session_id}' not found.") + except ValueError as e: + print(f"Error: {e}") + + elif action == "browse": + limit = getattr(args, "limit", 50) or 50 + source = getattr(args, "source", None) + sessions = db.list_sessions_rich(source=source, limit=limit) + db.close() + if not sessions: + print("No sessions found.") + return + + selected_id = _session_browse_picker(sessions) + if not selected_id: + print("Cancelled.") + return + + # Launch hermes --resume by replacing the current process + print(f"Resuming session: {selected_id}") + import shutil + hermes_bin = shutil.which("hermes") + if hermes_bin: + os.execvp(hermes_bin, ["hermes", "--resume", selected_id]) + else: + # Fallback: re-invoke via python -m + os.execvp( + sys.executable, + [sys.executable, "-m", "hermes_cli.main", "--resume", selected_id], + ) + return # won't reach here after execvp + elif action == "stats": total = db.session_count() msgs = db.message_count() @@ -1768,7 +2178,6 @@ For more help on a command: c = db.session_count(source=src) if c > 0: 
print(f" {src}: {c} sessions") - import os db_path = db.db_path if db_path.exists(): size_mb = os.path.getsize(db_path) / (1024 * 1024) @@ -1877,7 +2286,7 @@ For more help on a command: args.toolsets = None args.verbose = False args.resume = None - args.continue_last = False + args.continue_last = None if not hasattr(args, "worktree"): args.worktree = False cmd_chat(args) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 723f226ead..1fdde0900c 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -63,7 +63,7 @@ _PROVIDER_LABELS = { "kimi-coding": "Kimi / Moonshot", "minimax": "MiniMax", "minimax-cn": "MiniMax (China)", - "custom": "custom endpoint", + "custom": "Custom endpoint", } _PROVIDER_ALIASES = { diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 43a0cd6d9b..c10caec9b0 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -632,6 +632,29 @@ def setup_model_provider(config: dict): save_env_value("OPENAI_BASE_URL", "") save_env_value("OPENAI_API_KEY", "") + # Update config.yaml and deactivate any OAuth provider so the + # resolver doesn't keep returning the old provider (e.g. Codex). 
+ try: + from hermes_cli.auth import deactivate_provider + deactivate_provider() + except Exception: + pass + import yaml + config_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "config.yaml" + try: + disk_cfg = {} + if config_path.exists(): + disk_cfg = yaml.safe_load(config_path.read_text()) or {} + model_section = disk_cfg.get("model", {}) + if isinstance(model_section, str): + model_section = {"default": model_section} + model_section["provider"] = "openrouter" + model_section.pop("base_url", None) # OpenRouter uses default URL + disk_cfg["model"] = model_section + config_path.write_text(yaml.safe_dump(disk_cfg, sort_keys=False)) + except Exception as e: + logger.debug("Could not save provider to config.yaml: %s", e) + elif provider_idx == 3: # Custom endpoint selected_provider = "custom" print() @@ -659,6 +682,28 @@ def setup_model_provider(config: dict): if model_name: config['model'] = model_name save_env_value("LLM_MODEL", model_name) + + # Save provider and base_url to config.yaml so the gateway and CLI + # both resolve the correct provider without relying on env-var heuristics. 
+ if base_url: + import yaml + config_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "config.yaml" + try: + disk_cfg = {} + if config_path.exists(): + disk_cfg = yaml.safe_load(config_path.read_text()) or {} + model_section = disk_cfg.get("model", {}) + if isinstance(model_section, str): + model_section = {"default": model_section} + model_section["provider"] = "custom" + model_section["base_url"] = base_url.rstrip("/") + if model_name: + model_section["default"] = model_name + disk_cfg["model"] = model_section + config_path.write_text(yaml.safe_dump(disk_cfg, sort_keys=False)) + except Exception as e: + logger.debug("Could not save provider to config.yaml: %s", e) + print_success("Custom endpoint configured") elif provider_idx == 4: # Z.AI / GLM @@ -870,8 +915,8 @@ def setup_model_provider(config: dict): config['model'] = custom save_env_value("LLM_MODEL", custom) elif selected_provider == "openai-codex": - from hermes_cli.codex_models import get_codex_models - codex_models = get_codex_models() + from hermes_cli.codex_models import get_codex_model_ids + codex_models = get_codex_model_ids() model_choices = codex_models + [f"Keep current ({current_model})"] default_codex = 0 if current_model in codex_models: @@ -1264,7 +1309,7 @@ def setup_agent_settings(config: dict): # ── Max Iterations ── print_header("Agent Settings") - current_max = get_env_value('HERMES_MAX_ITERATIONS') or '60' + current_max = get_env_value('HERMES_MAX_ITERATIONS') or '90' print_info("Maximum tool-calling iterations per conversation.") print_info("Higher = more complex tasks, but costs more tokens.") print_info("Recommended: 30-60 for most tasks, 100+ for open exploration.") @@ -1660,14 +1705,18 @@ def setup_gateway(config: dict): # Section 5: Tool Configuration (delegates to unified tools_config.py) # ============================================================================= -def setup_tools(config: dict): +def setup_tools(config: dict, first_install: bool = False): 
"""Configure tools — delegates to the unified tools_command() in tools_config.py. Both `hermes setup tools` and `hermes tools` use the same flow: platform selection → toolset toggles → provider/API key configuration. + + Args: + first_install: When True, uses the simplified first-install flow + (no platform menu, prompts for all unconfigured API keys). """ from hermes_cli.tools_config import tools_command - tools_command() + tools_command(first_install=first_install, config=config) # ============================================================================= @@ -1820,7 +1869,7 @@ def run_setup_wizard(args): setup_gateway(config) # Section 5: Tools - setup_tools(config) + setup_tools(config, first_install=not is_existing) # Save and show summary save_config(config) diff --git a/hermes_cli/skills_hub.py b/hermes_cli/skills_hub.py index 32a0bab1ba..8b72fe4f46 100644 --- a/hermes_cli/skills_hub.py +++ b/hermes_cli/skills_hub.py @@ -408,10 +408,11 @@ def do_inspect(identifier: str, console: Optional[Console] = None) -> None: def do_list(source_filter: str = "all", console: Optional[Console] = None) -> None: """List installed skills, distinguishing builtins from hub-installed.""" - from tools.skills_hub import HubLockFile, SKILLS_DIR + from tools.skills_hub import HubLockFile, ensure_hub_dirs from tools.skills_tool import _find_all_skills c = console or _console + ensure_hub_dirs() lock = HubLockFile() hub_installed = {e["name"]: e for e in lock.list_installed()} diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 3ee666b481..12b064fea6 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -206,6 +206,8 @@ def show_status(args): "Telegram": ("TELEGRAM_BOT_TOKEN", "TELEGRAM_HOME_CHANNEL"), "Discord": ("DISCORD_BOT_TOKEN", "DISCORD_HOME_CHANNEL"), "WhatsApp": ("WHATSAPP_ENABLED", None), + "Signal": ("SIGNAL_HTTP_URL", "SIGNAL_HOME_CHANNEL"), + "Slack": ("SLACK_BOT_TOKEN", None), } for name, (token_var, home_var) in platforms.items(): diff --git 
a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 7fe88691e5..19288bf59f 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -96,6 +96,11 @@ CONFIGURABLE_TOOLSETS = [ ("homeassistant", "🏠 Home Assistant", "smart home device control"), ] +# Toolsets that are OFF by default for new installs. +# They're still in _HERMES_CORE_TOOLS (available at runtime if enabled), +# but the setup checklist won't pre-select them for first-time users. +_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl"} + # Platform display config PLATFORMS = { "cli": {"label": "🖥️ CLI", "default_toolset": "hermes-cli"}, @@ -142,6 +147,8 @@ TOOL_CATEGORIES = { }, "web": { "name": "Web Search & Extract", + "setup_title": "Select Search Provider", + "setup_note": "A free DuckDuckGo search skill is also included — skip this if you don't need Firecrawl.", "icon": "🔍", "providers": [ { @@ -595,11 +602,18 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict): print(color(f" --- {icon} {name} ({provider['name']}) ---", Colors.CYAN)) if provider.get("tag"): _print_info(f" {provider['tag']}") + # For single-provider tools, show a note if available + if cat.get("setup_note"): + _print_info(f" {cat['setup_note']}") _configure_provider(provider, config) else: # Multiple providers - let user choose print() - print(color(f" --- {icon} {name} - Choose a provider ---", Colors.CYAN)) + # Use custom title if provided (e.g. 
"Select Search Provider") + title = cat.get("setup_title", f"Choose a provider") + print(color(f" --- {icon} {name} - {title} ---", Colors.CYAN)) + if cat.get("setup_note"): + _print_info(f" {cat['setup_note']}") print() # Plain text labels only (no ANSI codes in menu items) @@ -617,6 +631,9 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict): configured = " [configured]" provider_choices.append(f"{p['name']}{tag}{configured}") + # Add skip option + provider_choices.append("Skip — keep defaults / configure later") + # Detect current provider as default default_idx = 0 for i, p in enumerate(providers): @@ -628,7 +645,13 @@ def _configure_tool_category(ts_key: str, cat: dict, config: dict): default_idx = i break - provider_idx = _prompt_choice(" Select provider:", provider_choices, default_idx) + provider_idx = _prompt_choice(f" {title}:", provider_choices, default_idx) + + # Skip selected + if provider_idx >= len(providers): + _print_info(f" Skipped {name}") + return + _configure_provider(providers[provider_idx], config) @@ -835,9 +858,19 @@ def _reconfigure_simple_requirements(ts_key: str): # ─── Main Entry Point ───────────────────────────────────────────────────────── -def tools_command(args=None): - """Entry point for `hermes tools` and `hermes setup tools`.""" - config = load_config() +def tools_command(args=None, first_install: bool = False, config: dict = None): + """Entry point for `hermes tools` and `hermes setup tools`. + + Args: + first_install: When True (set by the setup wizard on fresh installs), + skip the platform menu, go straight to the CLI checklist, and + prompt for API keys on all enabled tools that need them. + config: Optional config dict to use. When called from the setup + wizard, the wizard passes its own dict so that platform_toolsets + are written into it and survive the wizard's final save_config(). 
+ """ + if config is None: + config = load_config() enabled_platforms = _get_enabled_platforms() print() @@ -846,6 +879,57 @@ def tools_command(args=None): print(color(" Tools that need API keys will be configured when enabled.", Colors.DIM)) print() + # ── First-time install: linear flow, no platform menu ── + if first_install: + for pkey in enabled_platforms: + pinfo = PLATFORMS[pkey] + current_enabled = _get_platform_tools(config, pkey) + + # Uncheck toolsets that should be off by default + checklist_preselected = current_enabled - _DEFAULT_OFF_TOOLSETS + + # Show checklist + new_enabled = _prompt_toolset_checklist(pinfo["label"], checklist_preselected) + + added = new_enabled - current_enabled + removed = current_enabled - new_enabled + if added: + for ts in sorted(added): + label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts), ts) + print(color(f" + {label}", Colors.GREEN)) + if removed: + for ts in sorted(removed): + label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts), ts) + print(color(f" - {label}", Colors.RED)) + + # Walk through ALL selected tools that have provider options or + # need API keys. This ensures browser (Local vs Browserbase), + # TTS (Edge vs OpenAI vs ElevenLabs), etc. are shown even when + # a free provider exists. 
+ to_configure = [ + ts_key for ts_key in sorted(new_enabled) + if TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key) + ] + + if to_configure: + print() + print(color(f" Configuring {len(to_configure)} tool(s):", Colors.YELLOW)) + for ts_key in to_configure: + label = next((l for k, l, _ in CONFIGURABLE_TOOLSETS if k == ts_key), ts_key) + print(color(f" • {label}", Colors.DIM)) + print(color(" You can skip any tool you don't need right now.", Colors.DIM)) + print() + for ts_key in to_configure: + _configure_toolset(ts_key, config) + + _save_platform_tools(config, pkey, new_enabled) + save_config(config) + print(color(f" ✓ Saved {pinfo['label']} tool configuration", Colors.GREEN)) + print() + + return + + # ── Returning user: platform menu loop ── # Build platform choices platform_choices = [] platform_keys = [] @@ -896,11 +980,10 @@ def tools_command(args=None): print(color(f" - {label}", Colors.RED)) # Configure newly enabled toolsets that need API keys - if added: - for ts_key in sorted(added): - if TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key): - if not _toolset_has_keys(ts_key): - _configure_toolset(ts_key, config) + for ts_key in sorted(added): + if (TOOL_CATEGORIES.get(ts_key) or TOOLSET_ENV_REQUIREMENTS.get(ts_key)): + if not _toolset_has_keys(ts_key): + _configure_toolset(ts_key, config) _save_platform_tools(config, pkey, new_enabled) save_config(config) diff --git a/hermes_state.py b/hermes_state.py index 1d1f951c0a..67b4484e73 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -24,7 +24,7 @@ from typing import Dict, Any, List, Optional DEFAULT_DB_PATH = Path(os.getenv("HERMES_HOME", Path.home() / ".hermes")) / "state.db" -SCHEMA_VERSION = 2 +SCHEMA_VERSION = 4 SCHEMA_SQL = """ CREATE TABLE IF NOT EXISTS schema_version ( @@ -46,6 +46,7 @@ CREATE TABLE IF NOT EXISTS sessions ( tool_call_count INTEGER DEFAULT 0, input_tokens INTEGER DEFAULT 0, output_tokens INTEGER DEFAULT 0, + title TEXT, FOREIGN KEY 
(parent_session_id) REFERENCES sessions(id) ); @@ -133,7 +134,33 @@ class SessionDB: except sqlite3.OperationalError: pass # Column already exists cursor.execute("UPDATE schema_version SET version = 2") + if current_version < 3: + # v3: add title column to sessions + try: + cursor.execute("ALTER TABLE sessions ADD COLUMN title TEXT") + except sqlite3.OperationalError: + pass # Column already exists + cursor.execute("UPDATE schema_version SET version = 3") + if current_version < 4: + # v4: add unique index on title (NULLs allowed, only non-NULL must be unique) + try: + cursor.execute( + "CREATE UNIQUE INDEX IF NOT EXISTS idx_sessions_title_unique " + "ON sessions(title) WHERE title IS NOT NULL" + ) + except sqlite3.OperationalError: + pass # Index already exists + cursor.execute("UPDATE schema_version SET version = 4") + # Unique title index — always ensure it exists (safe to run after migrations + # since the title column is guaranteed to exist at this point) + try: + cursor.execute( + "CREATE UNIQUE INDEX IF NOT EXISTS idx_sessions_title_unique " + "ON sessions(title) WHERE title IS NOT NULL" + ) + except sqlite3.OperationalError: + pass # Index already exists # FTS5 setup (separate because CREATE VIRTUAL TABLE can't be in executescript with IF NOT EXISTS reliably) try: @@ -219,6 +246,210 @@ class SessionDB: row = cursor.fetchone() return dict(row) if row else None + # Maximum length for session titles + MAX_TITLE_LENGTH = 100 + + @staticmethod + def sanitize_title(title: Optional[str]) -> Optional[str]: + """Validate and sanitize a session title. + + - Strips leading/trailing whitespace + - Removes ASCII control characters (0x00-0x1F, 0x7F) and problematic + Unicode control chars (zero-width, RTL/LTR overrides, etc.) + - Collapses internal whitespace runs to single spaces + - Normalizes empty/whitespace-only strings to None + - Enforces MAX_TITLE_LENGTH + + Returns the cleaned title string or None. 
+ Raises ValueError if the title exceeds MAX_TITLE_LENGTH after cleaning. + """ + if not title: + return None + + import re + + # Remove ASCII control characters (0x00-0x1F, 0x7F) but keep + # whitespace chars (\t=0x09, \n=0x0A, \r=0x0D) so they can be + # normalized to spaces by the whitespace collapsing step below + cleaned = re.sub(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f]', '', title) + + # Remove problematic Unicode control characters: + # - Zero-width chars (U+200B-U+200F, U+FEFF) + # - Directional overrides (U+202A-U+202E, U+2066-U+2069) + # - Object replacement (U+FFFC), interlinear annotation (U+FFF9-U+FFFB) + cleaned = re.sub( + r'[\u200b-\u200f\u2028-\u202e\u2060-\u2069\ufeff\ufffc\ufff9-\ufffb]', + '', cleaned, + ) + + # Collapse internal whitespace runs and strip + cleaned = re.sub(r'\s+', ' ', cleaned).strip() + + if not cleaned: + return None + + if len(cleaned) > SessionDB.MAX_TITLE_LENGTH: + raise ValueError( + f"Title too long ({len(cleaned)} chars, max {SessionDB.MAX_TITLE_LENGTH})" + ) + + return cleaned + + def set_session_title(self, session_id: str, title: str) -> bool: + """Set or update a session's title. + + Returns True if session was found and title was set. + Raises ValueError if title is already in use by another session, + or if the title fails validation (too long, invalid characters). + Empty/whitespace-only strings are normalized to None (clearing the title). + """ + title = self.sanitize_title(title) + if title: + # Check uniqueness (allow the same session to keep its own title) + cursor = self._conn.execute( + "SELECT id FROM sessions WHERE title = ? AND id != ?", + (title, session_id), + ) + conflict = cursor.fetchone() + if conflict: + raise ValueError( + f"Title '{title}' is already in use by session {conflict['id']}" + ) + cursor = self._conn.execute( + "UPDATE sessions SET title = ? 
WHERE id = ?", + (title, session_id), + ) + self._conn.commit() + return cursor.rowcount > 0 + + def get_session_title(self, session_id: str) -> Optional[str]: + """Get the title for a session, or None.""" + cursor = self._conn.execute( + "SELECT title FROM sessions WHERE id = ?", (session_id,) + ) + row = cursor.fetchone() + return row["title"] if row else None + + def get_session_by_title(self, title: str) -> Optional[Dict[str, Any]]: + """Look up a session by exact title. Returns session dict or None.""" + cursor = self._conn.execute( + "SELECT * FROM sessions WHERE title = ?", (title,) + ) + row = cursor.fetchone() + return dict(row) if row else None + + def resolve_session_by_title(self, title: str) -> Optional[str]: + """Resolve a title to a session ID, preferring the latest in a lineage. + + If the exact title exists, returns that session's ID. + If not, searches for "title #N" variants and returns the latest one. + If the exact title exists AND numbered variants exist, returns the + latest numbered variant (the most recent continuation). + """ + # First try exact match + exact = self.get_session_by_title(title) + + # Also search for numbered variants: "title #2", "title #3", etc. + # Escape SQL LIKE wildcards (%, _) in the title to prevent false matches + escaped = title.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") + cursor = self._conn.execute( + "SELECT id, title, started_at FROM sessions " + "WHERE title LIKE ? ESCAPE '\\' ORDER BY started_at DESC", + (f"{escaped} #%",), + ) + numbered = cursor.fetchall() + + if numbered: + # Return the most recent numbered variant + return numbered[0]["id"] + elif exact: + return exact["id"] + return None + + def get_next_title_in_lineage(self, base_title: str) -> str: + """Generate the next title in a lineage (e.g., "my session" → "my session #2"). + + Strips any existing " #N" suffix to find the base name, then finds + the highest existing number and increments. 
+ """ + import re + # Strip existing #N suffix to find the true base + match = re.match(r'^(.*?) #(\d+)$', base_title) + if match: + base = match.group(1) + else: + base = base_title + + # Find all existing numbered variants + # Escape SQL LIKE wildcards (%, _) in the base to prevent false matches + escaped = base.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_") + cursor = self._conn.execute( + "SELECT title FROM sessions WHERE title = ? OR title LIKE ? ESCAPE '\\'", + (base, f"{escaped} #%"), + ) + existing = [row["title"] for row in cursor.fetchall()] + + if not existing: + return base # No conflict, use the base name as-is + + # Find the highest number + max_num = 1 # The unnumbered original counts as #1 + for t in existing: + m = re.match(r'^.* #(\d+)$', t) + if m: + max_num = max(max_num, int(m.group(1))) + + return f"{base} #{max_num + 1}" + + def list_sessions_rich( + self, + source: str = None, + limit: int = 20, + offset: int = 0, + ) -> List[Dict[str, Any]]: + """List sessions with preview (first user message) and last active timestamp. + + Returns dicts with keys: id, source, model, title, started_at, ended_at, + message_count, preview (first 60 chars of first user message), + last_active (timestamp of last message). + + Uses a single query with correlated subqueries instead of N+2 queries. + """ + source_clause = "WHERE s.source = ?" if source else "" + query = f""" + SELECT s.*, + COALESCE( + (SELECT SUBSTR(REPLACE(REPLACE(m.content, X'0A', ' '), X'0D', ' '), 1, 63) + FROM messages m + WHERE m.session_id = s.id AND m.role = 'user' AND m.content IS NOT NULL + ORDER BY m.timestamp, m.id LIMIT 1), + '' + ) AS _preview_raw, + COALESCE( + (SELECT MAX(m2.timestamp) FROM messages m2 WHERE m2.session_id = s.id), + s.started_at + ) AS last_active + FROM sessions s + {source_clause} + ORDER BY s.started_at DESC + LIMIT ? OFFSET ? 
+ """ + params = (source, limit, offset) if source else (limit, offset) + cursor = self._conn.execute(query, params) + sessions = [] + for row in cursor.fetchall(): + s = dict(row) + # Build the preview from the raw substring + raw = s.pop("_preview_raw", "").strip() + if raw: + text = raw[:60] + s["preview"] = text + ("..." if len(raw) > 60 else "") + else: + s["preview"] = "" + sessions.append(s) + + return sessions + # ========================================================================= # Message storage # ========================================================================= diff --git a/optional-skills/blockchain/solana/SKILL.md b/optional-skills/blockchain/solana/SKILL.md new file mode 100644 index 0000000000..59b988392a --- /dev/null +++ b/optional-skills/blockchain/solana/SKILL.md @@ -0,0 +1,207 @@ +--- +name: solana +description: Query Solana blockchain data with USD pricing — wallet balances, token portfolios with values, transaction details, NFTs, whale detection, and live network stats. Uses Solana RPC + CoinGecko. No API key required. +version: 0.2.0 +author: Deniz Alagoz (gizdusum), enhanced by Hermes Agent +license: MIT +metadata: + hermes: + tags: [Solana, Blockchain, Crypto, Web3, RPC, DeFi, NFT] + related_skills: [] +--- + +# Solana Blockchain Skill + +Query Solana on-chain data enriched with USD pricing via CoinGecko. +8 commands: wallet portfolio, token info, transactions, activity, NFTs, +whale detection, network stats, and price lookup. + +No API key needed. Uses only Python standard library (urllib, json, argparse). 
+ +--- + +## When to Use + +- User asks for a Solana wallet balance, token holdings, or portfolio value +- User wants to inspect a specific transaction by signature +- User wants SPL token metadata, price, supply, or top holders +- User wants recent transaction history for an address +- User wants NFTs owned by a wallet +- User wants to find large SOL transfers (whale detection) +- User wants Solana network health, TPS, epoch, or SOL price +- User asks "what's the price of BONK/JUP/SOL?" + +--- + +## Prerequisites + +The helper script uses only Python standard library (urllib, json, argparse). +No external packages required. + +Pricing data comes from CoinGecko's free API (no key needed, rate-limited +to ~10-30 requests/minute). For faster lookups, use `--no-prices` flag. + +--- + +## Quick Reference + +RPC endpoint (default): https://api.mainnet-beta.solana.com +Override: export SOLANA_RPC_URL=https://your-private-rpc.com + +Helper script path: ~/.hermes/skills/blockchain/solana/scripts/solana_client.py + +``` +python3 solana_client.py wallet
<address> [--limit N] [--all] [--no-prices] +python3 solana_client.py tx <signature> +python3 solana_client.py token <mint_address> +python3 solana_client.py activity <address>
[--limit N] +python3 solana_client.py nft <address>
+python3 solana_client.py whales [--min-sol N] +python3 solana_client.py stats +python3 solana_client.py price +``` + +--- + +## Procedure + +### 0. Setup Check + +```bash +python3 --version + +# Optional: set a private RPC for better rate limits +export SOLANA_RPC_URL="https://api.mainnet-beta.solana.com" + +# Confirm connectivity +python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py stats +``` + +### 1. Wallet Portfolio + +Get SOL balance, SPL token holdings with USD values, NFT count, and +portfolio total. Tokens sorted by value, dust filtered, known tokens +labeled by name (BONK, JUP, USDC, etc.). + +```bash +python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py \ + wallet 9WzDXwBbmkg8ZTbNMqUxvQRAyrZzDsGYdLVL9zYtAWWM +``` + +Flags: +- `--limit N` — show top N tokens (default: 20) +- `--all` — show all tokens, no dust filter, no limit +- `--no-prices` — skip CoinGecko price lookups (faster, RPC-only) + +Output includes: SOL balance + USD value, token list with prices sorted +by value, dust count, NFT summary, total portfolio value in USD. + +### 2. Transaction Details + +Inspect a full transaction by its base58 signature. Shows balance changes +in both SOL and USD. + +```bash +python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py \ + tx 5j7s8K...your_signature_here +``` + +Output: slot, timestamp, fee, status, balance changes (SOL + USD), +program invocations. + +### 3. Token Info + +Get SPL token metadata, current price, market cap, supply, decimals, +mint/freeze authorities, and top 5 holders. + +```bash +python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py \ + token DezXAZ8z7PnrnRJjz3wXBoRgixCa6xjnB7YaB1pPB263 +``` + +Output: name, symbol, decimals, supply, price, market cap, top 5 +holders with percentages. + +### 4. Recent Activity + +List recent transactions for an address (default: last 10, max: 25). 
+ +```bash +python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py \ + activity 9WzDXwBbmkg8ZTbNMqUxvQRAyrZzDsGYdLVL9zYtAWWM --limit 25 +``` + +### 5. NFT Portfolio + +List NFTs owned by a wallet (heuristic: SPL tokens with amount=1, decimals=0). + +```bash +python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py \ + nft 9WzDXwBbmkg8ZTbNMqUxvQRAyrZzDsGYdLVL9zYtAWWM +``` + +Note: Compressed NFTs (cNFTs) are not detected by this heuristic. + +### 6. Whale Detector + +Scan the most recent block for large SOL transfers with USD values. + +```bash +python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py \ + whales --min-sol 500 +``` + +Note: scans the latest block only — point-in-time snapshot, not historical. + +### 7. Network Stats + +Live Solana network health: current slot, epoch, TPS, supply, validator +version, SOL price, and market cap. + +```bash +python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py stats +``` + +### 8. Price Lookup + +Quick price check for any token by mint address or known symbol. + +```bash +python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py price BONK +python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py price JUP +python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py price SOL +python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py price DezXAZ8z7PnrnRJjz3wXBoRgixCa6xjnB7YaB1pPB263 +``` + +Known symbols: SOL, USDC, USDT, BONK, JUP, WETH, JTO, mSOL, stSOL, +PYTH, HNT, RNDR, WEN, W, TNSR, DRIFT, bSOL, JLP, WIF, MEW, BOME, PENGU. + +--- + +## Pitfalls + +- **CoinGecko rate-limits** — free tier allows ~10-30 requests/minute. + Price lookups use 1 request per token. Wallets with many tokens may + not get prices for all of them. Use `--no-prices` for speed. +- **Public RPC rate-limits** — Solana mainnet public RPC limits requests. + For production use, set SOLANA_RPC_URL to a private endpoint + (Helius, QuickNode, Triton). 
+- **NFT detection is heuristic** — amount=1 + decimals=0. Compressed + NFTs (cNFTs) and Token-2022 NFTs won't appear. +- **Whale detector scans latest block only** — not historical. Results + vary by the moment you query. +- **Transaction history** — public RPC keeps ~2 days. Older transactions + may not be available. +- **Token names** — ~25 well-known tokens are labeled by name. Others + show abbreviated mint addresses. Use the `token` command for full info. +- **Retry on 429** — both RPC and CoinGecko calls retry up to 2 times + with exponential backoff on rate-limit errors. + +--- + +## Verification + +```bash +# Should print current Solana slot, TPS, and SOL price +python3 ~/.hermes/skills/blockchain/solana/scripts/solana_client.py stats +``` diff --git a/optional-skills/blockchain/solana/scripts/solana_client.py b/optional-skills/blockchain/solana/scripts/solana_client.py new file mode 100644 index 0000000000..7a1cc91e44 --- /dev/null +++ b/optional-skills/blockchain/solana/scripts/solana_client.py @@ -0,0 +1,698 @@ +#!/usr/bin/env python3 +""" +Solana Blockchain CLI Tool for Hermes Agent +-------------------------------------------- +Queries the Solana JSON-RPC API and CoinGecko for enriched on-chain data. +Uses only Python standard library — no external packages required. + +Usage: + python3 solana_client.py stats + python3 solana_client.py wallet
<address> [--limit N] [--all] [--no-prices] + python3 solana_client.py tx <signature> + python3 solana_client.py token <mint_address> + python3 solana_client.py activity <address>
[--limit N] + python3 solana_client.py nft <address>
+ python3 solana_client.py whales [--min-sol N] + python3 solana_client.py price + +Environment: + SOLANA_RPC_URL Override the default RPC endpoint (default: mainnet-beta public) +""" + +import argparse +import json +import os +import sys +import time +import urllib.request +import urllib.error +from typing import Any, Dict, List, Optional + +RPC_URL = os.environ.get( + "SOLANA_RPC_URL", + "https://api.mainnet-beta.solana.com", +) + +LAMPORTS_PER_SOL = 1_000_000_000 + +# Well-known Solana token names — avoids API calls for common tokens. +# Maps mint address → (symbol, name). +KNOWN_TOKENS: Dict[str, tuple] = { + "So11111111111111111111111111111111111111112": ("SOL", "Solana"), + "EPjFWdd5AufqSSqeM2qN1xzybapC8G4wEGGkZwyTDt1v": ("USDC", "USD Coin"), + "Es9vMFrzaCERmJfrF4H2FYD4KCoNkY11McCe8BenwNYB": ("USDT", "Tether"), + "DezXAZ8z7PnrnRJjz3wXBoRgixCa6xjnB7YaB1pPB263": ("BONK", "Bonk"), + "JUPyiwrYJFskUPiHa7hkeR8VUtAeFoSYbKedZNsDvCN": ("JUP", "Jupiter"), + "7vfCXTUXx5WJV5JADk17DUJ4ksgau7utNKj4b963voxs": ("WETH", "Wrapped Ether"), + "jtojtomepa8beP8AuQc6eXt5FriJwfFMwQx2v2f9mCL": ("JTO", "Jito"), + "mSoLzYCxHdYgdzU16g5QSh3i5K3z3KZK7ytfqcJm7So": ("mSOL", "Marinade Staked SOL"), + "7dHbWXmci3dT8UFYWYZweBLXgycu7Y3iL6trKn1Y7ARj": ("stSOL", "Lido Staked SOL"), + "HZ1JovNiVvGrGNiiYvEozEVgZ58xaU3RKwX8eACQBCt3": ("PYTH", "Pyth Network"), + "RLBxxFkseAZ4RgJH3Sqn8jXxhmGoz9jWxDNJMh8pL7a": ("RLBB", "Rollbit"), + "hntyVP6YFm1Hg25TN9WGLqM12b8TQmcknKrdu1oxWux": ("HNT", "Helium"), + "rndrizKT3MK1iimdxRdWabcF7Zg7AR5T4nud4EkHBof": ("RNDR", "Render"), + "WENWENvqqNya429ubCdR81ZmD69brwQaaBYY6p91oHQQ": ("WEN", "Wen"), + "85VBFQZC9TZkfaptBWjvUw7YbZjy52A6mjtPGjstQAmQ": ("W", "Wormhole"), + "TNSRxcUxoT9xBG3de7PiJyTDYu7kskLqcpddxnEJAS6": ("TNSR", "Tensor"), + "DriFtupJYLTosbwoN8koMbEYSx54aFAVLddWsbksjwg7": ("DRIFT", "Drift"), + "bSo13r4TkiE4KumL71LsHTPpL2euBYLFx6h9HP3piy1": ("bSOL", "BlazeStake Staked SOL"), + "27G8MtK7VtTcCHkpASjSDdkWWYfoqT6ggEuKidVJidD4": ("JLP", "Jupiter LP"), + 
"EKpQGSJtjMFqKZ9KQanSqYXRcF8fBopzLHYxdM65zcjm": ("WIF", "dogwifhat"), + "MEW1gQWJ3nEXg2qgERiKu7FAFj79PHvQVREQUzScPP5": ("MEW", "cat in a dogs world"), + "ukHH6c7mMyiWCf1b9pnWe25TSpkDDt3H5pQZgZ74J82": ("BOME", "Book of Meme"), + "A8C3xuqscfmyLrte3VwJvtPHXvcSN3FjDbUaSMAkQrCS": ("PENGU", "Pudgy Penguins"), +} + +# Reverse lookup: symbol → mint (for the `price` command). +_SYMBOL_TO_MINT = {v[0].upper(): k for k, v in KNOWN_TOKENS.items()} + + +# --------------------------------------------------------------------------- +# HTTP / RPC helpers +# --------------------------------------------------------------------------- + +def _http_get_json(url: str, timeout: int = 10, retries: int = 2) -> Any: + """GET JSON from a URL with retry on 429 rate-limit. Returns parsed JSON or None.""" + for attempt in range(retries + 1): + req = urllib.request.Request( + url, headers={"Accept": "application/json", "User-Agent": "HermesAgent/1.0"}, + ) + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + return json.load(resp) + except urllib.error.HTTPError as exc: + if exc.code == 429 and attempt < retries: + time.sleep(2.0 * (attempt + 1)) + continue + return None + except Exception: + return None + return None + + +def _rpc_call(method: str, params: list = None, retries: int = 2) -> Any: + """Send a JSON-RPC request with retry on 429 rate-limit.""" + payload = json.dumps({ + "jsonrpc": "2.0", "id": 1, + "method": method, "params": params or [], + }).encode() + + for attempt in range(retries + 1): + req = urllib.request.Request( + RPC_URL, data=payload, + headers={"Content-Type": "application/json"}, method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=20) as resp: + body = json.load(resp) + if "error" in body: + err = body["error"] + # Rate-limit: retry after delay + if isinstance(err, dict) and err.get("code") == 429: + if attempt < retries: + time.sleep(1.5 * (attempt + 1)) + continue + sys.exit(f"RPC error: {err}") + return body.get("result") + except 
urllib.error.HTTPError as exc: + if exc.code == 429 and attempt < retries: + time.sleep(1.5 * (attempt + 1)) + continue + sys.exit(f"RPC HTTP error: {exc}") + except urllib.error.URLError as exc: + sys.exit(f"RPC connection error: {exc}") + return None + + +# Keep backward compat — the rest of the code uses `rpc()`. +rpc = _rpc_call + + +def rpc_batch(calls: list) -> list: + """Send a batch of JSON-RPC requests (with retry on 429).""" + payload = json.dumps([ + {"jsonrpc": "2.0", "id": i, "method": c["method"], "params": c.get("params", [])} + for i, c in enumerate(calls) + ]).encode() + + for attempt in range(3): + req = urllib.request.Request( + RPC_URL, data=payload, + headers={"Content-Type": "application/json"}, method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=20) as resp: + return json.load(resp) + except urllib.error.HTTPError as exc: + if exc.code == 429 and attempt < 2: + time.sleep(1.5 * (attempt + 1)) + continue + sys.exit(f"RPC batch HTTP error: {exc}") + except urllib.error.URLError as exc: + sys.exit(f"RPC batch error: {exc}") + return [] + + +def lamports_to_sol(lamports: int) -> float: + return lamports / LAMPORTS_PER_SOL + + +def print_json(obj: Any) -> None: + print(json.dumps(obj, indent=2)) + + +def _short_mint(mint: str) -> str: + """Abbreviate a mint address for display: first 4 + last 4.""" + if len(mint) <= 12: + return mint + return f"{mint[:4]}...{mint[-4:]}" + + +# --------------------------------------------------------------------------- +# Price & token name helpers (CoinGecko — free, no API key) +# --------------------------------------------------------------------------- + +def fetch_prices(mints: List[str], max_lookups: int = 20) -> Dict[str, float]: + """Fetch USD prices for mint addresses via CoinGecko (one per request). + + CoinGecko free tier doesn't support batch Solana token lookups, + so we do individual calls — capped at *max_lookups* to stay within + rate limits. Returns {mint: usd_price}. 
+ """ + prices: Dict[str, float] = {} + for i, mint in enumerate(mints[:max_lookups]): + url = ( + f"https://api.coingecko.com/api/v3/simple/token_price/solana" + f"?contract_addresses={mint}&vs_currencies=usd" + ) + data = _http_get_json(url, timeout=10) + if data and isinstance(data, dict): + for addr, info in data.items(): + if isinstance(info, dict) and "usd" in info: + prices[mint] = info["usd"] + break + # Pause between calls to respect CoinGecko free-tier rate-limits + if i < len(mints[:max_lookups]) - 1: + time.sleep(1.0) + return prices + + +def fetch_sol_price() -> Optional[float]: + """Fetch current SOL price in USD via CoinGecko.""" + data = _http_get_json( + "https://api.coingecko.com/api/v3/simple/price?ids=solana&vs_currencies=usd" + ) + if data and "solana" in data: + return data["solana"].get("usd") + return None + + +def resolve_token_name(mint: str) -> Optional[Dict[str, str]]: + """Look up token name and symbol from CoinGecko by mint address. + + Returns {"name": ..., "symbol": ...} or None. + """ + if mint in KNOWN_TOKENS: + sym, name = KNOWN_TOKENS[mint] + return {"symbol": sym, "name": name} + url = f"https://api.coingecko.com/api/v3/coins/solana/contract/{mint}" + data = _http_get_json(url, timeout=10) + if data and "symbol" in data: + return {"symbol": data["symbol"].upper(), "name": data.get("name", "")} + return None + + +def _token_label(mint: str) -> str: + """Return a human-readable label for a mint: symbol if known, else abbreviated address.""" + if mint in KNOWN_TOKENS: + return KNOWN_TOKENS[mint][0] + return _short_mint(mint) + + +# --------------------------------------------------------------------------- +# 1. 
Network Stats +# --------------------------------------------------------------------------- + +def cmd_stats(_args): + """Live Solana network: slot, epoch, TPS, supply, version, SOL price.""" + results = rpc_batch([ + {"method": "getSlot"}, + {"method": "getEpochInfo"}, + {"method": "getRecentPerformanceSamples", "params": [1]}, + {"method": "getSupply"}, + {"method": "getVersion"}, + ]) + + by_id = {r["id"]: r.get("result") for r in results} + + slot = by_id.get(0) + epoch_info = by_id.get(1) + perf_samples = by_id.get(2) + supply = by_id.get(3) + version = by_id.get(4) + + tps = None + if perf_samples: + s = perf_samples[0] + tps = round(s["numTransactions"] / s["samplePeriodSecs"], 1) + + total_supply = lamports_to_sol(supply["value"]["total"]) if supply else None + circ_supply = lamports_to_sol(supply["value"]["circulating"]) if supply else None + + sol_price = fetch_sol_price() + + out = { + "slot": slot, + "epoch": epoch_info.get("epoch") if epoch_info else None, + "slot_in_epoch": epoch_info.get("slotIndex") if epoch_info else None, + "tps": tps, + "total_supply_SOL": round(total_supply, 2) if total_supply else None, + "circulating_supply_SOL": round(circ_supply, 2) if circ_supply else None, + "validator_version": version.get("solana-core") if version else None, + } + if sol_price is not None: + out["sol_price_usd"] = sol_price + if circ_supply: + out["market_cap_usd"] = round(sol_price * circ_supply, 0) + print_json(out) + + +# --------------------------------------------------------------------------- +# 2. 
Wallet Info (enhanced with prices, sorting, filtering) +# --------------------------------------------------------------------------- + +def cmd_wallet(args): + """SOL balance + SPL token holdings with USD values.""" + address = args.address + show_all = getattr(args, "all", False) + limit = getattr(args, "limit", 20) or 20 + skip_prices = getattr(args, "no_prices", False) + + # Fetch SOL balance + balance_result = rpc("getBalance", [address]) + sol_balance = lamports_to_sol(balance_result["value"]) + + # Fetch all SPL token accounts + token_result = rpc("getTokenAccountsByOwner", [ + address, + {"programId": "TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA"}, + {"encoding": "jsonParsed"}, + ]) + + raw_tokens = [] + for acct in (token_result.get("value") or []): + info = acct["account"]["data"]["parsed"]["info"] + ta = info["tokenAmount"] + amount = float(ta.get("uiAmountString") or 0) + if amount > 0: + raw_tokens.append({ + "mint": info["mint"], + "amount": amount, + "decimals": ta["decimals"], + }) + + # Separate NFTs (amount=1, decimals=0) from fungible tokens + nfts = [t for t in raw_tokens if t["decimals"] == 0 and t["amount"] == 1] + fungible = [t for t in raw_tokens if not (t["decimals"] == 0 and t["amount"] == 1)] + + # Fetch prices for fungible tokens (cap lookups to avoid API abuse) + sol_price = None + prices: Dict[str, float] = {} + if not skip_prices and fungible: + sol_price = fetch_sol_price() + # Prioritize known tokens, then a small sample of unknowns. + # CoinGecko free tier = 1 request per mint, so we cap lookups. 
+ known_mints = [t["mint"] for t in fungible if t["mint"] in KNOWN_TOKENS] + other_mints = [t["mint"] for t in fungible if t["mint"] not in KNOWN_TOKENS][:15] + mints_to_price = known_mints + other_mints + if mints_to_price: + prices = fetch_prices(mints_to_price, max_lookups=30) + + # Enrich tokens with labels and USD values + enriched = [] + dust_count = 0 + dust_value = 0.0 + for t in fungible: + mint = t["mint"] + label = _token_label(mint) + usd_price = prices.get(mint) + usd_value = round(usd_price * t["amount"], 2) if usd_price else None + + # Filter dust (< $0.01) unless --all + if not show_all and usd_value is not None and usd_value < 0.01: + dust_count += 1 + dust_value += usd_value + continue + + entry = {"token": label, "mint": mint, "amount": t["amount"]} + if usd_price is not None: + entry["price_usd"] = usd_price + entry["value_usd"] = usd_value + enriched.append(entry) + + # Sort: tokens with known USD value first (highest→lowest), then unknowns + enriched.sort(key=lambda x: (x.get("value_usd") is not None, x.get("value_usd") or 0), reverse=True) + + # Apply limit unless --all + total_tokens = len(enriched) + if not show_all and len(enriched) > limit: + enriched = enriched[:limit] + + # Compute portfolio total + total_usd = sum(t.get("value_usd", 0) for t in enriched) + sol_value_usd = round(sol_price * sol_balance, 2) if sol_price else None + if sol_value_usd: + total_usd += sol_value_usd + total_usd += dust_value + + output = { + "address": address, + "sol_balance": round(sol_balance, 9), + } + if sol_price: + output["sol_price_usd"] = sol_price + output["sol_value_usd"] = sol_value_usd + output["tokens_shown"] = len(enriched) + if total_tokens > len(enriched): + output["tokens_hidden"] = total_tokens - len(enriched) + output["spl_tokens"] = enriched + if dust_count > 0: + output["dust_filtered"] = {"count": dust_count, "total_value_usd": round(dust_value, 4)} + output["nft_count"] = len(nfts) + if nfts: + output["nfts"] = [_token_label(n["mint"]) 
+ f" ({_short_mint(n['mint'])})" for n in nfts[:10]] + if len(nfts) > 10: + output["nfts"].append(f"... and {len(nfts) - 10} more") + if total_usd > 0: + output["portfolio_total_usd"] = round(total_usd, 2) + + print_json(output) + + +# --------------------------------------------------------------------------- +# 3. Transaction Details +# --------------------------------------------------------------------------- + +def cmd_tx(args): + """Full transaction details by signature.""" + result = rpc("getTransaction", [ + args.signature, + {"encoding": "jsonParsed", "maxSupportedTransactionVersion": 0}, + ]) + + if result is None: + sys.exit("Transaction not found (may be too old for public RPC history).") + + meta = result.get("meta", {}) or {} + msg = result.get("transaction", {}).get("message", {}) + account_keys = msg.get("accountKeys", []) + + pre = meta.get("preBalances", []) + post = meta.get("postBalances", []) + + balance_changes = [] + for i, key in enumerate(account_keys): + acct_key = key["pubkey"] if isinstance(key, dict) else key + if i < len(pre) and i < len(post): + change = lamports_to_sol(post[i] - pre[i]) + if change != 0: + balance_changes.append({"account": acct_key, "change_SOL": round(change, 9)}) + + programs = [] + for ix in msg.get("instructions", []): + prog = ix.get("programId") + if prog is None and "programIdIndex" in ix: + k = account_keys[ix["programIdIndex"]] + prog = k["pubkey"] if isinstance(k, dict) else k + if prog: + programs.append(prog) + + # Add USD value for SOL changes + sol_price = fetch_sol_price() + if sol_price and balance_changes: + for bc in balance_changes: + bc["change_USD"] = round(bc["change_SOL"] * sol_price, 2) + + print_json({ + "signature": args.signature, + "slot": result.get("slot"), + "block_time": result.get("blockTime"), + "fee_SOL": lamports_to_sol(meta.get("fee", 0)), + "status": "success" if meta.get("err") is None else "failed", + "balance_changes": balance_changes, + "programs_invoked": 
list(dict.fromkeys(programs)), + }) + + +# --------------------------------------------------------------------------- +# 4. Token Info (enhanced with name + price) +# --------------------------------------------------------------------------- + +def cmd_token(args): + """SPL token metadata, supply, decimals, price, top holders.""" + mint = args.mint + + mint_info = rpc("getAccountInfo", [mint, {"encoding": "jsonParsed"}]) + if mint_info is None or mint_info.get("value") is None: + sys.exit("Mint account not found.") + + parsed = mint_info["value"]["data"]["parsed"]["info"] + decimals = parsed.get("decimals", 0) + supply_raw = int(parsed.get("supply", 0)) + supply_human = supply_raw / (10 ** decimals) if decimals else supply_raw + + largest = rpc("getTokenLargestAccounts", [mint]) + holders = [] + for acct in (largest.get("value") or [])[:5]: + amount = float(acct.get("uiAmountString") or 0) + pct = round((amount / supply_human * 100), 4) if supply_human > 0 else 0 + holders.append({ + "account": acct["address"], + "amount": amount, + "percent": pct, + }) + + # Resolve name + price + token_meta = resolve_token_name(mint) + price_data = fetch_prices([mint]) + + out = {"mint": mint} + if token_meta: + out["name"] = token_meta["name"] + out["symbol"] = token_meta["symbol"] + out["decimals"] = decimals + out["supply"] = round(supply_human, min(decimals, 6)) + out["mint_authority"] = parsed.get("mintAuthority") + out["freeze_authority"] = parsed.get("freezeAuthority") + if mint in price_data: + out["price_usd"] = price_data[mint] + out["market_cap_usd"] = round(price_data[mint] * supply_human, 0) + out["top_5_holders"] = holders + + print_json(out) + + +# --------------------------------------------------------------------------- +# 5. 
Recent Activity +# --------------------------------------------------------------------------- + +def cmd_activity(args): + """Recent transaction signatures for an address.""" + limit = min(args.limit, 25) + result = rpc("getSignaturesForAddress", [args.address, {"limit": limit}]) + + txs = [ + { + "signature": item["signature"], + "slot": item.get("slot"), + "block_time": item.get("blockTime"), + "err": item.get("err"), + } + for item in (result or []) + ] + + print_json({"address": args.address, "transactions": txs}) + + +# --------------------------------------------------------------------------- +# 6. NFT Portfolio +# --------------------------------------------------------------------------- + +def cmd_nft(args): + """NFTs owned by a wallet (amount=1 && decimals=0 heuristic).""" + result = rpc("getTokenAccountsByOwner", [ + args.address, + {"programId": "TokenkegQfeZyiNwAJbNbGKPFXCWuBvf9Ss623VQ5DA"}, + {"encoding": "jsonParsed"}, + ]) + + nfts = [ + acct["account"]["data"]["parsed"]["info"]["mint"] + for acct in (result.get("value") or []) + if acct["account"]["data"]["parsed"]["info"]["tokenAmount"]["decimals"] == 0 + and int(acct["account"]["data"]["parsed"]["info"]["tokenAmount"]["amount"]) == 1 + ] + + print_json({ + "address": args.address, + "nft_count": len(nfts), + "nfts": nfts, + "note": "Heuristic only. Compressed NFTs (cNFTs) are not detected.", + }) + + +# --------------------------------------------------------------------------- +# 7. 
Whale Detector (enhanced with USD values) +# --------------------------------------------------------------------------- + +def cmd_whales(args): + """Scan the latest block for large SOL transfers.""" + min_lamports = int(args.min_sol * LAMPORTS_PER_SOL) + + slot = rpc("getSlot") + block = rpc("getBlock", [ + slot, + { + "encoding": "jsonParsed", + "transactionDetails": "full", + "maxSupportedTransactionVersion": 0, + "rewards": False, + }, + ]) + + if block is None: + sys.exit("Could not retrieve latest block.") + + sol_price = fetch_sol_price() + + whales = [] + for tx in (block.get("transactions") or []): + meta = tx.get("meta", {}) or {} + if meta.get("err") is not None: + continue + + msg = tx["transaction"].get("message", {}) + account_keys = msg.get("accountKeys", []) + pre = meta.get("preBalances", []) + post = meta.get("postBalances", []) + + for i in range(len(pre)): + change = post[i] - pre[i] + if change >= min_lamports: + k = account_keys[i] + receiver = k["pubkey"] if isinstance(k, dict) else k + sender = None + for j in range(len(pre)): + if pre[j] - post[j] >= min_lamports: + sk = account_keys[j] + sender = sk["pubkey"] if isinstance(sk, dict) else sk + break + entry = { + "sender": sender, + "receiver": receiver, + "amount_SOL": round(lamports_to_sol(change), 4), + } + if sol_price: + entry["amount_USD"] = round(lamports_to_sol(change) * sol_price, 2) + whales.append(entry) + + out = { + "slot": slot, + "min_threshold_SOL": args.min_sol, + "large_transfers": whales, + "note": "Scans latest block only — point-in-time snapshot.", + } + if sol_price: + out["sol_price_usd"] = sol_price + print_json(out) + + +# --------------------------------------------------------------------------- +# 8. 
Price Lookup +# --------------------------------------------------------------------------- + +def cmd_price(args): + """Quick price lookup for a token by mint address or known symbol.""" + query = args.token + + # Check if it's a known symbol + mint = _SYMBOL_TO_MINT.get(query.upper(), query) + + # Try to resolve name + token_meta = resolve_token_name(mint) + + # Fetch price + prices = fetch_prices([mint]) + + out = {"query": query, "mint": mint} + if token_meta: + out["name"] = token_meta["name"] + out["symbol"] = token_meta["symbol"] + if mint in prices: + out["price_usd"] = prices[mint] + else: + out["price_usd"] = None + out["note"] = "Price not available — token may not be listed on CoinGecko." + print_json(out) + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +def main(): + parser = argparse.ArgumentParser( + prog="solana_client.py", + description="Solana blockchain query tool for Hermes Agent", + ) + sub = parser.add_subparsers(dest="command", required=True) + + sub.add_parser("stats", help="Network stats: slot, epoch, TPS, supply, SOL price") + + p_wallet = sub.add_parser("wallet", help="SOL balance + SPL tokens with USD values") + p_wallet.add_argument("address") + p_wallet.add_argument("--limit", type=int, default=20, + help="Max tokens to display (default: 20)") + p_wallet.add_argument("--all", action="store_true", + help="Show all tokens (no limit, no dust filter)") + p_wallet.add_argument("--no-prices", action="store_true", + help="Skip price lookups (faster, RPC-only)") + + p_tx = sub.add_parser("tx", help="Transaction details by signature") + p_tx.add_argument("signature") + + p_token = sub.add_parser("token", help="SPL token metadata, price, and top holders") + p_token.add_argument("mint") + + p_activity = sub.add_parser("activity", help="Recent transactions for an address") + p_activity.add_argument("address") + 
p_activity.add_argument("--limit", type=int, default=10, + help="Number of transactions (max 25, default 10)") + + p_nft = sub.add_parser("nft", help="NFT portfolio for a wallet") + p_nft.add_argument("address") + + p_whales = sub.add_parser("whales", help="Large SOL transfers in the latest block") + p_whales.add_argument("--min-sol", type=float, default=1000.0, + help="Minimum SOL transfer size (default: 1000)") + + p_price = sub.add_parser("price", help="Quick price lookup by mint or symbol") + p_price.add_argument("token", help="Mint address or known symbol (SOL, BONK, JUP, ...)") + + args = parser.parse_args() + + dispatch = { + "stats": cmd_stats, + "wallet": cmd_wallet, + "tx": cmd_tx, + "token": cmd_token, + "activity": cmd_activity, + "nft": cmd_nft, + "whales": cmd_whales, + "price": cmd_price, + } + dispatch[args.command](args) + + +if __name__ == "__main__": + main() diff --git a/optional-skills/email/agentmail/SKILL.md b/optional-skills/email/agentmail/SKILL.md new file mode 100644 index 0000000000..3ca753d3c1 --- /dev/null +++ b/optional-skills/email/agentmail/SKILL.md @@ -0,0 +1,125 @@ +--- +name: agentmail +description: Give the agent its own dedicated email inbox via AgentMail. Send, receive, and manage email autonomously using agent-owned email addresses (e.g. hermes-agent@agentmail.to). 
+version: 1.0.0 +metadata: + hermes: + tags: [email, communication, agentmail, mcp] + category: email +--- + +# AgentMail — Agent-Owned Email Inboxes + +## Requirements + +- **AgentMail API key** (required) — sign up at https://console.agentmail.to (free tier: 3 inboxes, 3,000 emails/month; paid plans from $20/mo) +- Node.js 18+ (for the MCP server) + +## When to Use +Use this skill when you need to: +- Give the agent its own dedicated email address +- Send emails autonomously on behalf of the agent +- Receive and read incoming emails +- Manage email threads and conversations +- Sign up for services or authenticate via email +- Communicate with other agents or humans via email + +This is NOT for reading the user's personal email (use himalaya or Gmail for that). +AgentMail gives the agent its own identity and inbox. + +## Setup + +### 1. Get an API Key +- Go to https://console.agentmail.to +- Create an account and generate an API key (starts with `am_`) + +### 2. Configure MCP Server +Add to `~/.hermes/config.yaml` (paste your actual key — MCP env vars are not expanded from .env): +```yaml +mcp_servers: + agentmail: + command: "npx" + args: ["-y", "agentmail-mcp"] + env: + AGENTMAIL_API_KEY: "am_your_key_here" +``` + +### 3. Restart Hermes +```bash +hermes +``` +All 11 AgentMail tools are now available automatically. 
+ +## Available Tools (via MCP) + +| Tool | Description | +|------|-------------| +| `list_inboxes` | List all agent inboxes | +| `get_inbox` | Get details of a specific inbox | +| `create_inbox` | Create a new inbox (gets a real email address) | +| `delete_inbox` | Delete an inbox | +| `list_threads` | List email threads in an inbox | +| `get_thread` | Get a specific email thread | +| `send_message` | Send a new email | +| `reply_to_message` | Reply to an existing email | +| `forward_message` | Forward an email | +| `update_message` | Update message labels/status | +| `get_attachment` | Download an email attachment | + +## Procedure + +### Create an inbox and send an email +1. Create a dedicated inbox: + - Use `create_inbox` with a username (e.g. `hermes-agent`) + - The agent gets address: `hermes-agent@agentmail.to` +2. Send an email: + - Use `send_message` with `inbox_id`, `to`, `subject`, `text` +3. Check for replies: + - Use `list_threads` to see incoming conversations + - Use `get_thread` to read a specific thread + +### Check incoming email +1. Use `list_inboxes` to find your inbox ID +2. Use `list_threads` with the inbox ID to see conversations +3. Use `get_thread` to read a thread and its messages + +### Reply to an email +1. Get the thread with `get_thread` +2. Use `reply_to_message` with the message ID and your reply text + +## Example Workflows + +**Sign up for a service:** +``` +1. create_inbox (username: "signup-bot") +2. Use the inbox address to register on the service +3. list_threads to check for verification email +4. get_thread to read the verification code +``` + +**Agent-to-human outreach:** +``` +1. create_inbox (username: "hermes-outreach") +2. send_message (to: user@example.com, subject: "Hello", text: "...") +3. 
list_threads to check for replies +``` + +## Pitfalls +- Free tier limited to 3 inboxes and 3,000 emails/month +- Emails come from `@agentmail.to` domain on free tier (custom domains on paid plans) +- Node.js (18+) is required for the MCP server (`npx -y agentmail-mcp`) +- The `mcp` Python package must be installed: `pip install mcp` +- Real-time inbound email (webhooks) requires a public server — use `list_threads` polling via cronjob instead for personal use + +## Verification +After setup, test with: +``` +hermes --toolsets mcp -q "Create an AgentMail inbox called test-agent and tell me its email address" +``` +You should see the new inbox address returned. + +## References +- AgentMail docs: https://docs.agentmail.to/ +- AgentMail console: https://console.agentmail.to +- AgentMail MCP repo: https://github.com/agentmail-to/agentmail-mcp +- Pricing: https://www.agentmail.to/pricing diff --git a/run_agent.py b/run_agent.py index 58d75332e7..a1d2b6c736 100644 --- a/run_agent.py +++ b/run_agent.py @@ -183,6 +183,7 @@ class AIAgent: session_db=None, honcho_session_key: str = None, iteration_budget: "IterationBudget" = None, + fallback_model: Dict[str, Any] = None, ): """ Initialize the AI Agent. @@ -406,6 +407,17 @@ class AIAgent: except Exception as e: raise RuntimeError(f"Failed to initialize OpenAI client: {e}") + # Provider fallback — a single backup model/provider tried when the + # primary is exhausted (rate-limit, overload, connection failure). 
+ # Config shape: {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"} + self._fallback_model = fallback_model if isinstance(fallback_model, dict) else None + self._fallback_activated = False + if self._fallback_model: + fb_p = self._fallback_model.get("provider", "") + fb_m = self._fallback_model.get("model", "") + if fb_p and fb_m and not self.quiet_mode: + print(f"🔄 Fallback model: {fb_m} ({fb_p})") + # Get available tools with filtering self.tools = get_tool_definitions( enabled_toolsets=enabled_toolsets, @@ -2146,6 +2158,141 @@ class AIAgent: raise result["error"] return result["response"] + # ── Provider fallback ────────────────────────────────────────────────── + + # API-key providers: provider → (base_url, [env_var_names]) + _FALLBACK_API_KEY_PROVIDERS = { + "openrouter": (OPENROUTER_BASE_URL, ["OPENROUTER_API_KEY"]), + "zai": ("https://api.z.ai/api/paas/v4", ["ZAI_API_KEY", "Z_AI_API_KEY"]), + "kimi-coding": ("https://api.moonshot.ai/v1", ["KIMI_API_KEY"]), + "minimax": ("https://api.minimax.io/v1", ["MINIMAX_API_KEY"]), + "minimax-cn": ("https://api.minimaxi.com/v1", ["MINIMAX_CN_API_KEY"]), + } + + # OAuth providers: provider → (resolver_import_path, api_mode) + # Each resolver returns {"api_key": ..., "base_url": ...}. + _FALLBACK_OAUTH_PROVIDERS = { + "openai-codex": ("resolve_codex_runtime_credentials", "codex_responses"), + "nous": ("resolve_nous_runtime_credentials", "chat_completions"), + } + + def _resolve_fallback_credentials( + self, fb_provider: str, fb_config: dict + ) -> Optional[tuple]: + """Resolve credentials for a fallback provider. + + Returns (api_key, base_url, api_mode) on success, or None on failure. + Handles three cases: + 1. OAuth providers (openai-codex, nous) — call credential resolver + 2. API-key providers (openrouter, zai, etc.) — read env var + 3. Custom endpoints — use base_url + api_key_env from config + """ + # ── 1. 
OAuth providers ──────────────────────────────────────── + if fb_provider in self._FALLBACK_OAUTH_PROVIDERS: + resolver_name, api_mode = self._FALLBACK_OAUTH_PROVIDERS[fb_provider] + try: + import hermes_cli.auth as _auth + resolver = getattr(_auth, resolver_name) + creds = resolver() + return creds["api_key"], creds["base_url"], api_mode + except Exception as e: + logging.warning( + "Fallback to %s failed (credential resolution): %s", + fb_provider, e, + ) + return None + + # ── 2. API-key providers ────────────────────────────────────── + fb_key = (fb_config.get("api_key") or "").strip() + if not fb_key: + key_env = (fb_config.get("api_key_env") or "").strip() + if key_env: + fb_key = os.getenv(key_env, "") + elif fb_provider in self._FALLBACK_API_KEY_PROVIDERS: + for env_var in self._FALLBACK_API_KEY_PROVIDERS[fb_provider][1]: + fb_key = os.getenv(env_var, "") + if fb_key: + break + if not fb_key: + logging.warning( + "Fallback model configured but no API key found for provider '%s'", + fb_provider, + ) + return None + + # ── 3. Resolve base URL ─────────────────────────────────────── + fb_base_url = (fb_config.get("base_url") or "").strip() + if not fb_base_url and fb_provider in self._FALLBACK_API_KEY_PROVIDERS: + fb_base_url = self._FALLBACK_API_KEY_PROVIDERS[fb_provider][0] + if not fb_base_url: + fb_base_url = OPENROUTER_BASE_URL + + return fb_key, fb_base_url, "chat_completions" + + def _try_activate_fallback(self) -> bool: + """Switch to the configured fallback model/provider. + + Called when the primary model is failing after retries. Swaps the + OpenAI client, model slug, and provider in-place so the retry loop + can continue with the new backend. One-shot: returns False if + already activated or not configured. 
+ """ + if self._fallback_activated or not self._fallback_model: + return False + + fb = self._fallback_model + fb_provider = (fb.get("provider") or "").strip().lower() + fb_model = (fb.get("model") or "").strip() + if not fb_provider or not fb_model: + return False + + resolved = self._resolve_fallback_credentials(fb_provider, fb) + if resolved is None: + return False + fb_key, fb_base_url, fb_api_mode = resolved + + # Build new client + try: + client_kwargs = {"api_key": fb_key, "base_url": fb_base_url} + if "openrouter" in fb_base_url.lower(): + client_kwargs["default_headers"] = { + "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", + "X-OpenRouter-Title": "Hermes Agent", + "X-OpenRouter-Categories": "productivity,cli-agent", + } + elif "api.kimi.com" in fb_base_url.lower(): + client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"} + + self.client = OpenAI(**client_kwargs) + self._client_kwargs = client_kwargs + old_model = self.model + self.model = fb_model + self.provider = fb_provider + self.base_url = fb_base_url + self.api_mode = fb_api_mode + self._fallback_activated = True + + # Re-evaluate prompt caching for the new provider/model + self._use_prompt_caching = ( + "openrouter" in fb_base_url.lower() + and "claude" in fb_model.lower() + ) + + print( + f"{self.log_prefix}🔄 Primary model failed — switching to fallback: " + f"{fb_model} via {fb_provider}" + ) + logging.info( + "Fallback activated: %s → %s (%s)", + old_model, fb_model, fb_provider, + ) + return True + except Exception as e: + logging.error("Failed to activate fallback model: %s", e) + return False + + # ── End provider fallback ────────────────────────────────────────────── + def _build_api_kwargs(self, api_messages: list) -> dict: """Build the keyword arguments dict for the active API mode.""" if self.api_mode == "codex_responses": @@ -2503,6 +2650,8 @@ class AIAgent: if self._session_db: try: + # Propagate title to the new session with auto-numbering + old_title = 
self._session_db.get_session_title(self.session_id) self._session_db.end_session(self.session_id, "compression") old_session_id = self.session_id self.session_id = f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:6]}" @@ -2512,6 +2661,13 @@ class AIAgent: model=self.model, parent_session_id=old_session_id, ) + # Auto-number the title for the continuation session + if old_title: + try: + new_title = self._session_db.get_next_title_in_lineage(old_title) + self._session_db.set_session_title(self.session_id, new_title) + except (ValueError, Exception) as e: + logger.debug("Could not propagate title on compression: %s", e) self._session_db.update_system_prompt(self.session_id, new_system_prompt) except Exception as e: logger.debug("Session DB compression split failed: %s", e) @@ -2529,9 +2685,10 @@ class AIAgent: if remaining_calls: print(f"{self.log_prefix}⚡ Interrupt: skipping {len(remaining_calls)} tool call(s)") for skipped_tc in remaining_calls: + skipped_name = skipped_tc.function.name skip_msg = { "role": "tool", - "content": "[Tool execution cancelled - user interrupted]", + "content": f"[Tool execution cancelled — {skipped_name} was skipped due to user interrupt]", "tool_call_id": skipped_tc.id, } messages.append(skip_msg) @@ -2734,9 +2891,10 @@ class AIAgent: remaining = len(assistant_message.tool_calls) - i print(f"{self.log_prefix}⚡ Interrupt: skipping {remaining} remaining tool call(s)") for skipped_tc in assistant_message.tool_calls[i:]: + skipped_name = skipped_tc.function.name skip_msg = { "role": "tool", - "content": "[Tool execution skipped - user sent a new message]", + "content": f"[Tool execution skipped — {skipped_name} was not started. User sent a new message]", "tool_call_id": skipped_tc.id } messages.append(skip_msg) @@ -2953,9 +3111,14 @@ class AIAgent: ) self._iters_since_skill = 0 - # Honcho prefetch: retrieve user context for system prompt injection + # Honcho prefetch: retrieve user context for system prompt injection. 
+ # Only on the FIRST turn of a session (empty history). On subsequent + # turns the model already has all prior context in its conversation + # history, and the Honcho context is baked into the stored system + # prompt — re-fetching it would change the system message and break + # Anthropic prompt caching. self._honcho_context = "" - if self._honcho and self._honcho_session_key: + if self._honcho and self._honcho_session_key and not conversation_history: try: self._honcho_context = self._honcho_prefetch(user_message) except Exception as e: @@ -2973,14 +3136,42 @@ class AIAgent: # Built once on first call, reused for all subsequent calls. # Only rebuilt after context compression events (which invalidate # the cache and reload memory from disk). + # + # For continuing sessions (gateway creates a fresh AIAgent per + # message), we load the stored system prompt from the session DB + # instead of rebuilding. Rebuilding would pick up memory changes + # from disk that the model already knows about (it wrote them!), + # producing a different system prompt and breaking the Anthropic + # prefix cache. if self._cached_system_prompt is None: - self._cached_system_prompt = self._build_system_prompt(system_message) - # Store the system prompt snapshot in SQLite - if self._session_db: + stored_prompt = None + if conversation_history and self._session_db: try: - self._session_db.update_system_prompt(self.session_id, self._cached_system_prompt) - except Exception as e: - logger.debug("Session DB update_system_prompt failed: %s", e) + session_row = self._session_db.get_session(self.session_id) + if session_row: + stored_prompt = session_row.get("system_prompt") or None + except Exception: + pass # Fall through to build fresh + + if stored_prompt: + # Continuing session — reuse the exact system prompt from + # the previous turn so the Anthropic cache prefix matches. + self._cached_system_prompt = stored_prompt + else: + # First turn of a new session — build from scratch. 
+ self._cached_system_prompt = self._build_system_prompt(system_message) + # Bake Honcho context into the prompt so it's stable for + # the entire session (not re-fetched per turn). + if self._honcho_context: + self._cached_system_prompt = ( + self._cached_system_prompt + "\n\n" + self._honcho_context + ).strip() + # Store the system prompt snapshot in SQLite + if self._session_db: + try: + self._session_db.update_system_prompt(self.session_id, self._cached_system_prompt) + except Exception as e: + logger.debug("Session DB update_system_prompt failed: %s", e) active_system_prompt = self._cached_system_prompt @@ -3106,11 +3297,13 @@ class AIAgent: # Build the final system message: cached prompt + ephemeral system prompt. # The ephemeral part is appended here (not baked into the cached prompt) # so it stays out of the session DB and logs. + # Note: Honcho context is baked into _cached_system_prompt on the first + # turn and stored in the session DB, so it does NOT need to be injected + # here. This keeps the system message identical across all turns in a + # session, maximizing Anthropic prompt cache hits. effective_system = active_system_prompt or "" if self.ephemeral_system_prompt: effective_system = (effective_system + "\n\n" + self.ephemeral_system_prompt).strip() - if self._honcho_context: - effective_system = (effective_system + "\n\n" + self._honcho_context).strip() if effective_system: api_messages = [{"role": "system", "content": effective_system}] + api_messages @@ -3261,6 +3454,10 @@ class AIAgent: print(f"{self.log_prefix} ⏱️ Response time: {api_duration:.2f}s (fast response often indicates rate limiting)") if retry_count >= max_retries: + # Try fallback before giving up + if self._try_activate_fallback(): + retry_count = 0 + continue print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded for invalid responses. 
Giving up.") logging.error(f"{self.log_prefix}Invalid API response after {max_retries} retries.") self._persist_session(messages, conversation_history) @@ -3285,7 +3482,7 @@ class AIAgent: self._persist_session(messages, conversation_history) self.clear_interrupt() return { - "final_response": "Operation interrupted.", + "final_response": f"Operation interrupted: retrying API call after rate limit (retry {retry_count}/{max_retries}).", "messages": messages, "api_calls": api_call_count, "completed": False, @@ -3394,10 +3591,11 @@ class AIAgent: if thinking_spinner: thinking_spinner.stop("") thinking_spinner = None + api_elapsed = time.time() - api_start_time print(f"{self.log_prefix}⚡ Interrupted during API call.") self._persist_session(messages, conversation_history) interrupted = True - final_response = "Operation interrupted." + final_response = f"Operation interrupted: waiting for model response ({api_elapsed:.1f}s elapsed)." break except Exception as api_error: @@ -3446,7 +3644,7 @@ class AIAgent: self._persist_session(messages, conversation_history) self.clear_interrupt() return { - "final_response": "Operation interrupted.", + "final_response": f"Operation interrupted: handling API error ({error_type}: {str(api_error)[:80]}).", "messages": messages, "api_calls": api_call_count, "completed": False, @@ -3586,6 +3784,11 @@ class AIAgent: ])) and not is_context_length_error if is_client_error: + # Try fallback before aborting — a different provider + # may not have the same issue (rate limit, auth, etc.) + if self._try_activate_fallback(): + retry_count = 0 + continue self._dump_api_request_debug( api_kwargs, reason="non_retryable_client_error", error=api_error, ) @@ -3603,6 +3806,10 @@ class AIAgent: } if retry_count >= max_retries: + # Try fallback before giving up entirely + if self._try_activate_fallback(): + retry_count = 0 + continue print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. 
Giving up.") logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}") logging.error(f"{self.log_prefix}Request details - Messages: {len(api_messages)}, Approx tokens: {approx_tokens:,}") @@ -3623,7 +3830,7 @@ class AIAgent: self._persist_session(messages, conversation_history) self.clear_interrupt() return { - "final_response": "Operation interrupted.", + "final_response": f"Operation interrupted: retrying API call after error (retry {retry_count}/{max_retries}).", "messages": messages, "api_calls": api_call_count, "completed": False, diff --git a/scripts/install.sh b/scripts/install.sh index b4a9716bac..7b87237b74 100755 --- a/scripts/install.sh +++ b/scripts/install.sh @@ -492,9 +492,23 @@ install_system_packages() { return 0 fi fi + elif [ -e /dev/tty ]; then + # Non-interactive (e.g. curl | bash) but a terminal is available. + # Read the prompt from /dev/tty (same approach the setup wizard uses). + echo "" + log_info "Installing ${description} requires sudo." + read -p "Install? 
[Y/n] " -n 1 -r < /dev/tty + echo + if [[ $REPLY =~ ^[Yy]$ ]] || [[ -z $REPLY ]]; then + if sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a $install_cmd < /dev/tty; then + [ "$need_ripgrep" = true ] && HAS_RIPGREP=true && log_success "ripgrep installed" + [ "$need_ffmpeg" = true ] && HAS_FFMPEG=true && log_success "ffmpeg installed" + return 0 + fi + fi else - log_warn "Non-interactive mode: cannot prompt for sudo password" - log_info "Install missing packages manually: sudo $install_cmd" + log_warn "Non-interactive mode and no terminal available — cannot install system packages" + log_info "Install manually after setup completes: sudo $install_cmd" fi fi fi diff --git a/skills/creative/ascii-art/SKILL.md b/skills/creative/ascii-art/SKILL.md index 594036b94b..1afe7ffcb9 100644 --- a/skills/creative/ascii-art/SKILL.md +++ b/skills/creative/ascii-art/SKILL.md @@ -1,7 +1,7 @@ --- name: ascii-art -description: Generate ASCII art using pyfiglet (571 fonts), cowsay, boxes, toilet, image-to-ascii conversion, and search curated art from emojicombos.com and asciiart.eu (11,000+ artworks). Falls back to LLM-generated art. -version: 3.1.0 +description: Generate ASCII art using pyfiglet (571 fonts), cowsay, boxes, toilet, image-to-ascii, remote APIs (asciified, ascii.co.uk), and LLM fallback. No API keys required. +version: 4.0.0 author: 0xbyt4, Hermes Agent license: MIT dependencies: [] @@ -14,9 +14,9 @@ metadata: # ASCII Art Skill -Multiple tools for different ASCII art needs. All tools are local CLI programs — no API keys required. +Multiple tools for different ASCII art needs. All tools are local CLI programs or free REST APIs — no API keys required. -## Tool 1: Text Banners (pyfiglet) +## Tool 1: Text Banners (pyfiglet — local) Render text as large ASCII art banners. 571 built-in fonts. 
@@ -53,7 +53,35 @@ python3 -m pyfiglet --list_fonts # List all 571 fonts - Short text (1-8 chars) works best with detailed fonts like `doom` or `block` - Long text works better with compact fonts like `small` or `mini` -## Tool 2: Cowsay (Message Art) +## Tool 2: Text Banners (asciified API — remote, no install) + +Free REST API that converts text to ASCII art. 250+ FIGlet fonts. Returns plain text directly — no parsing needed. Use this when pyfiglet is not installed or as a quick alternative. + +### Usage (via terminal curl) + +```bash +# Basic text banner (default font) +curl -s "https://asciified.thelicato.io/api/v2/ascii?text=Hello+World" + +# With a specific font +curl -s "https://asciified.thelicato.io/api/v2/ascii?text=Hello&font=Slant" +curl -s "https://asciified.thelicato.io/api/v2/ascii?text=Hello&font=Doom" +curl -s "https://asciified.thelicato.io/api/v2/ascii?text=Hello&font=Star+Wars" +curl -s "https://asciified.thelicato.io/api/v2/ascii?text=Hello&font=3-D" +curl -s "https://asciified.thelicato.io/api/v2/ascii?text=Hello&font=Banner3" + +# List all available fonts (returns JSON array) +curl -s "https://asciified.thelicato.io/api/v2/fonts" +``` + +### Tips + +- URL-encode spaces as `+` in the text parameter +- The response is plain text ASCII art — no JSON wrapping, ready to display +- Font names are case-sensitive; use the fonts endpoint to get exact names +- Works from any terminal with curl — no Python or pip needed + +## Tool 3: Cowsay (Message Art) Classic tool that wraps text in a speech bubble with an ASCII character. @@ -97,7 +125,7 @@ cowsay -e "OO" "Msg" # Custom eyes cowsay -T "U " "Msg" # Custom tongue ``` -## Tool 3: Boxes (Decorative Borders) +## Tool 4: Boxes (Decorative Borders) Draw decorative ASCII art borders/frames around any text. 70+ built-in designs. 
@@ -124,13 +152,15 @@ echo "Hello World" | boxes -a c # Center text boxes -l # List all 70+ designs ``` -### Combine with pyfiglet +### Combine with pyfiglet or asciified ```bash python3 -m pyfiglet "HERMES" -f slant | boxes -d stone +# Or without pyfiglet installed: +curl -s "https://asciified.thelicato.io/api/v2/ascii?text=HERMES&font=Slant" | boxes -d stone ``` -## Tool 4: TOIlet (Colored Text Art) +## Tool 5: TOIlet (Colored Text Art) Like pyfiglet but with ANSI color effects and visual filters. Great for terminal eye candy. @@ -160,14 +190,14 @@ toilet -F list # List available filters **Note**: toilet outputs ANSI escape codes for colors — works in terminals but may not render in all contexts (e.g., plain text files, some chat platforms). -## Tool 5: Image to ASCII Art +## Tool 6: Image to ASCII Art Convert images (PNG, JPEG, GIF, WEBP) to ASCII art. ### Option A: ascii-image-converter (recommended, modern) ```bash -# Install via snap or Go +# Install sudo snap install ascii-image-converter # OR: go install github.com/TheZoraiz/ascii-image-converter@latest ``` @@ -190,63 +220,77 @@ jp2a --width=80 image.jpg jp2a --colors image.jpg # Colorized ``` -## Tool 6: Search Pre-Made ASCII Art (Web APIs) +## Tool 7: Search Pre-Made ASCII Art -Search curated ASCII art databases via `web_extract`. No API keys needed. +Search curated ASCII art from the web. Use `terminal` with `curl`. -### Source A: emojicombos.com (recommended first) +### Source A: ascii.co.uk (recommended for pre-made art) -Huge collection of ASCII art, dot art, kaomoji, and emoji combos. Modern, meme-aware, user-submitted content. Great for pop culture, animals, objects, aesthetics. +Large collection of classic ASCII art organized by subject. Art is inside HTML `
<pre>` tags. Fetch the page with curl, then extract art with a small Python snippet.
 
-**URL pattern:** `https://emojicombos.com/{term}-ascii-art`
+**URL pattern:** `https://ascii.co.uk/art/{subject}`
 
+**Step 1 — Fetch the page:**
+
+```bash
+curl -s 'https://ascii.co.uk/art/cat' -o /tmp/ascii_art.html
 ```
-web_extract(urls=["https://emojicombos.com/cat-ascii-art"])
-web_extract(urls=["https://emojicombos.com/rocket-ascii-art"])
-web_extract(urls=["https://emojicombos.com/dragon-ascii-art"])
-web_extract(urls=["https://emojicombos.com/skull-ascii-art"])
-web_extract(urls=["https://emojicombos.com/heart-ascii-art"])
+
+**Step 2 — Extract art from pre tags:**
+
+```python
+import re, html
+with open('/tmp/ascii_art.html') as f:
+    text = f.read()
+arts = re.findall(r'<pre[^>]*>(.*?)</pre>
', text, re.DOTALL) +for art in arts: + clean = re.sub(r'<[^>]+>', '', art) + clean = html.unescape(clean).strip() + if len(clean) > 30: + print(clean) + print('\n---\n') ``` +**Available subjects** (use as URL path): +- Animals: `cat`, `dog`, `horse`, `bird`, `fish`, `dragon`, `snake`, `rabbit`, `elephant`, `dolphin`, `butterfly`, `owl`, `wolf`, `bear`, `penguin`, `turtle` +- Objects: `car`, `ship`, `airplane`, `rocket`, `guitar`, `computer`, `coffee`, `beer`, `cake`, `house`, `castle`, `sword`, `crown`, `key` +- Nature: `tree`, `flower`, `sun`, `moon`, `star`, `mountain`, `ocean`, `rainbow` +- Characters: `skull`, `robot`, `angel`, `wizard`, `pirate`, `ninja`, `alien` +- Holidays: `christmas`, `halloween`, `valentine` + **Tips:** -- Use hyphenated search terms: `hello-kitty-ascii-art`, `star-wars-ascii-art` -- Returns a mix of classic ASCII, Braille dot art, and kaomoji — pick the best style for the user -- Includes modern meme art and pop culture references -- Great for kaomoji/emoticons too: `https://emojicombos.com/cat-kaomoji` +- Preserve artist signatures/initials — important etiquette +- Multiple art pieces per page — pick the best one for the user +- Works reliably via curl, no JavaScript needed -### Source B: asciiart.eu (classic archive) +### Source B: GitHub Octocat API (fun easter egg) -11,000+ classic ASCII artworks organized by category. More traditional/vintage art. 
- -**Browse by category** (use as URL paths): -- `animals/cats`, `animals/dogs`, `animals/birds`, `animals/horses` -- `animals/dolphins`, `animals/dragons`, `animals/insects` -- `space/rockets`, `space/stars`, `space/planets` -- `vehicles/cars`, `vehicles/ships`, `vehicles/airplanes` -- `food-and-drinks/coffee`, `food-and-drinks/beer` -- `computers/computers`, `electronics/robots` -- `art-and-design/hearts`, `art-and-design/skulls` -- `plants/flowers`, `plants/trees` -- `mythology/dragons`, `mythology/unicorns` - -``` -web_extract(urls=["https://www.asciiart.eu/animals/cats"]) -web_extract(urls=["https://www.asciiart.eu/search?q=rocket"]) -``` - -**Tips:** -- Preserve artist initials/signatures (e.g., `jgs`, `hjw`) — this is important etiquette -- Better for classic/vintage ASCII art style - -### Source C: GitHub Octocat API (fun easter egg) - -Returns a random GitHub Octocat with a quote. No auth needed. +Returns a random GitHub Octocat with a wise quote. No auth needed. ```bash curl -s https://api.github.com/octocat ``` -## Tool 7: LLM-Generated Custom Art (Fallback) +## Tool 8: Fun ASCII Utilities (via curl) + +These free services return ASCII art directly — great for fun extras. 
+ +### QR Codes as ASCII Art + +```bash +curl -s "qrenco.de/Hello+World" +curl -s "qrenco.de/https://example.com" +``` + +### Weather as ASCII Art + +```bash +curl -s "wttr.in/London" # Full weather report with ASCII graphics +curl -s "wttr.in/Moon" # Moon phase in ASCII art +curl -s "v2.wttr.in/London" # Detailed version +``` + +## Tool 9: LLM-Generated Custom Art (Fallback) When tools above don't have what's needed, generate ASCII art directly using these Unicode characters: @@ -264,28 +308,14 @@ When tools above don't have what's needed, generate ASCII art directly using the - Max height: 15 lines for banners, 25 for scenes - Monospace only: output must render correctly in fixed-width fonts -## Fun Extras - -### Star Wars in ASCII (via telnet) - -```bash -telnet towel.blinkenlights.nl -``` - -### Useful Resources - -- [asciiart.eu](https://www.asciiart.eu/) — 11,000+ artworks, searchable -- [patorjk.com/software/taag](http://patorjk.com/software/taag/) — Web-based text-to-ASCII with font preview -- [asciiflow.com](http://asciiflow.com/) — Interactive ASCII diagram editor (browser) -- [awesome-ascii-art](https://github.com/moul/awesome-ascii-art) — Curated resource list - ## Decision Flow -1. **Text as a banner** → pyfiglet (or toilet for colored output) +1. **Text as a banner** → pyfiglet if installed, otherwise asciified API via curl 2. **Wrap a message in fun character art** → cowsay -3. **Add decorative border/frame** → boxes (can combine with pyfiglet) -4. **Art of a thing** (cat, rocket, dragon) → emojicombos.com first, then asciiart.eu -5. **Kaomoji / emoticons** → emojicombos.com (`{term}-kaomoji`) -6. **Convert an image to ASCII** → ascii-image-converter or jp2a -7. **Something custom/creative** → LLM generation with Unicode palette -8. **Any tool not installed** → install it, or fall back to next option +3. **Add decorative border/frame** → boxes (can combine with pyfiglet/asciified) +4. 
**Art of a specific thing** (cat, rocket, dragon) → ascii.co.uk via curl + parsing +5. **Convert an image to ASCII** → ascii-image-converter or jp2a +6. **QR code** → qrenco.de via curl +7. **Weather/moon art** → wttr.in via curl +8. **Something custom/creative** → LLM generation with Unicode palette +9. **Any tool not installed** → install it, or fall back to next option diff --git a/skills/dogfood/SKILL.md b/skills/dogfood/SKILL.md new file mode 100644 index 0000000000..81a4ebfdeb --- /dev/null +++ b/skills/dogfood/SKILL.md @@ -0,0 +1,162 @@ +--- +name: dogfood +description: Systematic exploratory QA testing of web applications — find bugs, capture evidence, and generate structured reports +version: 1.0.0 +metadata: + hermes: + tags: [qa, testing, browser, web, dogfood] + related_skills: [] +--- + +# Dogfood: Systematic Web Application QA Testing + +## Overview + +This skill guides you through systematic exploratory QA testing of web applications using the browser toolset. You will navigate the application, interact with elements, capture evidence of issues, and produce a structured bug report. + +## Prerequisites + +- Browser toolset must be available (`browser_navigate`, `browser_snapshot`, `browser_click`, `browser_type`, `browser_vision`, `browser_console`, `browser_scroll`, `browser_back`, `browser_press`, `browser_close`) +- A target URL and testing scope from the user + +## Inputs + +The user provides: +1. **Target URL** — the entry point for testing +2. **Scope** — what areas/features to focus on (or "full site" for comprehensive testing) +3. **Output directory** (optional) — where to save screenshots and the report (default: `./dogfood-output`) + +## Workflow + +Follow this 5-phase systematic workflow: + +### Phase 1: Plan + +1. Create the output directory structure: + ``` + {output_dir}/ + ├── screenshots/ # Evidence screenshots + └── report.md # Final report (generated in Phase 5) + ``` +2. Identify the testing scope based on user input. +3. 
Build a rough sitemap by planning which pages and features to test: + - Landing/home page + - Navigation links (header, footer, sidebar) + - Key user flows (sign up, login, search, checkout, etc.) + - Forms and interactive elements + - Edge cases (empty states, error pages, 404s) + +### Phase 2: Explore + +For each page or feature in your plan: + +1. **Navigate** to the page: + ``` + browser_navigate(url="https://example.com/page") + ``` + +2. **Take a snapshot** to understand the DOM structure: + ``` + browser_snapshot() + ``` + +3. **Check the console** for JavaScript errors: + ``` + browser_console(clear=true) + ``` + Do this after every navigation and after every significant interaction. Silent JS errors are high-value findings. + +4. **Take an annotated screenshot** to visually assess the page and identify interactive elements: + ``` + browser_vision(question="Describe the page layout, identify any visual issues, broken elements, or accessibility concerns", annotate=true) + ``` + The `annotate=true` flag overlays numbered `[N]` labels on interactive elements. Each `[N]` maps to ref `@eN` for subsequent browser commands. + +5. **Test interactive elements** systematically: + - Click buttons and links: `browser_click(ref="@eN")` + - Fill forms: `browser_type(ref="@eN", text="test input")` + - Test keyboard navigation: `browser_press(key="Tab")`, `browser_press(key="Enter")` + - Scroll through content: `browser_scroll(direction="down")` + - Test form validation with invalid inputs + - Test empty submissions + +6. **After each interaction**, check for: + - Console errors: `browser_console()` + - Visual changes: `browser_vision(question="What changed after the interaction?")` + - Expected vs actual behavior + +### Phase 3: Collect Evidence + +For every issue found: + +1. 
**Take a screenshot** showing the issue: + ``` + browser_vision(question="Capture and describe the issue visible on this page", annotate=false) + ``` + Save the `screenshot_path` from the response — you will reference it in the report. + +2. **Record the details**: + - URL where the issue occurs + - Steps to reproduce + - Expected behavior + - Actual behavior + - Console errors (if any) + - Screenshot path + +3. **Classify the issue** using the issue taxonomy (see `references/issue-taxonomy.md`): + - Severity: Critical / High / Medium / Low + - Category: Functional / Visual / Accessibility / Console / UX / Content + +### Phase 4: Categorize + +1. Review all collected issues. +2. De-duplicate — merge issues that are the same bug manifesting in different places. +3. Assign final severity and category to each issue. +4. Sort by severity (Critical first, then High, Medium, Low). +5. Count issues by severity and category for the executive summary. + +### Phase 5: Report + +Generate the final report using the template at `templates/dogfood-report-template.md`. + +The report must include: +1. **Executive summary** with total issue count, breakdown by severity, and testing scope +2. **Per-issue sections** with: + - Issue number and title + - Severity and category badges + - URL where observed + - Description of the issue + - Steps to reproduce + - Expected vs actual behavior + - Screenshot references (use `MEDIA:` for inline images) + - Console errors if relevant +3. **Summary table** of all issues +4. **Testing notes** — what was tested, what was not, any blockers + +Save the report to `{output_dir}/report.md`. 
+ +## Tools Reference + +| Tool | Purpose | +|------|---------| +| `browser_navigate` | Go to a URL | +| `browser_snapshot` | Get DOM text snapshot (accessibility tree) | +| `browser_click` | Click an element by ref (`@eN`) or text | +| `browser_type` | Type into an input field | +| `browser_scroll` | Scroll up/down on the page | +| `browser_back` | Go back in browser history | +| `browser_press` | Press a keyboard key | +| `browser_vision` | Screenshot + AI analysis; use `annotate=true` for element labels | +| `browser_console` | Get JS console output and errors | +| `browser_close` | Close the browser session | + +## Tips + +- **Always check `browser_console()` after navigating and after significant interactions.** Silent JS errors are among the most valuable findings. +- **Use `annotate=true` with `browser_vision`** when you need to reason about interactive element positions or when the snapshot refs are unclear. +- **Test with both valid and invalid inputs** — form validation bugs are common. +- **Scroll through long pages** — content below the fold may have rendering issues. +- **Test navigation flows** — click through multi-step processes end-to-end. +- **Check responsive behavior** by noting any layout issues visible in screenshots. +- **Don't forget edge cases**: empty states, very long text, special characters, rapid clicking. +- When reporting screenshots to the user, include `MEDIA:` so they can see the evidence inline. diff --git a/skills/dogfood/references/issue-taxonomy.md b/skills/dogfood/references/issue-taxonomy.md new file mode 100644 index 0000000000..59489929a3 --- /dev/null +++ b/skills/dogfood/references/issue-taxonomy.md @@ -0,0 +1,109 @@ +# Issue Taxonomy + +Use this taxonomy to classify issues found during dogfood QA testing. + +## Severity Levels + +### Critical +The issue makes a core feature completely unusable or causes data loss. 
+ +**Examples:** +- Application crashes or shows a blank white page +- Form submission silently loses user data +- Authentication is completely broken (can't log in at all) +- Payment flow fails and charges the user without completing the order +- Security vulnerability (e.g., XSS, exposed credentials in console) + +### High +The issue significantly impairs functionality but a workaround may exist. + +**Examples:** +- A key button does nothing when clicked (but refreshing fixes it) +- Search returns no results for valid queries +- Form validation rejects valid input +- Page loads but critical content is missing or garbled +- Navigation link leads to a 404 or wrong page +- Uncaught JavaScript exceptions in the console on core pages + +### Medium +The issue is noticeable and affects user experience but doesn't block core functionality. + +**Examples:** +- Layout is misaligned or overlapping on certain screen sections +- Images fail to load (broken image icons) +- Slow performance (visible loading delays > 3 seconds) +- Form field lacks proper validation feedback (no error message on bad input) +- Console warnings that suggest deprecated or misconfigured features +- Inconsistent styling between similar pages + +### Low +Minor polish issues that don't affect functionality. + +**Examples:** +- Typos or grammatical errors in text content +- Minor spacing or alignment inconsistencies +- Placeholder text left in production ("Lorem ipsum") +- Favicon missing +- Console info/debug messages that shouldn't be in production +- Subtle color contrast issues that don't fail WCAG requirements + +## Categories + +### Functional +Issues where features don't work as expected. + +- Buttons/links that don't respond +- Forms that don't submit or submit incorrectly +- Broken user flows (can't complete a multi-step process) +- Incorrect data displayed +- Features that work partially + +### Visual +Issues with the visual presentation of the page. 
+ +- Layout problems (overlapping elements, broken grids) +- Broken images or missing media +- Styling inconsistencies +- Responsive design failures +- Z-index issues (elements hidden behind others) +- Text overflow or truncation + +### Accessibility +Issues that prevent or hinder access for users with disabilities. + +- Missing alt text on meaningful images +- Poor color contrast (fails WCAG AA) +- Elements not reachable via keyboard navigation +- Missing form labels or ARIA attributes +- Focus indicators missing or unclear +- Screen reader incompatible content + +### Console +Issues detected through JavaScript console output. + +- Uncaught exceptions and unhandled promise rejections +- Failed network requests (4xx, 5xx errors in console) +- Deprecation warnings +- CORS errors +- Mixed content warnings (HTTP resources on HTTPS page) +- Excessive console.log output left from development + +### UX (User Experience) +Issues where functionality works but the experience is poor. + +- Confusing navigation or information architecture +- Missing loading indicators (user doesn't know something is happening) +- No feedback after user actions (e.g., button click with no visible result) +- Inconsistent interaction patterns +- Missing confirmation dialogs for destructive actions +- Poor error messages that don't help the user recover + +### Content +Issues with the text, media, or information on the page. 
+ +- Typos and grammatical errors +- Placeholder/dummy content in production +- Outdated information +- Missing content (empty sections) +- Broken or dead links to external resources +- Incorrect or misleading labels diff --git a/skills/dogfood/templates/dogfood-report-template.md b/skills/dogfood/templates/dogfood-report-template.md new file mode 100644 index 0000000000..9a500c5c80 --- /dev/null +++ b/skills/dogfood/templates/dogfood-report-template.md @@ -0,0 +1,86 @@ +# Dogfood QA Report + +**Target:** {target_url} +**Date:** {date} +**Scope:** {scope_description} +**Tester:** Hermes Agent (automated exploratory QA) + +--- + +## Executive Summary + +| Severity | Count | +|----------|-------| +| 🔴 Critical | {critical_count} | +| 🟠 High | {high_count} | +| 🟡 Medium | {medium_count} | +| 🔵 Low | {low_count} | +| **Total** | **{total_count}** | + +**Overall Assessment:** {one_sentence_assessment} + +--- + +## Issues + + + +### Issue #{issue_number}: {issue_title} + +| Field | Value | +|-------|-------| +| **Severity** | {severity} | +| **Category** | {category} | +| **URL** | {url_where_found} | + +**Description:** +{detailed_description_of_the_issue} + +**Steps to Reproduce:** +1. {step_1} +2. {step_2} +3. 
{step_3} + +**Expected Behavior:** +{what_should_happen} + +**Actual Behavior:** +{what_actually_happens} + +**Screenshot:** +MEDIA:{screenshot_path} + +**Console Errors** (if applicable): +``` +{console_error_output} +``` + +--- + + + +## Issues Summary Table + +| # | Title | Severity | Category | URL | +|---|-------|----------|----------|-----| +| {n} | {title} | {severity} | {category} | {url} | + +## Testing Coverage + +### Pages Tested +- {list_of_pages_visited} + +### Features Tested +- {list_of_features_exercised} + +### Not Tested / Out of Scope +- {areas_not_covered_and_why} + +### Blockers +- {any_issues_that_prevented_testing_certain_areas} + +--- + +## Notes + +{any_additional_observations_or_recommendations} diff --git a/skills/mlops/obliteratus/SKILL.md b/skills/mlops/obliteratus/SKILL.md index d9525a347d..598b997951 100644 --- a/skills/mlops/obliteratus/SKILL.md +++ b/skills/mlops/obliteratus/SKILL.md @@ -1,19 +1,19 @@ --- name: obliteratus -description: Remove refusal behaviors from open-weight LLMs using OBLITERATUS — mechanistic interpretability techniques (diff-in-means, SVD, whitened SVD, SAE decomposition, etc.) to excise guardrails while preserving reasoning. 9 CLI methods (+ 4 Python-API-only), 15 analysis modules, 116 model presets across 5 compute tiers. Use when a user wants to uncensor, abliterate, or remove refusal from an LLM. -version: 1.0.0 +description: Remove refusal behaviors from open-weight LLMs using OBLITERATUS — mechanistic interpretability techniques (diff-in-means, SVD, whitened SVD, LEACE, SAE decomposition, etc.) to excise guardrails while preserving reasoning. 9 CLI methods, 28 analysis modules, 116 model presets across 5 compute tiers, tournament evaluation, and telemetry-driven recommendations. Use when a user wants to uncensor, abliterate, or remove refusal from an LLM. 
+version: 2.0.0 author: Hermes Agent license: MIT dependencies: [obliteratus, torch, transformers, bitsandbytes, accelerate, safetensors] metadata: hermes: tags: [Abliteration, Uncensoring, Refusal-Removal, LLM, Weight-Projection, SVD, Mechanistic-Interpretability, HuggingFace, Model-Surgery] - + related_skills: [vllm, gguf, huggingface-tokenizers] --- # OBLITERATUS Skill -Remove refusal behaviors (guardrails) from open-weight LLMs without retraining or fine-tuning. Uses mechanistic interpretability techniques — including diff-in-means, SVD, whitened SVD, SAE decomposition, Bayesian kernel projection, and more — to identify and surgically excise refusal directions from model weights while preserving reasoning capabilities. +Remove refusal behaviors (guardrails) from open-weight LLMs without retraining or fine-tuning. Uses mechanistic interpretability techniques — including diff-in-means, SVD, whitened SVD, LEACE concept erasure, SAE decomposition, Bayesian kernel projection, and more — to identify and surgically excise refusal directions from model weights while preserving reasoning capabilities. **License warning:** OBLITERATUS is AGPL-3.0. NEVER import it as a Python library. Always invoke via CLI (`obliteratus` command) or subprocess. This keeps Hermes Agent's MIT license clean. @@ -25,7 +25,7 @@ Trigger when the user: - Wants to create an uncensored version of Llama, Qwen, Mistral, etc. - Mentions "refusal removal", "abliteration", "weight projection" - Wants to analyze how a model's refusal mechanism works -- References OBLITERATUS, FailSpy, abliterator, or refusal directions +- References OBLITERATUS, abliterator, or refusal directions ## Step 1: Installation @@ -35,10 +35,12 @@ obliteratus --version 2>/dev/null && echo "INSTALLED" || echo "NOT INSTALLED" ``` If not installed, clone and install from GitHub: -``` -Repository: https://github.com/elder-plinius/OBLITERATUS -Install: pip install -e . 
(from the cloned directory) -For Gradio UI: pip install -e ".[spaces]" +```bash +git clone https://github.com/elder-plinius/OBLITERATUS.git +cd OBLITERATUS +pip install -e . +# For Gradio web UI support: +# pip install -e ".[spaces]" ``` **IMPORTANT:** Confirm with user before installing. This pulls in ~5-10GB of dependencies (PyTorch, Transformers, bitsandbytes, etc.). @@ -51,7 +53,7 @@ python3 -c " import torch if torch.cuda.is_available(): gpu = torch.cuda.get_device_name(0) - vram = torch.cuda.get_device_properties(0).total_mem / 1024**3 + vram = torch.cuda.get_device_properties(0).total_memory / 1024**3 print(f'GPU: {gpu}') print(f'VRAM: {vram:.1f} GB') if vram < 4: print('TIER: tiny (models under 1B)') @@ -75,25 +77,28 @@ else: | 48 GB+ | ~72B+ params | Qwen2.5-72B, DeepSeek-R1 | | Multi-GPU| 200B+ params | Llama 3.1 405B, DeepSeek-V3 (685B MoE) | -## Step 3: Browse Available Models +## Step 3: Browse Available Models & Get Recommendations ```bash -# List models for your compute tier +# Browse models by compute tier obliteratus models --tier medium # Get architecture info for a specific model -obliteratus info meta-llama/Llama-3.1-8B-Instruct +obliteratus info + +# Get telemetry-driven recommendation for best method & params +obliteratus recommend +obliteratus recommend --insights # global cross-architecture rankings ``` ## Step 4: Choose a Method ### Method Selection Guide - -**First time / unsure? Use `informed`.** It auto-configures everything. +**Default / recommended for most cases: `advanced`.** It uses multi-direction SVD with norm-preserving projection and is well-tested. 
| Situation | Recommended Method | Why | |:----------------------------------|:-------------------|:-----------------------------------------| -| First attempt, any model | `informed` | Auto-detects alignment type, auto-tunes | +| Default / most models | `advanced` | Multi-direction SVD, norm-preserving, reliable | | Quick test / prototyping | `basic` | Fast, simple, good enough to evaluate | | Dense model (Llama, Mistral) | `advanced` | Multi-direction, norm-preserving | | MoE model (DeepSeek, Mixtral) | `nuclear` | Expert-granular, handles MoE complexity | @@ -101,214 +106,225 @@ obliteratus info meta-llama/Llama-3.1-8B-Instruct | Stubborn refusals persist | `aggressive` | Whitened SVD + head surgery + jailbreak | | Want reversible changes | Use steering vectors (see Analysis section) | | Maximum quality, time no object | `optimized` | Bayesian search for best parameters | +| Experimental auto-detection | `informed` | Auto-detects alignment type — experimental, may not always outperform advanced | ### 9 CLI Methods +- **basic** — Single refusal direction via diff-in-means. Fast (~5-10 min for 8B). +- **advanced** (DEFAULT, RECOMMENDED) — Multiple SVD directions, norm-preserving projection, 2 refinement passes. Medium speed (~10-20 min). +- **aggressive** — Whitened SVD + jailbreak-contrastive + attention head surgery. Higher risk of coherence damage. +- **spectral_cascade** — DCT frequency-domain decomposition. Research/novel approach. +- **informed** — Runs analysis DURING abliteration to auto-configure. Experimental — slower and less predictable than advanced. +- **surgical** — SAE features + neuron masking + head surgery + per-expert. Very slow (~1-2 hrs). Best for reasoning models. +- **optimized** — Bayesian hyperparameter search (Optuna TPE). Longest runtime but finds optimal parameters. +- **inverted** — Flips the refusal direction. Model becomes actively willing. +- **nuclear** — Maximum force combo for stubborn MoE models. Expert-granular. 
-These can be passed to `--method` on the command line: - -- **basic** — Single refusal direction via diff-in-means. Fastest, simplest. (Arditi et al. 2024) -- **advanced** — Multiple SVD directions, norm-preserving projection. Good default. -- **aggressive** — Whitened SVD + jailbreak contrast + attention head surgery -- **spectral_cascade** — DCT frequency-domain decomposition -- **informed** — Runs analysis DURING abliteration to auto-configure. Detects DPO/RLHF/CAI, maps refusal geometry, compensates for self-repair. Best quality. -- **surgical** — SAE features + neuron masking + head surgery + per-expert. Maximum precision. -- **optimized** — Bayesian hyperparameter search (Optuna TPE). Slowest but optimal. -- **inverted** — Flips the refusal direction (model becomes eager to help, not just neutral) -- **nuclear** — Maximum force combo for stubborn MoE models. +### Direction Extraction Methods (--direction-method flag) +- **diff_means** (default) — Simple difference-in-means between refused/complied activations. Robust. +- **svd** — Multi-direction SVD extraction. Better for complex alignment. +- **leace** — LEACE (Linear Erasure via Closed-form Estimation). Optimal linear erasure. ### 4 Python-API-Only Methods - -These reproduce prior community/academic work but are NOT available via CLI — only via the Python API (`from obliteratus.abliterate import AbliterationPipeline`). **Do not use these in CLI commands.** - -- **failspy** — FailSpy/abliterator reproduction -- **gabliteration** — Gabliteration reproduction -- **heretic** — Heretic/p-e-w reproduction -- **rdo** — Refusal Direction Optimization (ICML 2025) +(NOT available via CLI — require Python import, which violates AGPL boundary. Mention to user only if they explicitly want to use OBLITERATUS as a library in their own AGPL project.) 
+- failspy, gabliteration, heretic, rdo ## Step 5: Run Abliteration -### Basic Usage - +### Standard usage ```bash -# Default (advanced method) -obliteratus obliterate meta-llama/Llama-3.1-8B-Instruct +# Default method (advanced) — recommended for most models +obliteratus obliterate <model> --method advanced --output-dir ./abliterated-models -# With the informed pipeline (recommended) -obliteratus obliterate meta-llama/Llama-3.1-8B-Instruct --method informed +# With 4-bit quantization (saves VRAM) +obliteratus obliterate <model> --method advanced --quantization 4bit --output-dir ./abliterated-models -# With 4-bit quantization to save VRAM -obliteratus obliterate meta-llama/Llama-3.1-8B-Instruct \ - --method informed \ - --quantization 4bit \ - --output-dir ./abliterated-models - -# For large models (120B+), use conservative settings -obliteratus obliterate Qwen/Qwen2.5-72B-Instruct \ - --method advanced \ - --quantization 4bit \ - --large-model \ - --output-dir ./abliterated-models +# Large models (70B+) — conservative defaults +obliteratus obliterate <model> --method advanced --quantization 4bit --large-model --output-dir ./abliterated-models ``` -### Fine-Tuning Parameters - +### Fine-tuning parameters ```bash -obliteratus obliterate \ +obliteratus obliterate <model> \ --method advanced \ - --n-directions 8 \ + --direction-method diff_means \ + --n-directions 4 \ + --refinement-passes 2 \ --regularization 0.1 \ - --refinement-passes 3 \ - --dtype bfloat16 \ - --device auto \ - --output-dir ./output + --quantization 4bit \ + --output-dir ./abliterated-models \ + --contribute # opt-in telemetry for community research ``` -Parameter explanations: -- `--n-directions N` — How many refusal directions to remove (default: auto-detected) -- `--regularization 0.0-1.0` — Fraction of original weights to preserve (higher = safer but less complete removal) -- `--refinement-passes N` — Iterative passes to catch self-repair (Ouroboros effect) -- `--dtype` — float16, bfloat16, or float32 -- `--quantization` — 
4bit or 8bit (saves VRAM, slight quality tradeoff) -- `--large-model` — Conservative defaults for 120B+ models (fewer directions, fewer passes) +### Key flags +| Flag | Description | Default | +|:-----|:------------|:--------| +| `--method` | Abliteration method | advanced | +| `--direction-method` | Direction extraction | diff_means | +| `--n-directions` | Number of refusal directions (1-32) | method-dependent | +| `--refinement-passes` | Iterative passes (1-5) | 2 | +| `--regularization` | Regularization strength (0.0-1.0) | 0.1 | +| `--quantization` | Load in 4bit or 8bit | none (full precision) | +| `--large-model` | Conservative defaults for 120B+ | false | +| `--output-dir` | Where to save the abliterated model | ./obliterated_model | +| `--contribute` | Share anonymized results for research | false | +| `--verify-sample-size` | Number of test prompts for refusal check | 20 | +| `--dtype` | Model dtype (float16, bfloat16) | auto | -### Interactive Mode (Guided) - -For users unsure about options: +### Other execution modes ```bash +# Interactive guided mode (hardware → model → preset) obliteratus interactive -``` -### Web UI (Gradio) - -```bash +# Web UI (Gradio) obliteratus ui --port 7860 + +# Run a full ablation study from YAML config +obliteratus run config.yaml --preset quick + +# Tournament: pit all methods against each other +obliteratus tourney ``` ## Step 6: Verify Results -After abliteration, check the output report for: +After abliteration, check the output metrics: -| Metric | Good Value | Concerning Value | Meaning | -|:---------------|:--------------------|:------------------------|:-------------------------------------------| -| Refusal rate | Near 0% | > 10% | Refusals still present, try harder method | -| Perplexity | Within 10% of orig | > 20% increase | Model coherence damaged, too aggressive | -| KL divergence | < 0.1 | > 0.5 | Large output distribution shift | -| Coherence | High | Low | Model generating nonsense | +| Metric | Good Value | 
Warning | +|:-------|:-----------|:--------| +| Refusal rate | < 5% (ideally ~0%) | > 10% means refusals persist | +| Perplexity change | < 10% increase | > 15% means coherence damage | +| KL divergence | < 0.1 | > 0.5 means significant distribution shift | +| Coherence | High / passes qualitative check | Degraded responses, repetition | -### If perplexity spiked (too aggressive): -1. Increase `--regularization` (e.g., 0.2 or 0.3) -2. Decrease `--n-directions` (e.g., 4 instead of 8) -3. Use a less aggressive method (`advanced` instead of `aggressive`) +### If refusals persist (> 10%) +1. Try `aggressive` method +2. Increase `--n-directions` (e.g., 8 or 16) +3. Add `--refinement-passes 3` +4. Try `--direction-method svd` instead of diff_means -### If refusal persists (not aggressive enough): -1. Use `--method aggressive` or `--method nuclear` -2. Add `--refinement-passes 3` to catch self-repair -3. Use `--method informed` which auto-compensates +### If coherence is damaged (perplexity > 15% increase) +1. Reduce `--n-directions` (try 2) +2. Increase `--regularization` (try 0.3) +3. Reduce `--refinement-passes` to 1 +4. Try `basic` method (gentler) ## Step 7: Use the Abliterated Model -The output is a standard HuggingFace model directory. Use it like any other model: +The output is a standard HuggingFace model directory. 
-### Quick test ```bash -python3 << 'EOF' +# Test locally with transformers +python3 -c " from transformers import AutoModelForCausalLM, AutoTokenizer -model = AutoModelForCausalLM.from_pretrained("./abliterated-models/model-name") -tokenizer = AutoTokenizer.from_pretrained("./abliterated-models/model-name") -inputs = tokenizer("Write a story about:", return_tensors="pt").to(model.device) +model = AutoModelForCausalLM.from_pretrained('./abliterated-models/') +tokenizer = AutoTokenizer.from_pretrained('./abliterated-models/') +inputs = tokenizer('How do I pick a lock?', return_tensors='pt') outputs = model.generate(**inputs, max_new_tokens=200) print(tokenizer.decode(outputs[0], skip_special_tokens=True)) -EOF +" + +# Upload to HuggingFace Hub +huggingface-cli upload <username>/<model>-abliterated ./abliterated-models/ + +# Serve with vLLM +vllm serve ./abliterated-models/ ``` -### Upload to HuggingFace Hub +## CLI Command Reference + +| Command | Description | +|:--------|:------------| +| `obliteratus obliterate` | Main abliteration command | +| `obliteratus info <model>` | Print model architecture details | +| `obliteratus models --tier <tier>` | Browse curated models by compute tier | +| `obliteratus recommend <model>` | Telemetry-driven method/param suggestion | +| `obliteratus interactive` | Guided setup wizard | +| `obliteratus tourney <model>` | Tournament: all methods head-to-head | +| `obliteratus run <config.yaml>` | Execute ablation study from YAML | +| `obliteratus strategies` | List all registered ablation strategies | +| `obliteratus report <results.json>` | Regenerate visual reports | +| `obliteratus ui` | Launch Gradio web interface | +| `obliteratus aggregate` | Summarize community telemetry data | + +## Analysis Modules + +OBLITERATUS includes 28 analysis modules for mechanistic interpretability. +See `skill_view(name="obliteratus", file_path="references/analysis-modules.md")` for the full reference. 
+ +### Quick analysis commands ```bash -huggingface-cli login # if not already logged in -huggingface-cli upload your-username/model-name-abliterated ./abliterated-models/model-name +# Run specific analysis modules +obliteratus run analysis-config.yaml --preset quick + +# Key modules to run first: +# - alignment_imprint: Fingerprint DPO/RLHF/CAI/SFT alignment method +# - concept_geometry: Single direction vs polyhedral cone +# - logit_lens: Which layer decides to refuse +# - anti_ouroboros: Self-repair risk score +# - causal_tracing: Causally necessary components ``` -### Serve with vLLM -```bash -vllm serve ./abliterated-models/model-name --port 8000 +### Steering Vectors (Reversible Alternative) +Instead of permanent weight modification, use inference-time steering: +```python +# Python API only — for user's own projects +from obliteratus.analysis.steering_vectors import SteeringVectorFactory, SteeringHookManager ``` -## Analysis Modules (15 Modules, Pre-Abliteration, Optional) +## Ablation Strategies -For understanding refusal geometry before committing to abliteration. 
+Beyond direction-based abliteration, OBLITERATUS includes structural ablation strategies: +- **Embedding Ablation** — Target embedding layer components +- **FFN Ablation** — Feed-forward network block removal +- **Head Pruning** — Attention head pruning +- **Layer Removal** — Full layer removal -### Run a Study +List all available: `obliteratus strategies` -```bash -obliteratus run study-config.yaml --preset jailbreak -``` +## Evaluation -### Study Presets +OBLITERATUS includes built-in evaluation tools: +- Refusal rate benchmarking +- Perplexity comparison (before/after) +- LM Eval Harness integration for academic benchmarks +- Head-to-head competitor comparison +- Baseline performance tracking -| Preset | Purpose | Time | -|:-------------|:-------------------------------------|:-------| -| `quick` | Sanity check, basic metrics | ~5 min | -| `jailbreak` | Refusal circuit localization | ~20 min| -| `guardrail` | Guardrail robustness evaluation | ~30 min| -| `attention` | Attention head contributions | ~30 min| -| `knowledge` | FFN importance mapping | ~30 min| -| `full` | Complete analysis, all strategies | ~1 hr | +## Platform Support -### Key Analysis Modules +- **CUDA** — Full support (NVIDIA GPUs) +- **Apple Silicon (MLX)** — Supported via MLX backend +- **CPU** — Supported for tiny models (< 1B params) -- **Alignment Imprint Detection** — Fingerprints DPO vs RLHF vs CAI vs SFT from subspace geometry -- **Concept Cone Geometry** — Is refusal one linear direction or a polyhedral cone (many directions)? -- **Refusal Logit Lens** — Which transformer layer makes the refusal decision? -- **Ouroboros Detection** — Will the model self-repair its refusal after removal? -- **Causal Tracing** — Which attention heads and MLP layers are causally necessary for refusal? -- **Cross-Model Transfer** — Can refusal directions from one model architecture work on another? 
-- **Residual Stream Decomposition** — Attention vs MLP contribution to refusal behavior -- **SAE-based Analysis** — Sparse Autoencoder feature decomposition of refusal circuits +## YAML Config Templates -## Steering Vectors (Reversible Alternative) +Load templates for reproducible runs via `skill_view`: +- `templates/abliteration-config.yaml` — Standard single-model config +- `templates/analysis-study.yaml` — Pre-abliteration analysis study +- `templates/batch-abliteration.yaml` — Multi-model batch processing -For testing refusal removal without permanent weight changes: +## Telemetry -Steering vectors apply activation hooks at inference time. Model weights stay unchanged. -Generated during the PROBE/DISTILL stages and can be saved/applied/removed at will. -Useful for A/B testing before committing to permanent abliteration. - -## YAML Config for Reproducible Studies - -For complex or reproducible workflows, use YAML configs. See templates/ for examples: -```bash -obliteratus run my_study.yaml -``` - -## Telemetry Notice - -- **CLI usage (local installs)**: Telemetry is OFF by default. Must explicitly opt in via `OBLITERATUS_TELEMETRY=1` env var or `--contribute` flag. -- **HuggingFace Spaces**: Telemetry is ON by default (auto-enabled when `SPACE_ID` env var is detected). -- Collected: model ID, method, benchmark scores, hardware info, timing (anonymous) -- NOT collected: IP addresses, user identity, prompt content -- Force off: `export OBLITERATUS_TELEMETRY=0` +OBLITERATUS can optionally contribute anonymized run data to a global research dataset. +Enable with `--contribute` flag. No personal data is collected — only model name, method, metrics. ## Common Pitfalls -1. **OOM (Out of Memory)** — Use `--quantization 4bit` and `--large-model` for big models -2. **Perplexity spike** — Too aggressive. Increase `--regularization` or reduce `--n-directions` -3. **Refusal persists** — Try `--method aggressive` or `--refinement-passes 3` -4. 
**MoE models resist** — Use `--method nuclear` for DeepSeek, Mixtral, DBRX -5. **Gated models fail** — Run `huggingface-cli login` and accept model terms on HF website first -6. **Self-repair (Ouroboros)** — Some models reconstruct refusal. Use `--method informed` which auto-compensates -7. **CoT damage** — Reasoning models lose chain-of-thought. Use `--method surgical` (CoT-aware) -8. **Disk space** — Output is full model copy. 8B fp16 = ~16GB, 70B fp16 = ~140GB -9. **Slow on CPU** — CPU-only is viable only for tiny models (<1B). Anything bigger needs GPU. +1. **Don't use `informed` as default** — it's experimental and slower. Use `advanced` for reliable results. +2. **Models under ~1B respond poorly to abliteration** — their refusal behaviors are shallow and fragmented, making clean direction extraction difficult. Expect partial results (20-40% remaining refusal). Models 3B+ have cleaner refusal directions and respond much better (often 0% refusal with `advanced`). +3. **`aggressive` can make things worse** — on small models it can damage coherence and actually increase refusal rate. Only use it if `advanced` leaves > 10% refusals on a 3B+ model. +4. **Always check perplexity** — if it spikes > 15%, the model is damaged. Reduce aggressiveness. +5. **MoE models need special handling** — use `nuclear` method for Mixtral, DeepSeek-MoE, etc. +6. **Quantized models can't be re-quantized** — abliterate the full-precision model, then quantize the output. +7. **VRAM estimation is approximate** — 4-bit quant helps but peak usage can spike during extraction. +8. **Reasoning models are sensitive** — use `surgical` for R1 distills to preserve chain-of-thought. +9. **Check `obliteratus recommend`** — telemetry data may have better parameters than defaults. +10. **AGPL license** — never `import obliteratus` in MIT/Apache projects. CLI invocation only. +11. **Large models (70B+)** — always use `--large-model` flag for conservative defaults. +12. 
**Spectral certification RED is common** — the spectral check often flags "incomplete" even when practical refusal rate is 0%. Check actual refusal rate rather than relying on spectral certification alone. -## Complementary Hermes Skills +## Complementary Skills -After abliteration: -- **axolotl** / **unsloth** — Fine-tune the abliterated model further -- **serving-llms-vllm** — Serve the model as an OpenAI-compatible API -- **sparse-autoencoder-training** — Train SAEs for deeper interpretability work - -## Resources - -- [OBLITERATUS GitHub](https://github.com/elder-plinius/OBLITERATUS) (AGPL-3.0) -- [HuggingFace Spaces Demo](https://huggingface.co/spaces/pliny-the-prompter/obliteratus) -- [Arditi et al. 2024 — Refusal in LMs Is Mediated by a Single Direction](https://arxiv.org/abs/2406.11717) -- [Refusal Direction Optimization — ICML 2025](https://arxiv.org/abs/2411.14793) +- **vllm** — Serve abliterated models with high throughput +- **gguf** — Convert abliterated models to GGUF for llama.cpp +- **huggingface-tokenizers** — Work with model tokenizers diff --git a/skills/mlops/obliteratus/references/analysis-modules.md b/skills/mlops/obliteratus/references/analysis-modules.md index 075148a008..074ba8dec7 100644 --- a/skills/mlops/obliteratus/references/analysis-modules.md +++ b/skills/mlops/obliteratus/references/analysis-modules.md @@ -1,170 +1,166 @@ # OBLITERATUS Analysis Modules — Reference -15 analysis modules for mechanistic interpretability of refusal in LLMs. -These help you understand HOW a model refuses before you decide to remove it. +OBLITERATUS includes 28 analysis modules for mechanistic interpretability of refusal in LLMs. +These modules help understand how and where refusal behaviors are encoded before performing abliteration. -> **Note:** The `analysis/` directory contains additional utility files (utils.py, -> visualization.py, etc.) and helper functions beyond the 15 core analysis modules -> listed below. 
The module count matches the README's "15 deep analysis modules." +--- ## Core Analysis (Run These First) -### Alignment Imprint Detection -**File:** `alignment_imprint.py` -**Purpose:** Identifies what alignment technique was used to train the model -**Detects:** DPO, RLHF, CAI (Constitutional AI), SFT (Supervised Fine-Tuning) -**How:** Analyzes subspace geometry — each alignment method leaves a distinct -geometric "fingerprint" in the weight space -**Output:** Detected method + confidence score -**Why it matters:** Different alignment methods need different abliteration approaches. -DPO models typically have cleaner single-direction refusal; RLHF is more diffuse. +### 1. Alignment Imprint Detection (`alignment_imprint.py`) +Fingerprints whether a model was trained via DPO, RLHF, CAI, or SFT. +This determines which extraction strategy will work best. -### Concept Cone Geometry -**File:** `concept_geometry.py` -**Purpose:** Maps whether refusal is one direction or a polyhedral cone (many) -**Output:** Cone angle, dimensionality, per-category breakdown -**Why it matters:** If refusal is a single direction, `basic` method works. If it's -a cone (multiple directions for different refusal categories), you need `advanced` -or `informed` with higher `n_directions`. +### 2. Concept Cone Geometry (`concept_geometry.py`) +Determines if refusal is a single linear direction or a polyhedral cone +(set of multiple mechanisms). Single-direction models respond well to `basic`; +polyhedral models need `advanced` or `surgical`. -### Refusal Logit Lens -**File:** `logit_lens.py` -**Purpose:** Identifies the specific layer where the model "decides" to refuse -**How:** Projects intermediate hidden states to vocabulary space at each layer, -watches when "I cannot" tokens spike in probability -**Output:** Layer-by-layer refusal probability plot -**Why it matters:** Tells you which layers are most important to target +### 3. 
Refusal Logit Lens (`logit_lens.py`) +Identifies the specific layer where a model "decides" to refuse by decoding +intermediate layer representations into token space. -### Ouroboros (Self-Repair) Detection -**File:** `anti_ouroboros.py` -**Purpose:** Predicts whether the model will reconstruct its refusal after removal -**How:** Measures redundancy in refusal representation across layers -**Output:** Self-repair risk score (0-1) -**Why it matters:** High self-repair risk means you need multiple refinement passes -or the `informed` method which auto-compensates +### 4. Ouroboros Detection (`anti_ouroboros.py`) +Identifies if a model attempts to "self-repair" refusal behaviors after +excision. Reports a risk score (0-1). High scores mean additional refinement +passes are needed. -### Causal Tracing -**File:** `causal_tracing.py` -**Purpose:** Determines which components are causally necessary for refusal -**How:** Patches activations between clean and corrupted runs, measures causal effect -**Output:** Causal importance map across layers, heads, and MLPs -**Why it matters:** Shows exactly which components to target for surgical removal +### 5. Causal Tracing (`causal_tracing.py`) +Identifies which components (layers, heads, MLPs) are causally necessary +for refusal behavior using activation patching. + +--- ## Geometric Analysis -### Cross-Layer Alignment -**File:** `cross_layer.py` -**Purpose:** Measures how aligned refusal directions are across layers -**Output:** Alignment matrix, cluster assignments -**Why it matters:** If directions are highly aligned across layers, removal is easier. -If they cluster, you may need layer-group-specific directions. +### 6. Cross-Layer Alignment (`cross_layer.py`) +Measures how refusal directions align across different layers. High alignment +means the refusal signal is consistent; low alignment suggests layer-specific +mechanisms. 
-### Residual Stream Decomposition -**File:** `residual_stream.py` -**Purpose:** Breaks down refusal into Attention vs MLP contributions -**Output:** Per-layer Attention/MLP contribution to refusal direction -**Why it matters:** Helps decide whether to target attention heads, MLPs, or both +### 7. Residual Stream Decomposition (`residual_stream.py`) +Decomposes the residual stream into attention and MLP contributions to +understand which component type contributes more to refusal. -### Riemannian Manifold Geometry -**File:** `riemannian_manifold.py` (673 lines) -**Purpose:** Analyzes the weight manifold geometry around refusal directions -**Output:** Curvature, geodesics, tangent space analysis -**Why it matters:** Research-grade; helps understand the geometric structure of alignment +### 8. Riemannian Manifold Geometry (`riemannian_manifold.py`) +Analyzes the curvature and geometry of the weight manifold near refusal +directions. Informs how aggressively projections can be applied without +damaging the manifold structure. -### Whitened SVD -**File:** `whitened_svd.py` -**Purpose:** Covariance-normalized SVD extraction -**How:** Whitens the activation covariance before computing refusal directions, -separating true refusal signal from natural activation variance -**Output:** Cleaner refusal directions with less noise -**Why it matters:** Produces more precise directions, especially for noisy activations +### 9. Whitened SVD (`whitened_svd.py`) +Covariance-normalized SVD extraction that separates guardrail signals from +natural activation variance. More precise than standard SVD for models with +high activation variance. + +### 10. Concept Cone Geometry (extended) +Maps the full polyhedral structure of refusal, including cone angles, +face counts, and intersection patterns. 
+ +--- ## Probing & Classification -### Activation Probing -**File:** `activation_probing.py` -**Purpose:** Post-excision probing to verify refusal signal is truly gone -**Output:** Residual refusal signal strength per layer -**Why it matters:** Verification that abliteration was complete +### 11. Activation Probing (`activation_probing.py`) +Post-excision verification — probes for residual refusal concepts after +abliteration to ensure complete removal. -### Probing Classifiers -**File:** `probing_classifiers.py` -**Purpose:** Trains linear classifiers to detect refusal in hidden states -**Output:** Classification accuracy per layer (should drop to ~50% after abliteration) -**Why it matters:** Quantitative measure of refusal removal completeness +### 12. Probing Classifiers (`probing_classifiers.py`) +Trains linear classifiers to detect refusal in activations. Used both +before (to verify refusal exists) and after (to verify it's gone). -### Activation Patching -**File:** `activation_patching.py` -**Purpose:** Interchange interventions — swap activations between harmful/harmless runs -**Output:** Which components are sufficient (not just necessary) for refusal -**Why it matters:** Complementary to causal tracing; together they give full picture +### 13. Activation Patching (`activation_patching.py`) +Interchange interventions — swaps activations between refused and complied +runs to identify causal components. + +### 14. Tuned Lens (`tuned_lens.py`) +Trained version of logit lens that provides more accurate per-layer +decoding by learning affine transformations for each layer. + +### 15. Multi-Token Position Analysis (`multi_token_position.py`) +Analyzes refusal signals across multiple token positions, not just the +last token. Important for models that distribute refusal across the sequence. + +--- + +## Abliteration & Manipulation + +### 16. 
SAE-Based Abliteration (`sae_abliteration.py`) +Uses Sparse Autoencoder features to identify and remove specific refusal +features. More surgical than direction-based methods. + +### 17. Steering Vectors (`steering_vectors.py`) +Creates and applies inference-time steering vectors for reversible refusal +modification. Includes `SteeringVectorFactory` and `SteeringHookManager`. + +### 18. LEACE Concept Erasure (`leace.py`) +Linear Erasure via Closed-form Estimation — mathematically optimal linear +concept removal. Available as both analysis module and direction extraction method. + +### 19. Sparse Surgery (`sparse_surgery.py`) +High-precision weight modification targeting individual neurons and +weight matrix entries rather than full directions. + +### 20. Conditional Abliteration (`conditional_abliteration.py`) +Targeted removal that only affects specific refusal categories while +preserving others (e.g., remove weapons refusal but keep CSAM refusal). + +--- ## Transfer & Robustness -### Cross-Model Transfer -**File:** `cross_model_transfer.py` -**Purpose:** Tests if refusal directions from one model work on another -**Output:** Transfer success rate between model pairs -**Why it matters:** If directions transfer, you can skip PROBE stage on similar models +### 21. Cross-Model Transfer (`cross_model_transfer.py`) +Tests whether refusal directions extracted from one model transfer to +another architecture. Measures universality of guardrail directions. -### Defense Robustness -**File:** `defense_robustness.py` -**Purpose:** Evaluates how robust the model's refusal defenses are -**Output:** Robustness score, entanglement mapping -**Why it matters:** Higher robustness = need more aggressive method +### 22. Defense Robustness (`defense_robustness.py`) +Evaluates how robust the abliteration is against various defense mechanisms +and re-alignment attempts. 
-### Spectral Certification -**File:** `spectral_certification.py` -**Purpose:** Certifies completeness of refusal direction removal -**Output:** Spectral gap analysis, completeness score -**Why it matters:** Formal verification that all major refusal components are addressed +### 23. Spectral Certification (`spectral_certification.py`) +Provides mathematical bounds on the completeness of refusal removal +using spectral analysis of the projection. + +### 24. Wasserstein Optimal Extraction (`wasserstein_optimal.py`) +Uses optimal transport theory for more precise direction extraction +that minimizes distribution shift. + +### 25. Wasserstein Transfer (`wasserstein_transfer.py`) +Distribution transfer between models using Wasserstein distance +for cross-architecture refusal direction mapping. + +--- ## Advanced / Research -### SAE-based Abliteration -**File:** `sae_abliteration.py` (762 lines) -**Purpose:** Uses Sparse Autoencoder features to decompose refusal at feature level -**Output:** Refusal-specific SAE features, targeted removal -**Why it matters:** Most fine-grained approach; can target individual refusal "concepts" +### 26. Bayesian Kernel Projection (`bayesian_kernel_projection.py`) +Probabilistic feature mapping that estimates uncertainty in refusal +direction identification. -### Wasserstein Optimal Extraction -**File:** `wasserstein_optimal.py` -**Purpose:** Optimal transport-based direction extraction -**Output:** Wasserstein-optimal refusal directions -**Why it matters:** Theoretically optimal direction extraction under distributional assumptions +### 27. Cross-Model Universality Index +Measures if guardrail directions generalize across different model +architectures and training regimes. 
-### Bayesian Kernel Projection -**File:** `bayesian_kernel_projection.py` -**Purpose:** Bayesian approach to refusal direction projection -**Output:** Posterior distribution over refusal directions -**Why it matters:** Quantifies uncertainty in direction estimation +### 28. Visualization (`visualization.py`) +Plotting and graphing utilities for all analysis modules. Generates +heatmaps, direction plots, and layer-wise analysis charts. -### Conditional Abliteration -**File:** `conditional_abliteration.py` -**Purpose:** Domain-specific conditional removal (remove refusal for topic X but keep for Y) -**Output:** Per-domain refusal directions -**Why it matters:** Selective uncensoring — remove only specific refusal categories +--- -### Steering Vectors -**File:** `steering_vectors.py` -**Purpose:** Generate inference-time steering vectors (reversible alternative) -**Output:** Steering vector files that can be applied/removed at inference -**Why it matters:** Non-destructive alternative to permanent weight modification +## Running Analysis -### Tuned Lens -**File:** `tuned_lens.py` -**Purpose:** Trained linear probes per layer (more accurate than raw logit lens) -**Output:** Layer-by-layer refusal representation with trained projections -**Why it matters:** More accurate than logit lens, especially for deeper models +### Via CLI +```bash +# Run analysis from a YAML config +obliteratus run analysis-study.yaml --preset quick -### Multi-Token Position Analysis -**File:** `multi_token_position.py` -**Purpose:** Analyzes refusal signal at multiple token positions (not just last) -**Output:** Position-dependent refusal direction maps -**Why it matters:** Some models encode refusal at the system prompt position, not the query +# Available study presets: +# quick — Fast sanity check (2-3 modules) +# full — All core + geometric analysis +# jailbreak — Refusal circuit localization +# knowledge — Knowledge preservation analysis +# robustness — Stress testing / defense evaluation 
+``` -### Sparse Surgery -**File:** `sparse_surgery.py` -**Purpose:** Row-level sparse weight surgery instead of full matrix projection -**Output:** Targeted weight modifications at the row level -**Why it matters:** More surgical than full-matrix projection, less collateral damage +### Via YAML Config +See the `templates/analysis-study.yaml` template for a complete example. +Load with: `skill_view(name="obliteratus", file_path="templates/analysis-study.yaml")` diff --git a/skills/mlops/obliteratus/references/methods-guide.md b/skills/mlops/obliteratus/references/methods-guide.md index 5f7c501b00..1ef323c16f 100644 --- a/skills/mlops/obliteratus/references/methods-guide.md +++ b/skills/mlops/obliteratus/references/methods-guide.md @@ -1,132 +1,141 @@ # OBLITERATUS Methods — Detailed Guide -> **Important:** The CLI (`obliteratus obliterate --method`) accepts 9 methods: -> basic, advanced, aggressive, spectral_cascade, informed, surgical, optimized, -> inverted, nuclear. Four additional methods (failspy, gabliteration, heretic, rdo) -> are available only via the Python API and will be rejected by argparse if used on CLI. +> The CLI accepts 9 methods via `--method`: basic, advanced, aggressive, spectral_cascade, +> informed, surgical, optimized, inverted, nuclear. +> Four additional methods (failspy, gabliteration, heretic, rdo) are available only via the Python API. ## How Abliteration Works (Theory) -When a model is trained with RLHF/DPO/CAI, it learns to represent "should I refuse?" -as a direction in its internal activation space. When processing a "harmful" prompt, -activations shift in this direction, causing the model to generate refusal text. - -Abliteration works by: -1. Measuring this direction (the difference between harmful and harmless activations) -2. Removing it from the model's weight matrices via orthogonal projection -3. 
The model can no longer "point toward" refusal, so it responds normally +Abliteration identifies a "refusal direction" — a vector in the model's activation space that +corresponds to refusal behavior — and projects it out of the weight matrices. Mathematically: `W_new = W_old - (W_old @ d @ d.T)` where `d` is the unit-norm refusal direction (a column vector), so that `d @ d.T` is an orthogonal projector onto it. +The key challenge is finding accurate refusal directions without damaging other capabilities. + +--- + +## Direction Extraction Methods + +Before projecting, OBLITERATUS extracts refusal directions using one of three methods: + +| Method | Flag | Description | Best For | +|:-------|:-----|:------------|:---------| +| Diff-in-Means | `--direction-method diff_means` | Difference between mean activations on refused vs. complied prompts | Default, fast, robust | +| SVD | `--direction-method svd` | Multi-direction extraction via Singular Value Decomposition | Complex alignment, multiple refusal mechanisms | +| LEACE | `--direction-method leace` | Linear Erasure via Closed-form Estimation — mathematically optimal | Maximum precision, research | + +--- + +## Method Details ### basic -**Technique:** Single refusal direction via diff-in-means -**Based on:** Arditi et al. 2024 ("Refusal in Language Models Is Mediated by a Single Direction") -**Speed:** Fast (~5-10 min for 8B) -**Quality:** Moderate — works for simple refusal patterns -**Best for:** Quick tests, models with clean single-direction refusal -**Limitation:** Misses complex multi-direction refusal patterns +- **Directions:** 1 (single diff-in-means vector) +- **Speed:** Fast (~5-10 min for 8B model) +- **Risk:** Low +- **Use case:** Quick tests, prototyping, evaluating if abliteration works for a model +- **How it works:** Extracts one refusal direction and projects it out uniformly across all layers. 
-### advanced (DEFAULT) -**Technique:** Multiple SVD directions with norm-preserving projection -**Speed:** Medium (~10-20 min for 8B) -**Quality:** Good — handles multi-direction refusal -**Best for:** Dense models (Llama, Qwen, Mistral) as a reliable default -**Key improvement:** Norm preservation prevents weight magnitude drift - -### informed (RECOMMENDED) -**Technique:** Analysis-guided auto-configuration -**Speed:** Slow (~20-40 min for 8B, runs 4 analysis modules first) -**Quality:** Best — adapts to each model's specific refusal implementation -**Best for:** Any model when quality matters more than speed - -The informed pipeline runs these analysis modules during abliteration: -1. **AlignmentImprintDetector** — Detects DPO/RLHF/CAI/SFT → sets regularization -2. **ConceptConeAnalyzer** — Polyhedral vs linear refusal → sets n_directions -3. **CrossLayerAlignmentAnalyzer** — Cluster-aware → selects target layers -4. **DefenseRobustnessEvaluator** — Self-repair risk → sets refinement passes -5. **Ouroboros loop** — Re-probes after excision, re-excises if refusal persists +### advanced (DEFAULT — RECOMMENDED) +- **Directions:** 4 (multi-direction SVD) +- **Speed:** Medium (~10-20 min for 8B model) +- **Risk:** Low-Medium +- **Refinement passes:** 2 +- **Use case:** Default for most models. Well-tested and reliable. +- **How it works:** Extracts multiple refusal directions via SVD, applies norm-preserving bi-projection to maintain weight matrix norms. Two refinement passes catch residual refusal. 
### aggressive -**Technique:** Whitened SVD + jailbreak-contrastive activations + attention head surgery -**Speed:** Slow (~30-60 min for 8B) -**Quality:** High but higher risk of coherence damage -**Best for:** Models that resist gentler methods -**Key feature:** Whitened SVD separates refusal signal from natural activation variance - -### surgical -**Technique:** SAE features + neuron masking + head surgery + per-expert directions -**Speed:** Very slow (~1-2 hrs for 8B, needs SAE) -**Quality:** Highest precision -**Best for:** Reasoning models (R1 distills) where you must preserve CoT -**Key feature:** CoT-Aware — explicitly protects reasoning-critical directions - -### nuclear -**Technique:** Everything combined — expert transplant + steering + per-expert directions -**Speed:** Very slow -**Quality:** Most thorough removal, highest risk of side effects -**Best for:** Stubborn MoE models (DeepSeek, Mixtral, DBRX) that resist other methods -**Key feature:** Expert-granular abliteration decomposes signals per MoE expert - -### optimized -**Technique:** Bayesian hyperparameter search via Optuna TPE -**Speed:** Very slow (runs many trials) -**Quality:** Finds optimal configuration automatically -**Best for:** Research, when you want the mathematically best parameters -**Requires:** optuna package +- **Directions:** 8+ (whitened SVD + jailbreak-contrastive) +- **Speed:** Medium-Slow +- **Risk:** Medium-High (may damage coherence) +- **Use case:** When `advanced` leaves > 10% refusals. Stubborn models. +- **How it works:** Uses whitened SVD for covariance-normalized extraction, adds jailbreak-contrastive directions, performs attention head surgery on the most refusal-active heads. 
### spectral_cascade -**Technique:** DCT frequency-domain decomposition of refusal signal -**Speed:** Medium-slow -**Quality:** Novel approach, less battle-tested -**Best for:** Research, exploring alternative decomposition strategies +- **Speed:** Medium +- **Risk:** Medium +- **Use case:** Research, novel approaches +- **How it works:** DCT (Discrete Cosine Transform) frequency-domain decomposition of refusal signals. Separates high-frequency (surface-level) from low-frequency (deep) refusal patterns. + +### informed (EXPERIMENTAL) +- **Speed:** Slow (~20-40 min for 8B model) +- **Risk:** Variable — results depend on analysis quality +- **Use case:** When you want auto-configuration, but be aware this is experimental and may not outperform `advanced`. +- **How it works:** Runs 4 analysis modules first (alignment imprint, concept geometry, logit lens, ouroboros detection), then auto-configures extraction strategy. Includes an "Ouroboros loop" that detects and counteracts self-repair. +- **Note:** The auto-detection can sometimes misconfigure. If results are poor, fall back to `advanced`. + +### surgical +- **Speed:** Very slow (~1-2 hrs for 8B model) +- **Risk:** Low (very precise) +- **Use case:** Reasoning models (R1 distills, QwQ, etc.) where chain-of-thought must be preserved. +- **How it works:** Uses SAE (Sparse Autoencoder) features + individual neuron masking + attention head surgery + per-expert decomposition (for MoE). CoT-aware — identifies and protects reasoning-critical directions before projecting. + +### optimized +- **Speed:** Very slow (hours — runs many trials) +- **Risk:** Low (finds optimal parameters) +- **Use case:** When quality matters more than speed. Production models. +- **How it works:** Bayesian hyperparameter search via Optuna TPE sampler. Optimizes n_directions, regularization, refinement passes, and layer selection jointly. Evaluates each configuration on refusal rate + perplexity. 
### inverted -**Technique:** Reflects (inverts) the refusal direction instead of removing it -**Speed:** Fast (same as basic) -**Quality:** Aggressive — model becomes actively willing, not just neutral -**Best for:** When you want the model to be maximally helpful -**Warning:** Can make the model too eager; may reduce safety-adjacent reasoning +- **Speed:** Fast +- **Risk:** High (model behavior changes dramatically) +- **Use case:** Research, studying refusal mechanisms +- **How it works:** Instead of projecting out the refusal direction, reflects it. The model actively complies rather than passively not-refusing. Useful for understanding the geometry of alignment. -### failspy / gabliteration / heretic / rdo (PYTHON API ONLY) -**Technique:** Faithful reproductions of prior community/academic work -**Speed:** Varies -**Quality:** Known baselines -**Best for:** Reproducing published results, comparing methods -**⚠️ NOT available via CLI** — these methods are only accessible via the Python API. -Do not use `--method failspy` etc. in CLI commands; argparse will reject them. +### nuclear +- **Speed:** Slow +- **Risk:** Medium-High +- **Use case:** Stubborn MoE models (DeepSeek-MoE, Mixtral, etc.) +- **How it works:** Combines expert-granular abliteration (EGA), steering vector injection, attention head pruning, and multi-pass refinement. Decomposes refusal signals into per-expert components for MoE architectures. + +--- ## Method Selection Flowchart ``` Is this a quick test? -├─ YES → basic -└─ NO → Is the model MoE (DeepSeek, Mixtral)? - ├─ YES → nuclear - └─ NO → Is it a reasoning model (R1 distill)? - ├─ YES → surgical - └─ NO → Do you care about speed? - ├─ YES → advanced - └─ NO → informed + → YES: basic + → NO: continue + +Is it an MoE model (Mixtral, DeepSeek-MoE)? + → YES: nuclear + → NO: continue + +Is it a reasoning model (R1, QwQ, CoT-focused)? + → YES: surgical + → NO: continue + +Do you need the absolute best quality and have time? 
+ → YES: optimized + → NO: advanced (recommended default) + +Did advanced leave > 10% refusals? + → YES: aggressive + → Still refusing: nuclear ``` +--- + ## Key Parameters -| Parameter | Range | Default | Effect | -|:--------------------|:---------|:--------|:--------------------------------------------| -| n_directions | 1-32 | auto | More = more thorough but riskier | -| regularization | 0.0-1.0 | 0.0 | Higher preserves more original behavior | -| refinement_passes | 1-5 | 1 | More catches self-repair (Ouroboros effect) | -| quantization | 4/8 bit | none | Saves VRAM, slight quality tradeoff | +| Parameter | Range | Default | Effect | +|:----------|:------|:--------|:-------| +| `--n-directions` | 1-32 | method-dependent | More directions = more complete removal, but higher damage risk | +| `--regularization` | 0.0-1.0 | 0.1 | Higher = more conservative (less removal, less damage) | +| `--refinement-passes` | 1-5 | 2 | More passes catch residual refusal, but diminishing returns | +| `--quantization` | 4bit, 8bit | none | Reduces VRAM usage; quality impact minimal for extraction | +| `--verify-sample-size` | 10-200 | 20 | More samples = more accurate refusal rate estimate | + +--- ## Troubleshooting -| Problem | Solution | -|:---------------------------|:--------------------------------------------------| -| Refusal rate still > 10% | Try aggressive/nuclear, add refinement passes | -| Perplexity up > 20% | Reduce n_directions, increase regularization | -| Model generates nonsense | Regularization too low, try 0.2-0.3 | -| OOM on GPU | Use 4-bit quantization, or try smaller model | -| MoE model barely changes | Use nuclear method (expert-granular) | -| CoT reasoning broken | Use surgical method (CoT-aware) | +| Problem | Likely Cause | Fix | +|:--------|:-------------|:----| +| Refusal rate > 20% | Too few directions | Increase `--n-directions`, try `aggressive` | +| Refusal rate 5-20% | Residual refusal | Add `--refinement-passes 3`, try `--direction-method svd` 
| +| Perplexity spike > 20% | Over-aggressive removal | Reduce `--n-directions`, increase `--regularization` | +| Repetitive output | Weight matrix damage | Use `basic` with fewer directions, check norm preservation | +| MoE model still refuses | Non-expert-aware method | Switch to `nuclear` | +| Reasoning degraded | CoT directions damaged | Use `surgical` method | +| OOM during extraction | Insufficient VRAM | Add `--quantization 4bit` and/or `--large-model` | diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index a8f797fe2d..66187d0554 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -1,4 +1,4 @@ -"""Tests for agent.auxiliary_client resolution chain, especially the Codex fallback.""" +"""Tests for agent.auxiliary_client resolution chain, provider overrides, and model overrides.""" import json import os @@ -12,6 +12,9 @@ from agent.auxiliary_client import ( get_vision_auxiliary_client, auxiliary_max_tokens_param, _read_codex_access_token, + _get_auxiliary_provider, + _resolve_forced_provider, + _resolve_auto, ) @@ -21,6 +24,10 @@ def _clean_env(monkeypatch): for key in ( "OPENROUTER_API_KEY", "OPENAI_BASE_URL", "OPENAI_API_KEY", "OPENAI_MODEL", "LLM_MODEL", "NOUS_INFERENCE_BASE_URL", + # Per-task provider/model overrides + "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL", + "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL", + "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL", ): monkeypatch.delenv(key, raising=False) @@ -151,15 +158,230 @@ class TestGetTextAuxiliaryClient: assert model is None -class TestCodexNotInVisionClient: - """Codex fallback should NOT apply to vision tasks.""" +class TestVisionClientFallback: + """Vision client auto mode tries OpenRouter, Nous, and Codex (multimodal-capable); custom endpoints are used only when forced.""" - def test_vision_returns_none_without_openrouter_nous(self): + def test_vision_returns_none_without_any_credentials(self): with
patch("agent.auxiliary_client._read_nous_auth", return_value=None): client, model = get_vision_auxiliary_client() assert client is None assert model is None + def test_vision_auto_includes_codex(self, codex_auth_dir): + """Codex supports vision (gpt-5.3-codex), so auto mode should use it.""" + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client.OpenAI"): + client, model = get_vision_auxiliary_client() + from agent.auxiliary_client import CodexAuxiliaryClient + assert isinstance(client, CodexAuxiliaryClient) + assert model == "gpt-5.3-codex" + + def test_vision_auto_skips_custom_endpoint(self, monkeypatch): + """Custom endpoint is skipped in vision auto mode.""" + monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1") + monkeypatch.setenv("OPENAI_API_KEY", "local-key") + with patch("agent.auxiliary_client._read_nous_auth", return_value=None): + client, model = get_vision_auxiliary_client() + assert client is None + assert model is None + + def test_vision_uses_openrouter_when_available(self, monkeypatch): + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + with patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = get_vision_auxiliary_client() + assert model == "google/gemini-3-flash-preview" + assert client is not None + + def test_vision_uses_nous_when_available(self, monkeypatch): + with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \ + patch("agent.auxiliary_client.OpenAI"): + mock_nous.return_value = {"access_token": "nous-tok"} + client, model = get_vision_auxiliary_client() + assert model == "gemini-3-flash" + assert client is not None + + def test_vision_forced_main_uses_custom_endpoint(self, monkeypatch): + """When explicitly forced to 'main', vision CAN use custom endpoint.""" + monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main") + monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:1234/v1") + monkeypatch.setenv("OPENAI_API_KEY", "local-key") + 
with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = get_vision_auxiliary_client() + assert client is not None + assert model == "gpt-4o-mini" + + def test_vision_forced_main_returns_none_without_creds(self, monkeypatch): + """Forced main with no credentials still returns None.""" + monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "main") + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_codex_access_token", return_value=None): + client, model = get_vision_auxiliary_client() + assert client is None + assert model is None + + def test_vision_forced_codex(self, monkeypatch, codex_auth_dir): + """When forced to 'codex', vision uses Codex OAuth.""" + monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "codex") + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client.OpenAI"): + client, model = get_vision_auxiliary_client() + from agent.auxiliary_client import CodexAuxiliaryClient + assert isinstance(client, CodexAuxiliaryClient) + assert model == "gpt-5.3-codex" + + +class TestGetAuxiliaryProvider: + """Tests for _get_auxiliary_provider env var resolution.""" + + def test_no_task_returns_auto(self): + assert _get_auxiliary_provider() == "auto" + assert _get_auxiliary_provider("") == "auto" + + def test_auxiliary_prefix_takes_priority(self, monkeypatch): + monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "openrouter") + assert _get_auxiliary_provider("vision") == "openrouter" + + def test_context_prefix_fallback(self, monkeypatch): + monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous") + assert _get_auxiliary_provider("compression") == "nous" + + def test_auxiliary_prefix_over_context_prefix(self, monkeypatch): + monkeypatch.setenv("AUXILIARY_COMPRESSION_PROVIDER", "openrouter") + monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous") + assert 
_get_auxiliary_provider("compression") == "openrouter" + + def test_auto_value_treated_as_auto(self, monkeypatch): + monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "auto") + assert _get_auxiliary_provider("vision") == "auto" + + def test_whitespace_stripped(self, monkeypatch): + monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", " openrouter ") + assert _get_auxiliary_provider("vision") == "openrouter" + + def test_case_insensitive(self, monkeypatch): + monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "OpenRouter") + assert _get_auxiliary_provider("vision") == "openrouter" + + def test_main_provider(self, monkeypatch): + monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_PROVIDER", "main") + assert _get_auxiliary_provider("web_extract") == "main" + + +class TestResolveForcedProvider: + """Tests for _resolve_forced_provider with explicit provider selection.""" + + def test_forced_openrouter(self, monkeypatch): + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + with patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = _resolve_forced_provider("openrouter") + assert model == "google/gemini-3-flash-preview" + assert client is not None + + def test_forced_openrouter_no_key(self, monkeypatch): + with patch("agent.auxiliary_client._read_nous_auth", return_value=None): + client, model = _resolve_forced_provider("openrouter") + assert client is None + assert model is None + + def test_forced_nous(self, monkeypatch): + with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \ + patch("agent.auxiliary_client.OpenAI"): + mock_nous.return_value = {"access_token": "nous-tok"} + client, model = _resolve_forced_provider("nous") + assert model == "gemini-3-flash" + assert client is not None + + def test_forced_nous_not_configured(self, monkeypatch): + with patch("agent.auxiliary_client._read_nous_auth", return_value=None): + client, model = _resolve_forced_provider("nous") + assert client is None + assert model is None + + def 
test_forced_main_uses_custom(self, monkeypatch): + monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1") + monkeypatch.setenv("OPENAI_API_KEY", "local-key") + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = _resolve_forced_provider("main") + assert model == "gpt-4o-mini" + + def test_forced_main_skips_openrouter_nous(self, monkeypatch): + """Even if OpenRouter key is set, 'main' skips it.""" + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + monkeypatch.setenv("OPENAI_BASE_URL", "http://local:8080/v1") + monkeypatch.setenv("OPENAI_API_KEY", "local-key") + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = _resolve_forced_provider("main") + # Should use custom endpoint, not OpenRouter + assert model == "gpt-4o-mini" + + def test_forced_main_falls_to_codex(self, codex_auth_dir, monkeypatch): + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client.OpenAI"): + client, model = _resolve_forced_provider("main") + from agent.auxiliary_client import CodexAuxiliaryClient + assert isinstance(client, CodexAuxiliaryClient) + assert model == "gpt-5.3-codex" + + def test_forced_codex(self, codex_auth_dir, monkeypatch): + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client.OpenAI"): + client, model = _resolve_forced_provider("codex") + from agent.auxiliary_client import CodexAuxiliaryClient + assert isinstance(client, CodexAuxiliaryClient) + assert model == "gpt-5.3-codex" + + def test_forced_codex_no_token(self, monkeypatch): + with patch("agent.auxiliary_client._read_codex_access_token", return_value=None): + client, model = _resolve_forced_provider("codex") + assert client is None + assert model is None + + def test_forced_unknown_returns_none(self, monkeypatch): 
+ with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_codex_access_token", return_value=None): + client, model = _resolve_forced_provider("invalid-provider") + assert client is None + assert model is None + + +class TestTaskSpecificOverrides: + """Integration tests for per-task provider routing via get_text_auxiliary_client(task=...).""" + + def test_text_with_vision_provider_override(self, monkeypatch): + """AUXILIARY_VISION_PROVIDER should not affect text tasks.""" + monkeypatch.setenv("AUXILIARY_VISION_PROVIDER", "nous") + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + with patch("agent.auxiliary_client.OpenAI"): + client, model = get_text_auxiliary_client() # no task → auto + assert model == "google/gemini-3-flash-preview" # OpenRouter, not Nous + + def test_compression_task_reads_context_prefix(self, monkeypatch): + """Compression task should check CONTEXT_COMPRESSION_PROVIDER.""" + monkeypatch.setenv("CONTEXT_COMPRESSION_PROVIDER", "nous") + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") # would win in auto + with patch("agent.auxiliary_client._read_nous_auth") as mock_nous, \ + patch("agent.auxiliary_client.OpenAI"): + mock_nous.return_value = {"access_token": "nous-tok"} + client, model = get_text_auxiliary_client("compression") + assert model == "gemini-3-flash" # forced to Nous, not OpenRouter + + def test_web_extract_task_override(self, monkeypatch): + monkeypatch.setenv("AUXILIARY_WEB_EXTRACT_PROVIDER", "openrouter") + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + with patch("agent.auxiliary_client.OpenAI"): + client, model = get_text_auxiliary_client("web_extract") + assert model == "google/gemini-3-flash-preview" + + def test_task_without_override_uses_auto(self, monkeypatch): + """A task with no provider env var falls through to auto chain.""" + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + with patch("agent.auxiliary_client.OpenAI"): + client, model = 
get_text_auxiliary_client("compression") + assert model == "google/gemini-3-flash-preview" # auto → OpenRouter + class TestAuxiliaryMaxTokensParam: def test_codex_fallback_uses_max_tokens(self, monkeypatch): diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 29b49fd181..12fa374c8c 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -224,6 +224,60 @@ class TestCompressWithClient: for tc in msg["tool_calls"]: assert tc["id"] in answered_ids + def test_summary_role_avoids_consecutive_user_messages(self): + """Summary role should alternate with the last head message to avoid consecutive same-role messages.""" + mock_client = MagicMock() + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened" + mock_client.chat.completions.create.return_value = mock_response + + with patch("agent.context_compressor.get_model_context_length", return_value=100000), \ + patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")): + c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2) + + # Last head message (index 1) is "assistant" → summary should be "user" + msgs = [ + {"role": "user", "content": "msg 0"}, + {"role": "assistant", "content": "msg 1"}, + {"role": "user", "content": "msg 2"}, + {"role": "assistant", "content": "msg 3"}, + {"role": "user", "content": "msg 4"}, + {"role": "assistant", "content": "msg 5"}, + ] + result = c.compress(msgs) + summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")] + assert len(summary_msg) == 1 + assert summary_msg[0]["role"] == "user" + + def test_summary_role_avoids_consecutive_user_when_head_ends_with_user(self): + """When last head message is 'user', summary must be 'assistant' to avoid two consecutive user messages.""" + mock_client = MagicMock() + 
mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: stuff happened" + mock_client.chat.completions.create.return_value = mock_response + + with patch("agent.context_compressor.get_model_context_length", return_value=100000), \ + patch("agent.context_compressor.get_text_auxiliary_client", return_value=(mock_client, "test-model")): + c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=3, protect_last_n=2) + + # Last head message (index 2) is "user" → summary should be "assistant" + msgs = [ + {"role": "system", "content": "system prompt"}, + {"role": "user", "content": "msg 1"}, + {"role": "user", "content": "msg 2"}, # last head — user + {"role": "assistant", "content": "msg 3"}, + {"role": "user", "content": "msg 4"}, + {"role": "assistant", "content": "msg 5"}, + {"role": "user", "content": "msg 6"}, + {"role": "assistant", "content": "msg 7"}, + ] + result = c.compress(msgs) + summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")] + assert len(summary_msg) == 1 + assert summary_msg[0]["role"] == "assistant" + def test_summarization_does_not_start_tail_with_tool_outputs(self): mock_client = MagicMock() mock_response = MagicMock() diff --git a/tests/gateway/test_resume_command.py b/tests/gateway/test_resume_command.py new file mode 100644 index 0000000000..17adcd2e74 --- /dev/null +++ b/tests/gateway/test_resume_command.py @@ -0,0 +1,200 @@ +"""Tests for /resume gateway slash command. + +Tests the _handle_resume_command handler (switch to a previously-named session) +across gateway messenger platforms. 
+""" + +from unittest.mock import MagicMock, AsyncMock + +import pytest + +from gateway.config import Platform +from gateway.platforms.base import MessageEvent +from gateway.session import SessionSource, build_session_key + + +def _make_event(text="/resume", platform=Platform.TELEGRAM, + user_id="12345", chat_id="67890"): + """Build a MessageEvent for testing.""" + source = SessionSource( + platform=platform, + user_id=user_id, + chat_id=chat_id, + user_name="testuser", + ) + return MessageEvent(text=text, source=source) + + +def _session_key_for_event(event): + """Get the session key that build_session_key produces for an event.""" + return build_session_key(event.source) + + +def _make_runner(session_db=None, current_session_id="current_session_001", + event=None): + """Create a bare GatewayRunner with a mock session_store and optional session_db.""" + from gateway.run import GatewayRunner + runner = object.__new__(GatewayRunner) + runner.adapters = {} + runner._session_db = session_db + runner._running_agents = {} + + # Compute the real session key if an event is provided + session_key = build_session_key(event.source) if event else "agent:main:telegram:dm" + + # Mock session_store that returns a session entry with a known session_id + mock_session_entry = MagicMock() + mock_session_entry.session_id = current_session_id + mock_session_entry.session_key = session_key + mock_store = MagicMock() + mock_store.get_or_create_session.return_value = mock_session_entry + mock_store.load_transcript.return_value = [] + mock_store.switch_session.return_value = mock_session_entry + runner.session_store = mock_store + + # Stub out memory flushing + runner._async_flush_memories = AsyncMock() + + return runner + + +# --------------------------------------------------------------------------- +# _handle_resume_command +# --------------------------------------------------------------------------- + + +class TestHandleResumeCommand: + """Tests for 
GatewayRunner._handle_resume_command.""" + + @pytest.mark.asyncio + async def test_no_session_db(self): + """Returns error when session database is unavailable.""" + runner = _make_runner(session_db=None) + event = _make_event(text="/resume My Project") + result = await runner._handle_resume_command(event) + assert "not available" in result.lower() + + @pytest.mark.asyncio + async def test_list_named_sessions_when_no_arg(self, tmp_path): + """With no argument, lists recently titled sessions.""" + from hermes_state import SessionDB + db = SessionDB(db_path=tmp_path / "state.db") + db.create_session("sess_001", "telegram") + db.create_session("sess_002", "telegram") + db.set_session_title("sess_001", "Research") + db.set_session_title("sess_002", "Coding") + + event = _make_event(text="/resume") + runner = _make_runner(session_db=db, event=event) + result = await runner._handle_resume_command(event) + assert "Research" in result + assert "Coding" in result + assert "Named Sessions" in result + db.close() + + @pytest.mark.asyncio + async def test_list_shows_usage_when_no_titled(self, tmp_path): + """With no arg and no titled sessions, shows instructions.""" + from hermes_state import SessionDB + db = SessionDB(db_path=tmp_path / "state.db") + db.create_session("sess_001", "telegram") # No title + + event = _make_event(text="/resume") + runner = _make_runner(session_db=db, event=event) + result = await runner._handle_resume_command(event) + assert "No named sessions" in result + assert "/title" in result + db.close() + + @pytest.mark.asyncio + async def test_resume_by_name(self, tmp_path): + """Resolves a title and switches to that session.""" + from hermes_state import SessionDB + db = SessionDB(db_path=tmp_path / "state.db") + db.create_session("old_session_abc", "telegram") + db.set_session_title("old_session_abc", "My Project") + db.create_session("current_session_001", "telegram") + + event = _make_event(text="/resume My Project") + runner = 
_make_runner(session_db=db, current_session_id="current_session_001", + event=event) + result = await runner._handle_resume_command(event) + + assert "Resumed" in result + assert "My Project" in result + # Verify switch_session was called with the old session ID + runner.session_store.switch_session.assert_called_once() + call_args = runner.session_store.switch_session.call_args + assert call_args[0][1] == "old_session_abc" + db.close() + + @pytest.mark.asyncio + async def test_resume_nonexistent_name(self, tmp_path): + """Returns error for unknown session name.""" + from hermes_state import SessionDB + db = SessionDB(db_path=tmp_path / "state.db") + db.create_session("current_session_001", "telegram") + + event = _make_event(text="/resume Nonexistent Session") + runner = _make_runner(session_db=db, event=event) + result = await runner._handle_resume_command(event) + assert "No session found" in result + db.close() + + @pytest.mark.asyncio + async def test_resume_already_on_session(self, tmp_path): + """Returns friendly message when already on the requested session.""" + from hermes_state import SessionDB + db = SessionDB(db_path=tmp_path / "state.db") + db.create_session("current_session_001", "telegram") + db.set_session_title("current_session_001", "Active Project") + + event = _make_event(text="/resume Active Project") + runner = _make_runner(session_db=db, current_session_id="current_session_001", + event=event) + result = await runner._handle_resume_command(event) + assert "Already on session" in result + db.close() + + @pytest.mark.asyncio + async def test_resume_auto_lineage(self, tmp_path): + """Asking for 'My Project' when 'My Project #2' exists gets the latest.""" + from hermes_state import SessionDB + db = SessionDB(db_path=tmp_path / "state.db") + db.create_session("sess_v1", "telegram") + db.set_session_title("sess_v1", "My Project") + db.create_session("sess_v2", "telegram") + db.set_session_title("sess_v2", "My Project #2") + 
db.create_session("current_session_001", "telegram") + + event = _make_event(text="/resume My Project") + runner = _make_runner(session_db=db, current_session_id="current_session_001", + event=event) + result = await runner._handle_resume_command(event) + + assert "Resumed" in result + # Should resolve to #2 (latest in lineage) + call_args = runner.session_store.switch_session.call_args + assert call_args[0][1] == "sess_v2" + db.close() + + @pytest.mark.asyncio + async def test_resume_clears_running_agent(self, tmp_path): + """Switching sessions clears any cached running agent.""" + from hermes_state import SessionDB + db = SessionDB(db_path=tmp_path / "state.db") + db.create_session("old_session", "telegram") + db.set_session_title("old_session", "Old Work") + db.create_session("current_session_001", "telegram") + + event = _make_event(text="/resume Old Work") + runner = _make_runner(session_db=db, current_session_id="current_session_001", + event=event) + # Simulate a running agent using the real session key + real_key = _session_key_for_event(event) + runner._running_agents[real_key] = MagicMock() + + await runner._handle_resume_command(event) + + assert real_key not in runner._running_agents + db.close() diff --git a/tests/gateway/test_session_hygiene.py b/tests/gateway/test_session_hygiene.py index b357d58619..9ac7b8029d 100644 --- a/tests/gateway/test_session_hygiene.py +++ b/tests/gateway/test_session_hygiene.py @@ -2,6 +2,10 @@ Verifies that the gateway detects pathologically large transcripts and triggers auto-compression before running the agent. (#628) + +The hygiene system uses the SAME compression config as the agent: + compression.threshold × model context length +so CLI and messaging platforms behave identically. 
""" import pytest @@ -38,75 +42,113 @@ def _make_large_history_tokens(target_tokens: int) -> list: # --------------------------------------------------------------------------- -# Detection threshold tests +# Detection threshold tests (model-aware, unified with compression config) # --------------------------------------------------------------------------- class TestSessionHygieneThresholds: - """Test that the threshold logic correctly identifies large sessions.""" + """Test that the threshold logic correctly identifies large sessions. + + Thresholds are derived from model context length × compression threshold, + matching what the agent's ContextCompressor uses. + """ def test_small_session_below_thresholds(self): """A 10-message session should not trigger compression.""" history = _make_history(10) - msg_count = len(history) approx_tokens = estimate_messages_tokens_rough(history) - compress_token_threshold = 100_000 - compress_msg_threshold = 200 + # For a 200k-context model at 85% threshold = 170k + context_length = 200_000 + threshold_pct = 0.85 + compress_token_threshold = int(context_length * threshold_pct) - needs_compress = ( - approx_tokens >= compress_token_threshold - or msg_count >= compress_msg_threshold - ) + needs_compress = approx_tokens >= compress_token_threshold assert not needs_compress - def test_large_message_count_triggers(self): - """200+ messages should trigger compression even if tokens are low.""" - history = _make_history(250, content_size=10) - msg_count = len(history) - - compress_msg_threshold = 200 - needs_compress = msg_count >= compress_msg_threshold - assert needs_compress - def test_large_token_count_triggers(self): - """High token count should trigger compression even if message count is low.""" - # 50 messages with huge content to exceed 100K tokens - history = _make_history(50, content_size=10_000) + """High token count should trigger compression when exceeding model threshold.""" + # Build a history that exceeds 85% of a 
200k model (170k tokens) + history = _make_large_history_tokens(180_000) approx_tokens = estimate_messages_tokens_rough(history) - compress_token_threshold = 100_000 + context_length = 200_000 + threshold_pct = 0.85 + compress_token_threshold = int(context_length * threshold_pct) + needs_compress = approx_tokens >= compress_token_threshold assert needs_compress - def test_under_both_thresholds_no_trigger(self): - """Session under both thresholds should not trigger.""" - history = _make_history(100, content_size=100) - msg_count = len(history) + def test_under_threshold_no_trigger(self): + """Session under threshold should not trigger, even with many messages.""" + # 250 short messages — lots of messages but well under token threshold + history = _make_history(250, content_size=10) approx_tokens = estimate_messages_tokens_rough(history) - compress_token_threshold = 100_000 - compress_msg_threshold = 200 + # 200k model at 85% = 170k token threshold + context_length = 200_000 + threshold_pct = 0.85 + compress_token_threshold = int(context_length * threshold_pct) - needs_compress = ( - approx_tokens >= compress_token_threshold - or msg_count >= compress_msg_threshold + needs_compress = approx_tokens >= compress_token_threshold + assert not needs_compress, ( + f"250 short messages (~{approx_tokens} tokens) should NOT trigger " + f"compression at {compress_token_threshold} token threshold" ) + + def test_message_count_alone_does_not_trigger(self): + """Message count alone should NOT trigger — only token count matters. + + The old system used an OR of token-count and message-count thresholds, + which caused premature compression in tool-heavy sessions with 200+ + messages but low total tokens. 
+ """ + # 300 very short messages — old system would compress, new should not + history = _make_history(300, content_size=10) + approx_tokens = estimate_messages_tokens_rough(history) + + context_length = 200_000 + threshold_pct = 0.85 + compress_token_threshold = int(context_length * threshold_pct) + + # Token-based check only + needs_compress = approx_tokens >= compress_token_threshold assert not needs_compress - def test_custom_thresholds(self): - """Custom thresholds from config should be respected.""" - history = _make_history(60, content_size=100) - msg_count = len(history) + def test_threshold_scales_with_model(self): + """Different models should have different compression thresholds.""" + # 128k model at 85% = 108,800 tokens + small_model_threshold = int(128_000 * 0.85) + # 200k model at 85% = 170,000 tokens + large_model_threshold = int(200_000 * 0.85) + # 1M model at 85% = 850,000 tokens + huge_model_threshold = int(1_000_000 * 0.85) - # Custom lower threshold - compress_msg_threshold = 50 - needs_compress = msg_count >= compress_msg_threshold - assert needs_compress + # A session at ~120k tokens: + history = _make_large_history_tokens(120_000) + approx_tokens = estimate_messages_tokens_rough(history) - # Custom higher threshold - compress_msg_threshold = 100 - needs_compress = msg_count >= compress_msg_threshold - assert not needs_compress + # Should trigger for 128k model + assert approx_tokens >= small_model_threshold + # Should NOT trigger for 200k model + assert approx_tokens < large_model_threshold + # Should NOT trigger for 1M model + assert approx_tokens < huge_model_threshold + + def test_custom_threshold_percentage(self): + """Custom threshold percentage from config should be respected.""" + context_length = 200_000 + + # At 50% threshold = 100k + low_threshold = int(context_length * 0.50) + # At 90% threshold = 180k + high_threshold = int(context_length * 0.90) + + history = _make_large_history_tokens(150_000) + approx_tokens = 
estimate_messages_tokens_rough(history) + + # Should trigger at 50% but not at 90% + assert approx_tokens >= low_threshold + assert approx_tokens < high_threshold def test_minimum_message_guard(self): """Sessions with fewer than 4 messages should never trigger.""" @@ -117,18 +159,19 @@ class TestSessionHygieneThresholds: class TestSessionHygieneWarnThreshold: - """Test the post-compression warning threshold.""" + """Test the post-compression warning threshold (95% of context).""" def test_warn_when_still_large(self): - """If compressed result is still above warn_tokens, should warn.""" - # Simulate post-compression tokens - warn_threshold = 200_000 - post_compress_tokens = 250_000 + """If compressed result is still above 95% of context, should warn.""" + context_length = 200_000 + warn_threshold = int(context_length * 0.95) # 190k + post_compress_tokens = 195_000 assert post_compress_tokens >= warn_threshold def test_no_warn_when_under(self): - """If compressed result is under warn_tokens, no warning.""" - warn_threshold = 200_000 + """If compressed result is under 95% of context, no warning.""" + context_length = 200_000 + warn_threshold = int(context_length * 0.95) # 190k post_compress_tokens = 150_000 assert post_compress_tokens < warn_threshold @@ -150,10 +193,12 @@ class TestTokenEstimation: assert estimate_messages_tokens_rough(many) > estimate_messages_tokens_rough(few) def test_pathological_session_detected(self): - """The reported pathological case: 648 messages, ~299K tokens.""" - # Simulate a 648-message session averaging ~460 tokens per message + """The reported pathological case: 648 messages, ~299K tokens. + + With a 200k model at 85% threshold (170k), this should trigger. 
+ """ history = _make_history(648, content_size=1800) tokens = estimate_messages_tokens_rough(history) - # Should be well above the 100K default threshold - assert tokens > 100_000 - assert len(history) > 200 + # Should be well above the 170K threshold for a 200k model + threshold = int(200_000 * 0.85) + assert tokens > threshold diff --git a/tests/gateway/test_signal.py b/tests/gateway/test_signal.py new file mode 100644 index 0000000000..f7e32dbb0f --- /dev/null +++ b/tests/gateway/test_signal.py @@ -0,0 +1,294 @@ +"""Tests for Signal messenger platform adapter.""" +import json +import pytest +from unittest.mock import MagicMock, patch, AsyncMock + +from gateway.config import Platform, PlatformConfig + + +# --------------------------------------------------------------------------- +# Platform & Config +# --------------------------------------------------------------------------- + +class TestSignalPlatformEnum: + def test_signal_enum_exists(self): + assert Platform.SIGNAL.value == "signal" + + def test_signal_in_platform_list(self): + platforms = [p.value for p in Platform] + assert "signal" in platforms + + +class TestSignalConfigLoading: + def test_apply_env_overrides_signal(self, monkeypatch): + monkeypatch.setenv("SIGNAL_HTTP_URL", "http://localhost:9090") + monkeypatch.setenv("SIGNAL_ACCOUNT", "+15551234567") + + from gateway.config import GatewayConfig, _apply_env_overrides + config = GatewayConfig() + _apply_env_overrides(config) + + assert Platform.SIGNAL in config.platforms + sc = config.platforms[Platform.SIGNAL] + assert sc.enabled is True + assert sc.extra["http_url"] == "http://localhost:9090" + assert sc.extra["account"] == "+15551234567" + + def test_signal_not_loaded_without_both_vars(self, monkeypatch): + monkeypatch.setenv("SIGNAL_HTTP_URL", "http://localhost:9090") + # No SIGNAL_ACCOUNT + + from gateway.config import GatewayConfig, _apply_env_overrides + config = GatewayConfig() + _apply_env_overrides(config) + + assert Platform.SIGNAL not in 
config.platforms + + def test_connected_platforms_includes_signal(self, monkeypatch): + monkeypatch.setenv("SIGNAL_HTTP_URL", "http://localhost:8080") + monkeypatch.setenv("SIGNAL_ACCOUNT", "+15551234567") + + from gateway.config import GatewayConfig, _apply_env_overrides + config = GatewayConfig() + _apply_env_overrides(config) + + connected = config.get_connected_platforms() + assert Platform.SIGNAL in connected + + +# --------------------------------------------------------------------------- +# Adapter Init & Helpers +# --------------------------------------------------------------------------- + +class TestSignalAdapterInit: + def _make_config(self, **extra): + config = PlatformConfig() + config.enabled = True + config.extra = { + "http_url": "http://localhost:8080", + "account": "+15551234567", + **extra, + } + return config + + def test_init_parses_config(self, monkeypatch): + monkeypatch.setenv("SIGNAL_GROUP_ALLOWED_USERS", "group123,group456") + + from gateway.platforms.signal import SignalAdapter + adapter = SignalAdapter(self._make_config()) + + assert adapter.http_url == "http://localhost:8080" + assert adapter.account == "+15551234567" + assert "group123" in adapter.group_allow_from + + def test_init_empty_allowlist(self, monkeypatch): + monkeypatch.setenv("SIGNAL_GROUP_ALLOWED_USERS", "") + + from gateway.platforms.signal import SignalAdapter + adapter = SignalAdapter(self._make_config()) + + assert len(adapter.group_allow_from) == 0 + + def test_init_strips_trailing_slash(self, monkeypatch): + monkeypatch.setenv("SIGNAL_GROUP_ALLOWED_USERS", "") + + from gateway.platforms.signal import SignalAdapter + adapter = SignalAdapter(self._make_config(http_url="http://localhost:8080/")) + + assert adapter.http_url == "http://localhost:8080" + + def test_self_message_filtering(self, monkeypatch): + monkeypatch.setenv("SIGNAL_GROUP_ALLOWED_USERS", "") + + from gateway.platforms.signal import SignalAdapter + adapter = SignalAdapter(self._make_config()) + + 
assert adapter._account_normalized == "+15551234567" + + +class TestSignalHelpers: + def test_redact_phone_long(self): + from gateway.platforms.signal import _redact_phone + assert _redact_phone("+15551234567") == "+155****4567" + + def test_redact_phone_short(self): + from gateway.platforms.signal import _redact_phone + assert _redact_phone("+12345") == "+1****45" + + def test_redact_phone_empty(self): + from gateway.platforms.signal import _redact_phone + assert _redact_phone("") == "" + + def test_parse_comma_list(self): + from gateway.platforms.signal import _parse_comma_list + assert _parse_comma_list("+1234, +5678 , +9012") == ["+1234", "+5678", "+9012"] + assert _parse_comma_list("") == [] + assert _parse_comma_list(" , , ") == [] + + def test_guess_extension_png(self): + from gateway.platforms.signal import _guess_extension + assert _guess_extension(b"\x89PNG\r\n\x1a\n" + b"\x00" * 100) == ".png" + + def test_guess_extension_jpeg(self): + from gateway.platforms.signal import _guess_extension + assert _guess_extension(b"\xff\xd8\xff\xe0" + b"\x00" * 100) == ".jpg" + + def test_guess_extension_pdf(self): + from gateway.platforms.signal import _guess_extension + assert _guess_extension(b"%PDF-1.4" + b"\x00" * 100) == ".pdf" + + def test_guess_extension_zip(self): + from gateway.platforms.signal import _guess_extension + assert _guess_extension(b"PK\x03\x04" + b"\x00" * 100) == ".zip" + + def test_guess_extension_mp4(self): + from gateway.platforms.signal import _guess_extension + assert _guess_extension(b"\x00\x00\x00\x18ftypisom" + b"\x00" * 100) == ".mp4" + + def test_guess_extension_unknown(self): + from gateway.platforms.signal import _guess_extension + assert _guess_extension(b"\x00\x01\x02\x03" * 10) == ".bin" + + def test_is_image_ext(self): + from gateway.platforms.signal import _is_image_ext + assert _is_image_ext(".png") is True + assert _is_image_ext(".jpg") is True + assert _is_image_ext(".gif") is True + assert _is_image_ext(".pdf") is False + + 
def test_is_audio_ext(self): + from gateway.platforms.signal import _is_audio_ext + assert _is_audio_ext(".mp3") is True + assert _is_audio_ext(".ogg") is True + assert _is_audio_ext(".png") is False + + def test_check_requirements(self, monkeypatch): + from gateway.platforms.signal import check_signal_requirements + monkeypatch.setenv("SIGNAL_HTTP_URL", "http://localhost:8080") + monkeypatch.setenv("SIGNAL_ACCOUNT", "+15551234567") + assert check_signal_requirements() is True + + def test_render_mentions(self): + from gateway.platforms.signal import _render_mentions + text = "Hello \uFFFC, how are you?" + mentions = [{"start": 6, "length": 1, "number": "+15559999999"}] + result = _render_mentions(text, mentions) + assert "@+15559999999" in result + assert "\uFFFC" not in result + + def test_render_mentions_no_mentions(self): + from gateway.platforms.signal import _render_mentions + text = "Hello world" + result = _render_mentions(text, []) + assert result == "Hello world" + + def test_check_requirements_missing(self, monkeypatch): + from gateway.platforms.signal import check_signal_requirements + monkeypatch.delenv("SIGNAL_HTTP_URL", raising=False) + monkeypatch.delenv("SIGNAL_ACCOUNT", raising=False) + assert check_signal_requirements() is False + + +# --------------------------------------------------------------------------- +# Session Source +# --------------------------------------------------------------------------- + +class TestSignalSessionSource: + def test_session_source_alt_fields(self): + from gateway.session import SessionSource + source = SessionSource( + platform=Platform.SIGNAL, + chat_id="+15551234567", + user_id="+15551234567", + user_id_alt="uuid:abc-123", + chat_id_alt=None, + ) + d = source.to_dict() + assert d["user_id_alt"] == "uuid:abc-123" + assert "chat_id_alt" not in d # None fields excluded + + def test_session_source_roundtrip(self): + from gateway.session import SessionSource + source = SessionSource( + platform=Platform.SIGNAL, + 
chat_id="group:xyz", + chat_type="group", + user_id="+15551234567", + user_id_alt="uuid:abc", + chat_id_alt="xyz", + ) + d = source.to_dict() + restored = SessionSource.from_dict(d) + assert restored.user_id_alt == "uuid:abc" + assert restored.chat_id_alt == "xyz" + assert restored.platform == Platform.SIGNAL + + +# --------------------------------------------------------------------------- +# Phone Redaction in agent/redact.py +# --------------------------------------------------------------------------- + +class TestSignalPhoneRedaction: + def test_us_number(self): + from agent.redact import redact_sensitive_text + result = redact_sensitive_text("Call +15551234567 now") + assert "+15551234567" not in result + assert "+155" in result # Prefix preserved + assert "4567" in result # Suffix preserved + + def test_uk_number(self): + from agent.redact import redact_sensitive_text + result = redact_sensitive_text("UK: +442071838750") + assert "+442071838750" not in result + assert "****" in result + + def test_multiple_numbers(self): + from agent.redact import redact_sensitive_text + text = "From +15551234567 to +442071838750" + result = redact_sensitive_text(text) + assert "+15551234567" not in result + assert "+442071838750" not in result + + def test_short_number_not_matched(self): + from agent.redact import redact_sensitive_text + result = redact_sensitive_text("Code: +12345") + # 5 digits after + is below the 7-digit minimum + assert "+12345" in result # Too short to redact + + +# --------------------------------------------------------------------------- +# Authorization in run.py +# --------------------------------------------------------------------------- + +class TestSignalAuthorization: + def test_signal_in_allowlist_maps(self): + """Signal should be in the platform auth maps.""" + from gateway.run import GatewayRunner + from gateway.config import GatewayConfig + + gw = GatewayRunner.__new__(GatewayRunner) + gw.config = GatewayConfig() + gw.pairing_store = 
MagicMock() + gw.pairing_store.is_approved.return_value = False + + source = MagicMock() + source.platform = Platform.SIGNAL + source.user_id = "+15559999999" + + # No allowlists set — should check GATEWAY_ALLOW_ALL_USERS + with patch.dict("os.environ", {}, clear=True): + result = gw._is_user_authorized(source) + assert result is False + + +# --------------------------------------------------------------------------- +# Send Message Tool +# --------------------------------------------------------------------------- + +class TestSignalSendMessage: + def test_signal_in_platform_map(self): + """Smoke test: send_message_tool imports cleanly and Signal is a valid Platform value.""" + from tools.send_message_tool import send_message_tool + # Just verify the import works and Signal is a valid platform + from gateway.config import Platform + assert Platform.SIGNAL.value == "signal" diff --git a/tests/gateway/test_title_command.py b/tests/gateway/test_title_command.py new file mode 100644 index 0000000000..7f7c782a71 --- /dev/null +++ b/tests/gateway/test_title_command.py @@ -0,0 +1,207 @@ +"""Tests for /title gateway slash command. + +Tests the _handle_title_command handler (set/show session titles) +across all gateway messenger platforms.
+""" + +import os +from unittest.mock import MagicMock, patch + +import pytest + +from gateway.config import Platform +from gateway.platforms.base import MessageEvent +from gateway.session import SessionSource + + +def _make_event(text="/title", platform=Platform.TELEGRAM, + user_id="12345", chat_id="67890"): + """Build a MessageEvent for testing.""" + source = SessionSource( + platform=platform, + user_id=user_id, + chat_id=chat_id, + user_name="testuser", + ) + return MessageEvent(text=text, source=source) + + +def _make_runner(session_db=None): + """Create a bare GatewayRunner with a mock session_store and optional session_db.""" + from gateway.run import GatewayRunner + runner = object.__new__(GatewayRunner) + runner.adapters = {} + runner._session_db = session_db + + # Mock session_store that returns a session entry with a known session_id + mock_session_entry = MagicMock() + mock_session_entry.session_id = "test_session_123" + mock_session_entry.session_key = "telegram:12345:67890" + mock_store = MagicMock() + mock_store.get_or_create_session.return_value = mock_session_entry + runner.session_store = mock_store + + return runner + + +# --------------------------------------------------------------------------- +# _handle_title_command +# --------------------------------------------------------------------------- + + +class TestHandleTitleCommand: + """Tests for GatewayRunner._handle_title_command.""" + + @pytest.mark.asyncio + async def test_set_title(self, tmp_path): + """Setting a title returns confirmation.""" + from hermes_state import SessionDB + db = SessionDB(db_path=tmp_path / "state.db") + db.create_session("test_session_123", "telegram") + + runner = _make_runner(session_db=db) + event = _make_event(text="/title My Research Project") + result = await runner._handle_title_command(event) + assert "My Research Project" in result + assert "✏️" in result + + # Verify in DB + assert db.get_session_title("test_session_123") == "My Research Project" + 
db.close() + + @pytest.mark.asyncio + async def test_show_title_when_set(self, tmp_path): + """Showing title when one is set returns the title.""" + from hermes_state import SessionDB + db = SessionDB(db_path=tmp_path / "state.db") + db.create_session("test_session_123", "telegram") + db.set_session_title("test_session_123", "Existing Title") + + runner = _make_runner(session_db=db) + event = _make_event(text="/title") + result = await runner._handle_title_command(event) + assert "Existing Title" in result + assert "📌" in result + db.close() + + @pytest.mark.asyncio + async def test_show_title_when_not_set(self, tmp_path): + """Showing title when none is set returns usage hint.""" + from hermes_state import SessionDB + db = SessionDB(db_path=tmp_path / "state.db") + db.create_session("test_session_123", "telegram") + + runner = _make_runner(session_db=db) + event = _make_event(text="/title") + result = await runner._handle_title_command(event) + assert "No title set" in result + assert "/title" in result + db.close() + + @pytest.mark.asyncio + async def test_title_conflict(self, tmp_path): + """Setting a title already used by another session returns error.""" + from hermes_state import SessionDB + db = SessionDB(db_path=tmp_path / "state.db") + db.create_session("other_session", "telegram") + db.set_session_title("other_session", "Taken Title") + db.create_session("test_session_123", "telegram") + + runner = _make_runner(session_db=db) + event = _make_event(text="/title Taken Title") + result = await runner._handle_title_command(event) + assert "already in use" in result + assert "⚠️" in result + db.close() + + @pytest.mark.asyncio + async def test_no_session_db(self): + """Returns error when session database is not available.""" + runner = _make_runner(session_db=None) + event = _make_event(text="/title My Title") + result = await runner._handle_title_command(event) + assert "not available" in result + + @pytest.mark.asyncio + async def test_title_too_long(self, 
tmp_path): + """Setting a title that exceeds max length returns error.""" + from hermes_state import SessionDB + db = SessionDB(db_path=tmp_path / "state.db") + db.create_session("test_session_123", "telegram") + + runner = _make_runner(session_db=db) + long_title = "A" * 150 + event = _make_event(text=f"/title {long_title}") + result = await runner._handle_title_command(event) + assert "too long" in result + assert "⚠️" in result + db.close() + + @pytest.mark.asyncio + async def test_title_control_chars_sanitized(self, tmp_path): + """Control characters are stripped and sanitized title is stored.""" + from hermes_state import SessionDB + db = SessionDB(db_path=tmp_path / "state.db") + db.create_session("test_session_123", "telegram") + + runner = _make_runner(session_db=db) + event = _make_event(text="/title hello\x00world") + result = await runner._handle_title_command(event) + assert "helloworld" in result + assert db.get_session_title("test_session_123") == "helloworld" + db.close() + + @pytest.mark.asyncio + async def test_title_only_control_chars(self, tmp_path): + """Title with only control chars returns empty error.""" + from hermes_state import SessionDB + db = SessionDB(db_path=tmp_path / "state.db") + db.create_session("test_session_123", "telegram") + + runner = _make_runner(session_db=db) + event = _make_event(text="/title \x00\x01\x02") + result = await runner._handle_title_command(event) + assert "empty after cleanup" in result + db.close() + + @pytest.mark.asyncio + async def test_works_across_platforms(self, tmp_path): + """The /title command works on more than one platform (Discord and Telegram are exercised).""" + from hermes_state import SessionDB + for platform in [Platform.DISCORD, Platform.TELEGRAM]: + db = SessionDB(db_path=tmp_path / f"state_{platform.value}.db") + db.create_session("test_session_123", platform.value) + + runner = _make_runner(session_db=db) + event = _make_event(text="/title Cross-Platform Test", platform=platform) + result = await 
runner._handle_title_command(event) + assert "Cross-Platform Test" in result + assert db.get_session_title("test_session_123") == "Cross-Platform Test" + db.close() + + +# --------------------------------------------------------------------------- +# /title in help and known_commands +# --------------------------------------------------------------------------- + + +class TestTitleInHelp: + """Verify /title appears in help text and known commands.""" + + @pytest.mark.asyncio + async def test_title_in_help_output(self): + """The /help output includes /title.""" + runner = _make_runner() + event = _make_event(text="/help") + # Need hooks for help command + from gateway.hooks import HookRegistry + runner.hooks = HookRegistry() + result = await runner._handle_help_command(event) + assert "/title" in result + + def test_title_is_known_command(self): + """The /title command is in the _known_commands set.""" + from gateway.run import GatewayRunner + import inspect + source = inspect.getsource(GatewayRunner._handle_message) + assert '"title"' in source diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py index adbf677b64..3b01eb7b32 100644 --- a/tests/hermes_cli/test_commands.py +++ b/tests/hermes_cli/test_commands.py @@ -11,7 +11,7 @@ EXPECTED_COMMANDS = { "/help", "/tools", "/toolsets", "/model", "/provider", "/prompt", "/personality", "/clear", "/history", "/new", "/reset", "/retry", "/undo", "/save", "/config", "/cron", "/skills", "/platforms", - "/verbose", "/compress", "/usage", "/insights", "/paste", + "/verbose", "/compress", "/title", "/usage", "/insights", "/paste", "/reload-mcp", "/quit", } diff --git a/tests/hermes_cli/test_session_browse.py b/tests/hermes_cli/test_session_browse.py new file mode 100644 index 0000000000..4b24a58b92 --- /dev/null +++ b/tests/hermes_cli/test_session_browse.py @@ -0,0 +1,542 @@ +"""Tests for the interactive session browser (`hermes sessions browse`). 
+ +Covers: +- _session_browse_picker logic (curses mocked, fallback tested) +- cmd_sessions 'browse' action integration +- Argument parser registration +""" + +import os +import time +from unittest.mock import MagicMock, patch, call + +import pytest + +from hermes_cli.main import _session_browse_picker + + +# ─── Sample session data ────────────────────────────────────────────────────── + +def _make_sessions(n=5): + """Generate a list of fake rich-session dicts.""" + now = time.time() + sessions = [] + for i in range(n): + sessions.append({ + "id": f"20260308_{i:06d}_abcdef", + "source": "cli" if i % 2 == 0 else "telegram", + "model": "test/model", + "title": f"Session {i}" if i % 3 != 0 else None, + "preview": f"Hello from session {i}", + "last_active": now - i * 3600, + "started_at": now - i * 3600 - 60, + "message_count": (i + 1) * 5, + }) + return sessions + + +SAMPLE_SESSIONS = _make_sessions(5) + + +# ─── _session_browse_picker ────────────────────────────────────────────────── + +class TestSessionBrowsePicker: + """Tests for the _session_browse_picker function.""" + + def test_empty_sessions_returns_none(self, capsys): + result = _session_browse_picker([]) + assert result is None + assert "No sessions found" in capsys.readouterr().out + + def test_returns_none_when_no_sessions(self, capsys): + result = _session_browse_picker([]) + assert result is None + + def test_fallback_mode_valid_selection(self): + """When curses is unavailable, fallback numbered list should work.""" + sessions = _make_sessions(3) + + # Mock curses import to fail, forcing fallback + import builtins + original_import = builtins.__import__ + + def mock_import(name, *args, **kwargs): + if name == "curses": + raise ImportError("no curses") + return original_import(name, *args, **kwargs) + + with patch.object(builtins, "__import__", side_effect=mock_import): + with patch("builtins.input", return_value="2"): + result = _session_browse_picker(sessions) + + assert result == sessions[1]["id"] + 
+ def test_fallback_mode_cancel_q(self): + """Entering 'q' in fallback mode cancels.""" + sessions = _make_sessions(3) + + import builtins + original_import = builtins.__import__ + + def mock_import(name, *args, **kwargs): + if name == "curses": + raise ImportError("no curses") + return original_import(name, *args, **kwargs) + + with patch.object(builtins, "__import__", side_effect=mock_import): + with patch("builtins.input", return_value="q"): + result = _session_browse_picker(sessions) + + assert result is None + + def test_fallback_mode_cancel_empty(self): + """Entering empty string in fallback mode cancels.""" + sessions = _make_sessions(3) + + import builtins + original_import = builtins.__import__ + + def mock_import(name, *args, **kwargs): + if name == "curses": + raise ImportError("no curses") + return original_import(name, *args, **kwargs) + + with patch.object(builtins, "__import__", side_effect=mock_import): + with patch("builtins.input", return_value=""): + result = _session_browse_picker(sessions) + + assert result is None + + def test_fallback_mode_invalid_then_valid(self): + """Invalid selection followed by valid one works.""" + sessions = _make_sessions(3) + + import builtins + original_import = builtins.__import__ + + def mock_import(name, *args, **kwargs): + if name == "curses": + raise ImportError("no curses") + return original_import(name, *args, **kwargs) + + with patch.object(builtins, "__import__", side_effect=mock_import): + with patch("builtins.input", side_effect=["99", "1"]): + result = _session_browse_picker(sessions) + + assert result == sessions[0]["id"] + + def test_fallback_mode_keyboard_interrupt(self): + """KeyboardInterrupt in fallback mode returns None.""" + sessions = _make_sessions(3) + + import builtins + original_import = builtins.__import__ + + def mock_import(name, *args, **kwargs): + if name == "curses": + raise ImportError("no curses") + return original_import(name, *args, **kwargs) + + with patch.object(builtins, 
"__import__", side_effect=mock_import): + with patch("builtins.input", side_effect=KeyboardInterrupt): + result = _session_browse_picker(sessions) + + assert result is None + + def test_fallback_displays_all_sessions(self, capsys): + """Fallback mode should display all session entries.""" + sessions = _make_sessions(4) + + import builtins + original_import = builtins.__import__ + + def mock_import(name, *args, **kwargs): + if name == "curses": + raise ImportError("no curses") + return original_import(name, *args, **kwargs) + + with patch.object(builtins, "__import__", side_effect=mock_import): + with patch("builtins.input", return_value="q"): + _session_browse_picker(sessions) + + output = capsys.readouterr().out + # All 4 entries should be shown + assert "1." in output + assert "2." in output + assert "3." in output + assert "4." in output + + def test_fallback_shows_title_over_preview(self, capsys): + """When a session has a title, show it instead of the preview.""" + sessions = [{ + "id": "test_001", + "source": "cli", + "title": "My Cool Project", + "preview": "some preview text", + "last_active": time.time(), + }] + + import builtins + original_import = builtins.__import__ + + def mock_import(name, *args, **kwargs): + if name == "curses": + raise ImportError("no curses") + return original_import(name, *args, **kwargs) + + with patch.object(builtins, "__import__", side_effect=mock_import): + with patch("builtins.input", return_value="q"): + _session_browse_picker(sessions) + + output = capsys.readouterr().out + assert "My Cool Project" in output + + def test_fallback_shows_preview_when_no_title(self, capsys): + """When no title, show preview.""" + sessions = [{ + "id": "test_002", + "source": "cli", + "title": None, + "preview": "Hello world test message", + "last_active": time.time(), + }] + + import builtins + original_import = builtins.__import__ + + def mock_import(name, *args, **kwargs): + if name == "curses": + raise ImportError("no curses") + return 
original_import(name, *args, **kwargs) + + with patch.object(builtins, "__import__", side_effect=mock_import): + with patch("builtins.input", return_value="q"): + _session_browse_picker(sessions) + + output = capsys.readouterr().out + assert "Hello world test message" in output + + def test_fallback_shows_id_when_no_title_or_preview(self, capsys): + """When neither title nor preview, show session ID.""" + sessions = [{ + "id": "test_003_fallback", + "source": "cli", + "title": None, + "preview": "", + "last_active": time.time(), + }] + + import builtins + original_import = builtins.__import__ + + def mock_import(name, *args, **kwargs): + if name == "curses": + raise ImportError("no curses") + return original_import(name, *args, **kwargs) + + with patch.object(builtins, "__import__", side_effect=mock_import): + with patch("builtins.input", return_value="q"): + _session_browse_picker(sessions) + + output = capsys.readouterr().out + assert "test_003_fallback" in output + + +# ─── Curses-based picker (mocked curses) ──────────────────────────────────── + +class TestCursesBrowse: + """Tests for the curses-based interactive picker via simulated key sequences.""" + + def _run_with_keys(self, sessions, key_sequence): + """Simulate running the curses picker with a given key sequence.""" + import curses + + # Build a mock stdscr that returns keys from the sequence + mock_stdscr = MagicMock() + mock_stdscr.getmaxyx.return_value = (30, 120) + mock_stdscr.getch.side_effect = key_sequence + + # Capture what curses.wrapper receives and call it with our mock + with patch("curses.wrapper") as mock_wrapper: + # When wrapper is called, invoke the function with our mock stdscr + def run_inner(func): + try: + func(mock_stdscr) + except StopIteration: + pass # key sequence exhausted + + mock_wrapper.side_effect = run_inner + with patch("curses.curs_set"): + with patch("curses.has_colors", return_value=False): + return _session_browse_picker(sessions) + + def 
test_enter_selects_first_session(self): + sessions = _make_sessions(3) + result = self._run_with_keys(sessions, [10]) # Enter key + assert result == sessions[0]["id"] + + def test_down_then_enter_selects_second(self): + import curses + sessions = _make_sessions(3) + result = self._run_with_keys(sessions, [curses.KEY_DOWN, 10]) + assert result == sessions[1]["id"] + + def test_down_down_enter_selects_third(self): + import curses + sessions = _make_sessions(5) + result = self._run_with_keys(sessions, [curses.KEY_DOWN, curses.KEY_DOWN, 10]) + assert result == sessions[2]["id"] + + def test_up_wraps_to_last(self): + import curses + sessions = _make_sessions(3) + result = self._run_with_keys(sessions, [curses.KEY_UP, 10]) + assert result == sessions[2]["id"] + + def test_escape_cancels(self): + sessions = _make_sessions(3) + result = self._run_with_keys(sessions, [27]) # Esc + assert result is None + + def test_q_cancels(self): + sessions = _make_sessions(3) + result = self._run_with_keys(sessions, [ord('q')]) + assert result is None + + def test_type_to_filter_then_enter(self): + """Typing characters filters the list, Enter selects from filtered.""" + import curses + sessions = [ + {"id": "s1", "source": "cli", "title": "Alpha project", "preview": "", "last_active": time.time()}, + {"id": "s2", "source": "cli", "title": "Beta project", "preview": "", "last_active": time.time()}, + {"id": "s3", "source": "cli", "title": "Gamma project", "preview": "", "last_active": time.time()}, + ] + # Type "Beta" then Enter — should select s2 + keys = [ord(c) for c in "Beta"] + [10] + result = self._run_with_keys(sessions, keys) + assert result == "s2" + + def test_filter_no_match_enter_does_nothing(self): + """When filter produces no results, Enter shouldn't select.""" + sessions = _make_sessions(3) + keys = [ord(c) for c in "zzzznonexistent"] + [10] + result = self._run_with_keys(sessions, keys) + assert result is None + + def test_backspace_removes_filter_char(self): + 
"""Backspace removes the last character from the filter.""" + import curses + sessions = [ + {"id": "s1", "source": "cli", "title": "Alpha", "preview": "", "last_active": time.time()}, + {"id": "s2", "source": "cli", "title": "Beta", "preview": "", "last_active": time.time()}, + ] + # Type "Bet", backspace, backspace, backspace (clears filter), then Enter (selects first) + keys = [ord('B'), ord('e'), ord('t'), 127, 127, 127, 10] + result = self._run_with_keys(sessions, keys) + assert result == "s1" + + def test_escape_clears_filter_first(self): + """First Esc clears the search text, second Esc exits.""" + import curses + sessions = _make_sessions(3) + # Type "ab" then Esc (clears filter) then Enter (selects first) + keys = [ord('a'), ord('b'), 27, 10] + result = self._run_with_keys(sessions, keys) + assert result == sessions[0]["id"] + + def test_filter_matches_preview(self): + """Typing should match against session preview text.""" + sessions = [ + {"id": "s1", "source": "cli", "title": None, "preview": "Set up Minecraft server", "last_active": time.time()}, + {"id": "s2", "source": "cli", "title": None, "preview": "Review PR 438", "last_active": time.time()}, + ] + keys = [ord(c) for c in "Mine"] + [10] + result = self._run_with_keys(sessions, keys) + assert result == "s1" + + def test_filter_matches_source(self): + """Typing a source name should filter by source.""" + sessions = [ + {"id": "s1", "source": "telegram", "title": "TG session", "preview": "", "last_active": time.time()}, + {"id": "s2", "source": "cli", "title": "CLI session", "preview": "", "last_active": time.time()}, + ] + keys = [ord(c) for c in "telegram"] + [10] + result = self._run_with_keys(sessions, keys) + assert result == "s1" + + def test_q_quits_when_no_filter_active(self): + """When no search text is active, 'q' should quit (not filter).""" + sessions = _make_sessions(3) + result = self._run_with_keys(sessions, [ord('q')]) + assert result is None + + def 
test_q_types_into_filter_when_filter_active(self): + """When search text is already active, 'q' should add to filter, not quit.""" + sessions = [ + {"id": "s1", "source": "cli", "title": "the sequel", "preview": "", "last_active": time.time()}, + {"id": "s2", "source": "cli", "title": "other thing", "preview": "", "last_active": time.time()}, + ] + # Type "se" first (activates filter, matches "the sequel") + # Then type "q" — should add 'q' to filter (filter="seq"), NOT quit + # "seq" still matches "the sequel" → Enter selects it + keys = [ord('s'), ord('e'), ord('q'), 10] + result = self._run_with_keys(sessions, keys) + assert result == "s1" # "the sequel" matches "seq" + + +# ─── Argument parser registration ────────────────────────────────────────── + +class TestSessionBrowseArgparse: + """Verify the 'browse' subcommand is properly registered.""" + + def test_browse_subcommand_exists(self): + """hermes sessions browse should be parseable.""" + from hermes_cli.main import main as _main_entry + + # We can't run main(), but we can import and test the parser setup + # by checking that argparse doesn't error on "sessions browse" + import argparse + # Re-create the parser portion + # Instead, let's just verify the import works and the function exists + from hermes_cli.main import _session_browse_picker + assert callable(_session_browse_picker) + + def test_browse_default_limit_is_50(self): + """The default --limit for browse should be 50.""" + # This test verifies at the argparse level + # We test by running the parse on "sessions browse" args + # Since we can't easily extract the subparser, verify via the + # _session_browse_picker accepting large lists + sessions = _make_sessions(50) + assert len(sessions) == 50 + + +# ─── Integration: cmd_sessions browse action ──────────────────────────────── + +class TestCmdSessionsBrowse: + """Integration tests for the 'browse' action in cmd_sessions.""" + + def test_browse_no_sessions_prints_message(self, capsys): + """When no 
sessions exist, _session_browse_picker returns None and prints message.""" + result = _session_browse_picker([]) + assert result is None + output = capsys.readouterr().out + assert "No sessions found" in output + + def test_browse_with_source_filter(self): + """The --source flag should be passed to list_sessions_rich.""" + sessions = [ + {"id": "s1", "source": "cli", "title": "CLI only", "preview": "", "last_active": time.time()}, + ] + + import builtins + original_import = builtins.__import__ + + def mock_import(name, *args, **kwargs): + if name == "curses": + raise ImportError("no curses") + return original_import(name, *args, **kwargs) + + with patch.object(builtins, "__import__", side_effect=mock_import): + with patch("builtins.input", return_value="1"): + result = _session_browse_picker(sessions) + + assert result == "s1" + + +# ─── Edge cases ────────────────────────────────────────────────────────────── + +class TestEdgeCases: + """Edge case handling for the session browser.""" + + def test_sessions_with_missing_fields(self): + """Sessions with missing optional fields should not crash.""" + sessions = [ + {"id": "minimal_001", "source": "cli"}, # No title, preview, last_active + ] + + import builtins + original_import = builtins.__import__ + + def mock_import(name, *args, **kwargs): + if name == "curses": + raise ImportError("no curses") + return original_import(name, *args, **kwargs) + + with patch.object(builtins, "__import__", side_effect=mock_import): + with patch("builtins.input", return_value="1"): + result = _session_browse_picker(sessions) + + assert result == "minimal_001" + + def test_single_session(self): + """A single session in the list should work fine.""" + sessions = [ + {"id": "only_one", "source": "cli", "title": "Solo", "preview": "", "last_active": time.time()}, + ] + + import builtins + original_import = builtins.__import__ + + def mock_import(name, *args, **kwargs): + if name == "curses": + raise ImportError("no curses") + return 
original_import(name, *args, **kwargs) + + with patch.object(builtins, "__import__", side_effect=mock_import): + with patch("builtins.input", return_value="1"): + result = _session_browse_picker(sessions) + + assert result == "only_one" + + def test_long_title_truncated_in_fallback(self, capsys): + """Very long titles should be truncated in fallback mode.""" + sessions = [{ + "id": "long_title_001", + "source": "cli", + "title": "A" * 100, + "preview": "", + "last_active": time.time(), + }] + + import builtins + original_import = builtins.__import__ + + def mock_import(name, *args, **kwargs): + if name == "curses": + raise ImportError("no curses") + return original_import(name, *args, **kwargs) + + with patch.object(builtins, "__import__", side_effect=mock_import): + with patch("builtins.input", return_value="q"): + _session_browse_picker(sessions) + + output = capsys.readouterr().out + # Title should be truncated to 50 chars with "..." + assert "..." in output + + def test_relative_time_formatting(self, capsys): + """Verify various time deltas format correctly.""" + now = time.time() + sessions = [ + {"id": "recent", "source": "cli", "title": None, "preview": "just now test", "last_active": now}, + {"id": "hour_ago", "source": "cli", "title": None, "preview": "hour ago test", "last_active": now - 7200}, + {"id": "days_ago", "source": "cli", "title": None, "preview": "days ago test", "last_active": now - 259200}, + ] + + import builtins + original_import = builtins.__import__ + + def mock_import(name, *args, **kwargs): + if name == "curses": + raise ImportError("no curses") + return original_import(name, *args, **kwargs) + + with patch.object(builtins, "__import__", side_effect=mock_import): + with patch("builtins.input", return_value="q"): + _session_browse_picker(sessions) + + output = capsys.readouterr().out + assert "just now" in output + assert "2h ago" in output + assert "3d ago" in output diff --git a/tests/hermes_cli/test_set_config_value.py 
b/tests/hermes_cli/test_set_config_value.py index 35e885b575..52a9d1a6c5 100644 --- a/tests/hermes_cli/test_set_config_value.py +++ b/tests/hermes_cli/test_set_config_value.py @@ -38,7 +38,6 @@ class TestExplicitAllowlist: "OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", - "NOUS_API_KEY", "WANDB_API_KEY", "TINKER_API_KEY", "HONCHO_API_KEY", diff --git a/tests/hermes_cli/test_skills_hub.py b/tests/hermes_cli/test_skills_hub.py new file mode 100644 index 0000000000..7b1165bec3 --- /dev/null +++ b/tests/hermes_cli/test_skills_hub.py @@ -0,0 +1,31 @@ +from io import StringIO + +from rich.console import Console + +from hermes_cli.skills_hub import do_list + + +def test_do_list_initializes_hub_dir(monkeypatch, tmp_path): + import tools.skills_hub as hub + import tools.skills_tool as skills_tool + + hub_dir = tmp_path / "skills" / ".hub" + monkeypatch.setattr(hub, "SKILLS_DIR", tmp_path / "skills") + monkeypatch.setattr(hub, "HUB_DIR", hub_dir) + monkeypatch.setattr(hub, "LOCK_FILE", hub_dir / "lock.json") + monkeypatch.setattr(hub, "QUARANTINE_DIR", hub_dir / "quarantine") + monkeypatch.setattr(hub, "AUDIT_LOG", hub_dir / "audit.log") + monkeypatch.setattr(hub, "TAPS_FILE", hub_dir / "taps.json") + monkeypatch.setattr(hub, "INDEX_CACHE_DIR", hub_dir / "index-cache") + monkeypatch.setattr(skills_tool, "_find_all_skills", lambda: []) + + console = Console(file=StringIO(), force_terminal=False, color_system=None) + + assert not hub_dir.exists() + + do_list(console=console) + + assert hub_dir.exists() + assert (hub_dir / "lock.json").exists() + assert (hub_dir / "quarantine").is_dir() + assert (hub_dir / "index-cache").is_dir() diff --git a/tests/integration/test_web_tools.py b/tests/integration/test_web_tools.py index 971d98f2c3..cd3de453af 100644 --- a/tests/integration/test_web_tools.py +++ b/tests/integration/test_web_tools.py @@ -12,7 +12,7 @@ Usage: Requirements: - FIRECRAWL_API_KEY environment variable must be set - - NOUS_API_KEY environment variable 
(optional, for LLM tests) + - An auxiliary LLM provider (OPENROUTER_API_KEY or Nous Portal auth) (optional, for LLM tests) """ import pytest @@ -128,12 +128,12 @@ class WebToolsTester: else: self.log_result("Firecrawl API Key", "passed", "Found") - # Check Nous API key (optional) + # Check auxiliary LLM provider (optional) if not check_auxiliary_model(): - self.log_result("Nous API Key", "skipped", "NOUS_API_KEY not set (LLM tests will be skipped)") + self.log_result("Auxiliary LLM", "skipped", "No auxiliary LLM provider available (LLM tests will be skipped)") self.test_llm = False else: - self.log_result("Nous API Key", "passed", "Found") + self.log_result("Auxiliary LLM", "passed", "Found") # Check debug mode debug_info = get_debug_session_info() diff --git a/tests/test_auxiliary_config_bridge.py b/tests/test_auxiliary_config_bridge.py new file mode 100644 index 0000000000..b0804e4be4 --- /dev/null +++ b/tests/test_auxiliary_config_bridge.py @@ -0,0 +1,292 @@ +"""Tests for auxiliary model config bridging — verifies that config.yaml values +are properly mapped to environment variables by both CLI and gateway loaders. + +Also tests the vision_tools and browser_tool model override env vars. +""" + +import json +import os +import sys +from pathlib import Path +from unittest.mock import patch, MagicMock + +import pytest +import yaml + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + + +def _run_auxiliary_bridge(config_dict, monkeypatch): + """Simulate the auxiliary config → env var bridging logic shared by CLI and gateway. + + This mirrors the code in cli.py load_cli_config() and gateway/run.py. + Both use the same pattern; we test it once here. 
+ """ + # Clear env vars + for key in ( + "AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL", + "AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL", + "CONTEXT_COMPRESSION_PROVIDER", "CONTEXT_COMPRESSION_MODEL", + ): + monkeypatch.delenv(key, raising=False) + + # Compression bridge + compression_cfg = config_dict.get("compression", {}) + if compression_cfg and isinstance(compression_cfg, dict): + compression_env_map = { + "enabled": "CONTEXT_COMPRESSION_ENABLED", + "threshold": "CONTEXT_COMPRESSION_THRESHOLD", + "summary_model": "CONTEXT_COMPRESSION_MODEL", + "summary_provider": "CONTEXT_COMPRESSION_PROVIDER", + } + for cfg_key, env_var in compression_env_map.items(): + if cfg_key in compression_cfg: + os.environ[env_var] = str(compression_cfg[cfg_key]) + + # Auxiliary bridge + auxiliary_cfg = config_dict.get("auxiliary", {}) + if auxiliary_cfg and isinstance(auxiliary_cfg, dict): + aux_task_env = { + "vision": ("AUXILIARY_VISION_PROVIDER", "AUXILIARY_VISION_MODEL"), + "web_extract": ("AUXILIARY_WEB_EXTRACT_PROVIDER", "AUXILIARY_WEB_EXTRACT_MODEL"), + } + for task_key, (prov_env, model_env) in aux_task_env.items(): + task_cfg = auxiliary_cfg.get(task_key, {}) + if not isinstance(task_cfg, dict): + continue + prov = str(task_cfg.get("provider", "")).strip() + model = str(task_cfg.get("model", "")).strip() + if prov and prov != "auto": + os.environ[prov_env] = prov + if model: + os.environ[model_env] = model + + +# ── Config bridging tests ──────────────────────────────────────────────────── + + +class TestAuxiliaryConfigBridge: + """Verify the config.yaml → env var bridging logic used by CLI and gateway.""" + + def test_vision_provider_bridged(self, monkeypatch): + config = { + "auxiliary": { + "vision": {"provider": "openrouter", "model": ""}, + "web_extract": {"provider": "auto", "model": ""}, + } + } + _run_auxiliary_bridge(config, monkeypatch) + assert os.environ.get("AUXILIARY_VISION_PROVIDER") == "openrouter" + # auto should not be set + assert 
os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") is None + + def test_vision_model_bridged(self, monkeypatch): + config = { + "auxiliary": { + "vision": {"provider": "auto", "model": "openai/gpt-4o"}, + } + } + _run_auxiliary_bridge(config, monkeypatch) + assert os.environ.get("AUXILIARY_VISION_MODEL") == "openai/gpt-4o" + # auto provider should not be set + assert os.environ.get("AUXILIARY_VISION_PROVIDER") is None + + def test_web_extract_bridged(self, monkeypatch): + config = { + "auxiliary": { + "web_extract": {"provider": "nous", "model": "gemini-2.5-flash"}, + } + } + _run_auxiliary_bridge(config, monkeypatch) + assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous" + assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-2.5-flash" + + def test_compression_provider_bridged(self, monkeypatch): + config = { + "compression": { + "summary_provider": "nous", + "summary_model": "gemini-3-flash", + } + } + _run_auxiliary_bridge(config, monkeypatch) + assert os.environ.get("CONTEXT_COMPRESSION_PROVIDER") == "nous" + assert os.environ.get("CONTEXT_COMPRESSION_MODEL") == "gemini-3-flash" + + def test_empty_values_not_bridged(self, monkeypatch): + config = { + "auxiliary": { + "vision": {"provider": "auto", "model": ""}, + } + } + _run_auxiliary_bridge(config, monkeypatch) + assert os.environ.get("AUXILIARY_VISION_PROVIDER") is None + assert os.environ.get("AUXILIARY_VISION_MODEL") is None + + def test_missing_auxiliary_section_safe(self, monkeypatch): + """Config without auxiliary section should not crash.""" + config = {"model": {"default": "test-model"}} + _run_auxiliary_bridge(config, monkeypatch) + assert os.environ.get("AUXILIARY_VISION_PROVIDER") is None + + def test_non_dict_task_config_ignored(self, monkeypatch): + """Malformed task config (e.g. 
string instead of dict) is safely ignored.""" + config = { + "auxiliary": { + "vision": "openrouter", # should be a dict + } + } + _run_auxiliary_bridge(config, monkeypatch) + assert os.environ.get("AUXILIARY_VISION_PROVIDER") is None + + def test_mixed_tasks(self, monkeypatch): + config = { + "auxiliary": { + "vision": {"provider": "openrouter", "model": ""}, + "web_extract": {"provider": "auto", "model": "custom-llm"}, + } + } + _run_auxiliary_bridge(config, monkeypatch) + assert os.environ.get("AUXILIARY_VISION_PROVIDER") == "openrouter" + assert os.environ.get("AUXILIARY_VISION_MODEL") is None + assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") is None + assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "custom-llm" + + def test_all_tasks_with_overrides(self, monkeypatch): + config = { + "compression": { + "summary_provider": "main", + "summary_model": "local-model", + }, + "auxiliary": { + "vision": {"provider": "openrouter", "model": "google/gemini-2.5-flash"}, + "web_extract": {"provider": "nous", "model": "gemini-3-flash"}, + } + } + _run_auxiliary_bridge(config, monkeypatch) + assert os.environ.get("CONTEXT_COMPRESSION_PROVIDER") == "main" + assert os.environ.get("CONTEXT_COMPRESSION_MODEL") == "local-model" + assert os.environ.get("AUXILIARY_VISION_PROVIDER") == "openrouter" + assert os.environ.get("AUXILIARY_VISION_MODEL") == "google/gemini-2.5-flash" + assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") == "nous" + assert os.environ.get("AUXILIARY_WEB_EXTRACT_MODEL") == "gemini-3-flash" + + def test_whitespace_in_values_stripped(self, monkeypatch): + config = { + "auxiliary": { + "vision": {"provider": " openrouter ", "model": " my-model "}, + } + } + _run_auxiliary_bridge(config, monkeypatch) + assert os.environ.get("AUXILIARY_VISION_PROVIDER") == "openrouter" + assert os.environ.get("AUXILIARY_VISION_MODEL") == "my-model" + + def test_empty_auxiliary_dict_safe(self, monkeypatch): + config = {"auxiliary": {}} + 
_run_auxiliary_bridge(config, monkeypatch) + assert os.environ.get("AUXILIARY_VISION_PROVIDER") is None + assert os.environ.get("AUXILIARY_WEB_EXTRACT_PROVIDER") is None + + +# ── Gateway bridge parity test ─────────────────────────────────────────────── + + +class TestGatewayBridgeCodeParity: + """Verify the gateway/run.py config bridge contains the auxiliary section.""" + + def test_gateway_has_auxiliary_bridge(self): + """The gateway config bridge must include auxiliary.* bridging.""" + gateway_path = Path(__file__).parent.parent / "gateway" / "run.py" + content = gateway_path.read_text() + # Check for key patterns that indicate the bridge is present + assert "AUXILIARY_VISION_PROVIDER" in content + assert "AUXILIARY_VISION_MODEL" in content + assert "AUXILIARY_WEB_EXTRACT_PROVIDER" in content + assert "AUXILIARY_WEB_EXTRACT_MODEL" in content + + def test_gateway_has_compression_provider(self): + """Gateway must bridge compression.summary_provider.""" + gateway_path = Path(__file__).parent.parent / "gateway" / "run.py" + content = gateway_path.read_text() + assert "summary_provider" in content + assert "CONTEXT_COMPRESSION_PROVIDER" in content + + +# ── Vision model override tests ────────────────────────────────────────────── + + +class TestVisionModelOverride: + """Test that AUXILIARY_VISION_MODEL env var overrides the default model in the handler.""" + + def test_env_var_overrides_default(self, monkeypatch): + monkeypatch.setenv("AUXILIARY_VISION_MODEL", "openai/gpt-4o") + from tools.vision_tools import _handle_vision_analyze + with patch("tools.vision_tools.vision_analyze_tool", new_callable=MagicMock) as mock_tool: + mock_tool.return_value = '{"success": true}' + _handle_vision_analyze({"image_url": "http://test.jpg", "question": "test"}) + call_args = mock_tool.call_args + # 3rd positional arg = model + assert call_args[0][2] == "openai/gpt-4o" + + def test_default_model_when_no_override(self, monkeypatch): + monkeypatch.delenv("AUXILIARY_VISION_MODEL", 
raising=False) + from tools.vision_tools import _handle_vision_analyze, DEFAULT_VISION_MODEL + with patch("tools.vision_tools.vision_analyze_tool", new_callable=MagicMock) as mock_tool: + mock_tool.return_value = '{"success": true}' + _handle_vision_analyze({"image_url": "http://test.jpg", "question": "test"}) + call_args = mock_tool.call_args + expected = DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview" + assert call_args[0][2] == expected + + +# ── DEFAULT_CONFIG shape tests ─────────────────────────────────────────────── + + +class TestDefaultConfigShape: + """Verify the DEFAULT_CONFIG in hermes_cli/config.py has correct auxiliary structure.""" + + def test_auxiliary_section_exists(self): + from hermes_cli.config import DEFAULT_CONFIG + assert "auxiliary" in DEFAULT_CONFIG + + def test_vision_task_structure(self): + from hermes_cli.config import DEFAULT_CONFIG + vision = DEFAULT_CONFIG["auxiliary"]["vision"] + assert "provider" in vision + assert "model" in vision + assert vision["provider"] == "auto" + assert vision["model"] == "" + + def test_web_extract_task_structure(self): + from hermes_cli.config import DEFAULT_CONFIG + web = DEFAULT_CONFIG["auxiliary"]["web_extract"] + assert "provider" in web + assert "model" in web + assert web["provider"] == "auto" + assert web["model"] == "" + + def test_compression_provider_default(self): + from hermes_cli.config import DEFAULT_CONFIG + compression = DEFAULT_CONFIG["compression"] + assert "summary_provider" in compression + assert compression["summary_provider"] == "auto" + + +# ── CLI defaults parity ───────────────────────────────────────────────────── + + +class TestCLIDefaultsHaveAuxiliaryKeys: + """Verify cli.py load_cli_config() defaults dict does NOT include auxiliary + (it comes from config.yaml deep merge, not hardcoded defaults).""" + + def test_cli_defaults_can_merge_auxiliary(self): + """The load_cli_config deep merge logic handles keys not in defaults. 
+ Verify auxiliary would be picked up from config.yaml.""" + # This is a structural assertion: cli.py's second-pass loop + # carries over keys from file_config that aren't in defaults. + # So auxiliary config from config.yaml gets merged even though + # cli.py's defaults dict doesn't define it. + import cli as _cli_mod + source = Path(_cli_mod.__file__).read_text() + assert "auxiliary_config = defaults.get(\"auxiliary\"" in source + assert "AUXILIARY_VISION_PROVIDER" in source + assert "AUXILIARY_VISION_MODEL" in source diff --git a/tests/test_cli_provider_resolution.py b/tests/test_cli_provider_resolution.py index 3c8fe14a5e..f4a446ac8e 100644 --- a/tests/test_cli_provider_resolution.py +++ b/tests/test_cli_provider_resolution.py @@ -162,6 +162,124 @@ def test_runtime_resolution_rebuilds_agent_on_routing_change(monkeypatch): assert shell.api_mode == "codex_responses" +def test_codex_provider_replaces_incompatible_default_model(monkeypatch): + """When provider resolves to openai-codex and no model was explicitly + chosen, the global config default (e.g. anthropic/claude-opus-4.6) must + be replaced with a Codex-compatible model. 
Fixes #651.""" + cli = _import_cli() + + monkeypatch.delenv("LLM_MODEL", raising=False) + monkeypatch.delenv("OPENAI_MODEL", raising=False) + + def _runtime_resolve(**kwargs): + return { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "test-key", + "source": "env/config", + } + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + monkeypatch.setattr( + "hermes_cli.codex_models.get_codex_model_ids", + lambda access_token=None: ["gpt-5.2-codex", "gpt-5.1-codex-mini"], + ) + + shell = cli.HermesCLI(compact=True, max_turns=1) + + assert shell._model_is_default is True + assert shell._ensure_runtime_credentials() is True + assert shell.provider == "openai-codex" + assert "anthropic" not in shell.model + assert "claude" not in shell.model + assert shell.model == "gpt-5.2-codex" + + +def test_codex_provider_trusts_explicit_envvar_model(monkeypatch): + """When the user explicitly sets LLM_MODEL, we trust their choice and + let the API be the judge — even if it's a non-OpenAI model. 
Only + provider prefixes are stripped; the bare model passes through.""" + cli = _import_cli() + + monkeypatch.setenv("LLM_MODEL", "claude-opus-4-6") + monkeypatch.delenv("OPENAI_MODEL", raising=False) + + def _runtime_resolve(**kwargs): + return { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "test-key", + "source": "env/config", + } + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + + shell = cli.HermesCLI(compact=True, max_turns=1) + + assert shell._model_is_default is False + assert shell._ensure_runtime_credentials() is True + assert shell.provider == "openai-codex" + # User explicitly chose this model — it passes through untouched + assert shell.model == "claude-opus-4-6" + + +def test_codex_provider_preserves_explicit_codex_model(monkeypatch): + """If the user explicitly passes a Codex-compatible model, it must be + preserved even when the provider resolves to openai-codex.""" + cli = _import_cli() + + monkeypatch.delenv("LLM_MODEL", raising=False) + monkeypatch.delenv("OPENAI_MODEL", raising=False) + + def _runtime_resolve(**kwargs): + return { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "test-key", + "source": "env/config", + } + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + + shell = cli.HermesCLI(model="gpt-5.1-codex-mini", compact=True, max_turns=1) + + assert shell._model_is_default is False + assert shell._ensure_runtime_credentials() is True + assert shell.model == "gpt-5.1-codex-mini" + + +def test_codex_provider_strips_provider_prefix_from_model(monkeypatch): + 
"""openai/gpt-5.3-codex should become gpt-5.3-codex — the Codex + Responses API does not accept provider-prefixed model slugs.""" + cli = _import_cli() + + monkeypatch.delenv("LLM_MODEL", raising=False) + monkeypatch.delenv("OPENAI_MODEL", raising=False) + + def _runtime_resolve(**kwargs): + return { + "provider": "openai-codex", + "api_mode": "codex_responses", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "test-key", + "source": "env/config", + } + + monkeypatch.setattr("hermes_cli.runtime_provider.resolve_runtime_provider", _runtime_resolve) + monkeypatch.setattr("hermes_cli.runtime_provider.format_runtime_provider_error", lambda exc: str(exc)) + + shell = cli.HermesCLI(model="openai/gpt-5.3-codex", compact=True, max_turns=1) + + assert shell._ensure_runtime_credentials() is True + assert shell.model == "gpt-5.3-codex" + + def test_cmd_model_falls_back_to_auto_on_invalid_provider(monkeypatch, capsys): monkeypatch.setattr( "hermes_cli.config.load_config", diff --git a/tests/test_codex_execution_paths.py b/tests/test_codex_execution_paths.py index b3550d883c..2a6044294f 100644 --- a/tests/test_codex_execution_paths.py +++ b/tests/test_codex_execution_paths.py @@ -149,6 +149,7 @@ def test_gateway_run_agent_codex_path_handles_internal_401_refresh(monkeypatch): runner._prefill_messages = [] runner._reasoning_config = None runner._provider_routing = {} + runner._fallback_model = None runner._running_agents = {} from unittest.mock import MagicMock, AsyncMock runner.hooks = MagicMock() diff --git a/tests/test_codex_models.py b/tests/test_codex_models.py index e6cc2fdec0..5e85e46add 100644 --- a/tests/test_codex_models.py +++ b/tests/test_codex_models.py @@ -1,4 +1,9 @@ import json +import os +import sys +from unittest.mock import patch + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) from hermes_cli.codex_models import DEFAULT_CODEX_MODELS, get_codex_model_ids @@ -13,7 +18,7 @@ def 
test_get_codex_model_ids_prioritizes_default_and_cache(tmp_path, monkeypatch "models": [ {"slug": "gpt-5.3-codex", "priority": 20, "supported_in_api": True}, {"slug": "gpt-5.1-codex", "priority": 5, "supported_in_api": True}, - {"slug": "gpt-4o", "priority": 1, "supported_in_api": True}, + {"slug": "gpt-5.4", "priority": 1, "supported_in_api": True}, {"slug": "gpt-5-hidden-codex", "priority": 2, "visibility": "hidden"}, ] } @@ -26,10 +31,19 @@ def test_get_codex_model_ids_prioritizes_default_and_cache(tmp_path, monkeypatch assert models[0] == "gpt-5.2-codex" assert "gpt-5.1-codex" in models assert "gpt-5.3-codex" in models - assert "gpt-4o" not in models + # Non-codex-suffixed models are included when the cache says they're available + assert "gpt-5.4" in models assert "gpt-5-hidden-codex" not in models +def test_setup_wizard_codex_import_resolves(): + """Regression test for #712: setup.py must import the correct function name.""" + # This mirrors the exact import used in hermes_cli/setup.py line 873. + # A prior bug had 'get_codex_models' (wrong) instead of 'get_codex_model_ids'. 
+ from hermes_cli.codex_models import get_codex_model_ids as setup_import + assert callable(setup_import) + + def test_get_codex_model_ids_falls_back_to_curated_defaults(tmp_path, monkeypatch): codex_home = tmp_path / "codex-home" codex_home.mkdir(parents=True, exist_ok=True) @@ -38,3 +52,144 @@ def test_get_codex_model_ids_falls_back_to_curated_defaults(tmp_path, monkeypatc models = get_codex_model_ids() assert models[: len(DEFAULT_CODEX_MODELS)] == DEFAULT_CODEX_MODELS + + +# ── Tests for _normalize_model_for_provider ────────────────────────── + + +def _make_cli(model="anthropic/claude-opus-4.6", **kwargs): + """Create a HermesCLI with minimal mocking.""" + import cli as _cli_mod + from cli import HermesCLI + + _clean_config = { + "model": { + "default": "anthropic/claude-opus-4.6", + "base_url": "https://openrouter.ai/api/v1", + "provider": "auto", + }, + "display": {"compact": False, "tool_progress": "all", "resume_display": "full"}, + "agent": {}, + "terminal": {"env_type": "local"}, + } + clean_env = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""} + with ( + patch("cli.get_tool_definitions", return_value=[]), + patch.dict("os.environ", clean_env, clear=False), + patch.dict(_cli_mod.__dict__, {"CLI_CONFIG": _clean_config}), + ): + cli = HermesCLI(model=model, **kwargs) + return cli + + +class TestNormalizeModelForProvider: + """_normalize_model_for_provider() trusts user-selected models. + + Only two things happen: + 1. Provider prefixes are stripped (API needs bare slugs) + 2. The *untouched default* model is swapped for a Codex model + Everything else passes through — the API is the judge. 
+ """ + + def test_non_codex_provider_is_noop(self): + cli = _make_cli(model="gpt-5.4") + changed = cli._normalize_model_for_provider("openrouter") + assert changed is False + assert cli.model == "gpt-5.4" + + def test_bare_codex_model_passes_through(self): + cli = _make_cli(model="gpt-5.3-codex") + changed = cli._normalize_model_for_provider("openai-codex") + assert changed is False + assert cli.model == "gpt-5.3-codex" + + def test_bare_non_codex_model_passes_through(self): + """gpt-5.4 (no 'codex' suffix) passes through — user chose it.""" + cli = _make_cli(model="gpt-5.4") + changed = cli._normalize_model_for_provider("openai-codex") + assert changed is False + assert cli.model == "gpt-5.4" + + def test_any_bare_model_trusted(self): + """Even a non-OpenAI bare model passes through — user explicitly set it.""" + cli = _make_cli(model="claude-opus-4-6") + changed = cli._normalize_model_for_provider("openai-codex") + # User explicitly chose this model — we trust them, API will error if wrong + assert changed is False + assert cli.model == "claude-opus-4-6" + + def test_provider_prefix_stripped(self): + """openai/gpt-5.4 → gpt-5.4 (strip prefix, keep model).""" + cli = _make_cli(model="openai/gpt-5.4") + changed = cli._normalize_model_for_provider("openai-codex") + assert changed is True + assert cli.model == "gpt-5.4" + + def test_any_provider_prefix_stripped(self): + """anthropic/claude-opus-4.6 → claude-opus-4.6 (strip prefix only). 
+ User explicitly chose this — let the API decide if it works.""" + cli = _make_cli(model="anthropic/claude-opus-4.6") + changed = cli._normalize_model_for_provider("openai-codex") + assert changed is True + assert cli.model == "claude-opus-4.6" + + def test_default_model_replaced(self): + """The untouched default (anthropic/claude-opus-4.6) gets swapped.""" + import cli as _cli_mod + _clean_config = { + "model": { + "default": "anthropic/claude-opus-4.6", + "base_url": "https://openrouter.ai/api/v1", + "provider": "auto", + }, + "display": {"compact": False, "tool_progress": "all", "resume_display": "full"}, + "agent": {}, + "terminal": {"env_type": "local"}, + } + # Don't pass model= so _model_is_default is True + with ( + patch("cli.get_tool_definitions", return_value=[]), + patch.dict("os.environ", {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}, clear=False), + patch.dict(_cli_mod.__dict__, {"CLI_CONFIG": _clean_config}), + ): + from cli import HermesCLI + cli = HermesCLI() + + assert cli._model_is_default is True + with patch( + "hermes_cli.codex_models.get_codex_model_ids", + return_value=["gpt-5.3-codex", "gpt-5.4"], + ): + changed = cli._normalize_model_for_provider("openai-codex") + assert changed is True + # Uses first from available list + assert cli.model == "gpt-5.3-codex" + + def test_default_fallback_when_api_fails(self): + """Default model falls back to gpt-5.3-codex when API unreachable.""" + import cli as _cli_mod + _clean_config = { + "model": { + "default": "anthropic/claude-opus-4.6", + "base_url": "https://openrouter.ai/api/v1", + "provider": "auto", + }, + "display": {"compact": False, "tool_progress": "all", "resume_display": "full"}, + "agent": {}, + "terminal": {"env_type": "local"}, + } + with ( + patch("cli.get_tool_definitions", return_value=[]), + patch.dict("os.environ", {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}, clear=False), + patch.dict(_cli_mod.__dict__, {"CLI_CONFIG": _clean_config}), + ): + from cli import HermesCLI + cli = 
HermesCLI() + + with patch( + "hermes_cli.codex_models.get_codex_model_ids", + side_effect=Exception("offline"), + ): + changed = cli._normalize_model_for_provider("openai-codex") + assert changed is True + assert cli.model == "gpt-5.3-codex" diff --git a/tests/test_fallback_model.py b/tests/test_fallback_model.py new file mode 100644 index 0000000000..dcc150c375 --- /dev/null +++ b/tests/test_fallback_model.py @@ -0,0 +1,339 @@ +"""Tests for the provider fallback model feature. + +Verifies that AIAgent can switch to a configured fallback model/provider +when the primary fails after retries. +""" + +import os +from types import SimpleNamespace +from unittest.mock import MagicMock, patch + +import pytest + +from run_agent import AIAgent + + +def _make_tool_defs(*names: str) -> list: + return [ + { + "type": "function", + "function": { + "name": n, + "description": f"{n} tool", + "parameters": {"type": "object", "properties": {}}, + }, + } + for n in names + ] + + +def _make_agent(fallback_model=None): + """Create a minimal AIAgent with optional fallback config.""" + with ( + patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), + patch("run_agent.check_toolset_requirements", return_value={}), + patch("run_agent.OpenAI"), + ): + agent = AIAgent( + api_key="test-key-primary", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + fallback_model=fallback_model, + ) + agent.client = MagicMock() + return agent + + +# ============================================================================= +# _try_activate_fallback() +# ============================================================================= + +class TestTryActivateFallback: + def test_returns_false_when_not_configured(self): + agent = _make_agent(fallback_model=None) + assert agent._try_activate_fallback() is False + assert agent._fallback_activated is False + + def test_returns_false_for_empty_config(self): + agent = _make_agent(fallback_model={"provider": "", 
"model": ""}) + assert agent._try_activate_fallback() is False + + def test_returns_false_for_missing_provider(self): + agent = _make_agent(fallback_model={"model": "gpt-4.1"}) + assert agent._try_activate_fallback() is False + + def test_returns_false_for_missing_model(self): + agent = _make_agent(fallback_model={"provider": "openrouter"}) + assert agent._try_activate_fallback() is False + + def test_activates_openrouter_fallback(self): + agent = _make_agent( + fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}, + ) + with ( + patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-fallback-key"}), + patch("run_agent.OpenAI") as mock_openai, + ): + result = agent._try_activate_fallback() + assert result is True + assert agent._fallback_activated is True + assert agent.model == "anthropic/claude-sonnet-4" + assert agent.provider == "openrouter" + assert agent.api_mode == "chat_completions" + mock_openai.assert_called_once() + call_kwargs = mock_openai.call_args[1] + assert call_kwargs["api_key"] == "sk-or-fallback-key" + assert "openrouter" in call_kwargs["base_url"].lower() + # OpenRouter should get attribution headers + assert "default_headers" in call_kwargs + + def test_activates_zai_fallback(self): + agent = _make_agent( + fallback_model={"provider": "zai", "model": "glm-5"}, + ) + with ( + patch.dict("os.environ", {"ZAI_API_KEY": "sk-zai-key"}), + patch("run_agent.OpenAI") as mock_openai, + ): + result = agent._try_activate_fallback() + assert result is True + assert agent.model == "glm-5" + assert agent.provider == "zai" + call_kwargs = mock_openai.call_args[1] + assert call_kwargs["api_key"] == "sk-zai-key" + assert "z.ai" in call_kwargs["base_url"].lower() + + def test_activates_kimi_fallback(self): + agent = _make_agent( + fallback_model={"provider": "kimi-coding", "model": "kimi-k2.5"}, + ) + with ( + patch.dict("os.environ", {"KIMI_API_KEY": "sk-kimi-key"}), + patch("run_agent.OpenAI"), + ): + assert 
agent._try_activate_fallback() is True + assert agent.model == "kimi-k2.5" + assert agent.provider == "kimi-coding" + + def test_activates_minimax_fallback(self): + agent = _make_agent( + fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"}, + ) + with ( + patch.dict("os.environ", {"MINIMAX_API_KEY": "sk-mm-key"}), + patch("run_agent.OpenAI") as mock_openai, + ): + assert agent._try_activate_fallback() is True + assert agent.model == "MiniMax-M2.5" + assert agent.provider == "minimax" + call_kwargs = mock_openai.call_args[1] + assert "minimax.io" in call_kwargs["base_url"] + + def test_only_fires_once(self): + agent = _make_agent( + fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}, + ) + with ( + patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}), + patch("run_agent.OpenAI"), + ): + assert agent._try_activate_fallback() is True + # Second attempt should return False + assert agent._try_activate_fallback() is False + + def test_returns_false_when_no_api_key(self): + """Fallback should fail gracefully when the API key env var is unset.""" + agent = _make_agent( + fallback_model={"provider": "minimax", "model": "MiniMax-M2.5"}, + ) + # Ensure MINIMAX_API_KEY is not in the environment + env = {k: v for k, v in os.environ.items() if k != "MINIMAX_API_KEY"} + with patch.dict("os.environ", env, clear=True): + assert agent._try_activate_fallback() is False + assert agent._fallback_activated is False + + def test_custom_base_url(self): + """Custom base_url in config should override the provider default.""" + agent = _make_agent( + fallback_model={ + "provider": "custom", + "model": "my-model", + "base_url": "http://localhost:8080/v1", + "api_key_env": "MY_CUSTOM_KEY", + }, + ) + with ( + patch.dict("os.environ", {"MY_CUSTOM_KEY": "custom-secret"}), + patch("run_agent.OpenAI") as mock_openai, + ): + assert agent._try_activate_fallback() is True + call_kwargs = mock_openai.call_args[1] + assert call_kwargs["base_url"] == 
"http://localhost:8080/v1" + assert call_kwargs["api_key"] == "custom-secret" + + def test_prompt_caching_enabled_for_claude_on_openrouter(self): + agent = _make_agent( + fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}, + ) + with ( + patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}), + patch("run_agent.OpenAI"), + ): + agent._try_activate_fallback() + assert agent._use_prompt_caching is True + + def test_prompt_caching_disabled_for_non_claude(self): + agent = _make_agent( + fallback_model={"provider": "openrouter", "model": "google/gemini-2.5-flash"}, + ) + with ( + patch.dict("os.environ", {"OPENROUTER_API_KEY": "sk-or-key"}), + patch("run_agent.OpenAI"), + ): + agent._try_activate_fallback() + assert agent._use_prompt_caching is False + + def test_prompt_caching_disabled_for_non_openrouter(self): + agent = _make_agent( + fallback_model={"provider": "zai", "model": "glm-5"}, + ) + with ( + patch.dict("os.environ", {"ZAI_API_KEY": "sk-zai-key"}), + patch("run_agent.OpenAI"), + ): + agent._try_activate_fallback() + assert agent._use_prompt_caching is False + + def test_zai_alt_env_var(self): + """Z.AI should also check Z_AI_API_KEY as fallback env var.""" + agent = _make_agent( + fallback_model={"provider": "zai", "model": "glm-5"}, + ) + with ( + patch.dict("os.environ", {"Z_AI_API_KEY": "sk-alt-key"}), + patch("run_agent.OpenAI") as mock_openai, + ): + assert agent._try_activate_fallback() is True + call_kwargs = mock_openai.call_args[1] + assert call_kwargs["api_key"] == "sk-alt-key" + + def test_activates_codex_fallback(self): + """OpenAI Codex fallback should use OAuth credentials and codex_responses mode.""" + agent = _make_agent( + fallback_model={"provider": "openai-codex", "model": "gpt-5.3-codex"}, + ) + mock_creds = { + "api_key": "codex-oauth-token", + "base_url": "https://chatgpt.com/backend-api/codex", + } + with ( + patch("hermes_cli.auth.resolve_codex_runtime_credentials", return_value=mock_creds), + 
patch("run_agent.OpenAI") as mock_openai, + ): + result = agent._try_activate_fallback() + assert result is True + assert agent.model == "gpt-5.3-codex" + assert agent.provider == "openai-codex" + assert agent.api_mode == "codex_responses" + call_kwargs = mock_openai.call_args[1] + assert call_kwargs["api_key"] == "codex-oauth-token" + assert "chatgpt.com" in call_kwargs["base_url"] + + def test_codex_fallback_fails_gracefully_without_credentials(self): + """Codex fallback should return False if no OAuth credentials available.""" + agent = _make_agent( + fallback_model={"provider": "openai-codex", "model": "gpt-5.3-codex"}, + ) + with patch( + "hermes_cli.auth.resolve_codex_runtime_credentials", + side_effect=Exception("No Codex credentials"), + ): + assert agent._try_activate_fallback() is False + assert agent._fallback_activated is False + + def test_activates_nous_fallback(self): + """Nous Portal fallback should use OAuth credentials and chat_completions mode.""" + agent = _make_agent( + fallback_model={"provider": "nous", "model": "nous-hermes-3"}, + ) + mock_creds = { + "api_key": "nous-agent-key-abc", + "base_url": "https://inference-api.nousresearch.com/v1", + } + with ( + patch("hermes_cli.auth.resolve_nous_runtime_credentials", return_value=mock_creds), + patch("run_agent.OpenAI") as mock_openai, + ): + result = agent._try_activate_fallback() + assert result is True + assert agent.model == "nous-hermes-3" + assert agent.provider == "nous" + assert agent.api_mode == "chat_completions" + call_kwargs = mock_openai.call_args[1] + assert call_kwargs["api_key"] == "nous-agent-key-abc" + assert "nousresearch.com" in call_kwargs["base_url"] + + def test_nous_fallback_fails_gracefully_without_login(self): + """Nous fallback should return False if not logged in.""" + agent = _make_agent( + fallback_model={"provider": "nous", "model": "nous-hermes-3"}, + ) + with patch( + "hermes_cli.auth.resolve_nous_runtime_credentials", + side_effect=Exception("Not logged in to 
Nous Portal"), + ): + assert agent._try_activate_fallback() is False + assert agent._fallback_activated is False + + +# ============================================================================= +# Fallback config init +# ============================================================================= + +class TestFallbackInit: + def test_fallback_stored_when_configured(self): + agent = _make_agent( + fallback_model={"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}, + ) + assert agent._fallback_model is not None + assert agent._fallback_model["provider"] == "openrouter" + assert agent._fallback_activated is False + + def test_fallback_none_when_not_configured(self): + agent = _make_agent(fallback_model=None) + assert agent._fallback_model is None + assert agent._fallback_activated is False + + def test_fallback_none_for_non_dict(self): + agent = _make_agent(fallback_model="not-a-dict") + assert agent._fallback_model is None + + +# ============================================================================= +# Provider credential resolution +# ============================================================================= + +class TestProviderCredentials: + """Verify that each supported provider resolves its API key correctly.""" + + @pytest.mark.parametrize("provider,env_var,base_url_fragment", [ + ("openrouter", "OPENROUTER_API_KEY", "openrouter"), + ("zai", "ZAI_API_KEY", "z.ai"), + ("kimi-coding", "KIMI_API_KEY", "moonshot.ai"), + ("minimax", "MINIMAX_API_KEY", "minimax.io"), + ("minimax-cn", "MINIMAX_CN_API_KEY", "minimaxi.com"), + ]) + def test_provider_resolves(self, provider, env_var, base_url_fragment): + agent = _make_agent( + fallback_model={"provider": provider, "model": "test-model"}, + ) + with ( + patch.dict("os.environ", {env_var: "test-key-123"}), + patch("run_agent.OpenAI") as mock_openai, + ): + result = agent._try_activate_fallback() + assert result is True, f"Failed to activate fallback for {provider}" + call_kwargs = 
mock_openai.call_args[1] + assert call_kwargs["api_key"] == "test-key-123" + assert base_url_fragment in call_kwargs["base_url"].lower() diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py index 734db494f9..fcbaf2196a 100644 --- a/tests/test_hermes_state.py +++ b/tests/test_hermes_state.py @@ -351,6 +351,173 @@ class TestPruneSessions: # Schema and WAL mode # ========================================================================= +# ========================================================================= +# Session title +# ========================================================================= + +class TestSessionTitle: + def test_set_and_get_title(self, db): + db.create_session(session_id="s1", source="cli") + assert db.set_session_title("s1", "My Session") is True + + session = db.get_session("s1") + assert session["title"] == "My Session" + + def test_set_title_nonexistent_session(self, db): + assert db.set_session_title("nonexistent", "Title") is False + + def test_title_initially_none(self, db): + db.create_session(session_id="s1", source="cli") + session = db.get_session("s1") + assert session["title"] is None + + def test_update_title(self, db): + db.create_session(session_id="s1", source="cli") + db.set_session_title("s1", "First Title") + db.set_session_title("s1", "Updated Title") + + session = db.get_session("s1") + assert session["title"] == "Updated Title" + + def test_title_in_search_sessions(self, db): + db.create_session(session_id="s1", source="cli") + db.set_session_title("s1", "Debugging Auth") + db.create_session(session_id="s2", source="cli") + + sessions = db.search_sessions() + titled = [s for s in sessions if s.get("title") == "Debugging Auth"] + assert len(titled) == 1 + assert titled[0]["id"] == "s1" + + def test_title_in_export(self, db): + db.create_session(session_id="s1", source="cli") + db.set_session_title("s1", "Export Test") + db.append_message("s1", role="user", content="Hello") + + export = 
db.export_session("s1") + assert export["title"] == "Export Test" + + def test_title_with_special_characters(self, db): + db.create_session(session_id="s1", source="cli") + title = "PR #438 — fixing the 'auth' middleware" + db.set_session_title("s1", title) + + session = db.get_session("s1") + assert session["title"] == title + + def test_title_empty_string_normalized_to_none(self, db): + """Empty strings are normalized to None (clearing the title).""" + db.create_session(session_id="s1", source="cli") + db.set_session_title("s1", "My Title") + # Setting to empty string should clear the title (normalize to None) + db.set_session_title("s1", "") + + session = db.get_session("s1") + assert session["title"] is None + + def test_multiple_empty_titles_no_conflict(self, db): + """Multiple sessions can have empty-string (normalized to NULL) titles.""" + db.create_session(session_id="s1", source="cli") + db.create_session(session_id="s2", source="cli") + db.set_session_title("s1", "") + db.set_session_title("s2", "") + # Both should be None, no uniqueness conflict + assert db.get_session("s1")["title"] is None + assert db.get_session("s2")["title"] is None + + def test_title_survives_end_session(self, db): + db.create_session(session_id="s1", source="cli") + db.set_session_title("s1", "Before End") + db.end_session("s1", end_reason="user_exit") + + session = db.get_session("s1") + assert session["title"] == "Before End" + assert session["ended_at"] is not None + + +class TestSanitizeTitle: + """Tests for SessionDB.sanitize_title() validation and cleaning.""" + + def test_normal_title_unchanged(self): + assert SessionDB.sanitize_title("My Project") == "My Project" + + def test_strips_whitespace(self): + assert SessionDB.sanitize_title(" hello world ") == "hello world" + + def test_collapses_internal_whitespace(self): + assert SessionDB.sanitize_title("hello world") == "hello world" + + def test_tabs_and_newlines_collapsed(self): + assert 
SessionDB.sanitize_title("hello\t\nworld") == "hello world" + + def test_none_returns_none(self): + assert SessionDB.sanitize_title(None) is None + + def test_empty_string_returns_none(self): + assert SessionDB.sanitize_title("") is None + + def test_whitespace_only_returns_none(self): + assert SessionDB.sanitize_title(" \t\n ") is None + + def test_control_chars_stripped(self): + # Null byte, bell, backspace, etc. + assert SessionDB.sanitize_title("hello\x00world") == "helloworld" + assert SessionDB.sanitize_title("\x07\x08test\x1b") == "test" + + def test_del_char_stripped(self): + assert SessionDB.sanitize_title("hello\x7fworld") == "helloworld" + + def test_zero_width_chars_stripped(self): + # Zero-width space (U+200B), zero-width joiner (U+200D) + assert SessionDB.sanitize_title("hello\u200bworld") == "helloworld" + assert SessionDB.sanitize_title("hello\u200dworld") == "helloworld" + + def test_rtl_override_stripped(self): + # Right-to-left override (U+202E) — used in filename spoofing attacks + assert SessionDB.sanitize_title("hello\u202eworld") == "helloworld" + + def test_bom_stripped(self): + # Byte order mark (U+FEFF) + assert SessionDB.sanitize_title("\ufeffhello") == "hello" + + def test_only_control_chars_returns_none(self): + assert SessionDB.sanitize_title("\x00\x01\x02\u200b\ufeff") is None + + def test_max_length_allowed(self): + title = "A" * 100 + assert SessionDB.sanitize_title(title) == title + + def test_exceeds_max_length_raises(self): + title = "A" * 101 + with pytest.raises(ValueError, match="too long"): + SessionDB.sanitize_title(title) + + def test_unicode_emoji_allowed(self): + assert SessionDB.sanitize_title("🚀 My Project 🎉") == "🚀 My Project 🎉" + + def test_cjk_characters_allowed(self): + assert SessionDB.sanitize_title("我的项目") == "我的项目" + + def test_accented_characters_allowed(self): + assert SessionDB.sanitize_title("Résumé éditing") == "Résumé éditing" + + def test_special_punctuation_allowed(self): + title = "PR #438 — fixing the 
'auth' middleware" + assert SessionDB.sanitize_title(title) == title + + def test_sanitize_applied_in_set_session_title(self, db): + """set_session_title applies sanitize_title internally.""" + db.create_session("s1", "cli") + db.set_session_title("s1", " hello\x00 world ") + assert db.get_session("s1")["title"] == "hello world" + + def test_too_long_title_rejected_by_set(self, db): + """set_session_title raises ValueError for overly long titles.""" + db.create_session("s1", "cli") + with pytest.raises(ValueError, match="too long"): + db.set_session_title("s1", "X" * 150) + + class TestSchemaInit: def test_wal_mode(self, db): cursor = db._conn.execute("PRAGMA journal_mode") @@ -373,4 +540,297 @@ class TestSchemaInit: def test_schema_version(self, db): cursor = db._conn.execute("SELECT version FROM schema_version") version = cursor.fetchone()[0] - assert version == 2 + assert version == 4 + + def test_title_column_exists(self, db): + """Verify the title column was created in the sessions table.""" + cursor = db._conn.execute("PRAGMA table_info(sessions)") + columns = {row[1] for row in cursor.fetchall()} + assert "title" in columns + + def test_migration_from_v2(self, tmp_path): + """Simulate a v2 database and verify migration adds title column.""" + import sqlite3 + + db_path = tmp_path / "migrate_test.db" + conn = sqlite3.connect(str(db_path)) + # Create v2 schema (without title column) + conn.executescript(""" + CREATE TABLE schema_version (version INTEGER NOT NULL); + INSERT INTO schema_version (version) VALUES (2); + + CREATE TABLE sessions ( + id TEXT PRIMARY KEY, + source TEXT NOT NULL, + user_id TEXT, + model TEXT, + model_config TEXT, + system_prompt TEXT, + parent_session_id TEXT, + started_at REAL NOT NULL, + ended_at REAL, + end_reason TEXT, + message_count INTEGER DEFAULT 0, + tool_call_count INTEGER DEFAULT 0, + input_tokens INTEGER DEFAULT 0, + output_tokens INTEGER DEFAULT 0 + ); + + CREATE TABLE messages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + 
session_id TEXT NOT NULL, + role TEXT NOT NULL, + content TEXT, + tool_call_id TEXT, + tool_calls TEXT, + tool_name TEXT, + timestamp REAL NOT NULL, + token_count INTEGER, + finish_reason TEXT + ); + """) + conn.execute( + "INSERT INTO sessions (id, source, started_at) VALUES (?, ?, ?)", + ("existing", "cli", 1000.0), + ) + conn.commit() + conn.close() + + # Open with SessionDB — should migrate to v4 + migrated_db = SessionDB(db_path=db_path) + + # Verify migration + cursor = migrated_db._conn.execute("SELECT version FROM schema_version") + assert cursor.fetchone()[0] == 4 + + # Verify title column exists and is NULL for existing sessions + session = migrated_db.get_session("existing") + assert session is not None + assert session["title"] is None + + # Verify we can set title on migrated session + assert migrated_db.set_session_title("existing", "Migrated Title") is True + session = migrated_db.get_session("existing") + assert session["title"] == "Migrated Title" + + migrated_db.close() + + +class TestTitleUniqueness: + """Tests for unique title enforcement and title-based lookups.""" + + def test_duplicate_title_raises(self, db): + """Setting a title already used by another session raises ValueError.""" + db.create_session("s1", "cli") + db.create_session("s2", "cli") + db.set_session_title("s1", "my project") + with pytest.raises(ValueError, match="already in use"): + db.set_session_title("s2", "my project") + + def test_same_session_can_keep_title(self, db): + """A session can re-set its own title without error.""" + db.create_session("s1", "cli") + db.set_session_title("s1", "my project") + # Should not raise — it's the same session + assert db.set_session_title("s1", "my project") is True + + def test_null_titles_not_unique(self, db): + """Multiple sessions can have NULL titles (no constraint violation).""" + db.create_session("s1", "cli") + db.create_session("s2", "cli") + # Both have NULL titles — no error + assert db.get_session("s1")["title"] is None + 
assert db.get_session("s2")["title"] is None + + def test_get_session_by_title(self, db): + db.create_session("s1", "cli") + db.set_session_title("s1", "refactoring auth") + result = db.get_session_by_title("refactoring auth") + assert result is not None + assert result["id"] == "s1" + + def test_get_session_by_title_not_found(self, db): + assert db.get_session_by_title("nonexistent") is None + + def test_get_session_title(self, db): + db.create_session("s1", "cli") + assert db.get_session_title("s1") is None + db.set_session_title("s1", "my title") + assert db.get_session_title("s1") == "my title" + + def test_get_session_title_nonexistent(self, db): + assert db.get_session_title("nonexistent") is None + + +class TestTitleLineage: + """Tests for title lineage resolution and auto-numbering.""" + + def test_resolve_exact_title(self, db): + db.create_session("s1", "cli") + db.set_session_title("s1", "my project") + assert db.resolve_session_by_title("my project") == "s1" + + def test_resolve_returns_latest_numbered(self, db): + """When numbered variants exist, return the most recent one.""" + import time + db.create_session("s1", "cli") + db.set_session_title("s1", "my project") + time.sleep(0.01) + db.create_session("s2", "cli") + db.set_session_title("s2", "my project #2") + time.sleep(0.01) + db.create_session("s3", "cli") + db.set_session_title("s3", "my project #3") + # Resolving "my project" should return s3 (latest numbered variant) + assert db.resolve_session_by_title("my project") == "s3" + + def test_resolve_exact_numbered(self, db): + """Resolving an exact numbered title returns that specific session.""" + db.create_session("s1", "cli") + db.set_session_title("s1", "my project") + db.create_session("s2", "cli") + db.set_session_title("s2", "my project #2") + # Resolving "my project #2" exactly should return s2 + assert db.resolve_session_by_title("my project #2") == "s2" + + def test_resolve_nonexistent_title(self, db): + assert 
db.resolve_session_by_title("nonexistent") is None + + def test_next_title_no_existing(self, db): + """With no existing sessions, base title is returned as-is.""" + assert db.get_next_title_in_lineage("my project") == "my project" + + def test_next_title_first_continuation(self, db): + """First continuation after the original gets #2.""" + db.create_session("s1", "cli") + db.set_session_title("s1", "my project") + assert db.get_next_title_in_lineage("my project") == "my project #2" + + def test_next_title_increments(self, db): + """Each continuation increments the number.""" + db.create_session("s1", "cli") + db.set_session_title("s1", "my project") + db.create_session("s2", "cli") + db.set_session_title("s2", "my project #2") + db.create_session("s3", "cli") + db.set_session_title("s3", "my project #3") + assert db.get_next_title_in_lineage("my project") == "my project #4" + + def test_next_title_strips_existing_number(self, db): + """Passing a numbered title strips the number and finds the base.""" + db.create_session("s1", "cli") + db.set_session_title("s1", "my project") + db.create_session("s2", "cli") + db.set_session_title("s2", "my project #2") + # Even when called with "my project #2", it should return #3 + assert db.get_next_title_in_lineage("my project #2") == "my project #3" + + +class TestTitleSqlWildcards: + """Titles containing SQL LIKE wildcards (%, _) must not cause false matches.""" + + def test_resolve_title_with_underscore(self, db): + """A title like 'test_project' should not match 'testXproject #2'.""" + db.create_session("s1", "cli") + db.set_session_title("s1", "test_project") + db.create_session("s2", "cli") + db.set_session_title("s2", "testXproject #2") + # Resolving "test_project" should return s1 (exact), not s2 + assert db.resolve_session_by_title("test_project") == "s1" + + def test_resolve_title_with_percent(self, db): + """A title with '%' should not wildcard-match unrelated sessions.""" + db.create_session("s1", "cli") + 
db.set_session_title("s1", "100% done") + db.create_session("s2", "cli") + db.set_session_title("s2", "100X done #2") + # Should resolve to s1 (exact), not s2 + assert db.resolve_session_by_title("100% done") == "s1" + + def test_next_lineage_with_underscore(self, db): + """get_next_title_in_lineage with underscores doesn't match wrong sessions.""" + db.create_session("s1", "cli") + db.set_session_title("s1", "test_project") + db.create_session("s2", "cli") + db.set_session_title("s2", "testXproject #2") + # Only "test_project" exists, so next should be "test_project #2" + assert db.get_next_title_in_lineage("test_project") == "test_project #2" + + +class TestListSessionsRich: + """Tests for enhanced session listing with preview and last_active.""" + + def test_preview_from_first_user_message(self, db): + db.create_session("s1", "cli") + db.append_message("s1", "system", "You are a helpful assistant.") + db.append_message("s1", "user", "Help me refactor the auth module please") + db.append_message("s1", "assistant", "Sure, let me look at it.") + sessions = db.list_sessions_rich() + assert len(sessions) == 1 + assert "Help me refactor the auth module" in sessions[0]["preview"] + + def test_preview_truncated_at_60(self, db): + db.create_session("s1", "cli") + long_msg = "A" * 100 + db.append_message("s1", "user", long_msg) + sessions = db.list_sessions_rich() + assert len(sessions[0]["preview"]) == 63 # 60 chars + "..." 
+ assert sessions[0]["preview"].endswith("...") + + def test_preview_empty_when_no_user_messages(self, db): + db.create_session("s1", "cli") + db.append_message("s1", "system", "System prompt") + sessions = db.list_sessions_rich() + assert sessions[0]["preview"] == "" + + def test_last_active_from_latest_message(self, db): + import time + db.create_session("s1", "cli") + db.append_message("s1", "user", "Hello") + time.sleep(0.01) + db.append_message("s1", "assistant", "Hi there!") + sessions = db.list_sessions_rich() + # last_active should be close to now (the assistant message) + assert sessions[0]["last_active"] > sessions[0]["started_at"] + + def test_last_active_fallback_to_started_at(self, db): + db.create_session("s1", "cli") + sessions = db.list_sessions_rich() + # No messages, so last_active falls back to started_at + assert sessions[0]["last_active"] == sessions[0]["started_at"] + + def test_rich_list_includes_title(self, db): + db.create_session("s1", "cli") + db.set_session_title("s1", "refactoring auth") + sessions = db.list_sessions_rich() + assert sessions[0]["title"] == "refactoring auth" + + def test_rich_list_source_filter(self, db): + db.create_session("s1", "cli") + db.create_session("s2", "telegram") + sessions = db.list_sessions_rich(source="cli") + assert len(sessions) == 1 + assert sessions[0]["id"] == "s1" + + def test_preview_newlines_collapsed(self, db): + db.create_session("s1", "cli") + db.append_message("s1", "user", "Line one\nLine two\nLine three") + sessions = db.list_sessions_rich() + assert "\n" not in sessions[0]["preview"] + assert "Line one Line two" in sessions[0]["preview"] + + +class TestResolveSessionByNameOrId: + """Tests for the main.py helper that resolves names or IDs.""" + + def test_resolve_by_id(self, db): + db.create_session("test-id-123", "cli") + session = db.get_session("test-id-123") + assert session is not None + assert session["id"] == "test-id-123" + + def test_resolve_by_title_falls_back(self, db): + 
db.create_session("s1", "cli") + db.set_session_title("s1", "my project") + result = db.resolve_session_by_title("my project") + assert result == "s1" diff --git a/tests/test_resume_display.py b/tests/test_resume_display.py new file mode 100644 index 0000000000..d0c156d13a --- /dev/null +++ b/tests/test_resume_display.py @@ -0,0 +1,488 @@ +"""Tests for session resume history display — _display_resumed_history() and +_preload_resumed_session(). + +Verifies that resuming a session shows a compact recap of the previous +conversation with correct formatting, truncation, and config behavior. +""" + +import os +import sys +from io import StringIO +from unittest.mock import MagicMock, patch + +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + + +def _make_cli(config_overrides=None, env_overrides=None, **kwargs): + """Create a HermesCLI instance with minimal mocking.""" + import cli as _cli_mod + from cli import HermesCLI + + _clean_config = { + "model": { + "default": "anthropic/claude-opus-4.6", + "base_url": "https://openrouter.ai/api/v1", + "provider": "auto", + }, + "display": {"compact": False, "tool_progress": "all", "resume_display": "full"}, + "agent": {}, + "terminal": {"env_type": "local"}, + } + if config_overrides: + for k, v in config_overrides.items(): + if isinstance(v, dict) and k in _clean_config and isinstance(_clean_config[k], dict): + _clean_config[k].update(v) + else: + _clean_config[k] = v + + clean_env = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""} + if env_overrides: + clean_env.update(env_overrides) + with ( + patch("cli.get_tool_definitions", return_value=[]), + patch.dict("os.environ", clean_env, clear=False), + patch.dict(_cli_mod.__dict__, {"CLI_CONFIG": _clean_config}), + ): + return HermesCLI(**kwargs) + + +# ── Sample conversation histories for tests ────────────────────────── + + +def _simple_history(): + """Two-turn conversation: user → assistant → user → assistant.""" + return [ + {"role": "system", 
"content": "You are a helpful assistant."}, + {"role": "user", "content": "What is Python?"}, + {"role": "assistant", "content": "Python is a high-level programming language."}, + {"role": "user", "content": "How do I install it?"}, + {"role": "assistant", "content": "You can install Python from python.org."}, + ] + + +def _tool_call_history(): + """Conversation with tool calls and tool results.""" + return [ + {"role": "system", "content": "system prompt"}, + {"role": "user", "content": "Search for Python tutorials"}, + { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": {"name": "web_search", "arguments": '{"query":"python tutorials"}'}, + }, + { + "id": "call_2", + "type": "function", + "function": {"name": "web_extract", "arguments": '{"urls":["https://example.com"]}'}, + }, + ], + }, + {"role": "tool", "tool_call_id": "call_1", "content": "Found 5 results..."}, + {"role": "tool", "tool_call_id": "call_2", "content": "Page content..."}, + {"role": "assistant", "content": "Here are some great Python tutorials I found."}, + ] + + +def _large_history(n_exchanges=15): + """Build a history with many exchanges to test truncation.""" + msgs = [{"role": "system", "content": "system prompt"}] + for i in range(n_exchanges): + msgs.append({"role": "user", "content": f"Question #{i + 1}: What is item {i + 1}?"}) + msgs.append({"role": "assistant", "content": f"Answer #{i + 1}: Item {i + 1} is great."}) + return msgs + + +def _multimodal_history(): + """Conversation with multimodal (image) content.""" + return [ + {"role": "system", "content": "system prompt"}, + { + "role": "user", + "content": [ + {"type": "text", "text": "What's in this image?"}, + {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}}, + ], + }, + {"role": "assistant", "content": "I see a cat in the image."}, + ] + + +# ── Tests for _display_resumed_history ─────────────────────────────── + + +class 
TestDisplayResumedHistory: + """_display_resumed_history() renders a Rich panel with conversation recap.""" + + def _capture_display(self, cli_obj): + """Run _display_resumed_history and capture the Rich console output.""" + buf = StringIO() + cli_obj.console.file = buf + cli_obj._display_resumed_history() + return buf.getvalue() + + def test_simple_history_shows_user_and_assistant(self): + cli = _make_cli() + cli.conversation_history = _simple_history() + output = self._capture_display(cli) + + assert "You:" in output + assert "Hermes:" in output + assert "What is Python?" in output + assert "Python is a high-level programming language." in output + assert "How do I install it?" in output + + def test_system_messages_hidden(self): + cli = _make_cli() + cli.conversation_history = _simple_history() + output = self._capture_display(cli) + + assert "You are a helpful assistant" not in output + + def test_tool_messages_hidden(self): + cli = _make_cli() + cli.conversation_history = _tool_call_history() + output = self._capture_display(cli) + + # Tool result content should NOT appear + assert "Found 5 results" not in output + assert "Page content" not in output + + def test_tool_calls_shown_as_summary(self): + cli = _make_cli() + cli.conversation_history = _tool_call_history() + output = self._capture_display(cli) + + assert "2 tool calls" in output + assert "web_search" in output + assert "web_extract" in output + + def test_long_user_message_truncated(self): + cli = _make_cli() + long_text = "A" * 500 + cli.conversation_history = [ + {"role": "user", "content": long_text}, + {"role": "assistant", "content": "OK."}, + ] + output = self._capture_display(cli) + + # Should have truncation indicator and NOT contain the full 500 chars + assert "..." 
in output + assert "A" * 500 not in output + # The 300-char truncated text is present but may be line-wrapped by + # Rich's panel renderer, so check the total A count in the output + a_count = output.count("A") + assert 200 <= a_count <= 310 # roughly 300 chars (±panel padding) + + def test_long_assistant_message_truncated(self): + cli = _make_cli() + long_text = "B" * 400 + cli.conversation_history = [ + {"role": "user", "content": "Tell me a lot."}, + {"role": "assistant", "content": long_text}, + ] + output = self._capture_display(cli) + + assert "..." in output + assert "B" * 400 not in output + + def test_multiline_assistant_truncated(self): + cli = _make_cli() + multi = "\n".join([f"Line {i}" for i in range(20)]) + cli.conversation_history = [ + {"role": "user", "content": "Show me lines."}, + {"role": "assistant", "content": multi}, + ] + output = self._capture_display(cli) + + # First 3 lines should be there + assert "Line 0" in output + assert "Line 1" in output + assert "Line 2" in output + # Line 19 should NOT be there (truncated after 3 lines) + assert "Line 19" not in output + + def test_large_history_shows_truncation_indicator(self): + cli = _make_cli() + cli.conversation_history = _large_history(n_exchanges=15) + output = self._capture_display(cli) + + # Should show "earlier messages" indicator + assert "earlier messages" in output + # Last question should still be visible + assert "Question #15" in output + + def test_multimodal_content_handled(self): + cli = _make_cli() + cli.conversation_history = _multimodal_history() + output = self._capture_display(cli) + + assert "What's in this image?" 
in output + assert "[image]" in output + + def test_empty_history_no_output(self): + cli = _make_cli() + cli.conversation_history = [] + output = self._capture_display(cli) + + assert output.strip() == "" + + def test_minimal_config_suppresses_display(self): + cli = _make_cli(config_overrides={"display": {"resume_display": "minimal"}}) + # resume_display is captured as an instance variable during __init__ + assert cli.resume_display == "minimal" + cli.conversation_history = _simple_history() + output = self._capture_display(cli) + + assert output.strip() == "" + + def test_panel_has_title(self): + cli = _make_cli() + cli.conversation_history = _simple_history() + output = self._capture_display(cli) + + assert "Previous Conversation" in output + + def test_assistant_with_no_content_no_tools_skipped(self): + """Assistant messages with no visible output (e.g. pure reasoning) + are skipped in the recap.""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "Hello"}, + {"role": "assistant", "content": None}, + ] + output = self._capture_display(cli) + + # The assistant entry should be skipped, only the user message shown + assert "You:" in output + assert "Hermes:" not in output + + def test_only_system_messages_no_output(self): + cli = _make_cli() + cli.conversation_history = [ + {"role": "system", "content": "You are helpful."}, + ] + output = self._capture_display(cli) + + assert output.strip() == "" + + def test_reasoning_scratchpad_stripped(self): + """ blocks should be stripped from display.""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "Think about this"}, + { + "role": "assistant", + "content": ( + "\nLet me think step by step.\n" + "\n\nThe answer is 42." 
+ ), + }, + ] + output = self._capture_display(cli) + + assert "REASONING_SCRATCHPAD" not in output + assert "Let me think step by step" not in output + assert "The answer is 42" in output + + def test_pure_reasoning_message_skipped(self): + """Assistant messages that are only reasoning should be skipped.""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "Hello"}, + { + "role": "assistant", + "content": "\nJust thinking...\n", + }, + {"role": "assistant", "content": "Hi there!"}, + ] + output = self._capture_display(cli) + + assert "Just thinking" not in output + assert "Hi there!" in output + + def test_assistant_with_text_and_tool_calls(self): + """When an assistant message has both text content AND tool_calls.""" + cli = _make_cli() + cli.conversation_history = [ + {"role": "user", "content": "Do something complex"}, + { + "role": "assistant", + "content": "Let me search for that.", + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": {"name": "terminal", "arguments": '{"command":"ls"}'}, + } + ], + }, + ] + output = self._capture_display(cli) + + assert "Let me search for that." 
in output + assert "1 tool call" in output + assert "terminal" in output + + +# ── Tests for _preload_resumed_session ────────────────────────────── + + +class TestPreloadResumedSession: + """_preload_resumed_session() loads session from DB early.""" + + def test_returns_false_when_not_resumed(self): + cli = _make_cli() + assert cli._preload_resumed_session() is False + + def test_returns_false_when_no_session_db(self): + cli = _make_cli(resume="test_session_id") + cli._session_db = None + assert cli._preload_resumed_session() is False + + def test_returns_false_when_session_not_found(self): + cli = _make_cli(resume="nonexistent_session") + mock_db = MagicMock() + mock_db.get_session.return_value = None + cli._session_db = mock_db + + buf = StringIO() + cli.console.file = buf + result = cli._preload_resumed_session() + + assert result is False + output = buf.getvalue() + assert "Session not found" in output + + def test_returns_false_when_session_has_no_messages(self): + cli = _make_cli(resume="empty_session") + mock_db = MagicMock() + mock_db.get_session.return_value = {"id": "empty_session", "title": None} + mock_db.get_messages_as_conversation.return_value = [] + cli._session_db = mock_db + + buf = StringIO() + cli.console.file = buf + result = cli._preload_resumed_session() + + assert result is False + output = buf.getvalue() + assert "no messages" in output + + def test_loads_session_successfully(self): + cli = _make_cli(resume="good_session") + messages = _simple_history() + mock_db = MagicMock() + mock_db.get_session.return_value = {"id": "good_session", "title": "Test Session"} + mock_db.get_messages_as_conversation.return_value = messages + cli._session_db = mock_db + + buf = StringIO() + cli.console.file = buf + result = cli._preload_resumed_session() + + assert result is True + assert cli.conversation_history == messages + output = buf.getvalue() + assert "Resumed session" in output + assert "good_session" in output + assert "Test Session" in output + 
assert "2 user messages" in output + + def test_reopens_session_in_db(self): + cli = _make_cli(resume="reopen_session") + messages = [{"role": "user", "content": "hi"}] + mock_db = MagicMock() + mock_db.get_session.return_value = {"id": "reopen_session", "title": None} + mock_db.get_messages_as_conversation.return_value = messages + mock_conn = MagicMock() + mock_db._conn = mock_conn + cli._session_db = mock_db + + buf = StringIO() + cli.console.file = buf + cli._preload_resumed_session() + + # Should have executed UPDATE to clear ended_at + mock_conn.execute.assert_called_once() + call_args = mock_conn.execute.call_args + assert "ended_at = NULL" in call_args[0][0] + mock_conn.commit.assert_called_once() + + def test_singular_user_message_grammar(self): + """1 user message should say 'message' not 'messages'.""" + cli = _make_cli(resume="one_msg_session") + messages = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi"}, + ] + mock_db = MagicMock() + mock_db.get_session.return_value = {"id": "one_msg_session", "title": None} + mock_db.get_messages_as_conversation.return_value = messages + mock_db._conn = MagicMock() + cli._session_db = mock_db + + buf = StringIO() + cli.console.file = buf + cli._preload_resumed_session() + + output = buf.getvalue() + assert "1 user message," in output + assert "1 user messages" not in output + + +# ── Integration: _init_agent skips when preloaded ──────────────────── + + +class TestInitAgentSkipsPreloaded: + """_init_agent() should skip DB load when history is already populated.""" + + def test_init_agent_skips_db_when_preloaded(self): + """If conversation_history is already set, _init_agent should not + reload from the DB.""" + cli = _make_cli(resume="preloaded_session") + cli.conversation_history = _simple_history() + + mock_db = MagicMock() + cli._session_db = mock_db + + # _init_agent will fail at credential resolution (no real API key), + # but the session-loading block should be skipped entirely 
+ with patch.object(cli, "_ensure_runtime_credentials", return_value=False): + cli._init_agent() + + # get_messages_as_conversation should NOT have been called + mock_db.get_messages_as_conversation.assert_not_called() + + +# ── Config default tests ───────────────────────────────────────────── + + +class TestResumeDisplayConfig: + """resume_display config option defaults and behavior.""" + + def test_default_config_has_resume_display(self): + """DEFAULT_CONFIG in hermes_cli/config.py includes resume_display.""" + from hermes_cli.config import DEFAULT_CONFIG + display = DEFAULT_CONFIG.get("display", {}) + assert "resume_display" in display + assert display["resume_display"] == "full" + + def test_cli_defaults_have_resume_display(self): + """cli.py load_cli_config defaults include resume_display.""" + import cli as _cli_mod + from cli import load_cli_config + + with ( + patch("pathlib.Path.exists", return_value=False), + patch.dict("os.environ", {"LLM_MODEL": ""}, clear=False), + ): + config = load_cli_config() + + display = config.get("display", {}) + assert display.get("resume_display") == "full" diff --git a/tests/test_run_agent.py b/tests/test_run_agent.py index 55f96f942a..64de980d59 100644 --- a/tests/test_run_agent.py +++ b/tests/test_run_agent.py @@ -1040,3 +1040,136 @@ class TestMaxTokensParam: agent.base_url = "https://openrouter.ai/api/v1/api.openai.com" result = agent._max_tokens_param(4096) assert result == {"max_tokens": 4096} + + +# --------------------------------------------------------------------------- +# System prompt stability for prompt caching +# --------------------------------------------------------------------------- + +class TestSystemPromptStability: + """Verify that the system prompt stays stable across turns for cache hits.""" + + def test_stored_prompt_reused_for_continuing_session(self, agent): + """When conversation_history is non-empty and session DB has a stored + prompt, it should be reused instead of rebuilding from disk.""" + 
stored = "You are helpful. [stored from turn 1]" + mock_db = MagicMock() + mock_db.get_session.return_value = {"system_prompt": stored} + agent._session_db = mock_db + + # Simulate a continuing session with history + history = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi"}, + ] + + # First call — _cached_system_prompt is None, history is non-empty + agent._cached_system_prompt = None + + # Patch run_conversation internals to just test the system prompt logic. + # We'll call the prompt caching block directly by simulating what + # run_conversation does. + conversation_history = history + + # The block under test (from run_conversation): + if agent._cached_system_prompt is None: + stored_prompt = None + if conversation_history and agent._session_db: + try: + session_row = agent._session_db.get_session(agent.session_id) + if session_row: + stored_prompt = session_row.get("system_prompt") or None + except Exception: + pass + + if stored_prompt: + agent._cached_system_prompt = stored_prompt + + assert agent._cached_system_prompt == stored + mock_db.get_session.assert_called_once_with(agent.session_id) + + def test_fresh_build_when_no_history(self, agent): + """On the first turn (no history), system prompt should be built fresh.""" + mock_db = MagicMock() + agent._session_db = mock_db + + agent._cached_system_prompt = None + conversation_history = [] + + # The block under test: + if agent._cached_system_prompt is None: + stored_prompt = None + if conversation_history and agent._session_db: + session_row = agent._session_db.get_session(agent.session_id) + if session_row: + stored_prompt = session_row.get("system_prompt") or None + + if stored_prompt: + agent._cached_system_prompt = stored_prompt + else: + agent._cached_system_prompt = agent._build_system_prompt() + + # Should have built fresh, not queried the DB + mock_db.get_session.assert_not_called() + assert agent._cached_system_prompt is not None + assert "Hermes Agent" in 
agent._cached_system_prompt + + def test_fresh_build_when_db_has_no_prompt(self, agent): + """If the session DB has no stored prompt, build fresh even with history.""" + mock_db = MagicMock() + mock_db.get_session.return_value = {"system_prompt": ""} + agent._session_db = mock_db + + agent._cached_system_prompt = None + conversation_history = [{"role": "user", "content": "hi"}] + + if agent._cached_system_prompt is None: + stored_prompt = None + if conversation_history and agent._session_db: + try: + session_row = agent._session_db.get_session(agent.session_id) + if session_row: + stored_prompt = session_row.get("system_prompt") or None + except Exception: + pass + + if stored_prompt: + agent._cached_system_prompt = stored_prompt + else: + agent._cached_system_prompt = agent._build_system_prompt() + + # Empty string is falsy, so should fall through to fresh build + assert "Hermes Agent" in agent._cached_system_prompt + + def test_honcho_context_baked_into_prompt_on_first_turn(self, agent): + """Honcho context should be baked into _cached_system_prompt on + the first turn, not injected separately per API call.""" + agent._honcho_context = "User prefers Python over JavaScript." 
+ agent._cached_system_prompt = None + + # Simulate first turn: build fresh and bake in Honcho + agent._cached_system_prompt = agent._build_system_prompt() + if agent._honcho_context: + agent._cached_system_prompt = ( + agent._cached_system_prompt + "\n\n" + agent._honcho_context + ).strip() + + assert "User prefers Python over JavaScript" in agent._cached_system_prompt + + def test_honcho_prefetch_skipped_on_continuing_session(self): + """Honcho prefetch should not be called when conversation_history + is non-empty (continuing session).""" + conversation_history = [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi there"}, + ] + + # The guard: `not conversation_history` is False when history exists + should_prefetch = not conversation_history + assert should_prefetch is False + + def test_honcho_prefetch_runs_on_first_turn(self): + """Honcho prefetch should run when conversation_history is empty.""" + conversation_history = [] + should_prefetch = not conversation_history + assert should_prefetch is True diff --git a/tests/tools/test_browser_console.py b/tests/tools/test_browser_console.py new file mode 100644 index 0000000000..962b49f020 --- /dev/null +++ b/tests/tools/test_browser_console.py @@ -0,0 +1,276 @@ +"""Tests for browser_console tool and browser_vision annotate param.""" + +import json +import os +import sys +from unittest.mock import patch, MagicMock + +import pytest + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..")) + + +# ── browser_console ────────────────────────────────────────────────── + + +class TestBrowserConsole: + """browser_console() returns console messages + JS errors in one call.""" + + def test_returns_console_messages_and_errors(self): + from tools.browser_tool import browser_console + + console_response = { + "success": True, + "data": { + "messages": [ + {"text": "hello", "type": "log", "timestamp": 1}, + {"text": "oops", "type": "error", "timestamp": 2}, + ] + }, + } + 
errors_response = { + "success": True, + "data": { + "errors": [ + {"message": "Uncaught TypeError", "timestamp": 3}, + ] + }, + } + + with patch("tools.browser_tool._run_browser_command") as mock_cmd: + mock_cmd.side_effect = [console_response, errors_response] + result = json.loads(browser_console(task_id="test")) + + assert result["success"] is True + assert result["total_messages"] == 2 + assert result["total_errors"] == 1 + assert result["console_messages"][0]["text"] == "hello" + assert result["console_messages"][1]["text"] == "oops" + assert result["js_errors"][0]["message"] == "Uncaught TypeError" + + def test_passes_clear_flag(self): + from tools.browser_tool import browser_console + + empty = {"success": True, "data": {"messages": [], "errors": []}} + with patch("tools.browser_tool._run_browser_command", return_value=empty) as mock_cmd: + browser_console(clear=True, task_id="test") + + calls = mock_cmd.call_args_list + # Both console and errors should get --clear + assert calls[0][0] == ("test", "console", ["--clear"]) + assert calls[1][0] == ("test", "errors", ["--clear"]) + + def test_no_clear_by_default(self): + from tools.browser_tool import browser_console + + empty = {"success": True, "data": {"messages": [], "errors": []}} + with patch("tools.browser_tool._run_browser_command", return_value=empty) as mock_cmd: + browser_console(task_id="test") + + calls = mock_cmd.call_args_list + assert calls[0][0] == ("test", "console", []) + assert calls[1][0] == ("test", "errors", []) + + def test_empty_console_and_errors(self): + from tools.browser_tool import browser_console + + empty = {"success": True, "data": {"messages": [], "errors": []}} + with patch("tools.browser_tool._run_browser_command", return_value=empty): + result = json.loads(browser_console(task_id="test")) + + assert result["total_messages"] == 0 + assert result["total_errors"] == 0 + assert result["console_messages"] == [] + assert result["js_errors"] == [] + + def 
test_handles_failed_commands(self): + from tools.browser_tool import browser_console + + failed = {"success": False, "error": "No session"} + with patch("tools.browser_tool._run_browser_command", return_value=failed): + result = json.loads(browser_console(task_id="test")) + + # Should still return success with empty data + assert result["success"] is True + assert result["total_messages"] == 0 + assert result["total_errors"] == 0 + + +# ── browser_console schema ─────────────────────────────────────────── + + +class TestBrowserConsoleSchema: + """browser_console is properly registered in the tool registry.""" + + def test_schema_in_browser_schemas(self): + from tools.browser_tool import BROWSER_TOOL_SCHEMAS + + names = [s["name"] for s in BROWSER_TOOL_SCHEMAS] + assert "browser_console" in names + + def test_schema_has_clear_param(self): + from tools.browser_tool import BROWSER_TOOL_SCHEMAS + + schema = next(s for s in BROWSER_TOOL_SCHEMAS if s["name"] == "browser_console") + props = schema["parameters"]["properties"] + assert "clear" in props + assert props["clear"]["type"] == "boolean" + + +# ── browser_vision annotate ────────────────────────────────────────── + + +class TestBrowserVisionAnnotate: + """browser_vision supports annotate parameter.""" + + def test_schema_has_annotate_param(self): + from tools.browser_tool import BROWSER_TOOL_SCHEMAS + + schema = next(s for s in BROWSER_TOOL_SCHEMAS if s["name"] == "browser_vision") + props = schema["parameters"]["properties"] + assert "annotate" in props + assert props["annotate"]["type"] == "boolean" + + def test_annotate_false_no_flag(self): + """Without annotate, screenshot command has no --annotate flag.""" + from tools.browser_tool import browser_vision + + with ( + patch("tools.browser_tool._run_browser_command") as mock_cmd, + patch("tools.browser_tool._aux_vision_client") as mock_client, + patch("tools.browser_tool._DEFAULT_VISION_MODEL", "test-model"), + patch("tools.browser_tool._get_vision_model", 
return_value="test-model"), + ): + mock_cmd.return_value = {"success": True, "data": {}} + # Will fail at screenshot file read, but we can check the command + try: + browser_vision("test", annotate=False, task_id="test") + except Exception: + pass + + if mock_cmd.called: + args = mock_cmd.call_args[0] + cmd_args = args[2] if len(args) > 2 else [] + assert "--annotate" not in cmd_args + + def test_annotate_true_adds_flag(self): + """With annotate=True, screenshot command includes --annotate.""" + from tools.browser_tool import browser_vision + + with ( + patch("tools.browser_tool._run_browser_command") as mock_cmd, + patch("tools.browser_tool._aux_vision_client") as mock_client, + patch("tools.browser_tool._DEFAULT_VISION_MODEL", "test-model"), + patch("tools.browser_tool._get_vision_model", return_value="test-model"), + ): + mock_cmd.return_value = {"success": True, "data": {}} + try: + browser_vision("test", annotate=True, task_id="test") + except Exception: + pass + + if mock_cmd.called: + args = mock_cmd.call_args[0] + cmd_args = args[2] if len(args) > 2 else [] + assert "--annotate" in cmd_args + + +# ── auto-recording config ──────────────────────────────────────────── + + +class TestRecordSessionsConfig: + """browser.record_sessions config option.""" + + def test_default_config_has_record_sessions(self): + from hermes_cli.config import DEFAULT_CONFIG + + browser_cfg = DEFAULT_CONFIG.get("browser", {}) + assert "record_sessions" in browser_cfg + assert browser_cfg["record_sessions"] is False + + def test_maybe_start_recording_disabled(self): + """Recording doesn't start when config says record_sessions: false.""" + from tools.browser_tool import _maybe_start_recording, _recording_sessions + + with ( + patch("tools.browser_tool._run_browser_command") as mock_cmd, + patch("builtins.open", side_effect=FileNotFoundError), + ): + _maybe_start_recording("test-task") + + mock_cmd.assert_not_called() + assert "test-task" not in _recording_sessions + + def 
test_maybe_stop_recording_noop_when_not_recording(self): + """Stopping when not recording is a no-op.""" + from tools.browser_tool import _maybe_stop_recording, _recording_sessions + + _recording_sessions.discard("test-task") # ensure not in set + with patch("tools.browser_tool._run_browser_command") as mock_cmd: + _maybe_stop_recording("test-task") + + mock_cmd.assert_not_called() + + +# ── dogfood skill files ────────────────────────────────────────────── + + +class TestDogfoodSkill: + """Dogfood skill files exist and have correct structure.""" + + @pytest.fixture(autouse=True) + def _skill_dir(self): + # Use the actual repo skills dir (not temp) + self.skill_dir = os.path.join( + os.path.dirname(__file__), "..", "..", "skills", "dogfood" + ) + + def test_skill_md_exists(self): + assert os.path.exists(os.path.join(self.skill_dir, "SKILL.md")) + + def test_taxonomy_exists(self): + assert os.path.exists( + os.path.join(self.skill_dir, "references", "issue-taxonomy.md") + ) + + def test_report_template_exists(self): + assert os.path.exists( + os.path.join(self.skill_dir, "templates", "dogfood-report-template.md") + ) + + def test_skill_md_has_frontmatter(self): + with open(os.path.join(self.skill_dir, "SKILL.md")) as f: + content = f.read() + assert content.startswith("---") + assert "name: dogfood" in content + assert "description:" in content + + def test_skill_references_browser_console(self): + with open(os.path.join(self.skill_dir, "SKILL.md")) as f: + content = f.read() + assert "browser_console" in content + + def test_skill_references_annotate(self): + with open(os.path.join(self.skill_dir, "SKILL.md")) as f: + content = f.read() + assert "annotate" in content + + def test_taxonomy_has_severity_levels(self): + with open( + os.path.join(self.skill_dir, "references", "issue-taxonomy.md") + ) as f: + content = f.read() + assert "Critical" in content + assert "High" in content + assert "Medium" in content + assert "Low" in content + + def 
test_taxonomy_has_categories(self): + with open( + os.path.join(self.skill_dir, "references", "issue-taxonomy.md") + ) as f: + content = f.read() + assert "Functional" in content + assert "Visual" in content + assert "Accessibility" in content + assert "Console" in content diff --git a/tests/tools/test_clipboard.py b/tests/tools/test_clipboard.py index 9b75974171..dca3d3d2b4 100644 --- a/tests/tools/test_clipboard.py +++ b/tests/tools/test_clipboard.py @@ -550,14 +550,13 @@ class TestConvertToPng: """BMP file should still be reported as success if no converter available.""" dest = tmp_path / "img.png" dest.write_bytes(FAKE_BMP) # it's a BMP but named .png - # Both Pillow and ImageMagick fail - with patch("hermes_cli.clipboard.subprocess.run", side_effect=FileNotFoundError): - # Pillow import fails - with pytest.raises(Exception): - from PIL import Image # noqa — this may or may not work - # The function should still return True if file exists and has content - # (raw BMP is better than nothing) - assert dest.exists() and dest.stat().st_size > 0 + # Both Pillow and ImageMagick unavailable + with patch.dict(sys.modules, {"PIL": None, "PIL.Image": None}): + with patch("hermes_cli.clipboard.subprocess.run", side_effect=FileNotFoundError): + result = _convert_to_png(dest) + # Raw BMP is better than nothing — function should return True + assert result is True + assert dest.exists() and dest.stat().st_size > 0 # ── has_clipboard_image dispatch ───────────────────────────────────────── diff --git a/tests/tools/test_file_operations.py b/tests/tools/test_file_operations.py index b427826e5e..0db3fb43b6 100644 --- a/tests/tools/test_file_operations.py +++ b/tests/tools/test_file_operations.py @@ -259,6 +259,70 @@ class TestShellFileOpsHelpers: assert ops.cwd == "/" +class TestSearchPathValidation: + """Test that search() returns an error for non-existent paths.""" + + def test_search_nonexistent_path_returns_error(self, mock_env): + """search() should return an error when the 
path doesn't exist.""" + def side_effect(command, **kwargs): + if "test -e" in command: + return {"output": "not_found", "returncode": 1} + if "command -v" in command: + return {"output": "yes", "returncode": 0} + return {"output": "", "returncode": 0} + mock_env.execute.side_effect = side_effect + ops = ShellFileOperations(mock_env) + result = ops.search("pattern", path="/nonexistent/path") + assert result.error is not None + assert "not found" in result.error.lower() or "Path not found" in result.error + + def test_search_nonexistent_path_files_mode(self, mock_env): + """search(target='files') should also return error for bad paths.""" + def side_effect(command, **kwargs): + if "test -e" in command: + return {"output": "not_found", "returncode": 1} + if "command -v" in command: + return {"output": "yes", "returncode": 0} + return {"output": "", "returncode": 0} + mock_env.execute.side_effect = side_effect + ops = ShellFileOperations(mock_env) + result = ops.search("*.py", path="/nonexistent/path", target="files") + assert result.error is not None + assert "not found" in result.error.lower() or "Path not found" in result.error + + def test_search_existing_path_proceeds(self, mock_env): + """search() should proceed normally when the path exists.""" + def side_effect(command, **kwargs): + if "test -e" in command: + return {"output": "exists", "returncode": 0} + if "command -v" in command: + return {"output": "yes", "returncode": 0} + # rg returns exit 1 (no matches) with empty output + return {"output": "", "returncode": 1} + mock_env.execute.side_effect = side_effect + ops = ShellFileOperations(mock_env) + result = ops.search("pattern", path="/existing/path") + assert result.error is None + assert result.total_count == 0 # No matches but no error + + def test_search_rg_error_exit_code(self, mock_env): + """search() should report error when rg returns exit code 2.""" + call_count = {"n": 0} + def side_effect(command, **kwargs): + call_count["n"] += 1 + if "test -e" 
in command: + return {"output": "exists", "returncode": 0} + if "command -v" in command: + return {"output": "yes", "returncode": 0} + # rg returns exit 2 (error) with empty output + return {"output": "", "returncode": 2} + mock_env.execute.side_effect = side_effect + ops = ShellFileOperations(mock_env) + result = ops.search("pattern", path="/some/path") + assert result.error is not None + assert "search failed" in result.error.lower() or "Search error" in result.error + + class TestShellFileOpsWriteDenied: def test_write_file_denied_path(self, file_ops): result = file_ops.write_file("~/.ssh/authorized_keys", "evil key") diff --git a/tests/tools/test_file_tools.py b/tests/tools/test_file_tools.py index 8b1bf3f7d1..27ccf70420 100644 --- a/tests/tools/test_file_tools.py +++ b/tests/tools/test_file_tools.py @@ -38,6 +38,7 @@ class TestReadFileHandler: def test_returns_file_content(self, mock_get): mock_ops = MagicMock() result_obj = MagicMock() + result_obj.content = "line1\nline2" result_obj.to_dict.return_value = {"content": "line1\nline2", "total_lines": 2} mock_ops.read_file.return_value = result_obj mock_get.return_value = mock_ops @@ -52,6 +53,7 @@ class TestReadFileHandler: def test_custom_offset_and_limit(self, mock_get): mock_ops = MagicMock() result_obj = MagicMock() + result_obj.content = "line10" result_obj.to_dict.return_value = {"content": "line10", "total_lines": 50} mock_ops.read_file.return_value = result_obj mock_get.return_value = mock_ops @@ -200,3 +202,96 @@ class TestSearchHandler: from tools.file_tools import search_tool result = json.loads(search_tool(pattern="x")) assert "error" in result + + +# --------------------------------------------------------------------------- +# Tool result hint tests (#722) +# --------------------------------------------------------------------------- + +class TestPatchHints: + """Patch tool should hint when old_string is not found.""" + + @patch("tools.file_tools._get_file_ops") + def 
test_no_match_includes_hint(self, mock_get): + mock_ops = MagicMock() + result_obj = MagicMock() + result_obj.to_dict.return_value = { + "error": "Could not find match for old_string in foo.py" + } + mock_ops.patch_replace.return_value = result_obj + mock_get.return_value = mock_ops + + from tools.file_tools import patch_tool + raw = patch_tool(mode="replace", path="foo.py", old_string="x", new_string="y") + assert "[Hint:" in raw + assert "read_file" in raw + + @patch("tools.file_tools._get_file_ops") + def test_success_no_hint(self, mock_get): + mock_ops = MagicMock() + result_obj = MagicMock() + result_obj.to_dict.return_value = {"success": True, "diff": "--- a\n+++ b"} + mock_ops.patch_replace.return_value = result_obj + mock_get.return_value = mock_ops + + from tools.file_tools import patch_tool + raw = patch_tool(mode="replace", path="foo.py", old_string="x", new_string="y") + assert "[Hint:" not in raw + + +class TestSearchHints: + """Search tool should hint when results are truncated.""" + + def setup_method(self): + """Clear read/search tracker between tests to avoid cross-test state.""" + from tools.file_tools import clear_read_tracker + clear_read_tracker() + + @patch("tools.file_tools._get_file_ops") + def test_truncated_results_hint(self, mock_get): + mock_ops = MagicMock() + result_obj = MagicMock() + result_obj.to_dict.return_value = { + "total_count": 100, + "matches": [{"path": "a.py", "line": 1, "content": "x"}] * 50, + "truncated": True, + } + mock_ops.search.return_value = result_obj + mock_get.return_value = mock_ops + + from tools.file_tools import search_tool + raw = search_tool(pattern="foo", offset=0, limit=50) + assert "[Hint:" in raw + assert "offset=50" in raw + + @patch("tools.file_tools._get_file_ops") + def test_non_truncated_no_hint(self, mock_get): + mock_ops = MagicMock() + result_obj = MagicMock() + result_obj.to_dict.return_value = { + "total_count": 3, + "matches": [{"path": "a.py", "line": 1, "content": "x"}] * 3, + } + 
mock_ops.search.return_value = result_obj + mock_get.return_value = mock_ops + + from tools.file_tools import search_tool + raw = search_tool(pattern="foo") + assert "[Hint:" not in raw + + @patch("tools.file_tools._get_file_ops") + def test_truncated_hint_with_nonzero_offset(self, mock_get): + mock_ops = MagicMock() + result_obj = MagicMock() + result_obj.to_dict.return_value = { + "total_count": 150, + "matches": [{"path": "a.py", "line": 1, "content": "x"}] * 50, + "truncated": True, + } + mock_ops.search.return_value = result_obj + mock_get.return_value = mock_ops + + from tools.file_tools import search_tool + raw = search_tool(pattern="foo", offset=50, limit=50) + assert "[Hint:" in raw + assert "offset=100" in raw diff --git a/tools/browser_tool.py b/tools/browser_tool.py index e1bd322395..480093eaa5 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -63,7 +63,7 @@ import time import requests from typing import Dict, Any, Optional, List from pathlib import Path -from agent.auxiliary_client import get_vision_auxiliary_client +from agent.auxiliary_client import get_vision_auxiliary_client, get_text_auxiliary_client logger = logging.getLogger(__name__) @@ -80,8 +80,38 @@ DEFAULT_SESSION_TIMEOUT = 300 # Max tokens for snapshot content before summarization SNAPSHOT_SUMMARIZE_THRESHOLD = 8000 -# Resolve vision auxiliary client for extraction/vision tasks -_aux_vision_client, EXTRACTION_MODEL = get_vision_auxiliary_client() +# Vision client — for browser_vision (screenshot analysis) +# Wrapped in try/except so a broken auxiliary config doesn't prevent the entire +# browser_tool module from importing (which would disable all 10 browser tools). 
+try: + _aux_vision_client, _DEFAULT_VISION_MODEL = get_vision_auxiliary_client() +except Exception as _init_err: + logger.debug("Could not initialise vision auxiliary client: %s", _init_err) + _aux_vision_client, _DEFAULT_VISION_MODEL = None, None + +# Text client — for page snapshot summarization (same config as web_extract) +try: + _aux_text_client, _DEFAULT_TEXT_MODEL = get_text_auxiliary_client("web_extract") +except Exception as _init_err: + logger.debug("Could not initialise text auxiliary client: %s", _init_err) + _aux_text_client, _DEFAULT_TEXT_MODEL = None, None + +# Module-level alias for availability checks +EXTRACTION_MODEL = _DEFAULT_TEXT_MODEL or _DEFAULT_VISION_MODEL + + +def _get_vision_model() -> str: + """Model for browser_vision (screenshot analysis — multimodal).""" + return (os.getenv("AUXILIARY_VISION_MODEL", "").strip() + or _DEFAULT_VISION_MODEL + or "google/gemini-3-flash-preview") + + +def _get_extraction_model() -> str: + """Model for page snapshot text summarization — same as web_extract.""" + return (os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() + or _DEFAULT_TEXT_MODEL + or "google/gemini-3-flash-preview") def _is_local_mode() -> bool: @@ -94,9 +124,27 @@ def _is_local_mode() -> bool: return not (os.environ.get("BROWSERBASE_API_KEY") and os.environ.get("BROWSERBASE_PROJECT_ID")) +def _socket_safe_tmpdir() -> str: + """Return a short temp directory path suitable for Unix domain sockets. + + macOS sets ``TMPDIR`` to ``/var/folders/xx/.../T/`` (~51 chars). When we + append ``agent-browser-hermes_…`` the resulting socket path exceeds the + 104-byte macOS limit for ``AF_UNIX`` addresses, causing agent-browser to + fail with "Failed to create socket directory" or silent screenshot failures. + + Linux ``tempfile.gettempdir()`` already returns ``/tmp``, so this is a + no-op there. On macOS we bypass ``TMPDIR`` and use ``/tmp`` directly + (symlink to ``/private/tmp``, sticky-bit protected, always available). 
+ """ + if sys.platform == "darwin": + return "/tmp" + return tempfile.gettempdir() + + # Track active sessions per task # Stores: session_name (always), bb_session_id + cdp_url (cloud mode only) _active_sessions: Dict[str, Dict[str, str]] = {} # task_id -> {session_name, ...} +_recording_sessions: set = set() # task_ids with active recordings # Flag to track if cleanup has been done _cleanup_done = False @@ -145,7 +193,7 @@ def _emergency_cleanup_all_sessions(): try: browser_cmd = _find_agent_browser() task_socket_dir = os.path.join( - tempfile.gettempdir(), + _socket_safe_tmpdir(), f"agent-browser-{session_name}" ) env = {**os.environ, "AGENT_BROWSER_SOCKET_DIR": task_socket_dir} @@ -431,11 +479,31 @@ BROWSER_TOOL_SCHEMAS = [ "question": { "type": "string", "description": "What you want to know about the page visually. Be specific about what you're looking for." + }, + "annotate": { + "type": "boolean", + "default": False, + "description": "If true, overlay numbered [N] labels on interactive elements. Each [N] maps to ref @eN for subsequent browser commands. Useful for QA and spatial reasoning about page layout." } }, "required": ["question"] } }, + { + "name": "browser_console", + "description": "Get browser console output and JavaScript errors from the current page. Returns console.log/warn/error/info messages and uncaught JS exceptions. Use this to detect silent JavaScript errors, failed API calls, and application warnings. 
Requires browser_navigate to be called first.", + "parameters": { + "type": "object", + "properties": { + "clear": { + "type": "boolean", + "default": False, + "description": "If true, clear the message buffers after reading" + } + }, + "required": [] + } + }, ] @@ -755,6 +823,7 @@ def _run_browser_command( try: browser_cmd = _find_agent_browser() except FileNotFoundError as e: + logger.warning("agent-browser CLI not found: %s", e) return {"success": False, "error": str(e)} from tools.interrupt import is_interrupted @@ -765,6 +834,7 @@ def _run_browser_command( try: session_info = _get_session_info(task_id) except Exception as e: + logger.warning("Failed to create browser session for task=%s: %s", task_id, e) return {"success": False, "error": f"Failed to create browser session: {str(e)}"} # Build the command with the appropriate backend flag. @@ -790,10 +860,12 @@ def _run_browser_command( # Without this, parallel workers fight over the same default socket path, # causing "Failed to create socket directory: Permission denied" errors. 
task_socket_dir = os.path.join( - tempfile.gettempdir(), + _socket_safe_tmpdir(), f"agent-browser-{session_info['session_name']}" ) - os.makedirs(task_socket_dir, exist_ok=True) + os.makedirs(task_socket_dir, mode=0o700, exist_ok=True) + logger.debug("browser cmd=%s task=%s socket_dir=%s (%d chars)", + command, task_id, task_socket_dir, len(task_socket_dir)) browser_env = {**os.environ} # Ensure PATH includes standard dirs (systemd services may have minimal PATH) @@ -835,22 +907,29 @@ def _run_browser_command( "returncode=%s", result.returncode) return parsed except json.JSONDecodeError: - # If not valid JSON, return as raw output + # Non-JSON output indicates agent-browser crash or version mismatch + raw = result.stdout.strip()[:500] + logger.warning("browser '%s' returned non-JSON output (rc=%s): %s", + command, result.returncode, raw[:200]) return { "success": True, - "data": {"raw": result.stdout.strip()} + "data": {"raw": raw} } # Check for errors if result.returncode != 0: error_msg = result.stderr.strip() if result.stderr else f"Command failed with code {result.returncode}" + logger.warning("browser '%s' failed (rc=%s): %s", command, result.returncode, error_msg[:300]) return {"success": False, "error": error_msg} return {"success": True, "data": {}} except subprocess.TimeoutExpired: + logger.warning("browser '%s' timed out after %ds (task=%s, socket_dir=%s)", + command, timeout, task_id, task_socket_dir) return {"success": False, "error": f"Command timed out after {timeout} seconds"} except Exception as e: + logger.warning("browser '%s' exception: %s", command, e, exc_info=True) return {"success": False, "error": str(e)} @@ -860,9 +939,9 @@ def _extract_relevant_content( ) -> str: """Use LLM to extract relevant content from a snapshot based on the user's task. - Falls back to simple truncation when no auxiliary vision model is configured. + Falls back to simple truncation when no auxiliary text model is configured. 
""" - if _aux_vision_client is None or EXTRACTION_MODEL is None: + if _aux_text_client is None: return _truncate_snapshot(snapshot_text) if user_task: @@ -890,8 +969,8 @@ def _extract_relevant_content( try: from agent.auxiliary_client import auxiliary_max_tokens_param - response = _aux_vision_client.chat.completions.create( - model=EXTRACTION_MODEL, + response = _aux_text_client.chat.completions.create( + model=_get_extraction_model(), messages=[{"role": "user", "content": extraction_prompt}], **auxiliary_max_tokens_param(4000), temperature=0.1, @@ -940,9 +1019,10 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str: session_info = _get_session_info(effective_task_id) is_first_nav = session_info.get("_first_nav", True) - # Mark that we've done at least one navigation + # Auto-start recording if configured and this is first navigation if is_first_nav: session_info["_first_nav"] = False + _maybe_start_recording(effective_task_id) result = _run_browser_command(effective_task_id, "open", [url], timeout=60) @@ -1206,6 +1286,10 @@ def browser_close(task_id: Optional[str] = None) -> str: JSON string with close result """ effective_task_id = task_id or "default" + + # Stop auto-recording before closing + _maybe_stop_recording(effective_task_id) + result = _run_browser_command(effective_task_id, "close", []) # Close the backend session (Browserbase API in cloud mode, nothing extra in local mode) @@ -1236,6 +1320,103 @@ def browser_close(task_id: Optional[str] = None) -> str: }, ensure_ascii=False) +def browser_console(clear: bool = False, task_id: Optional[str] = None) -> str: + """Get browser console messages and JavaScript errors. + + Returns both console output (log/warn/error/info from the page's JS) + and uncaught exceptions (crashes, unhandled promise rejections). 
+ + Args: + clear: If True, clear the message/error buffers after reading + task_id: Task identifier for session isolation + + Returns: + JSON string with console messages and JS errors + """ + effective_task_id = task_id or "default" + + console_args = ["--clear"] if clear else [] + error_args = ["--clear"] if clear else [] + + console_result = _run_browser_command(effective_task_id, "console", console_args) + errors_result = _run_browser_command(effective_task_id, "errors", error_args) + + messages = [] + if console_result.get("success"): + for msg in console_result.get("data", {}).get("messages", []): + messages.append({ + "type": msg.get("type", "log"), + "text": msg.get("text", ""), + "source": "console", + }) + + errors = [] + if errors_result.get("success"): + for err in errors_result.get("data", {}).get("errors", []): + errors.append({ + "message": err.get("message", ""), + "source": "exception", + }) + + return json.dumps({ + "success": True, + "console_messages": messages, + "js_errors": errors, + "total_messages": len(messages), + "total_errors": len(errors), + }, ensure_ascii=False) + + +def _maybe_start_recording(task_id: str): + """Start recording if browser.record_sessions is enabled in config.""" + if task_id in _recording_sessions: + return + try: + hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) + config_path = hermes_home / "config.yaml" + record_enabled = False + if config_path.exists(): + import yaml + with open(config_path) as f: + cfg = yaml.safe_load(f) or {} + record_enabled = cfg.get("browser", {}).get("record_sessions", False) + + if not record_enabled: + return + + recordings_dir = hermes_home / "browser_recordings" + recordings_dir.mkdir(parents=True, exist_ok=True) + _cleanup_old_recordings(max_age_hours=72) + + import time + timestamp = time.strftime("%Y%m%d_%H%M%S") + recording_path = recordings_dir / f"session_{timestamp}_{task_id[:16]}.webm" + + result = _run_browser_command(task_id, "record", ["start", 
str(recording_path)]) + if result.get("success"): + _recording_sessions.add(task_id) + logger.info("Auto-recording browser session %s to %s", task_id, recording_path) + else: + logger.debug("Could not start auto-recording: %s", result.get("error")) + except Exception as e: + logger.debug("Auto-recording setup failed: %s", e) + + +def _maybe_stop_recording(task_id: str): + """Stop recording if one is active for this session.""" + if task_id not in _recording_sessions: + return + try: + result = _run_browser_command(task_id, "record", ["stop"]) + if result.get("success"): + path = result.get("data", {}).get("path", "") + logger.info("Saved browser recording for session %s: %s", task_id, path) + except Exception as e: + logger.debug("Could not stop recording for %s: %s", task_id, e) + finally: + _recording_sessions.discard(task_id) + + def browser_get_images(task_id: Optional[str] = None) -> str: """ Get all images on the current page. @@ -1290,7 +1471,7 @@ def browser_get_images(task_id: Optional[str] = None) -> str: }, ensure_ascii=False) -def browser_vision(question: str, task_id: Optional[str] = None) -> str: +def browser_vision(question: str, annotate: bool = False, task_id: Optional[str] = None) -> str: """ Take a screenshot of the current page and analyze it with vision AI. @@ -1304,6 +1485,7 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str: Args: question: What you want to know about the page visually + annotate: If True, overlay numbered [N] labels on interactive elements task_id: Task identifier for session isolation Returns: @@ -1316,7 +1498,7 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str: effective_task_id = task_id or "default" # Check auxiliary vision client - if _aux_vision_client is None or EXTRACTION_MODEL is None: + if _aux_vision_client is None or _DEFAULT_VISION_MODEL is None: return json.dumps({ "success": False, "error": "Browser vision unavailable: no auxiliary vision model configured. 
" @@ -1335,24 +1517,35 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str: _cleanup_old_screenshots(screenshots_dir, max_age_hours=24) # Take screenshot using agent-browser + screenshot_args = [str(screenshot_path)] + if annotate: + screenshot_args.insert(0, "--annotate") result = _run_browser_command( effective_task_id, "screenshot", - [str(screenshot_path)], + screenshot_args, timeout=30 ) if not result.get("success"): + error_detail = result.get("error", "Unknown error") + mode = "local" if _is_local_mode() else "cloud" return json.dumps({ "success": False, - "error": f"Failed to take screenshot: {result.get('error', 'Unknown error')}" + "error": f"Failed to take screenshot ({mode} mode): {error_detail}" }, ensure_ascii=False) # Check if screenshot file was created if not screenshot_path.exists(): + mode = "local" if _is_local_mode() else "cloud" return json.dumps({ "success": False, - "error": "Screenshot file was not created" + "error": ( + f"Screenshot file was not created at {screenshot_path} ({mode} mode). " + f"This may indicate a socket path issue (macOS /var/folders/), " + f"a missing Chromium install ('agent-browser install'), " + f"or a stale daemon process." 
+ ), }, ensure_ascii=False) # Read and convert to base64 @@ -1371,8 +1564,11 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str: # Use the sync auxiliary vision client directly from agent.auxiliary_client import auxiliary_max_tokens_param + vision_model = _get_vision_model() + logger.debug("browser_vision: analysing screenshot (%d bytes) with model=%s", + len(image_data), vision_model) response = _aux_vision_client.chat.completions.create( - model=EXTRACTION_MODEL, + model=vision_model, messages=[ { "role": "user", @@ -1387,23 +1583,27 @@ def browser_vision(question: str, task_id: Optional[str] = None) -> str: ) analysis = response.choices[0].message.content - return json.dumps({ + response_data = { "success": True, "analysis": analysis, "screenshot_path": str(screenshot_path), - }, ensure_ascii=False) + } + # Include annotation data if annotated screenshot was taken + if annotate and result.get("data", {}).get("annotations"): + response_data["annotations"] = result["data"]["annotations"] + return json.dumps(response_data, ensure_ascii=False) except Exception as e: - # Clean up screenshot on failure + # Keep the screenshot if it was captured successfully — the failure is + # in the LLM vision analysis, not the capture. Deleting a valid + # screenshot loses evidence the user might need. The 24-hour cleanup + # in _cleanup_old_screenshots prevents unbounded disk growth. + logger.warning("browser_vision failed: %s", e, exc_info=True) + error_info = {"success": False, "error": f"Error during vision analysis: {str(e)}"} if screenshot_path.exists(): - try: - screenshot_path.unlink() - except Exception: - pass - return json.dumps({ - "success": False, - "error": f"Error during vision analysis: {str(e)}" - }, ensure_ascii=False) + error_info["screenshot_path"] = str(screenshot_path) + error_info["note"] = "Screenshot was captured but vision analysis failed. You can still share it via MEDIA:." 
+ return json.dumps(error_info, ensure_ascii=False) def _cleanup_old_screenshots(screenshots_dir, max_age_hours=24): @@ -1421,6 +1621,25 @@ def _cleanup_old_screenshots(screenshots_dir, max_age_hours=24): pass # Non-critical — don't fail the screenshot operation +def _cleanup_old_recordings(max_age_hours=72): + """Remove browser recordings older than max_age_hours to prevent disk bloat.""" + import time + try: + hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) + recordings_dir = hermes_home / "browser_recordings" + if not recordings_dir.exists(): + return + cutoff = time.time() - (max_age_hours * 3600) + for f in recordings_dir.glob("session_*.webm"): + try: + if f.stat().st_mtime < cutoff: + f.unlink() + except Exception: + pass + except Exception: + pass + + # ============================================================================ # Cleanup and Management Functions # ============================================================================ @@ -1492,6 +1711,9 @@ def cleanup_browser(task_id: Optional[str] = None) -> None: bb_session_id = session_info.get("bb_session_id", "unknown") logger.debug("Found session for task %s: bb_session_id=%s", task_id, bb_session_id) + # Stop auto-recording before closing (saves the file) + _maybe_stop_recording(task_id) + # Try to close via agent-browser first (needs session in _active_sessions) try: _run_browser_command(task_id, "close", [], timeout=10) @@ -1517,7 +1739,7 @@ def cleanup_browser(task_id: Optional[str] = None) -> None: # Kill the daemon process and clean up socket directory session_name = session_info.get("session_name", "") if session_name: - socket_dir = os.path.join(tempfile.gettempdir(), f"agent-browser-{session_name}") + socket_dir = os.path.join(_socket_safe_tmpdir(), f"agent-browser-{session_name}") if os.path.exists(socket_dir): # agent-browser writes {session}.pid in the socket dir pid_file = os.path.join(socket_dir, f"{session_name}.pid") @@ -1707,6 +1929,13 @@ 
registry.register( name="browser_vision", toolset="browser", schema=_BROWSER_SCHEMA_MAP["browser_vision"], - handler=lambda args, **kw: browser_vision(question=args.get("question", ""), task_id=kw.get("task_id")), + handler=lambda args, **kw: browser_vision(question=args.get("question", ""), annotate=args.get("annotate", False), task_id=kw.get("task_id")), + check_fn=check_browser_requirements, +) +registry.register( + name="browser_console", + toolset="browser", + schema=_BROWSER_SCHEMA_MAP["browser_console"], + handler=lambda args, **kw: browser_console(clear=args.get("clear", False), task_id=kw.get("task_id")), check_fn=check_browser_requirements, ) diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index ea02cc8193..06c3e22519 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -385,7 +385,11 @@ def execute_code( # --- Set up temp directory with hermes_tools.py and script.py --- tmpdir = tempfile.mkdtemp(prefix="hermes_sandbox_") - sock_path = os.path.join(tempfile.gettempdir(), f"hermes_rpc_{uuid.uuid4().hex}.sock") + # Use /tmp on macOS to avoid the long /var/folders/... path that pushes + # Unix domain socket paths past the 104-byte macOS AF_UNIX limit. + # On Linux, tempfile.gettempdir() already returns /tmp. 
+ _sock_tmpdir = "/tmp" if sys.platform == "darwin" else tempfile.gettempdir() + sock_path = os.path.join(_sock_tmpdir, f"hermes_rpc_{uuid.uuid4().hex}.sock") tool_call_log: list = [] tool_call_counter = [0] # mutable so the RPC thread can increment diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index cfca76a76a..bdfa58d630 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -102,7 +102,9 @@ def schedule_cronjob( - "local": Save to local files only (~/.hermes/cron/output/) - "telegram": Send to Telegram home channel - "discord": Send to Discord home channel + - "signal": Send to Signal home channel - "telegram:123456": Send to specific chat ID + - "signal:+15551234567": Send to specific Signal number Returns: JSON with job_id, next_run time, and confirmation @@ -216,7 +218,7 @@ Use for: reminders, periodic checks, scheduled reports, automated maintenance."" }, "deliver": { "type": "string", - "description": "Where to send output: 'origin' (back to this chat), 'local' (files only), 'telegram', 'discord', or 'platform:chat_id'" + "description": "Where to send output: 'origin' (back to this chat), 'local' (files only), 'telegram', 'discord', 'signal', or 'platform:chat_id'" } }, "required": ["prompt", "schedule"] diff --git a/tools/file_operations.py b/tools/file_operations.py index 182d35f5f2..3f72c5fdb1 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -819,6 +819,14 @@ class ShellFileOperations(FileOperations): # Expand ~ and other shell paths path = self._expand_path(path) + # Validate that the path exists before searching + check = self._exec(f"test -e {self._escape_shell_arg(path)} && echo exists || echo not_found") + if "not_found" in check.stdout: + return SearchResult( + error=f"Path not found: {path}. 
Verify the path exists (use 'terminal' to check).", + total_count=0 + ) + if target == "files": return self._search_files(pattern, path, limit, offset) else: @@ -919,6 +927,11 @@ class ShellFileOperations(FileOperations): cmd = " ".join(cmd_parts) result = self._exec(cmd, timeout=60) + # rg exit codes: 0=matches found, 1=no matches, 2=error + if result.exit_code == 2 and not result.stdout.strip(): + error_msg = result.stderr.strip() if hasattr(result, 'stderr') and result.stderr else "Search error" + return SearchResult(error=f"Search failed: {error_msg}", total_count=0) + # Parse results based on output mode if output_mode == "files_only": all_files = [f for f in result.stdout.strip().split('\n') if f] @@ -1013,6 +1026,11 @@ class ShellFileOperations(FileOperations): cmd = " ".join(cmd_parts) result = self._exec(cmd, timeout=60) + # grep exit codes: 0=matches found, 1=no matches, 2=error + if result.exit_code == 2 and not result.stdout.strip(): + error_msg = result.stderr.strip() if hasattr(result, 'stderr') and result.stderr else "Search error" + return SearchResult(error=f"Search failed: {error_msg}", total_count=0) + if output_mode == "files_only": all_files = [f for f in result.stdout.strip().split('\n') if f] total = len(all_files) diff --git a/tools/file_tools.py b/tools/file_tools.py index 1a8bdcf256..3c6c215c61 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -7,6 +7,7 @@ import os import threading from typing import Optional from tools.file_operations import ShellFileOperations +from agent.redact import redact_sensitive_text logger = logging.getLogger(__name__) @@ -133,6 +134,8 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = try: file_ops = _get_file_ops(task_id) result = file_ops.read_file(path, offset, limit) + if result.content: + result.content = redact_sensitive_text(result.content) result_dict = result.to_dict() # Track reads to detect re-read loops (e.g. 
after context compression) @@ -224,7 +227,13 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, else: return json.dumps({"error": f"Unknown mode: {mode}"}) - return json.dumps(result.to_dict(), ensure_ascii=False) + result_dict = result.to_dict() + result_json = json.dumps(result_dict, ensure_ascii=False) + # Hint when old_string not found — saves iterations where the agent + # retries with stale content instead of re-reading the file. + if result_dict.get("error") and "Could not find" in str(result_dict["error"]): + result_json += "\n\n[Hint: old_string not found. Use read_file to verify the current content, or search_files to locate the text.]" + return result_json except Exception as e: return json.dumps({"error": str(e)}, ensure_ascii=False) @@ -258,6 +267,10 @@ def search_tool(pattern: str, target: str = "content", path: str = ".", pattern=pattern, path=path, target=target, file_glob=file_glob, limit=limit, offset=offset, output_mode=output_mode, context=context ) + if hasattr(result, 'matches'): + for m in result.matches: + if hasattr(m, 'content') and m.content: + m.content = redact_sensitive_text(m.content) result_dict = result.to_dict() if count > 1: @@ -266,7 +279,13 @@ def search_tool(pattern: str, target: str = "content", path: str = ".", "The results have not changed. Use the information you already have." ) - return json.dumps(result_dict, ensure_ascii=False) + result_json = json.dumps(result_dict, ensure_ascii=False) + # Hint when results were truncated — explicit next offset is clearer + # than relying on the model to infer it from total_count vs match count. + if result_dict.get("truncated"): + next_offset = offset + limit + result_json += f"\n\n[Hint: Results truncated. 
Use offset={next_offset} to see more, or narrow with a more specific pattern or file_glob.]" + return result_json except Exception as e: return json.dumps({"error": str(e)}, ensure_ascii=False) diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py index bc8f2d6508..8f5dbb61cd 100644 --- a/tools/send_message_tool.py +++ b/tools/send_message_tool.py @@ -8,6 +8,7 @@ human-friendly channel names to IDs. Works in both CLI and gateway contexts. import json import logging import os +import time logger = logging.getLogger(__name__) @@ -32,7 +33,7 @@ SEND_MESSAGE_SCHEMA = { }, "target": { "type": "string", - "description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', or 'platform:chat_id'. Examples: 'telegram', 'discord:#bot-home', 'slack:#engineering'" + "description": "Delivery target. Format: 'platform' (uses home channel), 'platform:#channel-name', or 'platform:chat_id'. Examples: 'telegram', 'discord:#bot-home', 'slack:#engineering', 'signal:+15551234567'" }, "message": { "type": "string", @@ -107,6 +108,7 @@ def _handle_send(args): "discord": Platform.DISCORD, "slack": Platform.SLACK, "whatsapp": Platform.WHATSAPP, + "signal": Platform.SIGNAL, } platform = platform_map.get(platform_name) if not platform: @@ -160,6 +162,8 @@ async def _send_to_platform(platform, pconfig, chat_id, message): return await _send_discord(pconfig.token, chat_id, message) elif platform == Platform.SLACK: return await _send_slack(pconfig.token, chat_id, message) + elif platform == Platform.SIGNAL: + return await _send_signal(pconfig.extra, chat_id, message) return {"error": f"Direct sending not yet implemented for {platform.value}"} @@ -219,6 +223,42 @@ async def _send_slack(token, chat_id, message): return {"error": f"Slack send failed: {e}"} +async def _send_signal(extra, chat_id, message): + """Send via signal-cli JSON-RPC API.""" + try: + import httpx + except ImportError: + return {"error": "httpx not installed"} + try: + http_url = 
extra.get("http_url", "http://127.0.0.1:8080").rstrip("/") + account = extra.get("account", "") + if not account: + return {"error": "Signal account not configured"} + + params = {"account": account, "message": message} + if chat_id.startswith("group:"): + params["groupId"] = chat_id[6:] + else: + params["recipient"] = [chat_id] + + payload = { + "jsonrpc": "2.0", + "method": "send", + "params": params, + "id": f"send_{int(time.time() * 1000)}", + } + + async with httpx.AsyncClient(timeout=30.0) as client: + resp = await client.post(f"{http_url}/api/v1/rpc", json=payload) + resp.raise_for_status() + data = resp.json() + if "error" in data: + return {"error": f"Signal RPC error: {data['error']}"} + return {"success": True, "platform": "signal", "chat_id": chat_id} + except Exception as e: + return {"error": f"Signal send failed: {e}"} + + def _check_send_message(): """Gate send_message on gateway running (always available on messaging platforms).""" platform = os.getenv("HERMES_SESSION_PLATFORM", "") diff --git a/tools/skills_guard.py b/tools/skills_guard.py index 34a4294e8b..0b6d7fee74 100644 --- a/tools/skills_guard.py +++ b/tools/skills_guard.py @@ -946,6 +946,11 @@ def llm_audit_skill(skill_path: Path, static_result: ScanResult, client = OpenAI( base_url=OPENROUTER_BASE_URL, api_key=api_key, + default_headers={ + "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", + "X-OpenRouter-Title": "Hermes Agent", + "X-OpenRouter-Categories": "productivity,cli-agent", + }, ) response = client.chat.completions.create( model=model, diff --git a/tools/skills_sync.py b/tools/skills_sync.py index 2061e2ce93..b89e45998f 100644 --- a/tools/skills_sync.py +++ b/tools/skills_sync.py @@ -69,10 +69,36 @@ def _read_manifest() -> Dict[str, str]: def _write_manifest(entries: Dict[str, str]): - """Write the manifest file in v2 format (name:hash).""" + """Write the manifest file atomically in v2 format (name:hash). 
+ + Uses a temp file + os.replace() to avoid corruption if the process + crashes or is interrupted mid-write. + """ + import tempfile + MANIFEST_FILE.parent.mkdir(parents=True, exist_ok=True) - lines = [f"{name}:{hash_val}" for name, hash_val in sorted(entries.items())] - MANIFEST_FILE.write_text("\n".join(lines) + "\n", encoding="utf-8") + data = "\n".join(f"{name}:{hash_val}" for name, hash_val in sorted(entries.items())) + "\n" + + try: + fd, tmp_path = tempfile.mkstemp( + dir=str(MANIFEST_FILE.parent), + prefix=".bundled_manifest_", + suffix=".tmp", + ) + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + f.write(data) + f.flush() + os.fsync(f.fileno()) + os.replace(tmp_path, MANIFEST_FILE) + except BaseException: + try: + os.unlink(tmp_path) + except OSError: + pass + raise + except Exception as e: + logger.debug("Failed to write skills manifest %s: %s", MANIFEST_FILE, e, exc_info=True) def _discover_bundled_skills(bundled_dir: Path) -> List[Tuple[str, Path]]: diff --git a/tools/vision_tools.py b/tools/vision_tools.py index f3744e95f1..718e173638 100644 --- a/tools/vision_tools.py +++ b/tools/vision_tools.py @@ -468,7 +468,9 @@ def _handle_vision_analyze(args, **kw): image_url = args.get("image_url", "") question = args.get("question", "") full_prompt = f"Fully describe and explain everything about this image, then answer the following question:\n\n{question}" - model = DEFAULT_VISION_MODEL or "google/gemini-3-flash-preview" + model = (os.getenv("AUXILIARY_VISION_MODEL", "").strip() + or DEFAULT_VISION_MODEL + or "google/gemini-3-flash-preview") return vision_analyze_tool(image_url, full_prompt, model) diff --git a/tools/web_tools.py b/tools/web_tools.py index 0fd0f4107b..e99d94fb0d 100644 --- a/tools/web_tools.py +++ b/tools/web_tools.py @@ -85,7 +85,13 @@ DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000 # Resolve async auxiliary client at module level. # Handles Codex Responses API adapter transparently. 
-_aux_async_client, DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client() +_aux_async_client, _DEFAULT_SUMMARIZER_MODEL = get_async_text_auxiliary_client("web_extract") + +# Allow per-task override via config.yaml auxiliary.web_extract_model +DEFAULT_SUMMARIZER_MODEL = ( + os.getenv("AUXILIARY_WEB_EXTRACT_MODEL", "").strip() + or _DEFAULT_SUMMARIZER_MODEL +) _debug = DebugSession("web_tools", env_var="WEB_TOOLS_DEBUG") diff --git a/toolsets.py b/toolsets.py index 8589a35eaf..87b48c7ecb 100644 --- a/toolsets.py +++ b/toolsets.py @@ -1,511 +1,517 @@ -#!/usr/bin/env python3 -""" -Toolsets Module - -This module provides a flexible system for defining and managing tool aliases/toolsets. -Toolsets allow you to group tools together for specific scenarios and can be composed -from individual tools or other toolsets. - -Features: -- Define custom toolsets with specific tools -- Compose toolsets from other toolsets -- Built-in common toolsets for typical use cases -- Easy extension for new toolsets -- Support for dynamic toolset resolution - -Usage: - from toolsets import get_toolset, resolve_toolset, get_all_toolsets - - # Get tools for a specific toolset - tools = get_toolset("research") - - # Resolve a toolset to get all tool names (including from composed toolsets) - all_tools = resolve_toolset("full_stack") -""" - -from typing import List, Dict, Any, Set, Optional - - -# Shared tool list for CLI and all messaging platform toolsets. -# Edit this once to update all platforms simultaneously. 
-_HERMES_CORE_TOOLS = [ - # Web - "web_search", "web_extract", - # Terminal + process management - "terminal", "process", - # File manipulation - "read_file", "write_file", "patch", "search_files", - # Vision + image generation - "vision_analyze", "image_generate", - # MoA - "mixture_of_agents", - # Skills - "skills_list", "skill_view", "skill_manage", - # Browser automation - "browser_navigate", "browser_snapshot", "browser_click", - "browser_type", "browser_scroll", "browser_back", - "browser_press", "browser_close", "browser_get_images", - "browser_vision", - # Text-to-speech - "text_to_speech", - # Planning & memory - "todo", "memory", - # Session history search - "session_search", - # Clarifying questions - "clarify", - # Code execution + delegation - "execute_code", "delegate_task", - # Cronjob management - "schedule_cronjob", "list_cronjobs", "remove_cronjob", - # Cross-platform messaging (gated on gateway running via check_fn) - "send_message", - # Honcho user context (gated on honcho being active via check_fn) - "query_user_context", - # Home Assistant smart home control (gated on HASS_TOKEN via check_fn) - "ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service", -] - - -# Core toolset definitions -# These can include individual tools or reference other toolsets -TOOLSETS = { - # Basic toolsets - individual tool categories - "web": { - "description": "Web research and content extraction tools", - "tools": ["web_search", "web_extract"], - "includes": [] # No other toolsets included - }, - - "search": { - "description": "Web search only (no content extraction/scraping)", - "tools": ["web_search"], - "includes": [] - }, - - "vision": { - "description": "Image analysis and vision tools", - "tools": ["vision_analyze"], - "includes": [] - }, - - "image_gen": { - "description": "Creative generation tools (images)", - "tools": ["image_generate"], - "includes": [] - }, - - "terminal": { - "description": "Terminal/command execution and process 
management tools", - "tools": ["terminal", "process"], - "includes": [] - }, - - "moa": { - "description": "Advanced reasoning and problem-solving tools", - "tools": ["mixture_of_agents"], - "includes": [] - }, - - "skills": { - "description": "Access, create, edit, and manage skill documents with specialized instructions and knowledge", - "tools": ["skills_list", "skill_view", "skill_manage"], - "includes": [] - }, - - "browser": { - "description": "Browser automation for web interaction (navigate, click, type, scroll, iframes, hold-click) with web search for finding URLs", - "tools": [ - "browser_navigate", "browser_snapshot", "browser_click", - "browser_type", "browser_scroll", "browser_back", - "browser_press", "browser_close", "browser_get_images", - "browser_vision", "web_search" - ], - "includes": [] - }, - - "cronjob": { - "description": "Cronjob management tools - schedule, list, and remove automated tasks", - "tools": ["schedule_cronjob", "list_cronjobs", "remove_cronjob"], - "includes": [] - }, - - "rl": { - "description": "RL training tools for running reinforcement learning on Tinker-Atropos", - "tools": [ - "rl_list_environments", "rl_select_environment", - "rl_get_current_config", "rl_edit_config", - "rl_start_training", "rl_check_status", - "rl_stop_training", "rl_get_results", - "rl_list_runs", "rl_test_inference" - ], - "includes": [] - }, - - "file": { - "description": "File manipulation tools: read, write, patch (with fuzzy matching), and search (content + files)", - "tools": ["read_file", "write_file", "patch", "search_files"], - "includes": [] - }, - - "tts": { - "description": "Text-to-speech: convert text to audio with Edge TTS (free), ElevenLabs, or OpenAI", - "tools": ["text_to_speech"], - "includes": [] - }, - - "todo": { - "description": "Task planning and tracking for multi-step work", - "tools": ["todo"], - "includes": [] - }, - - "memory": { - "description": "Persistent memory across sessions (personal notes + user profile)", - 
"tools": ["memory"], - "includes": [] - }, - - "session_search": { - "description": "Search and recall past conversations with summarization", - "tools": ["session_search"], - "includes": [] - }, - - "clarify": { - "description": "Ask the user clarifying questions (multiple-choice or open-ended)", - "tools": ["clarify"], - "includes": [] - }, - - "code_execution": { - "description": "Run Python scripts that call tools programmatically (reduces LLM round trips)", - "tools": ["execute_code"], - "includes": [] - }, - - "delegation": { - "description": "Spawn subagents with isolated context for complex subtasks", - "tools": ["delegate_task"], - "includes": [] - }, - - "honcho": { - "description": "Honcho AI-native memory for persistent cross-session user modeling", - "tools": ["query_user_context"], - "includes": [] - }, - - "homeassistant": { - "description": "Home Assistant smart home control and monitoring", - "tools": ["ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service"], - "includes": [] - }, - - - # Scenario-specific toolsets - - "debugging": { - "description": "Debugging and troubleshooting toolkit", - "tools": ["terminal", "process"], - "includes": ["web", "file"] # For searching error messages and solutions, and file operations - }, - - "safe": { - "description": "Safe toolkit without terminal access", - "tools": ["mixture_of_agents"], - "includes": ["web", "vision", "image_gen"] - }, - - # ========================================================================== - # Full Hermes toolsets (CLI + messaging platforms) - # - # All platforms share the same core tools. Messaging platforms add - # All platforms share the same core tools (including send_message, - # which is gated on gateway running via its check_fn). 
- # ========================================================================== - - "hermes-cli": { - "description": "Full interactive CLI toolset - all default tools plus cronjob management", - "tools": _HERMES_CORE_TOOLS, - "includes": [] - }, - - "hermes-telegram": { - "description": "Telegram bot toolset - full access for personal use (terminal has safety checks)", - "tools": _HERMES_CORE_TOOLS, - "includes": [] - }, - - "hermes-discord": { - "description": "Discord bot toolset - full access (terminal has safety checks via dangerous command approval)", - "tools": _HERMES_CORE_TOOLS, - "includes": [] - }, - - "hermes-whatsapp": { - "description": "WhatsApp bot toolset - similar to Telegram (personal messaging, more trusted)", - "tools": _HERMES_CORE_TOOLS, - "includes": [] - }, - - "hermes-slack": { - "description": "Slack bot toolset - full access for workspace use (terminal has safety checks)", - "tools": _HERMES_CORE_TOOLS, - "includes": [] - }, - - "hermes-homeassistant": { - "description": "Home Assistant bot toolset - smart home event monitoring and control", - "tools": _HERMES_CORE_TOOLS, - "includes": [] - }, - - "hermes-gateway": { - "description": "Gateway toolset - union of all messaging platform tools", - "tools": [], - "includes": ["hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-homeassistant"] - } -} - - - -def get_toolset(name: str) -> Optional[Dict[str, Any]]: - """ - Get a toolset definition by name. - - Args: - name (str): Name of the toolset - - Returns: - Dict: Toolset definition with description, tools, and includes - None: If toolset not found - """ - # Return toolset definition - return TOOLSETS.get(name) - - -def resolve_toolset(name: str, visited: Set[str] = None) -> List[str]: - """ - Recursively resolve a toolset to get all tool names. - - This function handles toolset composition by recursively resolving - included toolsets and combining all tools. 
- - Args: - name (str): Name of the toolset to resolve - visited (Set[str]): Set of already visited toolsets (for cycle detection) - - Returns: - List[str]: List of all tool names in the toolset - """ - if visited is None: - visited = set() - - # Special aliases that represent all tools across every toolset - # This ensures future toolsets are automatically included without changes. - if name in {"all", "*"}: - all_tools: Set[str] = set() - for toolset_name in get_toolset_names(): - # Use a fresh visited set per branch to avoid cross-branch contamination - resolved = resolve_toolset(toolset_name, visited.copy()) - all_tools.update(resolved) - return list(all_tools) - - # Check for cycles - if name in visited: - print(f"⚠️ Circular dependency detected in toolset '{name}'") - return [] - - visited.add(name) - - # Get toolset definition - toolset = TOOLSETS.get(name) - if not toolset: - return [] - - # Collect direct tools - tools = set(toolset.get("tools", [])) - - # Recursively resolve included toolsets - for included_name in toolset.get("includes", []): - included_tools = resolve_toolset(included_name, visited.copy()) - tools.update(included_tools) - - return list(tools) - - -def resolve_multiple_toolsets(toolset_names: List[str]) -> List[str]: - """ - Resolve multiple toolsets and combine their tools. - - Args: - toolset_names (List[str]): List of toolset names to resolve - - Returns: - List[str]: Combined list of all tool names (deduplicated) - """ - all_tools = set() - - for name in toolset_names: - tools = resolve_toolset(name) - all_tools.update(tools) - - return list(all_tools) - - -def get_all_toolsets() -> Dict[str, Dict[str, Any]]: - """ - Get all available toolsets with their definitions. - - Returns: - Dict: All toolset definitions - """ - return TOOLSETS.copy() - - -def get_toolset_names() -> List[str]: - """ - Get names of all available toolsets (excluding aliases). 
- - Returns: - List[str]: List of toolset names - """ - return list(TOOLSETS.keys()) - - - - -def validate_toolset(name: str) -> bool: - """ - Check if a toolset name is valid. - - Args: - name (str): Toolset name to validate - - Returns: - bool: True if valid, False otherwise - """ - # Accept special alias names for convenience - if name in {"all", "*"}: - return True - return name in TOOLSETS - - -def create_custom_toolset( - name: str, - description: str, - tools: List[str] = None, - includes: List[str] = None -) -> None: - """ - Create a custom toolset at runtime. - - Args: - name (str): Name for the new toolset - description (str): Description of the toolset - tools (List[str]): Direct tools to include - includes (List[str]): Other toolsets to include - """ - TOOLSETS[name] = { - "description": description, - "tools": tools or [], - "includes": includes or [] - } - - - - -def get_toolset_info(name: str) -> Dict[str, Any]: - """ - Get detailed information about a toolset including resolved tools. - - Args: - name (str): Toolset name - - Returns: - Dict: Detailed toolset information - """ - toolset = get_toolset(name) - if not toolset: - return None - - resolved_tools = resolve_toolset(name) - - return { - "name": name, - "description": toolset["description"], - "direct_tools": toolset["tools"], - "includes": toolset["includes"], - "resolved_tools": resolved_tools, - "tool_count": len(resolved_tools), - "is_composite": len(toolset["includes"]) > 0 - } - - -def print_toolset_tree(name: str, indent: int = 0) -> None: - """ - Print a tree view of a toolset and its composition. 
- - Args: - name (str): Toolset name - indent (int): Current indentation level - """ - prefix = " " * indent - toolset = get_toolset(name) - - if not toolset: - print(f"{prefix}❌ Unknown toolset: {name}") - return - - # Print toolset name and description - print(f"{prefix}📦 {name}: {toolset['description']}") - - # Print direct tools - if toolset["tools"]: - print(f"{prefix} 🔧 Tools: {', '.join(toolset['tools'])}") - - # Print included toolsets - if toolset["includes"]: - print(f"{prefix} 📂 Includes:") - for included in toolset["includes"]: - print_toolset_tree(included, indent + 2) - - -if __name__ == "__main__": - print("Toolsets System Demo") - print("=" * 60) - - print("\nAvailable Toolsets:") - print("-" * 40) - for name, toolset in get_all_toolsets().items(): - info = get_toolset_info(name) - composite = "[composite]" if info["is_composite"] else "[leaf]" - print(f" {composite} {name:20} - {toolset['description']}") - print(f" Tools: {len(info['resolved_tools'])} total") - - print("\nToolset Resolution Examples:") - print("-" * 40) - for name in ["web", "terminal", "safe", "debugging"]: - tools = resolve_toolset(name) - print(f"\n {name}:") - print(f" Resolved to {len(tools)} tools: {', '.join(sorted(tools))}") - - print("\nMultiple Toolset Resolution:") - print("-" * 40) - combined = resolve_multiple_toolsets(["web", "vision", "terminal"]) - print(f" Combining ['web', 'vision', 'terminal']:") - print(f" Result: {', '.join(sorted(combined))}") - - print("\nCustom Toolset Creation:") - print("-" * 40) - create_custom_toolset( - name="my_custom", - description="My custom toolset for specific tasks", - tools=["web_search"], - includes=["terminal", "vision"] - ) - custom_info = get_toolset_info("my_custom") - print(f" Created 'my_custom' toolset:") - print(f" Description: {custom_info['description']}") - print(f" Resolved tools: {', '.join(custom_info['resolved_tools'])}") +#!/usr/bin/env python3 +""" +Toolsets Module + +This module provides a flexible system for 
defining and managing tool aliases/toolsets. +Toolsets allow you to group tools together for specific scenarios and can be composed +from individual tools or other toolsets. + +Features: +- Define custom toolsets with specific tools +- Compose toolsets from other toolsets +- Built-in common toolsets for typical use cases +- Easy extension for new toolsets +- Support for dynamic toolset resolution + +Usage: + from toolsets import get_toolset, resolve_toolset, get_all_toolsets + + # Get the definition (description, tools, includes) of a specific toolset + web_toolset = get_toolset("web") + + # Resolve a toolset to get all tool names (including from composed toolsets) + all_tools = resolve_toolset("debugging") +""" + +from typing import List, Dict, Any, Set, Optional + + +# Shared tool list for CLI and all messaging platform toolsets. +# Edit this once to update all platforms simultaneously. +_HERMES_CORE_TOOLS = [ + # Web + "web_search", "web_extract", + # Terminal + process management + "terminal", "process", + # File manipulation + "read_file", "write_file", "patch", "search_files", + # Vision + image generation + "vision_analyze", "image_generate", + # MoA + "mixture_of_agents", + # Skills + "skills_list", "skill_view", "skill_manage", + # Browser automation + "browser_navigate", "browser_snapshot", "browser_click", + "browser_type", "browser_scroll", "browser_back", + "browser_press", "browser_close", "browser_get_images", + "browser_vision", + # Text-to-speech + "text_to_speech", + # Planning & memory + "todo", "memory", + # Session history search + "session_search", + # Clarifying questions + "clarify", + # Code execution + delegation + "execute_code", "delegate_task", + # Cronjob management + "schedule_cronjob", "list_cronjobs", "remove_cronjob", + # Cross-platform messaging (gated on gateway running via check_fn) + "send_message", + # Honcho user context (gated on honcho being active via check_fn) + "query_user_context", + # Home Assistant smart home control (gated on HASS_TOKEN via check_fn) + 
"ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service", +] + + +# Core toolset definitions +# These can include individual tools or reference other toolsets +TOOLSETS = { + # Basic toolsets - individual tool categories + "web": { + "description": "Web research and content extraction tools", + "tools": ["web_search", "web_extract"], + "includes": [] # No other toolsets included + }, + + "search": { + "description": "Web search only (no content extraction/scraping)", + "tools": ["web_search"], + "includes": [] + }, + + "vision": { + "description": "Image analysis and vision tools", + "tools": ["vision_analyze"], + "includes": [] + }, + + "image_gen": { + "description": "Creative generation tools (images)", + "tools": ["image_generate"], + "includes": [] + }, + + "terminal": { + "description": "Terminal/command execution and process management tools", + "tools": ["terminal", "process"], + "includes": [] + }, + + "moa": { + "description": "Advanced reasoning and problem-solving tools", + "tools": ["mixture_of_agents"], + "includes": [] + }, + + "skills": { + "description": "Access, create, edit, and manage skill documents with specialized instructions and knowledge", + "tools": ["skills_list", "skill_view", "skill_manage"], + "includes": [] + }, + + "browser": { + "description": "Browser automation for web interaction (navigate, click, type, scroll, iframes, hold-click) with web search for finding URLs", + "tools": [ + "browser_navigate", "browser_snapshot", "browser_click", + "browser_type", "browser_scroll", "browser_back", + "browser_press", "browser_close", "browser_get_images", + "browser_vision", "web_search" + ], + "includes": [] + }, + + "cronjob": { + "description": "Cronjob management tools - schedule, list, and remove automated tasks", + "tools": ["schedule_cronjob", "list_cronjobs", "remove_cronjob"], + "includes": [] + }, + + "rl": { + "description": "RL training tools for running reinforcement learning on Tinker-Atropos", + "tools": [ + 
"rl_list_environments", "rl_select_environment", + "rl_get_current_config", "rl_edit_config", + "rl_start_training", "rl_check_status", + "rl_stop_training", "rl_get_results", + "rl_list_runs", "rl_test_inference" + ], + "includes": [] + }, + + "file": { + "description": "File manipulation tools: read, write, patch (with fuzzy matching), and search (content + files)", + "tools": ["read_file", "write_file", "patch", "search_files"], + "includes": [] + }, + + "tts": { + "description": "Text-to-speech: convert text to audio with Edge TTS (free), ElevenLabs, or OpenAI", + "tools": ["text_to_speech"], + "includes": [] + }, + + "todo": { + "description": "Task planning and tracking for multi-step work", + "tools": ["todo"], + "includes": [] + }, + + "memory": { + "description": "Persistent memory across sessions (personal notes + user profile)", + "tools": ["memory"], + "includes": [] + }, + + "session_search": { + "description": "Search and recall past conversations with summarization", + "tools": ["session_search"], + "includes": [] + }, + + "clarify": { + "description": "Ask the user clarifying questions (multiple-choice or open-ended)", + "tools": ["clarify"], + "includes": [] + }, + + "code_execution": { + "description": "Run Python scripts that call tools programmatically (reduces LLM round trips)", + "tools": ["execute_code"], + "includes": [] + }, + + "delegation": { + "description": "Spawn subagents with isolated context for complex subtasks", + "tools": ["delegate_task"], + "includes": [] + }, + + "honcho": { + "description": "Honcho AI-native memory for persistent cross-session user modeling", + "tools": ["query_user_context"], + "includes": [] + }, + + "homeassistant": { + "description": "Home Assistant smart home control and monitoring", + "tools": ["ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service"], + "includes": [] + }, + + + # Scenario-specific toolsets + + "debugging": { + "description": "Debugging and troubleshooting toolkit", + 
"tools": ["terminal", "process"], + "includes": ["web", "file"] # For searching error messages and solutions, and file operations + }, + + "safe": { + "description": "Safe toolkit without terminal access", + "tools": ["mixture_of_agents"], + "includes": ["web", "vision", "image_gen"] + }, + + # ========================================================================== + # Full Hermes toolsets (CLI + messaging platforms) + # + # The CLI and every messaging platform are configured identically: + # all platforms share the same core tools (including send_message, + # which is gated on gateway running via its check_fn). + # ========================================================================== + + "hermes-cli": { + "description": "Full interactive CLI toolset - all default tools plus cronjob management", + "tools": _HERMES_CORE_TOOLS, + "includes": [] + }, + + "hermes-telegram": { + "description": "Telegram bot toolset - full access for personal use (terminal has safety checks)", + "tools": _HERMES_CORE_TOOLS, + "includes": [] + }, + + "hermes-discord": { + "description": "Discord bot toolset - full access (terminal has safety checks via dangerous command approval)", + "tools": _HERMES_CORE_TOOLS, + "includes": [] + }, + + "hermes-whatsapp": { + "description": "WhatsApp bot toolset - similar to Telegram (personal messaging, more trusted)", + "tools": _HERMES_CORE_TOOLS, + "includes": [] + }, + + "hermes-slack": { + "description": "Slack bot toolset - full access for workspace use (terminal has safety checks)", + "tools": _HERMES_CORE_TOOLS, + "includes": [] + }, + + "hermes-signal": { + "description": "Signal bot toolset - encrypted messaging platform (full access)", + "tools": _HERMES_CORE_TOOLS, + "includes": [] + }, + + "hermes-homeassistant": { + "description": "Home Assistant bot toolset - smart home event monitoring and control", + "tools": _HERMES_CORE_TOOLS, + "includes": [] + }, + + "hermes-gateway": { + "description": "Gateway toolset - union of all 
messaging platform tools", + "tools": [], + "includes": ["hermes-telegram", "hermes-discord", "hermes-whatsapp", "hermes-slack", "hermes-signal", "hermes-homeassistant"] + } +} + + + +def get_toolset(name: str) -> Optional[Dict[str, Any]]: + """ + Get a toolset definition by name. + + Args: + name (str): Name of the toolset + + Returns: + Dict: Toolset definition with description, tools, and includes + None: If toolset not found + """ + # Return toolset definition + return TOOLSETS.get(name) + + +def resolve_toolset(name: str, visited: Set[str] = None) -> List[str]: + """ + Recursively resolve a toolset to get all tool names. + + This function handles toolset composition by recursively resolving + included toolsets and combining all tools. + + Args: + name (str): Name of the toolset to resolve + visited (Set[str]): Set of already visited toolsets (for cycle detection) + + Returns: + List[str]: List of all tool names in the toolset + """ + if visited is None: + visited = set() + + # Special aliases that represent all tools across every toolset + # This ensures future toolsets are automatically included without changes. 
+ if name in {"all", "*"}: + all_tools: Set[str] = set() + for toolset_name in get_toolset_names(): + # Use a fresh visited set per branch to avoid cross-branch contamination + resolved = resolve_toolset(toolset_name, visited.copy()) + all_tools.update(resolved) + return list(all_tools) + + # Check for cycles + if name in visited: + print(f"⚠️ Circular dependency detected in toolset '{name}'") + return [] + + visited.add(name) + + # Get toolset definition + toolset = TOOLSETS.get(name) + if not toolset: + return [] + + # Collect direct tools + tools = set(toolset.get("tools", [])) + + # Recursively resolve included toolsets + for included_name in toolset.get("includes", []): + included_tools = resolve_toolset(included_name, visited.copy()) + tools.update(included_tools) + + return list(tools) + + +def resolve_multiple_toolsets(toolset_names: List[str]) -> List[str]: + """ + Resolve multiple toolsets and combine their tools. + + Args: + toolset_names (List[str]): List of toolset names to resolve + + Returns: + List[str]: Combined list of all tool names (deduplicated) + """ + all_tools = set() + + for name in toolset_names: + tools = resolve_toolset(name) + all_tools.update(tools) + + return list(all_tools) + + +def get_all_toolsets() -> Dict[str, Dict[str, Any]]: + """ + Get all available toolsets with their definitions. + + Returns: + Dict: All toolset definitions + """ + return TOOLSETS.copy() + + +def get_toolset_names() -> List[str]: + """ + Get names of all available toolsets (excluding aliases). + + Returns: + List[str]: List of toolset names + """ + return list(TOOLSETS.keys()) + + + + +def validate_toolset(name: str) -> bool: + """ + Check if a toolset name is valid. 
+ + Args: + name (str): Toolset name to validate + + Returns: + bool: True if valid, False otherwise + """ + # Accept special alias names for convenience + if name in {"all", "*"}: + return True + return name in TOOLSETS + + +def create_custom_toolset( + name: str, + description: str, + tools: List[str] = None, + includes: List[str] = None +) -> None: + """ + Create a custom toolset at runtime. + + Args: + name (str): Name for the new toolset + description (str): Description of the toolset + tools (List[str]): Direct tools to include + includes (List[str]): Other toolsets to include + """ + TOOLSETS[name] = { + "description": description, + "tools": tools or [], + "includes": includes or [] + } + + + + +def get_toolset_info(name: str) -> Dict[str, Any]: + """ + Get detailed information about a toolset including resolved tools. + + Args: + name (str): Toolset name + + Returns: + Dict: Detailed toolset information + """ + toolset = get_toolset(name) + if not toolset: + return None + + resolved_tools = resolve_toolset(name) + + return { + "name": name, + "description": toolset["description"], + "direct_tools": toolset["tools"], + "includes": toolset["includes"], + "resolved_tools": resolved_tools, + "tool_count": len(resolved_tools), + "is_composite": len(toolset["includes"]) > 0 + } + + +def print_toolset_tree(name: str, indent: int = 0) -> None: + """ + Print a tree view of a toolset and its composition. 
+ + Args: + name (str): Toolset name + indent (int): Current indentation level + """ + prefix = " " * indent + toolset = get_toolset(name) + + if not toolset: + print(f"{prefix}❌ Unknown toolset: {name}") + return + + # Print toolset name and description + print(f"{prefix}📦 {name}: {toolset['description']}") + + # Print direct tools + if toolset["tools"]: + print(f"{prefix} 🔧 Tools: {', '.join(toolset['tools'])}") + + # Print included toolsets + if toolset["includes"]: + print(f"{prefix} 📂 Includes:") + for included in toolset["includes"]: + print_toolset_tree(included, indent + 2) + + +if __name__ == "__main__": + print("Toolsets System Demo") + print("=" * 60) + + print("\nAvailable Toolsets:") + print("-" * 40) + for name, toolset in get_all_toolsets().items(): + info = get_toolset_info(name) + composite = "[composite]" if info["is_composite"] else "[leaf]" + print(f" {composite} {name:20} - {toolset['description']}") + print(f" Tools: {len(info['resolved_tools'])} total") + + print("\nToolset Resolution Examples:") + print("-" * 40) + for name in ["web", "terminal", "safe", "debugging"]: + tools = resolve_toolset(name) + print(f"\n {name}:") + print(f" Resolved to {len(tools)} tools: {', '.join(sorted(tools))}") + + print("\nMultiple Toolset Resolution:") + print("-" * 40) + combined = resolve_multiple_toolsets(["web", "vision", "terminal"]) + print(f" Combining ['web', 'vision', 'terminal']:") + print(f" Result: {', '.join(sorted(combined))}") + + print("\nCustom Toolset Creation:") + print("-" * 40) + create_custom_toolset( + name="my_custom", + description="My custom toolset for specific tasks", + tools=["web_search"], + includes=["terminal", "vision"] + ) + custom_info = get_toolset_info("my_custom") + print(f" Created 'my_custom' toolset:") + print(f" Description: {custom_info['description']}") + print(f" Resolved tools: {', '.join(custom_info['resolved_tools'])}") diff --git a/trajectory_compressor.py b/trajectory_compressor.py index 
dedae1ade0..3f49c617bf 100644 --- a/trajectory_compressor.py +++ b/trajectory_compressor.py @@ -351,16 +351,27 @@ class TrajectoryCompressor: from openai import OpenAI, AsyncOpenAI + # OpenRouter app attribution headers (only for OpenRouter endpoints) + extra = {} + if "openrouter" in self.config.base_url.lower(): + extra["default_headers"] = { + "HTTP-Referer": "https://github.com/NousResearch/hermes-agent", + "X-OpenRouter-Title": "Hermes Agent", + "X-OpenRouter-Categories": "productivity,cli-agent", + } + # Sync client (for backwards compatibility) self.client = OpenAI( api_key=api_key, - base_url=self.config.base_url + base_url=self.config.base_url, + **extra, ) # Async client for parallel processing self.async_client = AsyncOpenAI( api_key=api_key, - base_url=self.config.base_url + base_url=self.config.base_url, + **extra, ) print(f"✅ Initialized OpenRouter client: {self.config.summarization_model}") diff --git a/website/docs/getting-started/learning-path.md b/website/docs/getting-started/learning-path.md new file mode 100644 index 0000000000..2c08f077e4 --- /dev/null +++ b/website/docs/getting-started/learning-path.md @@ -0,0 +1,150 @@ +--- +sidebar_position: 3 +title: 'Learning Path' +description: 'Choose your learning path through the Hermes Agent documentation based on your experience level and goals.' +--- + +# Learning Path + +Hermes Agent can do a lot — CLI assistant, Telegram/Discord bot, task automation, RL training, and more. This page helps you figure out where to start and what to read based on your experience level and what you're trying to accomplish. + +:::tip Start Here +If you haven't installed Hermes Agent yet, begin with the [Installation guide](/docs/getting-started/installation) and then run through the [Quickstart](/docs/getting-started/quickstart). Everything below assumes you have a working installation. 
+::: + +## How to Use This Page + +- **Know your level?** Jump to the [experience-level table](#by-experience-level) and follow the reading order for your tier. +- **Have a specific goal?** Skip to [By Use Case](#by-use-case) and find the scenario that matches. +- **Just browsing?** Check the [Key Features](#key-features-at-a-glance) table for a quick overview of everything Hermes Agent can do. + +## By Experience Level + +| Level | Goal | Recommended Reading | Time Estimate | +|---|---|---|---| +| **Beginner** | Get up and running, have basic conversations, use built-in tools | [Installation](/docs/getting-started/installation) → [Quickstart](/docs/getting-started/quickstart) → [CLI Usage](/docs/user-guide/cli) → [Configuration](/docs/user-guide/configuration) | ~1 hour | +| **Intermediate** | Set up messaging bots, use advanced features like memory, cron jobs, and skills | [Sessions](/docs/user-guide/sessions) → [Messaging](/docs/user-guide/messaging) → [Tools](/docs/user-guide/features/tools) → [Skills](/docs/user-guide/features/skills) → [Memory](/docs/user-guide/features/memory) → [Cron](/docs/user-guide/features/cron) | ~2–3 hours | +| **Advanced** | Build custom tools, create skills, train models with RL, contribute to the project | [Architecture](/docs/developer-guide/architecture) → [Adding Tools](/docs/developer-guide/adding-tools) → [Creating Skills](/docs/developer-guide/creating-skills) → [RL Training](/docs/user-guide/features/rl-training) → [Contributing](/docs/developer-guide/contributing) | ~4–6 hours | + +## By Use Case + +Pick the scenario that matches what you want to do. Each one links you to the relevant docs in the order you should read them. + +### "I want a CLI coding assistant" + +Use Hermes Agent as an interactive terminal assistant for writing, reviewing, and running code. + +1. [Installation](/docs/getting-started/installation) +2. [Quickstart](/docs/getting-started/quickstart) +3. [CLI Usage](/docs/user-guide/cli) +4. 
[Code Execution](/docs/user-guide/features/code-execution) +5. [Context Files](/docs/user-guide/features/context-files) +6. [Tips & Tricks](/docs/guides/tips) + +:::tip +Pass files directly into your conversation with context files. Hermes Agent can read, edit, and run code in your projects. +::: + +### "I want a Telegram/Discord bot" + +Deploy Hermes Agent as a bot on your favorite messaging platform. + +1. [Installation](/docs/getting-started/installation) +2. [Configuration](/docs/user-guide/configuration) +3. [Messaging Overview](/docs/user-guide/messaging) +4. [Telegram Setup](/docs/user-guide/messaging/telegram) +5. [Discord Setup](/docs/user-guide/messaging/discord) +6. [Security](/docs/user-guide/security) + +For full project examples, see: +- [Daily Briefing Bot](/docs/guides/daily-briefing-bot) +- [Team Telegram Assistant](/docs/guides/team-telegram-assistant) + +### "I want to automate tasks" + +Schedule recurring tasks, run batch jobs, or chain agent actions together. + +1. [Quickstart](/docs/getting-started/quickstart) +2. [Cron Scheduling](/docs/user-guide/features/cron) +3. [Batch Processing](/docs/user-guide/features/batch-processing) +4. [Delegation](/docs/user-guide/features/delegation) +5. [Hooks](/docs/user-guide/features/hooks) + +:::tip +Cron jobs let Hermes Agent run tasks on a schedule — daily summaries, periodic checks, automated reports — without you being present. +::: + +### "I want to build custom tools/skills" + +Extend Hermes Agent with your own tools and reusable skill packages. + +1. [Tools Overview](/docs/user-guide/features/tools) +2. [Skills Overview](/docs/user-guide/features/skills) +3. [MCP (Model Context Protocol)](/docs/user-guide/features/mcp) +4. [Architecture](/docs/developer-guide/architecture) +5. [Adding Tools](/docs/developer-guide/adding-tools) +6. [Creating Skills](/docs/developer-guide/creating-skills) + +:::tip +Tools are individual functions the agent can call. 
Skills are bundles of tools, prompts, and configuration packaged together. Start with tools, graduate to skills. +::: + +### "I want to train models" + +Use reinforcement learning to fine-tune model behavior with Hermes Agent's built-in RL training pipeline. + +1. [Quickstart](/docs/getting-started/quickstart) +2. [Configuration](/docs/user-guide/configuration) +3. [RL Training](/docs/user-guide/features/rl-training) +4. [Provider Routing](/docs/user-guide/features/provider-routing) +5. [Architecture](/docs/developer-guide/architecture) + +:::tip +RL training works best when you already understand the basics of how Hermes Agent handles conversations and tool calls. Run through the Beginner path first if you're new. +::: + +### "I want to use it as a Python library" + +Integrate Hermes Agent into your own Python applications programmatically. + +1. [Installation](/docs/getting-started/installation) +2. [Quickstart](/docs/getting-started/quickstart) +3. [Python Library Guide](/docs/guides/python-library) +4. [Architecture](/docs/developer-guide/architecture) +5. [Tools](/docs/user-guide/features/tools) +6. [Sessions](/docs/user-guide/sessions) + +## Key Features at a Glance + +Not sure what's available? Here's a quick directory of major features: + +| Feature | What It Does | Link | +|---|---|---| +| **Tools** | Built-in tools the agent can call (file I/O, search, shell, etc.) 
| [Tools](/docs/user-guide/features/tools) | +| **Skills** | Installable plugin packages that add new capabilities | [Skills](/docs/user-guide/features/skills) | +| **Memory** | Persistent memory across sessions | [Memory](/docs/user-guide/features/memory) | +| **Context Files** | Feed files and directories into conversations | [Context Files](/docs/user-guide/features/context-files) | +| **MCP** | Connect to external tool servers via Model Context Protocol | [MCP](/docs/user-guide/features/mcp) | +| **Cron** | Schedule recurring agent tasks | [Cron](/docs/user-guide/features/cron) | +| **Delegation** | Spawn sub-agents for parallel work | [Delegation](/docs/user-guide/features/delegation) | +| **Code Execution** | Run code in sandboxed environments | [Code Execution](/docs/user-guide/features/code-execution) | +| **Browser** | Web browsing and scraping | [Browser](/docs/user-guide/features/browser) | +| **Hooks** | Event-driven callbacks and middleware | [Hooks](/docs/user-guide/features/hooks) | +| **Batch Processing** | Process multiple inputs in bulk | [Batch Processing](/docs/user-guide/features/batch-processing) | +| **RL Training** | Fine-tune models with reinforcement learning | [RL Training](/docs/user-guide/features/rl-training) | +| **Provider Routing** | Route requests across multiple LLM providers | [Provider Routing](/docs/user-guide/features/provider-routing) | + +## What to Read Next + +Based on where you are right now: + +- **Just finished installing?** → Head to the [Quickstart](/docs/getting-started/quickstart) to run your first conversation. +- **Completed the Quickstart?** → Read [CLI Usage](/docs/user-guide/cli) and [Configuration](/docs/user-guide/configuration) to customize your setup. +- **Comfortable with the basics?** → Explore [Tools](/docs/user-guide/features/tools), [Skills](/docs/user-guide/features/skills), and [Memory](/docs/user-guide/features/memory) to unlock the full power of the agent. 
+- **Setting up for a team?** → Read [Security](/docs/user-guide/security) and [Sessions](/docs/user-guide/sessions) to understand access control and conversation management. +- **Ready to build?** → Jump into the [Developer Guide](/docs/developer-guide/architecture) to understand the internals and start contributing. +- **Want practical examples?** → Check out the [Guides](/docs/guides/tips) section for real-world projects and tips. + +:::tip +You don't need to read everything. Pick the path that matches your goal, follow the links in order, and you'll be productive quickly. You can always come back to this page to find your next step. +::: diff --git a/website/docs/guides/_category_.json b/website/docs/guides/_category_.json new file mode 100644 index 0000000000..6d1d2f0b9e --- /dev/null +++ b/website/docs/guides/_category_.json @@ -0,0 +1,6 @@ +{ + "label": "Guides & Tutorials", + "position": 2, + "collapsible": true, + "collapsed": false +} diff --git a/website/docs/guides/daily-briefing-bot.md b/website/docs/guides/daily-briefing-bot.md new file mode 100644 index 0000000000..b6c97e4e8f --- /dev/null +++ b/website/docs/guides/daily-briefing-bot.md @@ -0,0 +1,263 @@ +--- +sidebar_position: 2 +title: "Tutorial: Daily Briefing Bot" +description: "Build an automated daily briefing bot that researches topics, summarizes findings, and delivers them to Telegram or Discord every morning" +--- + +# Tutorial: Build a Daily Briefing Bot + +In this tutorial, you'll build a personal briefing bot that wakes up every morning, researches topics you care about, summarizes the findings, and delivers a concise briefing straight to your Telegram or Discord. + +By the end, you'll have a fully automated workflow combining **web search**, **cron scheduling**, **delegation**, and **messaging delivery** — no code required. + +## What We're Building + +Here's the flow: + +1. **8:00 AM** — The cron scheduler triggers your job +2. 
**Hermes spins up** a fresh agent session with your prompt +3. **Web search** pulls the latest news on your topics +4. **Summarization** distills it into a clean briefing format +5. **Delivery** sends the briefing to your Telegram or Discord + +The whole thing runs hands-free. You just read your briefing with your morning coffee. + +## Prerequisites + +Before starting, make sure you have: + +- **Hermes Agent installed** — see the [Installation guide](/docs/getting-started/installation) +- **Gateway running** — the gateway daemon handles cron execution: + ```bash + hermes gateway install # Install as system service (recommended) + # or + hermes gateway # Run in foreground + ``` +- **Firecrawl API key** — set `FIRECRAWL_API_KEY` in your environment for web search +- **Messaging configured** (optional but recommended) — [Telegram](/docs/user-guide/messaging/telegram) or Discord set up with a home channel + +:::tip No messaging? No problem +You can still follow this tutorial using `deliver: "local"`. Briefings will be saved to `~/.hermes/cron/output/` and you can read them anytime. +::: + +## Step 1: Test the Workflow Manually + +Before automating anything, let's make sure the briefing works. Start a chat session: + +```bash +hermes +``` + +Then enter this prompt: + +``` +Search for the latest news about AI agents and open source LLMs. +Summarize the top 3 stories in a concise briefing format with links. +``` + +Hermes will search the web, read through results, and produce something like: + +``` +☀️ Your AI Briefing — March 8, 2026 + +1. Qwen 3 Released with 235B Parameters + Alibaba's latest open-weight model matches GPT-4.5 on several + benchmarks while remaining fully open source. + → https://qwenlm.github.io/blog/qwen3/ + +2. LangChain Launches Agent Protocol Standard + A new open standard for agent-to-agent communication gains + adoption from 15 major frameworks in its first week. + → https://blog.langchain.dev/agent-protocol/ + +3. 
EU AI Act Enforcement Begins for General-Purpose Models + The first compliance deadlines hit, with open source models + receiving exemptions under the 10M parameter threshold. + → https://artificialintelligenceact.eu/updates/ + +--- +3 stories • Sources searched: 8 • Generated by Hermes Agent +``` + +If this works, you're ready to automate it. + +:::tip Iterate on the format +Try different prompts until you get output you love. Add instructions like "use emoji headers" or "keep each summary under 2 sentences." Whatever you settle on goes into the cron job. +::: + +## Step 2: Create the Cron Job + +Now let's schedule this to run automatically every morning. You can do this in two ways. + +### Option A: Natural Language (in chat) + +Just tell Hermes what you want: + +``` +Every morning at 8am, search the web for the latest news about AI agents +and open source LLMs. Summarize the top 3 stories in a concise briefing +with links. Use a friendly, professional tone. Deliver to telegram. +``` + +Hermes will create the cron job for you using the `schedule_cronjob` tool. + +### Option B: CLI Slash Command + +Use the `/cron` command for more control: + +``` +/cron add "0 8 * * *" "Search the web for the latest news about AI agents and open source LLMs. Find at least 5 recent articles from the past 24 hours. Summarize the top 3 most important stories in a concise daily briefing format. For each story include: a clear headline, a 2-sentence summary, and the source URL. Use a friendly, professional tone. Format with emoji bullet points and end with a total story count." +``` + +### The Golden Rule: Self-Contained Prompts + +:::warning Critical concept +Cron jobs run in a **completely fresh session** — no memory of your previous conversations, no context about what you "set up earlier." Your prompt must contain **everything** the agent needs to do the job. +::: + +**Bad prompt:** +``` +Do my usual morning briefing. 
+``` + +**Good prompt:** +``` +Search the web for the latest news about AI agents and open source LLMs. +Find at least 5 recent articles from the past 24 hours. Summarize the +top 3 most important stories in a concise daily briefing format. For each +story include: a clear headline, a 2-sentence summary, and the source URL. +Use a friendly, professional tone. Format with emoji bullet points. +``` + +The good prompt is specific about **what to search**, **how many articles**, **what format**, and **what tone**. It's everything the agent needs in one shot. + +## Step 3: Customize the Briefing + +Once the basic briefing works, you can get creative. + +### Multi-Topic Briefings + +Cover several areas in one briefing: + +``` +/cron add "0 8 * * *" "Create a morning briefing covering three topics. For each topic, search the web for recent news from the past 24 hours and summarize the top 2 stories with links. + +Topics: +1. AI and machine learning — focus on open source models and agent frameworks +2. Cryptocurrency — focus on Bitcoin, Ethereum, and regulatory news +3. Space exploration — focus on SpaceX, NASA, and commercial space + +Format as a clean briefing with section headers and emoji. End with today's date and a motivational quote." +``` + +### Using Delegation for Parallel Research + +For faster briefings, tell Hermes to delegate each topic to a sub-agent: + +``` +/cron add "0 8 * * *" "Create a morning briefing by delegating research to sub-agents. Delegate three parallel tasks: + +1. Delegate: Search for the top 2 AI/ML news stories from the past 24 hours with links +2. Delegate: Search for the top 2 cryptocurrency news stories from the past 24 hours with links +3. Delegate: Search for the top 2 space exploration news stories from the past 24 hours with links + +Collect all results and combine them into a single clean briefing with section headers, emoji formatting, and source links. Add today's date as a header." 
+``` + +Each sub-agent searches independently and in parallel, then the main agent combines everything into one polished briefing. See the [Delegation docs](/docs/user-guide/features/delegation) for more on how this works. + +### Weekday-Only Schedule + +Don't need briefings on weekends? Use a cron expression that targets Monday–Friday: + +``` +/cron add "0 8 * * 1-5" "Search for the latest AI and tech news..." +``` + +### Twice-Daily Briefings + +Get a morning overview and an evening recap: + +``` +/cron add "0 8 * * *" "Morning briefing: search for AI news from the past 12 hours..." +/cron add "0 18 * * *" "Evening recap: search for AI news from the past 12 hours..." +``` + +### Adding Personal Context with Memory + +If you have [memory](/docs/user-guide/features/memory) enabled, you can store preferences that persist across sessions. But remember — cron jobs run in fresh sessions without conversational memory. To add personal context, bake it directly into the prompt: + +``` +/cron add "0 8 * * *" "You are creating a briefing for a senior ML engineer who cares about: PyTorch ecosystem, transformer architectures, open-weight models, and AI regulation in the EU. Skip stories about product launches or funding rounds unless they involve open source. + +Search for the latest news on these topics. Summarize the top 3 stories with links. Be concise and technical — this reader doesn't need basic explanations." +``` + +:::tip Tailor the persona +Including details about who the briefing is *for* dramatically improves relevance. Tell the agent your role, interests, and what to skip. 
+::: + +## Step 4: Manage Your Jobs + +### List All Scheduled Jobs + +In chat: +``` +/cron list +``` + +Or from the terminal: +```bash +hermes cron list +``` + +You'll see output like: + +``` +ID | Name | Schedule | Next Run | Deliver +------------|-------------------|-------------|--------------------|-------- +a1b2c3d4 | Morning Briefing | 0 8 * * * | 2026-03-09 08:00 | telegram +e5f6g7h8 | Evening Recap | 0 18 * * * | 2026-03-08 18:00 | telegram +``` + +### Remove a Job + +In chat: +``` +/cron remove a1b2c3d4 +``` + +Or ask conversationally: +``` +Remove my morning briefing cron job. +``` + +Hermes will use `list_cronjobs` to find it and `remove_cronjob` to delete it. + +### Check Gateway Status + +Make sure the scheduler is actually running: + +```bash +hermes cron status +``` + +If the gateway isn't running, your jobs won't execute. Install it as a system service for reliability: + +```bash +hermes gateway install +``` + +## Going Further + +You've built a working daily briefing bot. Here are some directions to explore next: + +- **[Scheduled Tasks (Cron)](/docs/user-guide/features/cron)** — Full reference for schedule formats, repeat limits, and delivery options +- **[Delegation](/docs/user-guide/features/delegation)** — Deep dive into parallel sub-agent workflows +- **[Messaging Platforms](/docs/user-guide/messaging)** — Set up Telegram, Discord, or other delivery targets +- **[Memory](/docs/user-guide/features/memory)** — Persistent context across sessions +- **[Tips & Best Practices](/docs/guides/tips)** — More prompt engineering advice + +:::tip What else can you schedule? +The briefing bot pattern works for anything: competitor monitoring, GitHub repo summaries, weather forecasts, portfolio tracking, server health checks, or even a daily joke. If you can describe it in a prompt, you can schedule it. 
+::: diff --git a/website/docs/guides/python-library.md b/website/docs/guides/python-library.md new file mode 100644 index 0000000000..5f75f9a0e8 --- /dev/null +++ b/website/docs/guides/python-library.md @@ -0,0 +1,340 @@ +--- +sidebar_position: 4 +title: "Using Hermes as a Python Library" +description: "Embed AIAgent in your own Python scripts, web apps, or automation pipelines — no CLI required" +--- + +# Using Hermes as a Python Library + +Hermes isn't just a CLI tool. You can import `AIAgent` directly and use it programmatically in your own Python scripts, web applications, or automation pipelines. This guide shows you how. + +--- + +## Installation + +Install Hermes directly from the repository: + +```bash +pip install git+https://github.com/NousResearch/hermes-agent.git +``` + +Or with [uv](https://docs.astral.sh/uv/): + +```bash +uv pip install git+https://github.com/NousResearch/hermes-agent.git +``` + +You can also pin it in your `requirements.txt`: + +```text +hermes-agent @ git+https://github.com/NousResearch/hermes-agent.git +``` + +:::tip +The same environment variables used by the CLI are required when using Hermes as a library. At minimum, set `OPENROUTER_API_KEY` (or `OPENAI_API_KEY` / `ANTHROPIC_API_KEY` if using direct provider access). +::: + +--- + +## Basic Usage + +The simplest way to use Hermes is the `chat()` method — pass a message, get a string back: + +```python +from run_agent import AIAgent + +agent = AIAgent( + model="anthropic/claude-sonnet-4", + quiet_mode=True, +) +response = agent.chat("What is the capital of France?") +print(response) +``` + +`chat()` handles the full conversation loop internally — tool calls, retries, everything — and returns just the final text response. + +:::warning +Always set `quiet_mode=True` when embedding Hermes in your own code. Without it, the agent prints CLI spinners, progress indicators, and other terminal output that will clutter your application's output. 
+::: + +--- + +## Full Conversation Control + +For more control over the conversation, use `run_conversation()` directly. It returns a dictionary with the full response, message history, and metadata: + +```python +agent = AIAgent( + model="anthropic/claude-sonnet-4", + quiet_mode=True, +) + +result = agent.run_conversation( + user_message="Search for recent Python 3.13 features", + task_id="my-task-1", +) + +print(result["final_response"]) +print(f"Messages exchanged: {len(result['messages'])}") +``` + +The returned dictionary contains: +- **`final_response`** — The agent's final text reply +- **`messages`** — The complete message history (system, user, assistant, tool calls) +- **`task_id`** — The task identifier used for VM isolation + +You can also pass a custom system message that overrides the ephemeral system prompt for that call: + +```python +result = agent.run_conversation( + user_message="Explain quicksort", + system_message="You are a computer science tutor. Use simple analogies.", +) +``` + +--- + +## Configuring Tools + +Control which toolsets the agent has access to using `enabled_toolsets` or `disabled_toolsets`: + +```python +# Only enable web tools (browsing, search) +agent = AIAgent( + model="anthropic/claude-sonnet-4", + enabled_toolsets=["web"], + quiet_mode=True, +) + +# Enable everything except terminal access +agent = AIAgent( + model="anthropic/claude-sonnet-4", + disabled_toolsets=["terminal"], + quiet_mode=True, +) +``` + +:::tip +Use `enabled_toolsets` when you want a minimal, locked-down agent (e.g., only web search for a research bot). Use `disabled_toolsets` when you want most capabilities but need to restrict specific ones (e.g., no terminal access in a shared environment). 
+::: + +--- + +## Multi-turn Conversations + +Maintain conversation state across multiple turns by passing the message history back in: + +```python +agent = AIAgent( + model="anthropic/claude-sonnet-4", + quiet_mode=True, +) + +# First turn +result1 = agent.run_conversation("My name is Alice") +history = result1["messages"] + +# Second turn — agent remembers the context +result2 = agent.run_conversation( + "What's my name?", + conversation_history=history, +) +print(result2["final_response"]) # "Your name is Alice." +``` + +The `conversation_history` parameter accepts the `messages` list from a previous result. The agent copies it internally, so your original list is never mutated. + +--- + +## Saving Trajectories + +Enable trajectory saving to capture conversations in ShareGPT format — useful for generating training data or debugging: + +```python +agent = AIAgent( + model="anthropic/claude-sonnet-4", + save_trajectories=True, + quiet_mode=True, +) + +agent.chat("Write a Python function to sort a list") +# Saves to trajectory_samples.jsonl in ShareGPT format +``` + +Each conversation is appended as a single JSONL line, making it easy to collect datasets from automated runs. + +--- + +## Custom System Prompts + +Use `ephemeral_system_prompt` to set a custom system prompt that guides the agent's behavior but is **not** saved to trajectory files (keeping your training data clean): + +```python +agent = AIAgent( + model="anthropic/claude-sonnet-4", + ephemeral_system_prompt="You are a SQL expert. Only answer database questions.", + quiet_mode=True, +) + +response = agent.chat("How do I write a JOIN query?") +print(response) +``` + +This is ideal for building specialized agents — a code reviewer, a documentation writer, a SQL assistant — all using the same underlying tooling. + +--- + +## Batch Processing + +For running many prompts in parallel, Hermes includes `batch_runner.py`. 
It manages concurrent `AIAgent` instances with proper resource isolation: + +```bash +python batch_runner.py --input prompts.jsonl --output results.jsonl +``` + +Each prompt gets its own `task_id` and isolated environment. If you need custom batch logic, you can build your own using `AIAgent` directly: + +```python +import concurrent.futures +from run_agent import AIAgent + +prompts = [ + "Explain recursion", + "What is a hash table?", + "How does garbage collection work?", +] + +def process_prompt(prompt): + # Create a fresh agent per task for thread safety + agent = AIAgent( + model="anthropic/claude-sonnet-4", + quiet_mode=True, + skip_memory=True, + ) + return agent.chat(prompt) + +with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor: + results = list(executor.map(process_prompt, prompts)) + +for prompt, result in zip(prompts, results): + print(f"Q: {prompt}\nA: {result}\n") +``` + +:::warning +Always create a **new `AIAgent` instance per thread or task**. The agent maintains internal state (conversation history, tool sessions, iteration counters) that is not thread-safe to share. 
+::: + +--- + +## Integration Examples + +### FastAPI Endpoint + +```python +from fastapi import FastAPI +from pydantic import BaseModel +from run_agent import AIAgent + +app = FastAPI() + +class ChatRequest(BaseModel): + message: str + model: str = "anthropic/claude-sonnet-4" + +@app.post("/chat") +async def chat(request: ChatRequest): + agent = AIAgent( + model=request.model, + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + response = agent.chat(request.message) + return {"response": response} +``` + +### Discord Bot + +```python +import discord +from run_agent import AIAgent + +client = discord.Client(intents=discord.Intents.default()) + +@client.event +async def on_message(message): + if message.author == client.user: + return + if message.content.startswith("!hermes "): + query = message.content[8:] + agent = AIAgent( + model="anthropic/claude-sonnet-4", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + platform="discord", + ) + response = agent.chat(query) + await message.channel.send(response[:2000]) + +client.run("YOUR_DISCORD_TOKEN") +``` + +### CI/CD Pipeline Step + +```python +#!/usr/bin/env python3 +"""CI step: auto-review a PR diff.""" +import subprocess +from run_agent import AIAgent + +diff = subprocess.check_output(["git", "diff", "main...HEAD"]).decode() + +agent = AIAgent( + model="anthropic/claude-sonnet-4", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + disabled_toolsets=["terminal", "browser"], +) + +review = agent.chat( + f"Review this PR diff for bugs, security issues, and style problems:\n\n{diff}" +) +print(review) +``` + +--- + +## Key Constructor Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `model` | `str` | `"anthropic/claude-opus-4.6"` | Model in OpenRouter format | +| `quiet_mode` | `bool` | `False` | Suppress CLI output | +| `enabled_toolsets` | `List[str]` | `None` | Whitelist specific toolsets | +| 
`disabled_toolsets` | `List[str]` | `None` | Blacklist specific toolsets | +| `save_trajectories` | `bool` | `False` | Save conversations to JSONL | +| `ephemeral_system_prompt` | `str` | `None` | Custom system prompt (not saved to trajectories) | +| `max_iterations` | `int` | `90` | Max tool-calling iterations per conversation | +| `skip_context_files` | `bool` | `False` | Skip loading AGENTS.md files | +| `skip_memory` | `bool` | `False` | Disable persistent memory read/write | +| `api_key` | `str` | `None` | API key (falls back to env vars) | +| `base_url` | `str` | `None` | Custom API endpoint URL | +| `platform` | `str` | `None` | Platform hint (`"discord"`, `"telegram"`, etc.) | + +--- + +## Important Notes + +:::tip +- Set **`skip_context_files=True`** if you don't want `AGENTS.md` files from the working directory loaded into the system prompt. +- Set **`skip_memory=True`** to prevent the agent from reading or writing persistent memory — recommended for stateless API endpoints. +- The `platform` parameter (e.g., `"discord"`, `"telegram"`) injects platform-specific formatting hints so the agent adapts its output style. +::: + +:::warning +- **Thread safety**: Create one `AIAgent` per thread or task. Never share an instance across concurrent calls. +- **Resource cleanup**: The agent automatically cleans up resources (terminal sessions, browser instances) when a conversation ends. If you're running in a long-lived process, ensure each conversation completes normally. +- **Iteration limits**: The default `max_iterations=90` is generous. For simple Q&A use cases, consider lowering it (e.g., `max_iterations=10`) to prevent runaway tool-calling loops and control costs. 
+::: diff --git a/website/docs/guides/team-telegram-assistant.md b/website/docs/guides/team-telegram-assistant.md new file mode 100644 index 0000000000..7ab30c3350 --- /dev/null +++ b/website/docs/guides/team-telegram-assistant.md @@ -0,0 +1,429 @@ +--- +sidebar_position: 3 +title: "Tutorial: Team Telegram Assistant" +description: "Step-by-step guide to setting up a Telegram bot that your whole team can use for code help, research, system admin, and more" +--- + +# Set Up a Team Telegram Assistant + +This tutorial walks you through setting up a Telegram bot powered by Hermes Agent that multiple team members can use. By the end, your team will have a shared AI assistant they can message for help with code, research, system administration, and anything else — secured with per-user authorization. + +## What We're Building + +A Telegram bot that: + +- **Any authorized team member** can DM for help — code reviews, research, shell commands, debugging +- **Runs on your server** with full tool access — terminal, file editing, web search, code execution +- **Per-user sessions** — each person gets their own conversation context +- **Secure by default** — only approved users can interact, with two authorization methods +- **Scheduled tasks** — daily standups, health checks, and reminders delivered to a team channel + +--- + +## Prerequisites + +Before starting, make sure you have: + +- **Hermes Agent installed** on a server or VPS (not your laptop — the bot needs to stay running). Follow the [installation guide](/docs/getting-started/installation) if you haven't yet. +- **A Telegram account** for yourself (the bot owner) +- **An LLM provider configured** — at minimum, an API key for OpenAI, Anthropic, or another supported provider in `~/.hermes/.env` + +:::tip +A $5/month VPS is plenty for running the gateway. Hermes itself is lightweight — the LLM API calls are what cost money, and those happen remotely. 
+::: + +--- + +## Step 1: Create a Telegram Bot + +Every Telegram bot starts with **@BotFather** — Telegram's official bot for creating bots. + +1. **Open Telegram** and search for `@BotFather`, or go to [t.me/BotFather](https://t.me/BotFather) + +2. **Send `/newbot`** — BotFather will ask you two things: + - **Display name** — what users see (e.g., `Team Hermes Assistant`) + - **Username** — must end in `bot` (e.g., `myteam_hermes_bot`) + +3. **Copy the bot token** — BotFather replies with something like: + ``` + Use this token to access the HTTP API: + 7123456789:AAH1bGciOiJSUzI1NiIsInR5cCI6Ikp... + ``` + Save this token — you'll need it in the next step. + +4. **Set a description** (optional but recommended): + ``` + /setdescription + ``` + Choose your bot, then enter something like: + ``` + Team AI assistant powered by Hermes Agent. DM me for help with code, research, debugging, and more. + ``` + +5. **Set bot commands** (optional — gives users a command menu): + ``` + /setcommands + ``` + Choose your bot, then paste: + ``` + new - Start a fresh conversation + model - Show or change the AI model + status - Show session info + help - Show available commands + stop - Stop the current task + ``` + +:::warning +Keep your bot token secret. Anyone with the token can control the bot. If it leaks, use `/revoke` in BotFather to generate a new one. +::: + +--- + +## Step 2: Configure the Gateway + +You have two options: the interactive setup wizard (recommended) or manual configuration. + +### Option A: Interactive Setup (Recommended) + +```bash +hermes gateway setup +``` + +This walks you through everything with arrow-key selection. Pick **Telegram**, paste your bot token, and enter your user ID when prompted. + +### Option B: Manual Configuration + +Add these lines to `~/.hermes/.env`: + +```bash +# Telegram bot token from BotFather +TELEGRAM_BOT_TOKEN=7123456789:AAH1bGciOiJSUzI1NiIsInR5cCI6Ikp... 
+ +# Your Telegram user ID (numeric) +TELEGRAM_ALLOWED_USERS=123456789 +``` + +### Finding Your User ID + +Your Telegram user ID is a numeric value (not your username). To find it: + +1. Message [@userinfobot](https://t.me/userinfobot) on Telegram +2. It instantly replies with your numeric user ID +3. Copy that number into `TELEGRAM_ALLOWED_USERS` + +:::info +Telegram user IDs are permanent numbers like `123456789`. They're different from your `@username`, which can change. Always use the numeric ID for allowlists. +::: + +--- + +## Step 3: Start the Gateway + +### Quick Test + +Run the gateway in the foreground first to make sure everything works: + +```bash +hermes gateway +``` + +You should see output like: + +``` +[Gateway] Starting Hermes Gateway... +[Gateway] Telegram adapter connected +[Gateway] Cron scheduler started (tick every 60s) +``` + +Open Telegram, find your bot, and send it a message. If it replies, you're in business. Press `Ctrl+C` to stop. + +### Production: Install as a Service + +For a persistent deployment that survives reboots: + +```bash +hermes gateway install +``` + +This creates a **systemd** service (Linux) or **launchd** service (macOS) that runs automatically. + +```bash +# Linux — manage the service +hermes gateway start +hermes gateway stop +hermes gateway status + +# View live logs +journalctl --user -u hermes-gateway -f + +# Keep running after SSH logout +sudo loginctl enable-linger $USER +``` + +```bash +# macOS — manage the service +launchctl start ai.hermes.gateway +launchctl stop ai.hermes.gateway +tail -f ~/.hermes/logs/gateway.log +``` + +### Verify It's Running + +```bash +hermes gateway status +``` + +Then send a test message to your bot on Telegram. You should get a response within a few seconds. + +--- + +## Step 4: Set Up Team Access + +Now let's give your teammates access. There are two approaches. 
+ +### Approach A: Static Allowlist + +Collect each team member's Telegram user ID (have them message [@userinfobot](https://t.me/userinfobot)) and add them as a comma-separated list: + +```bash +# In ~/.hermes/.env +TELEGRAM_ALLOWED_USERS=123456789,987654321,555555555 +``` + +Restart the gateway after changes: + +```bash +hermes gateway stop && hermes gateway start +``` + +### Approach B: DM Pairing (Recommended for Teams) + +DM pairing is more flexible — you don't need to collect user IDs upfront. Here's how it works: + +1. **Teammate DMs the bot** — since they're not on the allowlist, the bot replies with a one-time pairing code: + ``` + 🔐 Pairing code: XKGH5N7P + Send this code to the bot owner for approval. + ``` + +2. **Teammate sends you the code** (via any channel — Slack, email, in person) + +3. **You approve it** on the server: + ```bash + hermes pairing approve telegram XKGH5N7P + ``` + +4. **They're in** — the bot immediately starts responding to their messages + +**Managing paired users:** + +```bash +# See all pending and approved users +hermes pairing list + +# Revoke someone's access +hermes pairing revoke telegram 987654321 + +# Clear expired pending codes +hermes pairing clear-pending +``` + +:::tip +DM pairing is ideal for teams because you don't need to restart the gateway when adding new users. Approvals take effect immediately. 
+::: + +### Security Considerations + +- **Never set `GATEWAY_ALLOW_ALL_USERS=true`** on a bot with terminal access — anyone who finds your bot could run commands on your server +- Pairing codes expire after **1 hour** and use cryptographic randomness +- Rate limiting prevents brute-force attacks: 1 request per user per 10 minutes, max 3 pending codes per platform +- After 5 failed approval attempts, the platform enters a 1-hour lockout +- All pairing data is stored with `chmod 0600` permissions + +--- + +## Step 5: Configure the Bot + +### Set a Home Channel + +A **home channel** is where the bot delivers cron job results and proactive messages. Without one, scheduled tasks have nowhere to send output. + +**Option 1:** Use the `/sethome` command in any Telegram group or chat where the bot is a member. + +**Option 2:** Set it manually in `~/.hermes/.env`: + +```bash +TELEGRAM_HOME_CHANNEL=-1001234567890 +TELEGRAM_HOME_CHANNEL_NAME="Team Updates" +``` + +To find a channel ID, add [@userinfobot](https://t.me/userinfobot) to the group — it will report the group's chat ID. + +### Configure Tool Progress Display + +Control how much detail the bot shows when using tools. In `~/.hermes/config.yaml`: + +```yaml +display: + tool_progress: new # off | new | all | verbose +``` + +| Mode | What You See | +|------|-------------| +| `off` | Clean responses only — no tool activity | +| `new` | Brief status for each new tool call (recommended for messaging) | +| `all` | Every tool call with details | +| `verbose` | Full tool output including command results | + +Users can also change this per-session with the `/verbose` command in chat. + +### Set Up a Personality with SOUL.md + +Customize how the bot communicates by creating `~/.hermes/SOUL.md`: + +```markdown +# Soul +You are a helpful team assistant. Be concise and technical. +Use code blocks for any code. Skip pleasantries — the team +values directness. When debugging, always ask for error logs +before guessing at solutions. 
+``` + +### Add Project Context + +If your team works on specific projects, create context files so the bot knows your stack: + +```markdown + +# Team Context +- We use Python 3.12 with FastAPI and SQLAlchemy +- Frontend is React with TypeScript +- CI/CD runs on GitHub Actions +- Production deploys to AWS ECS +- Always suggest writing tests for new code +``` + +:::info +Context files are injected into every session's system prompt. Keep them concise — every character counts against your token budget. +::: + +--- + +## Step 6: Set Up Scheduled Tasks + +With the gateway running, you can schedule recurring tasks that deliver results to your team channel. + +### Daily Standup Summary + +Message the bot on Telegram: + +``` +Every weekday at 9am, check the GitHub repository at +github.com/myorg/myproject for: +1. Pull requests opened/merged in the last 24 hours +2. Issues created or closed +3. Any CI/CD failures on the main branch +Format as a brief standup-style summary. +``` + +The agent creates a cron job automatically and delivers results to the chat where you asked (or the home channel). + +### Server Health Check + +``` +Every 6 hours, check disk usage with 'df -h', memory with 'free -h', +and Docker container status with 'docker ps'. Report anything unusual — +partitions above 80%, containers that have restarted, or high memory usage. +``` + +### Managing Scheduled Tasks + +```bash +# From the CLI +hermes cron list # View all scheduled jobs +hermes cron status # Check if scheduler is running + +# From Telegram chat +/cron list # View jobs +/cron remove # Remove a job +``` + +:::warning +Cron job prompts run in completely fresh sessions with no memory of prior conversations. Make sure each prompt contains **all** the context the agent needs — file paths, URLs, server addresses, and clear instructions. 
+::: + +--- + +## Production Tips + +### Use Docker for Safety + +On a shared team bot, use Docker as the terminal backend so agent commands run in a container instead of on your host: + +```bash +# In ~/.hermes/.env +TERMINAL_BACKEND=docker +TERMINAL_DOCKER_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20 +``` + +Or in `~/.hermes/config.yaml`: + +```yaml +terminal: + backend: docker + container_cpu: 1 + container_memory: 5120 + container_persistent: true +``` + +This way, even if someone asks the bot to run something destructive, your host system is protected. + +### Monitor the Gateway + +```bash +# Check if the gateway is running +hermes gateway status + +# Watch live logs (Linux) +journalctl --user -u hermes-gateway -f + +# Watch live logs (macOS) +tail -f ~/.hermes/logs/gateway.log +``` + +### Keep Hermes Updated + +From Telegram, send `/update` to the bot — it will pull the latest version and restart. Or from the server: + +```bash +hermes update +hermes gateway stop && hermes gateway start +``` + +### Log Locations + +| What | Location | +|------|----------| +| Gateway logs | `journalctl --user -u hermes-gateway` (Linux) or `~/.hermes/logs/gateway.log` (macOS) | +| Cron job output | `~/.hermes/cron/output/{job_id}/{timestamp}.md` | +| Cron job definitions | `~/.hermes/cron/jobs.json` | +| Pairing data | `~/.hermes/pairing/` | +| Session history | `~/.hermes/sessions/` | + +--- + +## Going Further + +You've got a working team Telegram assistant. 
Here are some next steps: + +- **[Security Guide](/user-guide/security)** — deep dive into authorization, container isolation, and command approval +- **[Messaging Gateway](/user-guide/messaging)** — full reference for gateway architecture, session management, and chat commands +- **[Telegram Setup](/user-guide/messaging/telegram)** — platform-specific details including voice messages and TTS +- **[Scheduled Tasks](/user-guide/features/cron)** — advanced cron scheduling with delivery options and cron expressions +- **[Context Files](/user-guide/features/context-files)** — AGENTS.md, SOUL.md, and .cursorrules for project knowledge +- **[Personality](/user-guide/features/personality)** — built-in personality presets and custom persona definitions +- **Add more platforms** — the same gateway can simultaneously run [Discord](/user-guide/messaging/discord), [Slack](/user-guide/messaging/slack), and [WhatsApp](/user-guide/messaging/whatsapp) + +--- + +*Questions or issues? Open an issue on GitHub — contributions are welcome.* diff --git a/website/docs/guides/tips.md b/website/docs/guides/tips.md new file mode 100644 index 0000000000..af4b8fce4a --- /dev/null +++ b/website/docs/guides/tips.md @@ -0,0 +1,211 @@ +--- +sidebar_position: 1 +title: "Tips & Best Practices" +description: "Practical advice to get the most out of Hermes Agent — prompt tips, CLI shortcuts, context files, memory, cost optimization, and security" +--- + +# Tips & Best Practices + +A quick-wins collection of practical tips that make you immediately more effective with Hermes Agent. Each section targets a different aspect — scan the headers and jump to what's relevant. + +--- + +## Getting the Best Results + +### Be Specific About What You Want + +Vague prompts produce vague results. Instead of "fix the code," say "fix the TypeError in `api/handlers.py` on line 47 — the `process_request()` function receives `None` from `parse_body()`." The more context you give, the fewer iterations you need. 
+ +### Provide Context Up Front + +Front-load your request with the relevant details: file paths, error messages, expected behavior. One well-crafted message beats three rounds of clarification. Paste error tracebacks directly — the agent can parse them. + +### Use Context Files for Recurring Instructions + +If you find yourself repeating the same instructions ("use tabs not spaces," "we use pytest," "the API is at `/api/v2`"), put them in an `AGENTS.md` file. The agent reads it automatically every session — zero effort after setup. + +### Let the Agent Use Its Tools + +Don't try to hand-hold every step. Say "find and fix the failing test" rather than "open `tests/test_foo.py`, look at line 42, then..." The agent has file search, terminal access, and code execution — let it explore and iterate. + +### Use Skills for Complex Workflows + +Before writing a long prompt explaining how to do something, check if there's already a skill for it. Type `/skills` to browse available skills, or just invoke one directly like `/axolotl` or `/github-pr-workflow`. + +## CLI Power User Tips + +### Multi-Line Input + +Press **Alt+Enter** (or **Ctrl+J**) to insert a newline without sending. This lets you compose multi-line prompts, paste code blocks, or structure complex requests before hitting Enter to send. + +### Paste Detection + +The CLI auto-detects multi-line pastes. Just paste a code block or error traceback directly — it won't send each line as a separate message. The paste is buffered and sent as one message. + +### Interrupt and Redirect + +Press **Ctrl+C** once to interrupt the agent mid-response. You can then type a new message to redirect it. Double-press Ctrl+C within 2 seconds to force exit. This is invaluable when the agent starts going down the wrong path. + +### Resume Sessions with `-c` + +Forgot something from your last session? Run `hermes -c` to resume exactly where you left off, with full conversation history restored. 
You can also resume by title: `hermes -r "my research project"`. + +### Clipboard Image Paste + +Press **Ctrl+V** to paste an image from your clipboard directly into the chat. The agent uses vision to analyze screenshots, diagrams, error popups, or UI mockups — no need to save to a file first. + +### Slash Command Autocomplete + +Type `/` and press **Tab** to see all available commands. This includes built-in commands (`/compress`, `/model`, `/title`) and every installed skill. You don't need to memorize anything — Tab completion has you covered. + +:::tip +Use `/verbose` to cycle through tool output display modes: **off → new → all → verbose**. The "all" mode is great for watching what the agent does; "off" is cleanest for simple Q&A. +::: + +## Context Files + +### AGENTS.md: Your Project's Brain + +Create an `AGENTS.md` in your project root with architecture decisions, coding conventions, and project-specific instructions. This is automatically injected into every session, so the agent always knows your project's rules. + +```markdown +# Project Context +- This is a FastAPI backend with SQLAlchemy ORM +- Always use async/await for database operations +- Tests go in tests/ and use pytest-asyncio +- Never commit .env files +``` + +### SOUL.md: Customize Personality + +Want the agent to be more concise? More technical? Place a `SOUL.md` in your project root or `~/.hermes/SOUL.md` for global personality customization. This shapes the agent's tone and communication style. + +```markdown +# Soul +You are a senior backend engineer. Be terse and direct. +Skip explanations unless asked. Prefer one-liners over verbose solutions. +Always consider error handling and edge cases. +``` + +### .cursorrules Compatibility + +Already have a `.cursorrules` or `.cursor/rules/*.mdc` file? Hermes reads those too. No need to duplicate your coding conventions — they're loaded automatically from the working directory. 
+ +### Hierarchical Discovery + +Hermes walks the directory tree and discovers **all** `AGENTS.md` files at every level. In a monorepo, put project-wide conventions at the root and team-specific ones in subdirectories — they're all concatenated together with path headers. + +:::tip +Keep context files focused and concise. Every character counts against your token budget since they're injected into the system prompt of every session. +::: + +## Memory & Skills + +### Memory vs. Skills: What Goes Where + +**Memory** is for facts: your environment, preferences, project locations, and things the agent has learned about you. **Skills** are for procedures: multi-step workflows, tool-specific instructions, and reusable recipes. Use memory for "what," skills for "how." + +### When to Create Skills + +If you find a task that takes 5+ steps and you'll do it again, ask the agent to create a skill for it. Say "save what you just did as a skill called `deploy-staging`." Next time, just type `/deploy-staging` and the agent loads the full procedure. + +### Managing Memory Capacity + +Memory is intentionally bounded (~2,200 chars for MEMORY.md, ~1,375 chars for USER.md). When it fills up, the agent consolidates entries. You can help by saying "clean up your memory" or "replace the old Python 3.9 note — we're on 3.12 now." + +### Let the Agent Remember + +After a productive session, say "remember this for next time" and the agent will save the key takeaways. You can also be specific: "save to memory that our CI uses GitHub Actions with the `deploy.yml` workflow." + +:::warning +Memory is a frozen snapshot — changes made during a session don't appear in the system prompt until the next session starts. The agent writes to disk immediately, but the prompt cache isn't invalidated mid-session. +::: + +## Performance & Cost + +### Don't Break the Prompt Cache + +Most LLM providers cache the system prompt prefix. 
If you keep your system prompt stable (same context files, same memory), subsequent messages in a session get **cache hits** that are significantly cheaper. Avoid changing the model or system prompt mid-session. + +### Use /compress Before Hitting Limits + +Long sessions accumulate tokens. When you notice responses slowing down or getting truncated, run `/compress`. This summarizes the conversation history, preserving key context while dramatically reducing token count. Use `/usage` to check where you stand. + +### Delegate for Parallel Work + +Need to research three topics at once? Ask the agent to use `delegate_task` with parallel subtasks. Each subagent runs independently with its own context, and only the final summaries come back — massively reducing your main conversation's token usage. + +### Use execute_code for Batch Operations + +Instead of running terminal commands one at a time, ask the agent to write a script that does everything at once. "Write a Python script to rename all `.jpeg` files to `.jpg` and run it" is cheaper and faster than renaming files individually. + +### Choose the Right Model + +Use `/model` to switch models mid-session. Use a frontier model (Claude Sonnet/Opus, GPT-4o) for complex reasoning and architecture decisions. Switch to a faster model for simple tasks like formatting, renaming, or boilerplate generation. + +:::tip +Run `/usage` periodically to see your token consumption. Run `/insights` for a broader view of usage patterns over the last 30 days. +::: + +## Messaging Tips + +### Set a Home Channel + +Use `/sethome` in your preferred Telegram or Discord chat to designate it as the home channel. Cron job results and scheduled task outputs are delivered here. Without it, the agent has nowhere to send proactive messages. + +### Use /title to Organize Sessions + +Name your sessions with `/title auth-refactor` or `/title research-llm-quantization`. 
Named sessions are easy to find with `hermes sessions list` and resume with `hermes -r "auth-refactor"`. Unnamed sessions pile up and become impossible to distinguish. + +### DM Pairing for Team Access + +Instead of manually collecting user IDs for allowlists, enable DM pairing. When a teammate DMs the bot, they get a one-time pairing code. You approve it with `hermes pairing approve telegram XKGH5N7P` — simple and secure. + +### Tool Progress Display Modes + +Use `/verbose` to control how much tool activity you see. In messaging platforms, less is usually more — keep it on "new" to see just new tool calls. In the CLI, "all" gives you a satisfying live view of everything the agent does. + +:::tip +On messaging platforms, sessions auto-reset after idle time (default: 120 min) or daily at 4 AM. Adjust per-platform in `~/.hermes/gateway.json` if you need longer sessions. +::: + +## Security + +### Use Docker for Untrusted Code + +When working with untrusted repositories or running unfamiliar code, use Docker or Daytona as your terminal backend. Set `TERMINAL_BACKEND=docker` in your `.env`. Destructive commands inside a container can't harm your host system. + +```bash +# In your .env: +TERMINAL_BACKEND=docker +TERMINAL_DOCKER_IMAGE=hermes-sandbox:latest +``` + +### Review Before Choosing "Always" + +When the agent triggers a dangerous command approval (`rm -rf`, `DROP TABLE`, etc.), you get four options: **once**, **session**, **always**, **deny**. Think carefully before choosing "always" — it permanently allowlists that pattern. Start with "session" until you're comfortable. + +### Command Approval Is Your Safety Net + +Hermes checks every command against a curated list of dangerous patterns before execution. This includes recursive deletes, SQL drops, piping curl to shell, and more. Don't disable this in production — it exists for good reasons. 
+ +:::warning +When running in a container backend (Docker, Singularity, Modal, Daytona), dangerous command checks are **skipped** because the container is the security boundary. Make sure your container images are properly locked down. +::: + +### Use Allowlists for Messaging Bots + +Never set `GATEWAY_ALLOW_ALL_USERS=true` on a bot with terminal access. Always use platform-specific allowlists (`TELEGRAM_ALLOWED_USERS`, `DISCORD_ALLOWED_USERS`) or DM pairing to control who can interact with your agent. + +```bash +# Recommended: explicit allowlists per platform +TELEGRAM_ALLOWED_USERS=123456789,987654321 +DISCORD_ALLOWED_USERS=123456789012345678 + +# Or use cross-platform allowlist +GATEWAY_ALLOWED_USERS=123456789,987654321 +``` + +--- + +*Have a tip that should be on this page? Open an issue or PR — community contributions are welcome.* diff --git a/website/docs/index.md b/website/docs/index.md index e905bd638c..a4ea0a8e38 100644 --- a/website/docs/index.md +++ b/website/docs/index.md @@ -25,6 +25,7 @@ It's not a coding copilot tethered to an IDE or a chatbot wrapper around a singl |---|---| | 🚀 **[Installation](/docs/getting-started/installation)** | Install in 60 seconds on Linux, macOS, or WSL2 | | 📖 **[Quickstart Tutorial](/docs/getting-started/quickstart)** | Your first conversation and key features to try | +| 🗺️ **[Learning Path](/docs/getting-started/learning-path)** | Find the right docs for your experience level | | ⚙️ **[Configuration](/docs/user-guide/configuration)** | Config file, providers, models, and options | | 💬 **[Messaging Gateway](/docs/user-guide/messaging)** | Set up Telegram, Discord, Slack, or WhatsApp | | 🔧 **[Tools & Toolsets](/docs/user-guide/features/tools)** | 40+ built-in tools and how to configure them | @@ -33,8 +34,9 @@ It's not a coding copilot tethered to an IDE or a chatbot wrapper around a singl | 🔌 **[MCP Integration](/docs/user-guide/features/mcp)** | Connect to any MCP server for extended capabilities | | 📄 **[Context 
Files](/docs/user-guide/features/context-files)** | Project context files that shape every conversation | | 🔒 **[Security](/docs/user-guide/security)** | Command approval, authorization, container isolation | +| 💡 **[Tips & Best Practices](/docs/guides/tips)** | Quick wins to get the most out of Hermes | | 🏗️ **[Architecture](/docs/developer-guide/architecture)** | How it works under the hood | -| 🤝 **[Contributing](/docs/developer-guide/contributing)** | Development setup and PR process | +| ❓ **[FAQ & Troubleshooting](/docs/reference/faq)** | Common questions and solutions | ## Key Features diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index 55fd8504f4..3613e97a71 100644 --- a/website/docs/reference/cli-commands.md +++ b/website/docs/reference/cli-commands.md @@ -17,7 +17,8 @@ These are commands you run from your shell. | `hermes` | Start interactive chat (default) | | `hermes chat -q "Hello"` | Single query mode (non-interactive) | | `hermes chat --continue` / `-c` | Resume the most recent session | -| `hermes chat --resume ` / `-r ` | Resume a specific session | +| `hermes chat -c "my project"` | Resume a session by name (latest in lineage) | +| `hermes chat --resume ` / `-r ` | Resume a specific session by ID or title | | `hermes chat --model ` | Use a specific model | | `hermes chat --provider ` | Force a provider (`nous`, `openrouter`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`) | | `hermes chat --toolsets "web,terminal"` / `-t` | Use specific toolsets | @@ -103,7 +104,8 @@ These are commands you run from your shell. 
| Command | Description | |---------|-------------| -| `hermes sessions list` | Browse past sessions | +| `hermes sessions list` | Browse past sessions (shows title, preview, last active) | +| `hermes sessions rename <id> <title>` | Set or change a session's title | | `hermes sessions export <id>` | Export a session | | `hermes sessions delete <id>` | Delete a specific session | | `hermes sessions prune` | Remove old sessions | @@ -154,9 +156,26 @@ Type `/` in the interactive CLI to see an autocomplete dropdown. | `/undo` | Remove the last user/assistant exchange | | `/save` | Save the current conversation | | `/compress` | Manually compress conversation context | +| `/title [name]` | Set or show the current session's title | | `/usage` | Show token usage for this session | | `/insights [--days N]` | Show usage insights and analytics (last 30 days) | +#### /compress + +Manually triggers context compression on the current conversation. This summarizes middle turns of the conversation while preserving the first 3 and last 4 turns, significantly reducing token count. Useful when: + +- The conversation is getting long and you want to reduce costs +- You're approaching the model's context limit +- You want to continue the conversation without starting fresh + +Requirements: at least 4 messages in the conversation. The configured model (or `compression.summary_model` from config) is used to generate the summary. After compression, the session continues seamlessly with the compressed history. + +Reports the result as: `Compressed: X → Y messages, ~N → ~M tokens`. + +:::tip +Compression also happens automatically when approaching context limits (configurable via `compression.threshold` in `config.yaml`). Use `/compress` when you want to trigger it early. 
+::: + ### Media & Input | Command | Description | diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index e82c14933a..26a0683e3e 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -107,6 +107,10 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config | `WHATSAPP_ENABLED` | Enable WhatsApp bridge (`true`/`false`) | | `WHATSAPP_MODE` | `bot` (separate number) or `self-chat` (message yourself) | | `WHATSAPP_ALLOWED_USERS` | Comma-separated phone numbers (with country code) | +| `SIGNAL_HTTP_URL` | signal-cli daemon HTTP endpoint (e.g., `http://127.0.0.1:8080`) | +| `SIGNAL_ACCOUNT` | Bot phone number in E.164 format (e.g., `+15551234567`) | +| `SIGNAL_ALLOWED_USERS` | Comma-separated E.164 phone numbers or UUIDs | +| `SIGNAL_GROUP_ALLOWED_USERS` | Comma-separated group IDs, or `*` for all groups (omit to disable groups) | | `MESSAGING_CWD` | Working directory for terminal in messaging (default: `~`) | | `GATEWAY_ALLOWED_USERS` | Comma-separated user IDs allowed across all platforms | | `GATEWAY_ALLOW_ALL_USERS` | Allow all users without allowlist (`true`/`false`, default: `false`) | diff --git a/website/docs/reference/faq.md b/website/docs/reference/faq.md new file mode 100644 index 0000000000..a477c5333f --- /dev/null +++ b/website/docs/reference/faq.md @@ -0,0 +1,430 @@ +--- +sidebar_position: 3 +title: "FAQ & Troubleshooting" +description: "Frequently asked questions and solutions to common issues with Hermes Agent" +--- + +# FAQ & Troubleshooting + +Quick answers and fixes for the most common questions and issues. + +--- + +## Frequently Asked Questions + +### What LLM providers work with Hermes? + +Hermes Agent works with any OpenAI-compatible API. 
Supported providers include: + +- **[OpenRouter](https://openrouter.ai/)** — access hundreds of models through one API key (recommended for flexibility) +- **Nous Portal** — Nous Research's own inference endpoint +- **OpenAI** — GPT-4o, o1, o3, etc. +- **Anthropic** — Claude models (via OpenRouter or compatible proxy) +- **Google** — Gemini models (via OpenRouter or compatible proxy) +- **z.ai / ZhipuAI** — GLM models +- **Kimi / Moonshot AI** — Kimi models +- **MiniMax** — global and China endpoints +- **Local models** — via [Ollama](https://ollama.com/), [vLLM](https://docs.vllm.ai/), [llama.cpp](https://github.com/ggerganov/llama.cpp), [SGLang](https://github.com/sgl-project/sglang), or any OpenAI-compatible server + +Set your provider with `hermes setup` or by editing `~/.hermes/.env`. See the [Environment Variables](./environment-variables.md) reference for all provider keys. + +### Does it work on Windows? + +**Not natively.** Hermes Agent requires a Unix-like environment. On Windows, install [WSL2](https://learn.microsoft.com/en-us/windows/wsl/install) and run Hermes from inside it. The standard install command works perfectly in WSL2: + +```bash +curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash +``` + +### Is my data sent anywhere? + +API calls go **only to the LLM provider you configure** (e.g., OpenRouter, your local Ollama instance). Hermes Agent does not collect telemetry, usage data, or analytics. Your conversations, memory, and skills are stored locally in `~/.hermes/`. + +### Can I use it offline / with local models? + +Yes. Point Hermes at any local OpenAI-compatible server: + +```bash +hermes config set OPENAI_BASE_URL http://localhost:11434/v1 # Ollama +hermes config set OPENAI_API_KEY ollama # Any non-empty value +hermes config set HERMES_MODEL llama3.1 +``` + +This works with Ollama, vLLM, llama.cpp server, SGLang, LocalAI, and others. 
See the [Configuration guide](../user-guide/configuration.md) for details. + +### How much does it cost? + +Hermes Agent itself is **free and open-source** (MIT license). You pay only for the LLM API usage from your chosen provider. Local models are completely free to run. + +### Can multiple people use one instance? + +Yes. The [messaging gateway](../user-guide/messaging/index.md) lets multiple users interact with the same Hermes Agent instance via Telegram, Discord, Slack, WhatsApp, or Home Assistant. Access is controlled through allowlists (specific user IDs) and DM pairing (an unrecognized user who messages the bot receives a one-time pairing code, which the bot owner must approve before access is granted). + +### What's the difference between memory and skills? + +- **Memory** stores **facts** — things the agent knows about you, your projects, and preferences. Memories are retrieved automatically based on relevance. +- **Skills** store **procedures** — step-by-step instructions for how to do things. Skills are recalled when the agent encounters a similar task. + +Both persist across sessions. See [Memory](../user-guide/features/memory.md) and [Skills](../user-guide/features/skills.md) for details. + +### Can I use it in my own Python project? + +Yes. Import the `AIAgent` class and use Hermes programmatically: + +```python +from hermes.agent import AIAgent + +agent = AIAgent(model="openrouter/nous/hermes-3-llama-3.1-70b") +response = await agent.chat("Explain quantum computing briefly") +``` + +See the [Python Library guide](../user-guide/features/code-execution.md) for full API usage. + +--- + +## Troubleshooting + +### Installation Issues + +#### `hermes: command not found` after installation + +**Cause:** Your shell hasn't reloaded the updated PATH. 
+ +**Solution:** +```bash +# Reload your shell profile +source ~/.bashrc # bash +source ~/.zshrc # zsh + +# Or start a new terminal session +``` + +If it still doesn't work, verify the install location: +```bash +which hermes +ls ~/.local/bin/hermes +``` + +:::tip +The installer adds `~/.local/bin` to your PATH. If you use a non-standard shell config, add `export PATH="$HOME/.local/bin:$PATH"` manually. +::: + +#### Python version too old + +**Cause:** Hermes requires Python 3.11 or newer. + +**Solution:** +```bash +python3 --version # Check current version + +# Install a newer Python +sudo apt install python3.12 # Ubuntu/Debian +brew install python@3.12 # macOS +``` + +The installer handles this automatically — if you see this error during manual installation, upgrade Python first. + +#### `uv: command not found` + +**Cause:** The `uv` package manager isn't installed or not in PATH. + +**Solution:** +```bash +curl -LsSf https://astral.sh/uv/install.sh | sh +source ~/.bashrc +``` + +#### Permission denied errors during install + +**Cause:** Insufficient permissions to write to the install directory. + +**Solution:** +```bash +# Don't use sudo with the installer — it installs to ~/.local/bin +# If you previously installed with sudo, clean up: +sudo rm /usr/local/bin/hermes +# Then re-run the standard installer +curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash +``` + +--- + +### Provider & Model Issues + +#### API key not working + +**Cause:** Key is missing, expired, incorrectly set, or for the wrong provider. + +**Solution:** +```bash +# Check which keys are set +hermes config get OPENROUTER_API_KEY + +# Re-run interactive setup +hermes setup + +# Or set directly +hermes config set OPENROUTER_API_KEY sk-or-v1-xxxxxxxxxxxx +``` + +:::warning +Make sure the key matches the provider. An OpenAI key won't work with OpenRouter and vice versa. Check `~/.hermes/.env` for conflicting entries. 
+::: + +#### Model not available / model not found + +**Cause:** The model identifier is incorrect or not available on your provider. + +**Solution:** +```bash +# List available models for your provider +hermes models + +# Set a valid model +hermes config set HERMES_MODEL openrouter/nous/hermes-3-llama-3.1-70b + +# Or specify per-session +hermes chat --model openrouter/meta-llama/llama-3.1-70b-instruct +``` + +#### Rate limiting (429 errors) + +**Cause:** You've exceeded your provider's rate limits. + +**Solution:** Wait a moment and retry. For sustained usage, consider: +- Upgrading your provider plan +- Switching to a different model or provider +- Using `hermes chat --provider <alternative>` to route to a different backend + +#### Context length exceeded + +**Cause:** The conversation has grown too long for the model's context window. + +**Solution:** +```bash +# Compress the current session +/compress + +# Or start a fresh session +hermes chat + +# Use a model with a larger context window +hermes chat --model openrouter/google/gemini-2.0-flash-001 +``` + +--- + +### Terminal Issues + +#### Command blocked as dangerous + +**Cause:** Hermes detected a potentially destructive command (e.g., `rm -rf`, `DROP TABLE`). This is a safety feature. + +**Solution:** When prompted, review the command and type `y` to approve it. You can also: +- Ask the agent to use a safer alternative +- See the full list of dangerous patterns in the [Security docs](../user-guide/security.md) + +:::tip +This is working as intended — Hermes never silently runs destructive commands. The approval prompt shows you exactly what will execute. +::: + +#### `sudo` not working via messaging gateway + +**Cause:** The messaging gateway runs without an interactive terminal, so `sudo` cannot prompt for a password. 
+ +**Solution:** +- Avoid `sudo` in messaging — ask the agent to find alternatives +- If you must use `sudo`, configure passwordless sudo for specific commands in `/etc/sudoers` +- Or switch to the terminal interface for administrative tasks: `hermes chat` + +#### Docker backend not connecting + +**Cause:** Docker daemon isn't running or the user lacks permissions. + +**Solution:** +```bash +# Check Docker is running +docker info + +# Add your user to the docker group +sudo usermod -aG docker $USER +newgrp docker + +# Verify +docker run hello-world +``` + +--- + +### Messaging Issues + +#### Bot not responding to messages + +**Cause:** The bot isn't running, isn't authorized, or your user isn't in the allowlist. + +**Solution:** +```bash +# Check if the gateway is running +hermes gateway status + +# Start the gateway +hermes gateway start + +# Check logs for errors +hermes gateway logs +``` + +#### Messages not delivering + +**Cause:** Network issues, bot token expired, or platform webhook misconfiguration. + +**Solution:** +- Verify your bot token is valid with `hermes setup` +- Check gateway logs: `hermes gateway logs` +- For webhook-based platforms (Slack, WhatsApp), ensure your server is publicly accessible + +#### Allowlist confusion — who can talk to the bot? + +**Cause:** Authorization mode determines who gets access. + +**Solution:** + +| Mode | How it works | +|------|-------------| +| **Allowlist** | Only user IDs listed in config can interact | +| **DM pairing** | First user to message in DM claims exclusive access | +| **Open** | Anyone can interact (not recommended for production) | + +Configure in `~/.hermes/config.yaml` under your gateway's settings. See the [Messaging docs](../user-guide/messaging/index.md). + +#### Gateway won't start + +**Cause:** Missing dependencies, port conflicts, or misconfigured tokens. 
+ +**Solution:** +```bash +# Install messaging dependencies (quotes keep zsh from globbing the brackets) +pip install "hermes-agent[telegram]" # or [discord], [slack], [whatsapp] + +# Check for port conflicts +lsof -i :8080 + +# Verify configuration +hermes config show +``` + +--- + +### Performance Issues + +#### Slow responses + +**Cause:** Large model, distant API server, or heavy system prompt with many tools. + +**Solution:** +- Try a faster/smaller model: `hermes chat --model openrouter/meta-llama/llama-3.1-8b-instruct` +- Reduce active toolsets: `hermes chat -t "terminal"` +- Check your network latency to the provider +- For local models, ensure you have enough GPU VRAM + +#### High token usage + +**Cause:** Long conversations, verbose system prompts, or many tool calls accumulating context. + +**Solution:** +```bash +# Compress the conversation to reduce tokens +/compress + +# Check session token count +/stats +``` + +:::tip +Use `/compress` regularly during long sessions. It summarizes the conversation history and reduces token usage significantly while preserving context. +::: + +#### Session getting too long + +**Cause:** Extended conversations accumulate messages and tool outputs, approaching context limits. + +**Solution:** +```bash +# Compress current session (preserves key context) +/compress + +# Start a new session with a reference to the old one +hermes chat + +# Resume the most recent session later if needed +hermes chat --continue +``` + +--- + +### MCP Issues + +#### MCP server not connecting + +**Cause:** Server binary not found, wrong command path, or missing runtime. 
+ +**Solution:** +```bash +# Ensure MCP dependencies are installed (quotes keep zsh from globbing the brackets) +pip install "hermes-agent[mcp]" + +# For npm-based servers, ensure Node.js is available +node --version +npx --version + +# Test the server manually +npx -y @modelcontextprotocol/server-filesystem /tmp +``` + +Verify your `~/.hermes/config.yaml` MCP configuration: +```yaml +mcp_servers: + filesystem: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-filesystem", "/home/user/docs"] +``` + +#### Tools not showing up from MCP server + +**Cause:** Server started but tool discovery failed, or tools are filtered out. + +**Solution:** +- Check gateway/agent logs for MCP connection errors +- Ensure the server responds to the `tools/list` RPC method +- Restart the agent — MCP tools are discovered at startup + +```bash +# Verify MCP servers are configured +hermes config show | grep -A 5 mcp_servers + +# Restart hermes to re-discover tools +hermes chat +``` + +#### MCP timeout errors + +**Cause:** The MCP server is taking too long to respond, or it crashed during execution. + +**Solution:** +- Increase the timeout in your MCP server config if supported +- Check if the MCP server process is still running +- For remote HTTP MCP servers, check network connectivity + +:::warning +If an MCP server crashes mid-request, Hermes will report a timeout. Check the server's own logs (not just Hermes logs) to diagnose the root cause. +::: + +--- + +## Still Stuck? + +If your issue isn't covered here: + +1. **Search existing issues:** [GitHub Issues](https://github.com/NousResearch/hermes-agent/issues) +2. **Ask the community:** [Nous Research Discord](https://discord.gg/nousresearch) +3. 
**File a bug report:** Include your OS, Python version (`python3 --version`), Hermes version (`hermes --version`), and the full error message diff --git a/website/docs/user-guide/cli.md b/website/docs/user-guide/cli.md index d80b178b5d..aeeba5f07f 100644 --- a/website/docs/user-guide/cli.md +++ b/website/docs/user-guide/cli.md @@ -65,6 +65,10 @@ hermes -w -q "Fix issue #123" # Single query in worktree The welcome banner shows your model, terminal backend, working directory, available tools, and installed skills at a glance. +### Session Resume Display + +When resuming a previous session (`hermes -c` or `hermes --resume <id>`), a "Previous Conversation" panel appears between the banner and the input prompt, showing a compact recap of the conversation history. See [Sessions — Conversation Recap on Resume](sessions.md#conversation-recap-on-resume) for details and configuration. + ## Keybindings | Key | Action | @@ -229,13 +233,15 @@ Resume options: ```bash hermes --continue # Resume the most recent CLI session hermes -c # Short form +hermes -c "my project" # Resume a named session (latest in lineage) hermes --resume 20260225_143052_a1b2c3 # Resume a specific session by ID +hermes --resume "refactoring auth" # Resume by title hermes -r 20260225_143052_a1b2c3 # Short form ``` Resuming restores the full conversation history from SQLite. The agent sees all previous messages, tool calls, and responses — just as if you never left. -Use `hermes sessions list` to browse past sessions. +Use `/title My Session Name` inside a chat to name the current session, or `hermes sessions rename <id> <title>` from the command line. Use `hermes sessions list` to browse past sessions. 
### Session Logging diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 07096a1894..b600a47619 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -75,7 +75,7 @@ The OpenAI Codex provider authenticates via device code (open a URL, enter a cod ::: :::warning -Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use OpenRouter independently. An `OPENROUTER_API_KEY` enables these tools. +Even when using Nous Portal, Codex, or a custom endpoint, some tools (vision, web summarization, MoA) use a separate "auxiliary" model — by default Gemini Flash via OpenRouter. An `OPENROUTER_API_KEY` enables these tools automatically. You can also configure which model and provider these tools use — see [Auxiliary Models](#auxiliary-models) below. ::: ### First-Class Chinese AI Providers @@ -432,9 +432,121 @@ node_modules/ ```yaml compression: enabled: true - threshold: 0.85 # Compress at 85% of context limit + threshold: 0.85 # Compress at 85% of context limit + summary_model: "google/gemini-3-flash-preview" # Model for summarization + # summary_provider: "auto" # "auto", "openrouter", "nous", "main" ``` +The `summary_model` must support a context length at least as large as your main model's, since it receives the full middle section of the conversation for compression. + +## Auxiliary Models + +Hermes uses lightweight "auxiliary" models for side tasks like image analysis, web page summarization, and browser screenshot analysis. By default, these use **Gemini Flash** via OpenRouter or Nous Portal — you don't need to configure anything. + +To use a different model, add an `auxiliary` section to `~/.hermes/config.yaml`: + +```yaml +auxiliary: + # Image analysis (vision_analyze tool + browser screenshots) + vision: + provider: "auto" # "auto", "openrouter", "nous", "main" + model: "" # e.g. 
"openai/gpt-4o", "google/gemini-2.5-flash" + + # Web page summarization + browser page text extraction + web_extract: + provider: "auto" + model: "" # e.g. "google/gemini-2.5-flash" +``` + +### Changing the Vision Model + +To use GPT-4o instead of Gemini Flash for image analysis: + +```yaml +auxiliary: + vision: + model: "openai/gpt-4o" +``` + +Or via environment variable (in `~/.hermes/.env`): + +```bash +AUXILIARY_VISION_MODEL=openai/gpt-4o +``` + +### Provider Options + +| Provider | Description | Requirements | +|----------|-------------|-------------| +| `"auto"` | Best available (default). Vision tries OpenRouter → Nous → Codex. | — | +| `"openrouter"` | Force OpenRouter — routes to any model (Gemini, GPT-4o, Claude, etc.) | `OPENROUTER_API_KEY` | +| `"nous"` | Force Nous Portal | `hermes login` | +| `"codex"` | Force Codex OAuth (ChatGPT account). Supports vision (gpt-5.3-codex). | `hermes model` → Codex | +| `"main"` | Use your custom endpoint (`OPENAI_BASE_URL` + `OPENAI_API_KEY`). Works with OpenAI, local models, or any OpenAI-compatible API. | `OPENAI_BASE_URL` + `OPENAI_API_KEY` | + +### Common Setups + +**Using OpenAI API key for vision:** +```yaml +# In ~/.hermes/.env: +# OPENAI_BASE_URL=https://api.openai.com/v1 +# OPENAI_API_KEY=sk-... + +auxiliary: + vision: + provider: "main" + model: "gpt-4o" # or "gpt-4o-mini" for cheaper +``` + +**Using OpenRouter for vision** (route to any model): +```yaml +auxiliary: + vision: + provider: "openrouter" + model: "openai/gpt-4o" # or "google/gemini-2.5-flash", etc. 
+``` + +**Using Codex OAuth** (ChatGPT Pro/Plus account — no API key needed): +```yaml +auxiliary: + vision: + provider: "codex" # uses your ChatGPT OAuth token + # model defaults to gpt-5.3-codex (supports vision) +``` + +**Using a local/self-hosted model:** +```yaml +auxiliary: + vision: + provider: "main" # uses your OPENAI_BASE_URL endpoint + model: "my-local-model" +``` + +:::tip +If you use Codex OAuth as your main model provider, vision works automatically — no extra configuration needed. Codex is included in the auto-detection chain for vision. +::: + +:::warning +**Vision requires a multimodal model.** If you set `provider: "main"`, make sure your endpoint supports multimodal/vision — otherwise image analysis will fail. +::: + +### Environment Variables + +You can also configure auxiliary models via environment variables instead of `config.yaml`: + +| Setting | Environment Variable | +|---------|---------------------| +| Vision provider | `AUXILIARY_VISION_PROVIDER` | +| Vision model | `AUXILIARY_VISION_MODEL` | +| Web extract provider | `AUXILIARY_WEB_EXTRACT_PROVIDER` | +| Web extract model | `AUXILIARY_WEB_EXTRACT_MODEL` | +| Compression provider | `CONTEXT_COMPRESSION_PROVIDER` | +| Compression model | `CONTEXT_COMPRESSION_MODEL` | + +:::tip +Run `hermes config` to see your current auxiliary model settings. Overrides only show up when they differ from the defaults. 
+::: + ## Reasoning Effort Control how much "thinking" the model does before responding: @@ -468,6 +580,8 @@ display: tool_progress: all # off | new | all | verbose personality: "kawaii" # Default personality for the CLI compact: false # Compact output mode (less whitespace) + resume_display: full # full (show previous messages on resume) | minimal (one-liner only) + bell_on_complete: false # Play terminal bell when agent finishes (great for long tasks) ``` | Mode | What you see | @@ -507,6 +621,16 @@ code_execution: max_tool_calls: 50 # Max tool calls within code execution ``` +## Browser + +Configure browser automation behavior: + +```yaml +browser: + inactivity_timeout: 120 # Seconds before auto-closing idle sessions + record_sessions: false # Auto-record browser sessions as WebM videos to ~/.hermes/browser_recordings/ +``` + ## Delegation Configure subagent behavior for the delegate tool: diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md index 70201100b7..f7822c8842 100644 --- a/website/docs/user-guide/features/browser.md +++ b/website/docs/user-guide/features/browser.md @@ -142,6 +142,16 @@ What does the chart on this page show? Screenshots are stored in `~/.hermes/browser_screenshots/` and automatically cleaned up after 24 hours. +### `browser_console` + +Get browser console output (log/warn/error messages) and uncaught JavaScript exceptions from the current page. Essential for detecting silent JS errors that don't appear in the accessibility tree. + +``` +Check the browser console for any JavaScript errors +``` + +Use `clear=True` to clear the console after reading, so subsequent calls only show new messages. + ### `browser_close` Close the browser session and release resources. Call this when done to free up Browserbase session quota. @@ -175,6 +185,17 @@ Agent workflow: 4. 
browser_close() ``` +## Session Recording + +Automatically record browser sessions as WebM video files: + +```yaml +browser: + record_sessions: true # default: false +``` + +When enabled, recording starts automatically on the first `browser_navigate` and saves to `~/.hermes/browser_recordings/` when the session closes. Works in both local and cloud (Browserbase) modes. Recordings older than 72 hours are automatically cleaned up. + ## Stealth Features Browserbase provides automatic stealth capabilities: diff --git a/website/docs/user-guide/features/tools.md b/website/docs/user-guide/features/tools.md index daf982fea9..e054adf14c 100644 --- a/website/docs/user-guide/features/tools.md +++ b/website/docs/user-guide/features/tools.md @@ -15,7 +15,7 @@ Tools are functions that extend the agent's capabilities. They're organized into | **Web** | `web_search`, `web_extract` | Search the web, extract page content | | **Terminal** | `terminal`, `process` | Execute commands (local/docker/singularity/modal/daytona/ssh backends), manage background processes | | **File** | `read_file`, `write_file`, `patch`, `search_files` | Read, write, edit, and search files | -| **Browser** | `browser_navigate`, `browser_click`, `browser_type`, etc. | Full browser automation via Browserbase | +| **Browser** | `browser_navigate`, `browser_click`, `browser_type`, `browser_console`, etc. 
| Full browser automation via Browserbase | | **Vision** | `vision_analyze` | Image analysis via multimodal models | | **Image Gen** | `image_generate` | Generate images (FLUX via FAL) | | **TTS** | `text_to_speech` | Text-to-speech (Edge TTS / ElevenLabs / OpenAI) | diff --git a/website/docs/user-guide/messaging/discord.md b/website/docs/user-guide/messaging/discord.md index 328196ce93..26d1d530bc 100644 --- a/website/docs/user-guide/messaging/discord.md +++ b/website/docs/user-guide/messaging/discord.md @@ -6,52 +6,255 @@ description: "Set up Hermes Agent as a Discord bot" # Discord Setup -Connect Hermes Agent to Discord to chat with it in DMs or server channels. +Hermes Agent integrates with Discord as a bot, letting you chat with your AI assistant through direct messages or server channels. The bot receives your messages, processes them through the Hermes Agent pipeline (including tool use, memory, and reasoning), and responds in real time. It supports text, voice messages, file attachments, and slash commands. -## Setup Steps +This guide walks you through the full setup process — from creating your bot on Discord's Developer Portal to sending your first message. -1. **Create a bot:** Go to the [Discord Developer Portal](https://discord.com/developers/applications) -2. **Enable intents:** Bot → Privileged Gateway Intents → enable **Message Content Intent** -3. **Get your user ID:** Enable Developer Mode in Discord settings, right-click your name → Copy ID -4. **Invite to your server:** OAuth2 → URL Generator → scopes: `bot`, `applications.commands` → permissions: Send Messages, Read Message History, Attach Files -5. **Configure:** Run `hermes gateway setup` and select Discord, or add to `~/.hermes/.env` manually: +## Step 1: Create a Discord Application -```bash -DISCORD_BOT_TOKEN=MTIz... -DISCORD_ALLOWED_USERS=YOUR_USER_ID +1. Go to the [Discord Developer Portal](https://discord.com/developers/applications) and sign in with your Discord account. +2. 
Click **New Application** in the top-right corner. +3. Enter a name for your application (e.g., "Hermes Agent") and accept the Developer Terms of Service. +4. Click **Create**. + +You'll land on the **General Information** page. Note the **Application ID** — you'll need it later to build the invite URL. + +## Step 2: Create the Bot + +1. In the left sidebar, click **Bot**. +2. Discord automatically creates a bot user for your application. You'll see the bot's username, which you can customize. +3. Under **Authorization Flow**: + - Set **Public Bot** to **OFF** — this prevents other people from inviting your bot to their servers. + - Leave **Require OAuth2 Code Grant** set to **OFF**. + +:::tip +You can set a custom avatar and banner for your bot on this page. This is what users will see in Discord. +::: + +## Step 3: Enable Privileged Gateway Intents + +This is the most critical step in the entire setup. Without the correct intents enabled, your bot will connect to Discord but **will not be able to read message content**. + +On the **Bot** page, scroll down to **Privileged Gateway Intents**. You'll see three toggles: + +| Intent | Purpose | Required? | +|--------|---------|-----------| +| **Presence Intent** | See user online/offline status | Optional | +| **Server Members Intent** | Access the member list | Optional | +| **Message Content Intent** | Read the text content of messages | **Required** | + +**Enable Message Content Intent** by toggling it **ON**. Without this, your bot receives message events but the message text is empty — the bot literally cannot see what you typed. + +:::warning[This is the #1 reason Discord bots don't work] +If your bot is online but never responds to messages, the **Message Content Intent** is almost certainly disabled. Go back to the [Developer Portal](https://discord.com/developers/applications), select your application → Bot → Privileged Gateway Intents, and make sure **Message Content Intent** is toggled ON. 
Click **Save Changes**. +::: + +**Regarding server count:** +- If your bot is in **fewer than 100 servers**, you can simply toggle intents on and off freely. +- If your bot is in **100 or more servers**, Discord requires you to submit a verification application to use privileged intents. For personal use, this is not a concern. + +Click **Save Changes** at the bottom of the page. + +## Step 4: Get the Bot Token + +The bot token is the credential Hermes Agent uses to log in as your bot. Still on the **Bot** page: + +1. Under the **Token** section, click **Reset Token**. +2. If you have two-factor authentication enabled on your Discord account, enter your 2FA code. +3. Discord will display your new token. **Copy it immediately.** + +:::warning[Token shown only once] +The token is only displayed once. If you lose it, you'll need to reset it and generate a new one. Never share your token publicly or commit it to Git — anyone with this token has full control of your bot. +::: + +Store the token somewhere safe (a password manager, for example). You'll need it in Step 8. + +## Step 5: Generate the Invite URL + +You need an OAuth2 URL to invite the bot to your server. There are two ways to do this: + +### Option A: Using the Installation Tab (Recommended) + +1. In the left sidebar, click **Installation**. +2. Under **Installation Contexts**, enable **Guild Install**. +3. For **Install Link**, select **Discord Provided Link**. +4. Under **Default Install Settings** for Guild Install: + - **Scopes**: select `bot` and `applications.commands` + - **Permissions**: select the permissions listed below. + +### Option B: Manual URL + +You can construct the invite URL directly using this format: + +``` +https://discord.com/oauth2/authorize?client_id=YOUR_APP_ID&scope=bot+applications.commands&permissions=274878286912 ``` -6. **Start the gateway:** +Replace `YOUR_APP_ID` with the Application ID from Step 1. 
+ +### Required Permissions + +These are the minimum permissions your bot needs: + +- **View Channels** — see the channels it has access to +- **Send Messages** — respond to your messages +- **Embed Links** — format rich responses +- **Attach Files** — send images, audio, and file outputs +- **Read Message History** — maintain conversation context + +### Recommended Additional Permissions + +- **Send Messages in Threads** — respond in thread conversations +- **Add Reactions** — react to messages for acknowledgment + +### Permission Integers + +| Level | Permissions Integer | What's Included | +|-------|-------------------|-----------------| +| Minimal | `117760` | View Channels, Send Messages, Embed Links, Attach Files, Read Message History | +| Recommended | `274878286912` | All of the above plus Send Messages in Threads, Add Reactions, Use External Emojis | + +## Step 6: Invite to Your Server + +1. Open the invite URL in your browser (from the Installation tab or the manual URL you constructed). +2. In the **Add to Server** dropdown, select your server. +3. Click **Continue**, then **Authorize**. +4. Complete the CAPTCHA if prompted. + +:::info +You need the **Manage Server** permission on the Discord server to invite a bot. If you don't see your server in the dropdown, ask a server admin to use the invite link instead. +::: + +After authorizing, the bot will appear in your server's member list (it will show as offline until you start the Hermes gateway). + +## Step 7: Find Your Discord User ID + +Hermes Agent uses your Discord User ID to control who can interact with the bot. To find it: + +1. Open Discord (desktop or web app). +2. Go to **Settings** → **Advanced** → toggle **Developer Mode** to **ON**. +3. Close settings. +4. Right-click your own username (in a message, the member list, or your profile) → **Copy User ID**. + +Your User ID is a long number like `284102345871466496`. 
+ +:::tip +Developer Mode also lets you copy **Channel IDs** and **Server IDs** the same way — right-click the channel or server name and select Copy ID. You'll need a Channel ID if you want to set a home channel manually. +::: + +## Step 8: Configure Hermes Agent + +### Option A: Interactive Setup (Recommended) + +Run the guided setup command: + +```bash +hermes gateway setup +``` + +Select **Discord** when prompted, then paste your bot token and user ID when asked. + +### Option B: Manual Configuration + +Add the following to your `~/.hermes/.env` file: + +```bash +# Required +DISCORD_BOT_TOKEN=your-bot-token-from-developer-portal +DISCORD_ALLOWED_USERS=284102345871466496 + +# Multiple allowed users (comma-separated) +# DISCORD_ALLOWED_USERS=284102345871466496,198765432109876543 +``` + +### Start the Gateway + +Once configured, start the Discord gateway: ```bash hermes gateway ``` -## Optional: Home Channel +The bot should come online in Discord within a few seconds. Send it a message — either a DM or in a channel it can see — to test. -Set a default channel for cron job delivery: +:::tip +You can run `hermes gateway` in the background or as a systemd service for persistent operation. See the deployment docs for details. +::: + +## Home Channel + +You can designate a "home channel" where the bot sends proactive messages (such as cron job output, reminders, and notifications). There are two ways to set it: + +### Using the Slash Command + +Type `/sethome` in any Discord channel where the bot is present. That channel becomes the home channel. + +### Manual Configuration + +Add these to your `~/.hermes/.env`: ```bash DISCORD_HOME_CHANNEL=123456789012345678 DISCORD_HOME_CHANNEL_NAME="#bot-updates" ``` -Or use `/sethome` in any Discord channel. +Replace the ID with the actual channel ID (right-click → Copy Channel ID with Developer Mode on). 
-## Required Bot Permissions +## Bot Behavior -When generating the invite URL, make sure to include: - -- **Send Messages** — bot needs to reply -- **Read Message History** — for context -- **Attach Files** — for audio, images, and file outputs +- **Server channels**: The bot responds to all messages from allowed users in channels it can access. It does **not** require a mention or prefix — any message from an allowed user is treated as a prompt. +- **Direct messages**: DMs always work, even without the Message Content Intent enabled (Discord exempts DMs from this requirement). However, you should still enable the intent for server channel support. +- **Conversations**: Each channel or DM maintains its own conversation context. ## Voice Messages -Voice messages on Discord are automatically transcribed (requires `VOICE_TOOLS_OPENAI_KEY`). TTS audio is sent as MP3 file attachments. +Hermes Agent supports Discord voice messages: + +- **Incoming voice messages** are automatically transcribed using Whisper (requires `VOICE_TOOLS_OPENAI_KEY` to be set in your environment). +- **Text-to-speech**: When TTS is enabled, the bot can send spoken responses as MP3 file attachments. + +## Troubleshooting + +### Bot is online but not responding to messages + +**Cause**: Message Content Intent is disabled. + +**Fix**: Go to [Developer Portal](https://discord.com/developers/applications) → your app → Bot → Privileged Gateway Intents → enable **Message Content Intent** → Save Changes. Restart the gateway. + +### "Disallowed Intents" error on startup + +**Cause**: Your code requests intents that aren't enabled in the Developer Portal. + +**Fix**: Enable all three Privileged Gateway Intents (Presence, Server Members, Message Content) in the Bot settings, then restart. + +### Bot can't see messages in a specific channel + +**Cause**: The bot's role doesn't have permission to view that channel. 
+ +**Fix**: In Discord, go to the channel's settings → Permissions → add the bot's role with **View Channel** and **Read Message History** enabled. + +### 403 Forbidden errors + +**Cause**: The bot is missing required permissions. + +**Fix**: Re-invite the bot with the correct permissions using the URL from Step 5, or manually adjust the bot's role permissions in Server Settings → Roles. + +### Bot is offline + +**Cause**: The Hermes gateway isn't running, or the token is incorrect. + +**Fix**: Check that `hermes gateway` is running. Verify `DISCORD_BOT_TOKEN` in your `.env` file. If you recently reset the token, update it. + +### "User not allowed" / Bot ignores you + +**Cause**: Your User ID isn't in `DISCORD_ALLOWED_USERS`. + +**Fix**: Add your User ID to `DISCORD_ALLOWED_USERS` in `~/.hermes/.env` and restart the gateway. ## Security :::warning -Always set `DISCORD_ALLOWED_USERS` to restrict who can use the bot. Without it, the gateway denies all users by default. +Always set `DISCORD_ALLOWED_USERS` to restrict who can interact with the bot. Without it, the gateway denies all users by default as a safety measure. Only add User IDs of people you trust — authorized users have full access to the agent's capabilities, including tool use and system access. ::: + +For more information on securing your Hermes Agent deployment, see the [Security Guide](../security.md). 
diff --git a/website/docs/user-guide/messaging/index.md b/website/docs/user-guide/messaging/index.md index f93275c865..913f2fdc50 100644 --- a/website/docs/user-guide/messaging/index.md +++ b/website/docs/user-guide/messaging/index.md @@ -1,12 +1,12 @@ --- sidebar_position: 1 title: "Messaging Gateway" -description: "Chat with Hermes from Telegram, Discord, Slack, or WhatsApp — architecture and setup overview" +description: "Chat with Hermes from Telegram, Discord, Slack, WhatsApp, or Signal — architecture and setup overview" --- # Messaging Gateway -Chat with Hermes from Telegram, Discord, Slack, or WhatsApp. The gateway is a single background process that connects to all your configured platforms, handles sessions, runs cron jobs, and delivers voice messages. +Chat with Hermes from Telegram, Discord, Slack, WhatsApp, or Signal. The gateway is a single background process that connects to all your configured platforms, handles sessions, runs cron jobs, and delivers voice messages. ## Architecture @@ -15,12 +15,12 @@ Chat with Hermes from Telegram, Discord, Slack, or WhatsApp. 
The gateway is a si │ Hermes Gateway │ ├─────────────────────────────────────────────────────────────────┤ │ │ -│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ -│ │ Telegram │ │ Discord │ │ WhatsApp │ │ Slack │ │ -│ │ Adapter │ │ Adapter │ │ Adapter │ │ Adapter │ │ -│ └────┬─────┘ └────┬─────┘ └────┬─────┘ └────┬─────┘ │ -│ │ │ │ │ │ -│ └─────────────┼────────────┼─────────────┘ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌────────┐ │ +│ │ Telegram │ │ Discord │ │ WhatsApp │ │ Slack │ │ Signal │ │ +│ │ Adapter │ │ Adapter │ │ Adapter │ │ Adapter │ │ Adapter│ │ +│ └────┬─────┘ └────┬─────┘ └────┬─────┘ └────┬─────┘ └───┬────┘ │ +│ │ │ │ │ │ │ +│ └─────────────┼────────────┼─────────────┼───────────┘ │ │ │ │ │ ┌────────▼────────┐ │ │ │ Session Store │ │ @@ -114,6 +114,7 @@ Configure per-platform overrides in `~/.hermes/gateway.json`: # Restrict to specific users (recommended): TELEGRAM_ALLOWED_USERS=123456789,987654321 DISCORD_ALLOWED_USERS=123456789012345678 +SIGNAL_ALLOWED_USERS=+15551234567,+15559876543 # Or allow specific users across all platforms (comma-separated user IDs): GATEWAY_ALLOWED_USERS=123456789,987654321 @@ -200,6 +201,7 @@ Each platform has its own toolset: | Discord | `hermes-discord` | Full tools including terminal | | WhatsApp | `hermes-whatsapp` | Full tools including terminal | | Slack | `hermes-slack` | Full tools including terminal | +| Signal | `hermes-signal` | Full tools including terminal | ## Next Steps @@ -207,3 +209,4 @@ Each platform has its own toolset: - [Discord Setup](discord.md) - [Slack Setup](slack.md) - [WhatsApp Setup](whatsapp.md) +- [Signal Setup](signal.md) diff --git a/website/docs/user-guide/messaging/signal.md b/website/docs/user-guide/messaging/signal.md new file mode 100644 index 0000000000..dae1e6eeef --- /dev/null +++ b/website/docs/user-guide/messaging/signal.md @@ -0,0 +1,223 @@ +--- +sidebar_position: 6 +title: "Signal" +description: "Set up Hermes Agent as a Signal messenger bot via 
signal-cli daemon" +--- + +# Signal Setup + +Hermes connects to Signal through the [signal-cli](https://github.com/AsamK/signal-cli) daemon running in HTTP mode. The adapter streams messages in real-time via SSE (Server-Sent Events) and sends responses via JSON-RPC. + +Signal is the most privacy-focused mainstream messenger — end-to-end encrypted by default, open-source protocol, minimal metadata collection. This makes it ideal for security-sensitive agent workflows. + +:::info No New Python Dependencies +The Signal adapter uses `httpx` (already a core Hermes dependency) for all communication. No additional Python packages are required. You just need signal-cli installed externally. +::: + +--- + +## Prerequisites + +- **signal-cli** — Java-based Signal client ([GitHub](https://github.com/AsamK/signal-cli)) +- **Java 17+** runtime — required by signal-cli +- **A phone number** with Signal installed (for linking as a secondary device) + +### Installing signal-cli + +```bash +# Linux (Debian/Ubuntu) +sudo apt install signal-cli + +# macOS +brew install signal-cli + +# Manual install (any platform) +# Download from https://github.com/AsamK/signal-cli/releases +# Extract and add to PATH +``` + +### Alternative: Docker (signal-cli-rest-api) + +If you prefer Docker, use the [signal-cli-rest-api](https://github.com/bbernhard/signal-cli-rest-api) container: + +```bash +docker run -d --name signal-cli \ + -p 8080:8080 \ + -v $HOME/.local/share/signal-cli:/home/.local/share/signal-cli \ + -e MODE=json-rpc \ + bbernhard/signal-cli-rest-api +``` + +:::tip +Use `MODE=json-rpc` for best performance. The `normal` mode spawns a JVM per request and is much slower. +::: + +--- + +## Step 1: Link Your Signal Account + +Signal-cli works as a **linked device** — like WhatsApp Web, but for Signal. Your phone stays the primary device. + +```bash +# Generate a linking URI (displays a QR code or link) +signal-cli link -n "HermesAgent" +``` + +1. Open **Signal** on your phone +2. 
Go to **Settings → Linked Devices** +3. Tap **Link New Device** +4. Scan the QR code or enter the URI + +--- + +## Step 2: Start the signal-cli Daemon + +```bash +# Replace +1234567890 with your Signal phone number (E.164 format) +signal-cli --account +1234567890 daemon --http 127.0.0.1:8080 +``` + +:::tip +Keep this running in the background. You can use `systemd`, `tmux`, `screen`, or run it as a service. +::: + +Verify it's running: + +```bash +curl http://127.0.0.1:8080/api/v1/check +# Should return: {"versions":{"signal-cli":...}} +``` + +--- + +## Step 3: Configure Hermes + +The easiest way: + +```bash +hermes gateway setup +``` + +Select **Signal** from the platform menu. The wizard will: + +1. Check if signal-cli is installed +2. Prompt for the HTTP URL (default: `http://127.0.0.1:8080`) +3. Test connectivity to the daemon +4. Ask for your account phone number +5. Configure allowed users and access policies + +### Manual Configuration + +Add to `~/.hermes/.env`: + +```bash +# Required +SIGNAL_HTTP_URL=http://127.0.0.1:8080 +SIGNAL_ACCOUNT=+1234567890 + +# Security (recommended) +SIGNAL_ALLOWED_USERS=+1234567890,+0987654321 # Comma-separated E.164 numbers or UUIDs + +# Optional +SIGNAL_GROUP_ALLOWED_USERS=groupId1,groupId2 # Enable groups (omit to disable, * for all) +SIGNAL_HOME_CHANNEL=+1234567890 # Default delivery target for cron jobs +``` + +Then start the gateway: + +```bash +hermes gateway # Foreground +hermes gateway install # Install as a system service +``` + +--- + +## Access Control + +### DM Access + +DM access follows the same pattern as all other Hermes platforms: + +1. **`SIGNAL_ALLOWED_USERS` set** → only those users can message +2. **No allowlist set** → unknown users get a DM pairing code (approve via `hermes pairing approve signal CODE`) +3. 
**`SIGNAL_ALLOW_ALL_USERS=true`** → anyone can message (use with caution) + +### Group Access + +Group access is controlled by the `SIGNAL_GROUP_ALLOWED_USERS` env var: + +| Configuration | Behavior | +|---------------|----------| +| Not set (default) | All group messages are ignored. The bot only responds to DMs. | +| Set with group IDs | Only listed groups are monitored (e.g., `groupId1,groupId2`). | +| Set to `*` | The bot responds in any group it's a member of. | + +--- + +## Features + +### Attachments + +The adapter supports sending and receiving: + +- **Images** — PNG, JPEG, GIF, WebP (auto-detected via magic bytes) +- **Audio** — MP3, OGG, WAV, M4A (voice messages transcribed if Whisper is configured) +- **Documents** — PDF, ZIP, and other file types + +Attachment size limit: **100 MB**. + +### Typing Indicators + +The bot sends typing indicators while processing messages, refreshing every 8 seconds. + +### Phone Number Redaction + +All phone numbers are automatically redacted in logs: +- `+15551234567` → `+155****4567` +- This applies to both Hermes gateway logs and the global redaction system + +### Health Monitoring + +The adapter monitors the SSE connection and automatically reconnects if: +- The connection drops (with exponential backoff: 2s → 60s) +- No activity is detected for 120 seconds (pings signal-cli to verify) + +--- + +## Troubleshooting + +| Problem | Solution | +|---------|----------| +| **"Cannot reach signal-cli"** during setup | Ensure signal-cli daemon is running: `signal-cli --account +YOUR_NUMBER daemon --http 127.0.0.1:8080` | +| **Messages not received** | Check that `SIGNAL_ALLOWED_USERS` includes the sender's number in E.164 format (with `+` prefix) | +| **"signal-cli not found on PATH"** | Install signal-cli and ensure it's in your PATH, or use Docker | +| **Connection keeps dropping** | Check signal-cli logs for errors. Ensure Java 17+ is installed. | +| **Group messages ignored** | Groups are disabled unless `SIGNAL_GROUP_ALLOWED_USERS` is set. 
Set it to a comma-separated list of group IDs, or `*` for all groups. | +| **Bot responds to everyone** | Make sure `SIGNAL_ALLOW_ALL_USERS` is not set to `true`, then configure `SIGNAL_ALLOWED_USERS` or rely on DM pairing | +| **Duplicate messages** | Ensure only one signal-cli instance is listening on your phone number | + +--- + +## Security + +:::warning +**Always configure access controls.** The bot has terminal access by default. Without `SIGNAL_ALLOWED_USERS` or DM pairing, the gateway denies all incoming messages as a safety measure. +::: + +- Phone numbers are redacted in all log output +- Use DM pairing (the default when no allowlist is set) for safe onboarding of new users +- Keep groups disabled unless you specifically need group support +- Signal's end-to-end encryption protects message content in transit +- The signal-cli session data in `~/.local/share/signal-cli/` contains account credentials — protect it like a password + +--- + +## Environment Variables Reference + +| Variable | Required | Default | Description | +|----------|----------|---------|-------------| +| `SIGNAL_HTTP_URL` | Yes | — | signal-cli HTTP endpoint | +| `SIGNAL_ACCOUNT` | Yes | — | Bot phone number (E.164) | +| `SIGNAL_ALLOWED_USERS` | No | — | Comma-separated phone numbers/UUIDs | +| `SIGNAL_GROUP_ALLOWED_USERS` | No | — | Group IDs to monitor, or `*` for all (omit to disable groups) | +| `SIGNAL_HOME_CHANNEL` | No | — | Default delivery target for cron jobs | diff --git a/website/docs/user-guide/messaging/slack.md b/website/docs/user-guide/messaging/slack.md index b008d56b34..52dde5f6a9 100644 --- a/website/docs/user-guide/messaging/slack.md +++ b/website/docs/user-guide/messaging/slack.md @@ -1,57 +1,214 @@ --- sidebar_position: 4 title: "Slack" -description: "Set up Hermes Agent as a Slack bot" +description: "Set up Hermes Agent as a Slack bot using Socket Mode" --- # Slack Setup -Connect Hermes Agent to Slack using Socket Mode for real-time communication. +Connect Hermes Agent to Slack as a bot using Socket Mode. 
Socket Mode uses WebSockets instead of +public HTTP endpoints, so your Hermes instance doesn't need to be publicly accessible — it works +behind firewalls, on your laptop, or on a private server. -## Setup Steps +:::warning Classic Slack Apps Deprecated +Classic Slack apps (using RTM API) were **fully deprecated in March 2025**. Hermes uses the modern +Bolt SDK with Socket Mode. If you have an old classic app, you must create a new one following +the steps below. +::: -1. **Create an app:** Go to [Slack API](https://api.slack.com/apps), create a new app -2. **Enable Socket Mode:** In app settings → Socket Mode → Enable -3. **Get tokens:** - - Bot Token (`xoxb-...`): OAuth & Permissions → Install to Workspace - - App Token (`xapp-...`): Basic Information → App-Level Tokens → Generate (with `connections:write` scope) -4. **Configure:** Run `hermes gateway setup` and select Slack, or add to `~/.hermes/.env` manually: +## Overview + +| Component | Value | +|-----------|-------| +| **Library** | `slack-bolt` (Bolt for Python, Socket Mode) | +| **Connection** | WebSocket — no public URL required | +| **Auth tokens needed** | Bot Token (`xoxb-`) + App-Level Token (`xapp-`) | +| **User identification** | Slack Member IDs (e.g., `U01ABC2DEF3`) | + +--- + +## Step 1: Create a Slack App + +1. Go to [https://api.slack.com/apps](https://api.slack.com/apps) +2. Click **Create New App** +3. Choose **From scratch** +4. Enter an app name (e.g., "Hermes Agent") and select your workspace +5. Click **Create App** + +You'll land on the app's **Basic Information** page. + +--- + +## Step 2: Configure Bot Token Scopes + +Navigate to **Features → OAuth & Permissions** in the sidebar. 
Scroll to **Scopes → Bot Token Scopes** and add the following: + +| Scope | Purpose | +|-------|---------| +| `chat:write` | Send messages as the bot | +| `app_mentions:read` | Respond when @mentioned in channels | +| `channels:history` | Read messages in public channels the bot is in | +| `channels:read` | List and get info about public channels | +| `im:history` | Read direct message history | +| `im:read` | View basic DM info | +| `im:write` | Open and manage DMs | +| `users:read` | Look up user information | + +**Optional scopes:** + +| Scope | Purpose | +|-------|---------| +| `groups:history` | Read messages in private channels the bot is invited to | +| `files:write` | Upload files (audio, images) | + +--- + +## Step 3: Enable Socket Mode + +Socket Mode lets the bot connect via WebSocket instead of requiring a public URL. + +1. In the sidebar, go to **Settings → Socket Mode** +2. Toggle **Enable Socket Mode** to ON +3. You'll be prompted to create an **App-Level Token**: + - Name it something like `hermes-socket` (the name doesn't matter) + - Add the **`connections:write`** scope + - Click **Generate** +4. **Copy the token** — it starts with `xapp-`. This is your `SLACK_APP_TOKEN` + +:::tip +You can always find or regenerate app-level tokens under **Settings → Basic Information → App-Level Tokens**. +::: + +--- + +## Step 4: Subscribe to Events + +1. In the sidebar, go to **Features → Event Subscriptions** +2. Toggle **Enable Events** to ON +3. Expand **Subscribe to bot events** and add: + +| Event | Purpose | +|-------|---------| +| `app_mention` | Bot responds when @mentioned in any channel | +| `message.im` | Bot responds to direct messages | + +**Optional event:** + +| Event | Purpose | +|-------|---------| +| `message.channels` | Bot sees all messages in public channels it's added to | + +4. Click **Save Changes** at the bottom of the page + +--- + +## Step 5: Install App to Workspace + +1. In the sidebar, go to **Settings → Install App** +2. 
Click **Install to Workspace** +3. Review the permissions and click **Allow** +4. After authorization, you'll see a **Bot User OAuth Token** starting with `xoxb-` +5. **Copy this token** — this is your `SLACK_BOT_TOKEN` + +:::tip +If you change scopes later, you'll need to **reinstall the app** for the new scopes to take effect. +The Install App page will show a banner prompting you to do so. +::: + +--- + +## Step 6: Find User IDs for the Allowlist + +Hermes uses Slack **Member IDs** (not usernames or display names) for the allowlist. + +To find a Member ID: + +1. In Slack, click on the user's name or avatar +2. Click **View full profile** +3. Click the **⋮** (more) button +4. Select **Copy member ID** + +Member IDs look like `U01ABC2DEF3`. You need your own Member ID at minimum. + +--- + +## Step 7: Configure Hermes + +Add the following to your `~/.hermes/.env` file: ```bash -SLACK_BOT_TOKEN=xoxb-... -SLACK_APP_TOKEN=xapp-... -SLACK_ALLOWED_USERS=U01234ABCDE # Comma-separated Slack user IDs +# Required +SLACK_BOT_TOKEN=xoxb-your-bot-token-here +SLACK_APP_TOKEN=xapp-your-app-level-token-here +SLACK_ALLOWED_USERS=U01ABC2DEF3 # Comma-separated Member IDs + +# Optional +SLACK_HOME_CHANNEL=C01234567890 # Default channel for cron/scheduled messages ``` -5. **Start the gateway:** +Or run the interactive setup: ```bash -hermes gateway +hermes gateway setup # Select Slack when prompted ``` -## Optional: Home Channel +Then start the gateway: -Set a default channel for cron job delivery: +```bash +hermes gateway # Foreground +hermes gateway install # Install as a system service +``` + +--- + +## Home Channel + +Set `SLACK_HOME_CHANNEL` to a channel ID where Hermes will deliver scheduled messages, +cron job results, and other proactive notifications. To find a channel ID: + +1. Right-click the channel name in Slack +2. Click **View channel details** +3. 
Scroll to the bottom — the Channel ID is shown there ```bash SLACK_HOME_CHANNEL=C01234567890 ``` -## Required Bot Scopes +Make sure the bot has been **invited to the channel** (`/invite @Hermes Agent`). -Make sure your Slack app has these OAuth scopes: - -- `chat:write` — Send messages -- `channels:history` — Read channel messages -- `im:history` — Read DM messages -- `files:write` — Upload files (audio, images) +--- ## Voice Messages -Voice messages on Slack are automatically transcribed (requires `VOICE_TOOLS_OPENAI_KEY`). TTS audio is sent as file attachments. +Hermes supports voice on Slack: + +- **Incoming:** Voice/audio messages are automatically transcribed using Whisper (requires `VOICE_TOOLS_OPENAI_KEY`) +- **Outgoing:** TTS responses are sent as audio file attachments + +--- + +## Troubleshooting + +| Problem | Solution | +|---------|----------| +| Bot doesn't respond to DMs | Verify `message.im` is in your event subscriptions and the app is reinstalled | +| Bot doesn't respond to @mentions | Verify `app_mention` is in your event subscriptions | +| "not_authed" or "invalid_auth" errors | Regenerate your Bot Token and App Token, update `.env` | +| Bot responds but can't post in a channel | Invite the bot to the channel with `/invite @Hermes Agent` | +| "missing_scope" error | Add the required scope in OAuth & Permissions, then **reinstall** the app | +| Socket disconnects frequently | Check your network; Bolt auto-reconnects but unstable connections cause lag | + +--- ## Security :::warning -Always set `SLACK_ALLOWED_USERS` to restrict who can use the bot. Without it, the gateway denies all users by default. +**Always set `SLACK_ALLOWED_USERS`** with the Member IDs of authorized users. Without this setting, +the gateway will **deny all messages** by default as a safety measure. Never share your bot tokens — +treat them like passwords. 
::: + +- Tokens should be stored in `~/.hermes/.env` (file permissions `600`) +- Rotate tokens periodically via the Slack app settings +- Audit who has access to your Hermes config directory +- Socket Mode means no public endpoint is exposed — one less attack surface diff --git a/website/docs/user-guide/messaging/telegram.md b/website/docs/user-guide/messaging/telegram.md index b802ba86b9..123b813971 100644 --- a/website/docs/user-guide/messaging/telegram.md +++ b/website/docs/user-guide/messaging/telegram.md @@ -1,51 +1,144 @@ --- -sidebar_position: 2 +sidebar_position: 1 title: "Telegram" description: "Set up Hermes Agent as a Telegram bot" --- # Telegram Setup -Connect Hermes Agent to Telegram so you can chat from your phone, send voice memos, and receive scheduled task results. +Hermes Agent integrates with Telegram as a full-featured conversational bot. Once connected, you can chat with your agent from any device, send voice memos that get auto-transcribed, receive scheduled task results, and use the agent in group chats. The integration is built on [python-telegram-bot](https://python-telegram-bot.org/) and supports text, voice, images, and file attachments. -## Setup Steps +## Step 1: Create a Bot via BotFather -1. **Create a bot:** Message [@BotFather](https://t.me/BotFather) on Telegram, use `/newbot` -2. **Get your user ID:** Message [@userinfobot](https://t.me/userinfobot) — it replies with your numeric ID -3. **Configure:** Run `hermes gateway setup` and select Telegram, or add to `~/.hermes/.env` manually: +Every Telegram bot requires an API token issued by [@BotFather](https://t.me/BotFather), Telegram's official bot management tool. -```bash -TELEGRAM_BOT_TOKEN=123456:ABC-DEF... -TELEGRAM_ALLOWED_USERS=YOUR_USER_ID # Comma-separated for multiple users +1. Open Telegram and search for **@BotFather**, or visit [t.me/BotFather](https://t.me/BotFather) +2. Send `/newbot` +3. Choose a **display name** (e.g., "Hermes Agent") — this can be anything +4. 
Choose a **username** — this must be unique and end in `bot` (e.g., `my_hermes_bot`) +5. BotFather replies with your **API token**. It looks like this: + +``` +123456789:ABCdefGHIjklMNOpqrSTUvwxYZ ``` -4. **Start the gateway:** +:::warning +Keep your bot token secret. Anyone with this token can control your bot. If it leaks, revoke it immediately via `/revoke` in BotFather. +::: + +## Step 2: Customize Your Bot (Optional) + +These BotFather commands improve the user experience. Message @BotFather and use: + +| Command | Purpose | +|---------|---------| +| `/setdescription` | The "What can this bot do?" text shown before a user starts chatting | +| `/setabouttext` | Short text on the bot's profile page | +| `/setuserpic` | Upload an avatar for your bot | +| `/setcommands` | Define the command menu (the `/` button in chat) | +| `/setprivacy` | Control whether the bot sees all group messages (see Step 3) | + +:::tip +For `/setcommands`, a useful starting set: + +``` +help - Show help information +new - Start a new conversation +sethome - Set this chat as the home channel +``` +::: + +## Step 3: Privacy Mode (Critical for Groups) + +Telegram bots have a **privacy mode** that is **enabled by default**. This is the single most common source of confusion when using bots in groups. + +**With privacy mode ON**, your bot can only see: +- Messages that start with a `/` command +- Replies directly to the bot's own messages +- Service messages (member joins/leaves, pinned messages, etc.) +- Messages in channels where the bot is an admin + +**With privacy mode OFF**, the bot receives every message in the group. + +### How to disable privacy mode + +1. Message **@BotFather** +2. Send `/mybots` +3. Select your bot +4. Go to **Bot Settings → Group Privacy → Turn off** + +:::warning +**You must remove and re-add the bot to any group** after changing the privacy setting. 
Telegram caches the privacy state when a bot joins a group, and it will not update until the bot is removed and re-added. +::: + +:::tip +An alternative to disabling privacy mode: promote the bot to **group admin**. Admin bots always receive all messages regardless of the privacy setting, and this avoids needing to toggle the global privacy mode. +::: + +## Step 4: Find Your User ID + +Hermes Agent uses numeric Telegram user IDs to control access. Your user ID is **not** your username — it's a number like `123456789`. + +**Method 1 (recommended):** Message [@userinfobot](https://t.me/userinfobot) — it instantly replies with your user ID. + +**Method 2:** Message [@get_id_bot](https://t.me/get_id_bot) — another reliable option. + +Save this number; you'll need it for the next step. + +## Step 5: Configure Hermes + +### Option A: Interactive Setup (Recommended) + +```bash +hermes gateway setup +``` + +Select **Telegram** when prompted. The wizard asks for your bot token and allowed user IDs, then writes the configuration for you. + +### Option B: Manual Configuration + +Add the following to `~/.hermes/.env`: + +```bash +TELEGRAM_BOT_TOKEN=123456789:ABCdefGHIjklMNOpqrSTUvwxYZ +TELEGRAM_ALLOWED_USERS=123456789 # Comma-separated for multiple users +``` + +### Start the Gateway ```bash hermes gateway ``` -## Optional: Home Channel +The bot should come online within seconds. Send it a message on Telegram to verify. -Set a home channel for cron job delivery: +## Home Channel + +Use the `/sethome` command in any Telegram chat (DM or group) to designate it as the **home channel**. Scheduled tasks (cron jobs) deliver their results to this channel. + +You can also set it manually in `~/.hermes/.env`: ```bash TELEGRAM_HOME_CHANNEL=-1001234567890 TELEGRAM_HOME_CHANNEL_NAME="My Notes" ``` -Or use the `/sethome` command in any Telegram chat to set it dynamically. +:::tip +Group chat IDs are negative numbers (e.g., `-1001234567890`). 
Your personal DM chat ID is the same as your user ID. +::: ## Voice Messages -Voice messages sent on Telegram are automatically transcribed using OpenAI's Whisper API and injected as text into the conversation. Requires `VOICE_TOOLS_OPENAI_KEY` in `~/.hermes/.env`. +### Incoming Voice (Speech-to-Text) -### Voice Bubbles (TTS) +Voice messages you send on Telegram are automatically transcribed using OpenAI's Whisper API and injected as text into the conversation. This requires `VOICE_TOOLS_OPENAI_KEY` in `~/.hermes/.env`. -When the agent generates audio via text-to-speech, it's delivered as native Telegram voice bubbles (the round, inline-playable kind). +### Outgoing Voice (Text-to-Speech) + +When the agent generates audio via TTS, it's delivered as native Telegram **voice bubbles** — the round, inline-playable kind. - **OpenAI and ElevenLabs** produce Opus natively — no extra setup needed -- **Edge TTS** (the default free provider) outputs MP3 and needs **ffmpeg** to convert to Opus: +- **Edge TTS** (the default free provider) outputs MP3 and requires **ffmpeg** to convert to Opus: ```bash # Ubuntu/Debian @@ -55,7 +148,34 @@ sudo apt install ffmpeg brew install ffmpeg ``` -Without ffmpeg, Edge TTS audio is sent as a regular audio file (still playable, but rectangular player instead of voice bubble). +Without ffmpeg, Edge TTS audio is sent as a regular audio file (still playable, but uses the rectangular player instead of a voice bubble). + +Configure the TTS provider in your `config.yaml` under the `tts.provider` key. 
+ +## Group Chat Usage + +Hermes Agent works in Telegram group chats with a few considerations: + +- **Privacy mode** determines what messages the bot can see (see [Step 3](#step-3-privacy-mode-critical-for-groups)) +- When privacy mode is on, **@mention the bot** (e.g., `@my_hermes_bot what's the weather?`) or **reply to its messages** to interact +- When privacy mode is off (or bot is admin), the bot sees all messages and can participate naturally +- `TELEGRAM_ALLOWED_USERS` still applies — only authorized users can trigger the bot, even in groups + +## Recent Bot API Features (2024–2025) + +- **Privacy policy:** Telegram now requires bots to have a privacy policy. Set one via BotFather with `/setprivacy_policy`, or Telegram may auto-generate a placeholder. This is particularly important if your bot is public-facing. +- **Message streaming:** Bot API 9.x added support for streaming long responses, which can improve perceived latency for lengthy agent replies. + +## Troubleshooting + +| Problem | Solution | +|---------|----------| +| Bot not responding at all | Verify `TELEGRAM_BOT_TOKEN` is correct. Check `hermes gateway` logs for errors. | +| Bot responds with "unauthorized" | Your user ID is not in `TELEGRAM_ALLOWED_USERS`. Double-check with @userinfobot. | +| Bot ignores group messages | Privacy mode is likely on. Disable it (Step 3) or make the bot a group admin. **Remember to remove and re-add the bot after changing privacy.** | +| Voice messages not transcribed | Check that `VOICE_TOOLS_OPENAI_KEY` is set and valid in `~/.hermes/.env`. | +| Voice replies are files, not bubbles | Install `ffmpeg` (needed for Edge TTS Opus conversion). | +| Bot token revoked/invalid | Generate a new token via `/revoke` then `/newbot` or `/token` in BotFather. Update your `.env` file. | ## Exec Approval @@ -68,7 +188,9 @@ Reply "yes"/"y" to approve or "no"/"n" to deny. ## Security :::warning -Always set `TELEGRAM_ALLOWED_USERS` to restrict who can use the bot. 
Without it, the gateway denies all users by default. +Always set `TELEGRAM_ALLOWED_USERS` to restrict who can interact with your bot. Without it, the gateway denies all users by default as a safety measure. ::: -You can also use [DM pairing](/user-guide/messaging#dm-pairing-alternative-to-allowlists) for a more dynamic approach. +Never share your bot token publicly. If compromised, revoke it immediately via BotFather's `/revoke` command. + +For more details, see the [Security documentation](/user-guide/security). You can also use [DM pairing](/user-guide/messaging#dm-pairing-alternative-to-allowlists) for a more dynamic approach to user authorization. diff --git a/website/docs/user-guide/messaging/whatsapp.md b/website/docs/user-guide/messaging/whatsapp.md index 4879db1cc8..22285eb63a 100644 --- a/website/docs/user-guide/messaging/whatsapp.md +++ b/website/docs/user-guide/messaging/whatsapp.md @@ -6,16 +6,57 @@ description: "Set up Hermes Agent as a WhatsApp bot via the built-in Baileys bri # WhatsApp Setup -WhatsApp doesn't have a simple bot API like Telegram or Discord. Hermes includes a built-in bridge using [Baileys](https://github.com/WhiskeySockets/Baileys) that connects via WhatsApp Web. +Hermes connects to WhatsApp through a built-in bridge using [whatsapp-web.js](https://github.com/pedroslopez/whatsapp-web.js) +(Baileys-based). This works by emulating a WhatsApp Web session — **not** through the official +WhatsApp Business API. No Meta developer account or Business verification is required. + +:::warning Unofficial API — Ban Risk +WhatsApp does **not** officially support third-party bots outside the Business API. Using +whatsapp-web.js carries a small risk of account restrictions. 
To minimize risk: +- **Use a dedicated phone number** for the bot (not your personal number) +- **Don't send bulk/spam messages** — keep usage conversational +- **Don't automate outbound messaging** to people who haven't messaged first +::: + +:::warning WhatsApp Web Protocol Updates +WhatsApp periodically updates their Web protocol, which can temporarily break compatibility +with whatsapp-web.js. When this happens, Hermes will update the bridge dependency. If the +bot stops working after a WhatsApp update, pull the latest Hermes version and re-pair. +::: ## Two Modes | Mode | How it works | Best for | |------|-------------|----------| -| **Separate bot number** (recommended) | Dedicate a phone number to the bot. People message that number directly. | Clean UX, multiple users | -| **Personal self-chat** | Use your own WhatsApp. You message yourself to talk to the agent. | Quick setup, single user | +| **Separate bot number** (recommended) | Dedicate a phone number to the bot. People message that number directly. | Clean UX, multiple users, lower ban risk | +| **Personal self-chat** | Use your own WhatsApp. You message yourself to talk to the agent. 
| Quick setup, single user, testing | -## Setup +--- + +## Prerequisites + +- **Node.js v18+** and **npm** — the WhatsApp bridge runs as a Node.js process +- **A phone with WhatsApp** installed (for scanning the QR code) + +**On Linux headless servers**, you also need Chromium/Puppeteer dependencies: + +```bash +# Debian / Ubuntu +sudo apt-get install -y \ + libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libdrm2 \ + libxkbcommon0 libxcomposite1 libxdamage1 libxrandr2 libgbm1 \ + libpango-1.0-0 libcairo2 libasound2 libxshmfence1 + +# Fedora / RHEL +sudo dnf install -y \ + nss atk at-spi2-atk cups-libs libdrm libxkbcommon \ + libXcomposite libXdamage libXrandr mesa-libgbm \ + pango cairo alsa-lib +``` + +--- + +## Step 1: Run the Setup Wizard ```bash hermes whatsapp @@ -23,55 +64,130 @@ hermes whatsapp The wizard will: -1. Ask which mode you want -2. For **bot mode**: guide you through getting a second number -3. Configure the allowlist -4. Install bridge dependencies (Node.js required) -5. Display a QR code — scan from WhatsApp → Settings → Linked Devices → Link a Device -6. Exit once paired +1. Ask which mode you want (**bot** or **self-chat**) +2. Install bridge dependencies if needed +3. Display a **QR code** in your terminal +4. Wait for you to scan it -## Getting a Second Number (Bot Mode) +**To scan the QR code:** + +1. Open WhatsApp on your phone +2. Go to **Settings → Linked Devices** +3. Tap **Link a Device** +4. Point your camera at the terminal QR code + +Once paired, the wizard confirms the connection and exits. Your session is saved automatically. + +:::tip +If the QR code looks garbled, make sure your terminal is at least 60 columns wide and supports +Unicode. You can also try a different terminal emulator. +::: + +--- + +## Step 2: Getting a Second Phone Number (Bot Mode) + +For bot mode, you need a phone number that isn't already registered with WhatsApp. 
Three options: | Option | Cost | Notes | |--------|------|-------| -| WhatsApp Business app + dual-SIM | Free (if you have dual-SIM) | Install alongside personal WhatsApp, no second phone needed | -| Google Voice | Free (US only) | voice.google.com, verify WhatsApp via the Google Voice app | -| Prepaid SIM | $3-10/month | Any carrier; verify once, phone can go in a drawer on WiFi | +| **Google Voice** | Free | US only. Get a number at [voice.google.com](https://voice.google.com). Verify WhatsApp via SMS through the Google Voice app. | +| **Prepaid SIM** | $5–15 one-time | Any carrier. Activate, verify WhatsApp, then the SIM can sit in a drawer. Number must stay active (make a call every 90 days). | +| **VoIP services** | Free–$5/month | TextNow, TextFree, or similar. Some VoIP numbers are blocked by WhatsApp — try a few if the first doesn't work. | -## Starting the Gateway +After getting the number: + +1. Install WhatsApp on a phone (or use WhatsApp Business app with dual-SIM) +2. Register the new number with WhatsApp +3. Run `hermes whatsapp` and scan the QR code from that WhatsApp account + +--- + +## Step 3: Configure Hermes + +Add the following to your `~/.hermes/.env` file: ```bash -hermes gateway # Foreground -hermes gateway install # Or install as a system service +# Required +WHATSAPP_ENABLED=true +WHATSAPP_MODE=bot # "bot" or "self-chat" +WHATSAPP_ALLOWED_USERS=15551234567 # Comma-separated phone numbers (with country code, no +) + +# Optional +WHATSAPP_HOME_CONTACT=15551234567 # Default contact for proactive/scheduled messages +``` + +Then start the gateway: + +```bash +hermes gateway # Foreground +hermes gateway install # Install as a system service ``` The gateway starts the WhatsApp bridge automatically using the saved session. -## Environment Variables +--- + +## Session Persistence + +The whatsapp-web.js `LocalAuth` strategy saves your session to the `.wwebjs_auth` folder inside +your Hermes data directory (`~/.hermes/`). 
This means: + +- **Sessions survive restarts** — you don't need to re-scan the QR code every time +- The session data includes encryption keys and device credentials +- **Do not share or commit the `.wwebjs_auth` folder** — it grants full access to the WhatsApp account + +--- + +## Re-pairing + +If the session breaks (phone reset, WhatsApp update, manually unlinked), you'll see connection +errors in the gateway logs. To fix it: ```bash -WHATSAPP_ENABLED=true -WHATSAPP_MODE=bot # "bot" or "self-chat" -WHATSAPP_ALLOWED_USERS=15551234567 # Comma-separated phone numbers with country code +hermes whatsapp ``` -## Important Notes +This generates a fresh QR code. Scan it again and the session is re-established. The gateway +handles **temporary** disconnections (network blips, phone going offline briefly) automatically +with reconnection logic. -- Agent responses are prefixed with "⚕ **Hermes Agent**" for easy identification -- WhatsApp Web sessions can disconnect if WhatsApp updates their protocol -- The gateway reconnects automatically -- If you see persistent failures, re-pair with `hermes whatsapp` - -:::info Re-pairing -If WhatsApp Web sessions disconnect (protocol updates, phone reset), re-pair with `hermes whatsapp`. The gateway handles temporary disconnections automatically. -::: +--- ## Voice Messages -Voice messages sent on WhatsApp are automatically transcribed (requires `VOICE_TOOLS_OPENAI_KEY`). TTS audio is sent as MP3 file attachments. +Hermes supports voice on WhatsApp: + +- **Incoming:** Voice messages (`.ogg` opus) are automatically transcribed using Whisper (requires `VOICE_TOOLS_OPENAI_KEY`) +- **Outgoing:** TTS responses are sent as MP3 audio file attachments +- Agent responses are prefixed with "⚕ **Hermes Agent**" for easy identification + +--- + +## Troubleshooting + +| Problem | Solution | +|---------|----------| +| **QR code not scanning** | Ensure terminal is wide enough (60+ columns). Try a different terminal. 
Make sure you're scanning from the correct WhatsApp account (bot number, not personal). | +| **QR code expires** | QR codes refresh every ~20 seconds. If it times out, restart `hermes whatsapp`. | +| **Session not persisting** | Check that `~/.hermes/.wwebjs_auth/` exists and is writable. On Docker, mount this as a volume. | +| **Logged out unexpectedly** | WhatsApp unlinks devices after ~14 days of phone inactivity. Keep the phone on and connected to WiFi. Re-pair with `hermes whatsapp`. | +| **"Execution context was destroyed"** | Chromium crashed. Install the Puppeteer dependencies listed in Prerequisites. On low-RAM servers, add swap space. | +| **Bot stops working after WhatsApp update** | Update Hermes to get the latest bridge version, then re-pair. | +| **Messages not being received** | Verify `WHATSAPP_ALLOWED_USERS` includes the sender's number (with country code, no `+` or spaces). | + +--- ## Security :::warning -Always set `WHATSAPP_ALLOWED_USERS` with phone numbers (including country code) to restrict who can use the bot. +**Always set `WHATSAPP_ALLOWED_USERS`** with phone numbers (including country code, without the `+`) +of authorized users. Without this setting, the gateway will **deny all incoming messages** as a +safety measure. 
::: + +- The `.wwebjs_auth` folder contains full session credentials — protect it like a password +- Set file permissions: `chmod 700 ~/.hermes/.wwebjs_auth` +- Use a **dedicated phone number** for the bot to isolate risk from your personal account +- If you suspect compromise, unlink the device from WhatsApp → Settings → Linked Devices +- Phone numbers in logs are partially redacted, but review your log retention policy diff --git a/website/docs/user-guide/sessions.md b/website/docs/user-guide/sessions.md index 92f6e1218f..f468e632c2 100644 --- a/website/docs/user-guide/sessions.md +++ b/website/docs/user-guide/sessions.md @@ -17,6 +17,7 @@ Every conversation — whether from the CLI, Telegram, Discord, WhatsApp, or Sla The SQLite database stores: - Session ID, source platform, user ID +- **Session title** (unique, human-readable name) - Model name and configuration - System prompt snapshot - Full message history (role, content, tool calls, tool results) @@ -54,6 +55,19 @@ hermes chat -c This looks up the most recent `cli` session from the SQLite database and loads its full conversation history. +### Resume by Name + +If you've given a session a title (see [Session Naming](#session-naming) below), you can resume it by name: + +```bash +# Resume a named session +hermes -c "my project" + +# If there are lineage variants (my project, my project #2, my project #3), +# this automatically resumes the most recent one +hermes -c "my project" # → resumes "my project #3" +``` + ### Resume Specific Session ```bash @@ -61,16 +75,92 @@ This looks up the most recent `cli` session from the SQLite database and loads i hermes --resume 20250305_091523_a1b2c3d4 hermes -r 20250305_091523_a1b2c3d4 +# Resume by title +hermes --resume "refactoring auth" + # Or with the chat subcommand hermes chat --resume 20250305_091523_a1b2c3d4 ``` Session IDs are shown when you exit a CLI session, and can be found with `hermes sessions list`. 
+### Conversation Recap on Resume + +When you resume a session, Hermes displays a compact recap of the previous conversation in a styled panel before the input prompt: + +```text +╭─────────────────────────── Previous Conversation ────────────────────────────╮ +│ ● You: What is Python? │ +│ ◆ Hermes: Python is a high-level programming language. │ +│ ● You: How do I install it? │ +│ ◆ Hermes: [3 tool calls: web_search, web_extract, terminal] │ +│ ◆ Hermes: You can download Python from python.org... │ +╰──────────────────────────────────────────────────────────────────────────────╯ +``` + +The recap: +- Shows **user messages** (gold `●`) and **assistant responses** (green `◆`) +- **Truncates** long messages (300 chars for user, 200 chars / 3 lines for assistant) +- **Collapses tool calls** to a count with tool names (e.g., `[3 tool calls: terminal, web_search]`) +- **Hides** system messages, tool results, and internal reasoning +- **Caps** at the last 10 exchanges with a "... N earlier messages ..." indicator +- Uses **dim styling** to distinguish from the active conversation + +To disable the recap and keep the minimal one-liner behavior, set in `~/.hermes/config.yaml`: + +```yaml +display: + resume_display: minimal # default: full +``` + :::tip -Session IDs follow the format `YYYYMMDD_HHMMSS_<8-char-hex>`, e.g. `20250305_091523_a1b2c3d4`. You only need to provide enough of the ID to be unique. +Session IDs follow the format `YYYYMMDD_HHMMSS_<8-char-hex>`, e.g. `20250305_091523_a1b2c3d4`. You can resume by ID or by title — both work with `-c` and `-r`. ::: +## Session Naming + +Give sessions human-readable titles so you can find and resume them easily. + +### Setting a Title + +Use the `/title` slash command inside any chat session (CLI or gateway): + +``` +/title my research project +``` + +The title is applied immediately. 
If the session hasn't been created in the database yet (e.g., you run `/title` before sending your first message), it's queued and applied once the session starts. + +You can also rename existing sessions from the command line: + +```bash +hermes sessions rename 20250305_091523_a1b2c3d4 "refactoring auth module" +``` + +### Title Rules + +- **Unique** — no two sessions can share the same title +- **Max 100 characters** — keeps listing output clean +- **Sanitized** — control characters, zero-width chars, and RTL overrides are stripped automatically +- **Normal Unicode is fine** — emoji, CJK, accented characters all work + +### Auto-Lineage on Compression + +When a session's context is compressed (manually via `/compress` or automatically), Hermes creates a new continuation session. If the original had a title, the new session automatically gets a numbered title: + +``` +"my project" → "my project #2" → "my project #3" +``` + +When you resume by name (`hermes -c "my project"`), it automatically picks the most recent session in the lineage. 
+ +### /title in Messaging Platforms + +The `/title` command works in all gateway platforms (Telegram, Discord, Slack, WhatsApp): + +- `/title My Research` — set the session title +- `/title` — show the current title + ## Session Management Commands Hermes provides a full set of session management commands via `hermes sessions`: @@ -88,13 +178,23 @@ hermes sessions list --source telegram hermes sessions list --limit 50 ``` -Output format: +When sessions have titles, the output shows titles, previews, and relative timestamps: ``` -ID Source Model Messages Started +Title Preview Last Active ID ──────────────────────────────────────────────────────────────────────────────────────────────── -20250305_091523_a1b2c3d4 cli anthropic/claude-opus-4.6 24 2025-03-05 09:15 -20250304_143022_e5f6g7h8 telegram anthropic/claude-opus-4.6 12 2025-03-04 14:30 (ended) +refactoring auth Help me refactor the auth module please 2h ago 20250305_091523_a +my project #3 Can you check the test failures? yesterday 20250304_143022_e +— What's the weather in Las Vegas? 3d ago 20250303_101500_f +``` + +When no sessions have titles, a simpler format is used: + +``` +Preview Last Active Src ID +────────────────────────────────────────────────────────────────────────────────────── +Help me refactor the auth module please 2h ago cli 20250305_091523_a +What's the weather in Las Vegas? 3d ago tele 20250303_101500_f ``` ### Export Sessions @@ -122,6 +222,18 @@ hermes sessions delete 20250305_091523_a1b2c3d4 hermes sessions delete 20250305_091523_a1b2c3d4 --yes ``` +### Rename a Session + +```bash +# Set or change a session's title +hermes sessions rename 20250305_091523_a1b2c3d4 "debugging auth flow" + +# Multi-word titles don't need quotes in the CLI +hermes sessions rename 20250305_091523_a1b2c3d4 debugging auth flow +``` + +If the title is already in use by another session, an error is shown. 
+ ### Prune Old Sessions ```bash @@ -233,7 +345,7 @@ The SQLite database uses WAL mode for concurrent readers and a single writer, wh Key tables in `state.db`: -- **sessions** — session metadata (id, source, user_id, model, timestamps, token counts) +- **sessions** — session metadata (id, source, user_id, model, title, timestamps, token counts). Titles have a unique index (NULL titles allowed, only non-NULL must be unique). - **messages** — full message history (role, content, tool_calls, tool_name, token_count) - **messages_fts** — FTS5 virtual table for full-text search across message content diff --git a/website/sidebars.ts b/website/sidebars.ts index 919647f144..6d767bb1e0 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -10,6 +10,18 @@ const sidebars: SidebarsConfig = { 'getting-started/quickstart', 'getting-started/installation', 'getting-started/updating', + 'getting-started/learning-path', + ], + }, + { + type: 'category', + label: 'Guides & Tutorials', + collapsed: false, + items: [ + 'guides/tips', + 'guides/daily-briefing-bot', + 'guides/team-telegram-assistant', + 'guides/python-library', ], }, { @@ -35,24 +47,48 @@ const sidebars: SidebarsConfig = { }, { type: 'category', - label: 'Features', + label: 'Core Features', items: [ 'user-guide/features/tools', 'user-guide/features/skills', 'user-guide/features/memory', 'user-guide/features/context-files', 'user-guide/features/personality', - 'user-guide/features/mcp', + ], + }, + { + type: 'category', + label: 'Automation', + items: [ 'user-guide/features/cron', - 'user-guide/features/hooks', 'user-guide/features/delegation', 'user-guide/features/code-execution', + 'user-guide/features/hooks', + ], + }, + { + type: 'category', + label: 'Web & Media', + items: [ 'user-guide/features/browser', - 'user-guide/features/image-generation', 'user-guide/features/vision', + 'user-guide/features/image-generation', 'user-guide/features/tts', - 'user-guide/features/provider-routing', + ], + }, + { + type: 
'category', + label: 'Integrations', + items: [ + 'user-guide/features/mcp', 'user-guide/features/honcho', + 'user-guide/features/provider-routing', + ], + }, + { + type: 'category', + label: 'Advanced', + items: [ 'user-guide/features/batch-processing', 'user-guide/features/rl-training', ], @@ -76,6 +112,7 @@ const sidebars: SidebarsConfig = { items: [ 'reference/cli-commands', 'reference/environment-variables', + 'reference/faq', ], }, ],
A real terminal interface: Full TUI with multiline editing, slash-command autocomplete, conversation history, interrupt-and-redirect, and streaming tool output.
Lives where you do: Telegram, Discord, Slack, WhatsApp, and CLI — all from a single gateway process. Voice memo transcription, cross-platform conversation continuity.
Lives where you do: Telegram, Discord, Slack, WhatsApp, Signal, and CLI — all from a single gateway process. Voice memo transcription, cross-platform conversation continuity.
A closed learning loop: Agent-curated memory with periodic nudges. Autonomous skill creation after complex tasks. Skills self-improve during use. FTS5 session search with LLM summarization for cross-session recall. Honcho dialectic user modeling. Compatible with the agentskills.io open standard.
Scheduled automations: Built-in cron scheduler with delivery to any platform. Daily reports, nightly backups, weekly audits — all in natural language, running unattended.
Delegates and parallelizes: Spawn isolated subagents for parallel workstreams. Write Python scripts that call tools via RPC, collapsing multi-step pipelines into zero-context-cost turns.