2025-08-09 09:52:25 -07:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
"""
|
|
|
|
|
Model Tools Module
|
|
|
|
|
|
2026-02-21 20:22:33 -08:00
|
|
|
Thin orchestration layer over the tool registry. Each tool file in tools/
|
|
|
|
|
self-registers its schema, handler, and metadata via tools.registry.register().
|
|
|
|
|
This module triggers discovery (by importing all tool modules), then provides
|
|
|
|
|
the public API that run_agent.py, cli.py, batch_runner.py, and the RL
|
|
|
|
|
environments consume.
|
|
|
|
|
|
|
|
|
|
Public API (signatures preserved from the original 2,400-line version):
|
|
|
|
|
get_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode) -> list
|
|
|
|
|
handle_function_call(function_name, function_args, task_id, user_task) -> str
|
|
|
|
|
TOOL_TO_TOOLSET_MAP: dict (for batch_runner.py)
|
|
|
|
|
TOOLSET_REQUIREMENTS: dict (for cli.py, doctor.py)
|
|
|
|
|
get_all_tool_names() -> list
|
|
|
|
|
get_toolset_for_tool(name) -> str
|
|
|
|
|
get_available_toolsets() -> dict
|
|
|
|
|
check_toolset_requirements() -> dict
|
|
|
|
|
check_tool_availability(quiet) -> tuple
|
2025-08-09 09:52:25 -07:00
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
import asyncio
|
2026-02-21 20:22:33 -08:00
|
|
|
import logging
|
2026-03-20 09:44:50 -07:00
|
|
|
import threading
|
2026-02-02 19:28:27 -08:00
|
|
|
from typing import Dict, Any, List, Optional, Tuple
|
2025-08-09 09:52:25 -07:00
|
|
|
|
2026-04-14 18:02:25 -05:00
|
|
|
from tools.registry import discover_builtin_tools, registry
|
2026-02-20 23:23:32 -08:00
|
|
|
from toolsets import resolve_toolset, validate_toolset
|
2025-08-09 09:52:25 -07:00
|
|
|
|
2026-02-21 20:22:33 -08:00
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
2026-02-02 19:28:27 -08:00
|
|
|
|
refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security
- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
- Removes deprecated get_event_loop()/set_event_loop() calls
- Makes all tool handlers self-protecting regardless of caller's event loop state
- RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
- Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs
2026-02-21 18:28:49 -08:00
|
|
|
# =============================================================================
|
2026-02-21 20:22:33 -08:00
|
|
|
# Async Bridging (single source of truth -- used by registry.dispatch too)
|
refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security
- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
- Removes deprecated get_event_loop()/set_event_loop() calls
- Makes all tool handlers self-protecting regardless of caller's event loop state
- RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
- Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs
2026-02-21 18:28:49 -08:00
|
|
|
# =============================================================================
|
|
|
|
|
|
2026-03-20 09:44:50 -07:00
|
|
|
# Persistent event-loop state used by the async-bridging helpers in this
# module. Loops are created lazily on first use and then reused.
_tool_loop: Optional[asyncio.AbstractEventLoop] = None  # persistent loop for the main (CLI) thread
_tool_loop_lock = threading.Lock()  # held while _tool_loop is checked / (re)created
_worker_thread_local = threading.local()  # per-worker-thread persistent loops
|
2026-03-20 09:44:50 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def _get_tool_loop() -> asyncio.AbstractEventLoop:
    """Return a long-lived event loop for running async tool handlers.

    Using a persistent loop (instead of asyncio.run(), which creates and
    *closes* a fresh loop every time) prevents "Event loop is closed"
    errors that occur when cached httpx/AsyncOpenAI clients attempt to
    close their transport on a dead loop during garbage collection.

    Returns:
        The module-level loop stored in ``_tool_loop``. A new loop is
        created when none exists yet or the stored one has been closed.
    """
    global _tool_loop
    # The lock makes the check-and-create step atomic so concurrent callers
    # cannot each create (and overwrite) a separate loop.
    with _tool_loop_lock:
        if _tool_loop is None or _tool_loop.is_closed():
            _tool_loop = asyncio.new_event_loop()
        return _tool_loop
|
|
|
|
|
|
|
|
|
|
|
2026-03-20 15:41:06 -04:00
|
|
|
def _get_worker_loop() -> asyncio.AbstractEventLoop:
    """Return a persistent event loop for the current worker thread.

    Each worker thread (e.g., delegate_task's ThreadPoolExecutor threads)
    gets its own long-lived loop stored in thread-local storage. This
    prevents the "Event loop is closed" errors that occurred when
    asyncio.run() was used per-call: asyncio.run() creates a loop, runs
    the coroutine, then *closes* the loop — but cached httpx/AsyncOpenAI
    clients remain bound to that now-dead loop and raise RuntimeError
    during garbage collection or subsequent use.

    By keeping the loop alive for the thread's lifetime, cached clients
    stay valid and their cleanup runs on a live loop.

    Returns:
        The loop stored on ``_worker_thread_local`` for the calling thread.
        A new loop is created, installed as the thread's current event
        loop, and cached when none exists yet or the cached one is closed.
    """
    loop = getattr(_worker_thread_local, 'loop', None)
    if loop is None or loop.is_closed():
        loop = asyncio.new_event_loop()
        # Make the new loop the thread's current loop as well as caching it.
        asyncio.set_event_loop(loop)
        _worker_thread_local.loop = loop
    return loop
|
|
|
|
|
|
|
|
|
|
|
refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security
- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
- Removes deprecated get_event_loop()/set_event_loop() calls
- Makes all tool handlers self-protecting regardless of caller's event loop state
- RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
- Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs
2026-02-21 18:28:49 -08:00
|
|
|
def _run_async(coro):
    """Run an async coroutine from a sync context.

    If the current thread already has a running event loop (e.g., inside
    the gateway's async stack or Atropos's event loop), we spin up a
    disposable thread so asyncio.run() can create its own loop without
    conflicting.

    For the common CLI path (no running loop), we use a persistent event
    loop so that cached async clients (httpx / AsyncOpenAI) remain bound
    to a live loop and don't trigger "Event loop is closed" on GC.

    When called from a worker thread (parallel tool execution), we use a
    per-thread persistent loop to avoid both contention with the main
    thread's shared loop AND the "Event loop is closed" errors caused by
    asyncio.run()'s create-and-destroy lifecycle.

    This is the single source of truth for sync->async bridging in tool
    handlers. The RL paths (agent_loop.py, tool_context.py) also provide
    outer thread-pool wrapping as defense-in-depth, but each handler is
    self-protecting via this function.

    Args:
        coro: The coroutine object to run to completion.

    Returns:
        Whatever ``coro`` returns.

    Raises:
        concurrent.futures.TimeoutError: Only on the running-loop path,
            when the helper thread has not finished within 300 seconds.
        Exception: Any exception raised by ``coro`` propagates unchanged.
    """
    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread.
        loop = None

    if loop and loop.is_running():
        # Inside an async context (gateway, RL env) — run in a fresh thread.
        import concurrent.futures

        pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
        future = pool.submit(asyncio.run, coro)
        try:
            return future.result(timeout=300)
        except concurrent.futures.TimeoutError:
            # Best effort only: Future.cancel() cannot interrupt a call that
            # is already executing, so the helper thread may keep running.
            future.cancel()
            raise
        finally:
            # Never block the caller's (running) loop on pool teardown.
            pool.shutdown(wait=False, cancel_futures=True)

    # If we're on a worker thread (e.g., parallel tool execution in
    # delegate_task), use a per-thread persistent loop. This avoids
    # contention with the main thread's shared loop while keeping cached
    # httpx/AsyncOpenAI clients bound to a live loop for the thread's
    # lifetime — preventing "Event loop is closed" on GC cleanup.
    if threading.current_thread() is not threading.main_thread():
        worker_loop = _get_worker_loop()
        return worker_loop.run_until_complete(coro)

    # Main thread with no running loop: reuse the shared persistent loop.
    tool_loop = _get_tool_loop()
    return tool_loop.run_until_complete(coro)
|
refactor: deduplicate toolsets, unify async bridging, fix approval race condition, harden security
- Replace 4 copy-pasted messaging platform toolsets with shared _HERMES_CORE_TOOLS list
- Consolidate 5 ad-hoc async-bridging patterns into single _run_async() in model_tools.py
- Removes deprecated get_event_loop()/set_event_loop() calls
- Makes all tool handlers self-protecting regardless of caller's event loop state
- RL handler refactored from if/elif chain to dispatch dict
- Fix exec approval race condition: replace module-level globals with thread-safe
per-session tools/approval.py (submit_pending, pop_pending, approve_session, is_approved)
- Session A approving "rm" no longer approves it for all other sessions
- Fix config deep merge: user overriding tts.elevenlabs.voice_id no longer clobbers
tts.elevenlabs.model_id; migration detection now recurses to arbitrary depth
- Gateway default-deny: unauthenticated users denied unless GATEWAY_ALLOW_ALL_USERS=true
- Add 10 dangerous command patterns: rm --recursive, bash -c, python -e, curl|bash,
xargs rm, find -delete
- Sanitize gateway error messages: users see generic message, full traceback goes to logs
2026-02-21 18:28:49 -08:00
|
|
|
|
|
|
|
|
|
2026-02-02 19:28:27 -08:00
|
|
|
# =============================================================================
|
2026-02-21 20:22:33 -08:00
|
|
|
# Tool Discovery (importing each module triggers its registry.register calls)
|
2026-02-02 19:28:27 -08:00
|
|
|
# =============================================================================
|
|
|
|
|
|
2026-04-14 18:02:25 -05:00
|
|
|
# Built-in tools: run discovery so each tool module registers itself.
discover_builtin_tools()

# MCP tool discovery (external MCP servers from config)
# Best effort: any failure (including the import) is logged at debug level
# and never prevents this module from loading.
try:
    from tools.mcp_tool import discover_mcp_tools
    discover_mcp_tools()
except Exception as e:
    logger.debug("MCP tool discovery failed: %s", e)

# Plugin tool discovery (user/project/pip plugins)
# Same best-effort policy as MCP discovery above.
try:
    from hermes_cli.plugins import discover_plugins
    discover_plugins()
except Exception as e:
    logger.debug("Plugin discovery failed: %s", e)
|
|
|
|
|
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
|
2026-02-21 20:22:33 -08:00
|
|
|
# =============================================================================
|
|
|
|
|
# Backward-compat constants (built once after discovery)
|
|
|
|
|
# =============================================================================
|
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
|
|
|
|
2026-02-21 20:22:33 -08:00
|
|
|
# Snapshots taken from the registry at import time, after discovery has run.
TOOL_TO_TOOLSET_MAP: Dict[str, str] = registry.get_tool_to_toolset_map()

TOOLSET_REQUIREMENTS: Dict[str, dict] = registry.get_toolset_requirements()

# Resolved tool names from the last get_tool_definitions() call.
# Used by code_execution_tool to know which tools are available in this session.
_last_resolved_tool_names: List[str] = []
|
2025-11-17 01:14:31 -05:00
|
|
|
|
2025-08-09 09:52:25 -07:00
|
|
|
|
2026-02-21 20:22:33 -08:00
|
|
|
# =============================================================================
|
|
|
|
|
# Legacy toolset name mapping (old _tools-suffixed names -> tool name lists)
|
|
|
|
|
# =============================================================================
|
2025-08-09 09:52:25 -07:00
|
|
|
|
2026-02-21 20:22:33 -08:00
|
|
|
# Maps each legacy "*_tools" toolset name to the tool names it enables.
# Consulted by get_tool_definitions() when a requested toolset name fails
# validate_toolset() but matches one of these keys.
_LEGACY_TOOLSET_MAP: Dict[str, List[str]] = {
    "web_tools": ["web_search", "web_extract"],
    "terminal_tools": ["terminal"],
    "vision_tools": ["vision_analyze"],
    "moa_tools": ["mixture_of_agents"],
    "image_tools": ["image_generate"],
    "skills_tools": ["skills_list", "skill_view", "skill_manage"],
    "browser_tools": [
        "browser_navigate", "browser_snapshot", "browser_click",
        "browser_type", "browser_scroll", "browser_back",
        "browser_press", "browser_get_images",
        "browser_vision", "browser_console",
    ],
    "cronjob_tools": ["cronjob"],
    "rl_tools": [
        "rl_list_environments", "rl_select_environment",
        "rl_get_current_config", "rl_edit_config",
        "rl_start_training", "rl_check_status",
        "rl_stop_training", "rl_get_results",
        "rl_list_runs", "rl_test_inference",
    ],
    "file_tools": ["read_file", "write_file", "patch", "search_files"],
    "tts_tools": ["text_to_speech"],
}
|
|
|
|
|
|
|
|
|
|
|
2026-02-21 20:22:33 -08:00
|
|
|
# =============================================================================
|
|
|
|
|
# get_tool_definitions (the main schema provider)
|
|
|
|
|
# =============================================================================
|
2026-02-19 23:23:43 -08:00
|
|
|
|
2025-08-09 09:52:25 -07:00
|
|
|
def get_tool_definitions(
    enabled_toolsets: Optional[List[str]] = None,
    disabled_toolsets: Optional[List[str]] = None,
    quiet_mode: bool = False,
) -> List[Dict[str, Any]]:
    """
    Get tool definitions for model API calls with toolset-based filtering.

    All tools must be part of a toolset to be accessible.

    Selection rules:
      * ``enabled_toolsets`` is not None: only tools from those toolsets are
        included (an empty list therefore selects nothing) and
        ``disabled_toolsets`` is ignored.
      * otherwise, if ``disabled_toolsets`` is non-empty: start from every
        known toolset and remove the tools of the listed ones.
      * otherwise: include the tools of every known toolset.

    Names that are not valid toolsets are looked up in ``_LEGACY_TOOLSET_MAP``
    before being reported as unknown.

    After the registry has filtered by availability, a few schemas are
    rewritten so they only mention tools that are really present:
    ``execute_code``, ``discord_server`` and ``browser_navigate``.

    Side effect: the module-level ``_last_resolved_tool_names`` is replaced
    with the names of the tools returned by this call.

    Args:
        enabled_toolsets: Only include tools from these toolsets.
        disabled_toolsets: Exclude tools from these toolsets (if enabled_toolsets is None).
        quiet_mode: Suppress status prints.

    Returns:
        Filtered list of OpenAI-format tool definitions.
    """
    global _last_resolved_tool_names

    # Determine which tool names the caller wants
    tools_to_include: set = set()

    if enabled_toolsets is not None:
        for toolset_name in enabled_toolsets:
            if validate_toolset(toolset_name):
                resolved = resolve_toolset(toolset_name)
                tools_to_include.update(resolved)
                if not quiet_mode:
                    print(f"✅ Enabled toolset '{toolset_name}': {', '.join(resolved) if resolved else 'no tools'}")
            elif toolset_name in _LEGACY_TOOLSET_MAP:
                legacy_tools = _LEGACY_TOOLSET_MAP[toolset_name]
                tools_to_include.update(legacy_tools)
                if not quiet_mode:
                    print(f"✅ Enabled legacy toolset '{toolset_name}': {', '.join(legacy_tools)}")
            else:
                if not quiet_mode:
                    print(f"⚠️ Unknown toolset: {toolset_name}")

    elif disabled_toolsets:
        from toolsets import get_all_toolsets
        for ts_name in get_all_toolsets():
            tools_to_include.update(resolve_toolset(ts_name))

        for toolset_name in disabled_toolsets:
            if validate_toolset(toolset_name):
                resolved = resolve_toolset(toolset_name)
                tools_to_include.difference_update(resolved)
                if not quiet_mode:
                    print(f"🚫 Disabled toolset '{toolset_name}': {', '.join(resolved) if resolved else 'no tools'}")
            elif toolset_name in _LEGACY_TOOLSET_MAP:
                legacy_tools = _LEGACY_TOOLSET_MAP[toolset_name]
                tools_to_include.difference_update(legacy_tools)
                if not quiet_mode:
                    print(f"🚫 Disabled legacy toolset '{toolset_name}': {', '.join(legacy_tools)}")
            else:
                if not quiet_mode:
                    print(f"⚠️ Unknown toolset: {toolset_name}")
    else:
        from toolsets import get_all_toolsets
        for ts_name in get_all_toolsets():
            tools_to_include.update(resolve_toolset(ts_name))

    # Plugin-registered tools are now resolved through the normal toolset
    # path — validate_toolset() / resolve_toolset() / get_all_toolsets()
    # all check the tool registry for plugin-provided toolsets.  No bypass
    # needed; plugins respect enabled_toolsets / disabled_toolsets like any
    # other toolset.

    # Ask the registry for schemas (only returns tools whose check_fn passes)
    filtered_tools = registry.get_definitions(tools_to_include, quiet=quiet_mode)

    # The set of tool names that actually passed check_fn filtering.
    # Use this (not tools_to_include) for any downstream schema that references
    # other tools by name — otherwise the model sees tools mentioned in
    # descriptions that don't actually exist, and hallucinates calls to them.
    available_tool_names = {t["function"]["name"] for t in filtered_tools}

    # Rebuild execute_code schema to only list sandbox tools that are actually
    # available.  Without this, the model sees "web_search is available in
    # execute_code" even when the API key isn't configured or the toolset is
    # disabled (#560-discord).
    if "execute_code" in available_tool_names:
        from tools.code_execution_tool import SANDBOX_ALLOWED_TOOLS, build_execute_code_schema, _get_execution_mode
        sandbox_enabled = SANDBOX_ALLOWED_TOOLS & available_tool_names
        dynamic_schema = build_execute_code_schema(sandbox_enabled, mode=_get_execution_mode())
        for i, td in enumerate(filtered_tools):
            if td.get("function", {}).get("name") == "execute_code":
                filtered_tools[i] = {"type": "function", "function": dynamic_schema}
                break

    # Rebuild discord_server schema based on the bot's privileged intents
    # (detected from GET /applications/@me) and the user's action allowlist
    # in config.  Hides actions the bot's intents don't support so the
    # model never attempts them, and annotates fetch_messages when the
    # MESSAGE_CONTENT intent is missing.
    if "discord_server" in available_tool_names:
        dynamic = None
        schema_build_failed = False
        try:
            from tools.discord_tool import get_dynamic_schema
            dynamic = get_dynamic_schema()
        except Exception:  # pragma: no cover — defensive, fall back to static
            # A failure to build the dynamic schema must not be confused with
            # get_dynamic_schema() deliberately returning None ("no actions
            # left"): keep the static schema the registry already supplied.
            schema_build_failed = True
            logger.debug(
                "discord_server dynamic schema unavailable; keeping static schema",
                exc_info=True,
            )
        if schema_build_failed:
            pass
        elif dynamic is None:
            # Tool filtered out entirely (empty allowlist or detection disabled
            # the only remaining actions).  Drop it from the schema list.
            filtered_tools = [
                t for t in filtered_tools
                if t.get("function", {}).get("name") != "discord_server"
            ]
            available_tool_names.discard("discord_server")
        else:
            for i, td in enumerate(filtered_tools):
                if td.get("function", {}).get("name") == "discord_server":
                    filtered_tools[i] = {"type": "function", "function": dynamic}
                    break

    # Strip web tool cross-references from browser_navigate description when
    # web_search / web_extract are not available.  The static schema says
    # "prefer web_search or web_extract" which causes the model to hallucinate
    # those tools when they're missing.
    if "browser_navigate" in available_tool_names:
        web_tools_available = {"web_search", "web_extract"} & available_tool_names
        if not web_tools_available:
            for i, td in enumerate(filtered_tools):
                if td.get("function", {}).get("name") == "browser_navigate":
                    desc = td["function"].get("description", "")
                    desc = desc.replace(
                        " For simple information retrieval, prefer web_search or web_extract (faster, cheaper).",
                        "",
                    )
                    # Build a new dict rather than editing td["function"] in
                    # place, so the schema object handed out by the registry
                    # is left untouched.
                    filtered_tools[i] = {
                        "type": "function",
                        "function": {**td["function"], "description": desc},
                    }
                    break

    if not quiet_mode:
        if filtered_tools:
            tool_names = [t["function"]["name"] for t in filtered_tools]
            print(f"🛠️ Final tool selection ({len(filtered_tools)} tools): {', '.join(tool_names)}")
        else:
            print("🛠️ No tools selected (all filtered out or unavailable)")

    # Remembered process-wide; handle_function_call() falls back to this list
    # for execute_code when the caller does not pass enabled_tools.
    _last_resolved_tool_names = [t["function"]["name"] for t in filtered_tools]

    return filtered_tools
|
2026-01-29 06:10:24 +00:00
|
|
|
|
2026-02-02 08:26:42 -08:00
|
|
|
|
2026-02-21 20:22:33 -08:00
|
|
|
# =============================================================================
|
|
|
|
|
# handle_function_call (the main dispatcher)
|
|
|
|
|
# =============================================================================
|
2026-02-02 08:26:42 -08:00
|
|
|
|
2026-02-21 20:22:33 -08:00
|
|
|
# Tools whose execution is intercepted by the agent loop (run_agent.py)
|
|
|
|
|
# because they need agent-level state (TodoStore, MemoryStore, etc.).
|
|
|
|
|
# The registry still holds their schemas; dispatch just returns a stub error
|
|
|
|
|
# so if something slips through, the LLM sees a sensible message.
|
|
|
|
|
_AGENT_LOOP_TOOLS = {"todo", "memory", "session_search", "delegate_task"}
|
2026-03-18 03:04:07 -07:00
|
|
|
_READ_SEARCH_TOOLS = {"read_file", "search_files"}
|
2026-02-03 23:41:26 -08:00
|
|
|
|
|
|
|
|
|
2026-04-05 10:57:34 -07:00
|
|
|
# =========================================================================
|
|
|
|
|
# Tool argument type coercion
|
|
|
|
|
# =========================================================================
|
|
|
|
|
|
|
|
|
|
def coerce_tool_args(tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]:
    """Coerce tool call arguments to match their JSON Schema types.

    LLMs frequently return numbers as strings (``"42"`` instead of ``42``)
    and booleans as strings (``"true"`` instead of ``true``).  This compares
    each argument value against the tool's registered JSON Schema and attempts
    safe coercion when the value is a string but the schema expects a different
    type.  Original values are preserved when coercion fails.

    Handles ``"type": "integer"``, ``"type": "number"``, ``"type": "boolean"``,
    and union types (``"type": ["integer", "string"]``).

    Args:
        tool_name: Name used to look up the tool's schema in the registry.
        args: Argument mapping from the tool call.  Coerced values are written
            back into this same dict (it is modified in place).

    Returns:
        ``args`` itself.  It is returned untouched when it is empty or not a
        dict, when the tool has no registered schema, or when the schema
        declares no properties.
    """
    if not args or not isinstance(args, dict):
        return args

    schema = registry.get_schema(tool_name)
    if not schema:
        return args

    properties = (schema.get("parameters") or {}).get("properties")
    if not properties:
        return args

    # Only existing keys are reassigned, so the dict never changes size while
    # it is being iterated.
    for key, value in args.items():
        if not isinstance(value, str):
            continue
        prop_schema = properties.get(key)
        # JSON Schema permits boolean property schemas (``"x": true``); those
        # carry no "type" and would otherwise raise AttributeError on .get().
        if not prop_schema or not isinstance(prop_schema, dict):
            continue
        expected = prop_schema.get("type")
        if not expected:
            continue
        coerced = _coerce_value(value, expected)
        # _coerce_value hands back the very same object when nothing changed.
        if coerced is not value:
            args[key] = coerced

    return args
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _coerce_value(value: str, expected_type):
|
|
|
|
|
"""Attempt to coerce a string *value* to *expected_type*.
|
|
|
|
|
|
|
|
|
|
Returns the original string when coercion is not applicable or fails.
|
|
|
|
|
"""
|
|
|
|
|
if isinstance(expected_type, list):
|
|
|
|
|
# Union type — try each in order, return first successful coercion
|
|
|
|
|
for t in expected_type:
|
|
|
|
|
result = _coerce_value(value, t)
|
|
|
|
|
if result is not value:
|
|
|
|
|
return result
|
|
|
|
|
return value
|
|
|
|
|
|
|
|
|
|
if expected_type in ("integer", "number"):
|
|
|
|
|
return _coerce_number(value, integer_only=(expected_type == "integer"))
|
|
|
|
|
if expected_type == "boolean":
|
|
|
|
|
return _coerce_boolean(value)
|
2026-04-19 17:36:18 +00:00
|
|
|
if expected_type == "array":
|
|
|
|
|
return _coerce_json(value, list)
|
|
|
|
|
if expected_type == "object":
|
|
|
|
|
return _coerce_json(value, dict)
|
|
|
|
|
return value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _coerce_json(value: str, expected_python_type: type):
|
|
|
|
|
"""Parse *value* as JSON when the schema expects an array or object.
|
|
|
|
|
|
|
|
|
|
Handles model output drift where a complex oneOf/discriminated-union schema
|
|
|
|
|
causes the LLM to emit the array/object as a JSON string instead of a native
|
|
|
|
|
structure. Returns the original string if parsing fails or yields the wrong
|
|
|
|
|
Python type.
|
|
|
|
|
"""
|
|
|
|
|
try:
|
|
|
|
|
parsed = json.loads(value)
|
|
|
|
|
except (ValueError, TypeError):
|
|
|
|
|
return value
|
|
|
|
|
if isinstance(parsed, expected_python_type):
|
|
|
|
|
logger.debug(
|
|
|
|
|
"coerce_tool_args: coerced string to %s via json.loads",
|
|
|
|
|
expected_python_type.__name__,
|
|
|
|
|
)
|
|
|
|
|
return parsed
|
2026-04-05 10:57:34 -07:00
|
|
|
return value
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _coerce_number(value: str, integer_only: bool = False):
|
|
|
|
|
"""Try to parse *value* as a number. Returns original string on failure."""
|
|
|
|
|
try:
|
|
|
|
|
f = float(value)
|
|
|
|
|
except (ValueError, OverflowError):
|
|
|
|
|
return value
|
|
|
|
|
# Guard against inf/nan before int() conversion
|
|
|
|
|
if f != f or f == float("inf") or f == float("-inf"):
|
|
|
|
|
return f
|
|
|
|
|
# If it looks like an integer (no fractional part), return int
|
|
|
|
|
if f == int(f):
|
|
|
|
|
return int(f)
|
|
|
|
|
if integer_only:
|
|
|
|
|
# Schema wants an integer but value has decimals — keep as string
|
|
|
|
|
return value
|
|
|
|
|
return f
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _coerce_boolean(value: str):
|
|
|
|
|
"""Try to parse *value* as a boolean. Returns original string on failure."""
|
|
|
|
|
low = value.strip().lower()
|
|
|
|
|
if low == "true":
|
|
|
|
|
return True
|
|
|
|
|
if low == "false":
|
|
|
|
|
return False
|
|
|
|
|
return value
|
|
|
|
|
|
|
|
|
|
|
2026-02-21 20:22:33 -08:00
|
|
|
def handle_function_call(
|
2026-02-05 03:49:46 -08:00
|
|
|
function_name: str,
|
|
|
|
|
function_args: Dict[str, Any],
|
2026-01-29 06:10:24 +00:00
|
|
|
task_id: Optional[str] = None,
|
2026-03-29 12:26:44 +05:30
|
|
|
tool_call_id: Optional[str] = None,
|
|
|
|
|
session_id: Optional[str] = None,
|
2026-02-21 20:22:33 -08:00
|
|
|
user_task: Optional[str] = None,
|
2026-03-10 06:32:08 -07:00
|
|
|
enabled_tools: Optional[List[str]] = None,
|
2026-04-13 21:15:25 -07:00
|
|
|
skip_pre_tool_call_hook: bool = False,
|
2026-01-29 06:10:24 +00:00
|
|
|
) -> str:
|
2025-08-09 09:52:25 -07:00
|
|
|
"""
|
2026-02-21 20:22:33 -08:00
|
|
|
Main function call dispatcher that routes calls to the tool registry.
|
2025-11-03 17:42:23 -05:00
|
|
|
|
2025-08-09 09:52:25 -07:00
|
|
|
Args:
|
2026-02-21 20:22:33 -08:00
|
|
|
function_name: Name of the function to call.
|
|
|
|
|
function_args: Arguments for the function.
|
|
|
|
|
task_id: Unique identifier for terminal/browser session isolation.
|
|
|
|
|
user_task: The user's original task (for browser_snapshot context).
|
2026-03-10 06:32:08 -07:00
|
|
|
enabled_tools: Tool names enabled for this session. When provided,
|
|
|
|
|
execute_code uses this list to determine which sandbox
|
|
|
|
|
tools to generate. Falls back to the process-global
|
|
|
|
|
``_last_resolved_tool_names`` for backward compat.
|
2025-11-03 17:42:23 -05:00
|
|
|
|
2025-08-09 09:52:25 -07:00
|
|
|
Returns:
|
2026-02-21 20:22:33 -08:00
|
|
|
Function result as a JSON string.
|
2025-08-09 09:52:25 -07:00
|
|
|
"""
|
2026-04-05 10:57:34 -07:00
|
|
|
# Coerce string arguments to their schema-declared types (e.g. "42"→42)
|
|
|
|
|
function_args = coerce_tool_args(function_name, function_args)
|
|
|
|
|
|
2025-08-09 09:52:25 -07:00
|
|
|
try:
|
2026-02-21 20:22:33 -08:00
|
|
|
if function_name in _AGENT_LOOP_TOOLS:
|
|
|
|
|
return json.dumps({"error": f"{function_name} must be handled by the agent loop"})
|
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
|
|
|
|
2026-04-13 21:15:25 -07:00
|
|
|
# Check plugin hooks for a block directive (unless caller already
|
|
|
|
|
# checked — e.g. run_agent._invoke_tool passes skip=True to
|
|
|
|
|
# avoid double-firing the hook).
|
|
|
|
|
if not skip_pre_tool_call_hook:
|
|
|
|
|
block_message: Optional[str] = None
|
|
|
|
|
try:
|
|
|
|
|
from hermes_cli.plugins import get_pre_tool_call_block_message
|
|
|
|
|
block_message = get_pre_tool_call_block_message(
|
|
|
|
|
function_name,
|
|
|
|
|
function_args,
|
|
|
|
|
task_id=task_id or "",
|
|
|
|
|
session_id=session_id or "",
|
|
|
|
|
tool_call_id=tool_call_id or "",
|
|
|
|
|
)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
if block_message is not None:
|
|
|
|
|
return json.dumps({"error": block_message}, ensure_ascii=False)
|
|
|
|
|
else:
|
|
|
|
|
# Still fire the hook for observers — just don't check for blocking
|
|
|
|
|
# (the caller already did that).
|
|
|
|
|
try:
|
|
|
|
|
from hermes_cli.plugins import invoke_hook
|
|
|
|
|
invoke_hook(
|
|
|
|
|
"pre_tool_call",
|
|
|
|
|
tool_name=function_name,
|
|
|
|
|
args=function_args,
|
|
|
|
|
task_id=task_id or "",
|
|
|
|
|
session_id=session_id or "",
|
|
|
|
|
tool_call_id=tool_call_id or "",
|
|
|
|
|
)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
# Notify the read-loop tracker when a non-read/search tool runs,
|
|
|
|
|
# so the *consecutive* counter resets (reads after other work are fine).
|
|
|
|
|
if function_name not in _READ_SEARCH_TOOLS:
|
|
|
|
|
try:
|
|
|
|
|
from tools.file_tools import notify_other_tool_call
|
|
|
|
|
notify_other_tool_call(task_id or "default")
|
|
|
|
|
except Exception:
|
|
|
|
|
pass # file_tools may not be loaded yet
|
feat: first-class plugin architecture (#1555)
Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.
Core system (hermes_cli/plugins.py):
- Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
pip entry_points (hermes_agent.plugins group)
- PluginContext with register_tool() and register_hook()
- 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
on_session_start/end
- Namespace package handling for relative imports in plugins
- Graceful error isolation — broken plugins never crash the agent
Integration (model_tools.py):
- Plugin discovery runs after built-in + MCP tools
- Plugin tools bypass toolset filter via get_plugin_tool_names()
- Pre/post tool call hooks fire in handle_function_call()
CLI:
- /plugins command shows loaded plugins, tool counts, status
- Added to COMMANDS dict for autocomplete
Docs:
- Getting started guide (build-a-hermes-plugin.md) — full tutorial
building a calculator plugin step by step
- Reference page (features/plugins.md) — quick overview + tables
- Covers: file structure, schemas, handlers, hooks, data files,
bundled skills, env var gating, pip distribution, common mistakes
Tests: 16 tests covering discovery, loading, hooks, tool visibility.
2026-03-16 07:17:36 -07:00
|
|
|
|
2026-02-21 20:22:33 -08:00
|
|
|
if function_name == "execute_code":
|
2026-03-10 06:32:08 -07:00
|
|
|
# Prefer the caller-provided list so subagents can't overwrite
|
|
|
|
|
# the parent's tool set via the process-global.
|
|
|
|
|
sandbox_enabled = enabled_tools if enabled_tools is not None else _last_resolved_tool_names
|
feat: first-class plugin architecture (#1555)
Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.
Core system (hermes_cli/plugins.py):
- Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
pip entry_points (hermes_agent.plugins group)
- PluginContext with register_tool() and register_hook()
- 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
on_session_start/end
- Namespace package handling for relative imports in plugins
- Graceful error isolation — broken plugins never crash the agent
Integration (model_tools.py):
- Plugin discovery runs after built-in + MCP tools
- Plugin tools bypass toolset filter via get_plugin_tool_names()
- Pre/post tool call hooks fire in handle_function_call()
CLI:
- /plugins command shows loaded plugins, tool counts, status
- Added to COMMANDS dict for autocomplete
Docs:
- Getting started guide (build-a-hermes-plugin.md) — full tutorial
building a calculator plugin step by step
- Reference page (features/plugins.md) — quick overview + tables
- Covers: file structure, schemas, handlers, hooks, data files,
bundled skills, env var gating, pip distribution, common mistakes
Tests: 16 tests covering discovery, loading, hooks, tool visibility.
2026-03-16 07:17:36 -07:00
|
|
|
result = registry.dispatch(
|
2026-02-21 20:22:33 -08:00
|
|
|
function_name, function_args,
|
2026-02-19 23:23:43 -08:00
|
|
|
task_id=task_id,
|
2026-03-10 06:32:08 -07:00
|
|
|
enabled_tools=sandbox_enabled,
|
2026-02-19 23:23:43 -08:00
|
|
|
)
|
feat: first-class plugin architecture (#1555)
Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.
Core system (hermes_cli/plugins.py):
- Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
pip entry_points (hermes_agent.plugins group)
- PluginContext with register_tool() and register_hook()
- 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
on_session_start/end
- Namespace package handling for relative imports in plugins
- Graceful error isolation — broken plugins never crash the agent
Integration (model_tools.py):
- Plugin discovery runs after built-in + MCP tools
- Plugin tools bypass toolset filter via get_plugin_tool_names()
- Pre/post tool call hooks fire in handle_function_call()
CLI:
- /plugins command shows loaded plugins, tool counts, status
- Added to COMMANDS dict for autocomplete
Docs:
- Getting started guide (build-a-hermes-plugin.md) — full tutorial
building a calculator plugin step by step
- Reference page (features/plugins.md) — quick overview + tables
- Covers: file structure, schemas, handlers, hooks, data files,
bundled skills, env var gating, pip distribution, common mistakes
Tests: 16 tests covering discovery, loading, hooks, tool visibility.
2026-03-16 07:17:36 -07:00
|
|
|
else:
|
|
|
|
|
result = registry.dispatch(
|
|
|
|
|
function_name, function_args,
|
|
|
|
|
task_id=task_id,
|
|
|
|
|
user_task=user_task,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
from hermes_cli.plugins import invoke_hook
|
2026-03-29 12:26:44 +05:30
|
|
|
invoke_hook(
|
|
|
|
|
"post_tool_call",
|
|
|
|
|
tool_name=function_name,
|
|
|
|
|
args=function_args,
|
|
|
|
|
result=result,
|
|
|
|
|
task_id=task_id or "",
|
|
|
|
|
session_id=session_id or "",
|
|
|
|
|
tool_call_id=tool_call_id or "",
|
|
|
|
|
)
|
feat: first-class plugin architecture (#1555)
Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.
Core system (hermes_cli/plugins.py):
- Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
pip entry_points (hermes_agent.plugins group)
- PluginContext with register_tool() and register_hook()
- 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
on_session_start/end
- Namespace package handling for relative imports in plugins
- Graceful error isolation — broken plugins never crash the agent
Integration (model_tools.py):
- Plugin discovery runs after built-in + MCP tools
- Plugin tools bypass toolset filter via get_plugin_tool_names()
- Pre/post tool call hooks fire in handle_function_call()
CLI:
- /plugins command shows loaded plugins, tool counts, status
- Added to COMMANDS dict for autocomplete
Docs:
- Getting started guide (build-a-hermes-plugin.md) — full tutorial
building a calculator plugin step by step
- Reference page (features/plugins.md) — quick overview + tables
- Covers: file structure, schemas, handlers, hooks, data files,
bundled skills, env var gating, pip distribution, common mistakes
Tests: 16 tests covering discovery, loading, hooks, tool visibility.
2026-03-16 07:17:36 -07:00
|
|
|
except Exception:
|
|
|
|
|
pass
|
2026-02-19 23:23:43 -08:00
|
|
|
|
feat(plugins): add transform_tool_result hook for generic tool-result rewriting (#12972)
Closes #8933 more fully, extending the per-tool transform_terminal_output
hook from #12929 to a generic seam that fires after every tool dispatch.
Plugins can rewrite any tool's result string (normalize formats, redact
fields, summarize verbose output) without wrapping individual tools.
Changes
- hermes_cli/plugins.py: add "transform_tool_result" to VALID_HOOKS
- model_tools.py: invoke the hook in handle_function_call after
post_tool_call (which remains observational); first valid str return
replaces the result; fail-open
- tests/test_transform_tool_result_hook.py: 9 new tests covering no-op,
None return, non-string return, first-match wins, kwargs, hook
exception fallback, post_tool_call observation invariant, ordering
vs post_tool_call, and an end-to-end real-plugin integration
- tests/hermes_cli/test_plugins.py: assert new hook in VALID_HOOKS
- tests/test_model_tools.py: extend the hook-call-sequence assertion
to include the new hook
Design
- transform_tool_result runs AFTER post_tool_call so observers always
see the original (untransformed) result. This keeps post_tool_call's
observational contract.
- transform_terminal_output (from #12929) still runs earlier, inside
terminal_tool, so plugins can canonicalize BEFORE the 50k truncation
drops middle content. Both hooks coexist; they target different layers.
2026-04-20 03:48:08 -07:00
|
|
|
# Generic tool-result canonicalization seam: plugins receive the
|
|
|
|
|
# final result string (JSON, usually) and may replace it by
|
|
|
|
|
# returning a string from transform_tool_result. Runs after
|
|
|
|
|
# post_tool_call (which stays observational) and before the result
|
|
|
|
|
# is appended back into conversation context. Fail-open; the first
|
|
|
|
|
# valid string return wins; non-string returns are ignored.
|
|
|
|
|
try:
|
|
|
|
|
from hermes_cli.plugins import invoke_hook
|
|
|
|
|
hook_results = invoke_hook(
|
|
|
|
|
"transform_tool_result",
|
|
|
|
|
tool_name=function_name,
|
|
|
|
|
args=function_args,
|
|
|
|
|
result=result,
|
|
|
|
|
task_id=task_id or "",
|
|
|
|
|
session_id=session_id or "",
|
|
|
|
|
tool_call_id=tool_call_id or "",
|
|
|
|
|
)
|
|
|
|
|
for hook_result in hook_results:
|
|
|
|
|
if isinstance(hook_result, str):
|
|
|
|
|
result = hook_result
|
|
|
|
|
break
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
feat: first-class plugin architecture (#1555)
Plugin system for extending Hermes with custom tools, hooks, and
integrations — no source code changes required.
Core system (hermes_cli/plugins.py):
- Plugin discovery from ~/.hermes/plugins/, .hermes/plugins/, and
pip entry_points (hermes_agent.plugins group)
- PluginContext with register_tool() and register_hook()
- 6 lifecycle hooks: pre/post tool_call, pre/post llm_call,
on_session_start/end
- Namespace package handling for relative imports in plugins
- Graceful error isolation — broken plugins never crash the agent
Integration (model_tools.py):
- Plugin discovery runs after built-in + MCP tools
- Plugin tools bypass toolset filter via get_plugin_tool_names()
- Pre/post tool call hooks fire in handle_function_call()
CLI:
- /plugins command shows loaded plugins, tool counts, status
- Added to COMMANDS dict for autocomplete
Docs:
- Getting started guide (build-a-hermes-plugin.md) — full tutorial
building a calculator plugin step by step
- Reference page (features/plugins.md) — quick overview + tables
- Covers: file structure, schemas, handlers, hooks, data files,
bundled skills, env var gating, pip distribution, common mistakes
Tests: 16 tests covering discovery, loading, hooks, tool visibility.
2026-03-16 07:17:36 -07:00
|
|
|
return result
|
2026-02-12 10:05:08 -08:00
|
|
|
|
2026-02-21 20:22:33 -08:00
|
|
|
except Exception as e:
|
|
|
|
|
error_msg = f"Error executing {function_name}: {str(e)}"
|
|
|
|
|
logger.error(error_msg)
|
|
|
|
|
return json.dumps({"error": error_msg}, ensure_ascii=False)
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
|
2026-02-17 17:02:33 -08:00
|
|
|
|
2026-02-21 20:22:33 -08:00
|
|
|
# =============================================================================
|
|
|
|
|
# Backward-compat wrapper functions
|
|
|
|
|
# =============================================================================
|
2026-02-19 00:57:31 -08:00
|
|
|
|
2026-02-21 20:22:33 -08:00
|
|
|
def get_all_tool_names() -> List[str]:
    """List the names of all registered tools.

    Backward-compat wrapper: the lookup is delegated to the tool registry.
    """
    tool_names = registry.get_all_tool_names()
    return tool_names
|
2026-02-19 00:57:31 -08:00
|
|
|
|
2026-02-20 03:15:53 -08:00
|
|
|
|
2026-02-21 20:22:33 -08:00
|
|
|
def get_toolset_for_tool(tool_name: str) -> Optional[str]:
    """Look up which toolset ``tool_name`` belongs to.

    Backward-compat wrapper: the lookup is delegated to the tool registry,
    and whatever the registry returns is passed back unchanged.
    """
    owning_toolset = registry.get_toolset_for_tool(tool_name)
    return owning_toolset
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_available_toolsets() -> Dict[str, dict]:
    """Provide toolset availability info intended for UI display.

    Backward-compat wrapper: the data comes straight from the tool registry.
    """
    toolset_info = registry.get_available_toolsets()
    return toolset_info
|
2025-08-09 09:52:25 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def check_toolset_requirements() -> Dict[str, bool]:
    """Map every registered toolset to a boolean availability flag.

    Backward-compat wrapper: the ``{toolset: available_bool}`` mapping is
    produced by the tool registry and returned as-is.
    """
    availability_by_toolset = registry.check_toolset_requirements()
    return availability_by_toolset
|
2025-11-17 01:14:31 -05:00
|
|
|
|
2025-08-09 09:52:25 -07:00
|
|
|
|
2026-02-21 20:22:33 -08:00
|
|
|
def check_tool_availability(quiet: bool = False) -> Tuple[List[str], List[dict]]:
    """Report tool availability as ``(available_toolsets, unavailable_info)``.

    Backward-compat wrapper: ``quiet`` is forwarded by keyword to the tool
    registry, whose result is returned unchanged.
    """
    availability_report = registry.check_tool_availability(quiet=quiet)
    return availability_report
|