"""
WhatsApp platform adapter.
WhatsApp integration is more complex than Telegram/Discord because:
- No official bot API for personal accounts
- Business API requires Meta Business verification
- Most solutions use web-based automation
This adapter supports multiple backends:
1. WhatsApp Business API (requires Meta verification)
2. whatsapp-web.js (via Node.js subprocess) - for personal accounts
3. Baileys (via Node.js subprocess) - alternative for personal accounts
For simplicity, we'll implement a generic interface that can work
with different backends via a bridge pattern.
"""
import asyncio
import json
import logging
import os
import platform
import re
import subprocess
_IS_WINDOWS = platform.system() == "Windows"
from pathlib import Path
from typing import Dict, Optional, Any
from hermes_constants import get_hermes_dir
logger = logging.getLogger(__name__)
def _kill_port_process(port: int) -> None:
"""Kill any process listening on the given TCP port."""
try:
if _IS_WINDOWS:
# Use netstat to find the PID bound to this port, then taskkill
result = subprocess.run(
["netstat", "-ano", "-p", "TCP"],
capture_output=True, text=True, timeout=5,
)
for line in result.stdout.splitlines():
parts = line.split()
if len(parts) >= 5 and parts[3] == "LISTENING":
local_addr = parts[1]
if local_addr.endswith(f":{port}"):
try:
subprocess.run(
["taskkill", "/PID", parts[4], "/F"],
capture_output=True, timeout=5,
)
except subprocess.SubprocessError:
pass
else:
result = subprocess.run(
["fuser", f"{port}/tcp"],
capture_output=True, timeout=5,
)
if result.returncode == 0:
subprocess.run(
["fuser", "-k", f"{port}/tcp"],
capture_output=True, timeout=5,
)
except Exception:
pass
import sys
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import (
BasePlatformAdapter,
MessageEvent,
MessageType,
SendResult,
SUPPORTED_DOCUMENT_TYPES,
cache_image_from_url,
cache_audio_from_url,
)
def check_whatsapp_requirements() -> bool:
    """
    Check if WhatsApp dependencies are available.
    WhatsApp requires a Node.js bridge for most implementations.
    """
    # A working `node` binary on PATH is the only hard requirement here.
    try:
        probe = subprocess.run(
            ["node", "--version"],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except Exception:
        # Node missing, not executable, or the probe timed out.
        return False
    return probe.returncode == 0
class WhatsAppAdapter(BasePlatformAdapter):
"""
WhatsApp adapter.
This implementation uses a simple HTTP bridge pattern where:
1. A Node.js process runs the WhatsApp Web client
2. Messages are forwarded via HTTP/IPC to this Python adapter
3. Responses are sent back through the bridge
The actual Node.js bridge implementation can vary:
- whatsapp-web.js based
- Baileys based
- Business API based
Configuration:
- bridge_script: Path to the Node.js bridge script
- bridge_port: Port for HTTP communication (default: 3000)
- session_path: Path to store WhatsApp session data
"""
# WhatsApp message limits — practical UX limit, not protocol max.
# WhatsApp allows ~65K but long messages are unreadable on mobile.
MAX_MESSAGE_LENGTH = 4096
# Default bridge location relative to the hermes-agent install
_DEFAULT_BRIDGE_DIR = Path(__file__).resolve().parents[2] / "scripts" / "whatsapp-bridge"
def __init__(self, config: PlatformConfig):
    """Initialize the WhatsApp adapter from gateway platform config.

    Bridge port/script, session path, and reply prefix are read from
    ``config.extra``, falling back to the bundled bridge script and the
    hermes-home session directory.
    """
    super().__init__(config, Platform.WHATSAPP)
    extra = config.extra
    self._bridge_process: Optional[subprocess.Popen] = None
    self._bridge_port: int = extra.get("bridge_port", 3000)
    self._bridge_script: Optional[str] = extra.get(
        "bridge_script",
        str(self._DEFAULT_BRIDGE_DIR / "bridge.js"),
    )
    self._session_path: Path = Path(extra.get(
        "session_path",
        get_hermes_dir("platforms/whatsapp/session", "whatsapp/session"),
    ))
    # None means "use the bridge's default header"; '' disables the header.
    self._reply_prefix: Optional[str] = extra.get("reply_prefix")
    self._mention_patterns = self._compile_mention_patterns()
    self._message_queue: asyncio.Queue = asyncio.Queue()
    self._bridge_log_fh = None
    self._bridge_log: Optional[Path] = None
    self._poll_task: Optional[asyncio.Task] = None
    self._http_session: Optional["aiohttp.ClientSession"] = None
def _whatsapp_require_mention(self) -> bool:
    """Whether group messages must mention the bot to be processed.

    config.yaml (``require_mention``) wins; otherwise the
    WHATSAPP_REQUIRE_MENTION env var (default: off).
    """
    value = self.config.extra.get("require_mention")
    if value is None:
        value = os.getenv("WHATSAPP_REQUIRE_MENTION", "false")
    if isinstance(value, str):
        return value.lower() in ("true", "1", "yes", "on")
    return bool(value)
def _whatsapp_free_response_chats(self) -> set[str]:
    """Chat IDs that may trigger responses without a mention.

    Accepts a list from config.yaml or a comma-separated string from
    config/env (WHATSAPP_FREE_RESPONSE_CHATS); blanks are dropped.
    """
    configured = self.config.extra.get("free_response_chats")
    if configured is None:
        configured = os.getenv("WHATSAPP_FREE_RESPONSE_CHATS", "")
    if isinstance(configured, list):
        entries = (str(item).strip() for item in configured)
    else:
        entries = (piece.strip() for piece in str(configured).split(","))
    return {entry for entry in entries if entry}
def _compile_mention_patterns(self):
    """Compile mention-trigger regexes from config or environment.

    Sources, in priority order: ``config.extra['mention_patterns']``
    (string or list of strings), then the WHATSAPP_MENTION_PATTERNS env
    var (JSON list preferred; falls back to newline- then comma-separated
    plain strings).  Invalid regexes are skipped with a warning.

    Returns:
        list of compiled, case-insensitive regex patterns (possibly empty).
    """
    patterns = self.config.extra.get("mention_patterns")
    if patterns is None:
        raw = os.getenv("WHATSAPP_MENTION_PATTERNS", "").strip()
        if raw:
            try:
                # Preferred format: a JSON list of pattern strings.
                patterns = json.loads(raw)
            except Exception:
                # Not JSON — treat as newline-separated plain patterns,
                # then comma-separated as a last resort.
                patterns = [part.strip() for part in raw.splitlines() if part.strip()]
                if not patterns:
                    patterns = [part.strip() for part in raw.split(",") if part.strip()]
    if patterns is None:
        return []
    if isinstance(patterns, str):
        # Allow a single pattern given as a bare string.
        patterns = [patterns]
    if not isinstance(patterns, list):
        logger.warning("[%s] whatsapp mention_patterns must be a list or string; got %s", self.name, type(patterns).__name__)
        return []
    compiled = []
    for pattern in patterns:
        # Skip non-string and blank entries silently.
        if not isinstance(pattern, str) or not pattern.strip():
            continue
        try:
            compiled.append(re.compile(pattern, re.IGNORECASE))
        except re.error as exc:
            logger.warning("[%s] Invalid WhatsApp mention pattern %r: %s", self.name, pattern, exc)
    if compiled:
        logger.info("[%s] Loaded %d WhatsApp mention pattern(s)", self.name, len(compiled))
    return compiled
@staticmethod
def _normalize_whatsapp_id(value: Optional[str]) -> str:
    """Normalize a WhatsApp JID string for comparison.

    Returns '' for falsy input.  When the ID contains both ':' and '@'
    (device-qualified form), the first ':' is rewritten to '@' so the
    same sender compares equal across representations.
    """
    if not value:
        return ""
    jid = str(value).strip()
    if ":" in jid and "@" in jid:
        jid = jid.replace(":", "@", 1)
    return jid
def _bot_ids_from_message(self, data: Dict[str, Any]) -> set[str]:
    """Collect the normalized bot identifiers attached to a bridge message."""
    return {
        normalized
        for raw_id in (data.get("botIds") or [])
        if (normalized := self._normalize_whatsapp_id(raw_id))
    }
def _message_is_reply_to_bot(self, data: Dict[str, Any]) -> bool:
    """True when the quoted (replied-to) participant is one of our bot IDs."""
    quoted = self._normalize_whatsapp_id(data.get("quotedParticipant"))
    return bool(quoted) and quoted in self._bot_ids_from_message(data)
def _message_mentions_bot(self, data: Dict[str, Any]) -> bool:
    """True when the message mentions the bot, structurally or in its text."""
    bot_ids = self._bot_ids_from_message(data)
    if not bot_ids:
        return False
    # Structured mentions from the bridge are the most reliable signal.
    for raw_id in (data.get("mentionedIds") or []):
        if self._normalize_whatsapp_id(raw_id) in bot_ids:
            return True
    # Fall back to scanning the body text for the bare ID, with or
    # without a leading '@'.
    lower_body = str(data.get("body") or "").lower()
    for bot_id in bot_ids:
        bare_id = bot_id.split("@", 1)[0].lower()
        if not bare_id:
            continue
        if f"@{bare_id}" in lower_body or bare_id in lower_body:
            return True
    return False
def _message_matches_mention_patterns(self, data: Dict[str, Any]) -> bool:
    """True when the message body matches any configured mention regex."""
    if not self._mention_patterns:
        return False
    body = str(data.get("body") or "")
    for pattern in self._mention_patterns:
        if pattern.search(body):
            return True
    return False
def _clean_bot_mention_text(self, text: str, data: Dict[str, Any]) -> str:
    """Strip @-mentions of the bot from *text*.

    If stripping would leave nothing, the original text is returned
    unchanged so the agent never receives an empty prompt.
    """
    if not text:
        return text
    cleaned = text
    for bot_id in self._bot_ids_from_message(data):
        bare_id = bot_id.split("@", 1)[0]
        if not bare_id:
            continue
        # Remove "@<id>" plus any trailing punctuation/whitespace.
        cleaned = re.sub(rf"@{re.escape(bare_id)}\b[,:\-]*\s*", "", cleaned)
    return cleaned.strip() or text
def _should_process_message(self, data: Dict[str, Any]) -> bool:
    """Decide whether an incoming bridge message should reach the agent.

    Direct messages always pass.  Group messages pass when the chat is
    on the free-response list, mention-gating is disabled, the message
    is a slash command, it replies to the bot, or it mentions the bot
    (structurally or via configured regex patterns).
    """
    if not data.get("isGroup"):
        return True
    chat_id = str(data.get("chatId") or "")
    if chat_id in self._whatsapp_free_response_chats():
        return True
    if not self._whatsapp_require_mention():
        return True
    if str(data.get("body") or "").strip().startswith("/"):
        return True
    return (
        self._message_is_reply_to_bot(data)
        or self._message_mentions_bot(data)
        or self._message_matches_mention_patterns(data)
    )
async def connect(self) -> bool:
"""
Start the WhatsApp bridge.
This launches the Node.js bridge process and waits for it to be ready.
"""
if not check_whatsapp_requirements():
logger.warning("[%s] Node.js not found. WhatsApp requires Node.js.", self.name)
return False
bridge_path = Path(self._bridge_script)
if not bridge_path.exists():
logger.warning("[%s] Bridge script not found: %s", self.name, bridge_path)
return False
logger.info("[%s] Bridge found at %s", self.name, bridge_path)
# Acquire scoped lock to prevent duplicate sessions
lock_acquired = False
try:
if not self._acquire_platform_lock('whatsapp-session', str(self._session_path), 'WhatsApp session'):
return False
lock_acquired = True
except Exception as e:
logger.warning("[%s] Could not acquire session lock (non-fatal): %s", self.name, e)
try:
# Auto-install npm dependencies if node_modules doesn't exist
bridge_dir = bridge_path.parent
if not (bridge_dir / "node_modules").exists():
print(f"[{self.name}] Installing WhatsApp bridge dependencies...")
try:
install_result = subprocess.run(
["npm", "install", "--silent"],
cwd=str(bridge_dir),
capture_output=True,
text=True,
timeout=60,
)
if install_result.returncode != 0:
print(f"[{self.name}] npm install failed: {install_result.stderr}")
return False
print(f"[{self.name}] Dependencies installed")
except Exception as e:
print(f"[{self.name}] Failed to install dependencies: {e}")
return False
# Ensure session directory exists
self._session_path.mkdir(parents=True, exist_ok=True)
# Check if bridge is already running and connected
import aiohttp
import asyncio
try:
async with aiohttp.ClientSession() as session:
async with session.get(
f"http://127.0.0.1:{self._bridge_port}/health",
timeout=aiohttp.ClientTimeout(total=2)
) as resp:
if resp.status == 200:
data = await resp.json()
bridge_status = data.get("status", "unknown")
if bridge_status == "connected":
print(f"[{self.name}] Using existing bridge (status: {bridge_status})")
self._mark_connected()
self._bridge_process = None # Not managed by us
self._http_session = aiohttp.ClientSession()
self._poll_task = asyncio.create_task(self._poll_messages())
return True
else:
print(f"[{self.name}] Bridge found but not connected (status: {bridge_status}), restarting")
except Exception:
pass # Bridge not running, start a new one
# Kill any orphaned bridge from a previous gateway run
_kill_port_process(self._bridge_port)
await asyncio.sleep(1)
# Start the bridge process in its own process group.
# Route output to a log file so QR codes, errors, and reconnection
# messages are preserved for troubleshooting.
whatsapp_mode = os.getenv("WHATSAPP_MODE", "self-chat")
self._bridge_log = self._session_path.parent / "bridge.log"
bridge_log_fh = open(self._bridge_log, "a")
self._bridge_log_fh = bridge_log_fh
# Build bridge subprocess environment.
# Pass WHATSAPP_REPLY_PREFIX from config.yaml so the Node bridge
# can use it without the user needing to set a separate env var.
bridge_env = os.environ.copy()
if self._reply_prefix is not None:
bridge_env["WHATSAPP_REPLY_PREFIX"] = self._reply_prefix
self._bridge_process = subprocess.Popen(
[
"node",
str(bridge_path),
"--port", str(self._bridge_port),
"--session", str(self._session_path),
"--mode", whatsapp_mode,
],
stdout=bridge_log_fh,
stderr=bridge_log_fh,
preexec_fn=None if _IS_WINDOWS else os.setsid,
feat: OpenAI-compatible API server + WhatsApp configurable reply prefix (#1756) * feat: OpenAI-compatible API server platform adapter Salvaged from PR #956, updated for current main. Adds an HTTP API server as a gateway platform adapter that exposes hermes-agent via the OpenAI Chat Completions and Responses APIs. Any OpenAI-compatible frontend (Open WebUI, LobeChat, LibreChat, AnythingLLM, NextChat, ChatBox, etc.) can connect by pointing at http://localhost:8642/v1. Endpoints: - POST /v1/chat/completions — stateless Chat Completions API - POST /v1/responses — stateful Responses API with chaining - GET /v1/responses/{id} — retrieve stored response - DELETE /v1/responses/{id} — delete stored response - GET /v1/models — list hermes-agent as available model - GET /health — health check Features: - Real SSE streaming via stream_delta_callback (uses main's streaming) - In-memory LRU response store for Responses API conversation chaining - Named conversations via 'conversation' parameter - Bearer token auth (optional, via API_SERVER_KEY) - CORS support for browser-based frontends - System prompt layering (frontend system messages on top of core) - Real token usage tracking in responses Integration points: - Platform.API_SERVER in gateway/config.py - _create_adapter() branch in gateway/run.py - API_SERVER_* env vars in hermes_cli/config.py - Env var overrides in gateway/config.py _apply_env_overrides() Changes vs original PR #956: - Removed streaming infrastructure (already on main via stream_consumer.py) - Removed Telegram reply_to_mode (separate feature, not included) - Updated _resolve_model() -> _resolve_gateway_model() - Updated stream_callback -> stream_delta_callback - Updated connect()/disconnect() to use _mark_connected()/_mark_disconnected() - Adapted to current Platform enum (includes MATTERMOST, MATRIX, DINGTALK) Tests: 72 new tests, all passing Docs: API server guide, Open WebUI integration guide, env var reference * feat(whatsapp): make reply prefix 
configurable via config.yaml Reworked from PR #1764 (ifrederico) to use config.yaml instead of .env. The WhatsApp bridge prepends a header to every outgoing message. This was hardcoded to '⚕ *Hermes Agent*'. Users can now customize or disable it via config.yaml: whatsapp: reply_prefix: '' # disable header reply_prefix: '🤖 *My Bot*\n───\n' # custom prefix How it works: - load_gateway_config() reads whatsapp.reply_prefix from config.yaml and stores it in PlatformConfig.extra['reply_prefix'] - WhatsAppAdapter reads it from config.extra at init - When spawning bridge.js, the adapter passes it as WHATSAPP_REPLY_PREFIX in the subprocess environment - bridge.js handles undefined (default), empty (no header), or custom values with \\n escape support - Self-chat echo suppression uses the configured prefix Also fixes _config_version: was 9 but ENV_VARS_BY_VERSION had a key 10 (TAVILY_API_KEY), so existing users at v9 would never be prompted for Tavily. Bumped to 10 to close the gap. Added a regression test to prevent this from happening again. Credit: ifrederico (PR #1764) for the bridge.js implementation and the config version gap discovery. --------- Co-authored-by: Test <test@test.com>
2026-03-17 10:44:37 -07:00
env=bridge_env,
)
# Wait for the bridge to connect to WhatsApp.
# Phase 1: wait for the HTTP server to come up (up to 15s).
# Phase 2: wait for WhatsApp status: connected (up to 15s more).
2026-02-25 21:04:36 -08:00
import aiohttp
http_ready = False
data = {}
2026-02-25 21:04:36 -08:00
for attempt in range(15):
await asyncio.sleep(1)
if self._bridge_process.poll() is not None:
print(f"[{self.name}] Bridge process died (exit code {self._bridge_process.returncode})")
print(f"[{self.name}] Check log: {self._bridge_log}")
self._close_bridge_log()
2026-02-25 21:04:36 -08:00
return False
try:
async with aiohttp.ClientSession() as session:
async with session.get(
f"http://127.0.0.1:{self._bridge_port}/health",
2026-02-25 21:04:36 -08:00
timeout=aiohttp.ClientTimeout(total=2)
) as resp:
if resp.status == 200:
http_ready = True
2026-02-25 21:04:36 -08:00
data = await resp.json()
if data.get("status") == "connected":
print(f"[{self.name}] Bridge ready (status: connected)")
break
2026-02-25 21:04:36 -08:00
except Exception:
continue
if not http_ready:
print(f"[{self.name}] Bridge HTTP server did not start in 15s")
print(f"[{self.name}] Check log: {self._bridge_log}")
self._close_bridge_log()
return False
# Phase 2: HTTP is up but WhatsApp may still be connecting.
# Give it more time to authenticate with saved credentials.
if data.get("status") != "connected":
print(f"[{self.name}] Bridge HTTP ready, waiting for WhatsApp connection...")
for attempt in range(15):
await asyncio.sleep(1)
if self._bridge_process.poll() is not None:
print(f"[{self.name}] Bridge process died during connection")
print(f"[{self.name}] Check log: {self._bridge_log}")
self._close_bridge_log()
return False
try:
async with aiohttp.ClientSession() as session:
async with session.get(
f"http://127.0.0.1:{self._bridge_port}/health",
timeout=aiohttp.ClientTimeout(total=2)
) as resp:
if resp.status == 200:
data = await resp.json()
if data.get("status") == "connected":
print(f"[{self.name}] Bridge ready (status: connected)")
break
except Exception:
continue
else:
# Still not connected — warn but proceed (bridge may
# auto-reconnect later, e.g. after a code 515 restart).
print(f"[{self.name}] ⚠ WhatsApp not connected after 30s")
print(f"[{self.name}] Bridge log: {self._bridge_log}")
print(f"[{self.name}] If session expired, re-pair: hermes whatsapp")
# Create a persistent HTTP session for all bridge communication
self._http_session = aiohttp.ClientSession()
# Start message polling task
self._poll_task = asyncio.create_task(self._poll_messages())
self._mark_connected()
print(f"[{self.name}] Bridge started on port {self._bridge_port}")
return True
except Exception as e:
2026-02-25 21:04:36 -08:00
logger.error("[%s] Failed to start bridge: %s", self.name, e, exc_info=True)
return False
finally:
if not self._running:
if lock_acquired:
self._release_platform_lock()
self._close_bridge_log()
def _close_bridge_log(self) -> None:
"""Close the bridge log file handle if open."""
if self._bridge_log_fh:
try:
self._bridge_log_fh.close()
except Exception:
pass
self._bridge_log_fh = None
async def _check_managed_bridge_exit(self) -> Optional[str]:
"""Return a fatal error message if the managed bridge child exited."""
if self._bridge_process is None:
return None
returncode = self._bridge_process.poll()
if returncode is None:
return None
message = f"WhatsApp bridge process exited unexpectedly (code {returncode})."
if not self.has_fatal_error:
logger.error("[%s] %s", self.name, message)
self._set_fatal_error("whatsapp_bridge_exited", message, retryable=True)
self._close_bridge_log()
await self._notify_fatal_error()
return self.fatal_error_message or message
async def disconnect(self) -> None:
    """Stop the WhatsApp bridge and clean up any orphaned processes.

    Only a bridge we spawned ourselves is terminated; an externally
    managed bridge is left running. Also cancels the poll task, closes
    the persistent HTTP session, releases the platform lock, and marks
    the adapter disconnected.
    """
    if self._bridge_process:
        try:
            # Kill the entire process group so child node processes die too
            import signal
            try:
                if _IS_WINDOWS:
                    # Windows has no killpg; terminate the child directly.
                    self._bridge_process.terminate()
                else:
                    os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGTERM)
            except (ProcessLookupError, PermissionError):
                # Group already gone or not ours — fall back to the child only.
                self._bridge_process.terminate()
            # Give the bridge a second to exit gracefully before escalating.
            await asyncio.sleep(1)
            if self._bridge_process.poll() is None:
                # Still alive after SIGTERM — force-kill.
                try:
                    if _IS_WINDOWS:
                        self._bridge_process.kill()
                    else:
                        os.killpg(os.getpgid(self._bridge_process.pid), signal.SIGKILL)
                except (ProcessLookupError, PermissionError):
                    self._bridge_process.kill()
        except Exception as e:
            print(f"[{self.name}] Error stopping bridge: {e}")
    else:
        # Bridge was not started by us, don't kill it
        print(f"[{self.name}] Disconnecting (external bridge left running)")
    # Cancel the poll task explicitly
    if self._poll_task and not self._poll_task.done():
        self._poll_task.cancel()
        try:
            await self._poll_task
        except (asyncio.CancelledError, Exception):
            # Swallow cancellation and any error raised by the dying task.
            pass
    self._poll_task = None
    # Close the persistent HTTP session
    if self._http_session and not self._http_session.closed:
        await self._http_session.close()
    self._http_session = None
    self._release_platform_lock()
    self._mark_disconnected()
    self._bridge_process = None
    self._close_bridge_log()
    print(f"[{self.name}] Disconnected")
def format_message(self, content: str) -> str:
    """Rewrite standard markdown into WhatsApp's formatting dialect.

    WhatsApp understands *bold*, _italic_, ~strikethrough~, ```fenced```
    blocks and `inline` code. Markdown's **bold**/__bold__ and
    ~~strikethrough~~ are translated, headers become bold lines, and
    links collapse to "text (url)". Fenced blocks and inline code are
    shielded with placeholders so their contents are never rewritten.
    """
    if not content:
        return content

    fence_marker = "\x00FENCE"
    code_marker = "\x00CODE"
    saved_fences: list[str] = []
    saved_codes: list[str] = []

    def _stash_fence(match: re.Match) -> str:
        saved_fences.append(match.group(0))
        return f"{fence_marker}{len(saved_fences) - 1}\x00"

    def _stash_code(match: re.Match) -> str:
        saved_codes.append(match.group(0))
        return f"{code_marker}{len(saved_codes) - 1}\x00"

    # Shield ``` fenced blocks first, then `inline` code.
    text = re.sub(r"```[\s\S]*?```", _stash_fence, content)
    text = re.sub(r"`[^`\n]+`", _stash_code, text)

    # **bold** / __bold__ -> *bold*; ~~strike~~ -> ~strike~.
    text = re.sub(r"\*\*(.+?)\*\*", r"*\1*", text)
    text = re.sub(r"__(.+?)__", r"*\1*", text)
    text = re.sub(r"~~(.+?)~~", r"~\1~", text)
    # Single *text* / _text_ already mean italic on WhatsApp — untouched.

    # "# Heading" (any level) -> "*Heading*".
    text = re.sub(r"^#{1,6}\s+(.+)$", r"*\1*", text, flags=re.MULTILINE)
    # "[label](url)" -> "label (url)".
    text = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r"\1 (\2)", text)

    # Unshield in the same order the placeholders were minted.
    for idx, fence in enumerate(saved_fences):
        text = text.replace(f"{fence_marker}{idx}\x00", fence)
    for idx, code in enumerate(saved_codes):
        text = text.replace(f"{code_marker}{idx}\x00", code)
    return text
async def send(
    self,
    chat_id: str,
    content: str,
    reply_to: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None
) -> SendResult:
    """Send a message via the WhatsApp bridge.

    Formats markdown for WhatsApp, splits long messages into chunks
    that preserve code block boundaries, and sends each chunk
    sequentially.

    Args:
        chat_id: Target WhatsApp chat identifier.
        content: Message text in standard markdown; converted for WhatsApp.
        reply_to: Optional message id to quote — applied to the first
            chunk only.
        metadata: Unused; accepted for interface compatibility.

    Returns:
        SendResult carrying the bridge message id of the last chunk on
        success, or an error description on failure.
    """
    if not self._running or not self._http_session:
        return SendResult(success=False, error="Not connected")
    bridge_exit = await self._check_managed_bridge_exit()
    if bridge_exit:
        return SendResult(success=False, error=bridge_exit)
    # Empty/whitespace-only messages are treated as a successful no-op.
    if not content or not content.strip():
        return SendResult(success=True, message_id=None)
    try:
        import aiohttp
        # Format and chunk the message
        formatted = self.format_message(content)
        chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
        last_message_id = None
        for index, chunk in enumerate(chunks):
            payload: Dict[str, Any] = {
                "chatId": chat_id,
                "message": chunk,
            }
            # Quote the original message on the first chunk only.
            # (Gating on the chunk index — not on last_message_id —
            # keeps the quote off later chunks even when the bridge
            # response omits messageId.)
            if reply_to and index == 0:
                payload["replyTo"] = reply_to
            async with self._http_session.post(
                f"http://127.0.0.1:{self._bridge_port}/send",
                json=payload,
                timeout=aiohttp.ClientTimeout(total=30)
            ) as resp:
                if resp.status == 200:
                    data = await resp.json()
                    last_message_id = data.get("messageId")
                else:
                    error = await resp.text()
                    return SendResult(success=False, error=error)
            # Small delay between chunks (not after the last one) to
            # avoid rate limiting.
            if index < len(chunks) - 1:
                await asyncio.sleep(0.3)
        return SendResult(
            success=True,
            message_id=last_message_id,
        )
    except Exception as e:
        return SendResult(success=False, error=str(e))
2026-03-02 14:13:35 -03:00
async def edit_message(
    self,
    chat_id: str,
    message_id: str,
    content: str,
    *,
    finalize: bool = False,
) -> SendResult:
    """Edit a previously sent message via the WhatsApp bridge."""
    if not self._running or not self._http_session:
        return SendResult(success=False, error="Not connected")
    bridge_exit = await self._check_managed_bridge_exit()
    if bridge_exit:
        return SendResult(success=False, error=bridge_exit)
    try:
        import aiohttp

        payload = {
            "chatId": chat_id,
            "messageId": message_id,
            "message": content,
        }
        async with self._http_session.post(
            f"http://127.0.0.1:{self._bridge_port}/edit",
            json=payload,
            timeout=aiohttp.ClientTimeout(total=15)
        ) as resp:
            if resp.status != 200:
                # Bridge rejected the edit — surface its error body.
                return SendResult(success=False, error=await resp.text())
            return SendResult(success=True, message_id=message_id)
    except Exception as e:
        return SendResult(success=False, error=str(e))
async def _send_media_to_bridge(
    self,
    chat_id: str,
    file_path: str,
    media_type: str,
    caption: Optional[str] = None,
    file_name: Optional[str] = None,
) -> SendResult:
    """Ship any local media file through the bridge /send-media endpoint."""
    if not self._running or not self._http_session:
        return SendResult(success=False, error="Not connected")
    bridge_exit = await self._check_managed_bridge_exit()
    if bridge_exit:
        return SendResult(success=False, error=bridge_exit)
    try:
        import aiohttp

        if not os.path.exists(file_path):
            return SendResult(success=False, error=f"File not found: {file_path}")
        payload: Dict[str, Any] = {
            "chatId": chat_id,
            "filePath": file_path,
            "mediaType": media_type,
        }
        # Optional fields are only included when actually provided.
        if caption:
            payload["caption"] = caption
        if file_name:
            payload["fileName"] = file_name
        async with self._http_session.post(
            f"http://127.0.0.1:{self._bridge_port}/send-media",
            json=payload,
            timeout=aiohttp.ClientTimeout(total=120),
        ) as resp:
            if resp.status != 200:
                return SendResult(success=False, error=await resp.text())
            data = await resp.json()
            return SendResult(
                success=True,
                message_id=data.get("messageId"),
                raw_response=data,
            )
    except Exception as e:
        return SendResult(success=False, error=str(e))
async def send_image(
    self,
    chat_id: str,
    image_url: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
) -> SendResult:
    """Cache a remote image locally, then send it natively via the bridge.

    Falls back to the base-class implementation when the download or
    the native send fails.
    """
    try:
        local_path = await cache_image_from_url(image_url)
    except Exception:
        return await super().send_image(chat_id, image_url, caption, reply_to)
    try:
        return await self._send_media_to_bridge(chat_id, local_path, "image", caption)
    except Exception:
        return await super().send_image(chat_id, image_url, caption, reply_to)
async def send_image_file(
    self,
    chat_id: str,
    image_path: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
    **kwargs,
) -> SendResult:
    """Send an image that already exists on local disk natively via the bridge."""
    return await self._send_media_to_bridge(
        chat_id, image_path, media_type="image", caption=caption
    )
async def send_video(
    self,
    chat_id: str,
    video_path: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
    **kwargs,
) -> SendResult:
    """Send a local video natively via the bridge so it plays inline in WhatsApp."""
    return await self._send_media_to_bridge(
        chat_id, video_path, media_type="video", caption=caption
    )
async def send_document(
    self,
    chat_id: str,
    file_path: str,
    caption: Optional[str] = None,
    file_name: Optional[str] = None,
    reply_to: Optional[str] = None,
    **kwargs,
) -> SendResult:
    """Send a file as a downloadable document attachment via the bridge."""
    # Default the display name to the file's basename when not supplied.
    display_name = file_name if file_name else os.path.basename(file_path)
    return await self._send_media_to_bridge(
        chat_id, file_path, "document", caption, display_name
    )
async def send_typing(self, chat_id: str, metadata=None) -> None:
    """Best-effort typing indicator via the bridge; failures are ignored."""
    if not self._running or not self._http_session:
        return
    if await self._check_managed_bridge_exit():
        return
    try:
        import aiohttp

        await self._http_session.post(
            f"http://127.0.0.1:{self._bridge_port}/typing",
            json={"chatId": chat_id},
            timeout=aiohttp.ClientTimeout(total=5)
        )
    except Exception:
        # Typing indicators are cosmetic — never let them raise.
        pass
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
    """Look up name/type/participants for a WhatsApp chat via the bridge.

    Falls back to a DM-shaped stub when the bridge is unavailable or the
    lookup fails.
    """
    fallback = {"name": chat_id, "type": "dm"}
    if not self._running or not self._http_session:
        return {"name": "Unknown", "type": "dm"}
    if await self._check_managed_bridge_exit():
        return fallback
    try:
        import aiohttp

        url = f"http://127.0.0.1:{self._bridge_port}/chat/{chat_id}"
        async with self._http_session.get(
            url, timeout=aiohttp.ClientTimeout(total=10)
        ) as resp:
            if resp.status == 200:
                info = await resp.json()
                return {
                    "name": info.get("name", chat_id),
                    "type": "group" if info.get("isGroup") else "dm",
                    "participants": info.get("participants", []),
                }
    except Exception as exc:
        logger.debug("Could not get WhatsApp chat info for %s: %s", chat_id, exc)
    return fallback
async def _poll_messages(self) -> None:
    """Poll the bridge for incoming messages.

    Runs until the adapter stops, the HTTP session disappears, the
    managed bridge child exits, or the task is cancelled. Each fetched
    message is converted into a MessageEvent and dispatched through
    handle_message().
    """
    import aiohttp
    while self._running:
        if not self._http_session:
            break
        bridge_exit = await self._check_managed_bridge_exit()
        if bridge_exit:
            print(f"[{self.name}] {bridge_exit}")
            break
        try:
            async with self._http_session.get(
                f"http://127.0.0.1:{self._bridge_port}/messages",
                timeout=aiohttp.ClientTimeout(total=30)
            ) as resp:
                if resp.status == 200:
                    messages = await resp.json()
                    for msg_data in messages:
                        event = await self._build_message_event(msg_data)
                        if event:
                            await self.handle_message(event)
        except asyncio.CancelledError:
            # Task cancelled (e.g. during disconnect) — stop polling.
            break
        except Exception as e:
            # Distinguish "bridge died" (fatal: stop polling) from a
            # transient HTTP error (log, back off 5s, then retry).
            bridge_exit = await self._check_managed_bridge_exit()
            if bridge_exit:
                print(f"[{self.name}] {bridge_exit}")
                break
            print(f"[{self.name}] Poll error: {e}")
            await asyncio.sleep(5)
        await asyncio.sleep(1)  # Poll interval
async def _build_message_event(self, data: Dict[str, Any]) -> Optional[MessageEvent]:
    """Build a MessageEvent from bridge message data, downloading images to cache.

    Returns None when the message is filtered out by
    _should_process_message or when event construction fails.
    Remote photo/voice URLs are downloaded to the local cache;
    bridge-provided absolute paths are used as-is; small text-readable
    documents have their content injected into the message body.
    """
    try:
        if not self._should_process_message(data):
            return None
        # Determine message type
        msg_type = MessageType.TEXT
        if data.get("hasMedia"):
            media_type = data.get("mediaType", "")
            if "image" in media_type:
                msg_type = MessageType.PHOTO
            elif "video" in media_type:
                msg_type = MessageType.VIDEO
            elif "audio" in media_type or "ptt" in media_type:  # ptt = voice note
                msg_type = MessageType.VOICE
            else:
                msg_type = MessageType.DOCUMENT
        # Determine chat type
        is_group = data.get("isGroup", False)
        chat_type = "group" if is_group else "dm"
        # Build source
        source = self.build_source(
            chat_id=data.get("chatId", ""),
            chat_name=data.get("chatName"),
            chat_type=chat_type,
            user_id=data.get("senderId"),
            user_name=data.get("senderName"),
        )
        # Download media URLs to the local cache so agent tools
        # can access them reliably regardless of URL expiration.
        raw_urls = data.get("mediaUrls", [])
        cached_urls = []
        media_types = []
        for url in raw_urls:
            if msg_type == MessageType.PHOTO and url.startswith(("http://", "https://")):
                try:
                    cached_path = await cache_image_from_url(url, ext=".jpg")
                    cached_urls.append(cached_path)
                    media_types.append("image/jpeg")
                    print(f"[{self.name}] Cached user image: {cached_path}", flush=True)
                except Exception as e:
                    # Caching failed — fall back to the original URL.
                    print(f"[{self.name}] Failed to cache image: {e}", flush=True)
                    cached_urls.append(url)
                    media_types.append("image/jpeg")
            elif msg_type == MessageType.PHOTO and os.path.isabs(url):
                # Local file path — bridge already downloaded the image
                cached_urls.append(url)
                media_types.append("image/jpeg")
                print(f"[{self.name}] Using bridge-cached image: {url}", flush=True)
            elif msg_type == MessageType.VOICE and url.startswith(("http://", "https://")):
                try:
                    cached_path = await cache_audio_from_url(url, ext=".ogg")
                    cached_urls.append(cached_path)
                    media_types.append("audio/ogg")
                    print(f"[{self.name}] Cached user voice: {cached_path}", flush=True)
                except Exception as e:
                    # Caching failed — fall back to the original URL.
                    print(f"[{self.name}] Failed to cache voice: {e}", flush=True)
                    cached_urls.append(url)
                    media_types.append("audio/ogg")
            elif msg_type == MessageType.VOICE and os.path.isabs(url):
                # Local file path — bridge already downloaded the audio
                cached_urls.append(url)
                media_types.append("audio/ogg")
                print(f"[{self.name}] Using bridge-cached audio: {url}", flush=True)
            elif msg_type == MessageType.DOCUMENT and os.path.isabs(url):
                # Local file path — bridge already downloaded the document
                cached_urls.append(url)
                ext = Path(url).suffix.lower()
                mime = SUPPORTED_DOCUMENT_TYPES.get(ext, "application/octet-stream")
                media_types.append(mime)
                print(f"[{self.name}] Using bridge-cached document: {url}", flush=True)
            elif msg_type == MessageType.VIDEO and os.path.isabs(url):
                cached_urls.append(url)
                media_types.append("video/mp4")
                print(f"[{self.name}] Using bridge-cached video: {url}", flush=True)
            else:
                # Unrecognized scheme/type — pass the URL through untouched.
                cached_urls.append(url)
                media_types.append("unknown")
        # For text-readable documents, inject file content directly into
        # the message text so the agent can read it inline.
        # Cap at 100KB to match Telegram/Discord/Slack behaviour.
        body = data.get("body", "")
        if data.get("isGroup"):
            # Strip the bot @mention from group-message text.
            body = self._clean_bot_mention_text(body, data)
        MAX_TEXT_INJECT_BYTES = 100 * 1024
        if msg_type == MessageType.DOCUMENT and cached_urls:
            for doc_path in cached_urls:
                ext = Path(doc_path).suffix.lower()
                if ext in (".txt", ".md", ".csv", ".json", ".xml", ".yaml", ".yml", ".log", ".py", ".js", ".ts", ".html", ".css"):
                    try:
                        file_size = Path(doc_path).stat().st_size
                        if file_size > MAX_TEXT_INJECT_BYTES:
                            print(f"[{self.name}] Skipping text injection for {doc_path} ({file_size} bytes > {MAX_TEXT_INJECT_BYTES})", flush=True)
                            continue
                        content = Path(doc_path).read_text(errors="replace")
                        fname = Path(doc_path).name
                        # Remove the doc_<hex>_ prefix for display
                        display_name = fname
                        if "_" in fname:
                            parts = fname.split("_", 2)
                            if len(parts) >= 3:
                                display_name = parts[2]
                        injection = f"[Content of {display_name}]:\n{content}"
                        if body:
                            body = f"{injection}\n\n{body}"
                        else:
                            body = injection
                        print(f"[{self.name}] Injected text content from: {doc_path}", flush=True)
                    except Exception as e:
                        # Unreadable file — leave the body as-is.
                        print(f"[{self.name}] Failed to read document text: {e}", flush=True)
        return MessageEvent(
            text=body,
            message_type=msg_type,
            source=source,
            raw_message=data,
            message_id=data.get("messageId"),
            media_urls=cached_urls,
            media_types=media_types,
        )
    except Exception as e:
        print(f"[{self.name}] Error building event: {e}")
        return None