mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-05 18:27:04 +08:00
Compare commits
12 Commits
fix/dingta
...
fix/messag
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8b851e2eeb | ||
|
|
4433b83378 | ||
|
|
6405d389aa | ||
|
|
b16186a32a | ||
|
|
abdb4660d4 | ||
|
|
ed3bcae8bd | ||
|
|
75c5136e5a | ||
|
|
1781c05adb | ||
|
|
d87655afff | ||
|
|
efa778a0ef | ||
|
|
8b411b234d | ||
|
|
7c9beb5829 |
@@ -65,10 +65,15 @@ OPENCODE_GO_API_KEY=
|
||||
# TOOL API KEYS
|
||||
# =============================================================================
|
||||
|
||||
# Parallel API Key - AI-native web search and extract
|
||||
# Get at: https://parallel.ai
|
||||
PARALLEL_API_KEY=
|
||||
|
||||
# Firecrawl API Key - Web search, extract, and crawl
|
||||
# Get at: https://firecrawl.dev/
|
||||
FIRECRAWL_API_KEY=
|
||||
|
||||
|
||||
# FAL.ai API Key - Image generation
|
||||
# Get at: https://fal.ai/
|
||||
FAL_KEY=
|
||||
|
||||
@@ -44,7 +44,7 @@ hermes-agent/
|
||||
│ ├── terminal_tool.py # Terminal orchestration
|
||||
│ ├── process_registry.py # Background process management
|
||||
│ ├── file_tools.py # File read/write/search/patch
|
||||
│ ├── web_tools.py # Firecrawl search/extract
|
||||
│ ├── web_tools.py # Web search/extract (Parallel + Firecrawl)
|
||||
│ ├── browser_tool.py # Browserbase browser automation
|
||||
│ ├── code_execution_tool.py # execute_code sandbox
|
||||
│ ├── delegate_tool.py # Subagent delegation
|
||||
|
||||
@@ -147,7 +147,7 @@ hermes-agent/
|
||||
│ ├── approval.py # Dangerous command detection + per-session approval
|
||||
│ ├── terminal_tool.py # Terminal orchestration (sudo, env lifecycle, backends)
|
||||
│ ├── file_operations.py # read_file, write_file, search, patch, etc.
|
||||
│ ├── web_tools.py # web_search, web_extract (Firecrawl + Gemini summarization)
|
||||
│ ├── web_tools.py # web_search, web_extract (Parallel/Firecrawl + Gemini summarization)
|
||||
│ ├── vision_tools.py # Image analysis via multimodal models
|
||||
│ ├── delegate_tool.py # Subagent spawning and parallel task execution
|
||||
│ ├── code_execution_tool.py # Sandboxed Python with RPC tool access
|
||||
|
||||
@@ -963,8 +963,12 @@ def convert_messages_to_anthropic(
|
||||
elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str):
|
||||
fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks
|
||||
else:
|
||||
# Keep the later message
|
||||
fixed[-1] = m
|
||||
# Mixed types — normalize both to list and merge
|
||||
if isinstance(prev_blocks, str):
|
||||
prev_blocks = [{"type": "text", "text": prev_blocks}]
|
||||
if isinstance(curr_blocks, str):
|
||||
curr_blocks = [{"type": "text", "text": curr_blocks}]
|
||||
fixed[-1]["content"] = prev_blocks + curr_blocks
|
||||
else:
|
||||
fixed.append(m)
|
||||
result = fixed
|
||||
|
||||
@@ -984,6 +984,16 @@ class GatewayRunner:
|
||||
):
|
||||
self._schedule_update_notification_watch()
|
||||
|
||||
# Drain any recovered process watchers (from crash recovery checkpoint)
|
||||
try:
|
||||
from tools.process_registry import process_registry
|
||||
while process_registry.pending_watchers:
|
||||
watcher = process_registry.pending_watchers.pop(0)
|
||||
asyncio.create_task(self._run_process_watcher(watcher))
|
||||
logger.info("Resumed watcher for recovered process %s", watcher.get("session_id"))
|
||||
except Exception as e:
|
||||
logger.error("Recovered watcher setup error: %s", e)
|
||||
|
||||
# Start background session expiry watcher for proactive memory flushing
|
||||
asyncio.create_task(self._session_expiry_watcher())
|
||||
|
||||
@@ -1542,8 +1552,9 @@ class GatewayRunner:
|
||||
# Read privacy.redact_pii from config (re-read per message)
|
||||
_redact_pii = False
|
||||
try:
|
||||
import yaml as _pii_yaml
|
||||
with open(_config_path, encoding="utf-8") as _pf:
|
||||
_pcfg = yaml.safe_load(_pf) or {}
|
||||
_pcfg = _pii_yaml.safe_load(_pf) or {}
|
||||
_redact_pii = bool((_pcfg.get("privacy") or {}).get("redact_pii", False))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -550,6 +550,14 @@ OPTIONAL_ENV_VARS = {
|
||||
},
|
||||
|
||||
# ── Tool API keys ──
|
||||
"PARALLEL_API_KEY": {
|
||||
"description": "Parallel API key for AI-native web search and extract",
|
||||
"prompt": "Parallel API key",
|
||||
"url": "https://parallel.ai/",
|
||||
"tools": ["web_search", "web_extract"],
|
||||
"password": True,
|
||||
"category": "tool",
|
||||
},
|
||||
"FIRECRAWL_API_KEY": {
|
||||
"description": "Firecrawl API key for web search and scraping",
|
||||
"prompt": "Firecrawl API key",
|
||||
@@ -1506,6 +1514,7 @@ def show_config():
|
||||
keys = [
|
||||
("OPENROUTER_API_KEY", "OpenRouter"),
|
||||
("VOICE_TOOLS_OPENAI_KEY", "OpenAI (STT/TTS)"),
|
||||
("PARALLEL_API_KEY", "Parallel"),
|
||||
("FIRECRAWL_API_KEY", "Firecrawl"),
|
||||
("BROWSERBASE_API_KEY", "Browserbase"),
|
||||
("BROWSER_USE_API_KEY", "Browser Use"),
|
||||
@@ -1655,7 +1664,7 @@ def set_config_value(key: str, value: str):
|
||||
# Check if it's an API key (goes to .env)
|
||||
api_keys = [
|
||||
'OPENROUTER_API_KEY', 'OPENAI_API_KEY', 'ANTHROPIC_API_KEY', 'VOICE_TOOLS_OPENAI_KEY',
|
||||
'FIRECRAWL_API_KEY', 'FIRECRAWL_API_URL', 'BROWSERBASE_API_KEY', 'BROWSERBASE_PROJECT_ID', 'BROWSER_USE_API_KEY',
|
||||
'PARALLEL_API_KEY', 'FIRECRAWL_API_KEY', 'FIRECRAWL_API_URL', 'BROWSERBASE_API_KEY', 'BROWSERBASE_PROJECT_ID', 'BROWSER_USE_API_KEY',
|
||||
'FAL_KEY', 'TELEGRAM_BOT_TOKEN', 'DISCORD_BOT_TOKEN',
|
||||
'TERMINAL_SSH_HOST', 'TERMINAL_SSH_USER', 'TERMINAL_SSH_KEY',
|
||||
'SUDO_PASSWORD', 'SLACK_BOT_TOKEN', 'SLACK_APP_TOKEN',
|
||||
|
||||
@@ -444,11 +444,11 @@ def _print_setup_summary(config: dict, hermes_home):
|
||||
else:
|
||||
tool_status.append(("Mixture of Agents", False, "OPENROUTER_API_KEY"))
|
||||
|
||||
# Firecrawl (web tools)
|
||||
if get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"):
|
||||
# Web tools (Parallel or Firecrawl)
|
||||
if get_env_value("PARALLEL_API_KEY") or get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"):
|
||||
tool_status.append(("Web Search & Extract", True, None))
|
||||
else:
|
||||
tool_status.append(("Web Search & Extract", False, "FIRECRAWL_API_KEY"))
|
||||
tool_status.append(("Web Search & Extract", False, "PARALLEL_API_KEY or FIRECRAWL_API_KEY"))
|
||||
|
||||
# Browser tools (local Chromium or Browserbase cloud)
|
||||
import shutil
|
||||
|
||||
@@ -151,19 +151,29 @@ TOOL_CATEGORIES = {
|
||||
"web": {
|
||||
"name": "Web Search & Extract",
|
||||
"setup_title": "Select Search Provider",
|
||||
"setup_note": "A free DuckDuckGo search skill is also included — skip this if you don't need Firecrawl.",
|
||||
"setup_note": "A free DuckDuckGo search skill is also included — skip this if you don't need a premium provider.",
|
||||
"icon": "🔍",
|
||||
"providers": [
|
||||
{
|
||||
"name": "Firecrawl Cloud",
|
||||
"tag": "Recommended - hosted service",
|
||||
"tag": "Hosted service - search, extract, and crawl",
|
||||
"web_backend": "firecrawl",
|
||||
"env_vars": [
|
||||
{"key": "FIRECRAWL_API_KEY", "prompt": "Firecrawl API key", "url": "https://firecrawl.dev"},
|
||||
],
|
||||
},
|
||||
{
|
||||
"name": "Parallel",
|
||||
"tag": "AI-native search and extract",
|
||||
"web_backend": "parallel",
|
||||
"env_vars": [
|
||||
{"key": "PARALLEL_API_KEY", "prompt": "Parallel API key", "url": "https://parallel.ai"},
|
||||
],
|
||||
},
|
||||
{
|
||||
"name": "Firecrawl Self-Hosted",
|
||||
"tag": "Free - run your own instance",
|
||||
"web_backend": "firecrawl",
|
||||
"env_vars": [
|
||||
{"key": "FIRECRAWL_API_URL", "prompt": "Your Firecrawl instance URL (e.g., http://localhost:3002)"},
|
||||
],
|
||||
@@ -618,6 +628,9 @@ def _is_provider_active(provider: dict, config: dict) -> bool:
|
||||
if "browser_provider" in provider:
|
||||
current = config.get("browser", {}).get("cloud_provider")
|
||||
return provider["browser_provider"] == current
|
||||
if provider.get("web_backend"):
|
||||
current = config.get("web", {}).get("backend")
|
||||
return current == provider["web_backend"]
|
||||
return False
|
||||
|
||||
|
||||
@@ -650,6 +663,11 @@ def _configure_provider(provider: dict, config: dict):
|
||||
else:
|
||||
config.get("browser", {}).pop("cloud_provider", None)
|
||||
|
||||
# Set web search backend in config if applicable
|
||||
if provider.get("web_backend"):
|
||||
config.setdefault("web", {})["backend"] = provider["web_backend"]
|
||||
_print_success(f" Web backend set to: {provider['web_backend']}")
|
||||
|
||||
if not env_vars:
|
||||
_print_success(f" {provider['name']} - no configuration needed!")
|
||||
return
|
||||
|
||||
@@ -809,17 +809,18 @@ class SessionDB:
|
||||
offset: int = 0,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""List sessions, optionally filtered by source."""
|
||||
if source:
|
||||
cursor = self._conn.execute(
|
||||
"SELECT * FROM sessions WHERE source = ? ORDER BY started_at DESC LIMIT ? OFFSET ?",
|
||||
(source, limit, offset),
|
||||
)
|
||||
else:
|
||||
cursor = self._conn.execute(
|
||||
"SELECT * FROM sessions ORDER BY started_at DESC LIMIT ? OFFSET ?",
|
||||
(limit, offset),
|
||||
)
|
||||
return [dict(row) for row in cursor.fetchall()]
|
||||
with self._lock:
|
||||
if source:
|
||||
cursor = self._conn.execute(
|
||||
"SELECT * FROM sessions WHERE source = ? ORDER BY started_at DESC LIMIT ? OFFSET ?",
|
||||
(source, limit, offset),
|
||||
)
|
||||
else:
|
||||
cursor = self._conn.execute(
|
||||
"SELECT * FROM sessions ORDER BY started_at DESC LIMIT ? OFFSET ?",
|
||||
(limit, offset),
|
||||
)
|
||||
return [dict(row) for row in cursor.fetchall()]
|
||||
|
||||
# =========================================================================
|
||||
# Utility
|
||||
@@ -871,26 +872,28 @@ class SessionDB:
|
||||
|
||||
def clear_messages(self, session_id: str) -> None:
|
||||
"""Delete all messages for a session and reset its counters."""
|
||||
self._conn.execute(
|
||||
"DELETE FROM messages WHERE session_id = ?", (session_id,)
|
||||
)
|
||||
self._conn.execute(
|
||||
"UPDATE sessions SET message_count = 0, tool_call_count = 0 WHERE id = ?",
|
||||
(session_id,),
|
||||
)
|
||||
self._conn.commit()
|
||||
with self._lock:
|
||||
self._conn.execute(
|
||||
"DELETE FROM messages WHERE session_id = ?", (session_id,)
|
||||
)
|
||||
self._conn.execute(
|
||||
"UPDATE sessions SET message_count = 0, tool_call_count = 0 WHERE id = ?",
|
||||
(session_id,),
|
||||
)
|
||||
self._conn.commit()
|
||||
|
||||
def delete_session(self, session_id: str) -> bool:
|
||||
"""Delete a session and all its messages. Returns True if found."""
|
||||
cursor = self._conn.execute(
|
||||
"SELECT COUNT(*) FROM sessions WHERE id = ?", (session_id,)
|
||||
)
|
||||
if cursor.fetchone()[0] == 0:
|
||||
return False
|
||||
self._conn.execute("DELETE FROM messages WHERE session_id = ?", (session_id,))
|
||||
self._conn.execute("DELETE FROM sessions WHERE id = ?", (session_id,))
|
||||
self._conn.commit()
|
||||
return True
|
||||
with self._lock:
|
||||
cursor = self._conn.execute(
|
||||
"SELECT COUNT(*) FROM sessions WHERE id = ?", (session_id,)
|
||||
)
|
||||
if cursor.fetchone()[0] == 0:
|
||||
return False
|
||||
self._conn.execute("DELETE FROM messages WHERE session_id = ?", (session_id,))
|
||||
self._conn.execute("DELETE FROM sessions WHERE id = ?", (session_id,))
|
||||
self._conn.commit()
|
||||
return True
|
||||
|
||||
def prune_sessions(self, older_than_days: int = 90, source: str = None) -> int:
|
||||
"""
|
||||
@@ -900,22 +903,23 @@ class SessionDB:
|
||||
import time as _time
|
||||
cutoff = _time.time() - (older_than_days * 86400)
|
||||
|
||||
if source:
|
||||
cursor = self._conn.execute(
|
||||
"""SELECT id FROM sessions
|
||||
WHERE started_at < ? AND ended_at IS NOT NULL AND source = ?""",
|
||||
(cutoff, source),
|
||||
)
|
||||
else:
|
||||
cursor = self._conn.execute(
|
||||
"SELECT id FROM sessions WHERE started_at < ? AND ended_at IS NOT NULL",
|
||||
(cutoff,),
|
||||
)
|
||||
session_ids = [row["id"] for row in cursor.fetchall()]
|
||||
with self._lock:
|
||||
if source:
|
||||
cursor = self._conn.execute(
|
||||
"""SELECT id FROM sessions
|
||||
WHERE started_at < ? AND ended_at IS NOT NULL AND source = ?""",
|
||||
(cutoff, source),
|
||||
)
|
||||
else:
|
||||
cursor = self._conn.execute(
|
||||
"SELECT id FROM sessions WHERE started_at < ? AND ended_at IS NOT NULL",
|
||||
(cutoff,),
|
||||
)
|
||||
session_ids = [row["id"] for row in cursor.fetchall()]
|
||||
|
||||
for sid in session_ids:
|
||||
self._conn.execute("DELETE FROM messages WHERE session_id = ?", (sid,))
|
||||
self._conn.execute("DELETE FROM sessions WHERE id = ?", (sid,))
|
||||
for sid in session_ids:
|
||||
self._conn.execute("DELETE FROM messages WHERE session_id = ?", (sid,))
|
||||
self._conn.execute("DELETE FROM sessions WHERE id = ?", (sid,))
|
||||
|
||||
self._conn.commit()
|
||||
self._conn.commit()
|
||||
return len(session_ids)
|
||||
|
||||
@@ -27,6 +27,7 @@ dependencies = [
|
||||
"prompt_toolkit",
|
||||
# Tools
|
||||
"firecrawl-py",
|
||||
"parallel-web>=0.4.2",
|
||||
"fal-client",
|
||||
# Text-to-speech (Edge TTS is free, no API key needed)
|
||||
"edge-tts",
|
||||
|
||||
@@ -18,6 +18,7 @@ PyJWT[crypto]
|
||||
|
||||
# Web tools
|
||||
firecrawl-py
|
||||
parallel-web>=0.4.2
|
||||
|
||||
# Image generation
|
||||
fal-client
|
||||
|
||||
@@ -112,9 +112,14 @@ class TestDefaultContextLengths:
|
||||
|
||||
def test_gpt4_models_128k(self):
|
||||
for key, value in DEFAULT_CONTEXT_LENGTHS.items():
|
||||
if "gpt-4" in key:
|
||||
if "gpt-4" in key and "gpt-4.1" not in key:
|
||||
assert value == 128000, f"{key} should be 128000"
|
||||
|
||||
def test_gpt41_models_1m(self):
|
||||
for key, value in DEFAULT_CONTEXT_LENGTHS.items():
|
||||
if "gpt-4.1" in key:
|
||||
assert value == 1047576, f"{key} should be 1047576"
|
||||
|
||||
def test_gemini_models_1m(self):
|
||||
for key, value in DEFAULT_CONTEXT_LENGTHS.items():
|
||||
if "gemini" in key:
|
||||
|
||||
@@ -50,13 +50,16 @@ def _build_runner(monkeypatch, tmp_path, mode: str) -> GatewayRunner:
|
||||
return runner
|
||||
|
||||
|
||||
def _watcher_dict(session_id="proc_test"):
|
||||
return {
|
||||
def _watcher_dict(session_id="proc_test", thread_id=""):
|
||||
d = {
|
||||
"session_id": session_id,
|
||||
"check_interval": 0,
|
||||
"platform": "telegram",
|
||||
"chat_id": "123",
|
||||
}
|
||||
if thread_id:
|
||||
d["thread_id"] = thread_id
|
||||
return d
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -196,3 +199,47 @@ async def test_run_process_watcher_respects_notification_mode(
|
||||
if expected_fragment is not None:
|
||||
sent_message = adapter.send.await_args.args[1]
|
||||
assert expected_fragment in sent_message
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_thread_id_passed_to_send(monkeypatch, tmp_path):
|
||||
"""thread_id from watcher dict is forwarded as metadata to adapter.send()."""
|
||||
import tools.process_registry as pr_module
|
||||
|
||||
sessions = [SimpleNamespace(output_buffer="done\n", exited=True, exit_code=0)]
|
||||
monkeypatch.setattr(pr_module, "process_registry", _FakeRegistry(sessions))
|
||||
|
||||
async def _instant_sleep(*_a, **_kw):
|
||||
pass
|
||||
monkeypatch.setattr(asyncio, "sleep", _instant_sleep)
|
||||
|
||||
runner = _build_runner(monkeypatch, tmp_path, "all")
|
||||
adapter = runner.adapters[Platform.TELEGRAM]
|
||||
|
||||
await runner._run_process_watcher(_watcher_dict(thread_id="42"))
|
||||
|
||||
assert adapter.send.await_count == 1
|
||||
_, kwargs = adapter.send.call_args
|
||||
assert kwargs["metadata"] == {"thread_id": "42"}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_no_thread_id_sends_no_metadata(monkeypatch, tmp_path):
|
||||
"""When thread_id is empty, metadata should be None (general topic)."""
|
||||
import tools.process_registry as pr_module
|
||||
|
||||
sessions = [SimpleNamespace(output_buffer="done\n", exited=True, exit_code=0)]
|
||||
monkeypatch.setattr(pr_module, "process_registry", _FakeRegistry(sessions))
|
||||
|
||||
async def _instant_sleep(*_a, **_kw):
|
||||
pass
|
||||
monkeypatch.setattr(asyncio, "sleep", _instant_sleep)
|
||||
|
||||
runner = _build_runner(monkeypatch, tmp_path, "all")
|
||||
adapter = runner.adapters[Platform.TELEGRAM]
|
||||
|
||||
await runner._run_process_watcher(_watcher_dict())
|
||||
|
||||
assert adapter.send.await_count == 1
|
||||
_, kwargs = adapter.send.call_args
|
||||
assert kwargs["metadata"] is None
|
||||
|
||||
@@ -5,6 +5,13 @@ from hermes_cli.config import load_config, save_config
|
||||
from hermes_cli.setup import setup_model_provider
|
||||
|
||||
|
||||
def _maybe_keep_current_tts(question, choices):
|
||||
if question != "Select TTS provider:":
|
||||
return None
|
||||
assert choices[-1].startswith("Keep current (")
|
||||
return len(choices) - 1
|
||||
|
||||
|
||||
def _clear_provider_env(monkeypatch):
|
||||
for key in (
|
||||
"NOUS_API_KEY",
|
||||
@@ -25,16 +32,22 @@ def test_nous_oauth_setup_keeps_current_model_when_syncing_disk_provider(
|
||||
|
||||
config = load_config()
|
||||
|
||||
# Provider selection always comes first. Depending on available vision
|
||||
# backends, setup may either skip the optional vision step or prompt for
|
||||
# it before the default-model choice. Provide enough selections for both
|
||||
# paths while still ending on "keep current model".
|
||||
prompt_choices = iter([0, 2, 2])
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.setup.prompt_choice",
|
||||
lambda *args, **kwargs: next(prompt_choices),
|
||||
)
|
||||
def fake_prompt_choice(question, choices, default=0):
|
||||
if question == "Select your inference provider:":
|
||||
return 0
|
||||
if question == "Configure vision:":
|
||||
return len(choices) - 1
|
||||
if question == "Select default model:":
|
||||
assert choices[-1] == "Keep current (anthropic/claude-opus-4.6)"
|
||||
return len(choices) - 1
|
||||
tts_idx = _maybe_keep_current_tts(question, choices)
|
||||
if tts_idx is not None:
|
||||
return tts_idx
|
||||
raise AssertionError(f"Unexpected prompt_choice call: {question}")
|
||||
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "")
|
||||
monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
|
||||
|
||||
def _fake_login_nous(*args, **kwargs):
|
||||
auth_path = tmp_path / "auth.json"
|
||||
@@ -74,20 +87,29 @@ def test_custom_setup_clears_active_oauth_provider(tmp_path, monkeypatch):
|
||||
|
||||
config = load_config()
|
||||
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 3)
|
||||
def fake_prompt_choice(question, choices, default=0):
|
||||
if question == "Select your inference provider:":
|
||||
return 3
|
||||
tts_idx = _maybe_keep_current_tts(question, choices)
|
||||
if tts_idx is not None:
|
||||
return tts_idx
|
||||
raise AssertionError(f"Unexpected prompt_choice call: {question}")
|
||||
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
|
||||
|
||||
prompt_values = iter(
|
||||
[
|
||||
"https://custom.example/v1",
|
||||
"custom-api-key",
|
||||
"custom/model",
|
||||
"",
|
||||
]
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.setup.prompt",
|
||||
lambda *args, **kwargs: next(prompt_values),
|
||||
)
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False)
|
||||
monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
|
||||
|
||||
setup_model_provider(config)
|
||||
save_config(config)
|
||||
@@ -109,11 +131,17 @@ def test_codex_setup_uses_runtime_access_token_for_live_model_list(tmp_path, mon
|
||||
|
||||
config = load_config()
|
||||
|
||||
prompt_choices = iter([1, 0])
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.setup.prompt_choice",
|
||||
lambda *args, **kwargs: next(prompt_choices),
|
||||
)
|
||||
def fake_prompt_choice(question, choices, default=0):
|
||||
if question == "Select your inference provider:":
|
||||
return 1
|
||||
if question == "Select default model:":
|
||||
return 0
|
||||
tts_idx = _maybe_keep_current_tts(question, choices)
|
||||
if tts_idx is not None:
|
||||
return tts_idx
|
||||
raise AssertionError(f"Unexpected prompt_choice call: {question}")
|
||||
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "")
|
||||
monkeypatch.setattr("hermes_cli.auth.detect_external_credentials", lambda: [])
|
||||
monkeypatch.setattr("hermes_cli.auth._login_openai_codex", lambda *args, **kwargs: None)
|
||||
|
||||
@@ -6,6 +6,13 @@ from hermes_cli.config import load_config, save_config, save_env_value
|
||||
from hermes_cli.setup import _print_setup_summary, setup_model_provider
|
||||
|
||||
|
||||
def _maybe_keep_current_tts(question, choices):
|
||||
if question != "Select TTS provider:":
|
||||
return None
|
||||
assert choices[-1].startswith("Keep current (")
|
||||
return len(choices) - 1
|
||||
|
||||
|
||||
def _read_env(home):
|
||||
env_path = home / ".env"
|
||||
data = {}
|
||||
@@ -50,13 +57,13 @@ def test_setup_keep_current_custom_from_config_does_not_fall_through(tmp_path, m
|
||||
}
|
||||
save_config(config)
|
||||
|
||||
calls = {"count": 0}
|
||||
|
||||
def fake_prompt_choice(question, choices, default=0):
|
||||
calls["count"] += 1
|
||||
if calls["count"] == 1:
|
||||
if question == "Select your inference provider:":
|
||||
assert choices[-1] == "Keep current (Custom: https://example.invalid/v1)"
|
||||
return len(choices) - 1
|
||||
tts_idx = _maybe_keep_current_tts(question, choices)
|
||||
if tts_idx is not None:
|
||||
return tts_idx
|
||||
raise AssertionError("Model menu should not appear for keep-current custom")
|
||||
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
|
||||
@@ -72,7 +79,6 @@ def test_setup_keep_current_custom_from_config_does_not_fall_through(tmp_path, m
|
||||
assert reloaded["model"]["provider"] == "custom"
|
||||
assert reloaded["model"]["default"] == "custom/model"
|
||||
assert reloaded["model"]["base_url"] == "https://example.invalid/v1"
|
||||
assert calls["count"] == 1
|
||||
|
||||
|
||||
def test_setup_custom_endpoint_saves_working_v1_base_url(tmp_path, monkeypatch):
|
||||
@@ -86,6 +92,9 @@ def test_setup_custom_endpoint_saves_working_v1_base_url(tmp_path, monkeypatch):
|
||||
return 3 # Custom endpoint
|
||||
if question == "Configure vision:":
|
||||
return len(choices) - 1 # Skip
|
||||
tts_idx = _maybe_keep_current_tts(question, choices)
|
||||
if tts_idx is not None:
|
||||
return tts_idx
|
||||
raise AssertionError(f"Unexpected prompt_choice call: {question}")
|
||||
|
||||
def fake_prompt(message, current=None, **kwargs):
|
||||
@@ -140,22 +149,23 @@ def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tm
|
||||
save_config(config)
|
||||
|
||||
captured = {"provider_choices": None, "model_choices": None}
|
||||
calls = {"count": 0}
|
||||
|
||||
def fake_prompt_choice(question, choices, default=0):
|
||||
calls["count"] += 1
|
||||
if calls["count"] == 1:
|
||||
if question == "Select your inference provider:":
|
||||
captured["provider_choices"] = list(choices)
|
||||
assert choices[-1] == "Keep current (Anthropic)"
|
||||
return len(choices) - 1
|
||||
if calls["count"] == 2:
|
||||
if question == "Configure vision:":
|
||||
assert question == "Configure vision:"
|
||||
assert choices[-1] == "Skip for now"
|
||||
return len(choices) - 1
|
||||
if calls["count"] == 3:
|
||||
if question == "Select default model:":
|
||||
captured["model_choices"] = list(choices)
|
||||
return len(choices) - 1 # keep current model
|
||||
raise AssertionError("Unexpected extra prompt_choice call")
|
||||
tts_idx = _maybe_keep_current_tts(question, choices)
|
||||
if tts_idx is not None:
|
||||
return tts_idx
|
||||
raise AssertionError(f"Unexpected prompt_choice call: {question}")
|
||||
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "")
|
||||
@@ -172,7 +182,6 @@ def test_setup_keep_current_config_provider_uses_provider_specific_model_menu(tm
|
||||
assert captured["model_choices"] is not None
|
||||
assert captured["model_choices"][0] == "claude-opus-4-6"
|
||||
assert "anthropic/claude-opus-4.6 (recommended)" not in captured["model_choices"]
|
||||
assert calls["count"] == 3
|
||||
|
||||
|
||||
def test_setup_keep_current_anthropic_can_configure_openai_vision_default(tmp_path, monkeypatch):
|
||||
@@ -186,14 +195,24 @@ def test_setup_keep_current_anthropic_can_configure_openai_vision_default(tmp_pa
|
||||
}
|
||||
save_config(config)
|
||||
|
||||
picks = iter([
|
||||
10, # keep current provider (shifted +1 by kilocode insertion)
|
||||
1, # configure vision with OpenAI
|
||||
5, # use default gpt-4o-mini vision model
|
||||
4, # keep current Anthropic model
|
||||
])
|
||||
def fake_prompt_choice(question, choices, default=0):
|
||||
if question == "Select your inference provider:":
|
||||
assert choices[-1] == "Keep current (Anthropic)"
|
||||
return len(choices) - 1
|
||||
if question == "Configure vision:":
|
||||
return 1
|
||||
if question == "Select vision model:":
|
||||
assert choices[-1] == "Use default (gpt-4o-mini)"
|
||||
return len(choices) - 1
|
||||
if question == "Select default model:":
|
||||
assert choices[-1] == "Keep current (claude-opus-4-6)"
|
||||
return len(choices) - 1
|
||||
tts_idx = _maybe_keep_current_tts(question, choices)
|
||||
if tts_idx is not None:
|
||||
return tts_idx
|
||||
raise AssertionError(f"Unexpected prompt_choice call: {question}")
|
||||
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: next(picks))
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.setup.prompt",
|
||||
lambda message, *args, **kwargs: "sk-openai" if "OpenAI API key" in message else "",
|
||||
@@ -229,8 +248,17 @@ def test_setup_switch_custom_to_codex_clears_custom_endpoint_and_updates_config(
|
||||
}
|
||||
save_config(config)
|
||||
|
||||
picks = iter([1, 0])
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: next(picks))
|
||||
def fake_prompt_choice(question, choices, default=0):
|
||||
if question == "Select your inference provider:":
|
||||
return 1
|
||||
if question == "Select default model:":
|
||||
return 0
|
||||
tts_idx = _maybe_keep_current_tts(question, choices)
|
||||
if tts_idx is not None:
|
||||
return tts_idx
|
||||
raise AssertionError(f"Unexpected prompt_choice call: {question}")
|
||||
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt_choice", fake_prompt_choice)
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: "")
|
||||
monkeypatch.setattr("hermes_cli.setup.prompt_yes_no", lambda *args, **kwargs: False)
|
||||
monkeypatch.setattr("hermes_cli.auth.get_active_provider", lambda: None)
|
||||
|
||||
@@ -63,11 +63,13 @@ class TestFromEnv:
|
||||
|
||||
class TestFromGlobalConfig:
|
||||
def test_missing_config_falls_back_to_env(self, tmp_path):
|
||||
config = HonchoClientConfig.from_global_config(
|
||||
config_path=tmp_path / "nonexistent.json"
|
||||
)
|
||||
with patch.dict(os.environ, {}, clear=True):
|
||||
config = HonchoClientConfig.from_global_config(
|
||||
config_path=tmp_path / "nonexistent.json"
|
||||
)
|
||||
# Should fall back to from_env
|
||||
assert config.enabled is True or config.api_key is None # depends on env
|
||||
assert config.enabled is False
|
||||
assert config.api_key is None
|
||||
|
||||
def test_reads_full_config(self, tmp_path):
|
||||
config_file = tmp_path / "config.json"
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
Comprehensive Test Suite for Web Tools Module
|
||||
|
||||
This script tests all web tools functionality to ensure they work correctly.
|
||||
Run this after any updates to the web_tools.py module or Firecrawl library.
|
||||
Run this after any updates to the web_tools.py module or backend libraries.
|
||||
|
||||
Usage:
|
||||
python test_web_tools.py # Run all tests
|
||||
@@ -11,7 +11,7 @@ Usage:
|
||||
python test_web_tools.py --verbose # Show detailed output
|
||||
|
||||
Requirements:
|
||||
- FIRECRAWL_API_KEY environment variable must be set
|
||||
- PARALLEL_API_KEY or FIRECRAWL_API_KEY environment variable must be set
|
||||
- An auxiliary LLM provider (OPENROUTER_API_KEY or Nous Portal auth) (optional, for LLM tests)
|
||||
"""
|
||||
|
||||
@@ -28,12 +28,14 @@ from typing import List
|
||||
|
||||
# Import the web tools to test (updated path after moving tools/)
|
||||
from tools.web_tools import (
|
||||
web_search_tool,
|
||||
web_extract_tool,
|
||||
web_search_tool,
|
||||
web_extract_tool,
|
||||
web_crawl_tool,
|
||||
check_firecrawl_api_key,
|
||||
check_web_api_key,
|
||||
check_auxiliary_model,
|
||||
get_debug_session_info
|
||||
get_debug_session_info,
|
||||
_get_backend,
|
||||
)
|
||||
|
||||
|
||||
@@ -121,12 +123,13 @@ class WebToolsTester:
|
||||
"""Test environment setup and API keys"""
|
||||
print_section("Environment Check")
|
||||
|
||||
# Check Firecrawl API key
|
||||
if not check_firecrawl_api_key():
|
||||
self.log_result("Firecrawl API Key", "failed", "FIRECRAWL_API_KEY not set")
|
||||
# Check web backend API key (Parallel or Firecrawl)
|
||||
if not check_web_api_key():
|
||||
self.log_result("Web Backend API Key", "failed", "PARALLEL_API_KEY or FIRECRAWL_API_KEY not set")
|
||||
return False
|
||||
else:
|
||||
self.log_result("Firecrawl API Key", "passed", "Found")
|
||||
backend = _get_backend()
|
||||
self.log_result("Web Backend API Key", "passed", f"Using {backend} backend")
|
||||
|
||||
# Check auxiliary LLM provider (optional)
|
||||
if not check_auxiliary_model():
|
||||
@@ -578,7 +581,9 @@ class WebToolsTester:
|
||||
},
|
||||
"results": self.test_results,
|
||||
"environment": {
|
||||
"web_backend": _get_backend() if check_web_api_key() else None,
|
||||
"firecrawl_api_key": check_firecrawl_api_key(),
|
||||
"parallel_api_key": bool(os.getenv("PARALLEL_API_KEY")),
|
||||
"auxiliary_model": check_auxiliary_model(),
|
||||
"debug_mode": get_debug_session_info()["enabled"]
|
||||
}
|
||||
|
||||
@@ -98,11 +98,14 @@ class TestProviderRegistry:
|
||||
# =============================================================================
|
||||
|
||||
PROVIDER_ENV_VARS = (
|
||||
"OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
|
||||
"OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN",
|
||||
"CLAUDE_CODE_OAUTH_TOKEN",
|
||||
"GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY",
|
||||
"KIMI_API_KEY", "KIMI_BASE_URL", "MINIMAX_API_KEY", "MINIMAX_CN_API_KEY",
|
||||
"AI_GATEWAY_API_KEY", "AI_GATEWAY_BASE_URL",
|
||||
"KILOCODE_API_KEY", "KILOCODE_BASE_URL",
|
||||
"DASHSCOPE_API_KEY", "OPENCODE_ZEN_API_KEY", "OPENCODE_GO_API_KEY",
|
||||
"NOUS_API_KEY",
|
||||
"OPENAI_BASE_URL",
|
||||
)
|
||||
|
||||
@@ -111,6 +114,7 @@ PROVIDER_ENV_VARS = (
|
||||
def _clear_provider_env(monkeypatch):
|
||||
for key in PROVIDER_ENV_VARS:
|
||||
monkeypatch.delenv(key, raising=False)
|
||||
monkeypatch.setattr("hermes_cli.auth._load_auth_store", lambda: {})
|
||||
|
||||
|
||||
class TestResolveProvider:
|
||||
|
||||
@@ -2596,17 +2596,19 @@ class TestMCPSelectiveToolLoading:
|
||||
|
||||
async def run():
|
||||
with patch("tools.mcp_tool._connect_server", side_effect=fake_connect), \
|
||||
patch.dict("tools.mcp_tool._servers", {}, clear=True), \
|
||||
patch("tools.registry.registry", mock_registry), \
|
||||
patch("toolsets.create_custom_toolset"):
|
||||
return await _discover_and_register_server(
|
||||
registered = await _discover_and_register_server(
|
||||
"ink_existing",
|
||||
{"url": "https://mcp.example.com", "tools": {"include": ["create_service"]}},
|
||||
)
|
||||
return registered, _existing_tool_names()
|
||||
|
||||
try:
|
||||
registered = asyncio.run(run())
|
||||
registered, existing = asyncio.run(run())
|
||||
assert registered == ["mcp_ink_existing_create_service"]
|
||||
assert _existing_tool_names() == ["mcp_ink_existing_create_service"]
|
||||
assert existing == ["mcp_ink_existing_create_service"]
|
||||
finally:
|
||||
_servers.pop("ink_existing", None)
|
||||
|
||||
|
||||
@@ -294,6 +294,61 @@ class TestCheckpoint:
|
||||
recovered = registry.recover_from_checkpoint()
|
||||
assert recovered == 0
|
||||
|
||||
def test_write_checkpoint_includes_watcher_metadata(self, registry, tmp_path):
|
||||
with patch("tools.process_registry.CHECKPOINT_PATH", tmp_path / "procs.json"):
|
||||
s = _make_session()
|
||||
s.watcher_platform = "telegram"
|
||||
s.watcher_chat_id = "999"
|
||||
s.watcher_thread_id = "42"
|
||||
s.watcher_interval = 60
|
||||
registry._running[s.id] = s
|
||||
registry._write_checkpoint()
|
||||
|
||||
data = json.loads((tmp_path / "procs.json").read_text())
|
||||
assert len(data) == 1
|
||||
assert data[0]["watcher_platform"] == "telegram"
|
||||
assert data[0]["watcher_chat_id"] == "999"
|
||||
assert data[0]["watcher_thread_id"] == "42"
|
||||
assert data[0]["watcher_interval"] == 60
|
||||
|
||||
def test_recover_enqueues_watchers(self, registry, tmp_path):
|
||||
checkpoint = tmp_path / "procs.json"
|
||||
checkpoint.write_text(json.dumps([{
|
||||
"session_id": "proc_live",
|
||||
"command": "sleep 999",
|
||||
"pid": os.getpid(), # current process — guaranteed alive
|
||||
"task_id": "t1",
|
||||
"session_key": "sk1",
|
||||
"watcher_platform": "telegram",
|
||||
"watcher_chat_id": "123",
|
||||
"watcher_thread_id": "42",
|
||||
"watcher_interval": 60,
|
||||
}]))
|
||||
with patch("tools.process_registry.CHECKPOINT_PATH", checkpoint):
|
||||
recovered = registry.recover_from_checkpoint()
|
||||
assert recovered == 1
|
||||
assert len(registry.pending_watchers) == 1
|
||||
w = registry.pending_watchers[0]
|
||||
assert w["session_id"] == "proc_live"
|
||||
assert w["platform"] == "telegram"
|
||||
assert w["chat_id"] == "123"
|
||||
assert w["thread_id"] == "42"
|
||||
assert w["check_interval"] == 60
|
||||
|
||||
def test_recover_skips_watcher_when_no_interval(self, registry, tmp_path):
|
||||
checkpoint = tmp_path / "procs.json"
|
||||
checkpoint.write_text(json.dumps([{
|
||||
"session_id": "proc_live",
|
||||
"command": "sleep 999",
|
||||
"pid": os.getpid(),
|
||||
"task_id": "t1",
|
||||
"watcher_interval": 0,
|
||||
}]))
|
||||
with patch("tools.process_registry.CHECKPOINT_PATH", checkpoint):
|
||||
recovered = registry.recover_from_checkpoint()
|
||||
assert recovered == 1
|
||||
assert len(registry.pending_watchers) == 0
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Kill process
|
||||
|
||||
@@ -25,7 +25,7 @@ def _make_config():
|
||||
|
||||
|
||||
def _install_telegram_mock(monkeypatch, bot):
|
||||
parse_mode = SimpleNamespace(MARKDOWN_V2="MarkdownV2")
|
||||
parse_mode = SimpleNamespace(MARKDOWN_V2="MarkdownV2", HTML="HTML")
|
||||
constants_mod = SimpleNamespace(ParseMode=parse_mode)
|
||||
telegram_mod = SimpleNamespace(Bot=lambda token: bot, constants=constants_mod)
|
||||
monkeypatch.setitem(sys.modules, "telegram", telegram_mod)
|
||||
@@ -391,3 +391,97 @@ class TestSendToPlatformChunking:
|
||||
assert len(sent_calls) >= 3
|
||||
assert all(call == [] for call in sent_calls[:-1])
|
||||
assert sent_calls[-1] == media
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTML auto-detection in Telegram send
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSendTelegramHtmlDetection:
|
||||
"""Verify that messages containing HTML tags are sent with parse_mode=HTML
|
||||
and that plain / markdown messages use MarkdownV2."""
|
||||
|
||||
def _make_bot(self):
|
||||
bot = MagicMock()
|
||||
bot.send_message = AsyncMock(return_value=SimpleNamespace(message_id=1))
|
||||
bot.send_photo = AsyncMock()
|
||||
bot.send_video = AsyncMock()
|
||||
bot.send_voice = AsyncMock()
|
||||
bot.send_audio = AsyncMock()
|
||||
bot.send_document = AsyncMock()
|
||||
return bot
|
||||
|
||||
def test_html_message_uses_html_parse_mode(self, monkeypatch):
|
||||
bot = self._make_bot()
|
||||
_install_telegram_mock(monkeypatch, bot)
|
||||
|
||||
asyncio.run(
|
||||
_send_telegram("tok", "123", "<b>Hello</b> world")
|
||||
)
|
||||
|
||||
bot.send_message.assert_awaited_once()
|
||||
kwargs = bot.send_message.await_args.kwargs
|
||||
assert kwargs["parse_mode"] == "HTML"
|
||||
assert kwargs["text"] == "<b>Hello</b> world"
|
||||
|
||||
def test_plain_text_uses_markdown_v2(self, monkeypatch):
|
||||
bot = self._make_bot()
|
||||
_install_telegram_mock(monkeypatch, bot)
|
||||
|
||||
asyncio.run(
|
||||
_send_telegram("tok", "123", "Just plain text, no tags")
|
||||
)
|
||||
|
||||
bot.send_message.assert_awaited_once()
|
||||
kwargs = bot.send_message.await_args.kwargs
|
||||
assert kwargs["parse_mode"] == "MarkdownV2"
|
||||
|
||||
def test_html_with_code_and_pre_tags(self, monkeypatch):
|
||||
bot = self._make_bot()
|
||||
_install_telegram_mock(monkeypatch, bot)
|
||||
|
||||
html = "<pre>code block</pre> and <code>inline</code>"
|
||||
asyncio.run(_send_telegram("tok", "123", html))
|
||||
|
||||
kwargs = bot.send_message.await_args.kwargs
|
||||
assert kwargs["parse_mode"] == "HTML"
|
||||
|
||||
def test_closing_tag_detected(self, monkeypatch):
|
||||
bot = self._make_bot()
|
||||
_install_telegram_mock(monkeypatch, bot)
|
||||
|
||||
asyncio.run(_send_telegram("tok", "123", "text </div> more"))
|
||||
|
||||
kwargs = bot.send_message.await_args.kwargs
|
||||
assert kwargs["parse_mode"] == "HTML"
|
||||
|
||||
def test_angle_brackets_in_math_not_detected(self, monkeypatch):
|
||||
"""Expressions like 'x < 5' or '3 > 2' should not trigger HTML mode."""
|
||||
bot = self._make_bot()
|
||||
_install_telegram_mock(monkeypatch, bot)
|
||||
|
||||
asyncio.run(_send_telegram("tok", "123", "if x < 5 then y > 2"))
|
||||
|
||||
kwargs = bot.send_message.await_args.kwargs
|
||||
assert kwargs["parse_mode"] == "MarkdownV2"
|
||||
|
||||
def test_html_parse_failure_falls_back_to_plain(self, monkeypatch):
|
||||
"""If Telegram rejects the HTML, fall back to plain text."""
|
||||
bot = self._make_bot()
|
||||
bot.send_message = AsyncMock(
|
||||
side_effect=[
|
||||
Exception("Bad Request: can't parse entities: unsupported html tag"),
|
||||
SimpleNamespace(message_id=2), # plain fallback succeeds
|
||||
]
|
||||
)
|
||||
_install_telegram_mock(monkeypatch, bot)
|
||||
|
||||
result = asyncio.run(
|
||||
_send_telegram("tok", "123", "<invalid>broken html</invalid>")
|
||||
)
|
||||
|
||||
assert result["success"] is True
|
||||
assert bot.send_message.await_count == 2
|
||||
second_call = bot.send_message.await_args_list[1].kwargs
|
||||
assert second_call["parse_mode"] is None
|
||||
|
||||
@@ -1,8 +1,11 @@
|
||||
"""Tests for Firecrawl client configuration and singleton behavior.
|
||||
"""Tests for web backend client configuration and singleton behavior.
|
||||
|
||||
Coverage:
|
||||
_get_firecrawl_client() — configuration matrix, singleton caching,
|
||||
constructor failure recovery, return value verification, edge cases.
|
||||
_get_backend() — backend selection logic with env var combinations.
|
||||
_get_parallel_client() — Parallel client configuration, singleton caching.
|
||||
check_web_api_key() — unified availability check.
|
||||
"""
|
||||
|
||||
import os
|
||||
@@ -117,3 +120,157 @@ class TestFirecrawlClientConfig:
|
||||
from tools.web_tools import _get_firecrawl_client
|
||||
with pytest.raises(ValueError):
|
||||
_get_firecrawl_client()
|
||||
|
||||
|
||||
class TestBackendSelection:
|
||||
"""Test suite for _get_backend() backend selection logic.
|
||||
|
||||
The backend is configured via config.yaml (web.backend), set by
|
||||
``hermes tools``. Falls back to key-based detection for legacy/manual
|
||||
setups.
|
||||
"""
|
||||
|
||||
_ENV_KEYS = ("PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL")
|
||||
|
||||
def setup_method(self):
|
||||
for key in self._ENV_KEYS:
|
||||
os.environ.pop(key, None)
|
||||
|
||||
def teardown_method(self):
|
||||
for key in self._ENV_KEYS:
|
||||
os.environ.pop(key, None)
|
||||
|
||||
# ── Config-based selection (web.backend in config.yaml) ───────────
|
||||
|
||||
def test_config_parallel(self):
|
||||
"""web.backend=parallel in config → 'parallel' regardless of keys."""
|
||||
from tools.web_tools import _get_backend
|
||||
with patch("tools.web_tools._load_web_config", return_value={"backend": "parallel"}):
|
||||
assert _get_backend() == "parallel"
|
||||
|
||||
def test_config_firecrawl(self):
|
||||
"""web.backend=firecrawl in config → 'firecrawl' even if Parallel key set."""
|
||||
from tools.web_tools import _get_backend
|
||||
with patch("tools.web_tools._load_web_config", return_value={"backend": "firecrawl"}), \
|
||||
patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"}):
|
||||
assert _get_backend() == "firecrawl"
|
||||
|
||||
def test_config_case_insensitive(self):
|
||||
"""web.backend=Parallel (mixed case) → 'parallel'."""
|
||||
from tools.web_tools import _get_backend
|
||||
with patch("tools.web_tools._load_web_config", return_value={"backend": "Parallel"}):
|
||||
assert _get_backend() == "parallel"
|
||||
|
||||
# ── Fallback (no web.backend in config) ───────────────────────────
|
||||
|
||||
def test_fallback_parallel_only_key(self):
|
||||
"""Only PARALLEL_API_KEY set → 'parallel'."""
|
||||
from tools.web_tools import _get_backend
|
||||
with patch("tools.web_tools._load_web_config", return_value={}), \
|
||||
patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"}):
|
||||
assert _get_backend() == "parallel"
|
||||
|
||||
def test_fallback_both_keys_defaults_to_firecrawl(self):
|
||||
"""Both keys set, no config → 'firecrawl' (backward compat)."""
|
||||
from tools.web_tools import _get_backend
|
||||
with patch("tools.web_tools._load_web_config", return_value={}), \
|
||||
patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key", "FIRECRAWL_API_KEY": "fc-test"}):
|
||||
assert _get_backend() == "firecrawl"
|
||||
|
||||
def test_fallback_firecrawl_only_key(self):
|
||||
"""Only FIRECRAWL_API_KEY set → 'firecrawl'."""
|
||||
from tools.web_tools import _get_backend
|
||||
with patch("tools.web_tools._load_web_config", return_value={}), \
|
||||
patch.dict(os.environ, {"FIRECRAWL_API_KEY": "fc-test"}):
|
||||
assert _get_backend() == "firecrawl"
|
||||
|
||||
def test_fallback_no_keys_defaults_to_firecrawl(self):
|
||||
"""No keys, no config → 'firecrawl' (will fail at client init)."""
|
||||
from tools.web_tools import _get_backend
|
||||
with patch("tools.web_tools._load_web_config", return_value={}):
|
||||
assert _get_backend() == "firecrawl"
|
||||
|
||||
def test_invalid_config_falls_through_to_fallback(self):
|
||||
"""web.backend=invalid → ignored, uses key-based fallback."""
|
||||
from tools.web_tools import _get_backend
|
||||
with patch("tools.web_tools._load_web_config", return_value={"backend": "tavily"}), \
|
||||
patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"}):
|
||||
assert _get_backend() == "parallel"
|
||||
|
||||
|
||||
class TestParallelClientConfig:
|
||||
"""Test suite for Parallel client initialization."""
|
||||
|
||||
def setup_method(self):
|
||||
import tools.web_tools
|
||||
tools.web_tools._parallel_client = None
|
||||
os.environ.pop("PARALLEL_API_KEY", None)
|
||||
|
||||
def teardown_method(self):
|
||||
import tools.web_tools
|
||||
tools.web_tools._parallel_client = None
|
||||
os.environ.pop("PARALLEL_API_KEY", None)
|
||||
|
||||
def test_creates_client_with_key(self):
|
||||
"""PARALLEL_API_KEY set → creates Parallel client."""
|
||||
with patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"}):
|
||||
from tools.web_tools import _get_parallel_client
|
||||
from parallel import Parallel
|
||||
client = _get_parallel_client()
|
||||
assert client is not None
|
||||
assert isinstance(client, Parallel)
|
||||
|
||||
def test_no_key_raises_with_helpful_message(self):
|
||||
"""No PARALLEL_API_KEY → ValueError with guidance."""
|
||||
from tools.web_tools import _get_parallel_client
|
||||
with pytest.raises(ValueError, match="PARALLEL_API_KEY"):
|
||||
_get_parallel_client()
|
||||
|
||||
def test_singleton_returns_same_instance(self):
|
||||
"""Second call returns cached client."""
|
||||
with patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"}):
|
||||
from tools.web_tools import _get_parallel_client
|
||||
client1 = _get_parallel_client()
|
||||
client2 = _get_parallel_client()
|
||||
assert client1 is client2
|
||||
|
||||
|
||||
class TestCheckWebApiKey:
|
||||
"""Test suite for check_web_api_key() unified availability check."""
|
||||
|
||||
_ENV_KEYS = ("PARALLEL_API_KEY", "FIRECRAWL_API_KEY", "FIRECRAWL_API_URL")
|
||||
|
||||
def setup_method(self):
|
||||
for key in self._ENV_KEYS:
|
||||
os.environ.pop(key, None)
|
||||
|
||||
def teardown_method(self):
|
||||
for key in self._ENV_KEYS:
|
||||
os.environ.pop(key, None)
|
||||
|
||||
def test_parallel_key_only(self):
|
||||
with patch.dict(os.environ, {"PARALLEL_API_KEY": "test-key"}):
|
||||
from tools.web_tools import check_web_api_key
|
||||
assert check_web_api_key() is True
|
||||
|
||||
def test_firecrawl_key_only(self):
|
||||
with patch.dict(os.environ, {"FIRECRAWL_API_KEY": "fc-test"}):
|
||||
from tools.web_tools import check_web_api_key
|
||||
assert check_web_api_key() is True
|
||||
|
||||
def test_firecrawl_url_only(self):
|
||||
with patch.dict(os.environ, {"FIRECRAWL_API_URL": "http://localhost:3002"}):
|
||||
from tools.web_tools import check_web_api_key
|
||||
assert check_web_api_key() is True
|
||||
|
||||
def test_no_keys_returns_false(self):
|
||||
from tools.web_tools import check_web_api_key
|
||||
assert check_web_api_key() is False
|
||||
|
||||
def test_both_keys_returns_true(self):
|
||||
with patch.dict(os.environ, {
|
||||
"PARALLEL_API_KEY": "test-key",
|
||||
"FIRECRAWL_API_KEY": "fc-test",
|
||||
}):
|
||||
from tools.web_tools import check_web_api_key
|
||||
assert check_web_api_key() is True
|
||||
|
||||
@@ -426,6 +426,8 @@ async def test_web_extract_blocks_redirected_final_url(monkeypatch):
|
||||
async def test_web_crawl_short_circuits_blocked_url(monkeypatch):
|
||||
from tools import web_tools
|
||||
|
||||
# web_crawl_tool checks for Firecrawl env before website policy
|
||||
monkeypatch.setenv("FIRECRAWL_API_KEY", "fake-key")
|
||||
monkeypatch.setattr(
|
||||
web_tools,
|
||||
"check_website_access",
|
||||
@@ -453,6 +455,9 @@ async def test_web_crawl_short_circuits_blocked_url(monkeypatch):
|
||||
async def test_web_crawl_blocks_redirected_final_url(monkeypatch):
|
||||
from tools import web_tools
|
||||
|
||||
# web_crawl_tool checks for Firecrawl env before website policy
|
||||
monkeypatch.setenv("FIRECRAWL_API_KEY", "fake-key")
|
||||
|
||||
def fake_check(url):
|
||||
if url == "https://allowed.test":
|
||||
return None
|
||||
|
||||
@@ -82,6 +82,9 @@ def _build_provider_env_blocklist() -> frozenset:
|
||||
"FIREWORKS_API_KEY", # Fireworks AI
|
||||
"XAI_API_KEY", # xAI (Grok)
|
||||
"HELICONE_API_KEY", # LLM Observability proxy
|
||||
"PARALLEL_API_KEY",
|
||||
"FIRECRAWL_API_KEY",
|
||||
"FIRECRAWL_API_URL",
|
||||
# Gateway/runtime config not represented in OPTIONAL_ENV_VARS.
|
||||
"TELEGRAM_HOME_CHANNEL",
|
||||
"TELEGRAM_HOME_CHANNEL_NAME",
|
||||
|
||||
@@ -78,6 +78,11 @@ class ProcessSession:
|
||||
output_buffer: str = "" # Rolling output (last MAX_OUTPUT_CHARS)
|
||||
max_output_chars: int = MAX_OUTPUT_CHARS
|
||||
detached: bool = False # True if recovered from crash (no pipe)
|
||||
# Watcher/notification metadata (persisted for crash recovery)
|
||||
watcher_platform: str = ""
|
||||
watcher_chat_id: str = ""
|
||||
watcher_thread_id: str = ""
|
||||
watcher_interval: int = 0 # 0 = no watcher configured
|
||||
_lock: threading.Lock = field(default_factory=threading.Lock)
|
||||
_reader_thread: Optional[threading.Thread] = field(default=None, repr=False)
|
||||
_pty: Any = field(default=None, repr=False) # ptyprocess handle (when use_pty=True)
|
||||
@@ -709,6 +714,10 @@ class ProcessRegistry:
|
||||
"started_at": s.started_at,
|
||||
"task_id": s.task_id,
|
||||
"session_key": s.session_key,
|
||||
"watcher_platform": s.watcher_platform,
|
||||
"watcher_chat_id": s.watcher_chat_id,
|
||||
"watcher_thread_id": s.watcher_thread_id,
|
||||
"watcher_interval": s.watcher_interval,
|
||||
})
|
||||
|
||||
# Atomic write to avoid corruption on crash
|
||||
@@ -755,12 +764,27 @@ class ProcessRegistry:
|
||||
cwd=entry.get("cwd"),
|
||||
started_at=entry.get("started_at", time.time()),
|
||||
detached=True, # Can't read output, but can report status + kill
|
||||
watcher_platform=entry.get("watcher_platform", ""),
|
||||
watcher_chat_id=entry.get("watcher_chat_id", ""),
|
||||
watcher_thread_id=entry.get("watcher_thread_id", ""),
|
||||
watcher_interval=entry.get("watcher_interval", 0),
|
||||
)
|
||||
with self._lock:
|
||||
self._running[session.id] = session
|
||||
recovered += 1
|
||||
logger.info("Recovered detached process: %s (pid=%d)", session.command[:60], pid)
|
||||
|
||||
# Re-enqueue watcher so gateway can resume notifications
|
||||
if session.watcher_interval > 0:
|
||||
self.pending_watchers.append({
|
||||
"session_id": session.id,
|
||||
"check_interval": session.watcher_interval,
|
||||
"session_key": session.session_key,
|
||||
"platform": session.watcher_platform,
|
||||
"chat_id": session.watcher_chat_id,
|
||||
"thread_id": session.watcher_thread_id,
|
||||
})
|
||||
|
||||
# Clear the checkpoint (will be rewritten as processes finish)
|
||||
try:
|
||||
from utils import atomic_json_write
|
||||
|
||||
@@ -355,20 +355,31 @@ async def _send_telegram(token, chat_id, message, media_files=None, thread_id=No
|
||||
"""Send via Telegram Bot API (one-shot, no polling needed).
|
||||
|
||||
Applies markdown→MarkdownV2 formatting (same as the gateway adapter)
|
||||
so that bold, links, and headers render correctly.
|
||||
so that bold, links, and headers render correctly. If the message
|
||||
already contains HTML tags, it is sent with ``parse_mode='HTML'``
|
||||
instead, bypassing MarkdownV2 conversion.
|
||||
"""
|
||||
try:
|
||||
from telegram import Bot
|
||||
from telegram.constants import ParseMode
|
||||
|
||||
# Reuse the gateway adapter's format_message for markdown→MarkdownV2
|
||||
try:
|
||||
from gateway.platforms.telegram import TelegramAdapter, _escape_mdv2, _strip_mdv2
|
||||
_adapter = TelegramAdapter.__new__(TelegramAdapter)
|
||||
formatted = _adapter.format_message(message)
|
||||
except Exception:
|
||||
# Fallback: send as-is if formatting unavailable
|
||||
# Auto-detect HTML tags — if present, skip MarkdownV2 and send as HTML.
|
||||
# Inspired by github.com/ashaney — PR #1568.
|
||||
_has_html = bool(re.search(r'<[a-zA-Z/][^>]*>', message))
|
||||
|
||||
if _has_html:
|
||||
formatted = message
|
||||
send_parse_mode = ParseMode.HTML
|
||||
else:
|
||||
# Reuse the gateway adapter's format_message for markdown→MarkdownV2
|
||||
try:
|
||||
from gateway.platforms.telegram import TelegramAdapter, _escape_mdv2, _strip_mdv2
|
||||
_adapter = TelegramAdapter.__new__(TelegramAdapter)
|
||||
formatted = _adapter.format_message(message)
|
||||
except Exception:
|
||||
# Fallback: send as-is if formatting unavailable
|
||||
formatted = message
|
||||
send_parse_mode = ParseMode.MARKDOWN_V2
|
||||
|
||||
bot = Bot(token=token)
|
||||
int_chat_id = int(chat_id)
|
||||
@@ -384,16 +395,19 @@ async def _send_telegram(token, chat_id, message, media_files=None, thread_id=No
|
||||
try:
|
||||
last_msg = await bot.send_message(
|
||||
chat_id=int_chat_id, text=formatted,
|
||||
parse_mode=ParseMode.MARKDOWN_V2, **thread_kwargs
|
||||
parse_mode=send_parse_mode, **thread_kwargs
|
||||
)
|
||||
except Exception as md_error:
|
||||
# MarkdownV2 failed, fall back to plain text
|
||||
if "parse" in str(md_error).lower() or "markdown" in str(md_error).lower():
|
||||
logger.warning("MarkdownV2 parse failed in _send_telegram, falling back to plain text: %s", md_error)
|
||||
try:
|
||||
from gateway.platforms.telegram import _strip_mdv2
|
||||
plain = _strip_mdv2(formatted)
|
||||
except Exception:
|
||||
# Parse failed, fall back to plain text
|
||||
if "parse" in str(md_error).lower() or "markdown" in str(md_error).lower() or "html" in str(md_error).lower():
|
||||
logger.warning("Parse mode %s failed in _send_telegram, falling back to plain text: %s", send_parse_mode, md_error)
|
||||
if not _has_html:
|
||||
try:
|
||||
from gateway.platforms.telegram import _strip_mdv2
|
||||
plain = _strip_mdv2(formatted)
|
||||
except Exception:
|
||||
plain = message
|
||||
else:
|
||||
plain = message
|
||||
last_msg = await bot.send_message(
|
||||
chat_id=int_chat_id, text=plain,
|
||||
|
||||
@@ -1082,13 +1082,23 @@ def terminal_tool(
|
||||
result_data["check_interval_note"] = (
|
||||
f"Requested {check_interval}s raised to minimum 30s"
|
||||
)
|
||||
watcher_platform = os.getenv("HERMES_SESSION_PLATFORM", "")
|
||||
watcher_chat_id = os.getenv("HERMES_SESSION_CHAT_ID", "")
|
||||
watcher_thread_id = os.getenv("HERMES_SESSION_THREAD_ID", "")
|
||||
|
||||
# Store on session for checkpoint persistence
|
||||
proc_session.watcher_platform = watcher_platform
|
||||
proc_session.watcher_chat_id = watcher_chat_id
|
||||
proc_session.watcher_thread_id = watcher_thread_id
|
||||
proc_session.watcher_interval = effective_interval
|
||||
|
||||
process_registry.pending_watchers.append({
|
||||
"session_id": proc_session.id,
|
||||
"check_interval": effective_interval,
|
||||
"session_key": session_key,
|
||||
"platform": os.getenv("HERMES_SESSION_PLATFORM", ""),
|
||||
"chat_id": os.getenv("HERMES_SESSION_CHAT_ID", ""),
|
||||
"thread_id": os.getenv("HERMES_SESSION_THREAD_ID", ""),
|
||||
"platform": watcher_platform,
|
||||
"chat_id": watcher_chat_id,
|
||||
"thread_id": watcher_thread_id,
|
||||
})
|
||||
|
||||
return json.dumps(result_data, ensure_ascii=False)
|
||||
|
||||
@@ -3,16 +3,16 @@
|
||||
Standalone Web Tools Module
|
||||
|
||||
This module provides generic web tools that work with multiple backend providers.
|
||||
Currently uses Firecrawl as the backend, and the interface makes it easy to swap
|
||||
providers without changing the function signatures.
|
||||
Backend is selected during ``hermes tools`` setup (web.backend in config.yaml).
|
||||
|
||||
Available tools:
|
||||
- web_search_tool: Search the web for information
|
||||
- web_extract_tool: Extract content from specific web pages
|
||||
- web_crawl_tool: Crawl websites with specific instructions
|
||||
- web_crawl_tool: Crawl websites with specific instructions (Firecrawl only)
|
||||
|
||||
Backend compatibility:
|
||||
- Firecrawl: https://docs.firecrawl.dev/introduction
|
||||
- Firecrawl: https://docs.firecrawl.dev/introduction (search, extract, crawl)
|
||||
- Parallel: https://docs.parallel.ai (search, extract)
|
||||
|
||||
LLM Processing:
|
||||
- Uses OpenRouter API with Gemini 3 Flash Preview for intelligent content extraction
|
||||
@@ -53,6 +53,39 @@ from tools.website_policy import check_website_access
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ─── Backend Selection ────────────────────────────────────────────────────────
|
||||
|
||||
def _load_web_config() -> dict:
|
||||
"""Load the ``web:`` section from ~/.hermes/config.yaml."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
return load_config().get("web", {})
|
||||
except (ImportError, Exception):
|
||||
return {}
|
||||
|
||||
|
||||
def _get_backend() -> str:
|
||||
"""Determine which web backend to use.
|
||||
|
||||
Reads ``web.backend`` from config.yaml (set by ``hermes tools``).
|
||||
Falls back to whichever API key is present for users who configured
|
||||
keys manually without running setup.
|
||||
"""
|
||||
configured = _load_web_config().get("backend", "").lower().strip()
|
||||
if configured in ("parallel", "firecrawl"):
|
||||
return configured
|
||||
# Fallback for manual / legacy config — use whichever key is present.
|
||||
has_firecrawl = bool(os.getenv("FIRECRAWL_API_KEY") or os.getenv("FIRECRAWL_API_URL"))
|
||||
has_parallel = bool(os.getenv("PARALLEL_API_KEY"))
|
||||
if has_parallel and not has_firecrawl:
|
||||
return "parallel"
|
||||
# Default to firecrawl (backward compat, or when both are set)
|
||||
return "firecrawl"
|
||||
|
||||
|
||||
# ─── Firecrawl Client ────────────────────────────────────────────────────────
|
||||
|
||||
_firecrawl_client = None
|
||||
|
||||
def _get_firecrawl_client():
|
||||
@@ -81,6 +114,47 @@ def _get_firecrawl_client():
|
||||
_firecrawl_client = Firecrawl(**kwargs)
|
||||
return _firecrawl_client
|
||||
|
||||
|
||||
# ─── Parallel Client ─────────────────────────────────────────────────────────
|
||||
|
||||
_parallel_client = None
|
||||
_async_parallel_client = None
|
||||
|
||||
def _get_parallel_client():
|
||||
"""Get or create the Parallel sync client (lazy initialization).
|
||||
|
||||
Requires PARALLEL_API_KEY environment variable.
|
||||
"""
|
||||
from parallel import Parallel
|
||||
global _parallel_client
|
||||
if _parallel_client is None:
|
||||
api_key = os.getenv("PARALLEL_API_KEY")
|
||||
if not api_key:
|
||||
raise ValueError(
|
||||
"PARALLEL_API_KEY environment variable not set. "
|
||||
"Get your API key at https://parallel.ai"
|
||||
)
|
||||
_parallel_client = Parallel(api_key=api_key)
|
||||
return _parallel_client
|
||||
|
||||
|
||||
def _get_async_parallel_client():
|
||||
"""Get or create the Parallel async client (lazy initialization).
|
||||
|
||||
Requires PARALLEL_API_KEY environment variable.
|
||||
"""
|
||||
from parallel import AsyncParallel
|
||||
global _async_parallel_client
|
||||
if _async_parallel_client is None:
|
||||
api_key = os.getenv("PARALLEL_API_KEY")
|
||||
if not api_key:
|
||||
raise ValueError(
|
||||
"PARALLEL_API_KEY environment variable not set. "
|
||||
"Get your API key at https://parallel.ai"
|
||||
)
|
||||
_async_parallel_client = AsyncParallel(api_key=api_key)
|
||||
return _async_parallel_client
|
||||
|
||||
DEFAULT_MIN_LENGTH_FOR_SUMMARIZATION = 5000
|
||||
|
||||
# Allow per-task override via env var
|
||||
@@ -428,13 +502,89 @@ def clean_base64_images(text: str) -> str:
|
||||
return cleaned_text
|
||||
|
||||
|
||||
# ─── Parallel Search & Extract Helpers ────────────────────────────────────────
|
||||
|
||||
def _parallel_search(query: str, limit: int = 5) -> dict:
|
||||
"""Search using the Parallel SDK and return results as a dict."""
|
||||
from tools.interrupt import is_interrupted
|
||||
if is_interrupted():
|
||||
return {"error": "Interrupted", "success": False}
|
||||
|
||||
mode = os.getenv("PARALLEL_SEARCH_MODE", "agentic").lower().strip()
|
||||
if mode not in ("fast", "one-shot", "agentic"):
|
||||
mode = "agentic"
|
||||
|
||||
logger.info("Parallel search: '%s' (mode=%s, limit=%d)", query, mode, limit)
|
||||
response = _get_parallel_client().beta.search(
|
||||
search_queries=[query],
|
||||
objective=query,
|
||||
mode=mode,
|
||||
max_results=min(limit, 20),
|
||||
)
|
||||
|
||||
web_results = []
|
||||
for i, result in enumerate(response.results or []):
|
||||
excerpts = result.excerpts or []
|
||||
web_results.append({
|
||||
"url": result.url or "",
|
||||
"title": result.title or "",
|
||||
"description": " ".join(excerpts) if excerpts else "",
|
||||
"position": i + 1,
|
||||
})
|
||||
|
||||
return {"success": True, "data": {"web": web_results}}
|
||||
|
||||
|
||||
async def _parallel_extract(urls: List[str]) -> List[Dict[str, Any]]:
|
||||
"""Extract content from URLs using the Parallel async SDK.
|
||||
|
||||
Returns a list of result dicts matching the structure expected by the
|
||||
LLM post-processing pipeline (url, title, content, metadata).
|
||||
"""
|
||||
from tools.interrupt import is_interrupted
|
||||
if is_interrupted():
|
||||
return [{"url": u, "error": "Interrupted", "title": ""} for u in urls]
|
||||
|
||||
logger.info("Parallel extract: %d URL(s)", len(urls))
|
||||
response = await _get_async_parallel_client().beta.extract(
|
||||
urls=urls,
|
||||
full_content=True,
|
||||
)
|
||||
|
||||
results = []
|
||||
for result in response.results or []:
|
||||
content = result.full_content or ""
|
||||
if not content:
|
||||
content = "\n\n".join(result.excerpts or [])
|
||||
url = result.url or ""
|
||||
title = result.title or ""
|
||||
results.append({
|
||||
"url": url,
|
||||
"title": title,
|
||||
"content": content,
|
||||
"raw_content": content,
|
||||
"metadata": {"sourceURL": url, "title": title},
|
||||
})
|
||||
|
||||
for error in response.errors or []:
|
||||
results.append({
|
||||
"url": error.url or "",
|
||||
"title": "",
|
||||
"content": "",
|
||||
"error": error.content or error.error_type or "extraction failed",
|
||||
"metadata": {"sourceURL": error.url or ""},
|
||||
})
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def web_search_tool(query: str, limit: int = 5) -> str:
|
||||
"""
|
||||
Search the web for information using available search API backend.
|
||||
|
||||
|
||||
This function provides a generic interface for web search that can work
|
||||
with multiple backends. Currently uses Firecrawl.
|
||||
|
||||
with multiple backends (Parallel or Firecrawl).
|
||||
|
||||
Note: This function returns search result metadata only (URLs, titles, descriptions).
|
||||
Use web_extract_tool to get full content from specific URLs.
|
||||
|
||||
@@ -478,17 +628,28 @@ def web_search_tool(query: str, limit: int = 5) -> str:
|
||||
if is_interrupted():
|
||||
return json.dumps({"error": "Interrupted", "success": False})
|
||||
|
||||
# Dispatch to the configured backend
|
||||
backend = _get_backend()
|
||||
if backend == "parallel":
|
||||
response_data = _parallel_search(query, limit)
|
||||
debug_call_data["results_count"] = len(response_data.get("data", {}).get("web", []))
|
||||
result_json = json.dumps(response_data, indent=2, ensure_ascii=False)
|
||||
debug_call_data["final_response_size"] = len(result_json)
|
||||
_debug.log_call("web_search_tool", debug_call_data)
|
||||
_debug.save()
|
||||
return result_json
|
||||
|
||||
logger.info("Searching the web for: '%s' (limit: %d)", query, limit)
|
||||
|
||||
|
||||
response = _get_firecrawl_client().search(
|
||||
query=query,
|
||||
limit=limit
|
||||
)
|
||||
|
||||
|
||||
# The response is a SearchData object with web, news, and images attributes
|
||||
# When not scraping, the results are directly in these attributes
|
||||
web_results = []
|
||||
|
||||
|
||||
# Check if response has web attribute (SearchData object)
|
||||
if hasattr(response, 'web'):
|
||||
# Response is a SearchData object with web attribute
|
||||
@@ -596,123 +757,130 @@ async def web_extract_tool(
|
||||
|
||||
try:
|
||||
logger.info("Extracting content from %d URL(s)", len(urls))
|
||||
|
||||
# Determine requested formats for Firecrawl v2
|
||||
formats: List[str] = []
|
||||
if format == "markdown":
|
||||
formats = ["markdown"]
|
||||
elif format == "html":
|
||||
formats = ["html"]
|
||||
|
||||
# Dispatch to the configured backend
|
||||
backend = _get_backend()
|
||||
|
||||
if backend == "parallel":
|
||||
results = await _parallel_extract(urls)
|
||||
else:
|
||||
# Default: request markdown for LLM-readiness and include html as backup
|
||||
formats = ["markdown", "html"]
|
||||
|
||||
# Always use individual scraping for simplicity and reliability
|
||||
# Batch scraping adds complexity without much benefit for small numbers of URLs
|
||||
results: List[Dict[str, Any]] = []
|
||||
|
||||
from tools.interrupt import is_interrupted as _is_interrupted
|
||||
for url in urls:
|
||||
if _is_interrupted():
|
||||
results.append({"url": url, "error": "Interrupted", "title": ""})
|
||||
continue
|
||||
# ── Firecrawl extraction ──
|
||||
# Determine requested formats for Firecrawl v2
|
||||
formats: List[str] = []
|
||||
if format == "markdown":
|
||||
formats = ["markdown"]
|
||||
elif format == "html":
|
||||
formats = ["html"]
|
||||
else:
|
||||
# Default: request markdown for LLM-readiness and include html as backup
|
||||
formats = ["markdown", "html"]
|
||||
|
||||
# Website policy check — block before fetching
|
||||
blocked = check_website_access(url)
|
||||
if blocked:
|
||||
logger.info("Blocked web_extract for %s by rule %s", blocked["host"], blocked["rule"])
|
||||
results.append({
|
||||
"url": url, "title": "", "content": "",
|
||||
"error": blocked["message"],
|
||||
"blocked_by_policy": {"host": blocked["host"], "rule": blocked["rule"], "source": blocked["source"]},
|
||||
})
|
||||
continue
|
||||
# Always use individual scraping for simplicity and reliability
|
||||
# Batch scraping adds complexity without much benefit for small numbers of URLs
|
||||
results: List[Dict[str, Any]] = []
|
||||
|
||||
try:
|
||||
logger.info("Scraping: %s", url)
|
||||
scrape_result = _get_firecrawl_client().scrape(
|
||||
url=url,
|
||||
formats=formats
|
||||
)
|
||||
|
||||
# Process the result - properly handle object serialization
|
||||
metadata = {}
|
||||
title = ""
|
||||
content_markdown = None
|
||||
content_html = None
|
||||
|
||||
# Extract data from the scrape result
|
||||
if hasattr(scrape_result, 'model_dump'):
|
||||
# Pydantic model - use model_dump to get dict
|
||||
result_dict = scrape_result.model_dump()
|
||||
content_markdown = result_dict.get('markdown')
|
||||
content_html = result_dict.get('html')
|
||||
metadata = result_dict.get('metadata', {})
|
||||
elif hasattr(scrape_result, '__dict__'):
|
||||
# Regular object with attributes
|
||||
content_markdown = getattr(scrape_result, 'markdown', None)
|
||||
content_html = getattr(scrape_result, 'html', None)
|
||||
|
||||
# Handle metadata - convert to dict if it's an object
|
||||
metadata_obj = getattr(scrape_result, 'metadata', {})
|
||||
if hasattr(metadata_obj, 'model_dump'):
|
||||
metadata = metadata_obj.model_dump()
|
||||
elif hasattr(metadata_obj, '__dict__'):
|
||||
metadata = metadata_obj.__dict__
|
||||
elif isinstance(metadata_obj, dict):
|
||||
metadata = metadata_obj
|
||||
else:
|
||||
metadata = {}
|
||||
elif isinstance(scrape_result, dict):
|
||||
# Already a dictionary
|
||||
content_markdown = scrape_result.get('markdown')
|
||||
content_html = scrape_result.get('html')
|
||||
metadata = scrape_result.get('metadata', {})
|
||||
|
||||
# Ensure metadata is a dict (not an object)
|
||||
if not isinstance(metadata, dict):
|
||||
if hasattr(metadata, 'model_dump'):
|
||||
metadata = metadata.model_dump()
|
||||
elif hasattr(metadata, '__dict__'):
|
||||
metadata = metadata.__dict__
|
||||
else:
|
||||
metadata = {}
|
||||
|
||||
# Get title from metadata
|
||||
title = metadata.get("title", "")
|
||||
|
||||
# Re-check final URL after redirect
|
||||
final_url = metadata.get("sourceURL", url)
|
||||
final_blocked = check_website_access(final_url)
|
||||
if final_blocked:
|
||||
logger.info("Blocked redirected web_extract for %s by rule %s", final_blocked["host"], final_blocked["rule"])
|
||||
from tools.interrupt import is_interrupted as _is_interrupted
|
||||
for url in urls:
|
||||
if _is_interrupted():
|
||||
results.append({"url": url, "error": "Interrupted", "title": ""})
|
||||
continue
|
||||
|
||||
# Website policy check — block before fetching
|
||||
blocked = check_website_access(url)
|
||||
if blocked:
|
||||
logger.info("Blocked web_extract for %s by rule %s", blocked["host"], blocked["rule"])
|
||||
results.append({
|
||||
"url": final_url, "title": title, "content": "", "raw_content": "",
|
||||
"error": final_blocked["message"],
|
||||
"blocked_by_policy": {"host": final_blocked["host"], "rule": final_blocked["rule"], "source": final_blocked["source"]},
|
||||
"url": url, "title": "", "content": "",
|
||||
"error": blocked["message"],
|
||||
"blocked_by_policy": {"host": blocked["host"], "rule": blocked["rule"], "source": blocked["source"]},
|
||||
})
|
||||
continue
|
||||
|
||||
# Choose content based on requested format
|
||||
chosen_content = content_markdown if (format == "markdown" or (format is None and content_markdown)) else content_html or content_markdown or ""
|
||||
|
||||
results.append({
|
||||
"url": final_url,
|
||||
"title": title,
|
||||
"content": chosen_content,
|
||||
"raw_content": chosen_content,
|
||||
"metadata": metadata # Now guaranteed to be a dict
|
||||
})
|
||||
|
||||
except Exception as scrape_err:
|
||||
logger.debug("Scrape failed for %s: %s", url, scrape_err)
|
||||
results.append({
|
||||
"url": url,
|
||||
"title": "",
|
||||
"content": "",
|
||||
"raw_content": "",
|
||||
"error": str(scrape_err)
|
||||
})
|
||||
try:
|
||||
logger.info("Scraping: %s", url)
|
||||
scrape_result = _get_firecrawl_client().scrape(
|
||||
url=url,
|
||||
formats=formats
|
||||
)
|
||||
|
||||
# Process the result - properly handle object serialization
|
||||
metadata = {}
|
||||
title = ""
|
||||
content_markdown = None
|
||||
content_html = None
|
||||
|
||||
# Extract data from the scrape result
|
||||
if hasattr(scrape_result, 'model_dump'):
|
||||
# Pydantic model - use model_dump to get dict
|
||||
result_dict = scrape_result.model_dump()
|
||||
content_markdown = result_dict.get('markdown')
|
||||
content_html = result_dict.get('html')
|
||||
metadata = result_dict.get('metadata', {})
|
||||
elif hasattr(scrape_result, '__dict__'):
|
||||
# Regular object with attributes
|
||||
content_markdown = getattr(scrape_result, 'markdown', None)
|
||||
content_html = getattr(scrape_result, 'html', None)
|
||||
|
||||
# Handle metadata - convert to dict if it's an object
|
||||
metadata_obj = getattr(scrape_result, 'metadata', {})
|
||||
if hasattr(metadata_obj, 'model_dump'):
|
||||
metadata = metadata_obj.model_dump()
|
||||
elif hasattr(metadata_obj, '__dict__'):
|
||||
metadata = metadata_obj.__dict__
|
||||
elif isinstance(metadata_obj, dict):
|
||||
metadata = metadata_obj
|
||||
else:
|
||||
metadata = {}
|
||||
elif isinstance(scrape_result, dict):
|
||||
# Already a dictionary
|
||||
content_markdown = scrape_result.get('markdown')
|
||||
content_html = scrape_result.get('html')
|
||||
metadata = scrape_result.get('metadata', {})
|
||||
|
||||
# Ensure metadata is a dict (not an object)
|
||||
if not isinstance(metadata, dict):
|
||||
if hasattr(metadata, 'model_dump'):
|
||||
metadata = metadata.model_dump()
|
||||
elif hasattr(metadata, '__dict__'):
|
||||
metadata = metadata.__dict__
|
||||
else:
|
||||
metadata = {}
|
||||
|
||||
# Get title from metadata
|
||||
title = metadata.get("title", "")
|
||||
|
||||
# Re-check final URL after redirect
|
||||
final_url = metadata.get("sourceURL", url)
|
||||
final_blocked = check_website_access(final_url)
|
||||
if final_blocked:
|
||||
logger.info("Blocked redirected web_extract for %s by rule %s", final_blocked["host"], final_blocked["rule"])
|
||||
results.append({
|
||||
"url": final_url, "title": title, "content": "", "raw_content": "",
|
||||
"error": final_blocked["message"],
|
||||
"blocked_by_policy": {"host": final_blocked["host"], "rule": final_blocked["rule"], "source": final_blocked["source"]},
|
||||
})
|
||||
continue
|
||||
|
||||
# Choose content based on requested format
|
||||
chosen_content = content_markdown if (format == "markdown" or (format is None and content_markdown)) else content_html or content_markdown or ""
|
||||
|
||||
results.append({
|
||||
"url": final_url,
|
||||
"title": title,
|
||||
"content": chosen_content,
|
||||
"raw_content": chosen_content,
|
||||
"metadata": metadata # Now guaranteed to be a dict
|
||||
})
|
||||
|
||||
except Exception as scrape_err:
|
||||
logger.debug("Scrape failed for %s: %s", url, scrape_err)
|
||||
results.append({
|
||||
"url": url,
|
||||
"title": "",
|
||||
"content": "",
|
||||
"raw_content": "",
|
||||
"error": str(scrape_err)
|
||||
})
|
||||
|
||||
response = {"results": results}
|
||||
|
||||
@@ -887,6 +1055,14 @@ async def web_crawl_tool(
|
||||
}
|
||||
|
||||
try:
|
||||
# web_crawl requires Firecrawl — Parallel has no crawl API
|
||||
if not (os.getenv("FIRECRAWL_API_KEY") or os.getenv("FIRECRAWL_API_URL")):
|
||||
return json.dumps({
|
||||
"error": "web_crawl requires Firecrawl. Set FIRECRAWL_API_KEY, "
|
||||
"or use web_search + web_extract instead.",
|
||||
"success": False,
|
||||
}, ensure_ascii=False)
|
||||
|
||||
# Ensure URL has protocol
|
||||
if not url.startswith(('http://', 'https://')):
|
||||
url = f'https://{url}'
|
||||
@@ -1151,13 +1327,22 @@ async def web_crawl_tool(
|
||||
def check_firecrawl_api_key() -> bool:
|
||||
"""
|
||||
Check if the Firecrawl API key is available in environment variables.
|
||||
|
||||
|
||||
Returns:
|
||||
bool: True if API key is set, False otherwise
|
||||
"""
|
||||
return bool(os.getenv("FIRECRAWL_API_KEY"))
|
||||
|
||||
|
||||
def check_web_api_key() -> bool:
|
||||
"""Check if any web backend API key is available (Parallel or Firecrawl)."""
|
||||
return bool(
|
||||
os.getenv("PARALLEL_API_KEY")
|
||||
or os.getenv("FIRECRAWL_API_KEY")
|
||||
or os.getenv("FIRECRAWL_API_URL")
|
||||
)
|
||||
|
||||
|
||||
def check_auxiliary_model() -> bool:
|
||||
"""Check if an auxiliary text model is available for LLM content processing."""
|
||||
try:
|
||||
@@ -1184,26 +1369,30 @@ if __name__ == "__main__":
|
||||
print("=" * 40)
|
||||
|
||||
# Check if API keys are available
|
||||
firecrawl_available = check_firecrawl_api_key()
|
||||
web_available = check_web_api_key()
|
||||
nous_available = check_auxiliary_model()
|
||||
|
||||
if not firecrawl_available:
|
||||
print("❌ FIRECRAWL_API_KEY environment variable not set")
|
||||
print("Please set your API key: export FIRECRAWL_API_KEY='your-key-here'")
|
||||
print("Get API key at: https://firecrawl.dev/")
|
||||
|
||||
if web_available:
|
||||
backend = _get_backend()
|
||||
print(f"✅ Web backend: {backend}")
|
||||
if backend == "parallel":
|
||||
print(" Using Parallel API (https://parallel.ai)")
|
||||
else:
|
||||
print(" Using Firecrawl API (https://firecrawl.dev)")
|
||||
else:
|
||||
print("✅ Firecrawl API key found")
|
||||
|
||||
print("❌ No web search backend configured")
|
||||
print("Set PARALLEL_API_KEY (https://parallel.ai) or FIRECRAWL_API_KEY (https://firecrawl.dev)")
|
||||
|
||||
if not nous_available:
|
||||
print("❌ No auxiliary model available for LLM content processing")
|
||||
print("Set OPENROUTER_API_KEY, configure Nous Portal, or set OPENAI_BASE_URL + OPENAI_API_KEY")
|
||||
print("⚠️ Without an auxiliary model, LLM content processing will be disabled")
|
||||
else:
|
||||
print(f"✅ Auxiliary model available: {DEFAULT_SUMMARIZER_MODEL}")
|
||||
|
||||
if not firecrawl_available:
|
||||
|
||||
if not web_available:
|
||||
exit(1)
|
||||
|
||||
|
||||
print("🛠️ Web tools ready for use!")
|
||||
|
||||
if nous_available:
|
||||
@@ -1301,8 +1490,8 @@ registry.register(
|
||||
toolset="web",
|
||||
schema=WEB_SEARCH_SCHEMA,
|
||||
handler=lambda args, **kw: web_search_tool(args.get("query", ""), limit=5),
|
||||
check_fn=check_firecrawl_api_key,
|
||||
requires_env=["FIRECRAWL_API_KEY"],
|
||||
check_fn=check_web_api_key,
|
||||
requires_env=["PARALLEL_API_KEY", "FIRECRAWL_API_KEY"],
|
||||
emoji="🔍",
|
||||
)
|
||||
registry.register(
|
||||
@@ -1311,8 +1500,8 @@ registry.register(
|
||||
schema=WEB_EXTRACT_SCHEMA,
|
||||
handler=lambda args, **kw: web_extract_tool(
|
||||
args.get("urls", [])[:5] if isinstance(args.get("urls"), list) else [], "markdown"),
|
||||
check_fn=check_firecrawl_api_key,
|
||||
requires_env=["FIRECRAWL_API_KEY"],
|
||||
check_fn=check_web_api_key,
|
||||
requires_env=["PARALLEL_API_KEY", "FIRECRAWL_API_KEY"],
|
||||
is_async=True,
|
||||
emoji="📄",
|
||||
)
|
||||
|
||||
@@ -130,6 +130,12 @@ TOOLSETS = {
|
||||
"includes": []
|
||||
},
|
||||
|
||||
"messaging": {
|
||||
"description": "Cross-platform messaging: send messages to Telegram, Discord, Slack, SMS, etc.",
|
||||
"tools": ["send_message"],
|
||||
"includes": []
|
||||
},
|
||||
|
||||
"rl": {
|
||||
"description": "RL training tools for running reinforcement learning on Tinker-Atropos",
|
||||
"tools": [
|
||||
|
||||
@@ -61,6 +61,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `PARALLEL_API_KEY` | AI-native web search ([parallel.ai](https://parallel.ai/)) |
|
||||
| `FIRECRAWL_API_KEY` | Web scraping ([firecrawl.dev](https://firecrawl.dev/)) |
|
||||
| `FIRECRAWL_API_URL` | Custom Firecrawl API endpoint for self-hosted instances (optional) |
|
||||
| `BROWSERBASE_API_KEY` | Browser automation ([browserbase.com](https://browserbase.com/)) |
|
||||
|
||||
Reference in New Issue
Block a user