Compare commits

...

3 Commits

Author SHA1 Message Date
Teknium
67bf763d20 feat(browser): add Camofox local anti-detection browser backend
Camofox-browser is a self-hosted Node.js server wrapping Camoufox
(Firefox fork with C++ fingerprint spoofing). When CAMOFOX_URL is set,
all 11 browser tools route through the Camofox REST API instead of
the agent-browser CLI.

Maps 1:1 to the existing browser tool interface:
- Navigate, snapshot, click, type, scroll, back, press, close
- Get images, vision (screenshot + LLM analysis)
- Console (returns empty with note — camofox limitation)

Setup: run `npm start` in the camofox-browser dir, or `docker run -p 9377:9377 jo-inc/camofox-browser`.
Then set CAMOFOX_URL=http://localhost:9377 in ~/.hermes/.env

Advantages over Browserbase (cloud):
- Free (no per-session API costs)
- Local (zero network latency for browser ops)
- Anti-detection at C++ level (bypasses Cloudflare/Google bot detection)
- Works offline, Docker-ready

Files:
- tools/browser_camofox.py: Full REST backend (~400 lines)
- tools/browser_tool.py: Routing at each tool function
- hermes_cli/config.py: CAMOFOX_URL env var entry
- tests/tools/test_browser_camofox.py: 20 tests
2026-03-30 13:10:32 -07:00
Teknium
9fd78c7a8e fix: use SKILLS_DIR not repo path for Telegram menu skill filter (#4005)
Skills are synced to ~/.hermes/skills/ (SKILLS_DIR), not the repo's
skills/ directory. The previous filter compared against the repo path
so no skills matched. Now checks SKILLS_DIR and excludes .hub/
subdirectory (user-installed hub skills).
2026-03-30 11:01:13 -07:00
Teknium
5ceed021dc feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap (#3934)
* feat(gateway): skill-aware slash commands, paginated /commands, Telegram 100-cap

Map active skills to Telegram's slash command menu so users can
discover and invoke skills directly. Three changes:

1. Telegram menu now includes active skill commands alongside built-in
   commands, capped at 100 entries (Telegram Bot API limit). Overflow
   commands remain callable but hidden from the picker. Logged at
   startup when cap is hit.

2. New /commands [page] gateway command for paginated browsing of all
   commands + skills. /help now shows first 10 skill commands and
   points to /commands for the full list.

3. When a user types a slash command that matches a disabled or
   uninstalled skill, they get actionable guidance:
   - Disabled: 'Enable it with: hermes skills config'
   - Optional (not installed): 'Install with: hermes skills install official/<path>'

Built on ideas from PR #3921 by @kshitijk4poor.

* chore: move 21 niche skills to optional-skills

Move specialized/niche skills from built-in (skills/) to optional
(optional-skills/) to reduce the default skill count. Users can
install them with: hermes skills install official/<category>/<name>

Moved skills (21):
- mlops: accelerate, chroma, faiss, flash-attention,
  hermes-atropos-environments, huggingface-tokenizers, instructor,
  lambda-labs, llava, nemo-curator, pinecone, pytorch-lightning,
  qdrant, saelens, simpo, slime, tensorrt-llm, torchtitan
- research: domain-intel, duckduckgo-search
- devops: inference-sh cli

Built-in skills: 96 → 75
Optional skills: 22 → 43

* fix: only include repo built-in skills in Telegram menu, not user-installed

User-installed skills (from hub or manually added) stay accessible via
/skills and by typing the command directly, but don't get registered
in the Telegram slash command picker. Only skills whose SKILL.md is
under the repo's skills/ directory are included in the menu.

This keeps the Telegram menu focused on the curated built-in set while
user-installed skills remain discoverable through /skills and /commands.
2026-03-30 10:57:30 -07:00
80 changed files with 1056 additions and 7 deletions

View File

@@ -622,10 +622,16 @@ class TelegramAdapter(BasePlatformAdapter):
# gateway command there automatically adds it to the Telegram menu.
try:
from telegram import BotCommand
from hermes_cli.commands import telegram_bot_commands
from hermes_cli.commands import telegram_menu_commands
menu_commands, hidden_count = telegram_menu_commands(max_commands=100)
await self._bot.set_my_commands([
BotCommand(name, desc) for name, desc in telegram_bot_commands()
BotCommand(name, desc) for name, desc in menu_commands
])
if hidden_count:
logger.info(
"[%s] Telegram menu: %d commands registered, %d hidden (over 100 limit). Use /commands for full list.",
self.name, len(menu_commands), hidden_count,
)
except Exception as e:
logger.warning(
"[%s] Could not register Telegram command menu: %s",

View File

@@ -301,6 +301,50 @@ def _resolve_runtime_agent_kwargs() -> dict:
}
def _check_unavailable_skill(command_name: str) -> str | None:
"""Check if a command matches a known-but-inactive skill.
Returns a helpful message if the skill exists but is disabled or only
available as an optional install. Returns None if no match found.
"""
# Normalize: command uses hyphens, skill names may use hyphens or underscores
normalized = command_name.lower().replace("_", "-")
try:
from tools.skills_tool import SKILLS_DIR, _get_disabled_skill_names
disabled = _get_disabled_skill_names()
# Check disabled built-in skills
for skill_md in SKILLS_DIR.rglob("SKILL.md"):
if any(part in ('.git', '.github', '.hub') for part in skill_md.parts):
continue
name = skill_md.parent.name.lower().replace("_", "-")
if name == normalized and name in disabled:
return (
f"The **{command_name}** skill is installed but disabled.\n"
f"Enable it with: `hermes skills config`"
)
# Check optional skills (shipped with repo but not installed)
from hermes_constants import get_hermes_home
repo_root = Path(__file__).resolve().parent.parent
optional_dir = repo_root / "optional-skills"
if optional_dir.exists():
for skill_md in optional_dir.rglob("SKILL.md"):
name = skill_md.parent.name.lower().replace("_", "-")
if name == normalized:
# Build install path: official/<category>/<name>
rel = skill_md.parent.relative_to(optional_dir)
parts = list(rel.parts)
install_path = f"official/{'/'.join(parts)}"
return (
f"The **{command_name}** skill is available but not installed.\n"
f"Install it with: `hermes skills install {install_path}`"
)
except Exception:
pass
return None
def _platform_config_key(platform: "Platform") -> str:
    """Return the config.yaml key for *platform*.

    ``Platform.LOCAL`` is stored under ``"cli"``; every other member is stored
    under its enum value.
    """
    if platform == Platform.LOCAL:
        return "cli"
    return platform.value
@@ -1817,6 +1861,9 @@ class GatewayRunner:
if canonical == "help":
return await self._handle_help_command(event)
if canonical == "commands":
return await self._handle_commands_command(event)
if canonical == "status":
return await self._handle_status_command(event)
@@ -1974,6 +2021,12 @@ class GatewayRunner:
if msg:
event.text = msg
# Fall through to normal message processing with skill content
else:
# Not an active skill — check if it's a known-but-disabled or
# uninstalled skill and give actionable guidance.
_unavail_msg = _check_unavailable_skill(command)
if _unavail_msg:
return _unavail_msg
except Exception as e:
logger.debug("Skill command check failed (non-fatal): %s", e)
@@ -3065,12 +3118,69 @@ class GatewayRunner:
from agent.skill_commands import get_skill_commands
skill_cmds = get_skill_commands()
if skill_cmds:
lines.append(f"\n⚡ **Skill Commands** ({len(skill_cmds)} installed):")
for cmd in sorted(skill_cmds):
lines.append(f"\n⚡ **Skill Commands** ({len(skill_cmds)} active):")
# Show first 10, then point to /commands for the rest
sorted_cmds = sorted(skill_cmds)
for cmd in sorted_cmds[:10]:
lines.append(f"`{cmd}` — {skill_cmds[cmd]['description']}")
if len(sorted_cmds) > 10:
lines.append(f"\n... and {len(sorted_cmds) - 10} more. Use `/commands` for the full paginated list.")
except Exception:
pass
return "\n".join(lines)
async def _handle_commands_command(self, event: MessageEvent) -> str:
    """Handle ``/commands [page]``: a paginated listing of commands and skills.

    The listing is the gateway help lines followed by a "Skill Commands"
    section (when any skill commands exist). An out-of-range page number is
    clamped to the nearest valid page and a note is appended; a non-integer
    argument yields a usage message.
    """
    from hermes_cli.commands import gateway_help_lines

    page_arg = event.get_command_args().strip()
    requested_page = 1
    if page_arg:
        try:
            requested_page = int(page_arg)
        except ValueError:
            return "Usage: `/commands [page]`"

    # Build combined entry list: built-in commands + skill commands
    entries = [*gateway_help_lines()]
    try:
        from agent.skill_commands import get_skill_commands

        skill_cmds = get_skill_commands()
        if skill_cmds:
            entries.extend(("", "⚡ **Skill Commands**:"))
            for cmd in sorted(skill_cmds):
                summary = skill_cmds[cmd].get("description", "").strip()
                if not summary:
                    summary = "Skill command"
                entries.append(f"`{cmd}` — {summary}")
    except Exception:
        pass

    if not entries:
        return "No commands available."

    from gateway.config import Platform

    # Telegram gets shorter pages than other platforms.
    if event.source.platform == Platform.TELEGRAM:
        page_size = 15
    else:
        page_size = 20

    total_pages = max(1, -(-len(entries) // page_size))  # ceiling division
    page = min(max(requested_page, 1), total_pages)
    first = (page - 1) * page_size

    lines = [f"📚 **Commands** ({len(entries)} total, page {page}/{total_pages})", ""]
    lines.extend(entries[first:first + page_size])

    if total_pages > 1:
        nav_parts = []
        if page > 1:
            nav_parts.append(f"`/commands {page - 1}` ← prev")
        if page < total_pages:
            nav_parts.append(f"next → `/commands {page + 1}`")
        lines.append("")
        lines.append(" | ".join(nav_parts))

    if page != requested_page:
        lines.append(f"_(Requested page {requested_page} was out of range, showing page {page}.)_")
    return "\n".join(lines)
async def _handle_provider_command(self, event: MessageEvent) -> str:
"""Handle /provider command - show available providers."""

View File

@@ -118,6 +118,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
"Tools & Skills", cli_only=True),
# Info
CommandDef("commands", "Browse all commands and skills (paginated)", "Info",
gateway_only=True, args_hint="[page]"),
CommandDef("help", "Show available commands", "Info"),
CommandDef("usage", "Show token usage for the current session", "Info"),
CommandDef("insights", "Show usage insights and analytics", "Info",
@@ -361,6 +363,51 @@ def telegram_bot_commands() -> list[tuple[str, str]]:
return result
def telegram_menu_commands(max_commands: int = 100) -> tuple[list[tuple[str, str]], int]:
    """Return Telegram menu commands (built-in + active skills), capped to the Bot API limit.

    Built-in commands come first, then active skill commands whose
    ``SKILL.md`` lives under ``SKILLS_DIR`` but outside ``SKILLS_DIR/.hub``.
    Commands beyond ``max_commands`` remain callable in the gateway; they are
    just omitted from Telegram's native slash-command picker.

    Skill entries that Telegram would reject are left out so one bad skill
    cannot make the whole menu registration fail: names must be 1-32
    characters of lowercase letters, digits and underscores, names already in
    the menu are not repeated, and an empty description falls back to
    "Skill command".

    Args:
        max_commands: Maximum number of menu entries; negative values are
            treated as 0.

    Returns:
        (menu_commands, hidden_count) where hidden_count is the number of
        commands omitted due to the cap.
    """
    import logging
    import re
    from pathlib import Path

    all_commands = list(telegram_bot_commands())

    # Append active BUILT-IN skill commands only (not user-installed hub skills).
    # User-installed skills stay accessible via /skills and by typing the command
    # directly, but don't clutter the Telegram menu.
    try:
        from agent.skill_commands import get_skill_commands
        from tools.skills_tool import SKILLS_DIR

        # Built-in skills are synced to SKILLS_DIR; hub-installed skills go
        # into SKILLS_DIR/.hub/ and are excluded from the menu.
        skills_root = SKILLS_DIR.resolve()
        hub_root = (SKILLS_DIR / ".hub").resolve()
        valid_name = re.compile(r"[a-z0-9_]{1,32}")
        taken = {name for name, _ in all_commands}

        skill_cmds = get_skill_commands()
        for cmd_key in sorted(skill_cmds):
            info = skill_cmds[cmd_key]
            raw_path = info.get("skill_md_path", "")
            if not raw_path:
                continue
            # Compare real path components, not string prefixes, so that a
            # sibling such as ".../skills-extra" is not mistaken for SKILLS_DIR.
            ancestors = Path(raw_path).resolve().parents
            if skills_root not in ancestors:
                continue
            if hub_root in ancestors:
                continue  # hub-installed, not built-in

            name = cmd_key.lstrip("/").replace("-", "_")
            if name in taken or not valid_name.fullmatch(name):
                continue

            desc = (info.get("description") or "").strip() or "Skill command"
            # Telegram descriptions max 256 chars
            if len(desc) > 256:
                desc = desc[:253] + "..."
            all_commands.append((name, desc))
            taken.add(name)
    except Exception as exc:
        # Skills are optional for the menu; keep whatever was collected so far.
        logging.getLogger(__name__).debug(
            "Could not add skill commands to Telegram menu: %s", exc
        )

    limit = max(0, max_commands)
    hidden_count = max(0, len(all_commands) - limit)
    return all_commands[:limit], hidden_count
def slack_subcommand_map() -> dict[str, str]:
"""Return subcommand -> /command mapping for Slack /hermes handler.

View File

@@ -706,6 +706,14 @@ OPTIONAL_ENV_VARS = {
"password": True,
"category": "tool",
},
"CAMOFOX_URL": {
"description": "Camofox browser server URL for local anti-detection browsing (e.g. http://localhost:9377)",
"prompt": "Camofox server URL",
"url": "https://github.com/jo-inc/camofox-browser",
"tools": ["browser_navigate", "browser_click"],
"password": False,
"category": "tool",
},
"FAL_KEY": {
"description": "FAL API key for image generation",
"prompt": "FAL API key",

View File

@@ -601,13 +601,15 @@ def _print_setup_summary(config: dict, hermes_home):
Path(__file__).parent.parent / "node_modules" / ".bin" / "agent-browser"
).exists()
)
if get_env_value("BROWSERBASE_API_KEY"):
if get_env_value("CAMOFOX_URL"):
tool_status.append(("Browser Automation (Camofox)", True, None))
elif get_env_value("BROWSERBASE_API_KEY"):
tool_status.append(("Browser Automation (Browserbase)", True, None))
elif _ab_found:
tool_status.append(("Browser Automation (local)", True, None))
else:
tool_status.append(
("Browser Automation", False, "npm install -g agent-browser")
("Browser Automation", False, "npm install -g agent-browser or set CAMOFOX_URL")
)
# FAL (image generation)

View File

@@ -273,6 +273,16 @@ TOOL_CATEGORIES = {
"browser_provider": "browser-use",
"post_setup": "browserbase",
},
{
"name": "Camofox",
"tag": "Local anti-detection browser (Firefox/Camoufox)",
"env_vars": [
{"key": "CAMOFOX_URL", "prompt": "Camofox server URL", "default": "http://localhost:9377",
"url": "https://github.com/jo-inc/camofox-browser"},
],
"browser_provider": "camofox",
"post_setup": "camofox",
},
],
},
"homeassistant": {
@@ -337,6 +347,28 @@ def _run_post_setup(post_setup_key: str):
elif not node_modules.exists():
_print_warning(" Node.js not found - browser tools require: npm install (in hermes-agent directory)")
elif post_setup_key == "camofox":
camofox_dir = PROJECT_ROOT / "node_modules" / "@askjo" / "camoufox-browser"
if not camofox_dir.exists() and shutil.which("npm"):
_print_info(" Installing Camofox browser server...")
import subprocess
result = subprocess.run(
["npm", "install", "--silent"],
capture_output=True, text=True, cwd=str(PROJECT_ROOT)
)
if result.returncode == 0:
_print_success(" Camofox installed")
else:
_print_warning(" npm install failed - run manually: npm install")
if camofox_dir.exists():
_print_info(" Start the Camofox server:")
_print_info(" npx @askjo/camoufox-browser")
_print_info(" First run downloads the Camoufox engine (~300MB)")
_print_info(" Or use Docker: docker run -p 9377:9377 jo-inc/camofox-browser")
elif not shutil.which("npm"):
_print_warning(" Node.js not found. Install Camofox via Docker:")
_print_info(" docker run -p 9377:9377 jo-inc/camofox-browser")
elif post_setup_key == "rl_training":
try:
__import__("tinker_atropos")

View File

@@ -16,7 +16,8 @@
},
"homepage": "https://github.com/NousResearch/Hermes-Agent#readme",
"dependencies": {
"agent-browser": "^0.13.0"
"agent-browser": "^0.13.0",
"@askjo/camoufox-browser": "^1.0.0"
},
"engines": {
"node": ">=18.0.0"

View File

@@ -0,0 +1,290 @@
"""Tests for the Camofox browser backend."""
import json
import os
from unittest.mock import MagicMock, patch
import pytest
from tools.browser_camofox import (
camofox_back,
camofox_click,
camofox_close,
camofox_console,
camofox_get_images,
camofox_navigate,
camofox_press,
camofox_scroll,
camofox_snapshot,
camofox_type,
camofox_vision,
check_camofox_available,
cleanup_all_camofox_sessions,
is_camofox_mode,
)
# ---------------------------------------------------------------------------
# Configuration detection
# ---------------------------------------------------------------------------
class TestCamofoxMode:
    """Camofox mode detection driven by the ``CAMOFOX_URL`` environment variable."""

    def test_disabled_by_default(self, monkeypatch):
        # With the variable absent the backend must report itself as off.
        monkeypatch.delenv("CAMOFOX_URL", raising=False)
        enabled = is_camofox_mode()
        assert enabled is False

    def test_enabled_when_url_set(self, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        enabled = is_camofox_mode()
        assert enabled is True

    def test_health_check_unreachable(self, monkeypatch):
        # The URL is expected to have nothing listening, so the health probe
        # should report False.
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:19999")
        reachable = check_camofox_available()
        assert reachable is False
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _mock_response(status=200, json_data=None):
resp = MagicMock()
resp.status_code = status
resp.json.return_value = json_data or {}
resp.content = b"\x89PNG\r\n\x1a\nfake"
resp.raise_for_status = MagicMock()
return resp
# ---------------------------------------------------------------------------
# Navigate
# ---------------------------------------------------------------------------
class TestCamofoxNavigate:
    """``camofox_navigate`` against a mocked ``requests.post``."""

    @patch("tools.browser_camofox.requests.post")
    def test_creates_tab_on_first_navigate(self, mock_post, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        target = "https://example.com"
        mock_post.return_value = _mock_response(json_data={"tabId": "tab1", "url": target})

        outcome = json.loads(camofox_navigate(target, task_id="t1"))

        assert outcome["success"] is True
        assert outcome["url"] == target

    @patch("tools.browser_camofox.requests.post")
    def test_navigates_existing_tab(self, mock_post, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        task = "t2"

        # The first navigate for a task creates its tab.
        mock_post.return_value = _mock_response(json_data={"tabId": "tab2", "url": "https://a.com"})
        camofox_navigate("https://a.com", task_id=task)

        # The second navigate reuses that tab.
        mock_post.return_value = _mock_response(json_data={"ok": True, "url": "https://b.com"})
        outcome = json.loads(camofox_navigate("https://b.com", task_id=task))

        assert outcome["success"] is True
        assert outcome["url"] == "https://b.com"

    def test_connection_error_returns_helpful_message(self, monkeypatch):
        # No mock here: the request goes to a port expected to be closed.
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:19999")

        outcome = json.loads(camofox_navigate("https://example.com", task_id="t_err"))

        assert outcome["success"] is False
        assert "Cannot connect" in outcome["error"]
# ---------------------------------------------------------------------------
# Snapshot
# ---------------------------------------------------------------------------
class TestCamofoxSnapshot:
    """``camofox_snapshot`` with and without an open tab."""

    def test_no_session_returns_error(self, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")

        outcome = json.loads(camofox_snapshot(task_id="no_such_task"))

        assert outcome["success"] is False
        assert "browser_navigate" in outcome["error"]

    @patch("tools.browser_camofox.requests.post")
    @patch("tools.browser_camofox.requests.get")
    def test_returns_snapshot(self, mock_get, mock_post, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")

        # Open a tab so the task has a session.
        mock_post.return_value = _mock_response(json_data={"tabId": "tab3", "url": "https://x.com"})
        camofox_navigate("https://x.com", task_id="t3")

        # Serve a two-element accessibility tree.
        tree = {
            "snapshot": "- heading \"Test\" [e1]\n- button \"Submit\" [e2]",
            "refsCount": 2,
        }
        mock_get.return_value = _mock_response(json_data=tree)

        outcome = json.loads(camofox_snapshot(task_id="t3"))

        assert outcome["success"] is True
        assert "[e1]" in outcome["snapshot"]
        assert outcome["element_count"] == 2
# ---------------------------------------------------------------------------
# Click / Type / Scroll / Back / Press
# ---------------------------------------------------------------------------
class TestCamofoxInteractions:
    """Click, type, scroll, back and press on a tab opened via a mocked POST."""

    @staticmethod
    def _open_tab(mock_post, monkeypatch, tab_id, task_id):
        """Configure CAMOFOX_URL and navigate once so *task_id* owns a tab."""
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        mock_post.return_value = _mock_response(json_data={"tabId": tab_id, "url": "https://x.com"})
        camofox_navigate("https://x.com", task_id=task_id)

    @patch("tools.browser_camofox.requests.post")
    def test_click(self, mock_post, monkeypatch):
        self._open_tab(mock_post, monkeypatch, "tab4", "t4")
        mock_post.return_value = _mock_response(json_data={"ok": True, "url": "https://x.com"})

        outcome = json.loads(camofox_click("@e5", task_id="t4"))

        assert outcome["success"] is True
        assert outcome["clicked"] == "e5"

    @patch("tools.browser_camofox.requests.post")
    def test_type(self, mock_post, monkeypatch):
        self._open_tab(mock_post, monkeypatch, "tab5", "t5")
        mock_post.return_value = _mock_response(json_data={"ok": True})

        outcome = json.loads(camofox_type("@e3", "hello world", task_id="t5"))

        assert outcome["success"] is True
        assert outcome["typed"] == "hello world"

    @patch("tools.browser_camofox.requests.post")
    def test_scroll(self, mock_post, monkeypatch):
        self._open_tab(mock_post, monkeypatch, "tab6", "t6")
        mock_post.return_value = _mock_response(json_data={"ok": True})

        outcome = json.loads(camofox_scroll("down", task_id="t6"))

        assert outcome["success"] is True
        assert outcome["scrolled"] == "down"

    @patch("tools.browser_camofox.requests.post")
    def test_back(self, mock_post, monkeypatch):
        self._open_tab(mock_post, monkeypatch, "tab7", "t7")
        mock_post.return_value = _mock_response(json_data={"ok": True, "url": "https://prev.com"})

        outcome = json.loads(camofox_back(task_id="t7"))

        assert outcome["success"] is True

    @patch("tools.browser_camofox.requests.post")
    def test_press(self, mock_post, monkeypatch):
        self._open_tab(mock_post, monkeypatch, "tab8", "t8")
        mock_post.return_value = _mock_response(json_data={"ok": True})

        outcome = json.loads(camofox_press("Enter", task_id="t8"))

        assert outcome["success"] is True
        assert outcome["pressed"] == "Enter"
# ---------------------------------------------------------------------------
# Close
# ---------------------------------------------------------------------------
class TestCamofoxClose:
    """``camofox_close`` for an open session and for an unknown task."""

    @patch("tools.browser_camofox.requests.delete")
    @patch("tools.browser_camofox.requests.post")
    def test_close_session(self, mock_post, mock_delete, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")

        # Open a tab first so there is something to close.
        mock_post.return_value = _mock_response(json_data={"tabId": "tab9", "url": "https://x.com"})
        camofox_navigate("https://x.com", task_id="t9")

        mock_delete.return_value = _mock_response(json_data={"ok": True})
        outcome = json.loads(camofox_close(task_id="t9"))

        assert outcome["success"] is True
        assert outcome["closed"] is True

    def test_close_nonexistent_session(self, monkeypatch):
        # Closing a task that never navigated is still reported as success.
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")

        outcome = json.loads(camofox_close(task_id="nonexistent"))

        assert outcome["success"] is True
# ---------------------------------------------------------------------------
# Console (limited support)
# ---------------------------------------------------------------------------
class TestCamofoxConsole:
    """``camofox_console`` reports an empty log together with an explanatory note."""

    def test_console_returns_empty_with_note(self, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")

        outcome = json.loads(camofox_console(task_id="t_console"))

        assert outcome["success"] is True
        assert outcome["total_messages"] == 0
        assert "not available" in outcome["note"]
# ---------------------------------------------------------------------------
# Images
# ---------------------------------------------------------------------------
class TestCamofoxGetImages:
    """``camofox_get_images`` returns the image list served by the mocked GET."""

    @patch("tools.browser_camofox.requests.post")
    @patch("tools.browser_camofox.requests.get")
    def test_get_images(self, mock_get, mock_post, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")

        # Open a tab for the task.
        mock_post.return_value = _mock_response(json_data={"tabId": "tab10", "url": "https://x.com"})
        camofox_navigate("https://x.com", task_id="t10")

        listing = {"images": [{"src": "https://x.com/img.png", "alt": "Logo"}]}
        mock_get.return_value = _mock_response(json_data=listing)

        outcome = json.loads(camofox_get_images(task_id="t10"))

        assert outcome["success"] is True
        assert outcome["count"] == 1
        assert outcome["images"][0]["src"] == "https://x.com/img.png"
# ---------------------------------------------------------------------------
# Routing integration — verify browser_tool routes to camofox
# ---------------------------------------------------------------------------
class TestBrowserToolRouting:
    """Check that ``tools.browser_tool`` hands off to Camofox when CAMOFOX_URL is set."""

    @patch("tools.browser_camofox.requests.post")
    def test_browser_navigate_routes_to_camofox(self, mock_post, monkeypatch):
        from tools.browser_tool import browser_navigate

        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        mock_post.return_value = _mock_response(json_data={"tabId": "tab_rt", "url": "https://example.com"})

        # Bypass SSRF check for test URL
        with patch("tools.browser_tool._is_safe_url", return_value=True):
            raw = browser_navigate("https://example.com", task_id="t_route")
            outcome = json.loads(raw)

        assert outcome["success"] is True

    def test_check_requirements_passes_with_camofox(self, monkeypatch):
        from tools.browser_tool import check_browser_requirements

        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")

        assert check_browser_requirements() is True
# ---------------------------------------------------------------------------
# Cleanup helper
# ---------------------------------------------------------------------------
class TestCamofoxCleanup:
    """``cleanup_all_camofox_sessions`` forgets every tracked session."""

    @patch("tools.browser_camofox.requests.post")
    @patch("tools.browser_camofox.requests.delete")
    def test_cleanup_all(self, mock_delete, mock_post, monkeypatch):
        monkeypatch.setenv("CAMOFOX_URL", "http://localhost:9377")
        task = "t_cleanup"

        mock_post.return_value = _mock_response(json_data={"tabId": "tab_c", "url": "https://x.com"})
        camofox_navigate("https://x.com", task_id=task)

        mock_delete.return_value = _mock_response(json_data={"ok": True})
        cleanup_all_camofox_sessions()

        # After cleanup the task has no tab, so a snapshot must fail.
        outcome = json.loads(camofox_snapshot(task_id=task))
        assert outcome["success"] is False

496
tools/browser_camofox.py Normal file
View File

@@ -0,0 +1,496 @@
"""Camofox browser backend — local anti-detection browser via REST API.
Camofox-browser is a self-hosted Node.js server wrapping Camoufox (Firefox
fork with C++ fingerprint spoofing). It exposes a REST API that maps 1:1
to our browser tool interface: accessibility snapshots with element refs,
click/type/scroll by ref, screenshots, etc.
When ``CAMOFOX_URL`` is set (e.g. ``http://localhost:9377``), the browser
tools route through this module instead of the ``agent-browser`` CLI.
Setup::
# Option 1: npm
git clone https://github.com/jo-inc/camofox-browser && cd camofox-browser
npm install && npm start # downloads Camoufox (~300MB) on first run
# Option 2: Docker
docker run -p 9377:9377 jo-inc/camofox-browser
Then set ``CAMOFOX_URL=http://localhost:9377`` in ``~/.hermes/.env``.
"""
from __future__ import annotations
import base64
import json
import logging
import os
import threading
import time
import uuid
from pathlib import Path
from typing import Any, Dict, Optional
import requests
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
_DEFAULT_TIMEOUT = 30 # seconds per HTTP request
_SNAPSHOT_MAX_CHARS = 80_000 # camofox paginates at this limit
def get_camofox_url() -> str:
    """Return the configured Camofox server URL, or an empty string.

    The value comes from the ``CAMOFOX_URL`` environment variable. Surrounding
    whitespace (e.g. a stray newline from an env file) and trailing slashes
    are removed so the result can be joined directly with ``/path`` suffixes;
    a whitespace-only value counts as "not configured".
    """
    return os.getenv("CAMOFOX_URL", "").strip().rstrip("/")
def is_camofox_mode() -> bool:
    """Report whether a Camofox server URL is configured (non-empty)."""
    return get_camofox_url() != ""
def check_camofox_available() -> bool:
    """Probe ``<CAMOFOX_URL>/health`` and report whether it answers with HTTP 200.

    Returns ``False`` when no URL is configured or when the request raises
    for any reason.
    """
    base = get_camofox_url()
    if base:
        try:
            health = requests.get(f"{base}/health", timeout=5)
            return health.status_code == 200
        except Exception:
            return False
    return False
# ---------------------------------------------------------------------------
# Session management
# ---------------------------------------------------------------------------
# Maps task_id -> {"user_id": str, "tab_id": str|None}
_sessions: Dict[str, Dict[str, Any]] = {}
_sessions_lock = threading.Lock()
def _get_session(task_id: Optional[str]) -> Dict[str, Any]:
"""Get or create a camofox session for the given task."""
task_id = task_id or "default"
with _sessions_lock:
if task_id in _sessions:
return _sessions[task_id]
session = {
"user_id": f"hermes_{uuid.uuid4().hex[:10]}",
"tab_id": None,
"session_key": f"task_{task_id[:16]}",
}
_sessions[task_id] = session
return session
def _ensure_tab(task_id: Optional[str], url: str = "about:blank") -> Dict[str, Any]:
    """Return the task's session, opening a tab at *url* if it has none yet.

    When the session already holds a tab id nothing is sent. Otherwise a
    ``POST /tabs`` request is issued and the ``tabId`` from the reply is
    stored on the session. HTTP error statuses propagate via
    ``raise_for_status``.
    """
    sess = _get_session(task_id)
    if not sess["tab_id"]:
        payload = {
            "userId": sess["user_id"],
            "sessionKey": sess["session_key"],
            "url": url,
        }
        endpoint = f"{get_camofox_url()}/tabs"
        reply = requests.post(endpoint, json=payload, timeout=_DEFAULT_TIMEOUT)
        reply.raise_for_status()
        sess["tab_id"] = reply.json().get("tabId")
    return sess
def _drop_session(task_id: Optional[str]) -> Optional[Dict[str, Any]]:
    """Forget the session for *task_id* and return its record (``None`` if absent).

    A falsy ``task_id`` refers to the ``"default"`` entry.
    """
    key = task_id if task_id else "default"
    with _sessions_lock:
        removed = _sessions.pop(key, None)
    return removed
# ---------------------------------------------------------------------------
# HTTP helpers
# ---------------------------------------------------------------------------
def _post(path: str, body: dict, timeout: int = _DEFAULT_TIMEOUT) -> dict:
    """Send *body* as JSON via POST to ``<CAMOFOX_URL><path>``.

    Returns the decoded JSON reply; an HTTP error status raises through
    ``raise_for_status``.
    """
    reply = requests.post(f"{get_camofox_url()}{path}", json=body, timeout=timeout)
    reply.raise_for_status()
    return reply.json()
def _get(path: str, params: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> dict:
    """Issue a GET to ``<CAMOFOX_URL><path>`` with optional query *params*.

    Returns the decoded JSON reply; an HTTP error status raises through
    ``raise_for_status``.
    """
    reply = requests.get(f"{get_camofox_url()}{path}", params=params, timeout=timeout)
    reply.raise_for_status()
    return reply.json()
def _get_raw(path: str, params: dict = None, timeout: int = _DEFAULT_TIMEOUT) -> requests.Response:
    """Issue a GET to ``<CAMOFOX_URL><path>`` and hand back the response object.

    Unlike ``_get`` the body is not decoded, so callers can read binary
    content. An HTTP error status raises through ``raise_for_status``.
    """
    reply = requests.get(f"{get_camofox_url()}{path}", params=params, timeout=timeout)
    reply.raise_for_status()
    return reply
def _delete(path: str, body: Optional[dict] = None, timeout: int = _DEFAULT_TIMEOUT) -> dict:
    """Send a DELETE to ``<CAMOFOX_URL><path>`` with an optional JSON *body*.

    Returns the decoded JSON reply, or an empty dict when the server answers
    with an empty body (decoding an empty body would otherwise raise). An HTTP
    error status raises through ``raise_for_status``.
    """
    url = f"{get_camofox_url()}{path}"
    resp = requests.delete(url, json=body, timeout=timeout)
    resp.raise_for_status()
    if not resp.content:
        # Nothing to decode, e.g. a bodiless success response.
        return {}
    return resp.json()
# ---------------------------------------------------------------------------
# Tool implementations
# ---------------------------------------------------------------------------
def camofox_navigate(url: str, task_id: Optional[str] = None) -> str:
    """Navigate the task's Camofox tab to *url*, opening the tab on first use.

    Args:
        url: Address to load.
        task_id: Task whose session/tab is used; falsy means the default task.

    Returns:
        A JSON string. On success: ``{"success": true, "url": ..., "title": ...}``.
        On failure: ``{"success": false, "error": ...}``. Connection failures
        get a hint on how to start the server. Exceptions are not propagated.
    """
    try:
        session = _get_session(task_id)
        if session["tab_id"]:
            # Navigate existing tab
            data = _post(
                f"/tabs/{session['tab_id']}/navigate",
                {"userId": session["user_id"], "url": url},
                timeout=60,
            )
        else:
            # Create tab with the target URL directly
            session = _ensure_tab(task_id, url)
            if not session["tab_id"]:
                # Without a tab id every later tool call would fail with
                # "No browser session", so do not report success here.
                return json.dumps({
                    "success": False,
                    "error": "Navigation failed: Camofox did not return a tab id",
                })
            data = {"ok": True, "url": url}
        return json.dumps({
            "success": True,
            "url": data.get("url", url),
            "title": data.get("title", ""),
        })
    except requests.HTTPError as e:
        return json.dumps({"success": False, "error": f"Navigation failed: {e}"})
    except requests.ConnectionError:
        return json.dumps({
            "success": False,
            "error": f"Cannot connect to Camofox at {get_camofox_url()}. "
                     "Is the server running? Start with: npm start (in camofox-browser dir) "
                     "or: docker run -p 9377:9377 jo-inc/camofox-browser",
        })
    except Exception as e:
        return json.dumps({"success": False, "error": str(e)})
def camofox_snapshot(full: bool = False, task_id: Optional[str] = None,
                     user_task: Optional[str] = None) -> str:
    """Fetch the accessibility-tree snapshot of the task's tab as a JSON string.

    Snapshots longer than ``SNAPSHOT_SUMMARIZE_THRESHOLD`` are reduced with
    the helpers from ``tools.browser_tool``: ``_extract_relevant_content``
    when *user_task* is given, ``_truncate_snapshot`` otherwise. The *full*
    argument is accepted but not consulted by this function.

    Returns ``{"success": true, "snapshot": ..., "element_count": ...}`` or
    ``{"success": false, "error": ...}``. Exceptions are not propagated.
    """
    try:
        sess = _get_session(task_id)
        tab_id = sess["tab_id"]
        if not tab_id:
            return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})

        reply = _get(f"/tabs/{tab_id}/snapshot", params={"userId": sess["user_id"]})
        tree_text = reply.get("snapshot", "")
        element_total = reply.get("refsCount", 0)

        # Same size handling as the main browser tool.
        from tools.browser_tool import (
            SNAPSHOT_SUMMARIZE_THRESHOLD,
            _extract_relevant_content,
            _truncate_snapshot,
        )

        if len(tree_text) > SNAPSHOT_SUMMARIZE_THRESHOLD:
            tree_text = (
                _extract_relevant_content(tree_text, user_task)
                if user_task
                else _truncate_snapshot(tree_text)
            )

        outcome = {
            "success": True,
            "snapshot": tree_text,
            "element_count": element_total,
        }
        return json.dumps(outcome)
    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)})
def camofox_click(ref: str, task_id: Optional[str] = None) -> str:
    """Click the element identified by *ref* in the task's Camofox tab.

    Leading ``@`` characters are removed from *ref* before it is sent.

    Returns:
        A JSON string with ``success``, ``clicked`` and ``url``, or
        ``success`` false plus ``error`` on failure.
    """
    try:
        sess = _get_session(task_id)
        tab_id = sess["tab_id"]
        if not tab_id:
            return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})

        # Tool callers write refs as "@e1"; the request is sent without the "@".
        target = ref.lstrip("@")
        payload = {"userId": sess["user_id"], "ref": target}
        reply = _post(f"/tabs/{tab_id}/click", payload)

        outcome = {
            "success": True,
            "clicked": target,
            "url": reply.get("url", ""),
        }
        return json.dumps(outcome)
    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)})
def camofox_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
    """Type *text* into the element identified by *ref* via Camofox.

    Leading ``@`` characters are removed from *ref* before it is sent.

    Returns:
        A JSON string with ``success``, ``typed`` and ``element``, or
        ``success`` false plus ``error`` on failure.
    """
    try:
        sess = _get_session(task_id)
        tab_id = sess["tab_id"]
        if not tab_id:
            return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})

        target = ref.lstrip("@")
        payload = {"userId": sess["user_id"], "ref": target, "text": text}
        _post(f"/tabs/{tab_id}/type", payload)

        outcome = {
            "success": True,
            "typed": text,
            "element": target,
        }
        return json.dumps(outcome)
    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)})
def camofox_scroll(direction: str, task_id: Optional[str] = None) -> str:
    """Scroll the task's Camofox tab in *direction*.

    The direction is forwarded to the server unchanged.

    Returns:
        A JSON string with ``success`` and ``scrolled``, or ``success`` false
        plus ``error`` on failure.
    """
    try:
        sess = _get_session(task_id)
        tab_id = sess["tab_id"]
        if not tab_id:
            return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})

        payload = {"userId": sess["user_id"], "direction": direction}
        _post(f"/tabs/{tab_id}/scroll", payload)
        return json.dumps({"success": True, "scrolled": direction})
    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)})
def camofox_back(task_id: Optional[str] = None) -> str:
    """Go back one step in the history of the task's Camofox tab.

    Returns:
        A JSON string with ``success`` and the ``url`` reported by the server
        (empty when absent), or ``success`` false plus ``error`` on failure.
    """
    try:
        sess = _get_session(task_id)
        tab_id = sess["tab_id"]
        if not tab_id:
            return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})

        reply = _post(f"/tabs/{tab_id}/back", {"userId": sess["user_id"]})
        landed_on = reply.get("url", "")
        return json.dumps({"success": True, "url": landed_on})
    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)})
def camofox_press(key: str, task_id: Optional[str] = None) -> str:
    """Send a key press to the task's Camofox tab.

    The key name is forwarded to the server unchanged.

    Returns:
        A JSON string with ``success`` and ``pressed``, or ``success`` false
        plus ``error`` on failure.
    """
    try:
        sess = _get_session(task_id)
        tab_id = sess["tab_id"]
        if not tab_id:
            return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})

        payload = {"userId": sess["user_id"], "key": key}
        _post(f"/tabs/{tab_id}/press", payload)
        return json.dumps({"success": True, "pressed": key})
    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)})
def camofox_close(task_id: Optional[str] = None) -> str:
    """Forget the task's Camofox session and ask the server to delete it.

    Closing is best-effort: the result always reports ``success`` and
    ``closed`` as true. If anything goes wrong, the error text is attached
    under ``warning`` instead of failing the call.

    Returns:
        A JSON string describing the outcome.
    """
    try:
        dropped = _drop_session(task_id)
        # Nothing was tracked for this task, so there is nothing to delete remotely.
        if dropped:
            _delete(f"/sessions/{dropped['user_id']}")
        return json.dumps({"success": True, "closed": True})
    except Exception as exc:
        return json.dumps({"success": True, "closed": True, "warning": str(exc)})
def camofox_get_images(task_id: Optional[str] = None) -> str:
    """List the images found on the current page of the task's Camofox tab.

    Camofox has no dedicated images endpoint, so the accessibility-tree
    snapshot is scanned instead: each ``img`` entry contributes its quoted alt
    text, and a ``/url:`` value on the line right after it becomes the source.
    Entries with neither alt text nor source are ignored.

    Returns:
        A JSON string with ``success``, ``images`` (list of ``src``/``alt``
        dicts) and ``count``, or ``success`` false plus ``error`` on failure.
    """
    try:
        sess = _get_session(task_id)
        if not sess["tab_id"]:
            return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})

        import re

        reply = _get(
            f"/tabs/{sess['tab_id']}/snapshot",
            params={"userId": sess["user_id"]},
        )
        tree_lines = reply.get("snapshot", "").split("\n")

        # Snapshot entries look like: img "alt text"  or  img "alt text" [eN]
        alt_pattern = re.compile(r'img\s+"([^"]*)"')
        # The image address, when present, sits on a "/url:" line that follows.
        url_pattern = re.compile(r'/url:\s*(\S+)')

        found = []
        # Pair each line with its successor; the last line is paired with an
        # empty string, which can never yield a URL match.
        for current, following in zip(tree_lines, tree_lines[1:] + [""]):
            entry = current.strip()
            if not entry.startswith(("- img ", "img ")):
                continue
            alt_hit = alt_pattern.search(entry)
            url_hit = url_pattern.search(following.strip())
            alt_text = alt_hit.group(1) if alt_hit else ""
            source = url_hit.group(1) if url_hit else ""
            if alt_text or source:
                found.append({"src": source, "alt": alt_text})

        return json.dumps({
            "success": True,
            "images": found,
            "count": len(found),
        })
    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)})
def camofox_vision(question: str, annotate: bool = False,
                   task_id: Optional[str] = None) -> str:
    """Screenshot the task's Camofox tab and ask the vision model about it.

    The screenshot bytes are written to a ``browser_screenshots`` directory
    under the Hermes home and also sent, base64-encoded, to ``call_llm`` with
    the ``vision`` task. When *annotate* is true, up to 3000 characters of the
    accessibility-tree snapshot are appended to the prompt; a failure to fetch
    that snapshot is ignored.

    Returns:
        A JSON string with ``success``, ``analysis`` and ``screenshot_path``,
        or ``success`` false plus ``error`` on failure.
    """
    try:
        sess = _get_session(task_id)
        if not sess["tab_id"]:
            return json.dumps({"success": False, "error": "No browser session. Call browser_navigate first."})

        # The screenshot endpoint is read as raw bytes rather than JSON.
        shot = _get_raw(
            f"/tabs/{sess['tab_id']}/screenshot",
            params={"userId": sess["user_id"]},
        )

        # Keep a copy on disk so the caller can reference the file afterwards.
        from hermes_constants import get_hermes_home

        shots_dir = get_hermes_home() / "browser_screenshots"
        shots_dir.mkdir(parents=True, exist_ok=True)
        file_name = f"browser_screenshot_{uuid.uuid4().hex[:8]}.png"
        screenshot_path = str(shots_dir / file_name)
        with open(screenshot_path, "wb") as fh:
            fh.write(shot.content)

        # The same bytes travel to the model inline as a data URL.
        encoded = base64.b64encode(shot.content).decode("utf-8")

        tree_note = ""
        if annotate:
            try:
                snap = _get(
                    f"/tabs/{sess['tab_id']}/snapshot",
                    params={"userId": sess["user_id"]},
                )
                tree_note = f"\n\nAccessibility tree (element refs for interaction):\n{snap.get('snapshot', '')[:3000]}"
            except Exception:
                # Annotation is optional context; carry on with the bare screenshot.
                pass

        from agent.auxiliary_client import call_llm

        prompt = f"Analyze this browser screenshot and answer: {question}" + tree_note

        # The timeout is configurable under auxiliary.vision.timeout; any
        # problem reading the config falls back to 120.
        try:
            from hermes_cli.config import load_config

            vision_cfg = load_config().get("auxiliary", {}).get("vision", {})
            llm_timeout = int(vision_cfg.get("timeout", 120))
        except Exception:
            llm_timeout = 120

        image_part = {
            "type": "image_url",
            "image_url": {
                "url": f"data:image/png;base64,{encoded}",
            },
        }
        text_part = {"type": "text", "text": prompt}
        analysis = call_llm(
            messages=[{"role": "user", "content": [text_part, image_part]}],
            task="vision",
            timeout=llm_timeout,
        )

        return json.dumps({
            "success": True,
            "analysis": analysis,
            "screenshot_path": screenshot_path,
        })
    except Exception as exc:
        return json.dumps({"success": False, "error": str(exc)})
def camofox_console(clear: bool = False, task_id: Optional[str] = None) -> str:
    """Report that console output is unavailable on the Camofox backend.

    Camofox does not expose browser console logs through its REST API, so
    both arguments are ignored and a fixed, empty result is returned together
    with an explanatory note.

    Returns:
        A JSON string with empty ``console_messages`` and ``js_errors`` lists,
        zeroed totals and a ``note``.
    """
    note = (
        "Console log capture is not available with the Camofox backend. "
        "Use browser_snapshot or browser_vision to inspect page state."
    )
    empty_report = {
        "success": True,
        "console_messages": [],
        "js_errors": [],
        "total_messages": 0,
        "total_errors": 0,
        "note": note,
    }
    return json.dumps(empty_report)
# ---------------------------------------------------------------------------
# Cleanup
# ---------------------------------------------------------------------------
def cleanup_all_camofox_sessions() -> None:
    """Close every tracked Camofox session and empty the session registry.

    The registry is emptied first, under the lock, and the server is then
    asked to delete each session that was tracked at that moment. Deletion is
    best-effort: a failure for one session is ignored so the others are still
    attempted.
    """
    # Snapshot and clear in one critical section. Clearing in a second, later
    # lock acquisition would also discard any session registered while the
    # DELETE calls were in flight, leaving it untracked and never closed.
    with _sessions_lock:
        pending = list(_sessions.values())
        _sessions.clear()

    # Network calls happen outside the lock so other threads are not blocked.
    for session in pending:
        try:
            _delete(f"/sessions/{session['user_id']}")
        except Exception:
            # Best-effort shutdown: keep going with the remaining sessions.
            pass

View File

@@ -79,6 +79,14 @@ from tools.browser_providers.base import CloudBrowserProvider
from tools.browser_providers.browserbase import BrowserbaseProvider
from tools.browser_providers.browser_use import BrowserUseProvider
# Camofox local anti-detection browser backend (optional).
# When CAMOFOX_URL is set, all browser operations route through the
# camofox REST API instead of the agent-browser CLI.
try:
from tools.browser_camofox import is_camofox_mode as _is_camofox_mode
except ImportError:
_is_camofox_mode = lambda: False # noqa: E731
logger = logging.getLogger(__name__)
# Standard PATH entries for environments with minimal PATH (e.g. systemd services).
@@ -1046,6 +1054,11 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str:
"blocked_by_policy": {"host": blocked["host"], "rule": blocked["rule"], "source": blocked["source"]},
})
# Camofox backend — delegate after safety checks pass
if _is_camofox_mode():
from tools.browser_camofox import camofox_navigate
return camofox_navigate(url, task_id)
effective_task_id = task_id or "default"
# Get session info to check if this is a new session
@@ -1135,6 +1148,10 @@ def browser_snapshot(
Returns:
JSON string with page snapshot
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_snapshot
return camofox_snapshot(full, task_id, user_task)
effective_task_id = task_id or "default"
# Build command args based on full flag
@@ -1180,6 +1197,10 @@ def browser_click(ref: str, task_id: Optional[str] = None) -> str:
Returns:
JSON string with click result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_click
return camofox_click(ref, task_id)
effective_task_id = task_id or "default"
# Ensure ref starts with @
@@ -1212,6 +1233,10 @@ def browser_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
Returns:
JSON string with type result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_type
return camofox_type(ref, text, task_id)
effective_task_id = task_id or "default"
# Ensure ref starts with @
@@ -1245,6 +1270,10 @@ def browser_scroll(direction: str, task_id: Optional[str] = None) -> str:
Returns:
JSON string with scroll result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_scroll
return camofox_scroll(direction, task_id)
effective_task_id = task_id or "default"
# Validate direction
@@ -1278,6 +1307,10 @@ def browser_back(task_id: Optional[str] = None) -> str:
Returns:
JSON string with navigation result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_back
return camofox_back(task_id)
effective_task_id = task_id or "default"
result = _run_browser_command(effective_task_id, "back", [])
@@ -1305,6 +1338,10 @@ def browser_press(key: str, task_id: Optional[str] = None) -> str:
Returns:
JSON string with key press result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_press
return camofox_press(key, task_id)
effective_task_id = task_id or "default"
result = _run_browser_command(effective_task_id, "press", [key])
@@ -1330,6 +1367,10 @@ def browser_close(task_id: Optional[str] = None) -> str:
Returns:
JSON string with close result
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_close
return camofox_close(task_id)
effective_task_id = task_id or "default"
with _cleanup_lock:
had_session = effective_task_id in _active_sessions
@@ -1358,6 +1399,10 @@ def browser_console(clear: bool = False, task_id: Optional[str] = None) -> str:
Returns:
JSON string with console messages and JS errors
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_console
return camofox_console(clear, task_id)
effective_task_id = task_id or "default"
console_args = ["--clear"] if clear else []
@@ -1452,6 +1497,10 @@ def browser_get_images(task_id: Optional[str] = None) -> str:
Returns:
JSON string with list of images (src and alt)
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_get_images
return camofox_get_images(task_id)
effective_task_id = task_id or "default"
# Use eval to run JavaScript that extracts images
@@ -1516,6 +1565,10 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str]
Returns:
JSON string with vision analysis results and screenshot_path
"""
if _is_camofox_mode():
from tools.browser_camofox import camofox_vision
return camofox_vision(question, annotate, task_id)
import base64
import uuid as uuid_mod
from pathlib import Path
@@ -1804,6 +1857,10 @@ def check_browser_requirements() -> bool:
Returns:
True if all requirements are met, False otherwise
"""
# Camofox backend — only needs the server URL, no agent-browser CLI
if _is_camofox_mode():
return True
# The agent-browser CLI is always required
try:
_find_agent_browser()