# hermes-agent/tools/browser_camofox.py

"""Camofox browser backend — local anti-detection browser via REST API.

Camofox-browser is a self-hosted Node.js server wrapping Camoufox (Firefox
fork with C++ fingerprint spoofing). It exposes a REST API that maps 1:1
to our browser tool interface: accessibility snapshots with element refs,
click/type/scroll by ref, screenshots, etc.

When ``CAMOFOX_URL`` is set (e.g. ``http://localhost:9377``), the browser
tools route through this module instead of the ``agent-browser`` CLI.

Setup::

    # Option 1: npm
    git clone https://github.com/jo-inc/camofox-browser && cd camofox-browser
    npm install && npm start  # downloads Camoufox (~300MB) on first run

    # Option 2: Docker
    docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser

Then set ``CAMOFOX_URL=http://localhost:9377`` in ``~/.hermes/.env``.
"""
from __future__ import annotations
import base64
import json
import logging
import os
import threading
import uuid
from typing import Any, Dict, Optional
import requests
from hermes_cli.config import load_config
from tools.browser_camofox_state import get_camofox_identity
from tools.registry import tool_error
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Module-level settings plus the process-wide VNC cache that
# check_camofox_available() fills in and get_vnc_url() reads.
_DEFAULT_TIMEOUT = 30 # seconds per HTTP request; default for the _post/_get/_get_raw/_delete helpers
# NOTE(review): not referenced anywhere in the visible part of this module.
_SNAPSHOT_MAX_CHARS = 80_000 # camofox paginates at this limit
_vnc_url: Optional[str] = None # cached from /health response ("vncPort" field)
_vnc_url_checked = False # flipped to True after the first /health call that returns 200
def get_camofox_url() -> str:
    """Return the configured Camofox server URL, or an empty string.

    The value is read from the ``CAMOFOX_URL`` environment variable.
    Surrounding whitespace and trailing slashes are removed so that callers
    can append request paths directly and a blank (whitespace-only) value is
    treated as "not configured".
    """
    return os.getenv("CAMOFOX_URL", "").strip().rstrip("/")
def is_camofox_mode() -> bool:
    """Report whether browser tools should be routed to the Camofox backend.

    A non-blank ``BROWSER_CDP_URL`` (set by ``/browser connect`` when the
    user attaches to a live Chrome instance) always wins: in that case this
    returns ``False`` so the tools keep operating on the real browser rather
    than being silently redirected to Camofox. Otherwise the answer is
    simply whether a Camofox URL is configured.
    """
    cdp_override = os.getenv("BROWSER_CDP_URL", "").strip()
    # Short-circuits: the Camofox URL is only consulted without a CDP override.
    return not cdp_override and bool(get_camofox_url())
def _vnc_url_from_health(payload: Any, base_url: str) -> Optional[str]:
    """Build the VNC URL advertised by a ``/health`` payload, or return None."""
    if not isinstance(payload, dict):
        # A non-object JSON body carries no VNC information.
        return None
    vnc_port = payload.get("vncPort")
    # bool is a subclass of int; ``True`` must not be accepted as port 1.
    if isinstance(vnc_port, bool) or not isinstance(vnc_port, int):
        return None
    if not 1 <= vnc_port <= 65535:
        return None
    from urllib.parse import urlparse
    try:
        host = urlparse(base_url).hostname or "localhost"
    except ValueError:
        return None
    if ":" in host:
        # ``hostname`` strips the brackets of an IPv6 literal; restore them
        # so the port separator stays unambiguous.
        host = f"[{host}]"
    return f"http://{host}:{vnc_port}"


def check_camofox_available() -> bool:
    """Verify the Camofox server is reachable.

    Sends ``GET <CAMOFOX_URL>/health`` with a 5 second timeout. The first
    time the endpoint answers 200, the response body is also inspected for a
    ``vncPort`` field; a valid port is combined with the host of the
    configured URL and cached in the module-level ``_vnc_url``. A health
    body that is missing, not JSON, or not a JSON object never affects the
    availability result.

    Returns:
        ``True`` when the health endpoint answered with HTTP 200, ``False``
        when no URL is configured, the request failed, or any other status
        was returned. This function does not raise.
    """
    global _vnc_url, _vnc_url_checked
    base = get_camofox_url()
    if not base:
        return False
    try:
        resp = requests.get(f"{base}/health", timeout=5)
        if resp.status_code != 200:
            return False
        if not _vnc_url_checked:
            try:
                payload = resp.json()
            except ValueError:
                payload = None  # body is not JSON; availability is unaffected
            discovered = _vnc_url_from_health(payload, base)
            if discovered:
                _vnc_url = discovered
            _vnc_url_checked = True
        return True
    except Exception:
        return False
def get_vnc_url() -> Optional[str]:
    """Return the cached VNC URL of the Camofox server, or ``None``.

    If no successful health probe has been recorded yet, one is attempted
    first via :func:`check_camofox_available`, which fills the cache.
    """
    if _vnc_url_checked:
        return _vnc_url
    check_camofox_available()
    return _vnc_url
def _managed_persistence_enabled() -> bool:
    """Return whether Hermes-managed persistence is enabled for Camofox.

    When enabled, sessions use a stable profile-scoped userId so the
    Camofox server can map it to a persistent browser profile directory.
    When disabled (default), each session gets a random userId (ephemeral).

    Controlled by ``browser.camofox.managed_persistence`` in config.yaml.
    Missing or empty (``null``) ``browser`` / ``camofox`` sections simply
    mean "disabled". Any other problem while reading the config (load
    failure, a section that is not a mapping) is logged as a warning and
    also treated as "disabled"; this function does not raise.
    """
    try:
        config = load_config() or {}
        # ``or {}`` covers keys that exist but are empty in the YAML file.
        browser_cfg = config.get("browser") or {}
        camofox_cfg = browser_cfg.get("camofox") or {}
        return bool(camofox_cfg.get("managed_persistence"))
    except Exception as exc:
        logger.warning("managed_persistence check failed, defaulting to disabled: %s", exc)
        return False
# ---------------------------------------------------------------------------
# Session management
# ---------------------------------------------------------------------------
# Maps task_id -> {"user_id": str, "tab_id": str|None, "session_key": str, "managed": bool}
# Entries are created by _get_session() and removed by _drop_session().
_sessions: Dict[str, Dict[str, Any]] = {}
# Held while looking up, inserting into, or popping from _sessions. Fields of
# an individual entry (e.g. "tab_id" in _ensure_tab) are updated without it.
_sessions_lock = threading.Lock()
def _get_session(task_id: Optional[str]) -> Dict[str, Any]:
    """Return the camofox session record for a task, creating it on first use.

    A falsy ``task_id`` maps to the shared ``"default"`` task. With managed
    persistence enabled the userId and session key come from
    ``get_camofox_identity`` so the Camofox server can map them to the same
    persistent browser profile across restarts; otherwise a random
    ``hermes_<hex>`` userId is generated. New records start without a tab.
    """
    key = task_id or "default"
    with _sessions_lock:
        try:
            return _sessions[key]
        except KeyError:
            pass  # first request for this task: build the record below

        managed = _managed_persistence_enabled()
        if managed:
            identity = get_camofox_identity(key)
            user_id = identity["user_id"]
            session_key = identity["session_key"]
        else:
            user_id = f"hermes_{uuid.uuid4().hex[:10]}"
            session_key = f"task_{key[:16]}"

        record = {
            "user_id": user_id,
            "tab_id": None,
            "session_key": session_key,
            "managed": managed,
        }
        _sessions[key] = record
        return record
def _ensure_tab(task_id: Optional[str], url: str = "about:blank") -> Dict[str, Any]:
    """Ensure a tab exists for the session, creating one if needed.

    Args:
        task_id: Task whose session should own the tab (falsy -> default).
        url: Page the new tab is opened on; ignored when the session already
            has a tab.

    Returns:
        The session record, with ``"tab_id"`` populated.

    Raises:
        requests.HTTPError: the server rejected the ``POST /tabs`` request.
        RuntimeError: the server answered successfully but without a
            ``tabId``; recording ``None`` would let callers report a
            successful navigation for a tab that does not exist.
    """
    session = _get_session(task_id)
    if session["tab_id"]:
        return session
    # Reuse the shared POST helper (same URL, payload and default timeout).
    data = _post(
        "/tabs",
        {
            "userId": session["user_id"],
            "sessionKey": session["session_key"],
            "url": url,
        },
    )
    tab_id = data.get("tabId") if isinstance(data, dict) else None
    if not tab_id:
        raise RuntimeError("Camofox did not return a tabId when creating a tab")
    session["tab_id"] = tab_id
    return session
def _drop_session(task_id: Optional[str]) -> Optional[Dict[str, Any]]:
    """Forget the local session record for a task and hand it back.

    A falsy ``task_id`` refers to the ``"default"`` task. Returns ``None``
    when no record was being tracked.
    """
    key = task_id if task_id else "default"
    with _sessions_lock:
        removed = _sessions.pop(key, None)
    return removed
# History (2026-04-08, #6285, salvaged from #6182): camofox_soft_cleanup() was
# added so that cleanup_browser() no longer calls camofox_close() when managed
# persistence is enabled; the DELETE /sessions/{userId} request destroyed the
# server-side browser context and killed login sessions across cron runs.
def camofox_soft_cleanup(task_id: Optional[str] = None) -> bool:
    """Drop the local session entry while leaving the server-side context alive.

    With managed persistence the browser profile (and its cookies) has to
    outlive individual agent tasks, so only the in-memory tracking entry is
    discarded and ``True`` is returned. Without managed persistence nothing
    is touched and ``False`` is returned, signalling the caller to fall back
    to :func:`camofox_close`.
    """
    if not _managed_persistence_enabled():
        return False
    _drop_session(task_id)
    logger.debug("Camofox soft cleanup for task %s (managed persistence)", task_id)
    return True
# ---------------------------------------------------------------------------
# HTTP helpers
# ---------------------------------------------------------------------------
def _post(path: str, body: dict, timeout: int = _DEFAULT_TIMEOUT) -> dict:
    """Send ``body`` as JSON via POST to the Camofox server and decode the reply.

    ``path`` is appended to the configured base URL. Raises
    ``requests.HTTPError`` for 4xx/5xx responses.
    """
    response = requests.post(f"{get_camofox_url()}{path}", json=body, timeout=timeout)
    response.raise_for_status()
    return response.json()
def _get(path: str, params: Optional[dict] = None, timeout: int = _DEFAULT_TIMEOUT) -> dict:
    """GET from camofox and return the parsed JSON response.

    Args:
        path: Request path appended to the configured Camofox base URL.
        params: Optional query-string parameters.
        timeout: Request timeout in seconds.

    Raises:
        requests.HTTPError: the server answered with a 4xx/5xx status.
    """
    url = f"{get_camofox_url()}{path}"
    resp = requests.get(url, params=params, timeout=timeout)
    resp.raise_for_status()
    return resp.json()
def _get_raw(path: str, params: Optional[dict] = None, timeout: int = _DEFAULT_TIMEOUT) -> requests.Response:
    """GET from camofox and return the raw response (for binary data).

    Args:
        path: Request path appended to the configured Camofox base URL.
        params: Optional query-string parameters.
        timeout: Request timeout in seconds.

    Raises:
        requests.HTTPError: the server answered with a 4xx/5xx status.
    """
    url = f"{get_camofox_url()}{path}"
    resp = requests.get(url, params=params, timeout=timeout)
    resp.raise_for_status()
    return resp
def _delete(path: str, body: Optional[dict] = None, timeout: int = _DEFAULT_TIMEOUT) -> dict:
    """Send a DELETE request to camofox and return the parsed response.

    Args:
        path: Request path appended to the configured Camofox base URL.
        body: Optional JSON payload sent with the request.
        timeout: Request timeout in seconds.

    Returns:
        The decoded JSON body, or an empty dict when the server replied
        without content.

    Raises:
        requests.HTTPError: the server answered with a 4xx/5xx status.
    """
    url = f"{get_camofox_url()}{path}"
    resp = requests.delete(url, json=body, timeout=timeout)
    resp.raise_for_status()
    # A successful DELETE may carry no body (e.g. 204 No Content); decoding
    # it would raise even though the operation succeeded.
    if resp.status_code == 204 or not resp.content:
        return {}
    return resp.json()
# ---------------------------------------------------------------------------
# Tool implementations
# ---------------------------------------------------------------------------
def camofox_navigate(url: str, task_id: Optional[str] = None) -> str:
    """Navigate to a URL via Camofox.

    If the task has no tab yet, one is created directly on ``url``;
    otherwise the existing tab is navigated (60 second timeout). After a
    successful navigation a compact accessibility snapshot is fetched on a
    best-effort basis and attached to the result so the caller can act
    without a separate snapshot call.

    Args:
        url: Address to open.
        task_id: Task whose browser session should be used.

    Returns:
        On success, a JSON string with ``success``, ``url`` and ``title``,
        plus ``vnc_url``/``vnc_hint`` when the server exposes VNC and
        ``snapshot``/``element_count`` when the auto-snapshot worked. On
        failure, the result of ``tool_error`` or a JSON error payload.
    """
    try:
        session = _get_session(task_id)
        if not session["tab_id"]:
            # Create tab with the target URL directly
            session = _ensure_tab(task_id, url)
            data = {"ok": True, "url": url}
        else:
            # Navigate existing tab
            data = _post(
                f"/tabs/{session['tab_id']}/navigate",
                {"userId": session["user_id"], "url": url},
                timeout=60,
            )
        result = {
            "success": True,
            "url": data.get("url", url),
            "title": data.get("title", ""),
        }
        vnc = get_vnc_url()
        if vnc:
            result["vnc_url"] = vnc
            result["vnc_hint"] = (
                "Browser is visible via VNC. "
                "Share this link with the user so they can watch the browser live."
            )

        # Auto-take a compact snapshot so the model can act immediately
        try:
            snap_data = _get(
                f"/tabs/{session['tab_id']}/snapshot",
                params={"userId": session["user_id"]},
            )
            snapshot_text = snap_data.get("snapshot", "")
            from tools.browser_tool import (
                SNAPSHOT_SUMMARIZE_THRESHOLD,
                _truncate_snapshot,
            )
            if len(snapshot_text) > SNAPSHOT_SUMMARIZE_THRESHOLD:
                snapshot_text = _truncate_snapshot(snapshot_text)
            result["snapshot"] = snapshot_text
            result["element_count"] = snap_data.get("refsCount", 0)
        except Exception as snap_exc:
            # Navigation succeeded; the snapshot is a bonus. Leave a trace
            # for debugging instead of discarding the failure silently.
            logger.debug("Camofox auto-snapshot after navigate failed: %s", snap_exc)

        return json.dumps(result)
    except requests.HTTPError as e:
        return tool_error(f"Navigation failed: {e}", success=False)
    except requests.ConnectionError:
        return json.dumps({
            "success": False,
            "error": f"Cannot connect to Camofox at {get_camofox_url()}. "
                     "Is the server running? Start with: npm start (in camofox-browser dir) "
                     "or: docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser",
        })
    except Exception as e:
        return tool_error(str(e), success=False)
def camofox_snapshot(full: bool = False, task_id: Optional[str] = None,
                     user_task: Optional[str] = None) -> str:
    """Fetch the accessibility-tree snapshot of the task's current tab.

    Snapshots longer than ``SNAPSHOT_SUMMARIZE_THRESHOLD`` are reduced the
    same way the main browser tool does it: focused on ``user_task`` when
    one is given, plainly truncated otherwise. ``full`` is accepted for
    signature parity but is not consulted here.

    Returns a JSON string with ``success``, ``snapshot`` and
    ``element_count``, or the result of ``tool_error`` when there is no tab
    or the request fails.
    """
    try:
        sess = _get_session(task_id)
        tab_id = sess["tab_id"]
        if not tab_id:
            return tool_error("No browser session. Call browser_navigate first.", success=False)

        payload = _get(f"/tabs/{tab_id}/snapshot", params={"userId": sess["user_id"]})
        tree = payload.get("snapshot", "")
        element_total = payload.get("refsCount", 0)

        # Imported lazily, as in the rest of this module.
        from tools.browser_tool import (
            SNAPSHOT_SUMMARIZE_THRESHOLD,
            _extract_relevant_content,
            _truncate_snapshot,
        )
        if len(tree) > SNAPSHOT_SUMMARIZE_THRESHOLD:
            tree = (
                _extract_relevant_content(tree, user_task)
                if user_task
                else _truncate_snapshot(tree)
            )

        return json.dumps({
            "success": True,
            "snapshot": tree,
            "element_count": element_total,
        })
    except Exception as e:
        return tool_error(str(e), success=False)
def camofox_click(ref: str, task_id: Optional[str] = None) -> str:
    """Click the element identified by ``ref`` in the task's current tab.

    Leading ``@`` characters (our tool convention for refs) are removed
    before the ref is sent to Camofox. Returns a JSON string with
    ``success``, ``clicked`` and the resulting ``url``, or the result of
    ``tool_error`` on failure.
    """
    try:
        sess = _get_session(task_id)
        tab_id = sess["tab_id"]
        if not tab_id:
            return tool_error("No browser session. Call browser_navigate first.", success=False)

        target = ref.lstrip("@")
        reply = _post(
            f"/tabs/{tab_id}/click",
            {"userId": sess["user_id"], "ref": target},
        )
        outcome = {"success": True, "clicked": target, "url": reply.get("url", "")}
        return json.dumps(outcome)
    except Exception as e:
        return tool_error(str(e), success=False)
def camofox_type(ref: str, text: str, task_id: Optional[str] = None) -> str:
    """Type ``text`` into the element identified by ``ref``.

    Leading ``@`` characters are removed from the ref first. Returns a JSON
    string echoing the typed text and the target element, or the result of
    ``tool_error`` on failure.
    """
    try:
        sess = _get_session(task_id)
        tab_id = sess["tab_id"]
        if not tab_id:
            return tool_error("No browser session. Call browser_navigate first.", success=False)

        target = ref.lstrip("@")
        request_body = {"userId": sess["user_id"], "ref": target, "text": text}
        _post(f"/tabs/{tab_id}/type", request_body)
        return json.dumps({"success": True, "typed": text, "element": target})
    except Exception as e:
        return tool_error(str(e), success=False)
def camofox_scroll(direction: str, task_id: Optional[str] = None) -> str:
    """Scroll the task's current tab in the given direction.

    ``direction`` is forwarded to Camofox unchanged. Returns a JSON string
    with ``success`` and ``scrolled``, or the result of ``tool_error`` on
    failure.
    """
    try:
        sess = _get_session(task_id)
        tab_id = sess["tab_id"]
        if not tab_id:
            return tool_error("No browser session. Call browser_navigate first.", success=False)

        request_body = {"userId": sess["user_id"], "direction": direction}
        _post(f"/tabs/{tab_id}/scroll", request_body)
        return json.dumps({"success": True, "scrolled": direction})
    except Exception as e:
        return tool_error(str(e), success=False)
def camofox_back(task_id: Optional[str] = None) -> str:
    """Go one step back in the history of the task's current tab.

    Returns a JSON string with ``success`` and the ``url`` reported by
    Camofox (empty when absent), or the result of ``tool_error`` on failure.
    """
    try:
        sess = _get_session(task_id)
        tab_id = sess["tab_id"]
        if not tab_id:
            return tool_error("No browser session. Call browser_navigate first.", success=False)

        reply = _post(f"/tabs/{tab_id}/back", {"userId": sess["user_id"]})
        landed_on = reply.get("url", "")
        return json.dumps({"success": True, "url": landed_on})
    except Exception as e:
        return tool_error(str(e), success=False)
def camofox_press(key: str, task_id: Optional[str] = None) -> str:
    """Send a single keyboard key press to the task's current tab.

    ``key`` is forwarded to Camofox unchanged. Returns a JSON string with
    ``success`` and ``pressed``, or the result of ``tool_error`` on failure.
    """
    try:
        sess = _get_session(task_id)
        tab_id = sess["tab_id"]
        if not tab_id:
            return tool_error("No browser session. Call browser_navigate first.", success=False)

        request_body = {"userId": sess["user_id"], "key": key}
        _post(f"/tabs/{tab_id}/press", request_body)
        return json.dumps({"success": True, "pressed": key})
    except Exception as e:
        return tool_error(str(e), success=False)
def camofox_close(task_id: Optional[str] = None) -> str:
    """Close the task's browser session on the Camofox server.

    The local session entry is dropped first; if one existed, the
    server-side session is deleted via ``DELETE /sessions/<userId>``.
    Closing is always reported as successful: a failure is surfaced only as
    a ``warning`` field in the returned JSON string.
    """
    outcome = {"success": True, "closed": True}
    try:
        session = _drop_session(task_id)
        if session:
            _delete(f"/sessions/{session['user_id']}")
    except Exception as e:
        outcome["warning"] = str(e)
    return json.dumps(outcome)
def camofox_get_images(task_id: Optional[str] = None) -> str:
    """List the images on the current page via Camofox.

    Camofox has no dedicated /images endpoint, so the information is scraped
    from the accessibility-tree snapshot: every line starting with ``img``
    (optionally as a ``- `` list item) contributes its quoted alt text, and
    a ``/url:`` entry on the line directly after it contributes the source.
    Entries with neither alt text nor source are skipped.

    Returns a JSON string with ``success``, ``images`` and ``count``, or the
    result of ``tool_error`` on failure.
    """
    try:
        sess = _get_session(task_id)
        tab_id = sess["tab_id"]
        if not tab_id:
            return tool_error("No browser session. Call browser_navigate first.", success=False)

        import re
        alt_pattern = re.compile(r'img\s+"([^"]*)"')
        url_pattern = re.compile(r'/url:\s*(\S+)')

        payload = _get(f"/tabs/{tab_id}/snapshot", params={"userId": sess["user_id"]})
        rows = payload.get("snapshot", "").split("\n")

        # Format: img "alt text" or img "alt text" [eN]
        # URLs appear on /url: lines following img entries
        found = []
        for idx, raw in enumerate(rows):
            entry = raw.strip()
            if not entry.startswith(("- img ", "img ")):
                continue

            alt_hit = alt_pattern.search(entry)
            alt = alt_hit.group(1) if alt_hit else ""

            # Only the immediately following line is checked for the URL.
            src = ""
            if idx + 1 < len(rows):
                url_hit = url_pattern.search(rows[idx + 1].strip())
                if url_hit:
                    src = url_hit.group(1)

            if alt or src:
                found.append({"src": src, "alt": alt})

        return json.dumps({
            "success": True,
            "images": found,
            "count": len(found),
        })
    except Exception as e:
        return tool_error(str(e), success=False)
def camofox_vision(question: str, annotate: bool = False,
                   task_id: Optional[str] = None) -> str:
    """Take a screenshot and analyze it with vision AI via Camofox.

    The PNG screenshot of the task's current tab is saved under
    ``<hermes home>/browser_screenshots`` and sent, base64-encoded, to the
    auxiliary vision model together with ``question``.

    Args:
        question: What the vision model should answer about the page.
        annotate: When true, up to 3000 characters of the (redacted)
            accessibility snapshot are appended to the prompt so the answer
            can mention element refs. Best-effort: a snapshot failure does
            not fail the call.
        task_id: Task whose browser session should be used.

    Returns:
        A JSON string with ``success``, the redacted ``analysis`` text and
        ``screenshot_path``; or the result of ``tool_error`` on failure.
    """
    try:
        session = _get_session(task_id)
        if not session["tab_id"]:
            return tool_error("No browser session. Call browser_navigate first.", success=False)

        from agent.redact import redact_sensitive_text

        # Get screenshot as binary PNG
        resp = _get_raw(
            f"/tabs/{session['tab_id']}/screenshot",
            params={"userId": session["user_id"]},
        )

        # Save screenshot to cache
        from hermes_constants import get_hermes_home
        screenshots_dir = get_hermes_home() / "browser_screenshots"
        screenshots_dir.mkdir(parents=True, exist_ok=True)
        screenshot_path = str(screenshots_dir / f"browser_screenshot_{uuid.uuid4().hex[:8]}.png")
        with open(screenshot_path, "wb") as f:
            f.write(resp.content)

        # Encode for vision LLM
        img_b64 = base64.b64encode(resp.content).decode("utf-8")

        # Also get annotated snapshot if requested
        annotation_context = ""
        if annotate:
            try:
                snap_data = _get(
                    f"/tabs/{session['tab_id']}/snapshot",
                    params={"userId": session["user_id"]},
                )
                # Redact BEFORE truncating: cutting first could split a
                # secret at the 3000-char boundary so that the remaining
                # fragment no longer matches the redactor and leaks.
                # The screenshot image itself cannot be redacted, but at
                # least the text-based snippet won't carry secret values.
                snapshot_excerpt = redact_sensitive_text(snap_data.get("snapshot", ""))[:3000]
                annotation_context = (
                    "\n\nAccessibility tree (element refs for interaction):\n"
                    f"{snapshot_excerpt}"
                )
            except Exception as snap_exc:
                # The annotation is optional; keep going without it.
                logger.debug("Camofox vision annotation snapshot failed: %s", snap_exc)

        # Send to vision LLM
        from agent.auxiliary_client import call_llm
        vision_prompt = (
            f"Analyze this browser screenshot and answer: {question}"
            f"{annotation_context}"
        )
        try:
            _cfg = load_config()
            _vision_cfg = _cfg.get("auxiliary", {}).get("vision", {})
            _vision_timeout = float(_vision_cfg.get("timeout", 120))
            _vision_temperature = float(_vision_cfg.get("temperature", 0.1))
        except Exception:
            # Unreadable or malformed config: fall back to the defaults.
            _vision_timeout = 120.0
            _vision_temperature = 0.1

        response = call_llm(
            messages=[{
                "role": "user",
                "content": [
                    {"type": "text", "text": vision_prompt},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{img_b64}",
                        },
                    },
                ],
            }],
            task="vision",
            temperature=_vision_temperature,
            timeout=_vision_timeout,
        )
        analysis = (response.choices[0].message.content or "").strip() if response.choices else ""

        # Redact secrets the vision LLM may have read from the screenshot.
        analysis = redact_sensitive_text(analysis)

        return json.dumps({
            "success": True,
            "analysis": analysis,
            "screenshot_path": screenshot_path,
        })
    except Exception as e:
        return tool_error(str(e), success=False)
def camofox_console(clear: bool = False, task_id: Optional[str] = None) -> str:
    """Report console output for the Camofox backend (always empty).

    The Camofox REST API does not expose browser console logs, so both
    arguments are ignored and a fixed, successful, empty result is returned
    together with a note pointing at the alternatives.
    """
    unsupported_note = (
        "Console log capture is not available with the Camofox backend. "
        "Use browser_snapshot or browser_vision to inspect page state."
    )
    empty_report = {
        "success": True,
        "console_messages": [],
        "js_errors": [],
        "total_messages": 0,
        "total_errors": 0,
        "note": unsupported_note,
    }
    return json.dumps(empty_report)