mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 15:31:38 +08:00
Compare commits
1 Commits
fix/plugin
...
hermes/bro
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4b8272f549 |
@@ -406,3 +406,184 @@ def test_check_fn_false_when_browser_requirements_fail(monkeypatch):
|
|||||||
bt, "_get_cdp_override", lambda: "ws://localhost:9222/devtools/browser/x"
|
bt, "_get_cdp_override", lambda: "ws://localhost:9222/devtools/browser/x"
|
||||||
)
|
)
|
||||||
assert browser_cdp_tool._browser_cdp_check() is False
|
assert browser_cdp_tool._browser_cdp_check() is False
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# browser_dialog
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_dialog_invalid_action_returns_error():
    """An action other than accept/dismiss yields an error naming both valid values."""
    payload = json.loads(browser_cdp_tool.browser_dialog(action="yes"))
    assert "error" in payload
    message = payload["error"]
    assert "accept" in message
    assert "dismiss" in message
|
||||||
|
|
||||||
|
|
||||||
|
def test_dialog_no_endpoint_returns_error(monkeypatch):
    """With no CDP endpoint resolved, the error points the user at '/browser connect'."""
    # browser_dialog checks _WS_AVAILABLE before it looks at the endpoint, so
    # pin the flag to True; otherwise this test would hit the "websockets
    # missing" branch on machines without the package installed.
    monkeypatch.setattr(browser_cdp_tool, "_WS_AVAILABLE", True)
    monkeypatch.setattr(browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "")
    result = json.loads(browser_cdp_tool.browser_dialog(action="accept"))
    assert "error" in result
    assert "/browser connect" in result["error"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_dialog_websockets_missing_returns_error(monkeypatch):
    """When the websockets flag is off, the error message mentions 'websockets'."""
    monkeypatch.setattr(browser_cdp_tool, "_WS_AVAILABLE", False)
    raw = browser_cdp_tool.browser_dialog(action="accept")
    payload = json.loads(raw)
    assert "error" in payload
    lowered = payload["error"].lower()
    assert "websockets" in lowered
|
||||||
|
|
||||||
|
|
||||||
|
def test_dialog_explicit_target_accept_flow(cdp_server):
    """An explicit target_id skips Target.getTargets: only attach + handle are sent."""

    def attach(params, sid):
        return {"sessionId": f"sess-{params['targetId']}"}

    cdp_server.on("Target.attachToTarget", attach)
    cdp_server.on("Page.handleJavaScriptDialog", lambda params, sid: {})

    raw = browser_cdp_tool.browser_dialog(
        action="accept", target_id="tab-A", prompt_text="hello"
    )
    payload = json.loads(raw)
    assert payload["success"] is True
    assert payload["action"] == "accept"
    assert payload["target_id"] == "tab-A"

    recorded = cdp_server.received()
    # The tab listing must be skipped entirely when the caller names a target.
    method_names = [entry["method"] for entry in recorded]
    assert "Target.getTargets" not in method_names
    assert method_names == ["Target.attachToTarget", "Page.handleJavaScriptDialog"]
    handle_call = recorded[1]
    assert handle_call["params"] == {"accept": True, "promptText": "hello"}
    assert handle_call["sessionId"] == "sess-tab-A"
|
||||||
|
|
||||||
|
|
||||||
|
def test_dialog_explicit_target_dismiss_flow(cdp_server):
    """Dismissing sends accept=False with an empty promptText."""

    def attach(params, sid):
        return {"sessionId": f"sess-{params['targetId']}"}

    cdp_server.on("Target.attachToTarget", attach)
    cdp_server.on("Page.handleJavaScriptDialog", lambda params, sid: {})

    raw = browser_cdp_tool.browser_dialog(action="dismiss", target_id="tab-B")
    payload = json.loads(raw)
    assert payload["success"] is True
    assert payload["action"] == "dismiss"
    # Index 1 is the handle call; index 0 is the attach that precedes it.
    handle_call = cdp_server.received()[1]
    assert handle_call["params"] == {"accept": False, "promptText": ""}
|
||||||
|
|
||||||
|
|
||||||
|
def test_dialog_auto_resolve_single_page(cdp_server):
    """With one 'page' target among other target types, that page is chosen automatically."""
    target_infos = [
        {"targetId": "only-page", "type": "page", "title": "One", "url": "a"},
        {"targetId": "bg", "type": "background_page", "title": "Bg", "url": "b"},
        {"targetId": "sw", "type": "service_worker", "title": "SW", "url": "c"},
    ]

    def attach(params, sid):
        return {"sessionId": f"sess-{params['targetId']}"}

    cdp_server.on("Target.getTargets", lambda params, sid: {"targetInfos": target_infos})
    cdp_server.on("Target.attachToTarget", attach)
    cdp_server.on("Page.handleJavaScriptDialog", lambda params, sid: {})

    payload = json.loads(browser_cdp_tool.browser_dialog(action="accept"))
    assert payload["success"] is True
    assert payload["target_id"] == "only-page"

    # Expected sequence: list targets, attach to the single page, handle the dialog.
    recorded = cdp_server.received()
    listing, attach_call, handle_call = recorded[0], recorded[1], recorded[2]
    assert listing["method"] == "Target.getTargets"
    assert attach_call["method"] == "Target.attachToTarget"
    assert attach_call["params"]["targetId"] == "only-page"
    assert handle_call["method"] == "Page.handleJavaScriptDialog"
|
||||||
|
|
||||||
|
|
||||||
|
def test_dialog_auto_resolve_no_pages(cdp_server):
    """If no target has type 'page', auto-resolution reports 'No page tabs'."""
    only_background = {
        "targetInfos": [
            {"targetId": "bg", "type": "background_page", "title": "Bg", "url": "x"},
        ]
    }
    cdp_server.on("Target.getTargets", lambda params, sid: only_background)

    payload = json.loads(browser_cdp_tool.browser_dialog(action="accept"))
    assert "error" in payload
    assert "No page tabs" in payload["error"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_dialog_auto_resolve_multiple_pages_lists_tabs(cdp_server):
    """With two page tabs the tool refuses to guess and lists the candidates."""
    two_pages = {
        "targetInfos": [
            {"targetId": "A", "type": "page", "title": "First", "url": "https://a.test"},
            {"targetId": "B", "type": "page", "title": "Second", "url": "https://b.test"},
        ]
    }
    cdp_server.on("Target.getTargets", lambda params, sid: two_pages)

    payload = json.loads(browser_cdp_tool.browser_dialog(action="accept"))
    assert "error" in payload
    assert "target_id" in payload["error"]
    assert payload.get("page_count") == 2
    listed_ids = set()
    for tab in payload.get("tabs", []):
        listed_ids.add(tab["targetId"])
    assert listed_ids == {"A", "B"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_dialog_passes_through_no_dialog_showing(cdp_server):
    """A CDP-level failure of the handle call surfaces as a tool error
    that still carries the action and target_id."""
    cdp_server.on("Target.attachToTarget", lambda params, sid: {"sessionId": "sess"})
    # Page.handleJavaScriptDialog is deliberately left unregistered so the
    # mock server answers it with a CDP error.
    raw = browser_cdp_tool.browser_dialog(action="dismiss", target_id="tab-X")
    payload = json.loads(raw)
    assert "error" in payload
    assert payload.get("action") == "dismiss"
    assert payload.get("target_id") == "tab-X"
|
||||||
|
|
||||||
|
|
||||||
|
def test_dialog_registered_in_browser_toolset_with_same_gate():
    """browser_dialog is registered in the 'browser' toolset and shares
    browser_cdp's check_fn, so both tools are gated together."""
    from tools.registry import registry

    dialog_entry = registry.get_entry("browser_dialog")
    cdp_entry = registry.get_entry("browser_cdp")

    assert dialog_entry is not None
    assert dialog_entry.toolset == "browser"

    schema = dialog_entry.schema
    assert schema["name"] == "browser_dialog"
    parameters = schema["parameters"]
    assert parameters["required"] == ["action"]
    allowed_actions = set(parameters["properties"]["action"]["enum"])
    assert allowed_actions == {"accept", "dismiss"}

    # Identity, not equality: both entries must point at the same gate function.
    assert dialog_entry.check_fn is cdp_entry.check_fn
|
||||||
|
|
||||||
|
|
||||||
|
def test_dialog_dispatch_through_registry(cdp_server):
    """Dispatching 'browser_dialog' via the registry reaches the tool and succeeds."""
    from tools.registry import registry

    cdp_server.on("Target.attachToTarget", lambda p, s: {"sessionId": "sess"})
    cdp_server.on("Page.handleJavaScriptDialog", lambda p, s: {})

    tool_args = {"action": "accept", "target_id": "tab-Z"}
    raw = registry.dispatch("browser_dialog", tool_args, task_id="t1")

    payload = json.loads(raw)
    assert payload["success"] is True
    assert payload["action"] == "accept"
|
||||||
|
|||||||
@@ -1,19 +1,24 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""
|
"""
|
||||||
Raw Chrome DevTools Protocol (CDP) passthrough tool.
|
Chrome DevTools Protocol (CDP) tools.
|
||||||
|
|
||||||
Exposes a single tool, ``browser_cdp``, that sends arbitrary CDP commands to
|
Exposes two tools that share the same CDP endpoint and availability gate:
|
||||||
the browser's DevTools WebSocket endpoint. Works when a CDP URL is
|
|
||||||
configured — either via ``/browser connect`` (sets ``BROWSER_CDP_URL``) or
|
|
||||||
``browser.cdp_url`` in ``config.yaml`` — or when a CDP-backed cloud provider
|
|
||||||
session is active.
|
|
||||||
|
|
||||||
This is the escape hatch for browser operations not covered by the main
|
* ``browser_cdp`` — raw CDP passthrough for arbitrary commands. Escape
|
||||||
browser tool surface (``browser_navigate``, ``browser_click``,
|
hatch for anything not covered by the wrapped browser tools.
|
||||||
``browser_console``, etc.) — handling native dialogs, iframe-scoped
|
* ``browser_dialog`` — ergonomic wrapper over ``Page.handleJavaScriptDialog``
|
||||||
evaluation, cookie/network control, low-level tab management, etc.
|
that accepts/dismisses a native JS dialog (alert/confirm/prompt/
|
||||||
|
beforeunload) blocking the page. Auto-resolves ``target_id`` when
|
||||||
|
exactly one page tab is open.
|
||||||
|
|
||||||
Method reference: https://chromedevtools.github.io/devtools-protocol/
|
Both tools are only registered when a CDP endpoint is actually reachable
|
||||||
|
from Python at session start — meaning ``/browser connect`` is active or
|
||||||
|
``browser.cdp_url`` is set in ``config.yaml``. Backends that don't
|
||||||
|
currently expose CDP (Camofox, default local agent-browser, cloud
|
||||||
|
providers whose per-session ``cdp_url`` isn't surfaced) don't see these
|
||||||
|
tools at all.
|
||||||
|
|
||||||
|
CDP method reference: https://chromedevtools.github.io/devtools-protocol/
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
@@ -414,3 +419,239 @@ registry.register(
|
|||||||
check_fn=_browser_cdp_check,
|
check_fn=_browser_cdp_check,
|
||||||
emoji="🧪",
|
emoji="🧪",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# browser_dialog — ergonomic wrapper over Page.handleJavaScriptDialog
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def browser_dialog(
    action: str,
    prompt_text: Optional[str] = None,
    target_id: Optional[str] = None,
    timeout: float = 30.0,
    task_id: Optional[str] = None,
) -> str:
    """Accept or dismiss a native JS dialog blocking the page.

    Thin wrapper over the CDP ``Page.handleJavaScriptDialog`` verb that
    also auto-resolves ``target_id`` when exactly one page tab is open.
    Same CDP endpoint and availability gate as :func:`browser_cdp`.

    Args:
        action: ``"accept"`` or ``"dismiss"``.
        prompt_text: Text to enter when handling a ``prompt()`` dialog;
            ignored for alert/confirm/beforeunload.  Sent as an empty
            string when omitted.
        target_id: Target/tab ID from ``Target.getTargets``.  Optional
            when exactly one page tab is open; required otherwise.
        timeout: Seconds to wait for each CDP round-trip (default 30).
            Unparseable or falsy values fall back to 30; the value is
            clamped to the range 1–300 seconds.
        task_id: Unused — accepted for uniformity with other browser tools.

    Returns:
        JSON string ``{"success": True, "action": ..., "target_id": ...,
        "result": ...}`` on success, or a ``tool_error`` payload
        (``{"error": "..."}`` plus context fields) on failure.  Every
        failure of the dialog call itself carries ``action`` and
        ``target_id``.  A ``RuntimeError`` from the CDP call (e.g. CDP's
        ``"No dialog is showing"``) is passed through verbatim so callers
        can use this as a probe for dialog presence.
    """
    del task_id

    # --- input validation ------------------------------------------------
    if action not in ("accept", "dismiss"):
        return tool_error(
            f"'action' must be 'accept' or 'dismiss', got {action!r}"
        )

    # --- shared gate checks (match browser_cdp) --------------------------
    if not _WS_AVAILABLE:
        return tool_error(
            "The 'websockets' Python package is required but not installed. "
            "Install it with: pip install websockets"
        )

    endpoint = _resolve_cdp_endpoint()
    if not endpoint:
        return tool_error(
            "No CDP endpoint is available. Run '/browser connect' to attach "
            "to a running Chrome, or set 'browser.cdp_url' in config.yaml.",
            cdp_docs=CDP_DOCS_URL,
        )

    if not endpoint.startswith(("ws://", "wss://")):
        return tool_error(
            f"CDP endpoint is not a WebSocket URL: {endpoint!r}. "
            "Check that Chrome is actually listening on the debug port."
        )

    try:
        safe_timeout = float(timeout) if timeout else 30.0
    except (TypeError, ValueError):
        safe_timeout = 30.0
    safe_timeout = max(1.0, min(safe_timeout, 300.0))

    # --- auto-resolve target_id when not explicitly given ---------------
    resolved_target_id = target_id
    if not resolved_target_id:
        try:
            targets_result = _run_async(
                _cdp_call(
                    endpoint, "Target.getTargets", {}, None, safe_timeout
                )
            )
        except (asyncio.TimeoutError, TimeoutError) as exc:
            return tool_error(
                f"Timed out listing tabs while resolving target: {exc}"
            )
        except RuntimeError as exc:
            return tool_error(
                f"Failed to list tabs while resolving target: {exc}"
            )
        except WebSocketException as exc:
            return tool_error(
                f"WebSocket error while resolving target at {endpoint}: {exc}"
            )
        except Exception as exc:  # pragma: no cover — unexpected
            # Mirror the dispatch stage below: anything unexpected (e.g. a
            # refused connection) becomes a tool error, not a raw exception.
            logger.exception(
                "browser_dialog unexpected error while resolving target"
            )
            return tool_error(
                "Unexpected error while resolving target: "
                f"{type(exc).__name__}: {exc}"
            )

        # Tolerate a malformed reply (non-dict result, missing or null
        # "targetInfos", non-dict entries) by treating it as "no pages".
        target_infos = (
            targets_result.get("targetInfos")
            if isinstance(targets_result, dict)
            else None
        ) or []
        page_targets = [
            t
            for t in target_infos
            if isinstance(t, dict) and t.get("type") == "page"
        ]
        if not page_targets:
            return tool_error(
                "No page tabs found — nothing to handle a dialog on."
            )
        if len(page_targets) > 1:
            return tool_error(
                "Multiple page tabs are open — pass target_id explicitly. "
                "Use browser_cdp(method='Target.getTargets') to list them.",
                page_count=len(page_targets),
                tabs=[
                    {
                        "targetId": t.get("targetId"),
                        "title": t.get("title", ""),
                        "url": t.get("url", ""),
                    }
                    for t in page_targets
                ],
            )
        resolved_target_id = page_targets[0].get("targetId")
        if not resolved_target_id:
            return tool_error(
                "Target.getTargets returned a page target without a targetId"
            )

    # --- dispatch the dialog handler -------------------------------------
    cdp_params = {
        "accept": action == "accept",
        "promptText": prompt_text or "",
    }
    try:
        result = _run_async(
            _cdp_call(
                endpoint,
                "Page.handleJavaScriptDialog",
                cdp_params,
                resolved_target_id,
                safe_timeout,
            )
        )
    except (asyncio.TimeoutError, TimeoutError) as exc:
        return tool_error(
            f"CDP call timed out after {safe_timeout}s: {exc}",
            action=action,
            target_id=resolved_target_id,
        )
    except RuntimeError as exc:
        # CDP returns a clear "No dialog is showing" error when there's
        # nothing to handle — pass it through so callers can probe.
        return tool_error(
            str(exc), action=action, target_id=resolved_target_id
        )
    except WebSocketException as exc:
        return tool_error(
            f"WebSocket error talking to CDP at {endpoint}: {exc}. The "
            "browser may have disconnected — try '/browser connect' again.",
            action=action,
            target_id=resolved_target_id,
        )
    except Exception as exc:  # pragma: no cover — unexpected
        logger.exception("browser_dialog unexpected error")
        return tool_error(
            f"Unexpected error: {type(exc).__name__}: {exc}",
            action=action,
            target_id=resolved_target_id,
        )

    return json.dumps(
        {
            "success": True,
            "action": action,
            "target_id": resolved_target_id,
            "result": result,
        },
        ensure_ascii=False,
    )
|
||||||
|
|
||||||
|
|
||||||
|
# Tool schema for browser_dialog.  The description is assembled from four
# paragraphs joined by blank lines; only "action" is required.
BROWSER_DIALOG_SCHEMA: Dict[str, Any] = {
    "name": "browser_dialog",
    "description": "\n\n".join(
        (
            # What the tool does.
            "Accept or dismiss a native JS dialog (alert/confirm/prompt/"
            "beforeunload) that's blocking a page.",
            # When to reach for it, including use as a probe.
            "**When to use:** native dialogs freeze the page's JS thread, so "
            "browser_snapshot, browser_console, browser_click and similar tools "
            "will hang or error until the dialog is handled. Use this tool to "
            "unstick the page. Also safe as a probe — CDP returns a clean 'No "
            "dialog is showing' error when there isn't one, so you can call "
            "this to check whether a suspected dialog exists.",
            # Availability.
            "**Requires the same CDP endpoint as browser_cdp.** If this tool "
            "is in your toolset, the endpoint is already reachable.",
            # Target selection rules.
            "**target_id auto-resolution:** when exactly one page tab is "
            "open, target_id can be omitted. With multiple page tabs, an "
            "explicit target_id is required — the error response lists the "
            "tabs so you can pick one.",
        )
    ),
    "parameters": {
        "type": "object",
        "required": ["action"],
        "properties": {
            "action": {
                "type": "string",
                "enum": ["accept", "dismiss"],
                "description": (
                    "'accept' confirms OK/Yes/Submit; 'dismiss' cancels. "
                    "For beforeunload dialogs, 'accept' leaves the page "
                    "and 'dismiss' stays on it."
                ),
            },
            "prompt_text": {
                "type": "string",
                "description": (
                    "Text to enter when handling a prompt() dialog. "
                    "Ignored for alert, confirm, and beforeunload dialogs."
                ),
            },
            "target_id": {
                "type": "string",
                "description": (
                    "Target/tab ID from Target.getTargets. Optional when "
                    "exactly one page tab is open; required otherwise."
                ),
            },
        },
    },
}
|
||||||
|
|
||||||
|
|
||||||
|
# Register browser_dialog in the "browser" toolset using _browser_cdp_check
# as its availability gate.  The handler maps the tool-call argument dict
# onto browser_dialog's keyword parameters.
registry.register(
    name="browser_dialog",
    toolset="browser",
    schema=BROWSER_DIALOG_SCHEMA,
    check_fn=_browser_cdp_check,
    emoji="💬",
    handler=lambda args, **kw: browser_dialog(
        action=args.get("action", ""),
        prompt_text=args.get("prompt_text"),
        target_id=args.get("target_id"),
        timeout=args.get("timeout", 30.0),
        task_id=kw.get("task_id"),
    ),
)
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ _HERMES_CORE_TOOLS = [
|
|||||||
"browser_navigate", "browser_snapshot", "browser_click",
|
"browser_navigate", "browser_snapshot", "browser_click",
|
||||||
"browser_type", "browser_scroll", "browser_back",
|
"browser_type", "browser_scroll", "browser_back",
|
||||||
"browser_press", "browser_get_images",
|
"browser_press", "browser_get_images",
|
||||||
"browser_vision", "browser_console", "browser_cdp",
|
"browser_vision", "browser_console", "browser_cdp", "browser_dialog",
|
||||||
# Text-to-speech
|
# Text-to-speech
|
||||||
"text_to_speech",
|
"text_to_speech",
|
||||||
# Planning & memory
|
# Planning & memory
|
||||||
@@ -115,7 +115,7 @@ TOOLSETS = {
|
|||||||
"browser_navigate", "browser_snapshot", "browser_click",
|
"browser_navigate", "browser_snapshot", "browser_click",
|
||||||
"browser_type", "browser_scroll", "browser_back",
|
"browser_type", "browser_scroll", "browser_back",
|
||||||
"browser_press", "browser_get_images",
|
"browser_press", "browser_get_images",
|
||||||
"browser_vision", "browser_console", "browser_cdp", "web_search"
|
"browser_vision", "browser_console", "browser_cdp", "browser_dialog", "web_search"
|
||||||
],
|
],
|
||||||
"includes": []
|
"includes": []
|
||||||
},
|
},
|
||||||
@@ -249,7 +249,7 @@ TOOLSETS = {
|
|||||||
"browser_navigate", "browser_snapshot", "browser_click",
|
"browser_navigate", "browser_snapshot", "browser_click",
|
||||||
"browser_type", "browser_scroll", "browser_back",
|
"browser_type", "browser_scroll", "browser_back",
|
||||||
"browser_press", "browser_get_images",
|
"browser_press", "browser_get_images",
|
||||||
"browser_vision", "browser_console", "browser_cdp",
|
"browser_vision", "browser_console", "browser_cdp", "browser_dialog",
|
||||||
"todo", "memory",
|
"todo", "memory",
|
||||||
"session_search",
|
"session_search",
|
||||||
"execute_code", "delegate_task",
|
"execute_code", "delegate_task",
|
||||||
@@ -274,7 +274,7 @@ TOOLSETS = {
|
|||||||
"browser_navigate", "browser_snapshot", "browser_click",
|
"browser_navigate", "browser_snapshot", "browser_click",
|
||||||
"browser_type", "browser_scroll", "browser_back",
|
"browser_type", "browser_scroll", "browser_back",
|
||||||
"browser_press", "browser_get_images",
|
"browser_press", "browser_get_images",
|
||||||
"browser_vision", "browser_console", "browser_cdp",
|
"browser_vision", "browser_console", "browser_cdp", "browser_dialog",
|
||||||
# Planning & memory
|
# Planning & memory
|
||||||
"todo", "memory",
|
"todo", "memory",
|
||||||
# Session history search
|
# Session history search
|
||||||
|
|||||||
@@ -6,9 +6,9 @@ description: "Authoritative reference for Hermes built-in tools, grouped by tool
|
|||||||
|
|
||||||
# Built-in Tools Reference
|
# Built-in Tools Reference
|
||||||
|
|
||||||
This page documents all 53 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets.
|
This page documents all 54 built-in tools in the Hermes tool registry, grouped by toolset. Availability varies by platform, credentials, and enabled toolsets.
|
||||||
|
|
||||||
**Quick counts:** 11 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, and 15 standalone tools across other toolsets.
|
**Quick counts:** 12 browser tools, 4 file tools, 10 RL tools, 4 Home Assistant tools, 2 terminal tools, 2 web tools, 5 Feishu tools, and 15 standalone tools across other toolsets.
|
||||||
|
|
||||||
:::tip MCP Tools
|
:::tip MCP Tools
|
||||||
In addition to built-in tools, Hermes can load tools dynamically from MCP servers. MCP tools appear with a server-name prefix (e.g., `github_create_issue` for the `github` MCP server). See [MCP Integration](/docs/user-guide/features/mcp) for configuration.
|
In addition to built-in tools, Hermes can load tools dynamically from MCP servers. MCP tools appear with a server-name prefix (e.g., `github_create_issue` for the `github` MCP server). See [MCP Integration](/docs/user-guide/features/mcp) for configuration.
|
||||||
@@ -20,6 +20,7 @@ In addition to built-in tools, Hermes can load tools dynamically from MCP server
|
|||||||
|------|-------------|----------------------|
|
|------|-------------|----------------------|
|
||||||
| `browser_back` | Navigate back to the previous page in browser history. Requires browser_navigate to be called first. | — |
|
| `browser_back` | Navigate back to the previous page in browser history. Requires browser_navigate to be called first. | — |
|
||||||
| `browser_cdp` | Send a raw Chrome DevTools Protocol (CDP) command. Escape hatch for browser operations not covered by browser_navigate, browser_click, browser_console, etc. Only available when a CDP endpoint is reachable at session start — via `/browser connect` or `browser.cdp_url` config. See https://chromedevtools.github.io/devtools-protocol/ | — |
|
| `browser_cdp` | Send a raw Chrome DevTools Protocol (CDP) command. Escape hatch for browser operations not covered by browser_navigate, browser_click, browser_console, etc. Only available when a CDP endpoint is reachable at session start — via `/browser connect` or `browser.cdp_url` config. See https://chromedevtools.github.io/devtools-protocol/ | — |
|
||||||
|
| `browser_dialog` | Accept or dismiss a native JS dialog (alert/confirm/prompt/beforeunload) that's blocking a page. Auto-resolves target_id when exactly one page tab is open. Same CDP gate as browser_cdp. Safe as a probe — returns 'No dialog is showing' when nothing's pending. | — |
|
||||||
| `browser_click` | Click on an element identified by its ref ID from the snapshot (e.g., '@e5'). The ref IDs are shown in square brackets in the snapshot output. Requires browser_navigate and browser_snapshot to be called first. | — |
|
| `browser_click` | Click on an element identified by its ref ID from the snapshot (e.g., '@e5'). The ref IDs are shown in square brackets in the snapshot output. Requires browser_navigate and browser_snapshot to be called first. | — |
|
||||||
| `browser_console` | Get browser console output and JavaScript errors from the current page. Returns console.log/warn/error/info messages and uncaught JS exceptions. Use this to detect silent JavaScript errors, failed API calls, and application warnings. Requi… | — |
|
| `browser_console` | Get browser console output and JavaScript errors from the current page. Returns console.log/warn/error/info messages and uncaught JS exceptions. Use this to detect silent JavaScript errors, failed API calls, and application warnings. Requi… | — |
|
||||||
| `browser_get_images` | Get a list of all images on the current page with their URLs and alt text. Useful for finding images to analyze with the vision tool. Requires browser_navigate to be called first. | — |
|
| `browser_get_images` | Get a list of all images on the current page with their URLs and alt text. Useful for finding images to analyze with the vision tool. Requires browser_navigate to be called first. | — |
|
||||||
|
|||||||
@@ -52,7 +52,7 @@ Or in-session:
|
|||||||
|
|
||||||
| Toolset | Tools | Purpose |
|
| Toolset | Tools | Purpose |
|
||||||
|---------|-------|---------|
|
|---------|-------|---------|
|
||||||
| `browser` | `browser_back`, `browser_cdp`, `browser_click`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. `browser_cdp` is a raw CDP passthrough gated on a reachable CDP endpoint — it only appears when `/browser connect` is active or `browser.cdp_url` is set. |
|
| `browser` | `browser_back`, `browser_cdp`, `browser_click`, `browser_console`, `browser_dialog`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` | Full browser automation. Includes `web_search` as a fallback for quick lookups. `browser_cdp` and `browser_dialog` share a gate on a reachable CDP endpoint — both only appear when `/browser connect` is active or `browser.cdp_url` is set. |
|
||||||
| `clarify` | `clarify` | Ask the user a question when the agent needs clarification. |
|
| `clarify` | `clarify` | Ask the user a question when the agent needs clarification. |
|
||||||
| `code_execution` | `execute_code` | Run Python scripts that call Hermes tools programmatically. |
|
| `code_execution` | `execute_code` | Run Python scripts that call Hermes tools programmatically. |
|
||||||
| `cronjob` | `cronjob` | Schedule and manage recurring tasks. |
|
| `cronjob` | `cronjob` | Schedule and manage recurring tasks. |
|
||||||
|
|||||||
@@ -357,6 +357,32 @@ browser_cdp(method="Network.getAllCookies")
|
|||||||
|
|
||||||
Browser-level methods (`Target.*`, `Browser.*`, `Storage.*`) omit `target_id`. Page-level methods (`Page.*`, `Runtime.*`, `DOM.*`, `Emulation.*`) require a `target_id` from `Target.getTargets`. Each call is independent — sessions do not persist between calls.
|
Browser-level methods (`Target.*`, `Browser.*`, `Storage.*`) omit `target_id`. Page-level methods (`Page.*`, `Runtime.*`, `DOM.*`, `Emulation.*`) require a `target_id` from `Target.getTargets`. Each call is independent — sessions do not persist between calls.
|
||||||
|
|
||||||
|
### `browser_dialog`
|
||||||
|
|
||||||
|
Accept or dismiss a native JS dialog (`alert`, `confirm`, `prompt`, `beforeunload`) that's blocking a page. Native dialogs freeze the page's JS thread, so `browser_snapshot`, `browser_console`, `browser_click` and related tools will hang or error until the dialog is handled.
|
||||||
|
|
||||||
|
**Same CDP gate as `browser_cdp`** — appears in the toolset when `/browser connect` is active or `browser.cdp_url` is set, and disappears otherwise.
|
||||||
|
|
||||||
|
```
|
||||||
|
# Accept (click OK / Yes / Submit)
|
||||||
|
browser_dialog(action="accept")
|
||||||
|
|
||||||
|
# Dismiss (click Cancel / No)
|
||||||
|
browser_dialog(action="dismiss")
|
||||||
|
|
||||||
|
# Fill a prompt() dialog
|
||||||
|
browser_dialog(action="accept", prompt_text="my answer")
|
||||||
|
|
||||||
|
# With multiple tabs open, specify which one
|
||||||
|
browser_dialog(action="accept", target_id="<tabId>")
|
||||||
|
```
|
||||||
|
|
||||||
|
`target_id` is auto-resolved when exactly one page tab is open. With multiple page tabs, the tool returns an error listing them so the agent can pick one explicitly.
|
||||||
|
|
||||||
|
Safe as a probe: CDP cleanly returns `"No dialog is showing"` when nothing's pending, so calling `browser_dialog(action="dismiss")` is a low-risk way to check for a stuck dialog — bear in mind that if a dialog *is* pending, the call dismisses it. If subsequent `browser_snapshot` / `browser_click` calls start hanging on a page that was working before, this is the first thing to try.
|
||||||
|
|
||||||
|
**Note on dialog detection:** Hermes does not currently auto-detect that a dialog is open — the agent infers from symptoms (calls hanging/erroring) and uses `browser_dialog` to unstick the page. Persistent dialog-event subscription is a larger architectural change (persistent CDP connections per session) and is a follow-up.
|
||||||
|
|
||||||
## Practical Examples
|
## Practical Examples
|
||||||
|
|
||||||
### Filling Out a Web Form
|
### Filling Out a Web Form
|
||||||
|
|||||||
Reference in New Issue
Block a user