mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-04 09:47:54 +08:00
Compare commits
1 Commits
main
...
feat/brows
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
15c75b1018 |
468
tests/tools/test_browser_coordinate_click.py
Normal file
468
tests/tools/test_browser_coordinate_click.py
Normal file
@@ -0,0 +1,468 @@
|
||||
"""Tests for compositor-level coordinate click (browser_click with x/y params).
|
||||
|
||||
Covers:
|
||||
- Input validation (ref vs x/y mutually exclusive)
|
||||
- CDP coordinate click path (via mock CDP server)
|
||||
- agent-browser mouse fallback path
|
||||
- Camofox passthrough still works with ref
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import threading
|
||||
from typing import Any, Dict, List
|
||||
import pytest
|
||||
|
||||
import websockets
|
||||
from websockets.asyncio.server import serve
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# In-process CDP mock server (reused from test_browser_cdp_tool.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class _CDPServer:
|
||||
"""Tiny CDP mock — replies to registered method handlers."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._handlers: Dict[str, Any] = {}
|
||||
self._responses: List[Dict[str, Any]] = []
|
||||
self._loop: asyncio.AbstractEventLoop | None = None
|
||||
self._server: Any = None
|
||||
self._thread: threading.Thread | None = None
|
||||
self._host = "127.0.0.1"
|
||||
self._port = 0
|
||||
|
||||
def on(self, method: str, handler):
|
||||
self._handlers[method] = handler
|
||||
|
||||
def start(self) -> str:
|
||||
ready = threading.Event()
|
||||
|
||||
def _run() -> None:
|
||||
self._loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(self._loop)
|
||||
|
||||
async def _handler(ws):
|
||||
try:
|
||||
async for raw in ws:
|
||||
msg = json.loads(raw)
|
||||
call_id = msg.get("id")
|
||||
method = msg.get("method", "")
|
||||
params = msg.get("params", {}) or {}
|
||||
session_id = msg.get("sessionId")
|
||||
self._responses.append(msg)
|
||||
|
||||
fn = self._handlers.get(method)
|
||||
if fn is None:
|
||||
reply = {
|
||||
"id": call_id,
|
||||
"error": {"code": -32601, "message": f"No handler for {method}"},
|
||||
}
|
||||
else:
|
||||
try:
|
||||
result = fn(params, session_id)
|
||||
reply = {"id": call_id, "result": result}
|
||||
except Exception as exc:
|
||||
reply = {"id": call_id, "error": {"code": -1, "message": str(exc)}}
|
||||
if session_id:
|
||||
reply["sessionId"] = session_id
|
||||
await ws.send(json.dumps(reply))
|
||||
except websockets.exceptions.ConnectionClosed:
|
||||
pass
|
||||
|
||||
async def _serve() -> None:
|
||||
self._server = await serve(_handler, self._host, 0)
|
||||
sock = next(iter(self._server.sockets))
|
||||
self._port = sock.getsockname()[1]
|
||||
ready.set()
|
||||
await self._server.wait_closed()
|
||||
|
||||
try:
|
||||
self._loop.run_until_complete(_serve())
|
||||
finally:
|
||||
self._loop.close()
|
||||
|
||||
self._thread = threading.Thread(target=_run, daemon=True)
|
||||
self._thread.start()
|
||||
if not ready.wait(timeout=5.0):
|
||||
raise RuntimeError("CDP mock server failed to start")
|
||||
return f"ws://{self._host}:{self._port}/devtools/browser/mock"
|
||||
|
||||
def stop(self) -> None:
|
||||
if self._loop and self._server:
|
||||
self._loop.call_soon_threadsafe(self._server.close)
|
||||
if self._thread:
|
||||
self._thread.join(timeout=3.0)
|
||||
|
||||
def received(self) -> List[Dict[str, Any]]:
|
||||
return list(self._responses)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def cdp_server(monkeypatch):
    """Start a CDP mock and point browser_cdp_tool's resolver at it.

    Yields the running ``_CDPServer``.  The server is stopped on teardown,
    including when setup fails part-way through.
    """
    # Imported lazily, and before the server starts, so an import failure
    # cannot leave a server thread running.
    import tools.browser_cdp_tool as cdp_mod

    server = _CDPServer()
    try:
        ws_url = server.start()
        monkeypatch.setattr(cdp_mod, "_resolve_cdp_endpoint", lambda: ws_url)
        yield server
    finally:
        server.stop()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Input validation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestClickInputValidation:
    """browser_click must receive exactly one of ``ref`` or the ``x``/``y`` pair."""

    @staticmethod
    def _rejected(**kwargs):
        """Call browser_click, assert it reported failure, return the lowercased error."""
        from tools.browser_tool import browser_click

        outcome = json.loads(browser_click(**kwargs))
        assert outcome["success"] is False
        return outcome["error"].lower()

    def test_neither_ref_nor_coords(self):
        message = self._rejected()
        assert "ref" in message or "x" in message

    def test_both_ref_and_coords(self):
        message = self._rejected(ref="@e1", x=100, y=200)
        assert "not both" in message

    def test_x_without_y(self):
        message = self._rejected(x=100)
        assert "both" in message

    def test_y_without_x(self):
        message = self._rejected(y=200)
        assert "both" in message

    def test_empty_ref_treated_as_missing(self):
        message = self._rejected(ref="")
        assert "ref" in message or "x" in message

    def test_non_numeric_coordinates(self):
        message = self._rejected(x="abc", y="def")
        assert "number" in message
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CDP coordinate click (happy path via mock server)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestCDPCoordinateClick:
    """Coordinate clicks routed through CDP ``Input.dispatchMouseEvent``."""

    @staticmethod
    def _click(**coords):
        """Invoke browser_click with the given coordinates and decode its JSON reply."""
        from tools.browser_tool import browser_click

        return json.loads(browser_click(**coords))

    @staticmethod
    def _serve_single_page(server, *, with_mouse=True):
        """Register handlers for one attached page target ("p1" / session "s1")."""
        page = {"targetId": "p1", "type": "page", "attached": True, "url": "..."}
        server.on("Target.getTargets", lambda p, s: {"targetInfos": [page]})
        server.on("Target.attachToTarget", lambda p, s: {"sessionId": "s1"})
        if with_mouse:
            server.on("Input.dispatchMouseEvent", lambda p, s: {})

    def test_cdp_click_dispatches_press_and_release(self, cdp_server):
        # Handlers for each protocol call the click is expected to make.
        page = {"targetId": "page-1", "type": "page", "attached": True, "url": "https://example.com"}
        cdp_server.on("Target.getTargets", lambda p, s: {"targetInfos": [page]})
        cdp_server.on(
            "Target.attachToTarget",
            lambda p, s: {"sessionId": f"sess-{p['targetId']}"},
        )
        cdp_server.on("Input.dispatchMouseEvent", lambda p, s: {})

        outcome = self._click(x=150, y=300)
        assert outcome["success"] is True
        assert outcome["clicked_at"] == {"x": 150, "y": 300}
        assert outcome["method"] == "cdp_compositor"

        # The server must have seen target discovery and the mouse dispatches.
        seen = cdp_server.received()
        seen_methods = {entry["method"] for entry in seen}
        assert "Target.getTargets" in seen_methods
        assert "Input.dispatchMouseEvent" in seen_methods

        # Exactly one press followed by one release at the requested point.
        mouse_events = [entry for entry in seen if entry["method"] == "Input.dispatchMouseEvent"]
        assert len(mouse_events) == 2
        press, release = (event["params"] for event in mouse_events)
        assert press["type"] == "mousePressed"
        assert press["x"] == 150
        assert press["y"] == 300
        assert press["button"] == "left"
        assert release["type"] == "mouseReleased"

    def test_cdp_click_rounds_float_coordinates(self, cdp_server):
        self._serve_single_page(cdp_server)

        outcome = self._click(x=150.7, y=299.3)
        assert outcome["success"] is True
        assert outcome["clicked_at"] == {"x": 151, "y": 299}

    def test_cdp_click_no_page_target_still_works(self, cdp_server):
        """When Target.getTargets returns no page targets, click proceeds without target_id."""
        cdp_server.on(
            "Target.getTargets",
            lambda p, s: {"targetInfos": [{"targetId": "sw1", "type": "service_worker"}]},
        )
        # Target.attachToTarget is deliberately not registered: with no page
        # target the click is sent without attaching to a session.
        cdp_server.on("Input.dispatchMouseEvent", lambda p, s: {})

        outcome = self._click(x=50, y=50)
        assert outcome["success"] is True
        assert outcome["clicked_at"] == {"x": 50, "y": 50}

    def test_cdp_dispatch_mouse_event_failure(self, cdp_server):
        """When Input.dispatchMouseEvent returns a CDP error, return failure."""
        # No Input.dispatchMouseEvent handler, so the mock replies with a CDP error.
        self._serve_single_page(cdp_server, with_mouse=False)

        outcome = self._click(x=100, y=200)
        assert outcome["success"] is False
        assert "CDP coordinate click failed" in outcome["error"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# agent-browser mouse fallback
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAgentBrowserMouseFallback:
    """With no CDP endpoint, coordinate clicks fall back to agent-browser mouse commands."""

    @staticmethod
    def _patch_backend(monkeypatch, *, fail_on=None, fail_message=None, log=None):
        """Disable the CDP endpoint and install a fake ``_run_browser_command``.

        ``fail_on`` names the mouse sub-command ("move", "down", "up") that
        should report failure with ``fail_message``; all others succeed.
        ``log``, when given, collects ``(command, args)`` for every call.
        Returns the patched ``browser_tool`` module.
        """
        from tools import browser_tool, browser_cdp_tool

        # An empty endpoint means "no CDP available".
        monkeypatch.setattr(browser_cdp_tool, "_resolve_cdp_endpoint", lambda: "")

        def fake_run(task_id, command, args=None, timeout=None):
            if log is not None:
                log.append((command, args))
            if fail_on is not None and args and args[0] == fail_on:
                return {"success": False, "error": fail_message}
            return {"success": True}

        monkeypatch.setattr(browser_tool, "_run_browser_command", fake_run)
        monkeypatch.setattr(browser_tool, "_last_session_key", lambda tid: tid)
        return browser_tool

    def test_falls_back_to_agent_browser_mouse(self, monkeypatch):
        issued = []
        tool = self._patch_backend(monkeypatch, log=issued)

        outcome = json.loads(tool.browser_click(x=200, y=400))
        assert outcome["success"] is True
        assert outcome["clicked_at"] == {"x": 200, "y": 400}
        assert outcome["method"] == "agent_browser_mouse"

        # Expected sequence: mouse move, mouse down, mouse up.
        assert len(issued) == 3
        assert issued[0] == ("mouse", ["move", "200", "400"])
        assert issued[1] == ("mouse", ["down"])
        assert issued[2] == ("mouse", ["up"])

    def test_mouse_move_failure_returns_error(self, monkeypatch):
        tool = self._patch_backend(
            monkeypatch, fail_on="move", fail_message="mouse move not supported"
        )

        outcome = json.loads(tool.browser_click(x=100, y=100))
        assert outcome["success"] is False
        assert "mouse move" in outcome["error"]

    def test_mouse_down_failure_returns_error(self, monkeypatch):
        tool = self._patch_backend(
            monkeypatch, fail_on="down", fail_message="mouse down failed"
        )

        outcome = json.loads(tool.browser_click(x=100, y=100))
        assert outcome["success"] is False
        assert "mouse down" in outcome["error"]

    def test_mouse_up_failure_returns_error(self, monkeypatch):
        tool = self._patch_backend(
            monkeypatch, fail_on="up", fail_message="mouse up failed"
        )

        outcome = json.loads(tool.browser_click(x=100, y=100))
        assert outcome["success"] is False
        assert "mouse up" in outcome["error"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Ref-based click unchanged
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRefClickPreserved:
    """The ref-based click path keeps working as before."""

    @staticmethod
    def _patch_backend(monkeypatch, seen_args=None):
        """Force non-camofox mode and stub the command runner; return ``browser_tool``.

        When ``seen_args`` is a list, the ``args`` of every command are appended to it.
        """
        from tools import browser_tool

        monkeypatch.setattr(browser_tool, "_is_camofox_mode", lambda: False)
        monkeypatch.setattr(browser_tool, "_last_session_key", lambda tid: tid)

        def fake_run(task_id, command, args=None, timeout=None):
            if seen_args is not None:
                seen_args.append(args)
            return {"success": True}

        monkeypatch.setattr(browser_tool, "_run_browser_command", fake_run)
        return browser_tool

    def test_ref_click_still_works(self, monkeypatch):
        tool = self._patch_backend(monkeypatch)

        outcome = json.loads(tool.browser_click(ref="@e5"))
        assert outcome["success"] is True
        assert outcome["clicked"] == "@e5"

    def test_ref_without_at_prefix_auto_added(self, monkeypatch):
        forwarded = []
        tool = self._patch_backend(monkeypatch, seen_args=forwarded)

        tool.browser_click(ref="e12")
        assert forwarded[0] == ["@e12"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Schema check
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestSchemaUpdated:
    """The browser_click schema exposes x/y and no longer requires ref."""

    @staticmethod
    def _parameters():
        """Return the ``parameters`` section of the browser_click tool schema."""
        from tools.browser_tool import _BROWSER_SCHEMA_MAP

        return _BROWSER_SCHEMA_MAP["browser_click"]["parameters"]

    def test_schema_has_x_y_properties(self):
        properties = self._parameters()["properties"]
        for axis in ("x", "y"):
            assert axis in properties
        for axis in ("x", "y"):
            assert properties[axis]["type"] == "number"

    def test_schema_no_required_fields(self):
        # ref is optional now: callers supply either ref or x+y.
        assert self._parameters().get("required", []) == []

    def test_schema_ref_still_present(self):
        assert "ref" in self._parameters()["properties"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Registry integration
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRegistryIntegration:
    """The registered browser_click handler forwards ref and x/y arguments."""

    def test_dispatch_with_coordinates(self, monkeypatch, cdp_server):
        from tools.registry import registry

        page = {"targetId": "p1", "type": "page", "attached": True, "url": "..."}
        cdp_server.on("Target.getTargets", lambda p, s: {"targetInfos": [page]})
        cdp_server.on("Target.attachToTarget", lambda p, s: {"sessionId": "s1"})
        cdp_server.on("Input.dispatchMouseEvent", lambda p, s: {})

        payload = {"x": 42, "y": 84}
        outcome = json.loads(registry.dispatch("browser_click", payload, task_id="t1"))
        assert outcome["success"] is True
        assert outcome["clicked_at"] == {"x": 42, "y": 84}

    def test_dispatch_with_ref(self, monkeypatch):
        from tools import browser_tool
        from tools.registry import registry

        def fake_run(tid, cmd, args=None, timeout=None):
            return {"success": True}

        monkeypatch.setattr(browser_tool, "_is_camofox_mode", lambda: False)
        monkeypatch.setattr(browser_tool, "_last_session_key", lambda tid: tid)
        monkeypatch.setattr(browser_tool, "_run_browser_command", fake_run)

        outcome = json.loads(registry.dispatch("browser_click", {"ref": "@e3"}, task_id="t1"))
        assert outcome["success"] is True
        assert outcome["clicked"] == "@e3"
|
||||
@@ -1024,16 +1024,23 @@ BROWSER_TOOL_SCHEMAS = [
|
||||
},
|
||||
{
|
||||
"name": "browser_click",
|
||||
"description": "Click on an element identified by its ref ID from the snapshot (e.g., '@e5'). The ref IDs are shown in square brackets in the snapshot output. Requires browser_navigate and browser_snapshot to be called first.",
|
||||
"description": "Click on an element identified by its ref ID from the snapshot (e.g., '@e5'). The ref IDs are shown in square brackets in the snapshot output. Requires browser_navigate and browser_snapshot to be called first.\n\nAlternatively, click at exact viewport coordinates (x, y) using compositor-level input. This bypasses DOM selectors entirely — clicks pass through iframes, shadow DOM, cross-origin boundaries, and canvas elements. Use browser_vision with annotate=true to find coordinates, or browser_console to evaluate getBoundingClientRect(). Provide EITHER ref OR (x + y), not both.",
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"ref": {
|
||||
"type": "string",
|
||||
"description": "The element reference from the snapshot (e.g., '@e5', '@e12')"
|
||||
},
|
||||
"x": {
|
||||
"type": "number",
|
||||
"description": "Viewport X coordinate for compositor-level click. Use with y instead of ref to click through iframes, shadow DOM, or canvas elements."
|
||||
},
|
||||
"y": {
|
||||
"type": "number",
|
||||
"description": "Viewport Y coordinate for compositor-level click. Use with x instead of ref."
|
||||
}
|
||||
},
|
||||
"required": ["ref"]
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
@@ -1922,17 +1929,197 @@ def browser_snapshot(
|
||||
}, ensure_ascii=False)
|
||||
|
||||
|
||||
def browser_click(ref: str, task_id: Optional[str] = None) -> str:
|
||||
def _cdp_coordinate_click(
    x: float,
    y: float,
    task_id: str,
    button: str = "left",
) -> str:
    """Click at viewport coordinates by dispatching CDP mouse events.

    Sends ``Input.dispatchMouseEvent`` ``mousePressed`` then ``mouseReleased``
    at the rounded ``(x, y)``.  The events are dispatched at the compositor
    level, so Chrome performs its own hit-testing; whatever is visible at
    those coordinates (iframes, shadow DOM, canvas, overlays) receives the click.

    Falls back to ``_coordinate_click_via_agent_browser`` when no ``ws://`` /
    ``wss://`` CDP endpoint can be resolved.

    Args:
        x: Viewport X coordinate; rounded to the nearest integer.
        y: Viewport Y coordinate; rounded to the nearest integer.
        task_id: Task identifier, used only by the agent-browser fallback.
        button: Mouse button name forwarded to CDP / the fallback.

    Returns:
        JSON string: ``success`` plus either ``clicked_at``/``method`` or ``error``.
    """
    try:
        from tools.browser_cdp_tool import _cdp_call, _run_async, _resolve_cdp_endpoint, _WS_AVAILABLE
    except ImportError:
        return json.dumps({
            "success": False,
            "error": "browser_cdp_tool not available — coordinate clicks require the CDP tool module.",
        }, ensure_ascii=False)

    if not _WS_AVAILABLE:
        return json.dumps({
            "success": False,
            "error": "The 'websockets' package is required for coordinate clicks. Install with: pip install websockets",
        }, ensure_ascii=False)

    endpoint = _resolve_cdp_endpoint()
    if not endpoint or not endpoint.startswith(("ws://", "wss://")):
        # No usable websocket endpoint: use agent-browser mouse commands
        # instead (three subprocess calls).
        return _coordinate_click_via_agent_browser(x, y, task_id, button)

    # Input.dispatchMouseEvent is page-level, so look for a page target to
    # scope the events to.  Discovery is best-effort: on any failure, or when
    # no page target is listed, the events are sent without a target id.
    try:
        listing = _run_async(
            _cdp_call(endpoint, "Target.getTargets", {}, None, 10.0)
        )
        session_target = next(
            (
                info["targetId"]
                for info in listing.get("targetInfos", [])
                if info.get("type") == "page" and info.get("attached", True)
            ),
            None,
        ) or None
    except Exception:
        session_target = None

    px, py = int(round(x)), int(round(y))

    try:
        # A click is a press followed by a release at the same point.
        for phase in ("mousePressed", "mouseReleased"):
            _run_async(_cdp_call(endpoint, "Input.dispatchMouseEvent", {
                "type": phase,
                "x": px,
                "y": py,
                "button": button,
                "clickCount": 1,
            }, session_target, 10.0))
    except Exception as exc:
        return json.dumps({
            "success": False,
            "error": f"CDP coordinate click failed: {type(exc).__name__}: {exc}",
        }, ensure_ascii=False)

    return json.dumps({
        "success": True,
        "clicked_at": {"x": px, "y": py},
        "method": "cdp_compositor",
    }, ensure_ascii=False)
|
||||
|
||||
|
||||
def _coordinate_click_via_agent_browser(
    x: float,
    y: float,
    task_id: str,
    button: str = "left",
) -> str:
    """Fallback coordinate click using agent-browser ``mouse`` subcommands.

    Issues ``mouse move <x> <y>``, ``mouse down`` and ``mouse up`` in order and
    stops at the first step that does not report success.

    Args:
        x: Viewport X coordinate; rounded to the nearest integer.
        y: Viewport Y coordinate; rounded to the nearest integer.
        task_id: Task identifier, resolved through ``_last_session_key``.
        button: Mouse button; appended to down/up only when it is not "left".

    Returns:
        JSON string: ``success`` plus either ``clicked_at``/``method`` or ``error``.
    """
    session_key = _last_session_key(task_id)
    px, py = int(round(x)), int(round(y))

    # "left" is passed implicitly: no extra argument on down/up.
    extra = [] if button == "left" else [button]
    steps = (
        ("mouse move", ["move", str(px), str(py)]),
        ("mouse down", ["down"] + extra),
        ("mouse up", ["up"] + extra),
    )

    for label, argv in steps:
        outcome = _run_browser_command(session_key, "mouse", argv)
        if not outcome.get("success"):
            return json.dumps({
                "success": False,
                "error": f"{label} failed: {outcome.get('error', 'unknown')}",
            }, ensure_ascii=False)

    return json.dumps({
        "success": True,
        "clicked_at": {"x": px, "y": py},
        "method": "agent_browser_mouse",
    }, ensure_ascii=False)
|
||||
|
||||
|
||||
def browser_click(
|
||||
ref: Optional[str] = None,
|
||||
x: Optional[float] = None,
|
||||
y: Optional[float] = None,
|
||||
task_id: Optional[str] = None,
|
||||
) -> str:
|
||||
"""
|
||||
Click on an element by ref ID, or at exact viewport coordinates.
|
||||
|
||||
Provide EITHER ``ref`` (selector-based, via agent-browser) OR ``x`` + ``y``
|
||||
(compositor-level, via CDP Input.dispatchMouseEvent). Coordinate clicks
|
||||
bypass DOM selectors entirely — they pass through iframes, shadow DOM,
|
||||
cross-origin boundaries, and canvas elements.
|
||||
|
||||
Args:
|
||||
ref: Element reference (e.g., "@e5")
|
||||
x: Viewport X coordinate for compositor-level click
|
||||
y: Viewport Y coordinate for compositor-level click
|
||||
task_id: Task identifier for session isolation
|
||||
|
||||
|
||||
Returns:
|
||||
JSON string with click result
|
||||
"""
|
||||
# --- Input validation ---------------------------------------------------
|
||||
has_ref = ref is not None and str(ref).strip() != ""
|
||||
has_coords = x is not None and y is not None
|
||||
|
||||
if has_ref and has_coords:
|
||||
return json.dumps({
|
||||
"success": False,
|
||||
"error": "Provide either 'ref' or 'x'+'y', not both.",
|
||||
}, ensure_ascii=False)
|
||||
|
||||
if (x is not None) != (y is not None):
|
||||
return json.dumps({
|
||||
"success": False,
|
||||
"error": "Both 'x' and 'y' are required for coordinate clicks.",
|
||||
}, ensure_ascii=False)
|
||||
|
||||
if not has_ref and not has_coords:
|
||||
return json.dumps({
|
||||
"success": False,
|
||||
"error": "Provide either 'ref' (element reference) or 'x'+'y' (viewport coordinates).",
|
||||
}, ensure_ascii=False)
|
||||
|
||||
# --- Coordinate-based click (compositor-level) --------------------------
|
||||
if has_coords:
|
||||
try:
|
||||
fx, fy = float(x), float(y) # type: ignore[arg-type]
|
||||
except (TypeError, ValueError):
|
||||
return json.dumps({
|
||||
"success": False,
|
||||
"error": f"x and y must be numbers, got x={x!r} y={y!r}",
|
||||
}, ensure_ascii=False)
|
||||
return _cdp_coordinate_click(fx, fy, task_id or "default")
|
||||
|
||||
# --- Ref-based click (existing path) ------------------------------------
|
||||
if not has_ref or ref is None:
|
||||
# Defensive guard — validation above should ensure we never reach here
|
||||
return json.dumps({
|
||||
"success": False,
|
||||
"error": "Internal error: expected ref parameter.",
|
||||
}, ensure_ascii=False)
|
||||
if _is_camofox_mode():
|
||||
from tools.browser_camofox import camofox_click
|
||||
return camofox_click(ref, task_id)
|
||||
@@ -2928,7 +3115,7 @@ registry.register(
|
||||
name="browser_click",
|
||||
toolset="browser",
|
||||
schema=_BROWSER_SCHEMA_MAP["browser_click"],
|
||||
handler=lambda args, **kw: browser_click(ref=args.get("ref", ""), task_id=kw.get("task_id")),
|
||||
handler=lambda args, **kw: browser_click(ref=args.get("ref"), x=args.get("x"), y=args.get("y"), task_id=kw.get("task_id")),
|
||||
check_fn=check_browser_requirements,
|
||||
emoji="👆",
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user