diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index fbb5f0fa03c..4cd7ae4b856 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -734,13 +734,13 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) - lines = [ "# Nous Subscription", - "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browserbase) by default. Modal execution is optional.", + "Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browser-Use) by default. Modal execution is optional.", "Current capability status:", ] lines.extend(_status_line(feature) for feature in features.items()) lines.extend( [ - "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browserbase API keys.", + "When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys.", "If the user is not subscribed and asks for a capability that Nous subscription would unlock or simplify, suggest Nous subscription as one option alongside direct setup or local alternatives.", "Do not mention subscription unless the user asks about it or it directly solves the current missing capability.", "Useful commands: hermes setup, hermes setup tools, hermes setup terminal, hermes status.", diff --git a/cli.py b/cli.py index 5802a31e2f3..bdc0e240677 100644 --- a/cli.py +++ b/cli.py @@ -4929,13 +4929,13 @@ class HermesCLI: pass print() print("🌐 Browser disconnected from live Chrome") - print(" Browser tools reverted to default mode (local headless or Browserbase)") + print(" Browser tools reverted to their configured default mode") print() if hasattr(self, '_pending_input'): self._pending_input.put( "[System note: The user has disconnected the browser tools from their live Chrome. " - "Browser tools are back to default mode (headless local browser or Browserbase cloud).]" + "Browser tools are back to their configured default mode (headless local browser or the configured cloud provider).]" ) else: print() @@ -4962,10 +4962,17 @@ class HermesCLI: print(" Status: ✓ reachable") except (OSError, Exception): print(" Status: ⚠ not reachable (Chrome may not be running)") - elif os.environ.get("BROWSERBASE_API_KEY"): - print("🌐 Browser: Browserbase (cloud)") else: - print("🌐 Browser: local headless Chromium (agent-browser)") + try: + from tools.browser_tool import _get_cloud_provider + provider = _get_cloud_provider() + except Exception: + provider = None + + if provider is not None: + print(f"🌐 Browser: {provider.provider_name()} (cloud)") + else: + print("🌐 Browser: local headless Chromium (agent-browser)") print() print(" /browser connect — connect to your live Chrome") print(" /browser disconnect — revert to default") diff --git a/hermes_cli/nous_subscription.py b/hermes_cli/nous_subscription.py index 02814f75d39..9289ae1a95f 100644 --- a/hermes_cli/nous_subscription.py +++ b/hermes_cli/nous_subscription.py @@ -165,20 +165,20 @@ def _resolve_browser_feature_state( if browser_provider_explicit: current_provider = browser_provider or "local" if current_provider == "browserbase": - provider_available = managed_browser_available or direct_browserbase + available = bool(browser_local_available and direct_browserbase) + active = bool(browser_tool_enabled and available) + return current_provider, available, active, False + if current_provider == "browser-use": + provider_available = managed_browser_available or direct_browser_use available = bool(browser_local_available and provider_available) managed = bool( browser_tool_enabled and browser_local_available and managed_browser_available - and not direct_browserbase + and not direct_browser_use ) active = bool(browser_tool_enabled and available) return current_provider, available, active, managed - if current_provider == "browser-use": - available = bool(browser_local_available and direct_browser_use) - active = bool(browser_tool_enabled and available) - return current_provider, available, active, False if current_provider == "camofox": return current_provider, False, False, False @@ -187,16 +187,21 @@ def _resolve_browser_feature_state( active = bool(browser_tool_enabled and available) return current_provider, available, active, False - if managed_browser_available or direct_browserbase: + if direct_browserbase: + available = bool(browser_local_available) + active = bool(browser_tool_enabled and available) + return "browserbase", available, active, False + + if managed_browser_available or direct_browser_use: available = bool(browser_local_available) managed = bool( browser_tool_enabled and browser_local_available and managed_browser_available - and not direct_browserbase + and not direct_browser_use ) active = bool(browser_tool_enabled and available) - return "browserbase", available, active, managed + return "browser-use", available, active, managed available = bool(browser_local_available) active = bool(browser_tool_enabled and available) @@ -260,7 +265,7 @@ def get_nous_subscription_features( managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl") managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue") managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio") - managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browserbase") + managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browser-use") managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal") modal_state = resolve_modal_backend_state( modal_mode, @@ -508,7 +513,7 @@ def apply_nous_managed_defaults( get_env_value("BROWSERBASE_API_KEY") or get_env_value("BROWSER_USE_API_KEY") ): - browser_cfg["cloud_provider"] = "browserbase" + browser_cfg["cloud_provider"] = "browser-use" changed.add("browser") if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"): diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 98b7541523c..074b4ab47f5 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -652,14 +652,14 @@ def _print_setup_summary(config: dict, hermes_home): # Browser tools (local Chromium, Camofox, Browserbase, or Browser Use) browser_provider = subscription_features.browser.current_provider if subscription_features.browser.managed_by_nous: - tool_status.append(("Browser Automation (Nous Browserbase)", True, None)) + tool_status.append(("Browser Automation (Nous Browser-Use)", True, None)) elif subscription_features.browser.available: label = "Browser Automation" if browser_provider: label = f"Browser Automation ({browser_provider})" tool_status.append((label, True, None)) else: - missing_browser_hint = "npm install -g agent-browser, set CAMOFOX_URL, or configure Browserbase" + missing_browser_hint = "npm install -g agent-browser, set CAMOFOX_URL, or configure Browser Use or Browserbase" if browser_provider == "Browserbase": missing_browser_hint = ( "npm install -g agent-browser and set " diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 67b15bab789..7fa8ad336df 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -124,6 +124,7 @@ def show_status(args): "Firecrawl": "FIRECRAWL_API_KEY", "Tavily": "TAVILY_API_KEY", "Browserbase": "BROWSERBASE_API_KEY", # Optional — local browser works without this + "Browser Use": "BROWSER_USE_API_KEY", # Optional — local browser works without this "FAL": "FAL_KEY", "Tinker": "TINKER_API_KEY", "WandB": "WANDB_API_KEY", diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 1a0b306708c..a2a946c106a 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -280,21 +280,21 @@ TOOL_CATEGORIES = { "icon": "🌐", "providers": [ { - "name": "Nous Subscription (Browserbase cloud)", - "tag": "Managed Browserbase billed to your subscription", + "name": "Nous Subscription (Browser-Use cloud)", + "tag": "Managed Browser-Use billed to your subscription", "env_vars": [], - "browser_provider": "browserbase", + "browser_provider": "browser-use", "requires_nous_auth": True, "managed_nous_feature": "browser", - "override_env_vars": ["BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID"], - "post_setup": "browserbase", + "override_env_vars": ["BROWSER_USE_API_KEY"], + "post_setup": "agent_browser", }, { "name": "Local Browser", "tag": "Free headless Chromium (no API key needed)", "env_vars": [], "browser_provider": "local", - "post_setup": "browserbase", # Same npm install for agent-browser + "post_setup": "agent_browser", }, { "name": "Browserbase", @@ -304,7 +304,7 @@ TOOL_CATEGORIES = { {"key": "BROWSERBASE_PROJECT_ID", "prompt": "Browserbase project ID"}, ], "browser_provider": "browserbase", - "post_setup": "browserbase", + "post_setup": "agent_browser", }, { "name": "Browser Use", @@ -313,7 +313,7 @@ TOOL_CATEGORIES = { {"key": "BROWSER_USE_API_KEY", "prompt": "Browser Use API key", "url": "https://browser-use.com"}, ], "browser_provider": "browser-use", - "post_setup": "browserbase", + "post_setup": "agent_browser", }, { "name": "Camofox", @@ -372,7 +372,7 @@ TOOLSET_ENV_REQUIREMENTS = { def _run_post_setup(post_setup_key: str): """Run post-setup hooks for tools that need extra installation steps.""" import shutil - if post_setup_key == "browserbase": + if post_setup_key in ("agent_browser", "browserbase"): node_modules = PROJECT_ROOT / "node_modules" / "agent-browser" if not node_modules.exists() and shutil.which("npm"): _print_info(" Installing Node.js dependencies for browser tools...") diff --git a/tests/agent/test_prompt_builder.py b/tests/agent/test_prompt_builder.py index 791f7ea0eb2..648788209f1 100644 --- a/tests/agent/test_prompt_builder.py +++ b/tests/agent/test_prompt_builder.py @@ -422,7 +422,7 @@ class TestBuildNousSubscriptionPrompt: "web": NousFeatureState("web", "Web tools", True, True, True, True, False, True, "firecrawl"), "image_gen": NousFeatureState("image_gen", "Image generation", True, True, True, True, False, True, "Nous Subscription"), "tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"), - "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browserbase"), + "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"), "modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"), }, ), @@ -430,9 +430,9 @@ class TestBuildNousSubscriptionPrompt: prompt = build_nous_subscription_prompt({"web_search", "browser_navigate"}) - assert "Browserbase" in prompt + assert "Browser-Use" in prompt assert "Modal execution is optional" in prompt - assert "do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browserbase API keys" in prompt + assert "do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys" in prompt def test_non_subscriber_prompt_includes_relevant_upgrade_guidance(self, monkeypatch): monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1") diff --git a/tests/hermes_cli/test_nous_subscription.py b/tests/hermes_cli/test_nous_subscription.py index 69428ab0806..041cabe5493 100644 --- a/tests/hermes_cli/test_nous_subscription.py +++ b/tests/hermes_cli/test_nous_subscription.py @@ -44,7 +44,60 @@ def test_get_nous_subscription_features_prefers_managed_modal_in_auto_mode(monke assert features.modal.direct_override is False -def test_get_nous_subscription_features_prefers_camofox_over_managed_browserbase(monkeypatch): +def test_get_nous_subscription_features_marks_browser_use_as_managed_when_gateway_ready(monkeypatch): + monkeypatch.setattr(ns, "get_env_value", lambda name: "") + monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True}) + monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True) + monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: key == "browser") + monkeypatch.setattr(ns, "_has_agent_browser", lambda: True) + monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "") + monkeypatch.setattr(ns, "has_direct_modal_credentials", lambda: False) + monkeypatch.setattr( + ns, + "is_managed_tool_gateway_ready", + lambda vendor: vendor == "browser-use", + ) + + features = ns.get_nous_subscription_features( + {"browser": {"cloud_provider": "browser-use"}} + ) + + assert features.browser.available is True + assert features.browser.active is True + assert features.browser.managed_by_nous is True + assert features.browser.direct_override is False + assert features.browser.current_provider == "Browser Use" + + +def test_get_nous_subscription_features_prefers_direct_browserbase_when_unconfigured(monkeypatch): + env = { + "BROWSERBASE_API_KEY": "bb-key", + "BROWSERBASE_PROJECT_ID": "bb-project", + } + + monkeypatch.setattr(ns, "get_env_value", lambda name: env.get(name, "")) + monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True}) + monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True) + monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: key == "browser") + monkeypatch.setattr(ns, "_has_agent_browser", lambda: True) + monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "") + monkeypatch.setattr(ns, "has_direct_modal_credentials", lambda: False) + monkeypatch.setattr( + ns, + "is_managed_tool_gateway_ready", + lambda vendor: vendor == "browser-use", + ) + + features = ns.get_nous_subscription_features({}) + + assert features.browser.available is True + assert features.browser.active is True + assert features.browser.managed_by_nous is False + assert features.browser.direct_override is True + assert features.browser.current_provider == "Browserbase" + + +def test_get_nous_subscription_features_prefers_camofox_over_managed_browser_use(monkeypatch): env = {"CAMOFOX_URL": "http://localhost:9377"} monkeypatch.setattr(ns, "get_env_value", lambda name: env.get(name, "")) @@ -57,11 +110,11 @@ def test_get_nous_subscription_features_prefers_camofox_over_managed_browserbase monkeypatch.setattr( ns, "is_managed_tool_gateway_ready", - lambda vendor: vendor == "browserbase", + lambda vendor: vendor == "browser-use", ) features = ns.get_nous_subscription_features( - {"browser": {"cloud_provider": "browserbase"}} + {"browser": {"cloud_provider": "browser-use"}} ) assert features.browser.available is True diff --git a/tests/hermes_cli/test_status_model_provider.py b/tests/hermes_cli/test_status_model_provider.py index 1e6531d3729..04221d88f10 100644 --- a/tests/hermes_cli/test_status_model_provider.py +++ b/tests/hermes_cli/test_status_model_provider.py @@ -88,7 +88,7 @@ def test_show_status_reports_managed_nous_features(monkeypatch, capsys, tmp_path "web": NousFeatureState("web", "Web tools", True, True, True, True, False, True, "firecrawl"), "image_gen": NousFeatureState("image_gen", "Image generation", True, True, True, True, False, True, "Nous Subscription"), "tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"), - "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browserbase"), + "browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"), "modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"), }, ), diff --git a/tests/hermes_cli/test_tools_config.py b/tests/hermes_cli/test_tools_config.py index 946ba77fd83..b02b3c1fc43 100644 --- a/tests/hermes_cli/test_tools_config.py +++ b/tests/hermes_cli/test_tools_config.py @@ -330,7 +330,7 @@ def test_first_install_nous_auto_configures_managed_defaults(monkeypatch): assert config["web"]["backend"] == "firecrawl" assert config["tts"]["provider"] == "openai" - assert config["browser"]["cloud_provider"] == "browserbase" + assert config["browser"]["cloud_provider"] == "browser-use" assert configured == [] # ── Platform / toolset consistency ──────────────────────────────────────────── diff --git a/tests/tools/test_browser_cdp_override.py b/tests/tools/test_browser_cdp_override.py index a29971fabaa..aa388773821 100644 --- a/tests/tools/test_browser_cdp_override.py +++ b/tests/tools/test_browser_cdp_override.py @@ -45,3 +45,35 @@ class TestResolveCdpOverride: with patch("tools.browser_tool.requests.get", side_effect=RuntimeError("boom")): assert _resolve_cdp_override(HTTP_URL) == HTTP_URL + + def test_normalizes_provider_returned_http_cdp_url_when_creating_session(self, monkeypatch): + import tools.browser_tool as browser_tool + + provider = Mock() + provider.create_session.return_value = { + "session_name": "cloud-session", + "bb_session_id": "bu_123", + "cdp_url": "https://cdp.browser-use.example/session", + "features": {"browser_use": True}, + } + + response = Mock() + response.raise_for_status.return_value = None + response.json.return_value = {"webSocketDebuggerUrl": WS_URL} + + monkeypatch.setattr(browser_tool, "_active_sessions", {}) + monkeypatch.setattr(browser_tool, "_session_last_activity", {}) + monkeypatch.setattr(browser_tool, "_start_browser_cleanup_thread", lambda: None) + monkeypatch.setattr(browser_tool, "_update_session_activity", lambda task_id: None) + monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: "") + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider) + + with patch("tools.browser_tool.requests.get", return_value=response) as mock_get: + session_info = browser_tool._get_session_info("task-browser-use") + + assert session_info["cdp_url"] == WS_URL + provider.create_session.assert_called_once_with("task-browser-use") + mock_get.assert_called_once_with( + "https://cdp.browser-use.example/session/json/version", + timeout=10, + ) diff --git a/tests/tools/test_managed_browserbase_and_modal.py b/tests/tools/test_managed_browser_use_and_modal.py similarity index 76% rename from tests/tools/test_managed_browserbase_and_modal.py rename to tests/tools/test_managed_browser_use_and_modal.py index 3c8bb1214ed..8d762a866fa 100644 --- a/tests/tools/test_managed_browserbase_and_modal.py +++ b/tests/tools/test_managed_browser_use_and_modal.py @@ -113,16 +113,15 @@ def _install_fake_tools_package(): sys.modules["tools.environments.managed_modal"] = types.SimpleNamespace(ManagedModalEnvironment=_DummyEnvironment) -def test_browserbase_explicit_local_mode_stays_local_even_when_managed_gateway_is_ready(tmp_path): +def test_browser_use_explicit_local_mode_stays_local_even_when_managed_gateway_is_ready(tmp_path): _install_fake_tools_package() (tmp_path / "config.yaml").write_text("browser:\n cloud_provider: local\n", encoding="utf-8") env = os.environ.copy() - env.pop("BROWSERBASE_API_KEY", None) - env.pop("BROWSERBASE_PROJECT_ID", None) + env.pop("BROWSER_USE_API_KEY", None) env.update({ "HERMES_HOME": str(tmp_path), "TOOL_GATEWAY_USER_TOKEN": "nous-token", - "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009", + "BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009", }) with patch.dict(os.environ, env, clear=True): @@ -135,7 +134,7 @@ def test_browserbase_explicit_local_mode_stays_local_even_when_managed_gateway_i assert provider is None -def test_browserbase_managed_gateway_adds_idempotency_key_and_persists_external_call_id(): +def test_browserbase_does_not_use_gateway_only_configuration(): _install_fake_tools_package() env = os.environ.copy() env.pop("BROWSERBASE_API_KEY", None) @@ -145,177 +144,195 @@ def test_browserbase_managed_gateway_adds_idempotency_key_and_persists_external_ "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009", }) - class _Response: - status_code = 200 - ok = True - text = "" - headers = {"x-external-call-id": "call-browserbase-1"} - - def json(self): - return { - "id": "bb_local_session_1", - "connectUrl": "wss://connect.browserbase.example/session", - } - - with patch.dict(os.environ, env, clear=True): - browserbase_module = _load_tool_module( - "tools.browser_providers.browserbase", - "browser_providers/browserbase.py", - ) - - with patch.object(browserbase_module.requests, "post", return_value=_Response()) as post: - provider = browserbase_module.BrowserbaseProvider() - session = provider.create_session("task-browserbase-managed") - - sent_headers = post.call_args.kwargs["headers"] - assert sent_headers["X-BB-API-Key"] == "nous-token" - assert sent_headers["X-Idempotency-Key"].startswith("browserbase-session-create:") - assert session["external_call_id"] == "call-browserbase-1" - - -def test_browserbase_managed_gateway_reuses_pending_idempotency_key_after_timeout(): - _install_fake_tools_package() - env = os.environ.copy() - env.pop("BROWSERBASE_API_KEY", None) - env.pop("BROWSERBASE_PROJECT_ID", None) - env.update({ - "TOOL_GATEWAY_USER_TOKEN": "nous-token", - "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009", - }) - - class _Response: - status_code = 200 - ok = True - text = "" - headers = {"x-external-call-id": "call-browserbase-2"} - - def json(self): - return { - "id": "bb_local_session_2", - "connectUrl": "wss://connect.browserbase.example/session2", - } - with patch.dict(os.environ, env, clear=True): browserbase_module = _load_tool_module( "tools.browser_providers.browserbase", "browser_providers/browserbase.py", ) provider = browserbase_module.BrowserbaseProvider() - timeout = browserbase_module.requests.Timeout("timed out") + + assert provider.is_configured() is False + + +def test_browser_use_managed_gateway_adds_idempotency_key_and_persists_external_call_id(): + _install_fake_tools_package() + env = os.environ.copy() + env.pop("BROWSER_USE_API_KEY", None) + env.update({ + "TOOL_GATEWAY_USER_TOKEN": "nous-token", + "BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009", + }) + + class _Response: + status_code = 201 + ok = True + text = "" + headers = {"x-external-call-id": "call-browser-use-1"} + + def json(self): + return { + "id": "bu_local_session_1", + "cdpUrl": "wss://connect.browser-use.example/session", + } + + with patch.dict(os.environ, env, clear=True): + browser_use_module = _load_tool_module( + "tools.browser_providers.browser_use", + "browser_providers/browser_use.py", + ) + with patch.object(browser_use_module.requests, "post", return_value=_Response()) as post: + provider = browser_use_module.BrowserUseProvider() + session = provider.create_session("task-browser-use-managed") + + sent_headers = post.call_args.kwargs["headers"] + sent_payload = post.call_args.kwargs["json"] + assert sent_headers["x-browser-use-api-key"] == "nous-token" + assert sent_headers["X-Idempotency-Key"].startswith("browser-use-session-create:") + assert sent_payload == {"timeout": 5, "proxyCountryCode": "us"} + assert session["external_call_id"] == "call-browser-use-1" + assert session["features"]["managed_gateway"] is True + + +def test_browser_use_managed_gateway_reuses_pending_idempotency_key_after_timeout(): + _install_fake_tools_package() + env = os.environ.copy() + env.pop("BROWSER_USE_API_KEY", None) + env.update({ + "TOOL_GATEWAY_USER_TOKEN": "nous-token", + "BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009", + }) + + class _Response: + status_code = 201 + ok = True + text = "" + headers = {"x-external-call-id": "call-browser-use-2"} + + def json(self): + return { + "id": "bu_local_session_2", + "cdpUrl": "wss://connect.browser-use.example/session2", + } + + with patch.dict(os.environ, env, clear=True): + browser_use_module = _load_tool_module( + "tools.browser_providers.browser_use", + "browser_providers/browser_use.py", + ) + provider = browser_use_module.BrowserUseProvider() + timeout = browser_use_module.requests.Timeout("timed out") with patch.object( - browserbase_module.requests, + browser_use_module.requests, "post", side_effect=[timeout, _Response()], ) as post: try: - provider.create_session("task-browserbase-timeout") - except browserbase_module.requests.Timeout: + provider.create_session("task-browser-use-timeout") + except browser_use_module.requests.Timeout: pass else: - raise AssertionError("Expected Browserbase create_session to propagate timeout") + raise AssertionError("Expected Browser Use create_session to propagate timeout") - provider.create_session("task-browserbase-timeout") + provider.create_session("task-browser-use-timeout") first_headers = post.call_args_list[0].kwargs["headers"] second_headers = post.call_args_list[1].kwargs["headers"] assert first_headers["X-Idempotency-Key"] == second_headers["X-Idempotency-Key"] -def test_browserbase_managed_gateway_preserves_pending_idempotency_key_for_in_progress_conflicts(): +def test_browser_use_managed_gateway_preserves_pending_idempotency_key_for_in_progress_conflicts(): _install_fake_tools_package() env = os.environ.copy() - env.pop("BROWSERBASE_API_KEY", None) - env.pop("BROWSERBASE_PROJECT_ID", None) + env.pop("BROWSER_USE_API_KEY", None) env.update({ "TOOL_GATEWAY_USER_TOKEN": "nous-token", - "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009", + "BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009", }) class _ConflictResponse: status_code = 409 ok = False - text = '{"error":{"code":"CONFLICT","message":"Managed Browserbase session creation is already in progress for this idempotency key"}}' + text = '{"error":{"code":"CONFLICT","message":"Managed Browser-Use session creation is already in progress for this idempotency key"}}' headers = {} def json(self): return { "error": { "code": "CONFLICT", - "message": "Managed Browserbase session creation is already in progress for this idempotency key", + "message": "Managed Browser-Use session creation is already in progress for this idempotency key", } } class _SuccessResponse: - status_code = 200 + status_code = 201 ok = True text = "" - headers = {"x-external-call-id": "call-browserbase-4"} + headers = {"x-external-call-id": "call-browser-use-4"} def json(self): return { - "id": "bb_local_session_4", - "connectUrl": "wss://connect.browserbase.example/session4", + "id": "bu_local_session_4", + "cdpUrl": "wss://connect.browser-use.example/session4", } with patch.dict(os.environ, env, clear=True): - browserbase_module = _load_tool_module( - "tools.browser_providers.browserbase", - "browser_providers/browserbase.py", + browser_use_module = _load_tool_module( + "tools.browser_providers.browser_use", + "browser_providers/browser_use.py", ) - provider = browserbase_module.BrowserbaseProvider() + provider = browser_use_module.BrowserUseProvider() with patch.object( - browserbase_module.requests, + browser_use_module.requests, "post", side_effect=[_ConflictResponse(), _SuccessResponse()], ) as post: try: - provider.create_session("task-browserbase-conflict") + provider.create_session("task-browser-use-conflict") except RuntimeError: pass else: - raise AssertionError("Expected Browserbase create_session to propagate the in-progress conflict") + raise AssertionError("Expected Browser Use create_session to propagate the in-progress conflict") - provider.create_session("task-browserbase-conflict") + provider.create_session("task-browser-use-conflict") first_headers = post.call_args_list[0].kwargs["headers"] second_headers = post.call_args_list[1].kwargs["headers"] assert first_headers["X-Idempotency-Key"] == second_headers["X-Idempotency-Key"] -def test_browserbase_managed_gateway_uses_new_idempotency_key_for_a_new_session_after_success(): +def test_browser_use_managed_gateway_uses_new_idempotency_key_for_a_new_session_after_success(): _install_fake_tools_package() env = os.environ.copy() - env.pop("BROWSERBASE_API_KEY", None) - env.pop("BROWSERBASE_PROJECT_ID", None) + env.pop("BROWSER_USE_API_KEY", None) env.update({ "TOOL_GATEWAY_USER_TOKEN": "nous-token", - "BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009", + "BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009", }) class _Response: - status_code = 200 + status_code = 201 ok = True text = "" - headers = {"x-external-call-id": "call-browserbase-3"} + headers = {"x-external-call-id": "call-browser-use-3"} def json(self): return { - "id": "bb_local_session_3", - "connectUrl": "wss://connect.browserbase.example/session3", + "id": "bu_local_session_3", + "cdpUrl": "wss://connect.browser-use.example/session3", } with patch.dict(os.environ, env, clear=True): - browserbase_module = _load_tool_module( - "tools.browser_providers.browserbase", - "browser_providers/browserbase.py", + browser_use_module = _load_tool_module( + "tools.browser_providers.browser_use", + "browser_providers/browser_use.py", ) - provider = browserbase_module.BrowserbaseProvider() + provider = browser_use_module.BrowserUseProvider() - with patch.object(browserbase_module.requests, "post", side_effect=[_Response(), _Response()]) as post: - provider.create_session("task-browserbase-new") - provider.create_session("task-browserbase-new") + with patch.object(browser_use_module.requests, "post", side_effect=[_Response(), _Response()]) as post: + provider.create_session("task-browser-use-new") + provider.create_session("task-browser-use-new") first_headers = post.call_args_list[0].kwargs["headers"] second_headers = post.call_args_list[1].kwargs["headers"] diff --git a/tests/tools/test_managed_tool_gateway.py b/tests/tools/test_managed_tool_gateway.py index 39b9125e1a0..f854732b2f0 100644 --- a/tests/tools/test_managed_tool_gateway.py +++ b/tests/tools/test_managed_tool_gateway.py @@ -40,17 +40,17 @@ def test_resolve_managed_tool_gateway_uses_vendor_specific_override(): os.environ, { "HERMES_ENABLE_NOUS_MANAGED_TOOLS": "1", - "BROWSERBASE_GATEWAY_URL": "http://browserbase-gateway.localhost:3009/", + "BROWSER_USE_GATEWAY_URL": "http://browser-use-gateway.localhost:3009/", }, clear=False, ): result = resolve_managed_tool_gateway( - "browserbase", + "browser-use", token_reader=lambda: "nous-token", ) assert result is not None - assert result.gateway_origin == "http://browserbase-gateway.localhost:3009" + assert result.gateway_origin == "http://browser-use-gateway.localhost:3009" def test_resolve_managed_tool_gateway_is_inactive_without_nous_token(): diff --git a/tools/browser_providers/browser_use.py b/tools/browser_providers/browser_use.py index 48a618400fd..569bd67589c 100644 --- a/tools/browser_providers/browser_use.py +++ b/tools/browser_providers/browser_use.py @@ -2,16 +2,62 @@ import logging import os +import threading import uuid -from typing import Dict +from typing import Any, Dict, Optional import requests from tools.browser_providers.base import CloudBrowserProvider +from tools.managed_tool_gateway import resolve_managed_tool_gateway +from tools.tool_backend_helpers import managed_nous_tools_enabled logger = logging.getLogger(__name__) -_BASE_URL = "https://api.browser-use.com/api/v2" +_DIRECT_BASE_URL = "https://api.browser-use.com/api/v3" +_DEFAULT_MANAGED_TIMEOUT_MINUTES = 5 +_DEFAULT_MANAGED_PROXY_COUNTRY_CODE = "us" +_pending_create_keys: Dict[str, str] = {} +_pending_create_keys_lock = threading.Lock() + + +def _get_or_create_pending_create_key(task_id: str) -> str: + with _pending_create_keys_lock: + existing = _pending_create_keys.get(task_id) + if existing: + return existing + + created = f"browser-use-session-create:{uuid.uuid4().hex}" + _pending_create_keys[task_id] = created + return created + + +def _clear_pending_create_key(task_id: str) -> None: + with _pending_create_keys_lock: + _pending_create_keys.pop(task_id, None) + + +def _should_preserve_pending_create_key(response: requests.Response) -> bool: + if response.status_code >= 500: + return True + + if response.status_code != 409: + return False + + try: + payload = response.json() + except Exception: + return False + + if not isinstance(payload, dict): + return False + + error = payload.get("error") + if not isinstance(error, dict): + return False + + message = str(error.get("message") or "").lower() + return "already in progress" in message class BrowserUseProvider(CloudBrowserProvider): @@ -21,85 +67,148 @@ class BrowserUseProvider(CloudBrowserProvider): return "Browser Use" def is_configured(self) -> bool: - return bool(os.environ.get("BROWSER_USE_API_KEY")) + return self._get_config_or_none() is not None # ------------------------------------------------------------------ # Session lifecycle # ------------------------------------------------------------------ - def _headers(self) -> Dict[str, str]: + def _get_config_or_none(self) -> Optional[Dict[str, Any]]: api_key = os.environ.get("BROWSER_USE_API_KEY") - if not api_key: - raise ValueError( - "BROWSER_USE_API_KEY environment variable is required. " + if api_key: + return { + "api_key": api_key, + "base_url": os.environ.get("BROWSER_USE_BASE_URL", _DIRECT_BASE_URL).rstrip("/"), + "managed_mode": False, + } + + managed = resolve_managed_tool_gateway("browser-use") + if managed is None: + return None + + return { + "api_key": managed.nous_user_token, + "base_url": managed.gateway_origin.rstrip("/"), + "managed_mode": True, + } + + def _get_config(self) -> Dict[str, Any]: + config = self._get_config_or_none() + if config is None: + message = ( + "Browser Use requires direct BROWSER_USE_API_KEY credentials. " "Get your key at https://browser-use.com" ) + if managed_nous_tools_enabled(): + message = ( + "Browser Use requires either direct BROWSER_USE_API_KEY credentials " + "or a managed Browser-Use gateway configuration." + ) + raise ValueError(message) + return config + + def _headers(self, api_key: str, *, managed_mode: bool) -> Dict[str, str]: + header_name = "x-browser-use-api-key" if managed_mode else "X-Browser-Use-API-Key" return { "Content-Type": "application/json", - "X-Browser-Use-API-Key": api_key, + header_name: api_key, + } + + def _create_endpoint(self, config: Dict[str, Any]) -> str: + return f"{config['base_url']}/browsers" + + def _session_endpoint(self, config: Dict[str, Any], session_id: str) -> str: + return f"{config['base_url']}/browsers/{session_id}" + + def _create_payload(self, *, managed_mode: bool) -> Dict[str, object]: + if not managed_mode: + return {} + + # Keep gateway-backed sessions short so billing authorization does not + # default to a long Browser-Use timeout when Hermes only needs a task + # scoped ephemeral browser. + return { + "timeout": _DEFAULT_MANAGED_TIMEOUT_MINUTES, + "proxyCountryCode": _DEFAULT_MANAGED_PROXY_COUNTRY_CODE, } def create_session(self, task_id: str) -> Dict[str, object]: + config = self._get_config() + managed_mode = bool(config.get("managed_mode")) + headers = self._headers(config["api_key"], managed_mode=managed_mode) + if managed_mode: + headers["X-Idempotency-Key"] = _get_or_create_pending_create_key(task_id) + response = requests.post( - f"{_BASE_URL}/browsers", - headers=self._headers(), - json={}, + self._create_endpoint(config), + headers=headers, + json=self._create_payload(managed_mode=managed_mode), timeout=30, ) if not response.ok: + if managed_mode and not _should_preserve_pending_create_key(response): + _clear_pending_create_key(task_id) raise RuntimeError( f"Failed to create Browser Use session: " f"{response.status_code} {response.text}" ) session_data = response.json() + if managed_mode: + _clear_pending_create_key(task_id) session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}" logger.info("Created Browser Use session %s", session_name) - return { + result = { "session_name": session_name, "bb_session_id": session_data["id"], "cdp_url": session_data["cdpUrl"], - "features": {"browser_use": True}, + "features": {"browser_use": True, "managed_gateway": managed_mode}, } + if managed_mode: + result["external_call_id"] = response.headers.get("x-external-call-id") + return result def close_session(self, session_id: str) -> bool: + try: + config = self._get_config() + except ValueError: + logger.warning("Cannot close Browser Use session %s — missing credentials", session_id) + return False + try: response = requests.patch( - f"{_BASE_URL}/browsers/{session_id}", - headers=self._headers(), + self._session_endpoint(config, session_id), + headers=self._headers(config["api_key"], managed_mode=bool(config.get("managed_mode"))), json={"action": "stop"}, timeout=10, ) if response.status_code in (200, 201, 204): logger.debug("Successfully closed Browser Use session %s", session_id) return True - else: - logger.warning( - "Failed to close Browser Use session %s: HTTP %s - %s", - session_id, - response.status_code, - response.text[:200], - ) - return False + + logger.warning( + "Failed to close Browser Use session %s: HTTP %s - %s", + session_id, + response.status_code, + response.text[:200], + ) + return False except Exception as e: logger.error("Exception closing Browser Use session %s: %s", session_id, e) return False def emergency_cleanup(self, session_id: str) -> None: - api_key = os.environ.get("BROWSER_USE_API_KEY") - if not api_key: + config = self._get_config_or_none() + if config is None: logger.warning("Cannot emergency-cleanup Browser Use session %s — missing credentials", session_id) return try: requests.patch( - f"{_BASE_URL}/browsers/{session_id}", - headers={ - "Content-Type": "application/json", - "X-Browser-Use-API-Key": api_key, - }, + self._session_endpoint(config, session_id), + headers=self._headers(config["api_key"], managed_mode=bool(config.get("managed_mode"))), json={"action": "stop"}, timeout=5, ) diff --git a/tools/browser_providers/browserbase.py b/tools/browser_providers/browserbase.py index 5c580c3f3a3..5b292766125 100644 --- a/tools/browser_providers/browserbase.py +++ b/tools/browser_providers/browserbase.py @@ -2,58 +2,14 @@ import logging import os -import threading import uuid from typing import Any, Dict, Optional import requests from tools.browser_providers.base import CloudBrowserProvider -from tools.managed_tool_gateway import resolve_managed_tool_gateway -from tools.tool_backend_helpers import managed_nous_tools_enabled logger = logging.getLogger(__name__) -_pending_create_keys: Dict[str, str] = {} -_pending_create_keys_lock = threading.Lock() - - -def _get_or_create_pending_create_key(task_id: str) -> str: - with _pending_create_keys_lock: - existing = _pending_create_keys.get(task_id) - if existing: - return existing - - created = f"browserbase-session-create:{uuid.uuid4().hex}" - _pending_create_keys[task_id] = created - return created - - -def _clear_pending_create_key(task_id: str) -> None: - with _pending_create_keys_lock: - _pending_create_keys.pop(task_id, None) - - -def _should_preserve_pending_create_key(response: requests.Response) -> bool: - if response.status_code >= 500: - return True - - if response.status_code != 409: - return False - - try: - payload = response.json() - except Exception: - return False - - if not isinstance(payload, dict): - return False - - error = payload.get("error") - if not isinstance(error, dict): - return False - - message = str(error.get("message") or "").lower() - return "already in progress" in message class BrowserbaseProvider(CloudBrowserProvider): @@ -79,35 +35,18 @@ class BrowserbaseProvider(CloudBrowserProvider): "base_url": os.environ.get("BROWSERBASE_BASE_URL", "https://api.browserbase.com").rstrip("/"), "managed_mode": False, } - - managed = resolve_managed_tool_gateway("browserbase") - if managed is None: - return None - - return { - "api_key": managed.nous_user_token, - "project_id": "managed", - "base_url": managed.gateway_origin.rstrip("/"), - "managed_mode": True, - } + return None def _get_config(self) -> Dict[str, Any]: config = self._get_config_or_none() if config is None: - message = ( + raise ValueError( "Browserbase requires direct BROWSERBASE_API_KEY/BROWSERBASE_PROJECT_ID credentials." ) - if managed_nous_tools_enabled(): - message = ( - "Browserbase requires either direct BROWSERBASE_API_KEY/BROWSERBASE_PROJECT_ID " - "credentials or a managed Browserbase gateway configuration." - ) - raise ValueError(message) return config def create_session(self, task_id: str) -> Dict[str, object]: config = self._get_config() - managed_mode = bool(config.get("managed_mode")) # Optional env-var knobs enable_proxies = os.environ.get("BROWSERBASE_PROXIES", "true").lower() != "false" @@ -147,8 +86,6 @@ class BrowserbaseProvider(CloudBrowserProvider): "Content-Type": "application/json", "X-BB-API-Key": config["api_key"], } - if managed_mode: - headers["X-Idempotency-Key"] = _get_or_create_pending_create_key(task_id) response = requests.post( f"{config['base_url']}/v1/sessions", @@ -161,7 +98,7 @@ class BrowserbaseProvider(CloudBrowserProvider): keepalive_fallback = False # Handle 402 — paid features unavailable - if response.status_code == 402 and not managed_mode: + if response.status_code == 402: if enable_keep_alive: keepalive_fallback = True logger.warning( @@ -191,18 +128,13 @@ class BrowserbaseProvider(CloudBrowserProvider): ) if not response.ok: - if managed_mode and not _should_preserve_pending_create_key(response): - _clear_pending_create_key(task_id) raise RuntimeError( f"Failed to create Browserbase session: " f"{response.status_code} {response.text}" ) session_data = response.json() - if managed_mode: - _clear_pending_create_key(task_id) session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}" - external_call_id = response.headers.get("x-external-call-id") if managed_mode else None if enable_proxies and not proxies_fallback: features_enabled["proxies"] = True @@ -221,7 +153,6 @@ class BrowserbaseProvider(CloudBrowserProvider): "bb_session_id": session_data["id"], "cdp_url": session_data["connectUrl"], "features": features_enabled, - "external_call_id": external_call_id, } def close_session(self, session_id: str) -> bool: diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 546ed3cd169..492b3b6ca60 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -2,11 +2,11 @@ """ Browser Tool Module -This module provides browser automation tools using agent-browser CLI. It -supports two backends — **Browserbase** (cloud) and **local Chromium** — with -identical agent-facing behaviour. The backend is auto-detected: if -``BROWSERBASE_API_KEY`` is set the cloud service is used; otherwise a local -headless Chromium instance is launched automatically. +This module provides browser automation tools using agent-browser CLI. It +supports local Chromium plus multiple cloud backends, including Browserbase +and Browser Use, with identical agent-facing behaviour. The backend is selected +from config when present and otherwise falls back to any configured cloud +provider before using a local headless Chromium instance. The tool uses agent-browser's accessibility tree (ariaSnapshot) for text-based page representation, making it ideal for LLM agents without vision capabilities. @@ -17,8 +17,7 @@ Features: ``agent-browser install`` (downloads Chromium) or ``agent-browser install --with-deps`` (also installs system libraries for Debian/Ubuntu/Docker). -- **Cloud mode**: Browserbase cloud execution with stealth features, proxies, - and CAPTCHA solving. Activated when BROWSERBASE_API_KEY is set. +- **Cloud mode**: Browserbase or Browser Use cloud execution when configured. - Session isolation per task ID - Text-based page snapshots using accessibility tree - Element interaction via ref selectors (@e1, @e2, etc.) @@ -26,8 +25,9 @@ Features: - Automatic cleanup of browser sessions Environment Variables: -- BROWSERBASE_API_KEY: API key for Browserbase (enables cloud mode) -- BROWSERBASE_PROJECT_ID: Project ID for Browserbase (required for cloud mode) +- BROWSERBASE_API_KEY: API key for direct Browserbase cloud mode +- BROWSERBASE_PROJECT_ID: Project ID for direct Browserbase cloud mode +- BROWSER_USE_API_KEY: API key for direct Browser Use cloud mode - BROWSERBASE_PROXIES: Enable/disable residential proxies (default: "true") - BROWSERBASE_ADVANCED_STEALTH: Enable advanced stealth mode with custom Chromium, requires Scale Plan (default: "false") @@ -248,8 +248,7 @@ def _get_cloud_provider() -> Optional[CloudBrowserProvider]: Reads ``config["browser"]["cloud_provider"]`` once and caches the result for the process lifetime. An explicit ``local`` provider disables cloud - fallback. If unset, fall back to Browserbase when direct or managed - Browserbase credentials are available. + fallback. If unset, fall back to the first configured cloud provider. """ global _cached_cloud_provider, _cloud_provider_resolved if _cloud_provider_resolved: @@ -278,23 +277,14 @@ def _get_cloud_provider() -> Optional[CloudBrowserProvider]: logger.debug("Could not read cloud_provider from config: %s", e) if _cached_cloud_provider is None: - fallback_provider = BrowserbaseProvider() - if fallback_provider.is_configured(): - _cached_cloud_provider = fallback_provider + for fallback_provider in (BrowserbaseProvider(), BrowserUseProvider()): + if fallback_provider.is_configured(): + _cached_cloud_provider = fallback_provider + break return _cached_cloud_provider -def _get_browserbase_config_or_none() -> Optional[Dict[str, Any]]: - """Return Browserbase direct or managed config, or None when unavailable.""" - return BrowserbaseProvider()._get_config_or_none() - - -def _get_browserbase_config() -> Dict[str, Any]: - """Return Browserbase config or raise when neither direct nor managed mode is available.""" - return BrowserbaseProvider()._get_config() - - def _is_local_mode() -> bool: """Return True when the browser tool will use a local browser backend.""" if _get_cdp_override(): @@ -615,7 +605,7 @@ BROWSER_TOOL_SCHEMAS = [ }, { "name": "browser_close", - "description": "Close the browser session and release resources. Call this when done with browser tasks to free up Browserbase session quota.", + "description": "Close the browser session and release resources. Call this when done with browser tasks to free up cloud browser session quota.", "parameters": { "type": "object", "properties": {}, @@ -738,6 +728,11 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]: session_info = _create_local_session(task_id) else: session_info = provider.create_session(task_id) + if session_info.get("cdp_url"): + # Some cloud providers (including Browser-Use v3) return an HTTP + # CDP discovery URL instead of a raw websocket endpoint. + session_info = dict(session_info) + session_info["cdp_url"] = _resolve_cdp_override(str(session_info["cdp_url"])) with _cleanup_lock: # Double-check: another thread may have created a session while we @@ -872,11 +867,11 @@ def _run_browser_command( return {"success": False, "error": f"Failed to create browser session: {str(e)}"} # Build the command with the appropriate backend flag. - # Cloud mode: --cdp connects to Browserbase. + # Cloud mode: --cdp connects to a remote cloud browser. # Local mode: --session launches a local headless Chromium. # The rest of the command (--json, command, args) is identical. if session_info.get("cdp_url"): - # Cloud mode — connect to remote Browserbase browser via CDP + # Cloud mode — connect to a remote cloud browser via CDP # IMPORTANT: Do NOT use --session with --cdp. In agent-browser >=0.13, # --session creates a local browser instance and silently ignores --cdp. backend_args = ["--cdp", session_info["cdp_url"]]