Compare commits

...

1 Commits

Author SHA1 Message Date
rob-maron
7882537358 feat(browser): migrate managed browser integration to Browser-Use 2026-04-06 14:54:44 -04:00
16 changed files with 413 additions and 263 deletions

View File

@@ -734,13 +734,13 @@ def build_nous_subscription_prompt(valid_tool_names: "set[str] | None" = None) -
lines = [
"# Nous Subscription",
"Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browserbase) by default. Modal execution is optional.",
"Nous subscription includes managed web tools (Firecrawl), image generation (FAL), OpenAI TTS, and browser automation (Browser-Use) by default. Modal execution is optional.",
"Current capability status:",
]
lines.extend(_status_line(feature) for feature in features.items())
lines.extend(
[
"When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browserbase API keys.",
"When a Nous-managed feature is active, do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys.",
"If the user is not subscribed and asks for a capability that Nous subscription would unlock or simplify, suggest Nous subscription as one option alongside direct setup or local alternatives.",
"Do not mention subscription unless the user asks about it or it directly solves the current missing capability.",
"Useful commands: hermes setup, hermes setup tools, hermes setup terminal, hermes status.",

17
cli.py
View File

@@ -4929,13 +4929,13 @@ class HermesCLI:
pass
print()
print("🌐 Browser disconnected from live Chrome")
print(" Browser tools reverted to default mode (local headless or Browserbase)")
print(" Browser tools reverted to their configured default mode")
print()
if hasattr(self, '_pending_input'):
self._pending_input.put(
"[System note: The user has disconnected the browser tools from their live Chrome. "
"Browser tools are back to default mode (headless local browser or Browserbase cloud).]"
"Browser tools are back to their configured default mode (headless local browser or the configured cloud provider).]"
)
else:
print()
@@ -4962,10 +4962,17 @@ class HermesCLI:
print(" Status: ✓ reachable")
except (OSError, Exception):
print(" Status: ⚠ not reachable (Chrome may not be running)")
elif os.environ.get("BROWSERBASE_API_KEY"):
print("🌐 Browser: Browserbase (cloud)")
else:
print("🌐 Browser: local headless Chromium (agent-browser)")
try:
from tools.browser_tool import _get_cloud_provider
provider = _get_cloud_provider()
except Exception:
provider = None
if provider is not None:
print(f"🌐 Browser: {provider.provider_name()} (cloud)")
else:
print("🌐 Browser: local headless Chromium (agent-browser)")
print()
print(" /browser connect — connect to your live Chrome")
print(" /browser disconnect — revert to default")

View File

@@ -165,20 +165,20 @@ def _resolve_browser_feature_state(
if browser_provider_explicit:
current_provider = browser_provider or "local"
if current_provider == "browserbase":
provider_available = managed_browser_available or direct_browserbase
available = bool(browser_local_available and direct_browserbase)
active = bool(browser_tool_enabled and available)
return current_provider, available, active, False
if current_provider == "browser-use":
provider_available = managed_browser_available or direct_browser_use
available = bool(browser_local_available and provider_available)
managed = bool(
browser_tool_enabled
and browser_local_available
and managed_browser_available
and not direct_browserbase
and not direct_browser_use
)
active = bool(browser_tool_enabled and available)
return current_provider, available, active, managed
if current_provider == "browser-use":
available = bool(browser_local_available and direct_browser_use)
active = bool(browser_tool_enabled and available)
return current_provider, available, active, False
if current_provider == "camofox":
return current_provider, False, False, False
@@ -187,16 +187,21 @@ def _resolve_browser_feature_state(
active = bool(browser_tool_enabled and available)
return current_provider, available, active, False
if managed_browser_available or direct_browserbase:
if direct_browserbase:
available = bool(browser_local_available)
active = bool(browser_tool_enabled and available)
return "browserbase", available, active, False
if managed_browser_available or direct_browser_use:
available = bool(browser_local_available)
managed = bool(
browser_tool_enabled
and browser_local_available
and managed_browser_available
and not direct_browserbase
and not direct_browser_use
)
active = bool(browser_tool_enabled and available)
return "browserbase", available, active, managed
return "browser-use", available, active, managed
available = bool(browser_local_available)
active = bool(browser_tool_enabled and available)
@@ -260,7 +265,7 @@ def get_nous_subscription_features(
managed_web_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("firecrawl")
managed_image_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("fal-queue")
managed_tts_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("openai-audio")
managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browserbase")
managed_browser_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("browser-use")
managed_modal_available = managed_tools_flag and nous_auth_present and is_managed_tool_gateway_ready("modal")
modal_state = resolve_modal_backend_state(
modal_mode,
@@ -508,7 +513,7 @@ def apply_nous_managed_defaults(
get_env_value("BROWSERBASE_API_KEY")
or get_env_value("BROWSER_USE_API_KEY")
):
browser_cfg["cloud_provider"] = "browserbase"
browser_cfg["cloud_provider"] = "browser-use"
changed.add("browser")
if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"):

View File

@@ -652,14 +652,14 @@ def _print_setup_summary(config: dict, hermes_home):
# Browser tools (local Chromium, Camofox, Browserbase, or Browser Use)
browser_provider = subscription_features.browser.current_provider
if subscription_features.browser.managed_by_nous:
tool_status.append(("Browser Automation (Nous Browserbase)", True, None))
tool_status.append(("Browser Automation (Nous Browser-Use)", True, None))
elif subscription_features.browser.available:
label = "Browser Automation"
if browser_provider:
label = f"Browser Automation ({browser_provider})"
tool_status.append((label, True, None))
else:
missing_browser_hint = "npm install -g agent-browser, set CAMOFOX_URL, or configure Browserbase"
missing_browser_hint = "npm install -g agent-browser, set CAMOFOX_URL, or configure Browser Use or Browserbase"
if browser_provider == "Browserbase":
missing_browser_hint = (
"npm install -g agent-browser and set "

View File

@@ -124,6 +124,7 @@ def show_status(args):
"Firecrawl": "FIRECRAWL_API_KEY",
"Tavily": "TAVILY_API_KEY",
"Browserbase": "BROWSERBASE_API_KEY", # Optional — local browser works without this
"Browser Use": "BROWSER_USE_API_KEY", # Optional — local browser works without this
"FAL": "FAL_KEY",
"Tinker": "TINKER_API_KEY",
"WandB": "WANDB_API_KEY",

View File

@@ -280,21 +280,21 @@ TOOL_CATEGORIES = {
"icon": "🌐",
"providers": [
{
"name": "Nous Subscription (Browserbase cloud)",
"tag": "Managed Browserbase billed to your subscription",
"name": "Nous Subscription (Browser-Use cloud)",
"tag": "Managed Browser-Use billed to your subscription",
"env_vars": [],
"browser_provider": "browserbase",
"browser_provider": "browser-use",
"requires_nous_auth": True,
"managed_nous_feature": "browser",
"override_env_vars": ["BROWSERBASE_API_KEY", "BROWSERBASE_PROJECT_ID"],
"post_setup": "browserbase",
"override_env_vars": ["BROWSER_USE_API_KEY"],
"post_setup": "agent_browser",
},
{
"name": "Local Browser",
"tag": "Free headless Chromium (no API key needed)",
"env_vars": [],
"browser_provider": "local",
"post_setup": "browserbase", # Same npm install for agent-browser
"post_setup": "agent_browser",
},
{
"name": "Browserbase",
@@ -304,7 +304,7 @@ TOOL_CATEGORIES = {
{"key": "BROWSERBASE_PROJECT_ID", "prompt": "Browserbase project ID"},
],
"browser_provider": "browserbase",
"post_setup": "browserbase",
"post_setup": "agent_browser",
},
{
"name": "Browser Use",
@@ -313,7 +313,7 @@ TOOL_CATEGORIES = {
{"key": "BROWSER_USE_API_KEY", "prompt": "Browser Use API key", "url": "https://browser-use.com"},
],
"browser_provider": "browser-use",
"post_setup": "browserbase",
"post_setup": "agent_browser",
},
{
"name": "Camofox",
@@ -372,7 +372,7 @@ TOOLSET_ENV_REQUIREMENTS = {
def _run_post_setup(post_setup_key: str):
"""Run post-setup hooks for tools that need extra installation steps."""
import shutil
if post_setup_key == "browserbase":
if post_setup_key in ("agent_browser", "browserbase"):
node_modules = PROJECT_ROOT / "node_modules" / "agent-browser"
if not node_modules.exists() and shutil.which("npm"):
_print_info(" Installing Node.js dependencies for browser tools...")

View File

@@ -422,7 +422,7 @@ class TestBuildNousSubscriptionPrompt:
"web": NousFeatureState("web", "Web tools", True, True, True, True, False, True, "firecrawl"),
"image_gen": NousFeatureState("image_gen", "Image generation", True, True, True, True, False, True, "Nous Subscription"),
"tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"),
"browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browserbase"),
"browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"),
"modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"),
},
),
@@ -430,9 +430,9 @@ class TestBuildNousSubscriptionPrompt:
prompt = build_nous_subscription_prompt({"web_search", "browser_navigate"})
assert "Browserbase" in prompt
assert "Browser-Use" in prompt
assert "Modal execution is optional" in prompt
assert "do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browserbase API keys" in prompt
assert "do not ask the user for Firecrawl, FAL, OpenAI TTS, or Browser-Use API keys" in prompt
def test_non_subscriber_prompt_includes_relevant_upgrade_guidance(self, monkeypatch):
monkeypatch.setenv("HERMES_ENABLE_NOUS_MANAGED_TOOLS", "1")

View File

@@ -44,7 +44,60 @@ def test_get_nous_subscription_features_prefers_managed_modal_in_auto_mode(monke
assert features.modal.direct_override is False
def test_get_nous_subscription_features_prefers_camofox_over_managed_browserbase(monkeypatch):
def test_get_nous_subscription_features_marks_browser_use_as_managed_when_gateway_ready(monkeypatch):
monkeypatch.setattr(ns, "get_env_value", lambda name: "")
monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True})
monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True)
monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: key == "browser")
monkeypatch.setattr(ns, "_has_agent_browser", lambda: True)
monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "")
monkeypatch.setattr(ns, "has_direct_modal_credentials", lambda: False)
monkeypatch.setattr(
ns,
"is_managed_tool_gateway_ready",
lambda vendor: vendor == "browser-use",
)
features = ns.get_nous_subscription_features(
{"browser": {"cloud_provider": "browser-use"}}
)
assert features.browser.available is True
assert features.browser.active is True
assert features.browser.managed_by_nous is True
assert features.browser.direct_override is False
assert features.browser.current_provider == "Browser Use"
def test_get_nous_subscription_features_prefers_direct_browserbase_when_unconfigured(monkeypatch):
env = {
"BROWSERBASE_API_KEY": "bb-key",
"BROWSERBASE_PROJECT_ID": "bb-project",
}
monkeypatch.setattr(ns, "get_env_value", lambda name: env.get(name, ""))
monkeypatch.setattr(ns, "get_nous_auth_status", lambda: {"logged_in": True})
monkeypatch.setattr(ns, "managed_nous_tools_enabled", lambda: True)
monkeypatch.setattr(ns, "_toolset_enabled", lambda config, key: key == "browser")
monkeypatch.setattr(ns, "_has_agent_browser", lambda: True)
monkeypatch.setattr(ns, "resolve_openai_audio_api_key", lambda: "")
monkeypatch.setattr(ns, "has_direct_modal_credentials", lambda: False)
monkeypatch.setattr(
ns,
"is_managed_tool_gateway_ready",
lambda vendor: vendor == "browser-use",
)
features = ns.get_nous_subscription_features({})
assert features.browser.available is True
assert features.browser.active is True
assert features.browser.managed_by_nous is False
assert features.browser.direct_override is True
assert features.browser.current_provider == "Browserbase"
def test_get_nous_subscription_features_prefers_camofox_over_managed_browser_use(monkeypatch):
env = {"CAMOFOX_URL": "http://localhost:9377"}
monkeypatch.setattr(ns, "get_env_value", lambda name: env.get(name, ""))
@@ -57,11 +110,11 @@ def test_get_nous_subscription_features_prefers_camofox_over_managed_browserbase
monkeypatch.setattr(
ns,
"is_managed_tool_gateway_ready",
lambda vendor: vendor == "browserbase",
lambda vendor: vendor == "browser-use",
)
features = ns.get_nous_subscription_features(
{"browser": {"cloud_provider": "browserbase"}}
{"browser": {"cloud_provider": "browser-use"}}
)
assert features.browser.available is True

View File

@@ -88,7 +88,7 @@ def test_show_status_reports_managed_nous_features(monkeypatch, capsys, tmp_path
"web": NousFeatureState("web", "Web tools", True, True, True, True, False, True, "firecrawl"),
"image_gen": NousFeatureState("image_gen", "Image generation", True, True, True, True, False, True, "Nous Subscription"),
"tts": NousFeatureState("tts", "OpenAI TTS", True, True, True, True, False, True, "OpenAI TTS"),
"browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browserbase"),
"browser": NousFeatureState("browser", "Browser automation", True, True, True, True, False, True, "Browser Use"),
"modal": NousFeatureState("modal", "Modal execution", False, True, False, False, False, True, "local"),
},
),

View File

@@ -330,7 +330,7 @@ def test_first_install_nous_auto_configures_managed_defaults(monkeypatch):
assert config["web"]["backend"] == "firecrawl"
assert config["tts"]["provider"] == "openai"
assert config["browser"]["cloud_provider"] == "browserbase"
assert config["browser"]["cloud_provider"] == "browser-use"
assert configured == []
# ── Platform / toolset consistency ────────────────────────────────────────────

View File

@@ -45,3 +45,35 @@ class TestResolveCdpOverride:
with patch("tools.browser_tool.requests.get", side_effect=RuntimeError("boom")):
assert _resolve_cdp_override(HTTP_URL) == HTTP_URL
def test_normalizes_provider_returned_http_cdp_url_when_creating_session(self, monkeypatch):
import tools.browser_tool as browser_tool
provider = Mock()
provider.create_session.return_value = {
"session_name": "cloud-session",
"bb_session_id": "bu_123",
"cdp_url": "https://cdp.browser-use.example/session",
"features": {"browser_use": True},
}
response = Mock()
response.raise_for_status.return_value = None
response.json.return_value = {"webSocketDebuggerUrl": WS_URL}
monkeypatch.setattr(browser_tool, "_active_sessions", {})
monkeypatch.setattr(browser_tool, "_session_last_activity", {})
monkeypatch.setattr(browser_tool, "_start_browser_cleanup_thread", lambda: None)
monkeypatch.setattr(browser_tool, "_update_session_activity", lambda task_id: None)
monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: "")
monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider)
with patch("tools.browser_tool.requests.get", return_value=response) as mock_get:
session_info = browser_tool._get_session_info("task-browser-use")
assert session_info["cdp_url"] == WS_URL
provider.create_session.assert_called_once_with("task-browser-use")
mock_get.assert_called_once_with(
"https://cdp.browser-use.example/session/json/version",
timeout=10,
)

View File

@@ -113,16 +113,15 @@ def _install_fake_tools_package():
sys.modules["tools.environments.managed_modal"] = types.SimpleNamespace(ManagedModalEnvironment=_DummyEnvironment)
def test_browserbase_explicit_local_mode_stays_local_even_when_managed_gateway_is_ready(tmp_path):
def test_browser_use_explicit_local_mode_stays_local_even_when_managed_gateway_is_ready(tmp_path):
_install_fake_tools_package()
(tmp_path / "config.yaml").write_text("browser:\n cloud_provider: local\n", encoding="utf-8")
env = os.environ.copy()
env.pop("BROWSERBASE_API_KEY", None)
env.pop("BROWSERBASE_PROJECT_ID", None)
env.pop("BROWSER_USE_API_KEY", None)
env.update({
"HERMES_HOME": str(tmp_path),
"TOOL_GATEWAY_USER_TOKEN": "nous-token",
"BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009",
"BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009",
})
with patch.dict(os.environ, env, clear=True):
@@ -135,7 +134,7 @@ def test_browserbase_explicit_local_mode_stays_local_even_when_managed_gateway_i
assert provider is None
def test_browserbase_managed_gateway_adds_idempotency_key_and_persists_external_call_id():
def test_browserbase_does_not_use_gateway_only_configuration():
_install_fake_tools_package()
env = os.environ.copy()
env.pop("BROWSERBASE_API_KEY", None)
@@ -145,177 +144,195 @@ def test_browserbase_managed_gateway_adds_idempotency_key_and_persists_external_
"BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009",
})
class _Response:
status_code = 200
ok = True
text = ""
headers = {"x-external-call-id": "call-browserbase-1"}
def json(self):
return {
"id": "bb_local_session_1",
"connectUrl": "wss://connect.browserbase.example/session",
}
with patch.dict(os.environ, env, clear=True):
browserbase_module = _load_tool_module(
"tools.browser_providers.browserbase",
"browser_providers/browserbase.py",
)
with patch.object(browserbase_module.requests, "post", return_value=_Response()) as post:
provider = browserbase_module.BrowserbaseProvider()
session = provider.create_session("task-browserbase-managed")
sent_headers = post.call_args.kwargs["headers"]
assert sent_headers["X-BB-API-Key"] == "nous-token"
assert sent_headers["X-Idempotency-Key"].startswith("browserbase-session-create:")
assert session["external_call_id"] == "call-browserbase-1"
def test_browserbase_managed_gateway_reuses_pending_idempotency_key_after_timeout():
_install_fake_tools_package()
env = os.environ.copy()
env.pop("BROWSERBASE_API_KEY", None)
env.pop("BROWSERBASE_PROJECT_ID", None)
env.update({
"TOOL_GATEWAY_USER_TOKEN": "nous-token",
"BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009",
})
class _Response:
status_code = 200
ok = True
text = ""
headers = {"x-external-call-id": "call-browserbase-2"}
def json(self):
return {
"id": "bb_local_session_2",
"connectUrl": "wss://connect.browserbase.example/session2",
}
with patch.dict(os.environ, env, clear=True):
browserbase_module = _load_tool_module(
"tools.browser_providers.browserbase",
"browser_providers/browserbase.py",
)
provider = browserbase_module.BrowserbaseProvider()
timeout = browserbase_module.requests.Timeout("timed out")
assert provider.is_configured() is False
def test_browser_use_managed_gateway_adds_idempotency_key_and_persists_external_call_id():
_install_fake_tools_package()
env = os.environ.copy()
env.pop("BROWSER_USE_API_KEY", None)
env.update({
"TOOL_GATEWAY_USER_TOKEN": "nous-token",
"BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009",
})
class _Response:
status_code = 201
ok = True
text = ""
headers = {"x-external-call-id": "call-browser-use-1"}
def json(self):
return {
"id": "bu_local_session_1",
"cdpUrl": "wss://connect.browser-use.example/session",
}
with patch.dict(os.environ, env, clear=True):
browser_use_module = _load_tool_module(
"tools.browser_providers.browser_use",
"browser_providers/browser_use.py",
)
with patch.object(browser_use_module.requests, "post", return_value=_Response()) as post:
provider = browser_use_module.BrowserUseProvider()
session = provider.create_session("task-browser-use-managed")
sent_headers = post.call_args.kwargs["headers"]
sent_payload = post.call_args.kwargs["json"]
assert sent_headers["x-browser-use-api-key"] == "nous-token"
assert sent_headers["X-Idempotency-Key"].startswith("browser-use-session-create:")
assert sent_payload == {"timeout": 5, "proxyCountryCode": "us"}
assert session["external_call_id"] == "call-browser-use-1"
assert session["features"]["managed_gateway"] is True
def test_browser_use_managed_gateway_reuses_pending_idempotency_key_after_timeout():
_install_fake_tools_package()
env = os.environ.copy()
env.pop("BROWSER_USE_API_KEY", None)
env.update({
"TOOL_GATEWAY_USER_TOKEN": "nous-token",
"BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009",
})
class _Response:
status_code = 201
ok = True
text = ""
headers = {"x-external-call-id": "call-browser-use-2"}
def json(self):
return {
"id": "bu_local_session_2",
"cdpUrl": "wss://connect.browser-use.example/session2",
}
with patch.dict(os.environ, env, clear=True):
browser_use_module = _load_tool_module(
"tools.browser_providers.browser_use",
"browser_providers/browser_use.py",
)
provider = browser_use_module.BrowserUseProvider()
timeout = browser_use_module.requests.Timeout("timed out")
with patch.object(
browserbase_module.requests,
browser_use_module.requests,
"post",
side_effect=[timeout, _Response()],
) as post:
try:
provider.create_session("task-browserbase-timeout")
except browserbase_module.requests.Timeout:
provider.create_session("task-browser-use-timeout")
except browser_use_module.requests.Timeout:
pass
else:
raise AssertionError("Expected Browserbase create_session to propagate timeout")
raise AssertionError("Expected Browser Use create_session to propagate timeout")
provider.create_session("task-browserbase-timeout")
provider.create_session("task-browser-use-timeout")
first_headers = post.call_args_list[0].kwargs["headers"]
second_headers = post.call_args_list[1].kwargs["headers"]
assert first_headers["X-Idempotency-Key"] == second_headers["X-Idempotency-Key"]
def test_browserbase_managed_gateway_preserves_pending_idempotency_key_for_in_progress_conflicts():
def test_browser_use_managed_gateway_preserves_pending_idempotency_key_for_in_progress_conflicts():
_install_fake_tools_package()
env = os.environ.copy()
env.pop("BROWSERBASE_API_KEY", None)
env.pop("BROWSERBASE_PROJECT_ID", None)
env.pop("BROWSER_USE_API_KEY", None)
env.update({
"TOOL_GATEWAY_USER_TOKEN": "nous-token",
"BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009",
"BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009",
})
class _ConflictResponse:
status_code = 409
ok = False
text = '{"error":{"code":"CONFLICT","message":"Managed Browserbase session creation is already in progress for this idempotency key"}}'
text = '{"error":{"code":"CONFLICT","message":"Managed Browser-Use session creation is already in progress for this idempotency key"}}'
headers = {}
def json(self):
return {
"error": {
"code": "CONFLICT",
"message": "Managed Browserbase session creation is already in progress for this idempotency key",
"message": "Managed Browser-Use session creation is already in progress for this idempotency key",
}
}
class _SuccessResponse:
status_code = 200
status_code = 201
ok = True
text = ""
headers = {"x-external-call-id": "call-browserbase-4"}
headers = {"x-external-call-id": "call-browser-use-4"}
def json(self):
return {
"id": "bb_local_session_4",
"connectUrl": "wss://connect.browserbase.example/session4",
"id": "bu_local_session_4",
"cdpUrl": "wss://connect.browser-use.example/session4",
}
with patch.dict(os.environ, env, clear=True):
browserbase_module = _load_tool_module(
"tools.browser_providers.browserbase",
"browser_providers/browserbase.py",
browser_use_module = _load_tool_module(
"tools.browser_providers.browser_use",
"browser_providers/browser_use.py",
)
provider = browserbase_module.BrowserbaseProvider()
provider = browser_use_module.BrowserUseProvider()
with patch.object(
browserbase_module.requests,
browser_use_module.requests,
"post",
side_effect=[_ConflictResponse(), _SuccessResponse()],
) as post:
try:
provider.create_session("task-browserbase-conflict")
provider.create_session("task-browser-use-conflict")
except RuntimeError:
pass
else:
raise AssertionError("Expected Browserbase create_session to propagate the in-progress conflict")
raise AssertionError("Expected Browser Use create_session to propagate the in-progress conflict")
provider.create_session("task-browserbase-conflict")
provider.create_session("task-browser-use-conflict")
first_headers = post.call_args_list[0].kwargs["headers"]
second_headers = post.call_args_list[1].kwargs["headers"]
assert first_headers["X-Idempotency-Key"] == second_headers["X-Idempotency-Key"]
def test_browserbase_managed_gateway_uses_new_idempotency_key_for_a_new_session_after_success():
def test_browser_use_managed_gateway_uses_new_idempotency_key_for_a_new_session_after_success():
_install_fake_tools_package()
env = os.environ.copy()
env.pop("BROWSERBASE_API_KEY", None)
env.pop("BROWSERBASE_PROJECT_ID", None)
env.pop("BROWSER_USE_API_KEY", None)
env.update({
"TOOL_GATEWAY_USER_TOKEN": "nous-token",
"BROWSERBASE_GATEWAY_URL": "http://127.0.0.1:3009",
"BROWSER_USE_GATEWAY_URL": "http://127.0.0.1:3009",
})
class _Response:
status_code = 200
status_code = 201
ok = True
text = ""
headers = {"x-external-call-id": "call-browserbase-3"}
headers = {"x-external-call-id": "call-browser-use-3"}
def json(self):
return {
"id": "bb_local_session_3",
"connectUrl": "wss://connect.browserbase.example/session3",
"id": "bu_local_session_3",
"cdpUrl": "wss://connect.browser-use.example/session3",
}
with patch.dict(os.environ, env, clear=True):
browserbase_module = _load_tool_module(
"tools.browser_providers.browserbase",
"browser_providers/browserbase.py",
browser_use_module = _load_tool_module(
"tools.browser_providers.browser_use",
"browser_providers/browser_use.py",
)
provider = browserbase_module.BrowserbaseProvider()
provider = browser_use_module.BrowserUseProvider()
with patch.object(browserbase_module.requests, "post", side_effect=[_Response(), _Response()]) as post:
provider.create_session("task-browserbase-new")
provider.create_session("task-browserbase-new")
with patch.object(browser_use_module.requests, "post", side_effect=[_Response(), _Response()]) as post:
provider.create_session("task-browser-use-new")
provider.create_session("task-browser-use-new")
first_headers = post.call_args_list[0].kwargs["headers"]
second_headers = post.call_args_list[1].kwargs["headers"]

View File

@@ -40,17 +40,17 @@ def test_resolve_managed_tool_gateway_uses_vendor_specific_override():
os.environ,
{
"HERMES_ENABLE_NOUS_MANAGED_TOOLS": "1",
"BROWSERBASE_GATEWAY_URL": "http://browserbase-gateway.localhost:3009/",
"BROWSER_USE_GATEWAY_URL": "http://browser-use-gateway.localhost:3009/",
},
clear=False,
):
result = resolve_managed_tool_gateway(
"browserbase",
"browser-use",
token_reader=lambda: "nous-token",
)
assert result is not None
assert result.gateway_origin == "http://browserbase-gateway.localhost:3009"
assert result.gateway_origin == "http://browser-use-gateway.localhost:3009"
def test_resolve_managed_tool_gateway_is_inactive_without_nous_token():

View File

@@ -2,16 +2,62 @@
import logging
import os
import threading
import uuid
from typing import Dict
from typing import Any, Dict, Optional
import requests
from tools.browser_providers.base import CloudBrowserProvider
from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.tool_backend_helpers import managed_nous_tools_enabled
logger = logging.getLogger(__name__)
_BASE_URL = "https://api.browser-use.com/api/v2"
_DIRECT_BASE_URL = "https://api.browser-use.com/api/v3"
_DEFAULT_MANAGED_TIMEOUT_MINUTES = 5
_DEFAULT_MANAGED_PROXY_COUNTRY_CODE = "us"
_pending_create_keys: Dict[str, str] = {}
_pending_create_keys_lock = threading.Lock()
def _get_or_create_pending_create_key(task_id: str) -> str:
with _pending_create_keys_lock:
existing = _pending_create_keys.get(task_id)
if existing:
return existing
created = f"browser-use-session-create:{uuid.uuid4().hex}"
_pending_create_keys[task_id] = created
return created
def _clear_pending_create_key(task_id: str) -> None:
with _pending_create_keys_lock:
_pending_create_keys.pop(task_id, None)
def _should_preserve_pending_create_key(response: requests.Response) -> bool:
if response.status_code >= 500:
return True
if response.status_code != 409:
return False
try:
payload = response.json()
except Exception:
return False
if not isinstance(payload, dict):
return False
error = payload.get("error")
if not isinstance(error, dict):
return False
message = str(error.get("message") or "").lower()
return "already in progress" in message
class BrowserUseProvider(CloudBrowserProvider):
@@ -21,85 +67,148 @@ class BrowserUseProvider(CloudBrowserProvider):
return "Browser Use"
def is_configured(self) -> bool:
return bool(os.environ.get("BROWSER_USE_API_KEY"))
return self._get_config_or_none() is not None
# ------------------------------------------------------------------
# Session lifecycle
# ------------------------------------------------------------------
def _headers(self) -> Dict[str, str]:
def _get_config_or_none(self) -> Optional[Dict[str, Any]]:
api_key = os.environ.get("BROWSER_USE_API_KEY")
if not api_key:
raise ValueError(
"BROWSER_USE_API_KEY environment variable is required. "
if api_key:
return {
"api_key": api_key,
"base_url": os.environ.get("BROWSER_USE_BASE_URL", _DIRECT_BASE_URL).rstrip("/"),
"managed_mode": False,
}
managed = resolve_managed_tool_gateway("browser-use")
if managed is None:
return None
return {
"api_key": managed.nous_user_token,
"base_url": managed.gateway_origin.rstrip("/"),
"managed_mode": True,
}
def _get_config(self) -> Dict[str, Any]:
config = self._get_config_or_none()
if config is None:
message = (
"Browser Use requires direct BROWSER_USE_API_KEY credentials. "
"Get your key at https://browser-use.com"
)
if managed_nous_tools_enabled():
message = (
"Browser Use requires either direct BROWSER_USE_API_KEY credentials "
"or a managed Browser-Use gateway configuration."
)
raise ValueError(message)
return config
def _headers(self, api_key: str, *, managed_mode: bool) -> Dict[str, str]:
header_name = "x-browser-use-api-key" if managed_mode else "X-Browser-Use-API-Key"
return {
"Content-Type": "application/json",
"X-Browser-Use-API-Key": api_key,
header_name: api_key,
}
def _create_endpoint(self, config: Dict[str, Any]) -> str:
return f"{config['base_url']}/browsers"
def _session_endpoint(self, config: Dict[str, Any], session_id: str) -> str:
return f"{config['base_url']}/browsers/{session_id}"
def _create_payload(self, *, managed_mode: bool) -> Dict[str, object]:
if not managed_mode:
return {}
# Keep gateway-backed sessions short so billing authorization does not
# default to a long Browser-Use timeout when Hermes only needs a task
# scoped ephemeral browser.
return {
"timeout": _DEFAULT_MANAGED_TIMEOUT_MINUTES,
"proxyCountryCode": _DEFAULT_MANAGED_PROXY_COUNTRY_CODE,
}
def create_session(self, task_id: str) -> Dict[str, object]:
config = self._get_config()
managed_mode = bool(config.get("managed_mode"))
headers = self._headers(config["api_key"], managed_mode=managed_mode)
if managed_mode:
headers["X-Idempotency-Key"] = _get_or_create_pending_create_key(task_id)
response = requests.post(
f"{_BASE_URL}/browsers",
headers=self._headers(),
json={},
self._create_endpoint(config),
headers=headers,
json=self._create_payload(managed_mode=managed_mode),
timeout=30,
)
if not response.ok:
if managed_mode and not _should_preserve_pending_create_key(response):
_clear_pending_create_key(task_id)
raise RuntimeError(
f"Failed to create Browser Use session: "
f"{response.status_code} {response.text}"
)
session_data = response.json()
if managed_mode:
_clear_pending_create_key(task_id)
session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"
logger.info("Created Browser Use session %s", session_name)
return {
result = {
"session_name": session_name,
"bb_session_id": session_data["id"],
"cdp_url": session_data["cdpUrl"],
"features": {"browser_use": True},
"features": {"browser_use": True, "managed_gateway": managed_mode},
}
if managed_mode:
result["external_call_id"] = response.headers.get("x-external-call-id")
return result
def close_session(self, session_id: str) -> bool:
try:
config = self._get_config()
except ValueError:
logger.warning("Cannot close Browser Use session %s — missing credentials", session_id)
return False
try:
response = requests.patch(
f"{_BASE_URL}/browsers/{session_id}",
headers=self._headers(),
self._session_endpoint(config, session_id),
headers=self._headers(config["api_key"], managed_mode=bool(config.get("managed_mode"))),
json={"action": "stop"},
timeout=10,
)
if response.status_code in (200, 201, 204):
logger.debug("Successfully closed Browser Use session %s", session_id)
return True
else:
logger.warning(
"Failed to close Browser Use session %s: HTTP %s - %s",
session_id,
response.status_code,
response.text[:200],
)
return False
logger.warning(
"Failed to close Browser Use session %s: HTTP %s - %s",
session_id,
response.status_code,
response.text[:200],
)
return False
except Exception as e:
logger.error("Exception closing Browser Use session %s: %s", session_id, e)
return False
def emergency_cleanup(self, session_id: str) -> None:
api_key = os.environ.get("BROWSER_USE_API_KEY")
if not api_key:
config = self._get_config_or_none()
if config is None:
logger.warning("Cannot emergency-cleanup Browser Use session %s — missing credentials", session_id)
return
try:
requests.patch(
f"{_BASE_URL}/browsers/{session_id}",
headers={
"Content-Type": "application/json",
"X-Browser-Use-API-Key": api_key,
},
self._session_endpoint(config, session_id),
headers=self._headers(config["api_key"], managed_mode=bool(config.get("managed_mode"))),
json={"action": "stop"},
timeout=5,
)

View File

@@ -2,58 +2,14 @@
import logging
import os
import threading
import uuid
from typing import Any, Dict, Optional
import requests
from tools.browser_providers.base import CloudBrowserProvider
from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.tool_backend_helpers import managed_nous_tools_enabled
logger = logging.getLogger(__name__)
_pending_create_keys: Dict[str, str] = {}
_pending_create_keys_lock = threading.Lock()
def _get_or_create_pending_create_key(task_id: str) -> str:
with _pending_create_keys_lock:
existing = _pending_create_keys.get(task_id)
if existing:
return existing
created = f"browserbase-session-create:{uuid.uuid4().hex}"
_pending_create_keys[task_id] = created
return created
def _clear_pending_create_key(task_id: str) -> None:
with _pending_create_keys_lock:
_pending_create_keys.pop(task_id, None)
def _should_preserve_pending_create_key(response: requests.Response) -> bool:
if response.status_code >= 500:
return True
if response.status_code != 409:
return False
try:
payload = response.json()
except Exception:
return False
if not isinstance(payload, dict):
return False
error = payload.get("error")
if not isinstance(error, dict):
return False
message = str(error.get("message") or "").lower()
return "already in progress" in message
class BrowserbaseProvider(CloudBrowserProvider):
@@ -79,35 +35,18 @@ class BrowserbaseProvider(CloudBrowserProvider):
"base_url": os.environ.get("BROWSERBASE_BASE_URL", "https://api.browserbase.com").rstrip("/"),
"managed_mode": False,
}
managed = resolve_managed_tool_gateway("browserbase")
if managed is None:
return None
return {
"api_key": managed.nous_user_token,
"project_id": "managed",
"base_url": managed.gateway_origin.rstrip("/"),
"managed_mode": True,
}
return None
def _get_config(self) -> Dict[str, Any]:
config = self._get_config_or_none()
if config is None:
message = (
raise ValueError(
"Browserbase requires direct BROWSERBASE_API_KEY/BROWSERBASE_PROJECT_ID credentials."
)
if managed_nous_tools_enabled():
message = (
"Browserbase requires either direct BROWSERBASE_API_KEY/BROWSERBASE_PROJECT_ID "
"credentials or a managed Browserbase gateway configuration."
)
raise ValueError(message)
return config
def create_session(self, task_id: str) -> Dict[str, object]:
config = self._get_config()
managed_mode = bool(config.get("managed_mode"))
# Optional env-var knobs
enable_proxies = os.environ.get("BROWSERBASE_PROXIES", "true").lower() != "false"
@@ -147,8 +86,6 @@ class BrowserbaseProvider(CloudBrowserProvider):
"Content-Type": "application/json",
"X-BB-API-Key": config["api_key"],
}
if managed_mode:
headers["X-Idempotency-Key"] = _get_or_create_pending_create_key(task_id)
response = requests.post(
f"{config['base_url']}/v1/sessions",
@@ -161,7 +98,7 @@ class BrowserbaseProvider(CloudBrowserProvider):
keepalive_fallback = False
# Handle 402 — paid features unavailable
if response.status_code == 402 and not managed_mode:
if response.status_code == 402:
if enable_keep_alive:
keepalive_fallback = True
logger.warning(
@@ -191,18 +128,13 @@ class BrowserbaseProvider(CloudBrowserProvider):
)
if not response.ok:
if managed_mode and not _should_preserve_pending_create_key(response):
_clear_pending_create_key(task_id)
raise RuntimeError(
f"Failed to create Browserbase session: "
f"{response.status_code} {response.text}"
)
session_data = response.json()
if managed_mode:
_clear_pending_create_key(task_id)
session_name = f"hermes_{task_id}_{uuid.uuid4().hex[:8]}"
external_call_id = response.headers.get("x-external-call-id") if managed_mode else None
if enable_proxies and not proxies_fallback:
features_enabled["proxies"] = True
@@ -221,7 +153,6 @@ class BrowserbaseProvider(CloudBrowserProvider):
"bb_session_id": session_data["id"],
"cdp_url": session_data["connectUrl"],
"features": features_enabled,
"external_call_id": external_call_id,
}
def close_session(self, session_id: str) -> bool:

View File

@@ -2,11 +2,11 @@
"""
Browser Tool Module
This module provides browser automation tools using agent-browser CLI. It
supports two backends — **Browserbase** (cloud) and **local Chromium** — with
identical agent-facing behaviour. The backend is auto-detected: if
``BROWSERBASE_API_KEY`` is set the cloud service is used; otherwise a local
headless Chromium instance is launched automatically.
This module provides browser automation tools using agent-browser CLI. It
supports local Chromium plus multiple cloud backends, including Browserbase
and Browser Use, with identical agent-facing behaviour. The backend is selected
from config when present and otherwise falls back to any configured cloud
provider before using a local headless Chromium instance.
The tool uses agent-browser's accessibility tree (ariaSnapshot) for text-based
page representation, making it ideal for LLM agents without vision capabilities.
@@ -17,8 +17,7 @@ Features:
``agent-browser install`` (downloads Chromium) or
``agent-browser install --with-deps`` (also installs system libraries for
Debian/Ubuntu/Docker).
- **Cloud mode**: Browserbase cloud execution with stealth features, proxies,
and CAPTCHA solving. Activated when BROWSERBASE_API_KEY is set.
- **Cloud mode**: Browserbase or Browser Use cloud execution when configured.
- Session isolation per task ID
- Text-based page snapshots using accessibility tree
- Element interaction via ref selectors (@e1, @e2, etc.)
@@ -26,8 +25,9 @@ Features:
- Automatic cleanup of browser sessions
Environment Variables:
- BROWSERBASE_API_KEY: API key for Browserbase (enables cloud mode)
- BROWSERBASE_PROJECT_ID: Project ID for Browserbase (required for cloud mode)
- BROWSERBASE_API_KEY: API key for direct Browserbase cloud mode
- BROWSERBASE_PROJECT_ID: Project ID for direct Browserbase cloud mode
- BROWSER_USE_API_KEY: API key for direct Browser Use cloud mode
- BROWSERBASE_PROXIES: Enable/disable residential proxies (default: "true")
- BROWSERBASE_ADVANCED_STEALTH: Enable advanced stealth mode with custom Chromium,
requires Scale Plan (default: "false")
@@ -248,8 +248,7 @@ def _get_cloud_provider() -> Optional[CloudBrowserProvider]:
Reads ``config["browser"]["cloud_provider"]`` once and caches the result
for the process lifetime. An explicit ``local`` provider disables cloud
fallback. If unset, fall back to Browserbase when direct or managed
Browserbase credentials are available.
fallback. If unset, fall back to the first configured cloud provider.
"""
global _cached_cloud_provider, _cloud_provider_resolved
if _cloud_provider_resolved:
@@ -278,23 +277,14 @@ def _get_cloud_provider() -> Optional[CloudBrowserProvider]:
logger.debug("Could not read cloud_provider from config: %s", e)
if _cached_cloud_provider is None:
fallback_provider = BrowserbaseProvider()
if fallback_provider.is_configured():
_cached_cloud_provider = fallback_provider
for fallback_provider in (BrowserbaseProvider(), BrowserUseProvider()):
if fallback_provider.is_configured():
_cached_cloud_provider = fallback_provider
break
return _cached_cloud_provider
def _get_browserbase_config_or_none() -> Optional[Dict[str, Any]]:
"""Return Browserbase direct or managed config, or None when unavailable."""
return BrowserbaseProvider()._get_config_or_none()
def _get_browserbase_config() -> Dict[str, Any]:
"""Return Browserbase config or raise when neither direct nor managed mode is available."""
return BrowserbaseProvider()._get_config()
def _is_local_mode() -> bool:
"""Return True when the browser tool will use a local browser backend."""
if _get_cdp_override():
@@ -615,7 +605,7 @@ BROWSER_TOOL_SCHEMAS = [
},
{
"name": "browser_close",
"description": "Close the browser session and release resources. Call this when done with browser tasks to free up Browserbase session quota.",
"description": "Close the browser session and release resources. Call this when done with browser tasks to free up cloud browser session quota.",
"parameters": {
"type": "object",
"properties": {},
@@ -738,6 +728,11 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]:
session_info = _create_local_session(task_id)
else:
session_info = provider.create_session(task_id)
if session_info.get("cdp_url"):
# Some cloud providers (including Browser-Use v3) return an HTTP
# CDP discovery URL instead of a raw websocket endpoint.
session_info = dict(session_info)
session_info["cdp_url"] = _resolve_cdp_override(str(session_info["cdp_url"]))
with _cleanup_lock:
# Double-check: another thread may have created a session while we
@@ -872,11 +867,11 @@ def _run_browser_command(
return {"success": False, "error": f"Failed to create browser session: {str(e)}"}
# Build the command with the appropriate backend flag.
# Cloud mode: --cdp <websocket_url> connects to Browserbase.
# Cloud mode: --cdp <websocket_url> connects to a remote cloud browser.
# Local mode: --session <name> launches a local headless Chromium.
# The rest of the command (--json, command, args) is identical.
if session_info.get("cdp_url"):
# Cloud mode — connect to remote Browserbase browser via CDP
# Cloud mode — connect to a remote cloud browser via CDP
# IMPORTANT: Do NOT use --session with --cdp. In agent-browser >=0.13,
# --session creates a local browser instance and silently ignores --cdp.
backend_args = ["--cdp", session_info["cdp_url"]]