Files
hermes-agent/tests/tools/test_registry.py

568 lines
18 KiB
Python
Raw Normal View History

"""Tests for the central tool registry."""
import json
import threading
from pathlib import Path
from unittest.mock import patch
from tools.registry import ToolRegistry, discover_builtin_tools
def _dummy_handler(args, **kwargs):
return json.dumps({"ok": True})
def _make_schema(name="test_tool"):
return {
"name": name,
"description": f"A {name}",
"parameters": {"type": "object", "properties": {}},
}
class TestRegisterAndDispatch:
def test_register_and_dispatch(self):
reg = ToolRegistry()
reg.register(
name="alpha",
toolset="core",
schema=_make_schema("alpha"),
handler=_dummy_handler,
)
result = json.loads(reg.dispatch("alpha", {}))
assert result == {"ok": True}
def test_dispatch_passes_args(self):
reg = ToolRegistry()
def echo_handler(args, **kw):
return json.dumps(args)
reg.register(
name="echo",
toolset="core",
schema=_make_schema("echo"),
handler=echo_handler,
)
result = json.loads(reg.dispatch("echo", {"msg": "hi"}))
assert result == {"msg": "hi"}
class TestGetDefinitions:
def test_returns_openai_format(self):
reg = ToolRegistry()
reg.register(
name="t1", toolset="s1", schema=_make_schema("t1"), handler=_dummy_handler
)
reg.register(
name="t2", toolset="s1", schema=_make_schema("t2"), handler=_dummy_handler
)
defs = reg.get_definitions({"t1", "t2"})
assert len(defs) == 2
assert all(d["type"] == "function" for d in defs)
names = {d["function"]["name"] for d in defs}
assert names == {"t1", "t2"}
def test_skips_unavailable_tools(self):
reg = ToolRegistry()
reg.register(
name="available",
toolset="s",
schema=_make_schema("available"),
handler=_dummy_handler,
check_fn=lambda: True,
)
reg.register(
name="unavailable",
toolset="s",
schema=_make_schema("unavailable"),
handler=_dummy_handler,
check_fn=lambda: False,
)
defs = reg.get_definitions({"available", "unavailable"})
assert len(defs) == 1
assert defs[0]["function"]["name"] == "available"
def test_reuses_shared_check_fn_once_per_call(self):
reg = ToolRegistry()
calls = {"count": 0}
def shared_check():
calls["count"] += 1
return True
reg.register(
name="first",
toolset="shared",
schema=_make_schema("first"),
handler=_dummy_handler,
check_fn=shared_check,
)
reg.register(
name="second",
toolset="shared",
schema=_make_schema("second"),
handler=_dummy_handler,
check_fn=shared_check,
)
defs = reg.get_definitions({"first", "second"})
assert len(defs) == 2
assert calls["count"] == 1
class TestUnknownToolDispatch:
def test_returns_error_json(self):
reg = ToolRegistry()
result = json.loads(reg.dispatch("nonexistent", {}))
assert "error" in result
assert "Unknown tool" in result["error"]
class TestToolsetAvailability:
def test_no_check_fn_is_available(self):
reg = ToolRegistry()
reg.register(
name="t", toolset="free", schema=_make_schema(), handler=_dummy_handler
)
assert reg.is_toolset_available("free") is True
def test_check_fn_controls_availability(self):
reg = ToolRegistry()
reg.register(
name="t",
toolset="locked",
schema=_make_schema(),
handler=_dummy_handler,
check_fn=lambda: False,
)
assert reg.is_toolset_available("locked") is False
def test_check_toolset_requirements(self):
reg = ToolRegistry()
reg.register(
name="a",
toolset="ok",
schema=_make_schema(),
handler=_dummy_handler,
check_fn=lambda: True,
)
reg.register(
name="b",
toolset="nope",
schema=_make_schema(),
handler=_dummy_handler,
check_fn=lambda: False,
)
reqs = reg.check_toolset_requirements()
assert reqs["ok"] is True
assert reqs["nope"] is False
def test_get_all_tool_names(self):
reg = ToolRegistry()
reg.register(
name="z_tool", toolset="s", schema=_make_schema(), handler=_dummy_handler
)
reg.register(
name="a_tool", toolset="s", schema=_make_schema(), handler=_dummy_handler
)
assert reg.get_all_tool_names() == ["a_tool", "z_tool"]
def test_get_registered_toolset_names(self):
reg = ToolRegistry()
reg.register(
name="first", toolset="zeta", schema=_make_schema(), handler=_dummy_handler
)
reg.register(
name="second", toolset="alpha", schema=_make_schema(), handler=_dummy_handler
)
reg.register(
name="third", toolset="alpha", schema=_make_schema(), handler=_dummy_handler
)
assert reg.get_registered_toolset_names() == ["alpha", "zeta"]
def test_get_tool_names_for_toolset(self):
reg = ToolRegistry()
reg.register(
name="z_tool", toolset="grouped", schema=_make_schema(), handler=_dummy_handler
)
reg.register(
name="a_tool", toolset="grouped", schema=_make_schema(), handler=_dummy_handler
)
reg.register(
name="other_tool", toolset="other", schema=_make_schema(), handler=_dummy_handler
)
assert reg.get_tool_names_for_toolset("grouped") == ["a_tool", "z_tool"]
def test_handler_exception_returns_error(self):
reg = ToolRegistry()
def bad_handler(args, **kw):
raise RuntimeError("boom")
reg.register(
name="bad", toolset="s", schema=_make_schema(), handler=bad_handler
)
result = json.loads(reg.dispatch("bad", {}))
assert "error" in result
assert "RuntimeError" in result["error"]
class TestCheckFnExceptionHandling:
"""Verify that a raising check_fn is caught rather than crashing."""
def test_is_toolset_available_catches_exception(self):
reg = ToolRegistry()
reg.register(
name="t",
toolset="broken",
schema=_make_schema(),
handler=_dummy_handler,
check_fn=lambda: 1 / 0, # ZeroDivisionError
)
# Should return False, not raise
assert reg.is_toolset_available("broken") is False
def test_check_toolset_requirements_survives_raising_check(self):
reg = ToolRegistry()
reg.register(
name="a",
toolset="good",
schema=_make_schema(),
handler=_dummy_handler,
check_fn=lambda: True,
)
reg.register(
name="b",
toolset="bad",
schema=_make_schema(),
handler=_dummy_handler,
check_fn=lambda: (_ for _ in ()).throw(ImportError("no module")),
)
reqs = reg.check_toolset_requirements()
assert reqs["good"] is True
assert reqs["bad"] is False
def test_get_definitions_skips_raising_check(self):
reg = ToolRegistry()
reg.register(
name="ok_tool",
toolset="s",
schema=_make_schema("ok_tool"),
handler=_dummy_handler,
check_fn=lambda: True,
)
reg.register(
name="bad_tool",
toolset="s2",
schema=_make_schema("bad_tool"),
handler=_dummy_handler,
check_fn=lambda: (_ for _ in ()).throw(OSError("network down")),
)
defs = reg.get_definitions({"ok_tool", "bad_tool"})
assert len(defs) == 1
assert defs[0]["function"]["name"] == "ok_tool"
def test_check_tool_availability_survives_raising_check(self):
reg = ToolRegistry()
reg.register(
name="a",
toolset="works",
schema=_make_schema(),
handler=_dummy_handler,
check_fn=lambda: True,
)
reg.register(
name="b",
toolset="crashes",
schema=_make_schema(),
handler=_dummy_handler,
check_fn=lambda: 1 / 0,
)
available, unavailable = reg.check_tool_availability()
assert "works" in available
assert any(u["name"] == "crashes" for u in unavailable)
class TestBuiltinDiscovery:
def test_matches_previous_manual_builtin_tool_set(self):
expected = {
fix(tests): unstick CI — sweep stale tests from recent merges (#12670) One source fix (web_server category merge) + five test updates that didn't travel with their feature PRs. All 13 failures on the 04-19 CI run on main are now accounted for (5 already self-healed on main; 8 fixed here). Changes - web_server.py: add code_execution → agent to _CATEGORY_MERGE (new singleton section from #11971 broke no-single-field-category invariant). - test_browser_camofox_state: bump hardcoded _config_version 18 → 19 (also from #11971). - test_registry: add browser_cdp_tool (#12369) and discord_tool (#4753) to the expected built-in tool set. - test_run_agent::test_tool_call_accumulation: rewrite fragment chunks — #0f778f77 switched streaming name-accumulation from += to = to fix MiniMax/NIM duplication; the test still encoded the old fragment-per-chunk premise. - test_concurrent_interrupt::_Stub: no-op _apply_pending_steer_to_tool_results — #12116 added this call after concurrent tool batches; the hand-rolled stub was missing it. - test_codex_cli_model_picker: drop the two obsolete tests that asserted auto-import from ~/.codex/auth.json into the Hermes auth store. #12360 explicitly removed that behavior (refresh-token reuse races with Codex CLI / VS Code); adoption is now explicit via `hermes auth openai-codex`. Remaining 3 tests in the file (normal path, Claude Code fallback, negative case) still cover the picker. Validation - scripts/run_tests.sh across all 6 affected files + surrounding tests (54 tests total) all green locally.
2026-04-19 12:39:58 -07:00
"tools.browser_cdp_tool",
fix(tests): resolve 17 persistent CI test failures (#15084) Make the main-branch test suite pass again. Most failures were tests still asserting old shapes after recent refactors; two were real source bugs. Source fixes: - tools/mcp_tool.py: _kill_orphaned_mcp_children() slept 2s on every shutdown even when no tracked PIDs existed, making test_shutdown_is_parallel measure ~3s for 3 parallel 1s shutdowns. Early-return when pids is empty. - hermes_cli/tips.py: tip 105 was 157 chars; corpus max is 150. Test fixes (mostly stale mock targets / missing fixture fields): - test_zombie_process_cleanup, test_agent_cache: patch run_agent.cleanup_vm (the local name bound at import), not tools.terminal_tool.cleanup_vm. - test_browser_camofox: patch tools.browser_camofox.load_config, not hermes_cli.config.load_config (the source module, not the resolved one). - test_flush_memories_codex._chat_response_with_memory_call: add finish_reason, tool_call.id, tool_call.type so the chat_completions transport normalizer doesn't AttributeError. - test_concurrent_interrupt: polling_tool signature now accepts messages= kwarg that _invoke_tool() passes through. - test_minimax_provider: add _fallback_chain=[] to the __new__'d agent so switch_model() doesn't AttributeError. - test_skills_config: SKILLS_DIR MagicMock + .rglob stopped working after the scanner switched to agent.skill_utils.iter_skill_index_files (os.walk-based). Point SKILLS_DIR at a real tmp_path and patch agent.skill_utils.get_external_skills_dirs. - test_browser_cdp_tool: browser_cdp toolset was intentionally split into 'browser-cdp' (commit 96b0f3700) so its stricter check_fn doesn't gate the whole browser toolset; test now expects 'browser-cdp'. - test_registry: add tools.browser_dialog_tool to the expected builtin-discovery set (PR #14540 added it). - test_file_tools TestPatchHints: patch_tool surfaces hints as a '_hint' key on the JSON payload, not inline '[Hint: ...' text. - test_write_deny test_hermes_env: resolve .env via get_hermes_home() so the path matches the profile-aware denylist under hermetic HERMES_HOME. - test_checkpoint_manager test_falls_back_to_parent: guard the walk-up so a stray /tmp/pyproject.toml on the host doesn't pick up /tmp as the project root. - test_quick_commands: set cli.session_id in the __new__'d CLI so the alias-args path doesn't trip AttributeError when fuzzy-matching leaks a skill command across xdist test distribution.
2026-04-24 03:46:46 -07:00
"tools.browser_dialog_tool",
"tools.browser_tool",
"tools.clarify_tool",
"tools.code_execution_tool",
"tools.cronjob_tools",
"tools.delegate_tool",
fix(tests): unstick CI — sweep stale tests from recent merges (#12670) One source fix (web_server category merge) + five test updates that didn't travel with their feature PRs. All 13 failures on the 04-19 CI run on main are now accounted for (5 already self-healed on main; 8 fixed here). Changes - web_server.py: add code_execution → agent to _CATEGORY_MERGE (new singleton section from #11971 broke no-single-field-category invariant). - test_browser_camofox_state: bump hardcoded _config_version 18 → 19 (also from #11971). - test_registry: add browser_cdp_tool (#12369) and discord_tool (#4753) to the expected built-in tool set. - test_run_agent::test_tool_call_accumulation: rewrite fragment chunks — #0f778f77 switched streaming name-accumulation from += to = to fix MiniMax/NIM duplication; the test still encoded the old fragment-per-chunk premise. - test_concurrent_interrupt::_Stub: no-op _apply_pending_steer_to_tool_results — #12116 added this call after concurrent tool batches; the hand-rolled stub was missing it. - test_codex_cli_model_picker: drop the two obsolete tests that asserted auto-import from ~/.codex/auth.json into the Hermes auth store. #12360 explicitly removed that behavior (refresh-token reuse races with Codex CLI / VS Code); adoption is now explicit via `hermes auth openai-codex`. Remaining 3 tests in the file (normal path, Claude Code fallback, negative case) still cover the picker. Validation - scripts/run_tests.sh across all 6 affected files + surrounding tests (54 tests total) all green locally.
2026-04-19 12:39:58 -07:00
"tools.discord_tool",
test: update stale tests to match current code (#11963) Seven test files were asserting against older function signatures and behaviors. CI has been red on main because of accumulated test debt from other PRs; this catches the tests up. - tests/agent/test_subagent_progress.py: _build_child_progress_callback now takes (task_index, goal, parent_agent, task_count=1); update all call sites and rewrite tests that assumed the old 'batch-only' relay semantics (now relays per-tool AND flushes a summary at BATCH_SIZE). Renamed test_thinking_not_relayed_to_gateway → test_thinking_relayed_to_gateway since thinking IS now relayed as subagent.thinking. - tests/tools/test_delegate.py: _build_child_agent now requires task_count; add task_count=1 to all 8 call sites. - tests/cli/test_reasoning_command.py: AIAgent gained _stream_callback; stub it on the two test agent helpers that use spec=AIAgent / __new__. - tests/hermes_cli/test_cmd_update.py: cmd_update now runs npm install in repo root + ui-tui/ + web/ and 'npm run build' in web/; assert all four subprocess calls in the expected order. - tests/hermes_cli/test_model_validation.py: dissimilar unknown models now return accepted=False (previously True with warning); update both affected tests. - tests/tools/test_registry.py: include feishu_doc_tool and feishu_drive_tool in the expected builtin tool set. - tests/gateway/test_voice_command.py: missing-voice-deps message now suggests 'pip install PyNaCl' not 'hermes-agent[messaging]'. 411/411 pass locally across these 7 files.
2026-04-17 21:35:30 -07:00
"tools.feishu_doc_tool",
"tools.feishu_drive_tool",
"tools.file_tools",
"tools.homeassistant_tool",
"tools.image_generation_tool",
"tools.memory_tool",
"tools.mixture_of_agents_tool",
"tools.process_registry",
"tools.rl_training_tool",
"tools.send_message_tool",
"tools.session_search_tool",
"tools.skill_manager_tool",
"tools.skills_tool",
"tools.terminal_tool",
"tools.todo_tool",
"tools.tts_tool",
"tools.vision_tools",
"tools.web_tools",
}
with patch("tools.registry.importlib.import_module"):
imported = discover_builtin_tools(Path(__file__).resolve().parents[2] / "tools")
assert set(imported) == expected
def test_imports_only_self_registering_modules(self, tmp_path):
tools_dir = tmp_path / "tools"
tools_dir.mkdir()
(tools_dir / "__init__.py").write_text("", encoding="utf-8")
(tools_dir / "registry.py").write_text("", encoding="utf-8")
(tools_dir / "alpha.py").write_text(
"from tools.registry import registry\nregistry.register(name='alpha', toolset='x', schema={}, handler=lambda *_a, **_k: '{}')\n",
encoding="utf-8",
)
(tools_dir / "beta.py").write_text("VALUE = 1\n", encoding="utf-8")
with patch("tools.registry.importlib.import_module") as mock_import:
imported = discover_builtin_tools(tools_dir)
assert imported == ["tools.alpha"]
mock_import.assert_called_once_with("tools.alpha")
def test_skips_mcp_tool_even_if_it_registers(self, tmp_path):
tools_dir = tmp_path / "tools"
tools_dir.mkdir()
(tools_dir / "__init__.py").write_text("", encoding="utf-8")
(tools_dir / "mcp_tool.py").write_text(
"from tools.registry import registry\nregistry.register(name='mcp_alpha', toolset='mcp-test', schema={}, handler=lambda *_a, **_k: '{}')\n",
encoding="utf-8",
)
(tools_dir / "alpha.py").write_text(
"from tools.registry import registry\nregistry.register(name='alpha', toolset='x', schema={}, handler=lambda *_a, **_k: '{}')\n",
encoding="utf-8",
)
with patch("tools.registry.importlib.import_module") as mock_import:
imported = discover_builtin_tools(tools_dir)
assert imported == ["tools.alpha"]
mock_import.assert_called_once_with("tools.alpha")
class TestEmojiMetadata:
"""Verify per-tool emoji registration and lookup."""
def test_emoji_stored_on_entry(self):
reg = ToolRegistry()
reg.register(
name="t", toolset="s", schema=_make_schema(),
handler=_dummy_handler, emoji="🔥",
)
assert reg._tools["t"].emoji == "🔥"
def test_get_emoji_returns_registered(self):
reg = ToolRegistry()
reg.register(
name="t", toolset="s", schema=_make_schema(),
handler=_dummy_handler, emoji="🎯",
)
assert reg.get_emoji("t") == "🎯"
def test_get_emoji_returns_default_when_unset(self):
reg = ToolRegistry()
reg.register(
name="t", toolset="s", schema=_make_schema(),
handler=_dummy_handler,
)
assert reg.get_emoji("t") == ""
assert reg.get_emoji("t", default="🔧") == "🔧"
def test_get_emoji_returns_default_for_unknown_tool(self):
reg = ToolRegistry()
assert reg.get_emoji("nonexistent") == ""
assert reg.get_emoji("nonexistent", default="") == ""
def test_emoji_empty_string_treated_as_unset(self):
reg = ToolRegistry()
reg.register(
name="t", toolset="s", schema=_make_schema(),
handler=_dummy_handler, emoji="",
)
assert reg.get_emoji("t") == ""
class TestEntryLookup:
def test_get_entry_returns_registered_entry(self):
reg = ToolRegistry()
reg.register(
name="alpha", toolset="core", schema=_make_schema("alpha"), handler=_dummy_handler
)
entry = reg.get_entry("alpha")
assert entry is not None
assert entry.name == "alpha"
assert entry.toolset == "core"
def test_get_entry_returns_none_for_unknown_tool(self):
reg = ToolRegistry()
assert reg.get_entry("missing") is None
class TestSecretCaptureResultContract:
def test_secret_request_result_does_not_include_secret_value(self):
result = {
"success": True,
"stored_as": "TENOR_API_KEY",
"validated": False,
}
assert "secret" not in json.dumps(result).lower()
class TestThreadSafety:
def test_get_available_toolsets_uses_coherent_snapshot(self, monkeypatch):
reg = ToolRegistry()
reg.register(
name="alpha",
toolset="gated",
schema=_make_schema("alpha"),
handler=_dummy_handler,
check_fn=lambda: False,
)
entries, toolset_checks = reg._snapshot_state()
def snapshot_then_mutate():
reg.deregister("alpha")
return entries, toolset_checks
monkeypatch.setattr(reg, "_snapshot_state", snapshot_then_mutate)
toolsets = reg.get_available_toolsets()
assert toolsets["gated"]["available"] is False
assert toolsets["gated"]["tools"] == ["alpha"]
def test_check_tool_availability_tolerates_concurrent_register(self):
reg = ToolRegistry()
check_started = threading.Event()
writer_done = threading.Event()
errors = []
result_holder = {}
writer_completed_during_check = {}
def blocking_check():
check_started.set()
writer_completed_during_check["value"] = writer_done.wait(timeout=1)
return True
reg.register(
name="alpha",
toolset="gated",
schema=_make_schema("alpha"),
handler=_dummy_handler,
check_fn=blocking_check,
)
reg.register(
name="beta",
toolset="plain",
schema=_make_schema("beta"),
handler=_dummy_handler,
)
def reader():
try:
result_holder["value"] = reg.check_tool_availability()
except Exception as exc: # pragma: no cover - exercised on failure only
errors.append(exc)
def writer():
assert check_started.wait(timeout=1)
reg.register(
name="gamma",
toolset="new",
schema=_make_schema("gamma"),
handler=_dummy_handler,
)
writer_done.set()
reader_thread = threading.Thread(target=reader)
writer_thread = threading.Thread(target=writer)
reader_thread.start()
writer_thread.start()
reader_thread.join(timeout=2)
writer_thread.join(timeout=2)
assert not reader_thread.is_alive()
assert not writer_thread.is_alive()
assert writer_completed_during_check["value"] is True
assert errors == []
available, unavailable = result_holder["value"]
assert "gated" in available
assert "plain" in available
assert unavailable == []
def test_get_available_toolsets_tolerates_concurrent_deregister(self):
reg = ToolRegistry()
check_started = threading.Event()
writer_done = threading.Event()
errors = []
result_holder = {}
writer_completed_during_check = {}
def blocking_check():
check_started.set()
writer_completed_during_check["value"] = writer_done.wait(timeout=1)
return True
reg.register(
name="alpha",
toolset="gated",
schema=_make_schema("alpha"),
handler=_dummy_handler,
check_fn=blocking_check,
)
reg.register(
name="beta",
toolset="plain",
schema=_make_schema("beta"),
handler=_dummy_handler,
)
def reader():
try:
result_holder["value"] = reg.get_available_toolsets()
except Exception as exc: # pragma: no cover - exercised on failure only
errors.append(exc)
def writer():
assert check_started.wait(timeout=1)
reg.deregister("beta")
writer_done.set()
reader_thread = threading.Thread(target=reader)
writer_thread = threading.Thread(target=writer)
reader_thread.start()
writer_thread.start()
reader_thread.join(timeout=2)
writer_thread.join(timeout=2)
assert not reader_thread.is_alive()
assert not writer_thread.is_alive()
assert writer_completed_during_check["value"] is True
assert errors == []
toolsets = result_holder["value"]
assert "gated" in toolsets
assert toolsets["gated"]["available"] is True