mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-01 16:31:56 +08:00
CI Tests workflow has been red on main for 40+ consecutive runs. This commit recovers every failure visible in run 25130722163 (most recent completed run prior to this PR). Root causes, by group: Test-mock drift after product landed (fix: update mocks) - test_mcp_structured_content / test_mcp_dynamic_discovery (6 tests): product added _rpc_lock (#02ae15222) and _schedule_tools_refresh (#1350d12b0) without updating sibling test files. Install a real asyncio.Lock inside the fake run-loop and patch at _schedule_tools_refresh. - test_session.py: renamed normalize_whatsapp_identifier → canonical_whatsapp_identifier upstream; keep a local alias so the legacy tests keep working. - test_run_progress_topics Slack DM test: PR #8006 made Slack default tool_progress=off; explicitly set it to 'all' in the test fixture so the progress-callback path still runs. Also read tool_progress_callback at call time rather than freezing it in FakeAgent.__init__ — production assigns it AFTER construction. - test_tui_gateway_server session-create/close race: session.create now defers _start_agent_build behind a 50ms timer — wait for the build thread to enter _make_agent before closing, otherwise the orphan-cleanup path never runs. - test_protocol session.resume: product get_messages_as_conversation now takes include_ancestors kwarg; accept **_kwargs in the test stub. - test_copilot_acp_client redaction: redactor is OFF by default (snapshots HERMES_REDACT_SECRETS at import); patch agent.redact._REDACT_ENABLED=True for the duration of the test. - test_minimax_provider: after #17171, dots in non-Anthropic model names stay dots even with preserve_dots=False. Assert the new invariant rather than the old 'broken for MiniMax' behavior. - test_update_autostash: updater now scans `ps -A` for dashboard PIDs; the test's catch-all subprocess.run stub needed stdout/stderr fields. - test_accretion_caps: read_timestamps dict is populated lazily when os.path.getmtime succeeds. 
Use .get("read_timestamps", {}) to tolerate CI filesystems where the stat races file creation. Change-detector tests (fix: rewrite as structural invariants) - test_credential_sources_registry_has_expected_steps: was a frozen set comparison that broke when minimax-oauth was added. Rewrite as an invariant check (every step has description, no dupes, core steps present) per AGENTS.md 'don't write change-detector tests'. xdist ordering / test pollution (fix: reset state, use module-local patches) - test_setup vercel: sibling test saved VERCEL_PROJECT_ID='project' to os.environ via save_env_value() and never cleared it. monkeypatch.delenv the VERCEL_* vars in the link-file test. - test_clipboard TestIsWsl: GitHub Actions is on Azure VMs whose real /proc/version often contains 'microsoft'. Patching builtins.open with mock_open didn't reliably intercept hermes_constants.is_wsl's call in xdist workers that had already cached _wsl_detected=True from an earlier test. Patch hermes_constants.open directly and add teardown_method to reset the cache after each test. Pytest-asyncio cancellation hangs (fix: bound product await with timeout) - test_session_split_brain_11016 (3 params) + test_gateway_shutdown cancel-inflight: under pytest-asyncio 1.3.0, 'await task' and 'asyncio.gather(cancelled_tasks)' can stall for 30s when the cancelled task's finally block awaits typing-task cleanup. Bound both with asyncio.wait_for(..., timeout=5.0) and asyncio.shield — the stragglers are released from adapter tracking and allowed to finish unwinding in the background. This is also a legitimate hardening: a wedged finally shouldn't stall the caller's dispatch or a gateway shutdown. Orphan UI config (fix: merge tiny tab into messaging category) - test_web_server test_no_single_field_categories: the telegram.reactions config field lived in its own 'telegram' schema category with no siblings. Fold it under 'discord' via _CATEGORY_MERGE so the dashboard doesn't render an orphan single-field tab. 
Local verification: 38/38 originally-failing tests pass; 4044/4044 gateway tests pass; 684/684 targeted subset (all 16 touched test files) passes.
166 lines
6.6 KiB
Python
166 lines
6.6 KiB
Python
"""Tests for MCP dynamic tool discovery (notifications/tools/list_changed)."""
|
|
|
|
import asyncio
|
|
from types import SimpleNamespace
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from tools.mcp_tool import MCPServerTask, _register_server_tools
|
|
from tools.registry import ToolRegistry
|
|
|
|
|
|
def _make_mcp_tool(name: str, desc: str = ""):
|
|
return SimpleNamespace(name=name, description=desc, inputSchema=None)
|
|
|
|
|
|
class TestRegisterServerTools:
    """Exercise the standalone ``_register_server_tools`` helper."""

    @pytest.fixture
    def mock_registry(self):
        """Return a new, empty ``ToolRegistry`` instance."""
        return ToolRegistry()

    def test_exposes_live_server_aliases(self, mock_registry):
        """Registered MCP tools are reachable via live raw-server aliases."""
        server_name = "my_srv"
        expected_tool = "mcp_my_srv_my_tool"

        server = MCPServerTask(server_name)
        server._tools = [_make_mcp_tool("my_tool", "desc")]
        server.session = MagicMock()
        from toolsets import resolve_toolset, validate_toolset

        # Everything below runs with the module-level registry swapped for
        # the fixture instance, so the assertions look at that instance.
        with patch("tools.registry.registry", mock_registry):
            registered = _register_server_tools(server_name, server, {})

            assert expected_tool in registered
            assert expected_tool in mock_registry.get_all_tool_names()
            assert validate_toolset(server_name) is True
            assert expected_tool in resolve_toolset(server_name)
|
|
|
|
|
|
class TestRefreshTools:
    """Cover the nuke-and-repave cycle of ``MCPServerTask._refresh_tools``."""

    @pytest.fixture
    def mock_registry(self):
        """Return a new, empty ``ToolRegistry`` instance."""
        return ToolRegistry()

    @pytest.mark.asyncio
    async def test_nuke_and_repave(self, mock_registry):
        """Old tools are removed and new tools registered on refresh."""
        stale_tool = "mcp_live_srv_old_tool"
        fresh_tool = "mcp_live_srv_new_tool"

        server = MCPServerTask("live_srv")
        server._refresh_lock = asyncio.Lock()
        server._config = {}
        from toolsets import resolve_toolset

        # Starting point: exactly one previously-registered tool that the
        # server object also tracks as its own.
        mock_registry.register(
            name=stale_tool,
            toolset="mcp-live_srv",
            schema={},
            handler=lambda x: x,
            check_fn=lambda: True,
            is_async=False,
            description="",
            emoji="",
        )
        server._registered_tool_names = [stale_tool]

        # The fake session answers list_tools() with a single new tool.
        listing = SimpleNamespace(tools=[_make_mcp_tool("new_tool", "new behavior")])
        server.session = SimpleNamespace(list_tools=AsyncMock(return_value=listing))

        with patch("tools.registry.registry", mock_registry):
            await server._refresh_tools()

            assert stale_tool not in mock_registry.get_all_tool_names()
            assert stale_tool not in resolve_toolset("live_srv")
            assert fresh_tool in mock_registry.get_all_tool_names()
            assert fresh_tool in resolve_toolset("live_srv")
            assert server._registered_tool_names == [fresh_tool]
|
|
|
|
|
|
class TestMessageHandler:
    """Check what ``MCPServerTask._make_message_handler`` dispatches on."""

    @pytest.mark.asyncio
    async def test_dispatches_tool_list_changed(self):
        """A tools/list_changed notification schedules exactly one refresh."""
        from tools.mcp_tool import _MCP_NOTIFICATION_TYPES

        if not _MCP_NOTIFICATION_TYPES:
            pytest.skip("MCP SDK ToolListChangedNotification not available")

        from mcp.types import ServerNotification, ToolListChangedNotification

        server = MCPServerTask("notif_srv")
        list_changed = ToolListChangedNotification(
            method="notifications/tools/list_changed"
        )
        notification = ServerNotification(root=list_changed)

        # The product schedules the refresh as a background task through
        # _schedule_tools_refresh instead of awaiting it directly (to avoid
        # wedging the stdio JSON-RPC stream). Patching that scheduler seam
        # lets the test assert dispatch without reaching into
        # asyncio.create_task internals.
        with patch.object(MCPServerTask, "_schedule_tools_refresh") as mock_schedule:
            handler = server._make_message_handler()
            await handler(notification)
            mock_schedule.assert_called_once()

    @pytest.mark.asyncio
    async def test_ignores_exceptions_and_other_messages(self):
        """Exceptions and unrecognised messages never schedule a refresh."""
        server = MCPServerTask("notif_srv")
        non_triggering = (
            # Exceptions should not trigger refresh
            RuntimeError("connection dead"),
            # Unknown message types should not trigger refresh
            {"jsonrpc": "2.0", "result": "ok"},
        )

        with patch.object(MCPServerTask, "_schedule_tools_refresh") as mock_schedule:
            handler = server._make_message_handler()
            for message in non_triggering:
                await handler(message)
            mock_schedule.assert_not_called()
|
|
|
|
|
|
class TestDeregister:
    """Exercise ``ToolRegistry.deregister`` and its toolset bookkeeping."""

    def test_removes_tool(self):
        """A deregistered tool no longer appears among the tool names."""
        registry = ToolRegistry()
        registry.register(name="foo", toolset="ts1", schema={}, handler=lambda x: x)
        assert "foo" in registry.get_all_tool_names()

        registry.deregister("foo")

        assert "foo" not in registry.get_all_tool_names()

    def test_cleans_up_toolset_check(self):
        """Removing the only tool of a toolset drops the toolset's check."""

        def always_available():
            return True

        registry = ToolRegistry()
        registry.register(
            name="foo",
            toolset="ts1",
            schema={},
            handler=lambda x: x,
            check_fn=always_available,
        )
        assert registry.is_toolset_available("ts1")

        registry.deregister("foo")

        # Toolset check should be gone since no tools remain
        assert "ts1" not in registry._toolset_checks

    def test_preserves_toolset_check_if_other_tools_remain(self):
        """The toolset check survives while another tool still uses it."""

        def always_available():
            return True

        registry = ToolRegistry()
        registry.register(
            name="foo",
            toolset="ts1",
            schema={},
            handler=lambda x: x,
            check_fn=always_available,
        )
        registry.register(name="bar", toolset="ts1", schema={}, handler=lambda x: x)

        registry.deregister("foo")

        # bar still in ts1, so check should remain
        assert "ts1" in registry._toolset_checks

    def test_removes_toolset_alias_when_last_tool_is_removed(self):
        """An alias stops resolving once its toolset has no tools left."""
        registry = ToolRegistry()
        registry.register(name="foo", toolset="mcp-srv", schema={}, handler=lambda x: x)
        registry.register_toolset_alias("srv", "mcp-srv")

        registry.deregister("foo")

        assert registry.get_toolset_alias_target("srv") is None

    def test_preserves_toolset_alias_while_toolset_still_exists(self):
        """An alias keeps resolving while its toolset still has a tool."""
        registry = ToolRegistry()
        for tool_name in ("foo", "bar"):
            registry.register(
                name=tool_name, toolset="mcp-srv", schema={}, handler=lambda x: x
            )
        registry.register_toolset_alias("srv", "mcp-srv")

        registry.deregister("foo")

        assert registry.get_toolset_alias_target("srv") == "mcp-srv"

    def test_noop_for_unknown_tool(self):
        """Deregistering a name that was never registered does not raise."""
        registry = ToolRegistry()
        registry.deregister("nonexistent")  # Should not raise
|