mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-02 00:41:43 +08:00
CI Tests workflow has been red on main for 40+ consecutive runs. This commit recovers every failure visible in run 25130722163 (most recent completed run prior to this PR). Root causes, by group: Test-mock drift after product landed (fix: update mocks) - test_mcp_structured_content / test_mcp_dynamic_discovery (6 tests): product added _rpc_lock (#02ae15222) and _schedule_tools_refresh (#1350d12b0) without updating sibling test files. Install a real asyncio.Lock inside the fake run-loop and patch at _schedule_tools_refresh. - test_session.py: renamed normalize_whatsapp_identifier → canonical_whatsapp_identifier upstream; keep a local alias so the legacy tests keep working. - test_run_progress_topics Slack DM test: PR #8006 made Slack default tool_progress=off; explicitly set it to 'all' in the test fixture so the progress-callback path still runs. Also read tool_progress_callback at call time rather than freezing it in FakeAgent.__init__ — production assigns it AFTER construction. - test_tui_gateway_server session-create/close race: session.create now defers _start_agent_build behind a 50ms timer — wait for the build thread to enter _make_agent before closing, otherwise the orphan-cleanup path never runs. - test_protocol session.resume: product get_messages_as_conversation now takes include_ancestors kwarg; accept **_kwargs in the test stub. - test_copilot_acp_client redaction: redactor is OFF by default (snapshots HERMES_REDACT_SECRETS at import); patch agent.redact._REDACT_ENABLED=True for the duration of the test. - test_minimax_provider: after #17171, dots in non-Anthropic model names stay dots even with preserve_dots=False. Assert the new invariant rather than the old 'broken for MiniMax' behavior. - test_update_autostash: updater now scans `ps -A` for dashboard PIDs; the test's catch-all subprocess.run stub needed stdout/stderr fields. - test_accretion_caps: read_timestamps dict is populated lazily when os.path.getmtime succeeds.
Use .get("read_timestamps", {}) to tolerate CI filesystems where the stat races file creation. Change-detector tests (fix: rewrite as structural invariants) - test_credential_sources_registry_has_expected_steps: was a frozen set comparison that broke when minimax-oauth was added. Rewrite as an invariant check (every step has description, no dupes, core steps present) per AGENTS.md 'don't write change-detector tests'. xdist ordering / test pollution (fix: reset state, use module-local patches) - test_setup vercel: sibling test saved VERCEL_PROJECT_ID='project' to os.environ via save_env_value() and never cleared it. monkeypatch.delenv the VERCEL_* vars in the link-file test. - test_clipboard TestIsWsl: GitHub Actions is on Azure VMs whose real /proc/version often contains 'microsoft'. Patching builtins.open with mock_open didn't reliably intercept hermes_constants.is_wsl's call in xdist workers that had already cached _wsl_detected=True from an earlier test. Patch hermes_constants.open directly and add teardown_method to reset the cache after each test. Pytest-asyncio cancellation hangs (fix: bound product await with timeout) - test_session_split_brain_11016 (3 params) + test_gateway_shutdown cancel-inflight: under pytest-asyncio 1.3.0, 'await task' and 'asyncio.gather(cancelled_tasks)' can stall for 30s when the cancelled task's finally block awaits typing-task cleanup. Bound both with asyncio.wait_for(..., timeout=5.0) and asyncio.shield — the stragglers are released from adapter tracking and allowed to finish unwinding in the background. This is also a legitimate hardening: a wedged finally shouldn't stall the caller's dispatch or a gateway shutdown. Orphan UI config (fix: merge tiny tab into messaging category) - test_web_server test_no_single_field_categories: the telegram.reactions config field lived in its own 'telegram' schema category with no siblings. Fold it under 'discord' via _CATEGORY_MERGE so the dashboard doesn't render an orphan single-field tab. 
Local verification: 38/38 originally-failing tests pass; 4044/4044 gateway tests pass; 684/684 targeted subset (all 16 touched test files) passes.
143 lines
5.5 KiB
Python
143 lines
5.5 KiB
Python
"""Tests for MCP tool structuredContent preservation."""
|
|
|
|
import asyncio
|
|
import json
|
|
from types import SimpleNamespace
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
from tools import mcp_tool
|
|
|
|
|
|
class _FakeContentBlock:
|
|
"""Minimal content block with .text and .type attributes."""
|
|
|
|
def __init__(self, text: str, block_type: str = "text"):
|
|
self.text = text
|
|
self.type = block_type
|
|
|
|
|
|
class _FakeCallToolResult:
|
|
"""Minimal CallToolResult stand-in.
|
|
|
|
Uses camelCase ``structuredContent`` / ``isError`` to match the real
|
|
MCP SDK Pydantic model (``mcp.types.CallToolResult``).
|
|
"""
|
|
|
|
def __init__(self, content, is_error=False, structuredContent=None):
|
|
self.content = content
|
|
self.isError = is_error
|
|
self.structuredContent = structuredContent
|
|
|
|
|
|
def _fake_run_on_mcp_loop(coro, timeout=30):
    """Drive *coro* to completion on a brand-new, throwaway event loop.

    Args:
        coro: The awaitable to run.
        timeout: Accepted but not used by this fake.

    Returns:
        Whatever *coro* returns.
    """

    async def _with_rpc_locks():
        # The lock has to be built while the loop that will await it is
        # running; otherwise asyncio complains that it is "attached to a
        # different loop". Give one to every registered server that does
        # not have a lock yet.
        for server in tuple(mcp_tool._servers.values()):
            if getattr(server, "_rpc_lock", None) is not None:
                continue
            server._rpc_lock = asyncio.Lock()
        return await coro

    event_loop = asyncio.new_event_loop()
    try:
        return event_loop.run_until_complete(_with_rpc_locks())
    finally:
        # Always release the loop, even when the coroutine raised.
        event_loop.close()
|
|
|
|
|
|
@pytest.fixture
def _patch_mcp_server():
    """Register a fake "test-server" and reroute the MCP loop runner.

    While the fixture is active, ``mcp_tool._servers`` contains a fake
    server under the key ``"test-server"`` and
    ``tools.mcp_tool._run_on_mcp_loop`` is replaced by
    ``_fake_run_on_mcp_loop``, so ``_make_tool_handler`` can be exercised.

    Yields:
        The ``MagicMock`` used as the fake server's ``session``.
    """
    session_mock = MagicMock()
    # `_rpc_lock` starts out as None on purpose: the tool-handler call path
    # acquires it to serialize JSON-RPC against the server, and the lock must
    # be created inside the fresh loop that _fake_run_on_mcp_loop spins up
    # rather than here at fixture setup time.
    server_stub = SimpleNamespace(session=session_mock, _rpc_lock=None)

    registry_patch = patch.dict(mcp_tool._servers, {"test-server": server_stub})
    runner_patch = patch(
        "tools.mcp_tool._run_on_mcp_loop",
        side_effect=_fake_run_on_mcp_loop,
    )
    with registry_patch:
        with runner_patch:
            yield session_mock
|
|
|
|
|
|
class TestStructuredContentPreservation:
    """Verify that a tool result's structuredContent reaches the handler output."""

    @staticmethod
    def _run_tool(session, tool_result):
        """Stub ``session.call_tool`` with *tool_result*, invoke the handler
        for "my-tool" on "test-server" with empty arguments, and return the
        JSON-decoded output."""
        session.call_tool = AsyncMock(return_value=tool_result)
        invoke = mcp_tool._make_tool_handler("test-server", "my-tool", 30.0)
        return json.loads(invoke({}))

    def test_text_only_result(self, _patch_mcp_server):
        """Without structuredContent the output carries only the text."""
        tool_result = _FakeCallToolResult(content=[_FakeContentBlock("hello")])

        decoded = self._run_tool(_patch_mcp_server, tool_result)

        assert decoded == {"result": "hello"}

    def test_both_content_and_structured(self, _patch_mcp_server):
        """Text content and structuredContent are both present in the output."""
        payload = {"value": "secret-123", "revealed": True}
        tool_result = _FakeCallToolResult(
            content=[_FakeContentBlock("OK")],
            structuredContent=payload,
        )

        decoded = self._run_tool(_patch_mcp_server, tool_result)

        # The text is the primary result; structuredContent rides alongside.
        assert decoded["result"] == "OK"
        assert decoded["structuredContent"] == payload

    def test_both_content_and_structured_desktop_commander(self, _patch_mcp_server):
        """Desktop Commander scenario: file text arrives in content and file
        metadata in structuredContent. The file text must stay visible."""
        file_text = "import os\nprint('hello')\n"
        metadata = {"fileName": "main.py", "filePath": "/tmp/main.py", "fileType": "python"}
        tool_result = _FakeCallToolResult(
            content=[_FakeContentBlock(file_text)],
            structuredContent=metadata,
        )

        decoded = self._run_tool(_patch_mcp_server, tool_result)

        assert decoded["result"] == file_text
        assert decoded["structuredContent"] == metadata

    def test_structured_content_none_falls_back_to_text(self, _patch_mcp_server):
        """An explicit ``structuredContent=None`` yields the text-only output."""
        tool_result = _FakeCallToolResult(
            content=[_FakeContentBlock("done")],
            structuredContent=None,
        )

        decoded = self._run_tool(_patch_mcp_server, tool_result)

        assert decoded == {"result": "done"}

    def test_empty_text_with_structured_content(self, _patch_mcp_server):
        """With no content blocks, structuredContent becomes the result."""
        payload = {"status": "ok", "data": [1, 2, 3]}
        tool_result = _FakeCallToolResult(content=[], structuredContent=payload)

        decoded = self._run_tool(_patch_mcp_server, tool_result)

        assert decoded["result"] == payload
|