# mirror of https://github.com/NousResearch/hermes-agent.git
# synced 2026-04-28 23:11:37 +08:00
"""End-to-end integration tests for the MCP OAuth consolidation.
|
||
|
|
|
||
|
|
Exercises the full chain — manager, provider subclass, disk watch, 401
|
||
|
|
dedup — with real file I/O and real imports (no transport mocks, no
|
||
|
|
subprocesses). These are the tests that would catch Cthulhu's original
|
||
|
|
BetterStack bug: an external process rewrites the tokens file on disk,
|
||
|
|
and the running Hermes session picks up the new tokens on the next auth
|
||
|
|
flow without requiring a restart.
|
||
|
|
"""
|
||
|
|
import asyncio
import json
import os
import time

import pytest

# Skip the entire module when the installed MCP SDK is too old to ship the
# OAuth client these tests exercise.
pytest.importorskip("mcp.client.auth.oauth2", reason="MCP SDK 1.26.0+ required")
@pytest.mark.asyncio
async def test_external_refresh_picked_up_without_restart(tmp_path, monkeypatch):
    """Simulate Cthulhu's cron workflow end-to-end.

    1. A running Hermes session has OAuth tokens loaded in memory.
    2. An external process (cron) writes fresh tokens to disk.
    3. On the next auth flow, the manager's disk-watch invalidates the
       in-memory state so the SDK re-reads from storage.
    4. ``provider.context.current_tokens`` now reflects the new tokens
       with no process restart required.
    """
    # Point the project's home dir at the test sandbox before importing,
    # so the manager resolves its token directory under tmp_path.
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    from tools.mcp_oauth_manager import MCPOAuthManager, reset_manager_for_tests
    reset_manager_for_tests()

    token_dir = tmp_path / "mcp-tokens"
    token_dir.mkdir(parents=True)
    tokens_file = token_dir / "srv.json"
    client_info_file = token_dir / "srv.client.json"

    # Pre-seed the baseline state: valid tokens the session loaded at startup.
    tokens_file.write_text(json.dumps({
        "access_token": "OLD_ACCESS",
        "token_type": "Bearer",
        "expires_in": 3600,
        "refresh_token": "OLD_REFRESH",
    }))
    client_info_file.write_text(json.dumps({
        "client_id": "test-client",
        "redirect_uris": ["http://127.0.0.1:12345/callback"],
        "grant_types": ["authorization_code", "refresh_token"],
        "response_types": ["code"],
        "token_endpoint_auth_method": "none",
    }))

    mgr = MCPOAuthManager()
    provider = mgr.get_or_build_provider(
        "srv", "https://example.com/mcp", None,
    )
    assert provider is not None

    # The SDK's _initialize reads tokens from storage into memory. This
    # is what happens on the first http request under normal operation.
    await provider._initialize()
    assert provider.context.current_tokens.access_token == "OLD_ACCESS"

    # Now record the baseline mtime in the manager (this happens
    # automatically via the HermesMCPOAuthProvider.async_auth_flow
    # pre-hook on the first real request, but we exercise it directly
    # here for test determinism).
    await mgr.invalidate_if_disk_changed("srv")

    # EXTERNAL PROCESS: cron rewrites the tokens file with fresh creds.
    # The old refresh_token has been consumed by this external exchange.
    future_mtime = time.time() + 1
    tokens_file.write_text(json.dumps({
        "access_token": "NEW_ACCESS",
        "token_type": "Bearer",
        "expires_in": 3600,
        "refresh_token": "NEW_REFRESH",
    }))
    # Force an mtime strictly in the future so the change is visible even on
    # filesystems with coarse timestamp resolution.
    os.utime(tokens_file, (future_mtime, future_mtime))

    # The next auth flow should detect the mtime change and reload.
    changed = await mgr.invalidate_if_disk_changed("srv")
    assert changed, "manager must detect the disk mtime change"
    assert provider._initialized is False, "_initialized must flip so SDK re-reads storage"

    # Simulate the next async_auth_flow: _initialize runs because _initialized=False.
    await provider._initialize()
    assert provider.context.current_tokens.access_token == "NEW_ACCESS"
    assert provider.context.current_tokens.refresh_token == "NEW_REFRESH"
@pytest.mark.asyncio
async def test_handle_401_deduplicates_concurrent_callers(tmp_path, monkeypatch):
    """Ten concurrent 401 handlers for the same token should fire one recovery.

    Mirrors Claude Code's pending401Handlers dedup pattern — prevents N MCP
    tool calls hitting 401 simultaneously from all independently clearing
    caches and re-reading the keychain (which thrashes the storage and
    bogs down startup per CC-1096).
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    from tools.mcp_oauth_manager import MCPOAuthManager, reset_manager_for_tests
    reset_manager_for_tests()

    token_dir = tmp_path / "mcp-tokens"
    token_dir.mkdir(parents=True)
    (token_dir / "srv.json").write_text(json.dumps({
        "access_token": "TOK",
        "token_type": "Bearer",
        "expires_in": 3600,
    }))

    mgr = MCPOAuthManager()
    provider = mgr.get_or_build_provider(
        "srv", "https://example.com/mcp", None,
    )
    assert provider is not None

    # Count how many times invalidate_if_disk_changed is called — proxy for
    # how many actual recovery attempts fire.
    call_count = 0
    real_invalidate = mgr.invalidate_if_disk_changed

    async def counting(name):
        # Wraps the real coroutine so behavior is unchanged; only counts calls.
        nonlocal call_count
        call_count += 1
        return await real_invalidate(name)

    # setattr on the instance shadows the bound method for this manager only.
    monkeypatch.setattr(mgr, "invalidate_if_disk_changed", counting)

    # Fire 10 concurrent handlers with the same failed token.
    results = await asyncio.gather(*(
        mgr.handle_401("srv", "SAME_FAILED_TOKEN") for _ in range(10)
    ))

    # All callers get the same result (the shared future's resolution).
    assert all(r == results[0] for r in results), "dedup must return identical result"
    # Exactly ONE recovery ran — the rest awaited the same pending future.
    assert call_count == 1, f"expected 1 recovery attempt, got {call_count}"
@pytest.mark.asyncio
async def test_handle_401_returns_false_when_no_provider(tmp_path, monkeypatch):
    """A 401 for a server the manager has never seen resolves to False."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from tools.mcp_oauth_manager import MCPOAuthManager, reset_manager_for_tests
    reset_manager_for_tests()

    # Fresh manager with no providers registered at all.
    manager = MCPOAuthManager()
    outcome = await manager.handle_401("nonexistent", "any_token")
    assert outcome is False
@pytest.mark.asyncio
async def test_invalidate_if_disk_changed_handles_missing_file(tmp_path, monkeypatch):
    """A provider with no tokens file on disk reports no change (pre-auth state)."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from tools.mcp_oauth_manager import MCPOAuthManager, reset_manager_for_tests
    reset_manager_for_tests()

    manager = MCPOAuthManager()
    manager.get_or_build_provider("srv", "https://example.com/mcp", None)

    # The tokens file was never written — the server has not completed
    # its first OAuth flow yet, so there is nothing to invalidate.
    detected = await manager.invalidate_if_disk_changed("srv")
    assert detected is False
@pytest.mark.asyncio
async def test_provider_is_reused_across_reconnects(tmp_path, monkeypatch):
    """Repeated get_or_build_provider calls for one server yield one instance.

    The disk-watch only survives reconnects because the provider is cached:
    if tearing down and rebuilding the MCP session (Task 5's _reconnect_event
    path) produced a fresh provider, ``last_mtime_ns`` would reset and the
    first post-reconnect auth flow would spuriously "detect" a change.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from tools.mcp_oauth_manager import MCPOAuthManager, reset_manager_for_tests
    reset_manager_for_tests()

    manager = MCPOAuthManager()
    first = manager.get_or_build_provider("srv", "https://example.com/mcp", None)

    # Simulate a reconnect: _run_http calls get_or_build_provider again
    second = manager.get_or_build_provider("srv", "https://example.com/mcp", None)

    assert first is second, "manager must cache the provider across reconnects"