mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-02 00:41:43 +08:00
Reloading MCP servers rebuilds the tool set for the active session, which invalidates the provider prompt cache (tool schemas are baked into the system prompt). The next message re-sends full input tokens — can be expensive on long-context or high-reasoning models. To surface that cost, /reload-mcp now routes through a new slash-confirm primitive with three options: Approve Once / Always Approve / Cancel. 'Always Approve' persists approvals.mcp_reload_confirm: false so future reloads run silently. Coverage: * Classic CLI (cli.py) — interactive numbered prompt. * TUI (tui_gateway + Ink ops.ts) — text warning on first call; `now` / `always` args skip the gate; `always` also persists the opt-out. * Messenger gateway — button UI on Telegram (inline keyboard), Discord (discord.ui.View), Slack (Block Kit actions); text fallback on every other platform via /approve /always /cancel replies intercepted in gateway/run.py _handle_message. * Config key: approvals.mcp_reload_confirm (default true). * Auto-reload paths (CLI file watcher, TUI config-sync mtime poll) pass confirm=true so they do NOT prompt. Implementation: * tools/slash_confirm.py — module-level pending-state store used by all adapters and by the CLI prompt. Thread-safe register/resolve/clear. * gateway/platforms/base.py — send_slash_confirm hook (default 'Not supported' → text fallback). * gateway/run.py — _request_slash_confirm helper + text intercept in _handle_message (yields to in-progress tool-exec approvals so dangerous-command /approve still unblocks the tool thread first). Tests: * tests/tools/test_slash_confirm.py — primitive lifecycle + async resolution + double-click atomicity (16 tests). * tests/hermes_cli/test_mcp_reload_confirm_gate.py — default-config shape + deep-merge preserves user opt-out (5 tests). Targeted runs (hermetic): 89 passed (slash-confirm, config gate, existing agent cache, existing telegram approval buttons).
198 lines
6.1 KiB
Python
198 lines
6.1 KiB
Python
"""Tests for tools/slash_confirm.py — the generic slash-command confirmation primitive.
|
|
|
|
Covers register/resolve/clear lifecycle, stale-entry behavior, confirm_id
|
|
mismatch, handler exceptions, and async resolution.
|
|
"""
|
|
|
|
import asyncio
|
|
import time
|
|
|
|
import pytest
|
|
|
|
from tools import slash_confirm
|
|
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def _clean_pending():
|
|
"""Every test gets a clean primitive state."""
|
|
slash_confirm._pending.clear()
|
|
yield
|
|
slash_confirm._pending.clear()
|
|
|
|
|
|
class TestRegisterAndGetPending:
|
|
def test_register_stores_entry(self):
|
|
async def handler(choice):
|
|
return f"got {choice}"
|
|
|
|
slash_confirm.register("sess1", "cid1", "reload-mcp", handler)
|
|
|
|
pending = slash_confirm.get_pending("sess1")
|
|
assert pending is not None
|
|
assert pending["confirm_id"] == "cid1"
|
|
assert pending["command"] == "reload-mcp"
|
|
assert pending["handler"] is handler
|
|
assert "created_at" in pending
|
|
|
|
def test_get_pending_missing_returns_none(self):
|
|
assert slash_confirm.get_pending("nobody") is None
|
|
|
|
def test_register_supersedes_prior_entry(self):
|
|
async def h1(choice):
|
|
return "first"
|
|
|
|
async def h2(choice):
|
|
return "second"
|
|
|
|
slash_confirm.register("sess1", "cid1", "reload-mcp", h1)
|
|
slash_confirm.register("sess1", "cid2", "reload-mcp", h2)
|
|
|
|
pending = slash_confirm.get_pending("sess1")
|
|
assert pending["confirm_id"] == "cid2"
|
|
assert pending["handler"] is h2
|
|
|
|
def test_get_pending_returns_copy_not_reference(self):
|
|
async def h(choice):
|
|
return "x"
|
|
|
|
slash_confirm.register("sess1", "cid1", "cmd", h)
|
|
|
|
p1 = slash_confirm.get_pending("sess1")
|
|
p1["command"] = "mutated"
|
|
|
|
p2 = slash_confirm.get_pending("sess1")
|
|
assert p2["command"] == "cmd"
|
|
|
|
|
|
class TestResolve:
|
|
@pytest.mark.asyncio
|
|
async def test_resolve_runs_handler_and_pops_entry(self):
|
|
calls = []
|
|
|
|
async def handler(choice):
|
|
calls.append(choice)
|
|
return f"resolved {choice}"
|
|
|
|
slash_confirm.register("sess1", "cid1", "reload-mcp", handler)
|
|
|
|
result = await slash_confirm.resolve("sess1", "cid1", "once")
|
|
assert result == "resolved once"
|
|
assert calls == ["once"]
|
|
|
|
# Entry should be popped.
|
|
assert slash_confirm.get_pending("sess1") is None
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_resolve_no_pending_returns_none(self):
|
|
result = await slash_confirm.resolve("sess1", "cid1", "once")
|
|
assert result is None
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_resolve_confirm_id_mismatch_returns_none(self):
|
|
async def handler(choice):
|
|
return "should not run"
|
|
|
|
slash_confirm.register("sess1", "cid_real", "cmd", handler)
|
|
|
|
result = await slash_confirm.resolve("sess1", "cid_wrong", "once")
|
|
assert result is None
|
|
|
|
# Stale entry should still be present (mismatch doesn't pop).
|
|
assert slash_confirm.get_pending("sess1") is not None
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_resolve_stale_entry_returns_none(self):
|
|
async def handler(choice):
|
|
return "should not run"
|
|
|
|
slash_confirm.register("sess1", "cid1", "cmd", handler)
|
|
# Force entry age past timeout
|
|
slash_confirm._pending["sess1"]["created_at"] = time.time() - 10000
|
|
|
|
result = await slash_confirm.resolve("sess1", "cid1", "once")
|
|
assert result is None
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_resolve_handler_exception_returns_error_string(self):
|
|
async def handler(choice):
|
|
raise RuntimeError("boom")
|
|
|
|
slash_confirm.register("sess1", "cid1", "cmd", handler)
|
|
|
|
result = await slash_confirm.resolve("sess1", "cid1", "once")
|
|
assert result is not None
|
|
assert "boom" in result
|
|
# Entry should still be popped even when handler raises.
|
|
assert slash_confirm.get_pending("sess1") is None
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_resolve_non_string_return_becomes_none(self):
|
|
async def handler(choice):
|
|
return {"not": "a string"}
|
|
|
|
slash_confirm.register("sess1", "cid1", "cmd", handler)
|
|
result = await slash_confirm.resolve("sess1", "cid1", "once")
|
|
assert result is None
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_resolve_double_click_only_runs_handler_once(self):
|
|
calls = []
|
|
|
|
async def handler(choice):
|
|
calls.append(choice)
|
|
return "ran"
|
|
|
|
slash_confirm.register("sess1", "cid1", "cmd", handler)
|
|
|
|
# Simulate two near-simultaneous button clicks.
|
|
r1, r2 = await asyncio.gather(
|
|
slash_confirm.resolve("sess1", "cid1", "once"),
|
|
slash_confirm.resolve("sess1", "cid1", "once"),
|
|
)
|
|
# Exactly one should have run the handler.
|
|
assert calls == ["once"]
|
|
assert (r1 == "ran") ^ (r2 == "ran")
|
|
|
|
|
|
class TestClear:
|
|
def test_clear_removes_entry(self):
|
|
async def h(c):
|
|
return "x"
|
|
|
|
slash_confirm.register("sess1", "cid1", "cmd", h)
|
|
assert slash_confirm.get_pending("sess1") is not None
|
|
|
|
slash_confirm.clear("sess1")
|
|
assert slash_confirm.get_pending("sess1") is None
|
|
|
|
def test_clear_missing_is_noop(self):
|
|
# Should not raise.
|
|
slash_confirm.clear("nobody")
|
|
|
|
|
|
class TestClearIfStale:
|
|
def test_clears_stale_entry(self):
|
|
async def h(c):
|
|
return "x"
|
|
|
|
slash_confirm.register("sess1", "cid1", "cmd", h)
|
|
slash_confirm._pending["sess1"]["created_at"] = time.time() - 10000
|
|
|
|
cleared = slash_confirm.clear_if_stale("sess1", timeout=300)
|
|
assert cleared is True
|
|
assert slash_confirm.get_pending("sess1") is None
|
|
|
|
def test_preserves_fresh_entry(self):
|
|
async def h(c):
|
|
return "x"
|
|
|
|
slash_confirm.register("sess1", "cid1", "cmd", h)
|
|
|
|
cleared = slash_confirm.clear_if_stale("sess1", timeout=300)
|
|
assert cleared is False
|
|
assert slash_confirm.get_pending("sess1") is not None
|
|
|
|
def test_returns_false_for_missing_entry(self):
|
|
cleared = slash_confirm.clear_if_stale("nobody")
|
|
assert cleared is False
|