mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
PR #13734 fixed the concurrent-tool-executor vector (ThreadPoolExecutor
workers didn't inherit the CLI's TLS approval callback). Two vectors
remained that could still land in the deadlocking input() fallback:
1. _spawn_background_review spawns a raw threading.Thread with no
approval callback installed, so any dangerous-command guard the
review agent trips falls back to input() -> deadlock against the
parent's prompt_toolkit TUI (same class as delegate_task subagents,
fixed in 023b1bff1 / #15491). Install a _bg_review_auto_deny
callback at thread start, clear on finally.
2. prompt_dangerous_approval's fallback unconditionally spawned a
daemon thread calling input() when approval_callback was None.
That fallback can never succeed under prompt_toolkit because the
user's Enter goes to pt's raw-mode stdin capture. Detect an active
pt Application via get_app_or_none() and fail closed (deny + log)
instead, so future threads that forget to install a callback
degrade gracefully instead of hanging 60s invisibly.
Regression guards:
- tests/run_agent/test_background_review.py verifies the review
worker thread sees a callable auto-deny callback mid-run and that
the slot is cleared in the finally block.
- tests/tools/test_approval.py TestFailClosedUnderPromptToolkit
verifies prompt_dangerous_approval returns 'deny' fast under a
mocked pt Application, and that a real callback still wins over
the guard.
130 lines
4.0 KiB
Python
130 lines
4.0 KiB
Python
"""Regression tests for background review agent cleanup."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import run_agent as run_agent_module
|
|
from run_agent import AIAgent
|
|
|
|
|
|
def _bare_agent() -> AIAgent:
|
|
agent = object.__new__(AIAgent)
|
|
agent.model = "fake-model"
|
|
agent.platform = "telegram"
|
|
agent.provider = "openai"
|
|
agent.base_url = ""
|
|
agent.api_key = ""
|
|
agent.api_mode = ""
|
|
agent.session_id = "test-session"
|
|
agent._parent_session_id = ""
|
|
agent._credential_pool = None
|
|
agent._memory_store = object()
|
|
agent._memory_enabled = True
|
|
agent._user_profile_enabled = False
|
|
agent._MEMORY_REVIEW_PROMPT = "review memory"
|
|
agent._SKILL_REVIEW_PROMPT = "review skills"
|
|
agent._COMBINED_REVIEW_PROMPT = "review both"
|
|
agent.background_review_callback = None
|
|
agent.status_callback = None
|
|
agent._safe_print = lambda *_args, **_kwargs: None
|
|
return agent
|
|
|
|
|
|
class ImmediateThread:
|
|
def __init__(self, *, target, daemon=None, name=None):
|
|
self._target = target
|
|
|
|
def start(self):
|
|
self._target()
|
|
|
|
|
|
def test_background_review_shuts_down_memory_provider_before_close(monkeypatch):
|
|
events = []
|
|
|
|
class FakeReviewAgent:
|
|
def __init__(self, **kwargs):
|
|
events.append(("init", kwargs))
|
|
self._session_messages = []
|
|
|
|
def run_conversation(self, **kwargs):
|
|
events.append(("run_conversation", kwargs))
|
|
|
|
def shutdown_memory_provider(self):
|
|
events.append(("shutdown_memory_provider", None))
|
|
|
|
def close(self):
|
|
events.append(("close", None))
|
|
|
|
monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
|
|
monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
|
|
|
|
agent = _bare_agent()
|
|
|
|
AIAgent._spawn_background_review(
|
|
agent,
|
|
messages_snapshot=[{"role": "user", "content": "hello"}],
|
|
review_memory=True,
|
|
)
|
|
|
|
assert [name for name, _payload in events] == [
|
|
"init",
|
|
"run_conversation",
|
|
"shutdown_memory_provider",
|
|
"close",
|
|
]
|
|
|
|
|
|
def test_background_review_installs_auto_deny_approval_callback(monkeypatch):
|
|
"""Regression guard for #15216.
|
|
|
|
The background review thread must install a non-interactive approval
|
|
callback. If it doesn't, any dangerous-command guard the review agent
|
|
trips falls back to input() on a daemon thread, which deadlocks against
|
|
the parent's prompt_toolkit TUI.
|
|
"""
|
|
import tools.terminal_tool as tt
|
|
|
|
observed: dict = {"during_run": "<unread>", "after_finally": "<unread>"}
|
|
|
|
class FakeReviewAgent:
|
|
def __init__(self, **kwargs):
|
|
self._session_messages = []
|
|
|
|
def run_conversation(self, **kwargs):
|
|
# Capture what the callback looks like mid-run. It must be
|
|
# a callable (the auto-deny) -- not None.
|
|
observed["during_run"] = tt._get_approval_callback()
|
|
|
|
def shutdown_memory_provider(self):
|
|
pass
|
|
|
|
def close(self):
|
|
pass
|
|
|
|
monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
|
|
monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
|
|
|
|
# Start from a clean slot.
|
|
tt.set_approval_callback(None)
|
|
agent = _bare_agent()
|
|
|
|
AIAgent._spawn_background_review(
|
|
agent,
|
|
messages_snapshot=[{"role": "user", "content": "hello"}],
|
|
review_memory=True,
|
|
)
|
|
|
|
observed["after_finally"] = tt._get_approval_callback()
|
|
|
|
assert callable(observed["during_run"]), (
|
|
"Background review did not install an approval callback on its "
|
|
"worker thread; dangerous-command prompts will deadlock against "
|
|
"the parent TUI (#15216)."
|
|
)
|
|
# The installed callback must deny (it's a safety gate, not a prompt).
|
|
assert observed["during_run"]("rm -rf /", "test") == "deny"
|
|
|
|
assert observed["after_finally"] is None, (
|
|
"Background review leaked its approval callback into the worker "
|
|
"thread's TLS slot; a recycled thread-id could reuse it."
|
|
)
|