mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-27 22:41:19 +08:00
fix(approval): close remaining prompt_toolkit deadlock vectors (#15216)
PR #13734 fixed the concurrent-tool-executor vector (ThreadPoolExecutor
workers didn't inherit the CLI's TLS approval callback). Two vectors
remained that could still land in the deadlocking input() fallback:
1. _spawn_background_review spawns a raw threading.Thread with no
approval callback installed, so any dangerous-command guard the
review agent trips falls back to input() -> deadlock against the
parent's prompt_toolkit TUI (same class as delegate_task subagents,
fixed in 023b1bff1 / #15491). Install a _bg_review_auto_deny
callback at thread start, clear on finally.
2. prompt_dangerous_approval's fallback unconditionally spawned a
daemon thread calling input() when approval_callback was None.
That fallback can never succeed under prompt_toolkit because the
user's Enter goes to pt's raw-mode stdin capture. Detect an active
pt Application via get_app_or_none() and fail closed (deny + log)
instead, so future threads that forget to install a callback
degrade gracefully instead of hanging 60s invisibly.
Regression guards:
- tests/run_agent/test_background_review.py verifies the review
worker thread sees a callable auto-deny callback mid-run and that
the slot is cleared in the finally block.
- tests/tools/test_approval.py TestFailClosedUnderPromptToolkit
verifies prompt_dangerous_approval returns 'deny' fast under a
mocked pt Application, and that a real callback still wins over
the guard.
This commit is contained in:
21
run_agent.py
21
run_agent.py
@@ -3247,6 +3247,21 @@ class AIAgent:
|
||||
|
||||
def _run_review():
|
||||
import contextlib
|
||||
# Install a non-interactive approval callback on this worker
|
||||
# thread so any dangerous-command guard the review agent trips
|
||||
# resolves to "deny" instead of falling back to input() -- which
|
||||
# deadlocks against the parent's prompt_toolkit TUI (#15216).
|
||||
# Same pattern as _subagent_auto_deny in tools/delegate_tool.py.
|
||||
def _bg_review_auto_deny(command, description, **kwargs):
|
||||
logger.warning(
|
||||
"Background review auto-denied dangerous command: %s (%s)",
|
||||
command, description,
|
||||
)
|
||||
return "deny"
|
||||
try:
|
||||
_set_approval_callback(_bg_review_auto_deny)
|
||||
except Exception:
|
||||
pass
|
||||
review_agent = None
|
||||
try:
|
||||
with open(os.devnull, "w") as _devnull, \
|
||||
@@ -3329,6 +3344,12 @@ class AIAgent:
|
||||
review_agent.close()
|
||||
except Exception:
|
||||
pass
|
||||
# Clear the approval callback on this bg-review thread so a
|
||||
# recycled thread-id doesn't inherit a stale reference.
|
||||
try:
|
||||
_set_approval_callback(None)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
t = threading.Thread(target=_run_review, daemon=True, name="bg-review")
|
||||
t.start()
|
||||
|
||||
@@ -71,3 +71,59 @@ def test_background_review_shuts_down_memory_provider_before_close(monkeypatch):
|
||||
"shutdown_memory_provider",
|
||||
"close",
|
||||
]
|
||||
|
||||
|
||||
def test_background_review_installs_auto_deny_approval_callback(monkeypatch):
|
||||
"""Regression guard for #15216.
|
||||
|
||||
The background review thread must install a non-interactive approval
|
||||
callback. If it doesn't, any dangerous-command guard the review agent
|
||||
trips falls back to input() on a daemon thread, which deadlocks against
|
||||
the parent's prompt_toolkit TUI.
|
||||
"""
|
||||
import tools.terminal_tool as tt
|
||||
|
||||
observed: dict = {"during_run": "<unread>", "after_finally": "<unread>"}
|
||||
|
||||
class FakeReviewAgent:
|
||||
def __init__(self, **kwargs):
|
||||
self._session_messages = []
|
||||
|
||||
def run_conversation(self, **kwargs):
|
||||
# Capture what the callback looks like mid-run. It must be
|
||||
# a callable (the auto-deny) -- not None.
|
||||
observed["during_run"] = tt._get_approval_callback()
|
||||
|
||||
def shutdown_memory_provider(self):
|
||||
pass
|
||||
|
||||
def close(self):
|
||||
pass
|
||||
|
||||
monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
|
||||
monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
|
||||
|
||||
# Start from a clean slot.
|
||||
tt.set_approval_callback(None)
|
||||
agent = _bare_agent()
|
||||
|
||||
AIAgent._spawn_background_review(
|
||||
agent,
|
||||
messages_snapshot=[{"role": "user", "content": "hello"}],
|
||||
review_memory=True,
|
||||
)
|
||||
|
||||
observed["after_finally"] = tt._get_approval_callback()
|
||||
|
||||
assert callable(observed["during_run"]), (
|
||||
"Background review did not install an approval callback on its "
|
||||
"worker thread; dangerous-command prompts will deadlock against "
|
||||
"the parent TUI (#15216)."
|
||||
)
|
||||
# The installed callback must deny (it's a safety gate, not a prompt).
|
||||
assert observed["during_run"]("rm -rf /", "test") == "deny"
|
||||
|
||||
assert observed["after_finally"] is None, (
|
||||
"Background review leaked its approval callback into the worker "
|
||||
"thread's TLS slot; a recycled thread-id could reuse it."
|
||||
)
|
||||
|
||||
@@ -906,3 +906,62 @@ class TestChmodExecuteCombo:
|
||||
cmd = "chmod +x script.sh"
|
||||
dangerous, _, _ = detect_dangerous_command(cmd)
|
||||
assert dangerous is False
|
||||
|
||||
|
||||
class TestFailClosedUnderPromptToolkit:
|
||||
"""Regression guard for #15216.
|
||||
|
||||
When prompt_toolkit owns the terminal and no approval callback is
|
||||
registered on the calling thread, prompt_dangerous_approval() must
|
||||
deny fast instead of falling through to the input() fallback -- which
|
||||
deadlocks because the user's keystrokes go to prompt_toolkit's raw-mode
|
||||
stdin capture, not to input().
|
||||
"""
|
||||
|
||||
def test_denies_when_prompt_toolkit_active_and_no_callback(self):
|
||||
import threading
|
||||
import prompt_toolkit.application.current as ptc
|
||||
|
||||
orig = ptc.get_app_or_none
|
||||
ptc.get_app_or_none = lambda: object() # pretend a pt app is running
|
||||
result = []
|
||||
try:
|
||||
def run():
|
||||
result.append(
|
||||
prompt_dangerous_approval(
|
||||
"rm -rf /",
|
||||
"test danger",
|
||||
timeout_seconds=30,
|
||||
approval_callback=None,
|
||||
)
|
||||
)
|
||||
|
||||
t = threading.Thread(target=run, daemon=True)
|
||||
t.start()
|
||||
t.join(timeout=3)
|
||||
assert not t.is_alive(), (
|
||||
"prompt_dangerous_approval deadlocked under prompt_toolkit "
|
||||
"with no callback -- fail-closed guard is broken"
|
||||
)
|
||||
assert result == ["deny"]
|
||||
finally:
|
||||
ptc.get_app_or_none = orig
|
||||
|
||||
def test_callback_path_still_wins_over_guard(self):
|
||||
"""Guard must not short-circuit a valid callback."""
|
||||
import prompt_toolkit.application.current as ptc
|
||||
|
||||
orig = ptc.get_app_or_none
|
||||
ptc.get_app_or_none = lambda: object()
|
||||
try:
|
||||
def cb(command, description, **kwargs):
|
||||
return "once"
|
||||
|
||||
result = prompt_dangerous_approval(
|
||||
"rm -rf /",
|
||||
"test danger",
|
||||
approval_callback=cb,
|
||||
)
|
||||
assert result == "once"
|
||||
finally:
|
||||
ptc.get_app_or_none = orig
|
||||
|
||||
@@ -536,6 +536,33 @@ def prompt_dangerous_approval(command: str, description: str,
|
||||
logger.error("Approval callback failed: %s", e, exc_info=True)
|
||||
return "deny"
|
||||
|
||||
# Fail-closed guard: if prompt_toolkit owns the terminal (interactive
|
||||
# CLI session) and no approval callback is registered on this thread,
|
||||
# the input() fallback below would spawn a daemon thread whose read
|
||||
# can never see Enter -- the user's keystrokes go to prompt_toolkit,
|
||||
# not input(), producing an invisible 60s deadlock (issue #15216).
|
||||
# Deny fast and log loudly instead so the caller can surface a real
|
||||
# error to the agent. Any thread that needs interactive approval must
|
||||
# install a callback via tools.terminal_tool.set_approval_callback()
|
||||
# before reaching this point (see delegate_tool.py, run_agent.py
|
||||
# _execute_tool_calls_concurrent / _spawn_background_review for the
|
||||
# established pattern).
|
||||
try:
|
||||
from prompt_toolkit.application.current import get_app_or_none
|
||||
if get_app_or_none() is not None:
|
||||
logger.warning(
|
||||
"Dangerous-command approval requested on a thread with no "
|
||||
"approval callback while prompt_toolkit is active; denying "
|
||||
"to avoid stdin deadlock. command=%r description=%r",
|
||||
command, description,
|
||||
)
|
||||
return "deny"
|
||||
except Exception:
|
||||
# prompt_toolkit not installed, or detection failed -- fall through
|
||||
# to the legacy input() path (safe in non-TUI contexts: scripts,
|
||||
# tests, sshd, etc.).
|
||||
pass
|
||||
|
||||
os.environ["HERMES_SPINNER_PAUSE"] = "1"
|
||||
try:
|
||||
while True:
|
||||
|
||||
Reference in New Issue
Block a user