fix(approval): close remaining prompt_toolkit deadlock vectors (#15216)

PR #13734 fixed the concurrent-tool-executor vector (ThreadPoolExecutor workers didn't inherit the CLI's TLS approval callback). Two vectors remained that could still land in the deadlocking input() fallback: 1. _spawn_background_review spawns a raw threading.Thread with no approval callback installed, so any dangerous-command guard the review agent trips falls back to input() -> deadlock against the parent's prompt_toolkit TUI (same class as delegate_task subagents, fixed in 023b1bff1 / #15491). Install a _bg_review_auto_deny callback at thread start, clear on finally. 2. prompt_dangerous_approval's fallback unconditionally spawned a daemon thread calling input() when approval_callback was None. That fallback can never succeed under prompt_toolkit because the user's Enter goes to pt's raw-mode stdin capture. Detect an active pt Application via get_app_or_none() and fail closed (deny + log) instead, so future threads that forget to install a callback degrade gracefully instead of hanging 60s invisibly. Regression guards: - tests/run_agent/test_background_review.py verifies the review worker thread sees a callable auto-deny callback mid-run and that the slot is cleared in the finally block. - tests/tools/test_approval.py TestFailClosedUnderPromptToolkit verifies prompt_dangerous_approval returns 'deny' fast under a mocked pt Application, and that a real callback still wins over the guard.
2026-04-27 22:41:19 +08:00 · 2026-04-27 06:41:02 -07:00
parent 0046d170dc
commit 008860a23f
4 changed files with 163 additions and 0 deletions
--- a/run_agent.py
+++ b/run_agent.py
@@ -3247,6 +3247,21 @@ class AIAgent:

        def _run_review():
            import contextlib
+            # Install a non-interactive approval callback on this worker
+            # thread so any dangerous-command guard the review agent trips
+            # resolves to "deny" instead of falling back to input() -- which
+            # deadlocks against the parent's prompt_toolkit TUI (#15216).
+            # Same pattern as _subagent_auto_deny in tools/delegate_tool.py.
+            def _bg_review_auto_deny(command, description, **kwargs):
+                logger.warning(
+                    "Background review auto-denied dangerous command: %s (%s)",
+                    command, description,
+                )
+                return "deny"
+            try:
+                _set_approval_callback(_bg_review_auto_deny)
+            except Exception:
+                pass
            review_agent = None
            try:
                with open(os.devnull, "w") as _devnull, \
@@ -3329,6 +3344,12 @@ class AIAgent:
                        review_agent.close()
                    except Exception:
                        pass
+                # Clear the approval callback on this bg-review thread so a
+                # recycled thread-id doesn't inherit a stale reference.
+                try:
+                    _set_approval_callback(None)
+                except Exception:
+                    pass

        t = threading.Thread(target=_run_review, daemon=True, name="bg-review")
        t.start()
--- a/tests/run_agent/test_background_review.py
+++ b/tests/run_agent/test_background_review.py
@@ -71,3 +71,59 @@ def test_background_review_shuts_down_memory_provider_before_close(monkeypatch):
        "shutdown_memory_provider",
        "close",
    ]
+
+
+def test_background_review_installs_auto_deny_approval_callback(monkeypatch):
+    """Regression guard for #15216.
+
+    The background review thread must install a non-interactive approval
+    callback. If it doesn't, any dangerous-command guard the review agent
+    trips falls back to input() on a daemon thread, which deadlocks against
+    the parent's prompt_toolkit TUI.
+    """
+    import tools.terminal_tool as tt
+
+    observed: dict = {"during_run": "<unread>", "after_finally": "<unread>"}
+
+    class FakeReviewAgent:
+        def __init__(self, **kwargs):
+            self._session_messages = []
+
+        def run_conversation(self, **kwargs):
+            # Capture what the callback looks like mid-run. It must be
+            # a callable (the auto-deny) -- not None.
+            observed["during_run"] = tt._get_approval_callback()
+
+        def shutdown_memory_provider(self):
+            pass
+
+        def close(self):
+            pass
+
+    monkeypatch.setattr(run_agent_module, "AIAgent", FakeReviewAgent)
+    monkeypatch.setattr(run_agent_module.threading, "Thread", ImmediateThread)
+
+    # Start from a clean slot.
+    tt.set_approval_callback(None)
+    agent = _bare_agent()
+
+    AIAgent._spawn_background_review(
+        agent,
+        messages_snapshot=[{"role": "user", "content": "hello"}],
+        review_memory=True,
+    )
+
+    observed["after_finally"] = tt._get_approval_callback()
+
+    assert callable(observed["during_run"]), (
+        "Background review did not install an approval callback on its "
+        "worker thread; dangerous-command prompts will deadlock against "
+        "the parent TUI (#15216)."
+    )
+    # The installed callback must deny (it's a safety gate, not a prompt).
+    assert observed["during_run"]("rm -rf /", "test") == "deny"
+
+    assert observed["after_finally"] is None, (
+        "Background review leaked its approval callback into the worker "
+        "thread's TLS slot; a recycled thread-id could reuse it."
+    )
--- a/tests/tools/test_approval.py
+++ b/tests/tools/test_approval.py
@@ -906,3 +906,62 @@ class TestChmodExecuteCombo:
        cmd = "chmod +x script.sh"
        dangerous, _, _ = detect_dangerous_command(cmd)
        assert dangerous is False
+
+
+class TestFailClosedUnderPromptToolkit:
+    """Regression guard for #15216.
+
+    When prompt_toolkit owns the terminal and no approval callback is
+    registered on the calling thread, prompt_dangerous_approval() must
+    deny fast instead of falling through to the input() fallback -- which
+    deadlocks because the user's keystrokes go to prompt_toolkit's raw-mode
+    stdin capture, not to input().
+    """
+
+    def test_denies_when_prompt_toolkit_active_and_no_callback(self):
+        import threading
+        import prompt_toolkit.application.current as ptc
+
+        orig = ptc.get_app_or_none
+        ptc.get_app_or_none = lambda: object()  # pretend a pt app is running
+        result = []
+        try:
+            def run():
+                result.append(
+                    prompt_dangerous_approval(
+                        "rm -rf /",
+                        "test danger",
+                        timeout_seconds=30,
+                        approval_callback=None,
+                    )
+                )
+
+            t = threading.Thread(target=run, daemon=True)
+            t.start()
+            t.join(timeout=3)
+            assert not t.is_alive(), (
+                "prompt_dangerous_approval deadlocked under prompt_toolkit "
+                "with no callback -- fail-closed guard is broken"
+            )
+            assert result == ["deny"]
+        finally:
+            ptc.get_app_or_none = orig
+
+    def test_callback_path_still_wins_over_guard(self):
+        """Guard must not short-circuit a valid callback."""
+        import prompt_toolkit.application.current as ptc
+
+        orig = ptc.get_app_or_none
+        ptc.get_app_or_none = lambda: object()
+        try:
+            def cb(command, description, **kwargs):
+                return "once"
+
+            result = prompt_dangerous_approval(
+                "rm -rf /",
+                "test danger",
+                approval_callback=cb,
+            )
+            assert result == "once"
+        finally:
+            ptc.get_app_or_none = orig
--- a/tools/approval.py
+++ b/tools/approval.py
@@ -536,6 +536,33 @@ def prompt_dangerous_approval(command: str, description: str,
            logger.error("Approval callback failed: %s", e, exc_info=True)
            return "deny"

+    # Fail-closed guard: if prompt_toolkit owns the terminal (interactive
+    # CLI session) and no approval callback is registered on this thread,
+    # the input() fallback below would spawn a daemon thread whose read
+    # can never see Enter -- the user's keystrokes go to prompt_toolkit,
+    # not input(), producing an invisible 60s deadlock (issue #15216).
+    # Deny fast and log loudly instead so the caller can surface a real
+    # error to the agent. Any thread that needs interactive approval must
+    # install a callback via tools.terminal_tool.set_approval_callback()
+    # before reaching this point (see delegate_tool.py, run_agent.py
+    # _execute_tool_calls_concurrent / _spawn_background_review for the
+    # established pattern).
+    try:
+        from prompt_toolkit.application.current import get_app_or_none
+        if get_app_or_none() is not None:
+            logger.warning(
+                "Dangerous-command approval requested on a thread with no "
+                "approval callback while prompt_toolkit is active; denying "
+                "to avoid stdin deadlock. command=%r description=%r",
+                command, description,
+            )
+            return "deny"
+    except Exception:
+        # prompt_toolkit not installed, or detection failed -- fall through
+        # to the legacy input() path (safe in non-TUI contexts: scripts,
+        # tests, sshd, etc.).
+        pass
+
    os.environ["HERMES_SPINNER_PAUSE"] = "1"
    try:
        while True: