From 32f22057542c2cda19baa41bcd7b68dea3b582fb Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 7 Apr 2026 00:06:19 -0700 Subject: [PATCH] fix(gateway): /stop and /new bypass Level 1 active-session guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The base adapter's Level 1 guard intercepted ALL messages while an agent was running, including /stop and /new. These commands were queued as pending messages instead of being dispatched to the gateway runner's Level 2 handler. When the agent eventually stopped (via the interrupt mechanism), the command text leaked into the conversation as a user message — the model would receive '/stop' as input and respond to it. Fix: Add /stop, /new, and /reset to the bypass set in base.py alongside /approve, /deny, and /status. Consolidate the three separate bypass blocks into one. Commands in the bypass set are dispatched inline to the gateway runner, where Level 2 handles them correctly (hard-kill for /stop, session reset for /new). Also add a safety net in _run_agent's pending-message processing: if the pending text resolves to a known slash command, discard it instead of passing it to the agent. This catches edge cases where command text leaks through the interrupt_message fallback. Refs: #5244 --- gateway/platforms/base.py | 44 ++++++++++++--------------------------- gateway/run.py | 21 +++++++++++++++++++ 2 files changed, 34 insertions(+), 31 deletions(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 4335a51f112..66fc5bac22f 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -1103,16 +1103,20 @@ class BasePlatformAdapter(ABC): # Check if there's already an active handler for this session if session_key in self._active_sessions: - # /approve and /deny must bypass the active-session guard. - # The agent thread is blocked on threading.Event.wait() inside - # tools/approval.py — queuing these commands creates a deadlock: - # the agent waits for approval, approval waits for agent to finish. - # Dispatch directly to the message handler without touching session - # lifecycle (no competing background task, no session guard removal). + # Certain commands must bypass the active-session guard and be + # dispatched directly to the gateway runner. Without this, they + # are queued as pending messages and either: + # - leak into the conversation as user text (/stop, /new), or + # - deadlock (/approve, /deny — agent is blocked on Event.wait) + # + # Dispatch inline: call the message handler directly and send the + # response. Do NOT use _process_message_background — it manages + # session lifecycle and its cleanup races with the running task + # (see PR #4926). cmd = event.get_command() - if cmd in ("approve", "deny"): + if cmd in ("approve", "deny", "status", "stop", "new", "reset"): logger.debug( - "[%s] Approval command '/%s' bypassing active-session guard for %s", + "[%s] Command '/%s' bypassing active-session guard for %s", self.name, cmd, session_key, ) try: @@ -1126,29 +1130,7 @@ class BasePlatformAdapter(ABC): metadata=_thread_meta, ) except Exception as e: - logger.error("[%s] Approval dispatch failed: %s", self.name, e, exc_info=True) - return - - # /status must also bypass the active-session guard so it always - # returns a system-generated response instead of being queued as - # user text and passed to the agent (#5046). - if cmd == "status": - logger.debug( - "[%s] Status command bypassing active-session guard for %s", - self.name, session_key, - ) - try: - _thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None - response = await self._message_handler(event) - if response: - await self._send_with_retry( - chat_id=event.source.chat_id, - content=response, - reply_to=event.message_id, - metadata=_thread_meta, - ) - except Exception as e: - logger.error("[%s] Status dispatch failed: %s", self.name, e, exc_info=True) + logger.error("[%s] Command '/%s' dispatch failed: %s", self.name, cmd, e, exc_info=True) return # Special case: photo bursts/albums frequently arrive as multiple near- diff --git a/gateway/run.py b/gateway/run.py index 9d5ac5aa2c6..14661ea9001 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -7021,6 +7021,27 @@ class GatewayRunner: if pending: logger.debug("Processing queued message after agent completion: '%s...'", pending[:40]) + # Safety net: if the pending text is a slash command (e.g. "/stop", + # "/new"), discard it — commands should never be passed to the agent + # as user input. The primary fix is in base.py (commands bypass the + # active-session guard), but this catches edge cases where command + # text leaks through the interrupt_message fallback. + if pending and pending.strip().startswith("/"): + _pending_parts = pending.strip().split(None, 1) + _pending_cmd_word = _pending_parts[0][1:].lower() if _pending_parts else "" + if _pending_cmd_word: + try: + from hermes_cli.commands import resolve_command as _rc_pending + if _rc_pending(_pending_cmd_word): + logger.info( + "Discarding command '/%s' from pending queue — " + "commands must not be passed as agent input", + _pending_cmd_word, + ) + pending = None + except Exception: + pass + if pending: logger.debug("Processing pending message: '%s...'", pending[:40])