From 39713ba2ae888966b30b1483460629ff31900a0f Mon Sep 17 00:00:00 2001 From: dontcallmejames Date: Sat, 18 Apr 2026 13:27:25 -0400 Subject: [PATCH] fix: strip leaked memory context from commentary --- run_agent.py | 2 +- .../test_run_agent_codex_responses.py | 24 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/run_agent.py b/run_agent.py index 85321628e6..6c9118137a 100644 --- a/run_agent.py +++ b/run_agent.py @@ -6051,7 +6051,7 @@ class AIAgent: if cb is None or not isinstance(assistant_msg, dict): return content = assistant_msg.get("content") - visible = self._strip_think_blocks(content or "").strip() + visible = sanitize_context(self._strip_think_blocks(content or "")).strip() if not visible or visible == "(empty)": return already_streamed = self._interim_content_was_streamed(visible) diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py index b906355900..2ca76dca4b 100644 --- a/tests/run_agent/test_run_agent_codex_responses.py +++ b/tests/run_agent/test_run_agent_codex_responses.py @@ -1115,6 +1115,30 @@ def test_interim_commentary_is_not_marked_already_streamed_when_stream_callback_ } +def test_interim_commentary_strips_leaked_memory_context(monkeypatch): + agent = _build_agent(monkeypatch) + observed = {} + agent.interim_assistant_callback = lambda text, *, already_streamed=False: observed.update( + {"text": text, "already_streamed": already_streamed} + ) + + leaked = ( + "\n" + "[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n" + "## Honcho Context\n" + "stale memory\n" + "\n\n" + "I'll inspect the repo structure first." + ) + + agent._emit_interim_assistant_message({"role": "assistant", "content": leaked}) + + assert observed == { + "text": "I'll inspect the repo structure first.", + "already_streamed": False, + } + + def test_run_conversation_codex_continues_after_commentary_phase_message(monkeypatch): agent = _build_agent(monkeypatch) responses = [