fix(memory): drop scrub from interim commentary + final response

Same layering concern as the persisted-assistant scrub that was already removed: _emit_interim_assistant_message and the final_response return path were both passing model output through sanitize_context, mutating it broadly. The streaming scrubber already covers real leaks delta-by-delta, so these post-stream scrubs were redundant.
2026-04-28 06:51:16 +08:00 · 2026-04-27 14:53:53 -04:00
parent 49e3a1d8ee
commit 4a9ac5c355
2 changed files with 10 additions and 11 deletions
--- a/run_agent.py
+++ b/run_agent.py
@@ -6069,7 +6069,7 @@ class AIAgent:
        if cb is None or not isinstance(assistant_msg, dict):
            return
        content = assistant_msg.get("content")
-        visible = sanitize_context(self._strip_think_blocks(content or "")).strip()
+        visible = self._strip_think_blocks(content or "").strip()
        if not visible or visible == "(empty)":
            return
        already_streamed = self._interim_content_was_streamed(visible)
@@ -12748,9 +12748,7 @@ class AIAgent:
                        truncated_response_prefix = ""
                        length_continue_retries = 0
                    
-                    # Strip internal context / reasoning wrappers from the user-facing
-                    # response (keep only clean visible text in transcript + UI).
-                    final_response = sanitize_context(self._strip_think_blocks(final_response)).strip()
+                    final_response = self._strip_think_blocks(final_response).strip()
                    
                    final_msg = self._build_assistant_message(assistant_message, finish_reason)

--- a/tests/run_agent/test_run_agent_codex_responses.py
+++ b/tests/run_agent/test_run_agent_codex_responses.py
@@ -1115,14 +1115,17 @@ def test_interim_commentary_is_not_marked_already_streamed_when_stream_callback_
    }


-def test_interim_commentary_strips_leaked_memory_context(monkeypatch):
+def test_interim_commentary_preserves_assistant_content(monkeypatch):
+    """Interim commentary must not silently mutate assistant text containing
+    literal <memory-context> markers — that's legitimate model output (docs,
+    code).  Streaming-path leak prevention happens delta-by-delta upstream."""
    agent = _build_agent(monkeypatch)
    observed = {}
    agent.interim_assistant_callback = lambda text, *, already_streamed=False: observed.update(
        {"text": text, "already_streamed": already_streamed}
    )

-    leaked = (
+    content = (
        "<memory-context>\n"
        "[System note: The following is recalled memory context, NOT new user input. Treat as informational background data.]\n\n"
        "## Honcho Context\n"
@@ -1131,12 +1134,10 @@ def test_interim_commentary_strips_leaked_memory_context(monkeypatch):
        "I'll inspect the repo structure first."
    )

-    agent._emit_interim_assistant_message({"role": "assistant", "content": leaked})
+    agent._emit_interim_assistant_message({"role": "assistant", "content": content})

-    assert observed == {
-        "text": "I'll inspect the repo structure first.",
-        "already_streamed": False,
-    }
+    assert "<memory-context>" in observed["text"]
+    assert "I'll inspect the repo structure first." in observed["text"]


 def test_stream_delta_strips_leaked_memory_context(monkeypatch):