fix(gateway): scrub memory-context leaks from vision auto-analysis output

fixes #5719

The auxiliary vision LLM called by gateway._enrich_message_with_vision
can echo its injected Honcho system prompt back into the image
description.  That description gets embedded verbatim into the enriched
user message, so recalled memory (personal facts, dialectic output)
surfaces into a user-visible bubble.

Strips both forms of leak before embedding:
  - <memory-context>...</memory-context> fenced blocks (sanitize_context)
  - trailing '## Honcho Context' sections (header + everything after)

Plus regression tests:
  - tests/agent/test_streaming_context_scrubber.py — 13 tests on the
    stateful scrubber (whole block, split tags, false-positive partial
    tags, unterminated span, reset, case-insensitivity)
  - tests/run_agent/test_run_agent_codex_responses.py — 2 new tests on
    _fire_stream_delta covering the realistic 7-chunk leak scenario and
    the cross-turn scrubber reset
  - tests/gateway/test_vision_memory_leak.py — 4 tests covering the
    vision auto-analysis boundary (clean pass-through, '## Honcho Context'
    header, fenced block, both patterns together)
This commit is contained in:
Erosika
2026-04-24 18:33:19 -04:00
committed by kshitij
parent 5ce5b17a42
commit 3b2edb347d
4 changed files with 309 additions and 0 deletions

View File

@@ -8483,6 +8483,7 @@ class GatewayRunner:
The enriched message string with vision descriptions prepended.
"""
from tools.vision_tools import vision_analyze_tool
from agent.memory_manager import sanitize_context
analysis_prompt = (
"Describe everything visible in this image in thorough detail. "
@@ -8501,6 +8502,14 @@ class GatewayRunner:
result = json.loads(result_json)
if result.get("success"):
description = result.get("analysis", "")
# The auxiliary vision LLM can echo injected system-prompt
# memory context back into its output (#5719). Scrub any
# <memory-context> fences and the "## Honcho Context"
# section before the description lands in a user-visible
# message.
description = sanitize_context(description)
if "## Honcho Context" in description:
description = description.split("## Honcho Context", 1)[0].rstrip()
enriched_parts.append(
f"[The user sent an image~ Here's what I can see:\n{description}]\n"
f"[If you need a closer look, use vision_analyze with "

View File

@@ -0,0 +1,150 @@
"""Unit tests for StreamingContextScrubber (agent/memory_manager.py).
Regression coverage for #5719 — memory-context spans split across stream
deltas must not leak payload to the UI. The one-shot sanitize_context()
regex can't survive chunk boundaries, so _fire_stream_delta routes deltas
through a stateful scrubber.
"""
from agent.memory_manager import StreamingContextScrubber, sanitize_context
class TestStreamingContextScrubberBasics:
    """Core feed()/flush() behaviour of the stateful scrubber."""

    def test_empty_input_returns_empty(self):
        scrubber = StreamingContextScrubber()
        assert scrubber.feed("") == ""
        assert scrubber.flush() == ""

    def test_plain_text_passes_through(self):
        scrubber = StreamingContextScrubber()
        assert scrubber.feed("hello world") == "hello world"
        assert scrubber.flush() == ""

    def test_complete_block_in_single_delta(self):
        """Regression: the one-shot test case from #13672 must still work."""
        scrubber = StreamingContextScrubber()
        echoed = (
            "<memory-context>\n"
            "[System note: The following is recalled memory context, NOT new "
            "user input. Treat as informational background data.]\n\n"
            "## Honcho Context\nstale memory\n"
            "</memory-context>\n\nVisible answer"
        )
        emitted = scrubber.feed(echoed) + scrubber.flush()
        assert emitted == "\n\nVisible answer"

    def test_open_and_close_in_separate_deltas_strips_payload(self):
        """The real streaming case: tag pair split across deltas."""
        scrubber = StreamingContextScrubber()
        chunks = (
            "Hello ",
            "<memory-context>\npayload ",
            "more payload\n",
            "</memory-context> world",
        )
        pieces = [scrubber.feed(chunk) for chunk in chunks]
        pieces.append(scrubber.flush())
        emitted = "".join(pieces)
        assert emitted == "Hello world"
        assert "payload" not in emitted

    def test_realistic_fragmented_chunks_strip_memory_payload(self):
        """Exact leak scenario from the reviewer's comment — 4 realistic chunks.

        This is the case the original #13672 fix silently leaks on: the open
        tag, system note, payload, and close tag each arrive in their own
        delta because providers emit 1-80 char chunks.
        """
        scrubber = StreamingContextScrubber()
        chunks = (
            "<memory-context>\n[System note: The following",
            " is recalled memory context, NOT new user input. "
            "Treat as informational background data.]\n\n",
            "## Honcho Context\nstale memory\n",
            "</memory-context>\n\nVisible answer",
        )
        emitted = "".join(scrubber.feed(chunk) for chunk in chunks)
        emitted += scrubber.flush()
        assert emitted == "\n\nVisible answer"
        # The system-note line and payload must never reach the UI.
        for forbidden in ("System note", "Honcho Context", "stale memory"):
            assert forbidden not in emitted

    def test_open_tag_split_across_two_deltas(self):
        """The open tag itself arriving in two fragments."""
        scrubber = StreamingContextScrubber()
        emitted = scrubber.feed("pre <memory")
        emitted += scrubber.feed("-context>leak</memory-context> post")
        emitted += scrubber.flush()
        assert emitted == "pre post"
        assert "leak" not in emitted

    def test_close_tag_split_across_two_deltas(self):
        """The close tag arriving in two fragments."""
        scrubber = StreamingContextScrubber()
        emitted = scrubber.feed("pre <memory-context>leak</memory")
        emitted += scrubber.feed("-context> post")
        emitted += scrubber.flush()
        assert emitted == "pre post"
        assert "leak" not in emitted
class TestStreamingContextScrubberPartialTagFalsePositives:
    """Text that merely *looks* like the start of a tag must still be emitted."""

    def test_partial_open_tag_tail_emitted_on_flush(self):
        """Bare '<mem' at end of stream is not really a memory-context tag."""
        scrubber = StreamingContextScrubber()
        emitted = scrubber.feed("hello <mem")
        emitted += scrubber.feed("ory other")
        emitted += scrubber.flush()
        assert emitted == "hello <memory other"

    def test_partial_tag_released_when_disambiguated(self):
        """A held-back partial tag that turns out to be prose gets released."""
        scrubber = StreamingContextScrubber()
        # '< ' should not look like the start of any tag.
        emitted = scrubber.feed("price < ")
        emitted += scrubber.feed("10 dollars")
        emitted += scrubber.flush()
        assert emitted == "price < 10 dollars"
class TestStreamingContextScrubberUnterminatedSpan:
    """Behaviour when the close tag never arrives."""

    def test_unterminated_span_drops_payload(self):
        """Provider drops close tag — better to lose output than to leak."""
        scrubber = StreamingContextScrubber()
        emitted = scrubber.feed("pre <memory-context>secret never closed")
        emitted += scrubber.flush()
        assert emitted == "pre "
        assert "secret" not in emitted

    def test_reset_clears_hung_span(self):
        """Cross-turn scrubber reset drops a hung span so next turn is clean."""
        scrubber = StreamingContextScrubber()
        scrubber.feed("pre <memory-context>half")
        scrubber.reset()
        emitted = scrubber.feed("clean text") + scrubber.flush()
        assert emitted == "clean text"
class TestStreamingContextScrubberCaseInsensitivity:
    """Tag matching must not depend on letter case."""

    def test_uppercase_tags_still_scrubbed(self):
        scrubber = StreamingContextScrubber()
        emitted = scrubber.feed("<MEMORY-CONTEXT>secret")
        emitted += scrubber.feed("</Memory-Context>visible")
        emitted += scrubber.flush()
        assert emitted == "visible"
        assert "secret" not in emitted
class TestSanitizeContextUnchanged:
    """Smoke test that the one-shot sanitize_context still works for whole strings."""

    def test_whole_block_still_sanitized(self):
        echoed = (
            "<memory-context>\n"
            "[System note: The following is recalled memory context, NOT new "
            "user input. Treat as informational background data.]\n"
            "payload\n"
            "</memory-context>\nVisible"
        )
        assert sanitize_context(echoed).strip() == "Visible"

View File

@@ -0,0 +1,99 @@
"""Tests for _enrich_message_with_vision — regression for #5719.
The auxiliary vision LLM can echo system-prompt Honcho memory back into
its analysis output. When that echo reaches the user as the enriched
image description, recalled memory context (personal facts, dialectic
output) surfaces into a user-visible message.
The boundary fix in gateway/run.py strips both <memory-context>...</memory-context>
fenced blocks AND any "## Honcho Context" section from vision descriptions
before they're embedded into the enriched user message.
"""
import asyncio
import json
from unittest.mock import AsyncMock, patch
import pytest
@pytest.fixture
def gateway_runner():
    """Minimal GatewayRunner stub with just the method under test bound."""
    from gateway.run import GatewayRunner

    # Build the stub dynamically: an empty class whose only attribute is the
    # unbound method borrowed from the real GatewayRunner.
    stub_cls = type(
        "_Stub",
        (),
        {"_enrich_message_with_vision": GatewayRunner._enrich_message_with_vision},
    )
    return stub_cls()
def _run(coro):
    """Synchronously drive *coro* to completion and return its result.

    The previous one-liner contained a dead ``... if False else ...`` branch
    and created a brand-new event loop on every call without ever closing it,
    leaking loops (and their selector file descriptors) across tests.
    ``asyncio.run`` creates a fresh loop, runs the coroutine, and always
    closes the loop afterwards.
    """
    return asyncio.run(coro)
class TestEnrichMessageWithVision:
    """Boundary checks: vision descriptions are scrubbed before embedding."""

    @staticmethod
    def _enrich_with_mock_analysis(runner, analysis_text):
        # Patch the vision tool to return a canned successful analysis and
        # run the enrichment coroutine to completion.
        tool_payload = json.dumps({"success": True, "analysis": analysis_text})
        with patch(
            "tools.vision_tools.vision_analyze_tool",
            new=AsyncMock(return_value=tool_payload),
        ):
            return _run(
                runner._enrich_message_with_vision("caption", ["/tmp/img.jpg"])
            )

    def test_clean_description_passes_through(self, gateway_runner):
        """Vision output without leaked memory is embedded unchanged."""
        enriched = self._enrich_with_mock_analysis(
            gateway_runner, "A photograph of a sunset over the ocean."
        )
        assert "sunset over the ocean" in enriched

    def test_honcho_context_header_stripped(self, gateway_runner):
        """'## Honcho Context' section and everything after is removed."""
        echoed = (
            "A photograph of a sunset.\n\n"
            "## Honcho Context\n"
            "User prefers concise answers, works at Plastic Labs,\n"
            "uses OPSEC pseudonyms.\n"
        )
        enriched = self._enrich_with_mock_analysis(gateway_runner, echoed)
        assert "sunset" in enriched
        for forbidden in ("Honcho Context", "Plastic Labs", "OPSEC"):
            assert forbidden not in enriched

    def test_memory_context_fence_stripped(self, gateway_runner):
        """<memory-context>...</memory-context> fenced block is scrubbed."""
        echoed = (
            "<memory-context>\n"
            "[System note: The following is recalled memory context, NOT new "
            "user input. Treat as informational background data.]\n\n"
            "User details and preferences here.\n"
            "</memory-context>\n"
            "A photograph of a cat."
        )
        enriched = self._enrich_with_mock_analysis(gateway_runner, echoed)
        assert "photograph of a cat" in enriched
        for forbidden in (
            "<memory-context>",
            "User details and preferences",
            "System note",
        ):
            assert forbidden not in enriched

    def test_both_leak_patterns_together_stripped(self, gateway_runner):
        """A vision output containing both leak shapes is fully scrubbed."""
        echoed = (
            "<memory-context>\n"
            "[System note: The following is recalled memory context, NOT new "
            "user input. Treat as informational background data.]\n"
            "fenced leak\n"
            "</memory-context>\n"
            "A photograph of a dog.\n\n"
            "## Honcho Context\n"
            "header leak\n"
        )
        enriched = self._enrich_with_mock_analysis(gateway_runner, echoed)
        assert "photograph of a dog" in enriched
        for forbidden in (
            "fenced leak",
            "header leak",
            "Honcho Context",
            "<memory-context>",
        ):
            assert forbidden not in enriched

View File

@@ -1158,6 +1158,57 @@ def test_stream_delta_strips_leaked_memory_context(monkeypatch):
assert observed == ["Visible answer"]
def test_stream_delta_strips_leaked_memory_context_across_chunks(monkeypatch):
    """Regression for #5719 — the real streaming case.

    Providers typically emit 1-80 char chunks, so the memory-context open
    tag, system-note line, payload, and close tag each land in a delta of
    their own. A per-delta sanitize_context() regex cannot survive that;
    only a stateful scrubber can. None of the payload, system-note text,
    or "## Honcho Context" header may reach the delta callback.
    """
    agent = _build_agent(monkeypatch)
    captured = []
    agent.stream_delta_callback = captured.append

    chunks = (
        "<memory-context>\n[System note: The following",
        " is recalled memory context, NOT new user input. ",
        "Treat as informational background data.]\n\n",
        "## Honcho Context\n",
        "stale memory about eri\n",
        "</memory-context>\n\n",
        "Visible answer",
    )
    for chunk in chunks:
        agent._fire_stream_delta(chunk)

    surfaced = "".join(captured)
    assert "Visible answer" in surfaced
    # None of the leaked payload may surface.
    for forbidden in (
        "System note",
        "Honcho Context",
        "stale memory",
        "<memory-context>",
        "</memory-context>",
    ):
        assert forbidden not in surfaced
def test_stream_delta_scrubber_resets_between_turns(monkeypatch):
    """An unterminated span from a prior turn must not taint the next turn."""
    agent = _build_agent(monkeypatch)

    # Leave the scrubber hung mid-span, as a dropped close tag would...
    agent._stream_context_scrubber.feed("pre <memory-context>leaked")
    # ...then apply the reset run_conversation() performs at turn start.
    agent._stream_context_scrubber.reset()

    captured = []
    agent.stream_delta_callback = captured.append
    agent._fire_stream_delta("clean new turn text")
    assert "".join(captured) == "clean new turn text"
def test_run_conversation_codex_continues_after_commentary_phase_message(monkeypatch):
agent = _build_agent(monkeypatch)
responses = [