fix(gateway): scrub memory-context leaks from vision auto-analysis output
fixes #5719
The auxiliary vision LLM called by gateway._enrich_message_with_vision
can echo its injected Honcho system prompt back into the image
description. That description gets embedded verbatim into the enriched
user message, so recalled memory (personal facts, dialectic output)
surfaces into a user-visible bubble.
Strips both forms of leak before embedding:
- <memory-context>...</memory-context> fenced blocks (sanitize_context)
- trailing '## Honcho Context' sections (header + everything after)
Plus regression tests:
- tests/agent/test_streaming_context_scrubber.py — 13 tests on the
stateful scrubber (whole block, split tags, false-positive partial
tags, unterminated span, reset, case-insensitivity)
- tests/run_agent/test_run_agent_codex_responses.py — 2 new tests on
_fire_stream_delta covering the realistic 7-chunk leak scenario and
the cross-turn scrubber reset
- tests/gateway/test_vision_memory_leak.py — 4 tests covering the
vision auto-analysis boundary (clean pass-through, '## Honcho Context'
header, fenced block, both patterns together)
2026-04-24 18:33:19 -04:00
|
|
|
"""Unit tests for StreamingContextScrubber (agent/memory_manager.py).
|
|
|
|
|
|
|
|
|
|
Regression coverage for #5719 — memory-context spans split across stream
|
|
|
|
|
deltas must not leak payload to the UI. The one-shot sanitize_context()
|
|
|
|
|
regex can't survive chunk boundaries, so _fire_stream_delta routes deltas
|
|
|
|
|
through a stateful scrubber.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
from agent.memory_manager import StreamingContextScrubber, sanitize_context
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestStreamingContextScrubberBasics:
    """Baseline feed()/flush() checks: pass-through, whole blocks, split tags.

    Every test feeds one or more deltas to a fresh StreamingContextScrubber
    and compares the concatenated feed() results plus the final flush()
    against the text expected to remain visible.
    """

    def test_empty_input_returns_empty(self):
        """An empty delta yields empty output from both feed() and flush()."""
        s = StreamingContextScrubber()
        assert s.feed("") == ""
        assert s.flush() == ""

    def test_plain_text_passes_through(self):
        """Tag-free text is returned unchanged by feed(); flush() adds nothing."""
        s = StreamingContextScrubber()
        assert s.feed("hello world") == "hello world"
        assert s.flush() == ""

    def test_complete_block_in_single_delta(self):
        """Regression: the one-shot test case from #13672 must still work."""
        s = StreamingContextScrubber()
        leaked = (
            "<memory-context>\n"
            "[System note: The following is recalled memory context, NOT new "
            "user input. Treat as informational background data.]\n\n"
            "## Honcho Context\nstale memory\n"
            "</memory-context>\n\nVisible answer"
        )
        out = s.feed(leaked) + s.flush()
        assert out == "\n\nVisible answer"

    def test_open_and_close_in_separate_deltas_strips_payload(self):
        """The real streaming case: tag pair split across deltas."""
        s = StreamingContextScrubber()
        deltas = [
            "Hello ",
            "<memory-context>\npayload ",
            "more payload\n",
            "</memory-context> world",
        ]
        out = "".join(s.feed(d) for d in deltas) + s.flush()
        # Only the fenced span is removed: the space after "Hello" and the
        # space before "world" both survive, so two spaces are expected.
        assert out == "Hello " + " world"
        assert "payload" not in out

    def test_realistic_fragmented_chunks_strip_memory_payload(self):
        """Exact leak scenario from the reviewer's comment — 4 realistic chunks.

        This is the case the original #13672 fix silently leaks on: the open
        tag, system note, payload, and close tag each arrive in their own
        delta because providers emit 1-80 char chunks.
        """
        s = StreamingContextScrubber()
        deltas = [
            "<memory-context>\n[System note: The following",
            # Two adjacent literals: they form a single delta.
            " is recalled memory context, NOT new user input. "
            "Treat as informational background data.]\n\n",
            "## Honcho Context\nstale memory\n",
            "</memory-context>\n\nVisible answer",
        ]
        out = "".join(s.feed(d) for d in deltas) + s.flush()
        assert out == "\n\nVisible answer"
        # The system-note line and payload must never reach the UI.
        assert "System note" not in out
        assert "Honcho Context" not in out
        assert "stale memory" not in out

    def test_open_tag_split_across_two_deltas(self):
        """The open tag itself arriving in two fragments."""
        s = StreamingContextScrubber()
        out = (
            s.feed("pre <memory")
            + s.feed("-context>leak</memory-context> post")
            + s.flush()
        )
        # "pre " before the tag and " post" after it each keep their space.
        assert out == "pre " + " post"
        assert "leak" not in out

    def test_close_tag_split_across_two_deltas(self):
        """The close tag arriving in two fragments."""
        s = StreamingContextScrubber()
        out = (
            s.feed("pre <memory-context>leak</memory")
            + s.feed("-context> post")
            + s.flush()
        )
        # "pre " before the tag and " post" after it each keep their space.
        assert out == "pre " + " post"
        assert "leak" not in out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestStreamingContextScrubberPartialTagFalsePositives:
    """Text that only resembles the start of a tag must still be emitted."""

    def test_partial_open_tag_tail_emitted_on_flush(self):
        """A '<mem' tail that continues as ordinary text is not a tag."""
        scrubber = StreamingContextScrubber()
        pieces = [
            scrubber.feed("hello <mem"),
            scrubber.feed("ory other"),
            scrubber.flush(),
        ]
        assert "".join(pieces) == "hello <memory other"

    def test_partial_tag_released_when_disambiguated(self):
        """A lone '<' followed by prose comes out intact across two deltas."""
        scrubber = StreamingContextScrubber()
        # '< ' should not look like the start of any tag.
        pieces = [
            scrubber.feed("price < "),
            scrubber.feed("10 dollars"),
            scrubber.flush(),
        ]
        assert "".join(pieces) == "price < 10 dollars"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestStreamingContextScrubberUnterminatedSpan:
    """Spans whose close tag never arrives must not leak their payload."""

    def test_unterminated_span_drops_payload(self):
        """Provider drops the close tag: losing output is preferred to leaking."""
        scrubber = StreamingContextScrubber()
        emitted = scrubber.feed("pre <memory-context>secret never closed")
        emitted += scrubber.flush()
        assert emitted == "pre "
        assert "secret" not in emitted

    def test_reset_clears_hung_span(self):
        """After reset(), a span left open earlier no longer swallows new text."""
        scrubber = StreamingContextScrubber()
        scrubber.feed("pre <memory-context>half")
        scrubber.reset()
        emitted = scrubber.feed("clean text")
        emitted += scrubber.flush()
        assert emitted == "clean text"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestStreamingContextScrubberCaseInsensitivity:
    """Tag matching must not depend on letter case."""

    def test_uppercase_tags_still_scrubbed(self):
        """An upper-case open tag and a mixed-case close tag still delimit a span."""
        scrubber = StreamingContextScrubber()
        pieces = [
            scrubber.feed("<MEMORY-CONTEXT>secret"),
            scrubber.feed("</Memory-Context>visible"),
            scrubber.flush(),
        ]
        emitted = "".join(pieces)
        assert emitted == "visible"
        assert "secret" not in emitted
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestSanitizeContextUnchanged:
    """Smoke test: one-shot sanitize_context still handles whole strings."""

    def test_whole_block_still_sanitized(self):
        """A complete fenced block in one string is removed, leaving the answer."""
        leaked = "".join(
            [
                "<memory-context>\n",
                "[System note: The following is recalled memory context, NOT new ",
                "user input. Treat as informational background data.]\n",
                "payload\n",
                "</memory-context>\nVisible",
            ]
        )
        cleaned = sanitize_context(leaked)
        assert cleaned.strip() == "Visible"
|
fix(memory): narrow scrub surface to known wrapper boundaries
Reviewer pushback on the original boundary-hardening commits — four
overreach points pulled plugin-specific policy into shared core paths:
1. gateway/run.py hardcoded a '## Honcho Context' literal split for
vision-LLM output. Plugin-format heading in framework code; could
truncate legitimate output naturally containing that header.
Drop the literal split; keep generic sanitize_context (the wrapper
strip is plugin-agnostic). Plugin-specific cleanup belongs at the
provider boundary, not the shared gateway path.
2. run_agent.run_conversation scrubbed user_message and
persist_user_message before the conversation loop. User text is
sacred — if a user types a literal <memory-context> tag we must
not silently delete it. The producer (build_memory_context_block)
is the only legitimate emitter; user input should never need the
reverse op.
3. _build_assistant_message scrubbed model output before persistence.
Same hazard: would silently mutate legitimate documentation/code
the model emits containing the literal markers. The streaming
scrubber catches real leaks delta-by-delta before content is
concatenated; persist-time scrub was redundant belt-and-suspenders.
4. _fire_stream_delta stripped leading newlines from every delta unless
a paragraph break flag was set. Mid-stream '\n' is legitimate
markdown — lists, code fences, paragraph breaks — and chunk
boundaries are arbitrary. Narrow lstrip to the very first delta
of the stream only (so stale provider preamble still gets cleaned
on turn start, but mid-stream formatting survives).
Plus: build_memory_context_block now logs a warning when its defensive
sanitize_context strips something — surfaces buggy providers returning
pre-wrapped text instead of silently double-fencing.
Net architectural change: scrub surface collapses from 8 sites to 3
(StreamingContextScrubber on output deltas, plugin→backend send,
build_memory_context_block input-validation). Plugin-specific strings
stay out of shared runtime paths. User input and persisted assistant
output are no longer mutated.
Tests: rescoped TestMemoryContextSanitization (helper-correctness only,
no source-inspection of removed call sites), updated vision tests to
drop '## Honcho Context' literal-split assertions, updated
_build_assistant_message persistence test to assert preservation.
Added: cross-turn scrubber reset, build_memory_context_block warn-on-
violation, mid-stream newline preservation (plain + code fence).
2026-04-27 14:32:20 -04:00
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestStreamingContextScrubberCrossTurn:
    """One scrubber instance serves many turns (per agent).

    reset() has to drop whatever state is being held so that leftovers from
    turn N cannot affect the first delta of turn N+1.
    """

    def test_reset_clears_held_partial_tag(self):
        """A held-back '<memo' prefix is forgotten once reset() is called."""
        scrubber = StreamingContextScrubber()

        # Turn 1: the trailing partial open-tag prefix is withheld.
        first_turn = scrubber.feed("answer<memo")
        assert first_turn == "answer"

        # Turn boundary: the buffer must be cleared.
        scrubber.reset()

        # Turn 2: text beginning with "<m" must NOT be read as a
        # continuation of the earlier "<memo".
        second_turn = scrubber.feed("<marker>fresh content")
        assert second_turn == "<marker>fresh content"

    def test_reset_clears_in_span_state(self):
        """An open span from the previous turn stops applying after reset()."""
        scrubber = StreamingContextScrubber()
        scrubber.feed("text<memory-context>secret-tail")
        # The scrubber is now mid-span; without a reset, later text would be
        # discarded until </memory-context> shows up.
        scrubber.reset()
        after_reset = scrubber.feed("post-reset visible text")
        assert after_reset == "post-reset visible text"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestBuildMemoryContextBlockWarnsOnViolation:
    """Providers are expected to hand back raw context, not pre-wrapped text.

    When a provider does return wrapped text, the wrapper is stripped and a
    warning is logged so the misbehaving provider becomes visible.
    """

    def test_provider_emitting_wrapper_warns(self, caplog):
        """Pre-wrapped input logs a warning and the output has a single fence."""
        import logging
        from agent.memory_manager import build_memory_context_block

        prewrapped = "".join(
            [
                "<memory-context>\n",
                "[System note: ...]\n\n",
                "real fact\n",
                "</memory-context>",
            ]
        )
        with caplog.at_level(logging.WARNING, logger="agent.memory_manager"):
            out = build_memory_context_block(prewrapped)

        warned = any("pre-wrapped" in record.message for record in caplog.records)
        assert warned
        # Exactly one open tag and one close tag: no double fencing.
        assert out.count("<memory-context>") == 1
        assert out.count("</memory-context>") == 1

    def test_clean_provider_output_does_not_warn(self, caplog):
        """Plain input logs no 'pre-wrapped' warning and appears in the output."""
        import logging
        from agent.memory_manager import build_memory_context_block

        with caplog.at_level(logging.WARNING, logger="agent.memory_manager"):
            out = build_memory_context_block("plain fact about user")

        assert all("pre-wrapped" not in record.message for record in caplog.records)
        assert "plain fact about user" in out
|