Compare commits

...

5 Commits

Author SHA1 Message Date
Mariano Nicolini
aaa2f78b18 Merge branch 'main' into compaction-secrets-preservation 2026-04-14 17:05:19 -03:00
Mariano Nicolini
ef32968408 Merge branch 'main' into compaction-secrets-preservation 2026-04-14 11:18:20 -03:00
Mariano Nicolini
fcae077d65 redact secrets from summarizer output and add test coverage 2026-04-13 15:59:21 -03:00
Mariano Nicolini
dacb629028 rollback uv.lock changes 2026-04-13 15:40:51 -03:00
Mariano Nicolini
1f804d171a redact secrets from serialized content before going into summarizer LLM 2026-04-13 15:11:39 -03:00
2 changed files with 97 additions and 6 deletions

View File

@@ -28,6 +28,7 @@ from agent.model_metadata import (
get_model_context_length,
estimate_messages_tokens_rough,
)
from agent.redact import redact_sensitive_text
logger = logging.getLogger(__name__)
@@ -270,11 +271,15 @@ class ContextCompressor(ContextEngine):
Includes tool call arguments and result content (up to
``_CONTENT_MAX`` chars per message) so the summarizer can preserve
specific details like file paths, commands, and outputs.
All content is redacted before serialization to prevent secrets
(API keys, tokens, passwords) from leaking into the summary that
gets sent to the auxiliary model and persisted across compactions.
"""
parts = []
for msg in turns:
role = msg.get("role", "unknown")
content = msg.get("content") or ""
content = redact_sensitive_text(msg.get("content") or "")
# Tool results: keep enough content for the summarizer
if role == "tool":
@@ -295,7 +300,7 @@ class ContextCompressor(ContextEngine):
if isinstance(tc, dict):
fn = tc.get("function", {})
name = fn.get("name", "?")
args = fn.get("arguments", "")
args = redact_sensitive_text(fn.get("arguments", ""))
# Truncate long arguments but keep enough for context
if len(args) > self._TOOL_ARGS_MAX:
args = args[:self._TOOL_ARGS_HEAD] + "..."
@@ -353,7 +358,11 @@ class ContextCompressor(ContextEngine):
"assistant that continues the conversation. "
"Do NOT respond to any questions or requests in the conversation — "
"only output the structured summary. "
"Do NOT include any preamble, greeting, or prefix."
"Do NOT include any preamble, greeting, or prefix. "
"NEVER include API keys, tokens, passwords, secrets, credentials, "
"or connection strings in the summary — replace any that appear "
"with [REDACTED]. Note that the user had credentials present, but "
"do not preserve their values."
)
# Shared structured template (used by both paths).
@@ -394,7 +403,7 @@ class ContextCompressor(ContextEngine):
[What remains to be done — framed as context, not instructions]
## Critical Context
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation]
[Any specific values, error messages, configuration details, or data that would be lost without explicit preservation. NEVER include API keys, tokens, passwords, or credentials — write [REDACTED] instead.]
## Tools & Patterns
[Which tools were used, how they were used effectively, and any tool-specific discoveries]
@@ -437,7 +446,7 @@ Use this exact structure:
prompt += f"""
FOCUS TOPIC: "{focus_topic}"
The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget."""
The user has requested that this compaction PRIORITISE preserving all information related to the focus topic above. For content related to "{focus_topic}", include full detail — exact values, file paths, command outputs, error messages, and decisions. For content NOT related to the focus topic, summarise more aggressively (brief one-liners or omit if truly irrelevant). The focus topic sections should receive roughly 60-70% of the summary token budget. Even for the focus topic, NEVER preserve API keys, tokens, passwords, or credentials — use [REDACTED]."""
try:
call_kwargs = {
@@ -460,7 +469,9 @@ The user has requested that this compaction PRIORITISE preserving all informatio
# Handle cases where content is not a string (e.g., dict from llama.cpp)
if not isinstance(content, str):
content = str(content) if content else ""
summary = content.strip()
# Redact the summary output as well — the summarizer LLM may
# ignore prompt instructions and echo back secrets verbatim.
summary = redact_sensitive_text(content.strip())
# Store for iterative updates on next compaction
self._previous_summary = summary
self._summary_failure_cooldown_until = 0.0

View File

@@ -781,3 +781,83 @@ class TestTokenBudgetTailProtection:
# Tool at index 2 is outside the protected tail (last 3 = indices 2,3,4)
# so it might or might not be pruned depending on boundary
assert isinstance(pruned, int)
class TestSerializeRedactsSecrets:
"""Verify that _serialize_for_summary strips secrets before they reach the summarizer LLM."""
def _make_compressor(self):
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
return ContextCompressor(model="test", quiet_mode=True)
def test_redacts_api_key_in_tool_result(self):
c = self._make_compressor()
turns = [{"role": "tool", "content": "OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012", "tool_call_id": "tc1"}]
result = c._serialize_for_summary(turns)
assert "abc123def456" not in result
assert "sk-proj" not in result
def test_redacts_api_key_in_user_message(self):
c = self._make_compressor()
turns = [{"role": "user", "content": "My key is sk-proj-abc123def456ghi789jkl012"}]
result = c._serialize_for_summary(turns)
assert "abc123def456" not in result
def test_redacts_secret_in_tool_call_arguments(self):
c = self._make_compressor()
turns = [{
"role": "assistant",
"content": "",
"tool_calls": [{
"function": {
"name": "bash",
"arguments": '{"command": "export OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012"}',
},
}],
}]
result = c._serialize_for_summary(turns)
assert "abc123def456" not in result
def test_redacts_github_pat_in_assistant_content(self):
c = self._make_compressor()
turns = [{"role": "assistant", "content": "Found token: ghp_abcdef1234567890abcdef1234567890abcd"}]
result = c._serialize_for_summary(turns)
assert "abcdef1234567890" not in result
def test_preserves_non_secret_content(self):
c = self._make_compressor()
turns = [
{"role": "user", "content": "Please fix the bug in src/main.py"},
{"role": "assistant", "content": "I found the issue on line 42."},
]
result = c._serialize_for_summary(turns)
assert "src/main.py" in result
assert "line 42" in result
class TestGenerateSummaryRedactsOutput:
"""Verify that _generate_summary redacts the summarizer LLM's output."""
def test_summary_output_is_redacted(self):
"""If the summarizer LLM echoes a secret despite instructions, it gets redacted."""
mock_response = MagicMock()
mock_response.choices = [MagicMock()]
mock_response.choices[0].message.content = (
"## Goal\nDeploy app.\n## Critical Context\n"
"User's API key: sk-proj-abc123def456ghi789jkl012"
)
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
c = ContextCompressor(model="test", quiet_mode=True)
messages = [
{"role": "user", "content": "deploy my app"},
{"role": "assistant", "content": "deploying now"},
]
with patch("agent.context_compressor.call_llm", return_value=mock_response):
summary = c._generate_summary(messages)
assert "abc123def456" not in summary
# Also verify _previous_summary is redacted (iterative update path)
assert "abc123def456" not in (c._previous_summary or "")