Compare commits


2 Commits

teknium1 · c122a53744 · 2026-03-14 02:13:30 -07:00
test: pin context compaction handoff prompt

teknium1 · 7e714ac48e · 2026-03-14 02:11:24 -07:00
feat: use Codex-style compaction prompt for context compression

Replace the generic summarization prompt ('Summarize these conversation
turns concisely') with a task-oriented handoff prompt inspired by
OpenAI's Codex CLI compaction flow (researched in #499).

The new prompt frames compression as a 'CONTEXT CHECKPOINT COMPACTION'
and instructs the summarization model to produce a structured handoff
summary that includes:
- Current progress and key decisions
- User preferences and constraints discovered
- Clear next steps remaining
- Critical data (file paths, URLs, error messages, code snippets)
- Tool calls made and their key results

This produces better summaries because the model understands the summary
will be used by another LLM to continue the work, rather than treating
it as a generic text compression task.

No behavioral change to the compression algorithm itself — same
positional protection, same role alternation, same [CONTEXT SUMMARY]:
prefix. Only the prompt sent to the summarization model changes.

Inspired by PR #776 by @kshitijk4poor.
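For illustration, a minimal before/after sketch of the unchanged replacement step described above. Everything here is invented except the "[CONTEXT SUMMARY]:" prefix; in particular, the role carrying the summary is an assumption:

before = [
    {"role": "system", "content": "You are a coding agent."},      # protected head
    {"role": "user", "content": "Fix the flaky test."},            # compressed away
    {"role": "assistant", "content": "Running the suite now."},    # compressed away
    {"role": "user", "content": "Any progress?"},                  # compressed away
    {"role": "assistant", "content": "Found a race in teardown."}, # protected tail
]

after = [
    {"role": "system", "content": "You are a coding agent."},
    # One summary message replaces the compressed span; the role chosen
    # here (user) is assumed, and it preserves alternation with the tail.
    {"role": "user", "content": "[CONTEXT SUMMARY]: Investigated the flaky "
                                "test; a suite run pointed to a race in teardown."},
    {"role": "assistant", "content": "Found a race in teardown."},
]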
2 changed files with 45 additions and 16 deletions


@@ -102,22 +102,24 @@ class ContextCompressor:
             parts.append(f"[{role.upper()}]: {content}")
         content_to_summarize = "\n\n".join(parts)
-        prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.
-Write from a neutral perspective describing:
-1. What actions were taken (tool calls, searches, file operations)
-2. Key information or results obtained
-3. Important decisions or findings
-4. Relevant data, file names, or outputs
-Keep factual and informative. Target ~{self.summary_target_tokens} tokens.
----
-TURNS TO SUMMARIZE:
-{content_to_summarize}
----
-Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
+        prompt = (
+            "You are performing a CONTEXT CHECKPOINT COMPACTION. Create a handoff "
+            "summary for the AI assistant that will resume this conversation.\n\n"
+            "Include:\n"
+            "- Current progress and key decisions made\n"
+            "- Important context, constraints, or user preferences discovered\n"
+            "- What remains to be done (clear next steps)\n"
+            "- Any critical data: file paths, variable names, URLs, error messages, "
+            "or code snippets needed to continue\n"
+            "- Tool calls made and their key results\n\n"
+            "Be concise, structured, and focused on helping the assistant seamlessly "
+            "continue the work without re-doing what's already been done.\n\n"
+            f"Target roughly {self.summary_target_tokens} tokens.\n\n"
+            "---\n"
+            f"TURNS TO SUMMARIZE:\n{content_to_summarize}\n"
+            "---\n\n"
+            'Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.'
+        )
         # Use the centralized LLM router — handles provider resolution,
         # auth, and fallback internally.
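The router call itself sits below the fold of this hunk. As a hedged sketch only: the messages kwarg and the response shape are confirmed by the test in the second file, while the model kwarg and the return statement are assumptions.

response = call_llm(
    model=self.model,                                # assumed kwarg
    messages=[{"role": "user", "content": prompt}],  # the test asserts on call_args.kwargs["messages"]
)
return response.choices[0].message.content           # shape matches the test's mocked response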


@@ -153,6 +153,33 @@ class TestGenerateSummaryNoneContent:
         assert len(result) < len(msgs)
+
+class TestSummaryPrompt:
+    def test_generate_summary_uses_handoff_compaction_prompt(self):
+        mock_response = MagicMock()
+        mock_response.choices = [MagicMock()]
+        mock_response.choices[0].message.content = "[CONTEXT SUMMARY]: compacted"
+
+        with patch("agent.context_compressor.get_model_context_length", return_value=100000):
+            c = ContextCompressor(model="test", quiet_mode=True, summary_target_tokens=1234)
+
+        messages = [
+            {"role": "user", "content": "Investigate the failing test"},
+            {"role": "assistant", "content": "I'll inspect the traceback."},
+        ]
+
+        with patch("agent.context_compressor.call_llm", return_value=mock_response) as mock_call_llm:
+            c._generate_summary(messages)
+
+        prompt = mock_call_llm.call_args.kwargs["messages"][0]["content"]
+        assert "CONTEXT CHECKPOINT COMPACTION" in prompt
+        assert "Current progress and key decisions made" in prompt
+        assert "Important context, constraints, or user preferences discovered" in prompt
+        assert "What remains to be done (clear next steps)" in prompt
+        assert "Tool calls made and their key results" in prompt
+        assert "Target roughly 1234 tokens." in prompt
+        assert 'Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.' in prompt
+
 class TestNonStringContent:
     """Regression: content as dict (e.g., llama.cpp tool calls) must not crash."""