mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
feat: Codex-style handoff prefix for compressed context summaries
Replace the old '[CONTEXT SUMMARY]:' prefix on compressed summaries
with a Codex-inspired handoff framing that tells the model what happened
and how to use the summary.
What changes:
1. New SUMMARY_PREFIX constant — the text prepended to every
compressed summary:
"[CONTEXT COMPACTION] An earlier part of this conversation was
summarized to preserve context space. Below is the summary — use
it to build on the work already done and avoid duplicating effort:"
2. _with_summary_prefix() helper — normalizes model output by stripping
any legacy '[CONTEXT SUMMARY]:' prefix the summarization model may
have produced (the summarization prompt still asks for that prefix),
then prepends the new SUMMARY_PREFIX followed by a newline.
3. System message annotation updated — the note appended to the system
prompt on first compression now says 'compacted into a handoff
summary' and instructs 'build on that summary rather than re-doing
work' instead of the old generic note.
Why this is better:
The old prefix ('[CONTEXT SUMMARY]: <raw text>') gave the model no
context about what the summary is or how to use it. The new prefix
explicitly frames it as a context compaction event and instructs the
model to build on prior work rather than re-doing it. This reduces
redundant tool calls and file re-reads after compression.
What does NOT change:
- The compression algorithm (positional protection, boundary alignment)
- The role alternation logic (summary role adapts to avoid consecutive
same-role messages)
- The summarization model or trigger thresholds
- Recognition of the old '[CONTEXT SUMMARY]:' prefix — it is kept as the
exported LEGACY_SUMMARY_PREFIX constant for backward compatibility
Inspired by PR #776 by @kshitijk4poor and the research in #499.
This commit is contained in:
@@ -17,6 +17,13 @@ from agent.model_metadata import (
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
SUMMARY_PREFIX = (
|
||||
"[CONTEXT COMPACTION] An earlier part of this conversation was "
|
||||
"summarized to preserve context space. Below is the summary — use it "
|
||||
"to build on the work already done and avoid duplicating effort:"
|
||||
)
|
||||
LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"
|
||||
|
||||
|
||||
class ContextCompressor:
|
||||
"""Compresses conversation context when approaching the model's context limit.
|
||||
@@ -166,9 +173,19 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
||||
raise
|
||||
|
||||
summary = response.choices[0].message.content.strip()
|
||||
if not summary.startswith("[CONTEXT SUMMARY]:"):
|
||||
summary = "[CONTEXT SUMMARY]: " + summary
|
||||
return summary
|
||||
return self._with_summary_prefix(summary)
|
||||
|
||||
@staticmethod
def _with_summary_prefix(summary: str) -> str:
    """Normalize a generated summary to the current handoff format.

    The summarization model may emit the legacy ``[CONTEXT SUMMARY]:``
    prefix, and the text may also already start with the current
    ``SUMMARY_PREFIX``. Whichever of the two leads the text is removed
    so that exactly one ``SUMMARY_PREFIX`` ends up at the front of the
    result.

    Args:
        summary: Raw summary text; ``None`` or an empty string is treated
            as an empty summary.

    Returns:
        ``SUMMARY_PREFIX``, a newline, then the summary body with the
        leading prefix (if any) and surrounding whitespace removed.
    """
    text = (summary or "").strip()
    # Check the current prefix as well as the legacy one so the helper is
    # idempotent: normalizing twice must not stack two handoff prefixes.
    for prefix in (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX):
        if text.startswith(prefix):
            text = text[len(prefix):].lstrip()
            break
    return f"{SUMMARY_PREFIX}\n{text}"
|
||||
|
||||
def _get_fallback_client(self):
|
||||
"""Try to build a fallback client from the main model's endpoint config.
|
||||
@@ -338,7 +355,7 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
||||
for i in range(compress_start):
|
||||
msg = messages[i].copy()
|
||||
if i == 0 and msg.get("role") == "system" and self.compression_count == 0:
|
||||
msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns may be summarized to preserve context space.]"
|
||||
msg["content"] = (msg.get("content") or "") + "\n\n[Note: Some earlier conversation turns have been compacted into a handoff summary to preserve context space. Build on that summary rather than re-doing work.]"
|
||||
compressed.append(msg)
|
||||
|
||||
if summary:
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
from agent.context_compressor import ContextCompressor
|
||||
from agent.context_compressor import ContextCompressor, SUMMARY_PREFIX
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
@@ -141,7 +141,7 @@ class TestGenerateSummaryNoneContent:
|
||||
|
||||
summary = c._generate_summary(messages)
|
||||
assert isinstance(summary, str)
|
||||
assert "CONTEXT SUMMARY" in summary
|
||||
assert summary.startswith(SUMMARY_PREFIX)
|
||||
|
||||
def test_none_content_in_system_message_compress(self):
|
||||
"""System message with content=None should not crash during compress."""
|
||||
@@ -174,7 +174,7 @@ class TestCompressWithClient:
|
||||
|
||||
# Should have summary message in the middle
|
||||
contents = [m.get("content", "") for m in result]
|
||||
assert any("CONTEXT SUMMARY" in c for c in contents)
|
||||
assert any(c.startswith(SUMMARY_PREFIX) for c in contents)
|
||||
assert len(result) < len(msgs)
|
||||
|
||||
def test_summarization_does_not_split_tool_call_pairs(self):
|
||||
@@ -246,7 +246,7 @@ class TestCompressWithClient:
|
||||
{"role": "assistant", "content": "msg 5"},
|
||||
]
|
||||
result = c.compress(msgs)
|
||||
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
|
||||
summary_msg = [m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)]
|
||||
assert len(summary_msg) == 1
|
||||
assert summary_msg[0]["role"] == "user"
|
||||
|
||||
@@ -274,7 +274,7 @@ class TestCompressWithClient:
|
||||
{"role": "assistant", "content": "msg 7"},
|
||||
]
|
||||
result = c.compress(msgs)
|
||||
summary_msg = [m for m in result if "CONTEXT SUMMARY" in (m.get("content") or "")]
|
||||
summary_msg = [m for m in result if (m.get("content") or "").startswith(SUMMARY_PREFIX)]
|
||||
assert len(summary_msg) == 1
|
||||
assert summary_msg[0]["role"] == "assistant"
|
||||
|
||||
|
||||
@@ -12,6 +12,7 @@ from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.context_compressor import SUMMARY_PREFIX
|
||||
from run_agent import AIAgent
|
||||
|
||||
|
||||
@@ -335,7 +336,7 @@ class TestPreflightCompression:
|
||||
# Simulate compression reducing messages
|
||||
mock_compress.return_value = (
|
||||
[
|
||||
{"role": "user", "content": "[CONTEXT SUMMARY]: Previous conversation"},
|
||||
{"role": "user", "content": f"{SUMMARY_PREFIX}\nPrevious conversation"},
|
||||
{"role": "user", "content": "hello"},
|
||||
],
|
||||
"new system prompt",
|
||||
|
||||
Reference in New Issue
Block a user