feat: configurable custom compaction prompt for context compression

Add a compression.prompt config option that lets users override the default summarization prompt used during context compression. What changes: 1. ContextCompressor.__init__() accepts a compaction_prompt_override param. When set (a non-empty string after stripping whitespace), it replaces the default summarization instructions in _generate_summary(). The framing (token target, turns to summarize, [CONTEXT SUMMARY]: prefix instruction) stays the same. 2. run_agent.py reads the CONTEXT_COMPRESSION_PROMPT env var and passes it to ContextCompressor. It also raises the summary_target_tokens value passed to ContextCompressor from 500 to 2500. 3. Config wiring — the new 'prompt' key under the 'compression' section is mapped to the CONTEXT_COMPRESSION_PROMPT env var in: - cli.py (load_cli_config defaults + env mapping) - hermes_cli/config.py (DEFAULT_CONFIG + show_config display) - gateway/run.py (gateway env mapping) Usage in config.yaml: compression: prompt: 'Your custom summarization instructions here' Or via environment variable: CONTEXT_COMPRESSION_PROMPT='Your custom instructions' When empty (default), the built-in summarization prompt is used unchanged. This gives power users control over how context is compressed without modifying source code. Inspired by PR #776 by @kshitijk4poor and the research in #499.
2026-04-28 23:11:37 +08:00 · 2026-03-11 05:45:24 -07:00
5 changed files with 37 additions and 17 deletions
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -34,6 +34,7 @@ class ContextCompressor:
        summary_target_tokens: int = 2500,
        quiet_mode: bool = False,
        summary_model_override: str = None,
        compaction_prompt_override: str = None,
        base_url: str = "",
    ):
        self.model = model
@@ -55,6 +56,11 @@ class ContextCompressor:
        self.client, default_model = get_text_auxiliary_client("compression")
        self.summary_model = summary_model_override or default_model
        self.compaction_prompt = (
            compaction_prompt_override.strip()
            if compaction_prompt_override and compaction_prompt_override.strip()
            else None
        )
    def update_from_response(self, usage: Dict[str, Any]):
        """Update tracked token usage from API response."""
@@ -103,22 +109,25 @@ class ContextCompressor:
            parts.append(f"[{role.upper()}]: {content}")
        content_to_summarize = "\n\n".join(parts)
-        prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.
+        default_instructions = (
-
+            "Summarize these conversation turns concisely. This summary will "
-Write from a neutral perspective describing:
+            "replace these turns in the conversation history.\n\n"
-1. What actions were taken (tool calls, searches, file operations)
+            "Write from a neutral perspective describing:\n"
-2. Key information or results obtained
+            "1. What actions were taken (tool calls, searches, file operations)\n"
-3. Important decisions or findings
+            "2. Key information or results obtained\n"
-4. Relevant data, file names, or outputs
+            "3. Important decisions or findings\n"
-
+            "4. Relevant data, file names, or outputs\n\n"
-Keep factual and informative. Target ~{self.summary_target_tokens} tokens.
+            "Keep factual and informative."
-
+        )
---
+        instructions = self.compaction_prompt or default_instructions
-TURNS TO SUMMARIZE:
+        prompt = (
-{content_to_summarize}
+            f"{instructions}\n\n"
---
+            f"Target ~{self.summary_target_tokens} tokens.\n\n"
-
+            "---\n"
-Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
+            f"TURNS TO SUMMARIZE:\n{content_to_summarize}\n"
            "---\n\n"
            'Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.'
        )
        # 1. Try the auxiliary model (cheap/fast)
        if self.client:
--- a/cli.py
+++ b/cli.py
@@ -177,6 +177,7 @@ def load_cli_config() -> Dict[str, Any]:
            "enabled": True,      # Auto-compress when approaching context limit
            "threshold": 0.85,    # Compress at 85% of model's context limit
            "summary_model": "google/gemini-3-flash-preview",  # Fast/cheap model for summaries
            "prompt": "",         # Custom compaction prompt (empty = use default)
        },
        "agent": {
            "max_turns": 90,  # Default max tool-calling iterations (shared with subagents)
@@ -350,6 +351,7 @@ def load_cli_config() -> Dict[str, Any]:
        "enabled": "CONTEXT_COMPRESSION_ENABLED",
        "threshold": "CONTEXT_COMPRESSION_THRESHOLD",
        "summary_model": "CONTEXT_COMPRESSION_MODEL",
        "prompt": "CONTEXT_COMPRESSION_PROMPT",
        "summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
    }
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -91,6 +91,7 @@ if _config_path.exists():
                "enabled": "CONTEXT_COMPRESSION_ENABLED",
                "threshold": "CONTEXT_COMPRESSION_THRESHOLD",
                "summary_model": "CONTEXT_COMPRESSION_MODEL",
                "prompt": "CONTEXT_COMPRESSION_PROMPT",
                "summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
            }
            for _cfg_key, _env_var in _compression_env_map.items():
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -122,6 +122,7 @@ DEFAULT_CONFIG = {
        "enabled": True,
        "threshold": 0.85,
        "summary_model": "google/gemini-3-flash-preview",
        "prompt": "",
        "summary_provider": "auto",
    },
@@ -1069,6 +1070,11 @@ def show_config():
    if enabled:
        print(f"  Threshold:    {compression.get('threshold', 0.85) * 100:.0f}%")
        print(f"  Model:        {compression.get('summary_model', 'google/gemini-3-flash-preview')}")
        custom_prompt = compression.get("prompt", "")
        if custom_prompt:
            # Show first 60 chars of custom prompt
            display_prompt = custom_prompt[:60] + ("..." if len(custom_prompt) > 60 else "")
            print(f"  Prompt:       {display_prompt}")
        comp_provider = compression.get('summary_provider', 'auto')
        if comp_provider != 'auto':
            print(f"  Provider:     {comp_provider}")
--- a/run_agent.py
+++ b/run_agent.py
@@ -604,14 +604,16 @@ class AIAgent:
        compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85"))
        compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes")
        compression_summary_model = os.getenv("CONTEXT_COMPRESSION_MODEL") or None
        compression_prompt = os.getenv("CONTEXT_COMPRESSION_PROMPT") or None
        self.context_compressor = ContextCompressor(
            model=self.model,
            threshold_percent=compression_threshold,
            protect_first_n=3,
            protect_last_n=4,
-            summary_target_tokens=500,
+            summary_target_tokens=2500,
            summary_model_override=compression_summary_model,
            compaction_prompt_override=compression_prompt,
            quiet_mode=self.quiet_mode,
            base_url=self.base_url,
        )