diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 01aa2af804..1298095879 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -34,6 +34,7 @@ class ContextCompressor:
         summary_target_tokens: int = 2500,
         quiet_mode: bool = False,
         summary_model_override: str = None,
+        compaction_prompt_override: str = None,
         base_url: str = "",
     ):
         self.model = model
@@ -55,6 +56,11 @@ class ContextCompressor:
 
         self.client, default_model = get_text_auxiliary_client("compression")
         self.summary_model = summary_model_override or default_model
+        self.compaction_prompt = (
+            compaction_prompt_override.strip()
+            if compaction_prompt_override and compaction_prompt_override.strip()
+            else None
+        )
 
     def update_from_response(self, usage: Dict[str, Any]):
         """Update tracked token usage from API response."""
@@ -103,22 +109,25 @@
                 parts.append(f"[{role.upper()}]: {content}")
         content_to_summarize = "\n\n".join(parts)
 
-        prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.
-
-Write from a neutral perspective describing:
-1. What actions were taken (tool calls, searches, file operations)
-2. Key information or results obtained
-3. Important decisions or findings
-4. Relevant data, file names, or outputs
-
-Keep factual and informative. Target ~{self.summary_target_tokens} tokens.
-
----
-TURNS TO SUMMARIZE:
-{content_to_summarize}
----
-
-Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
+        default_instructions = (
+            "Summarize these conversation turns concisely. This summary will "
+            "replace these turns in the conversation history.\n\n"
+            "Write from a neutral perspective describing:\n"
+            "1. What actions were taken (tool calls, searches, file operations)\n"
+            "2. Key information or results obtained\n"
+            "3. Important decisions or findings\n"
+            "4. Relevant data, file names, or outputs\n\n"
+            "Keep factual and informative."
+        )
+        instructions = self.compaction_prompt or default_instructions
+        prompt = (
+            f"{instructions}\n\n"
+            f"Target ~{self.summary_target_tokens} tokens.\n\n"
+            "---\n"
+            f"TURNS TO SUMMARIZE:\n{content_to_summarize}\n"
+            "---\n\n"
+            'Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.'
+        )
 
         # 1. Try the auxiliary model (cheap/fast)
         if self.client:
diff --git a/cli.py b/cli.py
index 5eb9577bbf..785c8132c0 100755
--- a/cli.py
+++ b/cli.py
@@ -177,6 +177,7 @@ def load_cli_config() -> Dict[str, Any]:
             "enabled": True, # Auto-compress when approaching context limit
             "threshold": 0.85, # Compress at 85% of model's context limit
             "summary_model": "google/gemini-3-flash-preview", # Fast/cheap model for summaries
+            "prompt": "", # Custom compaction prompt (empty = use default)
         },
         "agent": {
             "max_turns": 90, # Default max tool-calling iterations (shared with subagents)
@@ -350,6 +351,7 @@
         "enabled": "CONTEXT_COMPRESSION_ENABLED",
         "threshold": "CONTEXT_COMPRESSION_THRESHOLD",
         "summary_model": "CONTEXT_COMPRESSION_MODEL",
+        "prompt": "CONTEXT_COMPRESSION_PROMPT",
         "summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
     }
 
diff --git a/gateway/run.py b/gateway/run.py
index 63131dcec8..77546b0a61 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -91,6 +91,7 @@ if _config_path.exists():
         "enabled": "CONTEXT_COMPRESSION_ENABLED",
         "threshold": "CONTEXT_COMPRESSION_THRESHOLD",
         "summary_model": "CONTEXT_COMPRESSION_MODEL",
+        "prompt": "CONTEXT_COMPRESSION_PROMPT",
         "summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
     }
     for _cfg_key, _env_var in _compression_env_map.items():
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index e8df6f3f43..892aa74d88 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -122,6 +122,7 @@ DEFAULT_CONFIG = {
         "enabled": True,
         "threshold": 0.85,
         "summary_model": "google/gemini-3-flash-preview",
+        "prompt": "",
         "summary_provider": "auto",
     },
 
@@ -1069,6 +1070,11 @@ def show_config():
     if enabled:
         print(f" Threshold: {compression.get('threshold', 0.85) * 100:.0f}%")
         print(f" Model: {compression.get('summary_model', 'google/gemini-3-flash-preview')}")
+        custom_prompt = compression.get("prompt", "")
+        if custom_prompt:
+            # Show first 60 chars of custom prompt
+            display_prompt = custom_prompt[:60] + ("..." if len(custom_prompt) > 60 else "")
+            print(f" Prompt: {display_prompt}")
         comp_provider = compression.get('summary_provider', 'auto')
         if comp_provider != 'auto':
             print(f" Provider: {comp_provider}")
diff --git a/run_agent.py b/run_agent.py
index 9b3a7dba35..f8d5e9b85a 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -604,14 +604,16 @@
         compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85"))
         compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes")
         compression_summary_model = os.getenv("CONTEXT_COMPRESSION_MODEL") or None
+        compression_prompt = os.getenv("CONTEXT_COMPRESSION_PROMPT") or None
 
         self.context_compressor = ContextCompressor(
             model=self.model,
             threshold_percent=compression_threshold,
             protect_first_n=3,
             protect_last_n=4,
-            summary_target_tokens=500,
+            summary_target_tokens=2500,
             summary_model_override=compression_summary_model,
+            compaction_prompt_override=compression_prompt,
             quiet_mode=self.quiet_mode,
             base_url=self.base_url,
         )