mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 23:11:37 +08:00
Compare commits
1 Commits
skill/gith
...
feat/custo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
32c89fed18 |
@@ -34,6 +34,7 @@ class ContextCompressor:
|
|||||||
summary_target_tokens: int = 2500,
|
summary_target_tokens: int = 2500,
|
||||||
quiet_mode: bool = False,
|
quiet_mode: bool = False,
|
||||||
summary_model_override: str = None,
|
summary_model_override: str = None,
|
||||||
|
compaction_prompt_override: str = None,
|
||||||
base_url: str = "",
|
base_url: str = "",
|
||||||
):
|
):
|
||||||
self.model = model
|
self.model = model
|
||||||
@@ -55,6 +56,11 @@ class ContextCompressor:
|
|||||||
|
|
||||||
self.client, default_model = get_text_auxiliary_client("compression")
|
self.client, default_model = get_text_auxiliary_client("compression")
|
||||||
self.summary_model = summary_model_override or default_model
|
self.summary_model = summary_model_override or default_model
|
||||||
|
self.compaction_prompt = (
|
||||||
|
compaction_prompt_override.strip()
|
||||||
|
if compaction_prompt_override and compaction_prompt_override.strip()
|
||||||
|
else None
|
||||||
|
)
|
||||||
|
|
||||||
def update_from_response(self, usage: Dict[str, Any]):
|
def update_from_response(self, usage: Dict[str, Any]):
|
||||||
"""Update tracked token usage from API response."""
|
"""Update tracked token usage from API response."""
|
||||||
@@ -103,22 +109,25 @@ class ContextCompressor:
|
|||||||
parts.append(f"[{role.upper()}]: {content}")
|
parts.append(f"[{role.upper()}]: {content}")
|
||||||
|
|
||||||
content_to_summarize = "\n\n".join(parts)
|
content_to_summarize = "\n\n".join(parts)
|
||||||
prompt = f"""Summarize these conversation turns concisely. This summary will replace these turns in the conversation history.
|
default_instructions = (
|
||||||
|
"Summarize these conversation turns concisely. This summary will "
|
||||||
Write from a neutral perspective describing:
|
"replace these turns in the conversation history.\n\n"
|
||||||
1. What actions were taken (tool calls, searches, file operations)
|
"Write from a neutral perspective describing:\n"
|
||||||
2. Key information or results obtained
|
"1. What actions were taken (tool calls, searches, file operations)\n"
|
||||||
3. Important decisions or findings
|
"2. Key information or results obtained\n"
|
||||||
4. Relevant data, file names, or outputs
|
"3. Important decisions or findings\n"
|
||||||
|
"4. Relevant data, file names, or outputs\n\n"
|
||||||
Keep factual and informative. Target ~{self.summary_target_tokens} tokens.
|
"Keep factual and informative."
|
||||||
|
)
|
||||||
---
|
instructions = self.compaction_prompt or default_instructions
|
||||||
TURNS TO SUMMARIZE:
|
prompt = (
|
||||||
{content_to_summarize}
|
f"{instructions}\n\n"
|
||||||
---
|
f"Target ~{self.summary_target_tokens} tokens.\n\n"
|
||||||
|
"---\n"
|
||||||
Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
|
f"TURNS TO SUMMARIZE:\n{content_to_summarize}\n"
|
||||||
|
"---\n\n"
|
||||||
|
'Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix.'
|
||||||
|
)
|
||||||
|
|
||||||
# 1. Try the auxiliary model (cheap/fast)
|
# 1. Try the auxiliary model (cheap/fast)
|
||||||
if self.client:
|
if self.client:
|
||||||
|
|||||||
2
cli.py
2
cli.py
@@ -177,6 +177,7 @@ def load_cli_config() -> Dict[str, Any]:
|
|||||||
"enabled": True, # Auto-compress when approaching context limit
|
"enabled": True, # Auto-compress when approaching context limit
|
||||||
"threshold": 0.85, # Compress at 85% of model's context limit
|
"threshold": 0.85, # Compress at 85% of model's context limit
|
||||||
"summary_model": "google/gemini-3-flash-preview", # Fast/cheap model for summaries
|
"summary_model": "google/gemini-3-flash-preview", # Fast/cheap model for summaries
|
||||||
|
"prompt": "", # Custom compaction prompt (empty = use default)
|
||||||
},
|
},
|
||||||
"agent": {
|
"agent": {
|
||||||
"max_turns": 90, # Default max tool-calling iterations (shared with subagents)
|
"max_turns": 90, # Default max tool-calling iterations (shared with subagents)
|
||||||
@@ -350,6 +351,7 @@ def load_cli_config() -> Dict[str, Any]:
|
|||||||
"enabled": "CONTEXT_COMPRESSION_ENABLED",
|
"enabled": "CONTEXT_COMPRESSION_ENABLED",
|
||||||
"threshold": "CONTEXT_COMPRESSION_THRESHOLD",
|
"threshold": "CONTEXT_COMPRESSION_THRESHOLD",
|
||||||
"summary_model": "CONTEXT_COMPRESSION_MODEL",
|
"summary_model": "CONTEXT_COMPRESSION_MODEL",
|
||||||
|
"prompt": "CONTEXT_COMPRESSION_PROMPT",
|
||||||
"summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
|
"summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -91,6 +91,7 @@ if _config_path.exists():
|
|||||||
"enabled": "CONTEXT_COMPRESSION_ENABLED",
|
"enabled": "CONTEXT_COMPRESSION_ENABLED",
|
||||||
"threshold": "CONTEXT_COMPRESSION_THRESHOLD",
|
"threshold": "CONTEXT_COMPRESSION_THRESHOLD",
|
||||||
"summary_model": "CONTEXT_COMPRESSION_MODEL",
|
"summary_model": "CONTEXT_COMPRESSION_MODEL",
|
||||||
|
"prompt": "CONTEXT_COMPRESSION_PROMPT",
|
||||||
"summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
|
"summary_provider": "CONTEXT_COMPRESSION_PROVIDER",
|
||||||
}
|
}
|
||||||
for _cfg_key, _env_var in _compression_env_map.items():
|
for _cfg_key, _env_var in _compression_env_map.items():
|
||||||
|
|||||||
@@ -122,6 +122,7 @@ DEFAULT_CONFIG = {
|
|||||||
"enabled": True,
|
"enabled": True,
|
||||||
"threshold": 0.85,
|
"threshold": 0.85,
|
||||||
"summary_model": "google/gemini-3-flash-preview",
|
"summary_model": "google/gemini-3-flash-preview",
|
||||||
|
"prompt": "",
|
||||||
"summary_provider": "auto",
|
"summary_provider": "auto",
|
||||||
},
|
},
|
||||||
|
|
||||||
@@ -1069,6 +1070,11 @@ def show_config():
|
|||||||
if enabled:
|
if enabled:
|
||||||
print(f" Threshold: {compression.get('threshold', 0.85) * 100:.0f}%")
|
print(f" Threshold: {compression.get('threshold', 0.85) * 100:.0f}%")
|
||||||
print(f" Model: {compression.get('summary_model', 'google/gemini-3-flash-preview')}")
|
print(f" Model: {compression.get('summary_model', 'google/gemini-3-flash-preview')}")
|
||||||
|
custom_prompt = compression.get("prompt", "")
|
||||||
|
if custom_prompt:
|
||||||
|
# Show first 60 chars of custom prompt
|
||||||
|
display_prompt = custom_prompt[:60] + ("..." if len(custom_prompt) > 60 else "")
|
||||||
|
print(f" Prompt: {display_prompt}")
|
||||||
comp_provider = compression.get('summary_provider', 'auto')
|
comp_provider = compression.get('summary_provider', 'auto')
|
||||||
if comp_provider != 'auto':
|
if comp_provider != 'auto':
|
||||||
print(f" Provider: {comp_provider}")
|
print(f" Provider: {comp_provider}")
|
||||||
|
|||||||
@@ -604,14 +604,16 @@ class AIAgent:
|
|||||||
compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85"))
|
compression_threshold = float(os.getenv("CONTEXT_COMPRESSION_THRESHOLD", "0.85"))
|
||||||
compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes")
|
compression_enabled = os.getenv("CONTEXT_COMPRESSION_ENABLED", "true").lower() in ("true", "1", "yes")
|
||||||
compression_summary_model = os.getenv("CONTEXT_COMPRESSION_MODEL") or None
|
compression_summary_model = os.getenv("CONTEXT_COMPRESSION_MODEL") or None
|
||||||
|
compression_prompt = os.getenv("CONTEXT_COMPRESSION_PROMPT") or None
|
||||||
|
|
||||||
self.context_compressor = ContextCompressor(
|
self.context_compressor = ContextCompressor(
|
||||||
model=self.model,
|
model=self.model,
|
||||||
threshold_percent=compression_threshold,
|
threshold_percent=compression_threshold,
|
||||||
protect_first_n=3,
|
protect_first_n=3,
|
||||||
protect_last_n=4,
|
protect_last_n=4,
|
||||||
summary_target_tokens=500,
|
summary_target_tokens=2500,
|
||||||
summary_model_override=compression_summary_model,
|
summary_model_override=compression_summary_model,
|
||||||
|
compaction_prompt_override=compression_prompt,
|
||||||
quiet_mode=self.quiet_mode,
|
quiet_mode=self.quiet_mode,
|
||||||
base_url=self.base_url,
|
base_url=self.base_url,
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user