mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-02 16:57:36 +08:00
Compare commits
2 Commits
fix/compre
...
fix/hygien
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
aed17de774 | ||
|
|
8f8dd83443 |
@@ -1125,10 +1125,16 @@ class GatewayRunner:
|
|||||||
get_model_context_length,
|
get_model_context_length,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Read model + compression config from config.yaml — same
|
# Read model + compression config from config.yaml.
|
||||||
# source of truth the agent itself uses.
|
# NOTE: hygiene threshold is intentionally HIGHER than the agent's
|
||||||
|
# own compressor (0.85 vs 0.50). Hygiene is a safety net for
|
||||||
|
# sessions that grew too large between turns — it fires pre-agent
|
||||||
|
# to prevent API failures. The agent's own compressor handles
|
||||||
|
# normal context management during its tool loop with accurate
|
||||||
|
# real token counts. Having hygiene at 0.50 caused premature
|
||||||
|
# compression on every turn in long gateway sessions.
|
||||||
_hyg_model = "anthropic/claude-sonnet-4.6"
|
_hyg_model = "anthropic/claude-sonnet-4.6"
|
||||||
_hyg_threshold_pct = 0.50
|
_hyg_threshold_pct = 0.85
|
||||||
_hyg_compression_enabled = True
|
_hyg_compression_enabled = True
|
||||||
try:
|
try:
|
||||||
_hyg_cfg_path = _hermes_home / "config.yaml"
|
_hyg_cfg_path = _hermes_home / "config.yaml"
|
||||||
@@ -1144,22 +1150,18 @@ class GatewayRunner:
|
|||||||
elif isinstance(_model_cfg, dict):
|
elif isinstance(_model_cfg, dict):
|
||||||
_hyg_model = _model_cfg.get("default", _hyg_model)
|
_hyg_model = _model_cfg.get("default", _hyg_model)
|
||||||
|
|
||||||
# Read compression settings
|
# Read compression settings — only use enabled flag.
|
||||||
|
# The threshold is intentionally separate from the agent's
|
||||||
|
# compression.threshold (hygiene runs higher).
|
||||||
_comp_cfg = _hyg_data.get("compression", {})
|
_comp_cfg = _hyg_data.get("compression", {})
|
||||||
if isinstance(_comp_cfg, dict):
|
if isinstance(_comp_cfg, dict):
|
||||||
_hyg_threshold_pct = float(
|
|
||||||
_comp_cfg.get("threshold", _hyg_threshold_pct)
|
|
||||||
)
|
|
||||||
_hyg_compression_enabled = str(
|
_hyg_compression_enabled = str(
|
||||||
_comp_cfg.get("enabled", True)
|
_comp_cfg.get("enabled", True)
|
||||||
).lower() in ("true", "1", "yes")
|
).lower() in ("true", "1", "yes")
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# Also check env overrides (same as run_agent.py)
|
# Check env override for disabling compression entirely
|
||||||
_hyg_threshold_pct = float(
|
|
||||||
os.getenv("CONTEXT_COMPRESSION_THRESHOLD", str(_hyg_threshold_pct))
|
|
||||||
)
|
|
||||||
if os.getenv("CONTEXT_COMPRESSION_ENABLED", "").lower() in ("false", "0", "no"):
|
if os.getenv("CONTEXT_COMPRESSION_ENABLED", "").lower() in ("false", "0", "no"):
|
||||||
_hyg_compression_enabled = False
|
_hyg_compression_enabled = False
|
||||||
|
|
||||||
@@ -1446,6 +1448,11 @@ class GatewayRunner:
|
|||||||
response = agent_result.get("final_response", "")
|
response = agent_result.get("final_response", "")
|
||||||
agent_messages = agent_result.get("messages", [])
|
agent_messages = agent_result.get("messages", [])
|
||||||
|
|
||||||
|
# If the agent's session_id changed during compression, update
|
||||||
|
# session_entry so transcript writes below go to the right session.
|
||||||
|
if agent_result.get("session_id") and agent_result["session_id"] != session_entry.session_id:
|
||||||
|
session_entry.session_id = agent_result["session_id"]
|
||||||
|
|
||||||
# Prepend reasoning/thinking if display is enabled
|
# Prepend reasoning/thinking if display is enabled
|
||||||
if getattr(self, "_show_reasoning", False) and response:
|
if getattr(self, "_show_reasoning", False) and response:
|
||||||
last_reasoning = agent_result.get("last_reasoning")
|
last_reasoning = agent_result.get("last_reasoning")
|
||||||
@@ -3495,6 +3502,23 @@ class GatewayRunner:
|
|||||||
unique_tags.insert(0, "[[audio_as_voice]]")
|
unique_tags.insert(0, "[[audio_as_voice]]")
|
||||||
final_response = final_response + "\n" + "\n".join(unique_tags)
|
final_response = final_response + "\n" + "\n".join(unique_tags)
|
||||||
|
|
||||||
|
# Sync session_id: the agent may have created a new session during
|
||||||
|
# mid-run context compression (_compress_context splits sessions).
|
||||||
|
# If so, update the session store entry so the NEXT message loads
|
||||||
|
# the compressed transcript, not the stale pre-compression one.
|
||||||
|
agent = agent_holder[0]
|
||||||
|
if agent and session_key and hasattr(agent, 'session_id') and agent.session_id != session_id:
|
||||||
|
logger.info(
|
||||||
|
"Session split detected: %s → %s (compression)",
|
||||||
|
session_id, agent.session_id,
|
||||||
|
)
|
||||||
|
entry = self.session_store._entries.get(session_key)
|
||||||
|
if entry:
|
||||||
|
entry.session_id = agent.session_id
|
||||||
|
self.session_store._save()
|
||||||
|
|
||||||
|
effective_session_id = getattr(agent, 'session_id', session_id) if agent else session_id
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"final_response": final_response,
|
"final_response": final_response,
|
||||||
"last_reasoning": result.get("last_reasoning"),
|
"last_reasoning": result.get("last_reasoning"),
|
||||||
@@ -3503,6 +3527,7 @@ class GatewayRunner:
|
|||||||
"tools": tools_holder[0] or [],
|
"tools": tools_holder[0] or [],
|
||||||
"history_offset": len(agent_history),
|
"history_offset": len(agent_history),
|
||||||
"last_prompt_tokens": _last_prompt_toks,
|
"last_prompt_tokens": _last_prompt_toks,
|
||||||
|
"session_id": effective_session_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
# Start progress message sender if enabled
|
# Start progress message sender if enabled
|
||||||
|
|||||||
Reference in New Issue
Block a user