Mirror of https://github.com/NousResearch/hermes-agent.git, synced 2026-05-05 02:07:34 +08:00
Compare commits
1 commit
fix/plugin
...
hermes/her
| Author | SHA1 | Date |
|---|---|---|
| | 0546e935b4 | |
@@ -2361,7 +2361,18 @@ class GatewayRunner:
         # 85% * 1.4 = 119% of context — which exceeds the model's limit
         # and prevented hygiene from ever firing for ~200K models (GLM-5).

-        _needs_compress = _approx_tokens >= _compress_token_threshold
+        # Hard safety valve: force compression if message count is
+        # extreme, regardless of token estimates. This breaks the
+        # death spiral where API disconnects prevent token data
+        # collection, which prevents compression, which causes more
+        # disconnects. 400 messages is well above normal sessions
+        # but catches runaway growth before it becomes unrecoverable.
+        # (#2153)
+        _HARD_MSG_LIMIT = 400
+        _needs_compress = (
+            _approx_tokens >= _compress_token_threshold
+            or _msg_count >= _HARD_MSG_LIMIT
+        )

         if _needs_compress:
             logger.info(
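The arithmetic in the comment above is the crux of the bug: 0.85 × 1.4 = 1.19, so the effective trigger point sat at roughly 119% of the context window and hygiene could never fire for ~200K-context models. The new trigger pairs the token threshold with a message-count safety valve. A minimal sketch of that two-condition check, with illustrative names (only the 400-message limit is taken from the diff):

```python
# Minimal sketch (illustrative names, not the repository's API) of the
# two-condition compression trigger introduced in the hunk above.
_HARD_MSG_LIMIT = 400  # hard safety valve, value taken from the diff

def needs_compress(approx_tokens: int, msg_count: int,
                   compress_token_threshold: int) -> bool:
    # Normal path: compress once the approximate token count crosses
    # the configured threshold.
    over_tokens = approx_tokens >= compress_token_threshold
    # Safety valve: compress on raw message count even when the token
    # estimate is stale or zero (e.g. after repeated API disconnects).
    over_messages = msg_count >= _HARD_MSG_LIMIT
    return over_tokens or over_messages

# Example: a disconnect-ridden session with no usable token data still
# gets compressed once it balloons past 400 messages.
assert needs_compress(approx_tokens=0, msg_count=450,
                      compress_token_threshold=170_000)
```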
run_agent.py (43 lines changed)
@@ -7486,6 +7486,32 @@ class AIAgent:
                 force=True,
             )

+            # Server disconnects on large sessions are often caused by
+            # the request exceeding the provider's context/payload limit
+            # without a proper HTTP error response. Treat these as
+            # context-length errors to trigger compression rather than
+            # burning through retries that will all fail the same way.
+            # This breaks the death spiral: disconnect → no token data
+            # → no compression → bigger session → more disconnects.
+            # (#2153)
+            if not is_context_length_error and not status_code:
+                _is_server_disconnect = (
+                    'server disconnected' in error_msg
+                    or 'peer closed connection' in error_msg
+                    or error_type in ('ReadError', 'RemoteProtocolError', 'ServerDisconnectedError')
+                )
+                if _is_server_disconnect:
+                    ctx_len = getattr(getattr(self, 'context_compressor', None), 'context_length', 200000)
+                    _is_large = approx_tokens > ctx_len * 0.6 or len(api_messages) > 200
+                    if _is_large:
+                        is_context_length_error = True
+                        self._vprint(
+                            f"{self.log_prefix}⚠️ Server disconnected with large session "
+                            f"(~{approx_tokens:,} tokens, {len(api_messages)} msgs) — "
+                            f"treating as context-length error, attempting compression.",
+                            force=True,
+                        )
+
             if is_context_length_error:
                 compressor = self.context_compressor
                 old_ctx = compressor.context_length
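The disconnect heuristic above is self-contained enough to factor into a standalone predicate, which also makes it easy to unit-test against captured error strings. The sketch below mirrors the checks in the hunk; the function name, the explicit lowercasing, and the way the error fields are passed in are assumptions, not the repository's code.

```python
def looks_like_oversized_disconnect(error_msg: str, error_type: str,
                                    status_code: int | None,
                                    approx_tokens: int, msg_count: int,
                                    context_length: int = 200_000) -> bool:
    """Heuristic: a bare server disconnect on a large session is probably
    the provider dropping an oversized request without an HTTP error."""
    if status_code:
        # A real HTTP status means the failure was reported properly.
        return False
    msg = error_msg.lower()
    is_disconnect = (
        'server disconnected' in msg
        or 'peer closed connection' in msg
        or error_type in ('ReadError', 'RemoteProtocolError',
                          'ServerDisconnectedError')
    )
    # Only reinterpret the disconnect when the session is plausibly too big.
    is_large = approx_tokens > context_length * 0.6 or msg_count > 200
    return is_disconnect and is_large
```

With a predicate like this, the retry path would reduce to setting `is_context_length_error = True` when it returns true, and the 0.6 / 200 cut-offs could be tuned in one place.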
@@ -8120,11 +8146,20 @@ class AIAgent:
         # threshold (default 50%) leaves ample headroom; if tool
         # results push past it, the next API call will report the
         # real total and trigger compression then.
+        #
+        # If last_prompt_tokens is 0 (stale after API disconnect
+        # or provider returned no usage data), fall back to rough
+        # estimate to avoid missing compression. Without this,
+        # a session can grow unbounded after disconnects because
+        # should_compress(0) never fires. (#2153)
         _compressor = self.context_compressor
-        _real_tokens = (
-            _compressor.last_prompt_tokens
-            + _compressor.last_completion_tokens
-        )
+        if _compressor.last_prompt_tokens > 0:
+            _real_tokens = (
+                _compressor.last_prompt_tokens
+                + _compressor.last_completion_tokens
+            )
+        else:
+            _real_tokens = estimate_messages_tokens_rough(messages)

         # ── Context pressure warnings (user-facing only) ──────────
         # Notify the user (NOT the LLM) as context approaches the
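The else-branch fallback depends on `estimate_messages_tokens_rough`, whose body is not shown in this diff. Rough estimators of this kind usually divide the serialized character count by roughly 4 characters per token; the sketch below is one plausible shape under that assumption, not the repository's implementation.

```python
import json

def estimate_messages_tokens_rough(messages: list[dict],
                                   chars_per_token: float = 4.0) -> int:
    """Very rough token estimate for a chat transcript, used only as a
    fallback when the provider returned no usage data (sketch, assumed
    ~4 characters per token)."""
    total_chars = 0
    for message in messages:
        content = message.get('content', '')
        if not isinstance(content, str):
            # Tool results / multimodal parts: approximate by JSON length.
            content = json.dumps(content, ensure_ascii=False)
        total_chars += len(content) + len(message.get('role', ''))
    return int(total_chars / chars_per_token)
```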