Mirror of https://github.com/NousResearch/hermes-agent.git, synced 2026-04-28 06:51:16 +08:00.
Merge PR #403: Fix context overrun crash with local LLM backends
Authored by ch3ronsa. Fixes #348. Adds 'context size' (LM Studio) and 'context window' (Ollama) to context-length error detection phrases so local backend 400 errors trigger compression instead of aborting. Also removes 'error code: 400' from the non-retryable error list as defense in depth.
This commit is contained in:
run_agent.py (22 lines changed: 11 additions, 11 deletions)
@@ -3333,23 +3333,24 @@ class AIAgent:
|
||||
}
|
||||
|
||||
# Check for context-length errors BEFORE generic 4xx handler.
|
||||
# OpenRouter returns 400 (not 413) for "maximum context length"
|
||||
# errors — if we let the generic 4xx handler catch those first,
|
||||
# it aborts immediately instead of attempting compression+retry.
|
||||
# Local backends (LM Studio, Ollama, llama.cpp) often return
|
||||
# HTTP 400 with messages like "Context size has been exceeded"
|
||||
# which must trigger compression, not an immediate abort.
|
||||
is_context_length_error = any(phrase in error_msg for phrase in [
|
||||
'context length', 'maximum context', 'token limit',
|
||||
'too many tokens', 'reduce the length', 'exceeds the limit',
|
||||
'context length', 'context size', 'maximum context',
|
||||
'token limit', 'too many tokens', 'reduce the length',
|
||||
'exceeds the limit', 'context window',
|
||||
'request entity too large', # OpenRouter/Nous 413 safety net
|
||||
])
|
||||
|
||||
if is_context_length_error:
|
||||
print(f"{self.log_prefix}⚠️ Context length exceeded - attempting compression...")
|
||||
|
||||
|
||||
original_len = len(messages)
|
||||
messages, active_system_prompt = self._compress_context(
|
||||
messages, system_message, approx_tokens=approx_tokens
|
||||
)
|
||||
|
||||
|
||||
if len(messages) < original_len:
|
||||
print(f"{self.log_prefix} 🗜️ Compressed {original_len} → {len(messages)} messages, retrying...")
|
||||
continue # Retry with compressed messages
|
||||
@@ -3370,11 +3371,10 @@ class AIAgent:
|
||||
# Check for non-retryable client errors (4xx HTTP status codes).
|
||||
# These indicate a problem with the request itself (bad model ID,
|
||||
# invalid API key, forbidden, etc.) and will never succeed on retry.
|
||||
# Note: 413 and context-length errors are excluded — handled above
|
||||
# via compression.
|
||||
# Note: 413 and context-length errors are excluded — handled above.
|
||||
is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code != 413
|
||||
is_client_error = (is_client_status_error or any(phrase in error_msg for phrase in [
|
||||
'error code: 400', 'error code: 401', 'error code: 403',
|
||||
'error code: 401', 'error code: 403',
|
||||
'error code: 404', 'error code: 422',
|
||||
'is not a valid model', 'invalid model', 'model not found',
|
||||
'invalid api key', 'invalid_api_key', 'authentication',
|
||||
@@ -3397,7 +3397,7 @@ class AIAgent:
|
||||
"failed": True,
|
||||
"error": str(api_error),
|
||||
}
|
||||
|
||||
|
||||
if retry_count >= max_retries:
|
||||
print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.")
|
||||
logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")
|
||||
|
||||
Reference in New Issue
Block a user