Merge PR #403: Fix context overrun crash with local LLM backends

Authored by ch3ronsa. Fixes #348.

Adds 'context size' (LM Studio) and 'context window' (Ollama) to
context-length error detection phrases so local backend 400 errors
trigger compression instead of aborting. Also removes 'error code: 400'
from the non-retryable error list as defense in depth.
This commit is contained in:
teknium1
2026-03-04 17:48:44 -08:00

View File

@@ -3333,23 +3333,24 @@ class AIAgent:
}
# Check for context-length errors BEFORE generic 4xx handler.
# OpenRouter returns 400 (not 413) for "maximum context length"
# errors — if we let the generic 4xx handler catch those first,
# it aborts immediately instead of attempting compression+retry.
# Local backends (LM Studio, Ollama, llama.cpp) often return
# HTTP 400 with messages like "Context size has been exceeded"
# which must trigger compression, not an immediate abort.
is_context_length_error = any(phrase in error_msg for phrase in [
'context length', 'maximum context', 'token limit',
'too many tokens', 'reduce the length', 'exceeds the limit',
'context length', 'context size', 'maximum context',
'token limit', 'too many tokens', 'reduce the length',
'exceeds the limit', 'context window',
'request entity too large', # OpenRouter/Nous 413 safety net
])
if is_context_length_error:
print(f"{self.log_prefix}⚠️ Context length exceeded - attempting compression...")
original_len = len(messages)
messages, active_system_prompt = self._compress_context(
messages, system_message, approx_tokens=approx_tokens
)
if len(messages) < original_len:
print(f"{self.log_prefix} 🗜️ Compressed {original_len}{len(messages)} messages, retrying...")
continue # Retry with compressed messages
@@ -3370,11 +3371,10 @@ class AIAgent:
# Check for non-retryable client errors (4xx HTTP status codes).
# These indicate a problem with the request itself (bad model ID,
# invalid API key, forbidden, etc.) and will never succeed on retry.
# Note: 413 and context-length errors are excluded — handled above
# via compression.
# Note: 413 and context-length errors are excluded — handled above.
is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code != 413
is_client_error = (is_client_status_error or any(phrase in error_msg for phrase in [
'error code: 400', 'error code: 401', 'error code: 403',
'error code: 401', 'error code: 403',
'error code: 404', 'error code: 422',
'is not a valid model', 'invalid model', 'model not found',
'invalid api key', 'invalid_api_key', 'authentication',
@@ -3397,7 +3397,7 @@ class AIAgent:
"failed": True,
"error": str(api_error),
}
if retry_count >= max_retries:
print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.")
logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")