diff --git a/run_agent.py b/run_agent.py index 62d641b7b28..8df43d75002 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3333,23 +3333,24 @@ class AIAgent: } # Check for context-length errors BEFORE generic 4xx handler. - # OpenRouter returns 400 (not 413) for "maximum context length" - # errors — if we let the generic 4xx handler catch those first, - # it aborts immediately instead of attempting compression+retry. + # Local backends (LM Studio, Ollama, llama.cpp) often return + # HTTP 400 with messages like "Context size has been exceeded" + # which must trigger compression, not an immediate abort. is_context_length_error = any(phrase in error_msg for phrase in [ - 'context length', 'maximum context', 'token limit', - 'too many tokens', 'reduce the length', 'exceeds the limit', + 'context length', 'context size', 'maximum context', + 'token limit', 'too many tokens', 'reduce the length', + 'exceeds the limit', 'context window', 'request entity too large', # OpenRouter/Nous 413 safety net ]) if is_context_length_error: print(f"{self.log_prefix}⚠️ Context length exceeded - attempting compression...") - + original_len = len(messages) messages, active_system_prompt = self._compress_context( messages, system_message, approx_tokens=approx_tokens ) - + if len(messages) < original_len: print(f"{self.log_prefix} 🗜️ Compressed {original_len} → {len(messages)} messages, retrying...") continue # Retry with compressed messages @@ -3370,11 +3371,10 @@ class AIAgent: # Check for non-retryable client errors (4xx HTTP status codes). # These indicate a problem with the request itself (bad model ID, # invalid API key, forbidden, etc.) and will never succeed on retry. - # Note: 413 and context-length errors are excluded — handled above - # via compression. + # Note: 413 and context-length errors are excluded — handled above. is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code != 413 is_client_error = (is_client_status_error or any(phrase in error_msg for phrase in [ - 'error code: 400', 'error code: 401', 'error code: 403', + 'error code: 401', 'error code: 403', 'error code: 404', 'error code: 422', 'is not a valid model', 'invalid model', 'model not found', 'invalid api key', 'invalid_api_key', 'authentication', @@ -3397,7 +3397,7 @@ class AIAgent: "failed": True, "error": str(api_error), } - + if retry_count >= max_retries: print(f"{self.log_prefix}❌ Max retries ({max_retries}) exceeded. Giving up.") logging.error(f"{self.log_prefix}API call failed after {max_retries} retries. Last error: {api_error}")