fix: reduce max_retries to 3 and make ValueError/TypeError non-retryable

- max_retries reduced from 6 to 3 — 6 retries with exponential backoff
  could stall for ~275s total on persistent errors
- ValueError and TypeError now detected as non-retryable client errors
  and abort immediately instead of being retried with backoff (these are
  local validation/programming errors that will never succeed on retry)
This commit is contained in:
teknium1
2026-03-11 07:04:46 -07:00
parent d2dee43825
commit 683c8b24d4

View File

@@ -3469,7 +3469,7 @@ class AIAgent:
api_start_time = time.time()
retry_count = 0
max_retries = 6 # Increased to allow longer backoff periods
max_retries = 3
compression_attempts = 0
max_compression_attempts = 3
codex_auth_retry_attempted = False
@@ -3939,8 +3939,11 @@ class AIAgent:
# These indicate a problem with the request itself (bad model ID,
# invalid API key, forbidden, etc.) and will never succeed on retry.
# Note: 413 and context-length errors are excluded — handled above.
# Also catch local validation errors (ValueError, TypeError) — these
# are programming bugs, not transient failures.
is_local_validation_error = isinstance(api_error, (ValueError, TypeError))
is_client_status_error = isinstance(status_code, int) and 400 <= status_code < 500 and status_code != 413
is_client_error = (is_client_status_error or any(phrase in error_msg for phrase in [
is_client_error = (is_local_validation_error or is_client_status_error or any(phrase in error_msg for phrase in [
'error code: 401', 'error code: 403',
'error code: 404', 'error code: 422',
'is not a valid model', 'invalid model', 'model not found',