fix(error_classifier): avoid large-context false overflow heuristics

Generic 400 and server-disconnect heuristics used absolute token/message-count fallbacks that were too aggressive for 1M-context sessions. Gate those absolute fallbacks to smaller context windows while preserving the relative pressure checks.

Fixes #16351
This commit is contained in:
Dejie Guo
2026-04-27 12:09:53 +08:00
committed by Teknium
parent 026a5e47df
commit d29f90e89d
2 changed files with 44 additions and 2 deletions

View File

@@ -410,6 +410,24 @@ class TestClassifyApiError:
result = classify_api_error(e, approx_tokens=1000, context_length=200000)
assert result.reason == FailoverReason.format_error
def test_400_generic_many_messages_below_large_context_pressure_is_format_error(self):
    """Large-context sessions should not overflow solely due to message count."""
    # A generic 400 with no overflow wording in the body.
    err = MockAPIError("Error", status_code=400, body={"error": {"message": "Error"}})
    # Many messages, but token usage is far below the 1M context window.
    session = dict(
        provider="openai-codex",
        model="gpt-5.5",
        approx_tokens=74320,
        context_length=1_000_000,
        num_messages=432,
    )
    outcome = classify_api_error(err, **session)
    # Message count alone must not trigger the overflow/compress path here.
    assert outcome.reason == FailoverReason.format_error
    assert outcome.should_compress is False
# ── Server disconnect + large session ──
def test_disconnect_large_session_context_overflow(self):
@@ -425,6 +443,20 @@ class TestClassifyApiError:
result = classify_api_error(e, approx_tokens=5000, context_length=200000)
assert result.reason == FailoverReason.timeout
def test_disconnect_many_messages_below_large_context_pressure_is_timeout(self):
    """Large-context disconnects should not overflow solely due to message count."""
    # A mid-stream server disconnect with no overflow signal in the message.
    disconnect = Exception("server disconnected without sending complete message")
    outcome = classify_api_error(
        disconnect,
        provider="openai-codex",
        model="gpt-5.5",
        # Well under the 1M context window despite the large message count.
        approx_tokens=74320,
        context_length=1_000_000,
        num_messages=432,
    )
    # Expect a plain timeout classification, not a compress-and-retry overflow.
    assert outcome.reason == FailoverReason.timeout
    assert outcome.should_compress is False
# ── Provider-specific: Anthropic thinking signature ──
def test_anthropic_thinking_signature(self):