fix(error_classifier): avoid large-context false overflow heuristics

Generic 400 and server-disconnect heuristics used absolute token/message-count fallbacks that were too aggressive for 1M-context sessions. Gate those absolute fallbacks to smaller context windows while preserving the relative pressure checks.

Fixes #16351
This commit is contained in:
Dejie Guo
2026-04-27 12:09:53 +08:00
committed by Teknium
parent 026a5e47df
commit d29f90e89d
2 changed files with 44 additions and 2 deletions

View File

@@ -410,6 +410,24 @@ class TestClassifyApiError:
result = classify_api_error(e, approx_tokens=1000, context_length=200000)
assert result.reason == FailoverReason.format_error
def test_400_generic_many_messages_below_large_context_pressure_is_format_error(self):
    """Large-context sessions should not overflow solely due to message count."""
    # A generic 400 with no overflow wording in the body.
    err = MockAPIError("Error", status_code=400, body={"error": {"message": "Error"}})
    # Many messages, but token usage is far below the 1M context window.
    session = dict(
        provider="openai-codex",
        model="gpt-5.5",
        approx_tokens=74320,
        context_length=1_000_000,
        num_messages=432,
    )
    outcome = classify_api_error(err, **session)
    # Message count alone must not trigger the overflow/compress path here.
    assert outcome.reason == FailoverReason.format_error
    assert outcome.should_compress is False
# ── Server disconnect + large session ──
def test_disconnect_large_session_context_overflow(self):
@@ -425,6 +443,20 @@ class TestClassifyApiError:
result = classify_api_error(e, approx_tokens=5000, context_length=200000)
assert result.reason == FailoverReason.timeout
def test_disconnect_many_messages_below_large_context_pressure_is_timeout(self):
    """Large-context disconnects should not overflow solely due to message count."""
    # A mid-stream server disconnect with no overflow signal in the message.
    disconnect = Exception("server disconnected without sending complete message")
    outcome = classify_api_error(
        disconnect,
        provider="openai-codex",
        model="gpt-5.5",
        # Well under the 1M context window despite the large message count.
        approx_tokens=74320,
        context_length=1_000_000,
        num_messages=432,
    )
    # Expect a plain timeout classification, not a compress-and-retry overflow.
    assert outcome.reason == FailoverReason.timeout
    assert outcome.should_compress is False
# ── Provider-specific: Anthropic thinking signature ──
def test_anthropic_thinking_signature(self):