Compare commits

...

1 Commits

Author SHA1 Message Date
teknium1
c0b3b73bf4 fix(codex): surface error code in Responses 'failed' status errors
When a Codex Responses turn ends with status=failed, the response carries
the failure details under `response.error` as
`{code, message, param, ...}`. The previous extractor pulled only
`message`, so users seeing a rate-limit failure got a bare "Slow down"
string indistinguishable from a generic stream truncation; an
internal_error with empty message degraded to a dict dump
("{'code': 'internal_error', 'message': ''}").

Extract a `_format_responses_error()` helper that:
- prefixes `code` when both code and message are present
  (e.g. 'rate_limit_exceeded: Slow down')
- falls back to the bare `code` when message is empty
- accepts both dict and attribute-style payloads (SDK and JSON-RPC paths)
- preserves the prior status-only fallback when no error payload exists

Apply the same helper at the sibling site in
`codex_app_server_session.run_turn()` so codex-CLI subprocess turn
failures get the same treatment.

Tests:
- 8 new unit tests for `_format_responses_error` covering both shapes,
  empty/missing fields, non-string fields, and the status-only fallback.
- 2 regression tests on `_normalize_codex_response` for failed status
  with and without a code, asserting the exact RuntimeError message.
- All 3603 tests in tests/agent/ pass.

Adapted from anomalyco/opencode#28757.
2026-05-28 17:10:25 -07:00
3 changed files with 159 additions and 6 deletions

View File

@@ -980,6 +980,48 @@ def _extract_responses_reasoning_text(item: Any) -> str:
return ""
def _format_responses_error(error_obj: Any, response_status: str) -> str:
"""Build a human-readable error string from a Responses ``response.error`` payload.
The OpenAI Responses API carries failure details under ``response.error``
on terminal ``response.failed`` events, in the shape
``{"code": "rate_limit_exceeded", "message": "Slow down", "param": ...}``.
Earlier code only surfaced ``message``, which left users staring at bare
strings like ``"Slow down"`` while the failure mode (rate limit vs
context-length vs internal_error vs model-overloaded) was hidden in
``code``. We now prefix ``code`` when both are present so consumers can
distinguish failure modes without parsing the bare message.
Falls back to ``code`` alone when ``message`` is empty, and to a stable
default referencing the response status when no error payload is
available at all. Adapted from anomalyco/opencode#28757.
"""
# Pull code and message from either dict or attribute-style payloads.
code: Any = None
message: Any = None
if isinstance(error_obj, dict):
code = error_obj.get("code")
message = error_obj.get("message")
elif error_obj is not None:
code = getattr(error_obj, "code", None)
message = getattr(error_obj, "message", None)
code_str = str(code).strip() if isinstance(code, str) else (str(code).strip() if code else "")
message_str = str(message).strip() if isinstance(message, str) else (str(message).strip() if message else "")
if code_str and message_str:
return f"{code_str}: {message_str}"
if message_str:
return message_str
if code_str:
return code_str
if error_obj:
# Last-resort: stringify whatever the provider sent so it's at least
# visible in logs/UI rather than silently swallowed.
return str(error_obj)
return f"Responses API returned status '{response_status}'"
# ---------------------------------------------------------------------------
# Full response normalization
# ---------------------------------------------------------------------------
@@ -1023,10 +1065,7 @@ def _normalize_codex_response(
if response_status in {"failed", "cancelled"}:
error_obj = getattr(response, "error", None)
if isinstance(error_obj, dict):
error_msg = error_obj.get("message") or str(error_obj)
else:
error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'"
error_msg = _format_responses_error(error_obj, response_status)
raise RuntimeError(error_msg)
content_parts: List[str] = []

View File

@@ -31,6 +31,7 @@ import time
from dataclasses import dataclass, field
from typing import Any, Callable, Optional
from agent.codex_responses_adapter import _format_responses_error
from agent.redact import redact_sensitive_text
from agent.transports.codex_app_server import (
CodexAppServerClient,
@@ -581,7 +582,7 @@ class CodexAppServerSession:
(note.get("params") or {}).get("turn") or {}
).get("error")
if err_obj:
err_msg = err_obj.get("message") or str(err_obj)
err_msg = _format_responses_error(err_obj, str(turn_status))
# If the turn failed for an auth/refresh reason,
# rewrite the error into a re-auth hint AND mark
# the session for retirement.

View File

@@ -1,6 +1,11 @@
from types import SimpleNamespace
from agent.codex_responses_adapter import _normalize_codex_response
import pytest
from agent.codex_responses_adapter import (
_format_responses_error,
_normalize_codex_response,
)
def test_normalize_codex_response_drops_transient_rs_tmp_reasoning_items():
@@ -61,3 +66,111 @@ def test_normalize_codex_response_treats_summary_only_reasoning_as_incomplete():
assert assistant_message.content == ""
assert assistant_message.reasoning == "still thinking"
assert assistant_message.codex_reasoning_items is None
# ---------------------------------------------------------------------------
# _format_responses_error — adapted from anomalyco/opencode#28757.
# Provider failures should surface BOTH the code (rate_limit_exceeded /
# context_length_exceeded / internal_error / server_error) and the message,
# so consumers can tell rate limits apart from context-length failures and
# both apart from generic stream drops.
# ---------------------------------------------------------------------------
def test_format_responses_error_combines_code_and_message():
    """Both fields present: the result reads ``code: message``."""
    payload = {"code": "rate_limit_exceeded", "message": "Slow down"}
    formatted = _format_responses_error(payload, "failed")
    assert formatted == "rate_limit_exceeded: Slow down"
def test_format_responses_error_message_only():
    """A payload without a code yields the message on its own."""
    payload = {"message": "Upstream model unavailable"}
    formatted = _format_responses_error(payload, "failed")
    assert formatted == "Upstream model unavailable"
def test_format_responses_error_code_only_when_message_empty():
    """An empty message must not degrade the output to a dict dump."""
    # Some providers/proxies send a code together with an empty message.
    # The previous fallback was ``str(error_obj)``, which leaked
    # ``{'code': 'internal_error', 'message': ''}`` into chat output; the
    # bare code is the meaningful field and is what should be returned.
    payload = {"code": "internal_error", "message": ""}
    formatted = _format_responses_error(payload, "failed")
    assert formatted == "internal_error"
def test_format_responses_error_code_only_when_message_missing():
    """A payload with no ``message`` key at all yields the bare code."""
    payload = {"code": "server_error"}
    formatted = _format_responses_error(payload, "failed")
    assert formatted == "server_error"
def test_format_responses_error_attribute_style_payload():
    """Attribute-style payloads are formatted the same way as dicts."""
    # SDK objects carry ``code``/``message`` as attributes instead of dict
    # keys, so the helper has to read both shapes. A SimpleNamespace stands
    # in for such an object here.
    payload = SimpleNamespace(code="context_length_exceeded", message="too long")
    formatted = _format_responses_error(payload, "failed")
    assert formatted == "context_length_exceeded: too long"
def test_format_responses_error_falls_back_to_status_when_empty():
    """With no payload, the message names the response status instead."""
    expected_by_status = {
        "failed": "Responses API returned status 'failed'",
        "cancelled": "Responses API returned status 'cancelled'",
    }
    for status, expected in expected_by_status.items():
        assert _format_responses_error(None, status) == expected
def test_format_responses_error_stringifies_opaque_payload():
    """A payload with no usable fields is surfaced via its string form."""
    # Last resort: the provider sent something that is neither a dict nor
    # an object with code/message attributes. Showing its string form keeps
    # it visible in logs rather than swallowing it silently.
    opaque = "opaque sentinel"
    assert _format_responses_error(opaque, "failed") == "opaque sentinel"
def test_format_responses_error_ignores_non_string_code_message():
    """Non-string field values produce a best-effort string, not a crash."""
    # Defensive: a malformed gateway may put numbers or objects into these
    # fields. The numeric code is stringified and the None message dropped.
    payload = {"code": 500, "message": None}
    formatted = _format_responses_error(payload, "failed")
    assert formatted == "500"
def test_normalize_codex_response_failed_includes_code_in_error():
    """Regression: a ``failed`` response raises with code and message.

    The raised RuntimeError used to carry only a bare 'Slow down' string,
    which was indistinguishable from a generic stream truncation.
    """
    # Keep ``output`` non-empty so the "no output items" guard is not hit
    # before the failed-status branch. Real failed responses often DO carry
    # a partial message item alongside the error.
    partial_item = SimpleNamespace(
        type="message",
        role="assistant",
        status="incomplete",
        content=[SimpleNamespace(type="output_text", text="partial")],
    )
    failed_response = SimpleNamespace(
        status="failed",
        output=[partial_item],
        error={"code": "rate_limit_exceeded", "message": "Slow down"},
    )

    with pytest.raises(RuntimeError, match=r"^rate_limit_exceeded: Slow down$"):
        _normalize_codex_response(failed_response)
def test_normalize_codex_response_failed_with_message_only():
    """Backwards-compat: with no code, the message is raised verbatim."""
    partial_item = SimpleNamespace(
        type="message",
        role="assistant",
        status="incomplete",
        content=[SimpleNamespace(type="output_text", text="partial")],
    )
    failed_response = SimpleNamespace(
        status="failed",
        output=[partial_item],
        error={"message": "model error"},
    )

    with pytest.raises(RuntimeError, match=r"^model error$"):
        _normalize_codex_response(failed_response)