fix(codex): ensure reasoning items have required following item in API input

Follow-up to the reasoning-only response fix. Tracing the full replay path turned up two further issues; this commit fixes both and adds tests:

1. _chat_messages_to_responses_input: when a reasoning-only interim message was converted to Responses API input, the reasoning items were emitted as the last items with no following item. The Responses API requires a following item after each reasoning item (otherwise it returns a 'missing_following_item' error, as seen in OpenHands #11406). The conversion now emits an empty assistant message as the required following item when content is empty but reasoning items were added.

2. Duplicate detection: two consecutive reasoning-only incomplete messages with identical empty content/reasoning but different encrypted codex_reasoning_items were incorrectly treated as duplicates, silently dropping the second response's reasoning state. The duplicate comparison now includes codex_reasoning_items.

3. Added tests for both the API input conversion path and the duplicate detection edge case.

Research context: verified against OpenCode (uses the Vercel AI SDK and has no retry loop, so it avoids the issue), Clawdbot (drops orphaned reasoning blocks entirely), and OpenHands (hit the missing_following_item error). Our approach preserves reasoning continuity while satisfying the API constraint.
fix(codex): treat reasoning-only responses as incomplete, not stop
2026-05-06 02:37:05 +08:00 · 2026-03-19 10:22:46 -07:00 · 2026-03-19 10:00:43 -07:00
2 changed files with 236 additions and 1 deletion
--- a/run_agent.py
+++ b/run_agent.py
@@ -2356,13 +2356,22 @@ class AIAgent:
                    # Replay encrypted reasoning items from previous turns
                    # so the API can maintain coherent reasoning chains.
                    codex_reasoning = msg.get("codex_reasoning_items")
                    has_codex_reasoning = False
                    if isinstance(codex_reasoning, list):
                        for ri in codex_reasoning:
                            if isinstance(ri, dict) and ri.get("encrypted_content"):
                                items.append(ri)
                                has_codex_reasoning = True
                    if content_text.strip():
                        items.append({"role": "assistant", "content": content_text})
                    elif has_codex_reasoning:
                        # The Responses API requires a following item after each
                        # reasoning item (otherwise: missing_following_item error).
                        # When the assistant produced only reasoning with no visible
                        # content, emit an empty assistant message as the required
                        # following item.
                        items.append({"role": "assistant", "content": ""})
                    tool_calls = msg.get("tool_calls")
                    if isinstance(tool_calls, list):
@@ -2804,6 +2813,14 @@ class AIAgent:
            finish_reason = "tool_calls"
        elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase):
            finish_reason = "incomplete"
        elif reasoning_items_raw and not final_text:
            # Response contains only reasoning (encrypted thinking state) with
            # no visible content or tool calls.  The model is still thinking and
            # needs another turn to produce the actual answer.  Marking this as
            # "stop" would send it into the empty-content retry loop which burns
            # 3 retries then fails — treat it as incomplete instead so the Codex
            # continuation path handles it correctly.
            finish_reason = "incomplete"
        else:
            finish_reason = "stop"
        return assistant_message, finish_reason
@@ -6214,15 +6231,24 @@ class AIAgent:
                    interim_msg = self._build_assistant_message(assistant_message, finish_reason)
                    interim_has_content = bool((interim_msg.get("content") or "").strip())
                    interim_has_reasoning = bool(interim_msg.get("reasoning", "").strip()) if isinstance(interim_msg.get("reasoning"), str) else False
                    interim_has_codex_reasoning = bool(interim_msg.get("codex_reasoning_items"))
-                    if interim_has_content or interim_has_reasoning:
+                    if interim_has_content or interim_has_reasoning or interim_has_codex_reasoning:
                        last_msg = messages[-1] if messages else None
                        # Duplicate detection: two consecutive incomplete assistant
                        # messages with identical content AND reasoning are collapsed.
                        # For reasoning-only messages (codex_reasoning_items differ but
                        # visible content/reasoning are both empty), we also compare
                        # the encrypted items to avoid silently dropping new state.
                        last_codex_items = last_msg.get("codex_reasoning_items") if isinstance(last_msg, dict) else None
                        interim_codex_items = interim_msg.get("codex_reasoning_items")
                        duplicate_interim = (
                            isinstance(last_msg, dict)
                            and last_msg.get("role") == "assistant"
                            and last_msg.get("finish_reason") == "incomplete"
                            and (last_msg.get("content") or "") == (interim_msg.get("content") or "")
                            and (last_msg.get("reasoning") or "") == (interim_msg.get("reasoning") or "")
                            and last_codex_items == interim_codex_items
                        )
                        if not duplicate_interim:
                            messages.append(interim_msg)
--- a/tests/test_run_agent_codex_responses.py
+++ b/tests/test_run_agent_codex_responses.py
@@ -830,3 +830,212 @@ def test_dump_api_request_debug_uses_chat_completions_url(monkeypatch, tmp_path)
    payload = json.loads(dump_file.read_text())
    assert payload["request"]["url"] == "http://127.0.0.1:9208/v1/chat/completions"
 # --- Reasoning-only response tests (fix for empty content retry loop) ---
 def _codex_reasoning_only_response(*, encrypted_content="enc_abc123", summary_text="Thinking..."):
    """Build a fake Codex response whose sole output item is a reasoning item.

    The response carries no message text and no tool calls.  Both arguments
    are keyword-only: ``encrypted_content`` is stored on the reasoning item and
    ``summary_text`` becomes the text of its single ``summary_text`` part.
    """
    summary_part = SimpleNamespace(type="summary_text", text=summary_text)
    reasoning_item = SimpleNamespace(
        type="reasoning",
        id="rs_001",
        encrypted_content=encrypted_content,
        summary=[summary_part],
        status="completed",
    )
    token_usage = SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150)
    return SimpleNamespace(
        output=[reasoning_item],
        usage=token_usage,
        status="completed",
        model="gpt-5-codex",
    )
 def test_normalize_codex_response_marks_reasoning_only_as_incomplete(monkeypatch):
    """Reasoning-only output must normalize to 'incomplete' rather than 'stop'.

    A finish_reason of 'stop' would route the empty-content message into the
    empty-content retry loop (3 retries, then failure), so the normalizer has
    to flag this shape as incomplete while keeping the encrypted reasoning.
    """
    agent = _build_agent(monkeypatch)
    response = _codex_reasoning_only_response()
    message, reason = agent._normalize_codex_response(response)
    assert reason == "incomplete"
    assert message.content == ""
    # The encrypted reasoning item has to survive normalization untouched.
    assert message.codex_reasoning_items is not None
    assert len(message.codex_reasoning_items) == 1
    first_item = message.codex_reasoning_items[0]
    assert first_item["encrypted_content"] == "enc_abc123"
 def test_normalize_codex_response_reasoning_with_content_is_stop(monkeypatch):
    """Reasoning accompanied by visible message text still finishes with 'stop'."""
    agent = _build_agent(monkeypatch)
    reasoning_item = SimpleNamespace(
        type="reasoning",
        id="rs_001",
        encrypted_content="enc_xyz",
        summary=[SimpleNamespace(type="summary_text", text="Thinking...")],
        status="completed",
    )
    message_item = SimpleNamespace(
        type="message",
        content=[SimpleNamespace(type="output_text", text="Here is the answer.")],
        status="completed",
    )
    token_usage = SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150)
    response = SimpleNamespace(
        output=[reasoning_item, message_item],
        usage=token_usage,
        status="completed",
        model="gpt-5-codex",
    )
    normalized = agent._normalize_codex_response(response)
    assistant_message, finish_reason = normalized
    assert finish_reason == "stop"
    assert "Here is the answer" in assistant_message.content
 def test_run_conversation_codex_continues_after_reasoning_only_response(monkeypatch):
    """End-to-end: a reasoning-only turn followed by a final message completes normally."""
    agent = _build_agent(monkeypatch)
    # Scripted API replies, consumed front-to-back: one per API call.
    queued = [
        _codex_reasoning_only_response(),
        _codex_message_response("The final answer is 42."),
    ]
    def fake_api_call(api_kwargs):
        return queued.pop(0)
    monkeypatch.setattr(agent, "_interruptible_api_call", fake_api_call)
    result = agent.run_conversation("what is the answer?")
    assert result["completed"] is True
    assert result["final_response"] == "The final answer is 42."
    # The reasoning-only turn must be recorded as an incomplete interim message.
    incomplete_with_reasoning = [
        entry for entry in result["messages"]
        if entry.get("role") == "assistant"
        and entry.get("finish_reason") == "incomplete"
        and entry.get("codex_reasoning_items") is not None
    ]
    assert incomplete_with_reasoning
 def test_run_conversation_codex_preserves_encrypted_reasoning_in_interim(monkeypatch):
    """Interim messages keep their encrypted codex_reasoning_items even when
    the turn has neither visible content nor a readable reasoning summary."""
    agent = _build_agent(monkeypatch)
    # Encrypted reasoning with an empty summary: nothing human-readable at all.
    opaque_item = SimpleNamespace(
        type="reasoning",
        id="rs_002",
        encrypted_content="enc_opaque_blob",
        summary=[],
        status="completed",
    )
    reasoning_response = SimpleNamespace(
        output=[opaque_item],
        usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
        status="completed",
        model="gpt-5-codex",
    )
    queued = [reasoning_response, _codex_message_response("Done thinking.")]
    def fake_api_call(api_kwargs):
        return queued.pop(0)
    monkeypatch.setattr(agent, "_interruptible_api_call", fake_api_call)
    result = agent.run_conversation("think hard")
    assert result["completed"] is True
    assert result["final_response"] == "Done thinking."
    # Collect every assistant message that was stored as an incomplete interim.
    interim_msgs = []
    for entry in result["messages"]:
        if entry.get("role") == "assistant" and entry.get("finish_reason") == "incomplete":
            interim_msgs.append(entry)
    assert len(interim_msgs) >= 1
    first_interim = interim_msgs[0]
    assert first_interim.get("codex_reasoning_items") is not None
    assert first_interim["codex_reasoning_items"][0]["encrypted_content"] == "enc_opaque_blob"
 def test_chat_messages_to_responses_input_reasoning_only_has_following_item(monkeypatch):
    """A reasoning-only interim message converted to Responses API input must
    have its reasoning item followed by an assistant item (an empty one is
    acceptable) so the API's 'required following item' rule is satisfied."""
    agent = _build_agent(monkeypatch)
    reasoning_only_turn = {
        "role": "assistant",
        "content": "",
        "reasoning": None,
        "finish_reason": "incomplete",
        "codex_reasoning_items": [
            {"type": "reasoning", "id": "rs_001", "encrypted_content": "enc_abc", "summary": []},
        ],
    }
    messages = [{"role": "user", "content": "think hard"}, reasoning_only_turn]
    items = agent._chat_messages_to_responses_input(messages)
    # Locate the replayed reasoning item; exactly one is expected.
    reasoning_positions = []
    for position, entry in enumerate(items):
        if entry.get("type") == "reasoning":
            reasoning_positions.append(position)
    assert len(reasoning_positions) == 1
    (reasoning_at,) = reasoning_positions
    next_at = reasoning_at + 1
    # Something has to come after the reasoning item in the input list.
    assert next_at < len(items), "Reasoning item must not be the last item (missing_following_item)"
    assert items[next_at].get("role") == "assistant"
 def test_duplicate_detection_distinguishes_different_codex_reasoning(monkeypatch):
    """Consecutive reasoning-only responses whose encrypted content differs
    must both be kept; they are NOT duplicates of each other."""
    agent = _build_agent(monkeypatch)
    def reasoning_only(item_id, blob):
        # Reasoning-only response with an empty summary; only the item id and
        # the encrypted blob vary between the two scripted turns.
        return SimpleNamespace(
            output=[
                SimpleNamespace(
                    type="reasoning", id=item_id,
                    encrypted_content=blob, summary=[], status="completed",
                )
            ],
            usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150),
            status="completed", model="gpt-5-codex",
        )
    queued = [
        reasoning_only("rs_001", "enc_first"),
        reasoning_only("rs_002", "enc_second"),
        _codex_message_response("Final answer after thinking."),
    ]
    def fake_api_call(api_kwargs):
        return queued.pop(0)
    monkeypatch.setattr(agent, "_interruptible_api_call", fake_api_call)
    result = agent.run_conversation("think very hard")
    assert result["completed"] is True
    assert result["final_response"] == "Final answer after thinking."
    # Neither reasoning-only interim message may be collapsed into the other.
    interim_msgs = []
    for entry in result["messages"]:
        if entry.get("role") == "assistant" and entry.get("finish_reason") == "incomplete":
            interim_msgs.append(entry)
    assert len(interim_msgs) == 2
    encrypted_contents = []
    for entry in interim_msgs:
        encrypted_contents.append(entry["codex_reasoning_items"][0]["encrypted_content"])
    assert "enc_first" in encrypted_contents
    assert "enc_second" in encrypted_contents