diff --git a/run_agent.py b/run_agent.py index 76d4ffcf45..1b74dd8eab 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2804,6 +2804,14 @@ class AIAgent: finish_reason = "tool_calls" elif has_incomplete_items or (saw_commentary_phase and not saw_final_answer_phase): finish_reason = "incomplete" + elif reasoning_items_raw and not final_text: + # Response contains only reasoning (encrypted thinking state) with + # no visible content or tool calls. The model is still thinking and + # needs another turn to produce the actual answer. Marking this as + # "stop" would send it into the empty-content retry loop which burns + # 3 retries then fails — treat it as incomplete instead so the Codex + # continuation path handles it correctly. + finish_reason = "incomplete" else: finish_reason = "stop" return assistant_message, finish_reason @@ -6214,8 +6222,9 @@ class AIAgent: interim_msg = self._build_assistant_message(assistant_message, finish_reason) interim_has_content = bool((interim_msg.get("content") or "").strip()) interim_has_reasoning = bool(interim_msg.get("reasoning", "").strip()) if isinstance(interim_msg.get("reasoning"), str) else False + interim_has_codex_reasoning = bool(interim_msg.get("codex_reasoning_items")) - if interim_has_content or interim_has_reasoning: + if interim_has_content or interim_has_reasoning or interim_has_codex_reasoning: last_msg = messages[-1] if messages else None duplicate_interim = ( isinstance(last_msg, dict) diff --git a/tests/test_run_agent_codex_responses.py b/tests/test_run_agent_codex_responses.py index 42e41ec7ba..81f0548a2b 100644 --- a/tests/test_run_agent_codex_responses.py +++ b/tests/test_run_agent_codex_responses.py @@ -830,3 +830,132 @@ def test_dump_api_request_debug_uses_chat_completions_url(monkeypatch, tmp_path) payload = json.loads(dump_file.read_text()) assert payload["request"]["url"] == "http://127.0.0.1:9208/v1/chat/completions" + + +# --- Reasoning-only response tests (fix for empty content retry loop) --- + + +def _codex_reasoning_only_response(*, encrypted_content="enc_abc123", summary_text="Thinking..."): + """Codex response containing only reasoning items — no message text, no tool calls.""" + return SimpleNamespace( + output=[ + SimpleNamespace( + type="reasoning", + id="rs_001", + encrypted_content=encrypted_content, + summary=[SimpleNamespace(type="summary_text", text=summary_text)], + status="completed", + ) + ], + usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150), + status="completed", + model="gpt-5-codex", + ) + + +def test_normalize_codex_response_marks_reasoning_only_as_incomplete(monkeypatch): + """A response with only reasoning items and no content should be 'incomplete', not 'stop'. + + Without this fix, reasoning-only responses get finish_reason='stop' which + sends them into the empty-content retry loop (3 retries then failure). + """ + agent = _build_agent(monkeypatch) + assistant_message, finish_reason = agent._normalize_codex_response( + _codex_reasoning_only_response() + ) + + assert finish_reason == "incomplete" + assert assistant_message.content == "" + assert assistant_message.codex_reasoning_items is not None + assert len(assistant_message.codex_reasoning_items) == 1 + assert assistant_message.codex_reasoning_items[0]["encrypted_content"] == "enc_abc123" + + +def test_normalize_codex_response_reasoning_with_content_is_stop(monkeypatch): + """If a response has both reasoning and message content, it should still be 'stop'.""" + agent = _build_agent(monkeypatch) + response = SimpleNamespace( + output=[ + SimpleNamespace( + type="reasoning", + id="rs_001", + encrypted_content="enc_xyz", + summary=[SimpleNamespace(type="summary_text", text="Thinking...")], + status="completed", + ), + SimpleNamespace( + type="message", + content=[SimpleNamespace(type="output_text", text="Here is the answer.")], + status="completed", + ), + ], + usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150), + status="completed", + model="gpt-5-codex", + ) + assistant_message, finish_reason = agent._normalize_codex_response(response) + + assert finish_reason == "stop" + assert "Here is the answer" in assistant_message.content + + +def test_run_conversation_codex_continues_after_reasoning_only_response(monkeypatch): + """End-to-end: reasoning-only → final message should succeed, not hit retry loop.""" + agent = _build_agent(monkeypatch) + responses = [ + _codex_reasoning_only_response(), + _codex_message_response("The final answer is 42."), + ] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + result = agent.run_conversation("what is the answer?") + + assert result["completed"] is True + assert result["final_response"] == "The final answer is 42." + # The reasoning-only turn should be in messages as an incomplete interim + assert any( + msg.get("role") == "assistant" + and msg.get("finish_reason") == "incomplete" + and msg.get("codex_reasoning_items") is not None + for msg in result["messages"] + ) + + +def test_run_conversation_codex_preserves_encrypted_reasoning_in_interim(monkeypatch): + """Encrypted codex_reasoning_items must be preserved in interim messages + even when there is no visible reasoning text or content.""" + agent = _build_agent(monkeypatch) + # Response with encrypted reasoning but no human-readable summary + reasoning_response = SimpleNamespace( + output=[ + SimpleNamespace( + type="reasoning", + id="rs_002", + encrypted_content="enc_opaque_blob", + summary=[], + status="completed", + ) + ], + usage=SimpleNamespace(input_tokens=50, output_tokens=100, total_tokens=150), + status="completed", + model="gpt-5-codex", + ) + responses = [ + reasoning_response, + _codex_message_response("Done thinking."), + ] + monkeypatch.setattr(agent, "_interruptible_api_call", lambda api_kwargs: responses.pop(0)) + + result = agent.run_conversation("think hard") + + assert result["completed"] is True + assert result["final_response"] == "Done thinking." + # The interim message must have codex_reasoning_items preserved + interim_msgs = [ + msg for msg in result["messages"] + if msg.get("role") == "assistant" + and msg.get("finish_reason") == "incomplete" + ] + assert len(interim_msgs) >= 1 + assert interim_msgs[0].get("codex_reasoning_items") is not None + assert interim_msgs[0]["codex_reasoning_items"][0]["encrypted_content"] == "enc_opaque_blob"