diff --git a/run_agent.py b/run_agent.py
index f7a929118c4..444dc178195 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -8010,17 +8010,20 @@ class AIAgent:
         response = None
         if not _aux_available and self.api_mode == "codex_responses":
-            # No auxiliary client -- use the Codex Responses path directly
+            # No auxiliary client -- use the Codex Responses path directly.
+            # The Responses API does not accept `temperature` on any
+            # supported backend: chatgpt.com/backend-api/codex rejects it
+            # outright, and api.openai.com and Copilot Responses reject it
+            # on gpt-5/o-series reasoning models. The transport
+            # intentionally never sets it -- strip any leftover here so
+            # the flush fallback matches the main-loop behavior.
             codex_kwargs = self._build_api_kwargs(api_messages)
             _ct_flush = self._get_transport()
             if _ct_flush is not None:
                 codex_kwargs["tools"] = _ct_flush.convert_tools([memory_tool_def])
             elif not codex_kwargs.get("tools"):
                 codex_kwargs["tools"] = [memory_tool_def]
-            if _flush_temperature is not None:
-                codex_kwargs["temperature"] = _flush_temperature
-            else:
-                codex_kwargs.pop("temperature", None)
+            codex_kwargs.pop("temperature", None)
             if "max_output_tokens" in codex_kwargs:
                 codex_kwargs["max_output_tokens"] = 5120
             response = self._run_codex_stream(codex_kwargs)
diff --git a/tests/run_agent/test_flush_memories_codex.py b/tests/run_agent/test_flush_memories_codex.py
index 4879580be1a..9863235bf9c 100644
--- a/tests/run_agent/test_flush_memories_codex.py
+++ b/tests/run_agent/test_flush_memories_codex.py
@@ -327,3 +327,72 @@ class TestFlushMemoriesCodexFallback:
         mock_stream.assert_called_once()
         mock_memory.assert_called_once()
         assert mock_memory.call_args.kwargs["content"] == "Codex flush test"
+
+    @pytest.mark.parametrize(
+        "provider,base_url",
+        [
+            # chatgpt.com/backend-api/codex -- rejects temperature unconditionally
+            ("openai-codex", "https://chatgpt.com/backend-api/codex"),
+            # Native OpenAI Responses -- rejects temperature on gpt-5/o-series reasoning models
+            ("openai", "https://api.openai.com/v1"),
+            # Copilot Responses -- rejects temperature on reasoning models
+            ("copilot", "https://api.githubcopilot.com"),
+        ],
+    )
+    def test_codex_fallback_never_sends_temperature(self, monkeypatch, provider, base_url):
+        """Regression for the ``⚠ Auxiliary memory flush failed: HTTP 400:
+        Unsupported parameter: temperature`` error.
+
+        The codex_responses fallback must strip temperature before calling
+        _run_codex_stream -- the Responses API does not accept it on any
+        supported backend, matching the transport's behavior."""
+        agent = _make_agent(monkeypatch, api_mode="codex_responses", provider=provider)
+        agent.base_url = base_url
+
+        codex_response = SimpleNamespace(
+            output=[
+                SimpleNamespace(
+                    type="function_call",
+                    call_id="call_1",
+                    name="memory",
+                    arguments=json.dumps({
+                        "action": "add",
+                        "target": "notes",
+                        "content": "no-temp test",
+                    }),
+                ),
+            ],
+            usage=SimpleNamespace(input_tokens=50, output_tokens=10, total_tokens=60),
+            status="completed",
+            model="gpt-5.5",
+        )
+
+        with patch("agent.auxiliary_client.call_llm", side_effect=RuntimeError("no provider")), \
+             patch.object(agent, "_run_codex_stream", return_value=codex_response) as mock_stream, \
+             patch.object(agent, "_build_api_kwargs") as mock_build, \
+             patch("tools.memory_tool.memory_tool", return_value="Saved."):
+            # Simulate a transport that (correctly) never includes temperature,
+            # but also verify we strip any stray temperature the fallback used
+            # to inject before the fix.
+            mock_build.return_value = {
+                "model": "gpt-5.5",
+                "instructions": "test",
+                "input": [],
+                "tools": [],
+                "max_output_tokens": 4096,
+                # Intentionally poison the dict to prove we pop it:
+                "temperature": 0.3,
+            }
+            messages = [
+                {"role": "user", "content": "Hello"},
+                {"role": "assistant", "content": "Hi"},
+                {"role": "user", "content": "Save this"},
+            ]
+            agent.flush_memories(messages)
+
+            mock_stream.assert_called_once()
+            sent_kwargs = mock_stream.call_args.args[0]
+            assert "temperature" not in sent_kwargs, (
+                f"codex_responses fallback must strip temperature before calling "
+                f"_run_codex_stream, got: {sent_kwargs.get('temperature')!r}"
+            )