"""Regression tests for the generic unsupported-parameter detector in ``agent.auxiliary_client``. The original temperature-specific detector (PR #15621) was generalized so the same reactive-retry strategy covers any provider that rejects an arbitrary request parameter — ``max_tokens``, ``seed``, ``top_p``, future quirks — not just ``temperature``. Credit @nicholasrae (PR #15416) for the generalization pattern. These tests lock in: * ``_is_unsupported_parameter_error(exc, param)`` across common phrasings * the back-compat wrapper ``_is_unsupported_temperature_error`` still works * the max_tokens retry branch no longer pops a key that was never set (``max_tokens is None`` gate) * the max_tokens retry branch matches via the generic helper on top of the legacy ``"max_tokens"`` / ``"unsupported_parameter"`` substring checks """ from unittest.mock import patch, MagicMock, AsyncMock import pytest from agent.auxiliary_client import ( call_llm, async_call_llm, _is_unsupported_parameter_error, _is_unsupported_temperature_error, ) class TestIsUnsupportedParameterError: """The generic detector must match real provider phrasings for any param.""" @pytest.mark.parametrize("param,message", [ # temperature phrasings (regression coverage via the generic API) ("temperature", "HTTP 400: Unsupported parameter: temperature"), ("temperature", "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}"), ("temperature", "this model does not support temperature"), # max_tokens phrasings ("max_tokens", "HTTP 400: Unsupported parameter: max_tokens"), ("max_tokens", "Unknown parameter: max_tokens — use max_completion_tokens"), ("max_tokens", "Invalid parameter: max_tokens is not supported"), # arbitrary future params ("seed", "HTTP 400: unrecognized parameter: seed"), ("top_p", "Error: top_p is not supported for this model"), ]) def test_matches_real_provider_messages(self, param, message): assert _is_unsupported_parameter_error(RuntimeError(message), param) is True @pytest.mark.parametrize("param,message", [ # Param not mentioned at all ("temperature", "HTTP 400: max_tokens is too large"), # Param mentioned but not flagged as unsupported ("temperature", "temperature must be between 0 and 2"), # Totally unrelated 400 ("max_tokens", "Rate limit exceeded"), # Connection-level errors ("temperature", "Connection reset by peer"), ]) def test_does_not_match_unrelated_errors(self, param, message): assert _is_unsupported_parameter_error(RuntimeError(message), param) is False def test_empty_param_returns_false(self): assert _is_unsupported_parameter_error( RuntimeError("HTTP 400: Unsupported parameter: temperature"), "" ) is False def test_temperature_wrapper_delegates_to_generic(self): """Back-compat: ``_is_unsupported_temperature_error`` still routes through.""" msg = "HTTP 400: Unsupported parameter: temperature" assert _is_unsupported_temperature_error(RuntimeError(msg)) is True # And the unrelated-case still holds assert _is_unsupported_temperature_error( RuntimeError("max_tokens is too large")) is False def _dummy_response(): """Sentinel — real code calls ``_validate_llm_response`` which we patch out.""" return {"ok": True} class TestMaxTokensRetryHardening: """The max_tokens retry branch now (a) gates on ``max_tokens is not None`` and (b) also matches the generic phrasings via the helper. """ def test_sync_max_tokens_retry_skipped_when_max_tokens_is_none(self): """No max_tokens kwarg → must not pop/retry even if the error mentions it. 


def _dummy_response():
    """Sentinel — real code calls ``_validate_llm_response`` which we patch out."""
    return {"ok": True}


class TestMaxTokensRetryHardening:
    """The max_tokens retry branch now (a) gates on ``max_tokens is not None``
    and (b) also matches the generic phrasings via the helper.
    """
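
    # Sketch of the branch under test, reconstructed from the behaviour the
    # assertions below pin down (an assumption, not the shipped code;
    # ``_matches_legacy_substrings`` is a hypothetical stand-in for the
    # pre-existing substring checks):
    #
    #     try:
    #         response = client.chat.completions.create(**kwargs)
    #     except Exception as exc:
    #         if max_tokens is not None and (
    #             _matches_legacy_substrings(exc)
    #             or _is_unsupported_parameter_error(exc, "max_tokens")
    #         ):
    #             kwargs.pop("max_tokens", None)
    #             kwargs["max_completion_tokens"] = max_tokens
    #             response = client.chat.completions.create(**kwargs)
    #         else:
    #             raise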

    def test_sync_max_tokens_retry_skipped_when_max_tokens_is_none(self):
        """No max_tokens kwarg → must not pop/retry even if the error mentions it.

        Before the hardening, ``kwargs.pop("max_tokens", None)`` was safe, but
        ``kwargs["max_completion_tokens"] = max_tokens`` would set a ``None``
        value and hit the provider again. The gate now skips the whole branch.
        """
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        err = RuntimeError("HTTP 400: Unsupported parameter: max_tokens")
        client.chat.completions.create.side_effect = err

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
        ):
            with pytest.raises(RuntimeError):
                call_llm(
                    task="session_search",
                    messages=[{"role": "user", "content": "hi"}],
                    temperature=0.3,
                    # max_tokens omitted on purpose
                )

        # Only the initial attempt — no retry because the gate blocked it
        assert client.chat.completions.create.call_count == 1

    def test_sync_max_tokens_retry_matches_generic_phrasing(self):
        """A 400 saying "Unknown parameter: max_tokens", which the legacy
        substring checks did not reliably catch, now triggers the retry via
        the generic helper.
        """
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        err = RuntimeError("Unknown parameter: max_tokens")
        response = _dummy_response()
        client.chat.completions.create.side_effect = [err, response]

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
        ):
            result = call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
                max_tokens=512,
            )

        assert result is response
        assert client.chat.completions.create.call_count == 2
        second_call = client.chat.completions.create.call_args_list[1]
        assert "max_tokens" not in second_call.kwargs
        assert second_call.kwargs["max_completion_tokens"] == 512

    @pytest.mark.asyncio
    async def test_async_max_tokens_retry_skipped_when_max_tokens_is_none(self):
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        err = RuntimeError("HTTP 400: Unsupported parameter: max_tokens")
        client.chat.completions.create = AsyncMock(side_effect=err)

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
        ):
            with pytest.raises(RuntimeError):
                await async_call_llm(
                    task="session_search",
                    messages=[{"role": "user", "content": "hi"}],
                    temperature=0.3,
                )

        assert client.chat.completions.create.call_count == 1

    @pytest.mark.asyncio
    async def test_async_max_tokens_retry_matches_generic_phrasing(self):
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        err = RuntimeError("Unknown parameter: max_tokens")
        response = _dummy_response()
        client.chat.completions.create = AsyncMock(side_effect=[err, response])

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
        ):
            result = await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
                max_tokens=512,
            )

        assert result is response
        assert client.chat.completions.create.await_count == 2
        second_call = client.chat.completions.create.call_args_list[1]
        assert "max_tokens" not in second_call.kwargs
        assert second_call.kwargs["max_completion_tokens"] == 512