"""Regression tests for the generic unsupported-parameter detector in
|
||
|
|
``agent.auxiliary_client``.
|
||
|
|
|
||
|
|
The original temperature-specific detector (PR #15621) was generalized so the
|
||
|
|
same reactive-retry strategy covers any provider that rejects an arbitrary
|
||
|
|
request parameter — ``max_tokens``, ``seed``, ``top_p``, future quirks — not
|
||
|
|
just ``temperature``. Credit @nicholasrae (PR #15416) for the generalization
|
||
|
|
pattern.
|
||
|
|
|
||
|
|
These tests lock in:
|
||
|
|
* ``_is_unsupported_parameter_error(exc, param)`` across common phrasings
|
||
|
|
* the back-compat wrapper ``_is_unsupported_temperature_error`` still works
|
||
|
|
* the max_tokens retry branch no longer pops a key that was never set
|
||
|
|
(``max_tokens is None`` gate)
|
||
|
|
* the max_tokens retry branch matches via the generic helper on top of the
|
||
|
|
legacy ``"max_tokens"`` / ``"unsupported_parameter"`` substring checks
|
||
|
|
"""
|
||
|
|
|
||
|
|

from unittest.mock import patch, MagicMock, AsyncMock

import pytest

from agent.auxiliary_client import (
    call_llm,
    async_call_llm,
    _is_unsupported_parameter_error,
    _is_unsupported_temperature_error,
)
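

# For orientation while reading the parametrized phrasings below, a minimal
# sketch of the kind of substring heuristic the detector tests imply. This is
# an illustration under assumptions, NOT the real ``agent.auxiliary_client``
# implementation (the sketch name is hypothetical); the real detector may be
# structured differently, but it must accept and reject the same messages.
def _sketch_is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
    if not param:
        # An empty param can never match (see test_empty_param_returns_false).
        return False
    msg = str(exc).lower()
    if param.lower() not in msg:
        # The offending parameter itself must be named in the message.
        return False
    # Markers drawn from the real provider phrasings exercised below.
    markers = (
        "unsupported parameter", "unsupported_parameter",
        "unknown parameter", "unrecognized parameter",
        "invalid parameter", "does not support", "not supported",
    )
    return any(marker in msg for marker in markers)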


class TestIsUnsupportedParameterError:
    """The generic detector must match real provider phrasings for any param."""

    @pytest.mark.parametrize("param,message", [
        # temperature phrasings (regression coverage via the generic API)
        ("temperature", "HTTP 400: Unsupported parameter: temperature"),
        ("temperature", "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}"),
        ("temperature", "this model does not support temperature"),
        # max_tokens phrasings
        ("max_tokens", "HTTP 400: Unsupported parameter: max_tokens"),
        ("max_tokens", "Unknown parameter: max_tokens — use max_completion_tokens"),
        ("max_tokens", "Invalid parameter: max_tokens is not supported"),
        # arbitrary future params
        ("seed", "HTTP 400: unrecognized parameter: seed"),
        ("top_p", "Error: top_p is not supported for this model"),
    ])
    def test_matches_real_provider_messages(self, param, message):
        assert _is_unsupported_parameter_error(RuntimeError(message), param) is True

    @pytest.mark.parametrize("param,message", [
        # Param not mentioned at all
        ("temperature", "HTTP 400: max_tokens is too large"),
        # Param mentioned but not flagged as unsupported
        ("temperature", "temperature must be between 0 and 2"),
        # Totally unrelated 400
        ("max_tokens", "Rate limit exceeded"),
        # Connection-level errors
        ("temperature", "Connection reset by peer"),
    ])
    def test_does_not_match_unrelated_errors(self, param, message):
        assert _is_unsupported_parameter_error(RuntimeError(message), param) is False

    def test_empty_param_returns_false(self):
        assert _is_unsupported_parameter_error(
            RuntimeError("HTTP 400: Unsupported parameter: temperature"), ""
        ) is False

    def test_temperature_wrapper_delegates_to_generic(self):
        """Back-compat: ``_is_unsupported_temperature_error`` still routes through."""
        msg = "HTTP 400: Unsupported parameter: temperature"
        assert _is_unsupported_temperature_error(RuntimeError(msg)) is True
        # And the unrelated case still holds
        assert _is_unsupported_temperature_error(
            RuntimeError("max_tokens is too large")) is False
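

# Back-compat note: given the delegation test above, the wrapper plausibly
# reduces to the generic detector with a fixed param, along the lines of the
# following (an assumed shape, not the verbatim implementation; left as a
# comment to avoid shadowing the import):
#
#     def _is_unsupported_temperature_error(exc):
#         return _is_unsupported_parameter_error(exc, "temperature")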


def _dummy_response():
    """Sentinel — real code calls ``_validate_llm_response`` which we patch out."""
    return {"ok": True}
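

# A minimal sketch (an assumed shape, not the real ``call_llm`` internals) of
# the hardened retry branch that ``TestMaxTokensRetryHardening`` below locks
# in: retry only when ``max_tokens`` was actually supplied, and translate it
# to ``max_completion_tokens`` on the second attempt.
def _sketch_max_tokens_retry(create, **kwargs):
    """Illustrative only; the real branch lives inside ``call_llm`` /
    ``async_call_llm`` and may differ in detail.
    """
    try:
        return create(**kwargs)
    except Exception as exc:
        # Gate (a): nothing to translate when the caller never passed
        # max_tokens; re-raise instead of sending max_completion_tokens=None.
        if kwargs.get("max_tokens") is None:
            raise
        # Gate (b): the generic helper on top of the legacy substring checks.
        legacy_hit = ("max_tokens" in str(exc)
                      and "unsupported_parameter" in str(exc))
        if not (_is_unsupported_parameter_error(exc, "max_tokens") or legacy_hit):
            raise
        # Swap the rejected spelling for the accepted one and retry once.
        kwargs["max_completion_tokens"] = kwargs.pop("max_tokens")
        return create(**kwargs)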


class TestMaxTokensRetryHardening:
    """The max_tokens retry branch now (a) gates on ``max_tokens is not None``
    and (b) also matches the generic phrasings via the helper.
    """

    def test_sync_max_tokens_retry_skipped_when_max_tokens_is_none(self):
        """No max_tokens kwarg → must not pop/retry even if the error mentions it.

        Before the hardening, ``kwargs.pop("max_tokens", None)`` was safe but
        ``kwargs["max_completion_tokens"] = max_tokens`` would set a None
        value and hit the provider again. The gate skips the whole branch.
        """
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        err = RuntimeError("HTTP 400: Unsupported parameter: max_tokens")
        client.chat.completions.create.side_effect = err

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
        ):
            with pytest.raises(RuntimeError):
                call_llm(
                    task="session_search",
                    messages=[{"role": "user", "content": "hi"}],
                    temperature=0.3,
                    # max_tokens omitted on purpose
                )

        # Only the initial attempt — no retry because the gate blocked it
        assert client.chat.completions.create.call_count == 1

    def test_sync_max_tokens_retry_matches_generic_phrasing(self):
        """A 400 saying "Unknown parameter: max_tokens", which carries no
        ``unsupported_parameter`` token for the legacy substring check to
        catch, now triggers the retry via the generic helper.
        """
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        err = RuntimeError("Unknown parameter: max_tokens")
        response = _dummy_response()
        client.chat.completions.create.side_effect = [err, response]

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
        ):
            result = call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
                max_tokens=512,
            )

        assert result is response
        assert client.chat.completions.create.call_count == 2
        second_call = client.chat.completions.create.call_args_list[1]
        assert "max_tokens" not in second_call.kwargs
        assert second_call.kwargs["max_completion_tokens"] == 512

    @pytest.mark.asyncio
    async def test_async_max_tokens_retry_skipped_when_max_tokens_is_none(self):
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        err = RuntimeError("HTTP 400: Unsupported parameter: max_tokens")
        client.chat.completions.create = AsyncMock(side_effect=err)

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
        ):
            with pytest.raises(RuntimeError):
                await async_call_llm(
                    task="session_search",
                    messages=[{"role": "user", "content": "hi"}],
                    temperature=0.3,
                )

        assert client.chat.completions.create.call_count == 1

    @pytest.mark.asyncio
    async def test_async_max_tokens_retry_matches_generic_phrasing(self):
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        err = RuntimeError("Unknown parameter: max_tokens")
        response = _dummy_response()
        client.chat.completions.create = AsyncMock(side_effect=[err, response])

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
        ):
            result = await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
                max_tokens=512,
            )

        assert result is response
        assert client.chat.completions.create.await_count == 2
        second_call = client.chat.completions.create.call_args_list[1]
        assert "max_tokens" not in second_call.kwargs
        assert second_call.kwargs["max_completion_tokens"] == 512