"""Regression tests for the generic unsupported-parameter detector in
|
||
|
|
``agent.auxiliary_client``.
|
||
|
|
|
||
|
|
The original temperature-specific detector (PR #15621) was generalized so the
|
||
|
|
same reactive-retry strategy covers any provider that rejects an arbitrary
|
||
|
|
request parameter — ``max_tokens``, ``seed``, ``top_p``, future quirks — not
|
||
|
|
just ``temperature``. Credit @nicholasrae (PR #15416) for the generalization
|
||
|
|
pattern.
|
||
|
|
|
||
|
|
These tests lock in:
|
||
|
|
* ``_is_unsupported_parameter_error(exc, param)`` across common phrasings
|
||
|
|
* the back-compat wrapper ``_is_unsupported_temperature_error`` still works
|
||
|
|
* the max_tokens retry branch no longer pops a key that was never set
|
||
|
|
(``max_tokens is None`` gate)
|
||
|
|
* the max_tokens retry branch matches via the generic helper on top of the
|
||
|
|
legacy ``"max_tokens"`` / ``"unsupported_parameter"`` substring checks
|
||
|
|
"""
|
||
|
|
|
||
|
|

from unittest.mock import patch, MagicMock, AsyncMock

import pytest

from agent.auxiliary_client import (
    call_llm,
    async_call_llm,
    _is_unsupported_parameter_error,
    _is_unsupported_temperature_error,
)
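

# For orientation while reading the parametrized phrasings below, a minimal
# sketch of the kind of substring heuristic the detector tests imply. This is
# an illustration under assumptions, NOT the real ``agent.auxiliary_client``
# implementation (the sketch name is hypothetical); the real detector may be
# structured differently, but it must accept and reject the same messages.
def _sketch_is_unsupported_parameter_error(exc: Exception, param: str) -> bool:
    if not param:
        # An empty param can never match (see test_empty_param_returns_false).
        return False
    msg = str(exc).lower()
    if param.lower() not in msg:
        # The offending parameter itself must be named in the message.
        return False
    # Markers drawn from the real provider phrasings exercised below.
    markers = (
        "unsupported parameter", "unsupported_parameter",
        "unknown parameter", "unrecognized parameter",
        "invalid parameter", "does not support", "not supported",
    )
    return any(marker in msg for marker in markers)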


class TestIsUnsupportedParameterError:
    """The generic detector must match real provider phrasings for any param."""

    @pytest.mark.parametrize("param,message", [
        # temperature phrasings (regression coverage via the generic API)
        ("temperature", "HTTP 400: Unsupported parameter: temperature"),
        ("temperature", "Error code: 400 - {'error': {'code': 'unsupported_parameter', 'param': 'temperature'}}"),
        ("temperature", "this model does not support temperature"),
        # max_tokens phrasings
        ("max_tokens", "HTTP 400: Unsupported parameter: max_tokens"),
        ("max_tokens", "Unknown parameter: max_tokens — use max_completion_tokens"),
        ("max_tokens", "Invalid parameter: max_tokens is not supported"),
        # arbitrary future params
        ("seed", "HTTP 400: unrecognized parameter: seed"),
        ("top_p", "Error: top_p is not supported for this model"),
    ])
    def test_matches_real_provider_messages(self, param, message):
        assert _is_unsupported_parameter_error(RuntimeError(message), param) is True

    @pytest.mark.parametrize("param,message", [
        # Param not mentioned at all
        ("temperature", "HTTP 400: max_tokens is too large"),
        # Param mentioned but not flagged as unsupported
        ("temperature", "temperature must be between 0 and 2"),
        # Totally unrelated 400
        ("max_tokens", "Rate limit exceeded"),
        # Connection-level errors
        ("temperature", "Connection reset by peer"),
    ])
    def test_does_not_match_unrelated_errors(self, param, message):
        assert _is_unsupported_parameter_error(RuntimeError(message), param) is False

    def test_empty_param_returns_false(self):
        assert _is_unsupported_parameter_error(
            RuntimeError("HTTP 400: Unsupported parameter: temperature"), ""
        ) is False

    def test_temperature_wrapper_delegates_to_generic(self):
        """Back-compat: ``_is_unsupported_temperature_error`` still routes through."""
        msg = "HTTP 400: Unsupported parameter: temperature"
        assert _is_unsupported_temperature_error(RuntimeError(msg)) is True
        # And the unrelated case still holds
        assert _is_unsupported_temperature_error(
            RuntimeError("max_tokens is too large")) is False
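

# Back-compat note: given the delegation test above, the wrapper plausibly
# reduces to the generic detector with a fixed param, along the lines of the
# following (an assumed shape, not the verbatim implementation; left as a
# comment to avoid shadowing the import):
#
#     def _is_unsupported_temperature_error(exc):
#         return _is_unsupported_parameter_error(exc, "temperature")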


def _dummy_response():
    """Sentinel — real code calls ``_validate_llm_response`` which we patch out."""
    return {"ok": True}
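

# A minimal sketch (an assumed shape, not the real ``call_llm`` internals) of
# the hardened retry branch that ``TestMaxTokensRetryHardening`` below locks
# in: retry only when ``max_tokens`` was actually supplied, and translate it
# to ``max_completion_tokens`` on the second attempt.
def _sketch_max_tokens_retry(create, **kwargs):
    """Illustrative only; the real branch lives inside ``call_llm`` /
    ``async_call_llm`` and may differ in detail.
    """
    try:
        return create(**kwargs)
    except Exception as exc:
        # Gate (a): nothing to translate when the caller never passed
        # max_tokens; re-raise instead of sending max_completion_tokens=None.
        if kwargs.get("max_tokens") is None:
            raise
        # Gate (b): the generic helper on top of the legacy substring checks.
        legacy_hit = ("max_tokens" in str(exc)
                      and "unsupported_parameter" in str(exc))
        if not (_is_unsupported_parameter_error(exc, "max_tokens") or legacy_hit):
            raise
        # Swap the rejected spelling for the accepted one and retry once.
        kwargs["max_completion_tokens"] = kwargs.pop("max_tokens")
        return create(**kwargs)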


class TestMaxTokensRetryHardening:
    """The max_tokens retry branch now (a) gates on ``max_tokens is not None``
    and (b) also matches the generic phrasings via the helper.
    """

    def test_sync_max_tokens_retry_skipped_when_max_tokens_is_none(self):
        """No max_tokens kwarg → must not pop/retry even if the error mentions it.

        Before the hardening, ``kwargs.pop("max_tokens", None)`` was safe but
        ``kwargs["max_completion_tokens"] = max_tokens`` would set a None
        value and hit the provider again. The gate skips the whole branch.
        """
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        err = RuntimeError("HTTP 400: Unsupported parameter: max_tokens")
        client.chat.completions.create.side_effect = err

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
        ):
            with pytest.raises(RuntimeError):
                call_llm(
                    task="session_search",
                    messages=[{"role": "user", "content": "hi"}],
                    temperature=0.3,
                    # max_tokens omitted on purpose
                )

        # Only the initial attempt — no retry because the gate blocked it
        assert client.chat.completions.create.call_count == 1

    def test_sync_max_tokens_retry_matches_generic_phrasing(self):
        """A 400 saying "Unknown parameter: max_tokens", which carries no
        ``unsupported_parameter`` token for the legacy substring check to
        catch, now triggers the retry via the generic helper.
        """
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        err = RuntimeError("Unknown parameter: max_tokens")
        response = _dummy_response()
        client.chat.completions.create.side_effect = [err, response]

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
        ):
            result = call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
                max_tokens=512,
            )

        assert result is response
        assert client.chat.completions.create.call_count == 2
        second_call = client.chat.completions.create.call_args_list[1]
        assert "max_tokens" not in second_call.kwargs
        assert second_call.kwargs["max_completion_tokens"] == 512

    @pytest.mark.asyncio
    async def test_async_max_tokens_retry_skipped_when_max_tokens_is_none(self):
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        err = RuntimeError("HTTP 400: Unsupported parameter: max_tokens")
        client.chat.completions.create = AsyncMock(side_effect=err)

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
        ):
            with pytest.raises(RuntimeError):
                await async_call_llm(
                    task="session_search",
                    messages=[{"role": "user", "content": "hi"}],
                    temperature=0.3,
                )

        assert client.chat.completions.create.call_count == 1

    @pytest.mark.asyncio
    async def test_async_max_tokens_retry_matches_generic_phrasing(self):
        client = MagicMock()
        client.base_url = "https://api.openai.com/v1"
        err = RuntimeError("Unknown parameter: max_tokens")
        response = _dummy_response()
        client.chat.completions.create = AsyncMock(side_effect=[err, response])

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model",
                  return_value=("openai-codex", "gpt-5.5", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client",
                  return_value=(client, "gpt-5.5")),
            patch("agent.auxiliary_client._validate_llm_response",
                  side_effect=lambda resp, _task: resp),
        ):
            result = await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
                max_tokens=512,
            )

        assert result is response
        assert client.chat.completions.create.await_count == 2
        second_call = client.chat.completions.create.call_args_list[1]
        assert "max_tokens" not in second_call.kwargs
        assert second_call.kwargs["max_completion_tokens"] == 512