mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
fix(agent): support Azure OpenAI gpt-5.x on chat/completions endpoint
Azure OpenAI exposes an OpenAI-compatible endpoint at
`{resource}.openai.azure.com/openai/v1` that accepts the standard
`openai` Python client. Two issues prevented gpt-5.x models from working:
1. `_max_tokens_param()` only sent `max_completion_tokens` for
`api.openai.com` URLs. Azure also requires `max_completion_tokens`
for gpt-5.x models.
2. The `codex_responses` upgrade gate unconditionally upgraded gpt-5.x
to Responses API. Azure does NOT support the Responses API — it serves
gpt-5.x on the regular `/chat/completions` path, causing a 404.
Fix: add `_is_azure_openai_url()` that matches `openai.azure.com` URLs.
- `_max_tokens_param()` now returns `max_completion_tokens` for Azure.
- The `codex_responses` upgrade gate skips Azure so gpt-5.x stays on
`chat_completions` where Azure actually serves it.
- The fallback-provider api_mode picker also recognises Azure and stays
on chat_completions.
- Tests cover max_tokens routing, api_mode behaviour, and URL detection.
gpt-4.x models on Azure are unaffected (already used chat_completions +
max_tokens, which Azure accepts for those models).
Salvage of PR #10086 — rewritten against current main where the
codex_responses upgrade gate gained copilot-acp / explicit-api_mode
exclusions.
This commit is contained in:
@@ -3386,6 +3386,61 @@ class TestMaxTokensParam:
|
||||
result = agent._max_tokens_param(4096)
|
||||
assert result == {"max_tokens": 4096}
|
||||
|
||||
def test_returns_max_completion_tokens_for_azure(self, agent):
    """Azure OpenAI requires max_completion_tokens for gpt-5.x models."""
    # Point the agent at an Azure OpenAI-compatible endpoint and confirm
    # the token-limit kwarg switches to max_completion_tokens.
    agent.base_url = "https://my-resource.openai.azure.com/openai/v1"
    assert agent._max_tokens_param(4096) == {"max_completion_tokens": 4096}
|
||||
|
||||
|
||||
class TestAzureOpenAIRouting:
    """Verify Azure OpenAI endpoints stay on chat_completions for gpt-5.x."""

    @staticmethod
    def _apply_codex_upgrade(agent):
        """Mirror the codex_responses upgrade gate from __init__.

        Upgrades api_mode from chat_completions to codex_responses when the
        endpoint is direct api.openai.com (or the provider/model demands the
        Responses API) — but never for Azure OpenAI URLs, since Azure serves
        gpt-5.x on /chat/completions and has no Responses API.

        NOTE(review): this intentionally duplicates the routing logic under
        test; keep it in sync with __init__.
        """
        if (
            agent.api_mode == "chat_completions"
            and not agent._is_azure_openai_url()
            and (
                agent._is_direct_openai_url()
                or agent._provider_model_requires_responses_api(
                    agent.model, provider=agent.provider,
                )
            )
        ):
            agent.api_mode = "codex_responses"

    def test_azure_gpt5_stays_on_chat_completions(self, agent):
        """Azure serves gpt-5.x on /chat/completions — must not upgrade to codex_responses."""
        agent.base_url = "https://my-resource.openai.azure.com/openai/v1"
        agent.api_mode = "chat_completions"
        agent.model = "gpt-5.4-mini"
        self._apply_codex_upgrade(agent)
        assert agent.api_mode == "chat_completions"

    def test_non_azure_gpt5_upgrades_to_codex_responses(self, agent):
        """On api.openai.com, gpt-5.x must still upgrade to codex_responses."""
        agent.base_url = "https://api.openai.com/v1"
        agent.api_mode = "chat_completions"
        agent.model = "gpt-5.4-mini"
        self._apply_codex_upgrade(agent)
        assert agent.api_mode == "codex_responses"

    def test_is_azure_openai_url_detection(self, agent):
        """_is_azure_openai_url matches Azure hosts and rejects other endpoints."""
        assert agent._is_azure_openai_url("https://foo.openai.azure.com/openai/v1") is True
        assert agent._is_azure_openai_url("https://api.openai.com/v1") is False
        assert agent._is_azure_openai_url("https://openrouter.ai/api/v1") is False
        # No-argument form falls back to agent.base_url; detection is
        # substring matching on "openai.azure.com", so the path suffix
        # does not interfere.
        agent.base_url = "https://my-resource.openai.azure.com/openai/v1"
        assert agent._is_azure_openai_url() is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# System prompt stability for prompt caching
|
||||
|
||||
Reference in New Issue
Block a user