From ac571142841cd3337a59f0b5e5881c4a40f3bf2e Mon Sep 17 00:00:00 2001 From: akhater Date: Sat, 25 Apr 2026 18:31:27 -0700 Subject: [PATCH] fix(agent): support Azure OpenAI gpt-5.x on chat/completions endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Azure OpenAI exposes an OpenAI-compatible endpoint at `{resource}.openai.azure.com/openai/v1` that accepts the standard `openai` Python client. Two issues prevented gpt-5.x models from working: 1. `_max_tokens_param()` only sent `max_completion_tokens` for `api.openai.com` URLs. Azure also requires `max_completion_tokens` for gpt-5.x models. 2. The `codex_responses` upgrade gate unconditionally upgraded gpt-5.x to Responses API. Azure does NOT support the Responses API — it serves gpt-5.x on the regular `/chat/completions` path, causing a 404. Fix: add `_is_azure_openai_url()` that matches `openai.azure.com` URLs. - `_max_tokens_param()` now returns `max_completion_tokens` for Azure. - The `codex_responses` upgrade gate skips Azure so gpt-5.x stays on `chat_completions` where Azure actually serves it. - The fallback-provider api_mode picker also recognises Azure and stays on chat_completions. - Tests cover max_tokens routing, api_mode behaviour, and URL detection. gpt-4.x models on Azure are unaffected (already used chat_completions + max_tokens, which Azure accepts for those models). Salvage of PR #10086 — rewritten against current main where the codex_responses upgrade gate gained copilot-acp / explicit-api_mode exclusions. --- run_agent.py | 33 +++++++++++++++++-- tests/run_agent/test_run_agent.py | 55 +++++++++++++++++++++++++++++++ 2 files changed, 85 insertions(+), 3 deletions(-) diff --git a/run_agent.py b/run_agent.py index b874ab6acc..1f2a062127 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1034,12 +1034,16 @@ class AIAgent: # surface. # When api_mode was explicitly provided, respect it — the user # knows what their endpoint supports (#10473). 
+ # Exception: Azure OpenAI serves gpt-5.x on /chat/completions and + # does NOT support the Responses API — skip the upgrade for Azure + # (openai.azure.com), even though it looks OpenAI-compatible. if ( api_mode is None and self.api_mode == "chat_completions" and self.provider != "copilot-acp" and not str(self.base_url or "").lower().startswith("acp://copilot") and not str(self.base_url or "").lower().startswith("acp+tcp://") + and not self._is_azure_openai_url() and ( self._is_direct_openai_url() or self._provider_model_requires_responses_api( @@ -2553,6 +2557,22 @@ class AIAgent: ) return hostname == "api.openai.com" + def _is_azure_openai_url(self, base_url: str = None) -> bool: + """Return True when a base URL targets Azure OpenAI. + + Azure OpenAI exposes an OpenAI-compatible endpoint at + ``{resource}.openai.azure.com/openai/v1`` that accepts the + standard ``openai`` Python client. Unlike api.openai.com it + does NOT support the Responses API — gpt-5.x models are served + on the regular ``/chat/completions`` path — so routing decisions + must treat Azure separately from direct OpenAI. + """ + if base_url is not None: + url = str(base_url).lower() + else: + url = getattr(self, "_base_url_lower", "") or "" + return "openai.azure.com" in url + def _resolved_api_call_timeout(self) -> float: """Resolve the effective per-call request timeout in seconds. @@ -2724,12 +2744,14 @@ class AIAgent: def _max_tokens_param(self, value: int) -> dict: """Return the correct max tokens kwarg for the current provider. - + OpenAI's newer models (gpt-4o, o-series, gpt-5+) require - 'max_completion_tokens'. OpenRouter, local models, and older + 'max_completion_tokens'. Azure OpenAI also requires + 'max_completion_tokens' for gpt-5.x models served via the + OpenAI-compatible endpoint. OpenRouter, local models, and older OpenAI models use 'max_tokens'. 
""" - if self._is_direct_openai_url(): + if self._is_direct_openai_url() or self._is_azure_openai_url(): return {"max_completion_tokens": value} return {"max_tokens": value} @@ -6848,10 +6870,15 @@ class AIAgent: # Determine api_mode from provider / base URL / model fb_api_mode = "chat_completions" fb_base_url = str(fb_client.base_url) + _fb_is_azure = self._is_azure_openai_url(fb_base_url) if fb_provider == "openai-codex": fb_api_mode = "codex_responses" elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith("/anthropic"): fb_api_mode = "anthropic_messages" + elif _fb_is_azure: + # Azure OpenAI serves gpt-5.x on /chat/completions — does NOT + # support the Responses API. Stay on chat_completions. + fb_api_mode = "chat_completions" elif self._is_direct_openai_url(fb_base_url): fb_api_mode = "codex_responses" elif self._provider_model_requires_responses_api( diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index c7b039561b..f58ebbf14c 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -3386,6 +3386,61 @@ class TestMaxTokensParam: result = agent._max_tokens_param(4096) assert result == {"max_tokens": 4096} + def test_returns_max_completion_tokens_for_azure(self, agent): + """Azure OpenAI requires max_completion_tokens for gpt-5.x models.""" + agent.base_url = "https://my-resource.openai.azure.com/openai/v1" + result = agent._max_tokens_param(4096) + assert result == {"max_completion_tokens": 4096} + + +class TestAzureOpenAIRouting: + """Verify Azure OpenAI endpoints stay on chat_completions for gpt-5.x.""" + + def test_azure_gpt5_stays_on_chat_completions(self, agent): + """Azure serves gpt-5.x on /chat/completions — must not upgrade to codex_responses.""" + agent.base_url = "https://my-resource.openai.azure.com/openai/v1" + agent.api_mode = "chat_completions" + agent.model = "gpt-5.4-mini" + # Mirror the routing logic from __init__ + if ( + agent.api_mode == 
"chat_completions" + and not agent._is_azure_openai_url() + and ( + agent._is_direct_openai_url() + or agent._provider_model_requires_responses_api( + agent.model, provider=agent.provider, + ) + ) + ): + agent.api_mode = "codex_responses" + assert agent.api_mode == "chat_completions" + + def test_non_azure_gpt5_upgrades_to_codex_responses(self, agent): + """On api.openai.com, gpt-5.x must still upgrade to codex_responses.""" + agent.base_url = "https://api.openai.com/v1" + agent.api_mode = "chat_completions" + agent.model = "gpt-5.4-mini" + if ( + agent.api_mode == "chat_completions" + and not agent._is_azure_openai_url() + and ( + agent._is_direct_openai_url() + or agent._provider_model_requires_responses_api( + agent.model, provider=agent.provider, + ) + ) + ): + agent.api_mode = "codex_responses" + assert agent.api_mode == "codex_responses" + + def test_is_azure_openai_url_detection(self, agent): + assert agent._is_azure_openai_url("https://foo.openai.azure.com/openai/v1") is True + assert agent._is_azure_openai_url("https://api.openai.com/v1") is False + assert agent._is_azure_openai_url("https://openrouter.ai/api/v1") is False + # With no argument, detection falls back to the agent's own base URL + agent.base_url = "https://my-resource.openai.azure.com/openai/v1" + assert agent._is_azure_openai_url() is True + # --------------------------------------------------------------------------- # System prompt stability for prompt caching