From ac571142841cd3337a59f0b5e5881c4a40f3bf2e Mon Sep 17 00:00:00 2001
From: akhater <akhater@gmail.com>
Date: Sat, 25 Apr 2026 18:31:27 -0700
Subject: [PATCH] fix(agent): support Azure OpenAI gpt-5.x on chat/completions
 endpoint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Azure OpenAI exposes an OpenAI-compatible endpoint at
`{resource}.openai.azure.com/openai/v1` that accepts the standard
`openai` Python client. Two issues prevented gpt-5.x models from working:

1. `_max_tokens_param()` only sent `max_completion_tokens` for
   `api.openai.com` URLs. Azure also requires `max_completion_tokens`
   for gpt-5.x models.

2. The `codex_responses` upgrade gate unconditionally upgraded gpt-5.x
   to Responses API. Azure does NOT support the Responses API — it serves
   gpt-5.x on the regular `/chat/completions` path, causing a 404.

Fix: add `_is_azure_openai_url()` that matches `openai.azure.com` URLs.
- `_max_tokens_param()` now returns `max_completion_tokens` for Azure.
- The `codex_responses` upgrade gate skips Azure so gpt-5.x stays on
  `chat_completions` where Azure actually serves it.
- The fallback-provider api_mode picker also recognises Azure and stays
  on chat_completions.
- Tests cover max_tokens routing, api_mode behaviour, and URL detection.

gpt-4.x models on Azure keep using chat_completions as before. Note that
`_max_tokens_param()` keys on the URL rather than the model, so they now
receive `max_completion_tokens` instead of `max_tokens` as well.

Salvage of PR #10086 — rewritten against current main where the
codex_responses upgrade gate gained copilot-acp / explicit-api_mode
exclusions.
---
 run_agent.py                      | 33 +++++++++++++++++--
 tests/run_agent/test_run_agent.py | 55 +++++++++++++++++++++++++++++++
 2 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/run_agent.py b/run_agent.py
index b874ab6acc..1f2a062127 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1034,12 +1034,16 @@ class AIAgent:
         # surface.
         # When api_mode was explicitly provided, respect it — the user
         # knows what their endpoint supports (#10473).
+        # Exception: Azure OpenAI serves gpt-5.x on /chat/completions and
+        # does NOT support the Responses API — skip the upgrade for Azure
+        # (openai.azure.com), even though it looks OpenAI-compatible.
         if (
             api_mode is None
             and self.api_mode == "chat_completions"
             and self.provider != "copilot-acp"
             and not str(self.base_url or "").lower().startswith("acp://copilot")
             and not str(self.base_url or "").lower().startswith("acp+tcp://")
+            and not self._is_azure_openai_url()
             and (
                 self._is_direct_openai_url()
                 or self._provider_model_requires_responses_api(
@@ -2553,6 +2557,22 @@ class AIAgent:
             )
         return hostname == "api.openai.com"
 
+    def _is_azure_openai_url(self, base_url: str = None) -> bool:
+        """Return True when a base URL targets Azure OpenAI.
+
+        Azure OpenAI exposes an OpenAI-compatible endpoint at
+        ``{resource}.openai.azure.com/openai/v1`` that accepts the
+        standard ``openai`` Python client.  Unlike api.openai.com it
+        does NOT support the Responses API — gpt-5.x models are served
+        on the regular ``/chat/completions`` path — so routing decisions
+        must treat Azure separately from direct OpenAI.
+        """
+        if base_url is not None:
+            url = str(base_url).lower()
+        else:
+            url = getattr(self, "_base_url_lower", "") or ""
+        return "openai.azure.com" in url
+
     def _resolved_api_call_timeout(self) -> float:
         """Resolve the effective per-call request timeout in seconds.
 
@@ -2724,12 +2744,14 @@ class AIAgent:
 
     def _max_tokens_param(self, value: int) -> dict:
         """Return the correct max tokens kwarg for the current provider.
-        
+
         OpenAI's newer models (gpt-4o, o-series, gpt-5+) require
-        'max_completion_tokens'. OpenRouter, local models, and older
+        'max_completion_tokens'. Azure OpenAI endpoints get it too,
+        for every model, because gpt-5.x on Azure requires it and the
+        check keys on the URL. OpenRouter, local models, and older
         OpenAI models use 'max_tokens'.
         """
-        if self._is_direct_openai_url():
+        if self._is_direct_openai_url() or self._is_azure_openai_url():
             return {"max_completion_tokens": value}
         return {"max_tokens": value}
 
@@ -6848,10 +6870,15 @@ class AIAgent:
             # Determine api_mode from provider / base URL / model
             fb_api_mode = "chat_completions"
             fb_base_url = str(fb_client.base_url)
+            _fb_is_azure = self._is_azure_openai_url(fb_base_url)
             if fb_provider == "openai-codex":
                 fb_api_mode = "codex_responses"
             elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith("/anthropic"):
                 fb_api_mode = "anthropic_messages"
+            elif _fb_is_azure:
+                # Azure OpenAI serves gpt-5.x on /chat/completions — does NOT
+                # support the Responses API. Stay on chat_completions.
+                fb_api_mode = "chat_completions"
             elif self._is_direct_openai_url(fb_base_url):
                 fb_api_mode = "codex_responses"
             elif self._provider_model_requires_responses_api(
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index c7b039561b..f58ebbf14c 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -3386,6 +3386,61 @@ class TestMaxTokensParam:
         result = agent._max_tokens_param(4096)
         assert result == {"max_tokens": 4096}
 
+    def test_returns_max_completion_tokens_for_azure(self, agent):
+        """Azure OpenAI requires max_completion_tokens for gpt-5.x models."""
+        agent.base_url = "https://my-resource.openai.azure.com/openai/v1"
+        result = agent._max_tokens_param(4096)
+        assert result == {"max_completion_tokens": 4096}
+
+
+class TestAzureOpenAIRouting:
+    """Verify Azure OpenAI endpoints stay on chat_completions for gpt-5.x."""
+
+    def test_azure_gpt5_stays_on_chat_completions(self, agent):
+        """Azure serves gpt-5.x on /chat/completions — must not upgrade to codex_responses."""
+        agent.base_url = "https://my-resource.openai.azure.com/openai/v1"
+        agent.api_mode = "chat_completions"
+        agent.model = "gpt-5.4-mini"
+        # Simplified copy of the upgrade gate in __init__ (not the real code path)
+        if (
+            agent.api_mode == "chat_completions"
+            and not agent._is_azure_openai_url()
+            and (
+                agent._is_direct_openai_url()
+                or agent._provider_model_requires_responses_api(
+                    agent.model, provider=agent.provider,
+                )
+            )
+        ):
+            agent.api_mode = "codex_responses"
+        assert agent.api_mode == "chat_completions"
+
+    def test_non_azure_gpt5_upgrades_to_codex_responses(self, agent):
+        """On api.openai.com, gpt-5.x must still upgrade to codex_responses."""
+        agent.base_url = "https://api.openai.com/v1"
+        agent.api_mode = "chat_completions"
+        agent.model = "gpt-5.4-mini"
+        if (
+            agent.api_mode == "chat_completions"
+            and not agent._is_azure_openai_url()
+            and (
+                agent._is_direct_openai_url()
+                or agent._provider_model_requires_responses_api(
+                    agent.model, provider=agent.provider,
+                )
+            )
+        ):
+            agent.api_mode = "codex_responses"
+        assert agent.api_mode == "codex_responses"
+
+    def test_is_azure_openai_url_detection(self, agent):
+        assert agent._is_azure_openai_url("https://foo.openai.azure.com/openai/v1") is True
+        assert agent._is_azure_openai_url("https://api.openai.com/v1") is False
+        assert agent._is_azure_openai_url("https://openrouter.ai/api/v1") is False
+        # With no argument, detection falls back to the agent's own base_url
+        agent.base_url = "https://my-resource.openai.azure.com/openai/v1"
+        assert agent._is_azure_openai_url() is True
+
 
 # ---------------------------------------------------------------------------
 # System prompt stability for prompt caching