diff --git a/agent/credential_pool.py b/agent/credential_pool.py
index 8a2fecf5d6..e1307e51f8 100644
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -1162,6 +1162,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
                 if token:
                     source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
                     active_sources.add(source_name)
+                    pconfig = PROVIDER_REGISTRY.get(provider)
                     changed |= _upsert_entry(
                         entries,
                         provider,
@@ -1170,6 +1171,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
                             "source": source_name,
                             "auth_type": AUTH_TYPE_API_KEY,
                             "access_token": token,
+                            "base_url": pconfig.inference_base_url if pconfig else "",
                             "label": source,
                         },
                     )
diff --git a/run_agent.py b/run_agent.py
index b011078142..956a1e9638 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -714,12 +714,13 @@ class AIAgent:
         except Exception:
             pass
 
-        # GPT-5.x models require the Responses API path — they are rejected
-        # on /v1/chat/completions by both OpenAI and OpenRouter. Also
-        # auto-upgrade for direct OpenAI URLs (api.openai.com) since all
-        # newer tool-calling models prefer Responses there.
-        # ACP runtimes are excluded: CopilotACPClient handles its own
-        # routing and does not implement the Responses API surface.
+        # GPT-5.x models usually require the Responses API path, but some
+        # providers have exceptions (for example Copilot's gpt-5-mini still
+        # uses chat completions). Also auto-upgrade for direct OpenAI URLs
+        # (api.openai.com) since all newer tool-calling models prefer
+        # Responses there. ACP runtimes are excluded: CopilotACPClient
+        # handles its own routing and does not implement the Responses API
+        # surface.
         if (
             self.api_mode == "chat_completions"
             and self.provider != "copilot-acp"
@@ -727,7 +728,10 @@ class AIAgent:
             and not str(self.base_url or "").lower().startswith("acp+tcp://")
             and (
                 self._is_direct_openai_url()
-                or self._model_requires_responses_api(self.model)
+                or self._provider_model_requires_responses_api(
+                    self.model,
+                    provider=self.provider,
+                )
             )
         ):
             self.api_mode = "codex_responses"
@@ -1960,6 +1964,24 @@ class AIAgent:
             m = m.rsplit("/", 1)[-1]
         return m.startswith("gpt-5")
 
+    @staticmethod
+    def _provider_model_requires_responses_api(
+        model: str,
+        *,
+        provider: Optional[str] = None,
+    ) -> bool:
+        """Return True when this provider/model pair should use Responses API."""
+        normalized_provider = (provider or "").strip().lower()
+        if normalized_provider == "copilot":
+            try:
+                from hermes_cli.models import _should_use_copilot_responses_api
+                return _should_use_copilot_responses_api(model)
+            except Exception:
+                # Fall back to the generic GPT-5 rule if Copilot-specific
+                # logic is unavailable for any reason.
+                pass
+        return AIAgent._model_requires_responses_api(model)
+
     def _max_tokens_param(self, value: int) -> dict:
         """Return the correct max tokens kwarg for the current provider.
 
@@ -5729,9 +5751,13 @@ class AIAgent:
                 fb_api_mode = "anthropic_messages"
             elif self._is_direct_openai_url(fb_base_url):
                 fb_api_mode = "codex_responses"
-            elif self._model_requires_responses_api(fb_model):
-                # GPT-5.x models need Responses API on every provider
-                # (OpenRouter, Copilot, direct OpenAI, etc.)
+            elif self._provider_model_requires_responses_api(
+                fb_model,
+                provider=fb_provider,
+            ):
+                # GPT-5.x models usually need Responses API, but keep
+                # provider-specific exceptions like Copilot gpt-5-mini on
+                # chat completions.
                 fb_api_mode = "codex_responses"
 
             old_model = self.model
diff --git a/tests/agent/test_credential_pool.py b/tests/agent/test_credential_pool.py
index ca232c12f9..c11782f690 100644
--- a/tests/agent/test_credential_pool.py
+++ b/tests/agent/test_credential_pool.py
@@ -1091,6 +1091,7 @@ def test_load_pool_seeds_copilot_via_gh_auth_token(tmp_path, monkeypatch):
     assert len(entries) == 1
     assert entries[0].source == "gh_cli"
     assert entries[0].access_token == "gho_fake_token_abc123"
+    assert entries[0].base_url == "https://api.githubcopilot.com"
 
 
 def test_load_pool_does_not_seed_copilot_when_no_token(tmp_path, monkeypatch):
diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py
index 2b22955653..4ff00018d2 100644
--- a/tests/run_agent/test_run_agent_codex_responses.py
+++ b/tests/run_agent/test_run_agent_codex_responses.py
@@ -259,6 +259,23 @@ def test_copilot_acp_stays_on_chat_completions_for_gpt_5_models(monkeypatch):
     assert agent.api_mode == "chat_completions"
 
 
+def test_copilot_gpt_5_mini_stays_on_chat_completions(monkeypatch):
+    _patch_agent_bootstrap(monkeypatch)
+    agent = run_agent.AIAgent(
+        model="gpt-5-mini",
+        base_url="https://api.githubcopilot.com",
+        provider="copilot",
+        api_key="gh-token",
+        api_mode="chat_completions",
+        quiet_mode=True,
+        max_iterations=1,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+    assert agent.provider == "copilot"
+    assert agent.api_mode == "chat_completions"
+
+
 def test_build_api_kwargs_codex(monkeypatch):
     agent = _build_agent(monkeypatch)
     kwargs = agent._build_api_kwargs(