diff --git a/agent/credential_pool.py b/agent/credential_pool.py
index 8a2fecf5d6..e1307e51f8 100644
--- a/agent/credential_pool.py
+++ b/agent/credential_pool.py
@@ -1162,6 +1162,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
             if token:
                 source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
                 active_sources.add(source_name)
+                pconfig = PROVIDER_REGISTRY.get(provider)
                 changed |= _upsert_entry(
                     entries,
                     provider,
@@ -1170,6 +1171,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
                         "source": source_name,
                         "auth_type": AUTH_TYPE_API_KEY,
                         "access_token": token,
+                        "base_url": pconfig.inference_base_url if pconfig else "",
                         "label": source,
                     },
                 )
diff --git a/run_agent.py b/run_agent.py
index b011078142..956a1e9638 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -714,12 +714,13 @@ class AIAgent:
         except Exception:
             pass
 
-        # GPT-5.x models require the Responses API path — they are rejected
-        # on /v1/chat/completions by both OpenAI and OpenRouter.  Also
-        # auto-upgrade for direct OpenAI URLs (api.openai.com) since all
-        # newer tool-calling models prefer Responses there.
-        # ACP runtimes are excluded: CopilotACPClient handles its own
-        # routing and does not implement the Responses API surface.
+        # GPT-5.x models usually require the Responses API path, but some
+        # providers have exceptions (for example Copilot's gpt-5-mini still
+        # uses chat completions). Also auto-upgrade for direct OpenAI URLs
+        # (api.openai.com) since all newer tool-calling models prefer
+        # Responses there. ACP runtimes are excluded: CopilotACPClient
+        # handles its own routing and does not implement the Responses API
+        # surface.
         if (
             self.api_mode == "chat_completions"
             and self.provider != "copilot-acp"
@@ -727,7 +728,10 @@ class AIAgent:
             and not str(self.base_url or "").lower().startswith("acp+tcp://")
             and (
                 self._is_direct_openai_url()
-                or self._model_requires_responses_api(self.model)
+                or self._provider_model_requires_responses_api(
+                    self.model,
+                    provider=self.provider,
+                )
             )
         ):
             self.api_mode = "codex_responses"
@@ -1960,6 +1964,24 @@ class AIAgent:
             m = m.rsplit("/", 1)[-1]
         return m.startswith("gpt-5")
 
+    @staticmethod
+    def _provider_model_requires_responses_api(
+        model: str,
+        *,
+        provider: Optional[str] = None,
+    ) -> bool:
+        """Report whether this provider/model pair should use the Responses API."""
+        # Copilot defers to hermes_cli's Copilot-specific check; every other
+        # provider, and any failure of that check, uses the generic rule.
+        wants_copilot_rule = (provider or "").strip().lower() == "copilot"
+        try:
+            if wants_copilot_rule:
+                from hermes_cli.models import _should_use_copilot_responses_api
+                return _should_use_copilot_responses_api(model)
+        except Exception:
+            pass
+        return AIAgent._model_requires_responses_api(model)
+
     def _max_tokens_param(self, value: int) -> dict:
         """Return the correct max tokens kwarg for the current provider.
         
@@ -5729,9 +5751,13 @@ class AIAgent:
                 fb_api_mode = "anthropic_messages"
             elif self._is_direct_openai_url(fb_base_url):
                 fb_api_mode = "codex_responses"
-            elif self._model_requires_responses_api(fb_model):
-                # GPT-5.x models need Responses API on every provider
-                # (OpenRouter, Copilot, direct OpenAI, etc.)
+            elif self._provider_model_requires_responses_api(
+                fb_model,
+                provider=fb_provider,
+            ):
+                # GPT-5.x models usually need Responses API, but keep
+                # provider-specific exceptions like Copilot gpt-5-mini on
+                # chat completions.
                 fb_api_mode = "codex_responses"
 
             old_model = self.model
diff --git a/tests/agent/test_credential_pool.py b/tests/agent/test_credential_pool.py
index ca232c12f9..c11782f690 100644
--- a/tests/agent/test_credential_pool.py
+++ b/tests/agent/test_credential_pool.py
@@ -1091,6 +1091,7 @@ def test_load_pool_seeds_copilot_via_gh_auth_token(tmp_path, monkeypatch):
     assert len(entries) == 1
     assert entries[0].source == "gh_cli"
     assert entries[0].access_token == "gho_fake_token_abc123"
+    assert entries[0].base_url == "https://api.githubcopilot.com"
 
 
 def test_load_pool_does_not_seed_copilot_when_no_token(tmp_path, monkeypatch):
diff --git a/tests/run_agent/test_run_agent_codex_responses.py b/tests/run_agent/test_run_agent_codex_responses.py
index 2b22955653..4ff00018d2 100644
--- a/tests/run_agent/test_run_agent_codex_responses.py
+++ b/tests/run_agent/test_run_agent_codex_responses.py
@@ -259,6 +259,23 @@ def test_copilot_acp_stays_on_chat_completions_for_gpt_5_models(monkeypatch):
     assert agent.api_mode == "chat_completions"
 
 
+def test_copilot_gpt_5_mini_stays_on_chat_completions(monkeypatch):
+    """Copilot gpt-5-mini must stay on chat completions, not codex_responses."""
+    _patch_agent_bootstrap(monkeypatch)
+    agent = run_agent.AIAgent(
+        provider="copilot",
+        model="gpt-5-mini",
+        base_url="https://api.githubcopilot.com",
+        api_key="gh-token",
+        api_mode="chat_completions",
+        max_iterations=1,
+        quiet_mode=True,
+        skip_context_files=True,
+        skip_memory=True,
+    )
+    assert (agent.provider, agent.api_mode) == ("copilot", "chat_completions")
+
+
 def test_build_api_kwargs_codex(monkeypatch):
     agent = _build_agent(monkeypatch)
     kwargs = agent._build_api_kwargs(