diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 3cad451136a..cf7124a1f8e 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -42,6 +42,7 @@ import time
 from pathlib import Path  # noqa: F401 — used by test mocks
 from types import SimpleNamespace
 from typing import Any, Dict, List, Optional, Tuple
+from urllib.parse import urlparse, parse_qs, urlunparse
 
 from openai import OpenAI
 
@@ -52,6 +53,17 @@ from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_
 
 logger = logging.getLogger(__name__)
 
+
+def _extract_url_query_params(url: str):
+    """Extract query params from URL, return (clean_url, default_query dict or None)."""
+    parsed = urlparse(url)
+    if parsed.query:
+        clean = urlunparse(parsed._replace(query=""))
+        params = {k: v[0] for k, v in parse_qs(parsed.query).items()}
+        return clean, params
+    return url, None
+
+
 # Module-level flag: only warn once per process about stale OPENAI_BASE_URL.
 _stale_base_url_warned = False
 
@@ -1157,8 +1169,10 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
         return None, None
     model = _read_main_model() or "gpt-4o-mini"
     logger.debug("Auxiliary client: custom endpoint (%s, api_mode=%s)", model, custom_mode or "chat_completions")
+    _clean_base, _dq = _extract_url_query_params(custom_base)
+    _extra = {"default_query": _dq} if _dq else {}
     if custom_mode == "codex_responses":
-        real_client = OpenAI(api_key=custom_key, base_url=custom_base)
+        real_client = OpenAI(api_key=custom_key, base_url=_clean_base, **_extra)
         return CodexAuxiliaryClient(real_client, model), model
     if custom_mode == "anthropic_messages":
         # Third-party Anthropic-compatible gateway (MiniMax, Zhipu GLM,
@@ -1172,12 +1186,12 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
                 "Custom endpoint declares api_mode=anthropic_messages but the "
                 "anthropic SDK is not installed — falling back to OpenAI-wire."
             )
-            return OpenAI(api_key=custom_key, base_url=custom_base), model
+            return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
         return (
             AnthropicAuxiliaryClient(real_client, model, custom_key, custom_base, is_oauth=False),
             model,
         )
-    return OpenAI(api_key=custom_key, base_url=custom_base), model
+    return OpenAI(api_key=custom_key, base_url=_clean_base, **_extra), model
 
 
 def _try_codex() -> Tuple[Optional[Any], Optional[str]]:
@@ -1825,12 +1839,15 @@ def resolve_provider_client(
                 provider,
             )
         extra = {}
+        _clean_base, _dq = _extract_url_query_params(custom_base)
+        if _dq:
+            extra["default_query"] = _dq
         if base_url_host_matches(custom_base, "api.kimi.com"):
             extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
         elif base_url_host_matches(custom_base, "api.githubcopilot.com"):
             from hermes_cli.models import copilot_default_headers
             extra["default_headers"] = copilot_default_headers()
-        client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
+        client = OpenAI(api_key=custom_key, base_url=_clean_base, **extra)
         client = _wrap_if_needed(client, final_model, custom_base)
         return (_to_async_client(client, final_model) if async_mode else (client, final_model))
@@ -1867,6 +1884,8 @@ def resolve_provider_client(
             model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini",
             provider,
         )
+        _clean_base2, _dq2 = _extract_url_query_params(custom_base)
+        _extra2 = {"default_query": _dq2} if _dq2 else {}
         logger.debug(
             "resolve_provider_client: named custom provider %r (%s, api_mode=%s)",
             provider, final_model, entry_api_mode or "chat_completions")
@@ -1884,7 +1903,7 @@ def resolve_provider_client(
                     "installed — falling back to OpenAI-wire.",
                     provider,
                 )
-                client = OpenAI(api_key=custom_key, base_url=custom_base)
+                client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
                 return (_to_async_client(client, final_model) if async_mode else (client, final_model))
             sync_anthropic = AnthropicAuxiliaryClient(
@@ -1893,7 +1912,7 @@ def resolve_provider_client(
             if async_mode:
                 return AsyncAnthropicAuxiliaryClient(sync_anthropic), final_model
             return sync_anthropic, final_model
-        client = OpenAI(api_key=custom_key, base_url=custom_base)
+        client = OpenAI(api_key=custom_key, base_url=_clean_base2, **_extra2)
         # codex_responses or inherited auto-detect (via _wrap_if_needed).
         # _wrap_if_needed reads the closed-over `api_mode` (the task-level
        # override). Named-provider entry api_mode=codex_responses also
diff --git a/run_agent.py b/run_agent.py
index ea1ea4af6b9..b874ab6acc3 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -40,6 +40,7 @@ from types import SimpleNamespace
 import urllib.request
 import uuid
 from typing import List, Dict, Any, Optional
+from urllib.parse import urlparse, parse_qs, urlunparse
 from openai import OpenAI
 import fire
 from datetime import datetime
@@ -1314,7 +1315,22 @@ class AIAgent:
         if api_key and base_url:
             # Explicit credentials from CLI/gateway — construct directly.
             # The runtime provider resolver already handled auth for us.
-            client_kwargs = {"api_key": api_key, "base_url": base_url}
+            # Extract query params (e.g. Azure api-version) from base_url
+            # and pass via default_query to prevent loss during SDK URL
+            # joining (httpx drops query string when joining paths).
+            _parsed_url = urlparse(base_url)
+            if _parsed_url.query:
+                _clean_url = urlunparse(_parsed_url._replace(query=""))
+                _query_params = {
+                    k: v[0] for k, v in parse_qs(_parsed_url.query).items()
+                }
+                client_kwargs = {
+                    "api_key": api_key,
+                    "base_url": _clean_url,
+                    "default_query": _query_params,
+                }
+            else:
+                client_kwargs = {"api_key": api_key, "base_url": base_url}
             if _provider_timeout is not None:
                 client_kwargs["timeout"] = _provider_timeout
             if self.provider == "copilot-acp":
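
For reviewers, a minimal standalone sketch of the failure mode this patch guards against. Everything below is illustrative only: the Azure-style endpoint URL is hypothetical, and stdlib `urljoin` is used as a stand-in to show the RFC 3986 join semantics under which a base URL's query string does not survive path joining (the same class of behavior the patch comments attribute to httpx inside the OpenAI SDK):

# Illustrative sketch, not part of the patch.
from urllib.parse import urljoin, urlparse, parse_qs, urlunparse

# Hypothetical Azure-style endpoint carrying its api-version in the query.
base_url = "https://example.openai.azure.com/openai/deployments/gpt-4o?api-version=2024-06-01"

# RFC 3986 joining discards the base URL's query string (and here also the
# final path segment), so the param has to travel separately:
print(urljoin(base_url, "chat/completions"))
# https://example.openai.azure.com/openai/deployments/chat/completions

# The patch's split, mirroring _extract_url_query_params():
parsed = urlparse(base_url)
clean = urlunparse(parsed._replace(query=""))
params = {k: v[0] for k, v in parse_qs(parsed.query).items()}
print(clean)   # https://example.openai.azure.com/openai/deployments/gpt-4o
print(params)  # {'api-version': '2024-06-01'}

# default_query then re-attaches the params on every request:
#   client = OpenAI(api_key=key, base_url=clean, default_query=params)

The `default_query` argument is part of the OpenAI Python SDK's public client options, so the extracted params ride along on each request no matter how the per-request path is joined onto `base_url`.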