Compare commits

...

6 Commits

Author SHA1 Message Date
Teknium
8f676ea90c fix: update async fallback test mock to 5-tuple for api_mode 2026-04-11 01:43:01 -07:00
kshitijk4poor
74a07738f1 fix: warn and clear stale OPENAI_BASE_URL on provider switch (#5161) 2026-04-11 01:38:37 -07:00
kshitijk4poor
ad8c8731b3 fix(auxiliary): validate response shape in call_llm/async_call_llm (#7264)
async_call_llm (and call_llm) can return non-OpenAI objects from
custom providers or adapter shims, crashing downstream consumers
with misleading AttributeError ('str' has no attribute 'choices').

Add _validate_llm_response() that checks the response has the
expected .choices[0].message shape before returning. Wraps all
return paths in call_llm, async_call_llm, and fallback paths.
Fails fast with a clear RuntimeError identifying the task, response
type, and a preview of the malformed payload.

Closes #7264
2026-04-11 01:37:48 -07:00
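
A minimal sketch of the new guard's behavior (illustration only; it assumes
_validate_llm_response is importable from agent.auxiliary_client, as the
tests in this changeset do with other private helpers):

    from types import SimpleNamespace as NS
    from agent.auxiliary_client import _validate_llm_response

    # A well-formed response passes through unchanged.
    good = NS(choices=[NS(message=NS(content="ok"))])
    assert _validate_llm_response(good, task="compression") is good

    # A shim that returns a bare string now fails fast with a RuntimeError
    # naming the task, the type (str), and a payload preview, instead of a
    # misleading downstream AttributeError.
    _validate_llm_response("not a completion", task="compression")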
ran
484b1fc17b fix: drop incompatible model slugs on auxiliary client cache hit
`resolve_provider_client()` already drops OpenRouter-format model slugs
(containing "/") when the resolved provider is not OpenRouter (line 1097).
However, `_get_cached_client()` returns `model or cached_default` directly
on cache hits, bypassing this check entirely.

When the main provider is openai-codex, the auto-detection chain (Step 1
of `_resolve_auto`) caches a CodexAuxiliaryClient. Subsequent auxiliary
calls for different tasks (e.g. compression with `summary_model:
google/gemini-3-flash-preview`) hit the cache and pass the OpenRouter-
format model slug straight to the Codex Responses API, which does not
understand it and returns an empty `response.output`.

This causes two user-visible failures:
- "Invalid API response shape" (empty output after 3 retries)
- "Context length exceeded, cannot compress further" (compression itself
  fails through the same path)

Add `_compat_model()` helper that mirrors the "/" check from
`resolve_provider_client()` and call it on the cache-hit return path.
2026-04-11 01:37:03 -07:00
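
A rough sketch of the guard's effect on the cache-hit path (hypothetical
values; _compat_model and _is_openrouter_client are the helpers added in the
diff below):

    from types import SimpleNamespace as NS
    from agent.auxiliary_client import _compat_model

    # Stand-in for a cached CodexAuxiliaryClient pointed at api.openai.com.
    codex_like = NS(base_url="https://api.openai.com/v1")

    # An OpenRouter-format slug on a non-OpenRouter client is dropped in
    # favor of the cached default instead of reaching the Responses API.
    assert _compat_model(codex_like, "google/gemini-3-flash-preview",
                         "gpt-5.2-codex") == "gpt-5.2-codex"

    # Plain model names pass through unchanged.
    assert _compat_model(codex_like, "gpt-5.2-codex", None) == "gpt-5.2-codex"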
kshitijk4poor
65d1fbd668 fix(auxiliary): harden fallback behavior for non-OpenRouter users
Four fixes to auxiliary_client.py:

1. Respect explicit provider as hard constraint (#7559)
   When auxiliary.{task}.provider is explicitly set (not 'auto'),
   connection/payment errors no longer silently fall back to cloud
   providers. Local-only users (Ollama, vLLM) will no longer get
   unexpected OpenRouter billing from auxiliary tasks.

2. Eliminate model='default' sentinel (#7512)
   _resolve_api_key_provider() no longer sends literal 'default' as
   model name to APIs. Providers without a known aux model in
   _API_KEY_PROVIDER_AUX_MODELS are skipped instead of producing
   model_not_supported errors.

3. Add payment/connection fallback to async_call_llm (#7512)
   async_call_llm now mirrors sync call_llm's fallback logic for
   payment (402) and connection errors. Previously, async consumers
   (session_search, web_tools, vision) got hard failures with no
   recovery. Also fixes hardcoded 'openrouter' fallback to use the
   full auto-detection chain.

4. Use accurate error reason in fallback logs (#7512)
   _try_payment_fallback() now accepts a reason parameter and uses
   it in log messages. Connection timeouts are no longer misleadingly
   logged as 'payment error'.

Closes #7559
Closes #7512
2026-04-11 01:36:56 -07:00
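
The practical effect of fix 1 is easiest to see with a config like this
(hypothetical local setup; the keys mirror the auxiliary.{task}.* scheme used
elsewhere in this changeset):

    auxiliary:
      compression:
        provider: custom            # explicit, so a hard constraint after this fix
        base_url: http://localhost:11434/v1
        model: llama3

With provider set explicitly, a 402 or connection error now surfaces as an
error instead of silently re-routing the task to a cloud provider; only
provider: auto (the default) walks the fallback chain.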
kshitijk4poor
2dc980c676 fix(auxiliary): honor api_mode in auxiliary client (#6800)
The auxiliary client always calls client.chat.completions.create(),
ignoring the api_mode config flag. This breaks codex-family models
(e.g. gpt-5.3-codex) on direct OpenAI API keys, which need the
/v1/responses endpoint.

Changes:
- Expand _resolve_task_provider_model to return api_mode (5-tuple)
- Read api_mode from auxiliary.{task}.api_mode config and env vars
  (AUXILIARY_{TASK}_API_MODE)
- Pass api_mode through _get_cached_client to resolve_provider_client
- Add _needs_codex_wrap/_wrap_if_needed helpers that wrap plain OpenAI
  clients in CodexAuxiliaryClient when api_mode=codex_responses or
  when auto-detection finds api.openai.com + codex model pattern
- Apply wrapping at all custom endpoint, named custom provider, and
  API-key provider return paths
- Update test mocks for the new 5-tuple return format

Users can now set:
  auxiliary:
    compression:
      model: gpt-5.3-codex
      base_url: https://api.openai.com/v1
      api_mode: codex_responses

Closes #6800
2026-04-11 01:35:32 -07:00
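
The same override should also be reachable from the environment via the
AUXILIARY_{TASK}_API_MODE pattern the diff reads (assuming the task name
uppercases to COMPRESSION as with the other AUXILIARY_* overrides):

    AUXILIARY_COMPRESSION_API_MODE=codex_responses
    AUXILIARY_COMPRESSION_MODEL=gpt-5.3-codex
    AUXILIARY_COMPRESSION_BASE_URL=https://api.openai.com/v1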
4 changed files with 666 additions and 52 deletions

View File

@@ -59,6 +59,9 @@ from hermes_constants import OPENROUTER_BASE_URL
 logger = logging.getLogger(__name__)
+# Module-level flag: only warn once per process about stale OPENAI_BASE_URL.
+_stale_base_url_warned = False
 _PROVIDER_ALIASES = {
     "google": "gemini",
     "google-gemini": "gemini",
@@ -707,7 +710,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
 base_url = _to_openai_base_url(
     _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
 )
-model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default")
+model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
+if model is None:
+    continue  # skip provider if we don't know a valid aux model
 logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
 extra = {}
 if "api.kimi.com" in base_url.lower():
@@ -726,7 +731,9 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
 base_url = _to_openai_base_url(
     str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
 )
-model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id, "default")
+model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
+if model is None:
+    continue  # skip provider if we don't know a valid aux model
 logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
 extra = {}
 if "api.kimi.com" in base_url.lower():
@@ -1075,11 +1082,12 @@ def _is_connection_error(exc: Exception) -> bool:
 def _try_payment_fallback(
     failed_provider: str,
     task: str = None,
+    reason: str = "payment error",
 ) -> Tuple[Optional[Any], Optional[str], str]:
-    """Try alternative providers after a payment/credit error.
+    """Try alternative providers after a payment/credit or connection error.

     Iterates the standard auto-detection chain, skipping the provider that
-    returned a payment error.
+    failed.

     Returns:
         (client, model, provider_label) or (None, None, "") if no fallback.
@@ -1105,15 +1113,15 @@ def _try_payment_fallback(
             client, model = try_fn()
             if client is not None:
                 logger.info(
-                    "Auxiliary %s: payment error on %s — falling back to %s (%s)",
-                    task or "call", failed_provider, label, model or "default",
+                    "Auxiliary %s: %s on %s — falling back to %s (%s)",
+                    task or "call", reason, failed_provider, label, model or "default",
                 )
                 return client, model, label
         tried.append(label)
     logger.warning(
-        "Auxiliary %s: payment error on %s and no fallback available (tried: %s)",
-        task or "call", failed_provider, ", ".join(tried),
+        "Auxiliary %s: %s on %s and no fallback available (tried: %s)",
+        task or "call", reason, failed_provider, ", ".join(tried),
     )
     return None, None, ""
@@ -1128,9 +1136,28 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
        provider they already have credentials for — no OpenRouter key needed.
     2. OpenRouter → Nous → custom → Codex → API-key providers (original chain).
     """
-    global auxiliary_is_nous
+    global auxiliary_is_nous, _stale_base_url_warned
     auxiliary_is_nous = False  # Reset — _try_nous() will set True if it wins

+    # ── Warn once if OPENAI_BASE_URL is set but config.yaml uses a named
+    # provider (not 'custom'). This catches the common "env poisoning"
+    # scenario where a user switches providers via `hermes model` but the
+    # old OPENAI_BASE_URL lingers in ~/.hermes/.env. ──
+    if not _stale_base_url_warned:
+        _env_base = os.getenv("OPENAI_BASE_URL", "").strip()
+        _cfg_provider = _read_main_provider()
+        if (_env_base and _cfg_provider
+                and _cfg_provider != "custom"
+                and not _cfg_provider.startswith("custom:")):
+            logger.warning(
+                "OPENAI_BASE_URL is set (%s) but model.provider is '%s'. "
+                "Auxiliary clients may route to the wrong endpoint. "
+                "Run: hermes model to reconfigure, or remove "
+                "OPENAI_BASE_URL from ~/.hermes/.env",
+                _env_base, _cfg_provider,
+            )
+        _stale_base_url_warned = True
+
     # ── Step 1: non-aggregator main provider → use main model directly ──
     main_provider = _read_main_provider()
     main_model = _read_main_model()
@@ -1217,6 +1244,7 @@ def resolve_provider_client(
     raw_codex: bool = False,
     explicit_base_url: str = None,
     explicit_api_key: str = None,
+    api_mode: str = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
     """Central router: given a provider name and optional model, return a
     configured client with the correct auth, base URL, and API format.
@@ -1240,6 +1268,10 @@ def resolve_provider_client(
             the main agent loop).
         explicit_base_url: Optional direct OpenAI-compatible endpoint.
         explicit_api_key: Optional API key paired with explicit_base_url.
+        api_mode: API mode override. One of "chat_completions",
+            "codex_responses", or None (auto-detect). When set to
+            "codex_responses", the client is wrapped in
+            CodexAuxiliaryClient to route through the Responses API.

     Returns:
         (client, resolved_model) or (None, None) if auth is unavailable.
@@ -1247,6 +1279,40 @@ def resolve_provider_client(
     # Normalise aliases
     provider = _normalize_aux_provider(provider)

+    def _needs_codex_wrap(client_obj, base_url_str: str, model_str: str) -> bool:
+        """Decide if a plain OpenAI client should be wrapped for Responses API.
+
+        Returns True when api_mode is explicitly "codex_responses", or when
+        auto-detection (api.openai.com + codex-family model) suggests it.
+        Already-wrapped clients (CodexAuxiliaryClient) are skipped.
+        """
+        if isinstance(client_obj, CodexAuxiliaryClient):
+            return False
+        if raw_codex:
+            return False
+        if api_mode == "codex_responses":
+            return True
+        # Auto-detect: api.openai.com + codex model name pattern
+        if api_mode and api_mode != "codex_responses":
+            return False  # explicit non-codex mode
+        normalized_base = (base_url_str or "").strip().lower()
+        if "api.openai.com" in normalized_base and "openrouter" not in normalized_base:
+            model_lower = (model_str or "").lower()
+            if "codex" in model_lower:
+                return True
+        return False
+
+    def _wrap_if_needed(client_obj, final_model_str: str, base_url_str: str = ""):
+        """Wrap a plain OpenAI client in CodexAuxiliaryClient if Responses API is needed."""
+        if _needs_codex_wrap(client_obj, base_url_str, final_model_str):
+            logger.debug(
+                "resolve_provider_client: wrapping client in CodexAuxiliaryClient "
+                "(api_mode=%s, model=%s, base_url=%s)",
+                api_mode or "auto-detected", final_model_str,
+                base_url_str[:60] if base_url_str else "")
+            return CodexAuxiliaryClient(client_obj, final_model_str)
+        return client_obj
+
     # ── Auto: try all providers in priority order ────────────────────
     if provider == "auto":
         client, resolved = _resolve_auto()
@@ -1336,6 +1402,7 @@ def resolve_provider_client(
             from hermes_cli.models import copilot_default_headers
             extra["default_headers"] = copilot_default_headers()
         client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
+        client = _wrap_if_needed(client, final_model, custom_base)
         return (_to_async_client(client, final_model) if async_mode
                 else (client, final_model))

     # Try custom first, then codex, then API-key providers
@@ -1344,6 +1411,8 @@ def resolve_provider_client(
         client, default = try_fn()
         if client is not None:
             final_model = _normalize_resolved_model(model or default, provider)
+            _cbase = str(getattr(client, "base_url", "") or "")
+            client = _wrap_if_needed(client, final_model, _cbase)
             return (_to_async_client(client, final_model) if async_mode
                     else (client, final_model))
     logger.warning("resolve_provider_client: custom/main requested "
@@ -1363,6 +1432,7 @@ def resolve_provider_client(
                 provider,
             )
         client = OpenAI(api_key=custom_key, base_url=custom_base)
+        client = _wrap_if_needed(client, final_model, custom_base)
         logger.debug(
             "resolve_provider_client: named custom provider %r (%s)",
             provider, final_model)
@@ -1442,6 +1512,11 @@ def resolve_provider_client(
         except ImportError:
             pass

+    # Honor api_mode for any API-key provider (e.g. direct OpenAI with
+    # codex-family models). The copilot-specific wrapping above handles
+    # copilot; this covers the general case (#6800).
+    client = _wrap_if_needed(client, final_model, base_url)
+
     logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
     return (_to_async_client(client, final_model) if async_mode
             else (client, final_model))
@@ -1474,12 +1549,13 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optiona
     Callers may override the returned model with a per-task env var
     (e.g. CONTEXT_COMPRESSION_MODEL, AUXILIARY_WEB_EXTRACT_MODEL).
     """
-    provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
+    provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(task or None)
     return resolve_provider_client(
         provider,
         model=model,
         explicit_base_url=base_url,
         explicit_api_key=api_key,
+        api_mode=api_mode,
     )
@@ -1490,13 +1566,14 @@ def get_async_text_auxiliary_client(task: str = ""):
     (AsyncCodexAuxiliaryClient, model) which wraps the Responses API.

     Returns (None, None) when no provider is available.
     """
-    provider, model, base_url, api_key = _resolve_task_provider_model(task or None)
+    provider, model, base_url, api_key, api_mode = _resolve_task_provider_model(task or None)
     return resolve_provider_client(
         provider,
         model=model,
         async_mode=True,
         explicit_base_url=base_url,
         explicit_api_key=api_key,
+        api_mode=api_mode,
     )
@@ -1569,7 +1646,7 @@ def resolve_vision_provider_client(
     backends, so users can intentionally force experimental providers. Auto mode
     stays conservative and only tries vision backends known to work today.
     """
-    requested, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
+    requested, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
         "vision", provider, model, base_url, api_key
     )
     requested = _normalize_vision_provider(requested)
@@ -1785,12 +1862,30 @@ def cleanup_stale_async_clients() -> None:
             del _client_cache[key]

+def _is_openrouter_client(client: Any) -> bool:
+    for obj in (client, getattr(client, "_client", None), getattr(client, "client", None)):
+        if obj and "openrouter" in str(getattr(obj, "base_url", "") or "").lower():
+            return True
+    return False
+
+
+def _compat_model(client: Any, model: Optional[str], cached_default: Optional[str]) -> Optional[str]:
+    """Drop OpenRouter-format model slugs (with '/') for non-OpenRouter clients.
+
+    Mirrors the guard in resolve_provider_client() which is skipped on cache hits.
+    """
+    if model and "/" in model and not _is_openrouter_client(client):
+        return cached_default
+    return model or cached_default
+
+
 def _get_cached_client(
     provider: str,
     model: str = None,
     async_mode: bool = False,
     base_url: str = None,
     api_key: str = None,
+    api_mode: str = None,
 ) -> Tuple[Optional[Any], Optional[str]]:
     """Get or create a cached client for the given provider.
@@ -1814,7 +1909,7 @@ def _get_cached_client(
             loop_id = id(current_loop)
         except RuntimeError:
             pass
-    cache_key = (provider, async_mode, base_url or "", api_key or "", loop_id)
+    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", loop_id)
     with _client_cache_lock:
         if cache_key in _client_cache:
             cached_client, cached_default, cached_loop = _client_cache[cache_key]
@@ -1826,9 +1921,11 @@ def _get_cached_client(
                     _force_close_async_httpx(cached_client)
                     del _client_cache[cache_key]
                 else:
-                    return cached_client, model or cached_default
+                    effective = _compat_model(cached_client, model, cached_default)
+                    return cached_client, effective
             else:
-                return cached_client, model or cached_default
+                effective = _compat_model(cached_client, model, cached_default)
+                return cached_client, effective

     # Build outside the lock
     client, default_model = resolve_provider_client(
         provider,
@@ -1836,6 +1933,7 @@ def _get_cached_client(
         async_mode,
         explicit_base_url=base_url,
         explicit_api_key=api_key,
+        api_mode=api_mode,
     )
     if client is not None:
         # For async clients, remember which loop they were created on so we
@@ -1855,7 +1953,7 @@ def _resolve_task_provider_model(
     model: str = None,
     base_url: str = None,
     api_key: str = None,
-) -> Tuple[str, Optional[str], Optional[str], Optional[str]]:
+) -> Tuple[str, Optional[str], Optional[str], Optional[str], Optional[str]]:
     """Determine provider + model for a call.

     Priority:
@@ -1864,15 +1962,17 @@ def _resolve_task_provider_model(
     3. Config file (auxiliary.{task}.* or compression.*)
     4. "auto" (full auto-detection chain)

-    Returns (provider, model, base_url, api_key) where model may be None
-    (use provider default). When base_url is set, provider is forced to
-    "custom" and the task uses that direct endpoint.
+    Returns (provider, model, base_url, api_key, api_mode) where model may
+    be None (use provider default). When base_url is set, provider is forced
+    to "custom" and the task uses that direct endpoint. api_mode is one of
+    "chat_completions", "codex_responses", or None (auto-detect).
     """
     config = {}
     cfg_provider = None
     cfg_model = None
     cfg_base_url = None
     cfg_api_key = None
+    cfg_api_mode = None

     if task:
         try:
@@ -1889,6 +1989,7 @@ def _resolve_task_provider_model(
             cfg_model = str(task_config.get("model", "")).strip() or None
             cfg_base_url = str(task_config.get("base_url", "")).strip() or None
             cfg_api_key = str(task_config.get("api_key", "")).strip() or None
+            cfg_api_mode = str(task_config.get("api_mode", "")).strip() or None

     # Backwards compat: compression section has its own keys.
     # The auxiliary.compression defaults to provider="auto", so treat
@@ -1902,30 +2003,32 @@ def _resolve_task_provider_model(
         cfg_base_url = cfg_base_url or _sbu.strip() or None

     env_model = _get_auxiliary_env_override(task, "MODEL") if task else None
+    env_api_mode = _get_auxiliary_env_override(task, "API_MODE") if task else None
     resolved_model = model or env_model or cfg_model
+    resolved_api_mode = env_api_mode or cfg_api_mode

     if base_url:
-        return "custom", resolved_model, base_url, api_key
+        return "custom", resolved_model, base_url, api_key, resolved_api_mode
     if provider:
-        return provider, resolved_model, base_url, api_key
+        return provider, resolved_model, base_url, api_key, resolved_api_mode
     if task:
         env_base_url = _get_auxiliary_env_override(task, "BASE_URL")
         env_api_key = _get_auxiliary_env_override(task, "API_KEY")
         if env_base_url:
-            return "custom", resolved_model, env_base_url, env_api_key or cfg_api_key
+            return "custom", resolved_model, env_base_url, env_api_key or cfg_api_key, resolved_api_mode
         env_provider = _get_auxiliary_provider(task)
         if env_provider != "auto":
-            return env_provider, resolved_model, None, None
+            return env_provider, resolved_model, None, None, resolved_api_mode
         if cfg_base_url:
-            return "custom", resolved_model, cfg_base_url, cfg_api_key
+            return "custom", resolved_model, cfg_base_url, cfg_api_key, resolved_api_mode
         if cfg_provider and cfg_provider != "auto":
-            return cfg_provider, resolved_model, None, None
+            return cfg_provider, resolved_model, None, None, resolved_api_mode
-        return "auto", resolved_model, None, None
+        return "auto", resolved_model, None, None, resolved_api_mode
-    return "auto", resolved_model, None, None
+    return "auto", resolved_model, None, None, resolved_api_mode


 _DEFAULT_AUX_TIMEOUT = 30.0
@@ -1997,6 +2100,37 @@ def _build_call_kwargs(
     return kwargs

+
+def _validate_llm_response(response: Any, task: str = None) -> Any:
+    """Validate that an LLM response has the expected .choices[0].message shape.
+
+    Fails fast with a clear error instead of letting malformed payloads
+    propagate to downstream consumers where they crash with misleading
+    AttributeError (e.g. "'str' object has no attribute 'choices'").
+    See #7264.
+    """
+    if response is None:
+        raise RuntimeError(
+            f"Auxiliary {task or 'call'}: LLM returned None response"
+        )
+    # Allow SimpleNamespace responses from adapters (CodexAuxiliaryClient,
+    # AnthropicAuxiliaryClient) — they have .choices[0].message.
+    try:
+        choices = response.choices
+        if not choices or not hasattr(choices[0], "message"):
+            raise AttributeError("missing choices[0].message")
+    except (AttributeError, TypeError, IndexError) as exc:
+        response_type = type(response).__name__
+        response_preview = str(response)[:120]
+        raise RuntimeError(
+            f"Auxiliary {task or 'call'}: LLM returned invalid response "
+            f"(type={response_type}): {response_preview!r}. "
+            f"Expected object with .choices[0].message — check provider "
+            f"adapter or custom endpoint compatibility."
+        ) from exc
+    return response
+
+
 def call_llm(
     task: str = None,
     *,
@@ -2035,7 +2169,7 @@ def call_llm(
     Raises:
         RuntimeError: If no provider is configured.
     """
-    resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
+    resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
         task, provider, model, base_url, api_key)

     if task == "vision":
@@ -2068,6 +2202,7 @@ def call_llm(
         resolved_model,
         base_url=resolved_base_url,
         api_key=resolved_api_key,
+        api_mode=resolved_api_mode,
     )
     if client is None:
         # When the user explicitly chose a non-OpenRouter provider but no
@@ -2111,18 +2246,20 @@ def call_llm(
     # Handle max_tokens vs max_completion_tokens retry, then payment fallback.
     try:
-        return client.chat.completions.create(**kwargs)
+        return _validate_llm_response(
+            client.chat.completions.create(**kwargs), task)
     except Exception as first_err:
         err_str = str(first_err)
         if "max_tokens" in err_str or "unsupported_parameter" in err_str:
             kwargs.pop("max_tokens", None)
             kwargs["max_completion_tokens"] = max_tokens
             try:
-                return client.chat.completions.create(**kwargs)
+                return _validate_llm_response(
+                    client.chat.completions.create(**kwargs), task)
             except Exception as retry_err:
-                # If the max_tokens retry also hits a payment error,
-                # fall through to the payment fallback below.
-                if not _is_payment_error(retry_err):
+                # If the max_tokens retry also hits a payment or connection
+                # error, fall through to the fallback chain below.
+                if not (_is_payment_error(retry_err) or _is_connection_error(retry_err)):
                     raise
                 first_err = retry_err
@@ -2139,19 +2276,24 @@ def call_llm(
         # and providers the user never configured that got picked up by
         # the auto-detection chain.
         should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
-        if should_fallback:
+        # Only try alternative providers when the user didn't explicitly
+        # configure this task's provider. Explicit provider = hard constraint;
+        # auto (the default) = best-effort fallback chain. (#7559)
+        is_auto = resolved_provider in ("auto", "", None)
+        if should_fallback and is_auto:
             reason = "payment error" if _is_payment_error(first_err) else "connection error"
             logger.info("Auxiliary %s: %s on %s (%s), trying fallback",
                         task or "call", reason, resolved_provider, first_err)
             fb_client, fb_model, fb_label = _try_payment_fallback(
-                resolved_provider, task)
+                resolved_provider, task, reason=reason)
             if fb_client is not None:
                 fb_kwargs = _build_call_kwargs(
                     fb_label, fb_model, messages,
                     temperature=temperature, max_tokens=max_tokens,
                     tools=tools, timeout=effective_timeout,
                     extra_body=extra_body)
-                return fb_client.chat.completions.create(**fb_kwargs)
+                return _validate_llm_response(
+                    fb_client.chat.completions.create(**fb_kwargs), task)
         raise
@@ -2229,7 +2371,7 @@ async def async_call_llm(
     Same as call_llm() but async. See call_llm() for full documentation.
     """
-    resolved_provider, resolved_model, resolved_base_url, resolved_api_key = _resolve_task_provider_model(
+    resolved_provider, resolved_model, resolved_base_url, resolved_api_key, resolved_api_mode = _resolve_task_provider_model(
        task, provider, model, base_url, api_key)

     if task == "vision":
@@ -2263,6 +2405,7 @@ async def async_call_llm(
         async_mode=True,
         base_url=resolved_base_url,
         api_key=resolved_api_key,
+        api_mode=resolved_api_mode,
     )
     if client is None:
         _explicit = (resolved_provider or "").strip().lower()
@@ -2273,11 +2416,9 @@ async def async_call_llm(
f"variable, or switch to a different provider with `hermes model`." f"variable, or switch to a different provider with `hermes model`."
) )
if not resolved_base_url: if not resolved_base_url:
logger.warning("Provider %s unavailable, falling back to openrouter", logger.info("Auxiliary %s: provider %s unavailable, trying auto-detection chain",
resolved_provider) task or "call", resolved_provider)
client, final_model = _get_cached_client( client, final_model = _get_cached_client("auto", async_mode=True)
"openrouter", resolved_model or _OPENROUTER_MODEL,
async_mode=True)
if client is None: if client is None:
raise RuntimeError( raise RuntimeError(
f"No LLM provider configured for task={task} provider={resolved_provider}. " f"No LLM provider configured for task={task} provider={resolved_provider}. "
@@ -2292,11 +2433,42 @@ async def async_call_llm(
         base_url=resolved_base_url)

     try:
-        return await client.chat.completions.create(**kwargs)
+        return _validate_llm_response(
+            await client.chat.completions.create(**kwargs), task)
     except Exception as first_err:
         err_str = str(first_err)
         if "max_tokens" in err_str or "unsupported_parameter" in err_str:
             kwargs.pop("max_tokens", None)
             kwargs["max_completion_tokens"] = max_tokens
-            return await client.chat.completions.create(**kwargs)
+            try:
+                return _validate_llm_response(
+                    await client.chat.completions.create(**kwargs), task)
+            except Exception as retry_err:
+                # If the max_tokens retry also hits a payment or connection
+                # error, fall through to the fallback chain below.
+                if not (_is_payment_error(retry_err) or _is_connection_error(retry_err)):
+                    raise
+                first_err = retry_err
+
+        # ── Payment / connection fallback (mirrors sync call_llm) ─────
+        should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
+        is_auto = resolved_provider in ("auto", "", None)
+        if should_fallback and is_auto:
+            reason = "payment error" if _is_payment_error(first_err) else "connection error"
+            logger.info("Auxiliary %s (async): %s on %s (%s), trying fallback",
+                        task or "call", reason, resolved_provider, first_err)
+            fb_client, fb_model, fb_label = _try_payment_fallback(
+                resolved_provider, task, reason=reason)
+            if fb_client is not None:
+                fb_kwargs = _build_call_kwargs(
+                    fb_label, fb_model, messages,
+                    temperature=temperature, max_tokens=max_tokens,
+                    tools=tools, timeout=effective_timeout,
+                    extra_body=extra_body)
+                # Convert sync fallback client to async
+                async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "")
+                if async_fb_model and async_fb_model != fb_kwargs.get("model"):
+                    fb_kwargs["model"] = async_fb_model
+                return _validate_llm_response(
+                    await async_fb.chat.completions.create(**fb_kwargs), task)
         raise

View File

@@ -1080,6 +1080,42 @@ def select_provider_and_model(args=None):
elif selected_provider in ("gemini", "zai", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface"): elif selected_provider in ("gemini", "zai", "minimax", "minimax-cn", "kilocode", "opencode-zen", "opencode-go", "ai-gateway", "alibaba", "huggingface"):
_model_flow_api_key_provider(config, selected_provider, current_model) _model_flow_api_key_provider(config, selected_provider, current_model)
# ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
# When the user switches to a named provider (anything except "custom"),
# a leftover OPENAI_BASE_URL in ~/.hermes/.env can poison auxiliary
# clients that use provider:auto. Clear it proactively. (#5161)
if selected_provider not in ("custom", "cancel", "remove-custom") \
and not selected_provider.startswith("custom:"):
_clear_stale_openai_base_url()
def _clear_stale_openai_base_url():
"""Remove OPENAI_BASE_URL from ~/.hermes/.env if the active provider is not 'custom'.
After a provider switch, a leftover OPENAI_BASE_URL causes auxiliary
clients (compression, vision, delegation) with provider:auto to route
requests to the old custom endpoint instead of the newly selected
provider. See issue #5161.
"""
from hermes_cli.config import get_env_value, save_env_value, load_config
cfg = load_config()
model_cfg = cfg.get("model", {})
if isinstance(model_cfg, dict):
provider = (model_cfg.get("provider") or "").strip().lower()
else:
provider = ""
if provider == "custom" or not provider:
return # custom provider legitimately uses OPENAI_BASE_URL
stale_url = get_env_value("OPENAI_BASE_URL")
if stale_url:
save_env_value("OPENAI_BASE_URL", "")
print(f"Cleared stale OPENAI_BASE_URL from .env (was: {stale_url[:40]}...)"
if len(stale_url) > 40
else f"Cleared stale OPENAI_BASE_URL from .env (was: {stale_url})")
def _prompt_provider_choice(choices, *, default=0): def _prompt_provider_choice(choices, *, default=0):
"""Show provider selection menu with curses arrow-key navigation. """Show provider selection menu with curses arrow-key navigation.

View File

@@ -1,9 +1,10 @@
"""Tests for agent.auxiliary_client resolution chain, provider overrides, and model overrides.""" """Tests for agent.auxiliary_client resolution chain, provider overrides, and model overrides."""
import json import json
import logging
import os import os
from pathlib import Path from pathlib import Path
from unittest.mock import patch, MagicMock from unittest.mock import patch, MagicMock, AsyncMock
import pytest import pytest
@@ -14,6 +15,7 @@ from agent.auxiliary_client import (
     resolve_provider_client,
     auxiliary_max_tokens_param,
     call_llm,
+    async_call_llm,
     _read_codex_access_token,
     _get_auxiliary_provider,
     _get_provider_chain,
@@ -1122,8 +1124,8 @@ class TestCallLlmPaymentFallback:
         exc.status_code = 402
         return exc

-    def test_402_triggers_fallback(self, monkeypatch):
-        """When the primary provider returns 402, call_llm tries the next one."""
+    def test_402_triggers_fallback_when_auto(self, monkeypatch):
+        """When provider is auto and returns 402, call_llm tries the next one."""
         monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")

         primary_client = MagicMock()
@@ -1136,7 +1138,7 @@ class TestCallLlmPaymentFallback:
with patch("agent.auxiliary_client._get_cached_client", with patch("agent.auxiliary_client._get_cached_client",
return_value=(primary_client, "google/gemini-3-flash-preview")), \ return_value=(primary_client, "google/gemini-3-flash-preview")), \
patch("agent.auxiliary_client._resolve_task_provider_model", patch("agent.auxiliary_client._resolve_task_provider_model",
return_value=("openrouter", "google/gemini-3-flash-preview", None, None)), \ return_value=("auto", "google/gemini-3-flash-preview", None, None, None)), \
patch("agent.auxiliary_client._try_payment_fallback", patch("agent.auxiliary_client._try_payment_fallback",
return_value=(fallback_client, "gpt-5.2-codex", "openai-codex")) as mock_fb: return_value=(fallback_client, "gpt-5.2-codex", "openai-codex")) as mock_fb:
result = call_llm( result = call_llm(
@@ -1145,13 +1147,62 @@ class TestCallLlmPaymentFallback:
             )

         assert result is fallback_response
-        mock_fb.assert_called_once_with("openrouter", "compression")
+        mock_fb.assert_called_once_with("auto", "compression", reason="payment error")
         # Fallback call should use the fallback model
         fb_kwargs = fallback_client.chat.completions.create.call_args.kwargs
         assert fb_kwargs["model"] == "gpt-5.2-codex"

+    def test_402_no_fallback_when_explicit_provider(self, monkeypatch):
+        """When provider is explicitly configured (not auto), 402 should NOT fallback (#7559)."""
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+
+        primary_client = MagicMock()
+        primary_client.chat.completions.create.side_effect = self._make_402_error()
+
+        with patch("agent.auxiliary_client._get_cached_client",
+                   return_value=(primary_client, "local-model")), \
+             patch("agent.auxiliary_client._resolve_task_provider_model",
+                   return_value=("custom", "local-model", None, None, None)), \
+             patch("agent.auxiliary_client._try_payment_fallback") as mock_fb:
+            with pytest.raises(Exception, match="insufficient credits"):
+                call_llm(
+                    task="compression",
+                    messages=[{"role": "user", "content": "hello"}],
+                )
+            # Fallback should NOT be attempted when provider is explicit
+            mock_fb.assert_not_called()
+
+    def test_connection_error_triggers_fallback_when_auto(self, monkeypatch):
+        """Connection errors also trigger fallback when provider is auto."""
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+
+        primary_client = MagicMock()
+        conn_err = Exception("Connection refused")
+        conn_err.status_code = None
+        primary_client.chat.completions.create.side_effect = conn_err
+
+        fallback_client = MagicMock()
+        fallback_response = MagicMock()
+        fallback_client.chat.completions.create.return_value = fallback_response
+
+        with patch("agent.auxiliary_client._get_cached_client",
+                   return_value=(primary_client, "model")), \
+             patch("agent.auxiliary_client._resolve_task_provider_model",
+                   return_value=("auto", "model", None, None, None)), \
+             patch("agent.auxiliary_client._is_connection_error", return_value=True), \
+             patch("agent.auxiliary_client._try_payment_fallback",
+                   return_value=(fallback_client, "fb-model", "nous")) as mock_fb:
+            result = call_llm(
+                task="compression",
+                messages=[{"role": "user", "content": "hello"}],
+            )
+
+        assert result is fallback_response
+        mock_fb.assert_called_once_with("auto", "compression", reason="connection error")
+
     def test_non_payment_error_not_caught(self, monkeypatch):
-        """Non-payment errors (500, connection, etc.) should NOT trigger fallback."""
+        """Non-payment/non-connection errors (500) should NOT trigger fallback."""
         monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")

         primary_client = MagicMock()
@@ -1162,7 +1213,7 @@ class TestCallLlmPaymentFallback:
with patch("agent.auxiliary_client._get_cached_client", with patch("agent.auxiliary_client._get_cached_client",
return_value=(primary_client, "google/gemini-3-flash-preview")), \ return_value=(primary_client, "google/gemini-3-flash-preview")), \
patch("agent.auxiliary_client._resolve_task_provider_model", patch("agent.auxiliary_client._resolve_task_provider_model",
return_value=("openrouter", "google/gemini-3-flash-preview", None, None)): return_value=("auto", "google/gemini-3-flash-preview", None, None, None)):
with pytest.raises(Exception, match="Internal Server Error"): with pytest.raises(Exception, match="Internal Server Error"):
call_llm( call_llm(
task="compression", task="compression",
@@ -1179,7 +1230,7 @@ class TestCallLlmPaymentFallback:
with patch("agent.auxiliary_client._get_cached_client", with patch("agent.auxiliary_client._get_cached_client",
return_value=(primary_client, "google/gemini-3-flash-preview")), \ return_value=(primary_client, "google/gemini-3-flash-preview")), \
patch("agent.auxiliary_client._resolve_task_provider_model", patch("agent.auxiliary_client._resolve_task_provider_model",
return_value=("openrouter", "google/gemini-3-flash-preview", None, None)), \ return_value=("auto", "google/gemini-3-flash-preview", None, None, None)), \
patch("agent.auxiliary_client._try_payment_fallback", patch("agent.auxiliary_client._try_payment_fallback",
return_value=(None, None, "")): return_value=(None, None, "")):
with pytest.raises(Exception, match="insufficient credits"): with pytest.raises(Exception, match="insufficient credits"):
@@ -1229,3 +1280,283 @@ def test_resolve_api_key_provider_skips_unconfigured_anthropic(monkeypatch):
assert "anthropic" not in called, \ assert "anthropic" not in called, \
"_try_anthropic() should not be called when anthropic is not explicitly configured" "_try_anthropic() should not be called when anthropic is not explicitly configured"
# ---------------------------------------------------------------------------
# model="default" elimination (#7512)
# ---------------------------------------------------------------------------
class TestModelDefaultElimination:
"""_resolve_api_key_provider must skip providers without known aux models."""
def test_unknown_provider_skipped(self, monkeypatch):
"""Providers not in _API_KEY_PROVIDER_AUX_MODELS are skipped, not sent model='default'."""
from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
# Verify our known providers have entries
assert "gemini" in _API_KEY_PROVIDER_AUX_MODELS
assert "kimi-coding" in _API_KEY_PROVIDER_AUX_MODELS
# A random provider_id not in the dict should return None
assert _API_KEY_PROVIDER_AUX_MODELS.get("totally-unknown-provider") is None
def test_known_provider_gets_real_model(self):
"""Known providers get a real model name, not 'default'."""
from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
for provider_id, model in _API_KEY_PROVIDER_AUX_MODELS.items():
assert model != "default", f"{provider_id} should not map to 'default'"
assert isinstance(model, str) and model.strip(), \
f"{provider_id} should have a non-empty model string"
+
+
+# ---------------------------------------------------------------------------
+# _try_payment_fallback reason parameter (#7512 bug 3)
+# ---------------------------------------------------------------------------
+
+class TestTryPaymentFallbackReason:
+    """_try_payment_fallback uses the reason parameter in log messages."""
+
+    def test_reason_parameter_passed_through(self, monkeypatch):
+        """The reason= parameter is accepted without error."""
+        from agent.auxiliary_client import _try_payment_fallback
+        # Mock the provider chain to return nothing
+        monkeypatch.setattr(
+            "agent.auxiliary_client._get_provider_chain",
+            lambda: [],
+        )
+        monkeypatch.setattr(
+            "agent.auxiliary_client._read_main_provider",
+            lambda: "",
+        )
+        client, model, label = _try_payment_fallback(
+            "openrouter", task="compression", reason="connection error"
+        )
+        assert client is None
+        assert label == ""
+
+
+# ---------------------------------------------------------------------------
+# _is_connection_error coverage
+# ---------------------------------------------------------------------------
+
+class TestIsConnectionError:
+    """Tests for _is_connection_error detection."""
+
+    def test_connection_refused(self):
+        from agent.auxiliary_client import _is_connection_error
+        err = Exception("Connection refused")
+        assert _is_connection_error(err) is True
+
+    def test_timeout(self):
+        from agent.auxiliary_client import _is_connection_error
+        err = Exception("Request timed out.")
+        assert _is_connection_error(err) is True
+
+    def test_dns_failure(self):
+        from agent.auxiliary_client import _is_connection_error
+        err = Exception("Name or service not known")
+        assert _is_connection_error(err) is True
+
+    def test_normal_api_error_not_connection(self):
+        from agent.auxiliary_client import _is_connection_error
+        err = Exception("Bad Request: invalid model")
+        err.status_code = 400
+        assert _is_connection_error(err) is False
+
+    def test_500_not_connection(self):
+        from agent.auxiliary_client import _is_connection_error
+        err = Exception("Internal Server Error")
+        err.status_code = 500
+        assert _is_connection_error(err) is False
+
+
+# ---------------------------------------------------------------------------
+# async_call_llm payment / connection fallback (#7512 bug 2)
+# ---------------------------------------------------------------------------
+
+class TestAsyncCallLlmFallback:
+    """async_call_llm mirrors call_llm fallback behavior."""
+
+    def _make_402_error(self, msg="Payment Required: insufficient credits"):
+        exc = Exception(msg)
+        exc.status_code = 402
+        return exc
+
+    @pytest.mark.asyncio
+    async def test_402_triggers_async_fallback_when_auto(self, monkeypatch):
+        """When provider is auto and returns 402, async_call_llm tries fallback."""
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+
+        primary_client = MagicMock()
+        primary_client.chat.completions.create = AsyncMock(
+            side_effect=self._make_402_error())
+
+        # Fallback client (sync) returned by _try_payment_fallback
+        fb_sync_client = MagicMock()
+        fb_async_client = MagicMock()
+        fb_response = MagicMock()
+        fb_async_client.chat.completions.create = AsyncMock(return_value=fb_response)
+
+        with patch("agent.auxiliary_client._get_cached_client",
+                   return_value=(primary_client, "google/gemini-3-flash-preview")), \
+             patch("agent.auxiliary_client._resolve_task_provider_model",
+                   return_value=("auto", "google/gemini-3-flash-preview", None, None, None)), \
+             patch("agent.auxiliary_client._try_payment_fallback",
+                   return_value=(fb_sync_client, "gpt-5.2-codex", "openai-codex")) as mock_fb, \
+             patch("agent.auxiliary_client._to_async_client",
+                   return_value=(fb_async_client, "gpt-5.2-codex")):
+            result = await async_call_llm(
+                task="compression",
+                messages=[{"role": "user", "content": "hello"}],
+            )
+
+        assert result is fb_response
+        mock_fb.assert_called_once_with("auto", "compression", reason="payment error")
+
+    @pytest.mark.asyncio
+    async def test_402_no_async_fallback_when_explicit(self, monkeypatch):
+        """When provider is explicit, 402 should NOT trigger async fallback."""
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+
+        primary_client = MagicMock()
+        primary_client.chat.completions.create = AsyncMock(
+            side_effect=self._make_402_error())
+
+        with patch("agent.auxiliary_client._get_cached_client",
+                   return_value=(primary_client, "local-model")), \
+             patch("agent.auxiliary_client._resolve_task_provider_model",
+                   return_value=("custom", "local-model", None, None, None)), \
+             patch("agent.auxiliary_client._try_payment_fallback") as mock_fb:
+            with pytest.raises(Exception, match="insufficient credits"):
+                await async_call_llm(
+                    task="compression",
+                    messages=[{"role": "user", "content": "hello"}],
+                )
+            mock_fb.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_connection_error_triggers_async_fallback(self, monkeypatch):
+        """Connection errors trigger async fallback when provider is auto."""
+        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")
+
+        primary_client = MagicMock()
+        conn_err = Exception("Connection refused")
+        conn_err.status_code = None
+        primary_client.chat.completions.create = AsyncMock(side_effect=conn_err)
+
+        fb_sync_client = MagicMock()
+        fb_async_client = MagicMock()
+        fb_response = MagicMock()
+        fb_async_client.chat.completions.create = AsyncMock(return_value=fb_response)
+
+        with patch("agent.auxiliary_client._get_cached_client",
+                   return_value=(primary_client, "model")), \
+             patch("agent.auxiliary_client._resolve_task_provider_model",
+                   return_value=("auto", "model", None, None, None)), \
+             patch("agent.auxiliary_client._is_connection_error", return_value=True), \
+             patch("agent.auxiliary_client._try_payment_fallback",
+                   return_value=(fb_sync_client, "fb-model", "nous")) as mock_fb, \
+             patch("agent.auxiliary_client._to_async_client",
+                   return_value=(fb_async_client, "fb-model")):
+            result = await async_call_llm(
+                task="compression",
+                messages=[{"role": "user", "content": "hello"}],
+            )
+
+        assert result is fb_response
+        mock_fb.assert_called_once_with("auto", "compression", reason="connection error")
+
+
+class TestStaleBaseUrlWarning:
+    """_resolve_auto() warns when OPENAI_BASE_URL conflicts with config provider (#5161)."""
+
+    def test_warns_when_openai_base_url_set_with_named_provider(self, monkeypatch, caplog):
+        """Warning fires when OPENAI_BASE_URL is set but provider is a named provider."""
+        import agent.auxiliary_client as mod
+        # Reset the module-level flag so the warning fires
+        monkeypatch.setattr(mod, "_stale_base_url_warned", False)
+        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1")
+        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test")
+
+        with patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
+             patch("agent.auxiliary_client._read_main_model", return_value="google/gemini-flash"), \
+             caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
+            _resolve_auto()
+
+        assert any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
+            "Expected a warning about stale OPENAI_BASE_URL"
+        assert mod._stale_base_url_warned is True
+
+    def test_no_warning_when_provider_is_custom(self, monkeypatch, caplog):
+        """No warning when the provider is 'custom' — OPENAI_BASE_URL is expected."""
+        import agent.auxiliary_client as mod
+        monkeypatch.setattr(mod, "_stale_base_url_warned", False)
+        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1")
+        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
+
+        with patch("agent.auxiliary_client._read_main_provider", return_value="custom"), \
+             patch("agent.auxiliary_client._read_main_model", return_value="llama3"), \
+             patch("agent.auxiliary_client._resolve_custom_runtime",
+                   return_value=("http://localhost:11434/v1", "test-key", None)), \
+             patch("agent.auxiliary_client.OpenAI") as mock_openai, \
+             caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
+            mock_openai.return_value = MagicMock()
+            _resolve_auto()
+
+        assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
+            "Should NOT warn when provider is 'custom'"
+
+    def test_no_warning_when_provider_is_named_custom(self, monkeypatch, caplog):
+        """No warning when the provider is 'custom:myname' — base_url comes from config."""
+        import agent.auxiliary_client as mod
+        monkeypatch.setattr(mod, "_stale_base_url_warned", False)
+        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1")
+        monkeypatch.setenv("OPENAI_API_KEY", "test-key")
+
+        with patch("agent.auxiliary_client._read_main_provider", return_value="custom:ollama-local"), \
+             patch("agent.auxiliary_client._read_main_model", return_value="llama3"), \
+             patch("agent.auxiliary_client.resolve_provider_client",
+                   return_value=(MagicMock(), "llama3")), \
+             caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
+            _resolve_auto()
+
+        assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
+            "Should NOT warn when provider is 'custom:*'"
+
+    def test_no_warning_when_openai_base_url_not_set(self, monkeypatch, caplog):
+        """No warning when OPENAI_BASE_URL is absent."""
+        import agent.auxiliary_client as mod
+        monkeypatch.setattr(mod, "_stale_base_url_warned", False)
+        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
+        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test")
+
+        with patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
+             patch("agent.auxiliary_client._read_main_model", return_value="google/gemini-flash"), \
+             caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
+            _resolve_auto()
+
+        assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
+            "Should NOT warn when OPENAI_BASE_URL is not set"
+
+    def test_warning_only_fires_once(self, monkeypatch, caplog):
+        """Warning is suppressed after the first invocation."""
+        import agent.auxiliary_client as mod
+        monkeypatch.setattr(mod, "_stale_base_url_warned", False)
+        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1")
+        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test")
+
+        with patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"), \
+             patch("agent.auxiliary_client._read_main_model", return_value="google/gemini-flash"), \
+             caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
+            _resolve_auto()
+            caplog.clear()
+            _resolve_auto()
+
+        assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
+            "Warning should not fire a second time"

View File

@@ -0,0 +1,75 @@
"""Tests for _clear_stale_openai_base_url() cleanup after provider switch (#5161)."""
from __future__ import annotations
from unittest.mock import patch
from hermes_cli.config import load_config, save_config, save_env_value, get_env_value
def _write_provider(provider: str, model: str = "test-model"):
"""Helper: write a provider + model to config.yaml."""
cfg = load_config()
model_cfg = cfg.get("model", {})
if not isinstance(model_cfg, dict):
model_cfg = {}
model_cfg["provider"] = provider
model_cfg["default"] = model
cfg["model"] = model_cfg
save_config(cfg)
class TestClearStaleOpenaiBaseUrl:
"""_clear_stale_openai_base_url() removes OPENAI_BASE_URL when provider is not custom."""
def test_clears_when_provider_is_named(self, monkeypatch):
"""OPENAI_BASE_URL is cleared when config provider is a named provider."""
from hermes_cli.main import _clear_stale_openai_base_url
_write_provider("openrouter")
save_env_value("OPENAI_BASE_URL", "http://localhost:11434/v1")
_clear_stale_openai_base_url()
result = get_env_value("OPENAI_BASE_URL")
assert not result, f"Expected OPENAI_BASE_URL to be cleared, got: {result!r}"
def test_preserves_when_provider_is_custom(self, monkeypatch):
"""OPENAI_BASE_URL is NOT cleared when config provider is 'custom'."""
from hermes_cli.main import _clear_stale_openai_base_url
_write_provider("custom")
save_env_value("OPENAI_BASE_URL", "http://localhost:11434/v1")
_clear_stale_openai_base_url()
result = get_env_value("OPENAI_BASE_URL")
assert result == "http://localhost:11434/v1", \
f"Expected OPENAI_BASE_URL to be preserved, got: {result!r}"
def test_noop_when_no_openai_base_url(self, monkeypatch):
"""No error when OPENAI_BASE_URL is not set."""
from hermes_cli.main import _clear_stale_openai_base_url
_write_provider("openrouter")
# Ensure it's not set
save_env_value("OPENAI_BASE_URL", "")
monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
# Should not raise
_clear_stale_openai_base_url()
def test_noop_when_provider_empty(self, monkeypatch):
"""No cleanup when provider is not set in config."""
from hermes_cli.main import _clear_stale_openai_base_url
cfg = load_config()
cfg.pop("model", None)
save_config(cfg)
save_env_value("OPENAI_BASE_URL", "http://localhost:11434/v1")
_clear_stale_openai_base_url()
result = get_env_value("OPENAI_BASE_URL")
assert result == "http://localhost:11434/v1", \
"Should not clear when provider is not configured"