feat: centralized provider router + fix Codex vision bypass + vision error handling

Three interconnected fixes for auxiliary client infrastructure:

1. CENTRALIZED PROVIDER ROUTER (auxiliary_client.py)
   Add resolve_provider_client(provider, model, async_mode) — a single
   entry point for creating properly configured clients. Given a provider
   name and optional model, it handles auth lookup (env vars, OAuth
   tokens, auth.json), base URL resolution, provider-specific headers,
   and API format differences (Chat Completions vs Responses API for
   Codex). All auxiliary consumers should route through it instead of
   doing ad-hoc env var lookups.

   Refactored get_text_auxiliary_client, get_async_text_auxiliary_client,
   and get_vision_auxiliary_client to use the router internally.
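
   A minimal usage sketch (illustrative only; the provider name, prompt,
   and the OpenRouter choice are examples, not defaults from this
   commit):

       from agent.auxiliary_client import resolve_provider_client

       client, model = resolve_provider_client("openrouter")
       if client is not None:
           # Codex clients come back pre-wrapped, so this exact call
           # shape works for every provider, Responses API included.
           resp = client.chat.completions.create(
               model=model,
               messages=[{"role": "user", "content": "ping"}],
           )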

2. FIX CODEX VISION BYPASS (vision_tools.py)
   vision_tools.py was constructing a raw AsyncOpenAI client from the
   sync vision client's api_key/base_url, completely bypassing the Codex
   Responses API adapter. When the vision provider resolved to Codex,
   the raw client would call chat.completions.create() against
   chatgpt.com/backend-api/codex, an endpoint that only speaks the
   Responses API, so vision requests through Codex failed.

   Fix: Added get_async_vision_auxiliary_client() which properly wraps
   Codex into AsyncCodexAuxiliaryClient. vision_tools.py now uses this
   instead of manual client construction.
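
   Condensed sketch of the new wiring (mirrors the module-level code in
   the vision_tools.py diff below):

       from agent.auxiliary_client import get_async_vision_auxiliary_client

       # Returns AsyncCodexAuxiliaryClient for Codex and plain AsyncOpenAI
       # otherwise; either way .chat.completions.create() is available.
       _aux_async_client, DEFAULT_VISION_MODEL = get_async_vision_auxiliary_client()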

3. FIX COMPRESSION FALLBACK + VISION ERROR HANDLING
   - context_compressor.py: Removed _get_fallback_client() which blindly
     looked for OPENAI_API_KEY + OPENAI_BASE_URL (fails for Codex OAuth,
     API-key providers, and users without OPENAI_BASE_URL set). Replaced
     it with a fallback loop that tries resolve_provider_client() for
     each known provider, skipping the provider that just failed.

   - vision_tools.py: Added error detection for vision capability
     failures. Returns clear message to the model when the configured
     model doesn't support vision, instead of a generic error.
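
   Fallback order, sketched (the same-client dedup check and the actual
   summary call are elided; see the context_compressor.py diff below):

       for fb_provider in ("custom", "openrouter", "nous", "codex"):
           fb_client, fb_model = resolve_provider_client(fb_provider)
           if fb_client is None:
               continue  # provider not configured; try the next one
           # ...dedup against the client that just failed, then retry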

Addresses #886
Author: teknium1
Date:   2026-03-11 19:46:47 -07:00
Parent: a8409a161f
Commit: 8805e705a7

3 changed files with 256 additions and 78 deletions

File: agent/auxiliary_client.py

@@ -499,6 +499,188 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
     return None, None
+
+
+# ── Centralized Provider Router ─────────────────────────────────────────────
+#
+# resolve_provider_client() is the single entry point for creating a properly
+# configured client given a (provider, model) pair. It handles auth lookup,
+# base URL resolution, provider-specific headers, and API format differences
+# (Chat Completions vs Responses API for Codex).
+#
+# All auxiliary consumer code should go through this or the public helpers
+# below — never look up auth env vars ad-hoc.
+
+
+def _to_async_client(sync_client, model: str):
+    """Convert a sync client to its async counterpart, preserving Codex routing."""
+    from openai import AsyncOpenAI
+
+    if isinstance(sync_client, CodexAuxiliaryClient):
+        return AsyncCodexAuxiliaryClient(sync_client), model
+    async_kwargs = {
+        "api_key": sync_client.api_key,
+        "base_url": str(sync_client.base_url),
+    }
+    base_lower = str(sync_client.base_url).lower()
+    if "openrouter" in base_lower:
+        async_kwargs["default_headers"] = dict(_OR_HEADERS)
+    elif "api.kimi.com" in base_lower:
+        async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
+    return AsyncOpenAI(**async_kwargs), model
+
+
+def resolve_provider_client(
+    provider: str,
+    model: str = None,
+    async_mode: bool = False,
+) -> Tuple[Optional[Any], Optional[str]]:
+    """Central router: given a provider name and optional model, return a
+    configured client with the correct auth, base URL, and API format.
+
+    The returned client always exposes ``.chat.completions.create()`` — for
+    Codex/Responses API providers, an adapter handles the translation
+    transparently.
+
+    Args:
+        provider: Provider identifier. One of:
+            "openrouter", "nous", "openai-codex" (or "codex"),
+            "zai", "kimi-coding", "minimax", "minimax-cn", "nous-api",
+            "custom" (OPENAI_BASE_URL + OPENAI_API_KEY),
+            "auto" (full auto-detection chain).
+        model: Model slug override. If None, uses the provider's default
+            auxiliary model.
+        async_mode: If True, return an async-compatible client.
+
+    Returns:
+        (client, resolved_model) or (None, None) if auth is unavailable.
+    """
+    # Normalise aliases
+    provider = (provider or "auto").strip().lower()
+    if provider == "codex":
+        provider = "openai-codex"
+    if provider == "main":
+        provider = "custom"
+
+    # ── Auto: try all providers in priority order ────────────────────
+    if provider == "auto":
+        client, resolved = _resolve_auto()
+        if client is None:
+            return None, None
+        final_model = model or resolved
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
+    # ── OpenRouter ───────────────────────────────────────────────────
+    if provider == "openrouter":
+        client, default = _try_openrouter()
+        if client is None:
+            logger.warning("resolve_provider_client: openrouter requested "
+                           "but OPENROUTER_API_KEY not set")
+            return None, None
+        final_model = model or default
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
+    # ── Nous Portal (OAuth) ──────────────────────────────────────────
+    if provider == "nous":
+        client, default = _try_nous()
+        if client is None:
+            logger.warning("resolve_provider_client: nous requested "
+                           "but Nous Portal not configured (run: hermes login)")
+            return None, None
+        final_model = model or default
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
+    # ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
+    if provider == "openai-codex":
+        client, default = _try_codex()
+        if client is None:
+            logger.warning("resolve_provider_client: openai-codex requested "
+                           "but no Codex OAuth token found (run: hermes model)")
+            return None, None
+        final_model = model or default
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
+    # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
+    if provider == "custom":
+        # Try custom first, then codex, then API-key providers
+        for try_fn in (_try_custom_endpoint, _try_codex,
+                       _resolve_api_key_provider):
+            client, default = try_fn()
+            if client is not None:
+                final_model = model or default
+                return (_to_async_client(client, final_model) if async_mode
+                        else (client, final_model))
+        logger.warning("resolve_provider_client: custom/main requested "
+                       "but no endpoint credentials found")
+        return None, None
+
+    # ── API-key providers from PROVIDER_REGISTRY ─────────────────────
+    try:
+        from hermes_cli.auth import PROVIDER_REGISTRY, _resolve_kimi_base_url
+    except ImportError:
+        logger.debug("hermes_cli.auth not available for provider %s", provider)
+        return None, None
+
+    pconfig = PROVIDER_REGISTRY.get(provider)
+    if pconfig is None:
+        logger.warning("resolve_provider_client: unknown provider %r", provider)
+        return None, None
+
+    if pconfig.auth_type == "api_key":
+        # Find the first configured API key
+        api_key = ""
+        for env_var in pconfig.api_key_env_vars:
+            api_key = os.getenv(env_var, "").strip()
+            if api_key:
+                break
+        if not api_key:
+            logger.warning("resolve_provider_client: provider %s has no API "
+                           "key configured (tried: %s)",
+                           provider, ", ".join(pconfig.api_key_env_vars))
+            return None, None
+
+        # Resolve base URL (env override → provider-specific logic → default)
+        base_url_override = os.getenv(pconfig.base_url_env_var, "").strip() if pconfig.base_url_env_var else ""
+        if provider == "kimi-coding":
+            base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, base_url_override)
+        elif base_url_override:
+            base_url = base_url_override
+        else:
+            base_url = pconfig.inference_base_url
+
+        default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
+        final_model = model or default_model
+
+        # Provider-specific headers
+        headers = {}
+        if "api.kimi.com" in base_url.lower():
+            headers["User-Agent"] = "KimiCLI/1.0"
+
+        client = OpenAI(api_key=api_key, base_url=base_url,
+                        **({"default_headers": headers} if headers else {}))
+        logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
+    elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
+        # OAuth providers — route through their specific try functions
+        if provider == "nous":
+            return resolve_provider_client("nous", model, async_mode)
+        if provider == "openai-codex":
+            return resolve_provider_client("openai-codex", model, async_mode)
+        # nous-api is api_key type so it's handled above
+        logger.warning("resolve_provider_client: OAuth provider %s not "
+                       "directly supported, try 'auto'", provider)
+        return None, None
+
+    logger.warning("resolve_provider_client: unhandled auth_type %s for %s",
+                   pconfig.auth_type, provider)
+    return None, None
 
 
 # ── Public API ──────────────────────────────────────────────────────────────
 
 
 def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]:
@@ -513,8 +695,8 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]:
     """
     forced = _get_auxiliary_provider(task)
     if forced != "auto":
-        return _resolve_forced_provider(forced)
-    return _resolve_auto()
+        return resolve_provider_client(forced)
+    return resolve_provider_client("auto")
 
 
 def get_async_text_auxiliary_client(task: str = ""):
@@ -524,24 +706,10 @@ def get_async_text_auxiliary_client(task: str = ""):
     (AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
     Returns (None, None) when no provider is available.
     """
-    from openai import AsyncOpenAI
-
-    sync_client, model = get_text_auxiliary_client(task)
-    if sync_client is None:
-        return None, None
-    if isinstance(sync_client, CodexAuxiliaryClient):
-        return AsyncCodexAuxiliaryClient(sync_client), model
-    async_kwargs = {
-        "api_key": sync_client.api_key,
-        "base_url": str(sync_client.base_url),
-    }
-    if "openrouter" in str(sync_client.base_url).lower():
-        async_kwargs["default_headers"] = dict(_OR_HEADERS)
-    elif "api.kimi.com" in str(sync_client.base_url).lower():
-        async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
-    return AsyncOpenAI(**async_kwargs), model
+    forced = _get_auxiliary_provider(task)
+    if forced != "auto":
+        return resolve_provider_client(forced, async_mode=True)
+    return resolve_provider_client("auto", async_mode=True)
 
 
 def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
@@ -559,7 +727,7 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
""" """
forced = _get_auxiliary_provider("vision") forced = _get_auxiliary_provider("vision")
if forced != "auto": if forced != "auto":
return _resolve_forced_provider(forced) return resolve_provider_client(forced)
# Auto: try providers known to support multimodal first, then fall # Auto: try providers known to support multimodal first, then fall
# back to the user's custom endpoint. Many local models (Qwen-VL, # back to the user's custom endpoint. Many local models (Qwen-VL,
# LLaVA, Pixtral, etc.) support vision — skipping them entirely # LLaVA, Pixtral, etc.) support vision — skipping them entirely
@@ -573,6 +741,21 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
     return None, None
+
+
+def get_async_vision_auxiliary_client():
+    """Return (async_client, model_slug) for async vision consumers.
+
+    Properly handles Codex routing — unlike manually constructing
+    AsyncOpenAI from a sync client, this preserves the Responses API
+    adapter for Codex providers.
+
+    Returns (None, None) when no provider is available.
+    """
+    sync_client, model = get_vision_auxiliary_client()
+    if sync_client is None:
+        return None, None
+    return _to_async_client(sync_client, model)
 
 
 def get_auxiliary_extra_body() -> dict:
     """Return extra_body kwargs for auxiliary API calls.

File: context_compressor.py

@@ -127,20 +127,38 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
         except Exception as e:
             logging.warning(f"Failed to generate context summary with auxiliary model: {e}")
 
-        # 2. Fallback: try the user's main model endpoint
-        fallback_client, fallback_model = self._get_fallback_client()
-        if fallback_client is not None:
-            try:
-                logger.info("Retrying context summary with main model (%s)", fallback_model)
-                summary = self._call_summary_model(fallback_client, fallback_model, prompt)
-                self.client = fallback_client
-                self.summary_model = fallback_model
+        # 2. Fallback: re-try via the centralized provider router.
+        # This covers all configured providers (Codex OAuth, API-key
+        # providers, etc.) without ad-hoc env var lookups.
+        from agent.auxiliary_client import resolve_provider_client
+        fallback_providers = ["custom", "openrouter", "nous", "codex"]
+        for fb_provider in fallback_providers:
+            try:
+                fb_client, fb_model = resolve_provider_client(
+                    fb_provider, model=self.model)
+                if fb_client is None:
+                    continue
+                # Don't retry the same client that just failed
+                if (self.client is not None
+                        and hasattr(fb_client, "base_url")
+                        and hasattr(self.client, "base_url")
+                        and str(fb_client.base_url) == str(self.client.base_url)):
+                    continue
+                logger.info("Retrying context summary with fallback provider "
+                            "%s (%s)", fb_provider, fb_model)
+                summary = self._call_summary_model(fb_client, fb_model, prompt)
+                # Promote successful fallback for future compressions
+                self.client = fb_client
+                self.summary_model = fb_model
                 return summary
             except Exception as fallback_err:
-                logging.warning(f"Main model summary also failed: {fallback_err}")
+                logging.warning("Fallback provider %s failed: %s",
+                                fb_provider, fallback_err)
 
-        # 3. All models failed — return None so the caller drops turns without a summary
-        logging.warning("Context compression: no model available for summary. Middle turns will be dropped without summary.")
+        # 3. All providers failed — return None so the caller drops turns
+        # without a summary.
+        logging.warning("Context compression: no provider available for "
+                        "summary. Middle turns will be dropped without summary.")
         return None
 
     def _call_summary_model(self, client, model: str, prompt: str) -> str:
@@ -170,35 +188,6 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
summary = "[CONTEXT SUMMARY]: " + summary summary = "[CONTEXT SUMMARY]: " + summary
return summary return summary
def _get_fallback_client(self):
"""Try to build a fallback client from the main model's endpoint config.
When the primary auxiliary client fails (e.g. stale OpenRouter key), this
creates a client using the user's active custom endpoint (OPENAI_BASE_URL)
so compression can still produce a real summary instead of a static string.
Returns (client, model) or (None, None).
"""
custom_base = os.getenv("OPENAI_BASE_URL")
custom_key = os.getenv("OPENAI_API_KEY")
if not custom_base or not custom_key:
return None, None
# Don't fallback to the same provider that just failed
from hermes_constants import OPENROUTER_BASE_URL
if custom_base.rstrip("/") == OPENROUTER_BASE_URL.rstrip("/"):
return None, None
model = os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or self.model
try:
from openai import OpenAI as _OpenAI
client = _OpenAI(api_key=custom_key, base_url=custom_base)
logger.debug("Built fallback auxiliary client: %s via %s", model, custom_base)
return client, model
except Exception as exc:
logger.debug("Could not build fallback auxiliary client: %s", exc)
return None, None
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Tool-call / tool-result pair integrity helpers # Tool-call / tool-result pair integrity helpers
# ------------------------------------------------------------------ # ------------------------------------------------------------------

File: vision_tools.py

@@ -37,27 +37,15 @@ from pathlib import Path
 from typing import Any, Awaitable, Dict, Optional
 from urllib.parse import urlparse
 
 import httpx
-from openai import AsyncOpenAI
-from agent.auxiliary_client import get_vision_auxiliary_client
+from agent.auxiliary_client import get_async_vision_auxiliary_client
 from tools.debug_helpers import DebugSession
 
 logger = logging.getLogger(__name__)
 
-# Resolve vision auxiliary client at module level; build an async wrapper.
-_aux_sync_client, DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
-_aux_async_client: AsyncOpenAI | None = None
-if _aux_sync_client is not None:
-    _async_kwargs = {
-        "api_key": _aux_sync_client.api_key,
-        "base_url": str(_aux_sync_client.base_url),
-    }
-    if "openrouter" in str(_aux_sync_client.base_url).lower():
-        _async_kwargs["default_headers"] = {
-            "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
-            "X-OpenRouter-Title": "Hermes Agent",
-            "X-OpenRouter-Categories": "productivity,cli-agent",
-        }
-    _aux_async_client = AsyncOpenAI(**_async_kwargs)
+# Resolve vision auxiliary client at module level.
+# Uses get_async_vision_auxiliary_client() which properly handles Codex
+# routing (Responses API adapter) instead of raw AsyncOpenAI construction.
+_aux_async_client, DEFAULT_VISION_MODEL = get_async_vision_auxiliary_client()
 
 _debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
@@ -359,10 +347,28 @@ async def vision_analyze_tool(
error_msg = f"Error analyzing image: {str(e)}" error_msg = f"Error analyzing image: {str(e)}"
logger.error("%s", error_msg, exc_info=True) logger.error("%s", error_msg, exc_info=True)
# Detect vision capability errors — give the model a clear message
# so it can inform the user instead of a cryptic API error.
err_str = str(e).lower()
if any(hint in err_str for hint in (
"does not support", "not support image", "invalid_request",
"content_policy", "image_url", "multimodal",
"unrecognized request argument", "image input",
)):
analysis = (
f"{model} does not support vision or our request was not "
f"accepted by the server. Error: {e}"
)
else:
analysis = (
"There was a problem with the request and the image could not "
f"be analyzed. Error: {e}"
)
# Prepare error response # Prepare error response
result = { result = {
"success": False, "success": False,
"analysis": "There was a problem with the request and the image could not be analyzed." "analysis": analysis,
} }
debug_call_data["error"] = error_msg debug_call_data["error"] = error_msg