feat: centralized provider router + fix Codex vision bypass + vision error handling

Three interconnected fixes for auxiliary client infrastructure:

1. CENTRALIZED PROVIDER ROUTER (auxiliary_client.py)
   Add resolve_provider_client(provider, model, async_mode) — a single
   entry point for creating properly configured clients. Given a provider
   name and optional model, it handles auth lookup (env vars, OAuth
   tokens, auth.json), base URL resolution, provider-specific headers,
   and API format differences (Chat Completions vs Responses API for
   Codex). All auxiliary consumers should route through it instead of
   doing ad-hoc env var lookups.

   Refactored get_text_auxiliary_client, get_async_text_auxiliary_client,
   and get_vision_auxiliary_client to use the router internally.
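
   A minimal usage sketch (illustrative only; the provider name, prompt,
   and the OpenRouter choice are examples, not defaults from this
   commit):

       from agent.auxiliary_client import resolve_provider_client

       client, model = resolve_provider_client("openrouter")
       if client is not None:
           # Codex clients come back pre-wrapped, so this exact call
           # shape works for every provider, Responses API included.
           resp = client.chat.completions.create(
               model=model,
               messages=[{"role": "user", "content": "ping"}],
           )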

2. FIX CODEX VISION BYPASS (vision_tools.py)
   vision_tools.py was constructing a raw AsyncOpenAI client from the
   sync vision client's api_key/base_url, completely bypassing the Codex
   Responses API adapter. When the vision provider resolved to Codex,
   the raw client would call chat.completions.create() against
   chatgpt.com/backend-api/codex, an endpoint that only speaks the
   Responses API, so vision requests through Codex failed.

   Fix: Added get_async_vision_auxiliary_client() which properly wraps
   Codex into AsyncCodexAuxiliaryClient. vision_tools.py now uses this
   instead of manual client construction.
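
   Condensed sketch of the new wiring (mirrors the module-level code in
   the vision_tools.py diff below):

       from agent.auxiliary_client import get_async_vision_auxiliary_client

       # Returns AsyncCodexAuxiliaryClient for Codex and plain AsyncOpenAI
       # otherwise; either way .chat.completions.create() is available.
       _aux_async_client, DEFAULT_VISION_MODEL = get_async_vision_auxiliary_client()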

3. FIX COMPRESSION FALLBACK + VISION ERROR HANDLING
   - context_compressor.py: Removed _get_fallback_client() which blindly
     looked for OPENAI_API_KEY + OPENAI_BASE_URL (fails for Codex OAuth,
     API-key providers, and users without OPENAI_BASE_URL set). Replaced
     it with a fallback loop that tries resolve_provider_client() for
     each known provider, skipping the provider that just failed.

   - vision_tools.py: Added error detection for vision capability
     failures. Returns clear message to the model when the configured
     model doesn't support vision, instead of a generic error.
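
   Fallback order, sketched (the same-client dedup check and the actual
   summary call are elided; see the context_compressor.py diff below):

       for fb_provider in ("custom", "openrouter", "nous", "codex"):
           fb_client, fb_model = resolve_provider_client(fb_provider)
           if fb_client is None:
               continue  # provider not configured; try the next one
           # ...dedup against the client that just failed, then retry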

Addresses #886
Author: teknium1
Date:   2026-03-11 19:46:47 -07:00
Parent: a8409a161f
Commit: 8805e705a7

3 changed files with 256 additions and 78 deletions

File: agent/auxiliary_client.py

@@ -499,6 +499,188 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
     return None, None
+
+
+# ── Centralized Provider Router ─────────────────────────────────────────────
+#
+# resolve_provider_client() is the single entry point for creating a properly
+# configured client given a (provider, model) pair. It handles auth lookup,
+# base URL resolution, provider-specific headers, and API format differences
+# (Chat Completions vs Responses API for Codex).
+#
+# All auxiliary consumer code should go through this or the public helpers
+# below — never look up auth env vars ad-hoc.
+
+
+def _to_async_client(sync_client, model: str):
+    """Convert a sync client to its async counterpart, preserving Codex routing."""
+    from openai import AsyncOpenAI
+
+    if isinstance(sync_client, CodexAuxiliaryClient):
+        return AsyncCodexAuxiliaryClient(sync_client), model
+    async_kwargs = {
+        "api_key": sync_client.api_key,
+        "base_url": str(sync_client.base_url),
+    }
+    base_lower = str(sync_client.base_url).lower()
+    if "openrouter" in base_lower:
+        async_kwargs["default_headers"] = dict(_OR_HEADERS)
+    elif "api.kimi.com" in base_lower:
+        async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
+    return AsyncOpenAI(**async_kwargs), model
+
+
+def resolve_provider_client(
+    provider: str,
+    model: str = None,
+    async_mode: bool = False,
+) -> Tuple[Optional[Any], Optional[str]]:
+    """Central router: given a provider name and optional model, return a
+    configured client with the correct auth, base URL, and API format.
+
+    The returned client always exposes ``.chat.completions.create()`` — for
+    Codex/Responses API providers, an adapter handles the translation
+    transparently.
+
+    Args:
+        provider: Provider identifier. One of:
+            "openrouter", "nous", "openai-codex" (or "codex"),
+            "zai", "kimi-coding", "minimax", "minimax-cn", "nous-api",
+            "custom" (OPENAI_BASE_URL + OPENAI_API_KEY),
+            "auto" (full auto-detection chain).
+        model: Model slug override. If None, uses the provider's default
+            auxiliary model.
+        async_mode: If True, return an async-compatible client.
+
+    Returns:
+        (client, resolved_model) or (None, None) if auth is unavailable.
+    """
+    # Normalise aliases
+    provider = (provider or "auto").strip().lower()
+    if provider == "codex":
+        provider = "openai-codex"
+    if provider == "main":
+        provider = "custom"
+
+    # ── Auto: try all providers in priority order ────────────────────
+    if provider == "auto":
+        client, resolved = _resolve_auto()
+        if client is None:
+            return None, None
+        final_model = model or resolved
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
+    # ── OpenRouter ───────────────────────────────────────────────────
+    if provider == "openrouter":
+        client, default = _try_openrouter()
+        if client is None:
+            logger.warning("resolve_provider_client: openrouter requested "
+                           "but OPENROUTER_API_KEY not set")
+            return None, None
+        final_model = model or default
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
+    # ── Nous Portal (OAuth) ──────────────────────────────────────────
+    if provider == "nous":
+        client, default = _try_nous()
+        if client is None:
+            logger.warning("resolve_provider_client: nous requested "
+                           "but Nous Portal not configured (run: hermes login)")
+            return None, None
+        final_model = model or default
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
+    # ── OpenAI Codex (OAuth → Responses API) ─────────────────────────
+    if provider == "openai-codex":
+        client, default = _try_codex()
+        if client is None:
+            logger.warning("resolve_provider_client: openai-codex requested "
+                           "but no Codex OAuth token found (run: hermes model)")
+            return None, None
+        final_model = model or default
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
+    # ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
+    if provider == "custom":
+        # Try custom first, then codex, then API-key providers
+        for try_fn in (_try_custom_endpoint, _try_codex,
+                       _resolve_api_key_provider):
+            client, default = try_fn()
+            if client is not None:
+                final_model = model or default
+                return (_to_async_client(client, final_model) if async_mode
+                        else (client, final_model))
+        logger.warning("resolve_provider_client: custom/main requested "
+                       "but no endpoint credentials found")
+        return None, None
+
+    # ── API-key providers from PROVIDER_REGISTRY ─────────────────────
+    try:
+        from hermes_cli.auth import PROVIDER_REGISTRY, _resolve_kimi_base_url
+    except ImportError:
+        logger.debug("hermes_cli.auth not available for provider %s", provider)
+        return None, None
+
+    pconfig = PROVIDER_REGISTRY.get(provider)
+    if pconfig is None:
+        logger.warning("resolve_provider_client: unknown provider %r", provider)
+        return None, None
+
+    if pconfig.auth_type == "api_key":
+        # Find the first configured API key
+        api_key = ""
+        for env_var in pconfig.api_key_env_vars:
+            api_key = os.getenv(env_var, "").strip()
+            if api_key:
+                break
+        if not api_key:
+            logger.warning("resolve_provider_client: provider %s has no API "
+                           "key configured (tried: %s)",
+                           provider, ", ".join(pconfig.api_key_env_vars))
+            return None, None
+
+        # Resolve base URL (env override → provider-specific logic → default)
+        base_url_override = os.getenv(pconfig.base_url_env_var, "").strip() if pconfig.base_url_env_var else ""
+        if provider == "kimi-coding":
+            base_url = _resolve_kimi_base_url(api_key, pconfig.inference_base_url, base_url_override)
+        elif base_url_override:
+            base_url = base_url_override
+        else:
+            base_url = pconfig.inference_base_url
+
+        default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
+        final_model = model or default_model
+
+        # Provider-specific headers
+        headers = {}
+        if "api.kimi.com" in base_url.lower():
+            headers["User-Agent"] = "KimiCLI/1.0"
+
+        client = OpenAI(api_key=api_key, base_url=base_url,
+                        **({"default_headers": headers} if headers else {}))
+        logger.debug("resolve_provider_client: %s (%s)", provider, final_model)
+        return (_to_async_client(client, final_model) if async_mode
+                else (client, final_model))
+
+    elif pconfig.auth_type in ("oauth_device_code", "oauth_external"):
+        # OAuth providers — route through their specific try functions
+        if provider == "nous":
+            return resolve_provider_client("nous", model, async_mode)
+        if provider == "openai-codex":
+            return resolve_provider_client("openai-codex", model, async_mode)
+        # nous-api is api_key type so it's handled above
+        logger.warning("resolve_provider_client: OAuth provider %s not "
+                       "directly supported, try 'auto'", provider)
+        return None, None
+
+    logger.warning("resolve_provider_client: unhandled auth_type %s for %s",
+                   pconfig.auth_type, provider)
+    return None, None
 
 
 # ── Public API ──────────────────────────────────────────────────────────────
 
 
 def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]:
@@ -513,8 +695,8 @@ def get_text_auxiliary_client(task: str = "") -> Tuple[Optional[OpenAI], Optional[str]]:
     """
     forced = _get_auxiliary_provider(task)
     if forced != "auto":
-        return _resolve_forced_provider(forced)
-    return _resolve_auto()
+        return resolve_provider_client(forced)
+    return resolve_provider_client("auto")
 
 
 def get_async_text_auxiliary_client(task: str = ""):
@@ -524,24 +706,10 @@ def get_async_text_auxiliary_client(task: str = ""):
     (AsyncCodexAuxiliaryClient, model) which wraps the Responses API.
     Returns (None, None) when no provider is available.
     """
-    from openai import AsyncOpenAI
-
-    sync_client, model = get_text_auxiliary_client(task)
-    if sync_client is None:
-        return None, None
-    if isinstance(sync_client, CodexAuxiliaryClient):
-        return AsyncCodexAuxiliaryClient(sync_client), model
-    async_kwargs = {
-        "api_key": sync_client.api_key,
-        "base_url": str(sync_client.base_url),
-    }
-    if "openrouter" in str(sync_client.base_url).lower():
-        async_kwargs["default_headers"] = dict(_OR_HEADERS)
-    elif "api.kimi.com" in str(sync_client.base_url).lower():
-        async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
-    return AsyncOpenAI(**async_kwargs), model
+    forced = _get_auxiliary_provider(task)
+    if forced != "auto":
+        return resolve_provider_client(forced, async_mode=True)
+    return resolve_provider_client("auto", async_mode=True)
 
 
 def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
@@ -559,7 +727,7 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
""" """
forced = _get_auxiliary_provider("vision") forced = _get_auxiliary_provider("vision")
if forced != "auto": if forced != "auto":
return _resolve_forced_provider(forced) return resolve_provider_client(forced)
# Auto: try providers known to support multimodal first, then fall # Auto: try providers known to support multimodal first, then fall
# back to the user's custom endpoint. Many local models (Qwen-VL, # back to the user's custom endpoint. Many local models (Qwen-VL,
# LLaVA, Pixtral, etc.) support vision — skipping them entirely # LLaVA, Pixtral, etc.) support vision — skipping them entirely
@@ -573,6 +741,21 @@ def get_vision_auxiliary_client() -> Tuple[Optional[OpenAI], Optional[str]]:
     return None, None
+
+
+def get_async_vision_auxiliary_client():
+    """Return (async_client, model_slug) for async vision consumers.
+
+    Properly handles Codex routing — unlike manually constructing
+    AsyncOpenAI from a sync client, this preserves the Responses API
+    adapter for Codex providers.
+
+    Returns (None, None) when no provider is available.
+    """
+    sync_client, model = get_vision_auxiliary_client()
+    if sync_client is None:
+        return None, None
+    return _to_async_client(sync_client, model)
 
 
 def get_auxiliary_extra_body() -> dict:
     """Return extra_body kwargs for auxiliary API calls.

File: context_compressor.py

@@ -127,20 +127,38 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
         except Exception as e:
             logging.warning(f"Failed to generate context summary with auxiliary model: {e}")
 
-        # 2. Fallback: try the user's main model endpoint
-        fallback_client, fallback_model = self._get_fallback_client()
-        if fallback_client is not None:
-            try:
-                logger.info("Retrying context summary with main model (%s)", fallback_model)
-                summary = self._call_summary_model(fallback_client, fallback_model, prompt)
-                self.client = fallback_client
-                self.summary_model = fallback_model
+        # 2. Fallback: re-try via the centralized provider router.
+        # This covers all configured providers (Codex OAuth, API-key
+        # providers, etc.) without ad-hoc env var lookups.
+        from agent.auxiliary_client import resolve_provider_client
+        fallback_providers = ["custom", "openrouter", "nous", "codex"]
+        for fb_provider in fallback_providers:
+            try:
+                fb_client, fb_model = resolve_provider_client(
+                    fb_provider, model=self.model)
+                if fb_client is None:
+                    continue
+                # Don't retry the same client that just failed
+                if (self.client is not None
+                        and hasattr(fb_client, "base_url")
+                        and hasattr(self.client, "base_url")
+                        and str(fb_client.base_url) == str(self.client.base_url)):
+                    continue
+                logger.info("Retrying context summary with fallback provider "
+                            "%s (%s)", fb_provider, fb_model)
+                summary = self._call_summary_model(fb_client, fb_model, prompt)
+                # Promote successful fallback for future compressions
+                self.client = fb_client
+                self.summary_model = fb_model
                 return summary
             except Exception as fallback_err:
-                logging.warning(f"Main model summary also failed: {fallback_err}")
+                logging.warning("Fallback provider %s failed: %s",
+                                fb_provider, fallback_err)
 
-        # 3. All models failed — return None so the caller drops turns without a summary
-        logging.warning("Context compression: no model available for summary. Middle turns will be dropped without summary.")
+        # 3. All providers failed — return None so the caller drops turns
+        # without a summary.
+        logging.warning("Context compression: no provider available for "
+                        "summary. Middle turns will be dropped without summary.")
         return None
 
     def _call_summary_model(self, client, model: str, prompt: str) -> str:
@@ -170,35 +188,6 @@ Write only the summary, starting with "[CONTEXT SUMMARY]:" prefix."""
summary = "[CONTEXT SUMMARY]: " + summary summary = "[CONTEXT SUMMARY]: " + summary
return summary return summary
def _get_fallback_client(self):
"""Try to build a fallback client from the main model's endpoint config.
When the primary auxiliary client fails (e.g. stale OpenRouter key), this
creates a client using the user's active custom endpoint (OPENAI_BASE_URL)
so compression can still produce a real summary instead of a static string.
Returns (client, model) or (None, None).
"""
custom_base = os.getenv("OPENAI_BASE_URL")
custom_key = os.getenv("OPENAI_API_KEY")
if not custom_base or not custom_key:
return None, None
# Don't fallback to the same provider that just failed
from hermes_constants import OPENROUTER_BASE_URL
if custom_base.rstrip("/") == OPENROUTER_BASE_URL.rstrip("/"):
return None, None
model = os.getenv("LLM_MODEL") or os.getenv("OPENAI_MODEL") or self.model
try:
from openai import OpenAI as _OpenAI
client = _OpenAI(api_key=custom_key, base_url=custom_base)
logger.debug("Built fallback auxiliary client: %s via %s", model, custom_base)
return client, model
except Exception as exc:
logger.debug("Could not build fallback auxiliary client: %s", exc)
return None, None
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Tool-call / tool-result pair integrity helpers # Tool-call / tool-result pair integrity helpers
# ------------------------------------------------------------------ # ------------------------------------------------------------------

File: vision_tools.py

@@ -37,27 +37,15 @@ from pathlib import Path
 from typing import Any, Awaitable, Dict, Optional
 from urllib.parse import urlparse
 
 import httpx
-from openai import AsyncOpenAI
-from agent.auxiliary_client import get_vision_auxiliary_client
+from agent.auxiliary_client import get_async_vision_auxiliary_client
 from tools.debug_helpers import DebugSession
 
 logger = logging.getLogger(__name__)
 
-# Resolve vision auxiliary client at module level; build an async wrapper.
-_aux_sync_client, DEFAULT_VISION_MODEL = get_vision_auxiliary_client()
-_aux_async_client: AsyncOpenAI | None = None
-if _aux_sync_client is not None:
-    _async_kwargs = {
-        "api_key": _aux_sync_client.api_key,
-        "base_url": str(_aux_sync_client.base_url),
-    }
-    if "openrouter" in str(_aux_sync_client.base_url).lower():
-        _async_kwargs["default_headers"] = {
-            "HTTP-Referer": "https://github.com/NousResearch/hermes-agent",
-            "X-OpenRouter-Title": "Hermes Agent",
-            "X-OpenRouter-Categories": "productivity,cli-agent",
-        }
-    _aux_async_client = AsyncOpenAI(**_async_kwargs)
+# Resolve vision auxiliary client at module level.
+# Uses get_async_vision_auxiliary_client() which properly handles Codex
+# routing (Responses API adapter) instead of raw AsyncOpenAI construction.
+_aux_async_client, DEFAULT_VISION_MODEL = get_async_vision_auxiliary_client()
 
 _debug = DebugSession("vision_tools", env_var="VISION_TOOLS_DEBUG")
@@ -359,10 +347,28 @@ async def vision_analyze_tool(
error_msg = f"Error analyzing image: {str(e)}" error_msg = f"Error analyzing image: {str(e)}"
logger.error("%s", error_msg, exc_info=True) logger.error("%s", error_msg, exc_info=True)
# Detect vision capability errors — give the model a clear message
# so it can inform the user instead of a cryptic API error.
err_str = str(e).lower()
if any(hint in err_str for hint in (
"does not support", "not support image", "invalid_request",
"content_policy", "image_url", "multimodal",
"unrecognized request argument", "image input",
)):
analysis = (
f"{model} does not support vision or our request was not "
f"accepted by the server. Error: {e}"
)
else:
analysis = (
"There was a problem with the request and the image could not "
f"be analyzed. Error: {e}"
)
# Prepare error response # Prepare error response
result = { result = {
"success": False, "success": False,
"analysis": "There was a problem with the request and the image could not be analyzed." "analysis": analysis,
} }
debug_call_data["error"] = error_msg debug_call_data["error"] = error_msg