fix(vision): resolve Nous vision model correctly in auto-detect path

Two changes:
1. _PROVIDER_VISION_MODELS: add 'nous' -> 'xiaomi/mimo-v2-omni' entry
   so the vision auto-detect chain picks the correct multimodal model.

2. resolve_provider_client: detect when the requested model is a vision
   model (from _PROVIDER_VISION_MODELS or known vision model names) and
   pass vision=True to _try_nous().  Previously, _try_nous() was always
   called without vision=True in resolve_provider_client(), causing it to
   return the default text model (gemini-3-flash-preview or mimo-v2-pro)
   instead of the vision-capable mimo-v2-omni.

The _try_nous() function already handled free-tier vision correctly, but
the resolve_provider_client() path (used by the auto-detect vision chain)
never signaled that a vision task was in progress.

Verified: xiaomi/mimo-v2-omni returns HTTP 200 with image inputs on Nous
inference API. google/gemini-3-flash-preview returns 404 with images.
This commit is contained in:
Esteban
2026-04-19 20:08:03 +00:00
committed by Teknium
parent 3e1a3372ab
commit 0301787653

View File

@@ -152,6 +152,7 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
# Maps a provider key to the vision-capable model the auto-detect chain
# should use when a task includes image inputs for that provider.
_PROVIDER_VISION_MODELS: Dict[str, str] = {
"xiaomi": "mimo-v2-omni",
"zai": "glm-5v-turbo",
# NOTE(review): unlike the other entries, this value is namespaced as
# "provider/model" — presumably because Nous fronts models from other
# providers and needs the full routing name; confirm against _try_nous()
# and the Nous inference API model-listing conventions.
"nous": "xiaomi/mimo-v2-omni",
}
# OpenRouter app attribution headers
@@ -933,20 +934,28 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]:
model = _NOUS_MODEL
# Free-tier users can't use paid auxiliary models — use the free
# models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks.
# For vision tasks, always use mimo-v2-omni regardless of tier —
# Nous inference API does not support image inputs for gemini models.
try:
from hermes_cli.models import check_nous_free_tier
if check_nous_free_tier():
model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL
logger.debug("Free-tier Nous account — using %s for auxiliary/%s",
model, "vision" if vision else "text")
elif vision:
model = _NOUS_FREE_TIER_VISION_MODEL
logger.debug("Nous vision task — using %s (gemini models lack "
"image support on Nous inference API)", model)
except Exception:
pass
if vision:
model = _NOUS_FREE_TIER_VISION_MODEL
if vision:
logger.debug("Nous vision: final model = %s", model)
if runtime is not None:
api_key, base_url = runtime
else:
api_key = _nous_api_key(nous or {})
base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/")
return (
OpenAI(
api_key=api_key,
@@ -1610,7 +1619,13 @@ def resolve_provider_client(
# ── Nous Portal (OAuth) ──────────────────────────────────────────
if provider == "nous":
client, default = _try_nous()
# Detect vision tasks: either explicit model override from
# _PROVIDER_VISION_MODELS, or caller passed a known vision model.
_is_vision = (
model in _PROVIDER_VISION_MODELS.values()
or (model or "").strip().lower() == "mimo-v2-omni"
)
client, default = _try_nous(vision=_is_vision)
if client is None:
logger.warning("resolve_provider_client: nous requested "
"but Nous Portal not configured (run: hermes auth)")