feat(auxiliary): default 'auto' routing to main model for all users (#11900)

Before: aggregator users (OpenRouter / Nous Portal) running 'auto'
routing for auxiliary tasks — compression, vision, web extraction,
session search, etc. — got routed to a cheap provider-side default
model (Gemini Flash).  Non-aggregator users already got their main
model.  Behavior was inconsistent and surprising — users picked
Claude / GPT / their preferred model, but side tasks ran on
Gemini Flash.

After: 'auto' means "use my main chat model" for every user,
regardless of provider type.  Only when the main provider has no
working client does the fallback chain run (OpenRouter → Nous →
custom → Codex → API-key providers).  Explicit per-task overrides
in config.yaml (auxiliary.<task>.provider / .model) still win —
they are a hard constraint, not subject to the auto policy.

Vision auto-detection follows the same policy: try main provider +
main model first (with _PROVIDER_VISION_MODELS overrides preserved
for providers like xiaomi and zai that ship a dedicated multimodal
model distinct from their chat model).  Aggregator strict vision
backends are fallbacks, not the primary path.

Changes:
  - agent/auxiliary_client.py: _resolve_auto() drops the
    `_AGGREGATOR_PROVIDERS` guard.  resolve_vision_provider_client()
    auto branch unifies aggregator and exotic-provider paths —
    everyone goes through resolve_provider_client() with main_model.
    Dead _AGGREGATOR_PROVIDERS constant removed (was only used by
    the guard we just removed).
  - hermes_cli/main.py: aux config menu copy updated to reflect
    the new semantics ("'auto' means 'use my main model'").
  - tests/agent/test_auxiliary_main_first.py: 12 regression tests
    covering OpenRouter/Nous/DeepSeek main paths, runtime-override
    wins, explicit-config wins, vision override preservation for
    exotic providers, and fallback-chain activation when the main
    provider has no working client.

Co-authored-by: teknium1 <teknium@nousresearch.com>
This commit is contained in:
Teknium
2026-04-17 19:13:23 -07:00
committed by GitHub
parent b449a0e049
commit a155b4a159
3 changed files with 352 additions and 37 deletions

View File

@@ -1075,8 +1075,6 @@ _AUTO_PROVIDER_LABELS = {
"_resolve_api_key_provider": "api-key",
}
_AGGREGATOR_PROVIDERS = frozenset({"openrouter", "nous"})
_MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode")
@@ -1207,11 +1205,15 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
"""Full auto-detection chain.
Priority:
1. If the user's main provider is NOT an aggregator (OpenRouter / Nous),
use their main provider + main model directly. This ensures users on
Alibaba, DeepSeek, ZAI, etc. get auxiliary tasks handled by the same
provider they already have credentials for — no OpenRouter key needed.
2. OpenRouter → Nous → custom → Codex → API-key providers (original chain).
1. User's main provider + main model, regardless of provider type.
This means auxiliary tasks (compression, vision, web extraction,
session search, etc.) use the same model the user configured for
chat. Users on OpenRouter/Nous get their chosen chat model; users
on DeepSeek/ZAI/Alibaba get theirs; etc. Running aux tasks on the
user's picked model keeps behavior predictable — no surprise
switches to a cheap fallback model for side tasks.
2. OpenRouter → Nous → custom → Codex → API-key providers (fallback
chain, only used when the main provider has no working client).
"""
global auxiliary_is_nous, _stale_base_url_warned
auxiliary_is_nous = False # Reset — _try_nous() will set True if it wins
@@ -1241,11 +1243,16 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
)
_stale_base_url_warned = True
# ── Step 1: non-aggregator main provider → use main model directly ──
# ── Step 1: main provider + main model → use them directly ──
#
# This is the primary aux backend for every user. "auto" means
# "use my main chat model for side tasks as well" — including users
# on aggregators (OpenRouter, Nous) who previously got routed to a
# cheap provider-side default. Explicit per-task overrides set via
# config.yaml (auxiliary.<task>.provider) still win over this.
main_provider = runtime_provider or _read_main_provider()
main_model = runtime_model or _read_main_model()
if (main_provider and main_model
and main_provider not in _AGGREGATOR_PROVIDERS
and main_provider not in ("auto", "")):
resolved_provider = main_provider
explicit_base_url = None
@@ -1828,34 +1835,31 @@ def resolve_vision_provider_client(
if requested == "auto":
# Vision auto-detection order:
# 1. Active provider + model (user's main chat config)
# 2. OpenRouter (known vision-capable default model)
# 3. Nous Portal (known vision-capable default model)
# 1. User's main provider + main model (including aggregators).
# _PROVIDER_VISION_MODELS provides per-provider vision model
# overrides when the provider has a dedicated multimodal model
# that differs from the chat model (e.g. xiaomi → mimo-v2-omni,
# zai → glm-5v-turbo).
# 2. OpenRouter (vision-capable aggregator fallback)
# 3. Nous Portal (vision-capable aggregator fallback)
# 4. Stop
main_provider = _read_main_provider()
main_model = _read_main_model()
if main_provider and main_provider not in ("auto", ""):
if main_provider in _VISION_AUTO_PROVIDER_ORDER:
# Known strict backend — use its defaults.
sync_client, default_model = _resolve_strict_vision_backend(main_provider)
if sync_client is not None:
return _finalize(main_provider, sync_client, default_model)
else:
# Exotic provider (DeepSeek, Alibaba, Xiaomi, named custom, etc.)
# Use provider-specific vision model if available, otherwise main model.
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using active provider %s (%s)",
main_provider, rpc_model or vision_model,
)
return _finalize(
main_provider, rpc_client, rpc_model or vision_model)
vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
rpc_client, rpc_model = resolve_provider_client(
main_provider, vision_model,
api_mode=resolved_api_mode)
if rpc_client is not None:
logger.info(
"Vision auto-detect: using main provider %s (%s)",
main_provider, rpc_model or vision_model,
)
return _finalize(
main_provider, rpc_client, rpc_model or vision_model)
# Fall back through aggregators.
# Fall back through aggregators (uses their dedicated vision model,
# not the user's main model) when main provider has no client.
for candidate in _VISION_AUTO_PROVIDER_ORDER:
if candidate == main_provider:
continue # already tried above