feat(auxiliary): default 'auto' routing to main model for all users (#11900)

Before: aggregator users (OpenRouter / Nous Portal) running 'auto' routing for auxiliary tasks — compression, vision, web extraction, session search, etc. — got routed to a cheap provider-side default model (Gemini Flash). Non-aggregator users already got their main model. Behavior was inconsistent and surprising — users picked Claude / GPT / their preferred model, but side tasks ran on Gemini Flash.

After: 'auto' means "use my main chat model" for every user, regardless of provider type. Only when the main provider has no working client does the fallback chain run (OpenRouter → Nous → custom → Codex → API-key providers).

Explicit per-task overrides in config.yaml (auxiliary.<task>.provider / .model) still win — they are a hard constraint, not subject to the auto policy.

Vision auto-detection follows the same policy: try main provider + main model first (with _PROVIDER_VISION_MODELS overrides preserved for providers like xiaomi and zai that ship a dedicated multimodal model distinct from their chat model). Aggregator strict vision backends are fallbacks, not the primary path.

Changes:
- agent/auxiliary_client.py: _resolve_auto() drops the `_AGGREGATOR_PROVIDERS` guard. resolve_vision_provider_client() auto branch unifies aggregator and exotic-provider paths — everyone goes through resolve_provider_client() with main_model. Dead _AGGREGATOR_PROVIDERS constant removed (was only used by the guard we just removed).
- hermes_cli/main.py: aux config menu copy updated to reflect the new semantics ("'auto' means 'use my main model'").
- tests/agent/test_auxiliary_main_first.py: 12 regression tests covering OpenRouter/Nous/DeepSeek main paths, runtime-override wins, explicit-config wins, vision override preservation for exotic providers, and fallback-chain activation when the main provider has no working client.

Co-authored-by: teknium1 <teknium@nousresearch.com>
2026-04-28 06:51:16 +08:00 · 2026-04-17 19:13:23 -07:00
parent b449a0e049
commit a155b4a159
3 changed files with 352 additions and 37 deletions
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -1075,8 +1075,6 @@ _AUTO_PROVIDER_LABELS = {
    "_resolve_api_key_provider": "api-key",
 }

-_AGGREGATOR_PROVIDERS = frozenset({"openrouter", "nous"})
-
 _MAIN_RUNTIME_FIELDS = ("provider", "model", "base_url", "api_key", "api_mode")


@@ -1207,11 +1205,15 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
    """Full auto-detection chain.

    Priority:
-      1. If the user's main provider is NOT an aggregator (OpenRouter / Nous),
-         use their main provider + main model directly.  This ensures users on
-         Alibaba, DeepSeek, ZAI, etc. get auxiliary tasks handled by the same
-         provider they already have credentials for — no OpenRouter key needed.
-      2. OpenRouter → Nous → custom → Codex → API-key providers (original chain).
+      1. User's main provider + main model, regardless of provider type.
+         This means auxiliary tasks (compression, vision, web extraction,
+         session search, etc.) use the same model the user configured for
+         chat.  Users on OpenRouter/Nous get their chosen chat model; users
+         on DeepSeek/ZAI/Alibaba get theirs; etc.  Running aux tasks on the
+         user's picked model keeps behavior predictable — no surprise
+         switches to a cheap fallback model for side tasks.
+      2. OpenRouter → Nous → custom → Codex → API-key providers (fallback
+         chain, only used when the main provider has no working client).
    """
    global auxiliary_is_nous, _stale_base_url_warned
    auxiliary_is_nous = False  # Reset — _try_nous() will set True if it wins
@@ -1241,11 +1243,16 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
            )
            _stale_base_url_warned = True

-    # ── Step 1: non-aggregator main provider → use main model directly ──
+    # ── Step 1: main provider + main model → use them directly ──
+    #
+    # This is the primary aux backend for every user.  "auto" means
+    # "use my main chat model for side tasks as well" — including users
+    # on aggregators (OpenRouter, Nous) who previously got routed to a
+    # cheap provider-side default.  Explicit per-task overrides set via
+    # config.yaml (auxiliary.<task>.provider / .model) still win over this.
    main_provider = runtime_provider or _read_main_provider()
    main_model = runtime_model or _read_main_model()
    if (main_provider and main_model
-            and main_provider not in _AGGREGATOR_PROVIDERS
            and main_provider not in ("auto", "")):
        resolved_provider = main_provider
        explicit_base_url = None
@@ -1828,34 +1835,31 @@ def resolve_vision_provider_client(

    if requested == "auto":
        # Vision auto-detection order:
-        #   1. Active provider + model (user's main chat config)
-        #   2. OpenRouter  (known vision-capable default model)
-        #   3. Nous Portal (known vision-capable default model)
+        #   1. User's main provider + main model (including aggregators).
+        #      _PROVIDER_VISION_MODELS provides per-provider vision model
+        #      overrides when the provider has a dedicated multimodal model
+        #      that differs from the chat model (e.g. xiaomi → mimo-v2-omni,
+        #      zai → glm-5v-turbo).
+        #   2. OpenRouter  (vision-capable aggregator fallback)
+        #   3. Nous Portal (vision-capable aggregator fallback)
        #   4. Stop
        main_provider = _read_main_provider()
        main_model = _read_main_model()
        if main_provider and main_provider not in ("auto", ""):
-            if main_provider in _VISION_AUTO_PROVIDER_ORDER:
-                # Known strict backend — use its defaults.
-                sync_client, default_model = _resolve_strict_vision_backend(main_provider)
-                if sync_client is not None:
-                    return _finalize(main_provider, sync_client, default_model)
-            else:
-                # Exotic provider (DeepSeek, Alibaba, Xiaomi, named custom, etc.)
-                # Use provider-specific vision model if available, otherwise main model.
-                vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
-                rpc_client, rpc_model = resolve_provider_client(
-                    main_provider, vision_model,
-                    api_mode=resolved_api_mode)
-                if rpc_client is not None:
-                    logger.info(
-                        "Vision auto-detect: using active provider %s (%s)",
-                        main_provider, rpc_model or vision_model,
-                    )
-                    return _finalize(
-                        main_provider, rpc_client, rpc_model or vision_model)
+            vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
+            rpc_client, rpc_model = resolve_provider_client(
+                main_provider, vision_model,
+                api_mode=resolved_api_mode)
+            if rpc_client is not None:
+                logger.info(
+                    "Vision auto-detect: using main provider %s (%s)",
+                    main_provider, rpc_model or vision_model,
+                )
+                return _finalize(
+                    main_provider, rpc_client, rpc_model or vision_model)

-        # Fall back through aggregators.
+        # Fall back through aggregators (each uses its own dedicated vision
+        # model, not the user's main model) when the main provider has no client.
        for candidate in _VISION_AUTO_PROVIDER_ORDER:
            if candidate == main_provider:
                continue  # already tried above