fix(model): preserve custom endpoint credentials and accept cloud models not in /v1/models

When switching models on a custom endpoint (ollama-launch):
- Same-provider switches no longer re-resolve credentials (fixes base_url
  being lost for the 'custom' provider on subsequent switches)
- Named providers (ollama-launch) are resolved via user_providers so
  switch_model can find their base_url from config
- Models not in the /v1/models probe but present in the user's saved
  provider config are accepted with a warning instead of being rejected
- CLI /model and TUI /model both pass user_providers/custom_providers
  to switch_model so the config model list is available for validation

Closes #15088
This commit is contained in:
kshitijk4poor
2026-04-25 14:10:42 +05:30
parent e5647d7863
commit 0d3d2a2631
4 changed files with 56 additions and 24 deletions

18
cli.py
View File

@@ -5270,24 +5270,22 @@ class HermesCLI:
# Parse --provider and --global flags
model_input, explicit_provider, persist_global = parse_model_flags(raw_args)
# Load providers for switch_model (picker path needs them below)
user_provs = None
custom_provs = None
try:
from hermes_cli.config import get_compatible_custom_providers, load_config
cfg = load_config()
user_provs = cfg.get("providers")
custom_provs = get_compatible_custom_providers(cfg)
except Exception:
pass
# No args at all: open prompt_toolkit-native picker modal
if not model_input and not explicit_provider:
model_display = self.model or "unknown"
provider_display = get_label(self.provider) if self.provider else "unknown"
user_provs = None
custom_provs = None
try:
from hermes_cli.config import get_compatible_custom_providers, load_config
cfg = load_config()
user_provs = cfg.get("providers")
custom_provs = get_compatible_custom_providers(cfg)
except Exception:
pass
try:
providers = list_authenticated_providers(
current_provider=self.provider or "",

View File

@@ -831,9 +831,14 @@ def switch_model(
requested=current_provider,
target_model=new_model,
)
api_key = runtime.get("api_key", "")
base_url = runtime.get("base_url", "")
api_mode = runtime.get("api_mode", "")
# If resolution fell through to "custom" (e.g. named custom provider like
# "ollama-launch" that resolve_runtime_provider doesn't know), keep existing
# credentials. Otherwise use the resolved values (picks up credential rotation,
# base_url adjustments for OpenCode, etc.).
if runtime.get("provider") != "custom":
api_key = runtime.get("api_key", "")
base_url = runtime.get("base_url", "")
api_mode = runtime.get("api_mode", "")
except Exception:
pass
@@ -867,16 +872,31 @@ def switch_model(
"message": f"Could not validate `{new_model}`: {e}",
}
# Override rejection if model is in the user's saved provider config.
# API /v1/models may not list cloud/aliased models even though the server supports them.
if not validation.get("accepted"):
msg = validation.get("message", "Invalid model")
return ModelSwitchResult(
success=False,
new_model=new_model,
target_provider=target_provider,
provider_label=provider_label,
is_global=is_global,
error_message=msg,
)
override = False
if user_providers:
for up in user_providers:
if isinstance(up, dict) and up.get("provider") == target_provider:
cfg_models = up.get("models", [])
if new_model in cfg_models or any(
m.get("name") == new_model for m in cfg_models if isinstance(m, dict)
):
override = True
break
if override:
validation = {"accepted": True, "persist": True, "recognized": False, "message": validation.get("message", "")}
else:
msg = validation.get("message", "Invalid model")
return ModelSwitchResult(
success=False,
new_model=new_model,
target_provider=target_provider,
provider_label=provider_label,
is_global=is_global,
error_message=msg,
)
# Apply auto-correction if validation found a closer match
if validation.get("corrected_model"):

View File

@@ -2571,8 +2571,8 @@ def validate_requested_model(
)
return {
"accepted": False,
"persist": False,
"accepted": True,
"persist": True,
"recognized": False,
"message": message,
}

View File

@@ -712,6 +712,18 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict:
current_base_url = str(runtime.get("base_url", "") or "")
current_api_key = str(runtime.get("api_key", "") or "")
# Load user-defined providers so switch_model can resolve named custom
# endpoints (e.g. "ollama-launch") and validate against saved model lists.
user_provs = None
custom_provs = None
try:
from hermes_cli.config import get_compatible_custom_providers, load_config
cfg = load_config()
user_provs = [{"provider": k, **v} for k, v in (cfg.get("providers") or {}).items()]
custom_provs = get_compatible_custom_providers(cfg)
except Exception:
pass
result = switch_model(
raw_input=model_input,
current_provider=current_provider,
@@ -720,6 +732,8 @@ def _apply_model_switch(sid: str, session: dict, raw_input: str) -> dict:
current_api_key=current_api_key,
is_global=persist_global,
explicit_provider=explicit_provider,
user_providers=user_provs,
custom_providers=custom_provs,
)
if not result.success:
raise ValueError(result.error_message or "model switch failed")