refactor(ai-gateway): single source of truth for model catalog (#13304)

Delete the stale literal `_PROVIDER_MODELS["ai-gateway"]` (gpt-5,
gemini-2.5-pro, claude-sonnet/haiku-4.5 — outdated the moment PR #13223 landed with
its curated `AI_GATEWAY_MODELS` snapshot) and derive it from that
snapshot — renamed here to `VERCEL_AI_GATEWAY_MODELS` — instead, so the picker tuples and the bare-id
fallback catalog stay in sync automatically. Also fixes
`get_default_model_for_provider('ai-gateway')` to return kimi-k2.6
(the curated recommendation) instead of claude-opus-4.6.
This commit is contained in:
Teknium
2026-04-20 22:21:21 -07:00
committed by GitHub
parent 70d7f79bef
commit b4edf9e6be
2 changed files with 15 additions and 23 deletions

View File

@@ -72,7 +72,7 @@ _openrouter_catalog_cache: list[tuple[str, str]] | None = None
# OSS / open-weight models prioritized first, then closed-source by family.
# Slugs match Vercel's actual /v1/models catalog (e.g. alibaba/ for Qwen,
# zai/ and xai/ without hyphens).
AI_GATEWAY_MODELS: list[tuple[str, str]] = [
VERCEL_AI_GATEWAY_MODELS: list[tuple[str, str]] = [
("moonshotai/kimi-k2.6", "recommended"),
("alibaba/qwen3.6-plus", ""),
("zai/glm-5.1", ""),
@@ -300,20 +300,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"minimax-m2.7",
"minimax-m2.5",
],
"ai-gateway": [
"anthropic/claude-opus-4.6",
"anthropic/claude-sonnet-4.6",
"anthropic/claude-sonnet-4.5",
"anthropic/claude-haiku-4.5",
"openai/gpt-5",
"openai/gpt-4.1",
"openai/gpt-4.1-mini",
"google/gemini-3-pro-preview",
"google/gemini-3-flash",
"google/gemini-2.5-pro",
"google/gemini-2.5-flash",
"deepseek/deepseek-v3.2",
],
"kilocode": [
"anthropic/claude-opus-4.6",
"anthropic/claude-sonnet-4.6",
@@ -366,6 +352,12 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
],
}
# Vercel AI Gateway: derive the bare-model-id catalog from the curated
# ``VERCEL_AI_GATEWAY_MODELS`` snapshot so both the picker (tuples with descriptions)
# and the static fallback catalog (bare ids) stay in sync from a single
# source of truth.
_PROVIDER_MODELS["ai-gateway"] = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS]
# ---------------------------------------------------------------------------
# Nous Portal free-model filtering
# ---------------------------------------------------------------------------
@@ -777,7 +769,7 @@ def fetch_ai_gateway_models(
from hermes_constants import AI_GATEWAY_BASE_URL
fallback = list(AI_GATEWAY_MODELS)
fallback = list(VERCEL_AI_GATEWAY_MODELS)
preferred_ids = [mid for mid, _ in fallback]
try:

View File

@@ -10,7 +10,7 @@ from unittest.mock import patch, MagicMock
from hermes_cli import models as models_module
from hermes_cli.models import (
AI_GATEWAY_MODELS,
VERCEL_AI_GATEWAY_MODELS,
_ai_gateway_model_is_free,
fetch_ai_gateway_models,
fetch_ai_gateway_pricing,
@@ -89,7 +89,7 @@ def test_ai_gateway_free_detector():
def test_fetch_ai_gateway_models_filters_against_live_catalog():
_reset_caches()
preferred = [mid for mid, _ in AI_GATEWAY_MODELS]
preferred = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS]
live_ids = preferred[:3] # only first three exist live
payload = {
"data": [
@@ -106,8 +106,8 @@ def test_fetch_ai_gateway_models_filters_against_live_catalog():
def test_fetch_ai_gateway_models_tags_free_models():
_reset_caches()
first_id = AI_GATEWAY_MODELS[0][0]
second_id = AI_GATEWAY_MODELS[1][0]
first_id = VERCEL_AI_GATEWAY_MODELS[0][0]
second_id = VERCEL_AI_GATEWAY_MODELS[1][0]
payload = {
"data": [
{"id": first_id, "pricing": {"input": "0.001", "output": "0.002"}},
@@ -124,7 +124,7 @@ def test_fetch_ai_gateway_models_tags_free_models():
def test_free_moonshot_model_auto_promoted_to_top_even_if_not_curated():
_reset_caches()
first_curated = AI_GATEWAY_MODELS[0][0]
first_curated = VERCEL_AI_GATEWAY_MODELS[0][0]
unlisted_free_moonshot = "moonshotai/kimi-coder-free-preview"
payload = {
"data": [
@@ -141,7 +141,7 @@ def test_free_moonshot_model_auto_promoted_to_top_even_if_not_curated():
def test_paid_moonshot_does_not_get_auto_promoted():
_reset_caches()
first_curated = AI_GATEWAY_MODELS[0][0]
first_curated = VERCEL_AI_GATEWAY_MODELS[0][0]
payload = {
"data": [
{"id": first_curated, "pricing": {"input": "0.001", "output": "0.002"}},
@@ -158,4 +158,4 @@ def test_fetch_ai_gateway_models_falls_back_on_error():
_reset_caches()
with patch("urllib.request.urlopen", side_effect=OSError("network")):
result = fetch_ai_gateway_models(force_refresh=True)
assert result == list(AI_GATEWAY_MODELS)
assert result == list(VERCEL_AI_GATEWAY_MODELS)