refactor(ai-gateway): single source of truth for model catalog (#13304)

Delete the stale literal `_PROVIDER_MODELS["ai-gateway"]` (gpt-5,
gemini-2.5-pro, claude-sonnet/haiku-4.5 — outdated the moment PR #13223 landed with
its curated `AI_GATEWAY_MODELS` snapshot) and derive it from that
snapshot — renamed here to `VERCEL_AI_GATEWAY_MODELS` — instead, so the picker tuples and the bare-id
fallback catalog stay in sync automatically. Also fixes
`get_default_model_for_provider('ai-gateway')` to return kimi-k2.6
(the curated recommendation) instead of claude-opus-4.6.
This commit is contained in:
Teknium
2026-04-20 22:21:21 -07:00
committed by GitHub
parent 70d7f79bef
commit b4edf9e6be
2 changed files with 15 additions and 23 deletions

View File

@@ -72,7 +72,7 @@ _openrouter_catalog_cache: list[tuple[str, str]] | None = None
# OSS / open-weight models prioritized first, then closed-source by family.
# Slugs match Vercel's actual /v1/models catalog (e.g. alibaba/ for Qwen,
# zai/ and xai/ without hyphens).
AI_GATEWAY_MODELS: list[tuple[str, str]] = [
VERCEL_AI_GATEWAY_MODELS: list[tuple[str, str]] = [
("moonshotai/kimi-k2.6", "recommended"),
("alibaba/qwen3.6-plus", ""),
("zai/glm-5.1", ""),
@@ -300,20 +300,6 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"minimax-m2.7",
"minimax-m2.5",
],
"ai-gateway": [
"anthropic/claude-opus-4.6",
"anthropic/claude-sonnet-4.6",
"anthropic/claude-sonnet-4.5",
"anthropic/claude-haiku-4.5",
"openai/gpt-5",
"openai/gpt-4.1",
"openai/gpt-4.1-mini",
"google/gemini-3-pro-preview",
"google/gemini-3-flash",
"google/gemini-2.5-pro",
"google/gemini-2.5-flash",
"deepseek/deepseek-v3.2",
],
"kilocode": [
"anthropic/claude-opus-4.6",
"anthropic/claude-sonnet-4.6",
@@ -366,6 +352,12 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
],
}
# Vercel AI Gateway: derive the bare-model-id catalog from the curated
# ``VERCEL_AI_GATEWAY_MODELS`` snapshot so both the picker (tuples with descriptions)
# and the static fallback catalog (bare ids) stay in sync from a single
# source of truth.
_PROVIDER_MODELS["ai-gateway"] = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS]
# ---------------------------------------------------------------------------
# Nous Portal free-model filtering
# ---------------------------------------------------------------------------
@@ -777,7 +769,7 @@ def fetch_ai_gateway_models(
from hermes_constants import AI_GATEWAY_BASE_URL
fallback = list(AI_GATEWAY_MODELS)
fallback = list(VERCEL_AI_GATEWAY_MODELS)
preferred_ids = [mid for mid, _ in fallback]
try:

View File

@@ -10,7 +10,7 @@ from unittest.mock import patch, MagicMock
from hermes_cli import models as models_module
from hermes_cli.models import (
AI_GATEWAY_MODELS,
VERCEL_AI_GATEWAY_MODELS,
_ai_gateway_model_is_free,
fetch_ai_gateway_models,
fetch_ai_gateway_pricing,
@@ -89,7 +89,7 @@ def test_ai_gateway_free_detector():
def test_fetch_ai_gateway_models_filters_against_live_catalog():
_reset_caches()
preferred = [mid for mid, _ in AI_GATEWAY_MODELS]
preferred = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS]
live_ids = preferred[:3] # only first three exist live
payload = {
"data": [
@@ -106,8 +106,8 @@ def test_fetch_ai_gateway_models_filters_against_live_catalog():
def test_fetch_ai_gateway_models_tags_free_models():
_reset_caches()
first_id = AI_GATEWAY_MODELS[0][0]
second_id = AI_GATEWAY_MODELS[1][0]
first_id = VERCEL_AI_GATEWAY_MODELS[0][0]
second_id = VERCEL_AI_GATEWAY_MODELS[1][0]
payload = {
"data": [
{"id": first_id, "pricing": {"input": "0.001", "output": "0.002"}},
@@ -124,7 +124,7 @@ def test_fetch_ai_gateway_models_tags_free_models():
def test_free_moonshot_model_auto_promoted_to_top_even_if_not_curated():
_reset_caches()
first_curated = AI_GATEWAY_MODELS[0][0]
first_curated = VERCEL_AI_GATEWAY_MODELS[0][0]
unlisted_free_moonshot = "moonshotai/kimi-coder-free-preview"
payload = {
"data": [
@@ -141,7 +141,7 @@ def test_free_moonshot_model_auto_promoted_to_top_even_if_not_curated():
def test_paid_moonshot_does_not_get_auto_promoted():
_reset_caches()
first_curated = AI_GATEWAY_MODELS[0][0]
first_curated = VERCEL_AI_GATEWAY_MODELS[0][0]
payload = {
"data": [
{"id": first_curated, "pricing": {"input": "0.001", "output": "0.002"}},
@@ -158,4 +158,4 @@ def test_fetch_ai_gateway_models_falls_back_on_error():
_reset_caches()
with patch("urllib.request.urlopen", side_effect=OSError("network")):
result = fetch_ai_gateway_models(force_refresh=True)
assert result == list(AI_GATEWAY_MODELS)
assert result == list(VERCEL_AI_GATEWAY_MODELS)