mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
fix: update Gemini model catalog + wire models.dev as live model source
Follow-up for salvaged PR #5494: - Update model catalog to Gemini 3.x + Gemma 4 (drop deprecated 2.0) - Add list_agentic_models() to models_dev.py with noise filter - Wire models.dev into _model_flow_api_key_provider as primary source (static curated list serves as offline fallback) - Add gemini -> google mapping in PROVIDER_TO_MODELS_DEV - Fix Gemma 4 context lengths to 256K (models.dev values) - Update auxiliary model to gemini-3-flash-preview - Expand tests: 3.x catalog, context lengths, models.dev integration
This commit is contained in:
@@ -55,7 +55,7 @@ logger = logging.getLogger(__name__)
|
||||
|
||||
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
|
||||
_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
|
||||
"gemini": "gemini-2.5-flash",
|
||||
"gemini": "gemini-3-flash-preview",
|
||||
"zai": "glm-4.5-flash",
|
||||
"kimi-coding": "kimi-k2-turbo-preview",
|
||||
"minimax": "MiniMax-M2.7-highspeed",
|
||||
|
||||
@@ -103,10 +103,8 @@ DEFAULT_CONTEXT_LENGTHS = {
|
||||
# Google
|
||||
"gemini": 1048576,
|
||||
# Gemma (open models served via AI Studio)
|
||||
"gemma-4-31b": 262144,
|
||||
"gemma-4-26b": 262144,
|
||||
"gemma-4-e4b": 131072,
|
||||
"gemma-4-e2b": 131072,
|
||||
"gemma-4-31b": 256000,
|
||||
"gemma-4-26b": 256000,
|
||||
"gemma-3": 131072,
|
||||
"gemma": 8192, # fallback for older gemma models
|
||||
# DeepSeek
|
||||
|
||||
@@ -160,6 +160,7 @@ PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
|
||||
"kilocode": "kilo",
|
||||
"fireworks": "fireworks-ai",
|
||||
"huggingface": "huggingface",
|
||||
"gemini": "google",
|
||||
"google": "google",
|
||||
"xai": "xai",
|
||||
"nvidia": "nvidia",
|
||||
@@ -422,6 +423,39 @@ def list_provider_models(provider: str) -> List[str]:
|
||||
return list(models.keys())
|
||||
|
||||
|
||||
# Patterns that indicate non-agentic or noise models (TTS, embedding,
|
||||
# dated preview snapshots, live/streaming-only, image-only).
|
||||
import re
|
||||
_NOISE_PATTERNS: re.Pattern = re.compile(
|
||||
r"-tts\b|embedding|live-|-(preview|exp)-\d{2,4}[-_]|"
|
||||
r"-image\b|-image-preview\b|-customtools\b",
|
||||
re.IGNORECASE,
|
||||
)
|
||||
|
||||
|
||||
def list_agentic_models(provider: str) -> List[str]:
|
||||
"""Return model IDs suitable for agentic use from models.dev.
|
||||
|
||||
Filters for tool_call=True and excludes noise (TTS, embedding,
|
||||
dated preview snapshots, live/streaming, image-only models).
|
||||
Returns an empty list on any failure.
|
||||
"""
|
||||
models = _get_provider_models(provider)
|
||||
if models is None:
|
||||
return []
|
||||
|
||||
result = []
|
||||
for mid, entry in models.items():
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
if not entry.get("tool_call", False):
|
||||
continue
|
||||
if _NOISE_PATTERNS.search(mid):
|
||||
continue
|
||||
result.append(mid)
|
||||
return result
|
||||
|
||||
|
||||
def search_models_dev(
|
||||
query: str, provider: str = None, limit: int = 5
|
||||
) -> List[Dict[str, Any]]:
|
||||
|
||||
@@ -2211,24 +2211,37 @@ def _model_flow_api_key_provider(config, provider_id, current_model=""):
|
||||
save_env_value(base_url_env, override)
|
||||
effective_base = override
|
||||
|
||||
# Model selection — try live /models endpoint first, fall back to defaults.
|
||||
# Providers with large live catalogs (100+ models) use a curated list instead
|
||||
# so users see familiar model names rather than an overwhelming dump.
|
||||
# Model selection — resolution order:
|
||||
# 1. models.dev registry (cached, filtered for agentic/tool-capable models)
|
||||
# 2. Curated static fallback list (offline insurance)
|
||||
# 3. Live /models endpoint probe (small providers without models.dev data)
|
||||
curated = _PROVIDER_MODELS.get(provider_id, [])
|
||||
if curated and len(curated) >= 8:
|
||||
|
||||
# Try models.dev first — returns tool-capable models, filtered for noise
|
||||
mdev_models: list = []
|
||||
try:
|
||||
from agent.models_dev import list_agentic_models
|
||||
mdev_models = list_agentic_models(provider_id)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if mdev_models:
|
||||
model_list = mdev_models
|
||||
print(f" Found {len(model_list)} model(s) from models.dev registry")
|
||||
elif curated and len(curated) >= 8:
|
||||
# Curated list is substantial — use it directly, skip live probe
|
||||
live_models = None
|
||||
model_list = curated
|
||||
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
|
||||
else:
|
||||
api_key_for_probe = existing_key or (get_env_value(key_env) if key_env else "")
|
||||
live_models = fetch_api_models(api_key_for_probe, effective_base)
|
||||
|
||||
if live_models and len(live_models) >= len(curated):
|
||||
model_list = live_models
|
||||
print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
|
||||
else:
|
||||
model_list = curated
|
||||
if model_list:
|
||||
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
|
||||
if live_models and len(live_models) >= len(curated):
|
||||
model_list = live_models
|
||||
print(f" Found {len(model_list)} model(s) from {pconfig.name} API")
|
||||
else:
|
||||
model_list = curated
|
||||
if model_list:
|
||||
print(f" Showing {len(model_list)} curated models — use \"Enter custom model name\" for others.")
|
||||
# else: no defaults either, will fall through to raw input
|
||||
|
||||
if provider_id in {"opencode-zen", "opencode-go"}:
|
||||
|
||||
@@ -112,15 +112,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
||||
"grok-code-fast-1",
|
||||
],
|
||||
"gemini": [
|
||||
"gemini-3.1-pro-preview",
|
||||
"gemini-3-flash-preview",
|
||||
"gemini-3.1-flash-lite-preview",
|
||||
"gemini-2.5-pro",
|
||||
"gemini-2.5-flash",
|
||||
"gemini-2.0-flash",
|
||||
"gemini-2.0-flash-lite",
|
||||
"gemini-2.5-flash-lite",
|
||||
# Gemma open models (also served via AI Studio)
|
||||
"gemma-4-31b-it",
|
||||
"gemma-4-26b-a4b-it",
|
||||
"gemma-4-e4b-it",
|
||||
"gemma-4-e2b-it",
|
||||
"gemma-4-26b-it",
|
||||
],
|
||||
"zai": [
|
||||
"glm-5",
|
||||
|
||||
@@ -112,8 +112,9 @@ _DEFAULT_PROVIDER_MODELS = {
|
||||
"grok-code-fast-1",
|
||||
],
|
||||
"gemini": [
|
||||
"gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.0-flash", "gemini-2.0-flash-lite",
|
||||
"gemma-4-31b-it", "gemma-4-26b-a4b-it", "gemma-4-e4b-it", "gemma-4-e2b-it",
|
||||
"gemini-3.1-pro-preview", "gemini-3-flash-preview", "gemini-3.1-flash-lite-preview",
|
||||
"gemini-2.5-pro", "gemini-2.5-flash", "gemini-2.5-flash-lite",
|
||||
"gemma-4-31b-it", "gemma-4-26b-it",
|
||||
],
|
||||
"zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
|
||||
"kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
|
||||
|
||||
@@ -8,6 +8,7 @@ from hermes_cli.auth import PROVIDER_REGISTRY, resolve_provider, resolve_api_key
|
||||
from hermes_cli.models import _PROVIDER_MODELS, _PROVIDER_LABELS, _PROVIDER_ALIASES, normalize_provider
|
||||
from hermes_cli.model_normalize import normalize_model_for_provider, detect_vendor
|
||||
from agent.model_metadata import get_model_context_length
|
||||
from agent.models_dev import PROVIDER_TO_MODELS_DEV, list_agentic_models, _NOISE_PATTERNS
|
||||
|
||||
|
||||
# ── Provider Registry ──
|
||||
@@ -131,6 +132,12 @@ class TestGeminiModelCatalog:
|
||||
assert "gemini-2.5-flash" in models
|
||||
assert "gemma-4-31b-it" in models
|
||||
|
||||
def test_provider_models_has_3x(self):
|
||||
models = _PROVIDER_MODELS["gemini"]
|
||||
assert "gemini-3.1-pro-preview" in models
|
||||
assert "gemini-3-flash-preview" in models
|
||||
assert "gemini-3.1-flash-lite-preview" in models
|
||||
|
||||
def test_provider_label(self):
|
||||
assert "gemini" in _PROVIDER_LABELS
|
||||
assert _PROVIDER_LABELS["gemini"] == "Google AI Studio"
|
||||
@@ -165,11 +172,15 @@ class TestGeminiModelNormalization:
|
||||
class TestGeminiContextLength:
|
||||
def test_gemma_4_31b_context(self):
|
||||
ctx = get_model_context_length("gemma-4-31b-it", provider="gemini")
|
||||
assert ctx == 262144
|
||||
assert ctx == 256000
|
||||
|
||||
def test_gemma_4_e4b_context(self):
|
||||
ctx = get_model_context_length("gemma-4-e4b-it", provider="gemini")
|
||||
assert ctx == 131072
|
||||
def test_gemma_4_26b_context(self):
|
||||
ctx = get_model_context_length("gemma-4-26b-it", provider="gemini")
|
||||
assert ctx == 256000
|
||||
|
||||
def test_gemini_3_context(self):
|
||||
ctx = get_model_context_length("gemini-3.1-pro-preview", provider="gemini")
|
||||
assert ctx == 1048576
|
||||
|
||||
|
||||
# ── Agent Init (no SyntaxError) ──
|
||||
@@ -195,3 +206,64 @@ class TestGeminiAgentInit:
|
||||
)
|
||||
assert agent.api_mode == "chat_completions"
|
||||
assert agent.provider == "gemini"
|
||||
|
||||
|
||||
# ── models.dev Integration ──
|
||||
|
||||
class TestGeminiModelsDev:
|
||||
def test_gemini_mapped_to_google(self):
|
||||
assert PROVIDER_TO_MODELS_DEV.get("gemini") == "google"
|
||||
|
||||
def test_noise_filter_excludes_tts(self):
|
||||
assert _NOISE_PATTERNS.search("gemini-2.5-pro-preview-tts")
|
||||
|
||||
def test_noise_filter_excludes_dated_preview(self):
|
||||
assert _NOISE_PATTERNS.search("gemini-2.5-flash-preview-04-17")
|
||||
|
||||
def test_noise_filter_excludes_embedding(self):
|
||||
assert _NOISE_PATTERNS.search("gemini-embedding-001")
|
||||
|
||||
def test_noise_filter_excludes_live(self):
|
||||
assert _NOISE_PATTERNS.search("gemini-live-2.5-flash")
|
||||
|
||||
def test_noise_filter_excludes_image(self):
|
||||
assert _NOISE_PATTERNS.search("gemini-2.5-flash-image")
|
||||
|
||||
def test_noise_filter_excludes_customtools(self):
|
||||
assert _NOISE_PATTERNS.search("gemini-3.1-pro-preview-customtools")
|
||||
|
||||
def test_noise_filter_passes_stable(self):
|
||||
assert not _NOISE_PATTERNS.search("gemini-2.5-flash")
|
||||
|
||||
def test_noise_filter_passes_preview(self):
|
||||
# Non-dated preview (e.g. gemini-3-flash-preview) should pass
|
||||
assert not _NOISE_PATTERNS.search("gemini-3-flash-preview")
|
||||
|
||||
def test_noise_filter_passes_gemma(self):
|
||||
assert not _NOISE_PATTERNS.search("gemma-4-31b-it")
|
||||
|
||||
def test_list_agentic_models_with_mock_data(self):
|
||||
"""list_agentic_models filters correctly from mock models.dev data."""
|
||||
mock_data = {
|
||||
"google": {
|
||||
"models": {
|
||||
"gemini-3-flash-preview": {"tool_call": True},
|
||||
"gemini-2.5-pro": {"tool_call": True},
|
||||
"gemini-embedding-001": {"tool_call": False},
|
||||
"gemini-2.5-flash-preview-tts": {"tool_call": False},
|
||||
"gemini-live-2.5-flash": {"tool_call": True},
|
||||
"gemini-2.5-flash-preview-04-17": {"tool_call": True},
|
||||
"gemma-4-31b-it": {"tool_call": True},
|
||||
}
|
||||
}
|
||||
}
|
||||
with patch("agent.models_dev.fetch_models_dev", return_value=mock_data):
|
||||
result = list_agentic_models("gemini")
|
||||
assert "gemini-3-flash-preview" in result
|
||||
assert "gemini-2.5-pro" in result
|
||||
assert "gemma-4-31b-it" in result
|
||||
# Filtered out:
|
||||
assert "gemini-embedding-001" not in result # no tool_call
|
||||
assert "gemini-2.5-flash-preview-tts" not in result # no tool_call
|
||||
assert "gemini-live-2.5-flash" not in result # noise: live-
|
||||
assert "gemini-2.5-flash-preview-04-17" not in result # noise: dated preview
|
||||
|
||||
Reference in New Issue
Block a user