# hermes-agent/tests/hermes_cli/test_model_normalize.py
# (web-viewer chrome removed — scrape artifact)

"""Tests for hermes_cli.model_normalize — provider-aware model name normalization.
Covers issue #5211: opencode-go model names with dots (e.g. minimax-m2.7)
must NOT be mangled to hyphens (minimax-m2-7).
"""
import pytest
from hermes_cli.model_normalize import (
normalize_model_for_provider,
_DOT_TO_HYPHEN_PROVIDERS,
_AGGREGATOR_PROVIDERS,
    # NOTE: interleaved VCS commit message (DeepSeek V-series pass-through fix,
    # 2026-04-24) removed — scrape artifact, not part of this file.
_normalize_for_deepseek,
detect_vendor,
)
# ── Regression: issue #5211 ────────────────────────────────────────────
class TestIssue5211OpenCodeGoDotPreservation:
    """OpenCode Go model names with dots must pass through unchanged.

    Regression for issue #5211: dotted version suffixes (``minimax-m2.7``)
    were being mangled into hyphens (``minimax-m2-7``).
    """

    @pytest.mark.parametrize("model,expected", [
        ("minimax-m2.7", "minimax-m2.7"),
        ("minimax-m2.5", "minimax-m2.5"),
        ("glm-4.5", "glm-4.5"),
        ("kimi-k2.5", "kimi-k2.5"),
        ("some-model-1.0.3", "some-model-1.0.3"),
    ])
    def test_opencode_go_preserves_dots(self, model, expected):
        # Dots are significant in opencode-go IDs; normalization must not touch them.
        result = normalize_model_for_provider(model, "opencode-go")
        assert result == expected, f"Expected {expected!r}, got {result!r}"

    def test_opencode_go_not_in_dot_to_hyphen_set(self):
        """opencode-go must NOT be in the dot-to-hyphen provider set."""
        assert "opencode-go" not in _DOT_TO_HYPHEN_PROVIDERS
# ── Anthropic dot-to-hyphen conversion (regression) ────────────────────
class TestAnthropicDotToHyphen:
    """Anthropic API still needs dots→hyphens."""

    @pytest.mark.parametrize("model,expected", [
        ("claude-sonnet-4.6", "claude-sonnet-4-6"),
        ("claude-opus-4.5", "claude-opus-4-5"),
    ])
    def test_anthropic_converts_dots(self, model, expected):
        # Anthropic's native endpoint uses hyphenated version suffixes.
        assert normalize_model_for_provider(model, "anthropic") == expected

    def test_anthropic_strips_vendor_prefix(self):
        # The "anthropic/" prefix is redundant when the target IS anthropic,
        # and the dot→hyphen conversion still applies afterwards.
        normalized = normalize_model_for_provider(
            "anthropic/claude-sonnet-4.6", "anthropic"
        )
        assert normalized == "claude-sonnet-4-6"
# ── OpenCode Zen regression ────────────────────────────────────────────
class TestOpenCodeZenModelNormalization:
    """OpenCode Zen preserves dots for most models, but Claude stays hyphenated."""

    @pytest.mark.parametrize("model,expected", [
        # Claude IDs get the hyphenated form even on Zen.
        ("claude-sonnet-4.6", "claude-sonnet-4-6"),
        ("opencode-zen/claude-opus-4.5", "claude-opus-4-5"),
        # Everything else keeps its dotted version suffix.
        ("glm-4.5", "glm-4.5"),
        ("glm-5.1", "glm-5.1"),
        ("gpt-5.4", "gpt-5.4"),
        ("minimax-m2.5-free", "minimax-m2.5-free"),
        ("kimi-k2.5", "kimi-k2.5"),
    ])
    def test_zen_normalizes_models(self, model, expected):
        assert normalize_model_for_provider(model, "opencode-zen") == expected

    def test_zen_strips_vendor_prefix(self):
        # Prefix stripping composes with the Claude dot→hyphen rule.
        result = normalize_model_for_provider(
            "opencode-zen/claude-sonnet-4.6", "opencode-zen"
        )
        assert result == "claude-sonnet-4-6"

    def test_zen_strips_vendor_prefix_for_non_claude(self):
        # Non-Claude IDs lose the prefix but keep their dots.
        result = normalize_model_for_provider(
            "opencode-zen/glm-5.1", "opencode-zen"
        )
        assert result == "glm-5.1"
# ── Copilot dot preservation (regression) ──────────────────────────────
class TestCopilotDotPreservation:
    """Copilot preserves dots in model names."""

    @pytest.mark.parametrize("model,expected", [
        ("claude-sonnet-4.6", "claude-sonnet-4.6"),
        ("gpt-5.4", "gpt-5.4"),
    ])
    def test_copilot_preserves_dots(self, model, expected):
        # Copilot's catalog uses dot-notation IDs; already-canonical names
        # must come back untouched.
        normalized = normalize_model_for_provider(model, "copilot")
        assert normalized == expected
# NOTE: interleaved VCS commit message (Copilot normalization fix #6879,
# 2026-04-17) removed — scrape artifact, not part of this file.
# ── Copilot model-name normalization (issue #6879 regression) ──────────
class TestCopilotModelNormalization:
    """Copilot requires bare dot-notation model IDs.

    Regression coverage for issue #6879 and the broken Copilot branch
    that previously left vendor-prefixed Anthropic IDs (e.g.
    ``anthropic/claude-sonnet-4.6``) and dash-notation Claude IDs (e.g.
    ``claude-sonnet-4-6``) unchanged, causing the Copilot API to reject
    the request with HTTP 400 "model_not_supported".
    """

    @pytest.mark.parametrize("model,expected", [
        # Vendor-prefixed Anthropic IDs — prefix must be stripped.
        ("anthropic/claude-opus-4.6", "claude-opus-4.6"),
        ("anthropic/claude-sonnet-4.6", "claude-sonnet-4.6"),
        ("anthropic/claude-sonnet-4.5", "claude-sonnet-4.5"),
        ("anthropic/claude-haiku-4.5", "claude-haiku-4.5"),
        # Vendor-prefixed OpenAI IDs — prefix must be stripped.
        ("openai/gpt-5.4", "gpt-5.4"),
        ("openai/gpt-4o", "gpt-4o"),
        ("openai/gpt-4o-mini", "gpt-4o-mini"),
        # Dash-notation Claude IDs — must be converted to dot-notation.
        ("claude-opus-4-6", "claude-opus-4.6"),
        ("claude-sonnet-4-6", "claude-sonnet-4.6"),
        ("claude-sonnet-4-5", "claude-sonnet-4.5"),
        ("claude-haiku-4-5", "claude-haiku-4.5"),
        # Combined: vendor-prefixed + dash-notation.
        ("anthropic/claude-opus-4-6", "claude-opus-4.6"),
        ("anthropic/claude-sonnet-4-6", "claude-sonnet-4.6"),
        # Already-canonical inputs pass through unchanged.
        ("claude-sonnet-4.6", "claude-sonnet-4.6"),
        ("gpt-5.4", "gpt-5.4"),
        ("gpt-5-mini", "gpt-5-mini"),
    ])
    def test_copilot_normalization(self, model, expected):
        normalized = normalize_model_for_provider(model, "copilot")
        assert normalized == expected

    @pytest.mark.parametrize("model,expected", [
        ("anthropic/claude-sonnet-4.6", "claude-sonnet-4.6"),
        ("claude-sonnet-4-6", "claude-sonnet-4.6"),
        ("claude-opus-4-6", "claude-opus-4.6"),
        ("openai/gpt-5.4", "gpt-5.4"),
    ])
    def test_copilot_acp_normalization(self, model, expected):
        """Copilot ACP shares the same API expectations as HTTP Copilot."""
        normalized = normalize_model_for_provider(model, "copilot-acp")
        assert normalized == expected

    def test_openai_codex_still_strips_openai_prefix(self):
        """Regression: openai-codex must still strip the openai/ prefix."""
        result = normalize_model_for_provider("openai/gpt-5.4", "openai-codex")
        assert result == "gpt-5.4"
# ── Aggregator providers (regression) ──────────────────────────────────
class TestAggregatorProviders:
    """Aggregators need vendor/model slugs.

    Aggregator endpoints address models as ``vendor/model``, so
    normalization must prepend the detected vendor when the prefix is
    missing and leave an existing prefix untouched.
    """

    def test_openrouter_prepends_vendor(self):
        # Bare Claude ID gains the anthropic/ vendor prefix for OpenRouter.
        result = normalize_model_for_provider("claude-sonnet-4.6", "openrouter")
        assert result == "anthropic/claude-sonnet-4.6"

    def test_nous_prepends_vendor(self):
        # Bare GPT ID gains the openai/ vendor prefix for Nous.
        result = normalize_model_for_provider("gpt-5.4", "nous")
        assert result == "openai/gpt-5.4"

    def test_vendor_already_present(self):
        # An existing vendor prefix is not duplicated or altered.
        result = normalize_model_for_provider("anthropic/claude-sonnet-4.6", "openrouter")
        assert result == "anthropic/claude-sonnet-4.6"

    def test_aggregators_registered_in_provider_set(self):
        # _AGGREGATOR_PROVIDERS was imported but never exercised. Pin the
        # registry directly so a provider silently dropped from the set fails
        # here with a clear message rather than only via the slug tests above.
        assert "openrouter" in _AGGREGATOR_PROVIDERS
        assert "nous" in _AGGREGATOR_PROVIDERS
class TestIssue6211NativeProviderPrefixNormalization:
    """Regression for issue #6211: a vendor prefix is stripped only when it
    names the target provider; foreign and multi-segment prefixes survive."""

    @pytest.mark.parametrize("model,target_provider,expected", [
        # Prefix matches the target provider → stripped.
        ("zai/glm-5.1", "zai", "glm-5.1"),
        ("moonshot/kimi-k2.5", "kimi-coding", "kimi-k2.5"),
        # Prefix does not match the target provider → preserved.
        ("google/gemini-2.5-pro", "gemini", "google/gemini-2.5-pro"),
        ("anthropic/claude-sonnet-4.6", "openrouter", "anthropic/claude-sonnet-4.6"),
        # Org-style and multi-segment slugs pass through untouched.
        ("Qwen/Qwen3.5-397B-A17B", "huggingface", "Qwen/Qwen3.5-397B-A17B"),
        ("modal/zai-org/GLM-5-FP8", "custom", "modal/zai-org/GLM-5-FP8"),
    ])
    def test_native_provider_prefixes_are_only_stripped_on_matching_provider(
        self, model, target_provider, expected
    ):
        assert normalize_model_for_provider(model, target_provider) == expected
# ── detect_vendor ──────────────────────────────────────────────────────
class TestDetectVendor:
    """detect_vendor maps bare model names to their vendor slug."""

    @pytest.mark.parametrize("model,expected", [
        ("claude-sonnet-4.6", "anthropic"),
        ("gpt-5.4-mini", "openai"),
        ("minimax-m2.7", "minimax"),
        ("glm-4.5", "z-ai"),
        ("kimi-k2.5", "moonshotai"),
    ])
    def test_detects_known_vendors(self, model, expected):
        vendor = detect_vendor(model)
        assert vendor == expected
# NOTE: interleaved VCS commit message (DeepSeek V-series pass-through fix,
# 2026-04-24) removed — scrape artifact, not part of this file.
# ── DeepSeek V-series pass-through (bug: V4 models silently folded to V3) ──
class TestDeepseekVSeriesPassThrough:
    """DeepSeek's V-series IDs (``deepseek-v4-pro``, ``deepseek-v4-flash``,
    and future ``deepseek-v<N>-*`` variants) are first-class model IDs
    accepted directly by DeepSeek's Chat Completions API. Earlier code
    folded every non-reasoner name into ``deepseek-chat``, which on
    aggregators (Nous portal, OpenRouter via DeepInfra) routes to V3
    silently downgrading users who picked V4.
    """

    # Expected values are pinned as literals. The previous test derived them
    # from the input via `model.split("/", 1)[-1].lower()` — re-implementing
    # the exact strip-prefix + lowercase transform under test, so a bug in
    # that logic could never be caught here.
    @pytest.mark.parametrize("model,expected", [
        ("deepseek-v4-pro", "deepseek-v4-pro"),
        ("deepseek-v4-flash", "deepseek-v4-flash"),
        # Vendor-prefixed: prefix stripped, ID preserved.
        ("deepseek/deepseek-v4-pro", "deepseek-v4-pro"),
        ("deepseek/deepseek-v4-flash", "deepseek-v4-flash"),
        # Case-insensitive match; result is lowercased.
        ("DeepSeek-V4-Pro", "deepseek-v4-pro"),
        # Dated variant.
        ("deepseek-v4-flash-20260423", "deepseek-v4-flash-20260423"),
        # Future V-series generations keep passing through.
        ("deepseek-v5-pro", "deepseek-v5-pro"),
        ("deepseek-v10-ultra", "deepseek-v10-ultra"),  # double-digit future
    ])
    def test_v_series_passes_through(self, model, expected):
        assert _normalize_for_deepseek(model) == expected

    def test_deepseek_provider_preserves_v4_pro(self):
        """End-to-end via normalize_model_for_provider — user selecting
        V4 Pro must reach DeepSeek's API as V4 Pro, not V3 alias."""
        result = normalize_model_for_provider("deepseek-v4-pro", "deepseek")
        assert result == "deepseek-v4-pro"

    def test_deepseek_provider_preserves_v4_flash(self):
        result = normalize_model_for_provider("deepseek-v4-flash", "deepseek")
        assert result == "deepseek-v4-flash"
# ── DeepSeek regressions (existing behaviour still holds) ──────────────
class TestDeepseekCanonicalAndReasonerMapping:
    """Canonical pass-through and reasoner-keyword folding stay intact."""

    @pytest.mark.parametrize("model,expected", [
        ("deepseek-chat", "deepseek-chat"),
        ("deepseek-reasoner", "deepseek-reasoner"),
        ("DEEPSEEK-CHAT", "deepseek-chat"),
    ])
    def test_canonical_models_pass_through(self, model, expected):
        # Canonical IDs survive normalization (case-insensitively).
        assert _normalize_for_deepseek(model) == expected

    @pytest.mark.parametrize("model", [
        "deepseek-r1",
        "deepseek-r1-0528",
        "deepseek-think-v3",
        "deepseek-reasoning-preview",
        "deepseek-cot-experimental",
    ])
    def test_reasoner_keywords_map_to_reasoner(self, model):
        # Reasoning-flavoured names fold into the canonical reasoner ID.
        assert _normalize_for_deepseek(model) == "deepseek-reasoner"

    @pytest.mark.parametrize("model", [
        "deepseek-chat-v3.1",  # 'chat' prefix, not V-series pattern
        "unknown-model",
        "something-random",
        "gpt-5",  # non-DeepSeek names still fall through
    ])
    def test_unknown_names_fall_back_to_chat(self, model):
        # Anything unrecognised defaults to the chat model.
        assert _normalize_for_deepseek(model) == "deepseek-chat"