Compare commits

...

1 Commits

Author SHA1 Message Date
kshitij
ba5906b2b2 fix(minimax): correct context lengths, model catalog, thinking guard, aux model, and config base_url
Cherry-picked from PR #6046 by kshitijk4poor with dead code stripped.

- Context lengths: 204800 → 1M (M1) / 1048576 (M2.5/M2.7) per official docs
- Model catalog: add M1 family, remove deprecated M2.1 and highspeed variants
- Thinking guard: skip extended thinking for MiniMax (Anthropic-compat endpoint)
- Aux model: MiniMax-M2.7-highspeed → MiniMax-M2.7 (same model, half price)
- Config base_url: honour model.base_url for API-key providers (fixes China users)
- Stripped unused get_minimax_max_output() / _MINIMAX_MAX_OUTPUT (no consumer)

Fixes #5777, #4082, #6039. Closes #3895.
2026-04-08 01:39:28 -07:00
9 changed files with 203 additions and 20 deletions

View File

@@ -1224,9 +1224,9 @@ def build_anthropic_kwargs(
# Map reasoning_config to Anthropic's thinking parameter.
# Claude 4.6 models use adaptive thinking + output_config.effort.
# Older models use manual thinking with budget_tokens.
# Haiku models do NOT support extended thinking at all — skip entirely.
# Haiku and MiniMax models do NOT support extended thinking — skip entirely.
if reasoning_config and isinstance(reasoning_config, dict):
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
if reasoning_config.get("enabled") is not False and "haiku" not in model.lower() and "minimax" not in model.lower():
effort = str(reasoning_config.get("effort", "medium")).lower()
budget = THINKING_BUDGET.get(effort, 8000)
if _supports_adaptive_thinking(model):

View File

@@ -99,8 +99,8 @@ _API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
"gemini": "gemini-3-flash-preview",
"zai": "glm-4.5-flash",
"kimi-coding": "kimi-k2-turbo-preview",
"minimax": "MiniMax-M2.7-highspeed",
"minimax-cn": "MiniMax-M2.7-highspeed",
"minimax": "MiniMax-M2.7",
"minimax-cn": "MiniMax-M2.7",
"anthropic": "claude-haiku-4-5-20251001",
"ai-gateway": "google/gemini-3-flash",
"opencode-zen": "gemini-3-flash",

View File

@@ -113,8 +113,15 @@ DEFAULT_CONTEXT_LENGTHS = {
"llama": 131072,
# Qwen
"qwen": 131072,
# MiniMax
"minimax": 204800,
# MiniMax (lowercase — lookup lowercases model names at line 973)
"minimax-m1-256k": 1000000,
"minimax-m1-128k": 1000000,
"minimax-m1-80k": 1000000,
"minimax-m1-40k": 1000000,
"minimax-m1": 1000000,
"minimax-m2.5": 1048576,
"minimax-m2.7": 1048576,
"minimax": 1048576,
# GLM
"glm": 202752,
# Kimi
@@ -127,7 +134,7 @@ DEFAULT_CONTEXT_LENGTHS = {
"deepseek-ai/DeepSeek-V3.2": 65536,
"moonshotai/Kimi-K2.5": 262144,
"moonshotai/Kimi-K2-Thinking": 262144,
"MiniMaxAI/MiniMax-M2.5": 204800,
"minimaxai/minimax-m2.5": 1048576,
"XiaomiMiMo/MiMo-V2-Flash": 32768,
"mimo-v2-pro": 1048576,
"mimo-v2-omni": 1048576,

View File

@@ -144,18 +144,22 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"kimi-k2-0905-preview",
],
"minimax": [
"MiniMax-M2.7",
"MiniMax-M2.7-highspeed",
"MiniMax-M1",
"MiniMax-M1-40k",
"MiniMax-M1-80k",
"MiniMax-M1-128k",
"MiniMax-M1-256k",
"MiniMax-M2.5",
"MiniMax-M2.5-highspeed",
"MiniMax-M2.1",
"MiniMax-M2.7",
],
"minimax-cn": [
"MiniMax-M2.7",
"MiniMax-M2.7-highspeed",
"MiniMax-M1",
"MiniMax-M1-40k",
"MiniMax-M1-80k",
"MiniMax-M1-128k",
"MiniMax-M1-256k",
"MiniMax-M2.5",
"MiniMax-M2.5-highspeed",
"MiniMax-M2.1",
"MiniMax-M2.7",
],
"anthropic": [
"claude-opus-4-6",

View File

@@ -163,6 +163,16 @@ def _resolve_runtime_from_pool_entry(
api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
else:
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
# Honour model.base_url from config.yaml when the configured provider
# matches this provider — same pattern as the Anthropic branch above.
# Only override when the pool entry has no explicit base_url (i.e. it
# fell back to the hardcoded default). Env var overrides win (#6039).
pconfig = PROVIDER_REGISTRY.get(provider)
pool_url_is_default = pconfig and base_url.rstrip("/") == pconfig.inference_base_url.rstrip("/")
if configured_provider == provider and pool_url_is_default:
cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
if cfg_base_url:
base_url = cfg_base_url
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
if configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
api_mode = configured_mode
@@ -724,7 +734,15 @@ def resolve_runtime_provider(
pconfig = PROVIDER_REGISTRY.get(provider)
if pconfig and pconfig.auth_type == "api_key":
creds = resolve_api_key_provider_credentials(provider)
base_url = creds.get("base_url", "").rstrip("/")
# Honour model.base_url from config.yaml when the configured provider
# matches this provider — mirrors the Anthropic path above. Without
# this, users who set model.base_url to e.g. api.minimaxi.com/anthropic
# (China endpoint) still get the hardcoded api.minimax.io default (#6039).
# Only apply the config override while the credentials still carry the
# hardcoded provider default — an explicit env var (e.g. MINIMAX_BASE_URL)
# surfaces through creds and must keep winning, matching the pool-entry path.
cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
cfg_base_url = ""
if cfg_provider == provider:
    cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
creds_base_url = creds.get("base_url", "").rstrip("/")
if cfg_base_url and creds_base_url == pconfig.inference_base_url.rstrip("/"):
    base_url = cfg_base_url
else:
    base_url = creds_base_url
api_mode = "chat_completions"
if provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))

View File

@@ -105,8 +105,8 @@ _DEFAULT_PROVIDER_MODELS = {
],
"zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"],
"kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"],
"minimax": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
"minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"],
"minimax": ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"],
"minimax-cn": ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"],
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
"opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],

View File

@@ -0,0 +1,105 @@
"""Tests for MiniMax provider hardening — context lengths, thinking guard, catalog."""
class TestMinimaxContextLengths:
    """Verify per-model context length entries for MiniMax models."""

    def test_m1_variants_have_1m_context(self):
        """Every M1 size variant carries the documented 1,000,000-token window."""
        from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS
        # Keys are lowercase because the lookup lowercases model names
        for model in ("minimax-m1", "minimax-m1-40k", "minimax-m1-80k",
                      "minimax-m1-128k", "minimax-m1-256k"):
            assert model in DEFAULT_CONTEXT_LENGTHS, f"{model} missing from context lengths"
            assert DEFAULT_CONTEXT_LENGTHS[model] == 1_000_000, f"{model} expected 1M"

    def test_m2_variants_have_1m_context(self):
        """M2.5 / M2.7 use the 1,048,576-token (2**20) window."""
        from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS
        # Keys are lowercase because the lookup lowercases model names
        for model in ("minimax-m2.5", "minimax-m2.7"):
            assert model in DEFAULT_CONTEXT_LENGTHS, f"{model} missing from context lengths"
            assert DEFAULT_CONTEXT_LENGTHS[model] == 1_048_576, f"{model} expected 1048576"

    def test_minimax_prefix_fallback(self):
        """Unknown MiniMax models fall back to the generic prefix entry."""
        from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS
        # The generic "minimax" prefix entry matches the M2.x window (1,048,576),
        # not the M1 window of 1,000,000 — keep this in sync with the M2 entries.
        assert DEFAULT_CONTEXT_LENGTHS["minimax"] == 1_048_576
class TestMinimaxThinkingGuard:
    """Verify that build_anthropic_kwargs does NOT add thinking params for MiniMax models."""

    @staticmethod
    def _build(model, effort):
        # Shared driver: request reasoning for the given model and return
        # the kwargs dict the adapter assembles.
        from agent.anthropic_adapter import build_anthropic_kwargs
        return build_anthropic_kwargs(
            model=model,
            messages=[{"role": "user", "content": "hello"}],
            tools=None,
            max_tokens=4096,
            reasoning_config={"enabled": True, "effort": effort},
        )

    def test_no_thinking_for_minimax_m27(self):
        """MiniMax M2.7 gets neither manual thinking nor adaptive output_config."""
        result = self._build("MiniMax-M2.7", "medium")
        assert "thinking" not in result
        assert "output_config" not in result

    def test_no_thinking_for_minimax_m1(self):
        """The guard also covers M1-family model names."""
        result = self._build("MiniMax-M1-128k", "high")
        assert "thinking" not in result

    def test_thinking_still_works_for_claude(self):
        """Non-MiniMax, non-Haiku models keep their thinking parameter."""
        result = self._build("claude-sonnet-4-20250514", "medium")
        assert "thinking" in result
class TestMinimaxAuxModel:
    """Verify auxiliary model is standard (not highspeed)."""

    def test_minimax_aux_is_standard(self):
        """Both regional provider ids point at the plain M2.7 model."""
        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
        for provider_id in ("minimax", "minimax-cn"):
            assert _API_KEY_PROVIDER_AUX_MODELS[provider_id] == "MiniMax-M2.7"

    def test_minimax_aux_not_highspeed(self):
        """Guard against reintroducing a -highspeed variant."""
        from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
        for provider_id in ("minimax", "minimax-cn"):
            assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS[provider_id]
class TestMinimaxModelCatalog:
    """Verify the model catalog includes M1 family and excludes deprecated models."""
    def test_catalog_includes_m1_family(self):
        # Every M1 context-size variant must be selectable for both the
        # international and China provider ids.
        from hermes_cli.models import _PROVIDER_MODELS
        for provider in ("minimax", "minimax-cn"):
            models = _PROVIDER_MODELS[provider]
            assert "MiniMax-M1" in models
            assert "MiniMax-M1-40k" in models
            assert "MiniMax-M1-80k" in models
            assert "MiniMax-M1-128k" in models
            assert "MiniMax-M1-256k" in models
    def test_catalog_excludes_deprecated(self):
        # M2.1 was removed from the catalog in this change — keep it out.
        from hermes_cli.models import _PROVIDER_MODELS
        for provider in ("minimax", "minimax-cn"):
            models = _PROVIDER_MODELS[provider]
            assert "MiniMax-M2.1" not in models
    def test_catalog_excludes_highspeed(self):
        # -highspeed variants were dropped alongside the aux-model change.
        from hermes_cli.models import _PROVIDER_MODELS
        for provider in ("minimax", "minimax-cn"):
            models = _PROVIDER_MODELS[provider]
            assert "MiniMax-M2.7-highspeed" not in models
            assert "MiniMax-M2.5-highspeed" not in models

View File

@@ -808,6 +808,55 @@ def test_minimax_explicit_api_mode_respected(monkeypatch):
assert resolved["api_mode"] == "chat_completions"
def test_minimax_config_base_url_overrides_hardcoded_default(monkeypatch):
"""model.base_url in config.yaml should override the hardcoded default (#6039)."""
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax")
monkeypatch.setattr(rp, "_get_model_config", lambda: {
"provider": "minimax",
"base_url": "https://api.minimaxi.com/anthropic",
})
monkeypatch.setenv("MINIMAX_API_KEY", "test-minimax-key")
monkeypatch.delenv("MINIMAX_BASE_URL", raising=False)
resolved = rp.resolve_runtime_provider(requested="minimax")
assert resolved["provider"] == "minimax"
assert resolved["base_url"] == "https://api.minimaxi.com/anthropic"
assert resolved["api_mode"] == "anthropic_messages"
def test_minimax_env_base_url_still_wins_over_config(monkeypatch):
"""MINIMAX_BASE_URL env var should take priority over config.yaml model.base_url."""
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax")
monkeypatch.setattr(rp, "_get_model_config", lambda: {
"provider": "minimax",
"base_url": "https://api.minimaxi.com/anthropic",
})
monkeypatch.setenv("MINIMAX_API_KEY", "test-minimax-key")
monkeypatch.setenv("MINIMAX_BASE_URL", "https://custom.example.com/v1")
resolved = rp.resolve_runtime_provider(requested="minimax")
# Env var wins because resolve_api_key_provider_credentials prefers it
assert resolved["base_url"] == "https://custom.example.com/v1"
def test_minimax_config_base_url_ignored_for_different_provider(monkeypatch):
"""model.base_url should NOT be used when model.provider doesn't match."""
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax")
monkeypatch.setattr(rp, "_get_model_config", lambda: {
"provider": "openrouter",
"base_url": "https://some-other-endpoint.com/v1",
})
monkeypatch.setenv("MINIMAX_API_KEY", "test-minimax-key")
monkeypatch.delenv("MINIMAX_BASE_URL", raising=False)
resolved = rp.resolve_runtime_provider(requested="minimax")
# Should use the default, NOT the config base_url from a different provider
assert resolved["base_url"] == "https://api.minimax.io/anthropic"
def test_alibaba_default_coding_intl_endpoint_uses_chat_completions(monkeypatch):
"""Alibaba default coding-intl /v1 URL should use chat_completions mode."""
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "alibaba")

View File

@@ -34,8 +34,8 @@ class TestSetupProviderModelSelection:
@pytest.mark.parametrize("provider_id,expected_defaults", [
("zai", ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"]),
("kimi-coding", ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"]),
("minimax", ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]),
("minimax-cn", ["MiniMax-M2.7", "MiniMax-M2.7-highspeed", "MiniMax-M2.5", "MiniMax-M2.5-highspeed", "MiniMax-M2.1"]),
("minimax", ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"]),
("minimax-cn", ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"]),
("opencode-zen", ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash"]),
("opencode-go", ["glm-5", "kimi-k2.5", "minimax-m2.5", "minimax-m2.7"]),
])