diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 9156eaa26f..c9d83f3b7c 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -116,8 +116,25 @@ _KIMI_THINKING_MODELS: frozenset = frozenset({ "kimi-k2-thinking-turbo", }) +# Moonshot's public chat endpoint (api.moonshot.ai/v1) enforces a different +# temperature contract than the Coding Plan endpoint above. Empirically, +# `kimi-k2.5` on the public API rejects 0.6 with HTTP 400 +# "invalid temperature: only 1 is allowed for this model" — the Coding Plan +# lock (0.6 for non-thinking) does not apply. `kimi-k2-turbo-preview` and the +# thinking variants already match the Coding Plan contract on the public +# endpoint, so we only override the models that diverge. +# Users hit this endpoint when `KIMI_API_KEY` is a legacy `sk-*` key (the +# `sk-kimi-*` prefix routes to api.kimi.com/coding/v1 instead — see +# hermes_cli/auth.py:_kimi_base_url_for_key). +_KIMI_PUBLIC_API_OVERRIDES: Dict[str, float] = { + "kimi-k2.5": 1.0, +} -def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]: + +def _fixed_temperature_for_model( + model: Optional[str], + base_url: Optional[str] = None, +) -> Optional[float]: """Return a required temperature override for models with strict contracts. Moonshot's kimi-for-coding endpoint rejects any non-approved temperature on @@ -125,15 +142,31 @@ def _fixed_temperature_for_model(model: Optional[str]) -> Optional[float]: variants require 1.0. An optional ``vendor/`` prefix (e.g. ``moonshotai/kimi-k2.5``) is tolerated for aggregator routings. + When ``base_url`` points to Moonshot's public chat endpoint + (``api.moonshot.ai``), the contract changes for ``kimi-k2.5``: the public + API only accepts ``temperature=1``, not 0.6. That override takes precedence + over the Coding Plan defaults above. + Returns ``None`` for every other model, including ``kimi-k2-instruct*`` which is the separate non-coding K2 family with variable temperature. """ normalized = (model or "").strip().lower() + bare = normalized.rsplit("/", 1)[-1] + + # Public Moonshot API has a stricter contract for some models than the + # Coding Plan endpoint — check it first so it wins on conflict. + if base_url and "api.moonshot.ai" in base_url.lower(): + public = _KIMI_PUBLIC_API_OVERRIDES.get(bare) + if public is not None: + logger.debug( + "Forcing temperature=%s for %r on public Moonshot API", public, model + ) + return public + fixed = _FIXED_TEMPERATURE_MODELS.get(normalized) if fixed is not None: logger.debug("Forcing temperature=%s for model %r (fixed map)", fixed, model) return fixed - bare = normalized.rsplit("/", 1)[-1] if bare in _KIMI_THINKING_MODELS: logger.debug("Forcing temperature=1.0 for kimi thinking model %r", model) return 1.0 @@ -2417,7 +2450,7 @@ def _build_call_kwargs( "timeout": timeout, } - fixed_temperature = _fixed_temperature_for_model(model) + fixed_temperature = _fixed_temperature_for_model(model, base_url) if fixed_temperature is not None: temperature = fixed_temperature @@ -2598,11 +2631,14 @@ def call_llm( task, resolved_provider or "auto", final_model or "default", f" at {_base_info}" if _base_info and "openrouter" not in _base_info else "") + # Pass the client's actual base_url (not just resolved_base_url) so + # endpoint-specific temperature overrides can distinguish + # api.moonshot.ai vs api.kimi.com/coding even on auto-detected routes. kwargs = _build_call_kwargs( resolved_provider, final_model, messages, temperature=temperature, max_tokens=max_tokens, tools=tools, timeout=effective_timeout, extra_body=extra_body, - base_url=resolved_base_url) + base_url=_base_info or resolved_base_url) # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax) _client_base = str(getattr(client, "base_url", "") or "") @@ -2656,7 +2692,8 @@ def call_llm( fb_label, fb_model, messages, temperature=temperature, max_tokens=max_tokens, tools=tools, timeout=effective_timeout, - extra_body=extra_body) + extra_body=extra_body, + base_url=str(getattr(fb_client, "base_url", "") or "")) return _validate_llm_response( fb_client.chat.completions.create(**fb_kwargs), task) raise @@ -2791,14 +2828,17 @@ async def async_call_llm( effective_timeout = timeout if timeout is not None else _get_task_timeout(task) + # Pass the client's actual base_url (not just resolved_base_url) so + # endpoint-specific temperature overrides can distinguish + # api.moonshot.ai vs api.kimi.com/coding even on auto-detected routes. + _client_base = str(getattr(client, "base_url", "") or "") kwargs = _build_call_kwargs( resolved_provider, final_model, messages, temperature=temperature, max_tokens=max_tokens, tools=tools, timeout=effective_timeout, extra_body=extra_body, - base_url=resolved_base_url) + base_url=_client_base or resolved_base_url) # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax) - _client_base = str(getattr(client, "base_url", "") or "") if _is_anthropic_compat_endpoint(resolved_provider, _client_base): kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"]) @@ -2834,7 +2874,8 @@ async def async_call_llm( fb_label, fb_model, messages, temperature=temperature, max_tokens=max_tokens, tools=tools, timeout=effective_timeout, - extra_body=extra_body) + extra_body=extra_body, + base_url=str(getattr(fb_client, "base_url", "") or "")) # Convert sync fallback client to async async_fb, async_fb_model = _to_async_client(fb_client, fb_model or "") if async_fb_model and async_fb_model != fb_kwargs.get("model"): diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index aea8152a53..efce666e58 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -832,6 +832,92 @@ class TestKimiForCodingTemperature: assert kwargs["temperature"] == 0.3 + # ── Endpoint-aware overrides: api.moonshot.ai vs api.kimi.com/coding ── + # The public Moonshot chat endpoint and the Coding Plan endpoint enforce + # different temperature contracts for the same model name. `kimi-k2.5` on + # api.moonshot.ai rejects 0.6 with HTTP 400 "only 1 is allowed for this + # model", while the Coding Plan docs mandate 0.6. Override must pick the + # right value per base_url. + + @pytest.mark.parametrize( + "base_url", + [ + "https://api.moonshot.ai/v1", + "https://api.moonshot.ai/v1/", + "https://API.MOONSHOT.AI/v1", + ], + ) + def test_kimi_k2_5_public_api_forces_temperature_1(self, base_url): + """kimi-k2.5 on the public Moonshot API only accepts temperature=1.""" + from agent.auxiliary_client import _build_call_kwargs + + kwargs = _build_call_kwargs( + provider="kimi-coding", + model="kimi-k2.5", + messages=[{"role": "user", "content": "hello"}], + temperature=0.1, + base_url=base_url, + ) + + assert kwargs["temperature"] == 1.0 + + def test_kimi_k2_5_coding_plan_keeps_temperature_0_6(self): + """kimi-k2.5 on api.kimi.com/coding keeps the Coding Plan's 0.6 lock.""" + from agent.auxiliary_client import _build_call_kwargs + + kwargs = _build_call_kwargs( + provider="kimi-coding", + model="kimi-k2.5", + messages=[{"role": "user", "content": "hello"}], + temperature=0.1, + base_url="https://api.kimi.com/coding/v1", + ) + + assert kwargs["temperature"] == 0.6 + + def test_kimi_k2_5_no_base_url_falls_back_to_coding_plan_lock(self): + """Without a base_url hint, the Coding Plan default (0.6) applies. + + Preserves PR #12144 backward compatibility for callers that don't thread + the client's base_url through. + """ + from agent.auxiliary_client import _build_call_kwargs + + kwargs = _build_call_kwargs( + provider="kimi-coding", + model="kimi-k2.5", + messages=[{"role": "user", "content": "hello"}], + temperature=0.1, + ) + + assert kwargs["temperature"] == 0.6 + + @pytest.mark.parametrize( + "model,expected", + [ + # Only kimi-k2.5 diverges on api.moonshot.ai; the rest keep the + # Coding Plan lock (empirically verified against Moonshot in April + # 2026: turbo-preview accepts 0.6, thinking-turbo accepts 1.0). + ("kimi-k2-turbo-preview", 0.6), + ("kimi-k2-0905-preview", 0.6), + ("kimi-k2-thinking", 1.0), + ("kimi-k2-thinking-turbo", 1.0), + ("moonshotai/kimi-k2-thinking-turbo", 1.0), + ], + ) + def test_other_kimi_k2_family_unchanged_on_public_api(self, model, expected): + from agent.auxiliary_client import _build_call_kwargs + + kwargs = _build_call_kwargs( + provider="kimi-coding", + model=model, + messages=[{"role": "user", "content": "hello"}], + temperature=0.1, + base_url="https://api.moonshot.ai/v1", + ) + + assert kwargs["temperature"] == expected + # --------------------------------------------------------------------------- # async_call_llm payment / connection fallback (#7512 bug 2)