tests/test_mini_swe_runner.py

from types import SimpleNamespace
from unittest.mock import MagicMock, patch


def test_run_task_kimi_omits_temperature():
    """Check that a Kimi coding model run sends no ``temperature`` kwarg.

    Temperature is chosen server-side by the Kimi gateway, so the runner
    must not apply a client-side override: the chat-completions call made
    during ``run_task`` may not contain a ``temperature`` key at all.
    """
    # Canned final answer with no tool calls.
    final_message = SimpleNamespace(content="done", tool_calls=[])
    canned_response = SimpleNamespace(
        choices=[SimpleNamespace(message=final_message)]
    )

    with patch("openai.OpenAI") as openai_cls:
        fake_client = MagicMock()
        fake_client.chat.completions.create.return_value = canned_response
        openai_cls.return_value = fake_client

        # Imported while the patch is active, matching the original ordering.
        from mini_swe_runner import MiniSWERunner

        runner_settings = {
            "model": "kimi-for-coding",
            "base_url": "https://api.kimi.com/coding/v1",
            "api_key": "test-key",
            "env_type": "local",
            "max_iterations": 1,
        }
        runner = MiniSWERunner(**runner_settings)

        # Replace the environment setup/teardown hooks with mocks.
        runner._create_env = MagicMock()
        runner._cleanup_env = MagicMock()

        outcome = runner.run_task("2+2")

    assert outcome["completed"] is True

    sent_kwargs = fake_client.chat.completions.create.call_args.kwargs
    assert "temperature" not in sent_kwargs


def test_run_task_public_moonshot_kimi_k2_5_omits_temperature():
    """Check that kimi-k2.5 on the public Moonshot API sends no ``temperature``.

    The runner is pointed at ``https://api.moonshot.ai/v1`` and the
    chat-completions call it makes must not include a forced
    ``temperature`` key.
    """
    moonshot_url = "https://api.moonshot.ai/v1"

    # Canned final answer with no tool calls.
    final_message = SimpleNamespace(content="done", tool_calls=[])
    canned_response = SimpleNamespace(
        choices=[SimpleNamespace(message=final_message)]
    )

    with patch("openai.OpenAI") as openai_cls:
        fake_client = MagicMock()
        # Give the mocked client the same base URL the runner is configured with.
        fake_client.base_url = moonshot_url
        fake_client.chat.completions.create.return_value = canned_response
        openai_cls.return_value = fake_client

        # Imported while the patch is active, matching the original ordering.
        from mini_swe_runner import MiniSWERunner

        runner_settings = {
            "model": "kimi-k2.5",
            "base_url": moonshot_url,
            "api_key": "test-key",
            "env_type": "local",
            "max_iterations": 1,
        }
        runner = MiniSWERunner(**runner_settings)

        # Replace the environment setup/teardown hooks with mocks.
        runner._create_env = MagicMock()
        runner._cleanup_env = MagicMock()

        outcome = runner.run_task("2+2")

    assert outcome["completed"] is True

    sent_kwargs = fake_client.chat.completions.create.call_args.kwargs
    assert "temperature" not in sent_kwargs
fix(kimi): cover remaining fixed-temperature bypasses 2026-04-17 20:39:24 -06:00			`from types import SimpleNamespace`
			`from unittest.mock import MagicMock, patch`


fix(kimi): omit temperature entirely for Kimi/Moonshot models (#13157) Kimi's gateway selects the correct temperature server-side based on the active mode (thinking -> 1.0, non-thinking -> 0.6). Sending any temperature value — even the previously "correct" one — conflicts with gateway-managed defaults. Replaces the old approach of forcing specific temperature values (0.6 for non-thinking, 1.0 for thinking) with an OMIT_TEMPERATURE sentinel that tells all call sites to strip the temperature key from API kwargs entirely. Changes: - agent/auxiliary_client.py: OMIT_TEMPERATURE sentinel, _is_kimi_model() prefix check (covers all kimi-* models), _fixed_temperature_for_model() returns sentinel for kimi models. _build_call_kwargs() strips temp. - run_agent.py: _build_api_kwargs, flush_memories, and summary generation paths all handle the sentinel by popping/omitting temperature. - trajectory_compressor.py: _effective_temperature_for_model returns None for kimi (sentinel mapped), direct client calls use kwargs dict to conditionally include temperature. - mini_swe_runner.py: same sentinel handling via wrapper function. - 6 test files updated: all 'forces temperature X' assertions replaced with 'temperature not in kwargs' assertions. Net: -76 lines (171 added, 247 removed). Inspired by PR #13137 (@kshitijk4poor). 2026-04-20 12:23:05 -07:00			`def test_run_task_kimi_omits_temperature():`
			`"""Kimi models should NOT have client-side temperature overrides.`

			`The Kimi gateway selects the correct temperature server-side.`
			`"""`
fix(kimi): cover remaining fixed-temperature bypasses 2026-04-17 20:39:24 -06:00			`with patch("openai.OpenAI") as mock_openai:`
			`client = MagicMock()`
			`client.chat.completions.create.return_value = SimpleNamespace(`
			`choices=[SimpleNamespace(message=SimpleNamespace(content="done", tool_calls=[]))]`
			`)`
			`mock_openai.return_value = client`

			`from mini_swe_runner import MiniSWERunner`

			`runner = MiniSWERunner(`
			`model="kimi-for-coding",`
			`base_url="https://api.kimi.com/coding/v1",`
			`api_key="test-key",`
			`env_type="local",`
			`max_iterations=1,`
			`)`
			`runner._create_env = MagicMock()`
			`runner._cleanup_env = MagicMock()`

			`result = runner.run_task("2+2")`

			`assert result["completed"] is True`
fix(kimi): omit temperature entirely for Kimi/Moonshot models (#13157) Kimi's gateway selects the correct temperature server-side based on the active mode (thinking -> 1.0, non-thinking -> 0.6). Sending any temperature value — even the previously "correct" one — conflicts with gateway-managed defaults. Replaces the old approach of forcing specific temperature values (0.6 for non-thinking, 1.0 for thinking) with an OMIT_TEMPERATURE sentinel that tells all call sites to strip the temperature key from API kwargs entirely. Changes: - agent/auxiliary_client.py: OMIT_TEMPERATURE sentinel, _is_kimi_model() prefix check (covers all kimi-* models), _fixed_temperature_for_model() returns sentinel for kimi models. _build_call_kwargs() strips temp. - run_agent.py: _build_api_kwargs, flush_memories, and summary generation paths all handle the sentinel by popping/omitting temperature. - trajectory_compressor.py: _effective_temperature_for_model returns None for kimi (sentinel mapped), direct client calls use kwargs dict to conditionally include temperature. - mini_swe_runner.py: same sentinel handling via wrapper function. - 6 test files updated: all 'forces temperature X' assertions replaced with 'temperature not in kwargs' assertions. Net: -76 lines (171 added, 247 removed). Inspired by PR #13137 (@kshitijk4poor). 2026-04-20 12:23:05 -07:00			`assert "temperature" not in client.chat.completions.create.call_args.kwargs`
fix: propagate kimi base-url temperature overrides Follow up salvaged PR #12668 by threading base_url through the remaining direct-call sites so kimi-k2.5 uses temperature=1.0 on api.moonshot.ai and keeps 0.6 on api.kimi.com/coding. Add focused regression tests for run_agent, trajectory_compressor, and mini_swe_runner. 2026-04-20 01:35:42 +05:30

fix(kimi): omit temperature entirely for Kimi/Moonshot models (#13157) Kimi's gateway selects the correct temperature server-side based on the active mode (thinking -> 1.0, non-thinking -> 0.6). Sending any temperature value — even the previously "correct" one — conflicts with gateway-managed defaults. Replaces the old approach of forcing specific temperature values (0.6 for non-thinking, 1.0 for thinking) with an OMIT_TEMPERATURE sentinel that tells all call sites to strip the temperature key from API kwargs entirely. Changes: - agent/auxiliary_client.py: OMIT_TEMPERATURE sentinel, _is_kimi_model() prefix check (covers all kimi-* models), _fixed_temperature_for_model() returns sentinel for kimi models. _build_call_kwargs() strips temp. - run_agent.py: _build_api_kwargs, flush_memories, and summary generation paths all handle the sentinel by popping/omitting temperature. - trajectory_compressor.py: _effective_temperature_for_model returns None for kimi (sentinel mapped), direct client calls use kwargs dict to conditionally include temperature. - mini_swe_runner.py: same sentinel handling via wrapper function. - 6 test files updated: all 'forces temperature X' assertions replaced with 'temperature not in kwargs' assertions. Net: -76 lines (171 added, 247 removed). Inspired by PR #13137 (@kshitijk4poor). 2026-04-20 12:23:05 -07:00			`def test_run_task_public_moonshot_kimi_k2_5_omits_temperature():`
			`"""kimi-k2.5 on the public Moonshot API should not get a forced temperature."""`
fix: propagate kimi base-url temperature overrides Follow up salvaged PR #12668 by threading base_url through the remaining direct-call sites so kimi-k2.5 uses temperature=1.0 on api.moonshot.ai and keeps 0.6 on api.kimi.com/coding. Add focused regression tests for run_agent, trajectory_compressor, and mini_swe_runner. 2026-04-20 01:35:42 +05:30			`with patch("openai.OpenAI") as mock_openai:`
			`client = MagicMock()`
			`client.base_url = "https://api.moonshot.ai/v1"`
			`client.chat.completions.create.return_value = SimpleNamespace(`
			`choices=[SimpleNamespace(message=SimpleNamespace(content="done", tool_calls=[]))]`
			`)`
			`mock_openai.return_value = client`

			`from mini_swe_runner import MiniSWERunner`

			`runner = MiniSWERunner(`
			`model="kimi-k2.5",`
			`base_url="https://api.moonshot.ai/v1",`
			`api_key="test-key",`
			`env_type="local",`
			`max_iterations=1,`
			`)`
			`runner._create_env = MagicMock()`
			`runner._cleanup_env = MagicMock()`

			`result = runner.run_task("2+2")`

			`assert result["completed"] is True`
fix(kimi): omit temperature entirely for Kimi/Moonshot models (#13157) Kimi's gateway selects the correct temperature server-side based on the active mode (thinking -> 1.0, non-thinking -> 0.6). Sending any temperature value — even the previously "correct" one — conflicts with gateway-managed defaults. Replaces the old approach of forcing specific temperature values (0.6 for non-thinking, 1.0 for thinking) with an OMIT_TEMPERATURE sentinel that tells all call sites to strip the temperature key from API kwargs entirely. Changes: - agent/auxiliary_client.py: OMIT_TEMPERATURE sentinel, _is_kimi_model() prefix check (covers all kimi-* models), _fixed_temperature_for_model() returns sentinel for kimi models. _build_call_kwargs() strips temp. - run_agent.py: _build_api_kwargs, flush_memories, and summary generation paths all handle the sentinel by popping/omitting temperature. - trajectory_compressor.py: _effective_temperature_for_model returns None for kimi (sentinel mapped), direct client calls use kwargs dict to conditionally include temperature. - mini_swe_runner.py: same sentinel handling via wrapper function. - 6 test files updated: all 'forces temperature X' assertions replaced with 'temperature not in kwargs' assertions. Net: -76 lines (171 added, 247 removed). Inspired by PR #13137 (@kshitijk4poor). 2026-04-20 12:23:05 -07:00			`assert "temperature" not in client.chat.completions.create.call_args.kwargs`