fix(xiaomi): consolidate MiMo accounting and error recovery

Salvages the Xiaomi MiMo pricing, usage-normalization, and error-classifier fixes from #41734/#41815, #41614/#42665, and #35972/#37478 into one focused cluster.

Co-authored-by: luarss <39641663+luarss@users.noreply.github.com>
Co-authored-by: Rylen Anil <rylen.anil@gmail.com>
Co-authored-by: liuhao1024 <sunsky.lau@gmail.com>
Co-authored-by: annguyenNous <annguyenNous@users.noreply.github.com>
Co-authored-by: Sujeet <64351924+sujeet111@users.noreply.github.com>
2026-06-17 07:31:21 +08:00 · 2026-06-15 06:32:09 -07:00
5 changed files with 201 additions and 1 deletion
--- a/agent/error_classifier.py
+++ b/agent/error_classifier.py
@@ -491,11 +491,13 @@ def classify_api_error(
    # "context length exceeded") is only in the inner JSON.
    _raw_msg = str(error).lower()
    _body_msg = ""
+    _param_msg = ""
    _metadata_msg = ""
    if isinstance(body, dict):
        _err_obj = body.get("error", {})
        if isinstance(_err_obj, dict):
            _body_msg = str(_err_obj.get("message") or "").lower()
+            _param_msg = str(_err_obj.get("param") or "").lower().replace("`", "")
            # Parse metadata.raw for wrapped provider errors
            _metadata = _err_obj.get("metadata", {})
            if isinstance(_metadata, dict):
@@ -512,10 +514,14 @@ def classify_api_error(
                        pass
        if not _body_msg:
            _body_msg = str(body.get("message") or "").lower()
+        if not _param_msg:
+            _param_msg = str(body.get("param") or "").lower().replace("`", "")
    # Combine all message sources for pattern matching
    parts = [_raw_msg]
    if _body_msg and _body_msg not in _raw_msg:
        parts.append(_body_msg)
+    if _param_msg and _param_msg not in _raw_msg and _param_msg not in _body_msg:
+        parts.append(_param_msg)
    if _metadata_msg and _metadata_msg not in _raw_msg and _metadata_msg not in _body_msg:
        parts.append(_metadata_msg)
    error_msg = " ".join(parts)
--- a/agent/usage_pricing.py
+++ b/agent/usage_pricing.py
@@ -410,6 +410,29 @@ _OFFICIAL_DOCS_PRICING: Dict[tuple[str, str], PricingEntry] = {
        source_url="https://api-docs.deepseek.com/quick_start/pricing",
        pricing_version="deepseek-pricing-2026-05-12",
    ),
+    # Xiaomi MiMo
+    (
+        "xiaomi",
+        "mimo-v2.5",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.14"),
+        output_cost_per_million=Decimal("0.28"),
+        cache_read_cost_per_million=Decimal("0.0028"),
+        source="official_docs_snapshot",
+        source_url="https://platform.xiaomimimo.com/docs/en-US/price/pay-as-you-go",
+        pricing_version="xiaomi-pricing-2026-06-01",
+    ),
+    (
+        "xiaomi",
+        "mimo-v2.5-pro",
+    ): PricingEntry(
+        input_cost_per_million=Decimal("0.435"),
+        output_cost_per_million=Decimal("0.87"),
+        cache_read_cost_per_million=Decimal("0.0036"),
+        source="official_docs_snapshot",
+        source_url="https://platform.xiaomimimo.com/docs/en-US/price/pay-as-you-go",
+        pricing_version="xiaomi-pricing-2026-06-01",
+    ),
    # Google Gemini
    (
        "google",
@@ -749,7 +772,12 @@ def normalize_usage(
        # Port of cline/cline#10266.
        cache_read_tokens = _to_int(getattr(details, "cached_tokens", 0) if details else 0)
        if not cache_read_tokens:
-            cache_read_tokens = _to_int(getattr(response_usage, "cache_read_input_tokens", 0))
+            cache_read_tokens = _to_int(
+                getattr(response_usage, "cache_read_input_tokens", 0)
+                or getattr(response_usage, "cache_hit_tokens", 0)
+                or getattr(response_usage, "hit_tokens", 0)
+                or getattr(response_usage, "cache_tokens", 0)
+            )
        cache_write_tokens = _to_int(
            getattr(details, "cache_write_tokens", 0) if details else 0
        )
@@ -763,6 +791,8 @@ def normalize_usage(
    output_details = getattr(response_usage, "output_tokens_details", None)
    if output_details:
        reasoning_tokens = _to_int(getattr(output_details, "reasoning_tokens", 0))
+    if not reasoning_tokens:
+        reasoning_tokens = _to_int(getattr(response_usage, "reasoning_tokens", 0))

    return CanonicalUsage(
        input_tokens=input_tokens,
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -415,6 +415,7 @@ AUTHOR_MAP = {
    "leon@agentlinker.ai": "agentlinker",
    "santoshhumagain1887@gmail.com": "npmisantosh",
    "39641663+luarss@users.noreply.github.com": "luarss",
+    "64351924+sujeet111@users.noreply.github.com": "sujeet111",
    "16263913+zccyman@users.noreply.github.com": "zccyman",
    "zccyman@users.noreply.github.com": "zccyman",  # PR #26998 (auxiliary fallback chain)
    "ahmetosrak@Ahmet-MacBook-Air.local": "Osraka",
--- a/tests/agent/test_error_classifier.py
+++ b/tests/agent/test_error_classifier.py
@@ -1612,6 +1612,47 @@ class TestMultimodalToolContentUnsupported:
        assert result.reason == FailoverReason.multimodal_tool_content_unsupported
        assert result.retryable is True

+    def test_xiaomi_mimo_nested_param_backtick_text_is_not_set(self):
+        """MiMo can put the actionable message in error.param, not error.message."""
+        e = MockAPIError(
+            "Error code: 400 - {'error': {'code': '400', 'message': 'Param Incorrect'}}",
+            status_code=400,
+            body={
+                "error": {
+                    "code": "400",
+                    "message": "Param Incorrect",
+                    "param": "`text` is not set",
+                }
+            },
+        )
+
+        result = classify_api_error(e, provider="xiaomi", model="mimo-v2.5-pro")
+
+        assert result.reason == FailoverReason.multimodal_tool_content_unsupported
+        assert result.retryable is True
+
+    def test_xiaomi_mimo_top_level_param_backtick_text_is_not_set(self):
+        e = MockAPIError(
+            "HTTP 400: Param Incorrect",
+            status_code=400,
+            body={"message": "Param Incorrect", "param": "`text` is not set"},
+        )
+
+        result = classify_api_error(e, provider="xiaomi", model="mimo-v2.5-pro")
+
+        assert result.reason == FailoverReason.multimodal_tool_content_unsupported
+
+    def test_xiaomi_mimo_unrelated_param_stays_format_error(self):
+        e = MockAPIError(
+            "HTTP 400: Param Incorrect",
+            status_code=400,
+            body={"message": "Param Incorrect", "param": "model"},
+        )
+
+        result = classify_api_error(e, provider="xiaomi", model="mimo-v2.5-pro")
+
+        assert result.reason == FailoverReason.format_error
+
    def test_generic_tool_message_must_be_string(self):
        e = MockAPIError(
            "tool message content must be a string",
--- a/tests/agent/test_usage_pricing.py
+++ b/tests/agent/test_usage_pricing.py
@@ -1,3 +1,4 @@
+from decimal import Decimal
 from types import SimpleNamespace

 from agent.usage_pricing import (
@@ -106,6 +107,73 @@ def test_normalize_usage_openai_prefers_prompt_tokens_details_over_top_level():
    assert normalized.cache_write_tokens == 150


+def test_normalize_usage_reads_xiaomi_cache_hit_tokens_variant():
+    usage = SimpleNamespace(
+        prompt_tokens=1000,
+        completion_tokens=200,
+        prompt_tokens_details=SimpleNamespace(cached_tokens=0),
+        cache_hit_tokens=700,
+    )
+
+    normalized = normalize_usage(usage, provider="xiaomi", api_mode="chat_completions")
+
+    assert normalized.cache_read_tokens == 700
+    assert normalized.input_tokens == 300
+    assert normalized.output_tokens == 200
+
+
+def test_normalize_usage_reads_xiaomi_hit_tokens_variant():
+    usage = SimpleNamespace(
+        prompt_tokens=1000,
+        completion_tokens=200,
+        prompt_tokens_details=SimpleNamespace(cached_tokens=0),
+        hit_tokens=650,
+    )
+
+    normalized = normalize_usage(usage, provider="xiaomi", api_mode="chat_completions")
+
+    assert normalized.cache_read_tokens == 650
+    assert normalized.input_tokens == 350
+
+
+def test_normalize_usage_reads_top_level_cache_tokens_variant():
+    usage = SimpleNamespace(
+        prompt_tokens=1000,
+        completion_tokens=200,
+        cache_tokens=600,
+    )
+
+    normalized = normalize_usage(usage, provider="xiaomi", api_mode="chat_completions")
+
+    assert normalized.cache_read_tokens == 600
+    assert normalized.input_tokens == 400
+
+
+def test_normalize_usage_reads_top_level_reasoning_tokens_when_details_missing():
+    usage = SimpleNamespace(
+        prompt_tokens=100,
+        completion_tokens=300,
+        reasoning_tokens=75,
+    )
+
+    normalized = normalize_usage(usage, provider="xiaomi", api_mode="chat_completions")
+
+    assert normalized.reasoning_tokens == 75
+
+
+def test_normalize_usage_prefers_nested_reasoning_tokens_over_top_level():
+    usage = SimpleNamespace(
+        prompt_tokens=100,
+        completion_tokens=300,
+        output_tokens_details=SimpleNamespace(reasoning_tokens=80),
+        reasoning_tokens=75,
+    )
+
+    normalized = normalize_usage(usage, provider="xiaomi", api_mode="chat_completions")
+
+    assert normalized.reasoning_tokens == 80
+
+
 def test_openrouter_models_api_pricing_is_converted_from_per_token_to_per_million(monkeypatch):
    monkeypatch.setattr(
        "agent.usage_pricing.fetch_model_metadata",
@@ -250,3 +318,57 @@ def test_deepseek_v4_pro_estimate_usage_cost():
    assert result.amount_usd is not None
    # 1M input × $1.74/M + 500K output × $3.48/M = $1.74 + $1.74 = $3.48
    assert float(result.amount_usd) == 3.48
+
+
+def test_xiaomi_mimo_v25_pricing_entry_exists():
+    entry = get_pricing_entry(
+        "mimo-v2.5",
+        provider="xiaomi",
+    )
+
+    assert entry is not None
+    assert entry.input_cost_per_million == Decimal("0.14")
+    assert entry.output_cost_per_million == Decimal("0.28")
+    assert entry.cache_read_cost_per_million == Decimal("0.0028")
+
+
+def test_xiaomi_mimo_v25_pro_pricing_entry_exists():
+    entry = get_pricing_entry(
+        "mimo-v2.5-pro",
+        provider="xiaomi",
+    )
+
+    assert entry is not None
+    assert entry.input_cost_per_million == Decimal("0.435")
+    assert entry.output_cost_per_million == Decimal("0.87")
+    assert entry.cache_read_cost_per_million == Decimal("0.0036")
+
+
+def test_xiaomi_mimo_v25_estimate_usage_cost_includes_cache_read():
+    result = estimate_usage_cost(
+        "mimo-v2.5",
+        CanonicalUsage(
+            input_tokens=1000000,
+            output_tokens=500000,
+            cache_read_tokens=1000000,
+        ),
+        provider="xiaomi",
+    )
+
+    assert result.status == "estimated"
+    assert result.amount_usd == Decimal("0.2828")
+
+
+def test_xiaomi_mimo_v25_pro_estimate_usage_cost_includes_cache_read():
+    result = estimate_usage_cost(
+        "mimo-v2.5-pro",
+        CanonicalUsage(
+            input_tokens=1000000,
+            output_tokens=500000,
+            cache_read_tokens=1000000,
+        ),
+        provider="xiaomi",
+    )
+
+    assert result.status == "estimated"
+    assert result.amount_usd == Decimal("0.8736")