From 2ccdadcca6296d3a4128830865067c52f5ea2d5d Mon Sep 17 00:00:00 2001
From: zkl <i@zkl2333.com>
Date: Fri, 24 Apr 2026 14:48:55 +0800
Subject: [PATCH] fix(deepseek): bump V4 family context window to 1M tokens

Although #14934 added deepseek-v4-pro / deepseek-v4-flash to the DeepSeek
native provider, the context-window lookup still falls back to the
existing "deepseek" substring entry (128K). DeepSeek V4 ships with a 1M
context window, so any caller relying on get_model_context_length() for
pre-flight token budgeting (compression, context warnings)
underestimates the available window by ~8x.

Add explicit lowercase entries for the four DeepSeek model ids that
ship 1M context:

- deepseek-v4-pro
- deepseek-v4-flash
- deepseek-chat (legacy alias, server-side maps to v4-flash non-thinking)
- deepseek-reasoner (legacy alias, server-side maps to v4-flash thinking)

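Longest-key-first substring matching means these explicit entries also
cover the vendor-prefixed forms (deepseek/deepseek-v4-pro on OpenRouter
and Nous Portal) without regressing the existing 128K fallback for
older / unknown DeepSeek model ids on custom endpoints. Caveat: because
matching is by substring, any id that merely contains "deepseek-chat" or
"deepseek-reasoner" (e.g. a versioned or dated variant of those names)
will presumably also resolve to 1M rather than 128K.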

Source: https://api-docs.deepseek.com/zh-cn/quick_start/pricing
---
 agent/model_metadata.py            | 12 +++++++++-
 tests/agent/test_model_metadata.py | 37 ++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 29d5e1e89b..bce3a9998f 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -164,7 +164,17 @@ DEFAULT_CONTEXT_LENGTHS = {
     "gemma-4-31b": 256000,
     "gemma-3": 131072,
     "gemma": 8192,  # fallback for older gemma models
-    # DeepSeek
+    # DeepSeek — V4 family ships with a 1M context window. The legacy
+    # aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side
+    # mapped to the non-thinking / thinking modes of ``deepseek-v4-flash``
+    # and inherit the same 1M window. The ``deepseek`` substring entry
+    # below remains as a 128K fallback for older / unknown DeepSeek model
+    # ids (e.g. via custom endpoints).
+    # https://api-docs.deepseek.com/zh-cn/quick_start/pricing
+    "deepseek-v4-pro": 1_000_000,
+    "deepseek-v4-flash": 1_000_000,
+    "deepseek-chat": 1_000_000,
+    "deepseek-reasoner": 1_000_000,
     "deepseek": 128000,
     # Meta
     "llama": 131072,
diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py
index 42ec0a464f..d08cac3102 100644
--- a/tests/agent/test_model_metadata.py
+++ b/tests/agent/test_model_metadata.py
@@ -192,6 +192,43 @@ class TestDefaultContextLengths:
                     f"{model_id}: expected {expected_ctx}, got {actual}"
                 )
 
+    def test_deepseek_v4_models_1m_context(self):
+        from agent.model_metadata import get_model_context_length
+        from unittest.mock import patch as mock_patch
+
+        expected_keys = {
+            "deepseek-v4-pro": 1_000_000,
+            "deepseek-v4-flash": 1_000_000,
+            "deepseek-chat": 1_000_000,
+            "deepseek-reasoner": 1_000_000,
+        }
+        for key, value in expected_keys.items():
+            assert key in DEFAULT_CONTEXT_LENGTHS, f"{key} missing"
+            assert DEFAULT_CONTEXT_LENGTHS[key] == value, (
+                f"{key} should be {value}, got {DEFAULT_CONTEXT_LENGTHS[key]}"
+            )
+
+        # Longest-first substring matching must resolve both the bare V4
+        # ids (native DeepSeek) and the vendor-prefixed forms (OpenRouter
+        # / Nous Portal) to 1M without probing down to the legacy 128K
+        # ``deepseek`` substring fallback.
+        with mock_patch("agent.model_metadata.fetch_model_metadata", return_value={}), \
+             mock_patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \
+             mock_patch("agent.model_metadata.get_cached_context_length", return_value=None):
+            cases = [
+                ("deepseek-v4-pro", 1_000_000),
+                ("deepseek-v4-flash", 1_000_000),
+                ("deepseek/deepseek-v4-pro", 1_000_000),
+                ("deepseek/deepseek-v4-flash", 1_000_000),
+                ("deepseek-chat", 1_000_000),
+                ("deepseek-reasoner", 1_000_000),
+            ]
+            for model_id, expected_ctx in cases:
+                actual = get_model_context_length(model_id)
+                assert actual == expected_ctx, (
+                    f"{model_id}: expected {expected_ctx}, got {actual}"
+                )
+
     def test_all_values_positive(self):
         for key, value in DEFAULT_CONTEXT_LENGTHS.items():
             assert value > 0, f"{key} has non-positive context length"