From 2ccdadcca6296d3a4128830865067c52f5ea2d5d Mon Sep 17 00:00:00 2001 From: zkl Date: Fri, 24 Apr 2026 14:48:55 +0800 Subject: [PATCH] fix(deepseek): bump V4 family context window to 1M tokens #14934 added deepseek-v4-pro / deepseek-v4-flash to the DeepSeek native provider but the context-window lookup still falls back to the existing "deepseek" substring entry (128K). DeepSeek V4 ships with a 1M context window, so any caller relying on get_model_context_length() for pre-flight token budgeting (compression, context warnings) under-counts by ~8x. Add explicit lowercase entries for the four DeepSeek model ids that ship 1M context: - deepseek-v4-pro - deepseek-v4-flash - deepseek-chat (legacy alias, server-side maps to v4-flash non-thinking) - deepseek-reasoner (legacy alias, server-side maps to v4-flash thinking) Longest-key-first substring matching means these explicit entries also cover the vendor-prefixed forms (deepseek/deepseek-v4-pro on OpenRouter and Nous Portal) without regressing the existing 128K fallback for older / unknown DeepSeek model ids on custom endpoints. Source: https://api-docs.deepseek.com/zh-cn/quick_start/pricing --- agent/model_metadata.py | 12 +++++++++- tests/agent/test_model_metadata.py | 37 ++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 29d5e1e89b..bce3a9998f 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -164,7 +164,17 @@ DEFAULT_CONTEXT_LENGTHS = { "gemma-4-31b": 256000, "gemma-3": 131072, "gemma": 8192, # fallback for older gemma models - # DeepSeek + # DeepSeek — V4 family ships with a 1M context window. The legacy + # aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side + # mapped to the non-thinking / thinking modes of ``deepseek-v4-flash`` + # and inherit the same 1M window. The ``deepseek`` substring entry + # below remains as a 128K fallback for older / unknown DeepSeek model + # ids (e.g. via custom endpoints). + # https://api-docs.deepseek.com/zh-cn/quick_start/pricing + "deepseek-v4-pro": 1_000_000, + "deepseek-v4-flash": 1_000_000, + "deepseek-chat": 1_000_000, + "deepseek-reasoner": 1_000_000, "deepseek": 128000, # Meta "llama": 131072, diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py index 42ec0a464f..d08cac3102 100644 --- a/tests/agent/test_model_metadata.py +++ b/tests/agent/test_model_metadata.py @@ -192,6 +192,43 @@ class TestDefaultContextLengths: f"{model_id}: expected {expected_ctx}, got {actual}" ) + def test_deepseek_v4_models_1m_context(self): + from agent.model_metadata import get_model_context_length + from unittest.mock import patch as mock_patch + + expected_keys = { + "deepseek-v4-pro": 1_000_000, + "deepseek-v4-flash": 1_000_000, + "deepseek-chat": 1_000_000, + "deepseek-reasoner": 1_000_000, + } + for key, value in expected_keys.items(): + assert key in DEFAULT_CONTEXT_LENGTHS, f"{key} missing" + assert DEFAULT_CONTEXT_LENGTHS[key] == value, ( + f"{key} should be {value}, got {DEFAULT_CONTEXT_LENGTHS[key]}" + ) + + # Longest-first substring matching must resolve both the bare V4 + # ids (native DeepSeek) and the vendor-prefixed forms (OpenRouter + # / Nous Portal) to 1M without probing down to the legacy 128K + # ``deepseek`` substring fallback. + with mock_patch("agent.model_metadata.fetch_model_metadata", return_value={}), \ + mock_patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \ + mock_patch("agent.model_metadata.get_cached_context_length", return_value=None): + cases = [ + ("deepseek-v4-pro", 1_000_000), + ("deepseek-v4-flash", 1_000_000), + ("deepseek/deepseek-v4-pro", 1_000_000), + ("deepseek/deepseek-v4-flash", 1_000_000), + ("deepseek-chat", 1_000_000), + ("deepseek-reasoner", 1_000_000), + ] + for model_id, expected_ctx in cases: + actual = get_model_context_length(model_id) + assert actual == expected_ctx, ( + f"{model_id}: expected {expected_ctx}, got {actual}" + ) + def test_all_values_positive(self): for key, value in DEFAULT_CONTEXT_LENGTHS.items(): assert value > 0, f"{key} has non-positive context length"