From 438db0c7b062d5ceeadec5d9de009324ee822467 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Sun, 26 Apr 2026 05:43:31 -0700 Subject: [PATCH] fix(cli): /model picker honors provider-specific context caps (#16030) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `_apply_model_switch_result` (the interactive `/model` picker's confirmation path) printed `ModelInfo.context_window` straight from models.dev, which reports the vendor-wide value (1.05M for gpt-5.5 on openai). ChatGPT Codex OAuth caps the same slug at 272K, so the picker showed 1M while the runtime (compressor, gateway `/model`, typed `/model <name>`) correctly used 272K — the classic 'sometimes 1M, sometimes 272K' mismatch on a single model. Both display paths now go through `resolve_display_context_length()`, matching the fix that `_handle_model_switch` received earlier. Also bump the stale last-resort fallback in DEFAULT_CONTEXT_LENGTHS (`gpt-5.5: 400000 -> 1050000`) to match the real OpenAI API value; the 272K Codex cap is already enforced via the Codex-OAuth branch, so the fallback now reflects what every non-Codex probe-miss should see. Tests: adds `test_apply_model_switch_result_context.py` with three scenarios (Codex cap wins, OpenRouter shows 1.05M, resolver-empty falls back to ModelInfo). Updates the existing non-Codex fallback test to assert 1.05M (the correct value). 
## Validation | path | before | after | |-------------------------------|-----------|-----------| | picker -> gpt-5.5 on Codex | 1,050,000 | 272,000 | | picker -> gpt-5.5 on OpenAI | 1,050,000 | 1,050,000 | | picker -> gpt-5.5 on OpenRouter | 1,050,000 | 1,050,000 | | typed /model gpt-5.5 on Codex | 272,000 | 272,000 | --- agent/model_metadata.py | 9 +- cli.py | 30 ++-- tests/agent/test_model_metadata.py | 6 +- .../test_apply_model_switch_result_context.py | 152 ++++++++++++++++++ 4 files changed, 177 insertions(+), 20 deletions(-) create mode 100644 tests/hermes_cli/test_apply_model_switch_result_context.py diff --git a/agent/model_metadata.py b/agent/model_metadata.py index bce3a9998f..62c18218b1 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -145,10 +145,11 @@ DEFAULT_CONTEXT_LENGTHS = { "claude": 200000, # OpenAI — GPT-5 family (most have 400k; specific overrides first) # Source: https://developers.openai.com/api/docs/models - # GPT-5.5 (launched Apr 23 2026). 400k is the fallback for providers we - # can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of - # Apr 2026) and is resolved via _resolve_codex_oauth_context_length(). - "gpt-5.5": 400000, + # GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and + # ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own + # provider-aware branches (_resolve_codex_oauth_context_length + models.dev). + # This hardcoded value is only reached when every probe misses. 
+ "gpt-5.5": 1050000, "gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4) "gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4) "gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context) diff --git a/cli.py b/cli.py index 9f3e8964c4..bc77d4c350 100644 --- a/cli.py +++ b/cli.py @@ -5153,27 +5153,29 @@ class HermesCLI: _cprint(f" ✓ Model switched: {result.new_model}") _cprint(f" Provider: {provider_label}") + # Context: always resolve via the provider-aware chain so Codex OAuth, + # Copilot, and Nous-enforced caps win over the raw models.dev entry + # (e.g. gpt-5.5 is 1.05M on openai but 272K on Codex OAuth). mi = result.model_info + try: + from hermes_cli.model_switch import resolve_display_context_length + ctx = resolve_display_context_length( + result.new_model, + result.target_provider, + base_url=result.base_url or self.base_url or "", + api_key=result.api_key or self.api_key or "", + model_info=mi, + ) + if ctx: + _cprint(f" Context: {ctx:,} tokens") + except Exception: + pass if mi: - if mi.context_window: - _cprint(f" Context: {mi.context_window:,} tokens") if mi.max_output: _cprint(f" Max output: {mi.max_output:,} tokens") if mi.has_cost_data(): _cprint(f" Cost: {mi.format_cost()}") _cprint(f" Capabilities: {mi.format_capabilities()}") - else: - try: - from agent.model_metadata import get_model_context_length - ctx = get_model_context_length( - result.new_model, - base_url=result.base_url or self.base_url, - api_key=result.api_key or self.api_key, - provider=result.target_provider, - ) - _cprint(f" Context: {ctx:,} tokens") - except Exception: - pass cache_enabled = ( (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower()) diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py index d08cac3102..c28b68226b 100644 --- a/tests/agent/test_model_metadata.py +++ b/tests/agent/test_model_metadata.py @@ -340,7 +340,9 @@ class TestCodexOAuthContextLength: from agent.model_metadata 
import get_model_context_length # OpenRouter — should hit its own catalog path first; when mocked - # empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (400k). + # empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (1.05M, + # matching the real direct-API value — Codex OAuth's 272k cap is + # provider-specific and must not leak here). with patch("agent.model_metadata.fetch_model_metadata", return_value={}), \ patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \ patch("agent.model_metadata.get_cached_context_length", return_value=None), \ @@ -351,7 +353,7 @@ class TestCodexOAuthContextLength: api_key="", provider="openrouter", ) - assert ctx == 400_000, ( + assert ctx == 1_050_000, ( f"Non-Codex gpt-5.5 resolved to {ctx}; Codex 272k override " "leaked outside openai-codex provider" ) diff --git a/tests/hermes_cli/test_apply_model_switch_result_context.py b/tests/hermes_cli/test_apply_model_switch_result_context.py new file mode 100644 index 0000000000..fd17150be3 --- /dev/null +++ b/tests/hermes_cli/test_apply_model_switch_result_context.py @@ -0,0 +1,152 @@ +"""Regression test for the `/model` picker confirmation display. + +Bug (April 2026): after choosing a model from the interactive `/model` picker, +``HermesCLI._apply_model_switch_result()`` printed ``ModelInfo.context_window`` +straight from models.dev, which always reports the vendor-wide value (e.g. +gpt-5.5 = 1,050,000 on ``openai``). That ignored provider-specific caps — in +particular, ChatGPT Codex OAuth enforces 272K on the same slug. The sibling +``_handle_model_switch()`` (typed ``/model ``) was already fixed to use +``resolve_display_context_length()``; the picker path was missed, causing +"sometimes 1M, sometimes 272K" for the same model across sibling UI paths. + +Fix: both display paths now go through ``resolve_display_context_length()``. 
+""" +from __future__ import annotations + +from unittest.mock import patch + +from hermes_cli.model_switch import ModelSwitchResult + + +class _FakeModelInfo: + context_window = 1_050_000 + max_output = 0 + + def has_cost_data(self): + return False + + def format_capabilities(self): + return "" + + +class _StubCLI: + """Minimum attrs ``_apply_model_switch_result`` reads on ``self``.""" + agent = None + model = "" + provider = "" + requested_provider = "" + api_key = "" + _explicit_api_key = "" + base_url = "" + _explicit_base_url = "" + api_mode = "" + _pending_model_switch_note = "" + + +def _run_display(monkeypatch, result): + import cli as cli_mod + + captured: list[str] = [] + monkeypatch.setattr(cli_mod, "_cprint", lambda s, *a, **k: captured.append(str(s))) + # Avoid writing to ~/.hermes/config.yaml during the test. + monkeypatch.setattr(cli_mod, "save_config_value", lambda *a, **k: None) + cli_mod.HermesCLI._apply_model_switch_result(_StubCLI(), result, False) + return captured + + +def test_picker_path_uses_provider_aware_context_on_codex(monkeypatch): + """``_apply_model_switch_result`` must prefer the provider-aware resolver + (272K on Codex) over the raw models.dev value (1.05M for gpt-5.5). 
+ """ + result = ModelSwitchResult( + success=True, + new_model="gpt-5.5", + target_provider="openai-codex", + provider_changed=True, + api_key="", + base_url="https://chatgpt.com/backend-api/codex", + api_mode="codex_responses", + warning_message="", + provider_label="ChatGPT Codex", + resolved_via_alias=False, + capabilities=None, + model_info=_FakeModelInfo(), # models.dev says 1.05M + is_global=False, + ) + with patch( + "agent.model_metadata.get_model_context_length", + return_value=272_000, + ): + lines = _run_display(monkeypatch, result) + + ctx_line = next((l for l in lines if "Context:" in l), "") + assert "272,000" in ctx_line, ( + f"picker-path display must show Codex's 272K cap, got: {ctx_line!r}" + ) + assert "1,050,000" not in ctx_line, ( + f"picker-path display leaked models.dev's 1.05M for Codex: {ctx_line!r}" + ) + + +def test_picker_path_shows_vendor_value_when_no_provider_cap(monkeypatch): + """On providers with no enforced cap (e.g. OpenRouter), the picker path + should surface the real 1.05M context for gpt-5.5 — resolver and models.dev + agree here. 
+ """ + result = ModelSwitchResult( + success=True, + new_model="openai/gpt-5.5", + target_provider="openrouter", + provider_changed=True, + api_key="", + base_url="https://openrouter.ai/api/v1", + api_mode="chat_completions", + warning_message="", + provider_label="OpenRouter", + resolved_via_alias=False, + capabilities=None, + model_info=_FakeModelInfo(), + is_global=False, + ) + with patch( + "agent.model_metadata.get_model_context_length", + return_value=1_050_000, + ): + lines = _run_display(monkeypatch, result) + + ctx_line = next((l for l in lines if "Context:" in l), "") + assert "1,050,000" in ctx_line, ( + f"OpenRouter gpt-5.5 should show 1.05M context, got: {ctx_line!r}" + ) + + +def test_picker_path_falls_back_to_model_info_when_resolver_empty(monkeypatch): + """If ``get_model_context_length`` returns nothing (rare — truly unknown + endpoint), the display still surfaces ``ModelInfo.context_window`` so the + user sees *something* rather than a silent blank. + """ + result = ModelSwitchResult( + success=True, + new_model="some-model", + target_provider="some-provider", + provider_changed=True, + api_key="", + base_url="", + api_mode="chat_completions", + warning_message="", + provider_label="Some Provider", + resolved_via_alias=False, + capabilities=None, + model_info=_FakeModelInfo(), # context_window = 1_050_000 + is_global=False, + ) + with patch( + "agent.model_metadata.get_model_context_length", + return_value=None, + ): + lines = _run_display(monkeypatch, result) + + ctx_line = next((l for l in lines if "Context:" in l), "") + assert "1,050,000" in ctx_line, ( + f"resolver-empty path should fall back to ModelInfo, got: {ctx_line!r}" + )