mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 23:11:37 +08:00
Fixes #15779. Custom-provider per-model context_length (`custom_providers[].models.<id>.context_length`) is now honored across every resolution path, not just agent startup. Also adds 256K as the top probe tier and default fallback.

## What changed

New helper `hermes_cli.config.get_custom_provider_context_length()` — single source of truth for the per-model override lookup, with trailing-slash-insensitive base-url matching (a sketch appears at the end of this description).

`agent.model_metadata.get_model_context_length()` gains an optional `custom_providers=` kwarg (step 0b — runs after explicit `config_context_length` but before every other probe).

Wired through five call sites that previously either duplicated the lookup or ignored it entirely:

- `run_agent.py` startup — refactored to use the new helper (dedups legacy inline loop, keeps invalid-value warning)
- `AIAgent.switch_model()` — re-reads custom_providers from live config on every /model switch
- `hermes_cli.model_switch.resolve_display_context_length()` — new `custom_providers=` kwarg
- `gateway/run.py` /model confirmation (picker callback + text path)
- `gateway/run.py` `_format_session_info` (/info)

## Context probe tiers

`CONTEXT_PROBE_TIERS = [256_000, 128_000, 64_000, 32_000, 16_000, 8_000]` — was `[128_000, ...]`. `DEFAULT_FALLBACK_CONTEXT` follows tier[0], so unknown models now default to 256K. The stale `128000` literal in the OpenRouter metadata-miss path is replaced with `DEFAULT_FALLBACK_CONTEXT` for consistency.

## Repro (from #15779)

```yaml
custom_providers:
  - name: my-custom-endpoint
    base_url: https://example.invalid/v1
    model: gpt-5.5
    models:
      gpt-5.5:
        context_length: 1050000
```

`/model gpt-5.5 --provider custom:my-custom-endpoint` → previously "Context: 128,000", now "Context: 1,050,000".

## Tests

- `tests/hermes_cli/test_custom_provider_context_length.py` — new file, 19 tests covering the helper, step-0b integration, and the 256K tier invariants
- `tests/hermes_cli/test_model_switch_context_display.py` — added regression tests for #15779 through the display resolver
- `tests/gateway/test_session_info.py` — updated default-fallback assertion (128K → 256K)
- `tests/agent/test_model_metadata.py` — updated tier assertions for the new top tier
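## Helper sketch (illustrative)

A minimal sketch of the lookup the new helper performs, for orientation only. The exact signature, the `_norm` name, and the invalid-value check are assumptions for this sketch; the real implementation lives in `hermes_cli.config` and may differ in detail.

```python
def get_custom_provider_context_length(custom_providers, model, base_url=None):
    """Return the configured per-model context_length override, or None."""

    def _norm(url):
        # Trailing-slash-insensitive base-url matching.
        return (url or "").rstrip("/")

    for prov in custom_providers or []:
        if base_url is not None and _norm(prov.get("base_url")) != _norm(base_url):
            continue
        entry = (prov.get("models") or {}).get(model) or {}
        ctx = entry.get("context_length")
        if isinstance(ctx, int) and ctx > 0:  # skip invalid/missing values
            return ctx
    return None
```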
149 lines
5.9 KiB
Python
"""Regression test for /model context-length display on provider-capped models.
|
|
|
|
Bug (April 2026): `/model gpt-5.5` on openai-codex (ChatGPT OAuth) showed
|
|
"Context: 1,050,000 tokens" because the display code used the raw models.dev
|
|
``ModelInfo.context_window`` (which reports the direct-OpenAI API value) instead
|
|
of the provider-aware resolver. The agent was actually running at 272K — Codex
|
|
OAuth's enforced cap — so the display was lying to the user.
|
|
|
|
Fix: ``resolve_display_context_length()`` prefers
|
|
``agent.model_metadata.get_model_context_length`` (which knows about Codex OAuth,
|
|
Copilot, Nous, etc.) and falls back to models.dev only if that returns nothing.
|
|
"""

from __future__ import annotations

from unittest.mock import patch

from hermes_cli.model_switch import resolve_display_context_length


class _FakeModelInfo:
    """Minimal stand-in for a models.dev ModelInfo; only context_window is read."""

    def __init__(self, ctx):
        self.context_window = ctx


class TestResolveDisplayContextLength:
    def test_codex_oauth_overrides_models_dev(self):
        """gpt-5.5 on openai-codex must show Codex's 272K cap, not models.dev's 1.05M."""
        fake_mi = _FakeModelInfo(1_050_000)  # what models.dev reports
        with patch(
            "agent.model_metadata.get_model_context_length",
            return_value=272_000,  # what Codex OAuth actually enforces
        ):
            ctx = resolve_display_context_length(
                "gpt-5.5",
                "openai-codex",
                base_url="https://chatgpt.com/backend-api/codex",
                api_key="",
                model_info=fake_mi,
            )
        assert ctx == 272_000, (
            "Codex OAuth's 272K cap must win over models.dev's 1.05M for gpt-5.5"
        )

    def test_falls_back_to_model_info_when_resolver_returns_none(self):
        fake_mi = _FakeModelInfo(1_048_576)
        with patch(
            "agent.model_metadata.get_model_context_length", return_value=None
        ):
            ctx = resolve_display_context_length(
                "some-model",
                "some-provider",
                model_info=fake_mi,
            )
        assert ctx == 1_048_576

    def test_returns_none_when_both_sources_empty(self):
        with patch(
            "agent.model_metadata.get_model_context_length", return_value=None
        ):
            ctx = resolve_display_context_length(
                "unknown-model",
                "unknown-provider",
                model_info=None,
            )
        assert ctx is None

    def test_resolver_exception_falls_back_to_model_info(self):
        fake_mi = _FakeModelInfo(200_000)
        with patch(
            "agent.model_metadata.get_model_context_length",
            side_effect=RuntimeError("network down"),
        ):
            ctx = resolve_display_context_length("x", "y", model_info=fake_mi)
        assert ctx == 200_000

    def test_prefers_resolver_even_when_model_info_has_larger_value(self):
        """Invariant: provider-aware resolver is authoritative, even if models.dev
        reports a bigger window."""
        fake_mi = _FakeModelInfo(2_000_000)
        with patch(
            "agent.model_metadata.get_model_context_length", return_value=128_000
        ):
            ctx = resolve_display_context_length(
                "capped-model",
                "capped-provider",
                model_info=fake_mi,
            )
        assert ctx == 128_000
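
    # The custom_provs literals in the two tests below mirror the config.yaml
    # shape from the #15779 repro:
    #
    #     custom_providers:
    #       - name: my-custom-endpoint
    #         base_url: https://example.invalid/v1
    #         models:
    #           gpt-5.5:
    #             context_length: 1050000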
    def test_custom_providers_override_honored(self):
        """Regression for #15779: /model switch onto a custom provider must
        surface the configured per-model context_length, not the 128K/256K
        fallback.
        """
        custom_provs = [
            {
                "name": "my-custom-endpoint",
                "base_url": "https://example.invalid/v1",
                "models": {"gpt-5.5": {"context_length": 1_050_000}},
            }
        ]
        # Real resolver call — no mock — so the override path is exercised
        # through agent.model_metadata.get_model_context_length; only the
        # network-touching probes are stubbed out.
        from agent import model_metadata as _mm

        with patch.object(_mm, "get_cached_context_length", return_value=None), \
                patch.object(_mm, "fetch_endpoint_model_metadata", return_value={}), \
                patch.object(_mm, "fetch_model_metadata", return_value={}), \
                patch.object(_mm, "is_local_endpoint", return_value=False), \
                patch.object(_mm, "_is_known_provider_base_url", return_value=False):
            ctx = resolve_display_context_length(
                "gpt-5.5",
                "custom",
                base_url="https://example.invalid/v1",
                api_key="k",
                custom_providers=custom_provs,
            )
        assert ctx == 1_050_000, (
            "custom_providers[].models.gpt-5.5.context_length=1.05M must win "
            "over probe-down fallback"
        )

    def test_custom_providers_trailing_slash_insensitive(self):
        """Base URL comparison must tolerate trailing-slash differences
        between config.yaml and the runtime value.
        """
        custom_provs = [
            {
                "base_url": "https://example.invalid/v1/",
                "models": {"m": {"context_length": 400_000}},
            }
        ]
        from agent import model_metadata as _mm

        with patch.object(_mm, "get_cached_context_length", return_value=None), \
                patch.object(_mm, "fetch_endpoint_model_metadata", return_value={}), \
                patch.object(_mm, "fetch_model_metadata", return_value={}), \
                patch.object(_mm, "is_local_endpoint", return_value=False), \
                patch.object(_mm, "_is_known_provider_base_url", return_value=False):
            ctx = resolve_display_context_length(
                "m",
                "custom",
                base_url="https://example.invalid/v1",  # no trailing slash
                custom_providers=custom_provs,
            )
        assert ctx == 400_000