tests/hermes_cli/test_custom_provider_context_length.py

"""Regression tests for custom_providers per-model context_length resolution.

Covers the fix for #15779 — mid-session /model switch to a named custom
provider must honor ``custom_providers[].models.<id>.context_length`` the
same way startup already does.
"""
from __future__ import annotations

from unittest.mock import patch

from hermes_cli.config import get_custom_provider_context_length


class TestGetCustomProviderContextLength:
    def test_returns_override_for_matching_entry(self):
        custom = [
            {
                "name": "my-endpoint",
                "base_url": "https://example.invalid/v1",
                "models": {"gpt-5.5": {"context_length": 1_050_000}},
            }
        ]
        assert (
            get_custom_provider_context_length(
                "gpt-5.5", "https://example.invalid/v1", custom
            )
            == 1_050_000
        )

    def test_trailing_slash_insensitive(self):
        custom = [
            {
                "base_url": "https://example.invalid/v1/",
                "models": {"m": {"context_length": 500_000}},
            }
        ]
        # config has trailing slash, runtime doesn't — must match
        assert (
            get_custom_provider_context_length(
                "m", "https://example.invalid/v1", custom
            )
            == 500_000
        )
        # and the reverse
        custom2 = [
            {
                "base_url": "https://example.invalid/v1",
                "models": {"m": {"context_length": 500_000}},
            }
        ]
        assert (
            get_custom_provider_context_length(
                "m", "https://example.invalid/v1/", custom2
            )
            == 500_000
        )

    def test_returns_none_when_url_does_not_match(self):
        custom = [
            {
                "base_url": "https://example.invalid/v1",
                "models": {"m": {"context_length": 400_000}},
            }
        ]
        assert (
            get_custom_provider_context_length(
                "m", "https://other.invalid/v1", custom
            )
            is None
        )

    def test_returns_none_when_model_does_not_match(self):
        custom = [
            {
                "base_url": "https://example.invalid/v1",
                "models": {"gpt-5.5": {"context_length": 400_000}},
            }
        ]
        assert (
            get_custom_provider_context_length(
                "different-model", "https://example.invalid/v1", custom
            )
            is None
        )

    def test_returns_none_for_string_value(self):
        """'256K' string is not a valid int — skip silently.

        (The inline startup path still emits a user-visible warning; the
        helper itself returns None so downstream fallbacks can run.)
        """
        custom = [
            {
                "base_url": "https://example.invalid/v1",
                "models": {"m": {"context_length": "256K"}},
            }
        ]
        assert (
            get_custom_provider_context_length(
                "m", "https://example.invalid/v1", custom
            )
            is None
        )

    def test_returns_none_for_zero_or_negative(self):
        for bad in (0, -1, -100):
            custom = [
                {
                    "base_url": "https://example.invalid/v1",
                    "models": {"m": {"context_length": bad}},
                }
            ]
            assert (
                get_custom_provider_context_length(
                    "m", "https://example.invalid/v1", custom
                )
                is None
            ), f"value {bad!r} should be rejected"

    def test_empty_inputs_return_none(self):
        assert get_custom_provider_context_length("", "http://x", [{"base_url": "http://x", "models": {"": {"context_length": 1}}}]) is None
        assert get_custom_provider_context_length("m", "", [{"base_url": "", "models": {"m": {"context_length": 1}}}]) is None
        assert get_custom_provider_context_length("m", "http://x", None) is None
        assert get_custom_provider_context_length("m", "http://x", []) is None

    def test_ignores_non_dict_entries(self):
        """Malformed entries must not crash the lookup."""
        custom = [
            "not a dict",
            None,
            {"base_url": "https://example.invalid/v1", "models": "not a dict"},
            {"base_url": "https://example.invalid/v1", "models": {"m": "not a dict"}},
            {
                "base_url": "https://example.invalid/v1",
                "models": {"m": {"context_length": 400_000}},
            },
        ]
        assert (
            get_custom_provider_context_length(
                "m", "https://example.invalid/v1", custom
            )
            == 400_000
        )


class TestGetModelContextLengthHonorsOverride:
    """agent.model_metadata.get_model_context_length must honor the
    custom_providers override at step 0b — before any probe, cache hit,
    or models.dev lookup can override it.
    """

    def _mock_all_probes(self):
        """Context manager that disables every downstream resolution step."""
        from agent import model_metadata as _mm
        return [
            patch.object(_mm, "get_cached_context_length", return_value=None),
            patch.object(_mm, "fetch_endpoint_model_metadata", return_value={}),
            patch.object(_mm, "fetch_model_metadata", return_value={}),
            patch.object(_mm, "is_local_endpoint", return_value=False),
            patch.object(_mm, "_is_known_provider_base_url", return_value=False),
        ]

    def test_custom_providers_override_wins_over_default_fallback(self):
        from agent.model_metadata import get_model_context_length
        custom = [
            {
                "base_url": "https://example.invalid/v1",
                "models": {"gpt-5.5": {"context_length": 1_050_000}},
            }
        ]
        patches = self._mock_all_probes()
        for p in patches:
            p.start()
        try:
            ctx = get_model_context_length(
                "gpt-5.5",
                base_url="https://example.invalid/v1",
                provider="custom",
                custom_providers=custom,
            )
        finally:
            for p in patches:
                p.stop()
        assert ctx == 1_050_000

    def test_explicit_config_context_length_still_wins(self):
        """Top-level model.context_length (step 0) outranks custom_providers (step 0b).

        Users who set both should see the top-level value — that's the
        documented precedence and matches the long-standing step-0 behavior.
        """
        from agent.model_metadata import get_model_context_length
        custom = [
            {
                "base_url": "https://example.invalid/v1",
                "models": {"m": {"context_length": 1_050_000}},
            }
        ]
        ctx = get_model_context_length(
            "m",
            base_url="https://example.invalid/v1",
            provider="custom",
            config_context_length=500_000,  # explicit top-level wins
            custom_providers=custom,
        )
        assert ctx == 500_000

    def test_no_override_falls_through_to_default(self):
        """With custom_providers=None and all probes disabled, resolver
        returns DEFAULT_FALLBACK_CONTEXT (256K after the stepdown bump).
        """
        from agent.model_metadata import get_model_context_length, DEFAULT_FALLBACK_CONTEXT
        patches = self._mock_all_probes()
        for p in patches:
            p.start()
        try:
            ctx = get_model_context_length(
                "unknown-model",
                base_url="https://example.invalid/v1",
                provider="custom",
                custom_providers=None,
            )
        finally:
            for p in patches:
                p.stop()
        assert ctx == DEFAULT_FALLBACK_CONTEXT


class TestContextProbeTiers:
    def test_256k_is_top_tier_and_default(self):
        """The stepdown probe starts at 256K and 256K is the new default."""
        from agent.model_metadata import CONTEXT_PROBE_TIERS, DEFAULT_FALLBACK_CONTEXT

        assert CONTEXT_PROBE_TIERS[0] == 256_000
        assert DEFAULT_FALLBACK_CONTEXT == 256_000
        # Tiers still descend monotonically
        for a, b in zip(CONTEXT_PROBE_TIERS, CONTEXT_PROBE_TIERS[1:]):
            assert a > b, f"tiers must strictly descend, got {a} then {b}"
        # 128K is still a tier (users relying on it probe-down get there)
        assert 128_000 in CONTEXT_PROBE_TIERS
fix(context): honor custom_providers context_length on /model switch + bump probe tier to 256K (#15844) Fixes #15779. Custom-provider per-model context_length (`custom_providers[].models.<id>.context_length`) is now honored across every resolution path, not just agent startup. Also adds 256K as the top probe tier and default fallback. ## What changed New helper `hermes_cli.config.get_custom_provider_context_length()` — single source of truth for the per-model override lookup, with trailing-slash-insensitive base-url matching. `agent.model_metadata.get_model_context_length()` gains an optional `custom_providers=` kwarg (step 0b — runs after explicit `config_context_length` but before every other probe). Wired through five call sites that previously either duplicated the lookup or ignored it entirely: - `run_agent.py` startup — refactored to use the new helper (dedups legacy inline loop, keeps invalid-value warning) - `AIAgent.switch_model()` — re-reads custom_providers from live config on every /model switch - `hermes_cli.model_switch.resolve_display_context_length()` — new `custom_providers=` kwarg - `gateway/run.py` /model confirmation (picker callback + text path) - `gateway/run.py` `_format_session_info` (/info) ## Context probe tiers `CONTEXT_PROBE_TIERS = [256_000, 128_000, 64_000, 32_000, 16_000, 8_000]` — was `[128_000, ...]`. `DEFAULT_FALLBACK_CONTEXT` follows tier[0], so unknown models now default to 256K. The stale `128000` literal in the OpenRouter metadata-miss path is replaced with `DEFAULT_FALLBACK_CONTEXT` for consistency. ## Repro (from #15779) ```yaml custom_providers: - name: my-custom-endpoint base_url: https://example.invalid/v1 model: gpt-5.5 models: gpt-5.5: context_length: 1050000 ``` `/model gpt-5.5 --provider custom:my-custom-endpoint` → previously "Context: 128,000", now "Context: 1,050,000". ## Tests - `tests/hermes_cli/test_custom_provider_context_length.py` — new file, 19 tests covering the helper, step-0b integration, and the 256K tier invariants - `tests/hermes_cli/test_model_switch_context_display.py` — added regression tests for #15779 through the display resolver - `tests/gateway/test_session_info.py` — updated default-fallback assertion (128K → 256K) - `tests/agent/test_model_metadata.py` — updated tier assertions for the new top tier 2026-04-25 18:47:53 -07:00			`"""Regression tests for custom_providers per-model context_length resolution.`

			`Covers the fix for #15779 — mid-session /model switch to a named custom`
			provider must honor ``custom_providers[].models.<id>.context_length`` the
			`same way startup already does.`
			`"""`
			`from __future__ import annotations`

			`from unittest.mock import patch`

			`from hermes_cli.config import get_custom_provider_context_length`


			`class TestGetCustomProviderContextLength:`
			`def test_returns_override_for_matching_entry(self):`
			`custom = [`
			`{`
			`"name": "my-endpoint",`
			`"base_url": "https://example.invalid/v1",`
			`"models": {"gpt-5.5": {"context_length": 1_050_000}},`
			`}`
			`]`
			`assert (`
			`get_custom_provider_context_length(`
			`"gpt-5.5", "https://example.invalid/v1", custom`
			`)`
			`== 1_050_000`
			`)`

			`def test_trailing_slash_insensitive(self):`
			`custom = [`
			`{`
			`"base_url": "https://example.invalid/v1/",`
			`"models": {"m": {"context_length": 500_000}},`
			`}`
			`]`
			`# config has trailing slash, runtime doesn't — must match`
			`assert (`
			`get_custom_provider_context_length(`
			`"m", "https://example.invalid/v1", custom`
			`)`
			`== 500_000`
			`)`
			`# and the reverse`
			`custom2 = [`
			`{`
			`"base_url": "https://example.invalid/v1",`
			`"models": {"m": {"context_length": 500_000}},`
			`}`
			`]`
			`assert (`
			`get_custom_provider_context_length(`
			`"m", "https://example.invalid/v1/", custom2`
			`)`
			`== 500_000`
			`)`

			`def test_returns_none_when_url_does_not_match(self):`
			`custom = [`
			`{`
			`"base_url": "https://example.invalid/v1",`
			`"models": {"m": {"context_length": 400_000}},`
			`}`
			`]`
			`assert (`
			`get_custom_provider_context_length(`
			`"m", "https://other.invalid/v1", custom`
			`)`
			`is None`
			`)`

			`def test_returns_none_when_model_does_not_match(self):`
			`custom = [`
			`{`
			`"base_url": "https://example.invalid/v1",`
			`"models": {"gpt-5.5": {"context_length": 400_000}},`
			`}`
			`]`
			`assert (`
			`get_custom_provider_context_length(`
			`"different-model", "https://example.invalid/v1", custom`
			`)`
			`is None`
			`)`

			`def test_returns_none_for_string_value(self):`
			`"""'256K' string is not a valid int — skip silently.`

			`(The inline startup path still emits a user-visible warning; the`
			`helper itself returns None so downstream fallbacks can run.)`
			`"""`
			`custom = [`
			`{`
			`"base_url": "https://example.invalid/v1",`
			`"models": {"m": {"context_length": "256K"}},`
			`}`
			`]`
			`assert (`
			`get_custom_provider_context_length(`
			`"m", "https://example.invalid/v1", custom`
			`)`
			`is None`
			`)`

			`def test_returns_none_for_zero_or_negative(self):`
			`for bad in (0, -1, -100):`
			`custom = [`
			`{`
			`"base_url": "https://example.invalid/v1",`
			`"models": {"m": {"context_length": bad}},`
			`}`
			`]`
			`assert (`
			`get_custom_provider_context_length(`
			`"m", "https://example.invalid/v1", custom`
			`)`
			`is None`
			`), f"value {bad!r} should be rejected"`

			`def test_empty_inputs_return_none(self):`
			`assert get_custom_provider_context_length("", "http://x", [{"base_url": "http://x", "models": {"": {"context_length": 1}}}]) is None`
			`assert get_custom_provider_context_length("m", "", [{"base_url": "", "models": {"m": {"context_length": 1}}}]) is None`
			`assert get_custom_provider_context_length("m", "http://x", None) is None`
			`assert get_custom_provider_context_length("m", "http://x", []) is None`

			`def test_ignores_non_dict_entries(self):`
			`"""Malformed entries must not crash the lookup."""`
			`custom = [`
			`"not a dict",`
			`None,`
			`{"base_url": "https://example.invalid/v1", "models": "not a dict"},`
			`{"base_url": "https://example.invalid/v1", "models": {"m": "not a dict"}},`
			`{`
			`"base_url": "https://example.invalid/v1",`
			`"models": {"m": {"context_length": 400_000}},`
			`},`
			`]`
			`assert (`
			`get_custom_provider_context_length(`
			`"m", "https://example.invalid/v1", custom`
			`)`
			`== 400_000`
			`)`


			`class TestGetModelContextLengthHonorsOverride:`
			`"""agent.model_metadata.get_model_context_length must honor the`
			`custom_providers override at step 0b — before any probe, cache hit,`
			`or models.dev lookup can override it.`
			`"""`

			`def _mock_all_probes(self):`
			`"""Context manager that disables every downstream resolution step."""`
			`from agent import model_metadata as _mm`
			`return [`
			`patch.object(_mm, "get_cached_context_length", return_value=None),`
			`patch.object(_mm, "fetch_endpoint_model_metadata", return_value={}),`
			`patch.object(_mm, "fetch_model_metadata", return_value={}),`
			`patch.object(_mm, "is_local_endpoint", return_value=False),`
			`patch.object(_mm, "_is_known_provider_base_url", return_value=False),`
			`]`

			`def test_custom_providers_override_wins_over_default_fallback(self):`
			`from agent.model_metadata import get_model_context_length`
			`custom = [`
			`{`
			`"base_url": "https://example.invalid/v1",`
			`"models": {"gpt-5.5": {"context_length": 1_050_000}},`
			`}`
			`]`
			`patches = self._mock_all_probes()`
			`for p in patches:`
			`p.start()`
			`try:`
			`ctx = get_model_context_length(`
			`"gpt-5.5",`
			`base_url="https://example.invalid/v1",`
			`provider="custom",`
			`custom_providers=custom,`
			`)`
			`finally:`
			`for p in patches:`
			`p.stop()`
			`assert ctx == 1_050_000`

			`def test_explicit_config_context_length_still_wins(self):`
			`"""Top-level model.context_length (step 0) outranks custom_providers (step 0b).`

			`Users who set both should see the top-level value — that's the`
			`documented precedence and matches the long-standing step-0 behavior.`
			`"""`
			`from agent.model_metadata import get_model_context_length`
			`custom = [`
			`{`
			`"base_url": "https://example.invalid/v1",`
			`"models": {"m": {"context_length": 1_050_000}},`
			`}`
			`]`
			`ctx = get_model_context_length(`
			`"m",`
			`base_url="https://example.invalid/v1",`
			`provider="custom",`
			`config_context_length=500_000, # explicit top-level wins`
			`custom_providers=custom,`
			`)`
			`assert ctx == 500_000`

			`def test_no_override_falls_through_to_default(self):`
			`"""With custom_providers=None and all probes disabled, resolver`
			`returns DEFAULT_FALLBACK_CONTEXT (256K after the stepdown bump).`
			`"""`
			`from agent.model_metadata import get_model_context_length, DEFAULT_FALLBACK_CONTEXT`
			`patches = self._mock_all_probes()`
			`for p in patches:`
			`p.start()`
			`try:`
			`ctx = get_model_context_length(`
			`"unknown-model",`
			`base_url="https://example.invalid/v1",`
			`provider="custom",`
			`custom_providers=None,`
			`)`
			`finally:`
			`for p in patches:`
			`p.stop()`
			`assert ctx == DEFAULT_FALLBACK_CONTEXT`


			`class TestContextProbeTiers:`
			`def test_256k_is_top_tier_and_default(self):`
			`"""The stepdown probe starts at 256K and 256K is the new default."""`
			`from agent.model_metadata import CONTEXT_PROBE_TIERS, DEFAULT_FALLBACK_CONTEXT`

			`assert CONTEXT_PROBE_TIERS[0] == 256_000`
			`assert DEFAULT_FALLBACK_CONTEXT == 256_000`
			`# Tiers still descend monotonically`
			`for a, b in zip(CONTEXT_PROBE_TIERS, CONTEXT_PROBE_TIERS[1:]):`
			`assert a > b, f"tiers must strictly descend, got {a} then {b}"`
			`# 128K is still a tier (users relying on it probe-down get there)`
			`assert 128_000 in CONTEXT_PROBE_TIERS`