mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 23:11:37 +08:00
Fixes #15779.

Custom-provider per-model context_length (`custom_providers[].models.<id>.context_length`) is now honored across every resolution path, not just agent startup. Also adds 256K as the top probe tier and default fallback.

## What changed

New helper `hermes_cli.config.get_custom_provider_context_length()` — single source of truth for the per-model override lookup, with trailing-slash-insensitive base-url matching.

`agent.model_metadata.get_model_context_length()` gains an optional `custom_providers=` kwarg (step 0b — runs after explicit `config_context_length` but before every other probe).

Wired through five call sites that previously either duplicated the lookup or ignored it entirely:

- `run_agent.py` startup — refactored to use the new helper (dedups legacy inline loop, keeps invalid-value warning)
- `AIAgent.switch_model()` — re-reads custom_providers from live config on every /model switch
- `hermes_cli.model_switch.resolve_display_context_length()` — new `custom_providers=` kwarg
- `gateway/run.py` /model confirmation (picker callback + text path)
- `gateway/run.py` `_format_session_info` (/info)

## Context probe tiers

`CONTEXT_PROBE_TIERS = [256_000, 128_000, 64_000, 32_000, 16_000, 8_000]` — was `[128_000, ...]`. `DEFAULT_FALLBACK_CONTEXT` follows tier[0], so unknown models now default to 256K. The stale `128000` literal in the OpenRouter metadata-miss path is replaced with `DEFAULT_FALLBACK_CONTEXT` for consistency.

## Repro (from #15779)

```yaml
custom_providers:
  - name: my-custom-endpoint
    base_url: https://example.invalid/v1
    model: gpt-5.5
    models:
      gpt-5.5:
        context_length: 1050000
```

`/model gpt-5.5 --provider custom:my-custom-endpoint` → previously "Context: 128,000", now "Context: 1,050,000".

## Tests

- `tests/hermes_cli/test_custom_provider_context_length.py` — new file, 19 tests covering the helper, step-0b integration, and the 256K tier invariants
- `tests/hermes_cli/test_model_switch_context_display.py` — added regression tests for #15779 through the display resolver
- `tests/gateway/test_session_info.py` — updated default-fallback assertion (128K → 256K)
- `tests/agent/test_model_metadata.py` — updated tier assertions for the new top tier
111 lines
4.9 KiB
Python
111 lines
4.9 KiB
Python
"""Tests for GatewayRunner._format_session_info — session config surfacing."""
|
|
|
|
import pytest
|
|
from unittest.mock import patch, MagicMock
|
|
from pathlib import Path
|
|
|
|
from gateway.run import GatewayRunner
|
|
|
|
|
|
@pytest.fixture()
def runner():
    """Provide a GatewayRunner allocated with ``__new__`` so ``__init__`` is never run."""
    bare_runner = GatewayRunner.__new__(GatewayRunner)
    return bare_runner
|
|
|
|
|
|
def _patch_info(tmp_path, config_yaml, model, runtime):
    """Build the three patchers that _format_session_info tests run under.

    When *config_yaml* is not None it is first written to
    ``tmp_path / "config.yaml"``; passing None leaves the file uncreated.

    Returns a 3-tuple of un-started ``patch`` objects, in this order:
    ``gateway.run._hermes_home`` replaced by *tmp_path*,
    ``gateway.run._resolve_gateway_model`` returning *model*, and
    ``gateway.run._resolve_runtime_agent_kwargs`` returning *runtime*.
    The caller is expected to enter them (e.g. ``with p1, p2, p3:``).
    """
    if config_yaml is not None:
        (tmp_path / "config.yaml").write_text(config_yaml)

    home_patch = patch("gateway.run._hermes_home", tmp_path)
    model_patch = patch("gateway.run._resolve_gateway_model", return_value=model)
    runtime_patch = patch("gateway.run._resolve_runtime_agent_kwargs", return_value=runtime)
    return (home_patch, model_patch, runtime_patch)
|
|
|
|
|
|
class TestFormatSessionInfo:
    """Checks on the text returned by ``GatewayRunner._format_session_info``."""

    @staticmethod
    def _info_for(runner, tmp_path, config_yaml, model, runtime):
        """Return ``runner._format_session_info()`` evaluated under the ``_patch_info`` patches."""
        home_patch, model_patch, runtime_patch = _patch_info(tmp_path, config_yaml, model, runtime)
        with home_patch, model_patch, runtime_patch:
            return runner._format_session_info()

    def test_includes_model_name(self, runner, tmp_path):
        """The resolved model name appears in the output."""
        config_yaml = "model:\n default: anthropic/claude-opus-4.6\n provider: openrouter\n"
        runtime = {"provider": "openrouter", "base_url": "https://openrouter.ai/api/v1", "api_key": "k"}
        info = self._info_for(runner, tmp_path, config_yaml, "anthropic/claude-opus-4.6", runtime)
        assert "claude-opus-4.6" in info

    def test_includes_provider(self, runner, tmp_path):
        """The runtime provider name appears in the output."""
        config_yaml = "model:\n default: test-model\n provider: openrouter\n"
        runtime = {"provider": "openrouter", "base_url": "", "api_key": ""}
        info = self._info_for(runner, tmp_path, config_yaml, "test-model", runtime)
        assert "openrouter" in info

    def test_config_context_length(self, runner, tmp_path):
        """A context_length of 32768 in config is rendered as 32K alongside 'config'."""
        config_yaml = "model:\n default: test-model\n context_length: 32768\n"
        runtime = {"provider": "custom", "base_url": "", "api_key": ""}
        info = self._info_for(runner, tmp_path, config_yaml, "test-model", runtime)
        assert "32K" in info
        assert "config" in info

    def test_default_fallback_hint(self, runner, tmp_path):
        """With no configured context length the output shows 256K and names model.context_length."""
        config_yaml = "model:\n default: unknown-model-xyz\n"
        runtime = {"provider": "", "base_url": "", "api_key": ""}
        info = self._info_for(runner, tmp_path, config_yaml, "unknown-model-xyz", runtime)
        assert "256K" in info
        assert "model.context_length" in info

    def test_local_endpoint_shown(self, runner, tmp_path):
        """A localhost base_url is surfaced, together with the 8K context length."""
        config_yaml = (
            "model:\n default: qwen3:8b\n provider: custom\n base_url: http://localhost:11434/v1\n context_length: 8192\n"
        )
        runtime = {"provider": "custom", "base_url": "http://localhost:11434/v1", "api_key": ""}
        info = self._info_for(runner, tmp_path, config_yaml, "qwen3:8b", runtime)
        assert "localhost:11434" in info
        assert "8K" in info

    def test_cloud_endpoint_hidden(self, runner, tmp_path):
        """No 'Endpoint' text is emitted for the OpenRouter base_url."""
        config_yaml = "model:\n default: test-model\n provider: openrouter\n"
        runtime = {"provider": "openrouter", "base_url": "https://openrouter.ai/api/v1", "api_key": "k"}
        info = self._info_for(runner, tmp_path, config_yaml, "test-model", runtime)
        assert "Endpoint" not in info

    def test_million_context_format(self, runner, tmp_path):
        """A context_length of 1000000 is rendered as 1.0M."""
        config_yaml = "model:\n default: test-model\n context_length: 1000000\n"
        runtime = {"provider": "", "base_url": "", "api_key": ""}
        info = self._info_for(runner, tmp_path, config_yaml, "test-model", runtime)
        assert "1.0M" in info

    def test_missing_config(self, runner, tmp_path):
        """No config.yaml should not crash."""
        runtime = {"provider": "openrouter", "base_url": "", "api_key": ""}
        # Passing None as the YAML means config.yaml is never created.
        info = self._info_for(runner, tmp_path, None, "anthropic/claude-sonnet-4.6", runtime)
        assert "Model" in info
        assert "Context" in info

    def test_runtime_resolution_failure_doesnt_crash(self, runner, tmp_path):
        """If runtime resolution raises, should still produce output."""
        (tmp_path / "config.yaml").write_text("model:\n default: test-model\n context_length: 4096\n")

        # Built by hand rather than via _patch_info because the runtime
        # resolver must raise instead of returning a value.
        home_patch = patch("gateway.run._hermes_home", tmp_path)
        model_patch = patch("gateway.run._resolve_gateway_model", return_value="test-model")
        failing_runtime_patch = patch(
            "gateway.run._resolve_runtime_agent_kwargs",
            side_effect=RuntimeError("no creds"),
        )
        with home_patch, model_patch, failing_runtime_patch:
            info = runner._format_session_info()

        assert "4K" in info
        assert "config" in info
|