feat(fast): broaden /fast whitelist to all OpenAI + Anthropic models (#16883)

Switch _PRIORITY_PROCESSING_MODELS and _ANTHROPIC_FAST_MODE_MODELS from
hardcoded frozensets to prefix-based matching. Any gpt-*, o1*, o3*, o4*
(OpenAI) and any claude-* (Anthropic) now expose /fast.

Fixes the case where gpt-5.5 and other post-catalog models silently
skipped Priority Processing because they weren't in the frozenset.
Future OpenAI/Anthropic releases will work without a catalog bump.

Safety:
- Codex-series (*codex*) still excluded — they route through the Codex
  Responses API which doesn't take service_tier.
- Anthropic adapter already gates speed=fast on native endpoints only
  (_is_third_party_anthropic_endpoint), so claude-sonnet-4.6 on
  OpenRouter/Bedrock/opencode-zen won't leak the unknown beta.
- service_tier=priority is silently dropped by non-OpenAI proxies, so
  false positives are harmless.
This commit is contained in:
Teknium
2026-04-28 00:44:43 -07:00
committed by GitHub
parent 6ce796b495
commit 8269f9056c
2 changed files with 99 additions and 41 deletions

View File

@@ -114,17 +114,38 @@ class TestPriorityProcessingModels(unittest.TestCase):
def test_all_documented_models_supported(self):
from hermes_cli.models import model_supports_fast_mode
# All models from OpenAI's Priority Processing pricing table
# All OpenAI flagship models support Priority Processing — including
# future releases (gpt-5.5, 5.6...) via pattern matching.
supported = [
"gpt-5.5", "gpt-5.5-mini",
"gpt-5.4", "gpt-5.4-mini", "gpt-5.2",
"gpt-5.1", "gpt-5", "gpt-5-mini",
"gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano",
"gpt-4o", "gpt-4o-mini",
"o3", "o4-mini",
"o1", "o1-mini", "o3", "o3-mini", "o4-mini",
]
for model in supported:
assert model_supports_fast_mode(model), f"{model} should support fast mode"
def test_all_anthropic_models_supported(self):
from hermes_cli.models import model_supports_fast_mode
# All Claude models support Anthropic Fast Mode — Opus, Sonnet, Haiku.
supported = [
"claude-opus-4-7", "claude-opus-4-6", "claude-opus-4.6",
"claude-sonnet-4-6", "claude-sonnet-4.6", "claude-sonnet-4",
"claude-haiku-4-5", "claude-3-5-haiku",
]
for model in supported:
assert model_supports_fast_mode(model), f"{model} should support fast mode"
def test_codex_models_excluded(self):
"""Codex models route through Responses API and don't accept service_tier."""
from hermes_cli.models import model_supports_fast_mode
for model in ["gpt-5-codex", "gpt-5.2-codex", "gpt-5.3-codex", "gpt-5.1-codex-max"]:
assert not model_supports_fast_mode(model), f"{model} is codex — should not expose /fast"
def test_vendor_prefix_stripped(self):
from hermes_cli.models import model_supports_fast_mode
@@ -135,8 +156,15 @@ class TestPriorityProcessingModels(unittest.TestCase):
def test_non_priority_models_rejected(self):
from hermes_cli.models import model_supports_fast_mode
# Codex-series models route through the Codex Responses API and
# don't accept service_tier, so they're excluded.
assert model_supports_fast_mode("gpt-5.3-codex") is False
assert model_supports_fast_mode("claude-sonnet-4") is False
assert model_supports_fast_mode("gpt-5.2-codex") is False
assert model_supports_fast_mode("gpt-5-codex") is False
# Non-OpenAI, non-Anthropic models
assert model_supports_fast_mode("gemini-3-pro-preview") is False
assert model_supports_fast_mode("kimi-k2-thinking") is False
assert model_supports_fast_mode("deepseek-chat") is False
assert model_supports_fast_mode("") is False
assert model_supports_fast_mode(None) is False
@@ -153,7 +181,8 @@ class TestPriorityProcessingModels(unittest.TestCase):
from hermes_cli.models import resolve_fast_mode_overrides
assert resolve_fast_mode_overrides("gpt-5.3-codex") is None
assert resolve_fast_mode_overrides("claude-sonnet-4") is None
assert resolve_fast_mode_overrides("gemini-3-pro-preview") is None
assert resolve_fast_mode_overrides("kimi-k2-thinking") is None
class TestFastModeRouting(unittest.TestCase):
@@ -228,13 +257,26 @@ class TestAnthropicFastMode(unittest.TestCase):
assert model_supports_fast_mode("anthropic/claude-opus-4-6") is True
assert model_supports_fast_mode("anthropic/claude-opus-4.6") is True
def test_anthropic_non_opus_rejected(self):
def test_anthropic_all_claude_models_supported(self):
from hermes_cli.models import model_supports_fast_mode
assert model_supports_fast_mode("claude-sonnet-4-6") is False
assert model_supports_fast_mode("claude-sonnet-4.6") is False
assert model_supports_fast_mode("claude-haiku-4-5") is False
assert model_supports_fast_mode("anthropic/claude-sonnet-4.6") is False
# All Claude models support fast mode — Opus, Sonnet, Haiku.
# The anthropic adapter gates speed=fast on native Anthropic
# endpoints only, so third-party proxies that reject the beta
# are protected downstream (see _is_third_party_anthropic_endpoint).
assert model_supports_fast_mode("claude-sonnet-4-6") is True
assert model_supports_fast_mode("claude-sonnet-4.6") is True
assert model_supports_fast_mode("claude-haiku-4-5") is True
assert model_supports_fast_mode("claude-opus-4-7") is True
assert model_supports_fast_mode("anthropic/claude-sonnet-4.6") is True
def test_non_claude_models_not_anthropic_fast(self):
"""Non-Claude models should not be treated as Anthropic fast-mode."""
from hermes_cli.models import _is_anthropic_fast_model
assert _is_anthropic_fast_model("gpt-5.4") is False
assert _is_anthropic_fast_model("gemini-3-pro") is False
assert _is_anthropic_fast_model("kimi-k2-thinking") is False
def test_anthropic_variant_tags_stripped(self):
from hermes_cli.models import model_supports_fast_mode
@@ -264,9 +306,11 @@ class TestAnthropicFastMode(unittest.TestCase):
assert _is_anthropic_fast_model("claude-opus-4-6") is True
assert _is_anthropic_fast_model("claude-opus-4.6") is True
assert _is_anthropic_fast_model("claude-sonnet-4-6") is True
assert _is_anthropic_fast_model("claude-haiku-4-5") is True
assert _is_anthropic_fast_model("anthropic/claude-opus-4-6") is True
assert _is_anthropic_fast_model("gpt-5.4") is False
assert _is_anthropic_fast_model("claude-sonnet-4-6") is False
assert _is_anthropic_fast_model("") is False
def test_fast_command_exposed_for_anthropic_model(self):
cli_mod = _import_cli()
@@ -276,12 +320,22 @@ class TestAnthropicFastMode(unittest.TestCase):
)
assert cli_mod.HermesCLI._fast_command_available(stub) is True
def test_fast_command_hidden_for_anthropic_sonnet(self):
def test_fast_command_exposed_for_anthropic_sonnet(self):
"""Sonnet now supports Anthropic Fast Mode — the adapter gates on base_url."""
cli_mod = _import_cli()
stub = SimpleNamespace(
provider="anthropic", requested_provider="anthropic",
model="claude-sonnet-4-6", agent=None,
)
assert cli_mod.HermesCLI._fast_command_available(stub) is True
def test_fast_command_hidden_for_non_claude_non_openai(self):
"""Non-Claude, non-OpenAI models should not expose /fast."""
cli_mod = _import_cli()
stub = SimpleNamespace(
provider="gemini", requested_provider="gemini",
model="gemini-3-pro-preview", agent=None,
)
assert cli_mod.HermesCLI._fast_command_available(stub) is False
def test_turn_route_injects_speed_for_anthropic(self):