mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 15:01:34 +08:00
Compare commits
1 Commits
bb/base-gu
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7d001a2da2 |
35
cli.py
35
cli.py
@@ -2572,7 +2572,7 @@ class HermesCLI:
|
||||
def _resolve_turn_agent_config(self, user_message: str) -> dict:
|
||||
"""Resolve model/runtime overrides for a single user turn."""
|
||||
from agent.smart_model_routing import resolve_turn_route
|
||||
from hermes_cli.models import resolve_fast_mode_runtime
|
||||
from hermes_cli.models import resolve_fast_mode_overrides
|
||||
|
||||
route = resolve_turn_route(
|
||||
user_message,
|
||||
@@ -2595,27 +2595,10 @@ class HermesCLI:
|
||||
return route
|
||||
|
||||
try:
|
||||
fast_runtime = resolve_fast_mode_runtime(route.get("model"))
|
||||
overrides = resolve_fast_mode_overrides(route.get("model"))
|
||||
except Exception:
|
||||
route["request_overrides"] = None
|
||||
return route
|
||||
if not fast_runtime:
|
||||
route["request_overrides"] = None
|
||||
return route
|
||||
|
||||
runtime = fast_runtime["runtime"]
|
||||
route["runtime"] = runtime
|
||||
route["request_overrides"] = fast_runtime["request_overrides"]
|
||||
route["label"] = f"fast route → {route.get('model')} ({runtime.get('provider')})"
|
||||
route["signature"] = (
|
||||
route.get("model"),
|
||||
runtime.get("provider"),
|
||||
runtime.get("base_url"),
|
||||
runtime.get("api_mode"),
|
||||
runtime.get("command"),
|
||||
tuple(runtime.get("args") or ()),
|
||||
json.dumps(route["request_overrides"], sort_keys=True),
|
||||
)
|
||||
overrides = None
|
||||
route["request_overrides"] = overrides
|
||||
return route
|
||||
|
||||
def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None, request_overrides: dict | None = None) -> bool:
|
||||
@@ -5662,15 +5645,15 @@ class HermesCLI:
|
||||
_cprint(f" {_GOLD}✓ Reasoning effort set to '{arg}' (session only){_RST}")
|
||||
|
||||
def _handle_fast_command(self, cmd: str):
|
||||
"""Handle /fast — choose the Codex Responses service tier."""
|
||||
"""Handle /fast — toggle OpenAI Priority Processing (service_tier)."""
|
||||
if not self._fast_command_available():
|
||||
_cprint(" (._.) /fast is only available for models that explicitly expose a fast backend.")
|
||||
_cprint(" (._.) /fast is only available for OpenAI models that support Priority Processing.")
|
||||
return
|
||||
|
||||
parts = cmd.strip().split(maxsplit=1)
|
||||
if len(parts) < 2 or parts[1].strip().lower() == "status":
|
||||
status = "fast" if self.service_tier == "priority" else "normal"
|
||||
_cprint(f" {_GOLD}Codex inference tier: {status}{_RST}")
|
||||
_cprint(f" {_GOLD}Priority Processing: {status}{_RST}")
|
||||
_cprint(f" {_DIM}Usage: /fast [normal|fast|status]{_RST}")
|
||||
return
|
||||
|
||||
@@ -5691,9 +5674,9 @@ class HermesCLI:
|
||||
|
||||
self.agent = None # Force agent re-init with new service-tier config
|
||||
if save_config_value("agent.service_tier", saved_value):
|
||||
_cprint(f" {_GOLD}✓ Codex inference tier set to {label} (saved to config){_RST}")
|
||||
_cprint(f" {_GOLD}✓ Priority Processing set to {label} (saved to config){_RST}")
|
||||
else:
|
||||
_cprint(f" {_GOLD}✓ Codex inference tier set to {label} (session only){_RST}")
|
||||
_cprint(f" {_GOLD}✓ Priority Processing set to {label} (session only){_RST}")
|
||||
|
||||
def _on_reasoning(self, reasoning_text: str):
|
||||
"""Callback for intermediate reasoning display during tool-call loops."""
|
||||
|
||||
@@ -100,7 +100,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
|
||||
args_hint="[level|show|hide]",
|
||||
subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off")),
|
||||
CommandDef("fast", "Choose Codex inference tier (Normal/Fast)", "Configuration",
|
||||
CommandDef("fast", "Toggle OpenAI Priority Processing (Normal/Fast)", "Configuration",
|
||||
cli_only=True, args_hint="[normal|fast|status]",
|
||||
subcommands=("normal", "fast", "status", "on", "off")),
|
||||
CommandDef("skin", "Show or change the display skin/theme", "Configuration",
|
||||
|
||||
@@ -1017,58 +1017,45 @@ def provider_label(provider: Optional[str]) -> str:
|
||||
return _PROVIDER_LABELS.get(normalized, original or "OpenRouter")
|
||||
|
||||
|
||||
_FAST_MODE_BACKEND_CONFIG: dict[str, dict[str, Any]] = {
|
||||
"gpt-5.4": {
|
||||
"provider": "openai-codex",
|
||||
"request_overrides": {"service_tier": "priority"},
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def fast_mode_backend_config(model_id: Optional[str]) -> dict[str, Any] | None:
|
||||
"""Return backend config for models that expose Fast mode.
|
||||
|
||||
To expose Fast mode for a new model, add its normalized model slug to
|
||||
``_FAST_MODE_BACKEND_CONFIG`` along with the backend runtime selection and
|
||||
backend-specific request overrides Hermes should apply.
|
||||
"""
|
||||
raw = str(model_id or "").strip().lower()
|
||||
if "/" in raw:
|
||||
raw = raw.split("/", 1)[1]
|
||||
config = _FAST_MODE_BACKEND_CONFIG.get(raw)
|
||||
return dict(config) if config else None
|
||||
# Models that support OpenAI Priority Processing (service_tier="priority").
|
||||
# See https://openai.com/api-priority-processing/ for the canonical list.
|
||||
# Only the bare model slug is stored (no vendor prefix).
|
||||
_PRIORITY_PROCESSING_MODELS: frozenset[str] = frozenset({
|
||||
"gpt-5.4",
|
||||
"gpt-5.4-mini",
|
||||
"gpt-5.2",
|
||||
"gpt-5.1",
|
||||
"gpt-5",
|
||||
"gpt-5-mini",
|
||||
"gpt-4.1",
|
||||
"gpt-4.1-mini",
|
||||
"gpt-4.1-nano",
|
||||
"gpt-4o",
|
||||
"gpt-4o-mini",
|
||||
"o3",
|
||||
"o4-mini",
|
||||
})
|
||||
|
||||
|
||||
def model_supports_fast_mode(model_id: Optional[str]) -> bool:
|
||||
"""Return whether Hermes should expose Fast mode for the active model."""
|
||||
return fast_mode_backend_config(model_id) is not None
|
||||
"""Return whether Hermes should expose the /fast (Priority Processing) toggle."""
|
||||
raw = str(model_id or "").strip().lower()
|
||||
if "/" in raw:
|
||||
raw = raw.split("/", 1)[1]
|
||||
return raw in _PRIORITY_PROCESSING_MODELS
|
||||
|
||||
|
||||
def resolve_fast_mode_runtime(model_id: Optional[str]) -> dict[str, Any] | None:
|
||||
"""Resolve runtime selection and request overrides for a fast-mode model."""
|
||||
cfg = fast_mode_backend_config(model_id)
|
||||
if not cfg:
|
||||
def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
|
||||
"""Return request_overrides for Priority Processing, or None if unsupported.
|
||||
|
||||
Unlike the previous ``resolve_fast_mode_runtime``, this does NOT force a
|
||||
provider/backend switch. The ``service_tier`` parameter is injected into
|
||||
whatever API path the user is already on (Codex Responses, Chat Completions,
|
||||
or OpenRouter passthrough).
|
||||
"""
|
||||
if not model_supports_fast_mode(model_id):
|
||||
return None
|
||||
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
|
||||
runtime = resolve_runtime_provider(
|
||||
requested=cfg.get("provider"),
|
||||
explicit_base_url=cfg.get("base_url"),
|
||||
explicit_api_key=cfg.get("api_key"),
|
||||
)
|
||||
return {
|
||||
"runtime": {
|
||||
"api_key": runtime.get("api_key"),
|
||||
"base_url": runtime.get("base_url"),
|
||||
"provider": runtime.get("provider"),
|
||||
"api_mode": runtime.get("api_mode"),
|
||||
"command": runtime.get("command"),
|
||||
"args": list(runtime.get("args") or []),
|
||||
"credential_pool": runtime.get("credential_pool"),
|
||||
},
|
||||
"request_overrides": dict(cfg.get("request_overrides") or {}),
|
||||
}
|
||||
return {"service_tier": "priority"}
|
||||
|
||||
|
||||
def _resolve_copilot_catalog_api_key() -> str:
|
||||
|
||||
@@ -5686,6 +5686,11 @@ class AIAgent:
|
||||
if "x.ai" in self._base_url_lower and hasattr(self, "session_id") and self.session_id:
|
||||
api_kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id}
|
||||
|
||||
# Priority Processing / generic request overrides (e.g. service_tier).
|
||||
# Applied last so overrides win over any defaults set above.
|
||||
if self.request_overrides:
|
||||
api_kwargs.update(self.request_overrides)
|
||||
|
||||
return api_kwargs
|
||||
|
||||
def _supports_reasoning_extra_body(self) -> bool:
|
||||
|
||||
@@ -108,15 +108,52 @@ class TestHandleFastCommand(unittest.TestCase):
|
||||
self.assertTrue(mock_cprint.called)
|
||||
|
||||
|
||||
class TestFastModeRegistry(unittest.TestCase):
|
||||
def test_only_gpt_5_4_is_enabled_for_codex(self):
|
||||
from hermes_cli.models import fast_mode_backend_config
|
||||
class TestPriorityProcessingModels(unittest.TestCase):
|
||||
"""Verify the expanded Priority Processing model registry."""
|
||||
|
||||
assert fast_mode_backend_config("gpt-5.4") == {
|
||||
"provider": "openai-codex",
|
||||
"request_overrides": {"service_tier": "priority"},
|
||||
}
|
||||
assert fast_mode_backend_config("gpt-5.3-codex") is None
|
||||
def test_all_documented_models_supported(self):
|
||||
from hermes_cli.models import model_supports_fast_mode
|
||||
|
||||
# All models from OpenAI's Priority Processing pricing table
|
||||
supported = [
|
||||
"gpt-5.4", "gpt-5.4-mini", "gpt-5.2",
|
||||
"gpt-5.1", "gpt-5", "gpt-5-mini",
|
||||
"gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano",
|
||||
"gpt-4o", "gpt-4o-mini",
|
||||
"o3", "o4-mini",
|
||||
]
|
||||
for model in supported:
|
||||
assert model_supports_fast_mode(model), f"{model} should support fast mode"
|
||||
|
||||
def test_vendor_prefix_stripped(self):
|
||||
from hermes_cli.models import model_supports_fast_mode
|
||||
|
||||
assert model_supports_fast_mode("openai/gpt-5.4") is True
|
||||
assert model_supports_fast_mode("openai/gpt-4.1") is True
|
||||
assert model_supports_fast_mode("openai/o3") is True
|
||||
|
||||
def test_non_priority_models_rejected(self):
|
||||
from hermes_cli.models import model_supports_fast_mode
|
||||
|
||||
assert model_supports_fast_mode("gpt-5.3-codex") is False
|
||||
assert model_supports_fast_mode("claude-sonnet-4") is False
|
||||
assert model_supports_fast_mode("") is False
|
||||
assert model_supports_fast_mode(None) is False
|
||||
|
||||
def test_resolve_overrides_returns_service_tier(self):
|
||||
from hermes_cli.models import resolve_fast_mode_overrides
|
||||
|
||||
result = resolve_fast_mode_overrides("gpt-5.4")
|
||||
assert result == {"service_tier": "priority"}
|
||||
|
||||
result = resolve_fast_mode_overrides("gpt-4.1")
|
||||
assert result == {"service_tier": "priority"}
|
||||
|
||||
def test_resolve_overrides_none_for_unsupported(self):
|
||||
from hermes_cli.models import resolve_fast_mode_overrides
|
||||
|
||||
assert resolve_fast_mode_overrides("gpt-5.3-codex") is None
|
||||
assert resolve_fast_mode_overrides("claude-sonnet-4") is None
|
||||
|
||||
|
||||
class TestFastModeRouting(unittest.TestCase):
|
||||
@@ -126,7 +163,16 @@ class TestFastModeRouting(unittest.TestCase):
|
||||
|
||||
assert cli_mod.HermesCLI._fast_command_available(stub) is True
|
||||
|
||||
def test_turn_route_switches_to_model_backend_when_fast_enabled(self):
|
||||
def test_fast_command_exposed_for_non_codex_models(self):
|
||||
cli_mod = _import_cli()
|
||||
stub = SimpleNamespace(provider="openai", requested_provider="openai", model="gpt-4.1", agent=None)
|
||||
assert cli_mod.HermesCLI._fast_command_available(stub) is True
|
||||
|
||||
stub = SimpleNamespace(provider="openrouter", requested_provider="openrouter", model="o3", agent=None)
|
||||
assert cli_mod.HermesCLI._fast_command_available(stub) is True
|
||||
|
||||
def test_turn_route_injects_overrides_without_provider_switch(self):
|
||||
"""Fast mode should add request_overrides but NOT change the provider/runtime."""
|
||||
cli_mod = _import_cli()
|
||||
stub = SimpleNamespace(
|
||||
model="gpt-5.4",
|
||||
@@ -141,35 +187,28 @@ class TestFastModeRouting(unittest.TestCase):
|
||||
service_tier="priority",
|
||||
)
|
||||
|
||||
with (
|
||||
patch("agent.smart_model_routing.resolve_turn_route", return_value={
|
||||
"model": "gpt-5.4",
|
||||
"runtime": {
|
||||
"api_key": "primary-key",
|
||||
"base_url": "https://openrouter.ai/api/v1",
|
||||
"provider": "openrouter",
|
||||
"api_mode": "chat_completions",
|
||||
"command": None,
|
||||
"args": [],
|
||||
"credential_pool": None,
|
||||
},
|
||||
"label": None,
|
||||
"signature": ("gpt-5.4", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()),
|
||||
}),
|
||||
patch("hermes_cli.runtime_provider.resolve_runtime_provider", return_value={
|
||||
"provider": "openai-codex",
|
||||
"api_mode": "codex_responses",
|
||||
"base_url": "https://chatgpt.com/backend-api/codex",
|
||||
"api_key": "codex-key",
|
||||
"command": None,
|
||||
"args": [],
|
||||
"credential_pool": None,
|
||||
}),
|
||||
):
|
||||
original_runtime = {
|
||||
"api_key": "***",
|
||||
"base_url": "https://openrouter.ai/api/v1",
|
||||
"provider": "openrouter",
|
||||
"api_mode": "chat_completions",
|
||||
"command": None,
|
||||
"args": [],
|
||||
"credential_pool": None,
|
||||
}
|
||||
|
||||
with patch("agent.smart_model_routing.resolve_turn_route", return_value={
|
||||
"model": "gpt-5.4",
|
||||
"runtime": dict(original_runtime),
|
||||
"label": None,
|
||||
"signature": ("gpt-5.4", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()),
|
||||
}):
|
||||
route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")
|
||||
|
||||
assert route["runtime"]["provider"] == "openai-codex"
|
||||
assert route["runtime"]["api_mode"] == "codex_responses"
|
||||
# Provider should NOT have changed
|
||||
assert route["runtime"]["provider"] == "openrouter"
|
||||
assert route["runtime"]["api_mode"] == "chat_completions"
|
||||
# But request_overrides should be set
|
||||
assert route["request_overrides"] == {"service_tier": "priority"}
|
||||
|
||||
def test_turn_route_keeps_primary_runtime_when_model_has_no_fast_backend(self):
|
||||
@@ -190,7 +229,7 @@ class TestFastModeRouting(unittest.TestCase):
|
||||
primary_route = {
|
||||
"model": "gpt-5.3-codex",
|
||||
"runtime": {
|
||||
"api_key": "primary-key",
|
||||
"api_key": "***",
|
||||
"base_url": "https://openrouter.ai/api/v1",
|
||||
"provider": "openrouter",
|
||||
"api_mode": "chat_completions",
|
||||
|
||||
@@ -225,6 +225,26 @@ class TestDeveloperRoleSwap:
|
||||
assert kwargs["messages"][0]["role"] == "developer"
|
||||
|
||||
|
||||
class TestBuildApiKwargsChatCompletionsServiceTier:
|
||||
"""service_tier via request_overrides works on the chat_completions path."""
|
||||
|
||||
def test_includes_service_tier_via_request_overrides(self, monkeypatch):
|
||||
agent = _make_agent(monkeypatch, "openrouter")
|
||||
agent.model = "gpt-4.1"
|
||||
agent.request_overrides = {"service_tier": "priority"}
|
||||
messages = [{"role": "user", "content": "hi"}]
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
assert kwargs["service_tier"] == "priority"
|
||||
|
||||
def test_no_service_tier_when_overrides_empty(self, monkeypatch):
|
||||
agent = _make_agent(monkeypatch, "openrouter")
|
||||
agent.model = "gpt-4.1"
|
||||
agent.request_overrides = {}
|
||||
messages = [{"role": "user", "content": "hi"}]
|
||||
kwargs = agent._build_api_kwargs(messages)
|
||||
assert "service_tier" not in kwargs
|
||||
|
||||
|
||||
class TestBuildApiKwargsAIGateway:
|
||||
def test_uses_chat_completions_format(self, monkeypatch):
|
||||
agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
|
||||
|
||||
Reference in New Issue
Block a user