mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 15:31:38 +08:00
Compare commits
1 Commits
feat/volce
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7d001a2da2 |
35
cli.py
35
cli.py
@@ -2572,7 +2572,7 @@ class HermesCLI:
|
|||||||
def _resolve_turn_agent_config(self, user_message: str) -> dict:
|
def _resolve_turn_agent_config(self, user_message: str) -> dict:
|
||||||
"""Resolve model/runtime overrides for a single user turn."""
|
"""Resolve model/runtime overrides for a single user turn."""
|
||||||
from agent.smart_model_routing import resolve_turn_route
|
from agent.smart_model_routing import resolve_turn_route
|
||||||
from hermes_cli.models import resolve_fast_mode_runtime
|
from hermes_cli.models import resolve_fast_mode_overrides
|
||||||
|
|
||||||
route = resolve_turn_route(
|
route = resolve_turn_route(
|
||||||
user_message,
|
user_message,
|
||||||
@@ -2595,27 +2595,10 @@ class HermesCLI:
|
|||||||
return route
|
return route
|
||||||
|
|
||||||
try:
|
try:
|
||||||
fast_runtime = resolve_fast_mode_runtime(route.get("model"))
|
overrides = resolve_fast_mode_overrides(route.get("model"))
|
||||||
except Exception:
|
except Exception:
|
||||||
route["request_overrides"] = None
|
overrides = None
|
||||||
return route
|
route["request_overrides"] = overrides
|
||||||
if not fast_runtime:
|
|
||||||
route["request_overrides"] = None
|
|
||||||
return route
|
|
||||||
|
|
||||||
runtime = fast_runtime["runtime"]
|
|
||||||
route["runtime"] = runtime
|
|
||||||
route["request_overrides"] = fast_runtime["request_overrides"]
|
|
||||||
route["label"] = f"fast route → {route.get('model')} ({runtime.get('provider')})"
|
|
||||||
route["signature"] = (
|
|
||||||
route.get("model"),
|
|
||||||
runtime.get("provider"),
|
|
||||||
runtime.get("base_url"),
|
|
||||||
runtime.get("api_mode"),
|
|
||||||
runtime.get("command"),
|
|
||||||
tuple(runtime.get("args") or ()),
|
|
||||||
json.dumps(route["request_overrides"], sort_keys=True),
|
|
||||||
)
|
|
||||||
return route
|
return route
|
||||||
|
|
||||||
def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None, request_overrides: dict | None = None) -> bool:
|
def _init_agent(self, *, model_override: str = None, runtime_override: dict = None, route_label: str = None, request_overrides: dict | None = None) -> bool:
|
||||||
@@ -5662,15 +5645,15 @@ class HermesCLI:
|
|||||||
_cprint(f" {_GOLD}✓ Reasoning effort set to '{arg}' (session only){_RST}")
|
_cprint(f" {_GOLD}✓ Reasoning effort set to '{arg}' (session only){_RST}")
|
||||||
|
|
||||||
def _handle_fast_command(self, cmd: str):
|
def _handle_fast_command(self, cmd: str):
|
||||||
"""Handle /fast — choose the Codex Responses service tier."""
|
"""Handle /fast — toggle OpenAI Priority Processing (service_tier)."""
|
||||||
if not self._fast_command_available():
|
if not self._fast_command_available():
|
||||||
_cprint(" (._.) /fast is only available for models that explicitly expose a fast backend.")
|
_cprint(" (._.) /fast is only available for OpenAI models that support Priority Processing.")
|
||||||
return
|
return
|
||||||
|
|
||||||
parts = cmd.strip().split(maxsplit=1)
|
parts = cmd.strip().split(maxsplit=1)
|
||||||
if len(parts) < 2 or parts[1].strip().lower() == "status":
|
if len(parts) < 2 or parts[1].strip().lower() == "status":
|
||||||
status = "fast" if self.service_tier == "priority" else "normal"
|
status = "fast" if self.service_tier == "priority" else "normal"
|
||||||
_cprint(f" {_GOLD}Codex inference tier: {status}{_RST}")
|
_cprint(f" {_GOLD}Priority Processing: {status}{_RST}")
|
||||||
_cprint(f" {_DIM}Usage: /fast [normal|fast|status]{_RST}")
|
_cprint(f" {_DIM}Usage: /fast [normal|fast|status]{_RST}")
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -5691,9 +5674,9 @@ class HermesCLI:
|
|||||||
|
|
||||||
self.agent = None # Force agent re-init with new service-tier config
|
self.agent = None # Force agent re-init with new service-tier config
|
||||||
if save_config_value("agent.service_tier", saved_value):
|
if save_config_value("agent.service_tier", saved_value):
|
||||||
_cprint(f" {_GOLD}✓ Codex inference tier set to {label} (saved to config){_RST}")
|
_cprint(f" {_GOLD}✓ Priority Processing set to {label} (saved to config){_RST}")
|
||||||
else:
|
else:
|
||||||
_cprint(f" {_GOLD}✓ Codex inference tier set to {label} (session only){_RST}")
|
_cprint(f" {_GOLD}✓ Priority Processing set to {label} (session only){_RST}")
|
||||||
|
|
||||||
def _on_reasoning(self, reasoning_text: str):
|
def _on_reasoning(self, reasoning_text: str):
|
||||||
"""Callback for intermediate reasoning display during tool-call loops."""
|
"""Callback for intermediate reasoning display during tool-call loops."""
|
||||||
|
|||||||
@@ -100,7 +100,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
|||||||
CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
|
CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
|
||||||
args_hint="[level|show|hide]",
|
args_hint="[level|show|hide]",
|
||||||
subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off")),
|
subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off")),
|
||||||
CommandDef("fast", "Choose Codex inference tier (Normal/Fast)", "Configuration",
|
CommandDef("fast", "Toggle OpenAI Priority Processing (Normal/Fast)", "Configuration",
|
||||||
cli_only=True, args_hint="[normal|fast|status]",
|
cli_only=True, args_hint="[normal|fast|status]",
|
||||||
subcommands=("normal", "fast", "status", "on", "off")),
|
subcommands=("normal", "fast", "status", "on", "off")),
|
||||||
CommandDef("skin", "Show or change the display skin/theme", "Configuration",
|
CommandDef("skin", "Show or change the display skin/theme", "Configuration",
|
||||||
|
|||||||
@@ -1017,58 +1017,45 @@ def provider_label(provider: Optional[str]) -> str:
|
|||||||
return _PROVIDER_LABELS.get(normalized, original or "OpenRouter")
|
return _PROVIDER_LABELS.get(normalized, original or "OpenRouter")
|
||||||
|
|
||||||
|
|
||||||
_FAST_MODE_BACKEND_CONFIG: dict[str, dict[str, Any]] = {
|
# Models that support OpenAI Priority Processing (service_tier="priority").
|
||||||
"gpt-5.4": {
|
# See https://openai.com/api-priority-processing/ for the canonical list.
|
||||||
"provider": "openai-codex",
|
# Only the bare model slug is stored (no vendor prefix).
|
||||||
"request_overrides": {"service_tier": "priority"},
|
_PRIORITY_PROCESSING_MODELS: frozenset[str] = frozenset({
|
||||||
},
|
"gpt-5.4",
|
||||||
}
|
"gpt-5.4-mini",
|
||||||
|
"gpt-5.2",
|
||||||
|
"gpt-5.1",
|
||||||
def fast_mode_backend_config(model_id: Optional[str]) -> dict[str, Any] | None:
|
"gpt-5",
|
||||||
"""Return backend config for models that expose Fast mode.
|
"gpt-5-mini",
|
||||||
|
"gpt-4.1",
|
||||||
To expose Fast mode for a new model, add its normalized model slug to
|
"gpt-4.1-mini",
|
||||||
``_FAST_MODE_BACKEND_CONFIG`` along with the backend runtime selection and
|
"gpt-4.1-nano",
|
||||||
backend-specific request overrides Hermes should apply.
|
"gpt-4o",
|
||||||
"""
|
"gpt-4o-mini",
|
||||||
raw = str(model_id or "").strip().lower()
|
"o3",
|
||||||
if "/" in raw:
|
"o4-mini",
|
||||||
raw = raw.split("/", 1)[1]
|
})
|
||||||
config = _FAST_MODE_BACKEND_CONFIG.get(raw)
|
|
||||||
return dict(config) if config else None
|
|
||||||
|
|
||||||
|
|
||||||
def model_supports_fast_mode(model_id: Optional[str]) -> bool:
|
def model_supports_fast_mode(model_id: Optional[str]) -> bool:
|
||||||
"""Return whether Hermes should expose Fast mode for the active model."""
|
"""Return whether Hermes should expose the /fast (Priority Processing) toggle."""
|
||||||
return fast_mode_backend_config(model_id) is not None
|
raw = str(model_id or "").strip().lower()
|
||||||
|
if "/" in raw:
|
||||||
|
raw = raw.split("/", 1)[1]
|
||||||
|
return raw in _PRIORITY_PROCESSING_MODELS
|
||||||
|
|
||||||
|
|
||||||
def resolve_fast_mode_runtime(model_id: Optional[str]) -> dict[str, Any] | None:
|
def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
|
||||||
"""Resolve runtime selection and request overrides for a fast-mode model."""
|
"""Return request_overrides for Priority Processing, or None if unsupported.
|
||||||
cfg = fast_mode_backend_config(model_id)
|
|
||||||
if not cfg:
|
Unlike the previous ``resolve_fast_mode_runtime``, this does NOT force a
|
||||||
|
provider/backend switch. The ``service_tier`` parameter is injected into
|
||||||
|
whatever API path the user is already on (Codex Responses, Chat Completions,
|
||||||
|
or OpenRouter passthrough).
|
||||||
|
"""
|
||||||
|
if not model_supports_fast_mode(model_id):
|
||||||
return None
|
return None
|
||||||
|
return {"service_tier": "priority"}
|
||||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
|
||||||
|
|
||||||
runtime = resolve_runtime_provider(
|
|
||||||
requested=cfg.get("provider"),
|
|
||||||
explicit_base_url=cfg.get("base_url"),
|
|
||||||
explicit_api_key=cfg.get("api_key"),
|
|
||||||
)
|
|
||||||
return {
|
|
||||||
"runtime": {
|
|
||||||
"api_key": runtime.get("api_key"),
|
|
||||||
"base_url": runtime.get("base_url"),
|
|
||||||
"provider": runtime.get("provider"),
|
|
||||||
"api_mode": runtime.get("api_mode"),
|
|
||||||
"command": runtime.get("command"),
|
|
||||||
"args": list(runtime.get("args") or []),
|
|
||||||
"credential_pool": runtime.get("credential_pool"),
|
|
||||||
},
|
|
||||||
"request_overrides": dict(cfg.get("request_overrides") or {}),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def _resolve_copilot_catalog_api_key() -> str:
|
def _resolve_copilot_catalog_api_key() -> str:
|
||||||
|
|||||||
@@ -5686,6 +5686,11 @@ class AIAgent:
|
|||||||
if "x.ai" in self._base_url_lower and hasattr(self, "session_id") and self.session_id:
|
if "x.ai" in self._base_url_lower and hasattr(self, "session_id") and self.session_id:
|
||||||
api_kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id}
|
api_kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id}
|
||||||
|
|
||||||
|
# Priority Processing / generic request overrides (e.g. service_tier).
|
||||||
|
# Applied last so overrides win over any defaults set above.
|
||||||
|
if self.request_overrides:
|
||||||
|
api_kwargs.update(self.request_overrides)
|
||||||
|
|
||||||
return api_kwargs
|
return api_kwargs
|
||||||
|
|
||||||
def _supports_reasoning_extra_body(self) -> bool:
|
def _supports_reasoning_extra_body(self) -> bool:
|
||||||
|
|||||||
@@ -108,15 +108,52 @@ class TestHandleFastCommand(unittest.TestCase):
|
|||||||
self.assertTrue(mock_cprint.called)
|
self.assertTrue(mock_cprint.called)
|
||||||
|
|
||||||
|
|
||||||
class TestFastModeRegistry(unittest.TestCase):
|
class TestPriorityProcessingModels(unittest.TestCase):
|
||||||
def test_only_gpt_5_4_is_enabled_for_codex(self):
|
"""Verify the expanded Priority Processing model registry."""
|
||||||
from hermes_cli.models import fast_mode_backend_config
|
|
||||||
|
|
||||||
assert fast_mode_backend_config("gpt-5.4") == {
|
def test_all_documented_models_supported(self):
|
||||||
"provider": "openai-codex",
|
from hermes_cli.models import model_supports_fast_mode
|
||||||
"request_overrides": {"service_tier": "priority"},
|
|
||||||
}
|
# All models from OpenAI's Priority Processing pricing table
|
||||||
assert fast_mode_backend_config("gpt-5.3-codex") is None
|
supported = [
|
||||||
|
"gpt-5.4", "gpt-5.4-mini", "gpt-5.2",
|
||||||
|
"gpt-5.1", "gpt-5", "gpt-5-mini",
|
||||||
|
"gpt-4.1", "gpt-4.1-mini", "gpt-4.1-nano",
|
||||||
|
"gpt-4o", "gpt-4o-mini",
|
||||||
|
"o3", "o4-mini",
|
||||||
|
]
|
||||||
|
for model in supported:
|
||||||
|
assert model_supports_fast_mode(model), f"{model} should support fast mode"
|
||||||
|
|
||||||
|
def test_vendor_prefix_stripped(self):
|
||||||
|
from hermes_cli.models import model_supports_fast_mode
|
||||||
|
|
||||||
|
assert model_supports_fast_mode("openai/gpt-5.4") is True
|
||||||
|
assert model_supports_fast_mode("openai/gpt-4.1") is True
|
||||||
|
assert model_supports_fast_mode("openai/o3") is True
|
||||||
|
|
||||||
|
def test_non_priority_models_rejected(self):
|
||||||
|
from hermes_cli.models import model_supports_fast_mode
|
||||||
|
|
||||||
|
assert model_supports_fast_mode("gpt-5.3-codex") is False
|
||||||
|
assert model_supports_fast_mode("claude-sonnet-4") is False
|
||||||
|
assert model_supports_fast_mode("") is False
|
||||||
|
assert model_supports_fast_mode(None) is False
|
||||||
|
|
||||||
|
def test_resolve_overrides_returns_service_tier(self):
|
||||||
|
from hermes_cli.models import resolve_fast_mode_overrides
|
||||||
|
|
||||||
|
result = resolve_fast_mode_overrides("gpt-5.4")
|
||||||
|
assert result == {"service_tier": "priority"}
|
||||||
|
|
||||||
|
result = resolve_fast_mode_overrides("gpt-4.1")
|
||||||
|
assert result == {"service_tier": "priority"}
|
||||||
|
|
||||||
|
def test_resolve_overrides_none_for_unsupported(self):
|
||||||
|
from hermes_cli.models import resolve_fast_mode_overrides
|
||||||
|
|
||||||
|
assert resolve_fast_mode_overrides("gpt-5.3-codex") is None
|
||||||
|
assert resolve_fast_mode_overrides("claude-sonnet-4") is None
|
||||||
|
|
||||||
|
|
||||||
class TestFastModeRouting(unittest.TestCase):
|
class TestFastModeRouting(unittest.TestCase):
|
||||||
@@ -126,7 +163,16 @@ class TestFastModeRouting(unittest.TestCase):
|
|||||||
|
|
||||||
assert cli_mod.HermesCLI._fast_command_available(stub) is True
|
assert cli_mod.HermesCLI._fast_command_available(stub) is True
|
||||||
|
|
||||||
def test_turn_route_switches_to_model_backend_when_fast_enabled(self):
|
def test_fast_command_exposed_for_non_codex_models(self):
|
||||||
|
cli_mod = _import_cli()
|
||||||
|
stub = SimpleNamespace(provider="openai", requested_provider="openai", model="gpt-4.1", agent=None)
|
||||||
|
assert cli_mod.HermesCLI._fast_command_available(stub) is True
|
||||||
|
|
||||||
|
stub = SimpleNamespace(provider="openrouter", requested_provider="openrouter", model="o3", agent=None)
|
||||||
|
assert cli_mod.HermesCLI._fast_command_available(stub) is True
|
||||||
|
|
||||||
|
def test_turn_route_injects_overrides_without_provider_switch(self):
|
||||||
|
"""Fast mode should add request_overrides but NOT change the provider/runtime."""
|
||||||
cli_mod = _import_cli()
|
cli_mod = _import_cli()
|
||||||
stub = SimpleNamespace(
|
stub = SimpleNamespace(
|
||||||
model="gpt-5.4",
|
model="gpt-5.4",
|
||||||
@@ -141,35 +187,28 @@ class TestFastModeRouting(unittest.TestCase):
|
|||||||
service_tier="priority",
|
service_tier="priority",
|
||||||
)
|
)
|
||||||
|
|
||||||
with (
|
original_runtime = {
|
||||||
patch("agent.smart_model_routing.resolve_turn_route", return_value={
|
"api_key": "***",
|
||||||
"model": "gpt-5.4",
|
|
||||||
"runtime": {
|
|
||||||
"api_key": "primary-key",
|
|
||||||
"base_url": "https://openrouter.ai/api/v1",
|
"base_url": "https://openrouter.ai/api/v1",
|
||||||
"provider": "openrouter",
|
"provider": "openrouter",
|
||||||
"api_mode": "chat_completions",
|
"api_mode": "chat_completions",
|
||||||
"command": None,
|
"command": None,
|
||||||
"args": [],
|
"args": [],
|
||||||
"credential_pool": None,
|
"credential_pool": None,
|
||||||
},
|
}
|
||||||
|
|
||||||
|
with patch("agent.smart_model_routing.resolve_turn_route", return_value={
|
||||||
|
"model": "gpt-5.4",
|
||||||
|
"runtime": dict(original_runtime),
|
||||||
"label": None,
|
"label": None,
|
||||||
"signature": ("gpt-5.4", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()),
|
"signature": ("gpt-5.4", "openrouter", "https://openrouter.ai/api/v1", "chat_completions", None, ()),
|
||||||
}),
|
}):
|
||||||
patch("hermes_cli.runtime_provider.resolve_runtime_provider", return_value={
|
|
||||||
"provider": "openai-codex",
|
|
||||||
"api_mode": "codex_responses",
|
|
||||||
"base_url": "https://chatgpt.com/backend-api/codex",
|
|
||||||
"api_key": "codex-key",
|
|
||||||
"command": None,
|
|
||||||
"args": [],
|
|
||||||
"credential_pool": None,
|
|
||||||
}),
|
|
||||||
):
|
|
||||||
route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")
|
route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")
|
||||||
|
|
||||||
assert route["runtime"]["provider"] == "openai-codex"
|
# Provider should NOT have changed
|
||||||
assert route["runtime"]["api_mode"] == "codex_responses"
|
assert route["runtime"]["provider"] == "openrouter"
|
||||||
|
assert route["runtime"]["api_mode"] == "chat_completions"
|
||||||
|
# But request_overrides should be set
|
||||||
assert route["request_overrides"] == {"service_tier": "priority"}
|
assert route["request_overrides"] == {"service_tier": "priority"}
|
||||||
|
|
||||||
def test_turn_route_keeps_primary_runtime_when_model_has_no_fast_backend(self):
|
def test_turn_route_keeps_primary_runtime_when_model_has_no_fast_backend(self):
|
||||||
@@ -190,7 +229,7 @@ class TestFastModeRouting(unittest.TestCase):
|
|||||||
primary_route = {
|
primary_route = {
|
||||||
"model": "gpt-5.3-codex",
|
"model": "gpt-5.3-codex",
|
||||||
"runtime": {
|
"runtime": {
|
||||||
"api_key": "primary-key",
|
"api_key": "***",
|
||||||
"base_url": "https://openrouter.ai/api/v1",
|
"base_url": "https://openrouter.ai/api/v1",
|
||||||
"provider": "openrouter",
|
"provider": "openrouter",
|
||||||
"api_mode": "chat_completions",
|
"api_mode": "chat_completions",
|
||||||
|
|||||||
@@ -225,6 +225,26 @@ class TestDeveloperRoleSwap:
|
|||||||
assert kwargs["messages"][0]["role"] == "developer"
|
assert kwargs["messages"][0]["role"] == "developer"
|
||||||
|
|
||||||
|
|
||||||
|
class TestBuildApiKwargsChatCompletionsServiceTier:
|
||||||
|
"""service_tier via request_overrides works on the chat_completions path."""
|
||||||
|
|
||||||
|
def test_includes_service_tier_via_request_overrides(self, monkeypatch):
|
||||||
|
agent = _make_agent(monkeypatch, "openrouter")
|
||||||
|
agent.model = "gpt-4.1"
|
||||||
|
agent.request_overrides = {"service_tier": "priority"}
|
||||||
|
messages = [{"role": "user", "content": "hi"}]
|
||||||
|
kwargs = agent._build_api_kwargs(messages)
|
||||||
|
assert kwargs["service_tier"] == "priority"
|
||||||
|
|
||||||
|
def test_no_service_tier_when_overrides_empty(self, monkeypatch):
|
||||||
|
agent = _make_agent(monkeypatch, "openrouter")
|
||||||
|
agent.model = "gpt-4.1"
|
||||||
|
agent.request_overrides = {}
|
||||||
|
messages = [{"role": "user", "content": "hi"}]
|
||||||
|
kwargs = agent._build_api_kwargs(messages)
|
||||||
|
assert "service_tier" not in kwargs
|
||||||
|
|
||||||
|
|
||||||
class TestBuildApiKwargsAIGateway:
|
class TestBuildApiKwargsAIGateway:
|
||||||
def test_uses_chat_completions_format(self, monkeypatch):
|
def test_uses_chat_completions_format(self, monkeypatch):
|
||||||
agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
|
agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
|
||||||
|
|||||||
Reference in New Issue
Block a user