mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-04 01:37:34 +08:00
Compare commits
1 Commits
fix/plugin
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
eed4df6224 |
@@ -103,6 +103,11 @@ _COMMON_BETAS = [
|
|||||||
# fall back to the provider's default response path.
|
# fall back to the provider's default response path.
|
||||||
_TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
|
_TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
|
||||||
|
|
||||||
|
# Fast mode beta — enables the ``speed: "fast"`` request parameter for
|
||||||
|
# significantly higher output token throughput on Opus 4.6 (~2.5x).
|
||||||
|
# See https://platform.claude.com/docs/en/build-with-claude/fast-mode
|
||||||
|
_FAST_MODE_BETA = "fast-mode-2026-02-01"
|
||||||
|
|
||||||
# Additional beta headers required for OAuth/subscription auth.
|
# Additional beta headers required for OAuth/subscription auth.
|
||||||
# Matches what Claude Code (and pi-ai / OpenCode) send.
|
# Matches what Claude Code (and pi-ai / OpenCode) send.
|
||||||
_OAUTH_ONLY_BETAS = [
|
_OAUTH_ONLY_BETAS = [
|
||||||
@@ -1256,6 +1261,7 @@ def build_anthropic_kwargs(
|
|||||||
preserve_dots: bool = False,
|
preserve_dots: bool = False,
|
||||||
context_length: Optional[int] = None,
|
context_length: Optional[int] = None,
|
||||||
base_url: str | None = None,
|
base_url: str | None = None,
|
||||||
|
fast_mode: bool = False,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Build kwargs for anthropic.messages.create().
|
"""Build kwargs for anthropic.messages.create().
|
||||||
|
|
||||||
@@ -1289,6 +1295,10 @@ def build_anthropic_kwargs(
|
|||||||
|
|
||||||
When *base_url* points to a third-party Anthropic-compatible endpoint,
|
When *base_url* points to a third-party Anthropic-compatible endpoint,
|
||||||
thinking block signatures are stripped (they are Anthropic-proprietary).
|
thinking block signatures are stripped (they are Anthropic-proprietary).
|
||||||
|
|
||||||
|
When *fast_mode* is True, adds ``speed: "fast"`` and the fast-mode beta
|
||||||
|
header for ~2.5x faster output throughput on Opus 4.6. Currently only
|
||||||
|
supported on native Anthropic endpoints (not third-party compatible ones).
|
||||||
"""
|
"""
|
||||||
system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
|
system, anthropic_messages = convert_messages_to_anthropic(messages, base_url=base_url)
|
||||||
anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
|
anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
|
||||||
@@ -1387,6 +1397,20 @@ def build_anthropic_kwargs(
|
|||||||
kwargs["temperature"] = 1
|
kwargs["temperature"] = 1
|
||||||
kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)
|
kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)
|
||||||
|
|
||||||
|
# ── Fast mode (Opus 4.6 only) ────────────────────────────────────
|
||||||
|
# Adds speed:"fast" + the fast-mode beta header for ~2.5x output speed.
|
||||||
|
# Only for native Anthropic endpoints — third-party providers would
|
||||||
|
# reject the unknown beta header and speed parameter.
|
||||||
|
if fast_mode and not _is_third_party_anthropic_endpoint(base_url):
|
||||||
|
kwargs["speed"] = "fast"
|
||||||
|
# Build extra_headers with ALL applicable betas (the per-request
|
||||||
|
# extra_headers override the client-level anthropic-beta header).
|
||||||
|
betas = list(_common_betas_for_base_url(base_url))
|
||||||
|
if is_oauth:
|
||||||
|
betas.extend(_OAUTH_ONLY_BETAS)
|
||||||
|
betas.append(_FAST_MODE_BETA)
|
||||||
|
kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}
|
||||||
|
|
||||||
return kwargs
|
return kwargs
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
19
cli.py
19
cli.py
@@ -5697,15 +5697,24 @@ class HermesCLI:
|
|||||||
_cprint(f" {_GOLD}✓ Reasoning effort set to '{arg}' (session only){_RST}")
|
_cprint(f" {_GOLD}✓ Reasoning effort set to '{arg}' (session only){_RST}")
|
||||||
|
|
||||||
def _handle_fast_command(self, cmd: str):
|
def _handle_fast_command(self, cmd: str):
|
||||||
"""Handle /fast — toggle OpenAI Priority Processing (service_tier)."""
|
"""Handle /fast — toggle fast mode (OpenAI Priority Processing / Anthropic Fast Mode)."""
|
||||||
if not self._fast_command_available():
|
if not self._fast_command_available():
|
||||||
_cprint(" (._.) /fast is only available for OpenAI models that support Priority Processing.")
|
_cprint(" (._.) /fast is only available for models that support fast mode (OpenAI Priority Processing or Anthropic Fast Mode).")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Determine the branding for the current model
|
||||||
|
try:
|
||||||
|
from hermes_cli.models import _is_anthropic_fast_model
|
||||||
|
agent = getattr(self, "agent", None)
|
||||||
|
model = getattr(agent, "model", None) or getattr(self, "model", None)
|
||||||
|
feature_name = "Anthropic Fast Mode" if _is_anthropic_fast_model(model) else "Priority Processing"
|
||||||
|
except Exception:
|
||||||
|
feature_name = "Fast mode"
|
||||||
|
|
||||||
parts = cmd.strip().split(maxsplit=1)
|
parts = cmd.strip().split(maxsplit=1)
|
||||||
if len(parts) < 2 or parts[1].strip().lower() == "status":
|
if len(parts) < 2 or parts[1].strip().lower() == "status":
|
||||||
status = "fast" if self.service_tier == "priority" else "normal"
|
status = "fast" if self.service_tier == "priority" else "normal"
|
||||||
_cprint(f" {_GOLD}Priority Processing: {status}{_RST}")
|
_cprint(f" {_GOLD}{feature_name}: {status}{_RST}")
|
||||||
_cprint(f" {_DIM}Usage: /fast [normal|fast|status]{_RST}")
|
_cprint(f" {_DIM}Usage: /fast [normal|fast|status]{_RST}")
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -5726,9 +5735,9 @@ class HermesCLI:
|
|||||||
|
|
||||||
self.agent = None # Force agent re-init with new service-tier config
|
self.agent = None # Force agent re-init with new service-tier config
|
||||||
if save_config_value("agent.service_tier", saved_value):
|
if save_config_value("agent.service_tier", saved_value):
|
||||||
_cprint(f" {_GOLD}✓ Priority Processing set to {label} (saved to config){_RST}")
|
_cprint(f" {_GOLD}✓ {feature_name} set to {label} (saved to config){_RST}")
|
||||||
else:
|
else:
|
||||||
_cprint(f" {_GOLD}✓ Priority Processing set to {label} (session only){_RST}")
|
_cprint(f" {_GOLD}✓ {feature_name} set to {label} (session only){_RST}")
|
||||||
|
|
||||||
def _on_reasoning(self, reasoning_text: str):
|
def _on_reasoning(self, reasoning_text: str):
|
||||||
"""Callback for intermediate reasoning display during tool-call loops."""
|
"""Callback for intermediate reasoning display during tool-call loops."""
|
||||||
|
|||||||
@@ -100,7 +100,7 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
|||||||
CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
|
CommandDef("reasoning", "Manage reasoning effort and display", "Configuration",
|
||||||
args_hint="[level|show|hide]",
|
args_hint="[level|show|hide]",
|
||||||
subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off")),
|
subcommands=("none", "minimal", "low", "medium", "high", "xhigh", "show", "hide", "on", "off")),
|
||||||
CommandDef("fast", "Toggle OpenAI Priority Processing (Normal/Fast)", "Configuration",
|
CommandDef("fast", "Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode (Normal/Fast)", "Configuration",
|
||||||
cli_only=True, args_hint="[normal|fast|status]",
|
cli_only=True, args_hint="[normal|fast|status]",
|
||||||
subcommands=("normal", "fast", "status", "on", "off")),
|
subcommands=("normal", "fast", "status", "on", "off")),
|
||||||
CommandDef("skin", "Show or change the display skin/theme", "Configuration",
|
CommandDef("skin", "Show or change the display skin/theme", "Configuration",
|
||||||
|
|||||||
@@ -1036,25 +1036,57 @@ _PRIORITY_PROCESSING_MODELS: frozenset[str] = frozenset({
|
|||||||
"o4-mini",
|
"o4-mini",
|
||||||
})
|
})
|
||||||
|
|
||||||
|
# Models that support Anthropic Fast Mode (speed="fast").
# See https://platform.claude.com/docs/en/build-with-claude/fast-mode
# Currently only Claude Opus 4.6. Both hyphen and dot variants are stored
# to handle native Anthropic (claude-opus-4-6) and OpenRouter (claude-opus-4.6).
_ANTHROPIC_FAST_MODE_MODELS: frozenset[str] = frozenset({
    "claude-opus-4-6",
    "claude-opus-4.6",
})


def _strip_vendor_prefix(model_id: Optional[str]) -> str:
    """Strip vendor/ prefix from a model ID (e.g. 'anthropic/claude-opus-4-6' -> 'claude-opus-4-6').

    The ID is also trimmed and lower-cased.  ``None`` and other falsy values
    normalize to the empty string.  Only the first ``/``-separated segment is
    removed; anything after it is returned untouched.
    """
    raw = str(model_id or "").strip().lower()
    if "/" in raw:
        raw = raw.split("/", 1)[1]
    return raw


def _anthropic_base_model(model_id: Optional[str]) -> str:
    """Reduce *model_id* to the bare name stored in ``_ANTHROPIC_FAST_MODE_MODELS``.

    Removes, in order: the vendor prefix, any ``:tag`` variant suffix
    (e.g. ``:fast``, ``:beta``), and a trailing ``-YYYYMMDD`` snapshot date
    (e.g. ``claude-opus-4-6-20260401`` -> ``claude-opus-4-6``).
    """
    base = _strip_vendor_prefix(model_id).split(":")[0]
    head, sep, tail = base.rpartition("-")
    # Only an 8-digit ASCII tail counts as a snapshot date; version parts such
    # as "6" or "4.6" are shorter and must stay attached to the model name.
    if sep and len(tail) == 8 and tail.isascii() and tail.isdigit():
        base = head
    return base


def model_supports_fast_mode(model_id: Optional[str]) -> bool:
    """Return whether Hermes should expose the /fast toggle for this model.

    True when the model is an Anthropic Fast Mode model (see
    ``_is_anthropic_fast_model``) or when its vendor-stripped ID is listed in
    ``_PRIORITY_PROCESSING_MODELS``.
    """
    # Delegate the Anthropic check so both predicates always agree; otherwise
    # resolve_fast_mode_overrides could pick the wrong override key.
    if _is_anthropic_fast_model(model_id):
        return True
    return _strip_vendor_prefix(model_id) in _PRIORITY_PROCESSING_MODELS


def _is_anthropic_fast_model(model_id: Optional[str]) -> bool:
    """Return True if the model supports Anthropic's fast mode (speed='fast').

    Matching ignores the vendor prefix, ``:tag`` variant suffixes and a
    trailing ``-YYYYMMDD`` snapshot date.
    """
    return _anthropic_base_model(model_id) in _ANTHROPIC_FAST_MODE_MODELS
|
||||||
|
|
||||||
|
|
||||||
def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None:
    """Pick the request overrides that switch on fast/priority mode for *model_id*.

    Returns:
        ``None`` when ``model_supports_fast_mode`` rejects the model.
        ``{"speed": "fast"}`` when ``_is_anthropic_fast_model`` accepts it
        (Anthropic Fast Mode beta).
        ``{"service_tier": "priority"}`` for every other supported model
        (OpenAI Priority Processing).

    The overrides are injected into the API request kwargs by
    ``_build_api_kwargs`` in run_agent.py; each API path handles its own
    key (``service_tier`` for OpenAI/Codex, ``speed`` for Anthropic Messages).
    """
    if not model_supports_fast_mode(model_id):
        return None
    key, value = (
        ("speed", "fast")
        if _is_anthropic_fast_model(model_id)
        else ("service_tier", "priority")
    )
    return {key: value}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -5466,6 +5466,7 @@ class AIAgent:
|
|||||||
preserve_dots=self._anthropic_preserve_dots(),
|
preserve_dots=self._anthropic_preserve_dots(),
|
||||||
context_length=ctx_len,
|
context_length=ctx_len,
|
||||||
base_url=getattr(self, "_anthropic_base_url", None),
|
base_url=getattr(self, "_anthropic_base_url", None),
|
||||||
|
fast_mode=self.request_overrides.get("speed") == "fast",
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.api_mode == "codex_responses":
|
if self.api_mode == "codex_responses":
|
||||||
|
|||||||
@@ -247,6 +247,163 @@ class TestFastModeRouting(unittest.TestCase):
|
|||||||
assert route.get("request_overrides") is None
|
assert route.get("request_overrides") is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestAnthropicFastMode(unittest.TestCase):
    """Anthropic Fast Mode: model detection, override resolution and CLI wiring."""

    def test_anthropic_opus_supported(self):
        from hermes_cli.models import model_supports_fast_mode

        supported_ids = (
            "claude-opus-4-6",  # native Anthropic spelling (hyphens)
            "claude-opus-4.6",  # OpenRouter spelling (dots)
            "anthropic/claude-opus-4-6",  # vendor-prefixed variants
            "anthropic/claude-opus-4.6",
        )
        for model_id in supported_ids:
            self.assertIs(model_supports_fast_mode(model_id), True, model_id)

    def test_anthropic_non_opus_rejected(self):
        from hermes_cli.models import model_supports_fast_mode

        rejected_ids = (
            "claude-sonnet-4-6",
            "claude-sonnet-4.6",
            "claude-haiku-4-5",
            "anthropic/claude-sonnet-4.6",
        )
        for model_id in rejected_ids:
            self.assertIs(model_supports_fast_mode(model_id), False, model_id)

    def test_anthropic_variant_tags_stripped(self):
        from hermes_cli.models import model_supports_fast_mode

        # Whatever follows the colon (an OpenRouter variant tag) is ignored.
        for model_id in ("claude-opus-4.6:fast", "claude-opus-4.6:beta"):
            self.assertIs(model_supports_fast_mode(model_id), True, model_id)

    def test_resolve_overrides_returns_speed_for_anthropic(self):
        from hermes_cli.models import resolve_fast_mode_overrides

        for model_id in ("claude-opus-4-6", "anthropic/claude-opus-4.6"):
            self.assertEqual(resolve_fast_mode_overrides(model_id), {"speed": "fast"})

    def test_resolve_overrides_returns_service_tier_for_openai(self):
        """OpenAI models keep the service_tier override rather than speed."""
        from hermes_cli.models import resolve_fast_mode_overrides

        self.assertEqual(
            resolve_fast_mode_overrides("gpt-5.4"),
            {"service_tier": "priority"},
        )

    def test_is_anthropic_fast_model(self):
        from hermes_cli.models import _is_anthropic_fast_model

        expectations = (
            ("claude-opus-4-6", True),
            ("claude-opus-4.6", True),
            ("anthropic/claude-opus-4-6", True),
            ("gpt-5.4", False),
            ("claude-sonnet-4-6", False),
        )
        for model_id, expected in expectations:
            self.assertIs(_is_anthropic_fast_model(model_id), expected, model_id)

    def test_fast_command_exposed_for_anthropic_model(self):
        cli_mod = _import_cli()
        stub = SimpleNamespace(
            provider="anthropic",
            requested_provider="anthropic",
            model="claude-opus-4-6",
            agent=None,
        )
        self.assertIs(cli_mod.HermesCLI._fast_command_available(stub), True)

    def test_fast_command_hidden_for_anthropic_sonnet(self):
        cli_mod = _import_cli()
        stub = SimpleNamespace(
            provider="anthropic",
            requested_provider="anthropic",
            model="claude-sonnet-4-6",
            agent=None,
        )
        self.assertIs(cli_mod.HermesCLI._fast_command_available(stub), False)

    def test_turn_route_injects_speed_for_anthropic(self):
        """An Anthropic route carries the speed:'fast' override, not service_tier."""
        cli_mod = _import_cli()
        model_name = "claude-opus-4-6"
        endpoint = "https://api.anthropic.com"

        stub = SimpleNamespace(
            model=model_name,
            api_key="sk-ant-test",
            base_url=endpoint,
            provider="anthropic",
            api_mode="anthropic_messages",
            acp_command=None,
            acp_args=[],
            _credential_pool=None,
            _smart_model_routing={},
            service_tier="priority",
        )

        original_runtime = {
            "api_key": "***",
            "base_url": endpoint,
            "provider": "anthropic",
            "api_mode": "anthropic_messages",
            "command": None,
            "args": [],
            "credential_pool": None,
        }

        # Canned result handed back by the patched router.
        fake_route = {
            "model": model_name,
            "runtime": dict(original_runtime),
            "label": None,
            "signature": (model_name, "anthropic", endpoint, "anthropic_messages", None, ()),
        }

        with patch("agent.smart_model_routing.resolve_turn_route", return_value=fake_route):
            route = cli_mod.HermesCLI._resolve_turn_agent_config(stub, "hi")

        self.assertEqual(route["runtime"]["provider"], "anthropic")
        self.assertEqual(route["request_overrides"], {"speed": "fast"})
|
||||||
|
|
||||||
|
|
||||||
|
class TestAnthropicFastModeAdapter(unittest.TestCase):
    """Check how build_anthropic_kwargs reacts to the fast_mode parameter."""

    @staticmethod
    def _build(**overrides):
        """Call build_anthropic_kwargs with the shared minimal request plus *overrides*."""
        from agent.anthropic_adapter import build_anthropic_kwargs

        return build_anthropic_kwargs(
            model="claude-opus-4-6",
            messages=[{"role": "user", "content": [{"type": "text", "text": "hi"}]}],
            tools=None,
            max_tokens=None,
            reasoning_config=None,
            **overrides,
        )

    def test_fast_mode_adds_speed_and_beta(self):
        from agent.anthropic_adapter import _FAST_MODE_BETA

        kwargs = self._build(fast_mode=True)

        self.assertEqual(kwargs.get("speed"), "fast")
        self.assertIn("extra_headers", kwargs)
        self.assertIn(_FAST_MODE_BETA, kwargs["extra_headers"].get("anthropic-beta", ""))

    def test_fast_mode_off_no_speed(self):
        kwargs = self._build(fast_mode=False)

        self.assertNotIn("speed", kwargs)
        self.assertNotIn("extra_headers", kwargs)

    def test_fast_mode_skipped_for_third_party_endpoint(self):
        kwargs = self._build(
            fast_mode=True,
            base_url="https://api.minimax.io/anthropic/v1",
        )

        # A third-party endpoint must receive neither the speed parameter
        # nor the fast-mode beta header.
        self.assertNotIn("speed", kwargs)
        self.assertNotIn("extra_headers", kwargs)
|
||||||
|
|
||||||
|
|
||||||
class TestConfigDefault(unittest.TestCase):
|
class TestConfigDefault(unittest.TestCase):
|
||||||
def test_default_config_has_service_tier(self):
|
def test_default_config_has_service_tier(self):
|
||||||
from hermes_cli.config import DEFAULT_CONFIG
|
from hermes_cli.config import DEFAULT_CONFIG
|
||||||
|
|||||||
Reference in New Issue
Block a user