feat: add Anthropic Fast Mode support to /fast command (#7037)

Extends the /fast command to support Anthropic's Fast Mode beta in addition
to OpenAI Priority Processing. When enabled on Claude Opus 4.6, adds
speed:"fast" and the fast-mode-2026-02-01 beta header to API requests for
~2.5x faster output token throughput.

Changes:
- hermes_cli/models.py: Add _ANTHROPIC_FAST_MODE_MODELS registry,
  model_supports_fast_mode() now recognizes Claude Opus 4.6,
  resolve_fast_mode_overrides() returns {speed: fast} for Anthropic
  vs {service_tier: priority} for OpenAI
- agent/anthropic_adapter.py: Add _FAST_MODE_BETA constant,
  build_anthropic_kwargs() accepts fast_mode=True which injects
  speed:fast + beta header via extra_headers (skipped for third-party
  Anthropic-compatible endpoints like MiniMax)
- run_agent.py: Pass fast_mode to build_anthropic_kwargs in the
  anthropic_messages path of _build_api_kwargs()
- cli.py: Update _handle_fast_command with provider-aware messaging
  (shows 'Anthropic Fast Mode' vs 'Priority Processing')
- hermes_cli/commands.py: Update /fast description to mention both
  providers
- tests: 13 new tests covering Anthropic model detection, override
  resolution, CLI availability, routing, adapter kwargs, and
  third-party endpoint safety

This commit is contained in:
Teknium
2026-04-10 02:32:15 -07:00
committed by GitHub
parent 0848a79476
commit 8779a268a7
6 changed files with 237 additions and 14 deletions

19
cli.py
View File

@@ -5697,15 +5697,24 @@ class HermesCLI:
_cprint(f" {_GOLD}✓ Reasoning effort set to '{arg}' (session only){_RST}")
def _handle_fast_command(self, cmd: str):
"""Handle /fast — toggle OpenAI Priority Processing (service_tier)."""
"""Handle /fast — toggle fast mode (OpenAI Priority Processing / Anthropic Fast Mode)."""
if not self._fast_command_available():
_cprint(" (._.) /fast is only available for OpenAI models that support Priority Processing.")
_cprint(" (._.) /fast is only available for models that support fast mode (OpenAI Priority Processing or Anthropic Fast Mode).")
return
# Determine the branding for the current model
try:
from hermes_cli.models import _is_anthropic_fast_model
agent = getattr(self, "agent", None)
model = getattr(agent, "model", None) or getattr(self, "model", None)
feature_name = "Anthropic Fast Mode" if _is_anthropic_fast_model(model) else "Priority Processing"
except Exception:
feature_name = "Fast mode"
parts = cmd.strip().split(maxsplit=1)
if len(parts) < 2 or parts[1].strip().lower() == "status":
status = "fast" if self.service_tier == "priority" else "normal"
_cprint(f" {_GOLD}Priority Processing: {status}{_RST}")
_cprint(f" {_GOLD}{feature_name}: {status}{_RST}")
_cprint(f" {_DIM}Usage: /fast [normal|fast|status]{_RST}")
return
@@ -5726,9 +5735,9 @@ class HermesCLI:
self.agent = None # Force agent re-init with new service-tier config
if save_config_value("agent.service_tier", saved_value):
_cprint(f" {_GOLD}Priority Processing set to {label} (saved to config){_RST}")
_cprint(f" {_GOLD}{feature_name} set to {label} (saved to config){_RST}")
else:
_cprint(f" {_GOLD}Priority Processing set to {label} (session only){_RST}")
_cprint(f" {_GOLD}{feature_name} set to {label} (session only){_RST}")
def _on_reasoning(self, reasoning_text: str):
"""Callback for intermediate reasoning display during tool-call loops."""