mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-04 09:47:54 +08:00
Compare commits
2 Commits
gemini-cli
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c29186ab59 | ||
|
|
83f556692e |
119
cli.py
119
cli.py
@@ -3447,7 +3447,122 @@ class HermesCLI:
|
||||
print(" Run: hermes setup")
|
||||
print()
|
||||
|
||||
print(" To change model or provider, use: hermes model")
|
||||
print(" Switch mid-chat: /model <provider:model>")
|
||||
print(" Full picker: hermes model")
|
||||
|
||||
def _handle_model_switch(self, cmd: str):
    """Handle /model command — switch model mid-session.

    Syntax:
        /model → show current model + usage
        /model sonnet → alias for claude-sonnet-4.6
        /model claude-sonnet-4 → auto-detect provider
        /model openai:gpt-5 → explicit provider
        /model custom → switch to custom endpoint

    On a successful switch the live agent (if one exists) is switched,
    the CLI-level model/provider/credential attributes are updated, and
    the choice is written to the config when the switch result requests
    persistence. All feedback is printed; nothing is returned.

    Args:
        cmd: The full command text as entered. Everything after the
            first run of whitespace is treated as the model argument.
    """
    from hermes_cli.models import _PROVIDER_LABELS
    from hermes_cli.model_switch import (
        suggest_models,
        switch_model,
        switch_to_custom_provider,
    )

    parts = cmd.split(maxsplit=1)
    raw_input = parts[1].strip() if len(parts) > 1 else ""

    # No argument → show current model and how to switch
    if not raw_input:
        provider_label = _PROVIDER_LABELS.get(self.provider, self.provider)
        print(f"\n Current: {self.model} via {provider_label}")
        print()
        print(" Switch with aliases:")
        print(" /model sonnet /model opus /model haiku")
        print(" /model gpt5 /model gpt5-mini /model codex")
        print(" /model gemini /model deepseek /model grok")
        print()
        print(" Or full names: /model anthropic/claude-sonnet-4.5")
        print(" Direct provider: /model anthropic:claude-opus-4")
        print(" Custom endpoint: /model custom:my-local-model")
        print()
        return

    # Handle bare "custom" → auto-detect custom endpoint, then continue
    # through the normal pipeline with an explicit "custom:<model>" input.
    if raw_input.lower() == "custom":
        custom = switch_to_custom_provider()
        if not custom.success:
            print(f"\n Error: {custom.error_message}")
            return
        raw_input = f"custom:{custom.model}"

    # Same model check (quick path)
    if raw_input == self.model:
        print(f"\n Already using {self.model}")
        return

    # Run the shared switch pipeline
    result = switch_model(
        raw_input,
        current_provider=self.provider,
        current_model=self.model,
        current_base_url=self.base_url,
        current_api_key=getattr(self, 'api_key', ''),
    )

    if not result.success:
        # On failure, try to suggest alternatives
        suggestions = suggest_models(raw_input)
        print(f"\n Error: {result.error_message}")
        if suggestions:
            sug_str = ", ".join(suggestions)
            print(f" Did you mean: {sug_str}?")
        print()
        return

    # Same model after resolution (e.g. alias resolved to current)
    if result.new_model == self.model and not result.provider_changed:
        print(f"\n Already using {self.model}")
        return

    # Remember the previous provider for the "Provider: old → new" line.
    old_provider = self.provider

    # Apply the switch to the live agent (if one exists)
    if self.agent is not None:
        self.agent.switch_model(
            new_model=result.new_model,
            new_provider=result.target_provider,
            api_key=result.api_key,
            base_url=result.base_url,
            api_mode=result.api_mode,
        )

    # Update CLI-level state so the next _init_agent() (if agent is None)
    # also picks up the new model
    self.model = result.new_model
    self.provider = result.target_provider
    self.api_key = result.api_key
    self.base_url = result.base_url
    self.api_mode = result.api_mode

    # Persist to config.yaml so future sessions use the new model
    if result.persist:
        save_config_value("model.default", result.new_model)
        save_config_value("model.provider", result.target_provider)
        if result.base_url:
            save_config_value("model.base_url", result.base_url)
        # NOTE(review): when result.base_url is empty, a previously saved
        # model.base_url is left in place. The gateway /model handler
        # removes the stale key in that situation — confirm whether the
        # CLI should do the same.

    # Format output
    new_label = _PROVIDER_LABELS.get(result.target_provider, result.target_provider)
    if result.resolved_via_alias:
        print(f"\n {result.resolved_via_alias} → {result.new_model} via {new_label}")
    else:
        print(f"\n Switched to {result.new_model} via {new_label}")
    if result.provider_changed:
        old_label = _PROVIDER_LABELS.get(old_provider, old_provider)
        print(f" Provider: {old_label} → {new_label}")
    if result.warning_message:
        print(f" Note: {result.warning_message}")
    print(" Prompt cache reset (new model).")
    print()
|
||||
|
||||
def _handle_prompt_command(self, cmd: str):
|
||||
"""Handle the /prompt command to view or set system prompt."""
|
||||
@@ -3998,6 +4113,8 @@ class HermesCLI:
|
||||
self.new_session()
|
||||
elif canonical == "resume":
|
||||
self._handle_resume_command(cmd_original)
|
||||
elif canonical == "model":
|
||||
self._handle_model_switch(cmd_original)
|
||||
elif canonical == "provider":
|
||||
self._show_model_and_providers()
|
||||
elif canonical == "prompt":
|
||||
|
||||
132
gateway/run.py
132
gateway/run.py
@@ -1821,6 +1821,11 @@ class GatewayRunner:
|
||||
adapter._pending_messages[_quick_key] = queued_event
|
||||
return "Queued for the next turn."
|
||||
|
||||
# /model must not be queued as an interrupt — it's a config change
|
||||
# that requires no agent to be running. Return a clear message.
|
||||
if _cmd_def_inner and _cmd_def_inner.name == "model":
|
||||
return "⏳ Agent is running — wait for it to finish or `/stop` first, then switch models."
|
||||
|
||||
# /approve and /deny must bypass the running-agent interrupt path.
|
||||
# The agent thread is blocked on a threading.Event inside
|
||||
# tools/approval.py — sending an interrupt won't unblock it.
|
||||
@@ -1920,6 +1925,9 @@ class GatewayRunner:
|
||||
if canonical == "yolo":
|
||||
return await self._handle_yolo_command(event)
|
||||
|
||||
if canonical == "model":
|
||||
return await self._handle_model_command(event)
|
||||
|
||||
if canonical == "provider":
|
||||
return await self._handle_provider_command(event)
|
||||
|
||||
@@ -3227,6 +3235,130 @@ class GatewayRunner:
|
||||
lines.append(f"_(Requested page {requested_page} was out of range, showing page {page}.)_")
|
||||
return "\n".join(lines)
|
||||
|
||||
async def _handle_model_command(self, event: MessageEvent) -> str:
    """Handle /model command — switch model mid-session.

    Works across all gateway platforms (Telegram, Discord, Slack,
    Matrix, WhatsApp, etc.) since they all route through the same
    gateway command dispatch.

    The current model/provider are read from ``config.yaml``; a
    successful switch is written back there and the session's cached
    agent is evicted so the next message builds a fresh one.

    Args:
        event: The incoming command event; its command arguments are the
            requested model (empty to show the current model and usage).

    Returns:
        The reply text to send back to the user.
    """
    import yaml
    from hermes_cli.models import _PROVIDER_LABELS, normalize_provider
    from hermes_cli.model_switch import (
        suggest_models,
        switch_model,
        switch_to_custom_provider,
    )
    from hermes_cli.config import save_config

    raw_input = event.get_command_args().strip()

    # Resolve current provider/model from config
    config_path = _hermes_home / "config.yaml"
    current_provider = "openrouter"
    current_model = ""
    current_base_url = ""
    current_api_key = ""
    try:
        if config_path.exists():
            with open(config_path, encoding="utf-8") as f:
                cfg = yaml.safe_load(f) or {}
            model_cfg = cfg.get("model", {})
            if isinstance(model_cfg, dict):
                # `or` (not a .get default) so explicit nulls in the YAML
                # fall back to the defaults instead of propagating None.
                current_provider = model_cfg.get("provider") or "openrouter"
                current_model = model_cfg.get("default") or model_cfg.get("model") or ""
                current_base_url = model_cfg.get("base_url") or ""
            elif isinstance(model_cfg, str):
                current_model = model_cfg
    except Exception as e:
        # Best effort: an unreadable config just means we use the defaults.
        logger.debug("Could not read model settings from %s: %s", config_path, e)

    current_provider = normalize_provider(current_provider)

    # No argument → show current model and how to switch
    if not raw_input:
        provider_label = _PROVIDER_LABELS.get(current_provider, current_provider)
        return (
            f"🤖 **Current:** `{current_model}` via {provider_label}\n\n"
            "**Aliases:** sonnet, opus, haiku, gpt5, gpt5-mini, codex, "
            "gemini, deepseek, grok, qwen, minimax\n\n"
            "**Full names:** `/model anthropic/claude-sonnet-4.5`\n"
            "**Direct provider:** `/model anthropic:claude-opus-4`\n"
            "**Custom endpoint:** `/model custom:my-local-model`"
        )

    # Handle bare "custom"
    if raw_input.lower() == "custom":
        custom_result = switch_to_custom_provider()
        if not custom_result.success:
            return f"❌ {custom_result.error_message}"
        raw_input = f"custom:{custom_result.model}"

    # Same model check (quick path)
    if raw_input == current_model:
        return f"Already using `{current_model}`"

    # Run the shared switch pipeline
    result = switch_model(
        raw_input,
        current_provider=current_provider,
        current_model=current_model,
        current_base_url=current_base_url,
        current_api_key=current_api_key,
    )

    if not result.success:
        # Try to suggest alternatives on failure
        suggestions = suggest_models(raw_input, limit=3)
        msg = f"❌ {result.error_message}"
        if suggestions:
            sug_str = ", ".join(f"`{s}`" for s in suggestions)
            msg += f"\nDid you mean: {sug_str}?"
        return msg

    # Same model after resolution
    if result.new_model == current_model and not result.provider_changed:
        return f"Already using `{current_model}`"

    # Persist to config.yaml
    persist_error = ""
    if result.persist:
        try:
            # Start from an empty config when the file does not exist yet,
            # rather than failing on open() and never saving the switch.
            cfg = {}
            if config_path.exists():
                with open(config_path, encoding="utf-8") as f:
                    cfg = yaml.safe_load(f) or {}
            model_cfg = cfg.get("model", {})
            if not isinstance(model_cfg, dict):
                # Legacy shape: `model: <name>` as a bare string.
                model_cfg = {"default": model_cfg} if model_cfg else {}
            model_cfg["default"] = result.new_model
            model_cfg["provider"] = result.target_provider
            if result.base_url:
                model_cfg["base_url"] = result.base_url
            elif "base_url" in model_cfg and not result.is_custom_target:
                # Clear stale base_url when switching away from custom
                del model_cfg["base_url"]
            cfg["model"] = model_cfg
            save_config(cfg)
        except Exception as e:
            logger.warning("Failed to persist model switch: %s", e)
            persist_error = str(e)

    # Evict the cached agent so the next message creates a fresh one
    # with the new model/provider configuration.
    source = event.source
    session_key = self._session_key_for_source(source)
    self._evict_cached_agent(session_key)

    # Format response
    new_label = _PROVIDER_LABELS.get(result.target_provider, result.target_provider)
    if result.resolved_via_alias:
        lines = [f"✅ **{result.resolved_via_alias}** → `{result.new_model}` via {new_label}"]
    else:
        lines = [f"✅ Switched to `{result.new_model}` via {new_label}"]
    if result.provider_changed:
        old_label = _PROVIDER_LABELS.get(current_provider, current_provider)
        lines.append(f"Provider: {old_label} → {new_label}")
    if result.warning_message:
        lines.append(f"⚠️ {result.warning_message}")
    if persist_error:
        # Surface the failure instead of only logging it: the user would
        # otherwise see an unqualified success message.
        lines.append(f"⚠️ Could not save the switch to config.yaml: {persist_error}")
    lines.append("Prompt cache reset (new model).")
    return "\n".join(lines)
|
||||
|
||||
async def _handle_provider_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /provider command - show available providers."""
|
||||
import yaml
|
||||
|
||||
@@ -82,6 +82,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
# Configuration
|
||||
CommandDef("config", "Show current configuration", "Configuration",
|
||||
cli_only=True),
|
||||
CommandDef("model", "Switch model mid-session (e.g. /model claude-sonnet-4 or /model openai:gpt-5)",
|
||||
"Configuration", args_hint="[provider:model]"),
|
||||
CommandDef("provider", "Show available providers and current provider",
|
||||
"Configuration"),
|
||||
CommandDef("prompt", "View/set custom system prompt", "Configuration",
|
||||
|
||||
@@ -1,25 +1,90 @@
|
||||
"""Shared model-switching logic for CLI and gateway /model commands.
|
||||
"""Mid-chat model switching pipeline for CLI and gateway.
|
||||
|
||||
Both the CLI (cli.py) and gateway (gateway/run.py) /model handlers
|
||||
share the same core pipeline:
|
||||
|
||||
parse_model_input → is_custom detection → auto-detect provider
|
||||
→ credential resolution → validate model → return result
|
||||
|
||||
This module extracts that shared pipeline into pure functions that
|
||||
return result objects. The callers handle all platform-specific
|
||||
concerns: state mutation, config persistence, output formatting.
|
||||
Core design: aliases resolve to an abstract model identity, then the
|
||||
pipeline formats it for whatever provider you're currently on. Typing
|
||||
'/model sonnet' on OpenRouter gives you 'anthropic/claude-sonnet-4.6'.
|
||||
Typing it on native Anthropic gives you 'claude-sonnet-4-6'. Same
|
||||
intent, correct name for each provider.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from difflib import get_close_matches
|
||||
from typing import Optional
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# Model aliases — abstract identities, not provider-specific names
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
@dataclass(frozen=True)
class ModelIdentity:
    """Abstract model identity resolved dynamically from catalogs.

    ``vendor`` + ``family`` define WHAT model you want. The actual
    version is resolved at runtime from the provider's catalog — so
    "sonnet" always means the latest sonnet, not a hardcoded version.

    Instances are immutable and hashable (frozen dataclass).
    """

    # Vendor namespace, e.g. "openai", "anthropic", "google".
    vendor: str
    # Family prefix to match in a catalog, e.g. "claude-sonnet", "gpt-5".
    family: str
|
||||
|
||||
|
||||
# Maps short alias → model family. Families carry no exact version — at
# most a generation marker such as "gpt-5" or "minimax-m2". The catalog
# is searched at runtime for the first match, which is the
# latest/recommended version.
MODEL_ALIASES: dict[str, ModelIdentity] = {
    # Anthropic Claude
    "opus": ModelIdentity("anthropic", "claude-opus"),
    "sonnet": ModelIdentity("anthropic", "claude-sonnet"),
    "haiku": ModelIdentity("anthropic", "claude-haiku"),
    "claude": ModelIdentity("anthropic", "claude-opus"),

    # OpenAI GPT
    "gpt5": ModelIdentity("openai", "gpt-5"),
    "gpt-5": ModelIdentity("openai", "gpt-5"),
    "gpt5-mini": ModelIdentity("openai", "gpt-5-mini"),  # family suffix narrows it
    "gpt5-pro": ModelIdentity("openai", "gpt-5-pro"),
    "gpt5-nano": ModelIdentity("openai", "gpt-5-nano"),
    "codex": ModelIdentity("openai", "codex"),

    # Google Gemini
    "gemini": ModelIdentity("google", "gemini"),
    "gemini-pro": ModelIdentity("google", "gemini-pro"),
    "gemini-flash": ModelIdentity("google", "gemini-flash"),

    # Others — family is broad enough to pick the latest
    "deepseek": ModelIdentity("deepseek", "deepseek-chat"),
    "qwen": ModelIdentity("qwen", "qwen"),
    "grok": ModelIdentity("x-ai", "grok"),
    "glm": ModelIdentity("z-ai", "glm"),
    "kimi": ModelIdentity("moonshotai", "kimi"),
    "minimax": ModelIdentity("minimax", "minimax-m2"),
    "mimo": ModelIdentity("xiaomi", "mimo"),
    "nemotron": ModelIdentity("nvidia", "nemotron"),
}
|
||||
|
||||
# Providers that use vendor/model slug format
_AGGREGATOR_PROVIDERS = {"openrouter", "nous", "ai-gateway", "kilocode"}

# Providers that use hyphens instead of dots in model names
_HYPHEN_PROVIDERS = {"anthropic", "opencode-zen", "opencode-go"}

# Common vendor prefixes on OpenRouter
_OPENROUTER_VENDORS = {
    "openai", "anthropic", "google", "deepseek", "meta", "mistral",
    "qwen", "minimax", "x-ai", "z-ai", "moonshotai", "nvidia",
    "xiaomi", "stepfun", "arcee-ai", "cohere", "databricks",
}
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# Result types
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
@dataclass
|
||||
class ModelSwitchResult:
|
||||
"""Result of a model switch attempt."""
|
||||
|
||||
success: bool
|
||||
new_model: str = ""
|
||||
target_provider: str = ""
|
||||
@@ -32,12 +97,12 @@ class ModelSwitchResult:
|
||||
warning_message: str = ""
|
||||
is_custom_target: bool = False
|
||||
provider_label: str = ""
|
||||
resolved_via_alias: str = ""
|
||||
|
||||
|
||||
@dataclass
|
||||
class CustomAutoResult:
|
||||
"""Result of switching to bare 'custom' provider with auto-detect."""
|
||||
|
||||
"""Result of switching to bare 'custom' with auto-detect."""
|
||||
success: bool
|
||||
model: str = ""
|
||||
base_url: str = ""
|
||||
@@ -45,158 +110,378 @@ class CustomAutoResult:
|
||||
error_message: str = ""
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# Provider-aware alias resolution
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
def _find_in_catalog(
    identity: ModelIdentity,
    provider: str,
) -> Optional[str]:
    """Find the best matching model in a provider's catalog.

    Two passes are made over the catalog, each returning the first hit.
    Catalogs are ordered by recommendation, so the first match is the
    latest/best version.

    1. Prefix match: the model name starts with the identity's family
       (on aggregators: the slug starts with ``vendor/family``).
    2. Token match: every ``-``/``.``-separated token of the family
       appears somewhere in the name (on aggregators the slug must also
       start with ``vendor/``). This lets "gpt-5-mini" match
       "gpt-5.4-mini".

    Args:
        identity: The vendor/family pair to look for.
        provider: Provider whose catalog is searched. Aggregator
            providers all search the OpenRouter model list.

    Returns:
        The model name in the provider's native format, or None when
        nothing matches (including when the family is empty).
    """
    from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS

    family = identity.family.lower()
    vendor = identity.vendor.lower()

    # Split family into tokens for flexible matching.
    # "gpt-5-mini" → ["gpt", "5", "mini"] — matches "gpt-5.4-mini"
    family_tokens = [t for t in family.replace(".", "-").split("-") if t]
    if not family_tokens:
        # An empty family would otherwise "match" the first catalog entry.
        return None

    def _tokens_match(name: str) -> bool:
        """Check if all family tokens appear in the model name."""
        lowered = name.lower()
        return all(token in lowered for token in family_tokens)

    if provider in _AGGREGATOR_PROVIDERS:
        vendor_prefix = f"{vendor}/"
        family_prefix = f"{vendor_prefix}{family}"
        slugs = [slug for slug, _ in OPENROUTER_MODELS]
        # 1. Prefix match (strongest)
        for slug in slugs:
            if slug.lower().startswith(family_prefix):
                return slug
        # 2. Token match — all family tokens present + correct vendor
        for slug in slugs:
            if slug.lower().startswith(vendor_prefix) and _tokens_match(slug):
                return slug
        return None

    # Non-aggregator providers
    catalog = _PROVIDER_MODELS.get(provider, [])
    # 1. Prefix match, ignoring any "vendor/" part of the catalog name
    for model_name in catalog:
        bare = model_name.lower()
        if "/" in bare:
            bare = bare.split("/", 1)[1]
        if bare.startswith(family):
            return model_name
    # 2. Token match
    for model_name in catalog:
        if _tokens_match(model_name):
            return model_name

    return None
|
||||
|
||||
|
||||
def resolve_alias(
    raw_input: str,
    current_provider: str = "openrouter",
) -> Optional[tuple[str, str, str]]:
    """Resolve a short alias to (provider, model_name, alias_used).

    Dynamically searches the current provider's catalog for the latest
    model matching the alias's family. Illustrative results (the actual
    names depend on catalog contents):

    - 'sonnet' on OpenRouter → first catalog entry starting with
      'anthropic/claude-sonnet' → ('openrouter', 'anthropic/claude-sonnet-4.6', 'sonnet')
    - 'sonnet' on Anthropic → first entry starting with 'claude-sonnet'
      → ('anthropic', 'claude-sonnet-4-6', 'sonnet')
    - 'gpt5' on Anthropic → no GPT in Anthropic catalog → None

    Args:
        raw_input: User input; matched case-insensitively after stripping
            surrounding whitespace.
        current_provider: Provider whose catalog is searched.

    Returns:
        ``(current_provider, model_name, alias)`` when the input is a
        known alias with a match on the current provider; otherwise None.
        None is returned both for non-aliases and for aliases without a
        match, so the caller can try fallback providers or cross-provider
        detection.
    """
    key = (raw_input or "").strip().lower()
    identity = MODEL_ALIASES.get(key)
    if identity is None:
        return None

    match = _find_in_catalog(identity, current_provider)
    if not match:
        return None
    return (current_provider, match, key)
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# Fuzzy suggestions
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
def suggest_models(raw_input: str, limit: int = 3) -> list[str]:
    """Suggest similar model names when input doesn't match.

    Candidates are the alias names plus every catalog model name, both
    in full and with any leading ``vendor/`` part removed. Comparison is
    case-insensitive; suggestions are returned in their original casing.

    Args:
        raw_input: The text the user typed.
        limit: Maximum number of suggestions to return.

    Returns:
        Up to ``limit`` close matches, best first. Empty when the input
        is blank, ``limit`` is not positive, or nothing is similar enough.
    """
    from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS

    query = (raw_input or "").strip().lower()
    # get_close_matches raises ValueError for n <= 0, so guard it here.
    if not query or limit <= 0:
        return []

    # lowercase name → first-seen original spelling (insertion-ordered).
    by_lower: dict[str, str] = {}

    def _add(name: str) -> None:
        """Register a name and, if it has a vendor part, its bare form."""
        by_lower.setdefault(name.lower(), name)
        if "/" in name:
            bare = name.split("/", 1)[1]
            by_lower.setdefault(bare.lower(), bare)

    for alias in MODEL_ALIASES:
        by_lower.setdefault(alias.lower(), alias)
    for model_id, _ in OPENROUTER_MODELS:
        _add(model_id)
    for models in _PROVIDER_MODELS.values():
        for model_name in models:
            _add(model_name)

    matches = get_close_matches(query, list(by_lower), n=limit, cutoff=0.5)
    return [by_lower[m] for m in matches]
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# Aggregator-aware model resolution
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
def _resolve_on_aggregator(
    raw_model: str,
    current_provider: str,
) -> Optional[str]:
    """Try to resolve a bare model name within an aggregator.

    Prevents bare names from triggering unwanted provider switches.

    Resolution order, all against the OpenRouter model list:

    1. Exact (case-insensitive) match on the full ``vendor/model`` slug.
    2. Exact match on the part after ``vendor/``.
    3. Input already carries a known ``vendor/`` prefix → accepted as-is.
    4. Closest fuzzy match on the part after ``vendor/`` (cutoff 0.75).

    Args:
        raw_model: Model name as typed, with or without a vendor prefix.
        current_provider: Not used by the lookup; kept so the signature
            stays unchanged for callers.

    Returns:
        The resolved slug, or None when nothing matches.
    """
    from hermes_cli.models import OPENROUTER_MODELS

    model_lower = raw_model.lower()

    slug_lower: dict[str, str] = {}
    bare_to_slug: dict[str, str] = {}
    for slug, _ in OPENROUTER_MODELS:
        slug_lower[slug.lower()] = slug
        if "/" in slug:
            # The catalog is ordered by recommendation, so when two vendors
            # share a bare name the first (preferred) slug is kept.
            bare_to_slug.setdefault(slug.split("/", 1)[1].lower(), slug)

    # Exact match on full slug
    if model_lower in slug_lower:
        return slug_lower[model_lower]

    # Exact match on bare name. This also covers "prepend each known
    # vendor and look the slug up": any such hit has this bare name, so a
    # separate vendor-prepending pass could never find anything new.
    if model_lower in bare_to_slug:
        return bare_to_slug[model_lower]

    # Already has vendor/ prefix — accept on aggregator
    if "/" in raw_model:
        vendor = raw_model.split("/", 1)[0].lower()
        if vendor in _OPENROUTER_VENDORS:
            return raw_model

    # Fuzzy match on bare names
    close = get_close_matches(model_lower, list(bare_to_slug), n=1, cutoff=0.75)
    if close:
        return bare_to_slug[close[0]]

    return None
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# Core switch pipeline
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
def switch_model(
|
||||
raw_input: str,
|
||||
current_provider: str,
|
||||
current_model: str = "",
|
||||
current_base_url: str = "",
|
||||
current_api_key: str = "",
|
||||
) -> ModelSwitchResult:
|
||||
"""Core model-switching pipeline shared between CLI and gateway.
|
||||
"""Core model-switching pipeline.
|
||||
|
||||
Handles parsing, provider detection, credential resolution, and
|
||||
model validation. Does NOT handle config persistence, state
|
||||
mutation, or output formatting — those are caller responsibilities.
|
||||
|
||||
Args:
|
||||
raw_input: The user's model input (e.g. "claude-sonnet-4",
|
||||
"zai:glm-5", "custom:local:qwen").
|
||||
current_provider: The currently active provider.
|
||||
current_base_url: The currently active base URL (used for
|
||||
is_custom detection).
|
||||
current_api_key: The currently active API key.
|
||||
|
||||
Returns:
|
||||
ModelSwitchResult with all information the caller needs to
|
||||
apply the switch and format output.
|
||||
Key behavior: aliases and bare names resolve on your CURRENT provider.
|
||||
'/model sonnet' on Anthropic gives you claude-sonnet-4-6 on Anthropic.
|
||||
'/model sonnet' on OpenRouter gives you anthropic/claude-sonnet-4.6.
|
||||
Only explicit provider:model syntax switches providers.
|
||||
"""
|
||||
from hermes_cli.models import (
|
||||
parse_model_input,
|
||||
detect_provider_for_model,
|
||||
validate_requested_model,
|
||||
_PROVIDER_LABELS,
|
||||
_PROVIDER_MODELS,
|
||||
_KNOWN_PROVIDER_NAMES,
|
||||
OPENROUTER_MODELS,
|
||||
opencode_model_api_mode,
|
||||
)
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
|
||||
# Step 1: Parse provider:model syntax
|
||||
target_provider, new_model = parse_model_input(raw_input, current_provider)
|
||||
stripped = raw_input.strip()
|
||||
if not stripped:
|
||||
return ModelSwitchResult(
|
||||
success=False,
|
||||
error_message="No model specified. Usage: /model <name> or /model provider:model",
|
||||
)
|
||||
|
||||
# Step 2: Detect if we're currently on a custom endpoint
|
||||
on_aggregator = current_provider in _AGGREGATOR_PROVIDERS
|
||||
|
||||
# ── Step 1: Alias resolution (provider-aware) ──
|
||||
alias_result = resolve_alias(stripped, current_provider)
|
||||
resolved_alias = ""
|
||||
if alias_result:
|
||||
target_provider, new_model, resolved_alias = alias_result
|
||||
else:
|
||||
# Check if this was an alias that's unavailable on the current provider
|
||||
key = stripped.strip().lower()
|
||||
if key in MODEL_ALIASES:
|
||||
identity = MODEL_ALIASES[key]
|
||||
# Model isn't available on current provider — find one that has it
|
||||
# Try aggregators first (most likely to have everything)
|
||||
for fallback in ["openrouter", "nous"]:
|
||||
if fallback != current_provider:
|
||||
fallback_match = _find_in_catalog(identity, fallback)
|
||||
if not fallback_match:
|
||||
continue
|
||||
try:
|
||||
runtime = resolve_runtime_provider(requested=fallback)
|
||||
if runtime.get("api_key"):
|
||||
fallback_label = _PROVIDER_LABELS.get(fallback, fallback)
|
||||
current_label = _PROVIDER_LABELS.get(current_provider, current_provider)
|
||||
return ModelSwitchResult(
|
||||
success=True,
|
||||
new_model=fallback_match,
|
||||
target_provider=fallback,
|
||||
provider_changed=True,
|
||||
api_key=runtime.get("api_key", ""),
|
||||
base_url=runtime.get("base_url", ""),
|
||||
api_mode=runtime.get("api_mode", ""),
|
||||
persist=True,
|
||||
warning_message=(
|
||||
f"{identity.family} isn't available on "
|
||||
f"{current_label} — switching to {fallback_label}."
|
||||
),
|
||||
provider_label=fallback_label,
|
||||
resolved_via_alias=key,
|
||||
)
|
||||
except Exception:
|
||||
continue
|
||||
return ModelSwitchResult(
|
||||
success=False,
|
||||
error_message=(
|
||||
f"{identity.family} isn't available on {current_provider} "
|
||||
f"and no fallback provider is configured."
|
||||
),
|
||||
)
|
||||
|
||||
# ── Step 2: Vendor:model on aggregators ──
|
||||
if on_aggregator and ":" in stripped:
|
||||
left, right = stripped.split(":", 1)
|
||||
left_lower = left.strip().lower()
|
||||
if left_lower in _OPENROUTER_VENDORS and left_lower not in _KNOWN_PROVIDER_NAMES:
|
||||
target_provider = current_provider
|
||||
new_model = f"{left.strip()}/{right.strip()}"
|
||||
else:
|
||||
target_provider, new_model = parse_model_input(stripped, current_provider)
|
||||
else:
|
||||
# ── Step 3: Standard parse ──
|
||||
target_provider, new_model = parse_model_input(stripped, current_provider)
|
||||
|
||||
if not new_model:
|
||||
return ModelSwitchResult(
|
||||
success=False,
|
||||
error_message="No model name provided. Usage: /model <name> or /model provider:model",
|
||||
)
|
||||
|
||||
# ── Step 4: Aggregator-aware resolution ──
|
||||
_base = current_base_url or ""
|
||||
is_custom = current_provider == "custom" or (
|
||||
"localhost" in _base or "127.0.0.1" in _base
|
||||
)
|
||||
|
||||
# Step 3: Auto-detect provider when no explicit provider:model syntax
|
||||
# was used. Skip for custom providers — the model name might
|
||||
# coincidentally match a known provider's catalog.
|
||||
if target_provider == current_provider and not is_custom:
|
||||
if not alias_result and target_provider == current_provider and on_aggregator:
|
||||
aggregator_slug = _resolve_on_aggregator(new_model, current_provider)
|
||||
if aggregator_slug:
|
||||
new_model = aggregator_slug
|
||||
else:
|
||||
detected = detect_provider_for_model(new_model, current_provider)
|
||||
if detected:
|
||||
target_provider, new_model = detected
|
||||
elif not alias_result and target_provider == current_provider and not is_custom:
|
||||
detected = detect_provider_for_model(new_model, current_provider)
|
||||
if detected:
|
||||
target_provider, new_model = detected
|
||||
|
||||
provider_changed = target_provider != current_provider
|
||||
|
||||
# Step 4: Resolve credentials for target provider
|
||||
# ── Step 5: Resolve credentials ──
|
||||
api_key = current_api_key
|
||||
base_url = current_base_url
|
||||
api_mode = ""
|
||||
if provider_changed:
|
||||
try:
|
||||
runtime = resolve_runtime_provider(requested=target_provider)
|
||||
api_key = runtime.get("api_key", "")
|
||||
base_url = runtime.get("base_url", "")
|
||||
api_mode = runtime.get("api_mode", "")
|
||||
except Exception as e:
|
||||
provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
|
||||
if target_provider == "custom":
|
||||
return ModelSwitchResult(
|
||||
success=False,
|
||||
target_provider=target_provider,
|
||||
error_message=(
|
||||
"No custom endpoint configured. Set model.base_url "
|
||||
"in config.yaml, or set OPENAI_BASE_URL in .env, "
|
||||
"or run: hermes setup → Custom OpenAI-compatible endpoint"
|
||||
),
|
||||
)
|
||||
|
||||
try:
|
||||
runtime = resolve_runtime_provider(requested=target_provider)
|
||||
api_key = runtime.get("api_key", "")
|
||||
base_url = runtime.get("base_url", "")
|
||||
api_mode = runtime.get("api_mode", "")
|
||||
except Exception as e:
|
||||
provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
|
||||
if target_provider == "custom":
|
||||
return ModelSwitchResult(
|
||||
success=False,
|
||||
target_provider=target_provider,
|
||||
success=False, target_provider=target_provider,
|
||||
error_message=(
|
||||
f"Could not resolve credentials for provider "
|
||||
f"'{provider_label}': {e}"
|
||||
"No custom endpoint configured.\n"
|
||||
"Set model.base_url in config.yaml or OPENAI_BASE_URL in .env."
|
||||
),
|
||||
)
|
||||
else:
|
||||
# Gateway also resolves for unchanged provider to get accurate
|
||||
# base_url for validation probing.
|
||||
try:
|
||||
runtime = resolve_runtime_provider(requested=current_provider)
|
||||
api_key = runtime.get("api_key", "")
|
||||
base_url = runtime.get("base_url", "")
|
||||
api_mode = runtime.get("api_mode", "")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Step 5: Validate the model
|
||||
try:
|
||||
validation = validate_requested_model(
|
||||
new_model,
|
||||
target_provider,
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
)
|
||||
except Exception:
|
||||
validation = {
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": False,
|
||||
"message": None,
|
||||
}
|
||||
|
||||
if not validation.get("accepted"):
|
||||
msg = validation.get("message", "Invalid model")
|
||||
return ModelSwitchResult(
|
||||
success=False,
|
||||
new_model=new_model,
|
||||
target_provider=target_provider,
|
||||
error_message=msg,
|
||||
success=False, target_provider=target_provider,
|
||||
error_message=(
|
||||
f"No credentials for {provider_label}.\n"
|
||||
f"Run `hermes setup` to configure it.\nDetail: {e}"
|
||||
),
|
||||
)
|
||||
|
||||
# Step 6: Build result
|
||||
# ── Step 6: Catalog validation ──
|
||||
known_models: list[str] = []
|
||||
if target_provider in _AGGREGATOR_PROVIDERS:
|
||||
known_models = [m for m, _ in OPENROUTER_MODELS]
|
||||
elif target_provider in _PROVIDER_MODELS:
|
||||
known_models = list(_PROVIDER_MODELS[target_provider])
|
||||
|
||||
model_lower = new_model.lower()
|
||||
found = any(m.lower() == model_lower for m in known_models)
|
||||
|
||||
warning_message = ""
|
||||
if not found and known_models:
|
||||
close = get_close_matches(model_lower, [m.lower() for m in known_models], n=3, cutoff=0.5)
|
||||
if close:
|
||||
lower_to_orig = {m.lower(): m for m in known_models}
|
||||
suggestions = [lower_to_orig.get(c, c) for c in close]
|
||||
warning_message = f"Not in catalog — did you mean: {', '.join(f'`{s}`' for s in suggestions)}?"
|
||||
else:
|
||||
warning_message = f"`{new_model}` not in catalog — sending as-is."
|
||||
elif not found and not known_models:
|
||||
warning_message = f"No catalog for {target_provider} — accepting as-is."
|
||||
|
||||
# ── Step 7: Build result ──
|
||||
provider_label = _PROVIDER_LABELS.get(target_provider, target_provider)
|
||||
is_custom_target = target_provider == "custom" or (
|
||||
base_url
|
||||
and "openrouter.ai" not in (base_url or "")
|
||||
base_url and "openrouter.ai" not in (base_url or "")
|
||||
and ("localhost" in (base_url or "") or "127.0.0.1" in (base_url or ""))
|
||||
)
|
||||
|
||||
if target_provider in {"opencode-zen", "opencode-go"}:
|
||||
# Recompute against the requested new model, not the currently-configured
|
||||
# model used during runtime resolution. OpenCode mixes API surfaces by
|
||||
# model family, so a same-provider model switch can change api_mode.
|
||||
api_mode = opencode_model_api_mode(target_provider, new_model)
|
||||
|
||||
return ModelSwitchResult(
|
||||
success=True,
|
||||
new_model=new_model,
|
||||
target_provider=target_provider,
|
||||
provider_changed=provider_changed,
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
api_mode=api_mode,
|
||||
persist=bool(validation.get("persist")),
|
||||
warning_message=validation.get("message") or "",
|
||||
is_custom_target=is_custom_target,
|
||||
provider_label=provider_label,
|
||||
success=True, new_model=new_model, target_provider=target_provider,
|
||||
provider_changed=provider_changed, api_key=api_key, base_url=base_url,
|
||||
api_mode=api_mode, persist=True, warning_message=warning_message,
|
||||
is_custom_target=is_custom_target, provider_label=provider_label,
|
||||
resolved_via_alias=resolved_alias,
|
||||
)
|
||||
|
||||
|
||||
def switch_to_custom_provider() -> CustomAutoResult:
|
||||
"""Handle bare '/model custom' — resolve endpoint and auto-detect model.
|
||||
|
||||
Returns a result object; the caller handles persistence and output.
|
||||
"""
|
||||
"""Handle bare '/model custom' — resolve endpoint and auto-detect model."""
|
||||
from hermes_cli.runtime_provider import (
|
||||
resolve_runtime_provider,
|
||||
_auto_detect_local_model,
|
||||
@@ -207,7 +492,7 @@ def switch_to_custom_provider() -> CustomAutoResult:
|
||||
except Exception as e:
|
||||
return CustomAutoResult(
|
||||
success=False,
|
||||
error_message=f"Could not resolve custom endpoint: {e}",
|
||||
error_message=f"No custom endpoint configured.\nSet model.base_url in config.yaml or OPENAI_BASE_URL in .env.\nDetail: {e}",
|
||||
)
|
||||
|
||||
cust_base = runtime.get("base_url", "")
|
||||
@@ -216,29 +501,14 @@ def switch_to_custom_provider() -> CustomAutoResult:
|
||||
if not cust_base or "openrouter.ai" in cust_base:
|
||||
return CustomAutoResult(
|
||||
success=False,
|
||||
error_message=(
|
||||
"No custom endpoint configured. "
|
||||
"Set model.base_url in config.yaml, or set OPENAI_BASE_URL "
|
||||
"in .env, or run: hermes setup → Custom OpenAI-compatible endpoint"
|
||||
),
|
||||
error_message="No custom endpoint configured.\nSet model.base_url in config.yaml or OPENAI_BASE_URL in .env.",
|
||||
)
|
||||
|
||||
detected_model = _auto_detect_local_model(cust_base)
|
||||
if not detected_model:
|
||||
return CustomAutoResult(
|
||||
success=False,
|
||||
base_url=cust_base,
|
||||
api_key=cust_key,
|
||||
error_message=(
|
||||
f"Custom endpoint at {cust_base} is reachable but no single "
|
||||
f"model was auto-detected. Specify the model explicitly: "
|
||||
f"/model custom:<model-name>"
|
||||
),
|
||||
success=False, base_url=cust_base, api_key=cust_key,
|
||||
error_message=f"Custom endpoint at {cust_base} responded but no model detected.\nSpecify explicitly: /model custom:<model-name>",
|
||||
)
|
||||
|
||||
return CustomAutoResult(
|
||||
success=True,
|
||||
model=detected_model,
|
||||
base_url=cust_base,
|
||||
api_key=cust_key,
|
||||
)
|
||||
return CustomAutoResult(success=True, model=detected_model, base_url=cust_base, api_key=cust_key)
|
||||
|
||||
148
run_agent.py
148
run_agent.py
@@ -1267,7 +1267,153 @@ class AIAgent:
|
||||
self.context_compressor._context_probe_persistable = False
|
||||
# Iterative summary from previous session must not bleed into new one (#2635)
|
||||
self.context_compressor._previous_summary = None
|
||||
|
||||
|
||||
# ── Mid-chat model switching ──────────────────────────────────────────
|
||||
|
||||
def switch_model(
    self,
    new_model: str,
    new_provider: str,
    api_key: str = "",
    base_url: str = "",
    api_mode: str = "",
) -> None:
    """Switch the agent to a different model/provider mid-conversation.

    Follows the same pattern as ``_try_activate_fallback()`` for the
    client/state swap, but differs in two critical ways:

    1. Updates ``_primary_runtime`` — this is a permanent switch, not
       a temporary fallback that gets restored next turn.
    2. Invalidates the cached system prompt (via
       ``_invalidate_system_prompt()``) — the system prompt contains
       model-dependent content (tool enforcement guidance, Google model
       guidance, Alibaba self-identification) that must be rebuilt for
       the new model.

    The caller (CLI or gateway handler) is responsible for:
    - Parsing user input via ``model_switch.switch_model()``
    - Credential resolution (api_key, base_url)
    - Persisting the change to config.yaml
    - Formatting output messages

    Args:
        new_model: The new model slug (e.g. ``"claude-sonnet-4"``).
        new_provider: The provider ID (e.g. ``"openrouter"``).
        api_key: API key for the target provider.
        base_url: Base URL for the target provider.
        api_mode: Explicit api_mode override. If empty, auto-detected
            from provider/base_url.
    """
    old_model = self.model

    # ── Determine api_mode ──
    if not api_mode:
        api_mode = "chat_completions"
        if new_provider == "openai-codex":
            api_mode = "codex_responses"
        elif new_provider == "anthropic":
            api_mode = "anthropic_messages"
        elif base_url:
            _bu_lower = base_url.rstrip("/").lower()
            if _bu_lower.endswith("/anthropic"):
                api_mode = "anthropic_messages"
            elif self._is_direct_openai_url(base_url):
                api_mode = "codex_responses"

    self.model = new_model
    self.provider = new_provider
    self.base_url = base_url
    self.api_mode = api_mode

    # ── Build new client ──
    if api_mode == "anthropic_messages":
        from agent.anthropic_adapter import (
            build_anthropic_client,
            resolve_anthropic_token,
            _is_oauth_token,
        )

        # Only the native anthropic provider may fall back to the locally
        # resolved token; other Anthropic-compatible endpoints need an
        # explicit key.
        effective_key = api_key or (
            resolve_anthropic_token() if new_provider == "anthropic" else ""
        )
        self.api_key = effective_key
        self._anthropic_api_key = effective_key
        self._anthropic_base_url = base_url or None
        self._anthropic_client = build_anthropic_client(
            effective_key, self._anthropic_base_url,
        )
        self._is_anthropic_oauth = _is_oauth_token(effective_key)
        self.client = None
        self._client_kwargs = {}
    else:
        self.api_key = api_key
        new_kwargs = {"api_key": api_key, "base_url": base_url}
        self._client_kwargs = new_kwargs
        # Pass a copy so the factory cannot mutate the stored kwargs.
        self.client = self._create_openai_client(
            dict(new_kwargs), reason="model_switch", shared=True,
        )
        # Clear anthropic state if we were previously on anthropic
        self._anthropic_client = None

    # ── Re-evaluate prompt caching for the new model/provider ──
    is_native_anthropic = api_mode == "anthropic_messages"
    self._use_prompt_caching = (
        ("openrouter" in (base_url or "").lower() and "claude" in new_model.lower())
        or is_native_anthropic
    )

    # ── Update context compressor for new model's context window ──
    # The compressor is optional: resolve it once and reuse the same guarded
    # value for the runtime snapshot below, so an agent without a compressor
    # does not crash half-way through the switch.
    _cc = getattr(self, "context_compressor", None) or None
    if _cc is not None:
        from agent.model_metadata import get_model_context_length

        new_context_length = get_model_context_length(
            new_model,
            base_url=base_url,
            api_key=api_key,
            provider=new_provider,
        )
        _cc.model = new_model
        _cc.base_url = base_url
        _cc.api_key = api_key
        _cc.provider = new_provider
        _cc.context_length = new_context_length
        _cc.threshold_tokens = int(
            new_context_length * _cc.threshold_percent
        )

    # ── Invalidate system prompt — it contains model-dependent content ──
    self._invalidate_system_prompt()

    # ── Update _primary_runtime snapshot (permanent switch) ──
    self._primary_runtime = {
        "model": self.model,
        "provider": self.provider,
        "base_url": self.base_url,
        "api_mode": self.api_mode,
        "api_key": getattr(self, "api_key", ""),
        "client_kwargs": dict(self._client_kwargs),
        "use_prompt_caching": self._use_prompt_caching,
        # Without a compressor, mirror the agent's own settings so the
        # snapshot always carries the full key set.
        "compressor_model": _cc.model if _cc is not None else self.model,
        "compressor_base_url": _cc.base_url if _cc is not None else self.base_url,
        "compressor_api_key": getattr(_cc, "api_key", "") if _cc is not None else "",
        "compressor_provider": _cc.provider if _cc is not None else self.provider,
        "compressor_context_length": _cc.context_length if _cc is not None else 0,
        "compressor_threshold_tokens": _cc.threshold_tokens if _cc is not None else 0,
    }
    if self.api_mode == "anthropic_messages":
        self._primary_runtime.update({
            "anthropic_api_key": self._anthropic_api_key,
            "anthropic_base_url": self._anthropic_base_url,
            "is_anthropic_oauth": self._is_anthropic_oauth,
        })

    # ── Reset fallback state — new primary means fresh fallback chain ──
    self._fallback_activated = False
    self._fallback_index = 0

    logging.info(
        "Model switched: %s → %s (%s)", old_model, new_model, new_provider,
    )
|
||||
|
||||
def _safe_print(self, *args, **kwargs):
|
||||
"""Print that silently handles broken pipes / closed stdout.
|
||||
|
||||
|
||||
627
tests/test_model_switch.py
Normal file
627
tests/test_model_switch.py
Normal file
@@ -0,0 +1,627 @@
|
||||
"""Tests for mid-chat /model switching.
|
||||
|
||||
Covers the full model-switching stack:
|
||||
- Model aliases (sonnet, opus, gpt5, etc.)
|
||||
- Fuzzy matching and suggestions
|
||||
- CommandDef registration (commands.py)
|
||||
- Switch pipeline (model_switch.py)
|
||||
- AIAgent.switch_model() method (run_agent.py)
|
||||
- CLI handler (cli.py)
|
||||
- Gateway handler (gateway/run.py)
|
||||
- Edge cases and error paths
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch, AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
# Ensure project root is importable
|
||||
PROJECT_ROOT = Path(__file__).parent.parent
|
||||
if str(PROJECT_ROOT) not in sys.path:
|
||||
sys.path.insert(0, str(PROJECT_ROOT))
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# Model aliases
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
class TestModelAliases:
    """Short alias names must expand to full model slugs via ``resolve_alias``."""

    @staticmethod
    def _lookup(*args):
        """Import ``resolve_alias`` lazily and forward the arguments to it."""
        from hermes_cli.model_switch import resolve_alias

        return resolve_alias(*args)

    def _slug_for(self, alias):
        """Resolve *alias*, require a hit, and return the lower-cased model slug."""
        resolved = self._lookup(alias)
        assert resolved is not None
        _, slug, _ = resolved
        return slug.lower()

    def test_sonnet_alias(self):
        resolved = self._lookup("sonnet")
        assert resolved is not None
        _provider, slug, alias_name = resolved
        lowered = slug.lower()
        assert "claude" in lowered
        assert "sonnet" in lowered
        assert alias_name == "sonnet"

    def test_opus_alias(self):
        assert "opus" in self._slug_for("opus")

    def test_haiku_alias(self):
        assert "haiku" in self._slug_for("haiku")

    def test_gpt5_alias(self):
        assert "gpt-5" in self._slug_for("gpt5")

    def test_gemini_alias(self):
        assert "gemini" in self._slug_for("gemini")

    def test_deepseek_alias(self):
        """deepseek may be absent from the static OpenRouter catalog."""
        # A miss (None) is acceptable here: the switch pipeline is expected to
        # handle the name later. The only hard requirement is that the lookup
        # does not raise, and that any hit is a deepseek model.
        resolved = self._lookup("deepseek", "openrouter")
        if not resolved:
            return
        _, slug, _ = resolved
        assert "deepseek" in slug.lower()

    def test_codex_alias(self):
        assert "codex" in self._slug_for("codex")

    def test_case_insensitive(self):
        for spelling in ("SONNET", "Opus", "GPT5"):
            assert self._lookup(spelling) is not None

    def test_unknown_returns_none(self):
        for bogus in ("nonexistent-model-xyz", ""):
            assert self._lookup(bogus) is None

    def test_all_aliases_have_valid_identities(self):
        """Every alias must have a vendor and family."""
        from hermes_cli.model_switch import MODEL_ALIASES

        for name, ident in MODEL_ALIASES.items():
            assert ident.vendor, f"Alias '{name}' has empty vendor"
            assert ident.family, f"Alias '{name}' has empty family"

    def test_alias_provider_aware_openrouter(self):
        """On OpenRouter, sonnet resolves with vendor/ prefix."""
        resolved = self._lookup("sonnet", "openrouter")
        assert resolved is not None
        provider_id, slug, _ = resolved
        assert provider_id == "openrouter"
        assert slug.startswith("anthropic/")

    def test_alias_provider_aware_anthropic(self):
        """On native Anthropic, sonnet resolves with hyphens."""
        resolved = self._lookup("sonnet", "anthropic")
        assert resolved is not None
        provider_id, slug, _ = resolved
        assert provider_id == "anthropic"
        assert "." not in slug  # native slugs use hyphens, not dots
        assert "claude-sonnet" in slug

    def test_alias_unavailable_on_provider(self):
        """GPT5 on native Anthropic returns None (not available)."""
        assert self._lookup("gpt5", "anthropic") is None
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# Fuzzy matching and suggestions
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
class TestFuzzyMatching:
    """``suggest_models`` should offer alternatives for typos and partial names."""

    @staticmethod
    def _suggest(*args, **kwargs):
        """Import ``suggest_models`` lazily and forward the call."""
        from hermes_cli.model_switch import suggest_models

        return suggest_models(*args, **kwargs)

    def test_close_typo_gets_suggestion(self):
        candidates = self._suggest("sonet")  # one 'n' short of "sonnet"
        assert len(candidates) > 0
        # At least one candidate should contain the intended name.
        assert any("sonnet" in candidate.lower() for candidate in candidates)

    def test_partial_name_gets_suggestion(self):
        candidates = self._suggest("claude-sonn")
        assert len(candidates) > 0

    def test_completely_wrong_gets_empty(self):
        candidates = self._suggest("zzzzzzzzzzz")
        # Contents are unspecified for nonsense input; only the type is pinned.
        assert isinstance(candidates, list)

    def test_suggestion_limit(self):
        candidates = self._suggest("gpt", limit=2)
        assert len(candidates) <= 2
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# CommandDef registration
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
class TestCommandRegistration:
    """The ``model`` command must be present and correctly described in the registry."""

    @staticmethod
    def _registry():
        """Return ``COMMAND_REGISTRY``, imported lazily."""
        from hermes_cli.commands import COMMAND_REGISTRY

        return COMMAND_REGISTRY

    def test_model_command_exists(self):
        registered_names = [entry.name for entry in self._registry()]
        assert "model" in registered_names

    def test_model_command_properties(self):
        model_cmd = next(
            entry for entry in self._registry() if entry.name == "model"
        )
        assert model_cmd.category == "Configuration"
        # Must be usable from both the CLI and the gateway.
        assert not model_cmd.cli_only
        assert not model_cmd.gateway_only
        assert model_cmd.args_hint

    def test_model_command_resolves(self):
        from hermes_cli.commands import resolve_command

        resolved = resolve_command("model")
        assert resolved is not None
        assert resolved.name == "model"

    def test_model_in_gateway_known_commands(self):
        from hermes_cli.commands import GATEWAY_KNOWN_COMMANDS

        assert "model" in GATEWAY_KNOWN_COMMANDS
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# Switch pipeline
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
class TestSwitchPipeline:
    """Exercise ``hermes_cli.model_switch.switch_model`` with stubbed credentials."""

    _RUNTIME_TARGET = "hermes_cli.runtime_provider.resolve_runtime_provider"
    _PARSE_TARGET = "hermes_cli.models.parse_model_input"
    _OPENROUTER_URL = "https://openrouter.ai/api/v1"

    @classmethod
    def _stub_runtime(cls, api_key, base_url, api_mode=""):
        """Patch runtime resolution so it returns the given credentials."""
        runtime = {"api_key": api_key, "base_url": base_url, "api_mode": api_mode}
        return patch(cls._RUNTIME_TARGET, return_value=runtime)

    @classmethod
    def _failing_runtime(cls, message):
        """Patch runtime resolution so it raises ``Exception(message)``."""
        return patch(cls._RUNTIME_TARGET, side_effect=Exception(message))

    @classmethod
    def _stub_parse(cls, provider, model):
        """Patch ``parse_model_input`` to return a fixed (provider, model) pair."""
        return patch(cls._PARSE_TARGET, return_value=(provider, model))

    def test_empty_input_error(self):
        from hermes_cli.model_switch import switch_model

        outcome = switch_model("", current_provider="openrouter")
        assert not outcome.success
        assert "No model" in outcome.error_message

    def test_alias_resolves_in_pipeline(self):
        """Typing 'sonnet' should resolve through the alias table."""
        from hermes_cli.model_switch import switch_model

        with self._stub_runtime("test-key", self._OPENROUTER_URL):
            outcome = switch_model("sonnet", current_provider="openrouter")
        assert outcome.success
        assert "sonnet" in outcome.new_model.lower()
        assert outcome.resolved_via_alias == "sonnet"

    def test_vendor_colon_on_aggregator(self):
        """openai:gpt-5.4 on OpenRouter becomes openai/gpt-5.4 (stays on aggregator)."""
        from hermes_cli.model_switch import switch_model

        with self._stub_runtime("key", self._OPENROUTER_URL):
            outcome = switch_model("openai:gpt-5.4", current_provider="openrouter")
        assert outcome.success
        assert outcome.new_model == "openai/gpt-5.4"
        assert outcome.target_provider == "openrouter"
        assert not outcome.provider_changed  # still routed through the aggregator

    def test_explicit_hermes_provider_model(self):
        """anthropic:claude-opus-4 switches to the anthropic hermes provider."""
        from hermes_cli.model_switch import switch_model

        with self._stub_parse("anthropic", "claude-opus-4"):
            with self._stub_runtime(
                "sk-ant", "https://api.anthropic.com", "anthropic_messages"
            ):
                outcome = switch_model(
                    "anthropic:claude-opus-4", current_provider="openrouter"
                )
        assert outcome.success
        assert outcome.target_provider == "anthropic"
        assert outcome.provider_changed

    def test_missing_credentials_actionable_error(self):
        """Error message should be actionable when creds are missing."""
        from hermes_cli.model_switch import switch_model

        with self._stub_parse("anthropic", "claude-opus"):
            with self._failing_runtime("No Anthropic credentials found"):
                outcome = switch_model(
                    "anthropic:claude-opus", current_provider="openrouter"
                )
        assert not outcome.success
        assert "hermes setup" in outcome.error_message.lower()

    def test_unrecognized_model_warning(self):
        """Unrecognized model gets a warning but still succeeds."""
        from hermes_cli.model_switch import switch_model

        with self._stub_runtime("key", self._OPENROUTER_URL):
            with self._stub_parse("openrouter", "weird-unknown-model"):
                with patch(
                    "hermes_cli.models.detect_provider_for_model", return_value=None
                ):
                    outcome = switch_model(
                        "weird-unknown-model", current_provider="openrouter"
                    )
        assert outcome.success
        assert outcome.warning_message  # a non-empty warning is expected

    def test_custom_provider_error_message(self):
        """Custom endpoint error gives specific guidance."""
        from hermes_cli.model_switch import switch_model

        with self._stub_parse("custom", "local-model"):
            with self._failing_runtime("no endpoint"):
                outcome = switch_model(
                    "custom:local-model", current_provider="openrouter"
                )
        assert not outcome.success
        message = outcome.error_message
        assert "config.yaml" in message or "hermes setup" in message

    def test_persist_always_true_on_success(self):
        """Successful switches should always persist."""
        from hermes_cli.model_switch import switch_model

        with self._stub_runtime("key", self._OPENROUTER_URL):
            outcome = switch_model("opus", current_provider="openrouter")
        assert outcome.success
        assert outcome.persist is True
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# AIAgent.switch_model()
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
class TestAgentSwitchModel:
    """Test the AIAgent.switch_model() method.

    Each test runs against a bare ``AIAgent`` whose ``__init__`` is bypassed
    and whose collaborators are mocks (see ``_make_agent``).
    """

    _OPENROUTER_URL = "https://openrouter.ai/api/v1"

    def setup_method(self, method):
        """Stub the context-length lookup for every test in this class.

        ``switch_model`` calls ``get_model_context_length`` whenever the agent
        has a compressor, and ``_make_agent`` always installs one. Stubbing it
        keeps the tests independent of the real lookup (which presumably
        consults remote model metadata — TODO confirm). Individual tests may
        still nest their own ``patch`` to pin a specific value.
        """
        self._context_length_patch = patch(
            "agent.model_metadata.get_model_context_length", return_value=200000,
        )
        self._context_length_patch.start()

    def teardown_method(self, method):
        """Undo the stub installed by ``setup_method``."""
        self._context_length_patch.stop()

    def _make_agent(self, model="test-model", provider="openrouter"):
        """Create a minimal mock agent with the attributes switch_model needs."""
        from run_agent import AIAgent

        # Skip the real constructor; only the attributes set below exist.
        with patch.object(AIAgent, "__init__", lambda self: None):
            agent = AIAgent()
        agent.model = model
        agent.provider = provider
        agent.base_url = self._OPENROUTER_URL
        agent.api_mode = "chat_completions"
        agent.api_key = "test-key"
        agent.client = MagicMock()
        agent._client_kwargs = {"api_key": "test-key", "base_url": self._OPENROUTER_URL}
        agent._use_prompt_caching = True
        agent._cached_system_prompt = "cached prompt"
        agent._fallback_activated = False
        agent._fallback_index = 0
        agent._anthropic_client = None
        agent._anthropic_api_key = ""
        agent._anthropic_base_url = None
        agent._is_anthropic_oauth = False
        agent._memory_store = None

        cc = MagicMock()
        cc.model = model
        cc.base_url = self._OPENROUTER_URL
        cc.api_key = "test-key"
        cc.provider = provider
        cc.context_length = 200000
        cc.threshold_tokens = 160000
        cc.threshold_percent = 0.8
        agent.context_compressor = cc

        agent._primary_runtime = {
            "model": model, "provider": provider,
            "base_url": self._OPENROUTER_URL,
            "api_mode": "chat_completions", "api_key": "test-key",
            "client_kwargs": dict(agent._client_kwargs),
            "use_prompt_caching": True,
            "compressor_model": model, "compressor_base_url": self._OPENROUTER_URL,
            "compressor_api_key": "test-key", "compressor_provider": provider,
            "compressor_context_length": 200000, "compressor_threshold_tokens": 160000,
        }
        agent._create_openai_client = MagicMock(return_value=MagicMock())
        agent._is_direct_openai_url = MagicMock(return_value=False)
        agent._invalidate_system_prompt = MagicMock()
        return agent

    def test_basic_switch(self):
        agent = self._make_agent()
        agent.switch_model(
            new_model="claude-sonnet-4",
            new_provider="openrouter",
            api_key="test-key",
            base_url=self._OPENROUTER_URL,
        )
        assert agent.model == "claude-sonnet-4"

    def test_system_prompt_invalidated(self):
        agent = self._make_agent()
        agent.switch_model(
            new_model="new-model", new_provider="openrouter",
            api_key="key", base_url=self._OPENROUTER_URL,
        )
        agent._invalidate_system_prompt.assert_called_once()

    def test_primary_runtime_updated(self):
        agent = self._make_agent()
        agent.switch_model(
            new_model="gpt-5", new_provider="openai",
            api_key="sk-test", base_url="https://api.openai.com/v1",
        )
        assert agent._primary_runtime["model"] == "gpt-5"
        assert agent._primary_runtime["provider"] == "openai"

    def test_prompt_caching_claude_on_openrouter(self):
        agent = self._make_agent()
        agent._use_prompt_caching = False
        agent.switch_model(
            new_model="anthropic/claude-sonnet-4",
            new_provider="openrouter",
            api_key="key", base_url=self._OPENROUTER_URL,
        )
        assert agent._use_prompt_caching is True

    def test_prompt_caching_non_claude(self):
        agent = self._make_agent()
        agent._use_prompt_caching = True
        agent.switch_model(
            new_model="openai/gpt-5",
            new_provider="openrouter",
            api_key="key", base_url=self._OPENROUTER_URL,
        )
        assert agent._use_prompt_caching is False

    def test_cross_api_mode_to_anthropic(self):
        agent = self._make_agent()
        with patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()), \
             patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant"), \
             patch("agent.anthropic_adapter._is_oauth_token", return_value=False):
            agent.switch_model(
                new_model="claude-opus-4", new_provider="anthropic",
                api_key="sk-ant",
            )
        assert agent.api_mode == "anthropic_messages"
        assert agent.client is None

    def test_switch_from_anthropic_clears_state(self):
        agent = self._make_agent()
        agent.api_mode = "anthropic_messages"
        agent._anthropic_client = MagicMock()
        agent.switch_model(
            new_model="gpt-5", new_provider="openai",
            api_key="sk-test", base_url="https://api.openai.com/v1",
        )
        assert agent.api_mode == "chat_completions"
        assert agent._anthropic_client is None

    def test_context_compressor_updated(self):
        agent = self._make_agent()
        with patch("agent.model_metadata.get_model_context_length", return_value=128000):
            agent.switch_model(
                new_model="gpt-4o", new_provider="openai",
                api_key="key", base_url="https://api.openai.com/v1",
            )
        assert agent.context_compressor.context_length == 128000
        # The threshold must be recomputed from the new window and the
        # compressor's own percentage (0.8 in _make_agent).
        assert agent.context_compressor.threshold_tokens == int(128000 * 0.8)

    def test_fallback_state_reset(self):
        agent = self._make_agent()
        agent._fallback_activated = True
        agent._fallback_index = 2
        agent.switch_model(
            new_model="new", new_provider="openrouter",
            api_key="key", base_url=self._OPENROUTER_URL,
        )
        assert agent._fallback_activated is False
        assert agent._fallback_index == 0
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# CLI handler
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
class TestCLIHandler:
    """Drive ``HermesCLI._handle_model_switch`` against a mocked CLI object."""

    def _make_cli(self, model="test-model", provider="openrouter"):
        """Build a ``MagicMock`` CLI with the real handler bound onto it."""
        from cli import HermesCLI

        cli = MagicMock()
        cli.model = model
        cli.provider = provider
        cli.base_url = "https://openrouter.ai/api/v1"
        cli.api_key = "test-key"
        cli.api_mode = "chat_completions"
        cli.agent = MagicMock()
        cli.agent.switch_model = MagicMock()
        # Bind the genuine method so the test runs production code on the mock.
        cli._handle_model_switch = HermesCLI._handle_model_switch.__get__(cli)
        return cli

    def test_no_args_shows_aliases(self, capsys):
        cli = self._make_cli()
        labels = {"openrouter": "OpenRouter"}
        with patch("hermes_cli.models._PROVIDER_LABELS", labels):
            cli._handle_model_switch("/model")
        printed = capsys.readouterr().out
        for alias in ("sonnet", "opus", "gpt5"):
            assert alias in printed

    def test_alias_switch(self, capsys):
        cli = self._make_cli()
        switch_result = MagicMock()
        switch_result.configure_mock(
            success=True,
            new_model="anthropic/claude-sonnet-4.6",
            target_provider="openrouter",
            provider_changed=False,
            api_key="key",
            base_url="https://openrouter.ai/api/v1",
            api_mode="",
            persist=True,
            warning_message="",
            resolved_via_alias="sonnet",
        )

        with patch("hermes_cli.model_switch.switch_model", return_value=switch_result):
            with patch("hermes_cli.models._PROVIDER_LABELS", {"openrouter": "OpenRouter"}):
                with patch("cli.save_config_value"):
                    cli._handle_model_switch("/model sonnet")

        printed = capsys.readouterr().out
        assert "sonnet" in printed
        assert "claude-sonnet" in printed
        assert cli.model == "anthropic/claude-sonnet-4.6"

    def test_failed_switch_shows_suggestions(self, capsys):
        cli = self._make_cli()
        switch_result = MagicMock()
        switch_result.configure_mock(success=False, error_message="No credentials")

        with patch("hermes_cli.model_switch.switch_model", return_value=switch_result):
            with patch(
                "hermes_cli.model_switch.suggest_models",
                return_value=["sonnet", "opus"],
            ):
                cli._handle_model_switch("/model sonet")

        printed = capsys.readouterr().out
        assert "Did you mean" in printed

    def test_same_model_noop(self, capsys):
        current = "anthropic/claude-sonnet-4.6"
        cli = self._make_cli(model=current)
        cli._handle_model_switch("/model anthropic/claude-sonnet-4.6")
        printed = capsys.readouterr().out
        assert "Already using" in printed
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# Gateway handler
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
class TestGatewayHandler:
    """Test the gateway /model handler."""

    def _make_gateway_config(self, tmp_path, model="test-model", provider="openrouter"):
        """Write a minimal ``config.yaml`` under ``tmp_path/.hermes``.

        Returns a ``(config_path, config_dir)`` tuple.
        """
        import yaml

        config_dir = tmp_path / ".hermes"
        config_dir.mkdir(exist_ok=True)
        config_path = config_dir / "config.yaml"
        payload = {"model": {"default": model, "provider": provider}}
        with open(config_path, "w") as handle:
            yaml.dump(payload, handle)
        return config_path, config_dir

    @staticmethod
    def _runner_with_real_handler():
        """Return a spec'd GatewayRunner mock with the real /model handler bound to it."""
        from gateway.run import GatewayRunner

        runner = MagicMock(spec=GatewayRunner)
        # Bind the genuine coroutine function so the mock executes real handler code.
        runner._handle_model_command = GatewayRunner._handle_model_command.__get__(runner)
        return runner

    @pytest.mark.asyncio
    async def test_no_args_shows_aliases(self, tmp_path, monkeypatch):
        """With empty command args the reply mentions the common aliases."""
        _, config_dir = self._make_gateway_config(tmp_path)
        monkeypatch.setattr("gateway.run._hermes_home", config_dir)

        runner = self._runner_with_real_handler()

        event = MagicMock()
        event.get_command_args.return_value = ""

        reply = await runner._handle_model_command(event)

        for alias in ("sonnet", "opus", "gpt5"):
            assert alias in reply

    @pytest.mark.asyncio
    async def test_successful_switch_evicts_agent(self, tmp_path, monkeypatch):
        """A successful switch result leads to exactly one cached-agent eviction."""
        _, config_dir = self._make_gateway_config(tmp_path)
        monkeypatch.setattr("gateway.run._hermes_home", config_dir)

        runner = self._runner_with_real_handler()
        runner._session_key_for_source = MagicMock(return_value="test-key")
        runner._evict_cached_agent = MagicMock()

        event = MagicMock()
        event.get_command_args.return_value = "sonnet"
        event.source = MagicMock()

        # Stand-in for the object switch_model() returns on success.
        switch_outcome = MagicMock(
            success=True,
            new_model="anthropic/claude-sonnet-4.6",
            target_provider="openrouter",
            provider_changed=False,
            persist=True,
            warning_message="",
            resolved_via_alias="sonnet",
            is_custom_target=False,
        )

        switch_patch = patch("hermes_cli.model_switch.switch_model", return_value=switch_outcome)
        save_patch = patch("hermes_cli.config.save_config")
        with switch_patch, save_patch:
            reply = await runner._handle_model_command(event)

        assert "sonnet" in reply
        runner._evict_cached_agent.assert_called_once()

    @pytest.mark.asyncio
    async def test_error_with_suggestions(self, tmp_path, monkeypatch):
        """A failed switch reply includes a "Did you mean" hint with the suggestion."""
        _, config_dir = self._make_gateway_config(tmp_path)
        monkeypatch.setattr("gateway.run._hermes_home", config_dir)

        runner = self._runner_with_real_handler()

        event = MagicMock()
        event.get_command_args.return_value = "sonet"  # typo
        event.source = MagicMock()

        # Stand-in for the object switch_model() returns on failure.
        switch_outcome = MagicMock(success=False, error_message="No credentials")

        switch_patch = patch("hermes_cli.model_switch.switch_model", return_value=switch_outcome)
        suggest_patch = patch("hermes_cli.model_switch.suggest_models", return_value=["sonnet"])
        with switch_patch, suggest_patch:
            reply = await runner._handle_model_command(event)

        assert "Did you mean" in reply
        assert "sonnet" in reply
|
||||
|
||||
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
# Edge cases
|
||||
# ═══════════════════════════════════════════════════════════════════════
|
||||
|
||||
class TestEdgeCases:
    """Edge-case checks for the model_switch result types and helpers."""

    def test_custom_auto_result(self):
        """CustomAutoResult can be built with success/model/base_url and reports success."""
        from hermes_cli.model_switch import CustomAutoResult

        outcome = CustomAutoResult(
            success=True,
            model="llama-3.3",
            base_url="http://localhost:11434",
        )
        assert outcome.success

    def test_result_has_alias_field(self):
        """ModelSwitchResult keeps the alias it was given in resolved_via_alias."""
        from hermes_cli.model_switch import ModelSwitchResult

        outcome = ModelSwitchResult(success=True, resolved_via_alias="sonnet")
        assert outcome.resolved_via_alias == "sonnet"

    def test_switch_to_custom_no_endpoint(self):
        """When runtime-provider resolution raises, the result fails and mentions config.yaml."""
        from hermes_cli.model_switch import switch_to_custom_provider

        failing_resolver = patch(
            "hermes_cli.runtime_provider.resolve_runtime_provider",
            side_effect=Exception("no endpoint"),
        )
        with failing_resolver:
            outcome = switch_to_custom_provider()

        assert not outcome.success
        assert "config.yaml" in outcome.error_message

    def test_opencode_api_mode_recompute(self):
        """With the three patches below, an opencode-zen switch reports "anthropic_messages"."""
        from hermes_cli.model_switch import switch_model

        runtime_info = {
            "api_key": "key", "base_url": "https://example.com", "api_mode": "chat_completions",
        }
        parse_patch = patch(
            "hermes_cli.models.parse_model_input",
            return_value=("opencode-zen", "claude-opus"),
        )
        runtime_patch = patch(
            "hermes_cli.runtime_provider.resolve_runtime_provider",
            return_value=runtime_info,
        )
        api_mode_patch = patch(
            "hermes_cli.models.opencode_model_api_mode",
            return_value="anthropic_messages",
        )
        with parse_patch, runtime_patch, api_mode_patch:
            outcome = switch_model("opencode-zen:claude-opus", current_provider="openrouter")

        assert outcome.success
        assert outcome.api_mode == "anthropic_messages"
|
||||
90
uv.lock
generated
90
uv.lock
generated
@@ -1017,6 +1017,31 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c6/45/e6dd0c6c740c67c07474f2eb5175bb5656598488db444c4abd2a4e948393/daytona_toolbox_api_client_async-0.155.0-py3-none-any.whl", hash = "sha256:6ecf6351a31686d8e33ff054db69e279c45b574018b6c9a1cae15a7940412951", size = 176355, upload-time = "2026-03-24T14:47:36.327Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "debugpy"
|
||||
version = "1.8.20"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/e0/b7/cd8080344452e4874aae67c40d8940e2b4d47b01601a8fd9f44786c757c7/debugpy-1.8.20.tar.gz", hash = "sha256:55bc8701714969f1ab89a6d5f2f3d40c36f91b2cbe2f65d98bf8196f6a6a2c33", size = 1645207, upload-time = "2026-01-29T23:03:28.199Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/51/56/c3baf5cbe4dd77427fd9aef99fcdade259ad128feeb8a786c246adb838e5/debugpy-1.8.20-cp311-cp311-macosx_15_0_universal2.whl", hash = "sha256:eada6042ad88fa1571b74bd5402ee8b86eded7a8f7b827849761700aff171f1b", size = 2208318, upload-time = "2026-01-29T23:03:36.481Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/9a/7d/4fa79a57a8e69fe0d9763e98d1110320f9ecd7f1f362572e3aafd7417c9d/debugpy-1.8.20-cp311-cp311-manylinux_2_34_x86_64.whl", hash = "sha256:7de0b7dfeedc504421032afba845ae2a7bcc32ddfb07dae2c3ca5442f821c344", size = 3171493, upload-time = "2026-01-29T23:03:37.775Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/7d/f2/1e8f8affe51e12a26f3a8a8a4277d6e60aa89d0a66512f63b1e799d424a4/debugpy-1.8.20-cp311-cp311-win32.whl", hash = "sha256:773e839380cf459caf73cc533ea45ec2737a5cc184cf1b3b796cd4fd98504fec", size = 5209240, upload-time = "2026-01-29T23:03:39.109Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/d5/92/1cb532e88560cbee973396254b21bece8c5d7c2ece958a67afa08c9f10dc/debugpy-1.8.20-cp311-cp311-win_amd64.whl", hash = "sha256:1f7650546e0eded1902d0f6af28f787fa1f1dbdbc97ddabaf1cd963a405930cb", size = 5233481, upload-time = "2026-01-29T23:03:40.659Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/14/57/7f34f4736bfb6e00f2e4c96351b07805d83c9a7b33d28580ae01374430f7/debugpy-1.8.20-cp312-cp312-macosx_15_0_universal2.whl", hash = "sha256:4ae3135e2089905a916909ef31922b2d733d756f66d87345b3e5e52b7a55f13d", size = 2550686, upload-time = "2026-01-29T23:03:42.023Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/ab/78/b193a3975ca34458f6f0e24aaf5c3e3da72f5401f6054c0dfd004b41726f/debugpy-1.8.20-cp312-cp312-manylinux_2_34_x86_64.whl", hash = "sha256:88f47850a4284b88bd2bfee1f26132147d5d504e4e86c22485dfa44b97e19b4b", size = 4310588, upload-time = "2026-01-29T23:03:43.314Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c1/55/f14deb95eaf4f30f07ef4b90a8590fc05d9e04df85ee379712f6fb6736d7/debugpy-1.8.20-cp312-cp312-win32.whl", hash = "sha256:4057ac68f892064e5f98209ab582abfee3b543fb55d2e87610ddc133a954d390", size = 5331372, upload-time = "2026-01-29T23:03:45.526Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a1/39/2bef246368bd42f9bd7cba99844542b74b84dacbdbea0833e610f384fee8/debugpy-1.8.20-cp312-cp312-win_amd64.whl", hash = "sha256:a1a8f851e7cf171330679ef6997e9c579ef6dd33c9098458bd9986a0f4ca52e3", size = 5372835, upload-time = "2026-01-29T23:03:47.245Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/15/e2/fc500524cc6f104a9d049abc85a0a8b3f0d14c0a39b9c140511c61e5b40b/debugpy-1.8.20-cp313-cp313-macosx_15_0_universal2.whl", hash = "sha256:5dff4bb27027821fdfcc9e8f87309a28988231165147c31730128b1c983e282a", size = 2539560, upload-time = "2026-01-29T23:03:48.738Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/90/83/fb33dcea789ed6018f8da20c5a9bc9d82adc65c0c990faed43f7c955da46/debugpy-1.8.20-cp313-cp313-manylinux_2_34_x86_64.whl", hash = "sha256:84562982dd7cf5ebebfdea667ca20a064e096099997b175fe204e86817f64eaf", size = 4293272, upload-time = "2026-01-29T23:03:50.169Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/a6/25/b1e4a01bfb824d79a6af24b99ef291e24189080c93576dfd9b1a2815cd0f/debugpy-1.8.20-cp313-cp313-win32.whl", hash = "sha256:da11dea6447b2cadbf8ce2bec59ecea87cc18d2c574980f643f2d2dfe4862393", size = 5331208, upload-time = "2026-01-29T23:03:51.547Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/13/f7/a0b368ce54ffff9e9028c098bd2d28cfc5b54f9f6c186929083d4c60ba58/debugpy-1.8.20-cp313-cp313-win_amd64.whl", hash = "sha256:eb506e45943cab2efb7c6eafdd65b842f3ae779f020c82221f55aca9de135ed7", size = 5372930, upload-time = "2026-01-29T23:03:53.585Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/33/2e/f6cb9a8a13f5058f0a20fe09711a7b726232cd5a78c6a7c05b2ec726cff9/debugpy-1.8.20-cp314-cp314-macosx_15_0_universal2.whl", hash = "sha256:9c74df62fc064cd5e5eaca1353a3ef5a5d50da5eb8058fcef63106f7bebe6173", size = 2538066, upload-time = "2026-01-29T23:03:54.999Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c5/56/6ddca50b53624e1ca3ce1d1e49ff22db46c47ea5fb4c0cc5c9b90a616364/debugpy-1.8.20-cp314-cp314-manylinux_2_34_x86_64.whl", hash = "sha256:077a7447589ee9bc1ff0cdf443566d0ecf540ac8aa7333b775ebcb8ce9f4ecad", size = 4269425, upload-time = "2026-01-29T23:03:56.518Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/c5/d9/d64199c14a0d4c476df46c82470a3ce45c8d183a6796cfb5e66533b3663c/debugpy-1.8.20-cp314-cp314-win32.whl", hash = "sha256:352036a99dd35053b37b7803f748efc456076f929c6a895556932eaf2d23b07f", size = 5331407, upload-time = "2026-01-29T23:03:58.481Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e0/d9/1f07395b54413432624d61524dfd98c1a7c7827d2abfdb8829ac92638205/debugpy-1.8.20-cp314-cp314-win_amd64.whl", hash = "sha256:a98eec61135465b062846112e5ecf2eebb855305acc1dfbae43b72903b8ab5be", size = 5372521, upload-time = "2026-01-29T23:03:59.864Z" },
|
||||
{ url = "https://files.pythonhosted.org/packages/e0/c3/7f67dea8ccf8fdcb9c99033bbe3e90b9e7395415843accb81428c441be2d/debugpy-1.8.20-py2.py3-none-any.whl", hash = "sha256:5be9bed9ae3be00665a06acaa48f8329d2b9632f15fd09f6a9a8c8d9907e54d7", size = 5337658, upload-time = "2026-01-29T23:04:17.404Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deprecated"
|
||||
version = "1.3.1"
|
||||
@@ -1133,6 +1158,24 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/97/a8/c070e1340636acb38d4e6a7e45c46d168a462b48b9b3257e14ca0e5af79b/environs-14.6.0-py3-none-any.whl", hash = "sha256:f8fb3d6c6a55872b0c6db077a28f5a8c7b8984b7c32029613d44cef95cfc0812", size = 17205, upload-time = "2026-02-20T04:02:07.299Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "exa-py"
|
||||
version = "2.10.2"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "httpcore" },
|
||||
{ name = "httpx" },
|
||||
{ name = "openai" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "python-dotenv" },
|
||||
{ name = "requests" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/fe/4f/f06a6f277d668f143e330fe503b0027cc5fed753b22c3e161f8cbbccdf65/exa_py-2.10.2.tar.gz", hash = "sha256:f781f30b199f1102333384728adae64bb15a6bbcabfa97e91fd705f90acffc45", size = 53792, upload-time = "2026-03-26T20:29:35.764Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e2/bc/7a34e904a415040ba626948d0b0a36a08cd073f12b13342578a68331be3c/exa_py-2.10.2-py3-none-any.whl", hash = "sha256:ecb2a7581f4b7a8aeb6b434acce1bbc40f92ed1d4126b2aa6029913acd904a47", size = 72248, upload-time = "2026-03-26T20:29:37.306Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "execnet"
|
||||
version = "2.1.2"
|
||||
@@ -1600,13 +1643,13 @@ wheels = [
|
||||
|
||||
[[package]]
|
||||
name = "hermes-agent"
|
||||
version = "0.5.0"
|
||||
version = "0.7.0"
|
||||
source = { editable = "." }
|
||||
dependencies = [
|
||||
{ name = "anthropic" },
|
||||
{ name = "edge-tts" },
|
||||
{ name = "exa-py" },
|
||||
{ name = "fal-client" },
|
||||
{ name = "faster-whisper" },
|
||||
{ name = "fire" },
|
||||
{ name = "firecrawl-py" },
|
||||
{ name = "httpx" },
|
||||
@@ -1632,10 +1675,13 @@ all = [
|
||||
{ name = "aiohttp" },
|
||||
{ name = "croniter" },
|
||||
{ name = "daytona" },
|
||||
{ name = "debugpy" },
|
||||
{ name = "dingtalk-stream" },
|
||||
{ name = "discord-py", extra = ["voice"] },
|
||||
{ name = "elevenlabs" },
|
||||
{ name = "faster-whisper" },
|
||||
{ name = "honcho-ai" },
|
||||
{ name = "lark-oapi" },
|
||||
{ name = "mcp" },
|
||||
{ name = "modal" },
|
||||
{ name = "numpy" },
|
||||
@@ -1660,6 +1706,7 @@ daytona = [
|
||||
{ name = "daytona" },
|
||||
]
|
||||
dev = [
|
||||
{ name = "debugpy" },
|
||||
{ name = "mcp" },
|
||||
{ name = "pytest" },
|
||||
{ name = "pytest-asyncio" },
|
||||
@@ -1668,6 +1715,9 @@ dev = [
|
||||
dingtalk = [
|
||||
{ name = "dingtalk-stream" },
|
||||
]
|
||||
feishu = [
|
||||
{ name = "lark-oapi" },
|
||||
]
|
||||
homeassistant = [
|
||||
{ name = "aiohttp" },
|
||||
]
|
||||
@@ -1712,6 +1762,7 @@ tts-premium = [
|
||||
{ name = "elevenlabs" },
|
||||
]
|
||||
voice = [
|
||||
{ name = "faster-whisper" },
|
||||
{ name = "numpy" },
|
||||
{ name = "sounddevice" },
|
||||
]
|
||||
@@ -1729,13 +1780,15 @@ requires-dist = [
|
||||
{ name = "atroposlib", marker = "extra == 'rl'", git = "https://github.com/NousResearch/atropos.git" },
|
||||
{ name = "croniter", marker = "extra == 'cron'", specifier = ">=6.0.0,<7" },
|
||||
{ name = "daytona", marker = "extra == 'daytona'", specifier = ">=0.148.0,<1" },
|
||||
{ name = "debugpy", marker = "extra == 'dev'", specifier = ">=1.8.0,<2" },
|
||||
{ name = "dingtalk-stream", marker = "extra == 'dingtalk'", specifier = ">=0.1.0,<1" },
|
||||
{ name = "discord-py", extras = ["voice"], marker = "extra == 'messaging'", specifier = ">=2.7.1,<3" },
|
||||
{ name = "edge-tts", specifier = ">=7.2.7,<8" },
|
||||
{ name = "elevenlabs", marker = "extra == 'tts-premium'", specifier = ">=1.0,<2" },
|
||||
{ name = "exa-py", specifier = ">=2.9.0,<3" },
|
||||
{ name = "fal-client", specifier = ">=0.13.1,<1" },
|
||||
{ name = "fastapi", marker = "extra == 'rl'", specifier = ">=0.104.0,<1" },
|
||||
{ name = "faster-whisper", specifier = ">=1.0.0,<2" },
|
||||
{ name = "faster-whisper", marker = "extra == 'voice'", specifier = ">=1.0.0,<2" },
|
||||
{ name = "fire", specifier = ">=0.7.1,<1" },
|
||||
{ name = "firecrawl-py", specifier = ">=4.16.0,<5" },
|
||||
{ name = "hermes-agent", extras = ["acp"], marker = "extra == 'all'" },
|
||||
@@ -1744,6 +1797,7 @@ requires-dist = [
|
||||
{ name = "hermes-agent", extras = ["daytona"], marker = "extra == 'all'" },
|
||||
{ name = "hermes-agent", extras = ["dev"], marker = "extra == 'all'" },
|
||||
{ name = "hermes-agent", extras = ["dingtalk"], marker = "extra == 'all'" },
|
||||
{ name = "hermes-agent", extras = ["feishu"], marker = "extra == 'all'" },
|
||||
{ name = "hermes-agent", extras = ["homeassistant"], marker = "extra == 'all'" },
|
||||
{ name = "hermes-agent", extras = ["honcho"], marker = "extra == 'all'" },
|
||||
{ name = "hermes-agent", extras = ["mcp"], marker = "extra == 'all'" },
|
||||
@@ -1757,6 +1811,7 @@ requires-dist = [
|
||||
{ name = "honcho-ai", marker = "extra == 'honcho'", specifier = ">=2.0.1,<3" },
|
||||
{ name = "httpx", specifier = ">=0.28.1,<1" },
|
||||
{ name = "jinja2", specifier = ">=3.1.5,<4" },
|
||||
{ name = "lark-oapi", marker = "extra == 'feishu'", specifier = ">=1.5.3,<2" },
|
||||
{ name = "matrix-nio", extras = ["e2e"], marker = "extra == 'matrix'", specifier = ">=0.24.0,<1" },
|
||||
{ name = "mcp", marker = "extra == 'dev'", specifier = ">=1.2.0,<2" },
|
||||
{ name = "mcp", marker = "extra == 'mcp'", specifier = ">=1.2.0,<2" },
|
||||
@@ -1789,7 +1844,7 @@ requires-dist = [
|
||||
{ name = "wandb", marker = "extra == 'rl'", specifier = ">=0.15.0,<1" },
|
||||
{ name = "yc-bench", marker = "python_full_version >= '3.12' and extra == 'yc-bench'", git = "https://github.com/collinear-ai/yc-bench.git" },
|
||||
]
|
||||
provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "dingtalk", "rl", "yc-bench", "all"]
|
||||
provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "dingtalk", "feishu", "rl", "yc-bench", "all"]
|
||||
|
||||
[[package]]
|
||||
name = "hf-transfer"
|
||||
@@ -2267,6 +2322,21 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/0a/dd/8050c947d435c8d4bc94e3252f4d8bb8a76cfb424f043a8680be637a57f1/kiwisolver-1.5.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:59cd8683f575d96df5bb48f6add94afc055012c29e28124fcae2b63661b9efb1", size = 73558, upload-time = "2026-03-09T13:15:52.112Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lark-oapi"
|
||||
version = "1.5.3"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "httpx" },
|
||||
{ name = "pycryptodome" },
|
||||
{ name = "requests" },
|
||||
{ name = "requests-toolbelt" },
|
||||
{ name = "websockets" },
|
||||
]
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/bf/ff/2ece5d735ebfa2af600a53176f2636ae47af2bf934e08effab64f0d1e047/lark_oapi-1.5.3-py3-none-any.whl", hash = "sha256:fda6b32bb38d21b6bdaae94979c600b94c7c521e985adade63a54e4b3e20cc36", size = 6993016, upload-time = "2026-01-27T08:21:49.307Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "latex2sympy2-extended"
|
||||
version = "1.11.0"
|
||||
@@ -4122,6 +4192,18 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/56/5d/c814546c2333ceea4ba42262d8c4d55763003e767fa169adc693bd524478/requests-2.33.0-py3-none-any.whl", hash = "sha256:3324635456fa185245e24865e810cecec7b4caf933d7eb133dcde67d48cee69b", size = 65017, upload-time = "2026-03-25T15:10:40.382Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "requests-toolbelt"
|
||||
version = "1.0.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "requests" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f3/61/d7545dafb7ac2230c70d38d31cbfe4cc64f7144dc41f6e4e4b78ecd9f5bb/requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6", size = 206888, upload-time = "2023-05-01T04:11:33.229Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/3f/51/d4db610ef29373b879047326cbf6fa98b6c1969d6f6dc423279de2b1be2c/requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06", size = 54481, upload-time = "2023-05-01T04:11:28.427Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rich"
|
||||
version = "14.3.3"
|
||||
|
||||
113
website/docs/user-guide/features/model-switching.md
Normal file
113
website/docs/user-guide/features/model-switching.md
Normal file
@@ -0,0 +1,113 @@
|
||||
---
|
||||
sidebar_position: 3
|
||||
---
|
||||
|
||||
# Switching Models
|
||||
|
||||
Change models mid-conversation without losing your chat history.
|
||||
|
||||
```
|
||||
/model sonnet
|
||||
```
|
||||
|
||||
That's it. Your conversation continues with the new model. Hermes formats the name correctly for whatever provider you're on — you don't need to think about it.
|
||||
|
||||
## Quick Reference
|
||||
|
||||
| You type | You get |
|
||||
|----------|---------|
|
||||
| `/model sonnet` | Claude Sonnet 4.6 |
|
||||
| `/model opus` | Claude Opus 4.6 |
|
||||
| `/model haiku` | Claude Haiku 4.5 |
|
||||
| `/model gpt5` | GPT-5.4 |
|
||||
| `/model gpt5-mini` | GPT-5.4 Mini |
|
||||
| `/model gpt5-pro` | GPT-5.4 Pro |
|
||||
| `/model codex` | GPT-5.3 Codex |
|
||||
| `/model gemini` | Gemini 3 Pro |
|
||||
| `/model gemini-flash` | Gemini 3 Flash |
|
||||
| `/model deepseek` | DeepSeek Chat |
|
||||
| `/model grok` | Grok 4.20 |
|
||||
| `/model qwen` | Qwen 3.6 Plus |
|
||||
| `/model minimax` | MiniMax M2.7 |
|
||||
|
||||
These aliases **stay on your current provider**. If you're on OpenRouter, you stay on OpenRouter. If you're on native Anthropic, you stay on native Anthropic. The model name is formatted correctly for each — `anthropic/claude-sonnet-4.6` on OpenRouter becomes `claude-sonnet-4-6` on native Anthropic automatically.
|
||||
|
||||
If the model isn't available on your current provider (like `/model gpt5` on native Anthropic), Hermes will switch to a provider that has it and tell you.
|
||||
|
||||
Type `/model` with no arguments to see your current model and the most common aliases.
|
||||
|
||||
## Full Model Names
|
||||
|
||||
Aliases cover the most popular models. For anything else, use the full name in your provider's format:
|
||||
|
||||
```
|
||||
/model anthropic/claude-sonnet-4.5
|
||||
/model openai/gpt-5.4-nano
|
||||
/model nvidia/nemotron-3-super-120b-a12b
|
||||
```
|
||||
|
||||
On OpenRouter, these are the standard model IDs from [openrouter.ai/models](https://openrouter.ai/models). On other providers, use whatever model name that provider expects.
|
||||
|
||||
If you're not sure of the exact name, type something close. Hermes will suggest corrections:
|
||||
|
||||
```
|
||||
> /model claude-sonet
|
||||
Note: Not in catalog — did you mean: anthropic/claude-sonnet-4.6?
|
||||
```
|
||||
|
||||
## Switching Providers
|
||||
|
||||
Aliases and bare model names keep you on your current provider. To explicitly switch to a different provider, use the provider prefix with a colon:
|
||||
|
||||
```
|
||||
/model anthropic:claude-opus-4
|
||||
/model deepseek:deepseek-chat
|
||||
/model nous:anthropic/claude-opus-4.6
|
||||
```
|
||||
|
||||
The part before the colon is the Hermes provider name (the same names from `hermes setup`). The part after is the model name as that provider knows it.
|
||||
|
||||
To see which providers you have configured: `/provider`
|
||||
|
||||
:::tip
|
||||
On OpenRouter, you can also use `openai:gpt-5.4` — Hermes knows "openai" is a vendor name on OpenRouter (not a separate Hermes provider) and converts it to `openai/gpt-5.4` automatically.
|
||||
:::
|
||||
|
||||
## Custom / Local Endpoints
|
||||
|
||||
If you've set up a local model server (Ollama, vLLM, LM Studio, etc.):
|
||||
|
||||
```
|
||||
/model custom
|
||||
```
|
||||
|
||||
This auto-detects the model running on your custom endpoint. If you have multiple models or want to specify one:
|
||||
|
||||
```
|
||||
/model custom:llama-3.3-70b
|
||||
```
|
||||
|
||||
Custom endpoints are configured in `~/.hermes/config.yaml` under `model.base_url`, or via the `OPENAI_BASE_URL` environment variable.
|
||||
|
||||
## What Happens When You Switch
|
||||
|
||||
- **Conversation history is preserved.** The new model picks up where the old one left off.
|
||||
- **Prompt cache resets.** The new model builds a fresh cache. This is unavoidable — different models have different cache keys.
|
||||
- **System prompt rebuilds.** Some models get tailored guidance (tool use patterns, etc.). The system prompt updates automatically.
|
||||
- **Config is saved.** The new model becomes your default for future sessions too.
|
||||
|
||||
## Where It Works
|
||||
|
||||
`/model` works everywhere Hermes runs:
|
||||
|
||||
- CLI (`hermes chat`)
|
||||
- Telegram
|
||||
- Discord
|
||||
- Slack
|
||||
- Matrix
|
||||
- WhatsApp
|
||||
- Signal
|
||||
- Home Assistant
|
||||
- All other gateway platforms
|
||||
|
||||
On messaging platforms, if the agent is currently processing a message, `/model` will ask you to wait for it to finish or to send `/stop` first.
|
||||
Reference in New Issue
Block a user