mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-12 21:28:57 +08:00
Compare commits
7 Commits
fix/anthro
...
fix/custom
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d7bc0e1d03 | ||
|
|
2a3a374c78 | ||
|
|
0cee97c500 | ||
|
|
7b6d14e62a | ||
|
|
67d707e851 | ||
|
|
e648863d52 | ||
|
|
a7cc1cf309 |
@@ -654,10 +654,23 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
|
||||
if not token:
|
||||
return None, None
|
||||
|
||||
# Allow base URL override from config.yaml model.base_url
|
||||
base_url = _ANTHROPIC_DEFAULT_BASE_URL
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
cfg = load_config()
|
||||
model_cfg = cfg.get("model")
|
||||
if isinstance(model_cfg, dict):
|
||||
cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
if cfg_base_url:
|
||||
base_url = cfg_base_url
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001")
|
||||
logger.debug("Auxiliary client: Anthropic native (%s)", model)
|
||||
real_client = build_anthropic_client(token, _ANTHROPIC_DEFAULT_BASE_URL)
|
||||
return AnthropicAuxiliaryClient(real_client, model, token, _ANTHROPIC_DEFAULT_BASE_URL), model
|
||||
logger.debug("Auxiliary client: Anthropic native (%s) at %s", model, base_url)
|
||||
real_client = build_anthropic_client(token, base_url)
|
||||
return AnthropicAuxiliaryClient(real_client, model, token, base_url), model
|
||||
|
||||
|
||||
def _resolve_forced_provider(forced: str) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
|
||||
@@ -46,6 +46,7 @@ class ContextCompressor:
|
||||
summary_model_override: str = None,
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
config_context_length: int | None = None,
|
||||
):
|
||||
self.model = model
|
||||
self.base_url = base_url
|
||||
@@ -56,7 +57,10 @@ class ContextCompressor:
|
||||
self.summary_target_tokens = summary_target_tokens
|
||||
self.quiet_mode = quiet_mode
|
||||
|
||||
self.context_length = get_model_context_length(model, base_url=base_url, api_key=api_key)
|
||||
self.context_length = get_model_context_length(
|
||||
model, base_url=base_url, api_key=api_key,
|
||||
config_context_length=config_context_length,
|
||||
)
|
||||
self.threshold_tokens = int(self.context_length * threshold_percent)
|
||||
self.compression_count = 0
|
||||
self._context_probed = False # True after a step-down from context error
|
||||
|
||||
@@ -136,6 +136,8 @@ _CONTEXT_LENGTH_KEYS = (
|
||||
"max_input_tokens",
|
||||
"max_sequence_length",
|
||||
"max_seq_len",
|
||||
"n_ctx_train",
|
||||
"n_ctx",
|
||||
)
|
||||
|
||||
_MAX_COMPLETION_KEYS = (
|
||||
@@ -342,6 +344,25 @@ def fetch_endpoint_model_metadata(
|
||||
entry["pricing"] = pricing
|
||||
_add_model_aliases(cache, model_id, entry)
|
||||
|
||||
# If this is a llama.cpp server, query /props for actual allocated context
|
||||
is_llamacpp = any(
|
||||
m.get("owned_by") == "llamacpp"
|
||||
for m in payload.get("data", []) if isinstance(m, dict)
|
||||
)
|
||||
if is_llamacpp:
|
||||
try:
|
||||
props_url = candidate.rstrip("/").replace("/v1", "") + "/props"
|
||||
props_resp = requests.get(props_url, headers=headers, timeout=5)
|
||||
if props_resp.ok:
|
||||
props = props_resp.json()
|
||||
gen_settings = props.get("default_generation_settings", {})
|
||||
n_ctx = gen_settings.get("n_ctx")
|
||||
model_alias = props.get("model_alias", "")
|
||||
if n_ctx and model_alias and model_alias in cache:
|
||||
cache[model_alias]["context_length"] = n_ctx
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_endpoint_model_metadata_cache[normalized] = cache
|
||||
_endpoint_model_metadata_cache_time[normalized] = time.time()
|
||||
return cache
|
||||
@@ -439,16 +460,26 @@ def parse_context_limit_from_error(error_msg: str) -> Optional[int]:
|
||||
return None
|
||||
|
||||
|
||||
def get_model_context_length(model: str, base_url: str = "", api_key: str = "") -> int:
|
||||
def get_model_context_length(
|
||||
model: str,
|
||||
base_url: str = "",
|
||||
api_key: str = "",
|
||||
config_context_length: int | None = None,
|
||||
) -> int:
|
||||
"""Get the context length for a model.
|
||||
|
||||
Resolution order:
|
||||
0. Explicit config override (model.context_length in config.yaml)
|
||||
1. Persistent cache (previously discovered via probing)
|
||||
2. Active endpoint metadata (/models for explicit custom endpoints)
|
||||
3. OpenRouter API metadata
|
||||
4. Hardcoded DEFAULT_CONTEXT_LENGTHS (fuzzy match for hosted routes only)
|
||||
5. First probe tier (2M) — will be narrowed on first context error
|
||||
"""
|
||||
# 0. Explicit config override — user knows best
|
||||
if config_context_length is not None and isinstance(config_context_length, int) and config_context_length > 0:
|
||||
return config_context_length
|
||||
|
||||
# 1. Check persistent cache (model+provider)
|
||||
if base_url:
|
||||
cached = get_cached_context_length(model, base_url)
|
||||
@@ -458,13 +489,30 @@ def get_model_context_length(model: str, base_url: str = "", api_key: str = "")
|
||||
# 2. Active endpoint metadata for explicit custom routes
|
||||
if _is_custom_endpoint(base_url):
|
||||
endpoint_metadata = fetch_endpoint_model_metadata(base_url, api_key=api_key)
|
||||
if model in endpoint_metadata:
|
||||
context_length = endpoint_metadata[model].get("context_length")
|
||||
matched = endpoint_metadata.get(model)
|
||||
if not matched:
|
||||
# Single-model servers: if only one model is loaded, use it
|
||||
if len(endpoint_metadata) == 1:
|
||||
matched = next(iter(endpoint_metadata.values()))
|
||||
else:
|
||||
# Fuzzy match: substring in either direction
|
||||
for key, entry in endpoint_metadata.items():
|
||||
if model in key or key in model:
|
||||
matched = entry
|
||||
break
|
||||
if matched:
|
||||
context_length = matched.get("context_length")
|
||||
if isinstance(context_length, int):
|
||||
return context_length
|
||||
if not _is_known_provider_base_url(base_url):
|
||||
# Explicit third-party endpoints should not borrow fuzzy global
|
||||
# defaults from unrelated providers with similarly named models.
|
||||
logger.info(
|
||||
"Could not detect context length for model %r at %s — "
|
||||
"defaulting to %s tokens (probe-down). Set model.context_length "
|
||||
"in config.yaml to override.",
|
||||
model, base_url, f"{CONTEXT_PROBE_TIERS[0]:,}",
|
||||
)
|
||||
return CONTEXT_PROBE_TIERS[0]
|
||||
|
||||
# 3. OpenRouter API metadata
|
||||
|
||||
10
cli.py
10
cli.py
@@ -1046,6 +1046,14 @@ class HermesCLI:
|
||||
_config_model = _model_config.get("default", "") if isinstance(_model_config, dict) else (_model_config or "")
|
||||
_FALLBACK_MODEL = "anthropic/claude-opus-4.6"
|
||||
self.model = model or _config_model or _FALLBACK_MODEL
|
||||
# Auto-detect model from local server if still on fallback
|
||||
if self.model == _FALLBACK_MODEL:
|
||||
_base_url = _model_config.get("base_url", "") if isinstance(_model_config, dict) else ""
|
||||
if "localhost" in _base_url or "127.0.0.1" in _base_url:
|
||||
from hermes_cli.runtime_provider import _auto_detect_local_model
|
||||
_detected = _auto_detect_local_model(_base_url)
|
||||
if _detected:
|
||||
self.model = _detected
|
||||
# Track whether model was explicitly chosen by the user or fell back
|
||||
# to the global default. Provider-specific normalisation may override
|
||||
# the default silently but should warn when overriding an explicit choice.
|
||||
@@ -1251,6 +1259,8 @@ class HermesCLI:
|
||||
def _get_status_bar_snapshot(self) -> Dict[str, Any]:
|
||||
model_name = self.model or "unknown"
|
||||
model_short = model_name.split("/")[-1] if "/" in model_name else model_name
|
||||
if model_short.endswith(".gguf"):
|
||||
model_short = model_short[:-5]
|
||||
if len(model_short) > 26:
|
||||
model_short = f"{model_short[:23]}..."
|
||||
|
||||
|
||||
120
gateway/run.py
120
gateway/run.py
@@ -1441,6 +1441,12 @@ class GatewayRunner:
|
||||
if canonical == "reload-mcp":
|
||||
return await self._handle_reload_mcp_command(event)
|
||||
|
||||
if canonical == "approve":
|
||||
return await self._handle_approve_command(event)
|
||||
|
||||
if canonical == "deny":
|
||||
return await self._handle_deny_command(event)
|
||||
|
||||
if canonical == "update":
|
||||
return await self._handle_update_command(event)
|
||||
|
||||
@@ -1518,32 +1524,9 @@ class GatewayRunner:
|
||||
except Exception as e:
|
||||
logger.debug("Skill command check failed (non-fatal): %s", e)
|
||||
|
||||
# Check for pending exec approval responses
|
||||
session_key_preview = self._session_key_for_source(source)
|
||||
if session_key_preview in self._pending_approvals:
|
||||
user_text = event.text.strip().lower()
|
||||
if user_text in ("yes", "y", "approve", "ok", "go", "do it"):
|
||||
approval = self._pending_approvals.pop(session_key_preview)
|
||||
cmd = approval["command"]
|
||||
pattern_keys = approval.get("pattern_keys", [])
|
||||
if not pattern_keys:
|
||||
pk = approval.get("pattern_key", "")
|
||||
pattern_keys = [pk] if pk else []
|
||||
logger.info("User approved dangerous command: %s...", cmd[:60])
|
||||
from tools.terminal_tool import terminal_tool
|
||||
from tools.approval import approve_session
|
||||
for pk in pattern_keys:
|
||||
approve_session(session_key_preview, pk)
|
||||
result = terminal_tool(command=cmd, force=True)
|
||||
return f"✅ Command approved and executed.\n\n```\n{result[:3500]}\n```"
|
||||
elif user_text in ("no", "n", "deny", "cancel", "nope"):
|
||||
self._pending_approvals.pop(session_key_preview)
|
||||
return "❌ Command denied."
|
||||
elif user_text in ("full", "show", "view", "show full", "view full"):
|
||||
# Show full command without consuming the approval
|
||||
cmd = self._pending_approvals[session_key_preview]["command"]
|
||||
return f"Full command:\n\n```\n{cmd}\n```\n\nReply yes/no to approve or deny."
|
||||
# If it's not clearly an approval/denial, fall through to normal processing
|
||||
# Pending exec approvals are handled by /approve and /deny commands above.
|
||||
# No bare text matching — "yes" in normal conversation must not trigger
|
||||
# execution of a dangerous command.
|
||||
|
||||
# Get or create session
|
||||
session_entry = self.session_store.get_or_create_session(source)
|
||||
@@ -2059,9 +2042,22 @@ class GatewayRunner:
|
||||
# Check if the agent encountered a dangerous command needing approval
|
||||
try:
|
||||
from tools.approval import pop_pending
|
||||
import time as _time
|
||||
pending = pop_pending(session_key)
|
||||
if pending:
|
||||
pending["timestamp"] = _time.time()
|
||||
self._pending_approvals[session_key] = pending
|
||||
# Append structured instructions so the user knows how to respond
|
||||
cmd_preview = pending.get("command", "")
|
||||
if len(cmd_preview) > 200:
|
||||
cmd_preview = cmd_preview[:200] + "..."
|
||||
approval_hint = (
|
||||
f"\n\n⚠️ **Dangerous command requires approval:**\n"
|
||||
f"```\n{cmd_preview}\n```\n"
|
||||
f"Reply `/approve` to execute, `/approve session` to approve this pattern "
|
||||
f"for the session, or `/deny` to cancel."
|
||||
)
|
||||
response = (response or "") + approval_hint
|
||||
except Exception as e:
|
||||
logger.debug("Failed to check pending approvals: %s", e)
|
||||
|
||||
@@ -3696,6 +3692,78 @@ class GatewayRunner:
|
||||
logger.warning("MCP reload failed: %s", e)
|
||||
return f"❌ MCP reload failed: {e}"
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# /approve & /deny — explicit dangerous-command approval
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
_APPROVAL_TIMEOUT_SECONDS = 300 # 5 minutes
|
||||
|
||||
async def _handle_approve_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /approve command — execute a pending dangerous command.
|
||||
|
||||
Usage:
|
||||
/approve — approve and execute the pending command
|
||||
/approve session — approve and remember for this session
|
||||
/approve always — approve this pattern permanently
|
||||
"""
|
||||
source = event.source
|
||||
session_key = self._session_key_for_source(source)
|
||||
|
||||
if session_key not in self._pending_approvals:
|
||||
return "No pending command to approve."
|
||||
|
||||
import time as _time
|
||||
approval = self._pending_approvals[session_key]
|
||||
|
||||
# Check for timeout
|
||||
ts = approval.get("timestamp", 0)
|
||||
if _time.time() - ts > self._APPROVAL_TIMEOUT_SECONDS:
|
||||
self._pending_approvals.pop(session_key, None)
|
||||
return "⚠️ Approval expired (timed out after 5 minutes). Ask the agent to try again."
|
||||
|
||||
self._pending_approvals.pop(session_key)
|
||||
cmd = approval["command"]
|
||||
pattern_keys = approval.get("pattern_keys", [])
|
||||
if not pattern_keys:
|
||||
pk = approval.get("pattern_key", "")
|
||||
pattern_keys = [pk] if pk else []
|
||||
|
||||
# Determine approval scope from args
|
||||
args = event.get_command_args().strip().lower()
|
||||
from tools.approval import approve_session, approve_permanent
|
||||
|
||||
if args in ("always", "permanent", "permanently"):
|
||||
for pk in pattern_keys:
|
||||
approve_permanent(pk)
|
||||
scope_msg = " (pattern approved permanently)"
|
||||
elif args in ("session", "ses"):
|
||||
for pk in pattern_keys:
|
||||
approve_session(session_key, pk)
|
||||
scope_msg = " (pattern approved for this session)"
|
||||
else:
|
||||
# One-time approval — just approve for session so the immediate
|
||||
# replay works, but don't advertise it as session-wide
|
||||
for pk in pattern_keys:
|
||||
approve_session(session_key, pk)
|
||||
scope_msg = ""
|
||||
|
||||
logger.info("User approved dangerous command via /approve: %s...%s", cmd[:60], scope_msg)
|
||||
from tools.terminal_tool import terminal_tool
|
||||
result = terminal_tool(command=cmd, force=True)
|
||||
return f"✅ Command approved and executed{scope_msg}.\n\n```\n{result[:3500]}\n```"
|
||||
|
||||
async def _handle_deny_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /deny command — reject a pending dangerous command."""
|
||||
source = event.source
|
||||
session_key = self._session_key_for_source(source)
|
||||
|
||||
if session_key not in self._pending_approvals:
|
||||
return "No pending command to deny."
|
||||
|
||||
self._pending_approvals.pop(session_key)
|
||||
logger.info("User denied dangerous command via /deny")
|
||||
return "❌ Command denied."
|
||||
|
||||
async def _handle_update_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /update command — update Hermes Agent to the latest version.
|
||||
|
||||
|
||||
@@ -289,6 +289,8 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
|
||||
_hero = HERMES_CADUCEUS
|
||||
left_lines = ["", _hero, ""]
|
||||
model_short = model.split("/")[-1] if "/" in model else model
|
||||
if model_short.endswith(".gguf"):
|
||||
model_short = model_short[:-5]
|
||||
if len(model_short) > 28:
|
||||
model_short = model_short[:25] + "..."
|
||||
ctx_str = f" [dim {dim}]·[/] [dim {dim}]{_format_context_length(context_length)} context[/]" if context_length else ""
|
||||
|
||||
@@ -61,6 +61,10 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("rollback", "List or restore filesystem checkpoints", "Session",
|
||||
args_hint="[number]"),
|
||||
CommandDef("stop", "Kill all running background processes", "Session"),
|
||||
CommandDef("approve", "Approve a pending dangerous command", "Session",
|
||||
gateway_only=True, args_hint="[session|always]"),
|
||||
CommandDef("deny", "Deny a pending dangerous command", "Session",
|
||||
gateway_only=True),
|
||||
CommandDef("background", "Run a prompt in the background", "Session",
|
||||
aliases=("bg",), args_hint="<prompt>"),
|
||||
CommandDef("status", "Show session info", "Session",
|
||||
|
||||
@@ -24,11 +24,41 @@ def _normalize_custom_provider_name(value: str) -> str:
|
||||
return value.strip().lower().replace(" ", "-")
|
||||
|
||||
|
||||
def _auto_detect_local_model(base_url: str) -> str:
|
||||
"""Query a local server for its model name when only one model is loaded."""
|
||||
if not base_url:
|
||||
return ""
|
||||
try:
|
||||
import requests
|
||||
url = base_url.rstrip("/")
|
||||
if not url.endswith("/v1"):
|
||||
url += "/v1"
|
||||
resp = requests.get(url + "/models", timeout=5)
|
||||
if resp.ok:
|
||||
models = resp.json().get("data", [])
|
||||
if len(models) == 1:
|
||||
model_id = models[0].get("id", "")
|
||||
if model_id:
|
||||
return model_id
|
||||
except Exception:
|
||||
pass
|
||||
return ""
|
||||
|
||||
|
||||
def _get_model_config() -> Dict[str, Any]:
|
||||
config = load_config()
|
||||
model_cfg = config.get("model")
|
||||
if isinstance(model_cfg, dict):
|
||||
return dict(model_cfg)
|
||||
cfg = dict(model_cfg)
|
||||
default = cfg.get("default", "").strip()
|
||||
base_url = cfg.get("base_url", "").strip()
|
||||
is_local = "localhost" in base_url or "127.0.0.1" in base_url
|
||||
is_fallback = not default or default == "anthropic/claude-opus-4.6"
|
||||
if is_local and is_fallback and base_url:
|
||||
detected = _auto_detect_local_model(base_url)
|
||||
if detected:
|
||||
cfg["default"] = detected
|
||||
return cfg
|
||||
if isinstance(model_cfg, str) and model_cfg.strip():
|
||||
return {"default": model_cfg.strip()}
|
||||
return {}
|
||||
@@ -313,10 +343,14 @@ def resolve_runtime_provider(
|
||||
"No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
|
||||
"run 'claude setup-token', or authenticate with 'claude /login'."
|
||||
)
|
||||
# Allow base URL override from config.yaml model.base_url
|
||||
model_cfg = _get_model_config()
|
||||
cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
|
||||
base_url = cfg_base_url or "https://api.anthropic.com"
|
||||
return {
|
||||
"provider": "anthropic",
|
||||
"api_mode": "anthropic_messages",
|
||||
"base_url": "https://api.anthropic.com",
|
||||
"base_url": base_url,
|
||||
"api_key": token,
|
||||
"source": "env",
|
||||
"requested_provider": requested_provider,
|
||||
@@ -340,13 +374,23 @@ def resolve_runtime_provider(
|
||||
if pconfig and pconfig.auth_type == "api_key":
|
||||
creds = resolve_api_key_provider_credentials(provider)
|
||||
model_cfg = _get_model_config()
|
||||
base_url = creds.get("base_url", "").rstrip("/")
|
||||
api_mode = "chat_completions"
|
||||
if provider == "copilot":
|
||||
api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))
|
||||
else:
|
||||
# Check explicit api_mode from model config first
|
||||
configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
|
||||
if configured_mode:
|
||||
api_mode = configured_mode
|
||||
# Auto-detect Anthropic-compatible endpoints by URL convention
|
||||
# (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
|
||||
elif base_url.rstrip("/").endswith("/anthropic"):
|
||||
api_mode = "anthropic_messages"
|
||||
return {
|
||||
"provider": provider,
|
||||
"api_mode": api_mode,
|
||||
"base_url": creds.get("base_url", "").rstrip("/"),
|
||||
"base_url": base_url,
|
||||
"api_key": creds.get("api_key", ""),
|
||||
"source": creds.get("source", "env"),
|
||||
"requested_provider": requested_provider,
|
||||
|
||||
22
run_agent.py
22
run_agent.py
@@ -493,6 +493,11 @@ class AIAgent:
|
||||
elif self.provider == "anthropic" or (provider_name is None and "api.anthropic.com" in self._base_url_lower):
|
||||
self.api_mode = "anthropic_messages"
|
||||
self.provider = "anthropic"
|
||||
elif self._base_url_lower.rstrip("/").endswith("/anthropic"):
|
||||
# Third-party Anthropic-compatible endpoints (e.g. MiniMax, DashScope)
|
||||
# use a URL convention ending in /anthropic. Auto-detect these so the
|
||||
# Anthropic Messages API adapter is used instead of chat completions.
|
||||
self.api_mode = "anthropic_messages"
|
||||
else:
|
||||
self.api_mode = "chat_completions"
|
||||
|
||||
@@ -964,6 +969,18 @@ class AIAgent:
|
||||
compression_threshold = float(_compression_cfg.get("threshold", 0.50))
|
||||
compression_enabled = str(_compression_cfg.get("enabled", True)).lower() in ("true", "1", "yes")
|
||||
compression_summary_model = _compression_cfg.get("summary_model") or None
|
||||
|
||||
# Read explicit context_length override from model config
|
||||
_model_cfg = _agent_cfg.get("model", {})
|
||||
if isinstance(_model_cfg, dict):
|
||||
_config_context_length = _model_cfg.get("context_length")
|
||||
else:
|
||||
_config_context_length = None
|
||||
if _config_context_length is not None:
|
||||
try:
|
||||
_config_context_length = int(_config_context_length)
|
||||
except (TypeError, ValueError):
|
||||
_config_context_length = None
|
||||
|
||||
self.context_compressor = ContextCompressor(
|
||||
model=self.model,
|
||||
@@ -975,6 +992,7 @@ class AIAgent:
|
||||
quiet_mode=self.quiet_mode,
|
||||
base_url=self.base_url,
|
||||
api_key=getattr(self, "api_key", ""),
|
||||
config_context_length=_config_context_length,
|
||||
)
|
||||
self.compression_enabled = compression_enabled
|
||||
self._user_turn_count = 0
|
||||
@@ -3474,11 +3492,11 @@ class AIAgent:
|
||||
|
||||
# Determine api_mode from provider
|
||||
fb_api_mode = "chat_completions"
|
||||
fb_base_url = str(fb_client.base_url)
|
||||
if fb_provider == "openai-codex":
|
||||
fb_api_mode = "codex_responses"
|
||||
elif fb_provider == "anthropic":
|
||||
elif fb_provider == "anthropic" or fb_base_url.rstrip("/").lower().endswith("/anthropic"):
|
||||
fb_api_mode = "anthropic_messages"
|
||||
fb_base_url = str(fb_client.base_url)
|
||||
|
||||
old_model = self.model
|
||||
self.model = fb_model
|
||||
|
||||
@@ -218,6 +218,79 @@ class TestGetModelContextLength:
|
||||
|
||||
assert result == CONTEXT_PROBE_TIERS[0]
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
@patch("agent.model_metadata.fetch_endpoint_model_metadata")
|
||||
def test_custom_endpoint_single_model_fallback(self, mock_endpoint_fetch, mock_fetch):
|
||||
"""Single-model servers: use the only model even if name doesn't match."""
|
||||
mock_fetch.return_value = {}
|
||||
mock_endpoint_fetch.return_value = {
|
||||
"Qwen3.5-9B-Q4_K_M.gguf": {"context_length": 131072}
|
||||
}
|
||||
|
||||
result = get_model_context_length(
|
||||
"qwen3.5:9b",
|
||||
base_url="http://myserver.example.com:8080/v1",
|
||||
api_key="test-key",
|
||||
)
|
||||
|
||||
assert result == 131072
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
@patch("agent.model_metadata.fetch_endpoint_model_metadata")
|
||||
def test_custom_endpoint_fuzzy_substring_match(self, mock_endpoint_fetch, mock_fetch):
|
||||
"""Fuzzy match: configured model name is substring of endpoint model."""
|
||||
mock_fetch.return_value = {}
|
||||
mock_endpoint_fetch.return_value = {
|
||||
"org/llama-3.3-70b-instruct-fp8": {"context_length": 131072},
|
||||
"org/qwen-2.5-72b": {"context_length": 32768},
|
||||
}
|
||||
|
||||
result = get_model_context_length(
|
||||
"llama-3.3-70b-instruct",
|
||||
base_url="http://myserver.example.com:8080/v1",
|
||||
api_key="test-key",
|
||||
)
|
||||
|
||||
assert result == 131072
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
def test_config_context_length_overrides_all(self, mock_fetch):
|
||||
"""Explicit config_context_length takes priority over everything."""
|
||||
mock_fetch.return_value = {
|
||||
"test/model": {"context_length": 200000}
|
||||
}
|
||||
|
||||
result = get_model_context_length(
|
||||
"test/model",
|
||||
config_context_length=65536,
|
||||
)
|
||||
|
||||
assert result == 65536
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
def test_config_context_length_zero_is_ignored(self, mock_fetch):
|
||||
"""config_context_length=0 should be treated as unset."""
|
||||
mock_fetch.return_value = {}
|
||||
|
||||
result = get_model_context_length(
|
||||
"anthropic/claude-sonnet-4",
|
||||
config_context_length=0,
|
||||
)
|
||||
|
||||
assert result == 200000
|
||||
|
||||
@patch("agent.model_metadata.fetch_model_metadata")
|
||||
def test_config_context_length_none_is_ignored(self, mock_fetch):
|
||||
"""config_context_length=None should be treated as unset."""
|
||||
mock_fetch.return_value = {}
|
||||
|
||||
result = get_model_context_length(
|
||||
"anthropic/claude-sonnet-4",
|
||||
config_context_length=None,
|
||||
)
|
||||
|
||||
assert result == 200000
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# fetch_model_metadata — caching, TTL, slugs, failures
|
||||
|
||||
240
tests/gateway/test_approve_deny_commands.py
Normal file
240
tests/gateway/test_approve_deny_commands.py
Normal file
@@ -0,0 +1,240 @@
|
||||
"""Tests for /approve and /deny gateway commands.
|
||||
|
||||
Verifies that dangerous command approvals require explicit /approve or /deny
|
||||
slash commands, not bare "yes"/"no" text matching.
|
||||
"""
|
||||
|
||||
import time
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
||||
from gateway.platforms.base import MessageEvent
|
||||
from gateway.session import SessionEntry, SessionSource, build_session_key
|
||||
|
||||
|
||||
def _make_source() -> SessionSource:
|
||||
return SessionSource(
|
||||
platform=Platform.TELEGRAM,
|
||||
user_id="u1",
|
||||
chat_id="c1",
|
||||
user_name="tester",
|
||||
chat_type="dm",
|
||||
)
|
||||
|
||||
|
||||
def _make_event(text: str) -> MessageEvent:
|
||||
return MessageEvent(
|
||||
text=text,
|
||||
source=_make_source(),
|
||||
message_id="m1",
|
||||
)
|
||||
|
||||
|
||||
def _make_runner():
|
||||
from gateway.run import GatewayRunner
|
||||
|
||||
runner = object.__new__(GatewayRunner)
|
||||
runner.config = GatewayConfig(
|
||||
platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")}
|
||||
)
|
||||
adapter = MagicMock()
|
||||
adapter.send = AsyncMock()
|
||||
runner.adapters = {Platform.TELEGRAM: adapter}
|
||||
runner._voice_mode = {}
|
||||
runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False)
|
||||
runner.session_store = MagicMock()
|
||||
runner._running_agents = {}
|
||||
runner._pending_messages = {}
|
||||
runner._pending_approvals = {}
|
||||
runner._session_db = None
|
||||
runner._reasoning_config = None
|
||||
runner._provider_routing = {}
|
||||
runner._fallback_model = None
|
||||
runner._show_reasoning = False
|
||||
runner._is_user_authorized = lambda _source: True
|
||||
runner._set_session_env = lambda _context: None
|
||||
return runner
|
||||
|
||||
|
||||
def _make_pending_approval(command="sudo rm -rf /tmp/test", pattern_key="sudo"):
|
||||
return {
|
||||
"command": command,
|
||||
"pattern_key": pattern_key,
|
||||
"pattern_keys": [pattern_key],
|
||||
"description": "sudo command",
|
||||
"timestamp": time.time(),
|
||||
}
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# /approve command
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestApproveCommand:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_approve_executes_pending_command(self):
|
||||
"""Basic /approve executes the pending command."""
|
||||
runner = _make_runner()
|
||||
source = _make_source()
|
||||
session_key = runner._session_key_for_source(source)
|
||||
runner._pending_approvals[session_key] = _make_pending_approval()
|
||||
|
||||
event = _make_event("/approve")
|
||||
with patch("tools.terminal_tool.terminal_tool", return_value="done") as mock_term:
|
||||
result = await runner._handle_approve_command(event)
|
||||
|
||||
assert "✅ Command approved and executed" in result
|
||||
mock_term.assert_called_once_with(command="sudo rm -rf /tmp/test", force=True)
|
||||
assert session_key not in runner._pending_approvals
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_approve_session_remembers_pattern(self):
|
||||
"""/approve session approves the pattern for the session."""
|
||||
runner = _make_runner()
|
||||
source = _make_source()
|
||||
session_key = runner._session_key_for_source(source)
|
||||
runner._pending_approvals[session_key] = _make_pending_approval()
|
||||
|
||||
event = _make_event("/approve session")
|
||||
with (
|
||||
patch("tools.terminal_tool.terminal_tool", return_value="done"),
|
||||
patch("tools.approval.approve_session") as mock_session,
|
||||
):
|
||||
result = await runner._handle_approve_command(event)
|
||||
|
||||
assert "pattern approved for this session" in result
|
||||
mock_session.assert_called_once_with(session_key, "sudo")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_approve_always_approves_permanently(self):
|
||||
"""/approve always approves the pattern permanently."""
|
||||
runner = _make_runner()
|
||||
source = _make_source()
|
||||
session_key = runner._session_key_for_source(source)
|
||||
runner._pending_approvals[session_key] = _make_pending_approval()
|
||||
|
||||
event = _make_event("/approve always")
|
||||
with (
|
||||
patch("tools.terminal_tool.terminal_tool", return_value="done"),
|
||||
patch("tools.approval.approve_permanent") as mock_perm,
|
||||
):
|
||||
result = await runner._handle_approve_command(event)
|
||||
|
||||
assert "pattern approved permanently" in result
|
||||
mock_perm.assert_called_once_with("sudo")
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_approve_no_pending(self):
|
||||
"""/approve with no pending approval returns helpful message."""
|
||||
runner = _make_runner()
|
||||
event = _make_event("/approve")
|
||||
result = await runner._handle_approve_command(event)
|
||||
assert "No pending command" in result
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_approve_expired(self):
|
||||
"""/approve on a timed-out approval rejects it."""
|
||||
runner = _make_runner()
|
||||
source = _make_source()
|
||||
session_key = runner._session_key_for_source(source)
|
||||
approval = _make_pending_approval()
|
||||
approval["timestamp"] = time.time() - 600 # 10 minutes ago
|
||||
runner._pending_approvals[session_key] = approval
|
||||
|
||||
event = _make_event("/approve")
|
||||
result = await runner._handle_approve_command(event)
|
||||
|
||||
assert "expired" in result
|
||||
assert session_key not in runner._pending_approvals
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# /deny command
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDenyCommand:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_deny_clears_pending(self):
|
||||
"""/deny clears the pending approval."""
|
||||
runner = _make_runner()
|
||||
source = _make_source()
|
||||
session_key = runner._session_key_for_source(source)
|
||||
runner._pending_approvals[session_key] = _make_pending_approval()
|
||||
|
||||
event = _make_event("/deny")
|
||||
result = await runner._handle_deny_command(event)
|
||||
|
||||
assert "❌ Command denied" in result
|
||||
assert session_key not in runner._pending_approvals
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_deny_no_pending(self):
|
||||
"""/deny with no pending approval returns helpful message."""
|
||||
runner = _make_runner()
|
||||
event = _make_event("/deny")
|
||||
result = await runner._handle_deny_command(event)
|
||||
assert "No pending command" in result
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Bare "yes" must NOT trigger approval
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestBareTextNoLongerApproves:
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_yes_does_not_execute_pending_command(self):
|
||||
"""Saying 'yes' in normal conversation must not execute a pending command.
|
||||
|
||||
This is the core bug from issue #1888: bare text matching against
|
||||
'yes'/'no' could intercept unrelated user messages.
|
||||
"""
|
||||
runner = _make_runner()
|
||||
source = _make_source()
|
||||
session_key = runner._session_key_for_source(source)
|
||||
runner._pending_approvals[session_key] = _make_pending_approval()
|
||||
|
||||
# Simulate the user saying "yes" as a normal message.
|
||||
# The old code would have executed the pending command.
|
||||
# Now it should fall through to normal processing (agent handles it).
|
||||
event = _make_event("yes")
|
||||
|
||||
# The approval should still be pending — "yes" is not /approve
|
||||
# We can't easily run _handle_message end-to-end, but we CAN verify
|
||||
# the old text-matching block no longer exists by confirming the
|
||||
# approval is untouched after the command dispatch section.
|
||||
# The key assertion is that _pending_approvals is NOT consumed.
|
||||
assert session_key in runner._pending_approvals
|
||||
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Approval hint appended to response
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestApprovalHint:
|
||||
|
||||
def test_approval_hint_appended_to_response(self):
|
||||
"""When a pending approval is collected, structured instructions
|
||||
should be appended to the agent response."""
|
||||
# This tests the approval collection logic at the end of _handle_message.
|
||||
# We verify the hint format directly.
|
||||
cmd = "sudo rm -rf /tmp/dangerous"
|
||||
cmd_preview = cmd
|
||||
hint = (
|
||||
f"\n\n⚠️ **Dangerous command requires approval:**\n"
|
||||
f"```\n{cmd_preview}\n```\n"
|
||||
f"Reply `/approve` to execute, `/approve session` to approve this pattern "
|
||||
f"for the session, or `/deny` to cancel."
|
||||
)
|
||||
assert "/approve" in hint
|
||||
assert "/deny" in hint
|
||||
assert cmd in hint
|
||||
@@ -438,10 +438,75 @@ def test_named_custom_provider_without_api_mode_defaults(monkeypatch):
|
||||
lambda p: {
|
||||
"name": "my-server",
|
||||
"base_url": "http://localhost:8000/v1",
|
||||
"api_key": "sk-test",
|
||||
"api_key": "***",
|
||||
},
|
||||
)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="my-server")
|
||||
|
||||
assert resolved["api_mode"] == "chat_completions"
|
||||
|
||||
|
||||
def test_anthropic_messages_in_valid_api_modes():
|
||||
"""anthropic_messages should be accepted by _parse_api_mode."""
|
||||
assert rp._parse_api_mode("anthropic_messages") == "anthropic_messages"
|
||||
|
||||
|
||||
def test_api_key_provider_anthropic_url_auto_detection(monkeypatch):
|
||||
"""API-key providers with /anthropic base URL should auto-detect anthropic_messages mode."""
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax")
|
||||
monkeypatch.setattr(rp, "_get_model_config", lambda: {})
|
||||
monkeypatch.setenv("MINIMAX_API_KEY", "test-minimax-key")
|
||||
monkeypatch.setenv("MINIMAX_BASE_URL", "https://api.minimax.io/anthropic")
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="minimax")
|
||||
|
||||
assert resolved["provider"] == "minimax"
|
||||
assert resolved["api_mode"] == "anthropic_messages"
|
||||
assert resolved["base_url"] == "https://api.minimax.io/anthropic"
|
||||
|
||||
|
||||
def test_api_key_provider_explicit_api_mode_config(monkeypatch):
|
||||
"""API-key providers should respect api_mode from model config."""
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax")
|
||||
monkeypatch.setattr(rp, "_get_model_config", lambda: {"api_mode": "anthropic_messages"})
|
||||
monkeypatch.setenv("MINIMAX_API_KEY", "test-minimax-key")
|
||||
monkeypatch.delenv("MINIMAX_BASE_URL", raising=False)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="minimax")
|
||||
|
||||
assert resolved["provider"] == "minimax"
|
||||
assert resolved["api_mode"] == "anthropic_messages"
|
||||
|
||||
|
||||
def test_api_key_provider_default_url_stays_chat_completions(monkeypatch):
|
||||
"""API-key providers with default /v1 URL should stay on chat_completions."""
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "minimax")
|
||||
monkeypatch.setattr(rp, "_get_model_config", lambda: {})
|
||||
monkeypatch.setenv("MINIMAX_API_KEY", "test-minimax-key")
|
||||
monkeypatch.delenv("MINIMAX_BASE_URL", raising=False)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="minimax")
|
||||
|
||||
assert resolved["provider"] == "minimax"
|
||||
assert resolved["api_mode"] == "chat_completions"
|
||||
assert resolved["base_url"] == "https://api.minimax.io/v1"
|
||||
|
||||
|
||||
def test_named_custom_provider_anthropic_api_mode(monkeypatch):
|
||||
"""Custom providers should accept api_mode: anthropic_messages."""
|
||||
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "my-anthropic-proxy")
|
||||
monkeypatch.setattr(
|
||||
rp, "_get_named_custom_provider",
|
||||
lambda p: {
|
||||
"name": "my-anthropic-proxy",
|
||||
"base_url": "https://proxy.example.com/anthropic",
|
||||
"api_key": "test-key",
|
||||
"api_mode": "anthropic_messages",
|
||||
},
|
||||
)
|
||||
|
||||
resolved = rp.resolve_runtime_provider(requested="my-anthropic-proxy")
|
||||
|
||||
assert resolved["api_mode"] == "anthropic_messages"
|
||||
assert resolved["base_url"] == "https://proxy.example.com/anthropic"
|
||||
|
||||
@@ -51,6 +51,8 @@ hermes setup # Or configure everything at once
|
||||
| **MiniMax China** | China-region MiniMax endpoint | Set `MINIMAX_CN_API_KEY` |
|
||||
| **Alibaba Cloud** | Qwen models via DashScope | Set `DASHSCOPE_API_KEY` |
|
||||
| **Kilo Code** | KiloCode-hosted models | Set `KILOCODE_API_KEY` |
|
||||
| **OpenCode Zen** | Pay-as-you-go access to curated models | Set `OPENCODE_ZEN_API_KEY` |
|
||||
| **OpenCode Go** | $10/month subscription for open models | Set `OPENCODE_GO_API_KEY` |
|
||||
| **Vercel AI Gateway** | Vercel AI Gateway routing | Set `AI_GATEWAY_API_KEY` |
|
||||
| **Custom Endpoint** | VLLM, SGLang, or any OpenAI-compatible API | Set base URL + API key |
|
||||
|
||||
|
||||
@@ -66,7 +66,7 @@ Common options:
|
||||
| `-q`, `--query "..."` | One-shot, non-interactive prompt. |
|
||||
| `-m`, `--model <model>` | Override the model for this run. |
|
||||
| `-t`, `--toolsets <csv>` | Enable a comma-separated set of toolsets. |
|
||||
| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`. |
|
||||
| `--provider <provider>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `opencode-zen`, `opencode-go`, `ai-gateway`, `kilocode`, `alibaba`. |
|
||||
| `-v`, `--verbose` | Verbose output. |
|
||||
| `-Q`, `--quiet` | Programmatic mode: suppress banner/spinner/tool previews. |
|
||||
| `--resume <session>` / `--continue [name]` | Resume a session directly from `chat`. |
|
||||
|
||||
@@ -41,6 +41,12 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
|
||||
| `ANTHROPIC_TOKEN` | Manual or legacy Anthropic OAuth/setup-token override |
|
||||
| `DASHSCOPE_API_KEY` | Alibaba Cloud DashScope API key for Qwen models ([modelstudio.console.alibabacloud.com](https://modelstudio.console.alibabacloud.com/)) |
|
||||
| `DASHSCOPE_BASE_URL` | Custom DashScope base URL (default: international endpoint) |
|
||||
| `DEEPSEEK_API_KEY` | DeepSeek API key for direct DeepSeek access ([platform.deepseek.com](https://platform.deepseek.com/api_keys)) |
|
||||
| `DEEPSEEK_BASE_URL` | Custom DeepSeek API base URL |
|
||||
| `OPENCODE_ZEN_API_KEY` | OpenCode Zen API key — pay-as-you-go access to curated models ([opencode.ai](https://opencode.ai/auth)) |
|
||||
| `OPENCODE_ZEN_BASE_URL` | Override OpenCode Zen base URL |
|
||||
| `OPENCODE_GO_API_KEY` | OpenCode Go API key — $10/month subscription for open models ([opencode.ai](https://opencode.ai/auth)) |
|
||||
| `OPENCODE_GO_BASE_URL` | Override OpenCode Go base URL |
|
||||
| `CLAUDE_CODE_OAUTH_TOKEN` | Explicit Claude Code token override if you export one manually |
|
||||
| `HERMES_MODEL` | Preferred model name (checked before `LLM_MODEL`, used by gateway) |
|
||||
| `LLM_MODEL` | Default model name (fallback when not set in config.yaml) |
|
||||
@@ -71,6 +77,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
|
||||
| `PARALLEL_API_KEY` | AI-native web search ([parallel.ai](https://parallel.ai/)) |
|
||||
| `FIRECRAWL_API_KEY` | Web scraping ([firecrawl.dev](https://firecrawl.dev/)) |
|
||||
| `FIRECRAWL_API_URL` | Custom Firecrawl API endpoint for self-hosted instances (optional) |
|
||||
| `TAVILY_API_KEY` | Tavily API key for AI-native web search, extract, and crawl ([app.tavily.com](https://app.tavily.com/home)) |
|
||||
| `BROWSERBASE_API_KEY` | Browser automation ([browserbase.com](https://browserbase.com/)) |
|
||||
| `BROWSERBASE_PROJECT_ID` | Browserbase project ID |
|
||||
| `BROWSER_USE_API_KEY` | Browser Use cloud browser API key ([browser-use.com](https://browser-use.com/)) |
|
||||
@@ -83,6 +90,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
|
||||
| `GROQ_BASE_URL` | Override the Groq OpenAI-compatible STT endpoint |
|
||||
| `STT_OPENAI_MODEL` | Override the OpenAI STT model (default: `whisper-1`) |
|
||||
| `STT_OPENAI_BASE_URL` | Override the OpenAI-compatible STT endpoint |
|
||||
| `GITHUB_TOKEN` | GitHub token for Skills Hub (higher API rate limits, skill publish) |
|
||||
| `HONCHO_API_KEY` | Cross-session user modeling ([honcho.dev](https://honcho.dev/)) |
|
||||
| `TINKER_API_KEY` | RL training ([tinker-console.thinkingmachines.ai](https://tinker-console.thinkingmachines.ai/)) |
|
||||
| `WANDB_API_KEY` | RL training metrics ([wandb.ai](https://wandb.ai/)) |
|
||||
@@ -211,7 +219,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
|
||||
|
||||
| Variable | Description |
|
||||
|----------|-------------|
|
||||
| `HERMES_MAX_ITERATIONS` | Max tool-calling iterations per conversation (default: 60) |
|
||||
| `HERMES_MAX_ITERATIONS` | Max tool-calling iterations per conversation (default: 90) |
|
||||
| `HERMES_TOOL_PROGRESS` | Deprecated compatibility variable for tool progress display. Prefer `display.tool_progress` in `config.yaml`. |
|
||||
| `HERMES_TOOL_PROGRESS_MODE` | Deprecated compatibility variable for tool progress mode. Prefer `display.tool_progress` in `config.yaml`. |
|
||||
| `HERMES_HUMAN_DELAY_MODE` | Response pacing: `off`/`natural`/`custom` |
|
||||
@@ -221,6 +229,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
|
||||
| `HERMES_API_TIMEOUT` | LLM API call timeout in seconds (default: `900`) |
|
||||
| `HERMES_EXEC_ASK` | Enable execution approval prompts in gateway mode (`true`/`false`) |
|
||||
| `HERMES_BACKGROUND_NOTIFICATIONS` | Background process notification mode in gateway: `all` (default), `result`, `error`, `off` |
|
||||
| `HERMES_EPHEMERAL_SYSTEM_PROMPT` | Ephemeral system prompt injected at API-call time (never persisted to sessions) |
|
||||
|
||||
## Session Settings
|
||||
|
||||
|
||||
@@ -21,9 +21,8 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
|
||||
|
||||
| Command | Description |
|
||||
|---------|-------------|
|
||||
| `/new` | Start a new conversation (reset history) |
|
||||
| `/reset` | Reset conversation only (keep screen) |
|
||||
| `/clear` | Clear screen and reset conversation (fresh start) |
|
||||
| `/new` (alias: `/reset`) | Start a new session (fresh session ID + history) |
|
||||
| `/clear` | Clear screen and start a new session |
|
||||
| `/history` | Show conversation history |
|
||||
| `/save` | Save the current conversation |
|
||||
| `/retry` | Retry the last message (resend to agent) |
|
||||
@@ -31,6 +30,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
|
||||
| `/title` | Set a title for the current session (usage: /title My Session Name) |
|
||||
| `/compress` | Manually compress conversation context (flush memories + summarize) |
|
||||
| `/rollback` | List or restore filesystem checkpoints (usage: /rollback [number]) |
|
||||
| `/stop` | Kill all running background processes |
|
||||
| `/background <prompt>` | Run a prompt in a separate background session. The agent processes your prompt independently — your current session stays free for other work. Results appear as a panel when the task finishes. See [CLI Background Sessions](/docs/user-guide/cli#background-sessions). |
|
||||
| `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. |
|
||||
|
||||
@@ -58,6 +58,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
|
||||
| `/skills` | Search, install, inspect, or manage skills from online registries |
|
||||
| `/cron` | Manage scheduled tasks (list, add/create, edit, pause, resume, run, remove) |
|
||||
| `/reload-mcp` | Reload MCP servers from config.yaml |
|
||||
| `/plugins` | List installed plugins and their status |
|
||||
|
||||
### Info
|
||||
|
||||
@@ -95,7 +96,7 @@ The messaging gateway supports the following built-in commands inside Telegram,
|
||||
| `/new` | Start a new conversation. |
|
||||
| `/reset` | Reset conversation history. |
|
||||
| `/status` | Show session info. |
|
||||
| `/stop` | Interrupt the running agent without queuing a follow-up prompt. |
|
||||
| `/stop` | Kill all running background processes and interrupt the running agent. |
|
||||
| `/model [provider:model]` | Show or change the model, including provider switches. |
|
||||
| `/provider` | Show provider availability and auth status. |
|
||||
| `/personality [name]` | Set a personality overlay for the session. |
|
||||
@@ -119,7 +120,7 @@ The messaging gateway supports the following built-in commands inside Telegram,
|
||||
|
||||
## Notes
|
||||
|
||||
- `/skin`, `/tools`, `/toolsets`, `/browser`, `/config`, `/prompt`, `/cron`, `/skills`, `/platforms`, `/paste`, and `/verbose` are **CLI-only** commands.
|
||||
- `/status`, `/stop`, `/sethome`, `/resume`, and `/update` are **messaging-only** commands.
|
||||
- `/skin`, `/tools`, `/toolsets`, `/browser`, `/config`, `/prompt`, `/cron`, `/skills`, `/platforms`, `/paste`, `/verbose`, and `/plugins` are **CLI-only** commands.
|
||||
- `/status`, `/sethome`, and `/update` are **messaging-only** commands.
|
||||
- `/background`, `/voice`, `/reload-mcp`, and `/rollback` work in **both** the CLI and the messaging gateway.
|
||||
- `/voice join`, `/voice channel`, and `/voice leave` are only meaningful on Discord.
|
||||
|
||||
@@ -141,6 +141,19 @@ This page documents the built-in Hermes tool registry as it exists in code. Avai
|
||||
|------|-------------|----------------------|
|
||||
| `todo` | Manage your task list for the current session. Use for complex tasks with 3+ steps or when the user provides multiple tasks. Call with no parameters to read the current list. Writing: - Provide 'todos' array to create/update items - merge=… | — |
|
||||
|
||||
## `vision` toolset
|
||||
|
||||
| Tool | Description | Requires environment |
|
||||
|------|-------------|----------------------|
|
||||
| `vision_analyze` | Analyze images using AI vision. Provides a comprehensive description and answers a specific question about the image content. | — |
|
||||
|
||||
## `web` toolset
|
||||
|
||||
| Tool | Description | Requires environment |
|
||||
|------|-------------|----------------------|
|
||||
| `web_search` | Search the web for information on any topic. Returns up to 5 relevant results with titles, URLs, and descriptions. | PARALLEL_API_KEY or FIRECRAWL_API_KEY or TAVILY_API_KEY |
|
||||
| `web_extract` | Extract content from web page URLs. Returns page content in markdown format. Also works with PDF URLs — pass the PDF link directly and it converts to markdown text. Pages under 5000 chars return full markdown; larger pages are LLM-summarized. | PARALLEL_API_KEY or FIRECRAWL_API_KEY or TAVILY_API_KEY |
|
||||
|
||||
## `tts` toolset
|
||||
|
||||
| Tool | Description | Requires environment |
|
||||
|
||||
@@ -10,26 +10,29 @@ Toolsets are named bundles of tools that you can enable with `hermes chat --tool
|
||||
|
||||
| Toolset | Kind | Resolves to |
|
||||
|---------|------|-------------|
|
||||
| `browser` | core | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` |
|
||||
| `browser` | core | `browser_back`, `browser_click`, `browser_close`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `web_search` |
|
||||
| `clarify` | core | `clarify` |
|
||||
| `code_execution` | core | `execute_code` |
|
||||
| `cronjob` | core | `cronjob` |
|
||||
| `debugging` | composite | `patch`, `process`, `read_file`, `search_files`, `terminal`, `web_extract`, `web_search`, `write_file` |
|
||||
| `delegation` | core | `delegate_task` |
|
||||
| `file` | core | `patch`, `read_file`, `search_files`, `write_file` |
|
||||
| `hermes-cli` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-discord` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-email` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-gateway` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-homeassistant` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-signal` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-slack` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-telegram` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-whatsapp` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `cronjob`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-acp` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `delegate_task`, `execute_code`, `memory`, `patch`, `process`, `read_file`, `search_files`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-cli` | platform | `browser_back`, `browser_click`, `browser_close`, `browser_console`, `browser_get_images`, `browser_navigate`, `browser_press`, `browser_scroll`, `browser_snapshot`, `browser_type`, `browser_vision`, `clarify`, `cronjob`, `delegate_task`, `execute_code`, `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services`, `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search`, `image_generate`, `memory`, `mixture_of_agents`, `patch`, `process`, `read_file`, `search_files`, `send_message`, `session_search`, `skill_manage`, `skill_view`, `skills_list`, `terminal`, `text_to_speech`, `todo`, `vision_analyze`, `web_extract`, `web_search`, `write_file` |
|
||||
| `hermes-discord` | platform | _(same as hermes-cli)_ |
|
||||
| `hermes-email` | platform | _(same as hermes-cli)_ |
|
||||
| `hermes-gateway` | composite | Union of all messaging platform toolsets |
|
||||
| `hermes-homeassistant` | platform | _(same as hermes-cli)_ |
|
||||
| `hermes-signal` | platform | _(same as hermes-cli)_ |
|
||||
| `hermes-slack` | platform | _(same as hermes-cli)_ |
|
||||
| `hermes-sms` | platform | _(same as hermes-cli)_ |
|
||||
| `hermes-telegram` | platform | _(same as hermes-cli)_ |
|
||||
| `hermes-whatsapp` | platform | _(same as hermes-cli)_ |
|
||||
| `homeassistant` | core | `ha_call_service`, `ha_get_state`, `ha_list_entities`, `ha_list_services` |
|
||||
| `honcho` | core | `honcho_conclude`, `honcho_context`, `honcho_profile`, `honcho_search` |
|
||||
| `image_gen` | core | `image_generate` |
|
||||
| `memory` | core | `memory` |
|
||||
| `messaging` | core | `send_message` |
|
||||
| `moa` | core | `mixture_of_agents` |
|
||||
| `rl` | core | `rl_check_status`, `rl_edit_config`, `rl_get_current_config`, `rl_get_results`, `rl_list_environments`, `rl_list_runs`, `rl_select_environment`, `rl_start_training`, `rl_stop_training`, `rl_test_inference` |
|
||||
| `safe` | composite | `image_generate`, `mixture_of_agents`, `vision_analyze`, `web_extract`, `web_search` |
|
||||
|
||||
@@ -74,7 +74,8 @@ You need at least one way to connect to an LLM. Use `hermes model` to switch pro
|
||||
| **MiniMax China** | `MINIMAX_CN_API_KEY` in `~/.hermes/.env` (provider: `minimax-cn`) |
|
||||
| **Alibaba Cloud** | `DASHSCOPE_API_KEY` in `~/.hermes/.env` (provider: `alibaba`, aliases: `dashscope`, `qwen`) |
|
||||
| **Kilo Code** | `KILOCODE_API_KEY` in `~/.hermes/.env` (provider: `kilocode`) |
|
||||
| **Alibaba Cloud** | `DASHSCOPE_API_KEY` in `~/.hermes/.env` (provider: `alibaba`) |
|
||||
| **OpenCode Zen** | `OPENCODE_ZEN_API_KEY` in `~/.hermes/.env` (provider: `opencode-zen`) |
|
||||
| **OpenCode Go** | `OPENCODE_GO_API_KEY` in `~/.hermes/.env` (provider: `opencode-go`) |
|
||||
| **Custom Endpoint** | `hermes model` (saved in `config.yaml`) or `OPENAI_BASE_URL` + `OPENAI_API_KEY` in `~/.hermes/.env` |
|
||||
|
||||
:::info Codex Note
|
||||
@@ -413,6 +414,29 @@ LLM_MODEL=meta-llama/Llama-3.1-70B-Instruct-Turbo
|
||||
|
||||
---
|
||||
|
||||
### Context Length Detection
|
||||
|
||||
Hermes automatically detects your model's context length by querying the endpoint's `/v1/models` response. For most setups this works out of the box. If detection fails (the model name doesn't match, the endpoint doesn't expose `/v1/models`, etc.), Hermes falls back to a high default and probes downward on context-length errors.
|
||||
|
||||
To set the context length explicitly, add `context_length` to your model config:
|
||||
|
||||
```yaml
|
||||
model:
|
||||
default: "qwen3.5:9b"
|
||||
base_url: "http://localhost:8080/v1"
|
||||
context_length: 131072 # tokens
|
||||
```
|
||||
|
||||
This takes highest priority — it overrides auto-detection, cached values, and hardcoded defaults.
|
||||
|
||||
:::tip When to set this manually
|
||||
- Your model shows "2M context" in the status bar (detection failed)
|
||||
- You want to limit context below the model's maximum (e.g., 8k on a 128k model to save VRAM)
|
||||
- You're running behind a proxy that doesn't expose `/v1/models`
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
### Choosing the Right Setup
|
||||
|
||||
| Use Case | Recommended |
|
||||
|
||||
Reference in New Issue
Block a user