mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-23 10:23:19 +08:00
Compare commits
1 Commits
fix/oauth-
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c44ed125a4 |
@@ -1,8 +1,5 @@
|
||||
FROM debian:13.4
|
||||
|
||||
# Disable Python stdout buffering to ensure logs are printed immediately
|
||||
ENV PYTHONUNBUFFERED=1
|
||||
|
||||
# Install system dependencies in one layer, clear APT cache
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends \
|
||||
|
||||
@@ -1238,27 +1238,10 @@ def build_anthropic_kwargs(
|
||||
) -> Dict[str, Any]:
|
||||
"""Build kwargs for anthropic.messages.create().
|
||||
|
||||
Naming note — two distinct concepts, easily confused:
|
||||
max_tokens = OUTPUT token cap for a single response.
|
||||
Anthropic's API calls this "max_tokens" but it only
|
||||
limits the *output*. Anthropic's own native SDK
|
||||
renamed it "max_output_tokens" for clarity.
|
||||
context_length = TOTAL context window (input tokens + output tokens).
|
||||
The API enforces: input_tokens + max_tokens ≤ context_length.
|
||||
Stored on the ContextCompressor; reduced on overflow errors.
|
||||
|
||||
When *max_tokens* is None the model's native output ceiling is used
|
||||
(e.g. 128K for Opus 4.6, 64K for Sonnet 4.6).
|
||||
|
||||
When *context_length* is provided and the model's native output ceiling
|
||||
exceeds it (e.g. a local endpoint with an 8K window), the output cap is
|
||||
clamped to context_length − 1. This only kicks in for unusually small
|
||||
context windows; for full-size models the native output cap is always
|
||||
smaller than the context window so no clamping happens.
|
||||
NOTE: this clamping does not account for prompt size — if the prompt is
|
||||
large, Anthropic may still reject the request. The caller must detect
|
||||
"max_tokens too large given prompt" errors and retry with a smaller cap
|
||||
(see parse_available_output_tokens_from_error + _ephemeral_max_output_tokens).
|
||||
When *max_tokens* is None, the model's native output limit is used
|
||||
(e.g. 128K for Opus 4.6, 64K for Sonnet 4.6). If *context_length*
|
||||
is provided, the effective limit is clamped so it doesn't exceed
|
||||
the context window.
|
||||
|
||||
When *is_oauth* is True, applies Claude Code compatibility transforms:
|
||||
system prompt prefix, tool name prefixing, and prompt sanitization.
|
||||
@@ -1273,14 +1256,10 @@ def build_anthropic_kwargs(
|
||||
anthropic_tools = convert_tools_to_anthropic(tools) if tools else []
|
||||
|
||||
model = normalize_model_name(model, preserve_dots=preserve_dots)
|
||||
# effective_max_tokens = output cap for this call (≠ total context window)
|
||||
effective_max_tokens = max_tokens or _get_anthropic_max_output(model)
|
||||
|
||||
# Clamp output cap to fit inside the total context window.
|
||||
# Only matters for small custom endpoints where context_length < native
|
||||
# output ceiling. For standard Anthropic models context_length (e.g.
|
||||
# 200K) is always larger than the output ceiling (e.g. 128K), so this
|
||||
# branch is not taken.
|
||||
# Clamp to context window if the user set a lower context_length
|
||||
# (e.g. custom endpoint with limited capacity).
|
||||
if context_length and effective_max_tokens > context_length:
|
||||
effective_max_tokens = max(context_length - 1, 1)
|
||||
|
||||
|
||||
@@ -702,7 +702,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
|
||||
extra = {}
|
||||
if "api.kimi.com" in base_url.lower():
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.3"}
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
|
||||
elif "api.githubcopilot.com" in base_url.lower():
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
@@ -721,7 +721,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
|
||||
extra = {}
|
||||
if "api.kimi.com" in base_url.lower():
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.3"}
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
|
||||
elif "api.githubcopilot.com" in base_url.lower():
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
@@ -1137,7 +1137,7 @@ def _resolve_auto() -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
main_model = _read_main_model()
|
||||
if (main_provider and main_model
|
||||
and main_provider not in _AGGREGATOR_PROVIDERS
|
||||
and main_provider not in ("auto", "")):
|
||||
and main_provider not in ("auto", "custom", "")):
|
||||
client, resolved = resolve_provider_client(main_provider, main_model)
|
||||
if client is not None:
|
||||
logger.info("Auxiliary auto-detect: using main provider %s (%s)",
|
||||
@@ -1195,7 +1195,7 @@ def _to_async_client(sync_client, model: str):
|
||||
|
||||
async_kwargs["default_headers"] = copilot_default_headers()
|
||||
elif "api.kimi.com" in base_lower:
|
||||
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.3"}
|
||||
async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"}
|
||||
return AsyncOpenAI(**async_kwargs), model
|
||||
|
||||
|
||||
@@ -1315,13 +1315,7 @@ def resolve_provider_client(
|
||||
)
|
||||
return None, None
|
||||
final_model = model or _read_main_model() or "gpt-4o-mini"
|
||||
extra = {}
|
||||
if "api.kimi.com" in custom_base.lower():
|
||||
extra["default_headers"] = {"User-Agent": "KimiCLI/1.3"}
|
||||
elif "api.githubcopilot.com" in custom_base.lower():
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base, **extra)
|
||||
client = OpenAI(api_key=custom_key, base_url=custom_base)
|
||||
return (_to_async_client(client, final_model) if async_mode
|
||||
else (client, final_model))
|
||||
# Try custom first, then codex, then API-key providers
|
||||
@@ -1400,7 +1394,7 @@ def resolve_provider_client(
|
||||
# Provider-specific headers
|
||||
headers = {}
|
||||
if "api.kimi.com" in base_url.lower():
|
||||
headers["User-Agent"] = "KimiCLI/1.3"
|
||||
headers["User-Agent"] = "KimiCLI/1.0"
|
||||
elif "api.githubcopilot.com" in base_url.lower():
|
||||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
|
||||
@@ -18,14 +18,12 @@ import hermes_cli.auth as auth_mod
|
||||
from hermes_cli.auth import (
|
||||
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
|
||||
DEFAULT_AGENT_KEY_MIN_TTL_SECONDS,
|
||||
KIMI_CODE_BASE_URL,
|
||||
PROVIDER_REGISTRY,
|
||||
_codex_access_token_is_expiring,
|
||||
_decode_jwt_claims,
|
||||
_import_codex_cli_tokens,
|
||||
_load_auth_store,
|
||||
_load_provider_state,
|
||||
_resolve_kimi_base_url,
|
||||
_resolve_zai_base_url,
|
||||
read_credential_pool,
|
||||
write_credential_pool,
|
||||
@@ -513,13 +511,6 @@ class CredentialPool:
|
||||
except Exception as wexc:
|
||||
logger.debug("Failed to write refreshed token to credentials file: %s", wexc)
|
||||
elif self.provider == "openai-codex":
|
||||
# Proactively sync from ~/.codex/auth.json before refresh.
|
||||
# The Codex CLI (or another Hermes profile) may have already
|
||||
# consumed our refresh_token. Syncing first avoids a
|
||||
# "refresh_token_reused" error when the CLI has a newer pair.
|
||||
synced = self._sync_codex_entry_from_cli(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
refreshed = auth_mod.refresh_codex_oauth_pure(
|
||||
entry.access_token,
|
||||
entry.refresh_token,
|
||||
@@ -605,35 +596,6 @@ class CredentialPool:
|
||||
# Credentials file had a valid (non-expired) token — use it directly
|
||||
logger.debug("Credentials file has valid token, using without refresh")
|
||||
return synced
|
||||
# For openai-codex: the refresh_token may have been consumed by
|
||||
# the Codex CLI between our proactive sync and the refresh call.
|
||||
# Re-sync and retry once.
|
||||
if self.provider == "openai-codex":
|
||||
synced = self._sync_codex_entry_from_cli(entry)
|
||||
if synced.refresh_token != entry.refresh_token:
|
||||
logger.debug("Retrying Codex refresh with synced token from ~/.codex/auth.json")
|
||||
try:
|
||||
refreshed = auth_mod.refresh_codex_oauth_pure(
|
||||
synced.access_token,
|
||||
synced.refresh_token,
|
||||
)
|
||||
updated = replace(
|
||||
synced,
|
||||
access_token=refreshed["access_token"],
|
||||
refresh_token=refreshed["refresh_token"],
|
||||
last_refresh=refreshed.get("last_refresh"),
|
||||
last_status=STATUS_OK,
|
||||
last_status_at=None,
|
||||
last_error_code=None,
|
||||
)
|
||||
self._replace_entry(synced, updated)
|
||||
self._persist()
|
||||
return updated
|
||||
except Exception as retry_exc:
|
||||
logger.debug("Codex retry refresh also failed: %s", retry_exc)
|
||||
elif not self._entry_needs_refresh(synced):
|
||||
logger.debug("Codex CLI has valid token, using without refresh")
|
||||
return synced
|
||||
self._mark_exhausted(entry, None)
|
||||
return None
|
||||
|
||||
@@ -1122,9 +1084,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
||||
active_sources.add(source)
|
||||
auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
|
||||
base_url = env_url or pconfig.inference_base_url
|
||||
if provider == "kimi-coding":
|
||||
base_url = _resolve_kimi_base_url(token, pconfig.inference_base_url, env_url)
|
||||
elif provider == "zai":
|
||||
if provider == "zai":
|
||||
base_url = _resolve_zai_base_url(token, pconfig.inference_base_url, env_url)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
|
||||
@@ -603,49 +603,6 @@ def parse_context_limit_from_error(error_msg: str) -> Optional[int]:
|
||||
return None
|
||||
|
||||
|
||||
def parse_available_output_tokens_from_error(error_msg: str) -> Optional[int]:
|
||||
"""Detect an "output cap too large" error and return how many output tokens are available.
|
||||
|
||||
Background — two distinct context errors exist:
|
||||
1. "Prompt too long" — the INPUT itself exceeds the context window.
|
||||
Fix: compress history and/or halve context_length.
|
||||
2. "max_tokens too large" — input is fine, but input + requested_output > window.
|
||||
Fix: reduce max_tokens (the output cap) for this call.
|
||||
Do NOT touch context_length — the window hasn't shrunk.
|
||||
|
||||
Anthropic's API returns errors like:
|
||||
"max_tokens: 32768 > context_window: 200000 - input_tokens: 190000 = available_tokens: 10000"
|
||||
|
||||
Returns the number of output tokens that would fit (e.g. 10000 above), or None if
|
||||
the error does not look like a max_tokens-too-large error.
|
||||
"""
|
||||
error_lower = error_msg.lower()
|
||||
|
||||
# Must look like an output-cap error, not a prompt-length error.
|
||||
is_output_cap_error = (
|
||||
"max_tokens" in error_lower
|
||||
and ("available_tokens" in error_lower or "available tokens" in error_lower)
|
||||
)
|
||||
if not is_output_cap_error:
|
||||
return None
|
||||
|
||||
# Extract the available_tokens figure.
|
||||
# Anthropic format: "… = available_tokens: 10000"
|
||||
patterns = [
|
||||
r'available_tokens[:\s]+(\d+)',
|
||||
r'available\s+tokens[:\s]+(\d+)',
|
||||
# fallback: last number after "=" in expressions like "200000 - 190000 = 10000"
|
||||
r'=\s*(\d+)\s*$',
|
||||
]
|
||||
for pattern in patterns:
|
||||
match = re.search(pattern, error_lower)
|
||||
if match:
|
||||
tokens = int(match.group(1))
|
||||
if tokens >= 1:
|
||||
return tokens
|
||||
return None
|
||||
|
||||
|
||||
def _model_id_matches(candidate_id: str, lookup_model: str) -> bool:
|
||||
"""Return True if *candidate_id* (from server) matches *lookup_model* (configured).
|
||||
|
||||
|
||||
@@ -48,25 +48,6 @@ model:
|
||||
# api_key: "your-key-here" # Uncomment to set here instead of .env
|
||||
base_url: "https://openrouter.ai/api/v1"
|
||||
|
||||
# ── Token limits — two settings, easy to confuse ──────────────────────────
|
||||
#
|
||||
# context_length: TOTAL context window (input + output tokens combined).
|
||||
# Controls when Hermes compresses history and validates requests.
|
||||
# Leave unset — Hermes auto-detects the correct value from the provider.
|
||||
# Set manually only when auto-detection is wrong (e.g. a local server with
|
||||
# a custom num_ctx, or a proxy that doesn't expose /v1/models).
|
||||
#
|
||||
# context_length: 131072
|
||||
#
|
||||
# max_tokens: OUTPUT cap — maximum tokens the model may generate per response.
|
||||
# Unrelated to how long your conversation history can be.
|
||||
# The OpenAI-standard name "max_tokens" is a misnomer; Anthropic's native
|
||||
# API has since renamed it "max_output_tokens" for clarity.
|
||||
# Leave unset to use the model's native output ceiling (recommended).
|
||||
# Set only if you want to deliberately limit individual response length.
|
||||
#
|
||||
# max_tokens: 8192
|
||||
|
||||
# =============================================================================
|
||||
# OpenRouter Provider Routing (only applies when using OpenRouter)
|
||||
# =============================================================================
|
||||
|
||||
66
cli.py
66
cli.py
@@ -1603,12 +1603,7 @@ class HermesCLI:
|
||||
return f"[{('█' * filled) + ('░' * max(0, width - filled))}]"
|
||||
|
||||
def _get_status_bar_snapshot(self) -> Dict[str, Any]:
|
||||
# Prefer the agent's model name — it updates on fallback.
|
||||
# self.model reflects the originally configured model and never
|
||||
# changes mid-session, so the TUI would show a stale name after
|
||||
# _try_activate_fallback() switches provider/model.
|
||||
agent = getattr(self, "agent", None)
|
||||
model_name = (getattr(agent, "model", None) or self.model or "unknown")
|
||||
model_name = self.model or "unknown"
|
||||
model_short = model_name.split("/")[-1] if "/" in model_name else model_name
|
||||
if model_short.endswith(".gguf"):
|
||||
model_short = model_short[:-5]
|
||||
@@ -1634,6 +1629,7 @@ class HermesCLI:
|
||||
"compressions": 0,
|
||||
}
|
||||
|
||||
agent = getattr(self, "agent", None)
|
||||
if not agent:
|
||||
return snapshot
|
||||
|
||||
@@ -4008,7 +4004,59 @@ class HermesCLI:
|
||||
|
||||
print(" To change model or provider, use: hermes model")
|
||||
|
||||
|
||||
def _handle_prompt_command(self, cmd: str):
|
||||
"""Handle the /prompt command to view or set system prompt."""
|
||||
parts = cmd.split(maxsplit=1)
|
||||
|
||||
if len(parts) > 1:
|
||||
# Set new prompt
|
||||
new_prompt = parts[1].strip()
|
||||
|
||||
if new_prompt.lower() == "clear":
|
||||
self.system_prompt = ""
|
||||
self.agent = None # Force re-init
|
||||
if save_config_value("agent.system_prompt", ""):
|
||||
print("(^_^)b System prompt cleared (saved to config)")
|
||||
else:
|
||||
print("(^_^) System prompt cleared (session only)")
|
||||
else:
|
||||
self.system_prompt = new_prompt
|
||||
self.agent = None # Force re-init
|
||||
if save_config_value("agent.system_prompt", new_prompt):
|
||||
print("(^_^)b System prompt set (saved to config)")
|
||||
else:
|
||||
print("(^_^) System prompt set (session only)")
|
||||
print(f" \"{new_prompt[:60]}{'...' if len(new_prompt) > 60 else ''}\"")
|
||||
else:
|
||||
# Show current prompt
|
||||
print()
|
||||
print("+" + "-" * 50 + "+")
|
||||
print("|" + " " * 15 + "(^_^) System Prompt" + " " * 15 + "|")
|
||||
print("+" + "-" * 50 + "+")
|
||||
print()
|
||||
if self.system_prompt:
|
||||
# Word wrap the prompt for display
|
||||
words = self.system_prompt.split()
|
||||
lines = []
|
||||
current_line = ""
|
||||
for word in words:
|
||||
if len(current_line) + len(word) + 1 <= 50:
|
||||
current_line += (" " if current_line else "") + word
|
||||
else:
|
||||
lines.append(current_line)
|
||||
current_line = word
|
||||
if current_line:
|
||||
lines.append(current_line)
|
||||
for line in lines:
|
||||
print(f" {line}")
|
||||
else:
|
||||
print(" (no custom prompt set - using default)")
|
||||
print()
|
||||
print(" Usage:")
|
||||
print(" /prompt <text> - Set a custom system prompt")
|
||||
print(" /prompt clear - Remove custom prompt")
|
||||
print(" /personality - Use a predefined personality")
|
||||
print()
|
||||
|
||||
|
||||
@staticmethod
|
||||
@@ -4508,7 +4556,9 @@ class HermesCLI:
|
||||
self._handle_model_switch(cmd_original)
|
||||
elif canonical == "provider":
|
||||
self._show_model_and_providers()
|
||||
|
||||
elif canonical == "prompt":
|
||||
# Use original case so prompt text isn't lowercased
|
||||
self._handle_prompt_command(cmd_original)
|
||||
elif canonical == "personality":
|
||||
# Use original case (handler lowercases the personality name itself)
|
||||
self._handle_personality_command(cmd_original)
|
||||
|
||||
@@ -250,7 +250,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
|
||||
# Kimi Code Endpoint Detection
|
||||
# =============================================================================
|
||||
|
||||
# Kimi Code (kimi.com/code) issues keys prefixed "sk-kimi-" that only work
|
||||
# Kimi Code (platform.kimi.ai) issues keys prefixed "sk-kimi-" that only work
|
||||
# on api.kimi.com/coding/v1. Legacy keys from platform.moonshot.ai work on
|
||||
# api.moonshot.ai/v1 (the default). Auto-detect when user hasn't set
|
||||
# KIMI_BASE_URL explicitly.
|
||||
|
||||
@@ -87,7 +87,8 @@ COMMAND_REGISTRY: list[CommandDef] = [
|
||||
CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--global]"),
|
||||
CommandDef("provider", "Show available providers and current provider",
|
||||
"Configuration"),
|
||||
|
||||
CommandDef("prompt", "View/set custom system prompt", "Configuration",
|
||||
cli_only=True, args_hint="[text]", subcommands=("clear",)),
|
||||
CommandDef("personality", "Set a predefined personality", "Configuration",
|
||||
args_hint="[name]"),
|
||||
CommandDef("statusbar", "Toggle the context/model status bar", "Configuration",
|
||||
|
||||
@@ -733,7 +733,6 @@ def list_authenticated_providers(
|
||||
fetch_models_dev,
|
||||
get_provider_info as _mdev_pinfo,
|
||||
)
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS
|
||||
|
||||
results: List[dict] = []
|
||||
@@ -754,16 +753,9 @@ def list_authenticated_providers(
|
||||
if not isinstance(pdata, dict):
|
||||
continue
|
||||
|
||||
# Prefer auth.py PROVIDER_REGISTRY for env var names — it's our
|
||||
# source of truth. models.dev can have wrong mappings (e.g.
|
||||
# minimax-cn → MINIMAX_API_KEY instead of MINIMAX_CN_API_KEY).
|
||||
pconfig = PROVIDER_REGISTRY.get(hermes_id)
|
||||
if pconfig and pconfig.api_key_env_vars:
|
||||
env_vars = list(pconfig.api_key_env_vars)
|
||||
else:
|
||||
env_vars = pdata.get("env", [])
|
||||
if not isinstance(env_vars, list):
|
||||
continue
|
||||
env_vars = pdata.get("env", [])
|
||||
if not isinstance(env_vars, list):
|
||||
continue
|
||||
|
||||
# Check if any env var is set
|
||||
has_creds = any(os.environ.get(ev) for ev in env_vars)
|
||||
|
||||
@@ -2572,120 +2572,9 @@ _OPENCLAW_SCRIPT = (
|
||||
)
|
||||
|
||||
|
||||
def _load_openclaw_migration_module():
|
||||
"""Load the openclaw_to_hermes migration script as a module.
|
||||
|
||||
Returns the loaded module, or None if the script can't be loaded.
|
||||
"""
|
||||
if not _OPENCLAW_SCRIPT.exists():
|
||||
return None
|
||||
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
"openclaw_to_hermes", _OPENCLAW_SCRIPT
|
||||
)
|
||||
if spec is None or spec.loader is None:
|
||||
return None
|
||||
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
# Register in sys.modules so @dataclass can resolve the module
|
||||
# (Python 3.11+ requires this for dynamically loaded modules)
|
||||
import sys as _sys
|
||||
_sys.modules[spec.name] = mod
|
||||
try:
|
||||
spec.loader.exec_module(mod)
|
||||
except Exception:
|
||||
_sys.modules.pop(spec.name, None)
|
||||
raise
|
||||
return mod
|
||||
|
||||
|
||||
# Item kinds that represent high-impact changes warranting explicit warnings.
|
||||
# Gateway tokens/channels can hijack messaging platforms from the old agent.
|
||||
# Config values may have different semantics between OpenClaw and Hermes.
|
||||
# Instruction/context files (.md) can contain incompatible setup procedures.
|
||||
_HIGH_IMPACT_KIND_KEYWORDS = {
|
||||
"gateway": "⚠ Gateway/messaging — this will configure Hermes to use your OpenClaw messaging channels",
|
||||
"telegram": "⚠ Telegram — this will point Hermes at your OpenClaw Telegram bot",
|
||||
"slack": "⚠ Slack — this will point Hermes at your OpenClaw Slack workspace",
|
||||
"discord": "⚠ Discord — this will point Hermes at your OpenClaw Discord bot",
|
||||
"whatsapp": "⚠ WhatsApp — this will point Hermes at your OpenClaw WhatsApp connection",
|
||||
"config": "⚠ Config values — OpenClaw settings may not map 1:1 to Hermes equivalents",
|
||||
"soul": "⚠ Instruction file — may contain OpenClaw-specific setup/restart procedures",
|
||||
"memory": "⚠ Memory/context file — may reference OpenClaw-specific infrastructure",
|
||||
"context": "⚠ Context file — may contain OpenClaw-specific instructions",
|
||||
}
|
||||
|
||||
|
||||
def _print_migration_preview(report: dict):
|
||||
"""Print a detailed dry-run preview of what migration would do.
|
||||
|
||||
Groups items by category and adds explicit warnings for high-impact
|
||||
changes like gateway token takeover and config value differences.
|
||||
"""
|
||||
items = report.get("items", [])
|
||||
if not items:
|
||||
print_info("Nothing to migrate.")
|
||||
return
|
||||
|
||||
migrated_items = [i for i in items if i.get("status") == "migrated"]
|
||||
conflict_items = [i for i in items if i.get("status") == "conflict"]
|
||||
skipped_items = [i for i in items if i.get("status") == "skipped"]
|
||||
|
||||
warnings_shown = set()
|
||||
|
||||
if migrated_items:
|
||||
print(color(" Would import:", Colors.GREEN))
|
||||
for item in migrated_items:
|
||||
kind = item.get("kind", "unknown")
|
||||
dest = item.get("destination", "")
|
||||
if dest:
|
||||
dest_short = str(dest).replace(str(Path.home()), "~")
|
||||
print(f" {kind:<22s} → {dest_short}")
|
||||
else:
|
||||
print(f" {kind}")
|
||||
|
||||
# Check for high-impact items and collect warnings
|
||||
kind_lower = kind.lower()
|
||||
dest_lower = str(dest).lower()
|
||||
for keyword, warning in _HIGH_IMPACT_KIND_KEYWORDS.items():
|
||||
if keyword in kind_lower or keyword in dest_lower:
|
||||
warnings_shown.add(warning)
|
||||
print()
|
||||
|
||||
if conflict_items:
|
||||
print(color(" Would overwrite (conflicts with existing Hermes config):", Colors.YELLOW))
|
||||
for item in conflict_items:
|
||||
kind = item.get("kind", "unknown")
|
||||
reason = item.get("reason", "already exists")
|
||||
print(f" {kind:<22s} {reason}")
|
||||
print()
|
||||
|
||||
if skipped_items:
|
||||
print(color(" Would skip:", Colors.DIM))
|
||||
for item in skipped_items:
|
||||
kind = item.get("kind", "unknown")
|
||||
reason = item.get("reason", "")
|
||||
print(f" {kind:<22s} {reason}")
|
||||
print()
|
||||
|
||||
# Print collected warnings
|
||||
if warnings_shown:
|
||||
print(color(" ── Warnings ──", Colors.YELLOW))
|
||||
for warning in sorted(warnings_shown):
|
||||
print(color(f" {warning}", Colors.YELLOW))
|
||||
print()
|
||||
print(color(" Note: OpenClaw config values may have different semantics in Hermes.", Colors.YELLOW))
|
||||
print(color(" For example, OpenClaw's tool_call_execution: \"auto\" ≠ Hermes's yolo mode.", Colors.YELLOW))
|
||||
print(color(" Instruction files (.md) from OpenClaw may contain incompatible procedures.", Colors.YELLOW))
|
||||
print()
|
||||
|
||||
|
||||
def _offer_openclaw_migration(hermes_home: Path) -> bool:
|
||||
"""Detect ~/.openclaw and offer to migrate during first-time setup.
|
||||
|
||||
Runs a dry-run first to show the user exactly what would be imported,
|
||||
overwritten, or taken over. Only executes after explicit confirmation.
|
||||
|
||||
Returns True if migration ran successfully, False otherwise.
|
||||
"""
|
||||
openclaw_dir = Path.home() / ".openclaw"
|
||||
@@ -2698,12 +2587,12 @@ def _offer_openclaw_migration(hermes_home: Path) -> bool:
|
||||
print()
|
||||
print_header("OpenClaw Installation Detected")
|
||||
print_info(f"Found OpenClaw data at {openclaw_dir}")
|
||||
print_info("Hermes can preview what would be imported before making any changes.")
|
||||
print_info("Hermes can import your settings, memories, skills, and API keys.")
|
||||
print()
|
||||
|
||||
if not prompt_yes_no("Would you like to see what can be imported?", default=True):
|
||||
if not prompt_yes_no("Would you like to import from OpenClaw?", default=True):
|
||||
print_info(
|
||||
"Skipping migration. You can run it later with: hermes claw migrate --dry-run"
|
||||
"Skipping migration. You can run it later via the openclaw-migration skill."
|
||||
)
|
||||
return False
|
||||
|
||||
@@ -2712,71 +2601,34 @@ def _offer_openclaw_migration(hermes_home: Path) -> bool:
|
||||
if not config_path.exists():
|
||||
save_config(load_config())
|
||||
|
||||
# Load the migration module
|
||||
# Dynamically load the migration script
|
||||
try:
|
||||
mod = _load_openclaw_migration_module()
|
||||
if mod is None:
|
||||
spec = importlib.util.spec_from_file_location(
|
||||
"openclaw_to_hermes", _OPENCLAW_SCRIPT
|
||||
)
|
||||
if spec is None or spec.loader is None:
|
||||
print_warning("Could not load migration script.")
|
||||
return False
|
||||
except Exception as e:
|
||||
print_warning(f"Could not load migration script: {e}")
|
||||
logger.debug("OpenClaw migration module load error", exc_info=True)
|
||||
return False
|
||||
|
||||
# ── Phase 1: Dry-run preview ──
|
||||
try:
|
||||
mod = importlib.util.module_from_spec(spec)
|
||||
# Register in sys.modules so @dataclass can resolve the module
|
||||
# (Python 3.11+ requires this for dynamically loaded modules)
|
||||
import sys as _sys
|
||||
_sys.modules[spec.name] = mod
|
||||
try:
|
||||
spec.loader.exec_module(mod)
|
||||
except Exception:
|
||||
_sys.modules.pop(spec.name, None)
|
||||
raise
|
||||
|
||||
# Run migration with the "full" preset, execute mode, no overwrite
|
||||
selected = mod.resolve_selected_options(None, None, preset="full")
|
||||
dry_migrator = mod.Migrator(
|
||||
source_root=openclaw_dir.resolve(),
|
||||
target_root=hermes_home.resolve(),
|
||||
execute=False, # dry-run — no files modified
|
||||
workspace_target=None,
|
||||
overwrite=True, # show everything including conflicts
|
||||
migrate_secrets=True,
|
||||
output_dir=None,
|
||||
selected_options=selected,
|
||||
preset_name="full",
|
||||
)
|
||||
preview_report = dry_migrator.migrate()
|
||||
except Exception as e:
|
||||
print_warning(f"Migration preview failed: {e}")
|
||||
logger.debug("OpenClaw migration preview error", exc_info=True)
|
||||
return False
|
||||
|
||||
# Display the full preview
|
||||
preview_summary = preview_report.get("summary", {})
|
||||
preview_count = preview_summary.get("migrated", 0)
|
||||
|
||||
if preview_count == 0:
|
||||
print()
|
||||
print_info("Nothing to import from OpenClaw.")
|
||||
return False
|
||||
|
||||
print()
|
||||
print_header(f"Migration Preview — {preview_count} item(s) would be imported")
|
||||
print_info("No changes have been made yet. Review the list below:")
|
||||
print()
|
||||
_print_migration_preview(preview_report)
|
||||
|
||||
# ── Phase 2: Confirm and execute ──
|
||||
if not prompt_yes_no("Proceed with migration?", default=False):
|
||||
print_info(
|
||||
"Migration cancelled. You can run it later with: hermes claw migrate"
|
||||
)
|
||||
print_info(
|
||||
"Use --dry-run to preview again, or --preset minimal for a lighter import."
|
||||
)
|
||||
return False
|
||||
|
||||
# Execute the migration — overwrite=False so existing Hermes configs are
|
||||
# preserved. The user saw the preview; conflicts are skipped by default.
|
||||
try:
|
||||
migrator = mod.Migrator(
|
||||
source_root=openclaw_dir.resolve(),
|
||||
target_root=hermes_home.resolve(),
|
||||
execute=True,
|
||||
workspace_target=None,
|
||||
overwrite=False, # preserve existing Hermes config
|
||||
overwrite=True,
|
||||
migrate_secrets=True,
|
||||
output_dir=None,
|
||||
selected_options=selected,
|
||||
@@ -2788,7 +2640,7 @@ def _offer_openclaw_migration(hermes_home: Path) -> bool:
|
||||
logger.debug("OpenClaw migration error", exc_info=True)
|
||||
return False
|
||||
|
||||
# Print final summary
|
||||
# Print summary
|
||||
summary = report.get("summary", {})
|
||||
migrated = summary.get("migrated", 0)
|
||||
skipped = summary.get("skipped", 0)
|
||||
@@ -2799,7 +2651,7 @@ def _offer_openclaw_migration(hermes_home: Path) -> bool:
|
||||
if migrated:
|
||||
print_success(f"Imported {migrated} item(s) from OpenClaw.")
|
||||
if conflicts:
|
||||
print_info(f"Skipped {conflicts} item(s) that already exist in Hermes (use hermes claw migrate --overwrite to force).")
|
||||
print_info(f"Skipped {conflicts} item(s) that already exist in Hermes.")
|
||||
if skipped:
|
||||
print_info(f"Skipped {skipped} item(s) (not found or unchanged).")
|
||||
if errors:
|
||||
|
||||
68
run_agent.py
68
run_agent.py
@@ -87,7 +87,6 @@ from agent.model_metadata import (
|
||||
fetch_model_metadata,
|
||||
estimate_tokens_rough, estimate_messages_tokens_rough, estimate_request_tokens_rough,
|
||||
get_next_probe_tier, parse_context_limit_from_error,
|
||||
parse_available_output_tokens_from_error,
|
||||
save_context_length, is_local_endpoint,
|
||||
query_ollama_num_ctx,
|
||||
)
|
||||
@@ -4969,21 +4968,9 @@ class AIAgent:
|
||||
# Swap OpenAI client and config in-place
|
||||
self.api_key = fb_client.api_key
|
||||
self.client = fb_client
|
||||
# Preserve provider-specific headers that
|
||||
# resolve_provider_client() may have baked into
|
||||
# fb_client via the default_headers kwarg. The OpenAI
|
||||
# SDK stores these in _custom_headers. Without this,
|
||||
# subsequent request-client rebuilds (via
|
||||
# _create_request_openai_client) drop the headers,
|
||||
# causing 403s from providers like Kimi Coding that
|
||||
# require a User-Agent sentinel.
|
||||
fb_headers = getattr(fb_client, "_custom_headers", None)
|
||||
if not fb_headers:
|
||||
fb_headers = getattr(fb_client, "default_headers", None)
|
||||
self._client_kwargs = {
|
||||
"api_key": fb_client.api_key,
|
||||
"base_url": fb_base_url,
|
||||
**({"default_headers": dict(fb_headers)} if fb_headers else {}),
|
||||
}
|
||||
|
||||
# Re-evaluate prompt caching for the new provider/model
|
||||
@@ -5398,22 +5385,15 @@ class AIAgent:
|
||||
if self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages)
|
||||
# Pass context_length (total input+output window) so the adapter can
|
||||
# clamp max_tokens (output cap) when the user configured a smaller
|
||||
# context window than the model's native output limit.
|
||||
# Pass context_length so the adapter can clamp max_tokens if the
|
||||
# user configured a smaller context window than the model's output limit.
|
||||
ctx_len = getattr(self, "context_compressor", None)
|
||||
ctx_len = ctx_len.context_length if ctx_len else None
|
||||
# _ephemeral_max_output_tokens is set for one call when the API
|
||||
# returns "max_tokens too large given prompt" — it caps output to
|
||||
# the available window space without touching context_length.
|
||||
ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None)
|
||||
if ephemeral_out is not None:
|
||||
self._ephemeral_max_output_tokens = None # consume immediately
|
||||
return build_anthropic_kwargs(
|
||||
model=self.model,
|
||||
messages=anthropic_messages,
|
||||
tools=self.tools,
|
||||
max_tokens=ephemeral_out if ephemeral_out is not None else self.max_tokens,
|
||||
max_tokens=self.max_tokens,
|
||||
reasoning_config=self.reasoning_config,
|
||||
is_oauth=self._is_anthropic_oauth,
|
||||
preserve_dots=self._anthropic_preserve_dots(),
|
||||
@@ -8314,48 +8294,6 @@ class AIAgent:
|
||||
compressor = self.context_compressor
|
||||
old_ctx = compressor.context_length
|
||||
|
||||
# ── Distinguish two very different errors ───────────
|
||||
# 1. "Prompt too long": the INPUT exceeds the context window.
|
||||
# Fix: reduce context_length + compress history.
|
||||
# 2. "max_tokens too large": input is fine, but
|
||||
# input_tokens + requested max_tokens > context_window.
|
||||
# Fix: reduce max_tokens (the OUTPUT cap) for this call.
|
||||
# Do NOT shrink context_length — the window is unchanged.
|
||||
#
|
||||
# Note: max_tokens = output token cap (one response).
|
||||
# context_length = total window (input + output combined).
|
||||
available_out = parse_available_output_tokens_from_error(error_msg)
|
||||
if available_out is not None:
|
||||
# Error is purely about the output cap being too large.
|
||||
# Cap output to the available space and retry without
|
||||
# touching context_length or triggering compression.
|
||||
safe_out = max(1, available_out - 64) # small safety margin
|
||||
self._ephemeral_max_output_tokens = safe_out
|
||||
self._vprint(
|
||||
f"{self.log_prefix}⚠️ Output cap too large for current prompt — "
|
||||
f"retrying with max_tokens={safe_out:,} "
|
||||
f"(available_tokens={available_out:,}; context_length unchanged at {old_ctx:,})",
|
||||
force=True,
|
||||
)
|
||||
# Still count against compression_attempts so we don't
|
||||
# loop forever if the error keeps recurring.
|
||||
compression_attempts += 1
|
||||
if compression_attempts > max_compression_attempts:
|
||||
self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True)
|
||||
self._vprint(f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True)
|
||||
logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.")
|
||||
self._persist_session(messages, conversation_history)
|
||||
return {
|
||||
"messages": messages,
|
||||
"completed": False,
|
||||
"api_calls": api_call_count,
|
||||
"error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.",
|
||||
"partial": True
|
||||
}
|
||||
restart_with_compressed_messages = True
|
||||
break
|
||||
|
||||
# Error is about the INPUT being too large — reduce context_length.
|
||||
# Try to parse the actual limit from the error message
|
||||
parsed_limit = parse_context_limit_from_error(error_msg)
|
||||
if parsed_limit and parsed_limit < old_ctx:
|
||||
|
||||
@@ -249,6 +249,7 @@ Type these during an interactive chat session.
|
||||
/config Show config (CLI)
|
||||
/model [name] Show or change model
|
||||
/provider Show provider info
|
||||
/prompt [text] View/set system prompt (CLI)
|
||||
/personality [name] Set personality
|
||||
/reasoning [level] Set reasoning (none|low|medium|high|xhigh|show|hide)
|
||||
/verbose Cycle: off → new → all → verbose
|
||||
|
||||
@@ -41,7 +41,6 @@ def _attach_agent(
|
||||
session_completion_tokens=completion_tokens,
|
||||
session_total_tokens=total_tokens,
|
||||
session_api_calls=api_calls,
|
||||
get_rate_limit_state=lambda: None,
|
||||
context_compressor=SimpleNamespace(
|
||||
last_prompt_tokens=context_tokens,
|
||||
context_length=context_length,
|
||||
|
||||
@@ -38,8 +38,6 @@ def _isolate_hermes_home(tmp_path, monkeypatch):
|
||||
monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False)
|
||||
monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False)
|
||||
monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
|
||||
# Avoid making real calls during tests if this key is set in the env files
|
||||
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
|
||||
@@ -38,11 +38,10 @@ def _make_timeout_error() -> httpx.TimeoutException:
|
||||
# cache_image_from_url (base.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@patch("tools.url_safety.is_safe_url", return_value=True)
|
||||
class TestCacheImageFromUrl:
|
||||
"""Tests for gateway.platforms.base.cache_image_from_url"""
|
||||
|
||||
def test_success_on_first_attempt(self, _mock_safe, tmp_path, monkeypatch):
|
||||
def test_success_on_first_attempt(self, tmp_path, monkeypatch):
|
||||
"""A clean 200 response caches the image and returns a path."""
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
|
||||
@@ -66,7 +65,7 @@ class TestCacheImageFromUrl:
|
||||
assert path.endswith(".jpg")
|
||||
mock_client.get.assert_called_once()
|
||||
|
||||
def test_retries_on_timeout_then_succeeds(self, _mock_safe, tmp_path, monkeypatch):
|
||||
def test_retries_on_timeout_then_succeeds(self, tmp_path, monkeypatch):
|
||||
"""A timeout on the first attempt is retried; second attempt succeeds."""
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
|
||||
@@ -96,7 +95,7 @@ class TestCacheImageFromUrl:
|
||||
assert mock_client.get.call_count == 2
|
||||
mock_sleep.assert_called_once()
|
||||
|
||||
def test_retries_on_429_then_succeeds(self, _mock_safe, tmp_path, monkeypatch):
|
||||
def test_retries_on_429_then_succeeds(self, tmp_path, monkeypatch):
|
||||
"""A 429 response on the first attempt is retried; second attempt succeeds."""
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
|
||||
@@ -123,7 +122,7 @@ class TestCacheImageFromUrl:
|
||||
assert path.endswith(".jpg")
|
||||
assert mock_client.get.call_count == 2
|
||||
|
||||
def test_raises_after_max_retries_exhausted(self, _mock_safe, tmp_path, monkeypatch):
|
||||
def test_raises_after_max_retries_exhausted(self, tmp_path, monkeypatch):
|
||||
"""Timeout on every attempt raises after all retries are consumed."""
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
|
||||
@@ -146,7 +145,7 @@ class TestCacheImageFromUrl:
|
||||
# 3 total calls: initial + 2 retries
|
||||
assert mock_client.get.call_count == 3
|
||||
|
||||
def test_non_retryable_4xx_raises_immediately(self, _mock_safe, tmp_path, monkeypatch):
|
||||
def test_non_retryable_4xx_raises_immediately(self, tmp_path, monkeypatch):
|
||||
"""A 404 (non-retryable) is raised immediately without any retry."""
|
||||
monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img")
|
||||
|
||||
@@ -176,11 +175,10 @@ class TestCacheImageFromUrl:
|
||||
# cache_audio_from_url (base.py)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@patch("tools.url_safety.is_safe_url", return_value=True)
|
||||
class TestCacheAudioFromUrl:
|
||||
"""Tests for gateway.platforms.base.cache_audio_from_url"""
|
||||
|
||||
def test_success_on_first_attempt(self, _mock_safe, tmp_path, monkeypatch):
|
||||
def test_success_on_first_attempt(self, tmp_path, monkeypatch):
|
||||
"""A clean 200 response caches the audio and returns a path."""
|
||||
monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
|
||||
|
||||
@@ -204,7 +202,7 @@ class TestCacheAudioFromUrl:
|
||||
assert path.endswith(".ogg")
|
||||
mock_client.get.assert_called_once()
|
||||
|
||||
def test_retries_on_timeout_then_succeeds(self, _mock_safe, tmp_path, monkeypatch):
|
||||
def test_retries_on_timeout_then_succeeds(self, tmp_path, monkeypatch):
|
||||
"""A timeout on the first attempt is retried; second attempt succeeds."""
|
||||
monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
|
||||
|
||||
@@ -234,7 +232,7 @@ class TestCacheAudioFromUrl:
|
||||
assert mock_client.get.call_count == 2
|
||||
mock_sleep.assert_called_once()
|
||||
|
||||
def test_retries_on_429_then_succeeds(self, _mock_safe, tmp_path, monkeypatch):
|
||||
def test_retries_on_429_then_succeeds(self, tmp_path, monkeypatch):
|
||||
"""A 429 response on the first attempt is retried; second attempt succeeds."""
|
||||
monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
|
||||
|
||||
@@ -261,7 +259,7 @@ class TestCacheAudioFromUrl:
|
||||
assert path.endswith(".ogg")
|
||||
assert mock_client.get.call_count == 2
|
||||
|
||||
def test_retries_on_500_then_succeeds(self, _mock_safe, tmp_path, monkeypatch):
|
||||
def test_retries_on_500_then_succeeds(self, tmp_path, monkeypatch):
|
||||
"""A 500 response on the first attempt is retried; second attempt succeeds."""
|
||||
monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
|
||||
|
||||
@@ -288,7 +286,7 @@ class TestCacheAudioFromUrl:
|
||||
assert path.endswith(".ogg")
|
||||
assert mock_client.get.call_count == 2
|
||||
|
||||
def test_raises_after_max_retries_exhausted(self, _mock_safe, tmp_path, monkeypatch):
|
||||
def test_raises_after_max_retries_exhausted(self, tmp_path, monkeypatch):
|
||||
"""Timeout on every attempt raises after all retries are consumed."""
|
||||
monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
|
||||
|
||||
@@ -311,7 +309,7 @@ class TestCacheAudioFromUrl:
|
||||
# 3 total calls: initial + 2 retries
|
||||
assert mock_client.get.call_count == 3
|
||||
|
||||
def test_non_retryable_4xx_raises_immediately(self, _mock_safe, tmp_path, monkeypatch):
|
||||
def test_non_retryable_4xx_raises_immediately(self, tmp_path, monkeypatch):
|
||||
"""A 404 (non-retryable) is raised immediately without any retry."""
|
||||
monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio")
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import base64
|
||||
import os
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from unittest.mock import AsyncMock
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -355,8 +355,7 @@ class TestMediaUpload:
|
||||
assert calls[3][1]["chunk_index"] == 2
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@patch("tools.url_safety.is_safe_url", return_value=True)
|
||||
async def test_download_remote_bytes_rejects_large_content_length(self, _mock_safe):
|
||||
async def test_download_remote_bytes_rejects_large_content_length(self):
|
||||
from gateway.platforms.wecom import WeComAdapter
|
||||
|
||||
class FakeResponse:
|
||||
|
||||
@@ -628,21 +628,14 @@ class TestHasAnyProviderConfigured:
|
||||
def test_claude_code_creds_ignored_on_fresh_install(self, monkeypatch, tmp_path):
|
||||
"""Claude Code credentials should NOT skip the wizard when Hermes is unconfigured."""
|
||||
from hermes_cli import config as config_module
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
hermes_home.mkdir()
|
||||
monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env")
|
||||
monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home)
|
||||
# Clear all provider env vars so earlier checks don't short-circuit
|
||||
_all_vars = {"OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
|
||||
"ANTHROPIC_TOKEN", "OPENAI_BASE_URL"}
|
||||
for pconfig in PROVIDER_REGISTRY.values():
|
||||
if pconfig.auth_type == "api_key":
|
||||
_all_vars.update(pconfig.api_key_env_vars)
|
||||
for var in _all_vars:
|
||||
for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
|
||||
"ANTHROPIC_TOKEN", "OPENAI_BASE_URL"):
|
||||
monkeypatch.delenv(var, raising=False)
|
||||
# Prevent gh-cli / copilot auth fallback from leaking in
|
||||
monkeypatch.setattr("hermes_cli.auth.get_auth_status", lambda _pid: {})
|
||||
# Simulate valid Claude Code credentials
|
||||
monkeypatch.setattr(
|
||||
"agent.anthropic_adapter.read_claude_code_credentials",
|
||||
@@ -717,7 +710,6 @@ class TestHasAnyProviderConfigured:
|
||||
"""config.yaml model dict with empty default and no creds stays false."""
|
||||
import yaml
|
||||
from hermes_cli import config as config_module
|
||||
from hermes_cli.auth import PROVIDER_REGISTRY
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
hermes_home.mkdir()
|
||||
config_file = hermes_home / "config.yaml"
|
||||
@@ -727,15 +719,9 @@ class TestHasAnyProviderConfigured:
|
||||
monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env")
|
||||
monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home)
|
||||
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
|
||||
_all_vars = {"OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
|
||||
"ANTHROPIC_TOKEN", "OPENAI_BASE_URL"}
|
||||
for pconfig in PROVIDER_REGISTRY.values():
|
||||
if pconfig.auth_type == "api_key":
|
||||
_all_vars.update(pconfig.api_key_env_vars)
|
||||
for var in _all_vars:
|
||||
for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY",
|
||||
"ANTHROPIC_TOKEN", "OPENAI_BASE_URL"):
|
||||
monkeypatch.delenv(var, raising=False)
|
||||
# Prevent gh-cli / copilot auth fallback from leaking in
|
||||
monkeypatch.setattr("hermes_cli.auth.get_auth_status", lambda _pid: {})
|
||||
from hermes_cli.main import _has_any_provider_configured
|
||||
assert _has_any_provider_configured() is False
|
||||
|
||||
@@ -955,10 +941,9 @@ class TestHuggingFaceModels:
|
||||
"""Every HF model should have a context length entry."""
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS
|
||||
lower_keys = {k.lower() for k in DEFAULT_CONTEXT_LENGTHS}
|
||||
hf_models = _PROVIDER_MODELS["huggingface"]
|
||||
for model in hf_models:
|
||||
assert model.lower() in lower_keys, (
|
||||
assert model in DEFAULT_CONTEXT_LENGTHS, (
|
||||
f"HF model {model!r} missing from DEFAULT_CONTEXT_LENGTHS"
|
||||
)
|
||||
|
||||
|
||||
@@ -425,8 +425,8 @@ class TestSlashCommandCompleter:
|
||||
class TestSubcommands:
|
||||
def test_explicit_subcommands_extracted(self):
|
||||
"""Commands with explicit subcommands on CommandDef are extracted."""
|
||||
assert "/skills" in SUBCOMMANDS
|
||||
assert "install" in SUBCOMMANDS["/skills"]
|
||||
assert "/prompt" in SUBCOMMANDS
|
||||
assert "clear" in SUBCOMMANDS["/prompt"]
|
||||
|
||||
def test_reasoning_has_subcommands(self):
|
||||
assert "/reasoning" in SUBCOMMANDS
|
||||
|
||||
@@ -44,7 +44,7 @@ class TestOfferOpenclawMigration:
|
||||
assert setup_mod._offer_openclaw_migration(tmp_path / ".hermes") is False
|
||||
|
||||
def test_runs_migration_when_user_accepts(self, tmp_path):
|
||||
"""Should run dry-run preview first, then execute after confirmation."""
|
||||
"""Should dynamically load the script and run the Migrator."""
|
||||
openclaw_dir = tmp_path / ".openclaw"
|
||||
openclaw_dir.mkdir()
|
||||
|
||||
@@ -60,7 +60,6 @@ class TestOfferOpenclawMigration:
|
||||
fake_migrator = MagicMock()
|
||||
fake_migrator.migrate.return_value = {
|
||||
"summary": {"migrated": 3, "skipped": 1, "conflict": 0, "error": 0},
|
||||
"items": [{"kind": "config", "status": "migrated", "destination": "/tmp/x"}],
|
||||
"output_dir": str(hermes_home / "migration"),
|
||||
}
|
||||
fake_mod.Migrator = MagicMock(return_value=fake_migrator)
|
||||
@@ -71,7 +70,6 @@ class TestOfferOpenclawMigration:
|
||||
with (
|
||||
patch("hermes_cli.setup.Path.home", return_value=tmp_path),
|
||||
patch.object(setup_mod, "_OPENCLAW_SCRIPT", script),
|
||||
# Both prompts answered Yes: preview offer + proceed confirmation
|
||||
patch.object(setup_mod, "prompt_yes_no", return_value=True),
|
||||
patch.object(setup_mod, "get_config_path", return_value=config_path),
|
||||
patch("importlib.util.spec_from_file_location") as mock_spec_fn,
|
||||
@@ -93,75 +91,13 @@ class TestOfferOpenclawMigration:
|
||||
fake_mod.resolve_selected_options.assert_called_once_with(
|
||||
None, None, preset="full"
|
||||
)
|
||||
# Migrator called twice: once for dry-run preview, once for execution
|
||||
assert fake_mod.Migrator.call_count == 2
|
||||
|
||||
# First call: dry-run preview (execute=False, overwrite=True to show all)
|
||||
preview_kwargs = fake_mod.Migrator.call_args_list[0][1]
|
||||
assert preview_kwargs["execute"] is False
|
||||
assert preview_kwargs["overwrite"] is True
|
||||
assert preview_kwargs["migrate_secrets"] is True
|
||||
assert preview_kwargs["preset_name"] == "full"
|
||||
|
||||
# Second call: actual execution (execute=True, overwrite=False to preserve)
|
||||
exec_kwargs = fake_mod.Migrator.call_args_list[1][1]
|
||||
assert exec_kwargs["execute"] is True
|
||||
assert exec_kwargs["overwrite"] is False
|
||||
assert exec_kwargs["migrate_secrets"] is True
|
||||
assert exec_kwargs["preset_name"] == "full"
|
||||
|
||||
# migrate() called twice (once per Migrator instance)
|
||||
assert fake_migrator.migrate.call_count == 2
|
||||
|
||||
def test_user_declines_after_preview(self, tmp_path):
|
||||
"""Should return False when user sees preview but declines to proceed."""
|
||||
openclaw_dir = tmp_path / ".openclaw"
|
||||
openclaw_dir.mkdir()
|
||||
|
||||
hermes_home = tmp_path / ".hermes"
|
||||
hermes_home.mkdir()
|
||||
config_path = hermes_home / "config.yaml"
|
||||
config_path.write_text("agent:\n max_turns: 90\n")
|
||||
|
||||
fake_mod = ModuleType("openclaw_to_hermes")
|
||||
fake_mod.resolve_selected_options = MagicMock(return_value={"soul", "memory"})
|
||||
fake_migrator = MagicMock()
|
||||
fake_migrator.migrate.return_value = {
|
||||
"summary": {"migrated": 3, "skipped": 0, "conflict": 0, "error": 0},
|
||||
"items": [{"kind": "config", "status": "migrated", "destination": "/tmp/x"}],
|
||||
}
|
||||
fake_mod.Migrator = MagicMock(return_value=fake_migrator)
|
||||
|
||||
script = tmp_path / "openclaw_to_hermes.py"
|
||||
script.write_text("# placeholder")
|
||||
|
||||
# First prompt (preview): Yes, Second prompt (proceed): No
|
||||
prompt_responses = iter([True, False])
|
||||
|
||||
with (
|
||||
patch("hermes_cli.setup.Path.home", return_value=tmp_path),
|
||||
patch.object(setup_mod, "_OPENCLAW_SCRIPT", script),
|
||||
patch.object(setup_mod, "prompt_yes_no", side_effect=prompt_responses),
|
||||
patch.object(setup_mod, "get_config_path", return_value=config_path),
|
||||
patch("importlib.util.spec_from_file_location") as mock_spec_fn,
|
||||
):
|
||||
mock_spec = MagicMock()
|
||||
mock_spec.loader = MagicMock()
|
||||
mock_spec_fn.return_value = mock_spec
|
||||
|
||||
def exec_module(mod):
|
||||
mod.resolve_selected_options = fake_mod.resolve_selected_options
|
||||
mod.Migrator = fake_mod.Migrator
|
||||
|
||||
mock_spec.loader.exec_module = exec_module
|
||||
|
||||
result = setup_mod._offer_openclaw_migration(hermes_home)
|
||||
|
||||
assert result is False
|
||||
# Only dry-run Migrator was created, not the execute one
|
||||
assert fake_mod.Migrator.call_count == 1
|
||||
preview_kwargs = fake_mod.Migrator.call_args[1]
|
||||
assert preview_kwargs["execute"] is False
|
||||
fake_mod.Migrator.assert_called_once()
|
||||
call_kwargs = fake_mod.Migrator.call_args[1]
|
||||
assert call_kwargs["execute"] is True
|
||||
assert call_kwargs["overwrite"] is True
|
||||
assert call_kwargs["migrate_secrets"] is True
|
||||
assert call_kwargs["preset_name"] == "full"
|
||||
fake_migrator.migrate.assert_called_once()
|
||||
|
||||
def test_handles_migration_error_gracefully(self, tmp_path):
|
||||
"""Should catch exceptions and return False."""
|
||||
|
||||
@@ -354,14 +354,6 @@ def test_first_install_nous_auto_configures_managed_defaults(monkeypatch):
|
||||
lambda *args, **kwargs: {"web", "image_gen", "tts", "browser"},
|
||||
)
|
||||
monkeypatch.setattr("hermes_cli.tools_config.save_config", lambda config: None)
|
||||
# Prevent leaked platform tokens (e.g. DISCORD_BOT_TOKEN from gateway.run
|
||||
# import) from adding extra platforms. The loop in tools_command runs
|
||||
# apply_nous_managed_defaults per platform; a second iteration sees values
|
||||
# set by the first as "explicit" and skips them.
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.tools_config._get_enabled_platforms",
|
||||
lambda: ["cli"],
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
"hermes_cli.nous_subscription.get_nous_auth_status",
|
||||
lambda: {"logged_in": True},
|
||||
|
||||
@@ -368,9 +368,6 @@ class TestCmdUpdateLaunchdRestart:
|
||||
monkeypatch.setattr(
|
||||
gateway_cli, "is_macos", lambda: False,
|
||||
)
|
||||
monkeypatch.setattr(
|
||||
gateway_cli, "is_linux", lambda: True,
|
||||
)
|
||||
|
||||
mock_run.side_effect = _make_run_side_effect(
|
||||
commit_count="3",
|
||||
|
||||
@@ -1,319 +0,0 @@
|
||||
"""Tests for the context-halving bugfix.
|
||||
|
||||
Background
|
||||
----------
|
||||
When the API returns "max_tokens too large given prompt" (input is fine,
|
||||
but input_tokens + requested max_tokens > context_window), the old code
|
||||
incorrectly halved context_length via get_next_probe_tier().
|
||||
|
||||
The fix introduces:
|
||||
* parse_available_output_tokens_from_error() — detects this specific
|
||||
error class and returns the available output token budget.
|
||||
* _ephemeral_max_output_tokens on AIAgent — a one-shot override that
|
||||
caps the output for one retry without touching context_length.
|
||||
|
||||
Naming note
|
||||
-----------
|
||||
max_tokens = OUTPUT token cap (a single response).
|
||||
context_length = TOTAL context window (input + output combined).
|
||||
These are different and the old code conflated them; the fix keeps them
|
||||
separate.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
from unittest.mock import MagicMock, patch, PropertyMock
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# parse_available_output_tokens_from_error — unit tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestParseAvailableOutputTokens:
|
||||
"""Pure-function tests; no I/O required."""
|
||||
|
||||
def _parse(self, msg):
|
||||
from agent.model_metadata import parse_available_output_tokens_from_error
|
||||
return parse_available_output_tokens_from_error(msg)
|
||||
|
||||
# ── Should detect and extract ────────────────────────────────────────
|
||||
|
||||
def test_anthropic_canonical_format(self):
|
||||
"""Canonical Anthropic error: max_tokens: X > context_window: Y - input_tokens: Z = available_tokens: W"""
|
||||
msg = (
|
||||
"max_tokens: 32768 > context_window: 200000 "
|
||||
"- input_tokens: 190000 = available_tokens: 10000"
|
||||
)
|
||||
assert self._parse(msg) == 10000
|
||||
|
||||
def test_anthropic_format_large_numbers(self):
|
||||
msg = (
|
||||
"max_tokens: 128000 > context_window: 200000 "
|
||||
"- input_tokens: 180000 = available_tokens: 20000"
|
||||
)
|
||||
assert self._parse(msg) == 20000
|
||||
|
||||
def test_available_tokens_variant_spacing(self):
|
||||
"""Handles extra spaces around the colon."""
|
||||
msg = "max_tokens: 32768 > 200000 available_tokens : 5000"
|
||||
assert self._parse(msg) == 5000
|
||||
|
||||
def test_available_tokens_natural_language(self):
|
||||
"""'available tokens: N' wording (no underscore)."""
|
||||
msg = "max_tokens must be at most 10000 given your prompt (available tokens: 10000)"
|
||||
assert self._parse(msg) == 10000
|
||||
|
||||
def test_single_token_available(self):
|
||||
"""Edge case: only 1 token left."""
|
||||
msg = "max_tokens: 9999 > context_window: 10000 - input_tokens: 9999 = available_tokens: 1"
|
||||
assert self._parse(msg) == 1
|
||||
|
||||
# ── Should NOT detect (returns None) ─────────────────────────────────
|
||||
|
||||
def test_prompt_too_long_is_not_output_cap_error(self):
|
||||
"""'prompt is too long' errors must NOT be caught — they need context halving."""
|
||||
msg = "prompt is too long: 205000 tokens > 200000 maximum"
|
||||
assert self._parse(msg) is None
|
||||
|
||||
def test_generic_context_window_exceeded(self):
|
||||
"""Generic context window errors without available_tokens should not match."""
|
||||
msg = "context window exceeded: maximum is 32768 tokens"
|
||||
assert self._parse(msg) is None
|
||||
|
||||
def test_context_length_exceeded(self):
|
||||
msg = "context_length_exceeded: prompt has 131073 tokens, limit is 131072"
|
||||
assert self._parse(msg) is None
|
||||
|
||||
def test_no_max_tokens_keyword(self):
|
||||
"""Error not related to max_tokens at all."""
|
||||
msg = "invalid_api_key: the API key is invalid"
|
||||
assert self._parse(msg) is None
|
||||
|
||||
def test_empty_string(self):
|
||||
assert self._parse("") is None
|
||||
|
||||
def test_rate_limit_error(self):
|
||||
msg = "rate_limit_error: too many requests per minute"
|
||||
assert self._parse(msg) is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# build_anthropic_kwargs — output cap clamping
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestBuildAnthropicKwargsClamping:
|
||||
"""The context_length clamp only fires when output ceiling > window.
|
||||
For standard Anthropic models (output ceiling < window) it must not fire.
|
||||
"""
|
||||
|
||||
def _build(self, model, max_tokens=None, context_length=None):
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
return build_anthropic_kwargs(
|
||||
model=model,
|
||||
messages=[{"role": "user", "content": "hi"}],
|
||||
tools=None,
|
||||
max_tokens=max_tokens,
|
||||
reasoning_config=None,
|
||||
context_length=context_length,
|
||||
)
|
||||
|
||||
def test_no_clamping_when_output_ceiling_fits_in_window(self):
|
||||
"""Opus 4.6 native output (128K) < context window (200K) — no clamping."""
|
||||
kwargs = self._build("claude-opus-4-6", context_length=200_000)
|
||||
assert kwargs["max_tokens"] == 128_000
|
||||
|
||||
def test_clamping_fires_for_tiny_custom_window(self):
|
||||
"""When context_length is 8K (local model), output cap is clamped to 7999."""
|
||||
kwargs = self._build("claude-opus-4-6", context_length=8_000)
|
||||
assert kwargs["max_tokens"] == 7_999
|
||||
|
||||
def test_explicit_max_tokens_respected_when_within_window(self):
|
||||
"""Explicit max_tokens smaller than window passes through unchanged."""
|
||||
kwargs = self._build("claude-opus-4-6", max_tokens=4096, context_length=200_000)
|
||||
assert kwargs["max_tokens"] == 4096
|
||||
|
||||
def test_explicit_max_tokens_clamped_when_exceeds_window(self):
|
||||
"""Explicit max_tokens larger than a small window is clamped."""
|
||||
kwargs = self._build("claude-opus-4-6", max_tokens=32_768, context_length=16_000)
|
||||
assert kwargs["max_tokens"] == 15_999
|
||||
|
||||
def test_no_context_length_uses_native_ceiling(self):
|
||||
"""Without context_length the native output ceiling is used directly."""
|
||||
kwargs = self._build("claude-sonnet-4-6")
|
||||
assert kwargs["max_tokens"] == 64_000
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Ephemeral max_tokens mechanism — _build_api_kwargs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestEphemeralMaxOutputTokens:
|
||||
"""_build_api_kwargs consumes _ephemeral_max_output_tokens exactly once
|
||||
and falls back to self.max_tokens on subsequent calls.
|
||||
"""
|
||||
|
||||
def _make_agent(self):
|
||||
"""Return a minimal AIAgent with api_mode='anthropic_messages' and
|
||||
a stubbed context_compressor, bypassing full __init__ cost."""
|
||||
from run_agent import AIAgent
|
||||
agent = object.__new__(AIAgent)
|
||||
# Minimal attributes used by _build_api_kwargs
|
||||
agent.api_mode = "anthropic_messages"
|
||||
agent.model = "claude-opus-4-6"
|
||||
agent.tools = []
|
||||
agent.max_tokens = None
|
||||
agent.reasoning_config = None
|
||||
agent._is_anthropic_oauth = False
|
||||
agent._ephemeral_max_output_tokens = None
|
||||
|
||||
compressor = MagicMock()
|
||||
compressor.context_length = 200_000
|
||||
agent.context_compressor = compressor
|
||||
|
||||
# Stub out the internal message-preparation helper
|
||||
agent._prepare_anthropic_messages_for_api = MagicMock(
|
||||
return_value=[{"role": "user", "content": "hi"}]
|
||||
)
|
||||
agent._anthropic_preserve_dots = MagicMock(return_value=False)
|
||||
return agent
|
||||
|
||||
def test_ephemeral_override_is_used_on_first_call(self):
|
||||
"""When _ephemeral_max_output_tokens is set, it overrides self.max_tokens."""
|
||||
agent = self._make_agent()
|
||||
agent._ephemeral_max_output_tokens = 5_000
|
||||
|
||||
kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
|
||||
assert kwargs["max_tokens"] == 5_000
|
||||
|
||||
def test_ephemeral_override_is_consumed_after_one_call(self):
|
||||
"""After one call the ephemeral override is cleared to None."""
|
||||
agent = self._make_agent()
|
||||
agent._ephemeral_max_output_tokens = 5_000
|
||||
|
||||
agent._build_api_kwargs([{"role": "user", "content": "hi"}])
|
||||
assert agent._ephemeral_max_output_tokens is None
|
||||
|
||||
def test_subsequent_call_uses_self_max_tokens(self):
|
||||
"""A second _build_api_kwargs call uses the normal max_tokens path."""
|
||||
agent = self._make_agent()
|
||||
agent._ephemeral_max_output_tokens = 5_000
|
||||
agent.max_tokens = None # will resolve to native ceiling (128K for Opus 4.6)
|
||||
|
||||
agent._build_api_kwargs([{"role": "user", "content": "hi"}])
|
||||
# Second call — ephemeral is gone
|
||||
kwargs2 = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
|
||||
assert kwargs2["max_tokens"] == 128_000 # Opus 4.6 native ceiling
|
||||
|
||||
def test_no_ephemeral_uses_self_max_tokens_directly(self):
|
||||
"""Without an ephemeral override, self.max_tokens is used normally."""
|
||||
agent = self._make_agent()
|
||||
agent.max_tokens = 8_192
|
||||
|
||||
kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
|
||||
assert kwargs["max_tokens"] == 8_192
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Integration: error handler does NOT halve context_length for output-cap errors
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestContextNotHalvedOnOutputCapError:
|
||||
"""When the API returns 'max_tokens too large given prompt', the handler
|
||||
must set _ephemeral_max_output_tokens and NOT modify context_length.
|
||||
"""
|
||||
|
||||
def _make_agent_with_compressor(self, context_length=200_000):
|
||||
from run_agent import AIAgent
|
||||
from agent.context_compressor import ContextCompressor
|
||||
|
||||
agent = object.__new__(AIAgent)
|
||||
agent.api_mode = "anthropic_messages"
|
||||
agent.model = "claude-opus-4-6"
|
||||
agent.base_url = "https://api.anthropic.com"
|
||||
agent.tools = []
|
||||
agent.max_tokens = None
|
||||
agent.reasoning_config = None
|
||||
agent._is_anthropic_oauth = False
|
||||
agent._ephemeral_max_output_tokens = None
|
||||
agent.log_prefix = ""
|
||||
agent.quiet_mode = True
|
||||
agent.verbose_logging = False
|
||||
|
||||
compressor = MagicMock(spec=ContextCompressor)
|
||||
compressor.context_length = context_length
|
||||
compressor.threshold_percent = 0.75
|
||||
agent.context_compressor = compressor
|
||||
|
||||
agent._prepare_anthropic_messages_for_api = MagicMock(
|
||||
return_value=[{"role": "user", "content": "hi"}]
|
||||
)
|
||||
agent._anthropic_preserve_dots = MagicMock(return_value=False)
|
||||
agent._vprint = MagicMock()
|
||||
return agent
|
||||
|
||||
def test_output_cap_error_sets_ephemeral_not_context_length(self):
|
||||
"""On 'max_tokens too large' error, _ephemeral_max_output_tokens is set
|
||||
and compressor.context_length is left unchanged."""
|
||||
from agent.model_metadata import parse_available_output_tokens_from_error
|
||||
from agent.model_metadata import get_next_probe_tier
|
||||
|
||||
error_msg = (
|
||||
"max_tokens: 128000 > context_window: 200000 "
|
||||
"- input_tokens: 180000 = available_tokens: 20000"
|
||||
)
|
||||
|
||||
# Simulate the handler logic from run_agent.py
|
||||
agent = self._make_agent_with_compressor(context_length=200_000)
|
||||
old_ctx = agent.context_compressor.context_length
|
||||
|
||||
available_out = parse_available_output_tokens_from_error(error_msg)
|
||||
assert available_out == 20_000, "parser must detect the error"
|
||||
|
||||
# The fix: set ephemeral, skip context_length modification
|
||||
agent._ephemeral_max_output_tokens = max(1, available_out - 64)
|
||||
|
||||
# context_length must be untouched
|
||||
assert agent.context_compressor.context_length == old_ctx
|
||||
assert agent._ephemeral_max_output_tokens == 19_936
|
||||
|
||||
def test_prompt_too_long_still_triggers_probe_tier(self):
|
||||
"""Genuine prompt-too-long errors must still use get_next_probe_tier."""
|
||||
from agent.model_metadata import parse_available_output_tokens_from_error
|
||||
from agent.model_metadata import get_next_probe_tier
|
||||
|
||||
error_msg = "prompt is too long: 205000 tokens > 200000 maximum"
|
||||
|
||||
available_out = parse_available_output_tokens_from_error(error_msg)
|
||||
assert available_out is None, "prompt-too-long must not be caught by output-cap parser"
|
||||
|
||||
# The old halving path is still used for this class of error
|
||||
new_ctx = get_next_probe_tier(200_000)
|
||||
assert new_ctx == 128_000
|
||||
|
||||
def test_output_cap_error_safety_margin(self):
|
||||
"""The ephemeral value includes a 64-token safety margin below available_out."""
|
||||
from agent.model_metadata import parse_available_output_tokens_from_error
|
||||
|
||||
error_msg = (
|
||||
"max_tokens: 32768 > context_window: 200000 "
|
||||
"- input_tokens: 190000 = available_tokens: 10000"
|
||||
)
|
||||
available_out = parse_available_output_tokens_from_error(error_msg)
|
||||
safe_out = max(1, available_out - 64)
|
||||
assert safe_out == 9_936
|
||||
|
||||
def test_safety_margin_never_goes_below_one(self):
|
||||
"""When available_out is very small, safe_out must be at least 1."""
|
||||
from agent.model_metadata import parse_available_output_tokens_from_error
|
||||
|
||||
error_msg = (
|
||||
"max_tokens: 10 > context_window: 200000 "
|
||||
"- input_tokens: 199990 = available_tokens: 1"
|
||||
)
|
||||
available_out = parse_available_output_tokens_from_error(error_msg)
|
||||
safe_out = max(1, available_out - 64)
|
||||
assert safe_out == 1
|
||||
@@ -63,4 +63,4 @@ class TestCamofoxConfigDefaults:
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
|
||||
# managed_persistence is auto-merged by _deep_merge, no version bump needed
|
||||
assert DEFAULT_CONFIG["_config_version"] == 13
|
||||
assert DEFAULT_CONFIG["_config_version"] == 12
|
||||
|
||||
@@ -258,30 +258,28 @@ def _make_execute_only_env(forward_env=None):
|
||||
|
||||
def test_init_env_args_uses_hermes_dotenv_for_allowlisted_env(monkeypatch):
|
||||
"""_build_init_env_args picks up forwarded env vars from .env file at init time."""
|
||||
# Use a var that is NOT in _HERMES_PROVIDER_ENV_BLOCKLIST (GITHUB_TOKEN
|
||||
# is in the copilot provider's api_key_env_vars and gets stripped).
|
||||
env = _make_execute_only_env(["DATABASE_URL"])
|
||||
env = _make_execute_only_env(["GITHUB_TOKEN"])
|
||||
|
||||
monkeypatch.delenv("DATABASE_URL", raising=False)
|
||||
monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"DATABASE_URL": "value_from_dotenv"})
|
||||
monkeypatch.delenv("GITHUB_TOKEN", raising=False)
|
||||
monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"GITHUB_TOKEN": "value_from_dotenv"})
|
||||
|
||||
args = env._build_init_env_args()
|
||||
args_str = " ".join(args)
|
||||
|
||||
assert "DATABASE_URL=value_from_dotenv" in args_str
|
||||
assert "GITHUB_TOKEN=value_from_dotenv" in args_str
|
||||
|
||||
|
||||
def test_init_env_args_prefers_shell_env_over_hermes_dotenv(monkeypatch):
|
||||
"""Shell env vars take priority over .env file values in init env args."""
|
||||
env = _make_execute_only_env(["DATABASE_URL"])
|
||||
env = _make_execute_only_env(["GITHUB_TOKEN"])
|
||||
|
||||
monkeypatch.setenv("DATABASE_URL", "value_from_shell")
|
||||
monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"DATABASE_URL": "value_from_dotenv"})
|
||||
monkeypatch.setenv("GITHUB_TOKEN", "value_from_shell")
|
||||
monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"GITHUB_TOKEN": "value_from_dotenv"})
|
||||
|
||||
args = env._build_init_env_args()
|
||||
args_str = " ".join(args)
|
||||
|
||||
assert "DATABASE_URL=value_from_shell" in args_str
|
||||
assert "GITHUB_TOKEN=value_from_shell" in args_str
|
||||
assert "value_from_dotenv" not in args_str
|
||||
|
||||
|
||||
|
||||
@@ -147,7 +147,7 @@ class TestBaseEnvCompatibility:
|
||||
"""Hermes wires parser selection through ServerManager.tool_parser."""
|
||||
import ast
|
||||
|
||||
base_env_path = Path(__file__).parent.parent.parent / "environments" / "hermes_base_env.py"
|
||||
base_env_path = Path(__file__).parent.parent / "environments" / "hermes_base_env.py"
|
||||
source = base_env_path.read_text()
|
||||
tree = ast.parse(source)
|
||||
|
||||
@@ -171,7 +171,7 @@ class TestBaseEnvCompatibility:
|
||||
|
||||
def test_hermes_base_env_uses_config_tool_call_parser(self):
|
||||
"""Verify hermes_base_env uses the config field rather than a local parser instance."""
|
||||
base_env_path = Path(__file__).parent.parent.parent / "environments" / "hermes_base_env.py"
|
||||
base_env_path = Path(__file__).parent.parent / "environments" / "hermes_base_env.py"
|
||||
source = base_env_path.read_text()
|
||||
|
||||
assert 'tool_call_parser: str = Field(' in source
|
||||
|
||||
@@ -125,9 +125,7 @@ class TestSendMatrix:
|
||||
url = call_kwargs[0][0]
|
||||
assert url.startswith("https://matrix.example.com/_matrix/client/v3/rooms/!room:example.com/send/m.room.message/")
|
||||
assert call_kwargs[1]["headers"]["Authorization"] == "Bearer syt_tok"
|
||||
payload = call_kwargs[1]["json"]
|
||||
assert payload["msgtype"] == "m.text"
|
||||
assert payload["body"] == "hello matrix"
|
||||
assert call_kwargs[1]["json"] == {"msgtype": "m.text", "body": "hello matrix"}
|
||||
|
||||
def test_http_error(self):
|
||||
resp = _make_aiohttp_resp(403, text_data="Forbidden")
|
||||
|
||||
@@ -30,10 +30,7 @@ class TestValidateImageUrl:
|
||||
"""Tests for URL validation, including urlparse-based netloc check."""
|
||||
|
||||
def test_valid_https_url(self):
|
||||
with patch("tools.url_safety.socket.getaddrinfo", return_value=[
|
||||
(2, 1, 6, "", ("93.184.216.34", 0)),
|
||||
]):
|
||||
assert _validate_image_url("https://example.com/image.jpg") is True
|
||||
assert _validate_image_url("https://example.com/image.jpg") is True
|
||||
|
||||
def test_valid_http_url(self):
|
||||
with patch("tools.url_safety.socket.getaddrinfo", return_value=[
|
||||
@@ -59,16 +56,10 @@ class TestValidateImageUrl:
|
||||
assert _validate_image_url("http://localhost:8080/image.png") is False
|
||||
|
||||
def test_valid_url_with_port(self):
|
||||
with patch("tools.url_safety.socket.getaddrinfo", return_value=[
|
||||
(2, 1, 6, "", ("93.184.216.34", 0)),
|
||||
]):
|
||||
assert _validate_image_url("http://example.com:8080/image.png") is True
|
||||
assert _validate_image_url("http://example.com:8080/image.png") is True
|
||||
|
||||
def test_valid_url_with_path_only(self):
|
||||
with patch("tools.url_safety.socket.getaddrinfo", return_value=[
|
||||
(2, 1, 6, "", ("93.184.216.34", 0)),
|
||||
]):
|
||||
assert _validate_image_url("https://example.com/") is True
|
||||
assert _validate_image_url("https://example.com/") is True
|
||||
|
||||
def test_rejects_empty_string(self):
|
||||
assert _validate_image_url("") is False
|
||||
@@ -450,11 +441,6 @@ class TestVisionRequirements:
|
||||
(tmp_path / "auth.json").write_text(
|
||||
'{"active_provider":"openai-codex","providers":{"openai-codex":{"tokens":{"access_token":"codex-access-token","refresh_token":"codex-refresh-token"}}}}'
|
||||
)
|
||||
# config.yaml must reference the codex provider so vision auto-detect
|
||||
# falls back to the active provider via _read_main_provider().
|
||||
(tmp_path / "config.yaml").write_text(
|
||||
'model:\n default: gpt-4o\n provider: openai-codex\n'
|
||||
)
|
||||
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)
|
||||
monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
|
||||
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
|
||||
|
||||
@@ -225,7 +225,6 @@ class TestWebCrawlTavily:
|
||||
patch.dict(os.environ, {"TAVILY_API_KEY": "tvly-test"}), \
|
||||
patch("tools.web_tools.httpx.post", return_value=mock_response), \
|
||||
patch("tools.web_tools.check_website_access", return_value=None), \
|
||||
patch("tools.web_tools.is_safe_url", return_value=True), \
|
||||
patch("tools.interrupt.is_interrupted", return_value=False):
|
||||
from tools.web_tools import web_crawl_tool
|
||||
result = json.loads(asyncio.get_event_loop().run_until_complete(
|
||||
@@ -245,7 +244,6 @@ class TestWebCrawlTavily:
|
||||
patch.dict(os.environ, {"TAVILY_API_KEY": "tvly-test"}), \
|
||||
patch("tools.web_tools.httpx.post", return_value=mock_response) as mock_post, \
|
||||
patch("tools.web_tools.check_website_access", return_value=None), \
|
||||
patch("tools.web_tools.is_safe_url", return_value=True), \
|
||||
patch("tools.interrupt.is_interrupted", return_value=False):
|
||||
from tools.web_tools import web_crawl_tool
|
||||
asyncio.get_event_loop().run_until_complete(
|
||||
|
||||
@@ -657,8 +657,8 @@ model:
|
||||
#### Responses get cut off mid-sentence
|
||||
|
||||
**Possible causes:**
|
||||
1. **Low output cap (`max_tokens`) on the server** — SGLang defaults to 128 tokens per response. Set `--default-max-tokens` on the server or configure Hermes with `model.max_tokens` in config.yaml. Note: `max_tokens` controls response length only — it is unrelated to how long your conversation history can be (that is `context_length`).
|
||||
2. **Context exhaustion** — The model filled its context window. Increase `model.context_length` or enable [context compression](/docs/user-guide/configuration#context-compression) in Hermes.
|
||||
1. **Low `max_tokens` on the server** — SGLang defaults to 128 tokens per response. Set `--default-max-tokens` on the server or configure Hermes with `model.max_tokens` in config.yaml.
|
||||
2. **Context exhaustion** — The model filled its context window. Increase context length or enable [context compression](/docs/user-guide/configuration#context-compression) in Hermes.
|
||||
|
||||
---
|
||||
|
||||
@@ -751,15 +751,6 @@ model:
|
||||
|
||||
### Context Length Detection
|
||||
|
||||
:::note Two settings, easy to confuse
|
||||
**`context_length`** is the **total context window** — the combined budget for input *and* output tokens (e.g. 200,000 for Claude Opus 4.6). Hermes uses this to decide when to compress history and to validate API requests.
|
||||
|
||||
**`model.max_tokens`** is the **output cap** — the maximum number of tokens the model may generate in a *single response*. It has nothing to do with how long your conversation history can be. The industry-standard name `max_tokens` is a common source of confusion; Anthropic's native API has since renamed it `max_output_tokens` for clarity.
|
||||
|
||||
Set `context_length` when auto-detection gets the window size wrong.
|
||||
Set `model.max_tokens` only when you need to limit how long individual responses can be.
|
||||
:::
|
||||
|
||||
Hermes uses a multi-source resolution chain to detect the correct context window for your model and provider:
|
||||
|
||||
1. **Config override** — `model.context_length` in config.yaml (highest priority)
|
||||
|
||||
@@ -46,6 +46,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in
|
||||
| `/config` | Show current configuration |
|
||||
| `/model [model-name]` | Show or change the current model. Supports: `/model claude-sonnet-4`, `/model provider:model` (switch providers), `/model custom:model` (custom endpoint), `/model custom:name:model` (named custom provider), `/model custom` (auto-detect from endpoint) |
|
||||
| `/provider` | Show available providers and current provider |
|
||||
| `/prompt` | View/set custom system prompt |
|
||||
| `/personality` | Set a predefined personality |
|
||||
| `/verbose` | Cycle tool progress display: off → new → all → verbose. Can be [enabled for messaging](#notes) via config. |
|
||||
| `/reasoning` | Manage reasoning effort and display (usage: /reasoning [level\|show\|hide]) |
|
||||
@@ -143,7 +144,7 @@ The messaging gateway supports the following built-in commands inside Telegram,
|
||||
|
||||
## Notes
|
||||
|
||||
- `/skin`, `/tools`, `/toolsets`, `/browser`, `/config`, `/cron`, `/skills`, `/platforms`, `/paste`, `/statusbar`, and `/plugins` are **CLI-only** commands.
|
||||
- `/skin`, `/tools`, `/toolsets`, `/browser`, `/config`, `/prompt`, `/cron`, `/skills`, `/platforms`, `/paste`, `/statusbar`, and `/plugins` are **CLI-only** commands.
|
||||
- `/verbose` is **CLI-only by default**, but can be enabled for messaging platforms by setting `display.tool_progress_command: true` in `config.yaml`. When enabled, it cycles the `display.tool_progress` mode and saves to config.
|
||||
- `/status`, `/sethome`, `/update`, `/approve`, `/deny`, and `/commands` are **messaging-only** commands.
|
||||
- `/background`, `/voice`, `/reload-mcp`, `/rollback`, and `/yolo` work in **both** the CLI and the messaging gateway.
|
||||
|
||||
Reference in New Issue
Block a user