diff --git a/.github/workflows/docs-site-checks.yml b/.github/workflows/docs-site-checks.yml index 14cdb8f6a6..ea05d28046 100644 --- a/.github/workflows/docs-site-checks.yml +++ b/.github/workflows/docs-site-checks.yml @@ -27,8 +27,8 @@ jobs: with: python-version: '3.11' - - name: Install Python dependencies - run: python -m pip install ascii-guard pyyaml + - name: Install ascii-guard + run: python -m pip install ascii-guard==2.3.0 pyyaml==6.0.3 - name: Extract skill metadata for dashboard run: python3 website/scripts/extract-skills.py diff --git a/.github/workflows/nix.yml b/.github/workflows/nix.yml index 004f8236a2..dba33bfffc 100644 --- a/.github/workflows/nix.yml +++ b/.github/workflows/nix.yml @@ -27,8 +27,8 @@ jobs: timeout-minutes: 30 steps: - uses: actions/checkout@v4 - - uses: DeterminateSystems/nix-installer-action@main - - uses: DeterminateSystems/magic-nix-cache-action@main + - uses: DeterminateSystems/nix-installer-action@ef8a148080ab6020fd15196c2084a2eea5ff2d25 # v22 + - uses: DeterminateSystems/magic-nix-cache-action@565684385bcd71bad329742eefe8d12f2e765b39 # v13 - name: Check flake if: runner.os == 'Linux' run: nix flake check --print-build-logs diff --git a/Dockerfile b/Dockerfile index a9624530c0..0eddaba0bc 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,8 @@ FROM debian:13.4 +# Disable Python stdout buffering to ensure logs are printed immediately +ENV PYTHONUNBUFFERED=1 + # Install system dependencies in one layer, clear APT cache RUN apt-get update && \ apt-get install -y --no-install-recommends \ diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index fa5e391a4f..d5c0c06fbb 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -1238,10 +1238,27 @@ def build_anthropic_kwargs( ) -> Dict[str, Any]: """Build kwargs for anthropic.messages.create(). - When *max_tokens* is None, the model's native output limit is used - (e.g. 128K for Opus 4.6, 64K for Sonnet 4.6). 
If *context_length* - is provided, the effective limit is clamped so it doesn't exceed - the context window. + Naming note — two distinct concepts, easily confused: + max_tokens = OUTPUT token cap for a single response. + Anthropic's API calls this "max_tokens" but it only + limits the *output*. Anthropic's own native SDK + renamed it "max_output_tokens" for clarity. + context_length = TOTAL context window (input tokens + output tokens). + The API enforces: input_tokens + max_tokens ≤ context_length. + Stored on the ContextCompressor; reduced on overflow errors. + + When *max_tokens* is None the model's native output ceiling is used + (e.g. 128K for Opus 4.6, 64K for Sonnet 4.6). + + When *context_length* is provided and the model's native output ceiling + exceeds it (e.g. a local endpoint with an 8K window), the output cap is + clamped to context_length − 1. This only kicks in for unusually small + context windows; for full-size models the native output cap is always + smaller than the context window so no clamping happens. + NOTE: this clamping does not account for prompt size — if the prompt is + large, Anthropic may still reject the request. The caller must detect + "max_tokens too large given prompt" errors and retry with a smaller cap + (see parse_available_output_tokens_from_error + _ephemeral_max_output_tokens). When *is_oauth* is True, applies Claude Code compatibility transforms: system prompt prefix, tool name prefixing, and prompt sanitization. @@ -1256,10 +1273,14 @@ def build_anthropic_kwargs( anthropic_tools = convert_tools_to_anthropic(tools) if tools else [] model = normalize_model_name(model, preserve_dots=preserve_dots) + # effective_max_tokens = output cap for this call (≠ total context window) effective_max_tokens = max_tokens or _get_anthropic_max_output(model) - # Clamp to context window if the user set a lower context_length - # (e.g. custom endpoint with limited capacity). + # Clamp output cap to fit inside the total context window. 
+ # Only matters for small custom endpoints where context_length < native + # output ceiling. For standard Anthropic models context_length (e.g. + # 200K) is always larger than the output ceiling (e.g. 128K), so this + # branch is not taken. if context_length and effective_max_tokens > context_length: effective_max_tokens = max(context_length - 1, 1) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 27c67c10a3..2f3a64a64c 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -702,7 +702,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model) extra = {} if "api.kimi.com" in base_url.lower(): - extra["default_headers"] = {"User-Agent": "KimiCLI/1.0"} + extra["default_headers"] = {"User-Agent": "KimiCLI/1.3"} elif "api.githubcopilot.com" in base_url.lower(): from hermes_cli.models import copilot_default_headers @@ -721,7 +721,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model) extra = {} if "api.kimi.com" in base_url.lower(): - extra["default_headers"] = {"User-Agent": "KimiCLI/1.0"} + extra["default_headers"] = {"User-Agent": "KimiCLI/1.3"} elif "api.githubcopilot.com" in base_url.lower(): from hermes_cli.models import copilot_default_headers @@ -1047,6 +1047,32 @@ def _is_payment_error(exc: Exception) -> bool: return False +def _is_connection_error(exc: Exception) -> bool: + """Detect connection/network errors that warrant provider fallback. + + Returns True for errors indicating the provider endpoint is unreachable + (DNS failure, connection refused, TLS errors, timeouts). These are + distinct from API errors (4xx/5xx) which indicate the provider IS + reachable but returned an error. 
+ """ + from openai import APIConnectionError, APITimeoutError + + if isinstance(exc, (APIConnectionError, APITimeoutError)): + return True + # urllib3 / httpx / httpcore connection errors + err_type = type(exc).__name__ + if any(kw in err_type for kw in ("Connection", "Timeout", "DNS", "SSL")): + return True + err_lower = str(exc).lower() + if any(kw in err_lower for kw in ( + "connection refused", "name or service not known", + "no route to host", "network is unreachable", + "timed out", "connection reset", + )): + return True + return False + + def _try_payment_fallback( failed_provider: str, task: str = None, @@ -1169,7 +1195,7 @@ def _to_async_client(sync_client, model: str): async_kwargs["default_headers"] = copilot_default_headers() elif "api.kimi.com" in base_lower: - async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.0"} + async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.3"} return AsyncOpenAI(**async_kwargs), model @@ -1289,7 +1315,13 @@ def resolve_provider_client( ) return None, None final_model = model or _read_main_model() or "gpt-4o-mini" - client = OpenAI(api_key=custom_key, base_url=custom_base) + extra = {} + if "api.kimi.com" in custom_base.lower(): + extra["default_headers"] = {"User-Agent": "KimiCLI/1.3"} + elif "api.githubcopilot.com" in custom_base.lower(): + from hermes_cli.models import copilot_default_headers + extra["default_headers"] = copilot_default_headers() + client = OpenAI(api_key=custom_key, base_url=custom_base, **extra) return (_to_async_client(client, final_model) if async_mode else (client, final_model)) # Try custom first, then codex, then API-key providers @@ -1368,7 +1400,7 @@ def resolve_provider_client( # Provider-specific headers headers = {} if "api.kimi.com" in base_url.lower(): - headers["User-Agent"] = "KimiCLI/1.0" + headers["User-Agent"] = "KimiCLI/1.3" elif "api.githubcopilot.com" in base_url.lower(): from hermes_cli.models import copilot_default_headers @@ -2093,7 +2125,18 @@ def call_llm( # 
try alternative providers instead of giving up. This handles the # common case where a user runs out of OpenRouter credits but has # Codex OAuth or another provider available. - if _is_payment_error(first_err): + # + # ── Connection error fallback ──────────────────────────────── + # When a provider endpoint is unreachable (DNS failure, connection + # refused, timeout), try alternative providers. This handles stale + # Codex/OAuth tokens that authenticate but whose endpoint is down, + # and providers the user never configured that got picked up by + # the auto-detection chain. + should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err) + if should_fallback: + reason = "payment error" if _is_payment_error(first_err) else "connection error" + logger.info("Auxiliary %s: %s on %s (%s), trying fallback", + task or "call", reason, resolved_provider, first_err) fb_client, fb_model, fb_label = _try_payment_fallback( resolved_provider, task) if fb_client is not None: diff --git a/agent/credential_pool.py b/agent/credential_pool.py index dd2c9abc5e..a17d71ba5e 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -18,12 +18,14 @@ import hermes_cli.auth as auth_mod from hermes_cli.auth import ( CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, DEFAULT_AGENT_KEY_MIN_TTL_SECONDS, + KIMI_CODE_BASE_URL, PROVIDER_REGISTRY, _codex_access_token_is_expiring, _decode_jwt_claims, _import_codex_cli_tokens, _load_auth_store, _load_provider_state, + _resolve_kimi_base_url, _resolve_zai_base_url, read_credential_pool, write_credential_pool, @@ -1084,7 +1086,9 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool active_sources.add(source) auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY base_url = env_url or pconfig.inference_base_url - if provider == "zai": + if provider == "kimi-coding": + base_url = _resolve_kimi_base_url(token, pconfig.inference_base_url, 
env_url) + elif provider == "zai": base_url = _resolve_zai_base_url(token, pconfig.inference_base_url, env_url) changed |= _upsert_entry( entries, diff --git a/agent/error_classifier.py b/agent/error_classifier.py index b227932ad7..0f1450113f 100644 --- a/agent/error_classifier.py +++ b/agent/error_classifier.py @@ -596,6 +596,9 @@ def _classify_400( err_obj = body.get("error", {}) if isinstance(err_obj, dict): err_body_msg = (err_obj.get("message") or "").strip().lower() + # Responses API (and some providers) use flat body: {"message": "..."} + if not err_body_msg: + err_body_msg = (body.get("message") or "").strip().lower() is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "") is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80 diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 9282586fea..791f778c22 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -603,6 +603,49 @@ def parse_context_limit_from_error(error_msg: str) -> Optional[int]: return None +def parse_available_output_tokens_from_error(error_msg: str) -> Optional[int]: + """Detect an "output cap too large" error and return how many output tokens are available. + + Background — two distinct context errors exist: + 1. "Prompt too long" — the INPUT itself exceeds the context window. + Fix: compress history and/or halve context_length. + 2. "max_tokens too large" — input is fine, but input + requested_output > window. + Fix: reduce max_tokens (the output cap) for this call. + Do NOT touch context_length — the window hasn't shrunk. + + Anthropic's API returns errors like: + "max_tokens: 32768 > context_window: 200000 - input_tokens: 190000 = available_tokens: 10000" + + Returns the number of output tokens that would fit (e.g. 10000 above), or None if + the error does not look like a max_tokens-too-large error. + """ + error_lower = error_msg.lower() + + # Must look like an output-cap error, not a prompt-length error. 
+ is_output_cap_error = ( + "max_tokens" in error_lower + and ("available_tokens" in error_lower or "available tokens" in error_lower) + ) + if not is_output_cap_error: + return None + + # Extract the available_tokens figure. + # Anthropic format: "… = available_tokens: 10000" + patterns = [ + r'available_tokens[:\s]+(\d+)', + r'available\s+tokens[:\s]+(\d+)', + # fallback: last number after "=" in expressions like "200000 - 190000 = 10000" + r'=\s*(\d+)\s*$', + ] + for pattern in patterns: + match = re.search(pattern, error_lower) + if match: + tokens = int(match.group(1)) + if tokens >= 1: + return tokens + return None + + def _model_id_matches(candidate_id: str, lookup_model: str) -> bool: """Return True if *candidate_id* (from server) matches *lookup_model* (configured). diff --git a/cli-config.yaml.example b/cli-config.yaml.example index d75284443f..346e6e851f 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -48,6 +48,25 @@ model: # api_key: "your-key-here" # Uncomment to set here instead of .env base_url: "https://openrouter.ai/api/v1" + # ── Token limits — two settings, easy to confuse ────────────────────────── + # + # context_length: TOTAL context window (input + output tokens combined). + # Controls when Hermes compresses history and validates requests. + # Leave unset — Hermes auto-detects the correct value from the provider. + # Set manually only when auto-detection is wrong (e.g. a local server with + # a custom num_ctx, or a proxy that doesn't expose /v1/models). + # + # context_length: 131072 + # + # max_tokens: OUTPUT cap — maximum tokens the model may generate per response. + # Unrelated to how long your conversation history can be. + # The OpenAI-standard name "max_tokens" is a misnomer; Anthropic's native + # API has since renamed it "max_output_tokens" for clarity. + # Leave unset to use the model's native output ceiling (recommended). + # Set only if you want to deliberately limit individual response length. 
+ # + # max_tokens: 8192 + # ============================================================================= # OpenRouter Provider Routing (only applies when using OpenRouter) # ============================================================================= diff --git a/cli.py b/cli.py index fa32ae9119..db956766b4 100644 --- a/cli.py +++ b/cli.py @@ -1603,7 +1603,12 @@ class HermesCLI: return f"[{('█' * filled) + ('░' * max(0, width - filled))}]" def _get_status_bar_snapshot(self) -> Dict[str, Any]: - model_name = self.model or "unknown" + # Prefer the agent's model name — it updates on fallback. + # self.model reflects the originally configured model and never + # changes mid-session, so the TUI would show a stale name after + # _try_activate_fallback() switches provider/model. + agent = getattr(self, "agent", None) + model_name = (getattr(agent, "model", None) or self.model or "unknown") model_short = model_name.split("/")[-1] if "/" in model_name else model_name if model_short.endswith(".gguf"): model_short = model_short[:-5] @@ -1629,7 +1634,6 @@ class HermesCLI: "compressions": 0, } - agent = getattr(self, "agent", None) if not agent: return snapshot @@ -4004,59 +4008,7 @@ class HermesCLI: print(" To change model or provider, use: hermes model") - def _handle_prompt_command(self, cmd: str): - """Handle the /prompt command to view or set system prompt.""" - parts = cmd.split(maxsplit=1) - - if len(parts) > 1: - # Set new prompt - new_prompt = parts[1].strip() - - if new_prompt.lower() == "clear": - self.system_prompt = "" - self.agent = None # Force re-init - if save_config_value("agent.system_prompt", ""): - print("(^_^)b System prompt cleared (saved to config)") - else: - print("(^_^) System prompt cleared (session only)") - else: - self.system_prompt = new_prompt - self.agent = None # Force re-init - if save_config_value("agent.system_prompt", new_prompt): - print("(^_^)b System prompt set (saved to config)") - else: - print("(^_^) System prompt set (session 
only)") - print(f" \"{new_prompt[:60]}{'...' if len(new_prompt) > 60 else ''}\"") - else: - # Show current prompt - print() - print("+" + "-" * 50 + "+") - print("|" + " " * 15 + "(^_^) System Prompt" + " " * 15 + "|") - print("+" + "-" * 50 + "+") - print() - if self.system_prompt: - # Word wrap the prompt for display - words = self.system_prompt.split() - lines = [] - current_line = "" - for word in words: - if len(current_line) + len(word) + 1 <= 50: - current_line += (" " if current_line else "") + word - else: - lines.append(current_line) - current_line = word - if current_line: - lines.append(current_line) - for line in lines: - print(f" {line}") - else: - print(" (no custom prompt set - using default)") - print() - print(" Usage:") - print(" /prompt - Set a custom system prompt") - print(" /prompt clear - Remove custom prompt") - print(" /personality - Use a predefined personality") - print() + @staticmethod @@ -4556,9 +4508,7 @@ class HermesCLI: self._handle_model_switch(cmd_original) elif canonical == "provider": self._show_model_and_providers() - elif canonical == "prompt": - # Use original case so prompt text isn't lowercased - self._handle_prompt_command(cmd_original) + elif canonical == "personality": # Use original case (handler lowercases the personality name itself) self._handle_personality_command(cmd_original) diff --git a/flake.lock b/flake.lock index 628e492f65..78ceba92d7 100644 --- a/flake.lock +++ b/flake.lock @@ -22,16 +22,16 @@ }, "nixpkgs": { "locked": { - "lastModified": 1751274312, - "narHash": "sha256-/bVBlRpECLVzjV19t5KMdMFWSwKLtb5RyXdjz3LJT+g=", + "lastModified": 1775036866, + "narHash": "sha256-ZojAnPuCdy657PbTq5V0Y+AHKhZAIwSIT2cb8UgAz/U=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "50ab793786d9de88ee30ec4e4c24fb4236fc2674", + "rev": "6201e203d09599479a3b3450ed24fa81537ebc4e", "type": "github" }, "original": { "owner": "NixOS", - "ref": "nixos-24.11", + "ref": "nixos-unstable", "repo": "nixpkgs", "type": "github" } diff --git 
a/flake.nix b/flake.nix index 87be89c85c..919fa434dc 100644 --- a/flake.nix +++ b/flake.nix @@ -2,7 +2,7 @@ description = "Hermes Agent - AI agent framework by Nous Research"; inputs = { - nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11"; + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; flake-parts = { url = "github:hercules-ci/flake-parts"; inputs.nixpkgs-lib.follows = "nixpkgs"; diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index b7360fdd32..4d59f7dbf9 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -250,7 +250,7 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { # Kimi Code Endpoint Detection # ============================================================================= -# Kimi Code (platform.kimi.ai) issues keys prefixed "sk-kimi-" that only work +# Kimi Code (kimi.com/code) issues keys prefixed "sk-kimi-" that only work # on api.kimi.com/coding/v1. Legacy keys from platform.moonshot.ai work on # api.moonshot.ai/v1 (the default). Auto-detect when user hasn't set # KIMI_BASE_URL explicitly. @@ -3017,12 +3017,15 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: _save_provider_state(auth_store, "nous", auth_state) saved_to = _save_auth_store(auth_store) - config_path = _update_config_for_provider("nous", inference_base_url) print() print("Login successful!") print(f" Auth state: {saved_to}") - print(f" Config updated: {config_path} (model.provider=nous)") + # Resolve model BEFORE writing provider to config.yaml so we never + # leave the config in a half-updated state (provider=nous but model + # still set to the previous provider's model, e.g. opus from + # OpenRouter). The auth.json active_provider was already set above. 
+ selected_model = None try: runtime_key = auth_state.get("agent_key") or auth_state.get("access_token") if not isinstance(runtime_key, str) or not runtime_key: @@ -3056,9 +3059,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: unavailable_models=unavailable_models, portal_url=_portal, ) - if selected_model: - _save_model_choice(selected_model) - print(f"Default model set to: {selected_model}") elif unavailable_models: _url = (_portal or DEFAULT_NOUS_PORTAL_URL).rstrip("/") print("No free models currently available.") @@ -3070,6 +3070,15 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: print() print(f"Login succeeded, but could not fetch available models. Reason: {message}") + # Write provider + model atomically so config is never mismatched. + config_path = _update_config_for_provider( + "nous", inference_base_url, default_model=selected_model, + ) + if selected_model: + _save_model_choice(selected_model) + print(f"Default model set to: {selected_model}") + print(f" Config updated: {config_path} (model.provider=nous)") + except KeyboardInterrupt: print("\nLogin cancelled.") raise SystemExit(130) diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 70d9cb8aa3..ac0f44d749 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -87,8 +87,7 @@ COMMAND_REGISTRY: list[CommandDef] = [ CommandDef("model", "Switch model for this session", "Configuration", args_hint="[model] [--global]"), CommandDef("provider", "Show available providers and current provider", "Configuration"), - CommandDef("prompt", "View/set custom system prompt", "Configuration", - cli_only=True, args_hint="[text]", subcommands=("clear",)), + CommandDef("personality", "Set a predefined personality", "Configuration", args_hint="[name]"), CommandDef("statusbar", "Toggle the context/model status bar", "Configuration", diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index 7d120d94f1..ef35108df0 100644 --- a/hermes_cli/model_switch.py +++ 
b/hermes_cli/model_switch.py @@ -733,6 +733,7 @@ def list_authenticated_providers( fetch_models_dev, get_provider_info as _mdev_pinfo, ) + from hermes_cli.auth import PROVIDER_REGISTRY from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS results: List[dict] = [] @@ -753,9 +754,16 @@ def list_authenticated_providers( if not isinstance(pdata, dict): continue - env_vars = pdata.get("env", []) - if not isinstance(env_vars, list): - continue + # Prefer auth.py PROVIDER_REGISTRY for env var names — it's our + # source of truth. models.dev can have wrong mappings (e.g. + # minimax-cn → MINIMAX_API_KEY instead of MINIMAX_CN_API_KEY). + pconfig = PROVIDER_REGISTRY.get(hermes_id) + if pconfig and pconfig.api_key_env_vars: + env_vars = list(pconfig.api_key_env_vars) + else: + env_vars = pdata.get("env", []) + if not isinstance(env_vars, list): + continue # Check if any env var is set has_creds = any(os.environ.get(ev) for ev in env_vars) diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 95c9fa6228..72b8aab18e 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -2572,9 +2572,120 @@ _OPENCLAW_SCRIPT = ( ) +def _load_openclaw_migration_module(): + """Load the openclaw_to_hermes migration script as a module. + + Returns the loaded module, or None if the script can't be loaded. + """ + if not _OPENCLAW_SCRIPT.exists(): + return None + + spec = importlib.util.spec_from_file_location( + "openclaw_to_hermes", _OPENCLAW_SCRIPT + ) + if spec is None or spec.loader is None: + return None + + mod = importlib.util.module_from_spec(spec) + # Register in sys.modules so @dataclass can resolve the module + # (Python 3.11+ requires this for dynamically loaded modules) + import sys as _sys + _sys.modules[spec.name] = mod + try: + spec.loader.exec_module(mod) + except Exception: + _sys.modules.pop(spec.name, None) + raise + return mod + + +# Item kinds that represent high-impact changes warranting explicit warnings. 
+# Gateway tokens/channels can hijack messaging platforms from the old agent. +# Config values may have different semantics between OpenClaw and Hermes. +# Instruction/context files (.md) can contain incompatible setup procedures. +_HIGH_IMPACT_KIND_KEYWORDS = { + "gateway": "⚠ Gateway/messaging — this will configure Hermes to use your OpenClaw messaging channels", + "telegram": "⚠ Telegram — this will point Hermes at your OpenClaw Telegram bot", + "slack": "⚠ Slack — this will point Hermes at your OpenClaw Slack workspace", + "discord": "⚠ Discord — this will point Hermes at your OpenClaw Discord bot", + "whatsapp": "⚠ WhatsApp — this will point Hermes at your OpenClaw WhatsApp connection", + "config": "⚠ Config values — OpenClaw settings may not map 1:1 to Hermes equivalents", + "soul": "⚠ Instruction file — may contain OpenClaw-specific setup/restart procedures", + "memory": "⚠ Memory/context file — may reference OpenClaw-specific infrastructure", + "context": "⚠ Context file — may contain OpenClaw-specific instructions", +} + + +def _print_migration_preview(report: dict): + """Print a detailed dry-run preview of what migration would do. + + Groups items by category and adds explicit warnings for high-impact + changes like gateway token takeover and config value differences. 
+ """ + items = report.get("items", []) + if not items: + print_info("Nothing to migrate.") + return + + migrated_items = [i for i in items if i.get("status") == "migrated"] + conflict_items = [i for i in items if i.get("status") == "conflict"] + skipped_items = [i for i in items if i.get("status") == "skipped"] + + warnings_shown = set() + + if migrated_items: + print(color(" Would import:", Colors.GREEN)) + for item in migrated_items: + kind = item.get("kind", "unknown") + dest = item.get("destination", "") + if dest: + dest_short = str(dest).replace(str(Path.home()), "~") + print(f" {kind:<22s} → {dest_short}") + else: + print(f" {kind}") + + # Check for high-impact items and collect warnings + kind_lower = kind.lower() + dest_lower = str(dest).lower() + for keyword, warning in _HIGH_IMPACT_KIND_KEYWORDS.items(): + if keyword in kind_lower or keyword in dest_lower: + warnings_shown.add(warning) + print() + + if conflict_items: + print(color(" Would overwrite (conflicts with existing Hermes config):", Colors.YELLOW)) + for item in conflict_items: + kind = item.get("kind", "unknown") + reason = item.get("reason", "already exists") + print(f" {kind:<22s} {reason}") + print() + + if skipped_items: + print(color(" Would skip:", Colors.DIM)) + for item in skipped_items: + kind = item.get("kind", "unknown") + reason = item.get("reason", "") + print(f" {kind:<22s} {reason}") + print() + + # Print collected warnings + if warnings_shown: + print(color(" ── Warnings ──", Colors.YELLOW)) + for warning in sorted(warnings_shown): + print(color(f" {warning}", Colors.YELLOW)) + print() + print(color(" Note: OpenClaw config values may have different semantics in Hermes.", Colors.YELLOW)) + print(color(" For example, OpenClaw's tool_call_execution: \"auto\" ≠ Hermes's yolo mode.", Colors.YELLOW)) + print(color(" Instruction files (.md) from OpenClaw may contain incompatible procedures.", Colors.YELLOW)) + print() + + def _offer_openclaw_migration(hermes_home: Path) -> bool: 
"""Detect ~/.openclaw and offer to migrate during first-time setup. + Runs a dry-run first to show the user exactly what would be imported, + overwritten, or taken over. Only executes after explicit confirmation. + Returns True if migration ran successfully, False otherwise. """ openclaw_dir = Path.home() / ".openclaw" @@ -2587,12 +2698,12 @@ def _offer_openclaw_migration(hermes_home: Path) -> bool: print() print_header("OpenClaw Installation Detected") print_info(f"Found OpenClaw data at {openclaw_dir}") - print_info("Hermes can import your settings, memories, skills, and API keys.") + print_info("Hermes can preview what would be imported before making any changes.") print() - if not prompt_yes_no("Would you like to import from OpenClaw?", default=True): + if not prompt_yes_no("Would you like to see what can be imported?", default=True): print_info( - "Skipping migration. You can run it later via the openclaw-migration skill." + "Skipping migration. You can run it later with: hermes claw migrate --dry-run" ) return False @@ -2601,34 +2712,71 @@ def _offer_openclaw_migration(hermes_home: Path) -> bool: if not config_path.exists(): save_config(load_config()) - # Dynamically load the migration script + # Load the migration module try: - spec = importlib.util.spec_from_file_location( - "openclaw_to_hermes", _OPENCLAW_SCRIPT - ) - if spec is None or spec.loader is None: + mod = _load_openclaw_migration_module() + if mod is None: print_warning("Could not load migration script.") return False + except Exception as e: + print_warning(f"Could not load migration script: {e}") + logger.debug("OpenClaw migration module load error", exc_info=True) + return False - mod = importlib.util.module_from_spec(spec) - # Register in sys.modules so @dataclass can resolve the module - # (Python 3.11+ requires this for dynamically loaded modules) - import sys as _sys - _sys.modules[spec.name] = mod - try: - spec.loader.exec_module(mod) - except Exception: - _sys.modules.pop(spec.name, 
None) - raise - - # Run migration with the "full" preset, execute mode, no overwrite + # ── Phase 1: Dry-run preview ── + try: selected = mod.resolve_selected_options(None, None, preset="full") + dry_migrator = mod.Migrator( + source_root=openclaw_dir.resolve(), + target_root=hermes_home.resolve(), + execute=False, # dry-run — no files modified + workspace_target=None, + overwrite=True, # show everything including conflicts + migrate_secrets=True, + output_dir=None, + selected_options=selected, + preset_name="full", + ) + preview_report = dry_migrator.migrate() + except Exception as e: + print_warning(f"Migration preview failed: {e}") + logger.debug("OpenClaw migration preview error", exc_info=True) + return False + + # Display the full preview + preview_summary = preview_report.get("summary", {}) + preview_count = preview_summary.get("migrated", 0) + + if preview_count == 0: + print() + print_info("Nothing to import from OpenClaw.") + return False + + print() + print_header(f"Migration Preview — {preview_count} item(s) would be imported") + print_info("No changes have been made yet. Review the list below:") + print() + _print_migration_preview(preview_report) + + # ── Phase 2: Confirm and execute ── + if not prompt_yes_no("Proceed with migration?", default=False): + print_info( + "Migration cancelled. You can run it later with: hermes claw migrate" + ) + print_info( + "Use --dry-run to preview again, or --preset minimal for a lighter import." + ) + return False + + # Execute the migration — overwrite=False so existing Hermes configs are + # preserved. The user saw the preview; conflicts are skipped by default. 
+ try: migrator = mod.Migrator( source_root=openclaw_dir.resolve(), target_root=hermes_home.resolve(), execute=True, workspace_target=None, - overwrite=True, + overwrite=False, # preserve existing Hermes config migrate_secrets=True, output_dir=None, selected_options=selected, @@ -2640,7 +2788,7 @@ def _offer_openclaw_migration(hermes_home: Path) -> bool: logger.debug("OpenClaw migration error", exc_info=True) return False - # Print summary + # Print final summary summary = report.get("summary", {}) migrated = summary.get("migrated", 0) skipped = summary.get("skipped", 0) @@ -2651,7 +2799,7 @@ def _offer_openclaw_migration(hermes_home: Path) -> bool: if migrated: print_success(f"Imported {migrated} item(s) from OpenClaw.") if conflicts: - print_info(f"Skipped {conflicts} item(s) that already exist in Hermes.") + print_info(f"Skipped {conflicts} item(s) that already exist in Hermes (use hermes claw migrate --overwrite to force).") if skipped: print_info(f"Skipped {skipped} item(s) (not found or unchanged).") if errors: diff --git a/nix/nixosModules.nix b/nix/nixosModules.nix index c961aa616a..948f7df8c5 100644 --- a/nix/nixosModules.nix +++ b/nix/nixosModules.nix @@ -569,7 +569,7 @@ # ── Activation: link config + auth + documents ──────────────────── { - system.activationScripts."hermes-agent-setup" = lib.stringAfter [ "users" "setupSecrets" ] '' + system.activationScripts."hermes-agent-setup" = lib.stringAfter ([ "users" ] ++ lib.optional (config.system.activationScripts ? 
setupSecrets) "setupSecrets") '' # Ensure directories exist (activation runs before tmpfiles) mkdir -p ${cfg.stateDir}/.hermes mkdir -p ${cfg.stateDir}/home diff --git a/nix/packages.nix b/nix/packages.nix index 9a65b889d3..eb50d4a17b 100644 --- a/nix/packages.nix +++ b/nix/packages.nix @@ -14,7 +14,7 @@ }; runtimeDeps = with pkgs; [ - nodejs_20 ripgrep git openssh ffmpeg + nodejs_20 ripgrep git openssh ffmpeg tirith ]; runtimePath = pkgs.lib.makeBinPath runtimeDeps; diff --git a/run_agent.py b/run_agent.py index b8ed44ef78..db3f4b310d 100644 --- a/run_agent.py +++ b/run_agent.py @@ -87,6 +87,7 @@ from agent.model_metadata import ( fetch_model_metadata, estimate_tokens_rough, estimate_messages_tokens_rough, estimate_request_tokens_rough, get_next_probe_tier, parse_context_limit_from_error, + parse_available_output_tokens_from_error, save_context_length, is_local_endpoint, query_ollama_num_ctx, ) @@ -4968,9 +4969,21 @@ class AIAgent: # Swap OpenAI client and config in-place self.api_key = fb_client.api_key self.client = fb_client + # Preserve provider-specific headers that + # resolve_provider_client() may have baked into + # fb_client via the default_headers kwarg. The OpenAI + # SDK stores these in _custom_headers. Without this, + # subsequent request-client rebuilds (via + # _create_request_openai_client) drop the headers, + # causing 403s from providers like Kimi Coding that + # require a User-Agent sentinel. 
+ fb_headers = getattr(fb_client, "_custom_headers", None) + if not fb_headers: + fb_headers = getattr(fb_client, "default_headers", None) self._client_kwargs = { "api_key": fb_client.api_key, "base_url": fb_base_url, + **({"default_headers": dict(fb_headers)} if fb_headers else {}), } # Re-evaluate prompt caching for the new provider/model @@ -5385,15 +5398,22 @@ class AIAgent: if self.api_mode == "anthropic_messages": from agent.anthropic_adapter import build_anthropic_kwargs anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages) - # Pass context_length so the adapter can clamp max_tokens if the - # user configured a smaller context window than the model's output limit. + # Pass context_length (total input+output window) so the adapter can + # clamp max_tokens (output cap) when the user configured a smaller + # context window than the model's native output limit. ctx_len = getattr(self, "context_compressor", None) ctx_len = ctx_len.context_length if ctx_len else None + # _ephemeral_max_output_tokens is set for one call when the API + # returns "max_tokens too large given prompt" — it caps output to + # the available window space without touching context_length. + ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) + if ephemeral_out is not None: + self._ephemeral_max_output_tokens = None # consume immediately return build_anthropic_kwargs( model=self.model, messages=anthropic_messages, tools=self.tools, - max_tokens=self.max_tokens, + max_tokens=ephemeral_out if ephemeral_out is not None else self.max_tokens, reasoning_config=self.reasoning_config, is_oauth=self._is_anthropic_oauth, preserve_dots=self._anthropic_preserve_dots(), @@ -8294,6 +8314,48 @@ class AIAgent: compressor = self.context_compressor old_ctx = compressor.context_length + # ── Distinguish two very different errors ─────────── + # 1. "Prompt too long": the INPUT exceeds the context window. + # Fix: reduce context_length + compress history. + # 2. 
"max_tokens too large": input is fine, but + # input_tokens + requested max_tokens > context_window. + # Fix: reduce max_tokens (the OUTPUT cap) for this call. + # Do NOT shrink context_length — the window is unchanged. + # + # Note: max_tokens = output token cap (one response). + # context_length = total window (input + output combined). + available_out = parse_available_output_tokens_from_error(error_msg) + if available_out is not None: + # Error is purely about the output cap being too large. + # Cap output to the available space and retry without + # touching context_length or triggering compression. + safe_out = max(1, available_out - 64) # small safety margin + self._ephemeral_max_output_tokens = safe_out + self._vprint( + f"{self.log_prefix}⚠️ Output cap too large for current prompt — " + f"retrying with max_tokens={safe_out:,} " + f"(available_tokens={available_out:,}; context_length unchanged at {old_ctx:,})", + force=True, + ) + # Still count against compression_attempts so we don't + # loop forever if the error keeps recurring. + compression_attempts += 1 + if compression_attempts > max_compression_attempts: + self._vprint(f"{self.log_prefix}❌ Max compression attempts ({max_compression_attempts}) reached.", force=True) + self._vprint(f"{self.log_prefix} 💡 Try /new to start a fresh conversation, or /compress to retry compression.", force=True) + logging.error(f"{self.log_prefix}Context compression failed after {max_compression_attempts} attempts.") + self._persist_session(messages, conversation_history) + return { + "messages": messages, + "completed": False, + "api_calls": api_call_count, + "error": f"Context length exceeded: max compression attempts ({max_compression_attempts}) reached.", + "partial": True + } + restart_with_compressed_messages = True + break + + # Error is about the INPUT being too large — reduce context_length. 
# Try to parse the actual limit from the error message parsed_limit = parse_context_limit_from_error(error_msg) if parsed_limit and parsed_limit < old_ctx: diff --git a/skills/autonomous-ai-agents/hermes-agent/SKILL.md b/skills/autonomous-ai-agents/hermes-agent/SKILL.md index 8d93e3fb79..74445c267e 100644 --- a/skills/autonomous-ai-agents/hermes-agent/SKILL.md +++ b/skills/autonomous-ai-agents/hermes-agent/SKILL.md @@ -249,7 +249,6 @@ Type these during an interactive chat session. /config Show config (CLI) /model [name] Show or change model /provider Show provider info -/prompt [text] View/set system prompt (CLI) /personality [name] Set personality /reasoning [level] Set reasoning (none|low|medium|high|xhigh|show|hide) /verbose Cycle: off → new → all → verbose diff --git a/tests/agent/test_error_classifier.py b/tests/agent/test_error_classifier.py index da248f8218..c597355891 100644 --- a/tests/agent/test_error_classifier.py +++ b/tests/agent/test_error_classifier.py @@ -507,6 +507,38 @@ class TestClassifyApiError: assert result.reason == FailoverReason.format_error assert result.retryable is False + def test_400_flat_body_descriptive_not_context_overflow(self): + """Responses API flat body with descriptive error + large session → format error. + + The Codex Responses API returns errors in flat body format: + {"message": "...", "type": "..."} without an "error" wrapper. + A descriptive 400 must NOT be misclassified as context overflow + just because the session is large. 
+ """ + e = MockAPIError( + "Invalid 'input[index].name': string does not match pattern.", + status_code=400, + body={"message": "Invalid 'input[index].name': string does not match pattern.", + "type": "invalid_request_error"}, + ) + result = classify_api_error(e, approx_tokens=200000, context_length=400000, num_messages=500) + assert result.reason == FailoverReason.format_error + assert result.retryable is False + + def test_400_flat_body_generic_large_session_still_context_overflow(self): + """Flat body with generic 'Error' message + large session → context overflow. + + Regression: the flat-body fallback must not break the existing heuristic + for genuinely generic errors from providers that use flat bodies. + """ + e = MockAPIError( + "Error", + status_code=400, + body={"message": "Error"}, + ) + result = classify_api_error(e, approx_tokens=100000, context_length=200000) + assert result.reason == FailoverReason.context_overflow + # ── Peer closed + large session ── def test_peer_closed_large_session(self): diff --git a/tests/cli/test_cli_status_bar.py b/tests/cli/test_cli_status_bar.py index e728328b8c..a884c42180 100644 --- a/tests/cli/test_cli_status_bar.py +++ b/tests/cli/test_cli_status_bar.py @@ -41,6 +41,7 @@ def _attach_agent( session_completion_tokens=completion_tokens, session_total_tokens=total_tokens, session_api_calls=api_calls, + get_rate_limit_state=lambda: None, context_compressor=SimpleNamespace( last_prompt_tokens=context_tokens, context_length=context_length, diff --git a/tests/conftest.py b/tests/conftest.py index 313a3cecfd..0211404667 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -38,6 +38,8 @@ def _isolate_hermes_home(tmp_path, monkeypatch): monkeypatch.delenv("HERMES_SESSION_CHAT_ID", raising=False) monkeypatch.delenv("HERMES_SESSION_CHAT_NAME", raising=False) monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False) + # Avoid making real calls during tests if this key is set in the env files + 
monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) @pytest.fixture() diff --git a/tests/gateway/test_media_download_retry.py b/tests/gateway/test_media_download_retry.py index 8f135a053b..f0147dfb46 100644 --- a/tests/gateway/test_media_download_retry.py +++ b/tests/gateway/test_media_download_retry.py @@ -38,10 +38,11 @@ def _make_timeout_error() -> httpx.TimeoutException: # cache_image_from_url (base.py) # --------------------------------------------------------------------------- +@patch("tools.url_safety.is_safe_url", return_value=True) class TestCacheImageFromUrl: """Tests for gateway.platforms.base.cache_image_from_url""" - def test_success_on_first_attempt(self, tmp_path, monkeypatch): + def test_success_on_first_attempt(self, _mock_safe, tmp_path, monkeypatch): """A clean 200 response caches the image and returns a path.""" monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img") @@ -65,7 +66,7 @@ class TestCacheImageFromUrl: assert path.endswith(".jpg") mock_client.get.assert_called_once() - def test_retries_on_timeout_then_succeeds(self, tmp_path, monkeypatch): + def test_retries_on_timeout_then_succeeds(self, _mock_safe, tmp_path, monkeypatch): """A timeout on the first attempt is retried; second attempt succeeds.""" monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img") @@ -95,7 +96,7 @@ class TestCacheImageFromUrl: assert mock_client.get.call_count == 2 mock_sleep.assert_called_once() - def test_retries_on_429_then_succeeds(self, tmp_path, monkeypatch): + def test_retries_on_429_then_succeeds(self, _mock_safe, tmp_path, monkeypatch): """A 429 response on the first attempt is retried; second attempt succeeds.""" monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img") @@ -122,7 +123,7 @@ class TestCacheImageFromUrl: assert path.endswith(".jpg") assert mock_client.get.call_count == 2 - def test_raises_after_max_retries_exhausted(self, tmp_path, monkeypatch): + def 
test_raises_after_max_retries_exhausted(self, _mock_safe, tmp_path, monkeypatch): """Timeout on every attempt raises after all retries are consumed.""" monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img") @@ -145,7 +146,7 @@ class TestCacheImageFromUrl: # 3 total calls: initial + 2 retries assert mock_client.get.call_count == 3 - def test_non_retryable_4xx_raises_immediately(self, tmp_path, monkeypatch): + def test_non_retryable_4xx_raises_immediately(self, _mock_safe, tmp_path, monkeypatch): """A 404 (non-retryable) is raised immediately without any retry.""" monkeypatch.setattr("gateway.platforms.base.IMAGE_CACHE_DIR", tmp_path / "img") @@ -175,10 +176,11 @@ class TestCacheImageFromUrl: # cache_audio_from_url (base.py) # --------------------------------------------------------------------------- +@patch("tools.url_safety.is_safe_url", return_value=True) class TestCacheAudioFromUrl: """Tests for gateway.platforms.base.cache_audio_from_url""" - def test_success_on_first_attempt(self, tmp_path, monkeypatch): + def test_success_on_first_attempt(self, _mock_safe, tmp_path, monkeypatch): """A clean 200 response caches the audio and returns a path.""" monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio") @@ -202,7 +204,7 @@ class TestCacheAudioFromUrl: assert path.endswith(".ogg") mock_client.get.assert_called_once() - def test_retries_on_timeout_then_succeeds(self, tmp_path, monkeypatch): + def test_retries_on_timeout_then_succeeds(self, _mock_safe, tmp_path, monkeypatch): """A timeout on the first attempt is retried; second attempt succeeds.""" monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio") @@ -232,7 +234,7 @@ class TestCacheAudioFromUrl: assert mock_client.get.call_count == 2 mock_sleep.assert_called_once() - def test_retries_on_429_then_succeeds(self, tmp_path, monkeypatch): + def test_retries_on_429_then_succeeds(self, _mock_safe, tmp_path, monkeypatch): """A 429 response on 
the first attempt is retried; second attempt succeeds.""" monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio") @@ -259,7 +261,7 @@ class TestCacheAudioFromUrl: assert path.endswith(".ogg") assert mock_client.get.call_count == 2 - def test_retries_on_500_then_succeeds(self, tmp_path, monkeypatch): + def test_retries_on_500_then_succeeds(self, _mock_safe, tmp_path, monkeypatch): """A 500 response on the first attempt is retried; second attempt succeeds.""" monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio") @@ -286,7 +288,7 @@ class TestCacheAudioFromUrl: assert path.endswith(".ogg") assert mock_client.get.call_count == 2 - def test_raises_after_max_retries_exhausted(self, tmp_path, monkeypatch): + def test_raises_after_max_retries_exhausted(self, _mock_safe, tmp_path, monkeypatch): """Timeout on every attempt raises after all retries are consumed.""" monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio") @@ -309,7 +311,7 @@ class TestCacheAudioFromUrl: # 3 total calls: initial + 2 retries assert mock_client.get.call_count == 3 - def test_non_retryable_4xx_raises_immediately(self, tmp_path, monkeypatch): + def test_non_retryable_4xx_raises_immediately(self, _mock_safe, tmp_path, monkeypatch): """A 404 (non-retryable) is raised immediately without any retry.""" monkeypatch.setattr("gateway.platforms.base.AUDIO_CACHE_DIR", tmp_path / "audio") diff --git a/tests/gateway/test_wecom.py b/tests/gateway/test_wecom.py index a7101c6973..418a4b622f 100644 --- a/tests/gateway/test_wecom.py +++ b/tests/gateway/test_wecom.py @@ -4,7 +4,7 @@ import base64 import os from pathlib import Path from types import SimpleNamespace -from unittest.mock import AsyncMock +from unittest.mock import AsyncMock, patch import pytest @@ -355,7 +355,8 @@ class TestMediaUpload: assert calls[3][1]["chunk_index"] == 2 @pytest.mark.asyncio - async def test_download_remote_bytes_rejects_large_content_length(self): + 
@patch("tools.url_safety.is_safe_url", return_value=True) + async def test_download_remote_bytes_rejects_large_content_length(self, _mock_safe): from gateway.platforms.wecom import WeComAdapter class FakeResponse: diff --git a/tests/hermes_cli/test_api_key_providers.py b/tests/hermes_cli/test_api_key_providers.py index ee86507a16..d97b0c1f75 100644 --- a/tests/hermes_cli/test_api_key_providers.py +++ b/tests/hermes_cli/test_api_key_providers.py @@ -628,14 +628,21 @@ class TestHasAnyProviderConfigured: def test_claude_code_creds_ignored_on_fresh_install(self, monkeypatch, tmp_path): """Claude Code credentials should NOT skip the wizard when Hermes is unconfigured.""" from hermes_cli import config as config_module + from hermes_cli.auth import PROVIDER_REGISTRY hermes_home = tmp_path / ".hermes" hermes_home.mkdir() monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env") monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home) # Clear all provider env vars so earlier checks don't short-circuit - for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", - "ANTHROPIC_TOKEN", "OPENAI_BASE_URL"): + _all_vars = {"OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", + "ANTHROPIC_TOKEN", "OPENAI_BASE_URL"} + for pconfig in PROVIDER_REGISTRY.values(): + if pconfig.auth_type == "api_key": + _all_vars.update(pconfig.api_key_env_vars) + for var in _all_vars: monkeypatch.delenv(var, raising=False) + # Prevent gh-cli / copilot auth fallback from leaking in + monkeypatch.setattr("hermes_cli.auth.get_auth_status", lambda _pid: {}) # Simulate valid Claude Code credentials monkeypatch.setattr( "agent.anthropic_adapter.read_claude_code_credentials", @@ -710,6 +717,7 @@ class TestHasAnyProviderConfigured: """config.yaml model dict with empty default and no creds stays false.""" import yaml from hermes_cli import config as config_module + from hermes_cli.auth import PROVIDER_REGISTRY hermes_home = tmp_path / ".hermes" 
hermes_home.mkdir() config_file = hermes_home / "config.yaml" @@ -719,9 +727,15 @@ class TestHasAnyProviderConfigured: monkeypatch.setattr(config_module, "get_env_path", lambda: hermes_home / ".env") monkeypatch.setattr(config_module, "get_hermes_home", lambda: hermes_home) monkeypatch.setenv("HERMES_HOME", str(hermes_home)) - for var in ("OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", - "ANTHROPIC_TOKEN", "OPENAI_BASE_URL"): + _all_vars = {"OPENROUTER_API_KEY", "OPENAI_API_KEY", "ANTHROPIC_API_KEY", + "ANTHROPIC_TOKEN", "OPENAI_BASE_URL"} + for pconfig in PROVIDER_REGISTRY.values(): + if pconfig.auth_type == "api_key": + _all_vars.update(pconfig.api_key_env_vars) + for var in _all_vars: monkeypatch.delenv(var, raising=False) + # Prevent gh-cli / copilot auth fallback from leaking in + monkeypatch.setattr("hermes_cli.auth.get_auth_status", lambda _pid: {}) from hermes_cli.main import _has_any_provider_configured assert _has_any_provider_configured() is False @@ -941,9 +955,10 @@ class TestHuggingFaceModels: """Every HF model should have a context length entry.""" from hermes_cli.models import _PROVIDER_MODELS from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS + lower_keys = {k.lower() for k in DEFAULT_CONTEXT_LENGTHS} hf_models = _PROVIDER_MODELS["huggingface"] for model in hf_models: - assert model in DEFAULT_CONTEXT_LENGTHS, ( + assert model.lower() in lower_keys, ( f"HF model {model!r} missing from DEFAULT_CONTEXT_LENGTHS" ) diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py index 81c262a840..98a4b2efc2 100644 --- a/tests/hermes_cli/test_commands.py +++ b/tests/hermes_cli/test_commands.py @@ -425,8 +425,8 @@ class TestSlashCommandCompleter: class TestSubcommands: def test_explicit_subcommands_extracted(self): """Commands with explicit subcommands on CommandDef are extracted.""" - assert "/prompt" in SUBCOMMANDS - assert "clear" in SUBCOMMANDS["/prompt"] + assert "/skills" in SUBCOMMANDS + assert "install" in 
SUBCOMMANDS["/skills"] def test_reasoning_has_subcommands(self): assert "/reasoning" in SUBCOMMANDS diff --git a/tests/hermes_cli/test_setup_openclaw_migration.py b/tests/hermes_cli/test_setup_openclaw_migration.py index b956f1fe64..fe80263905 100644 --- a/tests/hermes_cli/test_setup_openclaw_migration.py +++ b/tests/hermes_cli/test_setup_openclaw_migration.py @@ -44,7 +44,7 @@ class TestOfferOpenclawMigration: assert setup_mod._offer_openclaw_migration(tmp_path / ".hermes") is False def test_runs_migration_when_user_accepts(self, tmp_path): - """Should dynamically load the script and run the Migrator.""" + """Should run dry-run preview first, then execute after confirmation.""" openclaw_dir = tmp_path / ".openclaw" openclaw_dir.mkdir() @@ -60,6 +60,7 @@ class TestOfferOpenclawMigration: fake_migrator = MagicMock() fake_migrator.migrate.return_value = { "summary": {"migrated": 3, "skipped": 1, "conflict": 0, "error": 0}, + "items": [{"kind": "config", "status": "migrated", "destination": "/tmp/x"}], "output_dir": str(hermes_home / "migration"), } fake_mod.Migrator = MagicMock(return_value=fake_migrator) @@ -70,6 +71,7 @@ class TestOfferOpenclawMigration: with ( patch("hermes_cli.setup.Path.home", return_value=tmp_path), patch.object(setup_mod, "_OPENCLAW_SCRIPT", script), + # Both prompts answered Yes: preview offer + proceed confirmation patch.object(setup_mod, "prompt_yes_no", return_value=True), patch.object(setup_mod, "get_config_path", return_value=config_path), patch("importlib.util.spec_from_file_location") as mock_spec_fn, @@ -91,13 +93,75 @@ class TestOfferOpenclawMigration: fake_mod.resolve_selected_options.assert_called_once_with( None, None, preset="full" ) - fake_mod.Migrator.assert_called_once() - call_kwargs = fake_mod.Migrator.call_args[1] - assert call_kwargs["execute"] is True - assert call_kwargs["overwrite"] is True - assert call_kwargs["migrate_secrets"] is True - assert call_kwargs["preset_name"] == "full" - 
fake_migrator.migrate.assert_called_once() + # Migrator called twice: once for dry-run preview, once for execution + assert fake_mod.Migrator.call_count == 2 + + # First call: dry-run preview (execute=False, overwrite=True to show all) + preview_kwargs = fake_mod.Migrator.call_args_list[0][1] + assert preview_kwargs["execute"] is False + assert preview_kwargs["overwrite"] is True + assert preview_kwargs["migrate_secrets"] is True + assert preview_kwargs["preset_name"] == "full" + + # Second call: actual execution (execute=True, overwrite=False to preserve) + exec_kwargs = fake_mod.Migrator.call_args_list[1][1] + assert exec_kwargs["execute"] is True + assert exec_kwargs["overwrite"] is False + assert exec_kwargs["migrate_secrets"] is True + assert exec_kwargs["preset_name"] == "full" + + # migrate() called twice (once per Migrator instance) + assert fake_migrator.migrate.call_count == 2 + + def test_user_declines_after_preview(self, tmp_path): + """Should return False when user sees preview but declines to proceed.""" + openclaw_dir = tmp_path / ".openclaw" + openclaw_dir.mkdir() + + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + config_path = hermes_home / "config.yaml" + config_path.write_text("agent:\n max_turns: 90\n") + + fake_mod = ModuleType("openclaw_to_hermes") + fake_mod.resolve_selected_options = MagicMock(return_value={"soul", "memory"}) + fake_migrator = MagicMock() + fake_migrator.migrate.return_value = { + "summary": {"migrated": 3, "skipped": 0, "conflict": 0, "error": 0}, + "items": [{"kind": "config", "status": "migrated", "destination": "/tmp/x"}], + } + fake_mod.Migrator = MagicMock(return_value=fake_migrator) + + script = tmp_path / "openclaw_to_hermes.py" + script.write_text("# placeholder") + + # First prompt (preview): Yes, Second prompt (proceed): No + prompt_responses = iter([True, False]) + + with ( + patch("hermes_cli.setup.Path.home", return_value=tmp_path), + patch.object(setup_mod, "_OPENCLAW_SCRIPT", script), + 
patch.object(setup_mod, "prompt_yes_no", side_effect=prompt_responses), + patch.object(setup_mod, "get_config_path", return_value=config_path), + patch("importlib.util.spec_from_file_location") as mock_spec_fn, + ): + mock_spec = MagicMock() + mock_spec.loader = MagicMock() + mock_spec_fn.return_value = mock_spec + + def exec_module(mod): + mod.resolve_selected_options = fake_mod.resolve_selected_options + mod.Migrator = fake_mod.Migrator + + mock_spec.loader.exec_module = exec_module + + result = setup_mod._offer_openclaw_migration(hermes_home) + + assert result is False + # Only dry-run Migrator was created, not the execute one + assert fake_mod.Migrator.call_count == 1 + preview_kwargs = fake_mod.Migrator.call_args[1] + assert preview_kwargs["execute"] is False def test_handles_migration_error_gracefully(self, tmp_path): """Should catch exceptions and return False.""" diff --git a/tests/hermes_cli/test_tools_config.py b/tests/hermes_cli/test_tools_config.py index 7371c89df7..830bad8d5f 100644 --- a/tests/hermes_cli/test_tools_config.py +++ b/tests/hermes_cli/test_tools_config.py @@ -354,6 +354,14 @@ def test_first_install_nous_auto_configures_managed_defaults(monkeypatch): lambda *args, **kwargs: {"web", "image_gen", "tts", "browser"}, ) monkeypatch.setattr("hermes_cli.tools_config.save_config", lambda config: None) + # Prevent leaked platform tokens (e.g. DISCORD_BOT_TOKEN from gateway.run + # import) from adding extra platforms. The loop in tools_command runs + # apply_nous_managed_defaults per platform; a second iteration sees values + # set by the first as "explicit" and skips them. 
+ monkeypatch.setattr( + "hermes_cli.tools_config._get_enabled_platforms", + lambda: ["cli"], + ) monkeypatch.setattr( "hermes_cli.nous_subscription.get_nous_auth_status", lambda: {"logged_in": True}, diff --git a/tests/hermes_cli/test_update_gateway_restart.py b/tests/hermes_cli/test_update_gateway_restart.py index 9366c06cf6..e4c8e92275 100644 --- a/tests/hermes_cli/test_update_gateway_restart.py +++ b/tests/hermes_cli/test_update_gateway_restart.py @@ -368,6 +368,9 @@ class TestCmdUpdateLaunchdRestart: monkeypatch.setattr( gateway_cli, "is_macos", lambda: False, ) + monkeypatch.setattr( + gateway_cli, "is_linux", lambda: True, + ) mock_run.side_effect = _make_run_side_effect( commit_count="3", diff --git a/tests/test_ctx_halving_fix.py b/tests/test_ctx_halving_fix.py new file mode 100644 index 0000000000..1ba423c8ff --- /dev/null +++ b/tests/test_ctx_halving_fix.py @@ -0,0 +1,319 @@ +"""Tests for the context-halving bugfix. + +Background +---------- +When the API returns "max_tokens too large given prompt" (input is fine, +but input_tokens + requested max_tokens > context_window), the old code +incorrectly halved context_length via get_next_probe_tier(). + +The fix introduces: + * parse_available_output_tokens_from_error() — detects this specific + error class and returns the available output token budget. + * _ephemeral_max_output_tokens on AIAgent — a one-shot override that + caps the output for one retry without touching context_length. + +Naming note +----------- + max_tokens = OUTPUT token cap (a single response). + context_length = TOTAL context window (input + output combined). +These are different and the old code conflated them; the fix keeps them +separate. 
+""" + +import sys +import os +from unittest.mock import MagicMock, patch, PropertyMock + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) + +import pytest + + +# --------------------------------------------------------------------------- +# parse_available_output_tokens_from_error — unit tests +# --------------------------------------------------------------------------- + +class TestParseAvailableOutputTokens: + """Pure-function tests; no I/O required.""" + + def _parse(self, msg): + from agent.model_metadata import parse_available_output_tokens_from_error + return parse_available_output_tokens_from_error(msg) + + # ── Should detect and extract ──────────────────────────────────────── + + def test_anthropic_canonical_format(self): + """Canonical Anthropic error: max_tokens: X > context_window: Y - input_tokens: Z = available_tokens: W""" + msg = ( + "max_tokens: 32768 > context_window: 200000 " + "- input_tokens: 190000 = available_tokens: 10000" + ) + assert self._parse(msg) == 10000 + + def test_anthropic_format_large_numbers(self): + msg = ( + "max_tokens: 128000 > context_window: 200000 " + "- input_tokens: 180000 = available_tokens: 20000" + ) + assert self._parse(msg) == 20000 + + def test_available_tokens_variant_spacing(self): + """Handles extra spaces around the colon.""" + msg = "max_tokens: 32768 > 200000 available_tokens : 5000" + assert self._parse(msg) == 5000 + + def test_available_tokens_natural_language(self): + """'available tokens: N' wording (no underscore).""" + msg = "max_tokens must be at most 10000 given your prompt (available tokens: 10000)" + assert self._parse(msg) == 10000 + + def test_single_token_available(self): + """Edge case: only 1 token left.""" + msg = "max_tokens: 9999 > context_window: 10000 - input_tokens: 9999 = available_tokens: 1" + assert self._parse(msg) == 1 + + # ── Should NOT detect (returns None) ───────────────────────────────── + + def test_prompt_too_long_is_not_output_cap_error(self): + 
"""'prompt is too long' errors must NOT be caught — they need context halving.""" + msg = "prompt is too long: 205000 tokens > 200000 maximum" + assert self._parse(msg) is None + + def test_generic_context_window_exceeded(self): + """Generic context window errors without available_tokens should not match.""" + msg = "context window exceeded: maximum is 32768 tokens" + assert self._parse(msg) is None + + def test_context_length_exceeded(self): + msg = "context_length_exceeded: prompt has 131073 tokens, limit is 131072" + assert self._parse(msg) is None + + def test_no_max_tokens_keyword(self): + """Error not related to max_tokens at all.""" + msg = "invalid_api_key: the API key is invalid" + assert self._parse(msg) is None + + def test_empty_string(self): + assert self._parse("") is None + + def test_rate_limit_error(self): + msg = "rate_limit_error: too many requests per minute" + assert self._parse(msg) is None + + +# --------------------------------------------------------------------------- +# build_anthropic_kwargs — output cap clamping +# --------------------------------------------------------------------------- + +class TestBuildAnthropicKwargsClamping: + """The context_length clamp only fires when output ceiling > window. + For standard Anthropic models (output ceiling < window) it must not fire. 
+ """ + + def _build(self, model, max_tokens=None, context_length=None): + from agent.anthropic_adapter import build_anthropic_kwargs + return build_anthropic_kwargs( + model=model, + messages=[{"role": "user", "content": "hi"}], + tools=None, + max_tokens=max_tokens, + reasoning_config=None, + context_length=context_length, + ) + + def test_no_clamping_when_output_ceiling_fits_in_window(self): + """Opus 4.6 native output (128K) < context window (200K) — no clamping.""" + kwargs = self._build("claude-opus-4-6", context_length=200_000) + assert kwargs["max_tokens"] == 128_000 + + def test_clamping_fires_for_tiny_custom_window(self): + """When context_length is 8K (local model), output cap is clamped to 7999.""" + kwargs = self._build("claude-opus-4-6", context_length=8_000) + assert kwargs["max_tokens"] == 7_999 + + def test_explicit_max_tokens_respected_when_within_window(self): + """Explicit max_tokens smaller than window passes through unchanged.""" + kwargs = self._build("claude-opus-4-6", max_tokens=4096, context_length=200_000) + assert kwargs["max_tokens"] == 4096 + + def test_explicit_max_tokens_clamped_when_exceeds_window(self): + """Explicit max_tokens larger than a small window is clamped.""" + kwargs = self._build("claude-opus-4-6", max_tokens=32_768, context_length=16_000) + assert kwargs["max_tokens"] == 15_999 + + def test_no_context_length_uses_native_ceiling(self): + """Without context_length the native output ceiling is used directly.""" + kwargs = self._build("claude-sonnet-4-6") + assert kwargs["max_tokens"] == 64_000 + + +# --------------------------------------------------------------------------- +# Ephemeral max_tokens mechanism — _build_api_kwargs +# --------------------------------------------------------------------------- + +class TestEphemeralMaxOutputTokens: + """_build_api_kwargs consumes _ephemeral_max_output_tokens exactly once + and falls back to self.max_tokens on subsequent calls. 
+ """ + + def _make_agent(self): + """Return a minimal AIAgent with api_mode='anthropic_messages' and + a stubbed context_compressor, bypassing full __init__ cost.""" + from run_agent import AIAgent + agent = object.__new__(AIAgent) + # Minimal attributes used by _build_api_kwargs + agent.api_mode = "anthropic_messages" + agent.model = "claude-opus-4-6" + agent.tools = [] + agent.max_tokens = None + agent.reasoning_config = None + agent._is_anthropic_oauth = False + agent._ephemeral_max_output_tokens = None + + compressor = MagicMock() + compressor.context_length = 200_000 + agent.context_compressor = compressor + + # Stub out the internal message-preparation helper + agent._prepare_anthropic_messages_for_api = MagicMock( + return_value=[{"role": "user", "content": "hi"}] + ) + agent._anthropic_preserve_dots = MagicMock(return_value=False) + return agent + + def test_ephemeral_override_is_used_on_first_call(self): + """When _ephemeral_max_output_tokens is set, it overrides self.max_tokens.""" + agent = self._make_agent() + agent._ephemeral_max_output_tokens = 5_000 + + kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + assert kwargs["max_tokens"] == 5_000 + + def test_ephemeral_override_is_consumed_after_one_call(self): + """After one call the ephemeral override is cleared to None.""" + agent = self._make_agent() + agent._ephemeral_max_output_tokens = 5_000 + + agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + assert agent._ephemeral_max_output_tokens is None + + def test_subsequent_call_uses_self_max_tokens(self): + """A second _build_api_kwargs call uses the normal max_tokens path.""" + agent = self._make_agent() + agent._ephemeral_max_output_tokens = 5_000 + agent.max_tokens = None # will resolve to native ceiling (128K for Opus 4.6) + + agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + # Second call — ephemeral is gone + kwargs2 = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + assert 
kwargs2["max_tokens"] == 128_000 # Opus 4.6 native ceiling + + def test_no_ephemeral_uses_self_max_tokens_directly(self): + """Without an ephemeral override, self.max_tokens is used normally.""" + agent = self._make_agent() + agent.max_tokens = 8_192 + + kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}]) + assert kwargs["max_tokens"] == 8_192 + + +# --------------------------------------------------------------------------- +# Integration: error handler does NOT halve context_length for output-cap errors +# --------------------------------------------------------------------------- + +class TestContextNotHalvedOnOutputCapError: + """When the API returns 'max_tokens too large given prompt', the handler + must set _ephemeral_max_output_tokens and NOT modify context_length. + """ + + def _make_agent_with_compressor(self, context_length=200_000): + from run_agent import AIAgent + from agent.context_compressor import ContextCompressor + + agent = object.__new__(AIAgent) + agent.api_mode = "anthropic_messages" + agent.model = "claude-opus-4-6" + agent.base_url = "https://api.anthropic.com" + agent.tools = [] + agent.max_tokens = None + agent.reasoning_config = None + agent._is_anthropic_oauth = False + agent._ephemeral_max_output_tokens = None + agent.log_prefix = "" + agent.quiet_mode = True + agent.verbose_logging = False + + compressor = MagicMock(spec=ContextCompressor) + compressor.context_length = context_length + compressor.threshold_percent = 0.75 + agent.context_compressor = compressor + + agent._prepare_anthropic_messages_for_api = MagicMock( + return_value=[{"role": "user", "content": "hi"}] + ) + agent._anthropic_preserve_dots = MagicMock(return_value=False) + agent._vprint = MagicMock() + return agent + + def test_output_cap_error_sets_ephemeral_not_context_length(self): + """On 'max_tokens too large' error, _ephemeral_max_output_tokens is set + and compressor.context_length is left unchanged.""" + from agent.model_metadata import 
parse_available_output_tokens_from_error + from agent.model_metadata import get_next_probe_tier + + error_msg = ( + "max_tokens: 128000 > context_window: 200000 " + "- input_tokens: 180000 = available_tokens: 20000" + ) + + # Simulate the handler logic from run_agent.py + agent = self._make_agent_with_compressor(context_length=200_000) + old_ctx = agent.context_compressor.context_length + + available_out = parse_available_output_tokens_from_error(error_msg) + assert available_out == 20_000, "parser must detect the error" + + # The fix: set ephemeral, skip context_length modification + agent._ephemeral_max_output_tokens = max(1, available_out - 64) + + # context_length must be untouched + assert agent.context_compressor.context_length == old_ctx + assert agent._ephemeral_max_output_tokens == 19_936 + + def test_prompt_too_long_still_triggers_probe_tier(self): + """Genuine prompt-too-long errors must still use get_next_probe_tier.""" + from agent.model_metadata import parse_available_output_tokens_from_error + from agent.model_metadata import get_next_probe_tier + + error_msg = "prompt is too long: 205000 tokens > 200000 maximum" + + available_out = parse_available_output_tokens_from_error(error_msg) + assert available_out is None, "prompt-too-long must not be caught by output-cap parser" + + # The old halving path is still used for this class of error + new_ctx = get_next_probe_tier(200_000) + assert new_ctx == 128_000 + + def test_output_cap_error_safety_margin(self): + """The ephemeral value includes a 64-token safety margin below available_out.""" + from agent.model_metadata import parse_available_output_tokens_from_error + + error_msg = ( + "max_tokens: 32768 > context_window: 200000 " + "- input_tokens: 190000 = available_tokens: 10000" + ) + available_out = parse_available_output_tokens_from_error(error_msg) + safe_out = max(1, available_out - 64) + assert safe_out == 9_936 + + def test_safety_margin_never_goes_below_one(self): + """When available_out is very 
small, safe_out must be at least 1."""
+        from agent.model_metadata import parse_available_output_tokens_from_error
+
+        error_msg = (
+            "max_tokens: 10 > context_window: 200000 "
+            "- input_tokens: 199999 = available_tokens: 1"
+        )
+        available_out = parse_available_output_tokens_from_error(error_msg)
+        safe_out = max(1, available_out - 64)
+        assert safe_out == 1
diff --git a/tests/tools/test_browser_camofox_state.py b/tests/tools/test_browser_camofox_state.py
index 7fe4c3d4c2..b1f128ccee 100644
--- a/tests/tools/test_browser_camofox_state.py
+++ b/tests/tools/test_browser_camofox_state.py
@@ -63,4 +63,4 @@ class TestCamofoxConfigDefaults:
         from hermes_cli.config import DEFAULT_CONFIG
 
         # managed_persistence is auto-merged by _deep_merge, no version bump needed
-        assert DEFAULT_CONFIG["_config_version"] == 12
+        assert DEFAULT_CONFIG["_config_version"] == 13
diff --git a/tests/tools/test_docker_environment.py b/tests/tools/test_docker_environment.py
index 498ef9d506..e19229a795 100644
--- a/tests/tools/test_docker_environment.py
+++ b/tests/tools/test_docker_environment.py
@@ -258,28 +258,30 @@ def _make_execute_only_env(forward_env=None):
 
 def test_init_env_args_uses_hermes_dotenv_for_allowlisted_env(monkeypatch):
     """_build_init_env_args picks up forwarded env vars from .env file at init time."""
-    env = _make_execute_only_env(["GITHUB_TOKEN"])
+    # Use a var that is NOT in _HERMES_PROVIDER_ENV_BLOCKLIST (GITHUB_TOKEN
+    # is in the copilot provider's api_key_env_vars and gets stripped). 
+ env = _make_execute_only_env(["DATABASE_URL"]) - monkeypatch.delenv("GITHUB_TOKEN", raising=False) - monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"GITHUB_TOKEN": "value_from_dotenv"}) + monkeypatch.delenv("DATABASE_URL", raising=False) + monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"DATABASE_URL": "value_from_dotenv"}) args = env._build_init_env_args() args_str = " ".join(args) - assert "GITHUB_TOKEN=value_from_dotenv" in args_str + assert "DATABASE_URL=value_from_dotenv" in args_str def test_init_env_args_prefers_shell_env_over_hermes_dotenv(monkeypatch): """Shell env vars take priority over .env file values in init env args.""" - env = _make_execute_only_env(["GITHUB_TOKEN"]) + env = _make_execute_only_env(["DATABASE_URL"]) - monkeypatch.setenv("GITHUB_TOKEN", "value_from_shell") - monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"GITHUB_TOKEN": "value_from_dotenv"}) + monkeypatch.setenv("DATABASE_URL", "value_from_shell") + monkeypatch.setattr(docker_env, "_load_hermes_env_vars", lambda: {"DATABASE_URL": "value_from_dotenv"}) args = env._build_init_env_args() args_str = " ".join(args) - assert "GITHUB_TOKEN=value_from_shell" in args_str + assert "DATABASE_URL=value_from_shell" in args_str assert "value_from_dotenv" not in args_str diff --git a/tests/tools/test_managed_server_tool_support.py b/tests/tools/test_managed_server_tool_support.py index 92cf83f5c4..5b917f3da8 100644 --- a/tests/tools/test_managed_server_tool_support.py +++ b/tests/tools/test_managed_server_tool_support.py @@ -147,7 +147,7 @@ class TestBaseEnvCompatibility: """Hermes wires parser selection through ServerManager.tool_parser.""" import ast - base_env_path = Path(__file__).parent.parent / "environments" / "hermes_base_env.py" + base_env_path = Path(__file__).parent.parent.parent / "environments" / "hermes_base_env.py" source = base_env_path.read_text() tree = ast.parse(source) @@ -171,7 +171,7 @@ class TestBaseEnvCompatibility: 
def test_hermes_base_env_uses_config_tool_call_parser(self): """Verify hermes_base_env uses the config field rather than a local parser instance.""" - base_env_path = Path(__file__).parent.parent / "environments" / "hermes_base_env.py" + base_env_path = Path(__file__).parent.parent.parent / "environments" / "hermes_base_env.py" source = base_env_path.read_text() assert 'tool_call_parser: str = Field(' in source diff --git a/tests/tools/test_send_message_missing_platforms.py b/tests/tools/test_send_message_missing_platforms.py index 881ae33d2b..a6741e16dc 100644 --- a/tests/tools/test_send_message_missing_platforms.py +++ b/tests/tools/test_send_message_missing_platforms.py @@ -125,7 +125,9 @@ class TestSendMatrix: url = call_kwargs[0][0] assert url.startswith("https://matrix.example.com/_matrix/client/v3/rooms/!room:example.com/send/m.room.message/") assert call_kwargs[1]["headers"]["Authorization"] == "Bearer syt_tok" - assert call_kwargs[1]["json"] == {"msgtype": "m.text", "body": "hello matrix"} + payload = call_kwargs[1]["json"] + assert payload["msgtype"] == "m.text" + assert payload["body"] == "hello matrix" def test_http_error(self): resp = _make_aiohttp_resp(403, text_data="Forbidden") diff --git a/tests/tools/test_vision_tools.py b/tests/tools/test_vision_tools.py index 97ee57a11a..6612f0e893 100644 --- a/tests/tools/test_vision_tools.py +++ b/tests/tools/test_vision_tools.py @@ -30,7 +30,10 @@ class TestValidateImageUrl: """Tests for URL validation, including urlparse-based netloc check.""" def test_valid_https_url(self): - assert _validate_image_url("https://example.com/image.jpg") is True + with patch("tools.url_safety.socket.getaddrinfo", return_value=[ + (2, 1, 6, "", ("93.184.216.34", 0)), + ]): + assert _validate_image_url("https://example.com/image.jpg") is True def test_valid_http_url(self): with patch("tools.url_safety.socket.getaddrinfo", return_value=[ @@ -56,10 +59,16 @@ class TestValidateImageUrl: assert 
_validate_image_url("http://localhost:8080/image.png") is False def test_valid_url_with_port(self): - assert _validate_image_url("http://example.com:8080/image.png") is True + with patch("tools.url_safety.socket.getaddrinfo", return_value=[ + (2, 1, 6, "", ("93.184.216.34", 0)), + ]): + assert _validate_image_url("http://example.com:8080/image.png") is True def test_valid_url_with_path_only(self): - assert _validate_image_url("https://example.com/") is True + with patch("tools.url_safety.socket.getaddrinfo", return_value=[ + (2, 1, 6, "", ("93.184.216.34", 0)), + ]): + assert _validate_image_url("https://example.com/") is True def test_rejects_empty_string(self): assert _validate_image_url("") is False @@ -441,6 +450,11 @@ class TestVisionRequirements: (tmp_path / "auth.json").write_text( '{"active_provider":"openai-codex","providers":{"openai-codex":{"tokens":{"access_token":"codex-access-token","refresh_token":"codex-refresh-token"}}}}' ) + # config.yaml must reference the codex provider so vision auto-detect + # falls back to the active provider via _read_main_provider(). 
+ (tmp_path / "config.yaml").write_text( + 'model:\n default: gpt-4o\n provider: openai-codex\n' + ) monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) monkeypatch.delenv("OPENAI_BASE_URL", raising=False) monkeypatch.delenv("OPENAI_API_KEY", raising=False) diff --git a/tests/tools/test_web_tools_tavily.py b/tests/tools/test_web_tools_tavily.py index 2e49b72f16..aef39e8e16 100644 --- a/tests/tools/test_web_tools_tavily.py +++ b/tests/tools/test_web_tools_tavily.py @@ -225,6 +225,7 @@ class TestWebCrawlTavily: patch.dict(os.environ, {"TAVILY_API_KEY": "tvly-test"}), \ patch("tools.web_tools.httpx.post", return_value=mock_response), \ patch("tools.web_tools.check_website_access", return_value=None), \ + patch("tools.web_tools.is_safe_url", return_value=True), \ patch("tools.interrupt.is_interrupted", return_value=False): from tools.web_tools import web_crawl_tool result = json.loads(asyncio.get_event_loop().run_until_complete( @@ -244,6 +245,7 @@ class TestWebCrawlTavily: patch.dict(os.environ, {"TAVILY_API_KEY": "tvly-test"}), \ patch("tools.web_tools.httpx.post", return_value=mock_response) as mock_post, \ patch("tools.web_tools.check_website_access", return_value=None), \ + patch("tools.web_tools.is_safe_url", return_value=True), \ patch("tools.interrupt.is_interrupted", return_value=False): from tools.web_tools import web_crawl_tool asyncio.get_event_loop().run_until_complete( diff --git a/website/docs/getting-started/nix-setup.md b/website/docs/getting-started/nix-setup.md index 8bd1924053..4db4939868 100644 --- a/website/docs/getting-started/nix-setup.md +++ b/website/docs/getting-started/nix-setup.md @@ -74,7 +74,7 @@ This module requires NixOS. 
For non-NixOS systems (macOS, other Linux distros), # /etc/nixos/flake.nix (or your system flake) { inputs = { - nixpkgs.url = "github:NixOS/nixpkgs/nixos-24.11"; + nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; hermes-agent.url = "github:NousResearch/hermes-agent"; }; diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md index fbfa69ade6..133990b442 100644 --- a/website/docs/integrations/providers.md +++ b/website/docs/integrations/providers.md @@ -657,8 +657,8 @@ model: #### Responses get cut off mid-sentence **Possible causes:** -1. **Low `max_tokens` on the server** — SGLang defaults to 128 tokens per response. Set `--default-max-tokens` on the server or configure Hermes with `model.max_tokens` in config.yaml. -2. **Context exhaustion** — The model filled its context window. Increase context length or enable [context compression](/docs/user-guide/configuration#context-compression) in Hermes. +1. **Low output cap (`max_tokens`) on the server** — SGLang defaults to 128 tokens per response. Set `--default-max-tokens` on the server or configure Hermes with `model.max_tokens` in config.yaml. Note: `max_tokens` controls response length only — it is unrelated to how long your conversation history can be (that is `context_length`). +2. **Context exhaustion** — The model filled its context window. Increase `model.context_length` or enable [context compression](/docs/user-guide/configuration#context-compression) in Hermes. --- @@ -751,6 +751,15 @@ model: ### Context Length Detection +:::note Two settings, easy to confuse +**`context_length`** is the **total context window** — the combined budget for input *and* output tokens (e.g. 200,000 for Claude Opus 4.6). Hermes uses this to decide when to compress history and to validate API requests. + +**`model.max_tokens`** is the **output cap** — the maximum number of tokens the model may generate in a *single response*. 
It has nothing to do with how long your conversation history can be. The industry-standard name `max_tokens` is a common source of confusion; Anthropic's native API has since renamed it `max_output_tokens` for clarity. + +Set `context_length` when auto-detection gets the window size wrong. +Set `model.max_tokens` only when you need to limit how long individual responses can be. +::: + Hermes uses a multi-source resolution chain to detect the correct context window for your model and provider: 1. **Config override** — `model.context_length` in config.yaml (highest priority) diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md index 89a30c46b6..a695d8dc12 100644 --- a/website/docs/reference/slash-commands.md +++ b/website/docs/reference/slash-commands.md @@ -46,7 +46,6 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in | `/config` | Show current configuration | | `/model [model-name]` | Show or change the current model. Supports: `/model claude-sonnet-4`, `/model provider:model` (switch providers), `/model custom:model` (custom endpoint), `/model custom:name:model` (named custom provider), `/model custom` (auto-detect from endpoint) | | `/provider` | Show available providers and current provider | -| `/prompt` | View/set custom system prompt | | `/personality` | Set a predefined personality | | `/verbose` | Cycle tool progress display: off → new → all → verbose. Can be [enabled for messaging](#notes) via config. | | `/reasoning` | Manage reasoning effort and display (usage: /reasoning [level\|show\|hide]) | @@ -144,7 +143,7 @@ The messaging gateway supports the following built-in commands inside Telegram, ## Notes -- `/skin`, `/tools`, `/toolsets`, `/browser`, `/config`, `/prompt`, `/cron`, `/skills`, `/platforms`, `/paste`, `/statusbar`, and `/plugins` are **CLI-only** commands. 
+- `/skin`, `/tools`, `/toolsets`, `/browser`, `/config`, `/cron`, `/skills`, `/platforms`, `/paste`, `/statusbar`, and `/plugins` are **CLI-only** commands. - `/verbose` is **CLI-only by default**, but can be enabled for messaging platforms by setting `display.tool_progress_command: true` in `config.yaml`. When enabled, it cycles the `display.tool_progress` mode and saves to config. - `/status`, `/sethome`, `/update`, `/approve`, `/deny`, and `/commands` are **messaging-only** commands. - `/background`, `/voice`, `/reload-mcp`, `/rollback`, and `/yolo` work in **both** the CLI and the messaging gateway.