diff --git a/agent/credential_pool.py b/agent/credential_pool.py index f6cb24dd6b..4f1395d17f 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -14,6 +14,7 @@ from datetime import datetime from typing import Any, Dict, List, Optional, Set, Tuple from hermes_constants import OPENROUTER_BASE_URL +from hermes_cli.config import get_env_value import hermes_cli.auth as auth_mod from hermes_cli.auth import ( CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS, @@ -1273,7 +1274,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool def _is_source_suppressed(_p, _s): # type: ignore[misc] return False if provider == "openrouter": - token = os.getenv("OPENROUTER_API_KEY", "").strip() + # Check both os.environ and ~/.hermes/.env file + token = (get_env_value("OPENROUTER_API_KEY") or "").strip() if token: source = "env:OPENROUTER_API_KEY" if _is_source_suppressed(provider, source): @@ -1299,7 +1301,7 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool env_url = "" if pconfig.base_url_env_var: - env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/") + env_url = (get_env_value(pconfig.base_url_env_var) or "").strip().rstrip("/") env_vars = list(pconfig.api_key_env_vars) if provider == "anthropic": @@ -1310,7 +1312,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool ] for env_var in env_vars: - token = os.getenv(env_var, "").strip() + # Check both os.environ and ~/.hermes/.env file + token = (get_env_value(env_var) or "").strip() if not token: continue source = f"env:{env_var}" diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 29d5e1e89b..62c18218b1 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -145,10 +145,11 @@ DEFAULT_CONTEXT_LENGTHS = { "claude": 200000, # OpenAI — GPT-5 family (most have 400k; specific overrides first) # Source: https://developers.openai.com/api/docs/models - # GPT-5.5 (launched Apr 23 2026). 
400k is the fallback for providers we - # can't probe live. ChatGPT Codex OAuth actually caps lower (272k as of - # Apr 2026) and is resolved via _resolve_codex_oauth_context_length(). - "gpt-5.5": 400000, + # GPT-5.5 (launched Apr 23 2026) is 1.05M on the direct OpenAI API and + # ChatGPT Codex OAuth caps it at 272K; both paths resolve via their own + # provider-aware branches (_resolve_codex_oauth_context_length + models.dev). + # This hardcoded value is only reached when every probe misses. + "gpt-5.5": 1050000, "gpt-5.4-nano": 400000, # 400k (not 1.05M like full 5.4) "gpt-5.4-mini": 400000, # 400k (not 1.05M like full 5.4) "gpt-5.4": 1050000, # GPT-5.4, GPT-5.4 Pro (1.05M context) @@ -164,7 +165,17 @@ DEFAULT_CONTEXT_LENGTHS = { "gemma-4-31b": 256000, "gemma-3": 131072, "gemma": 8192, # fallback for older gemma models - # DeepSeek + # DeepSeek — V4 family ships with a 1M context window. The legacy + # aliases ``deepseek-chat`` / ``deepseek-reasoner`` are server-side + # mapped to the non-thinking / thinking modes of ``deepseek-v4-flash`` + # and inherit the same 1M window. The ``deepseek`` substring entry + # below remains as a 128K fallback for older / unknown DeepSeek model + # ids (e.g. via custom endpoints). + # https://api-docs.deepseek.com/zh-cn/quick_start/pricing + "deepseek-v4-pro": 1_000_000, + "deepseek-v4-flash": 1_000_000, + "deepseek-chat": 1_000_000, + "deepseek-reasoner": 1_000_000, "deepseek": 128000, # Meta "llama": 131072, diff --git a/agent/nous_rate_guard.py b/agent/nous_rate_guard.py index 712d8a0f1f..ea866f2e08 100644 --- a/agent/nous_rate_guard.py +++ b/agent/nous_rate_guard.py @@ -180,3 +180,145 @@ def format_remaining(seconds: float) -> str: h, remainder = divmod(s, 3600) m = remainder // 60 return f"{h}h {m}m" if m else f"{h}h" + + +# Buckets with reset windows shorter than this are treated as transient +# (upstream jitter, secondary throttling) rather than a genuine quota +# exhaustion worth a cross-session breaker trip. 
+_MIN_RESET_FOR_BREAKER_SECONDS = 60.0 + + +def is_genuine_nous_rate_limit( + *, + headers: Optional[Mapping[str, str]] = None, + last_known_state: Optional[Any] = None, +) -> bool: + """Decide whether a 429 from Nous Portal is a real account rate limit. + + Nous Portal multiplexes multiple upstream providers (DeepSeek, Kimi, + MiMo, Hermes, ...) behind one endpoint. A 429 can mean either: + + (a) The caller's own RPM / RPH / TPM / TPH bucket on Nous is + exhausted — a genuine rate limit that will last until the + bucket resets. + (b) The upstream provider is out of capacity for a specific model + — transient, clears in seconds, and has nothing to do with + the caller's quota on Nous. + + Tripping the cross-session breaker on (b) blocks ALL Nous requests + (and all models, since Nous is one provider key) for minutes even + though the caller's account is healthy and a different model would + have worked. That's the bug users hit when DeepSeek V4 Pro 429s + trigger a breaker that then blocks Kimi 2.6 and MiMo V2.5 Pro. + + We tell the two apart by looking at: + + 1. The 429 response's own ``x-ratelimit-*`` headers. Nous emits + the full suite on every response including 429s. An exhausted + bucket (``remaining == 0`` with a reset window >= 60s) is + proof of (a). + 2. The last-known-good rate-limit state captured by + ``_capture_rate_limits()`` on the previous successful + response. If any bucket there was already near-exhausted with + a substantial reset window, the current 429 is almost + certainly (a) continuing from that condition. + + If neither signal fires, we treat the 429 as (b): fail the single + request, let the retry loop or model-switch proceed, and do NOT + write the cross-session breaker file. + + Returns True when the evidence points at (a). + """ + # Signal 1: current 429 response headers. 
+ state = _parse_buckets_from_headers(headers) + if _has_exhausted_bucket(state): + return True + + # Signal 2: last-known-good state from a recent successful response. + # Accepts either a RateLimitState (dataclass from rate_limit_tracker) + # or a dict of bucket snapshots. + if last_known_state is not None and _has_exhausted_bucket_in_object(last_known_state): + return True + + return False + + +def _parse_buckets_from_headers( + headers: Optional[Mapping[str, str]], +) -> dict[str, tuple[Optional[int], Optional[float]]]: + """Extract (remaining, reset_seconds) per bucket from x-ratelimit-* headers. + + Returns empty dict when no rate-limit headers are present. + """ + if not headers: + return {} + + lowered = {k.lower(): v for k, v in headers.items()} + if not any(k.startswith("x-ratelimit-") for k in lowered): + return {} + + def _maybe_int(raw: Optional[str]) -> Optional[int]: + if raw is None: + return None + try: + return int(float(raw)) + except (TypeError, ValueError): + return None + + def _maybe_float(raw: Optional[str]) -> Optional[float]: + if raw is None: + return None + try: + return float(raw) + except (TypeError, ValueError): + return None + + result: dict[str, tuple[Optional[int], Optional[float]]] = {} + for tag in ("requests", "requests-1h", "tokens", "tokens-1h"): + remaining = _maybe_int(lowered.get(f"x-ratelimit-remaining-{tag}")) + reset = _maybe_float(lowered.get(f"x-ratelimit-reset-{tag}")) + if remaining is not None or reset is not None: + result[tag] = (remaining, reset) + return result + + +def _has_exhausted_bucket( + buckets: Mapping[str, tuple[Optional[int], Optional[float]]], +) -> bool: + """Return True when any bucket has remaining == 0 AND a meaningful reset window.""" + for remaining, reset in buckets.values(): + if remaining is None or remaining > 0: + continue + if reset is None: + continue + if reset >= _MIN_RESET_FOR_BREAKER_SECONDS: + return True + return False + + +def _has_exhausted_bucket_in_object(state: Any) -> bool: + 
"""Check a RateLimitState-like object for an exhausted bucket. + + Accepts the dataclass from ``agent.rate_limit_tracker`` (buckets + exposed as attributes ``requests_min``, ``requests_hour``, + ``tokens_min``, ``tokens_hour``) and falls back gracefully for any + object missing those attributes. + """ + for attr in ("requests_min", "requests_hour", "tokens_min", "tokens_hour"): + bucket = getattr(state, attr, None) + if bucket is None: + continue + limit = getattr(bucket, "limit", 0) or 0 + remaining = getattr(bucket, "remaining", 0) or 0 + # Prefer the adjusted "remaining_seconds_now" property when present; + # fall back to raw reset_seconds. + reset = getattr(bucket, "remaining_seconds_now", None) + if reset is None: + reset = getattr(bucket, "reset_seconds", 0.0) or 0.0 + if limit <= 0: + continue + if remaining > 0: + continue + if reset >= _MIN_RESET_FOR_BREAKER_SECONDS: + return True + return False diff --git a/agent/onboarding.py b/agent/onboarding.py new file mode 100644 index 0000000000..eed832ab90 --- /dev/null +++ b/agent/onboarding.py @@ -0,0 +1,144 @@ +""" +Contextual first-touch onboarding hints. + +Instead of blocking first-run questionnaires, show a one-time hint the *first* +time a user hits a behavior fork — message-while-running, first long-running +tool, etc. Each hint is shown once per install (tracked in ``config.yaml`` under +``onboarding.seen.``) and then never again. + +Keep this module tiny and dependency-free so both the CLI and gateway can import +it without pulling in heavy modules. 
+""" + +from __future__ import annotations + +import logging +from pathlib import Path +from typing import Any, Mapping, Optional + +logger = logging.getLogger(__name__) + + +# ------------------------------------------------------------------------- +# Flag names (stable — used as config.yaml keys under onboarding.seen) +# ------------------------------------------------------------------------- + +BUSY_INPUT_FLAG = "busy_input_prompt" +TOOL_PROGRESS_FLAG = "tool_progress_prompt" + + +# ------------------------------------------------------------------------- +# Hint content +# ------------------------------------------------------------------------- + +def busy_input_hint_gateway(mode: str) -> str: + """Hint shown the first time a user messages while the agent is busy. + + ``mode`` is the effective busy_input_mode that was just applied, so the + message matches reality ("I just interrupted…" vs "I just queued…"). + """ + if mode == "queue": + return ( + "💡 First-time tip — I queued your message instead of interrupting. " + "Send `/busy interrupt` to make new messages stop the current task " + "immediately, or `/busy status` to check. This notice won't appear again." + ) + return ( + "💡 First-time tip — I just interrupted my current task to answer you. " + "Send `/busy queue` to queue follow-ups for after the current task instead, " + "or `/busy status` to check. This notice won't appear again." + ) + + +def busy_input_hint_cli(mode: str) -> str: + """CLI version of the busy-input hint (plain text, no markdown).""" + if mode == "queue": + return ( + "(tip) Your message was queued for the next turn. " + "Use /busy interrupt to make Enter stop the current run instead. " + "This tip only shows once." + ) + return ( + "(tip) Your message interrupted the current run. " + "Use /busy queue to queue messages for the next turn instead. " + "This tip only shows once." 
+ ) + + +def tool_progress_hint_gateway() -> str: + return ( + "💡 First-time tip — that tool took a while and I'm streaming every step. " + "If the progress messages feel noisy, send `/verbose` to cycle modes " + "(all → new → off). This notice won't appear again." + ) + + +def tool_progress_hint_cli() -> str: + return ( + "(tip) That tool ran for a while. Use /verbose to cycle tool-progress " + "display modes (all -> new -> off -> verbose). This tip only shows once." + ) + + +# ------------------------------------------------------------------------- +# State read / write +# ------------------------------------------------------------------------- + +def _get_seen_dict(config: Mapping[str, Any]) -> Mapping[str, Any]: + onboarding = config.get("onboarding") if isinstance(config, Mapping) else None + if not isinstance(onboarding, Mapping): + return {} + seen = onboarding.get("seen") + return seen if isinstance(seen, Mapping) else {} + + +def is_seen(config: Mapping[str, Any], flag: str) -> bool: + """Return True if the user has already been shown this first-touch hint.""" + return bool(_get_seen_dict(config).get(flag)) + + +def mark_seen(config_path: Path, flag: str) -> bool: + """Persist ``onboarding.seen. = True`` to ``config_path``. + + Uses the atomic YAML writer so a concurrent process can't observe a + partially-written file. Returns True on success, False on any error + (including the config file being absent — onboarding is best-effort). 
+ """ + try: + import yaml + from utils import atomic_yaml_write + except Exception as e: # pragma: no cover — dependency issue + logger.debug("onboarding: failed to import yaml/utils: %s", e) + return False + + try: + cfg: dict = {} + if config_path.exists(): + with open(config_path, encoding="utf-8") as f: + cfg = yaml.safe_load(f) or {} + if not isinstance(cfg.get("onboarding"), dict): + cfg["onboarding"] = {} + seen = cfg["onboarding"].get("seen") + if not isinstance(seen, dict): + seen = {} + cfg["onboarding"]["seen"] = seen + if seen.get(flag) is True: + return True # already marked — nothing to do + seen[flag] = True + atomic_yaml_write(config_path, cfg) + return True + except Exception as e: + logger.debug("onboarding: failed to mark flag %s: %s", flag, e) + return False + + +__all__ = [ + "BUSY_INPUT_FLAG", + "TOOL_PROGRESS_FLAG", + "busy_input_hint_gateway", + "busy_input_hint_cli", + "tool_progress_hint_gateway", + "tool_progress_hint_cli", + "is_seen", + "mark_seen", +] diff --git a/agent/skill_commands.py b/agent/skill_commands.py index 6b73e83b3e..19c9b06c6c 100644 --- a/agent/skill_commands.py +++ b/agent/skill_commands.py @@ -329,7 +329,7 @@ def build_skill_invocation_message( loaded_skill, skill_dir, skill_name = loaded activation_note = ( - f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want ' + f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want ' "you to follow its instructions. The full skill content is loaded below.]" ) return _build_skill_message( @@ -368,7 +368,7 @@ def build_preloaded_skills_prompt( loaded_skill, skill_dir, skill_name = loaded activation_note = ( - f'[SYSTEM: The user launched this CLI session with the "{skill_name}" skill ' + f'[IMPORTANT: The user launched this CLI session with the "{skill_name}" skill ' "preloaded. 
Treat its instructions as active guidance for the duration of this " "session unless the user overrides them.]" ) diff --git a/cli-config.yaml.example b/cli-config.yaml.example index 90d98490c5..56090dca8b 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -824,7 +824,9 @@ delegation: # Display # ============================================================================= display: - # Use compact banner mode + # Use compact banner mode (hides the ASCII-art banner, shows a single line). + # true: Compact single-line banner + # false: Full ASCII banner with tool/skill summary (default) compact: false # Tool progress display level (CLI and gateway) @@ -838,12 +840,15 @@ display: # Gateway-only natural mid-turn assistant updates. # When true, completed assistant status messages are sent as separate chat # messages. This is independent of tool_progress and gateway streaming. + # true: Send mid-turn assistant updates as separate messages (default) + # false: Only send the final response interim_assistant_messages: true - # What Enter does when Hermes is already busy in the CLI. + # What Enter does when Hermes is already busy (CLI and gateway platforms). # interrupt: Interrupt the current run and redirect Hermes (default) # queue: Queue your message for the next turn - # Ctrl+C always interrupts regardless of this setting. + # Ctrl+C (or /stop in gateway) always interrupts regardless of this setting. + # Toggle at runtime with /busy_input_mode . busy_input_mode: interrupt # Background process notifications (gateway/messaging only). @@ -859,17 +864,22 @@ display: # Play terminal bell when agent finishes a response. # Useful for long-running tasks — your terminal will ding when the agent is done. # Works over SSH. Most terminals can be configured to flash the taskbar or play a sound. + # true: Ring the terminal bell on each response + # false: Silent (default) bell_on_complete: false # Show model reasoning/thinking before each response. 
# When enabled, a dim box shows the model's thought process above the response. # Toggle at runtime with /reasoning show or /reasoning hide. + # true: Show the reasoning box + # false: Hide reasoning (default) show_reasoning: false # Stream tokens to the terminal as they arrive instead of waiting for the # full response. The response box opens on first token and text appears # line-by-line. Tool calls are still captured silently. - # Stream tokens to the terminal in real-time. Disable to wait for full responses. + # true: Stream tokens as they arrive (default) + # false: Wait for the full response before rendering streaming: true # ─────────────────────────────────────────────────────────────────────────── @@ -879,10 +889,15 @@ display: # response box label, and branding text. Change at runtime with /skin . # # Built-in skins: - # default — Classic Hermes gold/kawaii - # ares — Crimson/bronze war-god theme with spinner wings - # mono — Clean grayscale monochrome - # slate — Cool blue developer-focused + # default — Classic Hermes gold/kawaii + # ares — Crimson/bronze war-god theme with spinner wings + # mono — Clean grayscale monochrome + # slate — Cool blue developer-focused + # daylight — Bright light-mode theme + # warm-lightmode — Warm paper-tone light-mode theme + # poseidon — Sea-green/teal Olympian theme + # sisyphus — Earthy stone-and-moss theme + # charizard — Fiery orange dragon theme # # Custom skins: drop a YAML file in ~/.hermes/skins/.yaml # Schema (all fields optional, missing values inherit from default): diff --git a/cli.py b/cli.py index 9f3e8964c4..60103bf956 100644 --- a/cli.py +++ b/cli.py @@ -417,6 +417,11 @@ def load_cli_config() -> Dict[str, Any]: "base_url": "", # Direct OpenAI-compatible endpoint for subagents "api_key": "", # API key for delegation.base_url (falls back to OPENAI_API_KEY) }, + "onboarding": { + # First-touch hint flags (see agent/onboarding.py). Each hint is + # shown once per install then latched here. 
+ "seen": {}, + }, } # Track whether the config file explicitly set terminal config. @@ -1373,7 +1378,7 @@ def _resolve_attachment_path(raw_path: str) -> Path | None: def _format_process_notification(evt: dict) -> "str | None": - """Format a process notification event into a [SYSTEM: ...] message. + """Format a process notification event into a [IMPORTANT: ...] message. Handles both completion events (notify_on_complete) and watch pattern match events from the unified completion_queue. @@ -1383,14 +1388,14 @@ def _format_process_notification(evt: dict) -> "str | None": _cmd = evt.get("command", "unknown") if evt_type == "watch_disabled": - return f"[SYSTEM: {evt.get('message', '')}]" + return f"[IMPORTANT: {evt.get('message', '')}]" if evt_type == "watch_match": _pat = evt.get("pattern", "?") _out = evt.get("output", "") _sup = evt.get("suppressed", 0) text = ( - f"[SYSTEM: Background process {_sid} matched " + f"[IMPORTANT: Background process {_sid} matched " f"watch pattern \"{_pat}\".\n" f"Command: {_cmd}\n" f"Matched output:\n{_out}" @@ -1404,7 +1409,7 @@ def _format_process_notification(evt: dict) -> "str | None": _exit = evt.get("exit_code", "?") _out = evt.get("output", "") return ( - f"[SYSTEM: Background process {_sid} completed " + f"[IMPORTANT: Background process {_sid} completed " f"(exit code {_exit}).\n" f"Command: {_cmd}\n" f"Output:\n{_out}]" @@ -4910,6 +4915,12 @@ class HermesCLI: if self.agent: self.agent.session_id = new_session_id self.agent.session_start = now + # Redirect the JSON session log to the new branch session file so + # messages written after branching land in the correct file. 
+ if hasattr(self.agent, "session_log_file") and hasattr(self.agent, "logs_dir"): + self.agent.session_log_file = ( + self.agent.logs_dir / f"session_{new_session_id}.json" + ) self.agent.reset_session_state() if hasattr(self.agent, "_last_flushed_db_idx"): self.agent._last_flushed_db_idx = len(self.conversation_history) @@ -5153,27 +5164,29 @@ class HermesCLI: _cprint(f" ✓ Model switched: {result.new_model}") _cprint(f" Provider: {provider_label}") + # Context: always resolve via the provider-aware chain so Codex OAuth, + # Copilot, and Nous-enforced caps win over the raw models.dev entry + # (e.g. gpt-5.5 is 1.05M on openai but 272K on Codex OAuth). mi = result.model_info + try: + from hermes_cli.model_switch import resolve_display_context_length + ctx = resolve_display_context_length( + result.new_model, + result.target_provider, + base_url=result.base_url or self.base_url or "", + api_key=result.api_key or self.api_key or "", + model_info=mi, + ) + if ctx: + _cprint(f" Context: {ctx:,} tokens") + except Exception: + pass if mi: - if mi.context_window: - _cprint(f" Context: {mi.context_window:,} tokens") if mi.max_output: _cprint(f" Max output: {mi.max_output:,} tokens") if mi.has_cost_data(): _cprint(f" Cost: {mi.format_cost()}") _cprint(f" Capabilities: {mi.format_capabilities()}") - else: - try: - from agent.model_metadata import get_model_context_length - ctx = get_model_context_length( - result.new_model, - base_url=result.base_url or self.base_url, - api_key=result.api_key or self.api_key, - provider=result.target_provider, - ) - _cprint(f" Context: {ctx:,} tokens") - except Exception: - pass cache_enabled = ( (base_url_host_matches(result.base_url or "", "openrouter.ai") and "claude" in result.new_model.lower()) @@ -6122,8 +6135,6 @@ class HermesCLI: self._handle_agents_command() elif canonical == "background": self._handle_background_command(cmd_original) - elif canonical == "btw": - self._handle_btw_command(cmd_original) elif canonical == "queue": # 
Extract prompt after "/queue " or "/q " parts = cmd_original.split(None, 1) @@ -6410,122 +6421,6 @@ class HermesCLI: self._background_tasks[task_id] = thread thread.start() - def _handle_btw_command(self, cmd: str): - """Handle /btw — ephemeral side question using session context. - - Snapshots the current conversation history, spawns a no-tools agent in - a background thread, and prints the answer without persisting anything - to the main session. - """ - parts = cmd.strip().split(maxsplit=1) - if len(parts) < 2 or not parts[1].strip(): - _cprint(" Usage: /btw ") - _cprint(" Example: /btw what module owns session title sanitization?") - _cprint(" Answers using session context. No tools, not persisted.") - return - - question = parts[1].strip() - task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{uuid.uuid4().hex[:6]}" - - if not self._ensure_runtime_credentials(): - _cprint(" (>_<) Cannot start /btw: no valid credentials.") - return - - turn_route = self._resolve_turn_agent_config(question) - history_snapshot = list(self.conversation_history) - - preview = question[:60] + ("..." 
if len(question) > 60 else "") - _cprint(f' 💬 /btw: "{preview}"') - - def run_btw(): - try: - btw_agent = AIAgent( - model=turn_route["model"], - api_key=turn_route["runtime"].get("api_key"), - base_url=turn_route["runtime"].get("base_url"), - provider=turn_route["runtime"].get("provider"), - api_mode=turn_route["runtime"].get("api_mode"), - acp_command=turn_route["runtime"].get("command"), - acp_args=turn_route["runtime"].get("args"), - max_iterations=8, - enabled_toolsets=[], - quiet_mode=True, - verbose_logging=False, - session_id=task_id, - platform="cli", - reasoning_config=self.reasoning_config, - service_tier=self.service_tier, - request_overrides=turn_route.get("request_overrides"), - providers_allowed=self._providers_only, - providers_ignored=self._providers_ignore, - providers_order=self._providers_order, - provider_sort=self._provider_sort, - provider_require_parameters=self._provider_require_params, - provider_data_collection=self._provider_data_collection, - fallback_model=self._fallback_model, - session_db=None, - skip_memory=True, - skip_context_files=True, - persist_session=False, - ) - - btw_prompt = ( - "[Ephemeral /btw side question. Answer using the conversation " - "context. No tools available. 
Be direct and concise.]\n\n" - + question - ) - result = btw_agent.run_conversation( - user_message=btw_prompt, - conversation_history=history_snapshot, - task_id=task_id, - ) - - response = (result.get("final_response") or "") if result else "" - if not response and result and result.get("error"): - response = f"Error: {result['error']}" - - # TUI refresh before printing - if self._app: - self._app.invalidate() - time.sleep(0.05) - print() - - if response: - try: - from hermes_cli.skin_engine import get_active_skin - _skin = get_active_skin() - _resp_color = _skin.get_color("response_border", "#4F6D4A") - except Exception: - _resp_color = "#4F6D4A" - - ChatConsole().print(Panel( - _render_final_assistant_content(response, mode=self.final_response_markdown), - title=f"[{_resp_color} bold]⚕ /btw[/]", - title_align="left", - border_style=_resp_color, - box=rich_box.HORIZONTALS, - padding=(1, 4), - )) - else: - _cprint(" 💬 /btw: (no response)") - - if self.bell_on_complete: - sys.stdout.write("\a") - sys.stdout.flush() - - except Exception as e: - if self._app: - self._app.invalidate() - time.sleep(0.05) - print() - _cprint(f" ❌ /btw failed: {e}") - finally: - if self._app: - self._invalidate(min_interval=0) - - thread = threading.Thread(target=run_btw, daemon=True, name=f"btw-{task_id}") - thread.start() - @staticmethod def _try_launch_chrome_debug(port: int, system: str) -> bool: """Try to launch Chrome/Chromium with remote debugging enabled. @@ -7328,7 +7223,7 @@ class HermesCLI: change_detail = ". ".join(change_parts) + ". " if change_parts else "" self.conversation_history.append({ "role": "user", - "content": f"[SYSTEM: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]", + "content": f"[IMPORTANT: MCP servers have been reloaded. {change_detail}{tool_summary}. 
The tool list for this conversation has been updated accordingly.]", }) # Persist session immediately so the session log reflects the @@ -7410,6 +7305,31 @@ class HermesCLI: _cprint(f" {line}") except Exception: pass + # First-touch onboarding: on the first tool in this process + # that takes longer than the threshold while we're in the + # noisiest progress mode, print a one-time hint about + # /verbose. Latched on self so it fires at most once per + # process; persisted to config.yaml so it never fires again + # across processes either. + try: + if ( + not getattr(self, "_long_tool_hint_fired", False) + and self.tool_progress_mode == "all" + and duration >= 30.0 + ): + from agent.onboarding import ( + TOOL_PROGRESS_FLAG, + is_seen, + mark_seen, + tool_progress_hint_cli, + ) + if not is_seen(CLI_CONFIG, TOOL_PROGRESS_FLAG): + self._long_tool_hint_fired = True + _cprint(f" {_DIM}{tool_progress_hint_cli()}{_RST}") + mark_seen(_hermes_home / "config.yaml", TOOL_PROGRESS_FLAG) + CLI_CONFIG.setdefault("onboarding", {}).setdefault("seen", {})[TOOL_PROGRESS_FLAG] = True + except Exception: + pass self._invalidate() return if event_type != "tool.started": @@ -9293,6 +9213,24 @@ class HermesCLI: f"agent_running={self._agent_running}\n") except Exception: pass + # First-touch onboarding: on the very first busy-while-running + # event for this install, print a one-line tip explaining the + # /busy knob. Flag persists to config.yaml and never fires + # again. Guarded for exceptions so onboarding can't break + # the input loop. 
+ try: + from agent.onboarding import ( + BUSY_INPUT_FLAG, + busy_input_hint_cli, + is_seen, + mark_seen, + ) + if not is_seen(CLI_CONFIG, BUSY_INPUT_FLAG): + _cprint(f" {_DIM}{busy_input_hint_cli(self.busy_input_mode)}{_RST}") + mark_seen(_hermes_home / "config.yaml", BUSY_INPUT_FLAG) + CLI_CONFIG.setdefault("onboarding", {}).setdefault("seen", {})[BUSY_INPUT_FLAG] = True + except Exception: + pass else: self._pending_input.put(payload) event.app.current_buffer.reset(append_to_history=True) @@ -9909,7 +9847,7 @@ class HermesCLI: status = cli_ref._command_status or "Processing command..." return f"{frame} {status}" if cli_ref._agent_running: - return "type a message + Enter to interrupt, Ctrl+C to cancel" + return "msg=interrupt · /queue · /bg · /steer · Ctrl+C cancel" if cli_ref._voice_mode: return "type or Ctrl+B to record" return "" diff --git a/cron/scheduler.py b/cron/scheduler.py index 32b351aa04..2ca012ea05 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -715,7 +715,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str: # Always prepend cron execution guidance so the agent knows how # delivery works and can suppress delivery when appropriate. cron_hint = ( - "[SYSTEM: You are running as a scheduled cron job. " + "[IMPORTANT: You are running as a scheduled cron job. " "DELIVERY: Your final response will be automatically delivered " "to the user — do NOT use send_message or try to deliver " "the output yourself. Just produce your report/output as your " @@ -751,7 +751,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str: parts.append("") parts.extend( [ - f'[SYSTEM: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. The full skill content is loaded below.]', + f'[IMPORTANT: The user has invoked the "{skill_name}" skill, indicating they want you to follow its instructions. 
The full skill content is loaded below.]', "", content, ] @@ -759,7 +759,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str: if skipped: notice = ( - f"[SYSTEM: The following skill(s) were listed for this job but could not be found " + f"[IMPORTANT: The following skill(s) were listed for this job but could not be found " f"and were skipped: {', '.join(skipped)}. " f"Start your response with a brief notice so the user is aware, e.g.: " f"'⚠️ Skill(s) not found and skipped: {', '.join(skipped)}']" diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 0be1d656c2..299aab97a2 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -41,6 +41,15 @@ if [ "$(id -u)" = "0" ]; then echo "Warning: chown failed (rootless container?) — continuing anyway" fi + # Ensure config.yaml is readable by the hermes runtime user even if it was + # edited on the host after initial ownership setup. Must run here (as root) + # rather than after the gosu drop, otherwise a non-root caller like + # `docker run -u $(id -u):$(id -g)` hits "Operation not permitted" (#15865). + if [ -f "$HERMES_HOME/config.yaml" ]; then + chown hermes:hermes "$HERMES_HOME/config.yaml" 2>/dev/null || true + chmod 640 "$HERMES_HOME/config.yaml" 2>/dev/null || true + fi + echo "Dropping root privileges" exec gosu hermes "$0" "$@" fi @@ -67,13 +76,6 @@ if [ ! -f "$HERMES_HOME/config.yaml" ]; then cp "$INSTALL_DIR/cli-config.yaml.example" "$HERMES_HOME/config.yaml" fi -# Ensure the main config file remains accessible to the hermes runtime user -# even if it was edited on the host after initial ownership setup. -if [ -f "$HERMES_HOME/config.yaml" ]; then - chown hermes:hermes "$HERMES_HOME/config.yaml" - chmod 640 "$HERMES_HOME/config.yaml" -fi - # SOUL.md if [ ! 
-f "$HERMES_HOME/SOUL.md" ]; then cp "$INSTALL_DIR/docker/SOUL.md" "$HERMES_HOME/SOUL.md" diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 2732513854..8cb4f7c0eb 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -1025,7 +1025,20 @@ class BasePlatformAdapter(ABC): self._post_delivery_callbacks: Dict[str, Any] = {} self._expected_cancelled_tasks: set[asyncio.Task] = set() self._busy_session_handler: Optional[Callable[[MessageEvent, str], Awaitable[bool]]] = None - # Chats where auto-TTS on voice input is disabled (set by /voice off) + # Auto-TTS on voice input: ``_auto_tts_default`` is the global default + # (``voice.auto_tts`` in config.yaml, pushed by GatewayRunner on connect). + # Per-chat overrides live in two sets populated from ``_voice_mode``: + # - ``_auto_tts_enabled_chats``: chat explicitly opted in via ``/voice on`` + # or ``/voice tts`` (mode is ``voice_only`` or ``all``). Fires even when + # the global default is False. + # - ``_auto_tts_disabled_chats``: chat explicitly opted out via + # ``/voice off`` (mode is ``off``). Suppresses auto-TTS even when the + # global default is True. + # The gate in _process_message() is: + # fire if chat in _auto_tts_enabled_chats + # OR (_auto_tts_default and chat not in _auto_tts_disabled_chats) + self._auto_tts_default: bool = False + self._auto_tts_enabled_chats: set = set() self._auto_tts_disabled_chats: set = set() # Chats where typing indicator is paused (e.g. during approval waits). # _keep_typing skips send_typing when the chat_id is in this set. @@ -1047,6 +1060,21 @@ class BasePlatformAdapter(ABC): def fatal_error_retryable(self) -> bool: return self._fatal_error_retryable + def _should_auto_tts_for_chat(self, chat_id: str) -> bool: + """Whether auto-TTS on voice input should fire for ``chat_id``. + + Decision layers (Issue #16007): + 1. Explicit ``/voice on`` or ``/voice tts`` → always fire (even if + ``voice.auto_tts`` is False). + 2. 
Explicit ``/voice off`` → never fire. + 3. Fall back to the global ``voice.auto_tts`` config default. + """ + if chat_id in self._auto_tts_enabled_chats: + return True + if chat_id in self._auto_tts_disabled_chats: + return False + return bool(self._auto_tts_default) + def set_fatal_error_handler(self, handler: Callable[["BasePlatformAdapter"], Awaitable[None] | None]) -> None: self._fatal_error_handler = handler @@ -2214,12 +2242,14 @@ class BasePlatformAdapter(ABC): logger.info("[%s] extract_local_files found %d file(s) in response", self.name, len(local_files)) # Auto-TTS: if voice message, generate audio FIRST (before sending text) - # Skipped when the chat has voice mode disabled (/voice off) + # Gated via ``_should_auto_tts_for_chat``: fires when the chat has + # an explicit ``/voice on|tts`` opt-in OR when ``voice.auto_tts`` is + # True globally and no ``/voice off`` has been issued. _tts_path = None - if (event.message_type == MessageType.VOICE + if (self._should_auto_tts_for_chat(event.source.chat_id) + and event.message_type == MessageType.VOICE and text_content - and not media_files - and event.source.chat_id not in self._auto_tts_disabled_chats): + and not media_files): try: from tools.tts_tool import text_to_speech_tool, check_tts_requirements if check_tts_requirements(): diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 5d30f244e8..b4018c6df6 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -2315,11 +2315,6 @@ class DiscordAdapter(BasePlatformAdapter): async def slash_background(interaction: discord.Interaction, prompt: str): await self._run_simple_slash(interaction, f"/background {prompt}", "Background task started~") - @tree.command(name="btw", description="Ephemeral side question using session context") - @discord.app_commands.describe(question="Your side question (no tools, not persisted)") - async def slash_btw(interaction: discord.Interaction, question: str): - await 
self._run_simple_slash(interaction, f"/btw {question}") - # ── Auto-register any gateway-available commands not yet on the tree ── # This ensures new commands added to COMMAND_REGISTRY in # hermes_cli/commands.py automatically appear as Discord slash diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py index 191689a5ae..61cc7020a2 100644 --- a/gateway/platforms/slack.py +++ b/gateway/platforms/slack.py @@ -207,8 +207,31 @@ class SlackAdapter(BasePlatformAdapter): async def handle_assistant_thread_context_changed(event, say): await self._handle_assistant_thread_lifecycle_event(event) - # Register slash command handler - @self._app.command("/hermes") + # Register slash command handler(s) + # + # Every gateway command from COMMAND_REGISTRY is a native Slack + # slash, matching Discord and Telegram's model (e.g. /btw, /stop, + # /model work directly without /hermes prefix). A single regex + # matcher dispatches all of them to one handler so we don't need + # N identical @app.command() decorators. + # + # The slash commands must ALSO be declared in the Slack app + # manifest (see `hermes slack manifest`). In Socket Mode, Slack + # routes the command event through the socket regardless of the + # manifest's request URL, but it will not deliver an event for + # a slash command the manifest doesn't declare. 
+ from hermes_cli.commands import slack_native_slashes + import re as _re + + _slash_names = [name for name, _d, _h in slack_native_slashes()] + if _slash_names: + _slash_pattern = _re.compile( + r"^/(?:" + "|".join(_re.escape(n) for n in _slash_names) + r")$" + ) + else: # pragma: no cover - registry always non-empty + _slash_pattern = _re.compile(r"^/hermes$") + + @self._app.command(_slash_pattern) async def handle_hermes_command(ack, command): await ack() await self._handle_slash_command(command) @@ -1561,7 +1584,20 @@ class SlackAdapter(BasePlatformAdapter): return "" async def _handle_slash_command(self, command: dict) -> None: - """Handle /hermes slash command.""" + """Handle Slack slash commands. + + Every gateway command in COMMAND_REGISTRY is registered as a native + Slack slash (``/btw``, ``/stop``, ``/model``, etc.), matching the + Discord and Telegram model. The slash name itself is the command; + any text after it is the argument list. + + The legacy ``/hermes [args]`` form is preserved for + backward compatibility with older workspace manifests and for users + who want a single entry point for free-form questions (``/hermes + what's the weather`` — non-slash text is treated as a regular + message). + """ + slash_name = (command.get("command") or "").lstrip("/").strip() text = command.get("text", "").strip() user_id = command.get("user_id", "") channel_id = command.get("channel_id", "") @@ -1571,20 +1607,25 @@ class SlackAdapter(BasePlatformAdapter): if team_id and channel_id: self._channel_team[channel_id] = team_id - # Map subcommands to gateway commands — derived from central registry. - # Also keep "compact" as a Slack-specific alias for /compress. 
- from hermes_cli.commands import slack_subcommand_map - subcommand_map = slack_subcommand_map() - subcommand_map["compact"] = "/compress" - first_word = text.split()[0] if text else "" - if first_word in subcommand_map: - # Preserve arguments after the subcommand - rest = text[len(first_word):].strip() - text = f"{subcommand_map[first_word]} {rest}".strip() if rest else subcommand_map[first_word] - elif text: - pass # Treat as a regular question + if slash_name in ("hermes", ""): + # Legacy /hermes [args] routing + free-form questions. + # Empty slash_name falls into this branch for backward compat + # with any caller that didn't populate command["command"]. + from hermes_cli.commands import slack_subcommand_map + subcommand_map = slack_subcommand_map() + subcommand_map["compact"] = "/compress" + first_word = text.split()[0] if text else "" + if first_word in subcommand_map: + rest = text[len(first_word):].strip() + text = f"{subcommand_map[first_word]} {rest}".strip() if rest else subcommand_map[first_word] + elif text: + pass # Treat as a regular question + else: + text = "/help" else: - text = "/help" + # Native slash — / [args]. Route directly through the + # gateway command dispatcher by prepending the slash. 
+ text = f"/{slash_name} {text}".strip() source = self.build_source( chat_id=channel_id, diff --git a/gateway/run.py b/gateway/run.py index 05578fa0d8..8fda2c1f1e 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -591,20 +591,20 @@ def _parse_session_key(session_key: str) -> "dict | None": def _format_gateway_process_notification(evt: dict) -> "str | None": - """Format a watch pattern event from completion_queue into a [SYSTEM:] message.""" + """Format a watch pattern event from completion_queue into a [IMPORTANT:] message.""" evt_type = evt.get("type", "completion") _sid = evt.get("session_id", "unknown") _cmd = evt.get("command", "unknown") if evt_type == "watch_disabled": - return f"[SYSTEM: {evt.get('message', '')}]" + return f"[IMPORTANT: {evt.get('message', '')}]" if evt_type == "watch_match": _pat = evt.get("pattern", "?") _out = evt.get("output", "") _sup = evt.get("suppressed", 0) text = ( - f"[SYSTEM: Background process {_sid} matched " + f"[IMPORTANT: Background process {_sid} matched " f"watch pattern \"{_pat}\".\n" f"Command: {_cmd}\n" f"Matched output:\n{_out}" @@ -881,23 +881,74 @@ class GatewayRunner: return if disabled: disabled_chats.add(chat_id) + # ``/voice off`` also clears any explicit enable — it's a hard override. + enabled_chats = getattr(adapter, "_auto_tts_enabled_chats", None) + if isinstance(enabled_chats, set): + enabled_chats.discard(chat_id) else: disabled_chats.discard(chat_id) - def _sync_voice_mode_state_to_adapter(self, adapter) -> None: - """Restore persisted /voice off state into a live platform adapter.""" - disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None) - if not isinstance(disabled_chats, set): + def _set_adapter_auto_tts_enabled(self, adapter, chat_id: str, enabled: bool) -> None: + """Update an adapter's per-chat auto-TTS opt-in set if present. + + Used for ``/voice on``/``/voice tts`` where the user explicitly wants + auto-TTS even when ``voice.auto_tts`` is False globally. 
+ """ + enabled_chats = getattr(adapter, "_auto_tts_enabled_chats", None) + if not isinstance(enabled_chats, set): return + if enabled: + enabled_chats.add(chat_id) + # An explicit opt-in clears any stale /voice off for this chat. + disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None) + if isinstance(disabled_chats, set): + disabled_chats.discard(chat_id) + else: + enabled_chats.discard(chat_id) + + def _sync_voice_mode_state_to_adapter(self, adapter) -> None: + """Restore persisted /voice state into a live platform adapter. + + Populates three fields from config + ``self._voice_mode``: + - ``_auto_tts_default``: global default from ``voice.auto_tts`` + - ``_auto_tts_enabled_chats``: chats with mode ``voice_only``/``all`` + - ``_auto_tts_disabled_chats``: chats with mode ``off`` + """ platform = getattr(adapter, "platform", None) if not isinstance(platform, Platform): return - disabled_chats.clear() + + disabled_chats = getattr(adapter, "_auto_tts_disabled_chats", None) + enabled_chats = getattr(adapter, "_auto_tts_enabled_chats", None) + if not isinstance(disabled_chats, set) and not isinstance(enabled_chats, set): + return + + # Push the global voice.auto_tts default (config.yaml) onto the adapter. + # Lazy import to avoid adding a module-level dep from gateway → hermes_cli. 
+ try: + from hermes_cli.config import load_config as _load_full_config + _full_cfg = _load_full_config() + _auto_tts_default = bool( + (_full_cfg.get("voice") or {}).get("auto_tts", False) + ) + except Exception: + _auto_tts_default = False + if hasattr(adapter, "_auto_tts_default"): + adapter._auto_tts_default = _auto_tts_default + prefix = f"{platform.value}:" - disabled_chats.update( - key[len(prefix):] for key, mode in self._voice_mode.items() - if mode == "off" and key.startswith(prefix) - ) + if isinstance(disabled_chats, set): + disabled_chats.clear() + disabled_chats.update( + key[len(prefix):] for key, mode in self._voice_mode.items() + if mode == "off" and key.startswith(prefix) + ) + if isinstance(enabled_chats, set): + enabled_chats.clear() + enabled_chats.update( + key[len(prefix):] for key, mode in self._voice_mode.items() + if mode in ("voice_only", "all") and key.startswith(prefix) + ) async def _safe_adapter_disconnect(self, adapter, platform) -> None: """Call adapter.disconnect() defensively, swallowing any error. @@ -1579,6 +1630,27 @@ class GatewayRunner: f"I'll respond to your message shortly." ) + # First-touch onboarding: the very first time a user sends a message + # while the agent is busy, append a one-time hint explaining the + # queue/interrupt knob. Flag is persisted to config.yaml so it never + # fires again on this install. 
+ try: + from agent.onboarding import ( + BUSY_INPUT_FLAG, + busy_input_hint_gateway, + is_seen, + mark_seen, + ) + _user_cfg = _load_gateway_config() + if not is_seen(_user_cfg, BUSY_INPUT_FLAG): + message = ( + f"{message}\n\n" + f"{busy_input_hint_gateway('queue' if is_queue_mode else 'interrupt')}" + ) + mark_seen(_hermes_home / "config.yaml", BUSY_INPUT_FLAG) + except Exception as _onb_err: + logger.debug("Failed to apply busy-input onboarding hint: %s", _onb_err) + thread_meta = {"thread_id": event.source.thread_id} if event.source.thread_id else None try: await adapter._send_with_retry( @@ -3426,6 +3498,8 @@ class GatewayRunner: # /background must bypass the running-agent guard — it starts a # parallel task and must never interrupt the active conversation. + # /btw is an alias of /background and resolves to the same canonical + # name, so this branch handles both commands. if _cmd_def_inner and _cmd_def_inner.name == "background": return await self._handle_background_command(event) @@ -3701,9 +3775,6 @@ class GatewayRunner: if canonical == "background": return await self._handle_background_command(event) - if canonical == "btw": - return await self._handle_btw_command(event) - if canonical == "steer": # No active agent — /steer has no tool call to inject into. # Strip the prefix so downstream treats it as a normal user @@ -4161,7 +4232,7 @@ class GatewayRunner: if _loaded: _loaded_skill, _skill_dir, _display_name = _loaded _note = ( - f'[SYSTEM: The "{_display_name}" skill is auto-loaded. ' + f'[IMPORTANT: The "{_display_name}" skill is auto-loaded. 
' f"Follow its instructions for this session.]" ) _part = _build_skill_message(_loaded_skill, _skill_dir, _note) @@ -5977,7 +6048,7 @@ class GatewayRunner: self._voice_mode[voice_key] = "voice_only" self._save_voice_modes() if adapter: - self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False) + self._set_adapter_auto_tts_enabled(adapter, chat_id, enabled=True) return ( "Voice mode enabled.\n" "I'll reply with voice when you send voice messages.\n" @@ -5993,7 +6064,7 @@ class GatewayRunner: self._voice_mode[voice_key] = "all" self._save_voice_modes() if adapter: - self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False) + self._set_adapter_auto_tts_enabled(adapter, chat_id, enabled=True) return ( "Auto-TTS enabled.\n" "All replies will include a voice message." @@ -6032,7 +6103,7 @@ class GatewayRunner: self._voice_mode[voice_key] = "voice_only" self._save_voice_modes() if adapter: - self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=False) + self._set_adapter_auto_tts_enabled(adapter, chat_id, enabled=True) return "Voice mode enabled." else: self._voice_mode[voice_key] = "off" @@ -6083,7 +6154,7 @@ class GatewayRunner: adapter._voice_sources[guild_id] = event.source.to_dict() self._voice_mode[self._voice_key(event.source.platform, event.source.chat_id)] = "all" self._save_voice_modes() - self._set_adapter_auto_tts_disabled(adapter, event.source.chat_id, disabled=False) + self._set_adapter_auto_tts_enabled(adapter, event.source.chat_id, enabled=True) return ( f"Joined voice channel **{voice_channel.name}**.\n" f"I'll speak my replies and listen to you. Use /voice leave to disconnect." 
@@ -6601,177 +6672,6 @@ class GatewayRunner: except Exception: pass - async def _handle_btw_command(self, event: MessageEvent) -> str: - """Handle /btw — ephemeral side question in the same chat.""" - question = event.get_command_args().strip() - if not question: - return ( - "Usage: /btw \n" - "Example: /btw what module owns session title sanitization?\n\n" - "Answers using session context. No tools, not persisted." - ) - - source = event.source - session_key = self._session_key_for_source(source) - - # Guard: one /btw at a time per session - existing = getattr(self, "_active_btw_tasks", {}).get(session_key) - if existing and not existing.done(): - return "A /btw is already running for this chat. Wait for it to finish." - - if not hasattr(self, "_active_btw_tasks"): - self._active_btw_tasks: dict = {} - - import uuid as _uuid - task_id = f"btw_{datetime.now().strftime('%H%M%S')}_{_uuid.uuid4().hex[:6]}" - _task = asyncio.create_task(self._run_btw_task(question, source, session_key, task_id)) - self._background_tasks.add(_task) - self._active_btw_tasks[session_key] = _task - - def _cleanup(task): - self._background_tasks.discard(task) - if self._active_btw_tasks.get(session_key) is task: - self._active_btw_tasks.pop(session_key, None) - - _task.add_done_callback(_cleanup) - - preview = question[:60] + ("..." if len(question) > 60 else "") - return f'💬 /btw: "{preview}"\nReply will appear here shortly.' 
- - async def _run_btw_task( - self, question: str, source, session_key: str, task_id: str, - ) -> None: - """Execute an ephemeral /btw side question and deliver the answer.""" - from run_agent import AIAgent - - adapter = self.adapters.get(source.platform) - if not adapter: - logger.warning("No adapter for platform %s in /btw task %s", source.platform, task_id) - return - - _thread_meta = {"thread_id": source.thread_id} if source.thread_id else None - - try: - user_config = _load_gateway_config() - model, runtime_kwargs = self._resolve_session_agent_runtime( - source=source, - session_key=session_key, - user_config=user_config, - ) - if not runtime_kwargs.get("api_key"): - await adapter.send( - source.chat_id, - "❌ /btw failed: no provider credentials configured.", - metadata=_thread_meta, - ) - return - - platform_key = _platform_config_key(source.platform) - reasoning_config = self._resolve_session_reasoning_config( - source=source, - session_key=session_key, - ) - self._service_tier = self._load_service_tier() - turn_route = self._resolve_turn_agent_config(question, model, runtime_kwargs) - pr = self._provider_routing - - # Snapshot history from running agent or stored transcript - running_agent = self._running_agents.get(session_key) - if running_agent and running_agent is not _AGENT_PENDING_SENTINEL: - history_snapshot = list(getattr(running_agent, "_session_messages", []) or []) - else: - session_entry = self.session_store.get_or_create_session(source) - history_snapshot = self.session_store.load_transcript(session_entry.session_id) - - btw_prompt = ( - "[Ephemeral /btw side question. Answer using the conversation " - "context. No tools available. 
Be direct and concise.]\n\n" - + question - ) - - def run_sync(): - agent = AIAgent( - model=turn_route["model"], - **turn_route["runtime"], - max_iterations=8, - quiet_mode=True, - verbose_logging=False, - enabled_toolsets=[], - reasoning_config=reasoning_config, - service_tier=self._service_tier, - request_overrides=turn_route.get("request_overrides"), - providers_allowed=pr.get("only"), - providers_ignored=pr.get("ignore"), - providers_order=pr.get("order"), - provider_sort=pr.get("sort"), - provider_require_parameters=pr.get("require_parameters", False), - provider_data_collection=pr.get("data_collection"), - session_id=task_id, - platform=platform_key, - session_db=None, - fallback_model=self._fallback_model, - skip_memory=True, - skip_context_files=True, - persist_session=False, - ) - try: - return agent.run_conversation( - user_message=btw_prompt, - conversation_history=history_snapshot, - task_id=task_id, - ) - finally: - self._cleanup_agent_resources(agent) - - result = await self._run_in_executor_with_context(run_sync) - - response = (result.get("final_response") or "") if result else "" - if not response and result and result.get("error"): - response = f"Error: {result['error']}" - if not response: - response = "(No response generated)" - - media_files, response = adapter.extract_media(response) - images, text_content = adapter.extract_images(response) - preview = question[:60] + ("..." 
if len(question) > 60 else "") - header = f'💬 /btw: "{preview}"\n\n' - - if text_content: - await adapter.send( - chat_id=source.chat_id, - content=header + text_content, - metadata=_thread_meta, - ) - elif not images and not media_files: - await adapter.send( - chat_id=source.chat_id, - content=header + "(No response generated)", - metadata=_thread_meta, - ) - - for image_url, alt_text in (images or []): - try: - await adapter.send_image(chat_id=source.chat_id, image_url=image_url, caption=alt_text) - except Exception: - pass - - for media_path, _is_voice in (media_files or []): - try: - await adapter.send_file(chat_id=source.chat_id, file_path=media_path) - except Exception: - pass - - except Exception as e: - logger.exception("/btw task %s failed", task_id) - try: - await adapter.send( - chat_id=source.chat_id, - content=f"❌ /btw failed: {e}", - metadata=_thread_meta, - ) - except Exception: - pass - async def _handle_reasoning_command(self, event: MessageEvent) -> str: """Handle /reasoning command — manage reasoning effort and display toggle. @@ -7573,7 +7473,7 @@ class GatewayRunner: change_detail = ". ".join(change_parts) + ". " if change_parts else "" reload_msg = { "role": "user", - "content": f"[SYSTEM: MCP servers have been reloaded. {change_detail}{tool_summary}. The tool list for this conversation has been updated accordingly.]", + "content": f"[IMPORTANT: MCP servers have been reloaded. {change_detail}{tool_summary}. 
The tool list for this conversation has been updated accordingly.]", } try: session_entry = self.session_store.get_or_create_session(event.source) @@ -8512,7 +8412,7 @@ class GatewayRunner: from tools.ansi_strip import strip_ansi _out = strip_ansi(session.output_buffer[-2000:]) if session.output_buffer else "" synth_text = ( - f"[SYSTEM: Background process {session_id} completed " + f"[IMPORTANT: Background process {session_id} completed " f"(exit code {session.exit_code}).\n" f"Command: {session.command}\n" f"Output:\n{_out}]" @@ -8822,6 +8722,25 @@ class GatewayRunner: with _lock: self._agent_cache.pop(session_key, None) + @staticmethod + def _init_cached_agent_for_turn(agent: Any, interrupt_depth: int) -> None: + """Reset per-turn state on a cached agent before a new turn starts. + + Both _last_activity_ts and _last_activity_desc are only reset for + fresh external turns (depth 0); they are semantically paired — + desc describes the activity *at* ts, so updating one without the + other would make get_activity_summary() misleading. + For interrupt-recursive turns both are preserved so the inactivity + watchdog can accumulate stuck-turn idle time and fire the 30-min + timeout (#15654). The depth-0 reset is still needed: a session + idle for 29 min would otherwise trip the watchdog before the new + turn makes its first API call (#9051). + """ + if interrupt_depth == 0: + agent._last_activity_ts = time.time() + agent._last_activity_desc = "starting new turn (cached)" + agent._api_call_count = 0 + def _release_evicted_agent_soft(self, agent: Any) -> None: """Soft cleanup for cache-evicted agents — preserves session tool state. @@ -9360,16 +9279,62 @@ class GatewayRunner: last_tool = [None] # Mutable container for tracking in closure last_progress_msg = [None] # Track last message for dedup repeat_count = [0] # How many times the same message repeated - + # First-touch onboarding latch: fires at most once per run, even if + # several tools exceed the threshold. 
+ long_tool_hint_fired = [False] + _LONG_TOOL_THRESHOLD_S = 30.0 + def progress_callback(event_type: str, tool_name: str = None, preview: str = None, args: dict = None, **kwargs): """Callback invoked by agent on tool lifecycle events.""" if not progress_queue or not _run_still_current(): return + # First-touch onboarding: the first time a tool takes longer than + # _LONG_TOOL_THRESHOLD_S during a run that's streaming every tool + # (progress_mode == "all"), append a one-time hint suggesting + # /verbose. We only fire when (a) the user hasn't seen the hint + # before and (b) /verbose is actually usable on this platform + # (gateway gate must be open). The CLI has its own trigger. + if event_type == "tool.completed" and not long_tool_hint_fired[0]: + try: + duration = kwargs.get("duration") or 0 + if duration >= _LONG_TOOL_THRESHOLD_S and progress_mode == "all": + from agent.onboarding import ( + TOOL_PROGRESS_FLAG, + is_seen, + mark_seen, + tool_progress_hint_gateway, + ) + _cfg = _load_gateway_config() + gate_on = bool(_cfg.get("display", {}).get("tool_progress_command", False)) + if gate_on and not is_seen(_cfg, TOOL_PROGRESS_FLAG): + long_tool_hint_fired[0] = True + progress_queue.put(tool_progress_hint_gateway()) + mark_seen(_hermes_home / "config.yaml", TOOL_PROGRESS_FLAG) + except Exception as _hint_err: + logger.debug("tool-progress onboarding hint failed: %s", _hint_err) + return + # Only act on tool.started events (ignore tool.completed, reasoning.available, etc.) if event_type not in ("tool.started",): return + # Suppress tool-progress bubbles once the user has sent `stop`. + # When the LLM response carries N parallel tool calls, the agent + # fires N "tool.started" events back-to-back before checking for + # interrupts — without this guard, a late `stop` still renders + # all N as 🔍 bubbles, making the interrupt feel ignored. + # (agent lives in run_sync's scope; agent_holder[0] is the shared + # handle across nested scopes — see line ~9607.) 
+ try: + _agent_for_interrupt = agent_holder[0] if agent_holder else None + if _agent_for_interrupt is not None and getattr( + _agent_for_interrupt, "is_interrupted", False + ): + return + except Exception: + pass + # "new" mode: only report when tool changes if progress_mode == "new" and tool_name == last_tool[0]: return @@ -9476,6 +9441,22 @@ class GatewayRunner: raw = progress_queue.get_nowait() + # Drain silently when interrupted: events queued in the + # window between tool parse and interrupt processing + # should not render as bubbles. The "⚡ Interrupting + # current task" message is sent separately and is the + # last progress-flavored bubble the user should see. + try: + _agent_for_interrupt = agent_holder[0] if agent_holder else None + if _agent_for_interrupt is not None and getattr( + _agent_for_interrupt, "is_interrupted", False + ): + # Drop this event and continue draining. + await asyncio.sleep(0) + continue + except Exception: + pass + # Handle dedup messages: update last line with repeat counter if isinstance(raw, tuple) and len(raw) == 3 and raw[0] == "__dedup__": _, base_msg, count = raw @@ -9804,12 +9785,7 @@ class GatewayRunner: _cache.move_to_end(session_key) except KeyError: pass - # Reset activity timestamp so the inactivity timeout - # handler doesn't see stale idle time from the previous - # turn and immediately kill this agent. 
(#9051) - agent._last_activity_ts = time.time() - agent._last_activity_desc = "starting new turn (cached)" - agent._api_call_count = 0 + self._init_cached_agent_for_turn(agent, _interrupt_depth) logger.debug("Reusing cached agent for session %s", session_key) if agent is None: diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 482e3c47a2..610a06dc94 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -467,11 +467,27 @@ def _resolve_api_key_provider_secret( pass return "", "" + from hermes_cli.config import get_env_value for env_var in pconfig.api_key_env_vars: - val = os.getenv(env_var, "").strip() + # Check both os.environ and ~/.hermes/.env file + val = (get_env_value(env_var) or "").strip() if has_usable_secret(val): return val, env_var + # Fallback: try credential pool (e.g. zai key stored via auth.json) + try: + from agent.credential_pool import load_pool + pool = load_pool(provider_id) + if pool and pool.has_credentials(): + entry = pool.peek() + if entry: + key = getattr(entry, "access_token", "") or getattr(entry, "runtime_api_key", "") + key = str(key).strip() + if has_usable_secret(key): + return key, f"credential_pool:{provider_id}" + except Exception: + pass + return "", "" @@ -4244,10 +4260,10 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: ) from hermes_cli.models import ( - _PROVIDER_MODELS, get_pricing_for_provider, + get_curated_nous_model_ids, get_pricing_for_provider, check_nous_free_tier, partition_nous_models_by_tier, ) - model_ids = _PROVIDER_MODELS.get("nous", []) + model_ids = get_curated_nous_model_ids() print() unavailable_models: list = [] diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 4d650487b4..d0eb74d872 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -84,9 +84,7 @@ COMMAND_REGISTRY: list[CommandDef] = [ CommandDef("deny", "Deny a pending dangerous command", "Session", gateway_only=True), CommandDef("background", "Run a prompt in the background", "Session", - 
aliases=("bg",), args_hint=""), - CommandDef("btw", "Ephemeral side question using session context (no tools, not persisted)", "Session", - args_hint=""), + aliases=("bg", "btw"), args_hint=""), CommandDef("agents", "Show active agents and running tasks", "Session", aliases=("tasks",)), CommandDef("queue", "Queue a prompt for the next turn (doesn't interrupt)", "Session", @@ -808,6 +806,114 @@ def discord_skill_commands_by_category( return trimmed_categories, uncategorized, hidden +# --------------------------------------------------------------------------- +# Slack native slash commands +# --------------------------------------------------------------------------- + +# Slack slash command name constraints: lowercase a-z, 0-9, hyphens, +# underscores. Max 32 chars. Slack app manifest accepts up to 50 slash +# commands per app. +_SLACK_MAX_SLASH_COMMANDS = 50 +_SLACK_NAME_LIMIT = 32 +_SLACK_INVALID_CHARS = re.compile(r"[^a-z0-9_\-]") + + +def _sanitize_slack_name(raw: str) -> str: + """Convert a command name to a valid Slack slash command name. + + Slack allows lowercase a-z, digits, hyphens, and underscores. Max 32 + chars. Uppercase is lowercased; invalid chars are stripped. + """ + name = raw.lower() + name = _SLACK_INVALID_CHARS.sub("", name) + name = name.strip("-_") + return name[:_SLACK_NAME_LIMIT] + + +def slack_native_slashes() -> list[tuple[str, str, str]]: + """Return (slash_name, description, usage_hint) triples for Slack. + + Every gateway-available command in ``COMMAND_REGISTRY`` is surfaced as + a standalone Slack slash command (e.g. ``/btw``, ``/stop``, ``/model``), + matching Discord's and Telegram's model where every command is a + first-class slash and not a ``/hermes `` subcommand. + + Both canonical names and aliases are included so users can type any + documented form (e.g. ``/background``, ``/bg``, and ``/btw`` all work). + Plugin-registered slash commands are included too. 
+ + Results are clamped to Slack's 50-command limit with duplicate-name + avoidance. ``/hermes`` is always reserved as the first entry so the + legacy ``/hermes `` form keeps working for anything that + gets dropped by the clamp or for free-form questions. + """ + overrides = _resolve_config_gates() + entries: list[tuple[str, str, str]] = [] + seen: set[str] = set() + + # Reserve /hermes as the catch-all top-level command. + entries.append(("hermes", "Talk to Hermes or run a subcommand", "[subcommand] [args]")) + seen.add("hermes") + + def _add(name: str, desc: str, hint: str) -> None: + slack_name = _sanitize_slack_name(name) + if not slack_name or slack_name in seen: + return + if len(entries) >= _SLACK_MAX_SLASH_COMMANDS: + return + # Slack description cap is 2000 chars; keep it short. + entries.append((slack_name, desc[:140], hint[:100])) + seen.add(slack_name) + + # First pass: canonical names (so they win slots if we hit the cap). + for cmd in COMMAND_REGISTRY: + if not _is_gateway_available(cmd, overrides): + continue + _add(cmd.name, cmd.description, cmd.args_hint or "") + + # Second pass: aliases. + for cmd in COMMAND_REGISTRY: + if not _is_gateway_available(cmd, overrides): + continue + for alias in cmd.aliases: + # Skip aliases that only differ from canonical by case/punctuation + # normalization (already covered by _add dedup). + _add(alias, f"Alias for /{cmd.name} — {cmd.description}", cmd.args_hint or "") + + # Third pass: plugin commands. + for name, description, args_hint in _iter_plugin_command_entries(): + _add(name, description, args_hint or "") + + return entries + + +def slack_app_manifest(request_url: str = "https://hermes-agent.local/slack/commands") -> dict[str, Any]: + """Generate a Slack app manifest with all gateway commands as slashes. + + ``request_url`` is required by Slack's manifest schema for every slash + command, but in Socket Mode (which we use) Slack ignores it and routes + the command event through the WebSocket. 
A placeholder URL is fine. + + The returned dict is the ``features.slash_commands`` portion only — + callers compose it into a full manifest (or merge into an existing + one). Keeping it narrow avoids coupling us to the rest of the manifest + schema (display_information, oauth_config, settings, etc.) which users + set up once in the Slack UI and rarely change. + """ + slashes = [] + for name, desc, usage in slack_native_slashes(): + entry = { + "command": f"/{name}", + "description": desc or f"Run /{name}", + "should_escape": False, + "url": request_url, + } + if usage: + entry["usage_hint"] = usage + slashes.append(entry) + return {"features": {"slash_commands": slashes}} + + def slack_subcommand_map() -> dict[str, str]: """Return subcommand -> /command mapping for Slack /hermes handler. diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 3b5e24a376..542b4d4fa4 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -465,6 +465,7 @@ DEFAULT_CONFIG = { "command_timeout": 30, # Timeout for browser commands in seconds (screenshot, navigate, etc.) "record_sessions": False, # Auto-record browser sessions as WebM videos "allow_private_urls": False, # Allow navigating to private/internal IPs (localhost, 192.168.x.x, etc.) + "auto_local_for_private_urls": True, # When a cloud provider is set, auto-spawn local Chromium for LAN/localhost URLs instead of sending them to the cloud "cdp_url": "", # Optional persistent CDP endpoint for attaching to an existing Chromium/Chrome # CDP supervisor — dialog + frame detection via a persistent WebSocket. # Active only when a CDP-capable backend is attached (Browserbase or @@ -959,6 +960,27 @@ DEFAULT_CONFIG = { "backup_count": 3, # Number of rotated backup files to keep }, + # Remotely-hosted model catalog manifest. When enabled, the CLI fetches + # curated model lists for OpenRouter and Nous Portal from this URL, + # falling back to the in-repo snapshot on network failure. 
Lets us + # update model picker lists without shipping a hermes-agent release. + # The default URL is served by the docs site GitHub Pages deploy. + "model_catalog": { + "enabled": True, + "url": "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json", + # Disk cache TTL in hours. Beyond this, the CLI refetches on the + # next /model or `hermes model` invocation; network failures + # silently fall back to the stale cache. + "ttl_hours": 24, + # Optional per-provider override URLs for third parties that want + # to self-host their own curation list using the same schema. + # Example: + # providers: + # openrouter: + # url: https://example.com/my-curation.json + "providers": {}, + }, + # Network settings — workarounds for connectivity issues. "network": { # Force IPv4 connections. On servers with broken or unreachable IPv6, @@ -995,6 +1017,13 @@ DEFAULT_CONFIG = { "min_interval_hours": 24, }, + # Contextual first-touch onboarding hints (see agent/onboarding.py). + # Each hint is shown once per install and then latched here so it + # never fires again. Users can wipe the section to re-see all hints. + "onboarding": { + "seen": {}, + }, + # Config schema version - bump this when adding new required fields "_config_version": 22, } diff --git a/hermes_cli/fallback_cmd.py b/hermes_cli/fallback_cmd.py new file mode 100644 index 0000000000..02c0a01c39 --- /dev/null +++ b/hermes_cli/fallback_cmd.py @@ -0,0 +1,361 @@ +""" +hermes fallback — manage the fallback provider chain. + +Fallback providers are tried in order when the primary model fails with +rate-limit, overload, or connection errors. 
See: +https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers + +Subcommands: + hermes fallback [list] Show the current fallback chain (default when no subcommand) + hermes fallback add Pick provider + model via the same picker as `hermes model`, + then append the selection to the chain + hermes fallback remove Pick an entry to delete from the chain + hermes fallback clear Remove all fallback entries + +Storage: ``fallback_providers`` in ``~/.hermes/config.yaml`` (top-level, list of +``{provider, model, base_url?, api_mode?}`` dicts). The legacy single-dict +``fallback_model`` format is migrated to the new list format on first add. +""" +from __future__ import annotations + +import copy +from typing import Any, Dict, List, Optional + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _read_chain(config: Dict[str, Any]) -> List[Dict[str, Any]]: + """Return the normalized fallback chain as a list of dicts. + + Accepts both the new list format (``fallback_providers``) and the legacy + single-dict format (``fallback_model``). The returned list is always a + fresh copy — callers can mutate without touching the config dict. 
+ """ + chain = config.get("fallback_providers") or [] + if isinstance(chain, list): + result = [dict(e) for e in chain if isinstance(e, dict) and e.get("provider") and e.get("model")] + if result: + return result + legacy = config.get("fallback_model") + if isinstance(legacy, dict) and legacy.get("provider") and legacy.get("model"): + return [dict(legacy)] + if isinstance(legacy, list): + return [dict(e) for e in legacy if isinstance(e, dict) and e.get("provider") and e.get("model")] + return [] + + +def _write_chain(config: Dict[str, Any], chain: List[Dict[str, Any]]) -> None: + """Persist the chain to ``fallback_providers`` and clear legacy key.""" + config["fallback_providers"] = chain + # Drop the legacy single-dict key on write so there's only one source of truth. + if "fallback_model" in config: + config.pop("fallback_model", None) + + +def _format_entry(entry: Dict[str, Any]) -> str: + """One-line human-readable rendering of a fallback entry.""" + provider = entry.get("provider", "?") + model = entry.get("model", "?") + base = entry.get("base_url") + suffix = f" [{base}]" if base else "" + return f"{model} (via {provider}){suffix}" + + +def _extract_fallback_from_model_cfg(model_cfg: Any) -> Optional[Dict[str, Any]]: + """Pull the ``{provider, model, base_url?, api_mode?}`` dict from a ``config["model"]`` snapshot.""" + if not isinstance(model_cfg, dict): + return None + provider = (model_cfg.get("provider") or "").strip() + # The picker writes the selected model to ``model.default``. 
+ model = (model_cfg.get("default") or model_cfg.get("model") or "").strip() + if not provider or not model: + return None + entry: Dict[str, Any] = {"provider": provider, "model": model} + base_url = (model_cfg.get("base_url") or "").strip() + if base_url: + entry["base_url"] = base_url + api_mode = (model_cfg.get("api_mode") or "").strip() + if api_mode: + entry["api_mode"] = api_mode + return entry + + +def _snapshot_auth_active_provider() -> Any: + """Return the current ``active_provider`` in auth.json, or a sentinel if unavailable.""" + try: + from hermes_cli.auth import _load_auth_store + store = _load_auth_store() + return store.get("active_provider") + except Exception: + return None + + +def _restore_auth_active_provider(value: Any) -> None: + """Write back a previously snapshotted ``active_provider`` value.""" + try: + from hermes_cli.auth import _auth_store_lock, _load_auth_store, _save_auth_store + with _auth_store_lock(): + store = _load_auth_store() + store["active_provider"] = value + _save_auth_store(store) + except Exception: + # Best-effort — if auth.json can't be restored, the user's primary + # provider may have been deactivated by the picker. They can re-run + # `hermes model` to fix it. Don't fail the fallback add. 
+ pass + + +# --------------------------------------------------------------------------- +# Subcommand handlers +# --------------------------------------------------------------------------- + +def cmd_fallback_list(args) -> None: # noqa: ARG001 + """Print the current fallback chain.""" + from hermes_cli.config import load_config + + config = load_config() + chain = _read_chain(config) + + print() + if not chain: + print(" No fallback providers configured.") + print() + print(" Add one with: hermes fallback add") + print() + return + + primary = _describe_primary(config) + if primary: + print(f" Primary: {primary}") + print() + print(f" Fallback chain ({len(chain)} {'entry' if len(chain) == 1 else 'entries'}):") + for i, entry in enumerate(chain, 1): + print(f" {i}. {_format_entry(entry)}") + print() + print(" Tried in order when the primary fails (rate-limit, 5xx, connection errors).") + print(" Docs: https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers") + print() + + +def _describe_primary(config: Dict[str, Any]) -> Optional[str]: + """One-line description of the primary model for display purposes.""" + model_cfg = config.get("model") + if isinstance(model_cfg, dict): + provider = (model_cfg.get("provider") or "?").strip() or "?" + model = (model_cfg.get("default") or model_cfg.get("model") or "?").strip() or "?" + return f"{model} (via {provider})" + if isinstance(model_cfg, str) and model_cfg.strip(): + return model_cfg.strip() + return None + + +def cmd_fallback_add(args) -> None: + """Launch the same picker as `hermes model`, then append the selection to the chain.""" + from hermes_cli.main import _require_tty, select_provider_and_model + from hermes_cli.config import load_config, save_config + + _require_tty("fallback add") + + # Snapshot BEFORE the picker runs so we can distinguish "user actually + # picked something" from "user cancelled" by comparing before/after. 
+ before_cfg = load_config() + model_before = copy.deepcopy(before_cfg.get("model")) + active_provider_before = _snapshot_auth_active_provider() + + print() + print(" Adding a fallback provider. The picker below is the same one used by") + print(" `hermes model` — select the provider + model you want as a fallback.") + print() + + try: + select_provider_and_model(args=args) + except SystemExit: + # Some provider flows exit on auth failure — restore state and re-raise. + _restore_model_cfg(model_before) + _restore_auth_active_provider(active_provider_before) + raise + + # Read the post-picker state to see what the user selected. + after_cfg = load_config() + model_after = after_cfg.get("model") + + new_entry = _extract_fallback_from_model_cfg(model_after) + if not new_entry: + # Picker didn't complete (user cancelled or flow bailed). Nothing to do. + _restore_model_cfg(model_before) + _restore_auth_active_provider(active_provider_before) + print() + print(" No fallback added.") + return + + # Picker picked the same thing that's already the primary → nothing changed, + # and there's nothing useful to add as a fallback to itself. + primary_entry = _extract_fallback_from_model_cfg(model_before) + if primary_entry and primary_entry["provider"] == new_entry["provider"] \ + and primary_entry["model"] == new_entry["model"]: + _restore_model_cfg(model_before) + _restore_auth_active_provider(active_provider_before) + print() + print(f" Selected model matches the current primary ({_format_entry(new_entry)}).") + print(" A provider cannot be a fallback for itself — no change.") + return + + # Reload the config with the primary restored, then append the new entry + # to ``fallback_providers``. We deliberately re-load (rather than mutating + # ``after_cfg``) because the picker may have touched other top-level keys + # (custom_providers, providers credentials) that we want to keep. 
+ _restore_model_cfg(model_before) + _restore_auth_active_provider(active_provider_before) + + final_cfg = load_config() + chain = _read_chain(final_cfg) + + # Reject exact-duplicate fallback entries. + for existing in chain: + if existing.get("provider") == new_entry["provider"] \ + and existing.get("model") == new_entry["model"]: + print() + print(f" {_format_entry(new_entry)} is already in the fallback chain — skipped.") + return + + chain.append(new_entry) + _write_chain(final_cfg, chain) + save_config(final_cfg) + + print() + print(f" Added fallback: {_format_entry(new_entry)}") + print(f" Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.") + print() + print(" Run `hermes fallback list` to view, or `hermes fallback remove` to delete.") + + +def _restore_model_cfg(model_before: Any) -> None: + """Restore ``config["model"]`` to a previously-captured snapshot.""" + from hermes_cli.config import load_config, save_config + + cfg = load_config() + if model_before is None: + cfg.pop("model", None) + else: + cfg["model"] = copy.deepcopy(model_before) + save_config(cfg) + + +def cmd_fallback_remove(args) -> None: # noqa: ARG001 + """Pick an entry from the chain and remove it.""" + from hermes_cli.config import load_config, save_config + + config = load_config() + chain = _read_chain(config) + + if not chain: + print() + print(" No fallback providers configured — nothing to remove.") + print() + return + + choices = [_format_entry(e) for e in chain] + choices.append("Cancel") + + try: + from hermes_cli.setup import _curses_prompt_choice + idx = _curses_prompt_choice("Select a fallback to remove:", choices, 0) + except Exception: + idx = _numbered_pick("Select a fallback to remove:", choices) + + if idx is None or idx < 0 or idx >= len(chain): + print() + print(" Cancelled — no change.") + return + + removed = chain.pop(idx) + _write_chain(config, chain) + save_config(config) + + print() + print(f" Removed fallback: {_format_entry(removed)}") + 
if chain: + print(f" Chain is now {len(chain)} {'entry' if len(chain) == 1 else 'entries'} long.") + else: + print(" Fallback chain is now empty.") + print() + + +def cmd_fallback_clear(args) -> None: # noqa: ARG001 + """Remove all fallback entries (with confirmation).""" + from hermes_cli.config import load_config, save_config + + config = load_config() + chain = _read_chain(config) + + if not chain: + print() + print(" No fallback providers configured — nothing to clear.") + print() + return + + print() + print(f" Current fallback chain ({len(chain)} {'entry' if len(chain) == 1 else 'entries'}):") + for i, entry in enumerate(chain, 1): + print(f" {i}. {_format_entry(entry)}") + print() + try: + resp = input(" Clear all entries? [y/N]: ").strip().lower() + except (KeyboardInterrupt, EOFError): + print() + print(" Cancelled.") + return + if resp not in ("y", "yes"): + print(" Cancelled — no change.") + return + + _write_chain(config, []) + save_config(config) + print() + print(" Fallback chain cleared.") + print() + + +def _numbered_pick(question: str, choices: List[str]) -> Optional[int]: + """Fallback numbered-list picker when curses is unavailable.""" + print(question) + for i, c in enumerate(choices, 1): + print(f" {i}. 
{c}") + print() + while True: + try: + val = input(f"Choice [1-{len(choices)}]: ").strip() + if not val: + return None + idx = int(val) - 1 + if 0 <= idx < len(choices): + return idx + print(f"Please enter 1-{len(choices)}") + except ValueError: + print("Please enter a number") + except (KeyboardInterrupt, EOFError): + print() + return None + + +# --------------------------------------------------------------------------- +# Dispatch +# --------------------------------------------------------------------------- + +def cmd_fallback(args) -> None: + """Top-level dispatcher for ``hermes fallback [subcommand]``.""" + sub = getattr(args, "fallback_command", None) + if sub in (None, "", "list", "ls"): + cmd_fallback_list(args) + elif sub == "add": + cmd_fallback_add(args) + elif sub in ("remove", "rm"): + cmd_fallback_remove(args) + elif sub == "clear": + cmd_fallback_clear(args) + else: + print(f"Unknown fallback subcommand: {sub}") + print("Use one of: list, add, remove, clear") + raise SystemExit(2) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 2064b324f5..e10af44cd9 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2315,13 +2315,13 @@ def _model_flow_nous(config, current_model="", args=None): # The live /models endpoint returns hundreds of models; the curated list # shows only agentic models users recognize from OpenRouter. from hermes_cli.models import ( - _PROVIDER_MODELS, + get_curated_nous_model_ids, get_pricing_for_provider, check_nous_free_tier, partition_nous_models_by_tier, ) - model_ids = _PROVIDER_MODELS.get("nous", []) + model_ids = get_curated_nous_model_ids() if not model_ids: print("No curated models available for Nous Portal.") return @@ -4780,6 +4780,37 @@ def cmd_webhook(args): webhook_command(args) +def cmd_slack(args): + """Slack integration helpers. + + Dispatches ``hermes slack ``. Currently supports: + manifest — print or write a Slack app manifest with every gateway + command registered as a first-class slash. 
+ """ + sub = getattr(args, "slack_command", None) + if sub in (None, ""): + # No subcommand — print usage hint. + print( + "usage: hermes slack \n" + "\n" + "subcommands:\n" + " manifest Generate a Slack app manifest with every gateway\n" + " command registered as a native slash\n" + "\n" + "Run `hermes slack manifest -h` for details.", + file=sys.stderr, + ) + return 1 + + if sub == "manifest": + from hermes_cli.slack_cli import slack_manifest_command + + return slack_manifest_command(args) + + print(f"Unknown slack subcommand: {sub}", file=sys.stderr) + return 1 + + def cmd_hooks(args): """Shell-hook inspection and management.""" from hermes_cli.hooks import hooks_command @@ -7223,6 +7254,9 @@ Examples: hermes auth remove

Remove pooled credential by index, id, or label hermes auth reset Clear exhaustion status for a provider hermes model Select default model + hermes fallback [list] Show fallback provider chain + hermes fallback add Add a fallback provider (same picker as `hermes model`) + hermes fallback remove Remove a fallback provider from the chain hermes config View configuration hermes config edit Edit config in $EDITOR hermes config set model gpt-4 Set a config value @@ -7564,6 +7598,42 @@ For more help on a command: ) model_parser.set_defaults(func=cmd_model) + # ========================================================================= + # fallback command — manage the fallback provider chain + # ========================================================================= + from hermes_cli.fallback_cmd import cmd_fallback + + fallback_parser = subparsers.add_parser( + "fallback", + help="Manage fallback providers (tried when the primary model fails)", + description=( + "Manage the fallback provider chain. Fallback providers are tried " + "in order when the primary model fails with rate-limit, overload, or " + "connection errors. 
See: " + "https://hermes-agent.nousresearch.com/docs/user-guide/features/fallback-providers" + ), + ) + fallback_subparsers = fallback_parser.add_subparsers(dest="fallback_command") + fallback_subparsers.add_parser( + "list", + aliases=["ls"], + help="Show the current fallback chain (default when no subcommand)", + ) + fallback_subparsers.add_parser( + "add", + help="Pick a provider + model (same picker as `hermes model`) and append to the chain", + ) + fallback_subparsers.add_parser( + "remove", + aliases=["rm"], + help="Pick an entry to delete from the chain", + ) + fallback_subparsers.add_parser( + "clear", + help="Remove all fallback entries", + ) + fallback_parser.set_defaults(func=cmd_fallback) + # ========================================================================= # gateway command # ========================================================================= @@ -7759,6 +7829,54 @@ For more help on a command: ) whatsapp_parser.set_defaults(func=cmd_whatsapp) + # ========================================================================= + # slack command + # ========================================================================= + slack_parser = subparsers.add_parser( + "slack", + help="Slack integration helpers (manifest generation, etc.)", + description="Slack integration helpers for Hermes.", + ) + slack_sub = slack_parser.add_subparsers(dest="slack_command") + slack_manifest = slack_sub.add_parser( + "manifest", + help="Print or write a Slack app manifest with every gateway command " + "registered as a native slash (/btw, /stop, /model, ...)", + description=( + "Generate a Slack app manifest that registers every gateway " + "command in COMMAND_REGISTRY as a first-class Slack slash " + "command (matching Discord and Telegram parity). Paste the " + "output into Slack app config → Features → App Manifest → " + "Edit, then Save. Reinstall the app if Slack prompts for it." 
+ ), + ) + slack_manifest.add_argument( + "--write", + nargs="?", + const=True, + default=None, + metavar="PATH", + help="Write manifest to a file instead of stdout. With no PATH " + "writes to $HERMES_HOME/slack-manifest.json.", + ) + slack_manifest.add_argument( + "--name", + default=None, + help='Bot display name (default: "Hermes")', + ) + slack_manifest.add_argument( + "--description", + default=None, + help="Bot description shown in Slack's app directory.", + ) + slack_manifest.add_argument( + "--slashes-only", + action="store_true", + help="Emit only the features.slash_commands array (for merging " + "into an existing manifest manually).", + ) + slack_parser.set_defaults(func=cmd_slack) + # ========================================================================= # login command # ========================================================================= @@ -8414,6 +8532,12 @@ Examples: skills_list.add_argument( "--source", default="all", choices=["all", "hub", "builtin", "local"] ) + skills_list.add_argument( + "--enabled-only", + action="store_true", + help="Hide disabled skills. Use with -p to see exactly " + "which skills will load for that profile.", + ) skills_check = skills_subparsers.add_parser( "check", help="Check installed hub skills for updates" diff --git a/hermes_cli/model_catalog.py b/hermes_cli/model_catalog.py new file mode 100644 index 0000000000..500910d57f --- /dev/null +++ b/hermes_cli/model_catalog.py @@ -0,0 +1,329 @@ +"""Remote model catalog fetcher. + +The Hermes docs site hosts a JSON manifest of curated models for providers +we want to update without shipping a release (currently OpenRouter and +Nous Portal). This module fetches, validates, and caches that manifest, +falling back to the in-repo hardcoded lists when the network is unavailable. + +Pipeline +-------- +1. ``get_catalog()`` — returns a parsed manifest dict. + - Checks in-process cache (invalidated by TTL). + - Reads disk cache at ``~/.hermes/cache/model_catalog.json``. 
+ - Fetches the master URL if disk cache is stale or missing. + - On any fetch failure, keeps using the stale cache (or empty dict). + +2. ``get_curated_openrouter_models()`` / ``get_curated_nous_models()`` — + thin accessors returning the shapes existing callers expect. Each + falls back to the in-repo hardcoded list on any lookup failure. + +Schema (version 1) +------------------ +:: + + { + "version": 1, + "updated_at": "2026-04-25T22:00:00Z", + "metadata": {...}, # free-form + "providers": { + "openrouter": { + "metadata": {...}, # free-form + "models": [ + {"id": "vendor/model", "description": "recommended", + "metadata": {...}} # free-form, model-level + ] + }, + "nous": {...} + } + } + +Unknown fields are ignored — extra metadata can be added at either level +without bumping ``version``. ``version`` bumps are reserved for +breaking changes (renaming ``providers``, changing ``models`` shape). +""" + +from __future__ import annotations + +import json +import logging +import os +import time +import urllib.error +import urllib.request +from pathlib import Path +from typing import Any + +from hermes_cli import __version__ as _HERMES_VERSION + +logger = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Constants +# --------------------------------------------------------------------------- + +DEFAULT_CATALOG_URL = ( + "https://hermes-agent.nousresearch.com/docs/api/model-catalog.json" +) +DEFAULT_TTL_HOURS = 24 +DEFAULT_FETCH_TIMEOUT = 8.0 +SUPPORTED_SCHEMA_VERSION = 1 + +_HERMES_USER_AGENT = f"hermes-cli/{_HERMES_VERSION}" + +# In-process cache to avoid repeated disk + parse work across multiple +# calls within the same session. Invalidated by TTL against the disk file's +# mtime, so calling code never has to think about this. 
+_catalog_cache: dict[str, Any] | None = None +_catalog_cache_source_mtime: float = 0.0 + + +# --------------------------------------------------------------------------- +# Config +# --------------------------------------------------------------------------- + + +def _load_catalog_config() -> dict[str, Any]: + """Load the ``model_catalog`` config block with defaults filled in.""" + try: + from hermes_cli.config import load_config + cfg = load_config() or {} + except Exception: + cfg = {} + + raw = cfg.get("model_catalog") + if not isinstance(raw, dict): + raw = {} + + return { + "enabled": bool(raw.get("enabled", True)), + "url": str(raw.get("url") or DEFAULT_CATALOG_URL), + "ttl_hours": float(raw.get("ttl_hours") or DEFAULT_TTL_HOURS), + "providers": raw.get("providers") if isinstance(raw.get("providers"), dict) else {}, + } + + +def _cache_path() -> Path: + """Return the disk cache path. Import lazily so tests can monkeypatch home.""" + from hermes_constants import get_hermes_home + return get_hermes_home() / "cache" / "model_catalog.json" + + +# --------------------------------------------------------------------------- +# Fetch + validate + cache +# --------------------------------------------------------------------------- + + +def _fetch_manifest(url: str, timeout: float) -> dict[str, Any] | None: + """HTTP GET the manifest URL and return a parsed dict, or None on failure.""" + try: + req = urllib.request.Request( + url, + headers={ + "Accept": "application/json", + "User-Agent": _HERMES_USER_AGENT, + }, + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: + data = json.loads(resp.read().decode()) + except (urllib.error.URLError, TimeoutError, json.JSONDecodeError, OSError) as exc: + logger.info("model catalog fetch failed (%s): %s", url, exc) + return None + except Exception as exc: # pragma: no cover — defensive + logger.info("model catalog fetch errored (%s): %s", url, exc) + return None + + if not _validate_manifest(data): + 
logger.info("model catalog at %s failed schema validation", url) + return None + + return data + + +def _validate_manifest(data: Any) -> bool: + """Return True when ``data`` matches the minimum manifest shape.""" + if not isinstance(data, dict): + return False + version = data.get("version") + if not isinstance(version, int) or version > SUPPORTED_SCHEMA_VERSION: + # Future schema version we don't understand — refuse rather than + # guess. Older schemas (version < 1) aren't supported either. + return False + providers = data.get("providers") + if not isinstance(providers, dict): + return False + for pname, pblock in providers.items(): + if not isinstance(pname, str) or not isinstance(pblock, dict): + return False + models = pblock.get("models") + if not isinstance(models, list): + return False + for m in models: + if not isinstance(m, dict): + return False + if not isinstance(m.get("id"), str) or not m["id"].strip(): + return False + return True + + +def _read_disk_cache() -> tuple[dict[str, Any] | None, float]: + """Return ``(data_or_none, mtime)``. 
mtime is 0 if file is missing.""" + path = _cache_path() + try: + mtime = path.stat().st_mtime + except (OSError, FileNotFoundError): + return (None, 0.0) + try: + with open(path) as fh: + data = json.load(fh) + except (OSError, json.JSONDecodeError): + return (None, 0.0) + if not _validate_manifest(data): + return (None, 0.0) + return (data, mtime) + + +def _write_disk_cache(data: dict[str, Any]) -> None: + path = _cache_path() + try: + path.parent.mkdir(parents=True, exist_ok=True) + tmp = path.with_suffix(path.suffix + ".tmp") + with open(tmp, "w") as fh: + json.dump(data, fh, indent=2) + fh.write("\n") + os.replace(tmp, path) + except OSError as exc: + logger.info("model catalog cache write failed: %s", exc) + + +# --------------------------------------------------------------------------- +# Public API +# --------------------------------------------------------------------------- + + +def get_catalog(*, force_refresh: bool = False) -> dict[str, Any]: + """Return the parsed model catalog manifest, or an empty dict on failure. + + Callers should treat a missing provider/model as "use the in-repo fallback" + — never raise from this function so the CLI keeps working offline. + """ + global _catalog_cache, _catalog_cache_source_mtime + + cfg = _load_catalog_config() + if not cfg["enabled"]: + return {} + + ttl_seconds = max(0.0, cfg["ttl_hours"] * 3600.0) + + disk_data, disk_mtime = _read_disk_cache() + now = time.time() + disk_fresh = disk_data is not None and (now - disk_mtime) < ttl_seconds + + # In-process cache hit: disk hasn't changed since we loaded it and still fresh. + if ( + not force_refresh + and _catalog_cache is not None + and disk_data is not None + and disk_mtime == _catalog_cache_source_mtime + and disk_fresh + ): + return _catalog_cache + + # Disk is fresh enough — use it without a network hit. 
+ if not force_refresh and disk_fresh and disk_data is not None: + _catalog_cache = disk_data + _catalog_cache_source_mtime = disk_mtime + return disk_data + + # Need to (re)fetch. If it fails, fall back to any stale disk copy. + fetched = _fetch_manifest(cfg["url"], DEFAULT_FETCH_TIMEOUT) + if fetched is not None: + _write_disk_cache(fetched) + new_disk_data, new_mtime = _read_disk_cache() + if new_disk_data is not None: + _catalog_cache = new_disk_data + _catalog_cache_source_mtime = new_mtime + return new_disk_data + _catalog_cache = fetched + _catalog_cache_source_mtime = now + return fetched + + if disk_data is not None: + _catalog_cache = disk_data + _catalog_cache_source_mtime = disk_mtime + return disk_data + + return {} + + +def _fetch_provider_override(provider: str) -> dict[str, Any] | None: + """If ``model_catalog.providers..url`` is set, fetch that instead.""" + cfg = _load_catalog_config() + if not cfg["enabled"]: + return None + provider_cfg = cfg["providers"].get(provider) + if not isinstance(provider_cfg, dict): + return None + override_url = provider_cfg.get("url") + if not isinstance(override_url, str) or not override_url.strip(): + return None + # Override fetches skip the disk cache because they're usually + # third-party self-hosted. Re-request on every call but with a short + # timeout so they don't block the picker. 
+ return _fetch_manifest(override_url.strip(), DEFAULT_FETCH_TIMEOUT) + + +def _get_provider_block(provider: str) -> dict[str, Any] | None: + """Return the provider's manifest block, respecting per-provider overrides.""" + override = _fetch_provider_override(provider) + if override is not None: + block = override.get("providers", {}).get(provider) + if isinstance(block, dict): + return block + + catalog = get_catalog() + if not catalog: + return None + block = catalog.get("providers", {}).get(provider) + return block if isinstance(block, dict) else None + + +def get_curated_openrouter_models() -> list[tuple[str, str]] | None: + """Return OpenRouter's curated ``[(id, description), ...]`` from the manifest. + + Returns ``None`` when the manifest is unavailable, so callers can fall + back to their hardcoded list. + """ + block = _get_provider_block("openrouter") + if not block: + return None + out: list[tuple[str, str]] = [] + for m in block.get("models", []): + mid = str(m.get("id") or "").strip() + if not mid: + continue + desc = str(m.get("description") or "") + out.append((mid, desc)) + return out or None + + +def get_curated_nous_models() -> list[str] | None: + """Return Nous Portal's curated list of model ids from the manifest. + + Returns ``None`` when the manifest is unavailable. + """ + block = _get_provider_block("nous") + if not block: + return None + out: list[str] = [] + for m in block.get("models", []): + mid = str(m.get("id") or "").strip() + if mid: + out.append(mid) + return out or None + + +def reset_cache() -> None: + """Clear the in-process cache. 
Used by tests and ``hermes model --refresh``.""" + global _catalog_cache, _catalog_cache_source_mtime + _catalog_cache = None + _catalog_cache_source_mtime = 0.0 diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 23ddc6f3ca..dbc1a1e2b6 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -876,7 +876,16 @@ def fetch_openrouter_models( if _openrouter_catalog_cache is not None and not force_refresh: return list(_openrouter_catalog_cache) - fallback = list(OPENROUTER_MODELS) + # Prefer the remotely-hosted catalog manifest; fall back to the in-repo + # snapshot when the manifest is unreachable. Both are curated lists that + # drive the picker; the OpenRouter live /v1/models filter (tool support, + # free pricing) is applied on top either way. + try: + from hermes_cli.model_catalog import get_curated_openrouter_models + remote = get_curated_openrouter_models() + except Exception: + remote = None + fallback = list(remote) if remote else list(OPENROUTER_MODELS) preferred_ids = [mid for mid, _ in fallback] try: @@ -929,6 +938,24 @@ def model_ids(*, force_refresh: bool = False) -> list[str]: return [mid for mid, _ in fetch_openrouter_models(force_refresh=force_refresh)] +def get_curated_nous_model_ids() -> list[str]: + """Return the curated Nous Portal model-id list. + + Prefers the remotely-hosted catalog manifest (published under + ``website/static/api/model-catalog.json``); falls back to the in-repo + snapshot in ``_PROVIDER_MODELS["nous"]`` when the manifest is + unreachable. Always returns a list (never None). 
+ """ + try: + from hermes_cli.model_catalog import get_curated_nous_models + remote = get_curated_nous_models() + except Exception: + remote = None + if remote: + return list(remote) + return list(_PROVIDER_MODELS.get("nous", [])) + + def _ai_gateway_model_is_free(pricing: Any) -> bool: """Return True if an AI Gateway model has $0 input AND output pricing.""" if not isinstance(pricing, dict): diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 0fa1f8abb2..2c4d28e027 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -1856,27 +1856,32 @@ def _setup_slack(): if existing: print_info("Slack: already configured") if not prompt_yes_no("Reconfigure Slack?", False): + # Even without reconfiguring, offer to refresh the manifest so + # new commands (e.g. /btw, /stop, ...) get registered in Slack. + if prompt_yes_no( + "Regenerate the Slack app manifest with the latest command " + "list? (recommended after `hermes update`)", + True, + ): + _write_slack_manifest_and_instruct() return print_info("Steps to create a Slack app:") - print_info(" 1. Go to https://api.slack.com/apps → Create New App (from scratch)") + print_info(" 1. Go to https://api.slack.com/apps → Create New App") + print_info(" Pick 'From an app manifest' — we'll generate one for you below.") print_info(" 2. Enable Socket Mode: Settings → Socket Mode → Enable") print_info(" • Create an App-Level Token with 'connections:write' scope") - print_info(" 3. Add Bot Token Scopes: Features → OAuth & Permissions") - print_info(" Required scopes: chat:write, app_mentions:read,") - print_info(" channels:history, channels:read, im:history,") - print_info(" im:read, im:write, users:read, files:read, files:write") - print_info(" Optional for private channels: groups:history") - print_info(" 4. 
Subscribe to Events: Features → Event Subscriptions → Enable") - print_info(" Required events: message.im, message.channels, app_mention") - print_info(" Optional for private channels: message.groups") - print_warning(" ⚠ Without message.channels the bot will ONLY work in DMs,") - print_warning(" not public channels.") - print_info(" 5. Install to Workspace: Settings → Install App") - print_info(" 6. Reinstall the app after any scope or event changes") - print_info(" 7. After installing, invite the bot to channels: /invite @YourBot") + print_info(" 3. Install to Workspace: Settings → Install App") + print_info(" 4. After installing, invite the bot to channels: /invite @YourBot") print() print_info(" Full guide: https://hermes-agent.nousresearch.com/docs/user-guide/messaging/slack/") + print() + + # Generate and write manifest up-front so the user can paste it into + # the "Create from manifest" flow instead of clicking through scopes / + # events / slash commands one at a time. + _write_slack_manifest_and_instruct() + print() bot_token = prompt("Slack Bot Token (xoxb-...)", password=True) if not bot_token: @@ -1902,6 +1907,49 @@ def _setup_slack(): print_info(" Set SLACK_ALLOW_ALL_USERS=true or GATEWAY_ALLOW_ALL_USERS=true only if you intentionally want open workspace access.") +def _write_slack_manifest_and_instruct(): + """Generate the Slack manifest, write it under HERMES_HOME, and print + paste-into-Slack instructions. + + Exposed as its own helper so both the initial setup flow and the + "reconfigure? → no" branch can refresh the manifest without the user + re-entering tokens. Failures are non-fatal — if the manifest write + fails for any reason, we print a warning and skip rather than abort + the whole Slack setup. 
+ """ + try: + from hermes_cli.slack_cli import _build_full_manifest + from hermes_constants import get_hermes_home + + manifest = _build_full_manifest( + bot_name="Hermes", + bot_description="Your Hermes agent on Slack", + ) + target = Path(get_hermes_home()) / "slack-manifest.json" + target.parent.mkdir(parents=True, exist_ok=True) + import json as _json + target.write_text( + _json.dumps(manifest, indent=2, ensure_ascii=False) + "\n", + encoding="utf-8", + ) + print_success(f"Slack app manifest written to: {target}") + print_info( + " Paste it into https://api.slack.com/apps → your app → Features " + "→ App Manifest → Edit, then Save. Slack will prompt to " + "reinstall if scopes or slash commands changed." + ) + print_info( + " Re-run `hermes slack manifest --write` anytime to refresh after " + "Hermes adds new commands." + ) + except Exception as exc: # pragma: no cover - best-effort UX helper + print_warning(f"Couldn't write Slack manifest: {exc}") + print_info( + " You can generate it manually later with: " + "hermes slack manifest --write" + ) + + def _setup_matrix(): """Configure Matrix credentials.""" print_header("Matrix") diff --git a/hermes_cli/skills_hub.py b/hermes_cli/skills_hub.py index bf92fafe10..2e425eee89 100644 --- a/hermes_cli/skills_hub.py +++ b/hermes_cli/skills_hub.py @@ -599,11 +599,24 @@ def inspect_skill(identifier: str) -> Optional[dict]: return out -def do_list(source_filter: str = "all", console: Optional[Console] = None) -> None: - """List installed skills, distinguishing hub, builtin, and local skills.""" +def do_list(source_filter: str = "all", + enabled_only: bool = False, + console: Optional[Console] = None) -> None: + """List installed skills, distinguishing hub, builtin, and local skills. + + Args: + source_filter: ``all`` | ``hub`` | ``builtin`` | ``local``. + enabled_only: If True, hide disabled skills from the output. 
+ + Enabled/disabled state is resolved against the currently active profile's + config — ``hermes -p skills list`` reads that profile's + ``skills.disabled`` list because ``-p`` swaps ``HERMES_HOME`` at process + start. No explicit profile flag needed here. + """ from tools.skills_hub import HubLockFile, ensure_hub_dirs from tools.skills_sync import _read_manifest from tools.skills_tool import _find_all_skills + from agent.skill_utils import get_disabled_skill_names c = console or _console ensure_hub_dirs() @@ -611,17 +624,26 @@ def do_list(source_filter: str = "all", console: Optional[Console] = None) -> No hub_installed = {e["name"]: e for e in lock.list_installed()} builtin_names = set(_read_manifest()) - all_skills = _find_all_skills() + # Pull ALL skills (including disabled ones) so we can annotate status. + all_skills = _find_all_skills(skip_disabled=True) + disabled_names = get_disabled_skill_names() - table = Table(title="Installed Skills") + title = "Installed Skills" + if enabled_only: + title += " (enabled only)" + + table = Table(title=title) table.add_column("Name", style="bold cyan") table.add_column("Category", style="dim") table.add_column("Source", style="dim") table.add_column("Trust", style="dim") + table.add_column("Status", style="dim") hub_count = 0 builtin_count = 0 local_count = 0 + enabled_count = 0 + disabled_count = 0 for skill in sorted(all_skills, key=lambda s: (s.get("category") or "", s["name"])): name = skill["name"] @@ -632,29 +654,48 @@ def do_list(source_filter: str = "all", console: Optional[Console] = None) -> No source_type = "hub" source_display = hub_entry.get("source", "hub") trust = hub_entry.get("trust_level", "community") - hub_count += 1 elif name in builtin_names: source_type = "builtin" source_display = "builtin" trust = "builtin" - builtin_count += 1 else: source_type = "local" source_display = "local" trust = "local" - local_count += 1 if source_filter != "all" and source_filter != source_type: continue + is_enabled 
= name not in disabled_names + if enabled_only and not is_enabled: + continue + + if source_type == "hub": + hub_count += 1 + elif source_type == "builtin": + builtin_count += 1 + else: + local_count += 1 + + if is_enabled: + enabled_count += 1 + status_cell = "[bold green]enabled[/]" + else: + disabled_count += 1 + status_cell = "[dim red]disabled[/]" + trust_style = {"builtin": "bright_cyan", "trusted": "green", "community": "yellow", "local": "dim"}.get(trust, "dim") trust_label = "official" if source_display == "official" else trust - table.add_row(name, category, source_display, f"[{trust_style}]{trust_label}[/]") + table.add_row(name, category, source_display, f"[{trust_style}]{trust_label}[/]", status_cell) c.print(table) - c.print( - f"[dim]{hub_count} hub-installed, {builtin_count} builtin, {local_count} local[/]\n" - ) + summary = f"[dim]{hub_count} hub-installed, {builtin_count} builtin, {local_count} local" + if enabled_only: + summary += f" — {enabled_count} enabled shown" + else: + summary += f" — {enabled_count} enabled, {disabled_count} disabled" + summary += "[/]\n" + c.print(summary) def do_check(name: Optional[str] = None, console: Optional[Console] = None) -> None: @@ -1127,7 +1168,10 @@ def skills_command(args) -> None: elif action == "inspect": do_inspect(args.identifier) elif action == "list": - do_list(source_filter=args.source) + do_list( + source_filter=args.source, + enabled_only=getattr(args, "enabled_only", False), + ) elif action == "check": do_check(name=getattr(args, "name", None)) elif action == "update": @@ -1279,11 +1323,12 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None: elif action == "list": source_filter = "all" + enabled_only = "--enabled-only" in args or "--enabled" in args if "--source" in args: idx = args.index("--source") if idx + 1 < len(args): source_filter = args[idx + 1] - do_list(source_filter=source_filter, console=c) + do_list(source_filter=source_filter, enabled_only=enabled_only, 
console=c) elif action == "check": name = args[0] if args else None @@ -1371,7 +1416,8 @@ def _print_skills_help(console: Console) -> None: " [cyan]search[/] Search registries for skills\n" " [cyan]install[/] Install a skill (with security scan)\n" " [cyan]inspect[/] Preview a skill without installing\n" - " [cyan]list[/] [--source hub|builtin|local] List installed skills\n" + " [cyan]list[/] [--source hub|builtin|local] [--enabled-only]\n" + " List installed skills; --enabled-only filters to the active profile's live set\n" " [cyan]check[/] [name] Check hub skills for upstream updates\n" " [cyan]update[/] [name] Update hub skills with upstream changes\n" " [cyan]audit[/] [name] Re-scan hub skills for security\n" diff --git a/hermes_cli/slack_cli.py b/hermes_cli/slack_cli.py new file mode 100644 index 0000000000..d76f8a6e06 --- /dev/null +++ b/hermes_cli/slack_cli.py @@ -0,0 +1,152 @@ +"""``hermes slack ...`` CLI subcommands. + +Today only ``hermes slack manifest`` is implemented — it generates the +Slack app manifest JSON for registering every gateway command as a native +Slack slash (``/btw``, ``/stop``, ``/model``, …) so users get the same +first-class slash UX Discord and Telegram already have. + +Typical workflow:: + + $ hermes slack manifest > slack-manifest.json + # or: + $ hermes slack manifest --write + +Then paste the printed JSON into the Slack app config (Features → App +Manifest → Edit) and click Save. Slack diffs the manifest and prompts +for reinstall when scopes/commands change. +""" +from __future__ import annotations + +import json +import sys +from pathlib import Path + + +def _build_full_manifest(bot_name: str, bot_description: str) -> dict: + """Build a full Slack manifest merging display info + our slash list. + + The slash-command list is always generated from ``COMMAND_REGISTRY`` so + it stays in sync with the rest of Hermes. 
Other manifest sections + (display info, OAuth scopes, socket mode) are set to sensible defaults + for a Hermes deployment — users can tweak them in the Slack UI after + pasting. + """ + from hermes_cli.commands import slack_app_manifest + + partial = slack_app_manifest() + slashes = partial["features"]["slash_commands"] + + return { + "_metadata": { + "major_version": 1, + "minor_version": 1, + }, + "display_information": { + "name": bot_name[:35], + "description": (bot_description or "Your Hermes agent on Slack")[:140], + "background_color": "#1a1a2e", + }, + "features": { + "bot_user": { + "display_name": bot_name[:80], + "always_online": True, + }, + "slash_commands": slashes, + "assistant_view": { + "assistant_description": "Chat with Hermes in threads and DMs.", + }, + }, + "oauth_config": { + "scopes": { + "bot": [ + "app_mentions:read", + "assistant:write", + "channels:history", + "channels:read", + "chat:write", + "commands", + "files:read", + "files:write", + "groups:history", + "im:history", + "im:read", + "im:write", + "users:read", + ], + }, + }, + "settings": { + "event_subscriptions": { + "bot_events": [ + "app_mention", + "assistant_thread_context_changed", + "assistant_thread_started", + "message.channels", + "message.groups", + "message.im", + ], + }, + "interactivity": { + "is_enabled": True, + }, + "org_deploy_enabled": False, + "socket_mode_enabled": True, + "token_rotation_enabled": False, + }, + } + + +def slack_manifest_command(args) -> int: + """Print or write a Slack app manifest JSON. 
+ + Flags (all parsed in ``hermes_cli/main.py``): + --write [PATH] Write to file instead of stdout (default path: + ``$HERMES_HOME/slack-manifest.json``) + --name NAME Override the bot display name (default: "Hermes") + --description DESC Override the bot description + --slashes-only Emit only the ``features.slash_commands`` array (for + merging into an existing manifest manually) + """ + name = getattr(args, "name", None) or "Hermes" + description = getattr(args, "description", None) or "Your Hermes agent on Slack" + + if getattr(args, "slashes_only", False): + from hermes_cli.commands import slack_app_manifest + + manifest = slack_app_manifest()["features"]["slash_commands"] + else: + manifest = _build_full_manifest(name, description) + + payload = json.dumps(manifest, indent=2, ensure_ascii=False) + "\n" + + write_target = getattr(args, "write", None) + if write_target is not None: + if isinstance(write_target, bool) and write_target: + # --write with no value → default location + try: + from hermes_constants import get_hermes_home + + target = Path(get_hermes_home()) / "slack-manifest.json" + except Exception: + target = Path.home() / ".hermes" / "slack-manifest.json" + else: + target = Path(write_target).expanduser() + target.parent.mkdir(parents=True, exist_ok=True) + target.write_text(payload, encoding="utf-8") + print(f"Slack manifest written to: {target}", file=sys.stderr) + print( + "\nNext steps:\n" + " 1. Open https://api.slack.com/apps and pick your Hermes app\n" + " (or create a new one: Create New App → From an app manifest).\n" + f" 2. Features → App Manifest → paste the contents of\n" + f" {target}\n" + " 3. Save; Slack will prompt to reinstall the app if scopes or\n" + " slash commands changed.\n" + " 4. Make sure Socket Mode is enabled and you have a bot token\n" + " (xoxb-...) and app token (xapp-...) 
configured via\n" + " `hermes setup`.\n", + file=sys.stderr, + ) + else: + sys.stdout.write(payload) + return 0 diff --git a/hermes_cli/tips.py b/hermes_cli/tips.py index db66e1db1b..a93a31db13 100644 --- a/hermes_cli/tips.py +++ b/hermes_cli/tips.py @@ -10,8 +10,7 @@ import random TIPS = [ # --- Slash Commands --- - "/btw asks a quick side question without tools or history — great for clarifications.", - "/background runs a task in a separate session while your current one stays free.", + "/background (alias /bg or /btw) runs a task in a separate session while your current one stays free.", "/branch forks the current session so you can explore a different direction without losing progress.", "/compress manually compresses conversation context when things get long.", "/rollback lists filesystem checkpoints — restore files the agent modified to any prior state.", diff --git a/hermes_state.py b/hermes_state.py index 8ae8ae6e61..cc40313084 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -832,7 +832,18 @@ class SessionDB: params = [] if not include_children: - where_clauses.append("s.parent_session_id IS NULL") + # Show root sessions and branch sessions (whose parent ended with + # end_reason='branched' before the child was created), while still + # hiding sub-agent runs and compression continuations (which also + # carry a parent_session_id but were spawned while the parent was + # still live — i.e., started_at < parent.ended_at). 
+ where_clauses.append( + "(s.parent_session_id IS NULL" + " OR EXISTS (SELECT 1 FROM sessions p" + " WHERE p.id = s.parent_session_id" + " AND p.end_reason = 'branched'" + " AND s.started_at >= p.ended_at))" + ) if source: where_clauses.append("s.source = ?") diff --git a/run_agent.py b/run_agent.py index 1f2a062127..984c8e71d5 100644 --- a/run_agent.py +++ b/run_agent.py @@ -892,7 +892,6 @@ class AIAgent: checkpoints_enabled: bool = False, checkpoint_max_snapshots: int = 50, pass_session_id: bool = False, - persist_session: bool = True, ): """ Initialize the AI Agent. @@ -964,7 +963,6 @@ class AIAgent: self.background_review_callback = None # Optional sync callback for gateway delivery self.skip_context_files = skip_context_files self.pass_session_id = pass_session_id - self.persist_session = persist_session self._credential_pool = credential_pool self.log_prefix_chars = log_prefix_chars self.log_prefix = f"{log_prefix} " if log_prefix else "" @@ -3109,13 +3107,28 @@ class AIAgent: ) _SKILL_REVIEW_PROMPT = ( - "Review the conversation above and consider saving or updating a skill if appropriate.\n\n" - "Focus on: was a non-trivial approach used to complete a task that required trial " - "and error, or changing course due to experiential findings along the way, or did " - "the user expect or desire a different method or outcome?\n\n" - "If a relevant skill already exists, update it with what you learned. " - "Otherwise, create a new skill if the approach is reusable.\n" - "If nothing is worth saving, just say 'Nothing to save.' and stop." + "Review the conversation above and consider whether a skill should be saved or updated.\n\n" + "Work in this order — do not skip steps:\n\n" + "1. SURVEY the existing skill landscape first. Call skills_list to see what you " + "have. If anything looks potentially relevant, skill_view it before deciding. " + "You are looking for the CLASS of task that just happened, not the exact task. 
" + "Example: a successful Tauri build is in the class \"desktop app build " + "troubleshooting\", not \"fix my specific Tauri error today\".\n\n" + "2. THINK CLASS-FIRST. What general pattern of task did the user just complete? " + "What conditions will trigger this pattern again? Describe the class in one " + "sentence before looking at what to save.\n\n" + "3. PREFER GENERALIZING AN EXISTING SKILL over creating a new one. If a skill " + "already covers the class — even partially — update it (skill_manage patch) " + "with the new insight. Broaden its \"when to use\" trigger if needed.\n\n" + "4. ONLY CREATE A NEW SKILL when no existing skill reasonably covers the class. " + "When you create one, name and scope it at the class level " + "(\"react-i18n-setup\", not \"add-i18n-to-my-dashboard-app\"). The trigger " + "section must describe the class of situations, not this one session.\n\n" + "5. If you notice two existing skills that overlap, note it in your response " + "so a future review can consolidate them. Do not consolidate now unless the " + "overlap is obvious and low-risk.\n\n" + "Only act when something is genuinely worth saving. " + "If nothing stands out, just say 'Nothing to save.' and stop." ) _COMBINED_REVIEW_PROMPT = ( @@ -3125,9 +3138,16 @@ class AIAgent: "about how you should behave, their work style, or ways they want you to operate? " "If so, save using the memory tool.\n\n" "**Skills**: Was a non-trivial approach used to complete a task that required trial " - "and error, or changing course due to experiential findings along the way, or did " - "the user expect or desire a different method or outcome? If a relevant skill " - "already exists, update it. Otherwise, create a new one if the approach is reusable.\n\n" + "and error, changing course due to experiential findings, or a different method " + "or outcome than the user expected? If so, work in this order:\n" + " a. 
SURVEY existing skills first (skills_list, then skill_view on candidates).\n" + " b. Identify the CLASS of task, not the specific task " + "(\"desktop app build troubleshooting\", not \"fix my Tauri error\").\n" + " c. PREFER UPDATING/GENERALIZING an existing skill that covers the class.\n" + " d. ONLY CREATE A NEW SKILL if no existing one covers the class. Scope at " + "the class level, not this one session.\n" + " e. If you notice overlapping skills during the survey, note it so a future " + "review can consolidate them.\n\n" "Only act if there's something genuinely worth saving. " "If nothing stands out, just say 'Nothing to save.' and stop." ) @@ -3225,12 +3245,25 @@ class AIAgent: with open(os.devnull, "w") as _devnull, \ contextlib.redirect_stdout(_devnull), \ contextlib.redirect_stderr(_devnull): + # Inherit the parent agent's live runtime (provider, model, + # base_url, api_key, api_mode) so the fork uses the exact + # same credentials the main turn is using. Without this, + # AIAgent.__init__ re-runs auto-resolution from env vars, + # which fails for OAuth-only providers, session-scoped + # creds, or credential-pool setups where the resolver can't + # reconstruct auth from scratch -- producing the spurious + # "No LLM provider configured" warning at end of turn. + _parent_runtime = self._current_main_runtime() review_agent = AIAgent( model=self.model, max_iterations=8, quiet_mode=True, platform=self.platform, provider=self.provider, + api_mode=_parent_runtime.get("api_mode") or None, + base_url=_parent_runtime.get("base_url") or None, + api_key=_parent_runtime.get("api_key") or None, + credential_pool=getattr(self, "_credential_pool", None), parent_session_id=self.session_id, ) review_agent._memory_write_origin = "background_review" @@ -3331,10 +3364,7 @@ class AIAgent: """Save session state to both JSON log and SQLite on any exit path. Ensures conversations are never lost, even on errors or early returns. 
- Skipped when ``persist_session=False`` (ephemeral helper flows). """ - if not self.persist_session: - return self._apply_persist_user_message_override(messages) self._session_messages = messages self._save_session_log(messages) @@ -7851,7 +7881,17 @@ class AIAgent: api_msg["reasoning_content"] = existing return - # 2. DeepSeek / Kimi thinking mode: tool-call turns that lack + # 2. Healthy session: promote 'reasoning' field to 'reasoning_content' + # for providers that use the internal 'reasoning' key. + # This must happen BEFORE the DeepSeek/Kimi tool-call check so that + # genuine reasoning content is not overwritten by the empty-string + # fallback (#15812 regression in PR #15478). + normalized_reasoning = source_msg.get("reasoning") + if isinstance(normalized_reasoning, str) and normalized_reasoning: + api_msg["reasoning_content"] = normalized_reasoning + return + + # 3. DeepSeek / Kimi thinking mode: tool-call turns that lack # reasoning_content are "poisoned history" — a prior provider (MiniMax, # etc.) left them empty. DeepSeek returns HTTP 400 if reasoning_content # is absent on replay; inject "" to satisfy the provider's requirement @@ -7867,13 +7907,6 @@ class AIAgent: api_msg["reasoning_content"] = "" return - # 3. Healthy session: promote 'reasoning' field to 'reasoning_content' - # for providers that use the internal 'reasoning' key. - normalized_reasoning = source_msg.get("reasoning") - if isinstance(normalized_reasoning, str) and normalized_reasoning: - api_msg["reasoning_content"] = normalized_reasoning - return - # 4. DeepSeek / Kimi thinking mode: all assistant messages need # reasoning_content. Inject "" to satisfy the provider's requirement # when no explicit reasoning content is present. @@ -11007,36 +11040,69 @@ class AIAgent: continue # ── Nous Portal: record rate limit & skip retries ───── - # When Nous returns a 429, record the reset time to a - # shared file so ALL sessions (cron, gateway, auxiliary) - # know not to pile on. 
Then skip further retries — - # each one burns another RPH request and deepens the - # rate limit hole. The retry loop's top-of-iteration - # guard will catch this on the next pass and try - # fallback or bail with a clear message. + # When Nous returns a 429 that is a genuine account- + # level rate limit, record the reset time to a shared + # file so ALL sessions (cron, gateway, auxiliary) know + # not to pile on, then skip further retries -- each + # one burns another RPH request and deepens the hole. + # The retry loop's top-of-iteration guard will catch + # this on the next pass and try fallback or bail. + # + # IMPORTANT: Nous Portal multiplexes multiple upstream + # providers (DeepSeek, Kimi, MiMo, Hermes). A 429 can + # also mean an UPSTREAM provider is out of capacity + # for one specific model -- transient, clears in + # seconds, nothing to do with the caller's quota. + # Tripping the cross-session breaker on that would + # block every Nous model for minutes. We use + # ``is_genuine_nous_rate_limit`` to tell the two + # apart via the 429's own x-ratelimit-* headers and + # the last-known-good state captured on the previous + # successful response. 
if ( is_rate_limited and self.provider == "nous" and classified.reason == FailoverReason.rate_limit and not recovered_with_pool ): + _genuine_nous_rate_limit = False try: - from agent.nous_rate_guard import record_nous_rate_limit + from agent.nous_rate_guard import ( + is_genuine_nous_rate_limit, + record_nous_rate_limit, + ) _err_resp = getattr(api_error, "response", None) _err_hdrs = ( getattr(_err_resp, "headers", None) if _err_resp else None ) - record_nous_rate_limit( + _genuine_nous_rate_limit = is_genuine_nous_rate_limit( headers=_err_hdrs, - error_context=error_context, + last_known_state=self._rate_limit_state, ) + if _genuine_nous_rate_limit: + record_nous_rate_limit( + headers=_err_hdrs, + error_context=error_context, + ) + else: + logging.info( + "Nous 429 looks like upstream capacity " + "(no exhausted bucket in headers or " + "last-known state) -- not tripping " + "cross-session breaker." + ) except Exception: pass - # Skip straight to max_retries — the top-of-loop - # guard will handle fallback or bail cleanly. - retry_count = max_retries - continue + if _genuine_nous_rate_limit: + # Skip straight to max_retries -- the + # top-of-loop guard will handle fallback or + # bail cleanly. + retry_count = max_retries + continue + # Upstream capacity 429: fall through to normal + # retry logic. A different model (or the same + # model a moment later) will typically succeed. is_payload_too_large = ( classified.reason == FailoverReason.payload_too_large diff --git a/scripts/build_model_catalog.py b/scripts/build_model_catalog.py new file mode 100755 index 0000000000..cd21c929e7 --- /dev/null +++ b/scripts/build_model_catalog.py @@ -0,0 +1,95 @@ +#!/usr/bin/env python3 +"""Build the Hermes Model Catalog — a centralized JSON manifest of curated models. + +This script reads the in-repo hardcoded curated lists (``OPENROUTER_MODELS``, +``_PROVIDER_MODELS["nous"]``) and writes them to a JSON manifest that the +Hermes CLI fetches at runtime. 
Publishing the catalog through the docs site +lets maintainers update model lists without shipping a Hermes release. + +The runtime fetcher falls back to the same in-repo hardcoded lists if the +manifest is unreachable, so this script is a convenience for keeping the +manifest in sync — not a source of truth. + +Usage:: + + python scripts/build_model_catalog.py + +Output: ``website/static/api/model-catalog.json`` + +Live URL (after ``deploy-site.yml`` runs on merge to main): +``https://hermes-agent.nousresearch.com/docs/api/model-catalog.json`` +""" + +from __future__ import annotations + +import json +import os +import sys +from datetime import datetime, timezone + +REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.insert(0, REPO_ROOT) + +# Ensure HERMES_HOME is set for imports that touch it at module level. +os.environ.setdefault("HERMES_HOME", os.path.join(os.path.expanduser("~"), ".hermes")) + +from hermes_cli.models import OPENROUTER_MODELS, _PROVIDER_MODELS # noqa: E402 + +OUTPUT_PATH = os.path.join(REPO_ROOT, "website", "static", "api", "model-catalog.json") +CATALOG_VERSION = 1 + + +def build_catalog() -> dict: + return { + "version": CATALOG_VERSION, + "updated_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"), + "metadata": { + "source": "hermes-agent repo", + "docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog", + }, + "providers": { + "openrouter": { + "metadata": { + "display_name": "OpenRouter", + "note": ( + "Descriptions drive picker badges. Live /api/v1/models " + "filters curated ids by tool-calling support and free pricing." + ), + }, + "models": [ + {"id": mid, "description": desc} + for mid, desc in OPENROUTER_MODELS + ], + }, + "nous": { + "metadata": { + "display_name": "Nous Portal", + "note": ( + "Free-tier gating is determined live via Portal pricing " + "(partition_nous_models_by_tier), not this manifest." 
+ ), + }, + "models": [ + {"id": mid} + for mid in _PROVIDER_MODELS.get("nous", []) + ], + }, + }, + } + + +def main() -> int: + catalog = build_catalog() + os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True) + with open(OUTPUT_PATH, "w") as fh: + json.dump(catalog, fh, indent=2) + fh.write("\n") + + print(f"Wrote {OUTPUT_PATH}") + for provider, block in catalog["providers"].items(): + print(f" {provider}: {len(block['models'])} models") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/release.py b/scripts/release.py index d2b50edb8b..b0612f09ad 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -43,6 +43,7 @@ AUTHOR_MAP = { "teknium1@gmail.com": "teknium1", "teknium@nousresearch.com": "teknium1", "127238744+teknium1@users.noreply.github.com": "teknium1", + "focusflow.app.help@gmail.com": "yes999zc", "343873859@qq.com": "DrStrangerUJN", "uzmpsk.dilekakbas@gmail.com": "dlkakbs", "jefferson@heimdallstrategy.com": "Mind-Dragon", @@ -69,6 +70,8 @@ AUTHOR_MAP = { "keira.voss94@gmail.com": "keiravoss94", "16443023+stablegenius49@users.noreply.github.com": "stablegenius49", "fqsy1416@gmail.com": "EKKOLearnAI", + "octo-patch@github.com": "octo-patch", + "math0r-be@github.com": "math0r-be", "simbamax99@gmail.com": "simbam99", "iris@growthpillars.co": "irispillars", "185121704+stablegenius49@users.noreply.github.com": "stablegenius49", @@ -118,6 +121,7 @@ AUTHOR_MAP = { "nocoo@users.noreply.github.com": "nocoo", "30841158+n-WN@users.noreply.github.com": "n-WN", "tsuijinglei@gmail.com": "hiddenpuppy", + "buraysandro9@gmail.com": "ygd58", "jerome@clawwork.ai": "HiddenPuppy", "jerome.benoit@sap.com": "jerome-benoit", "wysie@users.noreply.github.com": "Wysie", diff --git a/skills/autonomous-ai-agents/hermes-agent/SKILL.md b/skills/autonomous-ai-agents/hermes-agent/SKILL.md index 4ed03a904c..76a0e51b6c 100644 --- a/skills/autonomous-ai-agents/hermes-agent/SKILL.md +++ b/skills/autonomous-ai-agents/hermes-agent/SKILL.md @@ -281,7 
+281,6 @@ Type these during an interactive chat session. ### Utility ``` /branch (/fork) Branch the current session -/btw Ephemeral side question (doesn't interrupt main task) /fast Toggle priority/fast processing /browser Open CDP browser connection /history Show conversation history (CLI) diff --git a/skills/feeds/DESCRIPTION.md b/skills/feeds/DESCRIPTION.md deleted file mode 100644 index 5c2c97bf6d..0000000000 --- a/skills/feeds/DESCRIPTION.md +++ /dev/null @@ -1,3 +0,0 @@ ---- -description: Skills for monitoring, aggregating, and processing RSS feeds, blogs, and web content sources. ---- diff --git a/tests/agent/test_model_metadata.py b/tests/agent/test_model_metadata.py index 42ec0a464f..c28b68226b 100644 --- a/tests/agent/test_model_metadata.py +++ b/tests/agent/test_model_metadata.py @@ -192,6 +192,43 @@ class TestDefaultContextLengths: f"{model_id}: expected {expected_ctx}, got {actual}" ) + def test_deepseek_v4_models_1m_context(self): + from agent.model_metadata import get_model_context_length + from unittest.mock import patch as mock_patch + + expected_keys = { + "deepseek-v4-pro": 1_000_000, + "deepseek-v4-flash": 1_000_000, + "deepseek-chat": 1_000_000, + "deepseek-reasoner": 1_000_000, + } + for key, value in expected_keys.items(): + assert key in DEFAULT_CONTEXT_LENGTHS, f"{key} missing" + assert DEFAULT_CONTEXT_LENGTHS[key] == value, ( + f"{key} should be {value}, got {DEFAULT_CONTEXT_LENGTHS[key]}" + ) + + # Longest-first substring matching must resolve both the bare V4 + # ids (native DeepSeek) and the vendor-prefixed forms (OpenRouter + # / Nous Portal) to 1M without probing down to the legacy 128K + # ``deepseek`` substring fallback. 
+ with mock_patch("agent.model_metadata.fetch_model_metadata", return_value={}), \ + mock_patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \ + mock_patch("agent.model_metadata.get_cached_context_length", return_value=None): + cases = [ + ("deepseek-v4-pro", 1_000_000), + ("deepseek-v4-flash", 1_000_000), + ("deepseek/deepseek-v4-pro", 1_000_000), + ("deepseek/deepseek-v4-flash", 1_000_000), + ("deepseek-chat", 1_000_000), + ("deepseek-reasoner", 1_000_000), + ] + for model_id, expected_ctx in cases: + actual = get_model_context_length(model_id) + assert actual == expected_ctx, ( + f"{model_id}: expected {expected_ctx}, got {actual}" + ) + def test_all_values_positive(self): for key, value in DEFAULT_CONTEXT_LENGTHS.items(): assert value > 0, f"{key} has non-positive context length" @@ -303,7 +340,9 @@ class TestCodexOAuthContextLength: from agent.model_metadata import get_model_context_length # OpenRouter — should hit its own catalog path first; when mocked - # empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (400k). + # empty, falls through to hardcoded DEFAULT_CONTEXT_LENGTHS (1.05M, + # matching the real direct-API value — Codex OAuth's 272k cap is + # provider-specific and must not leak here). 
with patch("agent.model_metadata.fetch_model_metadata", return_value={}), \ patch("agent.model_metadata.fetch_endpoint_model_metadata", return_value={}), \ patch("agent.model_metadata.get_cached_context_length", return_value=None), \ @@ -314,7 +353,7 @@ class TestCodexOAuthContextLength: api_key="", provider="openrouter", ) - assert ctx == 400_000, ( + assert ctx == 1_050_000, ( f"Non-Codex gpt-5.5 resolved to {ctx}; Codex 272k override " "leaked outside openai-codex provider" ) diff --git a/tests/agent/test_nous_rate_guard.py b/tests/agent/test_nous_rate_guard.py index 45d30f7246..4441aa6e44 100644 --- a/tests/agent/test_nous_rate_guard.py +++ b/tests/agent/test_nous_rate_guard.py @@ -251,3 +251,141 @@ class TestAuxiliaryClientIntegration: monkeypatch.setattr(aux, "_read_nous_auth", lambda: None) result = aux._try_nous() assert result == (None, None) + + +class TestIsGenuineNousRateLimit: + """Tell a real account-level 429 apart from an upstream-capacity 429. + + Nous Portal multiplexes upstreams (DeepSeek, Kimi, MiMo, Hermes). + A 429 from an upstream out of capacity should NOT trip the + cross-session breaker; a real user-quota 429 should. 
+ """ + + def test_exhausted_hourly_bucket_in_429_headers_is_genuine(self): + from agent.nous_rate_guard import is_genuine_nous_rate_limit + + headers = { + "x-ratelimit-limit-requests-1h": "800", + "x-ratelimit-remaining-requests-1h": "0", + "x-ratelimit-reset-requests-1h": "3100", + "x-ratelimit-limit-requests": "200", + "x-ratelimit-remaining-requests": "198", + "x-ratelimit-reset-requests": "40", + } + assert is_genuine_nous_rate_limit(headers=headers) is True + + def test_exhausted_tokens_bucket_is_genuine(self): + from agent.nous_rate_guard import is_genuine_nous_rate_limit + + headers = { + "x-ratelimit-limit-tokens": "800000", + "x-ratelimit-remaining-tokens": "0", + "x-ratelimit-reset-tokens": "45", # < 60s threshold -> not genuine + "x-ratelimit-limit-tokens-1h": "8000000", + "x-ratelimit-remaining-tokens-1h": "0", + "x-ratelimit-reset-tokens-1h": "1800", # >= 60s threshold -> genuine + } + assert is_genuine_nous_rate_limit(headers=headers) is True + + def test_healthy_headers_on_429_are_upstream_capacity(self): + # Classic upstream-capacity symptom: Nous edge reports plenty of + # headroom on every bucket, but returns 429 anyway because + # upstream (DeepSeek / Kimi / ...) is out of capacity. 
+ from agent.nous_rate_guard import is_genuine_nous_rate_limit + + headers = { + "x-ratelimit-limit-requests": "200", + "x-ratelimit-remaining-requests": "198", + "x-ratelimit-reset-requests": "40", + "x-ratelimit-limit-requests-1h": "800", + "x-ratelimit-remaining-requests-1h": "750", + "x-ratelimit-reset-requests-1h": "3100", + "x-ratelimit-limit-tokens": "800000", + "x-ratelimit-remaining-tokens": "790000", + "x-ratelimit-reset-tokens": "40", + "x-ratelimit-limit-tokens-1h": "8000000", + "x-ratelimit-remaining-tokens-1h": "7800000", + "x-ratelimit-reset-tokens-1h": "3100", + } + assert is_genuine_nous_rate_limit(headers=headers) is False + + def test_bare_429_with_no_headers_is_upstream(self): + from agent.nous_rate_guard import is_genuine_nous_rate_limit + + assert is_genuine_nous_rate_limit(headers=None) is False + assert is_genuine_nous_rate_limit(headers={}) is False + assert is_genuine_nous_rate_limit( + headers={"content-type": "application/json"} + ) is False + + def test_exhausted_bucket_with_short_reset_is_not_genuine(self): + # remaining == 0 but reset in < 60s: almost certainly a + # secondary per-minute throttle that will clear immediately -- + # not worth tripping the cross-session breaker. + from agent.nous_rate_guard import is_genuine_nous_rate_limit + + headers = { + "x-ratelimit-limit-requests": "200", + "x-ratelimit-remaining-requests": "0", + "x-ratelimit-reset-requests": "30", + } + assert is_genuine_nous_rate_limit(headers=headers) is False + + def test_last_known_state_with_exhausted_bucket_triggers_genuine(self): + # Headers on the 429 lack rate-limit info, but the previous + # successful response already showed the hourly bucket + # exhausted -- the 429 is almost certainly that limit + # continuing. 
+ from agent.nous_rate_guard import is_genuine_nous_rate_limit + from agent.rate_limit_tracker import parse_rate_limit_headers + + prior_headers = { + "x-ratelimit-limit-requests-1h": "800", + "x-ratelimit-remaining-requests-1h": "0", + "x-ratelimit-reset-requests-1h": "2000", + "x-ratelimit-limit-requests": "200", + "x-ratelimit-remaining-requests": "100", + "x-ratelimit-reset-requests": "30", + "x-ratelimit-limit-tokens": "800000", + "x-ratelimit-remaining-tokens": "700000", + "x-ratelimit-reset-tokens": "30", + "x-ratelimit-limit-tokens-1h": "8000000", + "x-ratelimit-remaining-tokens-1h": "7000000", + "x-ratelimit-reset-tokens-1h": "2000", + } + last_state = parse_rate_limit_headers(prior_headers, provider="nous") + assert is_genuine_nous_rate_limit( + headers=None, last_known_state=last_state + ) is True + + def test_last_known_state_all_healthy_stays_upstream(self): + # Prior state was healthy; bare 429 arrives; should be treated + # as upstream capacity. + from agent.nous_rate_guard import is_genuine_nous_rate_limit + from agent.rate_limit_tracker import parse_rate_limit_headers + + prior_headers = { + "x-ratelimit-limit-requests-1h": "800", + "x-ratelimit-remaining-requests-1h": "750", + "x-ratelimit-reset-requests-1h": "2000", + "x-ratelimit-limit-requests": "200", + "x-ratelimit-remaining-requests": "180", + "x-ratelimit-reset-requests": "30", + "x-ratelimit-limit-tokens": "800000", + "x-ratelimit-remaining-tokens": "790000", + "x-ratelimit-reset-tokens": "30", + "x-ratelimit-limit-tokens-1h": "8000000", + "x-ratelimit-remaining-tokens-1h": "7900000", + "x-ratelimit-reset-tokens-1h": "2000", + } + last_state = parse_rate_limit_headers(prior_headers, provider="nous") + assert is_genuine_nous_rate_limit( + headers=None, last_known_state=last_state + ) is False + + def test_none_last_state_and_no_headers_is_upstream(self): + from agent.nous_rate_guard import is_genuine_nous_rate_limit + + assert is_genuine_nous_rate_limit( + headers=None, 
last_known_state=None + ) is False diff --git a/tests/agent/test_onboarding.py b/tests/agent/test_onboarding.py new file mode 100644 index 0000000000..a14c7d1797 --- /dev/null +++ b/tests/agent/test_onboarding.py @@ -0,0 +1,164 @@ +"""Tests for agent/onboarding.py — contextual first-touch hint helpers.""" + +from __future__ import annotations + +import yaml +import pytest + +from agent.onboarding import ( + BUSY_INPUT_FLAG, + TOOL_PROGRESS_FLAG, + busy_input_hint_cli, + busy_input_hint_gateway, + is_seen, + mark_seen, + tool_progress_hint_cli, + tool_progress_hint_gateway, +) + + +class TestIsSeen: + def test_empty_config_unseen(self): + assert is_seen({}, BUSY_INPUT_FLAG) is False + + def test_missing_onboarding_unseen(self): + assert is_seen({"display": {}}, BUSY_INPUT_FLAG) is False + + def test_onboarding_not_dict_unseen(self): + assert is_seen({"onboarding": "nope"}, BUSY_INPUT_FLAG) is False + + def test_seen_dict_missing_flag(self): + assert is_seen({"onboarding": {"seen": {}}}, BUSY_INPUT_FLAG) is False + + def test_seen_flag_true(self): + cfg = {"onboarding": {"seen": {BUSY_INPUT_FLAG: True}}} + assert is_seen(cfg, BUSY_INPUT_FLAG) is True + + def test_seen_flag_falsy(self): + cfg = {"onboarding": {"seen": {BUSY_INPUT_FLAG: False}}} + assert is_seen(cfg, BUSY_INPUT_FLAG) is False + + def test_other_flags_isolated(self): + cfg = {"onboarding": {"seen": {BUSY_INPUT_FLAG: True}}} + assert is_seen(cfg, TOOL_PROGRESS_FLAG) is False + + +class TestMarkSeen: + def test_creates_missing_file_and_sets_flag(self, tmp_path): + cfg_path = tmp_path / "config.yaml" + assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True + + loaded = yaml.safe_load(cfg_path.read_text()) + assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True + + def test_preserves_other_config(self, tmp_path): + cfg_path = tmp_path / "config.yaml" + cfg_path.write_text(yaml.safe_dump({ + "model": {"default": "claude-sonnet-4.6"}, + "display": {"skin": "default"}, + })) + + assert mark_seen(cfg_path, 
BUSY_INPUT_FLAG) is True + loaded = yaml.safe_load(cfg_path.read_text()) + + assert loaded["model"]["default"] == "claude-sonnet-4.6" + assert loaded["display"]["skin"] == "default" + assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True + + def test_preserves_other_seen_flags(self, tmp_path): + cfg_path = tmp_path / "config.yaml" + cfg_path.write_text(yaml.safe_dump({ + "onboarding": {"seen": {TOOL_PROGRESS_FLAG: True}}, + })) + + assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True + loaded = yaml.safe_load(cfg_path.read_text()) + + assert loaded["onboarding"]["seen"][TOOL_PROGRESS_FLAG] is True + assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True + + def test_idempotent(self, tmp_path): + cfg_path = tmp_path / "config.yaml" + mark_seen(cfg_path, BUSY_INPUT_FLAG) + first = cfg_path.read_text() + + # Second call must be a no-op on-disk content (file may be touched, + # but the YAML contents should be identical). + mark_seen(cfg_path, BUSY_INPUT_FLAG) + second = cfg_path.read_text() + + assert yaml.safe_load(first) == yaml.safe_load(second) + + def test_handles_non_dict_onboarding(self, tmp_path): + cfg_path = tmp_path / "config.yaml" + cfg_path.write_text(yaml.safe_dump({"onboarding": "corrupted"})) + + assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True + loaded = yaml.safe_load(cfg_path.read_text()) + assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True + + def test_handles_non_dict_seen(self, tmp_path): + cfg_path = tmp_path / "config.yaml" + cfg_path.write_text(yaml.safe_dump({"onboarding": {"seen": "corrupted"}})) + + assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True + loaded = yaml.safe_load(cfg_path.read_text()) + assert loaded["onboarding"]["seen"][BUSY_INPUT_FLAG] is True + + +class TestHintMessages: + def test_busy_input_hint_gateway_interrupt(self): + msg = busy_input_hint_gateway("interrupt") + assert "/busy queue" in msg + assert "interrupted" in msg.lower() + + def test_busy_input_hint_gateway_queue(self): + msg = 
busy_input_hint_gateway("queue") + assert "/busy interrupt" in msg + assert "queued" in msg.lower() + + def test_busy_input_hint_cli_interrupt(self): + msg = busy_input_hint_cli("interrupt") + assert "/busy queue" in msg + + def test_busy_input_hint_cli_queue(self): + msg = busy_input_hint_cli("queue") + assert "/busy interrupt" in msg + + def test_tool_progress_hints_mention_verbose(self): + assert "/verbose" in tool_progress_hint_gateway() + assert "/verbose" in tool_progress_hint_cli() + + def test_hints_are_not_empty(self): + for hint in ( + busy_input_hint_gateway("queue"), + busy_input_hint_gateway("interrupt"), + busy_input_hint_cli("queue"), + busy_input_hint_cli("interrupt"), + tool_progress_hint_gateway(), + tool_progress_hint_cli(), + ): + assert hint.strip() + + +class TestRoundTrip: + """After mark_seen, is_seen on the re-loaded config must return True.""" + + def test_mark_then_is_seen(self, tmp_path): + cfg_path = tmp_path / "config.yaml" + + assert mark_seen(cfg_path, BUSY_INPUT_FLAG) is True + loaded = yaml.safe_load(cfg_path.read_text()) + + assert is_seen(loaded, BUSY_INPUT_FLAG) is True + assert is_seen(loaded, TOOL_PROGRESS_FLAG) is False + + def test_mark_both_flags_independently(self, tmp_path): + cfg_path = tmp_path / "config.yaml" + + mark_seen(cfg_path, BUSY_INPUT_FLAG) + mark_seen(cfg_path, TOOL_PROGRESS_FLAG) + loaded = yaml.safe_load(cfg_path.read_text()) + + assert is_seen(loaded, BUSY_INPUT_FLAG) is True + assert is_seen(loaded, TOOL_PROGRESS_FLAG) is True diff --git a/tests/cli/test_branch_command.py b/tests/cli/test_branch_command.py index 9c3ec61d8c..581cdbdb6a 100644 --- a/tests/cli/test_branch_command.py +++ b/tests/cli/test_branch_command.py @@ -160,6 +160,30 @@ class TestBranchCommandCLI: assert agent.reset_session_state.called assert agent._last_flushed_db_idx == 4 # len(conversation_history) + def test_branch_updates_agent_session_log_file(self, cli_instance, session_db, tmp_path): + """Branching must redirect the agent's 
session_log_file to the new session's path.""" + from cli import HermesCLI + from pathlib import Path + + logs_dir = tmp_path / "sessions" + logs_dir.mkdir() + + agent = MagicMock() + agent._last_flushed_db_idx = 0 + agent.logs_dir = logs_dir + agent.session_log_file = logs_dir / f"session_{cli_instance.session_id}.json" + cli_instance.agent = agent + + old_log_file = agent.session_log_file + HermesCLI._handle_branch_command(cli_instance, "/branch") + + new_session_id = cli_instance.session_id + expected_log = logs_dir / f"session_{new_session_id}.json" + assert agent.session_log_file == expected_log, ( + "session_log_file must point to the branch session, not the original" + ) + assert agent.session_log_file != old_log_file + def test_branch_sets_resumed_flag(self, cli_instance, session_db): """Branch should set _resumed=True to prevent auto-title generation.""" from cli import HermesCLI diff --git a/tests/gateway/test_agent_cache.py b/tests/gateway/test_agent_cache.py index d4019e1d5e..e21ea62440 100644 --- a/tests/gateway/test_agent_cache.py +++ b/tests/gateway/test_agent_cache.py @@ -1043,3 +1043,132 @@ class TestAgentCacheIdleResume: new_agent.close() except Exception: pass + + +_FAKE_NOW = 10_000.0 # Fixed epoch for deterministic time assertions + + +class TestCachedAgentInactivityReset: + """Inactivity-clock reset must be gated on _interrupt_depth == 0. + + On interrupt-recursive turns (_interrupt_depth > 0) the clock must + keep accumulating so the inactivity watchdog can fire when a turn is + stuck in an interrupt loop. Resetting unconditionally prevented the + 30-min timeout from triggering (#15654). The depth-0 reset is still + needed: a session idle for 29 min must not trip the watchdog before + the new turn makes its first API call (#9051). 
+ """ + + def _fake_agent(self, stale_seconds: float = 1800.0): + m = MagicMock() + m._last_activity_ts = _FAKE_NOW - stale_seconds + m._api_call_count = 10 + m._last_activity_desc = "previous turn activity" + return m + + def test_fresh_turn_resets_idle_clock(self): + """interrupt_depth=0: clock resets so a post-idle turn gets a + fresh 30-min inactivity window (guard for #9051).""" + from gateway.run import GatewayRunner + + agent = self._fake_agent(stale_seconds=1800.0) + old_ts = agent._last_activity_ts + + with patch("gateway.run.time") as mock_time: + mock_time.time.return_value = _FAKE_NOW + GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=0) + + assert agent._last_activity_ts == _FAKE_NOW, ( + "_last_activity_ts was not reset on a fresh turn (interrupt_depth=0)" + ) + assert agent._last_activity_ts > old_ts, ( + "Stale idle time should be cleared so the new turn gets a fresh window" + ) + + def test_fresh_turn_resets_desc(self): + """interrupt_depth=0: description is updated to reflect the new turn.""" + from gateway.run import GatewayRunner + + agent = self._fake_agent() + + with patch("gateway.run.time") as mock_time: + mock_time.time.return_value = _FAKE_NOW + GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=0) + + assert agent._last_activity_desc == "starting new turn (cached)" + + def test_interrupt_turn_preserves_idle_clock(self): + """interrupt_depth=1: clock preserved so accumulated stuck-turn + idle time is not discarded by an interrupt-recursive re-entry (#15654).""" + from gateway.run import GatewayRunner + + agent = self._fake_agent(stale_seconds=1200.0) + old_ts = agent._last_activity_ts + + GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=1) + + assert agent._last_activity_ts == old_ts, ( + "_last_activity_ts must not be reset on interrupt-recursive turns " + "(interrupt_depth>0) — the watchdog needs the accumulated idle time" + ) + + def test_interrupt_turn_preserves_desc(self): + 
"""interrupt_depth=1: desc preserved — it is semantically paired with ts.""" + from gateway.run import GatewayRunner + + agent = self._fake_agent(stale_seconds=1200.0) + + GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=1) + + assert agent._last_activity_desc == "previous turn activity", ( + "_last_activity_desc must not change on interrupt-recursive turns; " + "it describes the activity *at* _last_activity_ts" + ) + + def test_deep_interrupt_recursion_preserves_idle_clock(self): + """interrupt_depth=MAX-1: clock still preserved at any non-zero depth.""" + from gateway.run import GatewayRunner + + agent = self._fake_agent(stale_seconds=600.0) + old_ts = agent._last_activity_ts + + GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=4) + + assert agent._last_activity_ts == old_ts + + def test_api_call_count_reset_regardless_of_depth(self): + """_api_call_count is always reset to 0 for the new turn, at any depth.""" + from gateway.run import GatewayRunner + + agent_fresh = self._fake_agent() + agent_interrupted = self._fake_agent() + + with patch("gateway.run.time") as mock_time: + mock_time.time.return_value = _FAKE_NOW + GatewayRunner._init_cached_agent_for_turn(agent_fresh, interrupt_depth=0) + GatewayRunner._init_cached_agent_for_turn(agent_interrupted, interrupt_depth=1) + + assert agent_fresh._api_call_count == 0 + assert agent_interrupted._api_call_count == 0 + + def test_watchdog_accumulation_across_recursive_turns(self): + """Scenario: stuck turn + user interrupt → recursive turn. + + The idle time seen by the watchdog must reflect the full stuck + duration, not restart from zero on the recursive re-entry. + """ + from gateway.run import GatewayRunner + + STUCK_FOR = 1750.0 + agent = self._fake_agent(stale_seconds=STUCK_FOR) + + # Simulate: user sees "Still working..." and sends another message. + # That triggers an interrupt → _run_agent recurses at depth=1. 
+ GatewayRunner._init_cached_agent_for_turn(agent, interrupt_depth=1) + + # Watchdog sees time.time() - _last_activity_ts ≥ STUCK_FOR. + idle_secs = _FAKE_NOW - agent._last_activity_ts + assert idle_secs >= STUCK_FOR - 1.0, ( + f"Watchdog would see {idle_secs:.0f}s idle, expected ~{STUCK_FOR}s. " + "Inactivity timeout could not fire for a stuck interrupted turn." + ) diff --git a/tests/gateway/test_busy_session_ack.py b/tests/gateway/test_busy_session_ack.py index 290c1a4b89..2d5f30f6d3 100644 --- a/tests/gateway/test_busy_session_ack.py +++ b/tests/gateway/test_busy_session_ack.py @@ -349,3 +349,121 @@ class TestBusySessionAck: result = await runner._handle_active_session_busy_message(event, sk) assert result is False # not handled, let default path try + + +class TestBusySessionOnboardingHint: + """First-touch hint appended to the busy-ack the first time it fires.""" + + @pytest.mark.asyncio + async def test_first_busy_ack_appends_interrupt_hint(self, tmp_path, monkeypatch): + """First busy-while-running message gets an extra hint about /busy.""" + import gateway.run as _gr + + monkeypatch.setattr(_gr, "_hermes_home", tmp_path) + # mark_seen imports utils.atomic_yaml_write; make sure it resolves + # against a writable dir by pointing _hermes_home at tmp_path. 
+ monkeypatch.setattr(_gr, "_load_gateway_config", lambda: {}) + + runner, _sentinel = _make_runner() + runner._busy_input_mode = "interrupt" + adapter = _make_adapter() + + event = _make_event(text="ping") + sk = build_session_key(event.source) + + agent = MagicMock() + agent.get_activity_summary.return_value = { + "api_call_count": 3, "max_iterations": 60, + "current_tool": None, "last_activity_ts": time.time(), + "last_activity_desc": "api", "seconds_since_activity": 0.1, + } + runner._running_agents[sk] = agent + runner._running_agents_ts[sk] = time.time() - 5 + runner.adapters[event.source.platform] = adapter + + await runner._handle_active_session_busy_message(event, sk) + + call_kwargs = adapter._send_with_retry.call_args + content = call_kwargs.kwargs.get("content", "") + + # Normal ack body + assert "Interrupting" in content + # First-touch hint appended + assert "First-time tip" in content + assert "/busy queue" in content + + # The flag is now persisted to tmp_path/config.yaml + import yaml + cfg = yaml.safe_load((tmp_path / "config.yaml").read_text()) + assert cfg["onboarding"]["seen"]["busy_input_prompt"] is True + + @pytest.mark.asyncio + async def test_second_busy_ack_omits_hint(self, tmp_path, monkeypatch): + """Once the flag is marked, the hint never appears again.""" + import gateway.run as _gr + import yaml + + monkeypatch.setattr(_gr, "_hermes_home", tmp_path) + # Pre-populate the config so is_seen() returns True from the start. 
+ (tmp_path / "config.yaml").write_text(yaml.safe_dump({ + "onboarding": {"seen": {"busy_input_prompt": True}}, + })) + monkeypatch.setattr( + _gr, "_load_gateway_config", + lambda: yaml.safe_load((tmp_path / "config.yaml").read_text()), + ) + + runner, _sentinel = _make_runner() + runner._busy_input_mode = "interrupt" + adapter = _make_adapter() + + event = _make_event(text="ping again") + sk = build_session_key(event.source) + + agent = MagicMock() + agent.get_activity_summary.return_value = { + "api_call_count": 3, "max_iterations": 60, + "current_tool": None, "last_activity_ts": time.time(), + "last_activity_desc": "api", "seconds_since_activity": 0.1, + } + runner._running_agents[sk] = agent + runner._running_agents_ts[sk] = time.time() - 5 + runner.adapters[event.source.platform] = adapter + + await runner._handle_active_session_busy_message(event, sk) + + call_kwargs = adapter._send_with_retry.call_args + content = call_kwargs.kwargs.get("content", "") + + assert "Interrupting" in content + assert "First-time tip" not in content + assert "/busy queue" not in content + + @pytest.mark.asyncio + async def test_queue_mode_hint_points_to_interrupt(self, tmp_path, monkeypatch): + """In queue mode the hint should suggest /busy interrupt, not /busy queue.""" + import gateway.run as _gr + + monkeypatch.setattr(_gr, "_hermes_home", tmp_path) + monkeypatch.setattr(_gr, "_load_gateway_config", lambda: {}) + + runner, _sentinel = _make_runner() + runner._busy_input_mode = "queue" + adapter = _make_adapter() + + event = _make_event(text="queue me") + sk = build_session_key(event.source) + runner.adapters[event.source.platform] = adapter + + agent = MagicMock() + runner._running_agents[sk] = agent + + with patch("gateway.run.merge_pending_message_event"): + await runner._handle_active_session_busy_message(event, sk) + + content = adapter._send_with_retry.call_args.kwargs.get("content", "") + assert "Queued for the next turn" in content + assert "First-time tip" in content 
+ assert "/busy interrupt" in content + # Must NOT tell the user to /busy queue when they're already on queue. + assert "/busy queue" not in content diff --git a/tests/gateway/test_run_progress_interrupt.py b/tests/gateway/test_run_progress_interrupt.py new file mode 100644 index 0000000000..23969677e0 --- /dev/null +++ b/tests/gateway/test_run_progress_interrupt.py @@ -0,0 +1,215 @@ +"""Tests for interrupt-aware tool-progress suppression in gateway. + +When a user sends `stop` while the agent is executing a batch of parallel +tool calls, the gateway's progress_callback should stop queuing 🔍 bubbles +and the drain loop should drop any already-queued events. Without this +guard, the stop acknowledgement appears first but is followed by a trail +of tool-progress bubbles for calls that were already parsed from the LLM +response — making the interrupt feel ignored. +""" + +import asyncio +import importlib +import sys +import time +import types +from types import SimpleNamespace + +import pytest + +from gateway.config import Platform, PlatformConfig +from gateway.platforms.base import BasePlatformAdapter, SendResult +from gateway.session import SessionSource + + +class ProgressCaptureAdapter(BasePlatformAdapter): + def __init__(self, platform=Platform.TELEGRAM): + super().__init__(PlatformConfig(enabled=True, token="***"), platform) + self.sent = [] + self.edits = [] + self.typing = [] + + async def connect(self) -> bool: + return True + + async def disconnect(self) -> None: + return None + + async def send(self, chat_id, content, reply_to=None, metadata=None) -> SendResult: + self.sent.append({"chat_id": chat_id, "content": content}) + return SendResult(success=True, message_id="progress-1") + + async def edit_message(self, chat_id, message_id, content) -> SendResult: + self.edits.append({"message_id": message_id, "content": content}) + return SendResult(success=True, message_id=message_id) + + async def send_typing(self, chat_id, metadata=None) -> None: + 
self.typing.append(chat_id) + + async def stop_typing(self, chat_id) -> None: + return None + + async def get_chat_info(self, chat_id: str): + return {"id": chat_id} + + +class PreInterruptAgent: + """Fires tool-progress events BEFORE the interrupt lands. + + These should render normally. Baseline for comparison with the + interrupted case — proves the harness renders events when no + interrupt is active. + """ + + def __init__(self, **kwargs): + self.tool_progress_callback = kwargs.get("tool_progress_callback") + self.tools = [] + self._interrupt_requested = False + + @property + def is_interrupted(self) -> bool: + return self._interrupt_requested + + def run_conversation(self, message, conversation_history=None, task_id=None): + self.tool_progress_callback("tool.started", "web_search", "first search", {}) + time.sleep(0.35) # let the drain loop process + return {"final_response": "done", "messages": [], "api_calls": 1} + + +class InterruptedAgent: + """Fires tool.started events AFTER interrupt — all should be suppressed. + + Mirrors the failure mode in the bug report: LLM returned N parallel + web_search calls, interrupt flag flipped, remaining events still + rendered as bubbles. With the fix, none of these should appear. + """ + + def __init__(self, **kwargs): + self.tool_progress_callback = kwargs.get("tool_progress_callback") + self.tools = [] + # Start already interrupted — simulates stop having already landed + # by the time the agent batch starts firing tool.started events. + self._interrupt_requested = True + + @property + def is_interrupted(self) -> bool: + return self._interrupt_requested + + def run_conversation(self, message, conversation_history=None, task_id=None): + # Parallel tool batch — in production these come from one LLM + # response with 5 tool_calls. All are post-interrupt. 
+ self.tool_progress_callback("tool.started", "web_search", "cognee hermes", {}) + self.tool_progress_callback("tool.started", "web_search", "McBee deer hunting", {}) + self.tool_progress_callback("tool.started", "web_search", "kuzu graph db", {}) + self.tool_progress_callback("tool.started", "web_search", "moonshot kimi api", {}) + self.tool_progress_callback("tool.started", "web_search", "platform.moonshot.cn", {}) + time.sleep(0.35) # let the drain loop attempt to process the queue + return {"final_response": "interrupted", "messages": [], "api_calls": 1} + + +def _make_runner(adapter): + gateway_run = importlib.import_module("gateway.run") + GatewayRunner = gateway_run.GatewayRunner + + runner = object.__new__(GatewayRunner) + runner.adapters = {adapter.platform: adapter} + runner._voice_mode = {} + runner._prefill_messages = [] + runner._ephemeral_system_prompt = "" + runner._reasoning_config = None + runner._provider_routing = {} + runner._fallback_model = None + runner._session_db = None + runner._running_agents = {} + runner._session_run_generation = {} + runner.hooks = SimpleNamespace(loaded_hooks=False) + runner.config = SimpleNamespace( + thread_sessions_per_user=False, + group_sessions_per_user=False, + stt_enabled=False, + ) + return runner + + +async def _run_once(monkeypatch, tmp_path, agent_cls, session_id): + monkeypatch.setenv("HERMES_TOOL_PROGRESS_MODE", "all") + + fake_dotenv = types.ModuleType("dotenv") + fake_dotenv.load_dotenv = lambda *args, **kwargs: None + monkeypatch.setitem(sys.modules, "dotenv", fake_dotenv) + + fake_run_agent = types.ModuleType("run_agent") + fake_run_agent.AIAgent = agent_cls + monkeypatch.setitem(sys.modules, "run_agent", fake_run_agent) + + adapter = ProgressCaptureAdapter() + runner = _make_runner(adapter) + gateway_run = importlib.import_module("gateway.run") + monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) + monkeypatch.setattr( + gateway_run, + "_resolve_runtime_agent_kwargs", + lambda: {"api_key": 
"fake"}, + ) + source = SessionSource( + platform=Platform.TELEGRAM, + chat_id="-1001", + chat_type="group", + thread_id="17585", + ) + result = await runner._run_agent( + message="hi", + context_prompt="", + history=[], + source=source, + session_id=session_id, + session_key="agent:main:telegram:group:-1001:17585", + ) + return adapter, result + + +@pytest.mark.asyncio +async def test_baseline_non_interrupted_agent_renders_progress(monkeypatch, tmp_path): + """Sanity check: when is_interrupted is False, tool-progress renders normally.""" + adapter, result = await _run_once(monkeypatch, tmp_path, PreInterruptAgent, "sess-baseline") + assert result["final_response"] == "done" + rendered = " ".join(c["content"] for c in adapter.sent) + " " + " ".join( + c["content"] for c in adapter.edits + ) + assert "first search" in rendered, ( + "baseline agent should render its tool-progress event — " + "if this fails the test harness is broken, not the fix" + ) + + +@pytest.mark.asyncio +async def test_progress_suppressed_when_agent_is_interrupted(monkeypatch, tmp_path): + """Post-interrupt tool.started events must not render as bubbles. + + This is Bug B from the screenshot: user sends `stop`, agent acks with + ⚡ Interrupting, but 5 more 🔍 web_search bubbles still render because + their tool.started events were already parsed from the LLM response. + With the fix, progress_callback and the drain loop both check + is_interrupted and skip these events. + """ + adapter, result = await _run_once( + monkeypatch, tmp_path, InterruptedAgent, "sess-interrupted" + ) + assert result["final_response"] == "interrupted" + + rendered = " ".join(c["content"] for c in adapter.sent) + " " + " ".join( + c["content"] for c in adapter.edits + ) + + # None of the post-interrupt queries should appear. 
+    for leaked_query in (
+        "cognee hermes",
+        "McBee deer hunting",
+        "kuzu graph db",
+        "moonshot kimi api",
+        "platform.moonshot.cn",
+    ):
+        assert leaked_query not in rendered, (
+            f"event '{leaked_query}' leaked into the UI after interrupt — "
+            "progress_callback / drain loop is not checking is_interrupted"
+        )
diff --git a/tests/gateway/test_running_agent_session_toggles.py b/tests/gateway/test_running_agent_session_toggles.py
index fbe0d5163c..6bf8be9973 100644
--- a/tests/gateway/test_running_agent_session_toggles.py
+++ b/tests/gateway/test_running_agent_session_toggles.py
@@ -165,3 +165,26 @@ async def test_reasoning_rejected_mid_run():
     assert result is not None
     assert "can't run mid-turn" in result
     assert "/reasoning" in result
+
+
+@pytest.mark.asyncio
+async def test_btw_dispatches_mid_run():
+    """/btw mid-run must dispatch to /background's handler, not hit the catch-all.
+
+    /btw is an alias of /background (see hermes_cli/commands.py). Typing
+    /btw mid-turn must spawn a parallel background task — that's the whole
+    point of the command. Before the mid-turn bypass was added for
+    /background, /btw fell through to the "Agent is running — wait or
+    /stop first" catch-all, making it useless in exactly the scenario it
+    was designed for. The alias and the bypass together make it work.
+ """ + runner = _make_runner() + runner._handle_background_command = AsyncMock( + return_value='🚀 Background task started: "what module owns titles?"' + ) + + result = await runner._handle_message(_make_event("/btw what module owns titles?")) + + runner._handle_background_command.assert_awaited_once() + assert result is not None + assert "can't run mid-turn" not in result diff --git a/tests/gateway/test_slack.py b/tests/gateway/test_slack.py index cdd27364b7..877d100d6f 100644 --- a/tests/gateway/test_slack.py +++ b/tests/gateway/test_slack.py @@ -147,7 +147,20 @@ class TestAppMentionHandler: assert "app_mention" in registered_events assert "assistant_thread_started" in registered_events assert "assistant_thread_context_changed" in registered_events - assert "/hermes" in registered_commands + # Slack slash commands are registered via a single regex matcher + # covering every COMMAND_REGISTRY entry (e.g. /hermes, /btw, /stop, + # /model, ...) so users get native-slash parity with Discord and + # Telegram. Verify the regex matches the key expected slashes. + assert len(registered_commands) == 1, ( + f"expected 1 combined slash matcher, got {registered_commands!r}" + ) + slash_matcher = registered_commands[0] + import re as _re + assert isinstance(slash_matcher, _re.Pattern) + for expected in ("/hermes", "/btw", "/stop", "/model", "/help"): + assert slash_matcher.match(expected), ( + f"Slack slash regex does not match {expected}" + ) class TestSlackConnectCleanup: @@ -1544,6 +1557,83 @@ class TestSlashCommands: msg = adapter.handle_message.call_args[0][0] assert msg.text == "/reasoning" + # ------------------------------------------------------------------ + # Native slash commands — /btw, /stop, /model, ... dispatched directly + # instead of as /hermes subcommands. This is the Discord/Telegram parity + # fix: the slash name itself becomes the command. 
+ # ------------------------------------------------------------------ + + @pytest.mark.asyncio + async def test_native_btw_slash(self, adapter): + """/btw with args must dispatch to /background, not /hermes btw.""" + command = { + "command": "/btw", + "text": "fix the failing test", + "user_id": "U1", + "channel_id": "C1", + } + await adapter._handle_slash_command(command) + msg = adapter.handle_message.call_args[0][0] + # The gateway command dispatcher resolves /btw -> background via + # resolve_command() — our handler's job is just to deliver + # "/btw " to the gateway runner, which is what this asserts. + assert msg.text == "/btw fix the failing test" + + @pytest.mark.asyncio + async def test_native_stop_slash_no_args(self, adapter): + command = { + "command": "/stop", + "text": "", + "user_id": "U1", + "channel_id": "C1", + } + await adapter._handle_slash_command(command) + msg = adapter.handle_message.call_args[0][0] + assert msg.text == "/stop" + + @pytest.mark.asyncio + async def test_native_model_slash_with_args(self, adapter): + command = { + "command": "/model", + "text": "anthropic/claude-sonnet-4", + "user_id": "U1", + "channel_id": "C1", + } + await adapter._handle_slash_command(command) + msg = adapter.handle_message.call_args[0][0] + assert msg.text == "/model anthropic/claude-sonnet-4" + + @pytest.mark.asyncio + async def test_legacy_hermes_prefix_still_works(self, adapter): + """Backward compat: /hermes btw foo must still route to /btw foo. + + Old workspace manifests only declared /hermes as the single slash. + After users refresh their manifest they get /btw natively, but the + legacy form must keep working during the transition. 
+ """ + command = { + "command": "/hermes", + "text": "btw run the tests", + "user_id": "U1", + "channel_id": "C1", + } + await adapter._handle_slash_command(command) + msg = adapter.handle_message.call_args[0][0] + assert msg.text == "/btw run the tests" + + @pytest.mark.asyncio + async def test_legacy_hermes_freeform_question(self, adapter): + """/hermes must stay as the raw text (non-command).""" + command = { + "command": "/hermes", + "text": "what's the weather today?", + "user_id": "U1", + "channel_id": "C1", + } + await adapter._handle_slash_command(command) + msg = adapter.handle_message.call_args[0][0] + assert msg.text == "what's the weather today?" + # --------------------------------------------------------------------------- # TestMessageSplitting diff --git a/tests/gateway/test_voice_command.py b/tests/gateway/test_voice_command.py index ed36b976e5..2e9c54608a 100644 --- a/tests/gateway/test_voice_command.py +++ b/tests/gateway/test_voice_command.py @@ -177,6 +177,53 @@ class TestHandleVoiceCommand: assert adapter._auto_tts_disabled_chats == {"123"} + def test_sync_populates_enabled_chats_from_voice_modes(self, runner): + """Issue #16007: sync also restores per-chat /voice on|tts opt-ins. + + The adapter's ``_auto_tts_enabled_chats`` must mirror chats whose + persisted voice_mode is ``voice_only`` or ``all`` — without this, + ``/voice on`` was relying on a "not in disabled set" default that + silently enabled auto-TTS for every chat. 
+ """ + from gateway.config import Platform + runner._voice_mode = { + "telegram:off_chat": "off", + "telegram:on_chat": "voice_only", + "telegram:tts_chat": "all", + "slack:999": "voice_only", # wrong platform, must be ignored + } + adapter = SimpleNamespace( + _auto_tts_default=False, + _auto_tts_disabled_chats=set(), + _auto_tts_enabled_chats=set(), + platform=Platform.TELEGRAM, + ) + + runner._sync_voice_mode_state_to_adapter(adapter) + + assert adapter._auto_tts_disabled_chats == {"off_chat"} + assert adapter._auto_tts_enabled_chats == {"on_chat", "tts_chat"} + + def test_sync_pushes_config_default_onto_adapter(self, runner, monkeypatch): + """Issue #16007: ``voice.auto_tts`` must propagate to ``_auto_tts_default``.""" + from gateway.config import Platform + + fake_cfg = {"voice": {"auto_tts": True}} + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: fake_cfg, + ) + adapter = SimpleNamespace( + _auto_tts_default=False, + _auto_tts_disabled_chats=set(), + _auto_tts_enabled_chats=set(), + platform=Platform.TELEGRAM, + ) + + runner._sync_voice_mode_state_to_adapter(adapter) + + assert adapter._auto_tts_default is True + def test_restart_restores_voice_off_state(self, runner, tmp_path): from gateway.config import Platform runner._VOICE_MODE_PATH.write_text(json.dumps({"telegram:123": "off"})) @@ -2706,3 +2753,56 @@ class TestUDPKeepalive: mock_conn.send_packet.assert_called_with(b'\xf8\xff\xfe') finally: DiscordAdapter._KEEPALIVE_INTERVAL = original_interval + + +# ===================================================================== +# BasePlatformAdapter._should_auto_tts_for_chat — gate for auto-TTS +# on voice input. Regression test for Issue #16007. 
+# ===================================================================== + +class TestShouldAutoTtsForChat: + """Three-layer gate: per-chat enable > per-chat disable > config default.""" + + def _make_adapter(self, *, default: bool, enabled=(), disabled=()): + """Build a bare adapter with only the attrs the gate reads.""" + adapter = SimpleNamespace( + _auto_tts_default=default, + _auto_tts_enabled_chats=set(enabled), + _auto_tts_disabled_chats=set(disabled), + ) + # Bind the unbound method — _should_auto_tts_for_chat only reads the + # three attrs above via ``self.``, so an unbound call works. + from gateway.platforms.base import BasePlatformAdapter + return BasePlatformAdapter._should_auto_tts_for_chat, adapter + + def test_default_false_no_override_suppresses(self): + """Issue #16007: voice.auto_tts=False and no per-chat state → no TTS.""" + fn, adapter = self._make_adapter(default=False) + assert fn(adapter, "chat1") is False + + def test_default_true_no_override_fires(self): + fn, adapter = self._make_adapter(default=True) + assert fn(adapter, "chat1") is True + + def test_explicit_enable_overrides_false_default(self): + """``/voice on`` with config auto_tts=False still fires.""" + fn, adapter = self._make_adapter(default=False, enabled={"chat1"}) + assert fn(adapter, "chat1") is True + + def test_explicit_disable_overrides_true_default(self): + """``/voice off`` with config auto_tts=True still suppresses.""" + fn, adapter = self._make_adapter(default=True, disabled={"chat1"}) + assert fn(adapter, "chat1") is False + + def test_enabled_wins_over_disabled(self): + """An explicit enable beats an explicit disable (enable takes priority).""" + fn, adapter = self._make_adapter( + default=False, enabled={"chat1"}, disabled={"chat1"} + ) + assert fn(adapter, "chat1") is True + + def test_per_chat_isolation(self): + """Enable for chat1 doesn't leak to chat2.""" + fn, adapter = self._make_adapter(default=False, enabled={"chat1"}) + assert fn(adapter, "chat1") is True + 
assert fn(adapter, "chat2") is False diff --git a/tests/hermes_cli/test_apply_model_switch_result_context.py b/tests/hermes_cli/test_apply_model_switch_result_context.py new file mode 100644 index 0000000000..fd17150be3 --- /dev/null +++ b/tests/hermes_cli/test_apply_model_switch_result_context.py @@ -0,0 +1,152 @@ +"""Regression test for the `/model` picker confirmation display. + +Bug (April 2026): after choosing a model from the interactive `/model` picker, +``HermesCLI._apply_model_switch_result()`` printed ``ModelInfo.context_window`` +straight from models.dev, which always reports the vendor-wide value (e.g. +gpt-5.5 = 1,050,000 on ``openai``). That ignored provider-specific caps — in +particular, ChatGPT Codex OAuth enforces 272K on the same slug. The sibling +``_handle_model_switch()`` (typed ``/model ``) was already fixed to use +``resolve_display_context_length()``; the picker path was missed, causing +"sometimes 1M, sometimes 272K" for the same model across sibling UI paths. + +Fix: both display paths now go through ``resolve_display_context_length()``. +""" +from __future__ import annotations + +from unittest.mock import patch + +from hermes_cli.model_switch import ModelSwitchResult + + +class _FakeModelInfo: + context_window = 1_050_000 + max_output = 0 + + def has_cost_data(self): + return False + + def format_capabilities(self): + return "" + + +class _StubCLI: + """Minimum attrs ``_apply_model_switch_result`` reads on ``self``.""" + agent = None + model = "" + provider = "" + requested_provider = "" + api_key = "" + _explicit_api_key = "" + base_url = "" + _explicit_base_url = "" + api_mode = "" + _pending_model_switch_note = "" + + +def _run_display(monkeypatch, result): + import cli as cli_mod + + captured: list[str] = [] + monkeypatch.setattr(cli_mod, "_cprint", lambda s, *a, **k: captured.append(str(s))) + # Avoid writing to ~/.hermes/config.yaml during the test. 
+ monkeypatch.setattr(cli_mod, "save_config_value", lambda *a, **k: None) + cli_mod.HermesCLI._apply_model_switch_result(_StubCLI(), result, False) + return captured + + +def test_picker_path_uses_provider_aware_context_on_codex(monkeypatch): + """``_apply_model_switch_result`` must prefer the provider-aware resolver + (272K on Codex) over the raw models.dev value (1.05M for gpt-5.5). + """ + result = ModelSwitchResult( + success=True, + new_model="gpt-5.5", + target_provider="openai-codex", + provider_changed=True, + api_key="", + base_url="https://chatgpt.com/backend-api/codex", + api_mode="codex_responses", + warning_message="", + provider_label="ChatGPT Codex", + resolved_via_alias=False, + capabilities=None, + model_info=_FakeModelInfo(), # models.dev says 1.05M + is_global=False, + ) + with patch( + "agent.model_metadata.get_model_context_length", + return_value=272_000, + ): + lines = _run_display(monkeypatch, result) + + ctx_line = next((l for l in lines if "Context:" in l), "") + assert "272,000" in ctx_line, ( + f"picker-path display must show Codex's 272K cap, got: {ctx_line!r}" + ) + assert "1,050,000" not in ctx_line, ( + f"picker-path display leaked models.dev's 1.05M for Codex: {ctx_line!r}" + ) + + +def test_picker_path_shows_vendor_value_when_no_provider_cap(monkeypatch): + """On providers with no enforced cap (e.g. OpenRouter), the picker path + should surface the real 1.05M context for gpt-5.5 — resolver and models.dev + agree here. 
+ """ + result = ModelSwitchResult( + success=True, + new_model="openai/gpt-5.5", + target_provider="openrouter", + provider_changed=True, + api_key="", + base_url="https://openrouter.ai/api/v1", + api_mode="chat_completions", + warning_message="", + provider_label="OpenRouter", + resolved_via_alias=False, + capabilities=None, + model_info=_FakeModelInfo(), + is_global=False, + ) + with patch( + "agent.model_metadata.get_model_context_length", + return_value=1_050_000, + ): + lines = _run_display(monkeypatch, result) + + ctx_line = next((l for l in lines if "Context:" in l), "") + assert "1,050,000" in ctx_line, ( + f"OpenRouter gpt-5.5 should show 1.05M context, got: {ctx_line!r}" + ) + + +def test_picker_path_falls_back_to_model_info_when_resolver_empty(monkeypatch): + """If ``get_model_context_length`` returns nothing (rare — truly unknown + endpoint), the display still surfaces ``ModelInfo.context_window`` so the + user sees *something* rather than a silent blank. + """ + result = ModelSwitchResult( + success=True, + new_model="some-model", + target_provider="some-provider", + provider_changed=True, + api_key="", + base_url="", + api_mode="chat_completions", + warning_message="", + provider_label="Some Provider", + resolved_via_alias=False, + capabilities=None, + model_info=_FakeModelInfo(), # context_window = 1_050_000 + is_global=False, + ) + with patch( + "agent.model_metadata.get_model_context_length", + return_value=None, + ): + lines = _run_display(monkeypatch, result) + + ctx_line = next((l for l in lines if "Context:" in l), "") + assert "1,050,000" in ctx_line, ( + f"resolver-empty path should fall back to ModelInfo, got: {ctx_line!r}" + ) diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py index d77a076ebf..26bba9d58f 100644 --- a/tests/hermes_cli/test_commands.py +++ b/tests/hermes_cli/test_commands.py @@ -20,6 +20,8 @@ from hermes_cli.commands import ( discord_skill_commands, gateway_help_lines, resolve_command, + 
slack_app_manifest, + slack_native_slashes, slack_subcommand_map, telegram_bot_commands, telegram_menu_commands, @@ -256,6 +258,115 @@ class TestSlackSubcommandMap: assert cmd.name not in mapping +class TestSlackNativeSlashes: + """Slack native slash command generation — used to register every + COMMAND_REGISTRY entry as a first-class Slack slash, matching Discord + and Telegram.""" + + def test_returns_triples(self): + slashes = slack_native_slashes() + assert len(slashes) >= 10 + for entry in slashes: + assert isinstance(entry, tuple) and len(entry) == 3 + name, desc, hint = entry + assert isinstance(name, str) and name + assert isinstance(desc, str) + assert isinstance(hint, str) + + def test_hermes_catchall_is_first(self): + """``/hermes`` must be reserved as the first slot so the legacy + ``/hermes `` form keeps working after we add new + commands and hit the 50-slash cap.""" + slashes = slack_native_slashes() + assert slashes[0][0] == "hermes" + + def test_names_respect_slack_limits(self): + for name, _desc, _hint in slack_native_slashes(): + # Slack: lowercase a-z, 0-9, hyphens, underscores; max 32 chars + assert len(name) <= 32, f"slash {name!r} exceeds 32 chars" + assert name == name.lower() + for ch in name: + assert ch.isalnum() or ch in "-_", f"invalid char {ch!r} in {name!r}" + + def test_under_fifty_command_cap(self): + """Slack allows at most 50 slash commands per app.""" + assert len(slack_native_slashes()) <= 50 + + def test_unique_names(self): + names = [n for n, _d, _h in slack_native_slashes()] + assert len(names) == len(set(names)), "duplicate Slack slash names" + + def test_includes_canonical_commands(self): + names = {n for n, _d, _h in slack_native_slashes()} + # Sample of gateway-available canonical commands + for expected in ("new", "stop", "background", "model", "help", "status"): + assert expected in names, f"missing canonical /{expected}" + + def test_includes_aliases_as_first_class_slashes(self): + """Aliases (/btw, /bg, /reset, /q) 
must be registered as standalone + slashes — this is the whole point of native-slashes parity.""" + names = {n for n, _d, _h in slack_native_slashes()} + assert "btw" in names + assert "bg" in names + assert "reset" in names + assert "q" in names + + def test_telegram_parity(self): + """Every Telegram bot command must be registerable on Slack too. + + This catches the old behavior where Slack users couldn't invoke + commands like /btw natively. If a future command surfaces on + Telegram but not Slack (because of Slack's 50-slash cap), this + test fails loudly so we can curate the list rather than silently + dropping parity. + """ + slack_names = {n for n, _d, _h in slack_native_slashes()} + tg_names = {n for n, _d in telegram_bot_commands()} + # Some Telegram names have underscores where Slack uses hyphens + # (e.g. set_home vs sethome). Normalize both sides for comparison. + def _norm(s: str) -> str: + return s.replace("-", "_").replace("__", "_").strip("_") + + slack_norm = {_norm(n) for n in slack_names} + tg_norm = {_norm(n) for n in tg_names} + missing = tg_norm - slack_norm + assert not missing, ( + f"commands on Telegram but missing from Slack native slashes: {sorted(missing)}" + ) + + +class TestSlackAppManifest: + """Generated Slack app manifest (used by `hermes slack manifest`).""" + + def test_returns_dict(self): + m = slack_app_manifest() + assert isinstance(m, dict) + assert "features" in m + assert "slash_commands" in m["features"] + + def test_each_slash_has_required_fields(self): + m = slack_app_manifest() + for entry in m["features"]["slash_commands"]: + assert entry["command"].startswith("/") + assert "description" in entry + assert "url" in entry + # should_escape must be present (Slack defaults to True which + # HTML-escapes args — we want the raw text) + assert "should_escape" in entry + + def test_btw_is_in_manifest(self): + """Regression: /btw must be a native Slack slash, not just a + /hermes subcommand.""" + m = slack_app_manifest() + 
commands = [c["command"] for c in m["features"]["slash_commands"]] + assert "/btw" in commands + + def test_custom_request_url(self): + m = slack_app_manifest(request_url="https://example.com/slack") + for entry in m["features"]["slash_commands"]: + assert entry["url"] == "https://example.com/slack" + + # --------------------------------------------------------------------------- # Config-gated gateway commands # --------------------------------------------------------------------------- diff --git a/tests/hermes_cli/test_fallback_cmd.py b/tests/hermes_cli/test_fallback_cmd.py new file mode 100644 index 0000000000..a88c84b3aa --- /dev/null +++ b/tests/hermes_cli/test_fallback_cmd.py @@ -0,0 +1,486 @@ +"""Tests for `hermes fallback` — chain reading, add/remove/clear, legacy migration.""" +from __future__ import annotations + +import io +import types +from pathlib import Path +from unittest.mock import patch + +import pytest +import yaml + + +# --------------------------------------------------------------------------- +# Shared fixture — isolate HERMES_HOME so save_config writes to tmp_path +# --------------------------------------------------------------------------- + +@pytest.fixture() +def isolated_home(tmp_path, monkeypatch): + monkeypatch.setattr(Path, "home", lambda: tmp_path) + home = tmp_path / ".hermes" + home.mkdir(exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(home)) + return tmp_path + + +def _write_config(home: Path, data: dict) -> None: + config_path = home / ".hermes" / "config.yaml" + config_path.write_text(yaml.safe_dump(data), encoding="utf-8") + + +def _read_config(home: Path) -> dict: + config_path = home / ".hermes" / "config.yaml" + return yaml.safe_load(config_path.read_text(encoding="utf-8")) or {} + + +# --------------------------------------------------------------------------- +# _read_chain / _write_chain +# --------------------------------------------------------------------------- + +class TestReadChain: + def 
test_returns_empty_list_when_unset(self): + from hermes_cli.fallback_cmd import _read_chain + assert _read_chain({}) == [] + + def test_reads_new_list_format(self): + from hermes_cli.fallback_cmd import _read_chain + cfg = { + "fallback_providers": [ + {"provider": "openrouter", "model": "anthropic/claude-sonnet-4.6"}, + {"provider": "nous", "model": "Hermes-4-Llama-3.1-405B"}, + ] + } + assert _read_chain(cfg) == [ + {"provider": "openrouter", "model": "anthropic/claude-sonnet-4.6"}, + {"provider": "nous", "model": "Hermes-4-Llama-3.1-405B"}, + ] + + def test_migrates_legacy_single_dict(self): + from hermes_cli.fallback_cmd import _read_chain + cfg = {"fallback_model": {"provider": "openrouter", "model": "gpt-5.4"}} + assert _read_chain(cfg) == [{"provider": "openrouter", "model": "gpt-5.4"}] + + def test_skips_incomplete_entries(self): + from hermes_cli.fallback_cmd import _read_chain + cfg = { + "fallback_providers": [ + {"provider": "openrouter"}, # missing model + {"model": "gpt-5.4"}, # missing provider + {"provider": "nous", "model": "foo"}, # valid + "not-a-dict", # noise + ] + } + assert _read_chain(cfg) == [{"provider": "nous", "model": "foo"}] + + def test_returns_copies_not_aliases(self): + from hermes_cli.fallback_cmd import _read_chain + cfg = {"fallback_providers": [{"provider": "nous", "model": "foo"}]} + result = _read_chain(cfg) + result[0]["provider"] = "mutated" + assert cfg["fallback_providers"][0]["provider"] == "nous" + + +# --------------------------------------------------------------------------- +# _extract_fallback_from_model_cfg +# --------------------------------------------------------------------------- + +class TestExtractFallback: + def test_extracts_from_default_field(self): + from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg + model_cfg = {"provider": "openrouter", "default": "anthropic/claude-sonnet-4.6"} + assert _extract_fallback_from_model_cfg(model_cfg) == { + "provider": "openrouter", + "model": 
"anthropic/claude-sonnet-4.6", + } + + def test_extracts_optional_base_url_and_api_mode(self): + from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg + model_cfg = { + "provider": "custom", + "default": "local-model", + "base_url": "http://localhost:11434/v1", + "api_mode": "chat_completions", + } + assert _extract_fallback_from_model_cfg(model_cfg) == { + "provider": "custom", + "model": "local-model", + "base_url": "http://localhost:11434/v1", + "api_mode": "chat_completions", + } + + def test_returns_none_without_provider(self): + from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg + assert _extract_fallback_from_model_cfg({"default": "foo"}) is None + + def test_returns_none_without_model(self): + from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg + assert _extract_fallback_from_model_cfg({"provider": "openrouter"}) is None + + def test_returns_none_for_non_dict(self): + from hermes_cli.fallback_cmd import _extract_fallback_from_model_cfg + assert _extract_fallback_from_model_cfg("plain-string") is None + assert _extract_fallback_from_model_cfg(None) is None + + +# --------------------------------------------------------------------------- +# cmd_fallback_list +# --------------------------------------------------------------------------- + +class TestListCommand: + def test_list_empty(self, isolated_home, capsys): + _write_config(isolated_home, {}) + from hermes_cli.fallback_cmd import cmd_fallback_list + cmd_fallback_list(types.SimpleNamespace()) + out = capsys.readouterr().out + assert "No fallback providers configured" in out + assert "hermes fallback add" in out + + def test_list_with_entries(self, isolated_home, capsys): + _write_config(isolated_home, { + "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"}, + "fallback_providers": [ + {"provider": "openrouter", "model": "anthropic/claude-sonnet-4.6"}, + {"provider": "nous", "model": "Hermes-4"}, + ], + }) + from hermes_cli.fallback_cmd 
import cmd_fallback_list + cmd_fallback_list(types.SimpleNamespace()) + out = capsys.readouterr().out + assert "Fallback chain (2 entries)" in out + assert "anthropic/claude-sonnet-4.6" in out + assert "Hermes-4" in out + # Primary should be shown too + assert "claude-sonnet-4-6" in out + + def test_list_migrates_legacy_for_display(self, isolated_home, capsys): + _write_config(isolated_home, { + "fallback_model": {"provider": "openrouter", "model": "gpt-5.4"}, + }) + from hermes_cli.fallback_cmd import cmd_fallback_list + cmd_fallback_list(types.SimpleNamespace()) + out = capsys.readouterr().out + assert "1 entry" in out + assert "gpt-5.4" in out + + +# --------------------------------------------------------------------------- +# cmd_fallback_add — mock select_provider_and_model +# --------------------------------------------------------------------------- + +class TestAddCommand: + def test_add_appends_new_entry(self, isolated_home, capsys): + _write_config(isolated_home, { + "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"}, + }) + + def fake_picker(args=None): + # Simulate what the real picker does: writes the selection to config["model"] + from hermes_cli.config import load_config, save_config + cfg = load_config() + cfg["model"] = { + "provider": "openrouter", + "default": "anthropic/claude-sonnet-4.6", + "base_url": "https://openrouter.ai/api/v1", + "api_mode": "chat_completions", + } + save_config(cfg) + + with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \ + patch("hermes_cli.main._require_tty"): + from hermes_cli.fallback_cmd import cmd_fallback_add + cmd_fallback_add(types.SimpleNamespace()) + + cfg = _read_config(isolated_home) + # Primary is preserved + assert cfg["model"]["provider"] == "anthropic" + assert cfg["model"]["default"] == "claude-sonnet-4-6" + # Fallback was appended + assert cfg["fallback_providers"] == [ + { + "provider": "openrouter", + "model": "anthropic/claude-sonnet-4.6", + 
"base_url": "https://openrouter.ai/api/v1", + "api_mode": "chat_completions", + } + ] + out = capsys.readouterr().out + assert "Added fallback" in out + + def test_add_rejects_duplicate(self, isolated_home, capsys): + _write_config(isolated_home, { + "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"}, + "fallback_providers": [ + {"provider": "openrouter", "model": "gpt-5.4"}, + ], + }) + + def fake_picker(args=None): + from hermes_cli.config import load_config, save_config + cfg = load_config() + cfg["model"] = {"provider": "openrouter", "default": "gpt-5.4"} + save_config(cfg) + + with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \ + patch("hermes_cli.main._require_tty"): + from hermes_cli.fallback_cmd import cmd_fallback_add + cmd_fallback_add(types.SimpleNamespace()) + + cfg = _read_config(isolated_home) + # Should still have exactly one entry + assert len(cfg["fallback_providers"]) == 1 + out = capsys.readouterr().out + assert "already in the fallback chain" in out + + def test_add_rejects_same_as_primary(self, isolated_home, capsys): + _write_config(isolated_home, { + "model": {"provider": "openrouter", "default": "gpt-5.4"}, + }) + + def fake_picker(args=None): + # User picks the same thing that's already the primary + from hermes_cli.config import load_config, save_config + cfg = load_config() + cfg["model"] = {"provider": "openrouter", "default": "gpt-5.4"} + save_config(cfg) + + with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \ + patch("hermes_cli.main._require_tty"): + from hermes_cli.fallback_cmd import cmd_fallback_add + cmd_fallback_add(types.SimpleNamespace()) + + cfg = _read_config(isolated_home) + assert "fallback_providers" not in cfg or cfg["fallback_providers"] == [] + out = capsys.readouterr().out + assert "matches the current primary" in out + + def test_add_preserves_primary_when_picker_changes_it(self, isolated_home): + """The picker mutates config["model"]; 
fallback_add must restore the primary.""" + _write_config(isolated_home, { + "model": { + "provider": "anthropic", + "default": "claude-sonnet-4-6", + "base_url": "https://api.anthropic.com", + "api_mode": "anthropic_messages", + }, + }) + + def fake_picker(args=None): + from hermes_cli.config import load_config, save_config + cfg = load_config() + cfg["model"] = { + "provider": "openrouter", + "default": "anthropic/claude-sonnet-4.6", + "base_url": "https://openrouter.ai/api/v1", + "api_mode": "chat_completions", + } + save_config(cfg) + + with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \ + patch("hermes_cli.main._require_tty"): + from hermes_cli.fallback_cmd import cmd_fallback_add + cmd_fallback_add(types.SimpleNamespace()) + + cfg = _read_config(isolated_home) + # Primary exactly as it was + assert cfg["model"]["provider"] == "anthropic" + assert cfg["model"]["default"] == "claude-sonnet-4-6" + assert cfg["model"]["base_url"] == "https://api.anthropic.com" + assert cfg["model"]["api_mode"] == "anthropic_messages" + # Fallback added + assert len(cfg["fallback_providers"]) == 1 + assert cfg["fallback_providers"][0]["provider"] == "openrouter" + + def test_add_noop_when_picker_cancelled(self, isolated_home, capsys): + _write_config(isolated_home, { + "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"}, + }) + + def fake_picker(args=None): + # User cancelled — no change to config + pass + + with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \ + patch("hermes_cli.main._require_tty"): + from hermes_cli.fallback_cmd import cmd_fallback_add + cmd_fallback_add(types.SimpleNamespace()) + + cfg = _read_config(isolated_home) + assert "fallback_providers" not in cfg or cfg["fallback_providers"] == [] + out = capsys.readouterr().out + # Either "No fallback added" (picker fully cancelled) or "matches the current primary" + # (picker left config untouched) — both indicate a non-add outcome. 
+ assert ("No fallback added" in out) or ("matches the current primary" in out) + + def test_add_noop_when_picker_clears_model(self, isolated_home, capsys): + """Simulate picker explicitly clearing model.default (unusual but possible).""" + _write_config(isolated_home, { + "model": {"provider": "anthropic", "default": "claude-sonnet-4-6"}, + }) + + def fake_picker(args=None): + from hermes_cli.config import load_config, save_config + cfg = load_config() + cfg["model"] = {"provider": "", "default": ""} + save_config(cfg) + + with patch("hermes_cli.main.select_provider_and_model", side_effect=fake_picker), \ + patch("hermes_cli.main._require_tty"): + from hermes_cli.fallback_cmd import cmd_fallback_add + cmd_fallback_add(types.SimpleNamespace()) + + out = capsys.readouterr().out + assert "No fallback added" in out + + +# --------------------------------------------------------------------------- +# cmd_fallback_remove +# --------------------------------------------------------------------------- + +class TestRemoveCommand: + def test_remove_empty_chain(self, isolated_home, capsys): + _write_config(isolated_home, {}) + from hermes_cli.fallback_cmd import cmd_fallback_remove + cmd_fallback_remove(types.SimpleNamespace()) + out = capsys.readouterr().out + assert "nothing to remove" in out + + def test_remove_selected_entry(self, isolated_home, capsys): + _write_config(isolated_home, { + "fallback_providers": [ + {"provider": "openrouter", "model": "gpt-5.4"}, + {"provider": "nous", "model": "Hermes-4"}, + {"provider": "anthropic", "model": "claude-sonnet-4-6"}, + ], + }) + + # Picker returns index 1 (the middle entry, "nous / Hermes-4") + with patch("hermes_cli.setup._curses_prompt_choice", return_value=1): + from hermes_cli.fallback_cmd import cmd_fallback_remove + cmd_fallback_remove(types.SimpleNamespace()) + + cfg = _read_config(isolated_home) + assert cfg["fallback_providers"] == [ + {"provider": "openrouter", "model": "gpt-5.4"}, + {"provider": "anthropic", 
"model": "claude-sonnet-4-6"}, + ] + out = capsys.readouterr().out + assert "Removed fallback" in out + assert "Hermes-4" in out + + def test_remove_cancel_keeps_chain(self, isolated_home): + _write_config(isolated_home, { + "fallback_providers": [ + {"provider": "openrouter", "model": "gpt-5.4"}, + ], + }) + + # Cancel = last item (index == len(chain) == 1 in our menu) + with patch("hermes_cli.setup._curses_prompt_choice", return_value=1): + from hermes_cli.fallback_cmd import cmd_fallback_remove + cmd_fallback_remove(types.SimpleNamespace()) + + cfg = _read_config(isolated_home) + assert len(cfg["fallback_providers"]) == 1 + + +# --------------------------------------------------------------------------- +# cmd_fallback_clear +# --------------------------------------------------------------------------- + +class TestClearCommand: + def test_clear_empty_chain(self, isolated_home, capsys): + _write_config(isolated_home, {}) + from hermes_cli.fallback_cmd import cmd_fallback_clear + cmd_fallback_clear(types.SimpleNamespace()) + out = capsys.readouterr().out + assert "nothing to clear" in out + + def test_clear_with_confirmation(self, isolated_home, capsys, monkeypatch): + _write_config(isolated_home, { + "fallback_providers": [ + {"provider": "openrouter", "model": "gpt-5.4"}, + {"provider": "nous", "model": "Hermes-4"}, + ], + }) + monkeypatch.setattr("builtins.input", lambda *a, **kw: "y") + from hermes_cli.fallback_cmd import cmd_fallback_clear + cmd_fallback_clear(types.SimpleNamespace()) + + cfg = _read_config(isolated_home) + assert cfg.get("fallback_providers") == [] + out = capsys.readouterr().out + assert "Fallback chain cleared" in out + + def test_clear_cancelled(self, isolated_home, monkeypatch): + _write_config(isolated_home, { + "fallback_providers": [{"provider": "openrouter", "model": "gpt-5.4"}], + }) + monkeypatch.setattr("builtins.input", lambda *a, **kw: "n") + from hermes_cli.fallback_cmd import cmd_fallback_clear + 
cmd_fallback_clear(types.SimpleNamespace()) + + cfg = _read_config(isolated_home) + assert len(cfg["fallback_providers"]) == 1 + + +# --------------------------------------------------------------------------- +# cmd_fallback dispatcher +# --------------------------------------------------------------------------- + +class TestDispatcher: + def test_no_subcommand_lists(self, isolated_home, capsys): + _write_config(isolated_home, {}) + from hermes_cli.fallback_cmd import cmd_fallback + cmd_fallback(types.SimpleNamespace(fallback_command=None)) + out = capsys.readouterr().out + assert "No fallback providers configured" in out + + def test_list_alias(self, isolated_home, capsys): + _write_config(isolated_home, {}) + from hermes_cli.fallback_cmd import cmd_fallback + cmd_fallback(types.SimpleNamespace(fallback_command="ls")) + out = capsys.readouterr().out + assert "No fallback providers configured" in out + + def test_remove_alias(self, isolated_home, capsys): + _write_config(isolated_home, {}) + from hermes_cli.fallback_cmd import cmd_fallback + cmd_fallback(types.SimpleNamespace(fallback_command="rm")) + out = capsys.readouterr().out + assert "nothing to remove" in out + + def test_unknown_subcommand_exits(self, isolated_home): + _write_config(isolated_home, {}) + from hermes_cli.fallback_cmd import cmd_fallback + with pytest.raises(SystemExit): + cmd_fallback(types.SimpleNamespace(fallback_command="nope")) + + +# --------------------------------------------------------------------------- +# argparse wiring — verify the subparser is registered +# --------------------------------------------------------------------------- + +class TestArgparseWiring: + """Verify `hermes fallback` is wired into main.py's argparse tree. + + main() builds the parser inline, so we invoke main([...]) via subprocess + with --help to introspect registered subcommands without side effects. 
+ """ + + def test_fallback_help_lists_subcommands(self): + import subprocess + import sys + result = subprocess.run( + [sys.executable, "-m", "hermes_cli.main", "fallback", "--help"], + capture_output=True, + text=True, + timeout=30, + ) + # --help exits 0 + assert result.returncode == 0, f"stderr: {result.stderr}" + out = result.stdout + result.stderr + # All four subcommands should appear in help + assert "list" in out + assert "add" in out + assert "remove" in out + assert "clear" in out diff --git a/tests/hermes_cli/test_model_catalog.py b/tests/hermes_cli/test_model_catalog.py new file mode 100644 index 0000000000..2b757ac79b --- /dev/null +++ b/tests/hermes_cli/test_model_catalog.py @@ -0,0 +1,284 @@ +"""Tests for hermes_cli.model_catalog — remote manifest fetch + cache + fallback.""" + +from __future__ import annotations + +import json +import time +from pathlib import Path +from unittest.mock import patch + +import pytest + + +@pytest.fixture +def isolated_home(tmp_path, monkeypatch): + """Isolate HERMES_HOME + reset any module-level catalog cache per test.""" + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("HERMES_HOME", str(home)) + + # Force a fresh catalog module state for each test. 
+ import importlib + from hermes_cli import model_catalog + importlib.reload(model_catalog) + yield home + model_catalog.reset_cache() + + +def _valid_manifest() -> dict: + return { + "version": 1, + "updated_at": "2026-04-25T22:00:00Z", + "metadata": {"source": "test"}, + "providers": { + "openrouter": { + "metadata": {"display_name": "OpenRouter"}, + "models": [ + {"id": "anthropic/claude-opus-4.7", "description": "recommended"}, + {"id": "openai/gpt-5.4", "description": ""}, + {"id": "openrouter/elephant-alpha", "description": "free"}, + ], + }, + "nous": { + "metadata": {"display_name": "Nous Portal"}, + "models": [ + {"id": "anthropic/claude-opus-4.7"}, + {"id": "moonshotai/kimi-k2.6"}, + ], + }, + }, + } + + +class TestValidation: + def test_accepts_well_formed_manifest(self, isolated_home): + from hermes_cli.model_catalog import _validate_manifest + assert _validate_manifest(_valid_manifest()) is True + + def test_rejects_non_dict(self, isolated_home): + from hermes_cli.model_catalog import _validate_manifest + assert _validate_manifest("string") is False + assert _validate_manifest([]) is False + assert _validate_manifest(None) is False + + def test_rejects_missing_version(self, isolated_home): + from hermes_cli.model_catalog import _validate_manifest + m = _valid_manifest() + del m["version"] + assert _validate_manifest(m) is False + + def test_rejects_future_version(self, isolated_home): + from hermes_cli.model_catalog import _validate_manifest + m = _valid_manifest() + m["version"] = 999 + assert _validate_manifest(m) is False + + def test_rejects_missing_providers(self, isolated_home): + from hermes_cli.model_catalog import _validate_manifest + m = _valid_manifest() + del m["providers"] + assert _validate_manifest(m) is False + + def test_rejects_malformed_model_entry(self, isolated_home): + from hermes_cli.model_catalog import _validate_manifest + m = _valid_manifest() + m["providers"]["openrouter"]["models"][0] = {"id": ""} # empty id + assert 
_validate_manifest(m) is False + + def test_rejects_non_string_model_id(self, isolated_home): + from hermes_cli.model_catalog import _validate_manifest + m = _valid_manifest() + m["providers"]["openrouter"]["models"][0] = {"id": 42} + assert _validate_manifest(m) is False + + +class TestFetchSuccess: + def test_fetch_and_cache_writes_disk(self, isolated_home): + from hermes_cli import model_catalog + manifest = _valid_manifest() + with patch.object( + model_catalog, "_fetch_manifest", return_value=manifest + ) as fetch: + result = model_catalog.get_catalog(force_refresh=True) + + assert result == manifest + assert fetch.called + + cache_file = model_catalog._cache_path() + assert cache_file.exists() + with open(cache_file) as fh: + assert json.load(fh) == manifest + + def test_second_call_uses_in_process_cache(self, isolated_home): + from hermes_cli import model_catalog + manifest = _valid_manifest() + with patch.object( + model_catalog, "_fetch_manifest", return_value=manifest + ) as fetch: + model_catalog.get_catalog(force_refresh=True) + model_catalog.get_catalog() # should not hit network again + assert fetch.call_count == 1 + + def test_force_refresh_always_refetches(self, isolated_home): + from hermes_cli import model_catalog + manifest = _valid_manifest() + with patch.object( + model_catalog, "_fetch_manifest", return_value=manifest + ) as fetch: + model_catalog.get_catalog(force_refresh=True) + model_catalog.get_catalog(force_refresh=True) + assert fetch.call_count == 2 + + +class TestFetchFailure: + def test_network_failure_returns_empty_when_no_cache(self, isolated_home): + from hermes_cli import model_catalog + with patch.object(model_catalog, "_fetch_manifest", return_value=None): + result = model_catalog.get_catalog(force_refresh=True) + assert result == {} + + def test_network_failure_falls_back_to_disk_cache(self, isolated_home): + from hermes_cli import model_catalog + # Prime disk cache with a fresh copy. 
+ manifest = _valid_manifest() + with patch.object(model_catalog, "_fetch_manifest", return_value=manifest): + model_catalog.get_catalog(force_refresh=True) + + # Now wipe in-process cache and simulate network failure on refetch. + model_catalog.reset_cache() + with patch.object(model_catalog, "_fetch_manifest", return_value=None): + result = model_catalog.get_catalog(force_refresh=True) + + assert result == manifest + + def test_fetch_failure_falls_back_to_stale_cache(self, isolated_home): + from hermes_cli import model_catalog + manifest = _valid_manifest() + # Write stale cache directly (mtime in the past). + cache = model_catalog._cache_path() + cache.parent.mkdir(parents=True, exist_ok=True) + with open(cache, "w") as fh: + json.dump(manifest, fh) + old = time.time() - 30 * 24 * 3600 # 30 days ago + import os as _os + _os.utime(cache, (old, old)) + + with patch.object(model_catalog, "_fetch_manifest", return_value=None): + result = model_catalog.get_catalog() + + # Stale cache is better than nothing. 
+ assert result == manifest + + +class TestCuratedAccessors: + def test_openrouter_returns_tuples(self, isolated_home): + from hermes_cli import model_catalog + with patch.object( + model_catalog, "_fetch_manifest", return_value=_valid_manifest() + ): + result = model_catalog.get_curated_openrouter_models() + assert result == [ + ("anthropic/claude-opus-4.7", "recommended"), + ("openai/gpt-5.4", ""), + ("openrouter/elephant-alpha", "free"), + ] + + def test_nous_returns_ids(self, isolated_home): + from hermes_cli import model_catalog + with patch.object( + model_catalog, "_fetch_manifest", return_value=_valid_manifest() + ): + result = model_catalog.get_curated_nous_models() + assert result == ["anthropic/claude-opus-4.7", "moonshotai/kimi-k2.6"] + + def test_openrouter_returns_none_when_catalog_empty(self, isolated_home): + from hermes_cli import model_catalog + with patch.object(model_catalog, "_fetch_manifest", return_value=None): + assert model_catalog.get_curated_openrouter_models() is None + + def test_nous_returns_none_when_catalog_empty(self, isolated_home): + from hermes_cli import model_catalog + with patch.object(model_catalog, "_fetch_manifest", return_value=None): + assert model_catalog.get_curated_nous_models() is None + + +class TestDisabled: + def test_disabled_config_short_circuits(self, isolated_home): + from hermes_cli import model_catalog + with patch.object( + model_catalog, + "_load_catalog_config", + return_value={ + "enabled": False, + "url": "http://ignored", + "ttl_hours": 24.0, + "providers": {}, + }, + ): + with patch.object(model_catalog, "_fetch_manifest") as fetch: + result = model_catalog.get_catalog() + assert result == {} + fetch.assert_not_called() + + +class TestProviderOverride: + def test_override_url_takes_precedence(self, isolated_home): + from hermes_cli import model_catalog + + override_payload = { + "version": 1, + "providers": { + "openrouter": { + "models": [ + {"id": "override/model", "description": "custom"}, + ] + } + 
}, + } + + def fake_fetch(url, timeout): + if "override" in url: + return override_payload + return _valid_manifest() + + with patch.object( + model_catalog, + "_load_catalog_config", + return_value={ + "enabled": True, + "url": "http://master", + "ttl_hours": 24.0, + "providers": {"openrouter": {"url": "http://override"}}, + }, + ): + with patch.object(model_catalog, "_fetch_manifest", side_effect=fake_fetch): + result = model_catalog.get_curated_openrouter_models() + + assert result == [("override/model", "custom")] + + +class TestIntegrationWithModelsModule: + """Exercise the fallback paths via the real callers in hermes_cli.models.""" + + def test_curated_nous_ids_falls_back_to_hardcoded_on_empty_catalog( + self, isolated_home + ): + from hermes_cli import model_catalog + from hermes_cli.models import get_curated_nous_model_ids, _PROVIDER_MODELS + + with patch.object(model_catalog, "_fetch_manifest", return_value=None): + result = get_curated_nous_model_ids() + + assert result == list(_PROVIDER_MODELS["nous"]) + + def test_curated_nous_ids_prefers_manifest(self, isolated_home): + from hermes_cli import model_catalog + from hermes_cli.models import get_curated_nous_model_ids + + with patch.object( + model_catalog, "_fetch_manifest", return_value=_valid_manifest() + ): + result = get_curated_nous_model_ids() + + assert result == ["anthropic/claude-opus-4.7", "moonshotai/kimi-k2.6"] diff --git a/tests/hermes_cli/test_skills_hub.py b/tests/hermes_cli/test_skills_hub.py index bf9fa71a3a..3866730921 100644 --- a/tests/hermes_cli/test_skills_hub.py +++ b/tests/hermes_cli/test_skills_hub.py @@ -56,7 +56,7 @@ def three_source_env(monkeypatch, hub_env): import tools.skills_tool as skills_tool monkeypatch.setattr(hub, "HubLockFile", lambda: _DummyLockFile([_HUB_ENTRY])) - monkeypatch.setattr(skills_tool, "_find_all_skills", lambda: list(_ALL_THREE_SKILLS)) + monkeypatch.setattr(skills_tool, "_find_all_skills", lambda **_kwargs: list(_ALL_THREE_SKILLS)) 
monkeypatch.setattr(skills_sync, "_read_manifest", lambda: dict(_BUILTIN_MANIFEST)) return hub_env @@ -107,7 +107,7 @@ def test_do_list_initializes_hub_dir(monkeypatch, hub_env): import tools.skills_sync as skills_sync import tools.skills_tool as skills_tool - monkeypatch.setattr(skills_tool, "_find_all_skills", lambda: []) + monkeypatch.setattr(skills_tool, "_find_all_skills", lambda **_kwargs: []) monkeypatch.setattr(skills_sync, "_read_manifest", lambda: {}) hub_dir = hub_env @@ -154,6 +154,74 @@ def test_do_list_filter_builtin(three_source_env): assert "local-skill" not in output +def test_do_list_renders_status_column(three_source_env, monkeypatch): + """Every list row should carry an enabled/disabled status (new in PR that + answered Mr Mochizuki's 'I just want to see what's live' question).""" + from agent import skill_utils + + monkeypatch.setattr(skill_utils, "get_disabled_skill_names", lambda platform=None: set()) + output = _capture() + + assert "Status" in output + assert "enabled" in output.lower() + # Summary counts enabled skills. + assert "3 enabled, 0 disabled" in output + + +def test_do_list_marks_disabled_skills(three_source_env, monkeypatch): + from agent import skill_utils + + # Simulate `skills.disabled: [hub-skill]` in config. 
+ monkeypatch.setattr( + skill_utils, "get_disabled_skill_names", + lambda platform=None: {"hub-skill"}, + ) + output = _capture() + + # Row still appears (no --enabled-only), but marked disabled + assert "hub-skill" in output + assert "disabled" in output.lower() + assert "2 enabled, 1 disabled" in output + + +def test_do_list_enabled_only_hides_disabled(three_source_env, monkeypatch): + from agent import skill_utils + + monkeypatch.setattr( + skill_utils, "get_disabled_skill_names", + lambda platform=None: {"hub-skill"}, + ) + sink = StringIO() + console = Console(file=sink, force_terminal=False, color_system=None) + do_list(enabled_only=True, console=console) + output = sink.getvalue() + + assert "hub-skill" not in output + assert "builtin-skill" in output + assert "local-skill" in output + assert "enabled only" in output.lower() + assert "2 enabled shown" in output + + +def test_do_list_platform_env_is_ignored(three_source_env, monkeypatch): + """`hermes skills list` reads the active profile's config via + HERMES_HOME (swapped by -p), so it must NOT pass a platform arg to + ``get_disabled_skill_names`` — otherwise per-platform overrides + would silently leak in from HERMES_PLATFORM env.""" + from agent import skill_utils + + seen = {} + + def _fake(platform=None): + seen["platform"] = platform + return set() + + monkeypatch.setattr(skill_utils, "get_disabled_skill_names", _fake) + _capture() + + assert seen["platform"] is None + + def test_do_check_reports_available_updates(monkeypatch): output = _capture_check(monkeypatch, [ {"name": "hub-skill", "source": "skills.sh", "status": "update_available"}, diff --git a/tests/run_agent/test_review_prompt_class_first.py b/tests/run_agent/test_review_prompt_class_first.py new file mode 100644 index 0000000000..4a7fed1d74 --- /dev/null +++ b/tests/run_agent/test_review_prompt_class_first.py @@ -0,0 +1,78 @@ +"""Behavior tests for the class-first skill review prompts. 
+ +The skill review / combined review prompts steer the background review agent +toward generalizing existing skills rather than accumulating near-duplicates. +These tests assert the behavioral *instructions* are present — they do NOT +snapshot the full prompt text (change-detector). +""" + +from run_agent import AIAgent + + +def test_skill_review_prompt_instructs_survey_first(): + """Prompt must tell the reviewer to list existing skills before deciding.""" + prompt = AIAgent._SKILL_REVIEW_PROMPT + assert "skills_list" in prompt, "must instruct the reviewer to call skills_list" + assert "skill_view" in prompt, "must instruct the reviewer to skill_view candidates" + assert "SURVEY" in prompt, "must name the survey step explicitly" + + +def test_skill_review_prompt_is_class_first(): + """Prompt must steer toward the CLASS of task, not the specific task.""" + prompt = AIAgent._SKILL_REVIEW_PROMPT + assert "CLASS" in prompt, "must tell the reviewer to think about the task class" + assert "class level" in prompt, "must anchor naming at the class level" + + +def test_skill_review_prompt_prefers_updating_existing(): + """Prompt must prefer generalizing an existing skill over creating a new one.""" + prompt = AIAgent._SKILL_REVIEW_PROMPT + assert "PREFER GENERALIZING" in prompt or "PREFER UPDATING" in prompt, ( + "must state the update-over-create preference" + ) + assert "ONLY CREATE A NEW SKILL" in prompt, ( + "must gate new-skill creation behind a last-resort clause" + ) + + +def test_skill_review_prompt_flags_overlap_for_followup(): + """Prompt must ask the reviewer to note overlapping skills for future review.""" + prompt = AIAgent._SKILL_REVIEW_PROMPT + assert "overlap" in prompt.lower(), "must mention the overlap-flagging protocol" + + +def test_skill_review_prompt_preserves_opt_out_clause(): + """The 'Nothing to save.' escape clause must remain.""" + prompt = AIAgent._SKILL_REVIEW_PROMPT + assert "Nothing to save." 
in prompt + + +def test_combined_review_prompt_keeps_memory_section(): + """Combined prompt must still cover memory review.""" + prompt = AIAgent._COMBINED_REVIEW_PROMPT + assert "**Memory**" in prompt + assert "memory tool" in prompt + + +def test_combined_review_prompt_skills_section_is_class_first(): + """The **Skills** half of the combined prompt must follow the same protocol.""" + prompt = AIAgent._COMBINED_REVIEW_PROMPT + assert "**Skills**" in prompt + assert "SURVEY" in prompt + assert "CLASS" in prompt + assert "skills_list" in prompt + assert "ONLY CREATE A NEW SKILL" in prompt + + +def test_combined_review_prompt_preserves_opt_out_clause(): + prompt = AIAgent._COMBINED_REVIEW_PROMPT + assert "Nothing to save." in prompt + + +def test_memory_review_prompt_unchanged_in_structure(): + """Memory-only review prompt stays focused on user facts — not touched by this change.""" + prompt = AIAgent._MEMORY_REVIEW_PROMPT + # Guardrails: the memory-only prompt must NOT mention skills/surveys. 
+ assert "skills_list" not in prompt + assert "SURVEY" not in prompt + assert "memory tool" in prompt diff --git a/tests/test_hermes_state.py b/tests/test_hermes_state.py index 94cd498a66..868a28c530 100644 --- a/tests/test_hermes_state.py +++ b/tests/test_hermes_state.py @@ -1485,6 +1485,48 @@ class TestListSessionsRich: assert "\n" not in sessions[0]["preview"] assert "Line one Line two" in sessions[0]["preview"] + def test_branch_session_visible_in_list(self, db): + """Branch sessions (parent ended with 'branched') must appear in list_sessions_rich.""" + db.create_session("parent", "cli") + db.end_session("parent", "branched") + db.create_session("branch", "cli", parent_session_id="parent") + db.append_message("branch", "user", "Exploring the alternative approach") + + sessions = db.list_sessions_rich() + ids = [s["id"] for s in sessions] + assert "branch" in ids, "Branch session should be visible in default list" + + def test_subagent_session_still_hidden(self, db): + """Sub-agent children (parent NOT ended with 'branched') remain hidden.""" + db.create_session("root", "cli") + db.create_session("delegate", "cli", parent_session_id="root") + + sessions = db.list_sessions_rich() + ids = [s["id"] for s in sessions] + assert "delegate" not in ids, "Delegate sub-agent should not appear in default list" + assert "root" in ids + + def test_compression_child_still_hidden(self, db): + """Compression continuation sessions remain hidden (parent ended with 'compression').""" + import time as _time + t0 = _time.time() + db.create_session("root", "cli") + db._conn.execute("UPDATE sessions SET started_at=? WHERE id=?", (t0, "root")) + db._conn.execute( + "UPDATE sessions SET ended_at=?, end_reason='compression' WHERE id=?", + (t0 + 1800, "root"), + ) + db._conn.commit() + db.create_session("continuation", "cli", parent_session_id="root") + db._conn.execute( + "UPDATE sessions SET started_at=? 
WHERE id=?", (t0 + 1801, "continuation") + ) + db._conn.commit() + + sessions = db.list_sessions_rich(project_compression_tips=False) + ids = [s["id"] for s in sessions] + assert "continuation" not in ids, "Compression continuation should stay hidden" + class TestCompressionChainProjection: """Tests for lineage-aware list_sessions_rich — compressed conversations diff --git a/tests/test_tui_gateway_server.py b/tests/test_tui_gateway_server.py index fd9dcc9cf6..7d70214a74 100644 --- a/tests/test_tui_gateway_server.py +++ b/tests/test_tui_gateway_server.py @@ -1835,3 +1835,112 @@ def test_model_options_propagates_list_exception(monkeypatch): assert "error" in resp assert resp["error"]["code"] == 5033 assert "catalog blew up" in resp["error"]["message"] + + +# --------------------------------------------------------------------------- +# prompt.submit — auto-title +# --------------------------------------------------------------------------- + +class _ImmediateThread: + """Runs the target callable synchronously so assertions can follow.""" + + def __init__(self, target=None, daemon=None): + self._target = target + + def start(self): + self._target() + + +def test_prompt_submit_auto_titles_session_on_complete(monkeypatch): + """maybe_auto_title is called after a successful (complete) prompt.""" + + class _Agent: + def run_conversation(self, prompt, conversation_history=None, stream_callback=None): + return { + "final_response": "Rome was founded in 753 BC.", + "messages": [ + {"role": "user", "content": "Tell me about Rome"}, + {"role": "assistant", "content": "Rome was founded in 753 BC."}, + ], + } + + server._sessions["sid"] = _session(agent=_Agent()) + monkeypatch.setattr(server.threading, "Thread", _ImmediateThread) + monkeypatch.setattr(server, "_emit", lambda *args, **kwargs: None) + monkeypatch.setattr(server, "make_stream_renderer", lambda cols: None) + monkeypatch.setattr(server, "render_message", lambda raw, cols: None) + monkeypatch.setattr(server, 
"_get_db", lambda: None) + + with patch("agent.title_generator.maybe_auto_title") as mock_title: + server.handle_request( + { + "id": "1", + "method": "prompt.submit", + "params": {"session_id": "sid", "text": "Tell me about Rome"}, + } + ) + + mock_title.assert_called_once() + args = mock_title.call_args.args + assert args[1] == "session-key" + assert args[2] == "Tell me about Rome" + assert args[3] == "Rome was founded in 753 BC." + + +def test_prompt_submit_skips_auto_title_when_interrupted(monkeypatch): + """maybe_auto_title must NOT be called when the agent was interrupted.""" + + class _Agent: + def run_conversation(self, prompt, conversation_history=None, stream_callback=None): + return { + "final_response": "partial answer", + "interrupted": True, + "messages": [], + } + + server._sessions["sid"] = _session(agent=_Agent()) + monkeypatch.setattr(server.threading, "Thread", _ImmediateThread) + monkeypatch.setattr(server, "_emit", lambda *args, **kwargs: None) + monkeypatch.setattr(server, "make_stream_renderer", lambda cols: None) + monkeypatch.setattr(server, "render_message", lambda raw, cols: None) + monkeypatch.setattr(server, "_get_db", lambda: None) + + with patch("agent.title_generator.maybe_auto_title") as mock_title: + server.handle_request( + { + "id": "1", + "method": "prompt.submit", + "params": {"session_id": "sid", "text": "Tell me about Rome"}, + } + ) + + mock_title.assert_not_called() + + +def test_prompt_submit_skips_auto_title_when_response_empty(monkeypatch): + """maybe_auto_title must NOT be called when the agent returns an empty reply.""" + + class _Agent: + def run_conversation(self, prompt, conversation_history=None, stream_callback=None): + return { + "final_response": "", + "messages": [], + } + + server._sessions["sid"] = _session(agent=_Agent()) + monkeypatch.setattr(server.threading, "Thread", _ImmediateThread) + monkeypatch.setattr(server, "_emit", lambda *args, **kwargs: None) + monkeypatch.setattr(server, 
"make_stream_renderer", lambda cols: None) + monkeypatch.setattr(server, "render_message", lambda raw, cols: None) + monkeypatch.setattr(server, "_get_db", lambda: None) + + with patch("agent.title_generator.maybe_auto_title") as mock_title: + server.handle_request( + { + "id": "1", + "method": "prompt.submit", + "params": {"session_id": "sid", "text": "Tell me about Rome"}, + } + ) + + mock_title.assert_not_called() diff --git a/tests/tools/test_browser_hybrid_routing.py b/tests/tools/test_browser_hybrid_routing.py new file mode 100644 index 0000000000..934b275d57 --- /dev/null +++ b/tests/tools/test_browser_hybrid_routing.py @@ -0,0 +1,248 @@ +"""Tests for hybrid browser-backend routing (LAN/localhost auto-local). + +When a cloud browser provider (Browserbase / Browser-Use / Firecrawl) is +configured globally, ``browser.auto_local_for_private_urls`` (default True) +causes ``browser_navigate`` to transparently spawn a local Chromium sidecar +for URLs whose host resolves to a private/loopback/LAN address, while +public URLs continue to hit the cloud session in the same conversation. + +These tests cover the routing decision layer — session_key selection, +sidecar detection, last-active-session tracking, and the config toggle. +The downstream session creation is covered by test_browser_cloud_fallback.py. 
+""" +from unittest.mock import Mock + +import pytest + +import tools.browser_tool as browser_tool + + +@pytest.fixture(autouse=True) +def _reset_routing_state(monkeypatch): + """Clear module-level caches so each test starts clean.""" + monkeypatch.setattr(browser_tool, "_active_sessions", {}) + monkeypatch.setattr(browser_tool, "_last_active_session_key", {}) + monkeypatch.setattr(browser_tool, "_cached_cloud_provider", None) + monkeypatch.setattr(browser_tool, "_cloud_provider_resolved", False) + monkeypatch.setattr(browser_tool, "_auto_local_for_private_urls_resolved", False) + monkeypatch.setattr(browser_tool, "_cached_auto_local_for_private_urls", True) + monkeypatch.setattr(browser_tool, "_start_browser_cleanup_thread", lambda: None) + monkeypatch.setattr(browser_tool, "_update_session_activity", lambda t: None) + # Default: no CDP override, no Camofox + monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: None) + monkeypatch.setattr(browser_tool, "_is_camofox_mode", lambda: False) + + +class TestNavigationSessionKey: + """Tests for _navigation_session_key URL-based routing decisions.""" + + def test_public_url_uses_bare_task_id(self, monkeypatch): + """Public URL with cloud provider configured → bare task_id (cloud).""" + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock()) + key = browser_tool._navigation_session_key("default", "https://github.com/x/y") + assert key == "default" + + def test_localhost_routes_to_local_sidecar(self, monkeypatch): + """``localhost`` URL → ``::local`` suffix when cloud configured + flag on.""" + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock()) + key = browser_tool._navigation_session_key("default", "http://localhost:3000/") + assert key == "default::local" + + def test_loopback_ipv4_routes_to_local_sidecar(self, monkeypatch): + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock()) + key = browser_tool._navigation_session_key("default", 
"http://127.0.0.1:8080/") + assert key == "default::local" + + def test_rfc1918_lan_routes_to_local_sidecar(self, monkeypatch): + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock()) + key = browser_tool._navigation_session_key("default", "http://192.168.1.50:8000/") + assert key == "default::local" + + def test_ipv6_loopback_routes_to_local_sidecar(self, monkeypatch): + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock()) + key = browser_tool._navigation_session_key("default", "http://[::1]:3000/") + assert key == "default::local" + + def test_public_ip_literal_uses_bare_task_id(self, monkeypatch): + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock()) + key = browser_tool._navigation_session_key("default", "https://8.8.8.8/") + assert key == "default" + + def test_mdns_local_hostname_routes_to_sidecar(self, monkeypatch): + """``*.local`` mDNS / ``*.lan`` / ``*.internal`` hostnames route to sidecar.""" + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock()) + for host in ("raspberrypi.local", "printer.lan", "db.internal"): + key = browser_tool._navigation_session_key("default", f"http://{host}/") + assert key == "default::local", f"host {host!r} did not route to sidecar" + + def test_no_cloud_provider_stays_on_bare_task_id(self, monkeypatch): + """When cloud provider is not configured, no hybrid routing happens.""" + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: None) + key = browser_tool._navigation_session_key("default", "http://localhost:3000/") + assert key == "default" + + def test_camofox_mode_stays_on_bare_task_id(self, monkeypatch): + """Camofox is already local — no hybrid routing needed.""" + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock()) + monkeypatch.setattr(browser_tool, "_is_camofox_mode", lambda: True) + key = browser_tool._navigation_session_key("default", "http://localhost:3000/") + assert key == "default" + + def 
test_cdp_override_stays_on_bare_task_id(self, monkeypatch): + """A user-supplied CDP endpoint owns the whole session — no hybrid.""" + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock()) + monkeypatch.setattr(browser_tool, "_get_cdp_override", lambda: "ws://localhost:9222") + key = browser_tool._navigation_session_key("default", "http://localhost:3000/") + assert key == "default" + + def test_feature_flag_off_disables_hybrid_routing(self, monkeypatch): + """``auto_local_for_private_urls: false`` keeps private URLs on cloud.""" + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock()) + monkeypatch.setattr(browser_tool, "_auto_local_for_private_urls", lambda: False) + key = browser_tool._navigation_session_key("default", "http://localhost:3000/") + assert key == "default" + + def test_none_task_id_defaults(self, monkeypatch): + """``None`` task_id resolves to 'default'.""" + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: Mock()) + key = browser_tool._navigation_session_key(None, "http://localhost:3000/") + assert key == "default::local" + + +class TestSessionKeyHelpers: + def test_is_local_sidecar_key(self): + assert browser_tool._is_local_sidecar_key("default::local") + assert browser_tool._is_local_sidecar_key("my_task::local") + assert not browser_tool._is_local_sidecar_key("default") + assert not browser_tool._is_local_sidecar_key("my_task") + + def test_last_session_key_falls_back_to_task_id(self, monkeypatch): + """Without a recorded last-active key, returns the bare task_id.""" + monkeypatch.setattr(browser_tool, "_last_active_session_key", {}) + assert browser_tool._last_session_key("default") == "default" + assert browser_tool._last_session_key("task-42") == "task-42" + assert browser_tool._last_session_key(None) == "default" + + def test_last_session_key_returns_recorded_key(self, monkeypatch): + monkeypatch.setattr( + browser_tool, + "_last_active_session_key", + {"default": "default::local", 
"task-42": "task-42"}, + ) + assert browser_tool._last_session_key("default") == "default::local" + assert browser_tool._last_session_key("task-42") == "task-42" + # Unknown task_id still falls back + assert browser_tool._last_session_key("other") == "other" + + +class TestHybridRoutingSessionCreation: + """_get_session_info must force a local session when the key carries ``::local``.""" + + def test_local_sidecar_key_skips_cloud_provider(self, monkeypatch): + """A ``::local``-suffixed key creates a local session even when cloud is set.""" + provider = Mock() + provider.create_session.return_value = { + "session_name": "should_not_be_used", + "bb_session_id": "bb_xxx", + "cdp_url": "wss://fake.browserbase.com/ws", + } + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider) + monkeypatch.setattr(browser_tool, "_ensure_cdp_supervisor", lambda t: None) + + session = browser_tool._get_session_info("default::local") + + assert provider.create_session.call_count == 0 + assert session["bb_session_id"] is None + assert session["cdp_url"] is None + assert session["features"]["local"] is True + + def test_bare_task_id_with_cloud_provider_uses_cloud(self, monkeypatch): + """A bare task_id with cloud provider configured hits the cloud path.""" + provider = Mock() + provider.create_session.return_value = { + "session_name": "cloud-sess", + "bb_session_id": "bb_123", + "cdp_url": "wss://real.browserbase.com/ws", + } + monkeypatch.setattr(browser_tool, "_get_cloud_provider", lambda: provider) + monkeypatch.setattr(browser_tool, "_ensure_cdp_supervisor", lambda t: None) + monkeypatch.setattr(browser_tool, "_resolve_cdp_override", lambda u: u) + + session = browser_tool._get_session_info("default") + + assert provider.create_session.call_count == 1 + assert session["bb_session_id"] == "bb_123" + + +class TestCleanupHybridSessions: + """cleanup_browser(bare_task_id) must reap both cloud + local sidecar sessions.""" + + def 
test_cleanup_reaps_both_primary_and_sidecar(self, monkeypatch): + """Given a bare task_id with both sessions alive, both get cleaned.""" + reaped = [] + + def _fake_cleanup_one(key): + reaped.append(key) + + monkeypatch.setattr(browser_tool, "_cleanup_single_browser_session", _fake_cleanup_one) + monkeypatch.setattr( + browser_tool, + "_active_sessions", + { + "default": {"session_name": "cloud_sess"}, + "default::local": {"session_name": "local_sess"}, + }, + ) + monkeypatch.setattr( + browser_tool, "_last_active_session_key", {"default": "default::local"} + ) + + browser_tool.cleanup_browser("default") + + assert set(reaped) == {"default", "default::local"} + # last-active pointer dropped + assert "default" not in browser_tool._last_active_session_key + + def test_cleanup_reaps_only_primary_when_no_sidecar(self, monkeypatch): + """When no sidecar exists, only the primary is reaped.""" + reaped = [] + + def _fake_cleanup_one(key): + reaped.append(key) + + monkeypatch.setattr(browser_tool, "_cleanup_single_browser_session", _fake_cleanup_one) + monkeypatch.setattr( + browser_tool, + "_active_sessions", + {"default": {"session_name": "cloud_sess"}}, + ) + + browser_tool.cleanup_browser("default") + + assert reaped == ["default"] + + def test_cleanup_sidecar_directly_keeps_primary(self, monkeypatch): + """Calling cleanup with a ``::local`` key reaps only the sidecar.""" + reaped = [] + + def _fake_cleanup_one(key): + reaped.append(key) + + monkeypatch.setattr(browser_tool, "_cleanup_single_browser_session", _fake_cleanup_one) + monkeypatch.setattr( + browser_tool, + "_active_sessions", + { + "default": {"session_name": "cloud_sess"}, + "default::local": {"session_name": "local_sess"}, + }, + ) + monkeypatch.setattr( + browser_tool, "_last_active_session_key", {"default": "default::local"} + ) + + browser_tool.cleanup_browser("default::local") + + assert reaped == ["default::local"] + # Last-active pointer NOT dropped (primary task is still alive) + assert 
browser_tool._last_active_session_key.get("default") == "default::local" diff --git a/tests/tools/test_credential_pool_env_fallback.py b/tests/tools/test_credential_pool_env_fallback.py new file mode 100644 index 0000000000..938484f015 --- /dev/null +++ b/tests/tools/test_credential_pool_env_fallback.py @@ -0,0 +1,210 @@ +"""Tests for credential_pool .env fallback and auth credential_pool lookup. + +Covers the fix from #15914 / PR #15920: +- _seed_from_env reads API keys from ~/.hermes/.env when not in os.environ +- _resolve_api_key_provider_secret falls back to credential_pool when env vars are empty +- env vars take priority over .env file (handled by get_env_value itself) +- env vars take priority over credential pool (fallback only kicks in when env is empty) +""" + +import os +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + + +def _make_pconfig(provider_id="deepseek", env_vars=None): + """Create a minimal ProviderConfig for testing. + + Default provider_id is 'deepseek' because it's a real api_key provider + in PROVIDER_REGISTRY (needed for _seed_from_env's generic path). + """ + from hermes_cli.auth import ProviderConfig + return ProviderConfig( + id=provider_id, + name=provider_id.title(), + auth_type="api_key", + api_key_env_vars=tuple(env_vars or [f"{provider_id.upper()}_API_KEY"]), + ) + + +@pytest.fixture +def isolated_hermes_home(tmp_path, monkeypatch): + """Point HERMES_HOME at a temp dir and clear known API key env vars. + + Also invalidates any cached get_env_value state by patching Path.home(). 
+ """ + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("HERMES_HOME", str(home)) + + # Clear all known API key env vars so get_env_value falls through to .env + for key in [ + "OPENAI_API_KEY", "ANTHROPIC_API_KEY", "OPENROUTER_API_KEY", + "ZAI_API_KEY", "DEEPSEEK_API_KEY", "ANTHROPIC_TOKEN", + "CLAUDE_CODE_OAUTH_TOKEN", "OPENAI_BASE_URL", + ]: + monkeypatch.delenv(key, raising=False) + + return home + + +def _write_env_file(home: Path, **kwargs) -> None: + """Write key=value pairs to ~/.hermes/.env.""" + lines = [f"{k}={v}" for k, v in kwargs.items()] + (home / ".env").write_text("\n".join(lines) + "\n") + + +class TestCredentialPoolSeedsFromDotEnv: + """_seed_from_env must read keys from ~/.hermes/.env, not just os.environ. + + This is the load-bearing behaviour for the fix: when a user adds a key to + .env mid-session or via a non-CLI entry point that doesn't run + load_hermes_dotenv, the credential pool must still discover it. 
+ """ + + def test_deepseek_key_from_dotenv_only(self, isolated_hermes_home): + """Key in .env but not os.environ → _seed_from_env adds a pool entry.""" + _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-only-12345") + assert "DEEPSEEK_API_KEY" not in os.environ + + from agent.credential_pool import _seed_from_env + entries = [] + changed, active_sources = _seed_from_env("deepseek", entries) + + assert changed is True + assert "env:DEEPSEEK_API_KEY" in active_sources + assert any( + e.access_token == "sk-dotenv-only-12345" + and e.source == "env:DEEPSEEK_API_KEY" + for e in entries + ), f"Expected seeded entry with dotenv key, got: {[(e.source, e.access_token) for e in entries]}" + + def test_openrouter_key_from_dotenv_only(self, isolated_hermes_home): + """OpenRouter path has its own branch — verify it also reads .env.""" + _write_env_file(isolated_hermes_home, OPENROUTER_API_KEY="sk-or-dotenv-abc") + assert "OPENROUTER_API_KEY" not in os.environ + + from agent.credential_pool import _seed_from_env + entries = [] + changed, active_sources = _seed_from_env("openrouter", entries) + + assert changed is True + assert "env:OPENROUTER_API_KEY" in active_sources + assert any( + e.access_token == "sk-or-dotenv-abc" for e in entries + ) + + def test_empty_dotenv_no_entries(self, isolated_hermes_home): + """No .env file, no env vars → no entries seeded (and no crash).""" + from agent.credential_pool import _seed_from_env + entries = [] + changed, active_sources = _seed_from_env("deepseek", entries) + assert changed is False + assert active_sources == set() + assert entries == [] + + def test_os_environ_still_wins_over_dotenv(self, isolated_hermes_home, monkeypatch): + """get_env_value checks os.environ first — verify seeding picks that up.""" + _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-stale") + monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-env-fresh-xyz") + + from agent.credential_pool import _seed_from_env + entries = [] + changed, _ 
= _seed_from_env("deepseek", entries) + + assert changed is True + seeded = [e for e in entries if e.source == "env:DEEPSEEK_API_KEY"] + assert len(seeded) == 1 + assert seeded[0].access_token == "sk-env-fresh-xyz" + + +class TestAuthResolvesFromDotEnv: + """_resolve_api_key_provider_secret must also read from ~/.hermes/.env.""" + + def test_key_from_dotenv_only(self, isolated_hermes_home): + """Key in .env but not os.environ → _resolve returns it with the env var source.""" + _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-resolve-789") + assert "DEEPSEEK_API_KEY" not in os.environ + + from hermes_cli.auth import _resolve_api_key_provider_secret + key, source = _resolve_api_key_provider_secret( + provider_id="deepseek", + pconfig=_make_pconfig(), + ) + assert key == "sk-dotenv-resolve-789" + assert source == "DEEPSEEK_API_KEY" + + +class TestAuthCredentialPoolFallback: + """_resolve_api_key_provider_secret falls back to credential pool when env + dotenv are empty.""" + + def test_credential_pool_fallback_structure(self, isolated_hermes_home): + """Empty env + empty .env → auth falls back to credential pool.""" + mock_entry = MagicMock() + mock_entry.access_token = "test-pool-key-12345" + mock_entry.runtime_api_key = "" + + mock_pool = MagicMock() + mock_pool.has_credentials.return_value = True + mock_pool.peek.return_value = mock_entry + + from hermes_cli.auth import _resolve_api_key_provider_secret + with patch("agent.credential_pool.load_pool", return_value=mock_pool): + key, source = _resolve_api_key_provider_secret( + provider_id="deepseek", + pconfig=_make_pconfig(), + ) + assert "test-pool-key-12345" in key + assert "credential_pool" in source + + def test_credential_pool_empty_returns_empty(self, isolated_hermes_home): + """Empty env + empty .env + empty pool → empty string.""" + mock_pool = MagicMock() + mock_pool.has_credentials.return_value = False + + from hermes_cli.auth import _resolve_api_key_provider_secret + with 
patch("agent.credential_pool.load_pool", return_value=mock_pool): + key, source = _resolve_api_key_provider_secret( + provider_id="deepseek", + pconfig=_make_pconfig(), + ) + assert key == "" + + def test_env_var_takes_priority_over_pool(self, isolated_hermes_home, monkeypatch): + """os.environ key wins — credential pool is NEVER consulted.""" + monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-env-key-first-abc123") + + mock_pool = MagicMock() + mock_pool.has_credentials.return_value = True + + from hermes_cli.auth import _resolve_api_key_provider_secret + with patch("agent.credential_pool.load_pool", return_value=mock_pool) as mp: + key, source = _resolve_api_key_provider_secret( + provider_id="deepseek", + pconfig=_make_pconfig(), + ) + assert key == "sk-env-key-first-abc123" + assert source == "DEEPSEEK_API_KEY" + # Pool should not even have been loaded — env var satisfied the request first + mp.assert_not_called() + + def test_dotenv_takes_priority_over_pool(self, isolated_hermes_home): + """Key in .env beats credential pool — pool only fires when both env sources are empty.""" + _write_env_file(isolated_hermes_home, DEEPSEEK_API_KEY="sk-dotenv-priority-xyz") + assert "DEEPSEEK_API_KEY" not in os.environ + + mock_pool = MagicMock() + mock_pool.has_credentials.return_value = True + + from hermes_cli.auth import _resolve_api_key_provider_secret + with patch("agent.credential_pool.load_pool", return_value=mock_pool) as mp: + key, source = _resolve_api_key_provider_secret( + provider_id="deepseek", + pconfig=_make_pconfig(), + ) + assert key == "sk-dotenv-priority-xyz" + assert source == "DEEPSEEK_API_KEY" + mp.assert_not_called() diff --git a/tests/tools/test_mcp_oauth.py b/tests/tools/test_mcp_oauth.py index b2f3f02297..db0342e993 100644 --- a/tests/tools/test_mcp_oauth.py +++ b/tests/tools/test_mcp_oauth.py @@ -491,11 +491,36 @@ def test_configure_callback_port_uses_explicit_port(): assert cfg["_resolved_port"] == 54321 -def test_parse_base_url_strips_path(): - 
"""_parse_base_url drops path components for OAuth discovery.""" - from tools.mcp_oauth import _parse_base_url +def test_build_oauth_auth_preserves_server_url_path(): + """server_url with path is forwarded to OAuthClientProvider unmodified. + + Regression for #16015: previously ``_parse_base_url`` stripped the path, + collapsing ``https://mcp.notion.com/mcp`` to ``https://mcp.notion.com`` and + breaking RFC 9728 protected-resource validation against servers whose PRM + advertises a path-scoped resource (Notion). The MCP SDK strips the path + itself for authorization-server discovery via + ``OAuthContext.get_authorization_base_url``; Hermes must not pre-strip. + """ + from tools import mcp_oauth + + captured: dict = {} + + class _FakeProvider: + def __init__(self, **kwargs): + captured.update(kwargs) + + with patch.object(mcp_oauth, "_OAUTH_AVAILABLE", True), \ + patch.object(mcp_oauth, "OAuthClientProvider", _FakeProvider), \ + patch.object(mcp_oauth, "_is_interactive", return_value=True), \ + patch.object(mcp_oauth, "_maybe_preregister_client"), \ + patch.object(mcp_oauth, "HermesTokenStorage") as mock_storage_cls: + mock_storage_cls.return_value = MagicMock(has_cached_tokens=lambda: True) + build_oauth_auth( + server_name="notion", + server_url="https://mcp.notion.com/mcp", + oauth_config={}, + ) + + assert captured["server_url"] == "https://mcp.notion.com/mcp" - assert _parse_base_url("https://example.com/mcp/v1") == "https://example.com" - assert _parse_base_url("https://example.com") == "https://example.com" - assert _parse_base_url("https://host.example.com:8080/api") == "https://host.example.com:8080" diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 469e9be28d..aecb2ee7f6 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -483,6 +483,147 @@ def _is_local_backend() -> bool: return _is_camofox_mode() or _get_cloud_provider() is None +_auto_local_for_private_urls_resolved = False +_cached_auto_local_for_private_urls: bool = True + 
+
+def _auto_local_for_private_urls() -> bool:
+    """Return whether a cloud-configured install should auto-spawn a local
+    Chromium for LAN/localhost URLs.
+
+    Reads ``browser.auto_local_for_private_urls`` once (default ``True``) and
+    caches it for the process lifetime. When enabled, ``browser_navigate``
+    routes URLs whose host resolves to a private/loopback/LAN address to a
+    local headless Chromium sidecar even when a cloud provider (Browserbase
+    / Browser-Use / Firecrawl) is configured globally. Public URLs continue
+    to use the cloud provider in the same conversation.
+    """
+    global _auto_local_for_private_urls_resolved, _cached_auto_local_for_private_urls
+    if _auto_local_for_private_urls_resolved:
+        return _cached_auto_local_for_private_urls
+
+    _auto_local_for_private_urls_resolved = True
+    try:
+        from hermes_cli.config import read_raw_config
+        cfg = read_raw_config()
+        browser_cfg = cfg.get("browser", {})
+        if isinstance(browser_cfg, dict) and "auto_local_for_private_urls" in browser_cfg:
+            _cached_auto_local_for_private_urls = bool(
+                browser_cfg.get("auto_local_for_private_urls")
+            )
+    except Exception as e:
+        logger.debug("Could not read auto_local_for_private_urls from config: %s", e)
+    return _cached_auto_local_for_private_urls
+
+
+def _url_is_private(url: str) -> bool:
+    """Return True when the URL's host resolves to a private/LAN/loopback address.
+
+    Classifies the host inline against the same private/loopback/link-local/
+    CGNAT ranges that the SSRF check in ``tools.url_safety`` rejects. DNS
+    resolution failures are treated as NOT private (fall through to whatever
+    backend is configured, which will surface the DNS error naturally).
+    """
+    try:
+        from tools.url_safety import is_safe_url
+        # NOTE(review): ``is_safe_url`` is imported but never called below —
+        # the private-range checks are duplicated inline so a DNS failure can
+        # be told apart from a private hit. TODO: drop the unused import.
+ from urllib.parse import urlparse + import ipaddress + import socket + parsed = urlparse(url) + hostname = (parsed.hostname or "").strip().lower().rstrip(".") + if not hostname: + return False + # Literal IP → check directly + try: + ip = ipaddress.ip_address(hostname) + return ( + ip.is_private + or ip.is_loopback + or ip.is_link_local + or ip in ipaddress.ip_network("100.64.0.0/10") + ) + except ValueError: + pass + # Hostname — must resolve to confirm it's private (bare "localhost" + # resolves to 127.0.0.1 via /etc/hosts). Short-circuit on obvious + # names to avoid a DNS hop. + if hostname in ("localhost",) or hostname.endswith(".localhost"): + return True + if hostname.endswith(".local") or hostname.endswith(".lan") or hostname.endswith(".internal"): + return True + try: + addr_info = socket.getaddrinfo(hostname, None, socket.AF_UNSPEC, socket.SOCK_STREAM) + except socket.gaierror: + return False # DNS fail → not private, let the normal path fail + for _, _, _, _, sockaddr in addr_info: + try: + ip = ipaddress.ip_address(sockaddr[0]) + except ValueError: + continue + if ( + ip.is_private + or ip.is_loopback + or ip.is_link_local + or ip in ipaddress.ip_network("100.64.0.0/10") + ): + return True + return False + except Exception as exc: + logger.debug("URL-privacy check failed for %s: %s", url, exc) + return False + + +def _navigation_session_key(task_id: str, url: str) -> str: + """Pick the session key that should handle ``url`` for ``task_id``. + + Returns the bare task_id unless ALL of these are true: + 1. A cloud provider is configured (``_get_cloud_provider()`` is not None). + 2. Auto-local routing is enabled (``browser.auto_local_for_private_urls``, + default True). + 3. The URL resolves to a private/LAN/loopback address. + 4. A CDP override is not active (that path owns the whole session). + 5. Camofox mode is not active (Camofox is already local-only). 
+ + When all are true, returns ``f"{task_id}::local"`` so the hybrid-routing + path spawns a local Chromium sidecar while the cloud session (if any) + continues to serve public URLs. + """ + if task_id is None: + task_id = "default" + if _get_cdp_override(): + return task_id + if _is_camofox_mode(): + return task_id + if _get_cloud_provider() is None: + return task_id + if not _auto_local_for_private_urls(): + return task_id + if not _url_is_private(url): + return task_id + return f"{task_id}{_LOCAL_SUFFIX}" + + +def _is_local_sidecar_key(session_key: str) -> bool: + """Return True when ``session_key`` is a hybrid-routing local sidecar.""" + return session_key.endswith(_LOCAL_SUFFIX) + + +def _last_session_key(task_id: str) -> str: + """Return the session key to use for a non-nav browser tool call. + + If a previous ``browser_navigate`` on this task_id set a last-active key, + use it so snapshot/click/fill/etc. hit the same session. Otherwise fall + back to the bare task_id (matches original behavior for tasks that never + triggered hybrid routing). + """ + if task_id is None: + task_id = "default" + return _last_active_session_key.get(task_id, task_id) + + def _allow_private_urls() -> bool: """Return whether the browser is allowed to navigate to private/internal addresses. @@ -521,10 +662,25 @@ def _socket_safe_tmpdir() -> str: return tempfile.gettempdir() -# Track active sessions per task +# Track active sessions per "session key". +# +# A "session key" is either the bare task_id (cloud/default path) OR a composite +# like f"{task_id}::local" when the hybrid-routing feature spawns a local sidecar +# browser for a LAN/localhost URL while a cloud provider is configured globally. +# Both forms flow through the same _active_sessions / _run_browser_command / +# cleanup_browser code paths — the key is opaque to those internals. 
+# # Stores: session_name (always), bb_session_id + cdp_url (cloud mode only) -_active_sessions: Dict[str, Dict[str, str]] = {} # task_id -> {session_name, ...} -_recording_sessions: set = set() # task_ids with active recordings +_active_sessions: Dict[str, Dict[str, str]] = {} # session_key -> {session_name, ...} +_recording_sessions: set = set() # session_keys with active recordings + +# Tracks the most recent session_key used per task_id. Set by browser_navigate() +# after it chooses a backend for a URL; read by every non-nav browser tool +# (snapshot/click/fill/eval/...) so they target the session that served the last +# navigation. Without this, a task that navigated to localhost on the local +# sidecar would fall back to the cloud session on its next snapshot call. +_last_active_session_key: Dict[str, str] = {} # task_id -> session_key +_LOCAL_SUFFIX = "::local" # Flag to track if cleanup has been done _cleanup_done = False @@ -1014,37 +1170,48 @@ def _create_cdp_session(task_id: str, cdp_url: str) -> Dict[str, str]: def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]: """ - Get or create session info for the given task. - + Get or create session info for the given session key. + In cloud mode, creates a Browserbase session with proxies enabled. In local mode, generates a session name for agent-browser --session. Also starts the inactivity cleanup thread and updates activity tracking. Thread-safe: multiple subagents can call this concurrently. - + Args: - task_id: Unique identifier for the task - + task_id: Session key. Normally the task_id as-is, but may carry the + ``::local`` suffix for the hybrid-routing local sidecar — in that + case the cloud provider is skipped even when one is configured, + and a local Chromium session is created instead. 
+ Returns: Dict with session_name (always), bb_session_id + cdp_url (cloud only) """ if task_id is None: task_id = "default" - + # Start the cleanup thread if not running (handles inactivity timeouts) _start_browser_cleanup_thread() - + # Update activity timestamp for this session _update_session_activity(task_id) - + with _cleanup_lock: # Check if we already have a session for this task if task_id in _active_sessions: return _active_sessions[task_id] - + + # Hybrid routing: session keys ending with ``::local`` force a local + # Chromium regardless of the globally-configured cloud provider. Public + # URLs in the same conversation continue to use the cloud session under + # the bare task_id key. + force_local = _is_local_sidecar_key(task_id) + # Create session outside the lock (network call in cloud mode) cdp_override = _get_cdp_override() - if cdp_override: + if cdp_override and not force_local: session_info = _create_cdp_session(task_id, cdp_override) + elif force_local: + session_info = _create_local_session(task_id) else: provider = _get_cloud_provider() if provider is None: @@ -1081,7 +1248,7 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]: session_info["fallback_from_cloud"] = True session_info["fallback_reason"] = str(e) session_info["fallback_provider"] = provider_name - + with _cleanup_lock: # Double-check: another thread may have created a session while we # were doing the network call. Use the existing one to avoid leaking @@ -1093,7 +1260,9 @@ def _get_session_info(task_id: Optional[str] = None) -> Dict[str, str]: # Lazy-start the CDP supervisor now that the session exists (if the # backend surfaces a CDP URL via override or session_info["cdp_url"]). # Idempotent; swallows errors. See _ensure_cdp_supervisor for details. - _ensure_cdp_supervisor(task_id) + # Skip for local sidecars — they have no CDP URL. 
+ if not force_local: + _ensure_cdp_supervisor(task_id) return session_info @@ -1521,9 +1690,21 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str: # SSRF protection — block private/internal addresses before navigating. # Skipped for local backends (Camofox, headless Chromium without a cloud # provider) because the agent already has full local network access via - # the terminal tool. Can also be opted out for cloud mode via - # ``browser.allow_private_urls`` in config. - if not _is_local_backend() and not _allow_private_urls() and not _is_safe_url(url): + # the terminal tool. Also skipped when hybrid routing will auto-spawn a + # local Chromium sidecar for this URL (cloud provider configured + + # private URL + ``browser.auto_local_for_private_urls`` enabled) — the + # cloud provider never sees the URL in that case. Can also be opted + # out globally via ``browser.allow_private_urls`` in config. + effective_task_id = task_id or "default" + nav_session_key = _navigation_session_key(effective_task_id, url) + auto_local_this_nav = _is_local_sidecar_key(nav_session_key) + + if ( + not _is_local_backend() + and not auto_local_this_nav + and not _allow_private_urls() + and not _is_safe_url(url) + ): return json.dumps({ "success": False, "error": "Blocked: URL targets a private or internal address", @@ -1543,19 +1724,31 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str: from tools.browser_camofox import camofox_navigate return camofox_navigate(url, task_id) - effective_task_id = task_id or "default" - + if auto_local_this_nav: + logger.info( + "browser_navigate: auto-routing %s to local Chromium sidecar " + "(cloud provider %s stays on cloud for public URLs; " + "set browser.auto_local_for_private_urls: false to disable)", + url, + type(_get_cloud_provider()).__name__ if _get_cloud_provider() else "none", + ) + # Get session info to check if this is a new session # (will create one with features logged if not exists) - 
session_info = _get_session_info(effective_task_id) + session_info = _get_session_info(nav_session_key) is_first_nav = session_info.get("_first_nav", True) - + # Auto-start recording if configured and this is first navigation if is_first_nav: session_info["_first_nav"] = False - _maybe_start_recording(effective_task_id) + _maybe_start_recording(nav_session_key) - result = _run_browser_command(effective_task_id, "open", [url], timeout=max(_get_command_timeout(), 60)) + result = _run_browser_command(nav_session_key, "open", [url], timeout=max(_get_command_timeout(), 60)) + + # Remember which session served this nav so snapshot/click/fill/... + # on the same task_id hit it (critical when hybrid routing has both a + # cloud session and a local sidecar alive concurrently). + _last_active_session_key[effective_task_id] = nav_session_key if result.get("success"): data = result.get("data", {}) @@ -1565,10 +1758,17 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str: # Post-redirect SSRF check — if the browser followed a redirect to a # private/internal address, block the result so the model can't read # internal content via subsequent browser_snapshot calls. - # Skipped for local backends (same rationale as the pre-nav check). - if not _is_local_backend() and not _allow_private_urls() and final_url and final_url != url and not _is_safe_url(final_url): + # Skipped for local backends (same rationale as the pre-nav check), + # and for the hybrid local sidecar (we're already on a local browser + # hitting a private URL by design). 
+ if ( + not _is_local_backend() + and not auto_local_this_nav + and not _allow_private_urls() + and final_url and final_url != url and not _is_safe_url(final_url) + ): # Navigate away to a blank page to prevent snapshot leaks - _run_browser_command(effective_task_id, "open", ["about:blank"], timeout=10) + _run_browser_command(nav_session_key, "open", ["about:blank"], timeout=10) return json.dumps({ "success": False, "error": "Blocked: redirect landed on a private/internal address", @@ -1612,7 +1812,7 @@ def browser_navigate(url: str, task_id: Optional[str] = None) -> str: # Auto-take a compact snapshot so the model can act immediately # without a separate browser_snapshot call. try: - snap_result = _run_browser_command(effective_task_id, "snapshot", ["-c"]) + snap_result = _run_browser_command(nav_session_key, "snapshot", ["-c"]) if snap_result.get("success"): snap_data = snap_result.get("data", {}) snapshot_text = snap_data.get("snapshot", "") @@ -1652,7 +1852,7 @@ def browser_snapshot( from tools.browser_camofox import camofox_snapshot return camofox_snapshot(full, task_id, user_task) - effective_task_id = task_id or "default" + effective_task_id = _last_session_key(task_id or "default") # Build command args based on full flag args = [] @@ -1714,7 +1914,7 @@ def browser_click(ref: str, task_id: Optional[str] = None) -> str: from tools.browser_camofox import camofox_click return camofox_click(ref, task_id) - effective_task_id = task_id or "default" + effective_task_id = _last_session_key(task_id or "default") # Ensure ref starts with @ if not ref.startswith("@"): @@ -1750,7 +1950,7 @@ def browser_type(ref: str, text: str, task_id: Optional[str] = None) -> str: from tools.browser_camofox import camofox_type return camofox_type(ref, text, task_id) - effective_task_id = task_id or "default" + effective_task_id = _last_session_key(task_id or "default") # Ensure ref starts with @ if not ref.startswith("@"): @@ -1804,7 +2004,7 @@ def browser_scroll(direction: str, 
task_id: Optional[str] = None) -> str: result = camofox_scroll(direction, task_id) return result - effective_task_id = task_id or "default" + effective_task_id = _last_session_key(task_id or "default") result = _run_browser_command(effective_task_id, "scroll", [direction, str(_SCROLL_PIXELS)]) if not result.get("success"): @@ -1833,7 +2033,7 @@ def browser_back(task_id: Optional[str] = None) -> str: from tools.browser_camofox import camofox_back return camofox_back(task_id) - effective_task_id = task_id or "default" + effective_task_id = _last_session_key(task_id or "default") result = _run_browser_command(effective_task_id, "back", []) if result.get("success"): @@ -1864,7 +2064,7 @@ def browser_press(key: str, task_id: Optional[str] = None) -> str: from tools.browser_camofox import camofox_press return camofox_press(key, task_id) - effective_task_id = task_id or "default" + effective_task_id = _last_session_key(task_id or "default") result = _run_browser_command(effective_task_id, "press", [key]) if result.get("success"): @@ -1906,7 +2106,7 @@ def browser_console(clear: bool = False, expression: Optional[str] = None, task_ from tools.browser_camofox import camofox_console return camofox_console(clear, task_id) - effective_task_id = task_id or "default" + effective_task_id = _last_session_key(task_id or "default") console_args = ["--clear"] if clear else [] error_args = ["--clear"] if clear else [] @@ -1945,7 +2145,7 @@ def _browser_eval(expression: str, task_id: Optional[str] = None) -> str: if _is_camofox_mode(): return _camofox_eval(expression, task_id) - effective_task_id = task_id or "default" + effective_task_id = _last_session_key(task_id or "default") result = _run_browser_command(effective_task_id, "eval", [expression]) if not result.get("success"): @@ -2077,7 +2277,7 @@ def browser_get_images(task_id: Optional[str] = None) -> str: from tools.browser_camofox import camofox_get_images return camofox_get_images(task_id) - effective_task_id = task_id or 
"default" + effective_task_id = _last_session_key(task_id or "default") # Use eval to run JavaScript that extracts images js_code = """JSON.stringify( @@ -2147,7 +2347,7 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str] import base64 import uuid as uuid_mod - effective_task_id = task_id or "default" + effective_task_id = _last_session_key(task_id or "default") # Save screenshot to persistent location so it can be shared with users from hermes_constants import get_hermes_dir @@ -2350,17 +2550,47 @@ def _cleanup_old_recordings(max_age_hours=72): def cleanup_browser(task_id: Optional[str] = None) -> None: """ - Clean up browser session for a task. - + Clean up browser session(s) for a task. + Called automatically when a task completes or when inactivity timeout is reached. Closes both the agent-browser/Browserbase session and Camofox sessions. - + + When ``task_id`` is a bare task identifier (no ``::local`` suffix), reaps + BOTH the cloud/primary session AND any hybrid-routing local sidecar that + may have been spawned for LAN/localhost URLs in the same task. When + ``task_id`` already carries a ``::local`` suffix (called from the inactivity + cleanup loop against a specific session key), reaps only that one. + Args: - task_id: Task identifier to clean up + task_id: Task identifier (or explicit session key) """ if task_id is None: task_id = "default" + # Expand to the full set of session keys to reap. For a bare task_id + # that includes the cloud/primary key + the local sidecar if one exists. 
+ if _is_local_sidecar_key(task_id): + session_keys = [task_id] + bare_task_id = task_id[: -len(_LOCAL_SUFFIX)] + else: + session_keys = [task_id] + sidecar_key = f"{task_id}{_LOCAL_SUFFIX}" + with _cleanup_lock: + if sidecar_key in _active_sessions: + session_keys.append(sidecar_key) + bare_task_id = task_id + + for session_key in session_keys: + _cleanup_single_browser_session(session_key) + + # Drop the last-active pointer only when the bare task is being cleaned + # (i.e. not when we're only reaping a sidecar mid-task). + if not _is_local_sidecar_key(task_id): + _last_active_session_key.pop(bare_task_id, None) + + +def _cleanup_single_browser_session(task_id: str) -> None: + """Internal: reap a single browser session by its exact session key.""" # Stop the CDP supervisor for this task FIRST so we close our WebSocket # before the backend tears down the underlying CDP endpoint. _stop_cdp_supervisor(task_id) @@ -2379,32 +2609,33 @@ def cleanup_browser(task_id: Optional[str] = None) -> None: logger.debug("cleanup_browser called for task_id: %s", task_id) logger.debug("Active sessions: %s", list(_active_sessions.keys())) - + # Check if session exists (under lock), but don't remove yet - # _run_browser_command needs it to build the close command. 
with _cleanup_lock: session_info = _active_sessions.get(task_id) - + if session_info: bb_session_id = session_info.get("bb_session_id", "unknown") logger.debug("Found session for task %s: bb_session_id=%s", task_id, bb_session_id) - + # Stop auto-recording before closing (saves the file) _maybe_stop_recording(task_id) - + # Try to close via agent-browser first (needs session in _active_sessions) try: _run_browser_command(task_id, "close", [], timeout=10) logger.debug("agent-browser close command completed for task %s", task_id) except Exception as e: logger.warning("agent-browser close failed for task %s: %s", task_id, e) - + # Now remove from tracking under lock with _cleanup_lock: _active_sessions.pop(task_id, None) _session_last_activity.pop(task_id, None) - - # Cloud mode: close the cloud browser session via provider API + + # Cloud mode: close the cloud browser session via provider API. + # Local sidecars have bb_session_id=None so this no-ops for them. if bb_session_id: provider = _get_cloud_provider() if provider is not None: diff --git a/tools/mcp_oauth.py b/tools/mcp_oauth.py index fd655bf3d2..51e243c6c1 100644 --- a/tools/mcp_oauth.py +++ b/tools/mcp_oauth.py @@ -519,12 +519,6 @@ def _maybe_preregister_client( logger.debug("Pre-registered client_id=%s for '%s'", client_id, storage._server_name) -def _parse_base_url(server_url: str) -> str: - """Strip path component from server URL, returning the base origin.""" - parsed = urlparse(server_url) - return f"{parsed.scheme}://{parsed.netloc}" - - def build_oauth_auth( server_name: str, server_url: str, @@ -570,7 +564,7 @@ def build_oauth_auth( _maybe_preregister_client(storage, cfg, client_metadata) return OAuthClientProvider( - server_url=_parse_base_url(server_url), + server_url=server_url, client_metadata=client_metadata, storage=storage, redirect_handler=_redirect_handler, diff --git a/tools/mcp_oauth_manager.py b/tools/mcp_oauth_manager.py index 7c8a91f3f9..dbe2fc3e06 100644 --- 
a/tools/mcp_oauth_manager.py +++ b/tools/mcp_oauth_manager.py @@ -362,7 +362,6 @@ class MCPOAuthManager: _configure_callback_port, _is_interactive, _maybe_preregister_client, - _parse_base_url, _redirect_handler, _wait_for_callback, ) @@ -387,7 +386,7 @@ class MCPOAuthManager: return _HERMES_PROVIDER_CLS( server_name=server_name, - server_url=_parse_base_url(entry.server_url), + server_url=entry.server_url, client_metadata=client_metadata, storage=storage, redirect_handler=_redirect_handler, diff --git a/tools/process_registry.py b/tools/process_registry.py index 57709bc29c..479030120d 100644 --- a/tools/process_registry.py +++ b/tools/process_registry.py @@ -776,7 +776,7 @@ class ProcessRegistry: # Only enqueue completion notification on the FIRST move. Without # this guard, kill_process() and the reader thread can both call - # _move_to_finished(), producing duplicate [SYSTEM: ...] messages. + # _move_to_finished(), producing duplicate [IMPORTANT: ...] messages. if was_running and session.notify_on_complete: from tools.ansi_strip import strip_ansi output_tail = strip_ansi(session.output_buffer[-2000:]) if session.output_buffer else "" diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 39def65eb5..5cfc38fab2 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -2321,6 +2321,26 @@ def _(rid, params: dict) -> dict: payload["rendered"] = rendered _emit("message.complete", sid, payload) + if ( + status == "complete" + and isinstance(raw, str) + and raw.strip() + and isinstance(text, str) + and text.strip() + ): + try: + from agent.title_generator import maybe_auto_title + + maybe_auto_title( + _get_db(), + session.get("session_key") or sid, + text, + raw, + session.get("history", []), + ) + except Exception: + pass + # CLI parity: when voice-mode TTS is on, speak the agent reply # (cli.py:_voice_speak_response). 
Only the final text — tool # calls / reasoning already stream separately and would be @@ -2550,48 +2570,6 @@ def _(rid, params: dict) -> dict: return _ok(rid, {"task_id": task_id}) -@method("prompt.btw") -def _(rid, params: dict) -> dict: - session, err = _sess(params, rid) - if err: - return err - text, sid = params.get("text", ""), params.get("session_id", "") - if not text: - return _err(rid, 4012, "text required") - snapshot = list(session.get("history", [])) - - def run(): - session_tokens = _set_session_context(session["session_key"]) - try: - from run_agent import AIAgent - - result = AIAgent( - model=_resolve_model(), - quiet_mode=True, - platform="tui", - max_iterations=8, - enabled_toolsets=[], - ).run_conversation(text, conversation_history=snapshot) - _emit( - "btw.complete", - sid, - { - "text": ( - result.get("final_response", str(result)) - if isinstance(result, dict) - else str(result) - ) - }, - ) - except Exception as e: - _emit("btw.complete", sid, {"text": f"error: {e}"}) - finally: - _clear_session_context(session_tokens) - - threading.Thread(target=run, daemon=True).start() - return _ok(rid, {"status": "running"}) - - # ── Methods: respond ───────────────────────────────────────────────── diff --git a/ui-tui/README.md b/ui-tui/README.md index 2f95a47aa2..17d57f08af 100644 --- a/ui-tui/README.md +++ b/ui-tui/README.md @@ -252,7 +252,6 @@ Primary event types the client handles today: | `sudo.request` | `{ request_id }` | | `secret.request` | `{ prompt, env_var, request_id }` | | `background.complete` | `{ task_id, text }` | -| `btw.complete` | `{ text }` | | `error` | `{ message }` | | `gateway.stderr` | synthesized from child stderr | | `gateway.protocol_error` | synthesized from malformed stdout | diff --git a/ui-tui/packages/hermes-ink/src/ink/hooks/use-selection.ts b/ui-tui/packages/hermes-ink/src/ink/hooks/use-selection.ts index 58761fe241..bd4ef87fc7 100644 --- a/ui-tui/packages/hermes-ink/src/ink/hooks/use-selection.ts +++ 
b/ui-tui/packages/hermes-ink/src/ink/hooks/use-selection.ts @@ -9,9 +9,9 @@ import { type FocusMove, type SelectionState, shiftAnchor } from '../selection.j * Returns no-op functions when fullscreen mode is disabled. */ export function useSelection(): { - copySelection: () => string + copySelection: () => Promise /** Copy without clearing the highlight (for copy-on-select). */ - copySelectionNoClear: () => string + copySelectionNoClear: () => Promise clearSelection: () => void hasSelection: () => boolean /** Read the raw mutable selection state (for drag-to-scroll). */ @@ -48,8 +48,8 @@ export function useSelection(): { return useMemo(() => { if (!ink) { return { - copySelection: () => '', - copySelectionNoClear: () => '', + copySelection: async () => '', + copySelectionNoClear: async () => '', clearSelection: () => {}, hasSelection: () => false, getState: () => null, diff --git a/ui-tui/packages/hermes-ink/src/ink/ink.tsx b/ui-tui/packages/hermes-ink/src/ink/ink.tsx index ff6570f8cf..9db3980490 100644 --- a/ui-tui/packages/hermes-ink/src/ink/ink.tsx +++ b/ui-tui/packages/hermes-ink/src/ink/ink.tsx @@ -1302,11 +1302,13 @@ export default class Ink { } /** - * Copy the current selection to the clipboard without clearing the - * highlight. Matches iTerm2's copy-on-select behavior where the selected - * region stays visible after the automatic copy. + * Copy the current text selection to the system clipboard without clearing the + * selection. Returns the copied text when a clipboard path succeeded (native + * tool fired, tmux buffer loaded, or OSC 52 emitted), or '' when no path was + * taken (e.g. headless Linux without tmux). Matches iTerm2's copy-on-select + * behavior where the selected region stays visible after the automatic copy. 
*/ - copySelectionNoClear(): string { + async copySelectionNoClear(): Promise { if (!hasSelection(this.selection)) { return '' } @@ -1314,28 +1316,41 @@ export default class Ink { const text = getSelectedText(this.selection, this.frontFrame.screen) if (text) { - // Raw OSC 52, or DCS-passthrough-wrapped OSC 52 inside tmux (tmux - // drops it silently unless allow-passthrough is on — no regression). - void setClipboard(text).then(raw => { - if (raw) { - this.options.stdout.write(raw) + try { + const { sequence, success } = await setClipboard(text) + + if (sequence) { + this.options.stdout.write(sequence) } - }) + + if (success) { + return text + } + + if (process.env.HERMES_TUI_DEBUG_CLIPBOARD) { + console.error('[clipboard] no path reached the clipboard (headless + no tmux?) — set HERMES_TUI_FORCE_OSC52=1 to force the escape sequence') + } + } catch (err) { + if (process.env.HERMES_TUI_DEBUG_CLIPBOARD) { + console.error('[clipboard] error:', err) + } + } } - return text + return '' } /** * Copy the current text selection to the system clipboard via OSC 52 - * and clear the selection. Returns the copied text (empty if no selection). + * and clear the selection. Returns the copied text (empty if no selection + * or clipboard operation failed). 
*/ - copySelection(): string { + async copySelection(): Promise { if (!hasSelection(this.selection)) { return '' } - const text = this.copySelectionNoClear() + const text = await this.copySelectionNoClear() clearSelection(this.selection) this.notifySelectionChange() diff --git a/ui-tui/packages/hermes-ink/src/ink/termio/osc.test.ts b/ui-tui/packages/hermes-ink/src/ink/termio/osc.test.ts index 4860544479..4c54f8d18a 100644 --- a/ui-tui/packages/hermes-ink/src/ink/termio/osc.test.ts +++ b/ui-tui/packages/hermes-ink/src/ink/termio/osc.test.ts @@ -26,4 +26,26 @@ describe('shouldEmitClipboardSequence', () => { shouldEmitClipboardSequence({ HERMES_TUI_COPY_OSC52: '0', TERM: 'xterm-256color' } as NodeJS.ProcessEnv) ).toBe(false) }) + + it('HERMES_TUI_FORCE_OSC52 takes precedence over TMUX suppression', () => { + // Without the override, local-in-tmux suppresses the OSC 52 sequence + // so the terminal multiplexer path wins. FORCE_OSC52=1 flips that + // back on for users whose tmux config supports passthrough. + expect(shouldEmitClipboardSequence({ TMUX: '/tmp/t,1,0' } as NodeJS.ProcessEnv)).toBe(false) + expect( + shouldEmitClipboardSequence({ + HERMES_TUI_FORCE_OSC52: '1', + TMUX: '/tmp/t,1,0' + } as NodeJS.ProcessEnv) + ).toBe(true) + }) + + it('HERMES_TUI_FORCE_OSC52=0 suppresses OSC 52 even for remote or plain terminals', () => { + expect( + shouldEmitClipboardSequence({ + HERMES_TUI_FORCE_OSC52: '0', + SSH_CONNECTION: '1' + } as NodeJS.ProcessEnv) + ).toBe(false) + }) }) diff --git a/ui-tui/packages/hermes-ink/src/ink/termio/osc.ts b/ui-tui/packages/hermes-ink/src/ink/termio/osc.ts index 3230767e7e..c60196b8c1 100644 --- a/ui-tui/packages/hermes-ink/src/ink/termio/osc.ts +++ b/ui-tui/packages/hermes-ink/src/ink/termio/osc.ts @@ -84,7 +84,11 @@ export function getClipboardPath(): ClipboardPath { } export function shouldEmitClipboardSequence(env: NodeJS.ProcessEnv = process.env): boolean { - const override = (env.HERMES_TUI_CLIPBOARD_OSC52 ?? 
env.HERMES_TUI_COPY_OSC52 ?? '').trim() + const override = ( + env.HERMES_TUI_FORCE_OSC52 ?? + env.HERMES_TUI_CLIPBOARD_OSC52 ?? + env.HERMES_TUI_COPY_OSC52 ?? '' + ).trim() if (ENV_ON_RE.test(override)) { return true @@ -162,10 +166,23 @@ export async function tmuxLoadBuffer(text: string): Promise { * utilities (pbcopy/wl-copy/xclip/xsel/clip.exe) always work locally. Over * SSH these would write to the remote clipboard — OSC 52 is the right path there. * - * Returns the sequence for the caller to write to stdout (raw OSC 52 - * outside tmux, DCS-wrapped inside). + * Returns { sequence, success }: + * - `sequence` is the bytes to write to stdout (raw OSC 52 outside tmux, + * DCS-wrapped inside; empty string when we shouldn't emit). + * - `success` is true when we believe SOME path reached the clipboard: + * native tool fired (local), tmux buffer loaded, or an OSC 52 sequence + * was emitted to the terminal. False only when no path was taken at + * all (headless Linux with no tmux + osc52 suppressed, effectively). + * This is best-effort — pbcopy/xclip are fire-and-forget, and OSC 52 + * depends on the outer terminal honoring the sequence — but it lets + * callers distinguish "nothing attempted" from "attempted". */ -export async function setClipboard(text: string): Promise { +export type ClipboardResult = { + sequence: string + success: boolean +} + +export async function setClipboard(text: string): Promise { const b64 = Buffer.from(text, 'utf8').toString('base64') const raw = osc(OSC.CLIPBOARD, 'c', b64) const emitSequence = shouldEmitClipboardSequence(process.env) @@ -177,20 +194,28 @@ export async function setClipboard(text: string): Promise { // (https://anthropic.slack.com/archives/C07VBSHV7EV/p1773943921788829). // Gated on SSH_CONNECTION (not SSH_TTY) since tmux panes inherit SSH_TTY // forever but SSH_CONNECTION is in tmux's default update-environment and - // clears on local attach. Fire-and-forget. 
- if (!process.env['SSH_CONNECTION']) { - copyNative(text) - } + // clears on local attach. Fire-and-forget, but `copyNativeAttempted` + // tells us whether ANY native path will be tried on this platform. + const nativeAttempted = + !process.env['SSH_CONNECTION'] && copyNative(text) const tmuxBufferLoaded = await tmuxLoadBuffer(text) // Inner OSC uses BEL directly (not osc()) — ST's ESC would need doubling // too, and BEL works everywhere for OSC 52. - if (tmuxBufferLoaded) { - return emitSequence ? tmuxPassthrough(`${ESC}]52;c;${b64}${BEL}`) : '' - } + const sequence = tmuxBufferLoaded + ? (emitSequence ? tmuxPassthrough(`${ESC}]52;c;${b64}${BEL}`) : '') + : (emitSequence ? raw : '') - return emitSequence ? raw : '' + // Success if any path was taken. Native and tmux are fire-and-forget, + // so we can't truly confirm the clipboard was written — but if native + // was attempted OR tmux buffer loaded OR we emitted OSC 52, the user's + // paste is likely to work. The only false case is "we did literally + // nothing" (e.g. local-in-tmux with osc52 suppressed and tmux buffer + // load failed), in which case reporting failure to the user is honest. + const success = nativeAttempted || tmuxBufferLoaded || sequence.length > 0 + + return { sequence, success } } // Linux clipboard tool: undefined = not yet probed, null = none available. @@ -198,65 +223,95 @@ export async function setClipboard(text: string): Promise { // Cached after first attempt so repeated mouse-ups skip the probe chain. let linuxCopy: 'wl-copy' | 'xclip' | 'xsel' | null | undefined +/** Internal: probe once and cache — wl-copy first, then xclip, then xsel. 
*/ +async function probeLinuxCopy(): Promise<'wl-copy' | 'xclip' | 'xsel' | null> { + const opts = { useCwd: false, timeout: 500 } + + const r = await execFileNoThrow('wl-copy', [], opts) + + if (r.code === 0) { + return 'wl-copy' + } + + const r2 = await execFileNoThrow('xclip', ['-selection', 'clipboard'], opts) + + if (r2.code === 0) { + return 'xclip' + } + + const r3 = await execFileNoThrow('xsel', ['--clipboard', '--input'], opts) + + return r3.code === 0 ? 'xsel' : null +} + /** * Shell out to a native clipboard utility as a safety net for OSC 52. * Only called when not in an SSH session (over SSH, these would write to * the remote machine's clipboard — OSC 52 is the right path there). * Fire-and-forget: failures are silent since OSC 52 may have succeeded. + * + * Returns true when a native copy path was (or will be) attempted — i.e. + * we'll spawn pbcopy on macOS, clip on Windows, or a known-working Linux + * tool. Returns false only when we know no native tool is viable (Linux + * without DISPLAY/WAYLAND_DISPLAY, or previously-probed-to-null). The + * return value is used to decide whether to tell the user the copy + * succeeded — spawning is best-effort but good enough to claim success. + * + * Linux behaviour: if DISPLAY and WAYLAND_DISPLAY are both unset, native + * clipboard tools cannot work (they need a display server). In that case + * we skip probing entirely and treat linuxCopy as permanently null. 
*/ -function copyNative(text: string): void { +function copyNative(text: string): boolean { const opts = { input: text, useCwd: false, timeout: 2000 } switch (process.platform) { case 'darwin': void execFileNoThrow('pbcopy', [], opts) - return + return true case 'linux': { - if (linuxCopy === null) { - return - } - - if (linuxCopy === 'wl-copy') { - void execFileNoThrow('wl-copy', [], opts) - - return - } - - if (linuxCopy === 'xclip') { - void execFileNoThrow('xclip', ['-selection', 'clipboard'], opts) - - return - } - - if (linuxCopy === 'xsel') { - void execFileNoThrow('xsel', ['--clipboard', '--input'], opts) - - return - } - - // First call: probe wl-copy (Wayland) then xclip/xsel (X11), cache winner. - void execFileNoThrow('wl-copy', [], opts).then(r => { - if (r.code === 0) { - linuxCopy = 'wl-copy' - - return + // If we already probed (success or hard-fail), short-circuit. + if (linuxCopy !== undefined) { + if (linuxCopy === null) { + // No working native tool — skip silently. + return false } - void execFileNoThrow('xclip', ['-selection', 'clipboard'], opts).then(r2 => { - if (r2.code === 0) { - linuxCopy = 'xclip' + // linuxCopy is a known-working tool; fire-and-forget. + void execFileNoThrow(linuxCopy, linuxCopy === 'wl-copy' ? [] : ['-selection', 'clipboard'], opts) - return - } + return true + } - void execFileNoThrow('xsel', ['--clipboard', '--input'], opts).then(r3 => { - linuxCopy = r3.code === 0 ? 'xsel' : null - }) - }) - }) + // No display server → native tools will fail immediately. Cache null. + if (!process.env.DISPLAY && !process.env.WAYLAND_DISPLAY) { + if (process.env.HERMES_TUI_DEBUG_CLIPBOARD) { + console.error('[clipboard] [native] Linux: no DISPLAY or WAYLAND_DISPLAY — native clipboard unavailable') + } - return + linuxCopy = null + + return false + } + // First call: probe in the background and cache the result for future copies. + // We don't await — this is fire-and-forget. 
Treat as an attempt: + // the probe will discover a tool and spawn it. If probing finds + // nothing, the NEXT copy will short-circuit above. + void (async () => { + const winner = await probeLinuxCopy() + linuxCopy = winner + + if (process.env.HERMES_TUI_DEBUG_CLIPBOARD) { + console.error(`[clipboard] [native] Linux: clipboard probe complete → ${winner ?? 'no tool available'}`) + } + + // Actually perform the copy with the discovered tool. + if (winner) { + void execFileNoThrow(winner, winner === 'wl-copy' ? [] : ['-selection', 'clipboard'], opts) + } + })() + + return true } case 'win32': @@ -264,8 +319,10 @@ function copyNative(text: string): void { // imperfect (system locale encoding) but good enough for a fallback. void execFileNoThrow('clip', [], opts) - return + return true } + + return false } /** @internal test-only */ diff --git a/ui-tui/src/__tests__/createSlashHandler.test.ts b/ui-tui/src/__tests__/createSlashHandler.test.ts index a51c89c5b8..3c504454d1 100644 --- a/ui-tui/src/__tests__/createSlashHandler.test.ts +++ b/ui-tui/src/__tests__/createSlashHandler.test.ts @@ -392,7 +392,7 @@ const buildComposer = () => ({ hasSelection: false, paste: vi.fn(), queueRef: { current: [] as string[] }, - selection: { copySelection: vi.fn(() => '') }, + selection: { copySelection: vi.fn(async () => '') }, setInput: vi.fn() }) diff --git a/ui-tui/src/app/createGatewayEventHandler.ts b/ui-tui/src/app/createGatewayEventHandler.ts index 4e51c03204..699a8138b9 100644 --- a/ui-tui/src/app/createGatewayEventHandler.ts +++ b/ui-tui/src/app/createGatewayEventHandler.ts @@ -429,12 +429,6 @@ export function createGatewayEventHandler(ctx: GatewayEventHandlerContext): (ev: return - case 'btw.complete': - dropBgTask('btw:x') - sys(`[btw] ${ev.payload.text}`) - - return - case 'subagent.spawn_requested': // Child built but not yet running (waiting on ThreadPoolExecutor slot). // Preserve completed state if a later event races in before this one. 
diff --git a/ui-tui/src/app/interfaces.ts b/ui-tui/src/app/interfaces.ts index 34919aca02..f221539184 100644 --- a/ui-tui/src/app/interfaces.ts +++ b/ui-tui/src/app/interfaces.ts @@ -33,7 +33,7 @@ export type StatusBarMode = 'bottom' | 'off' | 'top' export interface SelectionApi { captureScrolledRows: (firstRow: number, lastRow: number, side: 'above' | 'below') => void clearSelection: () => void - copySelection: () => string + copySelection: () => Promise getState: () => unknown shiftAnchor: (dRow: number, minRow: number, maxRow: number) => void shiftSelection: (dRow: number, minRow: number, maxRow: number) => void diff --git a/ui-tui/src/app/slash/commands/core.ts b/ui-tui/src/app/slash/commands/core.ts index 70804a1f5b..ecc080ca13 100644 --- a/ui-tui/src/app/slash/commands/core.ts +++ b/ui-tui/src/app/slash/commands/core.ts @@ -251,11 +251,17 @@ export const coreCommands: SlashCommand[] = [ { help: 'copy selection or assistant message', name: 'copy', - run: (arg, ctx) => { + run: async (arg, ctx) => { const { sys } = ctx.transcript - if (!arg && ctx.composer.hasSelection && ctx.composer.selection.copySelection()) { - return sys('copied selection') + if (!arg && ctx.composer.hasSelection) { + const text = await ctx.composer.selection.copySelection() + + if (text) { + return sys(`copied ${text.length} characters`) + } else { + return sys('clipboard copy failed — try HERMES_TUI_FORCE_OSC52=1 to force the escape sequence; HERMES_TUI_DEBUG_CLIPBOARD=1 for details') + } } if (arg && Number.isNaN(parseInt(arg, 10))) { diff --git a/ui-tui/src/app/slash/commands/session.ts b/ui-tui/src/app/slash/commands/session.ts index e91dd421f5..fc4fb8dafe 100644 --- a/ui-tui/src/app/slash/commands/session.ts +++ b/ui-tui/src/app/slash/commands/session.ts @@ -1,7 +1,6 @@ import { attachedImageNotice, introMsg, toTranscriptMessages } from '../../../domain/messages.js' import type { BackgroundStartResponse, - BtwStartResponse, ConfigGetValueResponse, ConfigSetResponse, 
ImageAttachResponse, @@ -26,7 +25,7 @@ const persistedModelArg = (arg: string) => { export const sessionCommands: SlashCommand[] = [ { - aliases: ['bg'], + aliases: ['bg', 'btw'], help: 'launch a background prompt', name: 'background', run: (arg, ctx) => { @@ -47,23 +46,6 @@ export const sessionCommands: SlashCommand[] = [ } }, - { - help: 'by-the-way follow-up', - name: 'btw', - run: (arg, ctx) => { - if (!arg) { - return ctx.transcript.sys('/btw ') - } - - ctx.gateway.rpc('prompt.btw', { session_id: ctx.sid, text: arg }).then( - ctx.guarded(() => { - patchUiState(state => ({ ...state, bgTasks: new Set(state.bgTasks).add('btw:x') })) - ctx.transcript.sys('btw running…') - }) - ) - } - }, - { help: 'change or show model', aliases: ['provider'], diff --git a/ui-tui/src/gatewayTypes.ts b/ui-tui/src/gatewayTypes.ts index e64d113c22..ce056040c2 100644 --- a/ui-tui/src/gatewayTypes.ts +++ b/ui-tui/src/gatewayTypes.ts @@ -178,10 +178,6 @@ export interface BackgroundStartResponse { task_id?: string } -export interface BtwStartResponse { - ok?: boolean -} - export interface ClarifyRespondResponse { ok?: boolean } @@ -403,7 +399,6 @@ export type GatewayEvent = | { payload: { request_id: string }; session_id?: string; type: 'sudo.request' } | { payload: { env_var: string; prompt: string; request_id: string }; session_id?: string; type: 'secret.request' } | { payload: { task_id: string; text: string }; session_id?: string; type: 'background.complete' } - | { payload: { text: string }; session_id?: string; type: 'btw.complete' } | { payload: SubagentEventPayload; session_id?: string; type: 'subagent.spawn_requested' } | { payload: SubagentEventPayload; session_id?: string; type: 'subagent.start' } | { payload: SubagentEventPayload; session_id?: string; type: 'subagent.thinking' } diff --git a/ui-tui/src/types/hermes-ink.d.ts b/ui-tui/src/types/hermes-ink.d.ts index 344833ba18..497bf54b73 100644 --- a/ui-tui/src/types/hermes-ink.d.ts +++ b/ui-tui/src/types/hermes-ink.d.ts @@ 
-84,8 +84,8 @@ declare module '@hermes/ink' { export function withInkSuspended(run: RunExternalProcess): Promise export function useInput(handler: InputHandler, options?: { readonly isActive?: boolean }): void export function useSelection(): { - readonly copySelection: () => string - readonly copySelectionNoClear: () => string + readonly copySelection: () => Promise + readonly copySelectionNoClear: () => Promise readonly clearSelection: () => void readonly hasSelection: () => boolean readonly getState: () => unknown diff --git a/web/src/lib/gatewayClient.ts b/web/src/lib/gatewayClient.ts index 012482b710..fa58841ce1 100644 --- a/web/src/lib/gatewayClient.ts +++ b/web/src/lib/gatewayClient.ts @@ -32,7 +32,6 @@ export type GatewayEventName = | "sudo.request" | "secret.request" | "background.complete" - | "btw.complete" | "error" | "skin.changed" | (string & {}); diff --git a/web/src/pages/ChatPage.tsx b/web/src/pages/ChatPage.tsx index 80398104a1..525739b192 100644 --- a/web/src/pages/ChatPage.tsx +++ b/web/src/pages/ChatPage.tsx @@ -269,17 +269,17 @@ export default function ChatPage() { const payload = data.slice(semi + 1); if (payload === "?" || payload === "") return false; // read/clear — ignore try { - // atob returns a binary string (one byte per char); we need UTF-8 - // decode so multi-byte codepoints (≥, →, emoji, CJK) round-trip - // correctly. Without this step, the three UTF-8 bytes of `≥` - // would land in the clipboard as the three separate Latin-1 - // characters `≥`. const binary = atob(payload); const bytes = Uint8Array.from(binary, (c) => c.charCodeAt(0)); const text = new TextDecoder("utf-8").decode(bytes); - navigator.clipboard.writeText(text).catch(() => {}); - } catch { - // Malformed base64 — silently drop. + navigator.clipboard.writeText(text).catch((err) => { + // Most common reason: the Clipboard API requires a user gesture. + // This can fail when the OSC 52 response arrives outside the + // original keydown event's activation. 
Log to aid debugging. + console.warn("[dashboard clipboard] OSC 52 write failed:", err.message); + }); + } catch (e) { + console.warn("[dashboard clipboard] malformed OSC 52 payload"); } return true; }); @@ -290,16 +290,31 @@ export default function ChatPage() { term.attachCustomKeyEventHandler((ev) => { if (ev.type !== "keydown") return true; + // Copy: Cmd+C on macOS, Ctrl+Shift+C on other platforms. Bare Ctrl+C + // is reserved for SIGINT to the TUI child — matches xterm / gnome-terminal / + // konsole / Windows Terminal. Ctrl+Shift+C only copies if a selection exists; + // without a selection it passes through to the TUI so agents can still + // react to the keypress. + // Paste: Cmd+Shift+V on macOS, Ctrl+Shift+V on others. const copyModifier = isMac ? ev.metaKey : ev.ctrlKey && ev.shiftKey; const pasteModifier = isMac ? ev.metaKey : ev.ctrlKey && ev.shiftKey; if (copyModifier && ev.key.toLowerCase() === "c") { const sel = term.getSelection(); if (sel) { - navigator.clipboard.writeText(sel).catch(() => {}); + // Direct writeText inside the keydown handler preserves the user + // gesture — async round-trips through OSC 52 can lose activation + // and fail with "Document is not focused". + navigator.clipboard.writeText(sel).catch((err) => { + console.warn("[dashboard clipboard] direct copy failed:", err.message); + }); + // Clear xterm.js's highlight after copy (matches gnome-terminal). + term.clearSelection(); ev.preventDefault(); return false; } + // No selection → fall through so the TUI receives Ctrl+Shift+C + // (or the bare ev if the user used a different modifier). 
} if (pasteModifier && ev.key.toLowerCase() === "v") { @@ -308,7 +323,9 @@ export default function ChatPage() { .then((text) => { if (text) term.paste(text); }) - .catch(() => {}); + .catch((err) => { + console.warn("[dashboard clipboard] paste failed:", err.message); + }); ev.preventDefault(); return false; } diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index 947994844b..9a804859eb 100644 --- a/website/docs/reference/cli-commands.md +++ b/website/docs/reference/cli-commands.md @@ -41,6 +41,7 @@ hermes [global-options] [subcommand/options] | `hermes gateway` | Run or manage the messaging gateway service. | | `hermes setup` | Interactive setup wizard for all or part of the configuration. | | `hermes whatsapp` | Configure and pair the WhatsApp bridge. | +| `hermes slack` | Slack helpers (currently: generate the app manifest with every command as a native slash). | | `hermes auth` | Manage credentials — add, list, remove, reset, set strategy. Handles OAuth flows for Codex/Nous/Anthropic. | | `hermes login` / `logout` | **Deprecated** — use `hermes auth` instead. | | `hermes status` | Show agent, auth, and platform status. | @@ -221,6 +222,33 @@ hermes whatsapp Runs the WhatsApp pairing/setup flow, including mode selection and QR-code pairing. +## `hermes slack` + +```bash +hermes slack manifest # print manifest to stdout +hermes slack manifest --write # write to ~/.hermes/slack-manifest.json +hermes slack manifest --slashes-only # just the features.slash_commands array +``` + +Generates a Slack app manifest that registers every gateway command in +`COMMAND_REGISTRY` (`/btw`, `/stop`, `/model`, …) as a first-class +Slack slash command — matching Discord and Telegram parity. Paste the +output into your Slack app config at +[https://api.slack.com/apps](https://api.slack.com/apps) → your app → +**Features → App Manifest → Edit**, then **Save**. Slack prompts for +reinstall if scopes or slash commands changed. 
+ +| Flag | Default | Purpose | +|------|---------|---------| +| `--write [PATH]` | stdout | Write to a file instead of stdout. Bare `--write` writes `$HERMES_HOME/slack-manifest.json`. | +| `--name NAME` | `Hermes` | Bot display name in Slack. | +| `--description DESC` | default blurb | Bot description shown in the Slack app directory. | +| `--slashes-only` | off | Emit only `features.slash_commands` for merging into a manually-maintained manifest. | + +Run `hermes slack manifest --write` again after `hermes update` to pick +up any new commands. + + ## `hermes login` / `hermes logout` *(Deprecated)* :::caution diff --git a/website/docs/reference/model-catalog.md b/website/docs/reference/model-catalog.md new file mode 100644 index 0000000000..3393ffeebf --- /dev/null +++ b/website/docs/reference/model-catalog.md @@ -0,0 +1,103 @@ +--- +sidebar_position: 11 +title: Model Catalog +description: Remotely-hosted manifest driving curated model picker lists for OpenRouter and Nous Portal. +--- + +# Model Catalog + +Hermes fetches curated model lists for **OpenRouter** and **Nous Portal** from a JSON manifest hosted alongside the docs site. This lets maintainers update picker lists without shipping a new `hermes-agent` release. + +When the manifest is unreachable (offline, network blocked, hosting failure), Hermes silently falls back to the in-repo snapshot that ships with the CLI. The manifest never breaks the picker — worst case you see whatever list was bundled with your installed version. + +## Live manifest URL + +``` +https://hermes-agent.nousresearch.com/docs/api/model-catalog.json +``` + +Published on every merge to `main` via the existing `deploy-site.yml` GitHub Pages pipeline. The source of truth lives in the repo at `website/static/api/model-catalog.json`. 
+ +## Schema + +```json +{ + "version": 1, + "updated_at": "2026-04-25T22:00:00Z", + "metadata": {}, + "providers": { + "openrouter": { + "metadata": {}, + "models": [ + {"id": "moonshotai/kimi-k2.6", "description": "recommended", "metadata": {}}, + {"id": "openai/gpt-5.4", "description": ""} + ] + }, + "nous": { + "metadata": {}, + "models": [ + {"id": "anthropic/claude-opus-4.7"}, + {"id": "moonshotai/kimi-k2.6"} + ] + } + } +} +``` + +Field notes: + +- **`version`** — integer schema version. Future schemas bump this; Hermes refuses manifests with versions it doesn't understand and falls back to the hardcoded snapshot. +- **`metadata`** — free-form dict at the manifest, provider, and model level. Any keys. Hermes ignores unknown fields, so you can annotate entries (`"tier": "paid"`, `"tags": [...]`, etc.) without coordinating a schema change. +- **`description`** — OpenRouter-only. Drives picker badge text (`"recommended"`, `"free"`, or empty). Nous Portal doesn't use this — free-tier gating is determined live from the Portal's pricing endpoint. +- **Pricing and context length** are NOT in the manifest. Those come from live provider APIs (`/v1/models` endpoints, models.dev) at fetch time. + +## Fetch behavior + +| When | What happens | +|---|---| +| `/model` or `hermes model` | Fetches if disk cache is stale, else uses cache | +| Disk cache fresh (< TTL) | No network hit | +| Network failure with cache | Silent fallback to cache, one log line | +| Network failure, no cache | Silent fallback to in-repo snapshot | +| Manifest fails schema validation | Treated as unreachable | + +Cache location: `~/.hermes/cache/model_catalog.json`. + +## Config + +```yaml +model_catalog: + enabled: true + url: https://hermes-agent.nousresearch.com/docs/api/model-catalog.json + ttl_hours: 24 + providers: {} +``` + +Set `enabled: false` to disable remote fetch entirely and always use the in-repo snapshot. 
+ +### Per-provider override URLs + +Third parties can self-host their own curation list using the same schema. Point a provider at a custom URL: + +```yaml +model_catalog: + providers: + openrouter: + url: https://example.com/my-openrouter-curation.json +``` + +The overriding manifest only needs to populate the provider block(s) it cares about. Other providers continue to resolve against the master URL. + +## Updating the manifest + +Maintainers: + +```bash +# Re-generate from the in-repo hardcoded lists (keeps manifest in sync after +# editing OPENROUTER_MODELS or _PROVIDER_MODELS["nous"] in hermes_cli/models.py). +python scripts/build_model_catalog.py +``` + +Then PR the resulting change to `website/static/api/model-catalog.json` to `main`. The docs site auto-deploys on merge and the new manifest is live within a few minutes. + +You can also hand-edit the JSON directly for fine-grained metadata changes that don't belong in the in-repo snapshot — the generator script is a convenience, not the single source of truth. diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md index 6e04bcd010..ed2a2ff2fc 100644 --- a/website/docs/reference/slash-commands.md +++ b/website/docs/reference/slash-commands.md @@ -36,8 +36,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in | `/resume [name]` | Resume a previously-named session | | `/status` | Show session info | | `/agents` (alias: `/tasks`) | Show active agents and running tasks across the current session. | -| `/background ` (alias: `/bg`) | Run a prompt in a separate background session. The agent processes your prompt independently — your current session stays free for other work. Results appear as a panel when the task finishes. See [CLI Background Sessions](/docs/user-guide/cli#background-sessions). | -| `/btw ` | Ephemeral side question using session context (no tools, not persisted). 
Useful for quick clarifications without affecting the conversation history. | +| `/background ` (alias: `/bg`, `/btw`) | Run a prompt in a separate background session. The agent processes your prompt independently — your current session stays free for other work. Results appear as a panel when the task finishes. See [CLI Background Sessions](/docs/user-guide/cli#background-sessions). | | `/branch [name]` (alias: `/fork`) | Branch the current session (explore a different path) | ### Configuration diff --git a/website/docs/user-guide/cli.md b/website/docs/user-guide/cli.md index 90b571aa8b..0ba7245958 100644 --- a/website/docs/user-guide/cli.md +++ b/website/docs/user-guide/cli.md @@ -242,6 +242,10 @@ You can also change it inside the CLI: /busy status ``` +:::tip First-touch hint +The very first time you press Enter while Hermes is working, Hermes prints a one-line reminder explaining the `/busy` knob (`"(tip) Your message interrupted the current run…"`). It only fires once per install — a flag in `config.yaml` under `onboarding.seen.busy_input_prompt` latches it. Delete that key to see the tip again. +::: + ### Suspending to Background On Unix systems, press **`Ctrl+Z`** to suspend Hermes to the background — just like any terminal process. The shell prints a confirmation: diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 1da5963b7d..ac48e9f884 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -146,7 +146,9 @@ terminal: **Requirements:** Docker Desktop or Docker Engine installed and running. Hermes probes `$PATH` plus common macOS install locations (`/usr/local/bin/docker`, `/opt/homebrew/bin/docker`, Docker Desktop app bundle). -**Container lifecycle:** Each session starts a long-lived container (`docker run -d ... sleep 2h`). Commands run via `docker exec` with a login shell. On cleanup, the container is stopped and removed. 
+**Container lifecycle:** Hermes reuses a single long-lived container (`docker run -d ... sleep 2h`) for every terminal and file-tool call made by the top-level agent, across sessions, `/new`, and `/reset`, for the lifetime of the Hermes process. Commands run via `docker exec` with a login shell, so working-directory changes, installed packages, and files in `/workspace` all persist from one tool call to the next. The container is stopped and removed on Hermes shutdown (or when the idle-sweep reclaims it). + +Subagents (`delegate_task`) and RL rollouts get their own isolated containers keyed by `task_id` — only the top-level agent shares the `default` container. **Security hardening:** - `--cap-drop ALL` with only `DAC_OVERRIDE`, `CHOWN`, `FOWNER` added back diff --git a/website/docs/user-guide/features/browser.md b/website/docs/user-guide/features/browser.md index ca51b633ef..3bc1b0bb72 100644 --- a/website/docs/user-guide/features/browser.md +++ b/website/docs/user-guide/features/browser.md @@ -86,6 +86,40 @@ FIRECRAWL_API_URL=http://localhost:3002 FIRECRAWL_BROWSER_TTL=600 ``` +### Hybrid routing: cloud for public URLs, local for LAN/localhost + +When a cloud provider is configured, Hermes auto-spawns a **local Chromium sidecar** +for URLs that resolve to a private/loopback/LAN address (`localhost`, `127.0.0.1`, +`192.168.x.x`, `10.x.x.x`, `172.16-31.x.x`, `*.local`, `*.lan`, `*.internal`, +IPv6 loopback `::1`, link-local `169.254.x.x`). Public URLs continue to use the +cloud provider in the same conversation. + +This solves the common "I'm developing locally but using Browserbase" workflow — +the agent can screenshot your dashboard at `http://localhost:3000` AND scrape +`https://github.com` without you switching providers or disabling the SSRF guard. +The cloud provider never sees the private URL. + +The feature is **on by default**. 
To disable it (all URLs go to the configured +cloud provider, as before): + +```yaml +# ~/.hermes/config.yaml +browser: + cloud_provider: browserbase + auto_local_for_private_urls: false +``` + +With auto-routing disabled, private URLs are rejected with +`"Blocked: URL targets a private or internal address"` unless you also set +`browser.allow_private_urls: true` (which lets the cloud provider attempt them — +usually won't work since Browserbase etc. can't reach your LAN). + +Requirements: the local sidecar uses the same `agent-browser` CLI as pure local +mode, so you need it installed (`hermes setup tools → Browser Automation` +auto-installs it). Post-navigation redirects from a public URL onto a private +address are still blocked (you can't use a redirect-to-internal trick to reach +your LAN through the public path). + ### Camofox local mode [Camofox](https://github.com/jo-inc/camofox-browser) is a self-hosted Node.js server wrapping Camoufox (a Firefox fork with C++ fingerprint spoofing). It provides local anti-detection browsing without cloud dependencies. diff --git a/website/docs/user-guide/messaging/index.md b/website/docs/user-guide/messaging/index.md index dcde46a6b5..2e6fa4f212 100644 --- a/website/docs/user-guide/messaging/index.md +++ b/website/docs/user-guide/messaging/index.md @@ -219,6 +219,17 @@ Send any message while the agent is working to interrupt it. Key behaviors: - **Multiple messages are combined** — messages sent during interruption are joined into one prompt - **`/stop` command** — interrupts without queuing a follow-up message +### Queue vs interrupt (busy-input mode) + +By default, messaging a busy agent interrupts it. To switch the whole install so follow-ups queue behind the current task instead, set: + +```yaml +display: + busy_input_mode: queue # default: interrupt +``` + +The first time you message a busy agent on any platform, Hermes appends a one-line reminder to the busy-ack explaining the knob (`"💡 First-time tip — …"`). 
The reminder fires once per install — a flag under `onboarding.seen.busy_input_prompt` latches it. Delete that key to see the tip again. + ## Tool Progress Notifications Control how much tool activity is displayed in `~/.hermes/config.yaml`: diff --git a/website/docs/user-guide/messaging/slack.md b/website/docs/user-guide/messaging/slack.md index a7eff683da..2f598fcfe9 100644 --- a/website/docs/user-guide/messaging/slack.md +++ b/website/docs/user-guide/messaging/slack.md @@ -29,13 +29,36 @@ the steps below. ## Step 1: Create a Slack App +The fastest path is to paste a manifest Hermes generates for you. It +declares every built-in slash command (`/btw`, `/stop`, `/model`, …), +every required OAuth scope, every event subscription, and enables Socket +Mode — all at once. + +### Option A: From a Hermes-generated manifest (recommended) + +1. Generate the manifest: + ```bash + hermes slack manifest --write + ``` + This writes `~/.hermes/slack-manifest.json` and prints paste-in + instructions. +2. Go to [https://api.slack.com/apps](https://api.slack.com/apps) → + **Create New App** → **From an app manifest** +3. Pick your workspace, paste the JSON contents, review, click **Next** + → **Create** +4. Skip ahead to **Step 6: Install App to Workspace**. The manifest + handled scopes, events, and slash commands for you. + +### Option B: From scratch (manual) + 1. Go to [https://api.slack.com/apps](https://api.slack.com/apps) 2. Click **Create New App** 3. Choose **From scratch** 4. Enter an app name (e.g., "Hermes Agent") and select your workspace 5. Click **Create App** -You'll land on the app's **Basic Information** page. +You'll land on the app's **Basic Information** page. Continue with +Steps 2–6 below. --- @@ -203,6 +226,57 @@ The bot will **not** automatically join channels. You must invite it to each cha --- +## Slash Commands + +Every Hermes command (`/btw`, `/stop`, `/new`, `/model`, `/help`, ...) 
+is a native Slack slash command — exactly the way they work on Telegram +and Discord. Type `/` in Slack and the autocomplete picker lists every +Hermes command with its description. + +Under the hood: Hermes ships with a generated Slack app manifest (see +Step 1, Option A) that declares every command in +[`COMMAND_REGISTRY`](https://github.com/NousResearch/hermes-agent/blob/main/hermes_cli/commands.py) +as a slash command. In Socket Mode, Slack routes the command event +through the WebSocket regardless of the manifest's `url` field. + +### Refreshing slash commands after updates + +When Hermes adds new commands (e.g. after `hermes update`), regenerate +the manifest and update your Slack app: + +```bash +hermes slack manifest --write +``` + +Then in Slack: +1. Open [https://api.slack.com/apps](https://api.slack.com/apps) → + your Hermes app +2. **Features → App Manifest → Edit** +3. Paste the new contents of `~/.hermes/slack-manifest.json` +4. **Save**. Slack will prompt to reinstall the app if scopes or slash + commands changed. + +### Legacy `/hermes ` still works + +For backward compatibility with older manifests, you can still type +`/hermes btw run the tests` — Hermes routes it the same way as `/btw +run the tests`. Free-form questions also work: `/hermes what's the +weather?` is treated as a regular message. + +### Advanced: emit only the slash-commands array + +If you maintain your Slack manifest by hand and just want the slash +command list: + +```bash +hermes slack manifest --slashes-only > /tmp/slashes.json +``` + +Paste that array into the `features.slash_commands` key of your +existing manifest. 
+ +--- + ## How the Bot Responds Understanding how Hermes behaves in different contexts: diff --git a/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent.md b/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent.md index efd6326259..10a91f2aae 100644 --- a/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent.md +++ b/website/docs/user-guide/skills/bundled/autonomous-ai-agents/autonomous-ai-agents-hermes-agent.md @@ -298,7 +298,6 @@ Type these during an interactive chat session. ### Utility ``` /branch (/fork) Branch the current session -/btw Ephemeral side question (doesn't interrupt main task) /fast Toggle priority/fast processing /browser Open CDP browser connection /history Show conversation history (CLI) diff --git a/website/scripts/extract-skills.py b/website/scripts/extract-skills.py index 30cf523161..79413aec0f 100644 --- a/website/scripts/extract-skills.py +++ b/website/scripts/extract-skills.py @@ -26,7 +26,6 @@ CATEGORY_LABELS = { "dogfood": "Dogfood", "domain": "Domain", "email": "Email", - "feeds": "Feeds", "gaming": "Gaming", "gifs": "GIFs", "github": "GitHub", diff --git a/website/sidebars.ts b/website/sidebars.ts index b3663e9da5..b654291810 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -613,6 +613,7 @@ const sidebars: SidebarsConfig = { 'reference/tools-reference', 'reference/toolsets-reference', 'reference/mcp-config-reference', + 'reference/model-catalog', 'reference/skills-catalog', 'reference/optional-skills-catalog', 'reference/faq', diff --git a/website/static/api/model-catalog.json b/website/static/api/model-catalog.json new file mode 100644 index 0000000000..a2ef50a1e1 --- /dev/null +++ b/website/static/api/model-catalog.json @@ -0,0 +1,259 @@ +{ + "version": 1, + "updated_at": "2026-04-26T12:34:42Z", + "metadata": { + "source": "hermes-agent repo", + "docs": 
"https://hermes-agent.nousresearch.com/docs/reference/model-catalog" + }, + "providers": { + "openrouter": { + "metadata": { + "display_name": "OpenRouter", + "note": "Descriptions drive picker badges. Live /api/v1/models filters curated ids by tool-calling support and free pricing." + }, + "models": [ + { + "id": "moonshotai/kimi-k2.6", + "description": "recommended" + }, + { + "id": "deepseek/deepseek-v4-pro", + "description": "" + }, + { + "id": "deepseek/deepseek-v4-flash", + "description": "" + }, + { + "id": "anthropic/claude-opus-4.7", + "description": "" + }, + { + "id": "anthropic/claude-opus-4.6", + "description": "" + }, + { + "id": "anthropic/claude-sonnet-4.6", + "description": "" + }, + { + "id": "qwen/qwen3.6-plus", + "description": "" + }, + { + "id": "anthropic/claude-sonnet-4.5", + "description": "" + }, + { + "id": "anthropic/claude-haiku-4.5", + "description": "" + }, + { + "id": "openrouter/elephant-alpha", + "description": "free" + }, + { + "id": "openai/gpt-5.5", + "description": "" + }, + { + "id": "openai/gpt-5.4-mini", + "description": "" + }, + { + "id": "xiaomi/mimo-v2.5-pro", + "description": "" + }, + { + "id": "xiaomi/mimo-v2.5", + "description": "" + }, + { + "id": "openai/gpt-5.3-codex", + "description": "" + }, + { + "id": "google/gemini-3-pro-image-preview", + "description": "" + }, + { + "id": "google/gemini-3-flash-preview", + "description": "" + }, + { + "id": "google/gemini-3.1-pro-preview", + "description": "" + }, + { + "id": "google/gemini-3.1-flash-lite-preview", + "description": "" + }, + { + "id": "qwen/qwen3.5-plus-02-15", + "description": "" + }, + { + "id": "qwen/qwen3.5-35b-a3b", + "description": "" + }, + { + "id": "stepfun/step-3.5-flash", + "description": "" + }, + { + "id": "minimax/minimax-m2.7", + "description": "" + }, + { + "id": "minimax/minimax-m2.5", + "description": "" + }, + { + "id": "minimax/minimax-m2.5:free", + "description": "free" + }, + { + "id": "z-ai/glm-5.1", + "description": "" + }, + { + 
"id": "z-ai/glm-5v-turbo", + "description": "" + }, + { + "id": "z-ai/glm-5-turbo", + "description": "" + }, + { + "id": "x-ai/grok-4.20", + "description": "" + }, + { + "id": "nvidia/nemotron-3-super-120b-a12b", + "description": "" + }, + { + "id": "nvidia/nemotron-3-super-120b-a12b:free", + "description": "free" + }, + { + "id": "arcee-ai/trinity-large-preview:free", + "description": "free" + }, + { + "id": "arcee-ai/trinity-large-thinking", + "description": "" + }, + { + "id": "openai/gpt-5.5-pro", + "description": "" + }, + { + "id": "openai/gpt-5.4-nano", + "description": "" + } + ] + }, + "nous": { + "metadata": { + "display_name": "Nous Portal", + "note": "Free-tier gating is determined live via Portal pricing (partition_nous_models_by_tier), not this manifest." + }, + "models": [ + { + "id": "moonshotai/kimi-k2.6" + }, + { + "id": "deepseek/deepseek-v4-pro" + }, + { + "id": "deepseek/deepseek-v4-flash" + }, + { + "id": "xiaomi/mimo-v2.5-pro" + }, + { + "id": "xiaomi/mimo-v2.5" + }, + { + "id": "anthropic/claude-opus-4.7" + }, + { + "id": "anthropic/claude-opus-4.6" + }, + { + "id": "anthropic/claude-sonnet-4.6" + }, + { + "id": "anthropic/claude-sonnet-4.5" + }, + { + "id": "anthropic/claude-haiku-4.5" + }, + { + "id": "openai/gpt-5.5" + }, + { + "id": "openai/gpt-5.4-mini" + }, + { + "id": "openai/gpt-5.3-codex" + }, + { + "id": "google/gemini-3-pro-preview" + }, + { + "id": "google/gemini-3-flash-preview" + }, + { + "id": "google/gemini-3.1-pro-preview" + }, + { + "id": "google/gemini-3.1-flash-lite-preview" + }, + { + "id": "qwen/qwen3.5-plus-02-15" + }, + { + "id": "qwen/qwen3.5-35b-a3b" + }, + { + "id": "stepfun/step-3.5-flash" + }, + { + "id": "minimax/minimax-m2.7" + }, + { + "id": "minimax/minimax-m2.5" + }, + { + "id": "minimax/minimax-m2.5:free" + }, + { + "id": "z-ai/glm-5.1" + }, + { + "id": "z-ai/glm-5v-turbo" + }, + { + "id": "z-ai/glm-5-turbo" + }, + { + "id": "x-ai/grok-4.20-beta" + }, + { + "id": "nvidia/nemotron-3-super-120b-a12b" + }, + { 
+ "id": "arcee-ai/trinity-large-thinking" + }, + { + "id": "openai/gpt-5.5-pro" + }, + { + "id": "openai/gpt-5.4-nano" + } + ] + } + } +}