diff --git a/.dockerignore b/.dockerignore index 41999f5ac6..f4a02484eb 100644 --- a/.dockerignore +++ b/.dockerignore @@ -25,3 +25,7 @@ ui-tui/packages/hermes-ink/dist/ # Runtime data (bind-mounted at /opt/data; must not leak into build context) data/ + +# Compose/profile runtime state (bind-mounted; avoid ownership/secret issues) +hermes-config/ +runtime/ diff --git a/AGENTS.md b/AGENTS.md index 57f8a2aaa4..9737ceed5f 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -282,7 +282,16 @@ The dashboard embeds the real `hermes --tui` — **not** a rewrite. See `hermes ## Adding New Tools -Requires changes in **2 files**: +For most custom or local-only tools, do **not** edit Hermes core. Use the plugin +route instead: create `~/.hermes/plugins//plugin.yaml` and +`~/.hermes/plugins//__init__.py`, then register tools with +`ctx.register_tool(...)`. Plugin toolsets are discovered automatically and can be +enabled or disabled without touching `tools/` or `toolsets.py`. + +Use the built-in route below only when the user is explicitly contributing a new +core Hermes tool that should ship in the base system. + +Built-in/core tools require changes in **2 files**: **1. Create `tools/your_tool.py`:** ```python diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 8d8334acd1..bb1b33fcc8 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -76,6 +76,7 @@ _ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7") # Models where temperature/top_p/top_k return 400 if set to non-default values. # This is the Opus 4.7 contract; future 4.x+ models are expected to follow it. _NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7") +_FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6") # ── Max output token limits per Anthropic model ─────────────────────── # Source: Anthropic docs + Cline model catalog. 
Anthropic's API requires @@ -105,6 +106,9 @@ _ANTHROPIC_OUTPUT_LIMITS = { "claude-3-haiku": 4_096, # Third-party Anthropic-compatible providers "minimax": 131_072, + # Qwen models via DashScope Anthropic-compatible endpoint + # DashScope enforces max_tokens ∈ [1, 65536] + "qwen3": 65_536, } # For any model not in the table, assume the highest current limit. @@ -216,6 +220,17 @@ def _forbids_sampling_params(model: str) -> bool: return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS) +def _supports_fast_mode(model: str) -> bool: + """Return True for models that support Anthropic Fast Mode (speed=fast). + + Per Anthropic docs, fast mode is currently supported on Opus 4.6 only. + Sending ``speed: "fast"`` to any other Claude model (including Opus 4.7) + returns HTTP 400. This guard prevents silently 400'ing when stale config + or older callers leave fast mode enabled across a model upgrade. + """ + return any(v in model for v in _FAST_MODE_SUPPORTED_SUBSTRINGS) + + # Beta headers for enhanced features (sent with ALL auth types). # As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the # beta headers are still accepted (harmless no-op) but not required. Kept @@ -1222,6 +1237,14 @@ def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]: ``keep_nullable_hint=False`` because the Anthropic validator does not recognize the OpenAPI-style ``nullable: true`` extension and strict schema-to-grammar converters may reject unknown keywords. + + Top-level ``oneOf``/``allOf``/``anyOf`` are also stripped here: the + Anthropic API rejects union keywords at the schema root with a generic + HTTP 400. Several upstream and plugin tools ship schemas with one of + these keywords at the top level (commonly for Pydantic discriminated + unions). If we land here with those keywords still present after + nullable-union stripping, drop them and fall back to a plain object + schema so the tool still validates at the Anthropic boundary. 
""" if not schema: return {"type": "object", "properties": {}} @@ -1231,6 +1254,12 @@ def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]: normalized = strip_nullable_unions(schema, keep_nullable_hint=False) if not isinstance(normalized, dict): return {"type": "object", "properties": {}} + # Strip top-level union keywords that Anthropic's validator rejects. + banned = {"oneOf", "allOf", "anyOf"} + if banned & normalized.keys(): + normalized = {k: v for k, v in normalized.items() if k not in banned} + if "type" not in normalized: + normalized["type"] = "object" if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict): normalized = {**normalized, "properties": {}} return normalized @@ -1915,9 +1944,15 @@ def build_anthropic_kwargs( # ── Fast mode (Opus 4.6 only) ──────────────────────────────────── # Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x - # output speed. Only for native Anthropic endpoints — third-party - # providers would reject the unknown beta header and speed parameter. - if fast_mode and not _is_third_party_anthropic_endpoint(base_url): + # output speed. Per Anthropic docs, fast mode is only supported on + # Opus 4.6 — Opus 4.7 and other models 400 on the speed parameter. + # Only for native Anthropic endpoints — third-party providers would + # reject the unknown beta header and speed parameter. + if ( + fast_mode + and not _is_third_party_anthropic_endpoint(base_url) + and _supports_fast_mode(model) + ): kwargs.setdefault("extra_body", {})["speed"] = "fast" # Build extra_headers with ALL applicable betas (the per-request # extra_headers override the client-level anthropic-beta header). 
diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index b86f78f8ec..4c706748a0 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -1529,7 +1529,7 @@ def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]: return CodexAuxiliaryClient(real_client, model), model -def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]: +def _try_anthropic(explicit_api_key: str = None) -> Tuple[Optional[Any], Optional[str]]: try: from agent.anthropic_adapter import build_anthropic_client, resolve_anthropic_token except ImportError: @@ -1539,10 +1539,10 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]: if pool_present: if entry is None: return None, None - token = _pool_runtime_api_key(entry) + token = explicit_api_key or _pool_runtime_api_key(entry) else: entry = None - token = resolve_anthropic_token() + token = explicit_api_key or resolve_anthropic_token() if not token: return None, None @@ -2336,7 +2336,7 @@ def resolve_provider_client( if pconfig.auth_type == "api_key": if provider == "anthropic": - client, default_model = _try_anthropic() + client, default_model = _try_anthropic(explicit_api_key=explicit_api_key) if client is None: logger.warning("resolve_provider_client: anthropic requested but no Anthropic credentials found") return None, None @@ -2648,8 +2648,11 @@ def resolve_vision_provider_client( return resolved_provider, sync_client, final_model if resolved_base_url: + provider_for_base_override = ( + requested if requested and requested not in ("", "auto") else "custom" + ) client, final_model = resolve_provider_client( - "custom", + provider_for_base_override, model=resolved_model, async_mode=async_mode, explicit_base_url=resolved_base_url, @@ -2657,8 +2660,8 @@ def resolve_vision_provider_client( api_mode=resolved_api_mode, ) if client is None: - return "custom", None, None - return "custom", client, final_model + return provider_for_base_override, None, None + return 
provider_for_base_override, client, final_model if requested == "auto": # Vision auto-detection order: diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 21f07df491..f9111f9600 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -344,6 +344,7 @@ class ContextCompressor(ContextEngine): self._last_aux_model_failure_model = None self._last_compression_savings_pct = 100.0 self._ineffective_compression_count = 0 + self._summary_failure_cooldown_until = 0.0 # transient errors must not block a fresh session def update_model( self, @@ -553,7 +554,16 @@ class ContextCompressor(ContextEngine): break accumulated += msg_tokens boundary = i - prune_boundary = max(boundary, len(result) - min_protect) + # Translate the budget walk into a "protected count", apply the + # floor in count-space (where `max` reads naturally: protect at + # least `min_protect` messages or whatever the budget reserved, + # whichever is more), then convert back to a prune boundary. + # Doing this in index-space with `max` would invert the direction + # (smaller index = MORE protected), so a generous budget would + # silently get truncated back down to `min_protect`. 
+ budget_protect_count = len(result) - boundary + protected_count = max(budget_protect_count, min_protect) + prune_boundary = len(result) - protected_count else: prune_boundary = len(result) - protect_tail_count @@ -569,6 +579,8 @@ class ContextCompressor(ContextEngine): # Skip multimodal content (list of content blocks) if isinstance(content, list): continue + if not isinstance(content, str): + continue if len(content) < 200: continue h = hashlib.md5(content.encode("utf-8", errors="replace")).hexdigest()[:12] @@ -588,6 +600,8 @@ class ContextCompressor(ContextEngine): # Skip multimodal content (list of content blocks) if isinstance(content, list): continue + if not isinstance(content, str): + continue if not content or content == _PRUNED_TOOL_PLACEHOLDER: continue # Skip already-deduplicated or previously-summarized results @@ -903,15 +917,19 @@ The user has requested that this compaction PRIORITISE preserving all informatio or "does not exist" in _err_str or "no available channel" in _err_str ) + _is_timeout = ( + _status in (408, 429, 502, 504) + or "timeout" in _err_str + ) if ( - _is_model_not_found + (_is_model_not_found or _is_timeout) and self.summary_model and self.summary_model != self.model and not getattr(self, "_summary_model_fallen_back", False) ): self._summary_model_fallen_back = True logging.warning( - "Summary model '%s' not available (%s). " + "Summary model '%s' unavailable (%s). 
" "Falling back to main model '%s' for compression.", self.summary_model, e, self.model, ) diff --git a/agent/curator.py b/agent/curator.py index cce3d8c103..a726e875b6 100644 --- a/agent/curator.py +++ b/agent/curator.py @@ -24,11 +24,12 @@ from __future__ import annotations import json import logging import os +import re import tempfile import threading from datetime import datetime, timedelta, timezone from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Set +from typing import Any, Callable, Dict, List, NamedTuple, Optional, Set from hermes_constants import get_hermes_home from tools import skill_usage @@ -36,6 +37,22 @@ from tools import skill_usage logger = logging.getLogger(__name__) +def _strip_aux_credential(value: Any) -> Optional[str]: + if value is None: + return None + text = str(value).strip() + return text or None + + +class _ReviewRuntimeBinding(NamedTuple): + """Provider/model for the curator review fork plus optional per-slot overrides.""" + + provider: str + model: str + explicit_api_key: Optional[str] + explicit_base_url: Optional[str] + + DEFAULT_INTERVAL_HOURS = 24 * 7 # 7 days DEFAULT_MIN_IDLE_HOURS = 2 DEFAULT_STALE_AFTER_DAYS = 30 @@ -453,6 +470,24 @@ def _reports_root() -> Path: return root +def _needle_in_path_component(needle: str, path: str) -> bool: + """Check if *needle* is a complete filename stem or directory name in *path*. + + Unlike simple substring matching, this avoids false positives where short + skill names are embedded in longer filenames (e.g. "api" matching + "references/api-design.md"). Hyphens and underscores are normalised so + "open-webui-setup" matches "open_webui_setup.md". + """ + norm_needle = needle.replace("-", "_") + for part in path.replace("\\", "/").split("/"): + if not part: + continue + stem = part.rsplit(".", 1)[0] if "." 
in part else part + if stem.replace("-", "_") == norm_needle: + return True + return False + + def _classify_removed_skills( removed: List[str], added: List[str], @@ -531,15 +566,29 @@ def _classify_removed_skills( continue # Look for the removed skill's name in file_path / content / raw. - haystacks: List[str] = [] + # Matching strategy differs by field type: + # file_path — needle must be a complete path component + # (filename stem or directory name), so "api" does NOT + # falsely match "references/api-design.md". + # content fields — word-boundary regex so "test" does NOT + # falsely match "latest" or "testing". + haystacks: List[tuple[str, str]] = [] for key in ("file_path", "file_content", "content", "new_string", "_raw"): v = args.get(key) if isinstance(v, str): - haystacks.append(v) + haystacks.append((key, v)) hit = False - for hay in haystacks: + for key, hay in haystacks: for needle in needles: - if needle and needle in hay: + if not needle: + continue + if key == "file_path": + matched = _needle_in_path_component(needle, hay) + else: + matched = bool( + re.search(rf'\b{re.escape(needle)}\b', hay) + ) + if matched: hit = True evidence = ( f"skill_manage action={args.get('action', '?')} " @@ -1398,6 +1447,52 @@ def run_curator_review( } +def _resolve_review_runtime(cfg: Dict[str, Any]) -> _ReviewRuntimeBinding: + """Resolve provider/model and per-slot credentials for the curator review fork. + + Same precedence as `_resolve_review_model()`. Non-empty ``api_key`` / + ``base_url`` from the active slot are returned as explicit overrides so + ``resolve_runtime_provider`` does not silently reuse the main chat + credential chain for a routed auxiliary model. + """ + _main = cfg.get("model", {}) if isinstance(cfg.get("model"), dict) else {} + _main_provider = _main.get("provider") or "auto" + _main_model = _main.get("default") or _main.get("model") or "" + + # 1. 
Canonical aux task slot + _aux = cfg.get("auxiliary", {}) if isinstance(cfg.get("auxiliary"), dict) else {} + _cur_task = _aux.get("curator", {}) if isinstance(_aux.get("curator"), dict) else {} + _task_provider = (_cur_task.get("provider") or "").strip() or None + _task_model = (_cur_task.get("model") or "").strip() or None + if _task_provider and _task_provider != "auto" and _task_model: + return _ReviewRuntimeBinding( + _task_provider, + _task_model, + _strip_aux_credential(_cur_task.get("api_key")), + _strip_aux_credential(_cur_task.get("base_url")), + ) + + # 2. Legacy curator.auxiliary.{provider,model} (deprecated, pre-unification) + _cur = cfg.get("curator", {}) if isinstance(cfg.get("curator"), dict) else {} + _legacy = _cur.get("auxiliary", {}) if isinstance(_cur.get("auxiliary"), dict) else {} + _legacy_provider = _legacy.get("provider") or None + _legacy_model = _legacy.get("model") or None + if _legacy_provider and _legacy_model: + logger.info( + "curator: using deprecated curator.auxiliary.{provider,model} " + "config — please migrate to auxiliary.curator.{provider,model}" + ) + return _ReviewRuntimeBinding( + str(_legacy_provider), + str(_legacy_model), + _strip_aux_credential(_legacy.get("api_key")), + _strip_aux_credential(_legacy.get("base_url")), + ) + + # 3. Fall through to the main chat model + return _ReviewRuntimeBinding(_main_provider, _main_model, None, None) + + def _resolve_review_model(cfg: Dict[str, Any]) -> tuple[str, str]: """Pick (provider, model) for the curator review fork. @@ -1413,32 +1508,8 @@ def _resolve_review_model(cfg: Dict[str, Any]) -> tuple[str, str]: 2. Legacy ``curator.auxiliary.{provider,model}`` when both are set 3. Main ``model.{provider,default/model}`` pair """ - _main = cfg.get("model", {}) if isinstance(cfg.get("model"), dict) else {} - _main_provider = _main.get("provider") or "auto" - _main_model = _main.get("default") or _main.get("model") or "" - - # 1. 
Canonical aux task slot - _aux = cfg.get("auxiliary", {}) if isinstance(cfg.get("auxiliary"), dict) else {} - _cur_task = _aux.get("curator", {}) if isinstance(_aux.get("curator"), dict) else {} - _task_provider = (_cur_task.get("provider") or "").strip() or None - _task_model = (_cur_task.get("model") or "").strip() or None - if _task_provider and _task_provider != "auto" and _task_model: - return _task_provider, _task_model - - # 2. Legacy curator.auxiliary.{provider,model} (deprecated, pre-unification) - _cur = cfg.get("curator", {}) if isinstance(cfg.get("curator"), dict) else {} - _legacy = _cur.get("auxiliary", {}) if isinstance(_cur.get("auxiliary"), dict) else {} - _legacy_provider = _legacy.get("provider") or None - _legacy_model = _legacy.get("model") or None - if _legacy_provider and _legacy_model: - logger.info( - "curator: using deprecated curator.auxiliary.{provider,model} " - "config — please migrate to auxiliary.curator.{provider,model}" - ) - return _legacy_provider, _legacy_model - - # 3. Fall through to the main chat model - return _main_provider, _main_model + b = _resolve_review_runtime(cfg) + return b.provider, b.model def _run_llm_review(prompt: str) -> Dict[str, Any]: @@ -1477,10 +1548,10 @@ def _run_llm_review(prompt: str) -> Dict[str, Any]: # arguments hits an auto-resolution path that fails for OAuth-only # providers and for pool-backed credentials. # - # `_resolve_review_model()` honors `auxiliary.curator.{provider,model}` + # `_resolve_review_runtime()` honors `auxiliary.curator.{provider,model,...}` # (canonical aux-task slot, wired through `hermes model` → auxiliary # picker and the dashboard Models tab), with a legacy fallback to - # `curator.auxiliary.{provider,model}`. See docs/user-guide/features/curator.md. + # `curator.auxiliary.{provider,model,...}`. See docs/user-guide/features/curator.md. 
_api_key = None _base_url = None _api_mode = None @@ -1490,9 +1561,13 @@ def _run_llm_review(prompt: str) -> Dict[str, Any]: from hermes_cli.config import load_config from hermes_cli.runtime_provider import resolve_runtime_provider _cfg = load_config() - _provider, _model_name = _resolve_review_model(_cfg) + _binding = _resolve_review_runtime(_cfg) + _provider, _model_name = _binding.provider, _binding.model _rp = resolve_runtime_provider( - requested=_provider, target_model=_model_name + requested=_provider, + target_model=_model_name, + explicit_api_key=_binding.explicit_api_key, + explicit_base_url=_binding.explicit_base_url, ) _api_key = _rp.get("api_key") _base_url = _rp.get("base_url") diff --git a/agent/error_classifier.py b/agent/error_classifier.py index 86e99ec1ac..67feaa4304 100644 --- a/agent/error_classifier.py +++ b/agent/error_classifier.py @@ -520,7 +520,12 @@ def classify_api_error( is_disconnect = any(p in error_msg for p in _SERVER_DISCONNECT_PATTERNS) if is_disconnect and not status_code: - is_large = approx_tokens > context_length * 0.6 or approx_tokens > 120000 or num_messages > 200 + # Absolute token/message-count thresholds are only a proxy for smaller + # context windows. Large-context sessions can have hundreds of + # messages while still being far below their actual token budget. + is_large = approx_tokens > context_length * 0.6 or ( + context_length <= 256000 and (approx_tokens > 120000 or num_messages > 200) + ) if is_large: return _result( FailoverReason.context_overflow, @@ -766,7 +771,12 @@ def _classify_400( if not err_body_msg: err_body_msg = str(body.get("message") or "").strip().lower() is_generic = len(err_body_msg) < 30 or err_body_msg in ("error", "") - is_large = approx_tokens > context_length * 0.4 or approx_tokens > 80000 or num_messages > 80 + # Absolute token/message-count thresholds are only a proxy for smaller + # context windows. 
Large-context sessions can have many messages while + # still being far below their actual token budget. + is_large = approx_tokens > context_length * 0.4 or ( + context_length <= 256000 and (approx_tokens > 80000 or num_messages > 80) + ) if is_generic and is_large: return result_fn( diff --git a/agent/gemini_native_adapter.py b/agent/gemini_native_adapter.py index 5f64636f2f..2416a6bc89 100644 --- a/agent/gemini_native_adapter.py +++ b/agent/gemini_native_adapter.py @@ -679,7 +679,21 @@ def translate_stream_event(event: Dict[str, Any], model: str, tool_call_indices: finish_reason_raw = str(cand.get("finishReason") or "") if finish_reason_raw: mapped = "tool_calls" if tool_call_indices else _map_gemini_finish_reason(finish_reason_raw) - chunks.append(_make_stream_chunk(model=model, finish_reason=mapped)) + finish_chunk = _make_stream_chunk(model=model, finish_reason=mapped) + # Attach usage from this event's usageMetadata so the streaming + # loop in run_agent.py can record token counts (mirrors the + # non-streaming path in translate_gemini_response). + usage_meta = event.get("usageMetadata") or {} + if usage_meta: + finish_chunk.usage = SimpleNamespace( + prompt_tokens=int(usage_meta.get("promptTokenCount") or 0), + completion_tokens=int(usage_meta.get("candidatesTokenCount") or 0), + total_tokens=int(usage_meta.get("totalTokenCount") or 0), + prompt_tokens_details=SimpleNamespace( + cached_tokens=int(usage_meta.get("cachedContentTokenCount") or 0), + ), + ) + chunks.append(finish_chunk) return chunks diff --git a/agent/google_oauth.py b/agent/google_oauth.py index d6b96da6e5..ede64251e2 100644 --- a/agent/google_oauth.py +++ b/agent/google_oauth.py @@ -489,16 +489,29 @@ def save_credentials(creds: GoogleCredentials) -> Path: """Atomically write creds to disk with 0o600 permissions.""" path = _credentials_path() path.parent.mkdir(parents=True, exist_ok=True) + # Tighten parent dir to 0o700 so siblings can't traverse to the creds file. 
+ # On Windows this is a no-op (POSIX mode bits aren't enforced); ignore failures. + try: + os.chmod(path.parent, 0o700) + except OSError: + pass payload = json.dumps(creds.to_dict(), indent=2, sort_keys=True) + "\n" with _credentials_lock(): tmp_path = path.with_suffix(f".tmp.{os.getpid()}.{secrets.token_hex(4)}") try: - with open(tmp_path, "w", encoding="utf-8") as fh: + # Create with 0o600 atomically to close the TOCTOU window where the + # default umask (often 0o644) would briefly expose tokens to other + # local users between open() and chmod(). + fd = os.open( + str(tmp_path), + os.O_WRONLY | os.O_CREAT | os.O_EXCL, + stat.S_IRUSR | stat.S_IWUSR, + ) + with os.fdopen(fd, "w", encoding="utf-8") as fh: fh.write(payload) fh.flush() os.fsync(fh.fileno()) - os.chmod(tmp_path, stat.S_IRUSR | stat.S_IWUSR) atomic_replace(tmp_path, path) finally: try: diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index a9556e2046..8494a70eef 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -183,8 +183,8 @@ SKILLS_GUIDANCE = ( ) KANBAN_GUIDANCE = ( - "# You are a Kanban worker\n" - "You were spawned by the Hermes Kanban dispatcher to execute ONE task from " + "# Kanban task execution protocol\n" + "You have been assigned ONE task from " "the shared board at `~/.hermes/kanban.db`. Your task id is in " "`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. " "The `kanban_*` tools in your schema are your primary coordination surface — " diff --git a/agent/redact.py b/agent/redact.py index 970ad5adfb..afdee65288 100644 --- a/agent/redact.py +++ b/agent/redact.py @@ -305,13 +305,18 @@ def _redact_form_body(text: str) -> str: return _redact_query_string(text.strip()) -def redact_sensitive_text(text: str, *, force: bool = False) -> str: +def redact_sensitive_text(text: str, *, force: bool = False, code_file: bool = False) -> str: """Apply all redaction patterns to a block of text. 
Safe to call on any string -- non-matching text passes through unchanged. Disabled by default — enable via security.redact_secrets: true in config.yaml. Set force=True for safety boundaries that must never return raw secrets regardless of the user's global logging redaction preference. + + Set code_file=True to skip the ENV-assignment and JSON-field regex + patterns when the text is known to be source code (e.g. MAX_TOKENS=*** + constants, "apiKey": "test" fixtures). Prefix patterns, auth headers, + private keys, DB connstrings, JWTs, and URL secrets are still redacted. """ if text is None: return None @@ -325,17 +330,18 @@ def redact_sensitive_text(text: str, *, force: bool = False) -> str: # Known prefixes (sk-, ghp_, etc.) text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text) - # ENV assignments: OPENAI_API_KEY=sk-abc... - def _redact_env(m): - name, quote, value = m.group(1), m.group(2), m.group(3) - return f"{name}={quote}{_mask_token(value)}{quote}" - text = _ENV_ASSIGN_RE.sub(_redact_env, text) + # ENV assignments: OPENAI_API_KEY=*** (skip for code files — false positives) + if not code_file: + def _redact_env(m): + name, quote, value = m.group(1), m.group(2), m.group(3) + return f"{name}={quote}{_mask_token(value)}{quote}" + text = _ENV_ASSIGN_RE.sub(_redact_env, text) - # JSON fields: "apiKey": "value" - def _redact_json(m): - key, value = m.group(1), m.group(2) - return f'{key}: "{_mask_token(value)}"' - text = _JSON_FIELD_RE.sub(_redact_json, text) + # JSON fields: "apiKey": "***" (skip for code files — false positives) + def _redact_json(m): + key, value = m.group(1), m.group(2) + return f'{key}: "{_mask_token(value)}"' + text = _JSON_FIELD_RE.sub(_redact_json, text) # Authorization headers text = _AUTH_HEADER_RE.sub( diff --git a/agent/transports/codex.py b/agent/transports/codex.py index 7d6bed46de..2ebc396fbb 100644 --- a/agent/transports/codex.py +++ b/agent/transports/codex.py @@ -143,7 +143,18 @@ class 
ResponsesApiTransport(ProviderTransport): kwargs["max_output_tokens"] = max_tokens if is_xai_responses and session_id: - kwargs["extra_headers"] = {"x-grok-conv-id": session_id} + existing_extra_headers = kwargs.get("extra_headers") + merged_extra_headers: Dict[str, str] = {} + if isinstance(existing_extra_headers, dict): + merged_extra_headers.update( + { + str(key): str(value) + for key, value in existing_extra_headers.items() + if key and value is not None + } + ) + merged_extra_headers["x-grok-conv-id"] = session_id + kwargs["extra_headers"] = merged_extra_headers return kwargs diff --git a/apps/dashboard/src/App.tsx b/apps/dashboard/src/App.tsx index 813f48cc5f..7e1ca19f13 100644 --- a/apps/dashboard/src/App.tsx +++ b/apps/dashboard/src/App.tsx @@ -80,6 +80,14 @@ function RootRedirect() { return ; } +function UnknownRouteFallback({ pluginsLoading }: { pluginsLoading: boolean }) { + if (pluginsLoading) { + // Render nothing during the plugin-load window — a spinner here would just flash. + return null; + } + return ; +} + const CHAT_NAV_ITEM: NavItem = { path: "/chat", labelKey: "chat", @@ -582,7 +590,9 @@ export default function App() { ))} } + element={ + + } /> diff --git a/apps/dashboard/src/components/ThemeSwitcher.tsx b/apps/dashboard/src/components/ThemeSwitcher.tsx index 4d50e611ef..462ccaacfc 100644 --- a/apps/dashboard/src/components/ThemeSwitcher.tsx +++ b/apps/dashboard/src/components/ThemeSwitcher.tsx @@ -4,6 +4,7 @@ import { Button } from "@nous-research/ui/ui/components/button"; import { ListItem } from "@nous-research/ui/ui/components/list-item"; import { Typography } from "@/components/NouiTypography"; import { BUILTIN_THEMES, useTheme } from "@/themes"; +import type { DashboardTheme } from "@/themes"; import { useI18n } from "@/i18n"; import { cn } from "@/lib/utils"; @@ -11,8 +12,8 @@ import { cn } from "@/lib/utils"; * Compact theme picker mounted next to the language switcher in the header. 
* Each dropdown row shows a 3-stop swatch (background / midground / warm * glow) so users can preview the palette before committing. User-defined - * themes from `~/.hermes/dashboard-themes/*.yaml` that aren't in - * `BUILTIN_THEMES` render without swatches and apply the default palette. + * themes from `~/.hermes/dashboard-themes/*.yaml` use their API-provided + * definitions so they show real palette swatches just like built-ins. * * When placed at the bottom of a container (e.g. the sidebar rail), pass * `dropUp` so the menu opens above the trigger instead of clipping below @@ -95,7 +96,7 @@ export function ThemeSwitcher({ dropUp = false }: ThemeSwitcherProps) { {availableThemes.map((th) => { const isActive = th.name === themeName; - const preset = BUILTIN_THEMES[th.name]; + const paletteTheme = BUILTIN_THEMES[th.name] ?? th.definition; return ( - {preset ? ( - + {paletteTheme ? ( + ) : ( )} @@ -144,10 +145,8 @@ export function ThemeSwitcher({ dropUp = false }: ThemeSwitcherProps) { ); } -function ThemeSwatch({ theme }: { theme: string }) { - const preset = BUILTIN_THEMES[theme]; - if (!preset) return ; - const { background, midground, warmGlow } = preset.palette; +function ThemeSwatch({ theme }: { theme: DashboardTheme }) { + const { background, midground, warmGlow } = theme.palette; return (
- >(() => + const [availableThemes, setAvailableThemes] = useState(() => Object.values(BUILTIN_THEMES).map((t) => ({ name: t.name, label: t.label, @@ -360,6 +358,7 @@ export function ThemeProvider({ children }: { children: ReactNode }) { name: t.name, label: t.label, description: t.description, + definition: t.definition, })), ); // Index any definitions the server shipped (user themes). @@ -430,8 +429,15 @@ const ThemeContext = createContext({ }); interface ThemeContextValue { - availableThemes: Array<{ description: string; label: string; name: string }>; + availableThemes: ThemeSummary[]; setTheme: (name: string) => void; theme: DashboardTheme; themeName: string; } + +interface ThemeSummary { + description: string; + label: string; + name: string; + definition?: DashboardTheme; +} diff --git a/cli.py b/cli.py index da917ae190..3b9f6af531 100644 --- a/cli.py +++ b/cli.py @@ -459,32 +459,19 @@ def load_cli_config() -> Dict[str, Any]: if "backend" in terminal_config: terminal_config["env_type"] = terminal_config["backend"] - # Handle special cwd values: "." or "auto" means use current working directory. - # Only resolve to the host's CWD for the local backend where the host - # filesystem is directly accessible. For ALL remote/container backends - # (ssh, docker, modal, singularity), the host path doesn't exist on the - # target -- remove the key so terminal_tool.py uses its per-backend default. - # - # GUARD: If TERMINAL_CWD is already set to a real absolute path (by the - # gateway's config bridge earlier in the process), don't clobber it. - # This prevents a lazy import of cli.py during gateway runtime from - # rewriting TERMINAL_CWD to the service's working directory. - # See issue #10817. + # CWD resolution for CLI/TUI. The gateway has its own config bridge in + # gateway/run.py but may lazily import cli.py (triggering this code). + # Local backend: always os.getcwd(). Use `cd /dir && hermes` to control it. 
+ # Non-local with placeholder: pop so terminal_tool uses its per-backend default. + # Non-local with explicit path: keep as-is. _CWD_PLACEHOLDERS = (".", "auto", "cwd") - if terminal_config.get("cwd") in _CWD_PLACEHOLDERS: - _existing_cwd = os.environ.get("TERMINAL_CWD", "") - if _existing_cwd and _existing_cwd not in _CWD_PLACEHOLDERS and os.path.isabs(_existing_cwd): - # Gateway (or earlier startup) already resolved a real path — keep it - terminal_config["cwd"] = _existing_cwd - defaults["terminal"]["cwd"] = _existing_cwd - else: - effective_backend = terminal_config.get("env_type", "local") - if effective_backend == "local": - terminal_config["cwd"] = os.getcwd() - defaults["terminal"]["cwd"] = terminal_config["cwd"] - else: - # Remove so TERMINAL_CWD stays unset → tool picks backend default - terminal_config.pop("cwd", None) + effective_backend = terminal_config.get("env_type", "local") + + if effective_backend == "local": + terminal_config["cwd"] = os.getcwd() + defaults["terminal"]["cwd"] = terminal_config["cwd"] + elif terminal_config.get("cwd") in _CWD_PLACEHOLDERS: + terminal_config.pop("cwd", None) env_mappings = { "env_type": "TERMINAL_ENV", @@ -517,13 +504,18 @@ def load_cli_config() -> Dict[str, Any]: "sudo_password": "SUDO_PASSWORD", } - # Apply config values to env vars so terminal_tool picks them up. - # If the config file explicitly has a [terminal] section, those values are - # authoritative and override any .env settings. When using defaults only - # (no config file or no terminal section), don't overwrite env vars that - # were already set by .env -- the user's .env is the fallback source. + # Bridge config → env vars for terminal_tool. TERMINAL_CWD is force-exported + # UNLESS we're inside a gateway process (detected by _HERMES_GATEWAY marker) + # where it was already set correctly by gateway/run.py's config bridge. 
+ _is_gateway = os.environ.get("_HERMES_GATEWAY") == "1" for config_key, env_var in env_mappings.items(): if config_key in terminal_config: + if env_var == "TERMINAL_CWD": + if _is_gateway: + continue + # CLI: always export (overrides stale .env or inherited values) + os.environ[env_var] = str(terminal_config[config_key]) + continue if _file_has_terminal_config or env_var not in os.environ: val = terminal_config[config_key] if isinstance(val, list): @@ -1234,6 +1226,28 @@ def _strip_markdown_syntax(text: str) -> str: return plain.strip("\n") +_WINDOWS_PATH_WITH_DOT_SEGMENT_RE = re.compile( + r"(?i)(?:\b[a-z]:\\|\\\\)[^\s`]*\\\.[^\s`]*" +) + + +def _preserve_windows_dot_segments_for_markdown(text: str) -> str: + r"""Keep Windows path separators before hidden directories in Markdown. + + CommonMark treats ``\.`` as an escaped literal dot, so Rich Markdown would + render ``D:\repo\.ai`` as ``D:\repo.ai``. Doubling only that separator + inside Windows path-looking tokens preserves the path without changing + ordinary markdown escapes like ``1\. not a list``. + """ + if "\\." not in text: + return text + + def _protect(match: re.Match[str]) -> str: + return re.sub(r"(? "dict | None": or stripped.startswith('"~') or stripped.startswith("'/") or stripped.startswith("'~") + or stripped.startswith('"./') + or stripped.startswith('"../') + or stripped.startswith("'./") + or stripped.startswith("'../") or (len(stripped) >= 4 and stripped[0] in ("'", '"') and stripped[2] == ":" and stripped[3] in ("\\", "/") and stripped[1].isalpha()) ) if not starts_like_path: @@ -4936,7 +4955,7 @@ class HermesCLI: except Exception: pass - def new_session(self, silent=False): + def new_session(self, silent=False, title=None): """Start a fresh session with a new session ID and cleared agent state.""" if self.agent and self.conversation_history: # Trigger memory extraction on the old session before session_id rotates. 
@@ -4991,6 +5010,28 @@ class HermesCLI: self.agent._session_db_created = True except Exception: pass + if title and self._session_db: + from hermes_state import SessionDB + try: + sanitized = SessionDB.sanitize_title(title) + except ValueError as e: + _cprint(f" Title rejected: {e}") + sanitized = None + title = None + if sanitized: + try: + self._session_db.set_session_title(self.session_id, sanitized) + self._pending_title = None + title = sanitized + except ValueError as e: + _cprint(f" {e} — session started untitled.") + title = None + except Exception: + title = None + elif title is not None: + # sanitize_title returned empty (whitespace-only / unprintable) + _cprint(" Title is empty after cleanup — session started untitled.") + title = None # Notify memory providers that session_id rotated to a fresh # conversation. reset=True signals providers to flush accumulated # per-session state (_session_turns, _turn_counter, _document_id). @@ -5010,7 +5051,10 @@ class HermesCLI: self._notify_session_boundary("on_session_reset") if not silent: - print("(^_^)v New session started!") + if title: + print(f"(^_^)v New session started: {title}") + else: + print("(^_^)v New session started!") def _handle_resume_command(self, cmd_original: str) -> None: """Handle /resume — switch to a previous session mid-conversation.""" @@ -6286,7 +6330,7 @@ class HermesCLI: _cmd_def = _resolve_cmd(_base_word) canonical = _cmd_def.name if _cmd_def else _base_word - if canonical in ("quit", "exit", "q"): + if canonical in ("quit", "exit"): return False elif canonical == "help": self.show_help() @@ -6422,7 +6466,9 @@ class HermesCLI: else: _cprint(" Session database not available.") elif canonical == "new": - self.new_session() + parts = cmd_original.split(maxsplit=1) + title = parts[1].strip() if len(parts) > 1 else None + self.new_session(title=title) elif canonical == "resume": self._handle_resume_command(cmd_original) elif canonical == "model": @@ -8383,6 +8429,17 @@ class HermesCLI: 
_cprint(f"{_DIM}Voice auto-restart failed: {e}{_RST}") threading.Thread(target=_restart_recording, daemon=True).start() + def _voice_speak_response_async(self, text: str) -> None: + """Schedule TTS and mark it pending before continuous recording can restart.""" + if not self._voice_tts or not text: + return + self._voice_tts_done.clear() + threading.Thread( + target=self._voice_speak_response, + args=(text,), + daemon=True, + ).start() + def _voice_speak_response(self, text: str): """Speak the agent's response aloud using TTS (runs in background thread).""" if not self._voice_tts: @@ -9543,11 +9600,7 @@ class HermesCLI: # Speak response aloud if voice TTS is enabled # Skip batch TTS when streaming TTS already handled it if self._voice_tts and response and not use_streaming_tts: - threading.Thread( - target=self._voice_speak_response, - args=(response,), - daemon=True, - ).start() + self._voice_speak_response_async(response) # Re-queue the interrupt message (and any that arrived while we were diff --git a/cron/jobs.py b/cron/jobs.py index 2f572c6acb..5e493ae3f7 100644 --- a/cron/jobs.py +++ b/cron/jobs.py @@ -797,19 +797,36 @@ def get_due_jobs() -> List[Dict[str, Any]]: next_run = job.get("next_run_at") if not next_run: + schedule = job.get("schedule", {}) + kind = schedule.get("kind") + + # One-shot jobs use a small grace window via the dedicated helper. recovered_next = _recoverable_oneshot_run_at( - job.get("schedule", {}), + schedule, now, last_run_at=job.get("last_run_at"), ) + recovery_kind = "one-shot" if recovered_next else None + + # Recurring jobs reach here only when something — typically a + # direct jobs.json edit that bypassed add_job() — left + # next_run_at unset. Without this branch, such jobs are + # silently skipped forever; recompute next_run_at from the + # schedule so they pick up at their next scheduled tick. 
+ if not recovered_next and kind in ("cron", "interval"): + recovered_next = compute_next_run(schedule, now.isoformat()) + if recovered_next: + recovery_kind = kind + if not recovered_next: continue job["next_run_at"] = recovered_next next_run = recovered_next logger.info( - "Job '%s' had no next_run_at; recovering one-shot run at %s", + "Job '%s' had no next_run_at; recovering %s run at %s", job.get("name", job["id"]), + recovery_kind, recovered_next, ) for rj in raw_jobs: diff --git a/cron/scheduler.py b/cron/scheduler.py index 2cb1547ad3..cee1cb4067 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -417,7 +417,7 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option thread_id = target.get("thread_id") # Diagnostic: log thread_id for topic-aware delivery debugging - origin = job.get("origin") or {} + origin = _resolve_origin(job) or {} origin_thread = origin.get("thread_id") if origin_thread and not thread_id: logger.warning( @@ -706,10 +706,8 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str: f"{prompt}" ) else: - prompt = ( - "[Script ran successfully but produced no output.]\n\n" - f"{prompt}" - ) + # Script produced no output — nothing to report, skip AI call. + return None else: prompt = ( "## Script Error\n" @@ -782,6 +780,7 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str: return prompt from tools.skills_tool import skill_view + from tools.skill_usage import bump_use parts = [] skipped: list[str] = [] @@ -793,6 +792,12 @@ def _build_job_prompt(job: dict, prerun_script: Optional[tuple] = None) -> str: skipped.append(skill_name) continue + # Bump usage so the curator sees this skill as actively used. 
+ try: + bump_use(skill_name) + except Exception: + logger.debug("Cron job: failed to bump skill usage for '%s'", skill_name, exc_info=True) + content = str(loaded.get("content") or "").strip() if parts: parts.append("") @@ -862,6 +867,9 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: return True, silent_doc, SILENT_MARKER, None prompt = _build_job_prompt(job, prerun_script=prerun_script) + if prompt is None: + logger.info("Job '%s': script produced no output, skipping AI call.", job_name) + return True, "", SILENT_MARKER, None origin = _resolve_origin(job) _cron_session_id = f"cron_{job_id}_{_hermes_now().strftime('%Y%m%d_%H%M%S')}" @@ -997,8 +1005,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: ) from hermes_cli.auth import AuthError try: + # Do not inject HERMES_INFERENCE_PROVIDER here. resolve_runtime_provider() + # already prefers persisted config over stale shell/env overrides when + # no explicit provider is requested. Passing the env var here short- + # circuits that precedence and can resurrect old providers (for + # example DeepSeek) for cron jobs that do not pin provider/model. runtime_kwargs = { - "requested": job.get("provider") or os.getenv("HERMES_INFERENCE_PROVIDER"), + "requested": job.get("provider"), } if job.get("base_url"): runtime_kwargs["explicit_base_url"] = job.get("base_url") diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index 299aab97a2..65386e53dd 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -86,6 +86,41 @@ if [ -d "$INSTALL_DIR/skills" ]; then python3 "$INSTALL_DIR/tools/skills_sync.py" fi +# Optionally start `hermes dashboard` as a side-process. +# +# Toggled by HERMES_DASHBOARD=1 (also accepts "true"/"yes", case-insensitive). 
+# Host/port/TUI can be overridden via: +# HERMES_DASHBOARD_HOST (default 0.0.0.0 — exposed outside the container) +# HERMES_DASHBOARD_PORT (default 9119, matches `hermes dashboard` default) +# HERMES_DASHBOARD_TUI (already honored by `hermes dashboard` itself) +# +# The dashboard is a long-lived server. We background it *before* the final +# `exec hermes "$@"` so the user's chosen foreground command (chat, gateway, +# sleep infinity, …) remains PID-of-interest for the container runtime. When +# the container stops the whole process tree is torn down, so no explicit +# cleanup is needed. +case "${HERMES_DASHBOARD:-}" in + 1|true|TRUE|True|yes|YES|Yes) + dash_host="${HERMES_DASHBOARD_HOST:-0.0.0.0}" + dash_port="${HERMES_DASHBOARD_PORT:-9119}" + dash_args=(--host "$dash_host" --port "$dash_port" --no-open) + # Binding to anything other than localhost requires --insecure — the + # dashboard refuses otherwise because it exposes API keys. Inside a + # container this is the expected deployment (host reaches it via + # published port), so opt in automatically. + if [ "$dash_host" != "127.0.0.1" ] && [ "$dash_host" != "localhost" ]; then + dash_args+=(--insecure) + fi + echo "Starting hermes dashboard on ${dash_host}:${dash_port} (background)" + # Prefix dashboard output so it's distinguishable from the main + # process in `docker logs`. stdbuf keeps the pipe line-buffered. + ( + stdbuf -oL -eL hermes dashboard "${dash_args[@]}" 2>&1 \ + | sed -u 's/^/[dashboard] /' + ) & + ;; +esac + # Final exec: two supported invocation patterns. 
# # docker run -> exec `hermes` with no args (legacy default) diff --git a/gateway/config.py b/gateway/config.py index 6527accec4..fa64b9046d 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -846,11 +846,25 @@ def load_gateway_config() -> GatewayConfig: if yaml_key in allow_mentions_cfg and not os.getenv(env_key): os.environ[env_key] = str(allow_mentions_cfg[yaml_key]).lower() + # Bridge top-level require_mention to Telegram when the telegram: section + # does not already provide one. Users often write "require_mention: true" + # at the top level alongside group_sessions_per_user, expecting it to work + # the same way (#3979). + _tl_require_mention = yaml_cfg.get("require_mention") + if _tl_require_mention is not None: + _tg_section = yaml_cfg.get("telegram") or {} + if "require_mention" not in _tg_section: + _tg_plat = platforms_data.setdefault(Platform.TELEGRAM.value, {}) + _tg_extra = _tg_plat.setdefault("extra", {}) + _tg_extra.setdefault("require_mention", _tl_require_mention) + # Telegram settings → env vars (env vars take precedence) telegram_cfg = yaml_cfg.get("telegram", {}) if isinstance(telegram_cfg, dict): - if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"): - os.environ["TELEGRAM_REQUIRE_MENTION"] = str(telegram_cfg["require_mention"]).lower() + # Prefer telegram.require_mention; fall back to the top-level shorthand. 
+ _effective_rm = telegram_cfg.get("require_mention", yaml_cfg.get("require_mention")) + if _effective_rm is not None and not os.getenv("TELEGRAM_REQUIRE_MENTION"): + os.environ["TELEGRAM_REQUIRE_MENTION"] = str(_effective_rm).lower() if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"): os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"]) frc = telegram_cfg.get("free_response_chats") diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index dc60887459..230859023b 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -62,6 +62,14 @@ MAX_NORMALIZED_TEXT_LENGTH = 65_536 # 64 KB cap for normalized content parts MAX_CONTENT_LIST_SIZE = 1_000 # Max items when content is an array +def _coerce_port(value: Any, default: int = DEFAULT_PORT) -> int: + """Parse a listen port without letting malformed env/config values crash startup.""" + try: + return int(value) + except (TypeError, ValueError): + return default + + def _normalize_chat_content( content: Any, *, _max_depth: int = 10, _depth: int = 0, ) -> str: @@ -573,7 +581,10 @@ class APIServerAdapter(BasePlatformAdapter): super().__init__(config, Platform.API_SERVER) extra = config.extra or {} self._host: str = extra.get("host", os.getenv("API_SERVER_HOST", DEFAULT_HOST)) - self._port: int = int(extra.get("port", os.getenv("API_SERVER_PORT", str(DEFAULT_PORT)))) + raw_port = extra.get("port") + if raw_port is None: + raw_port = os.getenv("API_SERVER_PORT", str(DEFAULT_PORT)) + self._port: int = _coerce_port(raw_port, DEFAULT_PORT) self._api_key: str = extra.get("key", os.getenv("API_SERVER_KEY", "")) self._cors_origins: tuple[str, ...] = self._parse_cors_origins( extra.get("cors_origins", os.getenv("API_SERVER_CORS_ORIGINS", "")), @@ -727,10 +738,11 @@ class APIServerAdapter(BasePlatformAdapter): gateway platforms), falling back to the hermes-api-server default. 
""" from run_agent import AIAgent - from gateway.run import _resolve_runtime_agent_kwargs, _resolve_gateway_model, _load_gateway_config + from gateway.run import _resolve_runtime_agent_kwargs, _resolve_gateway_model, _load_gateway_config, GatewayRunner from hermes_cli.tools_config import _get_platform_tools runtime_kwargs = _resolve_runtime_agent_kwargs() + reasoning_config = GatewayRunner._load_reasoning_config() model = _resolve_gateway_model() user_config = _load_gateway_config() @@ -740,7 +752,6 @@ class APIServerAdapter(BasePlatformAdapter): # Load fallback provider chain so the API server platform has the # same fallback behaviour as Telegram/Discord/Slack (fixes #4954). - from gateway.run import GatewayRunner fallback_model = GatewayRunner._load_fallback_model() agent = AIAgent( @@ -759,6 +770,7 @@ class APIServerAdapter(BasePlatformAdapter): tool_complete_callback=tool_complete_callback, session_db=self._ensure_session_db(), fallback_model=fallback_model, + reasoning_config=reasoning_config, ) return agent @@ -2566,21 +2578,39 @@ class APIServerAdapter(BasePlatformAdapter): return r, u result, usage = await asyncio.get_running_loop().run_in_executor(None, _run_sync) - final_response = result.get("final_response", "") if isinstance(result, dict) else "" - q.put_nowait({ - "event": "run.completed", - "run_id": run_id, - "timestamp": time.time(), - "output": final_response, - "usage": usage, - }) - self._set_run_status( - run_id, - "completed", - output=final_response, - usage=usage, - last_event="run.completed", - ) + # Check for structured failure (non-retryable client errors like + # 401/400 return failed=True instead of raising, so the except + # block below never fires — issue #15561). 
+ if isinstance(result, dict) and result.get("failed"): + error_msg = result.get("error") or "agent run failed" + q.put_nowait({ + "event": "run.failed", + "run_id": run_id, + "timestamp": time.time(), + "error": error_msg, + }) + self._set_run_status( + run_id, + "failed", + error=error_msg, + last_event="run.failed", + ) + else: + final_response = result.get("final_response", "") if isinstance(result, dict) else "" + q.put_nowait({ + "event": "run.completed", + "run_id": run_id, + "timestamp": time.time(), + "output": final_response, + "usage": usage, + }) + self._set_run_status( + run_id, + "completed", + output=final_response, + usage=usage, + last_event="run.completed", + ) except asyncio.CancelledError: self._set_run_status( run_id, diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 78e0dd7e25..4d611fdaa5 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -2506,7 +2506,13 @@ class BasePlatformAdapter(ABC): _r = await self._send_with_retry( chat_id=event.source.chat_id, content=_text, - reply_to=event.message_id, + reply_to=( + event.reply_to_message_id + if event.source.platform == Platform.FEISHU + and event.source.thread_id + and event.reply_to_message_id + else event.message_id + ), metadata=thread_meta, ) if _eph_ttl > 0 and _r.success and _r.message_id: @@ -2606,7 +2612,13 @@ class BasePlatformAdapter(ABC): _r = await self._send_with_retry( chat_id=event.source.chat_id, content=_text, - reply_to=event.message_id, + reply_to=( + event.reply_to_message_id + if event.source.platform == Platform.FEISHU + and event.source.thread_id + and event.reply_to_message_id + else event.message_id + ), metadata=_thread_meta, ) if _eph_ttl > 0 and _r.success and _r.message_id: @@ -2810,10 +2822,15 @@ class BasePlatformAdapter(ABC): # Send the text portion if text_content: logger.info("[%s] Sending response (%d chars) to %s", self.name, len(text_content), event.source.chat_id) + _reply_anchor = ( + event.reply_to_message_id + 
if event.source.platform == Platform.FEISHU and event.source.thread_id and event.reply_to_message_id + else event.message_id + ) result = await self._send_with_retry( chat_id=event.source.chat_id, content=text_content, - reply_to=event.message_id, + reply_to=_reply_anchor, metadata=_thread_metadata, ) _record_delivery(result) diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 243e81d3e8..ecfa38c723 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -720,11 +720,22 @@ class DiscordAdapter(BasePlatformAdapter): return # If humans are mentioned but we're not → not for us # (preserves old DISCORD_IGNORE_NO_MENTION=true behavior) + # EXCEPT in free-response channels where the bot should + # answer regardless of who is mentioned. _ignore_no_mention = os.getenv( "DISCORD_IGNORE_NO_MENTION", "true" ).lower() in ("true", "1", "yes") if _ignore_no_mention and not _self_mentioned and not _other_bots_mentioned: - return + _channel_id = str(message.channel.id) + _parent_id = None + if hasattr(message.channel, "parent_id") and message.channel.parent_id: + _parent_id = str(message.channel.parent_id) + _free_channels = adapter_self._discord_free_response_channels() + _channel_ids = {_channel_id} + if _parent_id: + _channel_ids.add(_parent_id) + if "*" not in _free_channels and not (_channel_ids & _free_channels): + return await self._handle_message(message) @@ -3797,7 +3808,7 @@ class DiscordAdapter(BasePlatformAdapter): if not is_thread and not isinstance(message.channel, discord.DMChannel): no_thread_channels_raw = os.getenv("DISCORD_NO_THREAD_CHANNELS", "") no_thread_channels = {ch.strip() for ch in no_thread_channels_raw.split(",") if ch.strip()} - skip_thread = bool(channel_ids & no_thread_channels) or is_free_channel + skip_thread = bool(channel_ids & no_thread_channels) auto_thread = os.getenv("DISCORD_AUTO_THREAD", "true").lower() in ("true", "1", "yes") is_reply_message = getattr(message, "type", None) == 
discord.MessageType.reply if auto_thread and not skip_thread and not is_voice_linked_channel and not is_reply_message: diff --git a/gateway/platforms/feishu.py b/gateway/platforms/feishu.py index a6b522c4a2..ac920bab69 100644 --- a/gateway/platforms/feishu.py +++ b/gateway/platforms/feishu.py @@ -2757,9 +2757,11 @@ class FeishuAdapter(BasePlatformAdapter): if hint: text = f"{hint}\n\n{text}" if text else hint + thread_id = getattr(message, "thread_id", None) or getattr(message, "root_id", None) or None reply_to_message_id = ( getattr(message, "parent_id", None) or getattr(message, "upper_message_id", None) + or getattr(message, "root_id", None) or None ) reply_to_text = await self._fetch_message_text(reply_to_message_id) if reply_to_message_id else None @@ -2791,7 +2793,7 @@ class FeishuAdapter(BasePlatformAdapter): chat_type=self._resolve_source_chat_type(chat_info=chat_info, event_chat_type=chat_type), user_id=sender_profile["user_id"], user_name=sender_profile["user_name"], - thread_id=getattr(message, "thread_id", None) or None, + thread_id=thread_id, user_id_alt=sender_profile["user_id_alt"], is_bot=is_bot, ) @@ -4227,6 +4229,15 @@ class FeishuAdapter(BasePlatformAdapter): if active_reply_to and not self._response_succeeded(response): code = getattr(response, "code", None) if code in _FEISHU_REPLY_FALLBACK_CODES: + if (metadata or {}).get("thread_id"): + logger.warning( + "[Feishu] Reply to %s failed in thread %s (code %s — message withdrawn/missing); " + "skipping top-level fallback to avoid creating a new topic", + active_reply_to, + (metadata or {}).get("thread_id"), + code, + ) + return response logger.warning( "[Feishu] Reply to %s failed (code %s — message withdrawn/missing); " "falling back to new message in chat %s", diff --git a/gateway/platforms/qqbot/adapter.py b/gateway/platforms/qqbot/adapter.py index c6e5d428c6..f8d7aed787 100644 --- a/gateway/platforms/qqbot/adapter.py +++ b/gateway/platforms/qqbot/adapter.py @@ -397,13 +397,24 @@ class 
QQAdapter(BasePlatformAdapter): await self._session.close() self._session = None - self._session = aiohttp.ClientSession() + # Honor WSL proxy env for QQ WebSocket. Hermes upgrades overwrite this + # local patch, so QQ can regress to direct-connect timeouts after update. + self._session = aiohttp.ClientSession(trust_env=True) + ws_proxy = ( + os.getenv("WSS_PROXY") + or os.getenv("wss_proxy") + or os.getenv("HTTPS_PROXY") + or os.getenv("https_proxy") + or os.getenv("ALL_PROXY") + or os.getenv("all_proxy") + ) self._ws = await self._session.ws_connect( gateway_url, headers={ "User-Agent": build_user_agent(), }, timeout=CONNECT_TIMEOUT_SECONDS, + proxy=ws_proxy, ) logger.info("[%s] WebSocket connected to %s", self._log_tag, gateway_url) diff --git a/gateway/platforms/signal.py b/gateway/platforms/signal.py index 77d3c18cb6..a0053317f7 100644 --- a/gateway/platforms/signal.py +++ b/gateway/platforms/signal.py @@ -192,6 +192,15 @@ class SignalAdapter(BasePlatformAdapter): group_allowed_str = os.getenv("SIGNAL_GROUP_ALLOWED_USERS", "") self.group_allow_from = set(_parse_comma_list(group_allowed_str)) + # DM allowlist — mirrors SIGNAL_ALLOWED_USERS checked by run.py. + # Stored here so the reaction hooks can skip unauthorized senders + # (reactions fire before run.py's auth gate, so without this check + # every inbound DM from any contact gets a 👀 reaction). + # "*" means all users allowed (open mode); empty means no restriction + # recorded at adapter level (run.py still enforces auth separately). + dm_allowed_str = os.getenv("SIGNAL_ALLOWED_USERS", "*") + self.dm_allow_from = set(_parse_comma_list(dm_allowed_str)) + # HTTP client self.client: Optional[httpx.AsyncClient] = None @@ -1430,8 +1439,28 @@ class SignalAdapter(BasePlatformAdapter): return None return (author, ts) + def _reactions_enabled(self, event: "MessageEvent" = None) -> bool: + """Check if message reactions are enabled for this event. + + Two gates: + 1. 
SIGNAL_REACTIONS env var — set to false/0/no to disable globally. + 2. DM allowlist — if SIGNAL_ALLOWED_USERS is set, only react to + messages from senders in that list. This prevents unauthorized + contacts from seeing the 👀 reaction (which fires before run.py's + auth gate and would otherwise reveal that a bot is listening). + """ + if os.getenv("SIGNAL_REACTIONS", "true").lower() in ("false", "0", "no"): + return False + if event is not None: + sender = getattr(getattr(event, "source", None), "user_id", None) + if sender and "*" not in self.dm_allow_from and sender not in self.dm_allow_from: + return False + return True + async def on_processing_start(self, event: MessageEvent) -> None: """React with 👀 when processing begins.""" + if not self._reactions_enabled(event): + return target = self._extract_reaction_target(event) if target: await self.send_reaction(event.source.chat_id, "👀", *target) @@ -1442,6 +1471,8 @@ class SignalAdapter(BasePlatformAdapter): On CANCELLED we leave the 👀 in place — no terminal outcome means the reaction should keep reflecting "in progress" (matches Telegram). 
""" + if not self._reactions_enabled(event): + return if outcome == ProcessingOutcome.CANCELLED: return target = self._extract_reaction_target(event) diff --git a/gateway/platforms/sms.py b/gateway/platforms/sms.py index 161949dab3..2cf7db69b7 100644 --- a/gateway/platforms/sms.py +++ b/gateway/platforms/sms.py @@ -10,7 +10,7 @@ Shares credentials with the optional telephony skill — same env vars: Gateway-specific env vars: - SMS_WEBHOOK_PORT (default 8080) - - SMS_WEBHOOK_HOST (default 0.0.0.0) + - SMS_WEBHOOK_HOST (default 127.0.0.1) - SMS_WEBHOOK_URL (public URL for Twilio signature validation — required) - SMS_INSECURE_NO_SIGNATURE (true to disable signature validation — dev only) - SMS_ALLOWED_USERS (comma-separated E.164 phone numbers) @@ -41,7 +41,7 @@ logger = logging.getLogger(__name__) TWILIO_API_BASE = "https://api.twilio.com/2010-04-01/Accounts" MAX_SMS_LENGTH = 1600 # ~10 SMS segments DEFAULT_WEBHOOK_PORT = 8080 -DEFAULT_WEBHOOK_HOST = "0.0.0.0" +DEFAULT_WEBHOOK_HOST = "127.0.0.1" def check_sms_requirements() -> bool: @@ -91,19 +91,23 @@ class SmsAdapter(BasePlatformAdapter): from aiohttp import web if not self._from_number: - logger.error("[sms] TWILIO_PHONE_NUMBER not set — cannot send replies") + msg = "[sms] TWILIO_PHONE_NUMBER not set — cannot send replies" + logger.error(msg) + self._set_fatal_error("sms_missing_phone_number", msg, retryable=False) return False insecure_no_sig = os.getenv("SMS_INSECURE_NO_SIGNATURE", "").lower() == "true" if not self._webhook_url and not insecure_no_sig: - logger.error( + msg = ( "[sms] Refusing to start: SMS_WEBHOOK_URL is required for Twilio " "signature validation. Set it to the public URL configured in your " "Twilio console (e.g. https://example.com/webhooks/twilio). " "For local development without validation, set " - "SMS_INSECURE_NO_SIGNATURE=true (NOT recommended for production).", + "SMS_INSECURE_NO_SIGNATURE=true (NOT recommended for production)." 
) + logger.error(msg) + self._set_fatal_error("sms_missing_webhook_url", msg, retryable=False) return False if insecure_no_sig and not self._webhook_url: diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 188038a1ad..167d47237e 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -2267,13 +2267,54 @@ class TelegramAdapter(BasePlatformAdapter): ) return SendResult(success=True, message_id=str(msg.message_id)) except Exception as e: - logger.error( - "[%s] Failed to send Telegram local image, falling back to base adapter: %s", - self.name, - e, - exc_info=True, + error_str = str(e) + # Dimension-related errors are the expected case for valid image + # files that Telegram just refuses as photos (screenshots, extreme + # aspect ratios). Log at INFO because the document fallback is + # the correct path. Any other send_photo failure also falls back + # to document (rate limits, corrupt file markers, format edge + # cases), but at WARNING because it's unexpected and worth + # surfacing in logs. + is_dim_error = ( + "Photo_invalid_dimensions" in error_str + or "PHOTO_INVALID_DIMENSIONS" in error_str ) - return await super().send_image_file(chat_id, image_path, caption, reply_to) + if is_dim_error: + logger.info( + "[%s] Image dimensions exceed Telegram photo limits, " + "sending as document: %s", + self.name, + image_path, + ) + else: + logger.warning( + "[%s] Failed to send Telegram local image as photo, " + "trying document fallback: %s", + self.name, + e, + exc_info=True, + ) + # Fallback to sending as document (file) — no dimension limit, + # only 50MB size limit. If even that fails, fall back to the + # base adapter's text-only "Image: /path" rendering. 
+ try: + return await self.send_document( + chat_id=chat_id, + file_path=image_path, + caption=caption, + file_name=os.path.basename(image_path), + reply_to=reply_to, + metadata=metadata, + ) + except Exception as doc_err: + logger.error( + "[%s] Failed to send Telegram local image as document, " + "falling back to base adapter: %s", + self.name, + doc_err, + exc_info=True, + ) + return await super().send_image_file(chat_id, image_path, caption, reply_to) async def send_document( self, diff --git a/gateway/platforms/wecom.py b/gateway/platforms/wecom.py index 453b95a717..873284de79 100644 --- a/gateway/platforms/wecom.py +++ b/gateway/platforms/wecom.py @@ -142,6 +142,7 @@ class WeComAdapter(BasePlatformAdapter): """WeCom AI Bot adapter backed by a persistent WebSocket connection.""" MAX_MESSAGE_LENGTH = MAX_MESSAGE_LENGTH + SUPPORTS_MESSAGE_EDITING = False # Threshold for detecting WeCom client-side message splits. # When a chunk is near the 4000-char limit, a continuation is almost certain. 
_SPLIT_THRESHOLD = 3900 diff --git a/gateway/platforms/weixin.py b/gateway/platforms/weixin.py index 3fd7174270..482692ee7a 100644 --- a/gateway/platforms/weixin.py +++ b/gateway/platforms/weixin.py @@ -1333,6 +1333,15 @@ class WeixinAdapter(BasePlatformAdapter): if message_id and self._dedup.is_duplicate(message_id): return + # Secondary content-fingerprint dedup for text messages + item_list = message.get("item_list") or [] + text = _extract_text(item_list) + if text: + content_key = f"content:{sender_id}:{hashlib.md5(text.encode()).hexdigest()}" + if self._dedup.is_duplicate(content_key): + logger.debug("[%s] Content-dedup: skipping duplicate message from %s", self.name, sender_id) + return + chat_type, effective_chat_id = _guess_chat_type(message, self._account_id) if chat_type == "group": if self._group_policy == "disabled": @@ -1347,8 +1356,6 @@ class WeixinAdapter(BasePlatformAdapter): self._token_store.set(self._account_id, sender_id, context_token) asyncio.create_task(self._maybe_fetch_typing_ticket(sender_id, context_token or None)) - item_list = message.get("item_list") or [] - text = _extract_text(item_list) media_paths: List[str] = [] media_types: List[str] = [] diff --git a/gateway/run.py b/gateway/run.py index d604947e99..6047de3220 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -49,6 +49,29 @@ from hermes_cli.config import cfg_get _AGENT_CACHE_MAX_SIZE = 128 _AGENT_CACHE_IDLE_TTL_SECS = 3600.0 # evict agents idle for >1h _PLATFORM_CONNECT_TIMEOUT_SECS_DEFAULT = 30.0 +_TELEGRAM_COMMAND_MENTION_RE = re.compile(r"(? str: + """Rewrite slash-command mentions to Telegram-valid command names. + + Telegram Bot API command names allow only lowercase letters, digits, and + underscores. Keep other platform renderings unchanged, but normalize + Telegram help text so command mentions remain clickable/valid there. 
+ """ + platform_value = getattr(platform, "value", platform) + if platform_value != "telegram": + return text + + from hermes_cli.commands import _sanitize_telegram_name + + def _replace(match: re.Match[str]) -> str: + sanitized = _sanitize_telegram_name(match.group(1)) + return f"/{sanitized}" if sanitized else match.group(0) + + return _TELEGRAM_COMMAND_MENTION_RE.sub(_replace, text) + + # Only auto-continue interrupted gateway turns while the interruption is fresh. # Stale tool-tail/resume markers can otherwise revive an unrelated old task # after a gateway restart when the user's next message starts new work. @@ -293,6 +316,10 @@ def _restart_notification_pending() -> bool: return (_hermes_home / ".restart_notify.json").exists() +# Mark this process as a gateway so cli.py's module-level load_cli_config() +# knows not to clobber TERMINAL_CWD if lazily imported. +os.environ["_HERMES_GATEWAY"] = "1" + _ensure_ssl_certs() # Add parent directory to path @@ -1161,6 +1188,10 @@ class GatewayRunner: # Per-chat voice reply mode: "off" | "voice_only" | "all" self._voice_mode: Dict[str, str] = self._load_voice_modes() + # Recent voice transcripts per (guild,user) for duplicate suppression. + # Protects against the same utterance being emitted twice by the voice + # capture / STT pipeline, which otherwise produces a second delayed reply. + self._recent_voice_transcripts: Dict[tuple[int, int], List[tuple[float, str]]] = {} # Track background tasks to prevent garbage collection mid-execution self._background_tasks: set = set() @@ -3246,6 +3277,11 @@ class GatewayRunner: Runs in the gateway event loop; all SQLite work is pushed to a thread via ``asyncio.to_thread`` so the loop never blocks on the WAL lock. Failures in one tick don't stop subsequent ticks. + + **Multi-board:** iterates every board discovered on disk per + tick. 
Subscriptions live inside each board's own DB and cannot + cross boards, so delivery semantics are unchanged — this is + purely a fan-out of the single-DB poll. """ from gateway.config import Platform as _Platform try: @@ -3278,40 +3314,54 @@ class GatewayRunner: while self._running: try: def _collect(): - conn = _kb.connect() + deliveries: list[dict] = [] + # Enumerate every board on disk. Cheap: a few + # directory stat calls per tick. Missing/empty + # boards are silently skipped. try: - _kb.init_db() # idempotent; handles first-run + boards = _kb.list_boards(include_archived=False) except Exception: - pass - try: - subs = _kb.list_notify_subs(conn) - deliveries: list[dict] = [] - for sub in subs: - cursor, events = _kb.unseen_events_for_sub( - conn, - task_id=sub["task_id"], - platform=sub["platform"], - chat_id=sub["chat_id"], - thread_id=sub.get("thread_id") or "", - kinds=TERMINAL_KINDS, - ) - if not events: - continue - task = _kb.get_task(conn, sub["task_id"]) - deliveries.append({ - "sub": sub, - "cursor": cursor, - "events": events, - "task": task, - }) - return deliveries - finally: - conn.close() + boards = [_kb.read_board_metadata(_kb.DEFAULT_BOARD)] + for board_meta in boards: + slug = board_meta.get("slug") or _kb.DEFAULT_BOARD + try: + conn = _kb.connect(board=slug) + except Exception: + continue + try: + try: + _kb.init_db(board=slug) # idempotent; handles first-run + except Exception: + pass + subs = _kb.list_notify_subs(conn) + for sub in subs: + cursor, events = _kb.unseen_events_for_sub( + conn, + task_id=sub["task_id"], + platform=sub["platform"], + chat_id=sub["chat_id"], + thread_id=sub.get("thread_id") or "", + kinds=TERMINAL_KINDS, + ) + if not events: + continue + task = _kb.get_task(conn, sub["task_id"]) + deliveries.append({ + "sub": sub, + "cursor": cursor, + "events": events, + "task": task, + "board": slug, + }) + finally: + conn.close() + return deliveries deliveries = await asyncio.to_thread(_collect) for d in deliveries: sub = 
d["sub"] task = d["task"] + board_slug = d.get("board") platform_str = (sub["platform"] or "").lower() try: plat = _Platform(platform_str) @@ -3319,7 +3369,7 @@ class GatewayRunner: # Unknown platform string; skip and advance cursor so # we don't replay forever. await asyncio.to_thread( - self._kanban_advance, sub, d["cursor"], + self._kanban_advance, sub, d["cursor"], board_slug, ) continue adapter = self.adapters.get(plat) @@ -3409,14 +3459,14 @@ class GatewayRunner: "%s on %s after %d consecutive send failures", sub["task_id"], platform_str, fails, ) - await asyncio.to_thread(self._kanban_unsub, sub) + await asyncio.to_thread(self._kanban_unsub, sub, board_slug) sub_fail_counts.pop(sub_key, None) # Don't advance cursor on send failure — retry next tick. break else: # All events delivered; advance cursor + maybe unsub. await asyncio.to_thread( - self._kanban_advance, sub, d["cursor"], + self._kanban_advance, sub, d["cursor"], board_slug, ) # Unsubscribe when the LAST delivered event is a # terminal kind (the task hit a "no further updates" @@ -3428,7 +3478,7 @@ class GatewayRunner: event_terminal = last_kind in TERMINAL_EVENT_KINDS if task_terminal or event_terminal: await asyncio.to_thread( - self._kanban_unsub, sub, + self._kanban_unsub, sub, board_slug, ) except Exception as exc: logger.warning("kanban notifier tick failed: %s", exc) @@ -3438,10 +3488,16 @@ class GatewayRunner: return await asyncio.sleep(1) - def _kanban_advance(self, sub: dict, cursor: int) -> None: - """Sync helper: advance a subscription's cursor. Runs in to_thread.""" + def _kanban_advance( + self, sub: dict, cursor: int, board: Optional[str] = None, + ) -> None: + """Sync helper: advance a subscription's cursor. Runs in to_thread. + + ``board`` scopes the DB connection to the board that owns this + subscription. Unsub cursors in one board can't touch another's. 
+ """ from hermes_cli import kanban_db as _kb - conn = _kb.connect() + conn = _kb.connect(board=board) try: _kb.advance_notify_cursor( conn, @@ -3454,9 +3510,9 @@ class GatewayRunner: finally: conn.close() - def _kanban_unsub(self, sub: dict) -> None: + def _kanban_unsub(self, sub: dict, board: Optional[str] = None) -> None: from hermes_cli import kanban_db as _kb - conn = _kb.connect() + conn = _kb.connect(board=board) try: _kb.remove_notify_sub( conn, @@ -3534,20 +3590,25 @@ class GatewayRunner: bad_ticks = 0 last_warn_at = 0 - def _tick_once() -> "Optional[object]": - """Run one dispatch_once; return result or None on error. + def _tick_once_for_board(slug: str) -> "Optional[object]": + """Run one dispatch_once for a specific board. - Runs in a worker thread via `asyncio.to_thread`.""" + Runs in a worker thread via `asyncio.to_thread`. `board=slug` + is passed through `dispatch_once` so `resolve_workspace` and + `_default_spawn` see the right paths. The per-board DB is + opened explicitly so concurrent boards never share a + connection handle or accidentally claim across each other. + """ conn = None try: - conn = _kb.connect() + conn = _kb.connect(board=slug) try: - _kb.init_db() # idempotent, handles first-run + _kb.init_db(board=slug) # idempotent, handles first-run except Exception: pass - return _kb.dispatch_once(conn) + return _kb.dispatch_once(conn, board=slug) except Exception: - logger.exception("kanban dispatcher: tick failed") + logger.exception("kanban dispatcher: tick failed on board %s", slug) return None finally: if conn is not None: @@ -3556,49 +3617,77 @@ class GatewayRunner: except Exception: pass - def _ready_nonempty() -> bool: - """Cheap probe: is there at least one ready+assigned+unclaimed task?""" - conn = None + def _tick_once() -> "list[tuple[str, Optional[object]]]": + """Run one dispatch_once per board. Returns (slug, result) pairs. 
+ + Enumerating boards on every tick keeps the dispatcher honest + when users create a new board mid-run: no restart required, + the next tick picks it up automatically. + """ try: - conn = _kb.connect() - row = conn.execute( - "SELECT 1 FROM tasks " - "WHERE status = 'ready' AND assignee IS NOT NULL " - " AND claim_lock IS NULL LIMIT 1" - ).fetchone() - return row is not None + boards = _kb.list_boards(include_archived=False) except Exception: - return False - finally: - if conn is not None: - try: - conn.close() - except Exception: - pass + boards = [_kb.read_board_metadata(_kb.DEFAULT_BOARD)] + out: list[tuple[str, "Optional[object]"]] = [] + for b in boards: + slug = b.get("slug") or _kb.DEFAULT_BOARD + out.append((slug, _tick_once_for_board(slug))) + return out + + def _ready_nonempty() -> bool: + """Cheap probe: is there a ready+assigned+unclaimed task on ANY board?""" + try: + boards = _kb.list_boards(include_archived=False) + except Exception: + boards = [_kb.read_board_metadata(_kb.DEFAULT_BOARD)] + for b in boards: + slug = b.get("slug") or _kb.DEFAULT_BOARD + conn = None + try: + conn = _kb.connect(board=slug) + row = conn.execute( + "SELECT 1 FROM tasks " + "WHERE status = 'ready' AND assignee IS NOT NULL " + " AND claim_lock IS NULL LIMIT 1" + ).fetchone() + if row is not None: + return True + except Exception: + continue + finally: + if conn is not None: + try: + conn.close() + except Exception: + pass + return False logger.info( "kanban dispatcher: embedded in gateway (interval=%.1fs)", interval ) while self._running: try: - res = await asyncio.to_thread(_tick_once) - if res is not None and getattr(res, "spawned", None): - # Quiet by default — only log when something actually - # happened, so an idle gateway stays silent. 
- logger.info( - "kanban dispatcher: tick spawned=%d reclaimed=%d " - "crashed=%d timed_out=%d promoted=%d auto_blocked=%d", - len(res.spawned), - res.reclaimed, - len(res.crashed) if hasattr(res.crashed, "__len__") else 0, - len(res.timed_out) if hasattr(res.timed_out, "__len__") else 0, - res.promoted, - len(res.auto_blocked) if hasattr(res.auto_blocked, "__len__") else 0, - ) - # Health telemetry + results = await asyncio.to_thread(_tick_once) + any_spawned = False + for slug, res in (results or []): + if res is not None and getattr(res, "spawned", None): + any_spawned = True + # Quiet by default — only log when something actually + # happened, so an idle gateway stays silent. + logger.info( + "kanban dispatcher [%s]: spawned=%d reclaimed=%d " + "crashed=%d timed_out=%d promoted=%d auto_blocked=%d", + slug, + len(res.spawned), + res.reclaimed, + len(res.crashed) if hasattr(res.crashed, "__len__") else 0, + len(res.timed_out) if hasattr(res.timed_out, "__len__") else 0, + res.promoted, + len(res.auto_blocked) if hasattr(res.auto_blocked, "__len__") else 0, + ) + # Health telemetry (aggregate across boards) ready_pending = await asyncio.to_thread(_ready_nonempty) - spawned_any = bool(res and getattr(res, "spawned", None)) - if ready_pending and not spawned_any: + if ready_pending and not any_spawned: bad_ticks += 1 else: bad_ticks = 0 @@ -5107,6 +5196,28 @@ class GatewayRunner: _cmd_def = _resolve_cmd(command) if command else None canonical = _cmd_def.name if _cmd_def else command + # Expand alias quick commands before built-in dispatch so targets like + # /model openai/gpt-5.5 --provider openrouter reach the /model handler. + # Preserve built-in precedence; aliases only need early handling when + # the typed command is not already known. 
+ if command and _cmd_def is None: + if isinstance(self.config, dict): + quick_commands = self.config.get("quick_commands", {}) or {} + else: + quick_commands = getattr(self.config, "quick_commands", {}) or {} + if isinstance(quick_commands, dict) and command in quick_commands: + qcmd = quick_commands[command] + if qcmd.get("type") == "alias": + target = qcmd.get("target", "").strip() + if target: + target = target if target.startswith("/") else f"/{target}" + target_command = target.lstrip("/") + user_args = event.get_command_args().strip() + event.text = f"{target} {user_args}".strip() + command = target_command.split()[0] if target_command else target_command + _cmd_def = _resolve_cmd(command) if command else None + canonical = _cmd_def.name if _cmd_def else command + # Fire the ``command:`` hook for any recognized slash # command — built-in OR plugin-registered. Handlers can return a # dict with ``{"decision": "deny" | "handled" | "rewrite", ...}`` @@ -5320,7 +5431,7 @@ class GatewayRunner: target_command = target.lstrip("/") user_args = event.get_command_args().strip() event.text = f"{target} {user_args}".strip() - command = target_command + command = target_command.split()[0] if target_command else target_command # Fall through to normal command dispatch below else: return f"Quick command '/{command}' has no target defined." 
@@ -6681,6 +6792,7 @@ class GatewayRunner: base_url = None api_key = None custom_provs = None + data = None try: data = _load_gateway_config() @@ -6703,6 +6815,41 @@ class GatewayRunner: except Exception: pass + # Also check custom_providers for context_length when top-level model.context_length is not set + if config_context_length is None and data: + try: + custom_providers = data.get("custom_providers", []) + if custom_providers: + for cp in custom_providers: + if not isinstance(cp, dict): + continue + cp_model = cp.get("model") or "" + cp_models = cp.get("models") or {} + # Match provider model to current model + if cp_model and cp_model == model: + raw_cp_ctx = cp.get("context_length") + if raw_cp_ctx is not None: + try: + config_context_length = int(raw_cp_ctx) + break + except (TypeError, ValueError): + pass + # Also check per-model context_length + if isinstance(cp_models, dict): + model_entry = cp_models.get(model) + if isinstance(model_entry, dict): + model_ctx = model_entry.get("context_length") + else: + model_ctx = model_entry + if model_ctx is not None and isinstance(model_ctx, (int, float)): + try: + config_context_length = int(model_ctx) + break + except (TypeError, ValueError): + pass + except Exception: + pass + # Resolve runtime credentials for probing try: runtime = _resolve_runtime_agent_kwargs() @@ -6843,6 +6990,29 @@ class GatewayRunner: new_entry = self.session_store.get_or_create_session(source, force_new=True) header = "✨ New session started!" 
+ # Set session title if provided with /new + _title_arg = event.get_command_args().strip() + _title_note = "" + if _title_arg and self._session_db and new_entry: + from hermes_state import SessionDB + try: + sanitized = SessionDB.sanitize_title(_title_arg) + except ValueError as e: + sanitized = None + _title_note = f"\n⚠️ Title rejected: {e}" + if sanitized: + try: + self._session_db.set_session_title(new_entry.session_id, sanitized) + header = f"✨ New session started: {sanitized}" + except ValueError as e: + _title_note = f"\n⚠️ {e} — session started untitled." + except Exception: + pass + elif not _title_note: + # sanitize_title returned empty (whitespace-only / unprintable) + _title_note = "\n⚠️ Title is empty after cleanup — session started untitled." + header = header + _title_note + # Fire plugin on_session_reset hook (new session guaranteed to exist) try: from hermes_cli.plugins import invoke_hook as _invoke_hook @@ -7298,7 +7468,10 @@ class GatewayRunner: lines.append(f"\n... and {len(sorted_cmds) - 10} more. Use `/commands` for the full paginated list.") except Exception: pass - return "\n".join(lines) + return _telegramize_command_mentions( + "\n".join(lines), + getattr(getattr(event, "source", None), "platform", None), + ) async def _handle_commands_command(self, event: MessageEvent) -> str: """Handle /commands [page] - paginated list of all commands and skills.""" @@ -7351,7 +7524,10 @@ class GatewayRunner: lines.extend(["", " | ".join(nav_parts)]) if page != requested_page: lines.append(f"_(Requested page {requested_page} was out of range, showing page {page}.)_") - return "\n".join(lines) + return _telegramize_command_mentions( + "\n".join(lines), + getattr(getattr(event, "source", None), "platform", None), + ) async def _handle_model_command(self, event: MessageEvent) -> Optional[str]: """Handle /model command — switch model for this session. 
@@ -8261,6 +8437,47 @@ class GatewayRunner: adapter = self.adapters.get(Platform.DISCORD) self._set_adapter_auto_tts_disabled(adapter, chat_id, disabled=True) + def _is_duplicate_voice_transcript(self, guild_id: int, user_id: int, transcript: str) -> bool: + """Suppress repeated STT outputs for the same recent utterance. + + Voice capture can occasionally emit the same utterance twice a few + seconds apart, which creates a second queued agent run and overlapping + spoken replies. Dedup exact and near-exact repeats per guild/user over a + short window while allowing genuinely new turns through. + """ + from difflib import SequenceMatcher + + normalized = re.sub(r"\s+", " ", transcript).strip().lower() + normalized = re.sub(r"[^\w\s]", "", normalized) + if not normalized: + return False + + now = time.monotonic() + window_seconds = 12.0 + key = (guild_id, user_id) + recent_store = getattr(self, "_recent_voice_transcripts", None) + if not isinstance(recent_store, dict): + recent_store = {} + self._recent_voice_transcripts = recent_store + recent = [ + (ts, txt) + for ts, txt in recent_store.get(key, []) + if now - ts <= window_seconds + ] + + for _, prior in recent: + if prior == normalized: + recent_store[key] = recent + return True + if len(prior) >= 16 and len(normalized) >= 16: + if SequenceMatcher(None, prior, normalized).ratio() >= 0.95: + recent_store[key] = recent + return True + + recent.append((now, normalized)) + recent_store[key] = recent[-5:] + return False + async def _handle_voice_channel_input( self, guild_id: int, user_id: int, transcript: str ): @@ -8298,6 +8515,15 @@ class GatewayRunner: logger.debug("Unauthorized voice input from user %d, ignoring", user_id) return + if self._is_duplicate_voice_transcript(guild_id, user_id, transcript): + logger.info( + "Suppressing duplicate voice transcript for guild=%s user=%s: %s", + guild_id, + user_id, + transcript[:100], + ) + return + # Show transcript in text channel (after auth, with mention sanitization) 
try: channel = adapter._client.get_channel(text_ch_id) @@ -11311,6 +11537,12 @@ class GatewayRunner: if not session_key: return + pending_skills_reload_notes = getattr( + self, "_pending_skills_reload_notes", None + ) + if isinstance(pending_skills_reload_notes, dict): + pending_skills_reload_notes.pop(session_key, None) + pending_approvals = getattr(self, "_pending_approvals", None) if isinstance(pending_approvals, dict): pending_approvals.pop(session_key, None) diff --git a/gateway/session.py b/gateway/session.py index 3129f7a325..16de296e0e 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -1121,7 +1121,7 @@ class SessionStore: self._save() return count - def reset_session(self, session_key: str) -> Optional[SessionEntry]: + def reset_session(self, session_key: str, display_name: Optional[str] = None) -> Optional[SessionEntry]: """Force reset a session, creating a new session ID.""" db_end_session_id = None db_create_kwargs = None @@ -1145,7 +1145,7 @@ class SessionStore: created_at=now, updated_at=now, origin=old_entry.origin, - display_name=old_entry.display_name, + display_name=display_name if display_name is not None else old_entry.display_name, platform=old_entry.platform, chat_type=old_entry.chat_type, is_fresh_reset=True, diff --git a/hermes_cli/__init__.py b/hermes_cli/__init__.py index b3482b1e68..9141ea93e7 100644 --- a/hermes_cli/__init__.py +++ b/hermes_cli/__init__.py @@ -5,11 +5,43 @@ Provides subcommands for: - hermes chat - Interactive chat (same as ./hermes) - hermes gateway - Run gateway in foreground - hermes gateway start - Start gateway service -- hermes gateway stop - Stop gateway service +- hermes gateway stop - Stop gateway service - hermes setup - Interactive setup wizard - hermes status - Show status of all components - hermes cron - Manage cron jobs """ +import os +import sys + __version__ = "0.12.0" __release_date__ = "2026.4.30" + + +def _ensure_utf8(): + """Force UTF-8 stdout/stderr on Windows to prevent 
UnicodeEncodeError. + + Windows services and terminals default to cp1252, which cannot encode + box-drawing characters used in CLI output. This causes unhandled + UnicodeEncodeError crashes on gateway startup. + """ + if sys.platform != "win32": + return + os.environ.setdefault("PYTHONUTF8", "1") + os.environ.setdefault("PYTHONIOENCODING", "utf-8") + for stream_name in ("stdout", "stderr"): + stream = getattr(sys, stream_name, None) + if stream is None: + continue + try: + if getattr(stream, "encoding", "").lower().replace("-", "") != "utf8": + new_stream = open( + stream.fileno(), "w", encoding="utf-8", + buffering=1, closefd=False, + ) + setattr(sys, stream_name, new_stream) + except (AttributeError, OSError): + pass + + +_ensure_utf8() diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 1d77fffa92..5b63d41eb1 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -2589,6 +2589,208 @@ def _poll_for_token( # Nous Portal — token refresh, agent key minting, model discovery # ============================================================================= +# ----------------------------------------------------------------------------- +# Shared Nous token store — lets OAuth credentials persist across profiles +# so a new `hermes --profile <name> auth add nous --type oauth` can one-tap +# import instead of running the full device-code flow every time. +# +# File lives at ${HERMES_SHARED_AUTH_DIR}/nous_auth.json, defaulting to +# ~/.hermes/shared/nous_auth.json. It is OUTSIDE any named profile's +# HERMES_HOME so named profiles (which typically live under +# ~/.hermes/profiles/<name>/) all see the same file. +# +# Written on successful login and on every runtime refresh so the stored +# refresh_token stays current even if one profile refreshes and rotates it. +# If ever the stored refresh_token does go stale server-side, import fails +# gracefully and the user falls back to the normal device-code flow. 
+# ----------------------------------------------------------------------------- + +NOUS_SHARED_STORE_FILENAME = "nous_auth.json" + + +def _nous_shared_auth_dir() -> Path: + """Resolve the directory that holds the shared Nous token store. + + Honors ``HERMES_SHARED_AUTH_DIR`` so tests can redirect it to a tmp + path without touching the real user's home. Defaults to + ``~/.hermes/shared/``. + """ + override = os.getenv("HERMES_SHARED_AUTH_DIR", "").strip() + if override: + return Path(override).expanduser() + return Path.home() / ".hermes" / "shared" + + +def _nous_shared_store_path() -> Path: + path = _nous_shared_auth_dir() / NOUS_SHARED_STORE_FILENAME + # Seat belt: if pytest is running and this resolves to a path under the + # real user's home, refuse rather than silently corrupt cross-profile + # state. Tests must set HERMES_SHARED_AUTH_DIR to a tmp_path (conftest + # does not do this automatically — mirror the _auth_file_path() guard + # so forgetting to set it fails loudly instead of writing to the real + # shared store). + if os.environ.get("PYTEST_CURRENT_TEST"): + real_home_shared = ( + Path.home() / ".hermes" / "shared" / NOUS_SHARED_STORE_FILENAME + ).resolve(strict=False) + try: + resolved = path.resolve(strict=False) + except Exception: + resolved = path + if resolved == real_home_shared: + raise RuntimeError( + f"Refusing to touch real user shared Nous auth store during test run: " + f"{path}. Set HERMES_SHARED_AUTH_DIR to a tmp_path in your test fixture." + ) + return path + + +def _write_shared_nous_state(state: Dict[str, Any]) -> None: + """Persist a minimal copy of the Nous OAuth state to the shared store. + + Best-effort: any failure is swallowed after logging. The shared store + is a convenience layer; the per-profile auth.json remains the source + of truth. + + We deliberately omit the short-lived ``agent_key`` (24h TTL, profile- + specific) — only the long-lived OAuth tokens are cross-profile useful. 
+ """ + refresh_token = state.get("refresh_token") + access_token = state.get("access_token") + if not (isinstance(refresh_token, str) and refresh_token.strip()): + # No refresh_token = nothing worth sharing across profiles + return + if not (isinstance(access_token, str) and access_token.strip()): + return + + shared = { + "_schema": 1, + "access_token": access_token, + "refresh_token": refresh_token, + "token_type": state.get("token_type") or "Bearer", + "scope": state.get("scope") or DEFAULT_NOUS_SCOPE, + "client_id": state.get("client_id") or DEFAULT_NOUS_CLIENT_ID, + "portal_base_url": state.get("portal_base_url") or DEFAULT_NOUS_PORTAL_URL, + "inference_base_url": state.get("inference_base_url") or DEFAULT_NOUS_INFERENCE_URL, + "obtained_at": state.get("obtained_at"), + "expires_at": state.get("expires_at"), + "updated_at": datetime.now(timezone.utc).isoformat(), + } + try: + path = _nous_shared_store_path() + path.parent.mkdir(parents=True, exist_ok=True) + tmp = path.with_suffix(path.suffix + ".tmp") + tmp.write_text(json.dumps(shared, indent=2, sort_keys=True)) + try: + os.chmod(tmp, 0o600) + except OSError: + pass + os.replace(tmp, path) + _oauth_trace( + "nous_shared_store_written", + path=str(path), + refresh_token_fp=_token_fingerprint(refresh_token), + ) + except Exception as exc: + logger.debug("Failed to write shared Nous auth store: %s", exc) + + +def _read_shared_nous_state() -> Optional[Dict[str, Any]]: + """Return the shared Nous OAuth state if present and well-formed. + + Returns ``None`` when the file is missing, unreadable, malformed, or + lacks required fields. Callers should treat ``None`` as "no shared + credentials available — fall through to device-code". 
+ """ + try: + path = _nous_shared_store_path() + except RuntimeError: + # Test seat belt tripped — treat as missing + return None + if not path.is_file(): + return None + try: + payload = json.loads(path.read_text()) + except (OSError, ValueError) as exc: + logger.debug("Shared Nous auth store at %s is unreadable: %s", path, exc) + return None + if not isinstance(payload, dict): + return None + refresh_token = payload.get("refresh_token") + access_token = payload.get("access_token") + if not (isinstance(refresh_token, str) and refresh_token.strip()): + return None + if not (isinstance(access_token, str) and access_token.strip()): + return None + return payload + + +def _try_import_shared_nous_state( + *, + timeout_seconds: float = 15.0, + min_key_ttl_seconds: int = 5 * 60, +) -> Optional[Dict[str, Any]]: + """Attempt to rehydrate Nous OAuth state from the shared store. + + Reads the shared file (if present), runs a forced refresh+mint using + the stored refresh_token to produce a fresh access_token + agent_key + scoped to this profile, and returns the full auth_state dict ready + for ``persist_nous_credentials()``. + + Returns ``None`` when no shared state is available or the rehydrate + fails for any reason (expired refresh_token, portal unreachable, + etc.) — caller should then fall through to the normal device-code + flow. + """ + shared = _read_shared_nous_state() + if not shared: + return None + + # Build a full state dict so refresh_nous_oauth_from_state has every + # field it needs. force_refresh=True gets us a fresh access_token + # for this profile; force_mint=True gets us a fresh agent_key. 
+ state: Dict[str, Any] = { + "access_token": shared.get("access_token"), + "refresh_token": shared.get("refresh_token"), + "client_id": shared.get("client_id") or DEFAULT_NOUS_CLIENT_ID, + "portal_base_url": shared.get("portal_base_url") or DEFAULT_NOUS_PORTAL_URL, + "inference_base_url": shared.get("inference_base_url") or DEFAULT_NOUS_INFERENCE_URL, + "token_type": shared.get("token_type") or "Bearer", + "scope": shared.get("scope") or DEFAULT_NOUS_SCOPE, + "obtained_at": shared.get("obtained_at"), + "expires_at": shared.get("expires_at"), + "agent_key": None, + "agent_key_expires_at": None, + "tls": {"insecure": False, "ca_bundle": None}, + } + + try: + refreshed = refresh_nous_oauth_from_state( + state, + min_key_ttl_seconds=min_key_ttl_seconds, + timeout_seconds=timeout_seconds, + force_refresh=True, + force_mint=True, + ) + except AuthError as exc: + _oauth_trace( + "nous_shared_import_failed", + error_type=type(exc).__name__, + error_code=getattr(exc, "code", None), + ) + logger.debug("Shared Nous import failed: %s", exc) + return None + except Exception as exc: + _oauth_trace( + "nous_shared_import_failed", + error_type=type(exc).__name__, + ) + logger.debug("Shared Nous import failed: %s", exc) + return None + + return refreshed + + def _refresh_access_token( *, client: httpx.Client, @@ -2991,6 +3193,12 @@ def persist_nous_credentials( _save_provider_state(auth_store, "nous", state) _save_auth_store(auth_store) + # Mirror to the shared store so a new profile can one-tap import + # these credentials via `hermes auth add nous --type oauth`. Best- + # effort: any I/O failure is logged and swallowed (the per-profile + # auth.json is still the source of truth). 
+ _write_shared_nous_state(state) + pool = load_pool("nous") return next( (e for e in pool.entries() if e.source == NOUS_DEVICE_CODE_SOURCE), @@ -3059,6 +3267,11 @@ def resolve_nous_runtime_credentials( refresh_token_fp=_token_fingerprint(state.get("refresh_token")), access_token_fp=_token_fingerprint(state.get("access_token")), ) + # Mirror post-refresh state to the shared store so sibling + # profiles don't hold stale refresh_tokens after rotation. + # Best-effort — any failure is logged and swallowed inside + # _write_shared_nous_state. + _write_shared_nous_state(state) verify = _resolve_verify(insecure=insecure, ca_bundle=ca_bundle, auth_state=state) timeout = httpx.Timeout(timeout_seconds if timeout_seconds else 15.0) @@ -4283,7 +4496,8 @@ def _minimax_oauth_login( print(f"Portal: {portal_base_url}") with httpx.Client(timeout=httpx.Timeout(timeout_seconds), - headers={"Accept": "application/json"}) as client: + headers={"Accept": "application/json"}, + follow_redirects=True) as client: code_data = _minimax_request_user_code( client, portal_base_url=portal_base_url, client_id=pconfig.client_id, @@ -4360,7 +4574,8 @@ def _refresh_minimax_oauth_state( return state portal_base_url = state["portal_base_url"] - with httpx.Client(timeout=httpx.Timeout(timeout_seconds)) as client: + with httpx.Client(timeout=httpx.Timeout(timeout_seconds), + follow_redirects=True) as client: response = client.post( f"{portal_base_url}/oauth/token", data={ @@ -4598,17 +4813,47 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: ) try: - auth_state = _nous_device_code_login( - portal_base_url=getattr(args, "portal_url", None), - inference_base_url=getattr(args, "inference_url", None), - client_id=getattr(args, "client_id", None) or pconfig.client_id, - scope=getattr(args, "scope", None) or pconfig.scope, - open_browser=not getattr(args, "no_browser", False), - timeout_seconds=timeout_seconds, - insecure=insecure, - ca_bundle=ca_bundle, - min_key_ttl_seconds=5 * 60, - ) + 
auth_state = None + + # Codex-style auto-import: before launching a fresh device-code + # flow, check the shared store for an existing Nous credential + # from any other profile. If present, offer to rehydrate it. + shared = _read_shared_nous_state() + if shared: + try: + shared_path = _nous_shared_store_path() + except RuntimeError: + shared_path = None + print() + if shared_path: + print(f"Found existing Nous OAuth credentials at {shared_path}") + else: + print("Found existing shared Nous OAuth credentials") + try: + do_import = input("Import these credentials? [Y/n]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + do_import = "y" + if do_import in ("", "y", "yes"): + print("Rehydrating Nous session from shared credentials...") + auth_state = _try_import_shared_nous_state( + timeout_seconds=timeout_seconds, + min_key_ttl_seconds=5 * 60, + ) + if auth_state is None: + print("Could not refresh shared credentials — falling back to device-code login.") + + if auth_state is None: + auth_state = _nous_device_code_login( + portal_base_url=getattr(args, "portal_url", None), + inference_base_url=getattr(args, "inference_url", None), + client_id=getattr(args, "client_id", None) or pconfig.client_id, + scope=getattr(args, "scope", None) or pconfig.scope, + open_browser=not getattr(args, "no_browser", False), + timeout_seconds=timeout_seconds, + insecure=insecure, + ca_bundle=ca_bundle, + min_key_ttl_seconds=5 * 60, + ) inference_base_url = auth_state["inference_base_url"] @@ -4625,6 +4870,11 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: _save_provider_state(auth_store, "nous", auth_state) saved_to = _save_auth_store(auth_store) + # Mirror to the shared store so other profiles can one-tap import + # these credentials. Best-effort: any I/O failure is logged and + # swallowed inside the helper. 
+ _write_shared_nous_state(auth_state) + print() print("Login successful!") print(f" Auth state: {saved_to}") diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index a9eb206647..a29776aea2 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -245,6 +245,47 @@ def auth_add_command(args) -> None: return if provider == "nous": + # Codex-style auto-import: if a shared Nous credential lives at + # ~/.hermes/shared/nous_auth.json (written by any previous + # successful login), offer to import it instead of running the + # full device-code flow. This makes `hermes --profile <name> + # auth add nous --type oauth` a one-tap operation for users who + # run multiple profiles. + shared = auth_mod._read_shared_nous_state() + if shared: + try: + path = auth_mod._nous_shared_store_path() + except RuntimeError: + path = None + print() + if path: + print(f"Found existing Nous OAuth credentials at {path}") + else: + print("Found existing shared Nous OAuth credentials") + try: + do_import = input("Import these credentials? [Y/n]: ").strip().lower() + except (EOFError, KeyboardInterrupt): + do_import = "y" + if do_import in ("", "y", "yes"): + print("Rehydrating Nous session from shared credentials...") + rehydrated = auth_mod._try_import_shared_nous_state( + timeout_seconds=getattr(args, "timeout", None) or 15.0, + min_key_ttl_seconds=max( + 60, int(getattr(args, "min_key_ttl_seconds", 5 * 60)) + ), + ) + if rehydrated is not None: + custom_label = (getattr(args, "label", None) or "").strip() or None + entry = auth_mod.persist_nous_credentials(rehydrated, label=custom_label) + shown_label = entry.label if entry is not None else label_from_token( + rehydrated.get("access_token", ""), _oauth_default_label(provider, 1), + ) + print(f'Imported {provider} OAuth credentials: "{shown_label}"') + return + # Rehydrate failed (expired refresh_token, portal down, etc.) + # — fall through to device-code flow. 
+ print("Could not refresh shared credentials — falling back to device-code login.") + creds = auth_mod._nous_device_code_login( portal_base_url=getattr(args, "portal_url", None), inference_base_url=getattr(args, "inference_url", None), diff --git a/hermes_cli/backup.py b/hermes_cli/backup.py index 2a766f7502..dce199a5ab 100644 --- a/hermes_cli/backup.py +++ b/hermes_cli/backup.py @@ -61,6 +61,9 @@ _EXCLUDED_NAMES = { "cron.pid", } +# zipfile.open() drops Unix mode bits on extract; restore tightens these to 0600. +_SECRET_FILE_NAMES = {".env", "auth.json", "state.db"} + def _should_exclude(rel_path: Path) -> bool: """Return True if *rel_path* (relative to hermes root) should be skipped.""" @@ -381,6 +384,8 @@ def run_import(args) -> None: target.parent.mkdir(parents=True, exist_ok=True) with zf.open(member) as src, open(target, "wb") as dst: dst.write(src.read()) + if target.name in _SECRET_FILE_NAMES: + os.chmod(target, 0o600) restored += 1 except (PermissionError, OSError) as exc: errors.append(f" {rel}: {exc}") @@ -788,9 +793,17 @@ def _prune_pre_update_backups(backup_dir: Path, keep: int) -> int: Returns the number of files deleted. Only touches files matching ``pre-update-*.zip`` so hand-made zips dropped in the same directory are never touched. + + ``keep`` is floored to 1 because this helper is only called immediately + after a fresh backup is written: deleting that backup right after the + user paid the disk/CPU cost to create it would leave them worse off + than no backup at all (and the wrapper in ``main.py`` would still print + a misleading ``Saved: <path>`` line for a file that no longer exists). + Operators who genuinely don't want a backup should set + ``updates.pre_update_backup: false`` in config — that gates creation. 
""" - if keep < 0: - keep = 0 + if keep < 1: + keep = 1 if not backup_dir.exists(): return 0 diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 07e7273bf7..c7ddfa0fa0 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -64,7 +64,7 @@ class CommandDef: COMMAND_REGISTRY: list[CommandDef] = [ # Session CommandDef("new", "Start a new session (fresh session ID + history)", "Session", - aliases=("reset",)), + aliases=("reset",), args_hint="[name]"), CommandDef("clear", "Clear screen and start a new session", "Session", cli_only=True), CommandDef("redraw", "Force a full UI repaint (recovers from terminal drift)", "Session", @@ -399,6 +399,11 @@ def _is_gateway_available(cmd: CommandDef, config_overrides: set[str] | None = N return False +def _requires_argument(args_hint: str) -> bool: + """Return True when selecting a command without text would be incomplete.""" + return args_hint.strip().startswith("<") + + def gateway_help_lines() -> list[str]: """Generate gateway help text lines from the registry.""" overrides = _resolve_config_gates() @@ -455,7 +460,9 @@ def telegram_bot_commands() -> list[tuple[str, str]]: Telegram command names cannot contain hyphens, so they are replaced with underscores. Aliases are skipped -- Telegram shows one menu entry per - canonical command. + canonical command. Commands that require arguments are skipped because + selecting a Telegram BotCommand sends only ``/command`` and would execute + an incomplete command. Plugin-registered slash commands are included so plugins get native autocomplete in Telegram without touching core code. 
@@ -465,10 +472,14 @@ def telegram_bot_commands() -> list[tuple[str, str]]: for cmd in COMMAND_REGISTRY: if not _is_gateway_available(cmd, overrides): continue + if _requires_argument(cmd.args_hint): + continue tg_name = _sanitize_telegram_name(cmd.name) if tg_name: result.append((tg_name, cmd.description)) - for name, description, _args_hint in _iter_plugin_command_entries(): + for name, description, args_hint in _iter_plugin_command_entries(): + if _requires_argument(args_hint): + continue tg_name = _sanitize_telegram_name(name) if tg_name: result.append((tg_name, description)) @@ -1115,6 +1126,12 @@ class SlashCommandCompleter(Completer): except Exception: return {} + # Commands that open pickers when run without arguments. + # These should NOT receive a trailing space in completions because: + # - The TUI's submit handler applies completions on Enter if input differs + # - Adding space makes "/model" → "/model " which blocks picker execution + _PICKER_COMMANDS = frozenset({"model", "skin", "personality"}) + @staticmethod def _completion_text(cmd_name: str, word: str) -> str: """Return replacement text for a completion. @@ -1123,8 +1140,17 @@ class SlashCommandCompleter(Completer): returning ``help`` would be a no-op and prompt_toolkit suppresses the menu. Appending a trailing space keeps the dropdown visible and makes backspacing retrigger it naturally. + + However, commands that open pickers (model, skin, personality) should + NOT get a trailing space — the TUI would apply the completion on Enter + and block the picker from opening. 
""" - return f"{cmd_name} " if cmd_name == word else cmd_name + if cmd_name != word: + return cmd_name + # Don't add space for picker commands — allows Enter to execute them + if cmd_name in SlashCommandCompleter._PICKER_COMMANDS: + return cmd_name + return f"{cmd_name} " @staticmethod def _extract_path_word(text: str) -> str | None: diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 672aa6ae26..3ce1e1526f 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1292,7 +1292,10 @@ DEFAULT_CONFIG = { # for a single update run. "pre_update_backup": False, # How many pre-update backup zips to retain. Older ones are pruned - # automatically after each successful backup. + # automatically after each successful backup. Values below 1 are + # floored to 1 — the backup just created is always preserved. To + # disable backups entirely, set ``pre_update_backup: false`` above + # rather than ``backup_keep: 0``. "backup_keep": 5, }, @@ -4682,7 +4685,9 @@ def set_config_value(key: str, value: str): "terminal.vercel_runtime": "TERMINAL_VERCEL_RUNTIME", "terminal.docker_mount_cwd_to_workspace": "TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "terminal.docker_run_as_host_user": "TERMINAL_DOCKER_RUN_AS_HOST_USER", - "terminal.cwd": "TERMINAL_CWD", + # terminal.cwd intentionally excluded — CLI resolves at runtime, + # gateway bridges it in gateway/run.py. Persisting to .env causes + # stale values to poison child processes. 
"terminal.timeout": "TERMINAL_TIMEOUT", "terminal.sandbox_dir": "TERMINAL_SANDBOX_DIR", "terminal.persistent_shell": "TERMINAL_PERSISTENT_SHELL", diff --git a/hermes_cli/curses_ui.py b/hermes_cli/curses_ui.py index b05295f1e6..01d759d387 100644 --- a/hermes_cli/curses_ui.py +++ b/hermes_cli/curses_ui.py @@ -156,6 +156,8 @@ def curses_checklist( flush_stdin() return result_holder[0] if result_holder[0] is not None else cancel_returns + except KeyboardInterrupt: + return cancel_returns except Exception: return _numbered_fallback(title, items, selected, cancel_returns, status_fn) @@ -278,6 +280,8 @@ def curses_radiolist( flush_stdin() return result_holder[0] if result_holder[0] is not None else cancel_returns + except KeyboardInterrupt: + return cancel_returns except Exception: return _radio_numbered_fallback(title, items, selected, cancel_returns) @@ -401,6 +405,8 @@ def curses_single_select( return None return result_holder[0] + except KeyboardInterrupt: + return None except Exception: all_items = list(items) + [cancel_label] cancel_idx = len(items) diff --git a/hermes_cli/debug.py b/hermes_cli/debug.py index 06be05a355..a7338e4ba8 100644 --- a/hermes_cli/debug.py +++ b/hermes_cli/debug.py @@ -1,12 +1,19 @@ -"""``hermes debug`` — debug tools for Hermes Agent. +"""``hermes debug`` debug tools for Hermes Agent. Currently supports: hermes debug share Upload debug report (system info + logs) to a paste service and print a shareable URL. + By default, log content is run through + ``agent.redact.redact_sensitive_text`` with + ``force=True`` before upload so credentials in + ``~/.hermes/logs/*.log`` are not leaked into + the public paste service. Pass ``--no-redact`` + to disable. 
""" import io import json +import logging import sys import time import urllib.error @@ -19,6 +26,16 @@ from typing import Optional from hermes_constants import get_hermes_home from utils import atomic_replace +logger = logging.getLogger(__name__) + +# Banner prepended to upload-bound log content when redaction is enabled. +# Visible in the public paste so reviewers know the content was sanitized. +# Kept short; the trailing newline guarantees the banner sits on its own line. +_REDACTION_BANNER = ( + "[hermes debug share: log content redacted at upload time. " + "run with --no-redact to disable]\n" +) + # --------------------------------------------------------------------------- # Paste services — try paste.rs first, dpaste.com as fallback. @@ -368,17 +385,40 @@ def _resolve_log_path(log_name: str) -> Optional[Path]: return None +def _redact_log_text(text: str) -> str: + """Run ``redact_sensitive_text`` with ``force=True`` over upload-bound text. + + Uses ``force=True`` so redaction fires regardless of the operator's + ``security.redact_secrets`` setting. The local on-disk log file is + not modified; only the in-memory copy headed for the public paste + service is sanitized. Returns the redacted text (or the original + when empty / non-string). + """ + if not text: + return text + from agent.redact import redact_sensitive_text + + return redact_sensitive_text(text, force=True) + + def _capture_log_snapshot( log_name: str, *, tail_lines: int, max_bytes: int = _MAX_LOG_BYTES, + redact: bool = True, ) -> LogSnapshot: """Capture a log once and derive summary/full-log views from it. The report tail and standalone log upload must come from the same file snapshot. Otherwise a rotation/truncate between reads can make the report look newer than the uploaded ``agent.log`` paste. + + When ``redact`` is True (the default), both ``tail_text`` and + ``full_text`` are run through ``_redact_log_text`` so the snapshot + returned is upload-safe. 
The on-disk log file is never modified. + Pass ``redact=False`` to capture original log content (used by + ``hermes debug share --no-redact``). """ log_path = _resolve_log_path(log_name) if log_path is None: @@ -438,18 +478,34 @@ def _capture_log_snapshot( if truncated: full_text = f"[... truncated — showing last ~{max_bytes // 1024}KB ...]\n{full_text}" + if redact: + tail_text = _redact_log_text(tail_text) + full_text = _redact_log_text(full_text) + return LogSnapshot(path=log_path, tail_text=tail_text, full_text=full_text) except Exception as exc: return LogSnapshot(path=log_path, tail_text=f"(error reading: {exc})", full_text=None) -def _capture_default_log_snapshots(log_lines: int) -> dict[str, LogSnapshot]: - """Capture all logs used by debug-share exactly once.""" +def _capture_default_log_snapshots( + log_lines: int, *, redact: bool = True +) -> dict[str, LogSnapshot]: + """Capture all logs used by debug-share exactly once. + + ``redact`` is forwarded to each ``_capture_log_snapshot`` call so all + captured logs share the same redaction policy for a given run. 
+ """ errors_lines = min(log_lines, 100) return { - "agent": _capture_log_snapshot("agent", tail_lines=log_lines), - "errors": _capture_log_snapshot("errors", tail_lines=errors_lines), - "gateway": _capture_log_snapshot("gateway", tail_lines=errors_lines), + "agent": _capture_log_snapshot( + "agent", tail_lines=log_lines, redact=redact + ), + "errors": _capture_log_snapshot( + "errors", tail_lines=errors_lines, redact=redact + ), + "gateway": _capture_log_snapshot( + "gateway", tail_lines=errors_lines, redact=redact + ), } @@ -532,6 +588,7 @@ def run_debug_share(args): log_lines = getattr(args, "lines", 200) expiry = getattr(args, "expire", 7) local_only = getattr(args, "local", False) + redact = not getattr(args, "no_redact", False) if not local_only: print(_PRIVACY_NOTICE) @@ -539,8 +596,16 @@ def run_debug_share(args): print("Collecting debug report...") # Capture dump once — prepended to every paste for context. + # The dump is already redacted at extract time via dump.py:_redact; + # log_snapshots are redacted by _capture_default_log_snapshots when + # redact=True so credentials never reach the public paste service. dump_text = _capture_dump() - log_snapshots = _capture_default_log_snapshots(log_lines) + log_snapshots = _capture_default_log_snapshots(log_lines, redact=redact) + + if redact: + logger.info( + "hermes debug share: applied force-mode redaction to log snapshots before upload" + ) report = collect_debug_report( log_lines=log_lines, @@ -556,6 +621,15 @@ def run_debug_share(args): if gateway_log: gateway_log = dump_text + "\n\n--- full gateway.log ---\n" + gateway_log + # Visible banner so reviewers reading the public paste know redaction + # was applied at upload time. Banner is omitted under --no-redact. 
+ if redact: + report = _REDACTION_BANNER + report + if agent_log: + agent_log = _REDACTION_BANNER + agent_log + if gateway_log: + gateway_log = _REDACTION_BANNER + gateway_log + if local_only: print(report) if agent_log: @@ -666,6 +740,7 @@ def run_debug(args): print(" --lines N Number of log lines to include (default: 200)") print(" --expire N Paste expiry in days (default: 7)") print(" --local Print report locally instead of uploading") + print(" --no-redact Disable upload-time secret redaction (default: redact)") print() print("Options (delete):") print(" <url> ... One or more paste URLs to delete") diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 122ed141cc..446f576a61 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -935,6 +935,8 @@ def run_doctor(args): agent_browser_path = PROJECT_ROOT / "node_modules" / "agent-browser" if agent_browser_path.exists(): check_ok("agent-browser (Node.js)", "(browser automation)") + elif shutil.which("agent-browser"): + check_ok("agent-browser", "(browser automation)") else: if _is_termux(): check_info("agent-browser is not installed (expected in the tested Termux path)") @@ -1096,9 +1098,10 @@ def run_doctor(args): ("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True), ("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True), ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True), - # MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does. + # MiniMax global: /v1 endpoint supports /models. ("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True), - ("MiniMax (China)", ("MINIMAX_CN_API_KEY",), "https://api.minimaxi.com/v1/models", "MINIMAX_CN_BASE_URL", True), + # MiniMax CN: /v1 endpoint does NOT support /models (returns 404). 
+ ("MiniMax (China)", ("MINIMAX_CN_API_KEY",), "https://api.minimaxi.com/v1/models", "MINIMAX_CN_BASE_URL", False), ("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True), ("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True), ("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True), diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index af40444922..dff0a4aa75 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -237,6 +237,26 @@ def _graceful_restart_via_sigusr1(pid: int, drain_timeout: float) -> bool: return False +def _get_ancestor_pids() -> set[int]: + """Return the set of PIDs in the current process's ancestor chain. + + Walks from the current PID up to PID 1 (init) so that process-table scans + never match the calling CLI process or any of its parents. This prevents + ``hermes gateway status`` from falsely counting the ``hermes`` CLI that + invoked it as a running gateway instance (see #13242). + """ + ancestors: set[int] = set() + pid = os.getpid() + # Cap iterations to avoid infinite loops on exotic platforms. + for _ in range(64): + ancestors.add(pid) + parent = _get_parent_pid(pid) + if parent is None or parent <= 0 or parent in ancestors: + break + pid = parent + return ancestors + + def _append_unique_pid(pids: list[int], pid: int | None, exclude_pids: set[int]) -> None: if pid is None or pid <= 0: return @@ -252,6 +272,10 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li a live gateway when the PID file is stale/missing, and ``--all`` sweeps can discover gateways outside the current profile. """ + # Exclude the entire ancestor chain so the CLI process that invoked this + # scan (e.g. ``hermes gateway status``) is never mistaken for a running + # gateway. See #13242. 
+ exclude_pids = exclude_pids | _get_ancestor_pids() pids: list[int] = [] patterns = [ "hermes_cli.main gateway", @@ -690,6 +714,32 @@ def _print_gateway_process_mismatch(snapshot: GatewayRuntimeSnapshot) -> None: print(" can refuse to start another copy until this process stops.") +def _print_other_profiles_gateway_status() -> None: + """Print a summary of gateway status across all profiles. + + Shown at the bottom of ``hermes gateway status`` output so users with + multiple profiles can tell at a glance which gateways are running and + avoid confusing another profile's process with the current one. + """ + try: + from hermes_cli.profiles import get_active_profile_name + + current = get_active_profile_name() + other_processes = [ + p for p in find_profile_gateway_processes() + if p.profile != current + ] + if not other_processes: + return + + print() + print("Other profiles:") + for proc in other_processes: + print(f" ✓ {proc.profile:<16s} — PID {proc.pid}") + except Exception: + pass + + def kill_gateway_processes(force: bool = False, exclude_pids: set | None = None, all_profiles: bool = False) -> int: """Kill any running gateway processes. Returns count killed. @@ -1921,6 +1971,15 @@ def systemd_uninstall(system: bool = False): print(f"✓ {_service_scope_label(system).capitalize()} service uninstalled") +def _require_service_installed(action: str, system: bool = False) -> None: + unit_path = get_systemd_unit_path(system=system) + if not unit_path.exists(): + scope_flag = " --system" if system else "" + print(f"✗ Gateway service is not installed") + print(f" Run: {'sudo ' if system else ''}hermes gateway install{scope_flag}") + sys.exit(1) + + def systemd_start(system: bool = False): system = _select_systemd_scope(system) if system: @@ -1930,6 +1989,7 @@ def systemd_start(system: bool = False): # reachable (common on fresh RHEL/Debian SSH sessions without linger). # Raises UserSystemdUnavailableError with a remediation message. 
_preflight_user_systemd() + _require_service_installed("start", system=system) refresh_systemd_unit_if_needed(system=system) _run_systemctl(["start", get_service_name()], system=system, check=True, timeout=30) print(f"✓ {_service_scope_label(system).capitalize()} service started") @@ -1940,6 +2000,7 @@ def systemd_stop(system: bool = False): system = _select_systemd_scope(system) if system: _require_root_for_system_service("stop") + _require_service_installed("stop", system=system) _run_systemctl(["stop", get_service_name()], system=system, check=True, timeout=90) print(f"✓ {_service_scope_label(system).capitalize()} service stopped") @@ -1951,6 +2012,7 @@ def systemd_restart(system: bool = False): _require_root_for_system_service("restart") else: _preflight_user_systemd() + _require_service_installed("restart", system=system) refresh_systemd_unit_if_needed(system=system) from gateway.status import get_running_pid @@ -2442,6 +2504,20 @@ def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False): hasn't fully exited yet. """ sys.path.insert(0, str(PROJECT_ROOT)) + + # Refresh the systemd unit definition on every boot so that restart + # settings (RestartSec, StartLimitIntervalSec, etc.) stay current even + # when the process was respawned via exit-code-75 (stale-code or + # /restart) rather than through `hermes gateway restart` which already + # calls refresh_systemd_unit_if_needed(). Without this, a code update + # that ships new unit settings won't take effect until the next manual + # `hermes gateway start/restart` — leaving the gateway vulnerable to + # the exact failure mode the new settings were meant to prevent. 
+ if supports_systemd_services(): + try: + refresh_systemd_unit_if_needed(system=False) + except Exception: + pass # best-effort; don't block gateway startup from gateway.run import start_gateway @@ -4456,6 +4532,9 @@ def _gateway_command_inner(args): print(" hermes gateway install # Install as user service") print(" sudo hermes gateway install --system # Install as boot-time system service") + # Show other profiles' gateway status for multi-profile awareness + _print_other_profiles_gateway_status() + elif subcmd == "migrate-legacy": # Stop, disable, and remove legacy Hermes gateway unit files from # pre-rename installs (e.g. hermes.service). Profile units and diff --git a/hermes_cli/kanban.py b/hermes_cli/kanban.py index e23a4923f6..4befd64fa4 100644 --- a/hermes_cli/kanban.py +++ b/hermes_cli/kanban.py @@ -169,11 +169,93 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu "or docs/hermes-kanban-v1-spec.pdf for the full design." ), ) + # --- global --board flag --- + # Applies to every subcommand below. When set, scopes all reads and + # writes to that board's DB. When omitted, resolves via the + # HERMES_KANBAN_BOARD env var, then the persisted current-board + # file, then "default". See kanban_db.get_current_board(). + kanban_parser.add_argument( + "--board", + default=None, + metavar="<slug>", + help=( + "Board slug to operate on. Defaults to the current board " + "(set via `hermes kanban boards switch <slug>` or the " + "HERMES_KANBAN_BOARD env var). Use `hermes kanban boards list` " + "to see all boards." 
+ ), + ) sub = kanban_parser.add_subparsers(dest="kanban_action") # --- init --- sub.add_parser("init", help="Create kanban.db if missing (idempotent)") + # --- boards (new in v2: multi-project support) --- + p_boards = sub.add_parser( + "boards", + help="Manage kanban boards (one board per project / workstream)", + description=( + "Boards let you separate unrelated streams of work " + "(projects, repos, domains) into isolated queues. Each " + "board has its own DB, workspaces directory, and dispatcher " + "loop — tasks on one board cannot collide with tasks on " + "another. The first board is 'default' and always exists." + ), + ) + boards_sub = p_boards.add_subparsers(dest="boards_action") + + b_list = boards_sub.add_parser( + "list", aliases=["ls"], + help="List all boards with task counts", + ) + b_list.add_argument("--json", action="store_true") + b_list.add_argument("--all", action="store_true", + help="Include archived boards too") + + b_create = boards_sub.add_parser( + "create", aliases=["new"], + help="Create a new board", + ) + b_create.add_argument("slug", + help="Board slug (kebab-case, e.g. atm10-server)") + b_create.add_argument("--name", default=None, + help="Human-readable display name (defaults to Title Case of slug)") + b_create.add_argument("--description", default=None, + help="Optional description") + b_create.add_argument("--icon", default=None, + help="Optional emoji or single-character icon for the dashboard") + b_create.add_argument("--color", default=None, + help="Optional hex color (e.g. '#8b5cf6') for the dashboard") + b_create.add_argument("--switch", action="store_true", + help="Switch to the new board after creating it") + + b_rm = boards_sub.add_parser( + "rm", aliases=["remove", "delete"], + help="Archive (default) or delete a board", + ) + b_rm.add_argument("slug") + b_rm.add_argument("--delete", action="store_true", + help="Hard-delete the board directory instead of archiving it. 
" + "Default is to move it to boards/_archived/ so it's recoverable.") + + b_switch = boards_sub.add_parser( + "switch", aliases=["use"], + help="Set the active board for subsequent CLI calls", + ) + b_switch.add_argument("slug") + + boards_sub.add_parser( + "show", aliases=["current"], + help="Print the currently-active board slug", + ) + + b_rename = boards_sub.add_parser( + "rename", + help="Change a board's human-readable display name (slug is immutable)", + ) + b_rename.add_argument("slug") + b_rename.add_argument("name", help="New display name") + # --- create --- p_create = sub.add_parser("create", help="Create a new task") p_create.add_argument("title", help="Task title") @@ -366,7 +448,7 @@ def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.Argu # --- log --- p_log = sub.add_parser( "log", - help="Print the worker log for a task (from $HERMES_HOME/kanban/logs/)", + help="Print the worker log for a task (from <kanban-root>/kanban/logs/)", ) p_log.add_argument("task_id") p_log.add_argument("--tail", type=int, default=None, @@ -442,6 +524,38 @@ def kanban_command(args: argparse.Namespace) -> int: ) return 0 + # `--board <slug>` applies to every subcommand below by way of an + # env-var pin for the duration of this call. Using HERMES_KANBAN_BOARD + # (rather than threading `board=` through 50+ kb.connect() sites) + # keeps the patch small and inherits the exact same resolution the + # dispatcher uses for workers — consistency is a feature here. + board_override = getattr(args, "board", None) + if board_override: + try: + normed = kb._normalize_board_slug(board_override) + except ValueError as exc: + print(f"kanban: {exc}", file=sys.stderr) + return 2 + if not normed: + print("kanban: --board requires a slug", file=sys.stderr) + return 2 + # Boards other than 'default' must already exist — typoed slugs + # would otherwise silently create an empty board. 
+ if normed != kb.DEFAULT_BOARD and not kb.board_exists(normed): + print( + f"kanban: board {normed!r} does not exist. " + f"Create it with `hermes kanban boards create {normed}`.", + file=sys.stderr, + ) + return 1 + os.environ["HERMES_KANBAN_BOARD"] = normed + + # Boards management doesn't touch the DB at all — dispatch early so + # fresh installs that haven't initialized any DB can still use + # `hermes kanban boards create …`. + if action == "boards": + return _dispatch_boards(args) + # Auto-initialize the DB before dispatching any subcommand. init_db # is idempotent, so running it every invocation is cheap (one # SELECT against sqlite_master when tables already exist) and @@ -513,6 +627,185 @@ def _profile_author() -> str: return "user" +# --------------------------------------------------------------------------- +# Boards management (hermes kanban boards …) +# --------------------------------------------------------------------------- + +def _dispatch_boards(args: argparse.Namespace) -> int: + """Handle ``hermes kanban boards <action>``. + + Boards management is deliberately separate from the task-level + commands: it operates on the filesystem (board directories, + ``current`` pointer, ``board.json``), not on the per-board SQLite + DB, so a fresh HERMES_HOME that has never called ``kanban init`` + can still run ``boards create`` / ``boards list``. 
+ """ + sub = getattr(args, "boards_action", None) or "list" + if sub in ("list", "ls"): + return _cmd_boards_list(args) + if sub in ("create", "new"): + return _cmd_boards_create(args) + if sub in ("rm", "remove", "delete"): + return _cmd_boards_rm(args) + if sub in ("switch", "use"): + return _cmd_boards_switch(args) + if sub in ("show", "current"): + return _cmd_boards_show(args) + if sub == "rename": + return _cmd_boards_rename(args) + print(f"kanban boards: unknown action {sub!r}", file=sys.stderr) + return 2 + + +def _board_task_counts(slug: str) -> dict[str, int]: + """Return ``{status: count}`` for a board. Safe to call on an empty DB.""" + try: + path = kb.kanban_db_path(board=slug) + if not path.exists(): + return {} + with kb.connect(board=slug) as conn: + rows = conn.execute( + "SELECT status, COUNT(*) AS n FROM tasks GROUP BY status" + ).fetchall() + return {r["status"]: int(r["n"]) for r in rows} + except Exception: + return {} + + +def _cmd_boards_list(args: argparse.Namespace) -> int: + include_archived = bool(getattr(args, "all", False)) + boards = kb.list_boards(include_archived=include_archived) + # Enrich each entry with task counts + whether it's the current board. + current = kb.get_current_board() + for b in boards: + b["is_current"] = (b["slug"] == current) + b["counts"] = _board_task_counts(b["slug"]) + b["total"] = sum(b["counts"].values()) + if getattr(args, "json", False): + print(json.dumps(boards, indent=2, ensure_ascii=False)) + return 0 + # Human table: marker (•) for current, slug, display name, counts. 
+ if not boards: + print("(no boards — create one with `hermes kanban boards create <slug>`)") + return 0 + print(f"{'':2s} {'SLUG':24s} {'NAME':28s} COUNTS") + for b in boards: + marker = "●" if b["is_current"] else " " + counts = b["counts"] or {} + counts_str = ( + ", ".join(f"{k}={v}" for k, v in sorted(counts.items())) + or "(empty)" + ) + name = b.get("name") or "" + if b.get("archived"): + name += " [archived]" + print(f"{marker:2s} {b['slug']:24s} {name:28s} {counts_str}") + print() + print(f"Current board: {current}") + if len(boards) > 1: + print("Switch boards with `hermes kanban boards switch <slug>`.") + return 0 + + +def _cmd_boards_create(args: argparse.Namespace) -> int: + try: + normed = kb._normalize_board_slug(args.slug) + except ValueError as exc: + print(f"kanban boards create: {exc}", file=sys.stderr) + return 2 + if not normed: + print("kanban boards create: slug is required", file=sys.stderr) + return 2 + already = kb.board_exists(normed) and normed != kb.DEFAULT_BOARD + meta = kb.create_board( + normed, + name=args.name, + description=args.description, + icon=args.icon, + color=args.color, + ) + verb = "already exists" if already else "created" + print(f"Board {meta['slug']!r} {verb}.") + print(f" Display name: {meta.get('name', '')}") + print(f" DB path: {meta['db_path']}") + if getattr(args, "switch", False): + kb.set_current_board(meta["slug"]) + print(f" Switched to {meta['slug']!r}.") + else: + print(f" Use `hermes kanban boards switch {meta['slug']}` to make it current.") + return 0 + + +def _cmd_boards_rm(args: argparse.Namespace) -> int: + try: + res = kb.remove_board(args.slug, archive=not getattr(args, "delete", False)) + except ValueError as exc: + print(f"kanban boards rm: {exc}", file=sys.stderr) + return 1 + if res["action"] == "archived": + print(f"Board {res['slug']!r} archived → {res['new_path']}") + print("Recover by moving the directory back to " + "<root>/kanban/boards/<slug>/.") + else: + print(f"Board {res['slug']!r} 
deleted.") + return 0 + + +def _cmd_boards_switch(args: argparse.Namespace) -> int: + try: + normed = kb._normalize_board_slug(args.slug) + except ValueError as exc: + print(f"kanban boards switch: {exc}", file=sys.stderr) + return 2 + if not normed: + print("kanban boards switch: slug is required", file=sys.stderr) + return 2 + if not kb.board_exists(normed): + print( + f"kanban boards switch: board {normed!r} does not exist. " + f"Create it with `hermes kanban boards create {normed}`.", + file=sys.stderr, + ) + return 1 + kb.set_current_board(normed) + print(f"Active board is now {normed!r}.") + return 0 + + +def _cmd_boards_show(args: argparse.Namespace) -> int: + current = kb.get_current_board() + meta = kb.read_board_metadata(current) + counts = _board_task_counts(current) + total = sum(counts.values()) + print(f"Current board: {current}") + print(f" Display name: {meta.get('name', '')}") + if meta.get("description"): + print(f" Description: {meta['description']}") + print(f" DB path: {meta['db_path']}") + print(f" Tasks: {total} total" + + (f" ({', '.join(f'{k}={v}' for k, v in sorted(counts.items()))})" + if counts else "")) + return 0 + + +def _cmd_boards_rename(args: argparse.Namespace) -> int: + try: + normed = kb._normalize_board_slug(args.slug) + except ValueError as exc: + print(f"kanban boards rename: {exc}", file=sys.stderr) + return 2 + if not normed or not kb.board_exists(normed): + print(f"kanban boards rename: board {args.slug!r} does not exist", + file=sys.stderr) + return 1 + meta = kb.write_board_metadata(normed, name=args.name) + print(f"Board {normed!r} renamed to {meta['name']!r}.") + return 0 + + +# --------------------------------------------------------------------------- + + def _parse_duration(val) -> Optional[int]: """Parse ``30s`` / ``5m`` / ``2h`` / ``1d`` or a raw integer → seconds. 
@@ -662,6 +955,21 @@ def _cmd_list(args: argparse.Namespace) -> int: if getattr(args, "json", False): print(json.dumps([_task_to_dict(t) for t in tasks], indent=2, ensure_ascii=False)) return 0 + # Passive discoverability: when the user has multiple boards, surface + # which one they're looking at in the list header. Single-board users + # never see this — the feature stays invisible until you opt in. + try: + all_boards = kb.list_boards(include_archived=False) + except Exception: + all_boards = [] + if len(all_boards) > 1: + current = kb.get_current_board() + other_count = len(all_boards) - 1 + print( + f"Board: {current} " + f"({other_count} other board{'s' if other_count != 1 else ''} — " + f"`hermes kanban boards list`)\n" + ) if not tasks: print("(no matching tasks)") return 0 diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py index 1e8be214fb..a58e542ac6 100644 --- a/hermes_cli/kanban_db.py +++ b/hermes_cli/kanban_db.py @@ -1,8 +1,56 @@ -"""SQLite-backed Kanban board for multi-profile collaboration. +"""SQLite-backed Kanban board for multi-profile, multi-project collaboration. -The board lives at ``$HERMES_HOME/kanban.db`` (profile-agnostic on purpose: -multiple profiles on the same machine all see the same board, which IS the -coordination primitive). +In a fresh install the board lives at ``<root>/kanban.db`` where +``<root>`` is the **shared Hermes root** (the parent of any active +profile). Profiles intentionally collapse onto a shared board: it IS +the cross-profile coordination primitive. A worker spawned with +``hermes -p <profile>`` joins the same board as the dispatcher that +claimed the task. The same applies to ``<root>/kanban/workspaces/`` and +``<root>/kanban/logs/``. + +**Multiple boards (projects):** users can create additional boards to +separate unrelated streams of work (e.g. one per project / repo / domain). 
+Each board is a directory under ``<root>/kanban/boards/<slug>/`` with
+its own ``kanban.db``, ``workspaces/``, and ``logs/``. All boards share
+the profile's Hermes home but are otherwise isolated: a worker spawned
+for a task on board ``atm10-server`` sees only that board's tasks,
+cannot enumerate other boards, and its dispatcher ticks don't touch
+other boards' DBs.
+
+The first (and for single-project users, only) board is ``default``.
+For back-compat its on-disk DB is ``<root>/kanban.db`` (not
+``boards/default/kanban.db``), so installs that predate the boards
+feature keep working with zero migration. See :func:`kanban_db_path`.
+
+Board resolution order (highest precedence first, all optional):
+
+* ``HERMES_KANBAN_DB`` env var (pins the DB file path directly — legacy
+  override still honoured; beats everything below, including an explicit
+  ``board=``, because it names the file itself).
+* ``board=`` argument passed directly to :func:`connect` / :func:`init_db`
+  (explicit — used by the CLI ``--board`` flag and the dashboard
+  ``?board=...`` query param).
+* ``HERMES_KANBAN_BOARD`` env var (used by the dispatcher to pin workers
+  to the board their task lives on — workers cannot see other boards).
+* ``<root>/kanban/current`` — a one-line text file holding the slug of
+  the "currently selected" board. Written by ``hermes kanban boards
+  switch <slug>``. When absent, the active board is ``default``.
+
+In standard installs ``<root>`` is ``~/.hermes``. In Docker / custom
+deployments where ``HERMES_HOME`` points outside ``~/.hermes`` (e.g.
+``/opt/hermes``), ``<root>`` is ``HERMES_HOME``. Legacy env-var
+overrides still work:
+
+* ``HERMES_KANBAN_DB`` — pin the database file path directly.
+* ``HERMES_KANBAN_WORKSPACES_ROOT`` — pin the workspaces root directly.
+* ``HERMES_KANBAN_HOME`` — pin the umbrella root that anchors kanban
+  paths. Useful for tests and unusual deployments. 
+ +The dispatcher injects ``HERMES_KANBAN_DB``, +``HERMES_KANBAN_WORKSPACES_ROOT``, and ``HERMES_KANBAN_BOARD`` into +worker subprocess env so workers converge on the exact DB the +dispatcher used to claim their task — even under unusual symlink or +Docker layouts. Schema is intentionally small: tasks, task_links, task_comments, task_events. The ``workspace_kind`` field decouples coordination from git @@ -15,6 +63,9 @@ transactions + compare-and-swap (CAS) updates on ``tasks.status`` and ``tasks.claim_lock``. SQLite serializes writers via its WAL lock, so at most one claimer can win any given task. Losers observe zero affected rows and move on -- no retry loops, no distributed-lock machinery. +The CAS coordination is **per-board** — each board is a separate DB, +so multi-board installs get the same atomicity guarantees without any +new locking. """ from __future__ import annotations @@ -22,6 +73,7 @@ from __future__ import annotations import contextlib import json import os +import re import secrets import sqlite3 import sys @@ -61,16 +113,438 @@ _CTX_MAX_COMMENT_BYTES = 2 * 1024 # 2 KB per comment # Paths # --------------------------------------------------------------------------- -def kanban_db_path() -> Path: - """Return the path to ``kanban.db`` inside the active HERMES_HOME.""" - from hermes_constants import get_hermes_home - return get_hermes_home() / "kanban.db" +DEFAULT_BOARD = "default" + +# Slug validator: lowercase alphanumerics, digits, hyphens; 1–64 chars. +# Strict enough to stop traversal (`..`) and embedded path separators, loose +# enough that kebab-case names like ``atm10-server`` or ``hermes-agent`` +# pass without fuss. Board names with display formatting (spaces, emoji) +# live in ``board.json``; the slug is just the directory name. 
+_BOARD_SLUG_RE = re.compile(r"^[a-z0-9][a-z0-9\-_]{0,63}$") -def workspaces_root() -> Path: - """Return the directory under which ``scratch`` workspaces are created.""" - from hermes_constants import get_hermes_home - return get_hermes_home() / "kanban" / "workspaces" +def _normalize_board_slug(slug: Optional[str]) -> Optional[str]: + """Lowercase + strip a slug; validate; return ``None`` for empty.""" + if slug is None: + return None + s = str(slug).strip().lower() + if not s: + return None + if not _BOARD_SLUG_RE.match(s): + raise ValueError( + f"invalid board slug {slug!r}: must be 1-64 chars, lowercase " + f"alphanumerics / hyphens / underscores, not starting with '-' or '_'" + ) + return s + + +def kanban_home() -> Path: + """Return the shared Hermes root that anchors the kanban board. + + Resolution order: + + 1. ``HERMES_KANBAN_HOME`` env var when set and non-empty (explicit + override for tests and unusual deployments). + 2. ``get_default_hermes_root()``, which already returns ``<root>`` + when ``HERMES_HOME`` is ``<root>/profiles/<name>``, and returns + ``HERMES_HOME`` directly for Docker / custom deployments. + + The kanban board is shared across profiles **by design** (see the + module docstring). Resolving the kanban paths through the active + profile's ``HERMES_HOME`` would silently fork the board per profile, + which breaks the dispatcher / worker handoff. + """ + override = os.environ.get("HERMES_KANBAN_HOME", "").strip() + if override: + return Path(override).expanduser() + from hermes_constants import get_default_hermes_root + return get_default_hermes_root() + + +def boards_root() -> Path: + """Return ``<root>/kanban/boards`` — the parent of non-default board dirs. + + ``default`` is intentionally NOT under this directory — its DB lives at + ``<root>/kanban.db`` for back-compat with pre-boards installs. This + function returns the directory where *additional* named boards live, + used by :func:`list_boards` to enumerate them. 
+ """ + return kanban_home() / "kanban" / "boards" + + +def current_board_path() -> Path: + """Return the path to ``<root>/kanban/current``. + + One-line text file written by ``hermes kanban boards switch <slug>`` + to persist the user's board selection across CLI invocations. Absent + by default (meaning: active board is ``default``). + """ + return kanban_home() / "kanban" / "current" + + +def get_current_board() -> str: + """Return the active board slug, honouring the resolution chain. + + Order (highest precedence first): + + 1. ``HERMES_KANBAN_BOARD`` env var (set by the dispatcher on worker + spawn, or manually for ad-hoc overrides). + 2. ``<root>/kanban/current`` on disk (set by ``hermes kanban boards + switch``). + 3. ``DEFAULT_BOARD`` (``"default"``). + + A malformed slug at any step falls through to the next layer with a + best-effort warning — the dispatcher must never crash because a user + hand-edited a file. + """ + env = os.environ.get("HERMES_KANBAN_BOARD", "").strip() + if env: + try: + normed = _normalize_board_slug(env) + if normed: + return normed + except ValueError: + pass + try: + f = current_board_path() + if f.exists(): + val = f.read_text(encoding="utf-8").strip() + if val: + try: + normed = _normalize_board_slug(val) + if normed: + return normed + except ValueError: + pass + except OSError: + pass + return DEFAULT_BOARD + + +def set_current_board(slug: str) -> Path: + """Persist ``slug`` as the active board. Returns the file written. + + Writes ``<root>/kanban/current``. The caller should validate the slug + exists first (via :func:`board_exists`) — this function does not — + so that ``hermes kanban boards switch <typo>`` returns an error + instead of silently pointing at nothing. 
+ """ + normed = _normalize_board_slug(slug) + if not normed: + raise ValueError("board slug is required") + path = current_board_path() + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(normed + "\n", encoding="utf-8") + return path + + +def clear_current_board() -> None: + """Remove ``<root>/kanban/current`` so the active board reverts to ``default``.""" + try: + current_board_path().unlink() + except FileNotFoundError: + pass + + +def board_dir(board: Optional[str] = None) -> Path: + """Return the on-disk directory for ``board``. + + ``default`` is ``<root>/kanban/boards/default/`` **for metadata only** + (board.json + workspaces/ + logs/). Its DB file stays at + ``<root>/kanban.db`` for back-compat — see :func:`kanban_db_path`. + + All other boards live at ``<root>/kanban/boards/<slug>/`` with + everything inside that directory including the ``kanban.db``. + """ + slug = _normalize_board_slug(board) or DEFAULT_BOARD + return boards_root() / slug + + +def board_exists(board: Optional[str] = None) -> bool: + """Return True if the board has a DB or a metadata dir on disk. + + ``default`` is considered to always exist — its DB is created + on first :func:`connect` and there's no way for it to be missing + in a configuration where the kanban feature is usable at all. + """ + slug = _normalize_board_slug(board) or DEFAULT_BOARD + if slug == DEFAULT_BOARD: + return True + d = board_dir(slug) + return d.is_dir() or (d / "kanban.db").exists() + + +def kanban_db_path(board: Optional[str] = None) -> Path: + """Return the path to the ``kanban.db`` for ``board``. + + Resolution (highest precedence first): + + 1. ``HERMES_KANBAN_DB`` env var — pins the path directly. Honoured for + back-compat and for the dispatcher→worker handoff (defense in + depth: dispatcher injects this into worker env so workers are + immune to any path-resolution disagreement). + 2. When ``board`` arg is None, the active board from + :func:`get_current_board` is used. + 3. 
Board ``default`` → ``<root>/kanban.db`` (back-compat path). + Other boards → ``<root>/kanban/boards/<slug>/kanban.db``. + """ + override = os.environ.get("HERMES_KANBAN_DB", "").strip() + if override: + return Path(override).expanduser() + slug = _normalize_board_slug(board) + if slug is None: + slug = get_current_board() + if slug == DEFAULT_BOARD: + return kanban_home() / "kanban.db" + return board_dir(slug) / "kanban.db" + + +def workspaces_root(board: Optional[str] = None) -> Path: + """Return the directory under which ``scratch`` workspaces are created. + + Anchored per-board so workspaces don't leak between projects. + ``HERMES_KANBAN_WORKSPACES_ROOT`` pins the path directly (highest + precedence) — the dispatcher injects this into worker env. + + ``default`` keeps the legacy path ``<root>/kanban/workspaces/`` so + that existing scratch workspaces from before the boards feature are + preserved. Other boards use ``<root>/kanban/boards/<slug>/workspaces/``. + """ + override = os.environ.get("HERMES_KANBAN_WORKSPACES_ROOT", "").strip() + if override: + return Path(override).expanduser() + slug = _normalize_board_slug(board) + if slug is None: + slug = get_current_board() + if slug == DEFAULT_BOARD: + return kanban_home() / "kanban" / "workspaces" + return board_dir(slug) / "workspaces" + + +def worker_logs_dir(board: Optional[str] = None) -> Path: + """Return the directory under which per-task worker logs are written. + + ``default`` keeps the legacy path ``<root>/kanban/logs/``. Other + boards use ``<root>/kanban/boards/<slug>/logs/``. Logs follow the + board — makes ``hermes kanban log`` unambiguous even when multiple + boards have tasks with the same id. 
+ """ + slug = _normalize_board_slug(board) + if slug is None: + slug = get_current_board() + if slug == DEFAULT_BOARD: + return kanban_home() / "kanban" / "logs" + return board_dir(slug) / "logs" + + +def board_metadata_path(board: Optional[str] = None) -> Path: + """Return the path to ``board.json`` for ``board``. + + Stores display metadata (display name, description, icon, color, + created_at). The on-disk slug is the canonical identity; this file + is purely for presentation in the CLI / dashboard. + """ + slug = _normalize_board_slug(board) or DEFAULT_BOARD + return board_dir(slug) / "board.json" + + +def _default_board_display_name(slug: str) -> str: + """Turn a slug into a reasonable default display name. + + ``atm10-server`` → ``Atm10 Server``. Users can override via + ``board.json`` but the default should look presentable in the + dashboard without any follow-up editing. + """ + return " ".join(part.capitalize() for part in slug.replace("_", "-").split("-") if part) or slug + + +def read_board_metadata(board: Optional[str] = None) -> dict: + """Return ``board.json`` contents (or synthesized defaults). + + Never raises — a missing / malformed ``board.json`` falls back to a + synthesised entry so the dashboard always has something to render. + Includes the canonical ``slug`` and ``db_path`` so the caller + doesn't need to reconstruct them. + """ + slug = _normalize_board_slug(board) or DEFAULT_BOARD + meta: dict[str, Any] = { + "slug": slug, + "name": _default_board_display_name(slug), + "description": "", + "icon": "", + "color": "", + "created_at": None, + "archived": False, + } + try: + p = board_metadata_path(slug) + if p.exists(): + raw = json.loads(p.read_text(encoding="utf-8")) + if isinstance(raw, dict): + # Never let the metadata file claim a different slug than + # its directory — trust the filesystem. 
+ raw["slug"] = slug + meta.update(raw) + except (OSError, json.JSONDecodeError): + pass + meta["db_path"] = str(kanban_db_path(slug)) + return meta + + +def write_board_metadata( + board: Optional[str], + *, + name: Optional[str] = None, + description: Optional[str] = None, + icon: Optional[str] = None, + color: Optional[str] = None, + archived: Optional[bool] = None, +) -> dict: + """Create / update ``board.json`` for ``board``. + + Preserves any existing fields not mentioned in the call. Sets + ``created_at`` on first write. Returns the resulting metadata dict. + """ + slug = _normalize_board_slug(board) or DEFAULT_BOARD + meta = read_board_metadata(slug) + # Preserve existing DB-derived fields — they get re-computed each + # read but shouldn't be written into board.json. + meta.pop("db_path", None) + if name is not None: + meta["name"] = str(name).strip() or _default_board_display_name(slug) + if description is not None: + meta["description"] = str(description) + if icon is not None: + meta["icon"] = str(icon) + if color is not None: + meta["color"] = str(color) + if archived is not None: + meta["archived"] = bool(archived) + if not meta.get("created_at"): + meta["created_at"] = int(time.time()) + path = board_metadata_path(slug) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text( + json.dumps(meta, indent=2, ensure_ascii=False) + "\n", + encoding="utf-8", + ) + meta["db_path"] = str(kanban_db_path(slug)) + return meta + + +def create_board( + slug: str, + *, + name: Optional[str] = None, + description: Optional[str] = None, + icon: Optional[str] = None, + color: Optional[str] = None, +) -> dict: + """Create a new board directory + DB + metadata. Idempotent. + + Returns the resulting metadata. Raises :class:`ValueError` for a + malformed slug; returns the existing metadata (not an error) if the + board already exists — matching ``mkdir -p`` semantics. 
+ """ + normed = _normalize_board_slug(slug) + if not normed: + raise ValueError("board slug is required") + meta = write_board_metadata( + normed, + name=name, + description=description, + icon=icon, + color=color, + ) + # Touch the DB so list_boards() sees it immediately. + init_db(board=normed) + return meta + + +def list_boards(*, include_archived: bool = True) -> list[dict]: + """Enumerate all boards that exist on disk. + + Always includes ``default`` (even when the ``boards/default/`` + metadata dir doesn't exist, because its DB is at the legacy path). + Other boards are discovered by scanning ``boards/`` for subdirectories + that either contain a ``kanban.db`` or a ``board.json``. + + Returns a list of metadata dicts, sorted with ``default`` first and + the rest alphabetically. + """ + entries: list[dict] = [] + seen: set[str] = set() + + # Default board is always first. + entries.append(read_board_metadata(DEFAULT_BOARD)) + seen.add(DEFAULT_BOARD) + + root = boards_root() + if root.is_dir(): + for child in sorted(root.iterdir(), key=lambda p: p.name.lower()): + if not child.is_dir(): + continue + slug = child.name + # Keep slug normalisation soft for discovery — but skip dirs + # that don't parse as valid slugs so we don't surface junk. + try: + normed = _normalize_board_slug(slug) + except ValueError: + continue + if not normed or normed in seen: + continue + has_db = (child / "kanban.db").exists() + has_meta = (child / "board.json").exists() + if not (has_db or has_meta): + continue + meta = read_board_metadata(normed) + if meta.get("archived") and not include_archived: + continue + entries.append(meta) + seen.add(normed) + return entries + + +def remove_board(slug: str, *, archive: bool = True) -> dict: + """Remove or archive a board. + + ``archive=True`` (default) moves the board's directory to + ``<root>/kanban/boards/_archived/<slug>-<timestamp>/`` so the data + is recoverable. ``archive=False`` deletes the directory outright. 
+ + The ``default`` board cannot be removed — raises :class:`ValueError`. + Returns a summary dict describing what happened (``{"slug", "action", + "new_path"}``). + """ + normed = _normalize_board_slug(slug) + if not normed: + raise ValueError("board slug is required") + if normed == DEFAULT_BOARD: + raise ValueError("the 'default' board cannot be removed") + d = board_dir(normed) + if not d.exists(): + raise ValueError(f"board {normed!r} does not exist") + + # If the user removed the currently-active board, revert to default. + if get_current_board() == normed: + clear_current_board() + + if archive: + archive_root = boards_root() / "_archived" + archive_root.mkdir(parents=True, exist_ok=True) + ts = int(time.time()) + target = archive_root / f"{normed}-{ts}" + # Avoid collision on rapid double-archives. + suffix = 1 + while target.exists(): + target = archive_root / f"{normed}-{ts}-{suffix}" + suffix += 1 + d.rename(target) + return {"slug": normed, "action": "archived", "new_path": str(target)} + else: + import shutil + shutil.rmtree(d) + return {"slug": normed, "action": "deleted", "new_path": ""} # --------------------------------------------------------------------------- @@ -368,7 +842,11 @@ CREATE INDEX IF NOT EXISTS idx_notify_task ON kanban_notify_subs(task_ _INITIALIZED_PATHS: set[str] = set() -def connect(db_path: Optional[Path] = None) -> sqlite3.Connection: +def connect( + db_path: Optional[Path] = None, + *, + board: Optional[str] = None, +) -> sqlite3.Connection: """Open (and initialize if needed) the kanban DB. WAL mode is enabled on every connection; it's a no-op after the first @@ -378,8 +856,19 @@ def connect(db_path: Optional[Path] = None) -> sqlite3.Connection: fresh installs and test harnesses that construct `connect()` directly don't have to remember a separate init step. Subsequent connections skip the schema check via a module-level path cache. + + Path resolution: + + * ``db_path`` explicit → used as-is (legacy callers, tests). 
+ * ``board`` explicit → resolves to that board's DB. + * Neither → :func:`kanban_db_path` resolves via + ``HERMES_KANBAN_DB`` env → ``HERMES_KANBAN_BOARD`` env → + ``<root>/kanban/current`` → ``default``. """ - path = db_path or kanban_db_path() + if db_path is not None: + path = db_path + else: + path = kanban_db_path(board=board) path.parent.mkdir(parents=True, exist_ok=True) resolved = str(path.resolve()) needs_init = resolved not in _INITIALIZED_PATHS @@ -398,7 +887,11 @@ def connect(db_path: Optional[Path] = None) -> sqlite3.Connection: return conn -def init_db(db_path: Optional[Path] = None) -> Path: +def init_db( + db_path: Optional[Path] = None, + *, + board: Optional[str] = None, +) -> Path: """Create the schema if it doesn't exist; return the path used. Kept as a public entry point so CLI ``hermes kanban init`` and the @@ -409,7 +902,10 @@ def init_db(db_path: Optional[Path] = None) -> Path: external tools that upgrade an old DB file — can call this to force re-migration. """ - path = db_path or kanban_db_path() + if db_path is not None: + path = db_path + else: + path = kanban_db_path(board=board) path.parent.mkdir(parents=True, exist_ok=True) resolved = str(path.resolve()) # Clear the cache entry so the underlying connect() re-runs the @@ -590,6 +1086,15 @@ def _claimer_id() -> str: # Task creation / mutation # --------------------------------------------------------------------------- +def _canonical_assignee(assignee: Optional[str]) -> Optional[str]: + """Lowercase-assignee normalization for Kanban rows (dashboard/CLI parity).""" + if assignee is None: + return None + from hermes_cli.profiles import normalize_profile_name + + return normalize_profile_name(assignee) + + def create_task( conn: sqlite3.Connection, *, @@ -631,6 +1136,7 @@ def create_task( (e.g. ``skills=["translation"]`` so the worker loads the translation skill regardless of the profile's default config). 
""" + assignee = _canonical_assignee(assignee) if not title or not title.strip(): raise ValueError("title is required") if workspace_kind not in VALID_WORKSPACE_KINDS: @@ -795,7 +1301,7 @@ def list_tasks( params: list[Any] = [] if assignee is not None: query += " AND assignee = ?" - params.append(assignee) + params.append(_canonical_assignee(assignee)) if status is not None: if status not in VALID_STATUSES: raise ValueError(f"status must be one of {sorted(VALID_STATUSES)}") @@ -819,6 +1325,7 @@ def assign_task(conn: sqlite3.Connection, task_id: str, profile: Optional[str]) Refuses to reassign a task that's currently running (claim_lock set). Reassign after the current run completes if needed. """ + profile = _canonical_assignee(profile) with write_txn(conn): row = conn.execute( "SELECT status, claim_lock FROM tasks WHERE id = ?", (task_id,) @@ -1513,15 +2020,18 @@ def archive_task(conn: sqlite3.Connection, task_id: str) -> bool: # Workspace resolution # --------------------------------------------------------------------------- -def resolve_workspace(task: Task) -> Path: +def resolve_workspace(task: Task, *, board: Optional[str] = None) -> Path: """Resolve (and create if needed) the workspace for a task. - - ``scratch``: a fresh dir under ``$HERMES_HOME/kanban/workspaces/<id>/``. + - ``scratch``: a fresh dir under ``<board-root>/workspaces/<id>/``, + where ``<board-root>`` is the active board's root. The path is the + same for the dispatcher and every profile worker, so handoff is + path-stable. - ``dir:<path>``: the path stored in ``workspace_path``. Created if missing. MUST be absolute — relative paths are rejected to prevent confused-deputy traversal where ``../../../tmp/attacker`` resolves against the dispatcher's CWD instead of a meaningful - root. Users who want a HERMES_HOME-relative workspace should + root. Users who want a kanban-root-relative workspace should compute the absolute path themselves. - ``worktree``: a git worktree at ``workspace_path``. 
Not created automatically in v1 -- the kanban-worker skill documents @@ -1543,7 +2053,7 @@ def resolve_workspace(task: Task) -> Path: f"{task.workspace_path!r}; workspace paths must be absolute" ) else: - p = workspaces_root() / task.id + p = workspaces_root(board=board) / task.id p.mkdir(parents=True, exist_ok=True) return p if kind == "dir": @@ -1957,6 +2467,7 @@ def dispatch_once( dry_run: bool = False, max_spawn: Optional[int] = None, failure_limit: int = DEFAULT_SPAWN_FAILURE_LIMIT, + board: Optional[str] = None, ) -> DispatchResult: """Run one dispatcher tick. @@ -1965,15 +2476,17 @@ def dispatch_once( 2. Reclaim crashed running tasks (host-local PID no longer alive). 3. Promote todo -> ready where all parents are done. 4. For each ready task with an assignee, atomically claim and call - ``spawn_fn(task, workspace_path) -> Optional[int]``. The return - value (if any) is recorded as ``worker_pid`` so subsequent ticks - can detect crashes before the TTL expires. + ``spawn_fn(task, workspace_path, board) -> Optional[int]``. The + return value (if any) is recorded as ``worker_pid`` so subsequent + ticks can detect crashes before the TTL expires. Spawn failures are counted per-task. After ``failure_limit`` consecutive failures the task is auto-blocked with the last error as its reason — prevents the dispatcher from thrashing forever on an unfixable task. ``spawn_fn`` defaults to ``_default_spawn``. Tests pass a stub. + ``board`` pins workspace/log/db resolution for this tick to a specific + board. When omitted, the current-board resolution chain is used. 
""" result = DispatchResult() result.reclaimed = release_stale_claims(conn) @@ -2000,7 +2513,7 @@ def dispatch_once( if claimed is None: continue try: - workspace = resolve_workspace(claimed) + workspace = resolve_workspace(claimed, board=board) except Exception as exc: auto = _record_spawn_failure( conn, claimed.id, f"workspace: {exc}", @@ -2013,7 +2526,18 @@ def dispatch_once( set_workspace_path(conn, claimed.id, str(workspace)) _spawn = spawn_fn if spawn_fn is not None else _default_spawn try: - pid = _spawn(claimed, str(workspace)) + # Back-compat: older spawn_fn signatures accept only + # (task, workspace). Test stubs in the suite rely on that. + # Introspect the callable and pass `board` only when supported. + import inspect + try: + sig = inspect.signature(_spawn) + if "board" in sig.parameters: + pid = _spawn(claimed, str(workspace), board=board) + else: + pid = _spawn(claimed, str(workspace)) + except (TypeError, ValueError): + pid = _spawn(claimed, str(workspace)) if pid: _set_worker_pid(conn, claimed.id, int(pid)) _clear_spawn_failures(conn, claimed.id) @@ -2052,33 +2576,60 @@ def _rotate_worker_log(log_path: Path, max_bytes: int) -> None: pass -def _default_spawn(task: Task, workspace: str) -> Optional[int]: +def _default_spawn( + task: Task, + workspace: str, + *, + board: Optional[str] = None, +) -> Optional[int]: """Fire-and-forget ``hermes -p <profile> chat -q ...`` subprocess. Returns the spawned child's PID so the dispatcher can detect crashes before the claim TTL expires. The child's completion is still observed via the ``complete`` / ``block`` transitions the worker writes itself; the PID check is a safety net for crashes, OOM kills, and Ctrl+C. + + ``board`` pins the child's kanban context to that board: the child's + ``HERMES_KANBAN_DB`` / ``HERMES_KANBAN_BOARD`` / workspaces_root env + vars all resolve to the same board the dispatcher claimed the task + from. Workers cannot accidentally see other boards. 
""" import subprocess if not task.assignee: raise ValueError(f"task {task.id} has no assignee") + from hermes_cli.profiles import normalize_profile_name + + profile_arg = normalize_profile_name(task.assignee) + prompt = f"work kanban task {task.id}" env = dict(os.environ) if task.tenant: env["HERMES_TENANT"] = task.tenant env["HERMES_KANBAN_TASK"] = task.id env["HERMES_KANBAN_WORKSPACE"] = workspace + # Pin the shared board + workspaces root the dispatcher resolved, so + # that even when the worker activates a profile (`hermes -p <name>` + # rewrites HERMES_HOME), its kanban paths still match the + # dispatcher's. Belt-and-braces with the `get_default_hermes_root()` + # resolution in `kanban_home()` — symmetric resolution is the norm, + # but unusual symlink / Docker layouts are caught here too. + env["HERMES_KANBAN_DB"] = str(kanban_db_path(board=board)) + env["HERMES_KANBAN_WORKSPACES_ROOT"] = str(workspaces_root(board=board)) + # Board slug — the final defense-in-depth pin. If the worker ever + # resolves kanban paths without the DB / workspaces env vars, the + # board slug still forces it to the right directory. + resolved_board = _normalize_board_slug(board) or get_current_board() + env["HERMES_KANBAN_BOARD"] = resolved_board # HERMES_PROFILE is the author the kanban_comment tool defaults to. # `hermes -p <assignee>` activates the profile, but the env var is # what the tool reads — set it explicitly here so comments are # attributed correctly regardless of how the child loads config. 
- env["HERMES_PROFILE"] = task.assignee + env["HERMES_PROFILE"] = profile_arg cmd = [ "hermes", - "-p", task.assignee, + "-p", profile_arg, # Auto-load the kanban-worker skill so every dispatched worker # has the pattern library (good summary/metadata shapes, retry # diagnostics, block-reason examples) in its context, even if @@ -2104,9 +2655,11 @@ def _default_spawn(task: Task, workspace: str) -> Optional[int]: "chat", "-q", prompt, ]) - # Redirect output to a per-task log under HERMES_HOME/kanban/logs/. - from hermes_constants import get_hermes_home - log_dir = get_hermes_home() / "kanban" / "logs" + # Redirect output to a per-task log under <board-root>/logs/. + # Anchored at the board root (not the shared kanban root), so + # `hermes kanban log` on a specific board reads its own file and + # logs don't collide across boards that happen to share task ids. + log_dir = worker_logs_dir(board=board) log_dir.mkdir(parents=True, exist_ok=True) log_path = log_dir / f"{task.id}.log" _rotate_worker_log(log_path, DEFAULT_LOG_ROTATE_BYTES) @@ -2587,12 +3140,14 @@ def gc_events( def gc_worker_logs( *, older_than_seconds: int = 30 * 24 * 3600, + board: Optional[str] = None, ) -> int: """Delete worker log files older than ``older_than_seconds``. Returns the number of files removed. Kept separate from ``gc_events`` because - log files live on disk, not in SQLite.""" - from hermes_constants import get_hermes_home - log_dir = get_hermes_home() / "kanban" / "logs" + log files live on disk, not in SQLite. 
Scoped to ``board`` (defaults + to the active board) — per-board isolation means deleting logs from + board A cannot touch board B's logs.""" + log_dir = worker_logs_dir(board=board) if not log_dir.exists(): return 0 cutoff = time.time() - older_than_seconds @@ -2611,20 +3166,25 @@ def gc_worker_logs( # Worker log accessor # --------------------------------------------------------------------------- -def worker_log_path(task_id: str) -> Path: +def worker_log_path(task_id: str, *, board: Optional[str] = None) -> Path: """Return the path to a worker's log file. The file may not exist - (task never spawned, or log already GC'd).""" - from hermes_constants import get_hermes_home - return get_hermes_home() / "kanban" / "logs" / f"{task_id}.log" + (task never spawned, or log already GC'd). + + When ``board`` is None, resolves via the active board (env var → + current-board file → default). The dispatcher always passes the + board explicitly to avoid any resolution ambiguity when multiple + boards exist.""" + return worker_logs_dir(board=board) / f"{task_id}.log" def read_worker_log( task_id: str, *, tail_bytes: Optional[int] = None, + board: Optional[str] = None, ) -> Optional[str]: """Read the worker log for ``task_id``. Returns None if the file doesn't exist. If ``tail_bytes`` is set, only the last N bytes are returned (useful for the dashboard drawer which shouldn't page megabytes).""" - path = worker_log_path(task_id) + path = worker_log_path(task_id, board=board) if not path.exists(): return None try: @@ -2661,7 +3221,8 @@ def list_profiles_on_disk() -> list[str]: ``config.yaml`` — a bare dir without config isn't a real profile. 
""" try: - home = Path.home() / ".hermes" / "profiles" + from hermes_constants import get_default_hermes_root + home = get_default_hermes_root() / "profiles" except Exception: return [] if not home.is_dir(): diff --git a/hermes_cli/main.py b/hermes_cli/main.py index a12999b77e..a94c96132b 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -114,6 +114,16 @@ def _apply_profile_override() -> None: consume = 1 break + # 1b. Reject values that can't be valid profile names (e.g. pytest's + # "-p no:xdist" would be misread as profile "no:xdist" otherwise). + # Mirrors hermes_cli.profiles._PROFILE_ID_RE so we never call + # resolve_profile_env() with a value it must reject + sys.exit on. + if profile_name is not None and consume == 2: + import re as _re + if not _re.match(r"^[a-z0-9][a-z0-9_-]{0,63}$", profile_name): + profile_name = None + consume = 0 + # 1.5 If HERMES_HOME is already set and no explicit flag was given, trust it. # This lets child processes (relaunch, subprocess) inherit the parent's # profile choice without having to pass --profile again. @@ -837,7 +847,17 @@ def _print_tui_exit_summary(session_id: Optional[str], active_session_file: Opti ) -_NPM_LOCK_RUNTIME_KEYS = frozenset({"ideallyInert"}) +_NPM_LOCK_RUNTIME_KEYS = frozenset({"ideallyInert", "peer"}) +"""Lockfile fields npm writes non-deterministically at install time. + +``ideallyInert`` is npm's runtime annotation for packages it skipped installing +(per-platform opt-outs). ``peer`` is dropped from the hidden ``.package-lock.json`` +on dev-dependencies that are *also* declared as peers — the canonical +``package-lock.json`` records the dual role, but npm 9's actualized tree strips +it. Neither key represents a real skew between what was declared and what was +installed, so we exclude them from the comparison in :func:`_tui_need_npm_install` +to avoid false-positive reinstalls on every launch. 
+""" def _tui_need_npm_install(root: Path) -> bool: @@ -1042,17 +1062,21 @@ def _make_tui_argv(tui_dir: Path, tui_dev: bool) -> tuple[list[str], Path]: if _tui_need_npm_install(tui_dir): if not os.environ.get("HERMES_QUIET"): print("Installing TUI dependencies…") + # Capture stdout as well as stderr — some npm errors (notably EACCES on a + # root-owned node_modules in containers) are emitted on stdout, and a + # bare "npm install failed." with no preview defeats debugging. We keep + # the failure-only print path so a successful install stays silent. result = subprocess.run( [npm, "install", "--silent", "--no-fund", "--no-audit", "--progress=false"], cwd=str(tui_dir), - stdout=subprocess.DEVNULL, + stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, env={**os.environ, "CI": "1"}, ) if result.returncode != 0: - err = (result.stderr or "").strip() - preview = "\n".join(err.splitlines()[-30:]) + combined = f"{result.stdout or ''}\n{result.stderr or ''}".strip() + preview = "\n".join(combined.splitlines()[-30:]) print("npm install failed.") if preview: print(preview) @@ -3399,10 +3423,10 @@ def _model_flow_named_custom(config, provider_info): print() print("Fetching available models...") - models = fetch_api_models( - api_key, base_url, timeout=8.0, - api_mode=api_mode or None, - ) + fetch_kwargs = {"timeout": 8.0} + if api_mode: + fetch_kwargs["api_mode"] = api_mode + models = fetch_api_models(api_key, base_url, **fetch_kwargs) if models: default_idx = 0 @@ -6477,13 +6501,29 @@ def _cmd_update_check(): if sys.platform == "win32": git_cmd = ["git", "-c", "windows.appendAtomically=false"] - print("→ Fetching from origin...") + # Fetch both origin and upstream; prefer upstream as the canonical reference + print("→ Fetching from upstream...") fetch_result = subprocess.run( - git_cmd + ["fetch", "origin"], + git_cmd + ["fetch", "upstream"], cwd=PROJECT_ROOT, capture_output=True, text=True, ) + if fetch_result.returncode != 0: + # Fallback to origin if upstream 
doesn't exist + print("→ Fetching from origin...") + fetch_result = subprocess.run( + git_cmd + ["fetch", "origin"], + cwd=PROJECT_ROOT, + capture_output=True, + text=True, + ) + upstream_exists = False + compare_branch = "origin/main" + else: + upstream_exists = True + compare_branch = "upstream/main" + if fetch_result.returncode != 0: stderr = fetch_result.stderr.strip() if "Could not resolve host" in stderr or "unable to access" in stderr: @@ -6491,13 +6531,13 @@ def _cmd_update_check(): elif "Authentication failed" in stderr or "could not read Username" in stderr: print("✗ Authentication failed — check your git credentials or SSH key.") else: - print("✗ Failed to fetch from origin.") + print("✗ Failed to fetch.") if stderr: print(f" {stderr.splitlines()[0]}") sys.exit(1) rev_result = subprocess.run( - git_cmd + ["rev-list", "HEAD..origin/main", "--count"], + git_cmd + ["rev-list", f"HEAD..{compare_branch}", "--count"], cwd=PROJECT_ROOT, capture_output=True, text=True, @@ -6509,7 +6549,7 @@ def _cmd_update_check(): print("✓ Already up to date.") else: commits_word = "commit" if behind == 1 else "commits" - print(f"⚕ Update available: {behind} {commits_word} behind origin/main.") + print(f"⚕ Update available: {behind} {commits_word} behind {compare_branch}.") from hermes_cli.config import recommended_update_command print(f" Run '{recommended_update_command()}' to install.") @@ -8897,6 +8937,7 @@ Examples: hermes debug share --lines 500 Include more log lines hermes debug share --expire 30 Keep paste for 30 days hermes debug share --local Print report locally (no upload) + hermes debug share --no-redact Disable upload-time secret redaction hermes debug delete <url> Delete a previously uploaded paste """, ) @@ -8922,6 +8963,16 @@ Examples: action="store_true", help="Print the report locally instead of uploading", ) + share_parser.add_argument( + "--no-redact", + action="store_true", + help=( + "Disable upload-time secret redaction (default: redact). 
Logs " + "are normally run through agent.redact.redact_sensitive_text " + "with force=True before upload so credentials are not leaked " + "into the public paste service." + ), + ) delete_parser = debug_sub.add_parser( "delete", help="Delete a paste uploaded by 'hermes debug share'", diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index 4c323145da..c7edca0a07 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -904,6 +904,26 @@ def switch_model( if any(m.get("name") == new_model for m in cfg_models if isinstance(m, dict)): override = True break + # Also check custom_providers list — models declared there should be accepted + # even if the remote /v1/models endpoint doesn't list them. + if not override and custom_providers and isinstance(custom_providers, list): + for entry in custom_providers: + if not isinstance(entry, dict): + continue + # Match by provider slug (custom:<name>) or by base_url + entry_name = entry.get("name", "") + entry_slug = f"custom:{entry_name}" if entry_name else "" + entry_url = entry.get("base_url", "") + if entry_slug == target_provider or entry_url == base_url: + # Check if the requested model matches the entry's model + entry_model = entry.get("model", "") + entry_models = entry.get("models", {}) + if new_model == entry_model: + override = True + break + if isinstance(entry_models, dict) and new_model in entry_models: + override = True + break if override: validation = {"accepted": True, "persist": True, "recognized": False, "message": validation.get("message", "")} else: @@ -1244,11 +1264,7 @@ def list_authenticated_providers( from hermes_cli.auth import _load_auth_store store = _load_auth_store() providers_store = store.get("providers", {}) - pool_store = store.get("credential_pool", {}) - if store and ( - pid in providers_store or hermes_slug in providers_store - or pid in pool_store or hermes_slug in pool_store - ): + if store and (pid in providers_store or hermes_slug in 
providers_store): has_creds = True except Exception as exc: logger.debug("Auth store check failed for %s: %s", pid, exc) @@ -1344,11 +1360,7 @@ def list_authenticated_providers( from hermes_cli.auth import _load_auth_store _cp_store = _load_auth_store() _cp_providers_store = _cp_store.get("providers", {}) - _cp_pool_store = _cp_store.get("credential_pool", {}) - if _cp_store and ( - _cp.slug in _cp_providers_store - or _cp.slug in _cp_pool_store - ): + if _cp_store and _cp.slug in _cp_providers_store: _cp_has_creds = True except Exception: pass diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 755bac72e3..b1630b3d83 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -1740,10 +1740,20 @@ def model_supports_fast_mode(model_id: Optional[str]) -> bool: def _is_anthropic_fast_model(model_id: Optional[str]) -> bool: - """Return True if the model is a Claude model eligible for Anthropic Fast Mode.""" + """Return True if the model is a Claude model eligible for Anthropic Fast Mode. + + Fast mode is currently supported on Claude Opus 4.6 only. Per Anthropic's + docs (https://platform.claude.com/docs/en/build-with-claude/fast-mode): + "Fast mode is currently supported on Opus 4.6 only. Sending speed: fast + with an unsupported model returns an error." Opus 4.7 explicitly rejects + the ``speed`` parameter with HTTP 400. + """ raw = _strip_vendor_prefix(str(model_id or "")) base = raw.split(":")[0] - return base.startswith("claude-") + if not base.startswith("claude-"): + return False + # Only Opus 4.6 supports fast mode at present. + return "opus-4-6" in base or "opus-4.6" in base def resolve_fast_mode_overrides(model_id: Optional[str]) -> dict[str, Any] | None: @@ -3087,7 +3097,7 @@ def validate_requested_model( "message": f"Model `{requested}` was not found in LM Studio's model listing.", } - if normalized == "custom": + if normalized == "custom" or normalized.startswith("custom:"): # Try probing with correct auth for the api_mode. 
if api_mode == "anthropic_messages": probe = probe_api_models(api_key, base_url, api_mode=api_mode) @@ -3185,11 +3195,12 @@ def validate_requested_model( if suggestions: suggestion_text = "\n Similar models: " + ", ".join(f"`{s}`" for s in suggestions) return { - "accepted": False, - "persist": False, + "accepted": True, + "persist": True, "recognized": False, "message": ( - f"Model `{requested}` was not found in the OpenAI Codex model listing." + f"Note: `{requested}` was not found in the OpenAI Codex model listing. " + "It may still work if your ChatGPT/Codex account has access to a newer or hidden model ID." f"{suggestion_text}" ), } diff --git a/hermes_cli/profiles.py b/hermes_cli/profiles.py index dd5fabcec4..10cd36b88c 100644 --- a/hermes_cli/profiles.py +++ b/hermes_cli/profiles.py @@ -179,8 +179,33 @@ def _get_wrapper_dir() -> Path: # Validation # --------------------------------------------------------------------------- +def normalize_profile_name(name: str) -> str: + """Return the canonical profile id used on disk and in CLI ``-p`` argv. + + Named profiles are stored lowercase under ``profiles/<id>/``. The special + alias ``default`` is matched case-insensitively (``Default`` → ``default``). + Dashboards and tools may pass title-cased display labels; normalize before + validation, assignment, and subprocess spawn (see issue #18498). + """ + if not isinstance(name, str): + name = str(name) + stripped = name.strip() + if not stripped: + raise ValueError("profile name cannot be empty") + if stripped.casefold() == "default": + return "default" + return stripped.lower() + + def validate_profile_name(name: str) -> None: - """Raise ``ValueError`` if *name* is not a valid profile identifier.""" + """Raise ``ValueError`` if *name* is not a valid profile identifier. + + Validates the input as-given — strict lowercase match. Callers that accept + mixed-case or title-cased input from users (dashboard UI, CLI args) should + call :func:`normalize_profile_name` first. 
This separation keeps validate + honest about what the on-disk directory name must look like, while + ingress-point normalization handles UX flexibility (see #18498). + """ if name == "default": return # special alias for ~/.hermes if not _PROFILE_ID_RE.match(name): @@ -192,16 +217,18 @@ def validate_profile_name(name: str) -> None: def get_profile_dir(name: str) -> Path: """Resolve a profile name to its HERMES_HOME directory.""" - if name == "default": + canon = normalize_profile_name(name) + if canon == "default": return _get_default_hermes_home() - return _get_profiles_root() / name + return _get_profiles_root() / canon def profile_exists(name: str) -> bool: """Check whether a profile directory exists.""" - if name == "default": + canon = normalize_profile_name(name) + if canon == "default": return True - return get_profile_dir(name).is_dir() + return get_profile_dir(canon).is_dir() # --------------------------------------------------------------------------- @@ -213,28 +240,29 @@ def check_alias_collision(name: str) -> Optional[str]: Checks: reserved names, hermes subcommands, existing binaries in PATH. 
""" - if name in _RESERVED_NAMES: - return f"'{name}' is a reserved name" - if name in _HERMES_SUBCOMMANDS: - return f"'{name}' conflicts with a hermes subcommand" + canon = normalize_profile_name(name) + if canon in _RESERVED_NAMES: + return f"'{canon}' is a reserved name" + if canon in _HERMES_SUBCOMMANDS: + return f"'{canon}' conflicts with a hermes subcommand" # Check existing commands in PATH wrapper_dir = _get_wrapper_dir() try: result = subprocess.run( - ["which", name], capture_output=True, text=True, timeout=5, + ["which", canon], capture_output=True, text=True, timeout=5, ) if result.returncode == 0: existing_path = result.stdout.strip() # Allow overwriting our own wrappers - if existing_path == str(wrapper_dir / name): + if existing_path == str(wrapper_dir / canon): try: - content = (wrapper_dir / name).read_text() + content = (wrapper_dir / canon).read_text() if "hermes -p" in content: return None # it's our wrapper, safe to overwrite except Exception: pass - return f"'{name}' conflicts with an existing command ({existing_path})" + return f"'{canon}' conflicts with an existing command ({existing_path})" except (FileNotFoundError, subprocess.TimeoutExpired): pass @@ -252,6 +280,7 @@ def create_wrapper_script(name: str) -> Optional[Path]: Returns the path to the created wrapper, or None if creation failed. 
""" + canon = normalize_profile_name(name) wrapper_dir = _get_wrapper_dir() try: wrapper_dir.mkdir(parents=True, exist_ok=True) @@ -259,9 +288,9 @@ def create_wrapper_script(name: str) -> Optional[Path]: print(f"⚠ Could not create {wrapper_dir}: {e}") return None - wrapper_path = wrapper_dir / name + wrapper_path = wrapper_dir / canon try: - wrapper_path.write_text(f'#!/bin/sh\nexec hermes -p {name} "$@"\n') + wrapper_path.write_text(f'#!/bin/sh\nexec hermes -p {canon} "$@"\n') wrapper_path.chmod(wrapper_path.stat().st_mode | stat.S_IEXEC | stat.S_IXGRP | stat.S_IXOTH) return wrapper_path except OSError as e: @@ -271,7 +300,7 @@ def create_wrapper_script(name: str) -> Optional[Path]: def remove_wrapper_script(name: str) -> bool: """Remove the wrapper script for a profile. Returns True if removed.""" - wrapper_path = _get_wrapper_dir() / name + wrapper_path = _get_wrapper_dir() / normalize_profile_name(name) if wrapper_path.exists(): try: # Verify it's our wrapper before removing @@ -421,16 +450,17 @@ def create_profile( Path The newly created profile directory. """ - validate_profile_name(name) + canon = normalize_profile_name(name) + validate_profile_name(canon) - if name == "default": + if canon == "default": raise ValueError( "Cannot create a profile named 'default' — it is the built-in profile (~/.hermes)." 
) - profile_dir = get_profile_dir(name) + profile_dir = get_profile_dir(canon) if profile_dir.exists(): - raise FileExistsError(f"Profile '{name}' already exists at {profile_dir}") + raise FileExistsError(f"Profile '{canon}' already exists at {profile_dir}") # Resolve clone source source_dir = None @@ -440,6 +470,7 @@ def create_profile( from hermes_constants import get_hermes_home source_dir = get_hermes_home() else: + clone_from = normalize_profile_name(clone_from) validate_profile_name(clone_from) source_dir = get_profile_dir(clone_from) if not source_dir.is_dir(): @@ -540,24 +571,25 @@ def delete_profile(name: str, yes: bool = False) -> Path: Returns the path that was removed. """ - validate_profile_name(name) + canon = normalize_profile_name(name) + validate_profile_name(canon) - if name == "default": + if canon == "default": raise ValueError( "Cannot delete the default profile (~/.hermes).\n" "To remove everything, use: hermes uninstall" ) - profile_dir = get_profile_dir(name) + profile_dir = get_profile_dir(canon) if not profile_dir.is_dir(): - raise FileNotFoundError(f"Profile '{name}' does not exist.") + raise FileNotFoundError(f"Profile '{canon}' does not exist.") # Show what will be deleted model, provider = _read_config_model(profile_dir) gw_running = _check_gateway_running(profile_dir) skill_count = _count_skills(profile_dir) - print(f"\nProfile: {name}") + print(f"\nProfile: {canon}") print(f"Path: {profile_dir}") if model: print(f"Model: {model}" + (f" ({provider})" if provider else "")) @@ -569,7 +601,7 @@ def delete_profile(name: str, yes: bool = False) -> Path: ] # Check for service - wrapper_path = _get_wrapper_dir() / name + wrapper_path = _get_wrapper_dir() / canon has_wrapper = wrapper_path.exists() if has_wrapper: items.append(f"Command alias ({wrapper_path})") @@ -584,16 +616,16 @@ def delete_profile(name: str, yes: bool = False) -> Path: if not yes: print() try: - confirm = input(f"Type '{name}' to confirm: ").strip() + confirm = 
input(f"Type '{canon}' to confirm: ").strip() except (KeyboardInterrupt, EOFError): print("\nCancelled.") return profile_dir - if confirm != name: + if confirm != canon: print("Cancelled.") return profile_dir # 1. Disable service (prevents auto-restart) - _cleanup_gateway_service(name, profile_dir) + _cleanup_gateway_service(canon, profile_dir) # 2. Stop running gateway if gw_running: @@ -601,7 +633,7 @@ def delete_profile(name: str, yes: bool = False) -> Path: # 3. Remove wrapper script if has_wrapper: - if remove_wrapper_script(name): + if remove_wrapper_script(canon): print(f"✓ Removed {wrapper_path}") # 4. Remove profile directory @@ -614,13 +646,13 @@ def delete_profile(name: str, yes: bool = False) -> Path: # 5. Clear active_profile if it pointed to this profile try: active = get_active_profile() - if active == name: + if active == canon: set_active_profile("default") print("✓ Active profile reset to default") except Exception: pass - print(f"\nProfile '{name}' deleted.") + print(f"\nProfile '{canon}' deleted.") return profile_dir @@ -730,22 +762,23 @@ def set_active_profile(name: str) -> None: Writes to ``~/.hermes/active_profile``. Use ``"default"`` to clear. """ - validate_profile_name(name) - if name != "default" and not profile_exists(name): + canon = normalize_profile_name(name) + validate_profile_name(canon) + if canon != "default" and not profile_exists(canon): raise FileNotFoundError( - f"Profile '{name}' does not exist. " - f"Create it with: hermes profile create {name}" + f"Profile '{canon}' does not exist. 
" + f"Create it with: hermes profile create {canon}" ) path = _get_active_profile_path() path.parent.mkdir(parents=True, exist_ok=True) - if name == "default": + if canon == "default": # Remove the file to indicate default path.unlink(missing_ok=True) else: # Atomic write tmp = path.with_suffix(".tmp") - tmp.write_text(name + "\n") + tmp.write_text(canon + "\n") tmp.replace(path) @@ -811,16 +844,17 @@ def export_profile(name: str, output_path: str) -> Path: """ import tempfile - validate_profile_name(name) - profile_dir = get_profile_dir(name) + canon = normalize_profile_name(name) + validate_profile_name(canon) + profile_dir = get_profile_dir(canon) if not profile_dir.is_dir(): - raise FileNotFoundError(f"Profile '{name}' does not exist.") + raise FileNotFoundError(f"Profile '{canon}' does not exist.") output = Path(output_path) # shutil.make_archive wants the base name without extension base = str(output).removesuffix(".tar.gz").removesuffix(".tgz") - if name == "default": + if canon == "default": # The default profile IS ~/.hermes itself — its parent is ~/ and its # directory name is ".hermes", not "default". We stage a clean copy # under a temp dir so the archive contains ``default/...``. @@ -836,14 +870,14 @@ def export_profile(name: str, output_path: str) -> Path: # Named profiles — stage a filtered copy to exclude credentials with tempfile.TemporaryDirectory() as tmpdir: - staged = Path(tmpdir) / name + staged = Path(tmpdir) / canon _CREDENTIAL_FILES = {"auth.json", ".env"} shutil.copytree( profile_dir, staged, ignore=lambda d, contents: _CREDENTIAL_FILES & set(contents), ) - result = shutil.make_archive(base, "gztar", tmpdir, name) + result = shutil.make_archive(base, "gztar", tmpdir, canon) return Path(result) @@ -952,16 +986,17 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path: # Archives exported from the default profile have "default/" as top-level # dir. 
Importing as "default" would target ~/.hermes itself — disallow # that and guide the user toward a named profile. - if inferred_name == "default": + canon = normalize_profile_name(inferred_name) + validate_profile_name(canon) + if canon == "default": raise ValueError( "Cannot import as 'default' — that is the built-in root profile (~/.hermes). " "Specify a different name: hermes profile import <archive> --name <name>" ) - validate_profile_name(inferred_name) - profile_dir = get_profile_dir(inferred_name) + profile_dir = get_profile_dir(canon) if profile_dir.exists(): - raise FileExistsError(f"Profile '{inferred_name}' already exists at {profile_dir}") + raise FileExistsError(f"Profile '{canon}' already exists at {profile_dir}") profiles_root = _get_profiles_root() profiles_root.mkdir(parents=True, exist_ok=True) @@ -977,8 +1012,8 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path: ) final_source = extracted - if archive_root != inferred_name: - final_source = staging_root / inferred_name + if archive_root != canon: + final_source = staging_root / canon extracted.rename(final_source) shutil.move(str(final_source), str(profile_dir)) @@ -1048,25 +1083,27 @@ def rename_profile(old_name: str, new_name: str) -> Path: Returns the new profile directory. 
""" - validate_profile_name(old_name) - validate_profile_name(new_name) + old_canon = normalize_profile_name(old_name) + new_canon = normalize_profile_name(new_name) + validate_profile_name(old_canon) + validate_profile_name(new_canon) - if old_name == "default": + if old_canon == "default": raise ValueError("Cannot rename the default profile.") - if new_name == "default": + if new_canon == "default": raise ValueError("Cannot rename to 'default' — it is reserved.") - old_dir = get_profile_dir(old_name) - new_dir = get_profile_dir(new_name) + old_dir = get_profile_dir(old_canon) + new_dir = get_profile_dir(new_canon) if not old_dir.is_dir(): - raise FileNotFoundError(f"Profile '{old_name}' does not exist.") + raise FileNotFoundError(f"Profile '{old_canon}' does not exist.") if new_dir.exists(): - raise FileExistsError(f"Profile '{new_name}' already exists.") + raise FileExistsError(f"Profile '{new_canon}' already exists.") # 1. Stop gateway if running if _check_gateway_running(old_dir): - _cleanup_gateway_service(old_name, old_dir) + _cleanup_gateway_service(old_canon, old_dir) _stop_gateway_process(old_dir) # 2. Rename directory @@ -1074,22 +1111,22 @@ def rename_profile(old_name: str, new_name: str) -> Path: print(f"✓ Renamed {old_dir.name} → {new_dir.name}") # 3. Update profile-scoped Honcho host blocks, preserving aiPeer identity - _migrate_honcho_profile_host(old_name, new_name, new_dir) + _migrate_honcho_profile_host(old_canon, new_canon, new_dir) # 4. Update wrapper script - remove_wrapper_script(old_name) - collision = check_alias_collision(new_name) + remove_wrapper_script(old_canon) + collision = check_alias_collision(new_canon) if not collision: - create_wrapper_script(new_name) - print(f"✓ Alias updated: {new_name}") + create_wrapper_script(new_canon) + print(f"✓ Alias updated: {new_canon}") else: - print(f"⚠ Cannot create alias '{new_name}' — {collision}") + print(f"⚠ Cannot create alias '{new_canon}' — {collision}") # 5. 
Update active_profile if it pointed to old name try: - if get_active_profile() == old_name: - set_active_profile(new_name) - print(f"✓ Active profile updated: {new_name}") + if get_active_profile() == old_canon: + set_active_profile(new_canon) + print(f"✓ Active profile updated: {new_canon}") except Exception: pass @@ -1191,13 +1228,14 @@ def resolve_profile_env(profile_name: str) -> str: Called early in the CLI entry point, before any hermes modules are imported, to set the HERMES_HOME environment variable. """ - validate_profile_name(profile_name) - profile_dir = get_profile_dir(profile_name) + canon = normalize_profile_name(profile_name) + validate_profile_name(canon) + profile_dir = get_profile_dir(canon) - if profile_name != "default" and not profile_dir.is_dir(): + if canon != "default" and not profile_dir.is_dir(): raise FileNotFoundError( - f"Profile '{profile_name}' does not exist. " - f"Create it with: hermes profile create {profile_name}" + f"Profile '{canon}' does not exist. " + f"Create it with: hermes profile create {canon}" ) return str(profile_dir) diff --git a/hermes_cli/pty_bridge.py b/hermes_cli/pty_bridge.py index 9a8a73badd..66fdb4ac72 100644 --- a/hermes_cli/pty_bridge.py +++ b/hermes_cli/pty_bridge.py @@ -108,9 +108,14 @@ class PtyBridge: "(or pip install -e '.[pty]')." ) raise PtyUnavailableError("Pseudo-terminals are unavailable.") - # Let caller-supplied env fully override inheritance; if they pass - # None we inherit the server's env (same semantics as subprocess). - spawn_env = os.environ.copy() if env is None else env + # PTY-hosted programs expect TERM to describe the terminal type. + # CI often runs without TERM in the parent process, which makes + # simple terminal probes like `tput cols` fail before winsize reads. + # Preserve explicit caller overrides, but backfill a sensible default + # when TERM is missing or blank. 
+ spawn_env = (os.environ.copy() if env is None else env.copy()) + if not spawn_env.get("TERM"): + spawn_env["TERM"] = "xterm-256color" proc = ptyprocess.PtyProcess.spawn( # type: ignore[union-attr] list(argv), cwd=cwd, diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 31cb846012..63f5267ddf 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -964,7 +964,8 @@ def setup_model_provider(config: dict, *, quick: bool = False): ) else: _selected_vision_model = prompt(" Vision model (blank = use main/custom default)").strip() - save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model) + if _selected_vision_model: + save_env_value("AUXILIARY_VISION_MODEL", _selected_vision_model) print_success( f"Vision configured with {_base_url}" + (f" ({_selected_vision_model})" if _selected_vision_model else "") @@ -1328,15 +1329,13 @@ def setup_terminal_backend(config: dict): print_success("Terminal backend: Local") print_info("Commands run directly on this machine.") - # CWD for messaging + # Gateway/cron working directory print() - print_info("Working directory for messaging sessions:") - print_info(" When using Hermes via Telegram/Discord, this is where") - print_info( - " the agent starts. CLI mode always starts in the current directory." 
- ) + print_info("Gateway working directory:") + print_info(" Used by Telegram/Discord/cron sessions.") + print_info(" CLI/TUI always uses your launch directory instead.") current_cwd = cfg_get(config, "terminal", "cwd", default="") - cwd = prompt(" Messaging working directory", current_cwd or str(Path.home())) + cwd = prompt(" Gateway working directory", current_cwd or str(Path.home())) if cwd: config["terminal"]["cwd"] = cwd @@ -2049,6 +2048,16 @@ def _setup_slack(): print_warning("⚠️ No Slack allowlist set - unpaired users will be denied by default.") print_info(" Set SLACK_ALLOW_ALL_USERS=true or GATEWAY_ALLOW_ALL_USERS=true only if you intentionally want open workspace access.") + print() + print_info("📬 Home Channel: where Hermes delivers cron job results,") + print_info(" cross-platform messages, and notifications.") + print_info(" To get a channel ID: open the channel in Slack, then right-click") + print_info(" the channel name → Copy link — the ID starts with C (e.g. C01ABC2DE3F).") + print_info(" You can also set this later by typing /set-home in a Slack channel.") + home_channel = prompt("Home channel ID (leave empty to set later with /set-home)") + if home_channel: + save_env_value("SLACK_HOME_CHANNEL", home_channel.strip()) + def _write_slack_manifest_and_instruct(): """Generate the Slack manifest, write it under HERMES_HOME, and print @@ -2995,6 +3004,21 @@ def run_setup_wizard(args): config = load_config() hermes_home = get_hermes_home() + # Back up existing config before setup modifies it (#3522) + config_path = get_config_path() + if config_path.exists(): + from datetime import datetime as _dt + _backup_path = config_path.with_suffix( + f".yaml.bak.{_dt.now().strftime('%Y%m%d_%H%M%S')}" + ) + try: + import shutil + shutil.copy2(config_path, _backup_path) + except Exception: + _backup_path = None + else: + _backup_path = None + # Detect non-interactive environments (headless SSH, Docker, CI/CD) non_interactive = getattr(args, 'non_interactive', 
False) if not non_interactive and not is_interactive_stdin(): @@ -3164,6 +3188,10 @@ def run_setup_wizard(args): # Save and show summary save_config(config) + if _backup_path and _backup_path.exists(): + print_info(f"Previous config backed up to: {_backup_path}") + print_info("If setup changed a value you customized, restore it with:") + print_info(f" cp {_backup_path} {config_path}") _print_setup_summary(config, hermes_home) _offer_launch_chat() diff --git a/hermes_cli/status.py b/hermes_cli/status.py index 38b22a03eb..9a40c8d9b7 100644 --- a/hermes_cli/status.py +++ b/hermes_cli/status.py @@ -122,11 +122,16 @@ def show_status(args): print() print(color("◆ API Keys", Colors.CYAN, Colors.BOLD)) - keys = { + # Values may be a single env var name (str) or a tuple of alternates (first found wins). + keys: dict[str, str | tuple[str, ...]] = { "OpenRouter": "OPENROUTER_API_KEY", "OpenAI": "OPENAI_API_KEY", - "NVIDIA": "NVIDIA_API_KEY", - "Z.AI/GLM": "GLM_API_KEY", + "Anthropic": ("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN"), + "Google / Gemini": ("GOOGLE_API_KEY", "GEMINI_API_KEY"), + "DeepSeek": "DEEPSEEK_API_KEY", + "xAI / Grok": "XAI_API_KEY", + "NVIDIA NIM": "NVIDIA_API_KEY", + "Z.AI / GLM": "GLM_API_KEY", "Kimi": "KIMI_API_KEY", "StepFun Step Plan": "STEPFUN_API_KEY", "MiniMax": "MINIMAX_API_KEY", @@ -142,8 +147,23 @@ def show_status(args): "GitHub": "GITHUB_TOKEN", } - for name, env_var in keys.items(): - value = get_env_value(env_var) or "" + def _resolve_env(env_ref) -> str: + """Return first non-empty env var value from a str or tuple of names.""" + if isinstance(env_ref, tuple): + for candidate in env_ref: + v = get_env_value(candidate) or "" + if v: + return v + return "" + return get_env_value(env_ref) or "" + + for name, env_ref in keys.items(): + # Anthropic already has a dedicated lookup below; keep that as the + # single source of truth (it also resolves OAuth tokens), skip here + # so we don't print two "Anthropic" rows. 
+ if name == "Anthropic": + continue + value = _resolve_env(env_ref) has_key = bool(value) display = redact_key(value) if not show_all else value print(f" {name:<12} {check_mark(has_key)} {display}") diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index b3df18d932..14d82caa65 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -56,6 +56,7 @@ CONFIGURABLE_TOOLSETS = [ ("file", "📁 File Operations", "read, write, patch, search"), ("code_execution", "⚡ Code Execution", "execute_code"), ("vision", "👁️ Vision / Image Analysis", "vision_analyze"), + ("video", "🎬 Video Analysis", "video_analyze (requires video-capable model)"), ("image_gen", "🎨 Image Generation", "image_generate"), ("moa", "🧠 Mixture of Agents", "mixture_of_agents"), ("tts", "🔊 Text-to-Speech", "text_to_speech"), @@ -78,7 +79,7 @@ CONFIGURABLE_TOOLSETS = [ # Toolsets that are OFF by default for new installs. # They're still in _HERMES_CORE_TOOLS (available at runtime if enabled), # but the setup checklist won't pre-select them for first-time users. -_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord", "discord_admin"} +_DEFAULT_OFF_TOOLSETS = {"moa", "homeassistant", "rl", "spotify", "discord", "discord_admin", "video"} # Platform-scoped toolsets: only appear in the `hermes tools` checklist for # these platforms, and only resolve/save for these platforms. 
A toolset @@ -1919,21 +1920,27 @@ def _reconfigure_provider(provider: dict, config: dict): return if provider.get("tts_provider"): - config.setdefault("tts", {})["provider"] = provider["tts_provider"] + tts_cfg = config.setdefault("tts", {}) + tts_cfg["provider"] = provider["tts_provider"] + tts_cfg["use_gateway"] = bool(managed_feature) _print_success(f" TTS provider set to: {provider['tts_provider']}") if "browser_provider" in provider: bp = provider["browser_provider"] + browser_cfg = config.setdefault("browser", {}) if bp == "local": - config.setdefault("browser", {})["cloud_provider"] = "local" + browser_cfg["cloud_provider"] = "local" _print_success(" Browser set to local mode") elif bp: - config.setdefault("browser", {})["cloud_provider"] = bp + browser_cfg["cloud_provider"] = bp _print_success(f" Browser cloud provider set to: {bp}") + browser_cfg["use_gateway"] = bool(managed_feature) # Set web search backend in config if applicable if provider.get("web_backend"): - config.setdefault("web", {})["backend"] = provider["web_backend"] + web_cfg = config.setdefault("web", {}) + web_cfg["backend"] = provider["web_backend"] + web_cfg["use_gateway"] = bool(managed_feature) _print_success(f" Web backend set to: {provider['web_backend']}") if managed_feature and managed_feature not in ("web", "tts", "browser"): diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index ac46adf207..e5b5730b87 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -510,10 +510,23 @@ except (ValueError, TypeError): ) _GATEWAY_HEALTH_TIMEOUT = 3.0 +# DEPRECATED (scheduled for removal): GATEWAY_HEALTH_URL / GATEWAY_HEALTH_TIMEOUT. +# Cross-container / cross-host gateway liveness detection will be folded into a +# first-class dashboard config key so it's no longer Docker-adjacent lore buried +# in env vars. The env vars still work for now so existing Compose deployments +# don't break. 
Do not add new callers — wire new uses through the planned +# config surface. + def _probe_gateway_health() -> tuple[bool, dict | None]: """Probe the gateway via its HTTP health endpoint (cross-container). + .. deprecated:: + Driven by the deprecated ``GATEWAY_HEALTH_URL`` / + ``GATEWAY_HEALTH_TIMEOUT`` env vars. Scheduled for removal alongside + a move to a first-class dashboard config key. See + :data:`_GATEWAY_HEALTH_URL` for context. + Uses ``/health/detailed`` first (returns full state), falling back to the simpler ``/health`` endpoint. Returns ``(is_alive, body_dict)``. diff --git a/model_tools.py b/model_tools.py index 2eb31ab0df..8721e9ee6a 100644 --- a/model_tools.py +++ b/model_tools.py @@ -511,6 +511,12 @@ def coerce_tool_args(tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]: Handles ``"type": "integer"``, ``"type": "number"``, ``"type": "boolean"``, and union types (``"type": ["integer", "string"]``). + + Also wraps bare scalar values in a single-element list when the schema + declares ``"type": "array"``. Open-weight models (DeepSeek, Qwen, GLM) + sometimes emit ``{"urls": "https://a.com"}`` when the tool expects + ``{"urls": ["https://a.com"]}``; wrapping here avoids a confusing tool + failure on what is otherwise a well-formed call. """ if not args or not isinstance(args, dict): return args @@ -523,13 +529,42 @@ def coerce_tool_args(tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]: if not properties: return args - for key, value in args.items(): - if not isinstance(value, str): - continue + for key, value in list(args.items()): prop_schema = properties.get(key) if not prop_schema: continue expected = prop_schema.get("type") + + # Wrap bare non-list values when the schema declares ``array``. + # Strings still go through _coerce_value first so JSON-encoded + # arrays (``'["a","b"]'``) get parsed and nullable ``"null"`` + # becomes ``None`` rather than ``["null"]``. 
+ # ``None`` itself is preserved — we don't know whether the model + # meant "omit" or "empty list", and tools with sensible defaults + # (e.g. read_file's normalize_read_pagination) already handle it. + if expected == "array" and value is not None and not isinstance(value, (list, tuple)): + if isinstance(value, str): + coerced = _coerce_value(value, expected, schema=prop_schema) + if coerced is not value: + # _coerce_value handled it (JSON-parsed list or + # nullable "null" → None). + args[key] = coerced + continue + args[key] = [value] + logger.info( + "coerce_tool_args: wrapped bare string in list for %s.%s", + tool_name, key, + ) + continue + args[key] = [value] + logger.info( + "coerce_tool_args: wrapped bare %s in list for %s.%s", + type(value).__name__, tool_name, key, + ) + continue + + if not isinstance(value, str): + continue if not expected and not _schema_allows_null(prop_schema): continue coerced = _coerce_value(value, expected, schema=prop_schema) diff --git a/optional-skills/creative/kanban-video-orchestrator/references/role-archetypes.md b/optional-skills/creative/kanban-video-orchestrator/references/role-archetypes.md index 5a4cb207a2..95eaeb33b6 100644 --- a/optional-skills/creative/kanban-video-orchestrator/references/role-archetypes.md +++ b/optional-skills/creative/kanban-video-orchestrator/references/role-archetypes.md @@ -82,14 +82,14 @@ film and music video. Often pairs with a diagramming tool. Designs the visual language: framing, color, motion, transitions. Reviews generator output for visual consistency. Hands off per-scene `VISUAL_SPEC.md`. -- **Toolsets:** kanban, terminal, file +- **Toolsets:** kanban, terminal, file, video, vision - **Skills:** `kanban-worker` plus the visual skill that matches the project (e.g., `ascii-video` for ASCII work, `manim-video` for explainers, `touchdesigner-mcp` for real-time visuals, etc.) 
- **Outputs:** `scenes/scene-NN/VISUAL_SPEC.md`, review comments on renderer tasks -- **Reviews via:** any media-analysis approach (Gemini multimodal, manual - inspection of clip thumbnails, ffprobe summaries) +- **Reviews via:** `video_analyze` (sends full clip to multimodal LLM for + native review), `vision_analyze` for spot-checking frames, ffprobe summaries ## Production roles @@ -247,10 +247,10 @@ specifically on what's off (pacing, sync, brand alignment, technical quality). Distinct from the cinematographer (who reviews visuals during production) and the editor (who reviews for assembly). -- **Toolsets:** kanban, terminal, file +- **Toolsets:** kanban, terminal, file, video, vision - **Skills:** `kanban-worker` -- **External tools:** any media-analysis approach (Gemini multimodal, - ffprobe, manual frame extraction) +- **Review tools:** `video_analyze` (native clip review via multimodal LLM), + `vision_analyze` (frame/thumbnail review), ffprobe - **Outputs:** `review-notes.md`, comments on tasks ### brand-cop diff --git a/optional-skills/creative/kanban-video-orchestrator/references/tool-matrix.md b/optional-skills/creative/kanban-video-orchestrator/references/tool-matrix.md index 5c78c4ff3d..5a52d15ddd 100644 --- a/optional-skills/creative/kanban-video-orchestrator/references/tool-matrix.md +++ b/optional-skills/creative/kanban-video-orchestrator/references/tool-matrix.md @@ -81,7 +81,16 @@ them directly. | Remotion CLI (`npx remotion render`) | React-based motion graphics | renderer-motion-graphics | | Manim CE (`manim`) | Math animation render (driven by `manim-video` skill's recipes) | renderer-manim | | Blender (`blender -b`) | 3D rendering (alternative to `blender-mcp`) | renderer-3d | -| Gemini multimodal / Claude vision | AI review of clips | reviewer, cinematographer, editor | + +## Built-in Hermes tools for media review + +These are native Hermes tools — not invoked via terminal but through their own +toolsets. 
Enable them per-profile by adding the toolset to the profile config. + +| Tool | Toolset | What it does | Profile that uses it | +|------|---------|--------------|----------------------| +| `video_analyze` | `video` (opt-in — `hermes tools enable video`) | Native video understanding — sends full clip to a multimodal LLM (Gemini via OpenRouter) for review without frame extraction. Supports mp4, webm, mov, avi, mkv. 50 MB cap. Model: `AUXILIARY_VIDEO_MODEL` env → `AUXILIARY_VISION_MODEL` fallback. | reviewer, cinematographer, editor | +| `vision_analyze` | `vision` (core — enabled by default) | Image/frame analysis — review stills, thumbnails, exported frames. Already available to all profiles without opt-in. | reviewer, cinematographer, concept-artist | ## Standard toolset configurations per role @@ -156,6 +165,8 @@ toolsets: - kanban - terminal - file + - video # video_analyze — review full clips natively + - vision # vision_analyze — review stills / exported frames skills: always_load: - kanban-worker @@ -246,6 +257,8 @@ toolsets: - kanban - terminal - file + - video # video_analyze — editor reviews assembled cuts natively + - vision # vision_analyze — spot-check frames skills: always_load: - kanban-worker @@ -259,14 +272,13 @@ For captioner add Whisper invocation patterns to the SOUL.md. ```yaml toolsets: - kanban - - terminal # for media inspection + - terminal # for media inspection (ffprobe, etc.) - file + - video # video_analyze — review full clips natively + - vision # vision_analyze — review stills / exported frames skills: always_load: - kanban-worker -env_required: - - OPENROUTER_API_KEY # if using Gemini multimodal review - # or ANTHROPIC_API_KEY if using Claude vision (already required globally) ``` ## API key requirements @@ -278,7 +290,7 @@ key is present in `~/.hermes/.env` (or macOS Keychain) before firing the kanban. 
|---------|---------|---------| | ElevenLabs | `ELEVENLABS_API_KEY` | voice-talent | | OpenAI | `OPENAI_API_KEY` | image-generator (DALL-E), voice-talent (TTS) | -| OpenRouter | `OPENROUTER_API_KEY` | reviewer, cinematographer, editor (Gemini multimodal review) | +| OpenRouter | `OPENROUTER_API_KEY` | reviewer, cinematographer, editor (`video_analyze` routes through `AUXILIARY_VIDEO_MODEL` → OpenRouter) | | FAL | `FAL_KEY` | image-generator (FAL flux models) | | Replicate | `REPLICATE_API_TOKEN` | image-generator (alternate provider) | | Runway | `RUNWAY_API_KEY` | image-to-video-generator | diff --git a/plugins/google_meet/node/server.py b/plugins/google_meet/node/server.py index a0d802dfdc..cff01d265f 100644 --- a/plugins/google_meet/node/server.py +++ b/plugins/google_meet/node/server.py @@ -43,7 +43,7 @@ class NodeServer: def __init__( self, - host: str = "0.0.0.0", + host: str = "127.0.0.1", port: int = 18789, token_path: Optional[Path] = None, display_name: str = "hermes-meet-node", @@ -76,6 +76,13 @@ class NodeServer: json.dumps({"token": tok, "generated_at": time.time()}, indent=2), encoding="utf-8", ) + # Restrict to owner-read-write only — the token grants full RPC + # access to the meet bot (start, transcribe, speak in meetings). + try: + tmp.chmod(0o600) + except (OSError, NotImplementedError): + # Best-effort on non-POSIX filesystems; mode is set on POSIX. + pass tmp.replace(self.token_path) self._token = tok return tok diff --git a/plugins/hermes-achievements/README.md b/plugins/hermes-achievements/README.md index dd360197e8..33641a9d72 100644 --- a/plugins/hermes-achievements/README.md +++ b/plugins/hermes-achievements/README.md @@ -11,6 +11,8 @@ Achievement system for the Hermes Dashboard: collectible, tiered badges generate The screenshots use temporary demo tier data to show the full visual range. The plugin itself reads real local Hermes session history by default. 
> **Update notice (2026-04-29):** If you installed this plugin before today, update to the latest version. The achievements scan path was refactored for much faster warm loads (snapshot cache + incremental checkpoint scan). +> +> **Share cards (2026-05-04, vendored in hermes-agent v0.4.0):** Unlocked achievement cards now have a "Share" button that renders a 1200×630 PNG share card (client-side canvas, no backend, no network) with Download + Copy-to-clipboard actions. Fits X/Twitter, Discord, LinkedIn, Bluesky link-preview dimensions. ## What it does diff --git a/plugins/hermes-achievements/dashboard/dist/index.js b/plugins/hermes-achievements/dashboard/dist/index.js index 56b9427e84..d30f34e11e 100644 --- a/plugins/hermes-achievements/dashboard/dist/index.js +++ b/plugins/hermes-achievements/dashboard/dist/index.js @@ -66,6 +66,296 @@ }); } + const TIER_HEX = { + "Copper": "#b87333", + "Silver": "#c0c7d2", + "Gold": "#f2c94c", + "Diamond": "#67e8f9", + "Olympian": "#c084fc", + }; + + function tierHex(tier) { + return TIER_HEX[tier] || "#67e8f9"; + } + + // Render a LUCIDE icon path fragment into a standalone SVG string with an + // explicit stroke color so it can be rasterized onto a <canvas> via Image. + // The normal render path uses stroke="currentColor" which browsers honor in + // DOM but NOT when the SVG is drawn to a canvas from a data URL. 
+ function iconSvgForCanvas(iconKey, strokeColor) { + const paths = LUCIDE[iconKey] || LUCIDE.secret; + return "<svg xmlns=\"http://www.w3.org/2000/svg\" viewBox=\"0 0 24 24\" fill=\"none\" " + + "stroke=\"" + strokeColor + "\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\">" + + paths + "</svg>"; + } + + function loadSvgImage(svgString) { + return new Promise(function (resolve, reject) { + const blob = new Blob([svgString], { type: "image/svg+xml;charset=utf-8" }); + const url = URL.createObjectURL(blob); + const img = new Image(); + img.onload = function () { URL.revokeObjectURL(url); resolve(img); }; + img.onerror = function (e) { URL.revokeObjectURL(url); reject(e); }; + img.src = url; + }); + } + + function wrapText(ctx, text, maxWidth) { + const words = String(text || "").split(/\s+/).filter(Boolean); + const lines = []; + let current = ""; + for (let i = 0; i < words.length; i++) { + const candidate = current ? current + " " + words[i] : words[i]; + if (ctx.measureText(candidate).width <= maxWidth) { + current = candidate; + } else { + if (current) lines.push(current); + current = words[i]; + } + } + if (current) lines.push(current); + return lines; + } + + // Build a 1200x630 share card PNG for a single achievement. Returns a Blob. + // Pure client-side render via Canvas2D — no external deps, no network. + async function buildShareImage(achievement) { + const W = 1200; + const H = 630; + const canvas = document.createElement("canvas"); + canvas.width = W; + canvas.height = H; + const ctx = canvas.getContext("2d"); + + const tier = achievement.tier || achievement.next_tier || "Copper"; + const color = tierHex(tier); + + // Background: dark charcoal with a tier-tinted radial highlight on the + // top-left, echoing the card visual language. 
+ ctx.fillStyle = "#0b0d11"; + ctx.fillRect(0, 0, W, H); + const bgGrad = ctx.createRadialGradient(260, 220, 60, 260, 220, 820); + bgGrad.addColorStop(0, color + "33"); + bgGrad.addColorStop(0.55, color + "0a"); + bgGrad.addColorStop(1, "#0b0d1100"); + ctx.fillStyle = bgGrad; + ctx.fillRect(0, 0, W, H); + + // Outer border + ctx.strokeStyle = color + "66"; + ctx.lineWidth = 2; + ctx.strokeRect(1, 1, W - 2, H - 2); + + // Icon block — 380x380 on the left + try { + const svg = iconSvgForCanvas(achievement.icon || "secret", color); + const iconImg = await loadSvgImage(svg); + const ix = 90; + const iy = 125; + const isize = 380; + // Icon glow + ctx.save(); + ctx.shadowColor = color; + ctx.shadowBlur = 40; + ctx.drawImage(iconImg, ix, iy, isize, isize); + ctx.restore(); + } catch (_) { + // Icon render failure is non-fatal; card still useful without it. + } + + // Right column text layout + const rx = 520; + const rMaxWidth = W - rx - 70; + + // Category label (kicker) + ctx.fillStyle = "#8b95a8"; + ctx.font = "600 22px ui-monospace, 'SF Mono', Menlo, monospace"; + ctx.textBaseline = "top"; + ctx.fillText((achievement.category || "").toUpperCase(), rx, 112); + + // Achievement name — wrap to 2 lines if needed + ctx.fillStyle = "#ffffff"; + ctx.font = "780 68px system-ui, -apple-system, 'Segoe UI', sans-serif"; + const nameLines = wrapText(ctx, achievement.name || "Achievement", rMaxWidth).slice(0, 2); + let cursorY = 150; + for (let i = 0; i < nameLines.length; i++) { + ctx.fillText(nameLines[i], rx, cursorY); + cursorY += 76; + } + + // Tier badge pill + const badgeLabel = tier.toUpperCase() + " TIER"; + ctx.font = "700 22px ui-monospace, 'SF Mono', Menlo, monospace"; + const badgeWidth = ctx.measureText(badgeLabel).width + 32; + const badgeX = rx; + const badgeY = cursorY + 14; + const badgeH = 40; + ctx.fillStyle = color + "1f"; + ctx.strokeStyle = color; + ctx.lineWidth = 1.5; + ctx.beginPath(); + ctx.rect(badgeX, badgeY, badgeWidth, badgeH); + ctx.fill(); + 
ctx.stroke(); + ctx.fillStyle = color; + ctx.textBaseline = "middle"; + ctx.fillText(badgeLabel, badgeX + 16, badgeY + badgeH / 2 + 1); + ctx.textBaseline = "top"; + + // Description — wrap up to 3 lines + ctx.fillStyle = "#c3cad6"; + ctx.font = "400 26px system-ui, -apple-system, 'Segoe UI', sans-serif"; + const descLines = wrapText(ctx, achievement.description || "", rMaxWidth).slice(0, 3); + let descY = badgeY + badgeH + 28; + for (let i = 0; i < descLines.length; i++) { + ctx.fillText(descLines[i], rx, descY); + descY += 34; + } + + // Progress / stat line (if meaningful) + const progressValue = achievement.progress; + const threshold = achievement.next_threshold; + let statLine = null; + if (progressValue && threshold) { + statLine = progressValue.toLocaleString() + " / " + threshold.toLocaleString(); + } else if (progressValue) { + statLine = progressValue.toLocaleString(); + } + if (statLine) { + ctx.fillStyle = color; + ctx.font = "700 28px ui-monospace, 'SF Mono', Menlo, monospace"; + ctx.fillText(statLine, rx, descY + 14); + } + + // Footer watermark + ctx.fillStyle = "#8b95a8"; + ctx.font = "600 20px ui-monospace, 'SF Mono', Menlo, monospace"; + ctx.textBaseline = "bottom"; + ctx.fillText("HERMES AGENT · hermes-agent.nousresearch.com", 70, H - 40); + + // "UNLOCKED" stamp upper-right + ctx.textBaseline = "top"; + ctx.fillStyle = color; + ctx.font = "800 24px ui-monospace, 'SF Mono', Menlo, monospace"; + const stamp = "◆ UNLOCKED"; + const stampW = ctx.measureText(stamp).width; + ctx.fillText(stamp, W - 70 - stampW, 70); + + return await new Promise(function (resolve, reject) { + canvas.toBlob(function (blob) { + if (blob) resolve(blob); else reject(new Error("canvas.toBlob returned null")); + }, "image/png"); + }); + } + + function ShareDialog({ achievement, onClose }) { + const [status, setStatus] = hooks.useState("rendering"); // rendering | ready | copied | error + const [errorMsg, setErrorMsg] = hooks.useState(null); + const [previewUrl, 
setPreviewUrl] = hooks.useState(null); + const blobRef = React.useRef(null); + + hooks.useEffect(function () { + let cancelled = false; + let createdUrl = null; + buildShareImage(achievement).then(function (blob) { + if (cancelled) return; + blobRef.current = blob; + createdUrl = URL.createObjectURL(blob); + setPreviewUrl(createdUrl); + setStatus("ready"); + }).catch(function (err) { + if (cancelled) return; + setErrorMsg(String(err && err.message || err)); + setStatus("error"); + }); + return function () { + cancelled = true; + if (createdUrl) URL.revokeObjectURL(createdUrl); + }; + }, [achievement.id]); + + function download() { + if (!blobRef.current) return; + const url = URL.createObjectURL(blobRef.current); + const a = document.createElement("a"); + a.href = url; + a.download = "hermes-achievement-" + (achievement.id || "badge") + ".png"; + document.body.appendChild(a); + a.click(); + a.remove(); + setTimeout(function () { URL.revokeObjectURL(url); }, 1000); + } + + async function copyToClipboard() { + if (!blobRef.current) return; + try { + if (!navigator.clipboard || !window.ClipboardItem) { + throw new Error("Clipboard image copy not supported in this browser — use Download instead."); + } + await navigator.clipboard.write([ + new window.ClipboardItem({ "image/png": blobRef.current }), + ]); + setStatus("copied"); + setTimeout(function () { setStatus("ready"); }, 1800); + } catch (err) { + setErrorMsg(String(err && err.message || err)); + setStatus("error"); + } + } + + // Build the pre-filled tweet text. Keep it short so X doesn't truncate + // when the user hasn't attached the PNG yet — they'll copy-image and + // paste in the same flow. + function tweetText() { + const tierPart = achievement.tier ? 
(achievement.tier + " tier ") : ""; + return "Just unlocked " + tierPart + "\"" + achievement.name + "\" in Hermes Agent ☤\n\n" + + "@NousResearch · https://hermes-agent.nousresearch.com"; + } + + function shareOnX() { + const url = "https://x.com/intent/post?text=" + encodeURIComponent(tweetText()); + window.open(url, "_blank", "noopener,noreferrer"); + } + + return React.createElement("div", { + className: "ha-share-backdrop", + onClick: function (e) { if (e.target === e.currentTarget) onClose(); }, + }, + React.createElement("div", { className: "ha-share-dialog", role: "dialog", "aria-label": "Share achievement" }, + React.createElement("div", { className: "ha-share-head" }, + React.createElement("strong", null, "Share: " + achievement.name), + React.createElement("button", { className: "ha-share-close", onClick: onClose, "aria-label": "Close" }, "×") + ), + React.createElement("div", { className: "ha-share-preview" }, + status === "rendering" && React.createElement("div", { className: "ha-share-placeholder" }, "Rendering…"), + previewUrl && React.createElement("img", { src: previewUrl, alt: achievement.name + " share card" }) + ), + status === "error" && React.createElement("div", { className: "ha-share-error" }, errorMsg || "Something went wrong."), + React.createElement("div", { className: "ha-share-actions" }, + React.createElement("button", { + className: "ha-share-btn ha-share-btn-primary", + onClick: shareOnX, + title: "Opens X with a pre-filled post", + }, "Share on X"), + React.createElement("button", { + className: "ha-share-btn", + onClick: copyToClipboard, + disabled: status !== "ready" && status !== "copied", + title: "Copy the image to paste into your post", + }, status === "copied" ? 
"Copied ✓" : "Copy image"), + React.createElement("button", { + className: "ha-share-btn", + onClick: download, + disabled: status !== "ready" && status !== "copied", + }, "Download PNG") + ), + React.createElement("p", { className: "ha-share-hint" }, + "Share on X opens a pre-filled post in a new tab. Click Copy image first if you want the 1200×630 badge attached — X lets you paste it right into the tweet composer. Download PNG saves the file for use anywhere." + ) + ) + ); + } + function StatCard(props) { return React.createElement(C.Card, { className: "ha-stat" }, React.createElement(C.CardContent, { className: "ha-stat-content" }, @@ -170,6 +460,7 @@ const targetTier = achievement.next_tier || achievement.tier; const tierLabel = achievement.tier ? achievement.tier : (targetTier ? "Target " + targetTier : (state === "secret" ? "Hidden" : (unlocked ? "Complete" : "Objective"))); const progressText = state === "secret" ? "hidden" : (progress + (achievement.next_threshold ? " / " + achievement.next_threshold : "")); + const [shareOpen, setShareOpen] = hooks.useState(false); return React.createElement(C.Card, { className: cn("ha-card", "ha-state-" + state, tierClass(achievement.tier || achievement.next_tier)) }, React.createElement(C.CardContent, { className: "ha-card-content" }, React.createElement("div", { className: "ha-card-head" }, @@ -180,7 +471,13 @@ ), React.createElement("div", { className: "ha-badges" }, React.createElement("span", { className: "ha-state-badge" }, stateLabel), - React.createElement("span", { className: "ha-tier-badge" }, tierLabel) + React.createElement("span", { className: "ha-tier-badge" }, tierLabel), + state === "unlocked" && React.createElement("button", { + className: "ha-share-trigger", + onClick: function () { setShareOpen(true); }, + title: "Share this achievement", + "aria-label": "Share " + achievement.name, + }, "Share") ) ), React.createElement("p", { className: "ha-description" }, achievement.description), @@ -200,7 +497,11 
@@ ), React.createElement("span", { className: "ha-progress-text" }, progressText) ) - ) + ), + shareOpen && React.createElement(ShareDialog, { + achievement: achievement, + onClose: function () { setShareOpen(false); }, + }) ); } diff --git a/plugins/hermes-achievements/dashboard/dist/style.css b/plugins/hermes-achievements/dashboard/dist/style.css index fc0e138f4e..2b4321ec25 100644 --- a/plugins/hermes-achievements/dashboard/dist/style.css +++ b/plugins/hermes-achievements/dashboard/dist/style.css @@ -118,3 +118,29 @@ .ha-scan-banner-text p { margin: .25rem 0 0; font-size: .78rem; line-height: 1.35; color: var(--color-muted-foreground); text-transform: none; letter-spacing: normal; } .ha-scan-progress-track { height: .4rem; border: 1px solid color-mix(in srgb, #67e8f9 28%, var(--color-border)); background: rgba(0,0,0,.22); overflow: hidden; } .ha-scan-progress-fill { height: 100%; background: linear-gradient(90deg, #67e8f9, color-mix(in srgb, #67e8f9 48%, white)); transition: width .4s ease-out; } + +/* Share achievement — trigger button on unlocked cards + modal dialog. + * Added to the vendored bundle (on top of the upstream PCinkusz base). + * Canvas rendering is pure client-side, no backend, no network. 
+ */ +.ha-share-trigger { border: 1px solid color-mix(in srgb, var(--ha-tier) 58%, var(--color-border)); color: var(--ha-tier); background: color-mix(in srgb, var(--ha-tier) 8%, transparent); padding: .18rem .42rem; font-size: .66rem; text-transform: uppercase; letter-spacing: .08em; font-family: var(--font-mono, ui-monospace, monospace); cursor: pointer; margin-top: .05rem; transition: background .12s ease, border-color .12s ease; } +.ha-share-trigger:hover { background: color-mix(in srgb, var(--ha-tier) 20%, transparent); border-color: var(--ha-tier); } +.ha-share-trigger:focus-visible { outline: 2px solid var(--ha-tier); outline-offset: 2px; } + +.ha-share-backdrop { position: fixed; inset: 0; z-index: 1000; background: rgba(4,6,10,.72); backdrop-filter: blur(6px); display: flex; align-items: center; justify-content: center; padding: 1.5rem; animation: ha-fade-in .14s ease-out; } +.ha-share-dialog { width: min(760px, 100%); max-height: calc(100vh - 3rem); overflow: auto; border: 1px solid color-mix(in srgb, var(--color-border) 70%, var(--color-ring)); background: color-mix(in srgb, var(--color-card) 94%, #000); box-shadow: 0 24px 60px rgba(0,0,0,.55); display: flex; flex-direction: column; gap: .9rem; padding: 1rem 1.1rem 1.1rem; } +.ha-share-head { display: flex; align-items: center; justify-content: space-between; gap: .75rem; } +.ha-share-head strong { font-size: .82rem; text-transform: uppercase; letter-spacing: .1em; font-family: var(--font-mono, ui-monospace, monospace); color: var(--color-foreground); } +.ha-share-close { width: 1.9rem; height: 1.9rem; display: grid; place-items: center; border: 1px solid var(--color-border); background: transparent; color: var(--color-muted-foreground); font-size: 1.1rem; cursor: pointer; line-height: 1; } +.ha-share-close:hover { color: var(--color-foreground); border-color: var(--color-ring); } +.ha-share-preview { position: relative; border: 1px solid var(--color-border); background: #0b0d11; overflow: hidden; 
aspect-ratio: 1200 / 630; } +.ha-share-preview img { display: block; width: 100%; height: 100%; object-fit: contain; } +.ha-share-placeholder { position: absolute; inset: 0; display: grid; place-items: center; color: var(--color-muted-foreground); font-family: var(--font-mono, ui-monospace, monospace); font-size: .82rem; text-transform: uppercase; letter-spacing: .1em; animation: ha-pulse 1.4s ease-in-out infinite; border-radius: 0; } +.ha-share-error { border: 1px solid #ef4444; color: #fecaca; background: color-mix(in srgb, #ef4444 10%, transparent); padding: .55rem .7rem; font-size: .78rem; font-family: var(--font-mono, ui-monospace, monospace); } +.ha-share-actions { display: flex; gap: .55rem; flex-wrap: wrap; } +.ha-share-btn { border: 1px solid var(--color-border); background: color-mix(in srgb, var(--color-card) 72%, transparent); color: var(--color-foreground); padding: .5rem .85rem; font-size: .82rem; font-family: var(--font-mono, ui-monospace, monospace); text-transform: uppercase; letter-spacing: .08em; cursor: pointer; transition: border-color .12s ease, background .12s ease; } +.ha-share-btn:hover:not(:disabled) { border-color: var(--color-ring); background: color-mix(in srgb, var(--color-primary) 16%, var(--color-card)); } +.ha-share-btn:disabled { opacity: .5; cursor: not-allowed; } +.ha-share-btn-primary { border-color: #ffffff; color: #ffffff; background: #000000; } +.ha-share-btn-primary:hover:not(:disabled) { background: #1a1a1a; border-color: #67e8f9; color: #67e8f9; } +.ha-share-hint { margin: 0; color: var(--color-muted-foreground); font-size: .76rem; line-height: 1.45; } diff --git a/plugins/hermes-achievements/dashboard/manifest.json b/plugins/hermes-achievements/dashboard/manifest.json index 02c4050f34..5fcc39313b 100644 --- a/plugins/hermes-achievements/dashboard/manifest.json +++ b/plugins/hermes-achievements/dashboard/manifest.json @@ -3,7 +3,7 @@ "label": "Achievements", "description": "Steam-style achievements for vibe coding and 
agentic Hermes workflows.", "icon": "Star", - "version": "0.3.1", + "version": "0.4.0", "tab": { "path": "/achievements", "position": "after:analytics" }, "entry": "dist/index.js", "css": "dist/style.css", diff --git a/plugins/image_gen/xai/__init__.py b/plugins/image_gen/xai/__init__.py index b1ec4368ef..93fd10ce39 100644 --- a/plugins/image_gen/xai/__init__.py +++ b/plugins/image_gen/xai/__init__.py @@ -203,11 +203,12 @@ class XAIImageGenProvider(ImageGenProvider): ) response.raise_for_status() except requests.HTTPError as exc: - status = exc.response.status_code if exc.response else 0 + response = exc.response + status = response.status_code if response is not None else 0 try: - err_msg = exc.response.json().get("error", {}).get("message", exc.response.text[:300]) + err_msg = response.json().get("error", {}).get("message", response.text[:300]) except Exception: - err_msg = exc.response.text[:300] if exc.response else str(exc) + err_msg = response.text[:300] if response is not None else str(exc) logger.error("xAI image gen failed (%d): %s", status, err_msg) return error_response( error=f"xAI image generation failed ({status}): {err_msg}", diff --git a/plugins/kanban/dashboard/dist/index.js b/plugins/kanban/dashboard/dist/index.js index 1b37ef72d4..3bdd92d47e 100644 --- a/plugins/kanban/dashboard/dist/index.js +++ b/plugins/kanban/dashboard/dist/index.js @@ -63,6 +63,53 @@ const API = "/api/plugins/kanban"; const MIME_TASK = "text/x-hermes-task"; + // localStorage key for the user's selected board. Independent of the + // CLI's on-disk ``<root>/kanban/current`` pointer so browser users + // can inspect any board without shifting the CLI's active board out + // from under a terminal they left open. 
+ const LS_BOARD_KEY = "hermes.kanban.selectedBoard"; + + function readSelectedBoard() { + try { + const v = window.localStorage.getItem(LS_BOARD_KEY); + return (v || "").trim() || null; + } catch (_e) { return null; } + } + + function writeSelectedBoard(slug) { + try { + if (slug && slug !== "default") window.localStorage.setItem(LS_BOARD_KEY, slug); + else window.localStorage.removeItem(LS_BOARD_KEY); + } catch (_e) { /* ignore quota / private mode */ } + } + + function withBoard(url, board) { + // Append ?board=<slug> when a non-default board is active. Omitted + // for default so the URL stays clean and the backend falls through + // to its own resolution chain (env var → ``current`` file → + // default) which is already correct. + if (!board || board === "default") return url; + const sep = url.indexOf("?") >= 0 ? "&" : "?"; + return `${url}${sep}board=${encodeURIComponent(board)}`; + } + + // The SDK's Select component fires ``onValueChange(value)`` directly + // (it's a shadcn-style popup, not a native <select>). Older plugin + // code calls ``onChange({target: {value}})`` which silently never + // fires. This helper wires both signatures so a setter works with + // either API — use it as: + // + // h(Select, {..., ...selectChangeHandler(setState), ...}) + function selectChangeHandler(setter) { + return { + onValueChange: function (v) { setter(v == null ? "" : v); }, + onChange: function (e) { + const v = e && e.target ? e.target.value : e; + setter(v == null ? "" : v); + }, + }; + } + // ------------------------------------------------------------------------- // Minimal safe markdown renderer. 
// @@ -245,7 +292,19 @@ // ------------------------------------------------------------------------- function KanbanPage() { - const [board, setBoard] = useState(null); + const [board, setBoard] = useState(() => readSelectedBoard() || "default"); + const [boardList, setBoardList] = useState([]); // [{slug, name, counts, ...}] + const [showNewBoard, setShowNewBoard] = useState(false); + + const [kanbanBoard, setKanbanBoard] = useState(null); // the grid data + // Alias so the rest of the function can keep using `board` semantically + // for the grid data (card columns + tenants + assignees) without + // colliding with the selected-board slug above. History: the old + // component had `const [board, setBoard]` for the grid data. We + // renamed the grid data to `kanbanBoard` so the more useful name + // (`board`) belongs to the selected slug. + const boardData = kanbanBoard; + const setBoardData = setKanbanBoard; const [config, setConfig] = useState(null); const [loading, setLoading] = useState(true); const [error, setError] = useState(null); @@ -292,9 +351,9 @@ if (tenantFilter) qs.set("tenant", tenantFilter); if (includeArchived) qs.set("include_archived", "true"); const url = qs.toString() ? `${API}/board?${qs}` : `${API}/board`; - return SDK.fetchJSON(url) + return SDK.fetchJSON(withBoard(url, board)) .then(function (data) { - setBoard(data); + setBoardData(data); cursorRef.current = data.latest_event_id || 0; setError(null); }) @@ -302,7 +361,26 @@ setError(String(err && err.message ? 
err.message : err)); }) .finally(function () { setLoading(false); }); - }, [tenantFilter, includeArchived]); + }, [tenantFilter, includeArchived, board]); + + // --- load list of boards for the switcher ------------------------------ + const loadBoardList = useCallback(function () { + return SDK.fetchJSON(`${API}/boards`) + .then(function (data) { + const boards = (data && data.boards) || []; + setBoardList(boards); + // If the stored slug isn't in the list any longer (board was + // deleted in the CLI while dashboard was open), fall back to + // default so the UI doesn't hang on a 404. + if (board !== "default" && !boards.find(function (b) { return b.slug === board; })) { + setBoard("default"); + writeSelectedBoard("default"); + } + }) + .catch(function () { /* non-fatal */ }); + }, [board]); + + useEffect(function () { loadBoardList(); }, [loadBoardList]); const scheduleReload = useCallback(function () { if (reloadTimerRef.current) return; @@ -324,16 +402,21 @@ // --- WebSocket --------------------------------------------------------- useEffect(function () { - if (!board) return undefined; + if (!boardData) return undefined; wsClosedRef.current = false; function openWs() { if (wsClosedRef.current) return; const token = window.__HERMES_SESSION_TOKEN__ || ""; const proto = window.location.protocol === "https:" ? "wss:" : "ws:"; - const qs = new URLSearchParams({ + const qsParams = { since: String(cursorRef.current || 0), token: token, - }); + }; + // Pin the WS stream to the currently-selected board so events + // from other boards don't bleed in. Only set for non-default so + // single-board installs keep the cleaner URL. 
+ if (board && board !== "default") qsParams.board = board; + const qs = new URLSearchParams(qsParams); const url = `${proto}//${window.location.host}${API}/events?${qs}`; let ws; try { ws = new WebSocket(url); } catch (_e) { return; } @@ -372,11 +455,11 @@ wsClosedRef.current = true; try { wsRef.current && wsRef.current.close(); } catch (_e) { /* noop */ } }; - }, [!!board, scheduleReload]); + }, [!!boardData, board, scheduleReload]); // --- filtering ---------------------------------------------------------- const filteredBoard = useMemo(function () { - if (!board) return null; + if (!boardData) return null; const q = search.trim().toLowerCase(); const filterTask = function (t) { if (assigneeFilter && t.assignee !== assigneeFilter) return false; @@ -386,18 +469,18 @@ } return true; }; - return Object.assign({}, board, { - columns: board.columns.map(function (col) { + return Object.assign({}, boardData, { + columns: boardData.columns.map(function (col) { return Object.assign({}, col, { tasks: col.tasks.filter(filterTask) }); }), }); - }, [board, assigneeFilter, search]); + }, [boardData, assigneeFilter, search]); // --- actions ------------------------------------------------------------ const moveTask = useCallback(function (taskId, newStatus) { const confirmMsg = DESTRUCTIVE_TRANSITIONS[newStatus]; if (confirmMsg && !window.confirm(confirmMsg)) return; - setBoard(function (b) { + setBoardData(function (b) { if (!b) return b; let moved = null; const columns = b.columns.map(function (col) { @@ -413,7 +496,7 @@ } return Object.assign({}, b, { columns }); }); - SDK.fetchJSON(`${API}/tasks/${encodeURIComponent(taskId)}`, { + SDK.fetchJSON(withBoard(`${API}/tasks/${encodeURIComponent(taskId)}`, board), { method: "PATCH", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ status: newStatus }), @@ -421,10 +504,10 @@ setError(`Move failed: ${err.message || err}`); loadBoard(); }); - }, [loadBoard]); + }, [loadBoard, board]); const createTask = 
useCallback(function (body) { - return SDK.fetchJSON(`${API}/tasks`, { + return SDK.fetchJSON(withBoard(`${API}/tasks`, board), { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(body), @@ -437,9 +520,10 @@ setError("Task created, but: " + res.warning); } loadBoard(); + loadBoardList(); // refresh counts in the switcher return res; }); - }, [loadBoard]); + }, [loadBoard, loadBoardList, board]); const toggleSelected = useCallback(function (id, additive) { setSelectedIds(function (prev) { @@ -455,7 +539,7 @@ if (selectedIds.size === 0) return; if (confirmMsg && !window.confirm(confirmMsg)) return; const body = Object.assign({ ids: Array.from(selectedIds) }, patch); - SDK.fetchJSON(`${API}/tasks/bulk`, { + SDK.fetchJSON(withBoard(`${API}/tasks/bulk`, board), { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify(body), @@ -470,14 +554,50 @@ loadBoard(); }) .catch(function (e) { setError(String(e.message || e)); }); - }, [selectedIds, loadBoard, clearSelected]); + }, [selectedIds, loadBoard, clearSelected, board]); + + // --- board switching ---------------------------------------------------- + const switchBoard = useCallback(function (nextSlug) { + if (!nextSlug || nextSlug === board) return; + // Optimistic UI: clear the current grid + show loading, reset the + // event cursor so the WS reopens aligned to the new board's + // latest_event_id on the next loadBoard. 
+ setBoardData(null); + cursorRef.current = 0; + setLoading(true); + setBoard(nextSlug); + writeSelectedBoard(nextSlug); + }, [board]); + + const createNewBoard = useCallback(function (payload) { + return SDK.fetchJSON(`${API}/boards`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(payload), + }).then(function (res) { + loadBoardList(); + const slug = res && res.board && res.board.slug; + if (slug && payload.switch) switchBoard(slug); + return res; + }); + }, [loadBoardList, switchBoard]); + + const deleteBoard = useCallback(function (slug) { + if (!slug || slug === "default") return Promise.resolve(); + return SDK.fetchJSON(`${API}/boards/${encodeURIComponent(slug)}`, { + method: "DELETE", + }).then(function () { + loadBoardList(); + if (board === slug) switchBoard("default"); + }); + }, [board, loadBoardList, switchBoard]); // --- render ------------------------------------------------------------- - if (loading && !board) { + if (loading && !boardData) { return h("div", { className: "p-8 text-sm text-muted-foreground" }, "Loading Kanban board…"); } - if (error && !board) { + if (error && !boardData) { return h(Card, null, h(CardContent, { className: "p-6" }, h("div", { className: "text-sm text-destructive" }, @@ -493,15 +613,28 @@ return h(ErrorBoundary, null, h("div", { className: "hermes-kanban flex flex-col gap-4" }, - h(BoardToolbar, { + h(BoardSwitcher, { board: board, + boardList: boardList, + onSwitch: switchBoard, + onNewClick: function () { setShowNewBoard(true); }, + onDeleteBoard: deleteBoard, + }), + showNewBoard ? 
h(NewBoardDialog, { + onCancel: function () { setShowNewBoard(false); }, + onCreate: function (payload) { + return createNewBoard(payload).then(function () { setShowNewBoard(false); }); + }, + }) : null, + h(BoardToolbar, { + board: boardData, tenantFilter, setTenantFilter, assigneeFilter, setAssigneeFilter, includeArchived, setIncludeArchived, laneByProfile, setLaneByProfile, search, setSearch, onNudgeDispatch: function () { - SDK.fetchJSON(`${API}/dispatch?max=8`, { method: "POST" }) + SDK.fetchJSON(withBoard(`${API}/dispatch?max=8`, board), { method: "POST" }) .then(loadBoard) .catch(function (e) { setError(String(e.message || e)); }); }, @@ -509,7 +642,7 @@ }), selectedIds.size > 0 ? h(BulkActionBar, { count: selectedIds.size, - assignees: (board && board.assignees) || [], + assignees: (boardData && boardData.assignees) || [], onApply: applyBulk, onClear: clearSelected, }) : null, @@ -522,20 +655,215 @@ onMove: moveTask, onOpen: setSelectedTaskId, onCreate: createTask, - allTasks: board.columns.reduce(function (acc, c) { return acc.concat(c.tasks); }, []), + allTasks: boardData.columns.reduce(function (acc, c) { return acc.concat(c.tasks); }, []), }), selectedTaskId ? h(TaskDrawer, { taskId: selectedTaskId, + boardSlug: board, onClose: function () { setSelectedTaskId(null); }, onRefresh: loadBoard, renderMarkdown: renderMd, - allTasks: board.columns.reduce(function (acc, c) { return acc.concat(c.tasks); }, []), + allTasks: boardData.columns.reduce(function (acc, c) { return acc.concat(c.tasks); }, []), eventTick: taskEventTick[selectedTaskId] || 0, }) : null, ), ); } + // ------------------------------------------------------------------------- + // Board switcher (multi-project) + // ------------------------------------------------------------------------- + + function BoardSwitcher(props) { + const list = props.boardList || []; + const current = list.find(function (b) { return b.slug === props.board; }); + const currentName = current && current.name ? 
current.name : props.board; + const currentTotal = current ? current.total : 0; + const hasMultipleBoards = list.length > 1; + + // Hide entirely when only the default board exists AND it's empty — + // single-project users never see boards UI unless they ask for it. + // We show the [+ New board] affordance as soon as any board has a + // task (so the user can discover multi-project before they need it) + // OR when any non-default board exists. + const totalAcrossAllBoards = list.reduce(function (n, b) { return n + (b.total || 0); }, 0); + const shouldShow = hasMultipleBoards || totalAcrossAllBoards > 0; + if (!shouldShow) { + return h("div", { + className: "hermes-kanban-boardswitcher-compact", + title: "Boards let you separate unrelated streams of work", + }, + h(Button, { + onClick: props.onNewClick, + size: "sm", + className: "h-7 text-xs", + }, "+ New board"), + ); + } + + return h("div", { className: "hermes-kanban-boardswitcher" }, + h("div", { className: "hermes-kanban-boardswitcher-inner" }, + h("div", { className: "flex flex-col gap-0.5" }, + h("div", { className: "text-[11px] uppercase tracking-wider text-muted-foreground" }, + "Board"), + h("div", { className: "flex items-center gap-2" }, + h(Select, Object.assign({ + value: props.board, + className: "h-8 min-w-[220px]", + "aria-label": "Switch kanban board", + }, selectChangeHandler(function (v) { if (v) props.onSwitch(v); })), + list.map(function (b) { + const label = b.total > 0 + ? `${b.name || b.slug} · ${b.total}` + : (b.name || b.slug); + return h(SelectOption, { key: b.slug, value: b.slug }, label); + }), + ), + h("span", { className: "text-xs text-muted-foreground" }, + `${currentTotal || 0} task${currentTotal === 1 ? "" : "s"}`), + ), + ), + h("div", { className: "flex-1" }), + h(Button, { + onClick: props.onNewClick, + size: "sm", + className: "h-8", + }, "+ New board"), + props.board !== "default" + ? h(Button, { + onClick: function () { + const msg = + `Archive board '${currentName}'? 
` + + `It will be moved to boards/_archived/ so you can recover it later. ` + + `Tasks on this board will no longer appear anywhere in the UI.`; + if (window.confirm(msg)) props.onDeleteBoard(props.board); + }, + size: "sm", + className: "h-8", + title: "Archive this board", + }, "Archive") + : null, + ), + ); + } + + function NewBoardDialog(props) { + const [slug, setSlug] = useState(""); + const [name, setName] = useState(""); + const [description, setDescription] = useState(""); + const [icon, setIcon] = useState(""); + const [switchTo, setSwitchTo] = useState(true); + const [submitting, setSubmitting] = useState(false); + const [err, setErr] = useState(null); + + // Auto-derive a name from the slug if the user hasn't typed one. + const autoName = useMemo(function () { + if (!slug) return ""; + return slug.replace(/[-_]+/g, " ") + .split(" ") + .filter(Boolean) + .map(function (w) { return w[0].toUpperCase() + w.slice(1); }) + .join(" "); + }, [slug]); + + function onSubmit(ev) { + if (ev) ev.preventDefault(); + if (!slug.trim()) { setErr("slug is required"); return; } + setSubmitting(true); + setErr(null); + props.onCreate({ + slug: slug.trim(), + name: name.trim() || autoName || undefined, + description: description.trim() || undefined, + icon: icon.trim() || undefined, + switch: switchTo, + }).catch(function (e) { + setErr(String(e && e.message ? e.message : e)); + setSubmitting(false); + }); + } + + return h("div", { + className: "hermes-kanban-dialog-backdrop", + onClick: function (e) { if (e.target === e.currentTarget) props.onCancel(); }, + }, + h("form", { + className: "hermes-kanban-dialog", + onSubmit: onSubmit, + }, + h("div", { className: "hermes-kanban-dialog-title" }, "New board"), + h("div", { className: "text-xs text-muted-foreground mb-2" }, + "Boards let you separate unrelated streams of work — one per project, repo, or domain. 
Workers on one board never see another board's tasks."), + h("div", { className: "flex flex-col gap-3" }, + h("div", { className: "flex flex-col gap-1" }, + h(Label, { className: "text-xs" }, "Slug ", + h("span", { className: "text-muted-foreground" }, + "— lowercase, hyphens, e.g. atm10-server")), + h(Input, { + value: slug, + onChange: function (e) { setSlug(e.target.value.toLowerCase().replace(/[^a-z0-9\-_]/g, "-")); }, + placeholder: "atm10-server", + autoFocus: true, + className: "h-8", + }), + ), + h("div", { className: "flex flex-col gap-1" }, + h(Label, { className: "text-xs" }, "Display name ", + h("span", { className: "text-muted-foreground" }, "(optional)")), + h(Input, { + value: name, + onChange: function (e) { setName(e.target.value); }, + placeholder: autoName || "Display name", + className: "h-8", + }), + ), + h("div", { className: "flex flex-col gap-1" }, + h(Label, { className: "text-xs" }, "Description ", + h("span", { className: "text-muted-foreground" }, "(optional)")), + h(Input, { + value: description, + onChange: function (e) { setDescription(e.target.value); }, + placeholder: "What goes on this board?", + className: "h-8", + }), + ), + h("div", { className: "flex flex-col gap-1" }, + h(Label, { className: "text-xs" }, "Icon ", + h("span", { className: "text-muted-foreground" }, "(single character or emoji)")), + h(Input, { + value: icon, + onChange: function (e) { setIcon(e.target.value.slice(0, 4)); }, + placeholder: "📦", + className: "h-8 w-24", + }), + ), + h("label", { className: "flex items-center gap-2 text-xs" }, + h("input", { + type: "checkbox", + checked: switchTo, + onChange: function (e) { setSwitchTo(e.target.checked); }, + }), + "Switch to this board after creating it", + ), + ), + err ? 
h("div", { className: "text-xs text-destructive mt-2" }, err) : null, + h("div", { className: "hermes-kanban-dialog-actions" }, + h(Button, { + type: "button", + onClick: props.onCancel, + size: "sm", + disabled: submitting, + }, "Cancel"), + h(Button, { + type: "submit", + size: "sm", + disabled: submitting || !slug.trim(), + }, submitting ? "Creating…" : "Create board"), + ), + ), + ); + } + // ------------------------------------------------------------------------- // Toolbar // ------------------------------------------------------------------------- @@ -555,11 +883,10 @@ ), h("div", { className: "flex flex-col gap-1" }, h(Label, { className: "text-xs text-muted-foreground" }, "Tenant"), - h(Select, { + h(Select, Object.assign({ value: props.tenantFilter, - onChange: function (e) { props.setTenantFilter(e.target.value); }, className: "h-8", - }, + }, selectChangeHandler(props.setTenantFilter)), h(SelectOption, { value: "" }, "All tenants"), tenants.map(function (t) { return h(SelectOption, { key: t, value: t }, t); @@ -568,11 +895,10 @@ ), h("div", { className: "flex flex-col gap-1" }, h(Label, { className: "text-xs text-muted-foreground" }, "Assignee"), - h(Select, { + h(Select, Object.assign({ value: props.assigneeFilter, - onChange: function (e) { props.setAssigneeFilter(e.target.value); }, className: "h-8", - }, + }, selectChangeHandler(props.setAssigneeFilter)), h(SelectOption, { value: "" }, "All profiles"), assignees.map(function (a) { return h(SelectOption, { key: a, value: a }, a); @@ -919,6 +1245,12 @@ const [priority, setPriority] = useState(0); const [parent, setParent] = useState(""); const [skills, setSkills] = useState(""); + // Workspace controls. `scratch` (default) ignores path; `worktree` optionally + // takes a path (dispatcher derives one from the assignee profile otherwise); + // `dir` requires a path. Backend enforces the rule — we only hide/show the + // input here to save vertical space in the common `scratch` case. 
+ const [workspaceKind, setWorkspaceKind] = useState("scratch"); + const [workspacePath, setWorkspacePath] = useState(""); const submit = function () { const trimmed = title.trim(); @@ -938,10 +1270,23 @@ .map(function (s) { return s.trim(); }) .filter(function (s) { return s.length > 0; }); if (skillList.length > 0) body.skills = skillList; + // Only send workspace_kind when it's non-default. Keeps the request + // shape small and interoperable with older dispatcher versions. + if (workspaceKind && workspaceKind !== "scratch") { + body.workspace_kind = workspaceKind; + } + const wpTrim = workspacePath.trim(); + if (wpTrim) body.workspace_path = wpTrim; props.onSubmit(body); setTitle(""); setAssignee(""); setPriority(0); setParent(""); setSkills(""); + setWorkspaceKind("scratch"); setWorkspacePath(""); }; + const showPathInput = workspaceKind !== "scratch"; + const pathPlaceholder = workspaceKind === "dir" + ? "workspace path (required, e.g. ~/projects/my-app)" + : "workspace path (optional, derived from assignee if blank)"; + return h("div", { className: "hermes-kanban-inline-create" }, h(Input, { value: title, @@ -978,6 +1323,24 @@ title: "Force-load these skills into the worker (in addition to the built-in kanban-worker).", className: "h-7 text-xs", }), + h("div", { className: "flex gap-2" }, + h(Select, { + value: workspaceKind, + onChange: function (e) { setWorkspaceKind(e.target.value); }, + title: "scratch: isolated temp dir (default). worktree: git worktree on the assignee profile. dir: exact path (required below).", + className: "h-7 text-xs w-28", + }, + h(SelectOption, { value: "scratch" }, "scratch"), + h(SelectOption, { value: "worktree" }, "worktree"), + h(SelectOption, { value: "dir" }, "dir"), + ), + showPathInput ? 
h(Input, { + value: workspacePath, + onChange: function (e) { setWorkspacePath(e.target.value); }, + placeholder: pathPlaceholder, + className: "h-7 text-xs flex-1", + }) : null, + ), h(Select, { value: parent, onChange: function (e) { setParent(e.target.value); }, @@ -1012,13 +1375,14 @@ const [err, setErr] = useState(null); const [newComment, setNewComment] = useState(""); const [editing, setEditing] = useState(false); + const boardSlug = props.boardSlug; const load = useCallback(function () { - return SDK.fetchJSON(`${API}/tasks/${encodeURIComponent(props.taskId)}`) + return SDK.fetchJSON(withBoard(`${API}/tasks/${encodeURIComponent(props.taskId)}`, boardSlug)) .then(function (d) { setData(d); setErr(null); }) .catch(function (e) { setErr(String(e.message || e)); }) .finally(function () { setLoading(false); }); - }, [props.taskId]); + }, [props.taskId, boardSlug]); // Reload when the WS stream reports new events for this task id // (completion, block, crash, etc. — anything that'd make the drawer @@ -1033,7 +1397,7 @@ const handleComment = function () { const body = newComment.trim(); if (!body) return; - SDK.fetchJSON(`${API}/tasks/${encodeURIComponent(props.taskId)}/comments`, { + SDK.fetchJSON(withBoard(`${API}/tasks/${encodeURIComponent(props.taskId)}/comments`, boardSlug), { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ body }), @@ -1048,7 +1412,7 @@ if (opts && opts.confirm && !window.confirm(opts.confirm)) { return Promise.resolve(); } - return SDK.fetchJSON(`${API}/tasks/${encodeURIComponent(props.taskId)}`, { + return SDK.fetchJSON(withBoard(`${API}/tasks/${encodeURIComponent(props.taskId)}`, boardSlug), { method: "PATCH", headers: { "Content-Type": "application/json" }, body: JSON.stringify(patch), @@ -1056,7 +1420,7 @@ }; const addLink = function (parentId) { - return SDK.fetchJSON(`${API}/links`, { + return SDK.fetchJSON(withBoard(`${API}/links`, boardSlug), { method: "POST", headers: { "Content-Type": 
"application/json" }, body: JSON.stringify({ parent_id: parentId, child_id: props.taskId }), @@ -1065,12 +1429,12 @@ }; const removeLink = function (parentId) { const qs = new URLSearchParams({ parent_id: parentId, child_id: props.taskId }); - return SDK.fetchJSON(`${API}/links?${qs}`, { method: "DELETE" }) + return SDK.fetchJSON(withBoard(`${API}/links?${qs}`, boardSlug), { method: "DELETE" }) .then(function () { load(); props.onRefresh(); }) .catch(function (e) { setErr(String(e.message || e)); }); }; const addChild = function (childId) { - return SDK.fetchJSON(`${API}/links`, { + return SDK.fetchJSON(withBoard(`${API}/links`, boardSlug), { method: "POST", headers: { "Content-Type": "application/json" }, body: JSON.stringify({ parent_id: props.taskId, child_id: childId }), @@ -1079,7 +1443,7 @@ }; const removeChild = function (childId) { const qs = new URLSearchParams({ parent_id: props.taskId, child_id: childId }); - return SDK.fetchJSON(`${API}/links?${qs}`, { method: "DELETE" }) + return SDK.fetchJSON(withBoard(`${API}/links?${qs}`, boardSlug), { method: "DELETE" }) .then(function () { load(); props.onRefresh(); }) .catch(function (e) { setErr(String(e.message || e)); }); }; @@ -1104,6 +1468,7 @@ data, editing, setEditing, renderMarkdown: props.renderMarkdown, allTasks: props.allTasks, + boardSlug: boardSlug, onPatch: doPatch, onAddParent: addLink, onRemoveParent: removeLink, @@ -1216,7 +1581,7 @@ ); }), ), - h(WorkerLogSection, { taskId: t.id }), + h(WorkerLogSection, { taskId: t.id, boardSlug: props.boardSlug }), h(RunHistorySection, { runs: props.data.runs || [] }), ); } @@ -1287,10 +1652,10 @@ const [state, setState] = useState({ loading: false, data: null, err: null }); const load = useCallback(function () { setState({ loading: true, data: null, err: null }); - SDK.fetchJSON(`${API}/tasks/${encodeURIComponent(props.taskId)}/log?tail=100000`) + SDK.fetchJSON(withBoard(`${API}/tasks/${encodeURIComponent(props.taskId)}/log?tail=100000`, props.boardSlug)) 
.then(function (d) { setState({ loading: false, data: d, err: null }); }) .catch(function (e) { setState({ loading: false, data: null, err: String(e.message || e) }); }); - }, [props.taskId]); + }, [props.taskId, props.boardSlug]); // Auto-load when the section mounts; the user opened the drawer so the // cost is one small HTTP round-trip. diff --git a/plugins/kanban/dashboard/dist/style.css b/plugins/kanban/dashboard/dist/style.css index 74876aeff5..3c197e6209 100644 --- a/plugins/kanban/dashboard/dist/style.css +++ b/plugins/kanban/dashboard/dist/style.css @@ -268,7 +268,7 @@ } .hermes-kanban-drawer { - width: min(480px, 92vw); + width: min(var(--hermes-kanban-drawer-width, 640px), 92vw); height: 100vh; background: var(--color-card); border-left: 1px solid var(--color-border); @@ -334,7 +334,7 @@ .hermes-kanban-meta-row { display: flex; gap: 0.5rem; - font-size: 0.72rem; + font-size: 0.8rem; } .hermes-kanban-meta-label { width: 92px; @@ -367,14 +367,15 @@ .hermes-kanban-pre { margin: 0; - padding: 0.45rem 0.55rem; + padding: 0.5rem 0.6rem; white-space: pre-wrap; word-break: break-word; background: color-mix(in srgb, var(--color-foreground) 4%, transparent); border: 1px solid var(--color-border); border-radius: var(--radius-sm, 0.25rem); font-family: var(--font-mono, ui-monospace, monospace); - font-size: 0.72rem; + font-size: 0.8rem; + line-height: 1.5; color: var(--color-foreground); } @@ -605,8 +606,8 @@ /* ---- Markdown rendering -------------------------------------------- */ .hermes-kanban-md { - font-size: 0.8rem; - line-height: 1.55; + font-size: 0.85rem; + line-height: 1.6; color: var(--color-foreground); } .hermes-kanban-md p { margin: 0.25rem 0; } @@ -632,15 +633,22 @@ } .hermes-kanban-md code { font-family: var(--font-mono, ui-monospace, monospace); - font-size: 0.75rem; + font-size: 0.8rem; padding: 0.05rem 0.3rem; background: color-mix(in srgb, var(--color-foreground) 8%, transparent); border-radius: 3px; + color: inherit; } +/* Fenced code block. 
Set a visible background even when --color-foreground + * is empty (color-mix falls through to transparent in that case), and force + * color: inherit so the text tracks the drawer foreground rather than the + * UA default on <code> elements — otherwise themes that don't set + * --color-foreground leave code text rendering near-black on dark themes + * (see issue #18576). */ .hermes-kanban-md-code { margin: 0.35rem 0; padding: 0.5rem 0.6rem; - background: color-mix(in srgb, var(--color-foreground) 5%, transparent); + background: color-mix(in srgb, currentColor 6%, transparent); border: 1px solid var(--color-border); border-radius: var(--radius-sm, 0.25rem); overflow-x: auto; @@ -648,8 +656,9 @@ .hermes-kanban-md-code code { background: transparent; padding: 0; - font-size: 0.75rem; + font-size: 0.8rem; white-space: pre; + color: inherit; } .hermes-kanban-md strong { font-weight: 600; } @@ -684,11 +693,11 @@ /* ---- Worker log pane ------------------------------------------------ */ .hermes-kanban-log { - max-height: 340px; + max-height: 360px; overflow: auto; white-space: pre; - font-size: 0.7rem; - line-height: 1.45; + font-size: 0.78rem; + line-height: 1.5; } @@ -739,7 +748,8 @@ color: var(--color-muted-foreground); } .hermes-kanban-run-summary { - font-size: 0.75rem; + font-size: 0.82rem; + line-height: 1.5; padding: 0.2rem 0 0; color: var(--color-foreground); } @@ -751,10 +761,65 @@ } .hermes-kanban-run-meta { display: block; - font-size: 0.65rem; + font-size: 0.72rem; + line-height: 1.5; padding: 0.15rem 0 0; color: var(--color-muted-foreground); white-space: pre-wrap; word-break: break-word; font-family: var(--font-mono, ui-monospace, monospace); } + +/* ------------------------------------------------------------------------- + Multi-project: board switcher + create-board dialog + ------------------------------------------------------------------------- */ +.hermes-kanban-boardswitcher { + border: 1px solid var(--color-border, rgba(120, 120, 140, 0.25)); + 
border-radius: 0.5rem; + padding: 0.6rem 0.85rem; + background: var(--color-card-subtle, rgba(255, 255, 255, 0.02)); +} +.hermes-kanban-boardswitcher-inner { + display: flex; + align-items: flex-end; + gap: 0.75rem; + flex-wrap: wrap; +} +.hermes-kanban-boardswitcher-compact { + display: flex; + justify-content: flex-end; + padding: 0 0.25rem; +} +.hermes-kanban-dialog-backdrop { + position: fixed; + inset: 0; + background: rgba(8, 10, 16, 0.55); + backdrop-filter: blur(2px); + z-index: 60; + display: flex; + align-items: center; + justify-content: center; +} +.hermes-kanban-dialog { + background: var(--color-card, #121421); + color: var(--color-foreground); + border: 1px solid var(--color-border, rgba(120, 120, 140, 0.25)); + border-radius: 0.5rem; + padding: 1.1rem 1.2rem 1rem; + width: 28rem; + max-width: calc(100vw - 2rem); + max-height: calc(100vh - 3rem); + overflow: auto; + box-shadow: 0 18px 40px rgba(0, 0, 0, 0.5); +} +.hermes-kanban-dialog-title { + font-size: 1rem; + font-weight: 600; + margin-bottom: 0.25rem; +} +.hermes-kanban-dialog-actions { + display: flex; + justify-content: flex-end; + gap: 0.5rem; + margin-top: 1rem; +} diff --git a/plugins/kanban/dashboard/plugin_api.py b/plugins/kanban/dashboard/plugin_api.py index acccf63c9d..1c25f372e6 100644 --- a/plugins/kanban/dashboard/plugin_api.py +++ b/plugins/kanban/dashboard/plugin_api.py @@ -72,19 +72,45 @@ def _check_ws_token(provided: Optional[str]) -> bool: return hmac.compare_digest(str(provided), str(expected)) -def _conn(): +def _resolve_board(board: Optional[str]) -> Optional[str]: + """Validate and normalise a board slug from a query param. + + Raises :class:`HTTPException` 400 on malformed slugs so the browser + sees a clean error instead of a 500. Returns the normalised slug, + or ``None`` when the caller omitted the param (which then falls + through to the active board inside ``kb.connect()``). 
+ """ + if board is None or board == "": + return None + try: + normed = kanban_db._normalize_board_slug(board) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) + if normed and normed != kanban_db.DEFAULT_BOARD and not kanban_db.board_exists(normed): + raise HTTPException( + status_code=404, + detail=f"board {normed!r} does not exist", + ) + return normed + + +def _conn(board: Optional[str] = None): """Open a kanban_db connection, creating the schema on first use. Every handler that mutates the DB goes through this so the plugin self-heals on a fresh install (no user-visible "no such table" error if somebody hits POST /tasks before GET /board). ``init_db`` is idempotent. + + ``board`` is the query-param slug (already normalised by + :func:`_resolve_board`). When ``None`` the active board is used + via the resolution chain (env var → ``current`` file → ``default``). """ try: - kanban_db.init_db() + kanban_db.init_db(board=board) except Exception as exc: log.warning("kanban init_db failed: %s", exc) - return kanban_db.connect() + return kanban_db.connect(board=board) # --------------------------------------------------------------------------- @@ -177,13 +203,19 @@ def _links_for(conn: sqlite3.Connection, task_id: str) -> dict[str, list[str]]: def get_board( tenant: Optional[str] = Query(None, description="Filter to a single tenant"), include_archived: bool = Query(False), + board: Optional[str] = Query(None, description="Kanban board slug (omit for current)"), ): """Return the full board grouped by status column. ``_conn()`` auto-initializes ``kanban.db`` on first call so a fresh install doesn't surface a "failed to load" error on the plugin tab. + + ``board`` selects which board to read from. Omitting it falls + through to the active board (``HERMES_KANBAN_BOARD`` env → on-disk + ``current`` pointer → ``default``). 
""" - conn = _conn() + board = _resolve_board(board) + conn = _conn(board=board) try: tasks = kanban_db.list_tasks( conn, tenant=tenant, include_archived=include_archived @@ -274,8 +306,9 @@ def get_board( # --------------------------------------------------------------------------- @router.get("/tasks/{task_id}") -def get_task(task_id: str): - conn = _conn() +def get_task(task_id: str, board: Optional[str] = Query(None)): + board = _resolve_board(board) + conn = _conn(board=board) try: task = kanban_db.get_task(conn, task_id) if task is None: @@ -311,8 +344,9 @@ class CreateTaskBody(BaseModel): @router.post("/tasks") -def create_task(payload: CreateTaskBody): - conn = _conn() +def create_task(payload: CreateTaskBody, board: Optional[str] = Query(None)): + board = _resolve_board(board) + conn = _conn(board=board) try: task_id = kanban_db.create_task( conn, @@ -373,8 +407,9 @@ class UpdateTaskBody(BaseModel): @router.patch("/tasks/{task_id}") -def update_task(task_id: str, payload: UpdateTaskBody): - conn = _conn() +def update_task(task_id: str, payload: UpdateTaskBody, board: Optional[str] = Query(None)): + board = _resolve_board(board) + conn = _conn(board=board) try: task = kanban_db.get_task(conn, task_id) if task is None: @@ -414,7 +449,12 @@ def update_task(task_id: str, payload: UpdateTaskBody): ok = _set_status_direct(conn, task_id, "ready") elif s == "archived": ok = kanban_db.archive_task(conn, task_id) - elif s in ("todo", "running", "triage"): + elif s == "running": + raise HTTPException( + status_code=400, + detail="Cannot set status to 'running' directly; use the dispatcher/claim path", + ) + elif s in ("todo", "triage"): ok = _set_status_direct(conn, task_id, s) else: raise HTTPException(status_code=400, detail=f"unknown status: {s}") @@ -527,10 +567,11 @@ class CommentBody(BaseModel): @router.post("/tasks/{task_id}/comments") -def add_comment(task_id: str, payload: CommentBody): +def add_comment(task_id: str, payload: CommentBody, board: 
Optional[str] = Query(None)): if not payload.body.strip(): raise HTTPException(status_code=400, detail="body is required") - conn = _conn() + board = _resolve_board(board) + conn = _conn(board=board) try: if kanban_db.get_task(conn, task_id) is None: raise HTTPException(status_code=404, detail=f"task {task_id} not found") @@ -552,8 +593,9 @@ class LinkBody(BaseModel): @router.post("/links") -def add_link(payload: LinkBody): - conn = _conn() +def add_link(payload: LinkBody, board: Optional[str] = Query(None)): + board = _resolve_board(board) + conn = _conn(board=board) try: kanban_db.link_tasks(conn, payload.parent_id, payload.child_id) return {"ok": True} @@ -564,8 +606,13 @@ def add_link(payload: LinkBody): @router.delete("/links") -def delete_link(parent_id: str = Query(...), child_id: str = Query(...)): - conn = _conn() +def delete_link( + parent_id: str = Query(...), + child_id: str = Query(...), + board: Optional[str] = Query(None), +): + board = _resolve_board(board) + conn = _conn(board=board) try: ok = kanban_db.unlink_tasks(conn, parent_id, child_id) return {"ok": bool(ok)} @@ -586,7 +633,7 @@ class BulkTaskBody(BaseModel): @router.post("/tasks/bulk") -def bulk_update(payload: BulkTaskBody): +def bulk_update(payload: BulkTaskBody, board: Optional[str] = Query(None)): """Apply the same patch to every id in ``payload.ids``. This is an *independent* iteration — per-task failures don't abort @@ -596,7 +643,8 @@ def bulk_update(payload: BulkTaskBody): if not ids: raise HTTPException(status_code=400, detail="ids is required") results: list[dict] = [] - conn = _conn() + board = _resolve_board(board) + conn = _conn(board=board) try: for tid in ids: entry: dict[str, Any] = {"id": tid, "ok": True} @@ -690,14 +738,15 @@ def get_config(): # --------------------------------------------------------------------------- @router.get("/stats") -def get_stats(): +def get_stats(board: Optional[str] = Query(None)): """Per-status + per-assignee counts + oldest-ready age. 
Designed for the dashboard HUD and for router profiles that need to answer "is this specialist overloaded?" without scanning the whole board themselves. """ - conn = _conn() + board = _resolve_board(board) + conn = _conn(board=board) try: return kanban_db.board_stats(conn) finally: @@ -705,7 +754,7 @@ def get_stats(): @router.get("/assignees") -def get_assignees(): +def get_assignees(board: Optional[str] = Query(None)): """Known profiles + per-profile task counts. Returns the union of ``~/.hermes/profiles/*`` on disk and every @@ -713,7 +762,8 @@ def get_assignees(): this to populate its assignee dropdown so a freshly-created profile appears in the picker before it's been given any task. """ - conn = _conn() + board = _resolve_board(board) + conn = _conn(board=board) try: return {"assignees": kanban_db.known_assignees(conn)} finally: @@ -725,7 +775,11 @@ def get_assignees(): # --------------------------------------------------------------------------- @router.get("/tasks/{task_id}/log") -def get_task_log(task_id: str, tail: Optional[int] = Query(None, ge=1, le=2_000_000)): +def get_task_log( + task_id: str, + tail: Optional[int] = Query(None, ge=1, le=2_000_000), + board: Optional[str] = Query(None), +): """Return the worker's stdout/stderr log. ``tail`` caps the response size (bytes) so the dashboard drawer @@ -734,15 +788,16 @@ def get_task_log(task_id: str, tail: Optional[int] = Query(None, ge=1, le=2_000_ ``_rotate_worker_log`` — a single ``.log.1`` is kept, no further generations, so disk usage per task is bounded at ~4 MiB. 
""" - conn = _conn() + board = _resolve_board(board) + conn = _conn(board=board) try: task = kanban_db.get_task(conn, task_id) finally: conn.close() if task is None: raise HTTPException(status_code=404, detail=f"task {task_id} not found") - content = kanban_db.read_worker_log(task_id, tail_bytes=tail) - log_path = kanban_db.worker_log_path(task_id) + content = kanban_db.read_worker_log(task_id, tail_bytes=tail, board=board) + log_path = kanban_db.worker_log_path(task_id, board=board) size = log_path.stat().st_size if log_path.exists() else 0 return { "task_id": task_id, @@ -760,11 +815,16 @@ def get_task_log(task_id: str, tail: Optional[int] = Query(None, ge=1, le=2_000_ # --------------------------------------------------------------------------- @router.post("/dispatch") -def dispatch(dry_run: bool = Query(False), max_n: int = Query(8, alias="max")): - conn = _conn() +def dispatch( + dry_run: bool = Query(False), + max_n: int = Query(8, alias="max"), + board: Optional[str] = Query(None), +): + board = _resolve_board(board) + conn = _conn(board=board) try: result = kanban_db.dispatch_once( - conn, dry_run=dry_run, max_spawn=max_n, + conn, dry_run=dry_run, max_spawn=max_n, board=board, ) # DispatchResult is a dataclass. 
try: @@ -775,6 +835,124 @@ def dispatch(dry_run: bool = Query(False), max_n: int = Query(8, alias="max")): conn.close() +# --------------------------------------------------------------------------- +# Boards CRUD (multi-project support) +# --------------------------------------------------------------------------- + +class CreateBoardBody(BaseModel): + slug: str + name: Optional[str] = None + description: Optional[str] = None + icon: Optional[str] = None + color: Optional[str] = None + switch: bool = False + + +class RenameBoardBody(BaseModel): + name: Optional[str] = None + description: Optional[str] = None + icon: Optional[str] = None + color: Optional[str] = None + + +def _board_counts(slug: str) -> dict[str, int]: + """Return ``{status: count}`` for a board. Safe on an empty DB.""" + try: + path = kanban_db.kanban_db_path(board=slug) + if not path.exists(): + return {} + conn = kanban_db.connect(board=slug) + try: + rows = conn.execute( + "SELECT status, COUNT(*) AS n FROM tasks GROUP BY status" + ).fetchall() + return {r["status"]: int(r["n"]) for r in rows} + finally: + conn.close() + except Exception: + return {} + + +@router.get("/boards") +def list_boards(include_archived: bool = Query(False)): + """Return every board on disk with task counts and the active slug.""" + boards = kanban_db.list_boards(include_archived=include_archived) + current = kanban_db.get_current_board() + for b in boards: + b["is_current"] = (b["slug"] == current) + b["counts"] = _board_counts(b["slug"]) + b["total"] = sum(b["counts"].values()) + return {"boards": boards, "current": current} + + +@router.post("/boards") +def create_board_endpoint(payload: CreateBoardBody): + """Create a new board. 
Idempotent — ``slug`` collision returns existing.""" + try: + meta = kanban_db.create_board( + payload.slug, + name=payload.name, + description=payload.description, + icon=payload.icon, + color=payload.color, + ) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) + if payload.switch: + try: + kanban_db.set_current_board(meta["slug"]) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) + return {"board": meta, "current": kanban_db.get_current_board()} + + +@router.patch("/boards/{slug}") +def rename_board(slug: str, payload: RenameBoardBody): + """Update a board's display metadata (slug is immutable — create a new one to rename the directory).""" + try: + normed = kanban_db._normalize_board_slug(slug) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) + if not normed or not kanban_db.board_exists(normed): + raise HTTPException(status_code=404, detail=f"board {slug!r} does not exist") + meta = kanban_db.write_board_metadata( + normed, + name=payload.name, + description=payload.description, + icon=payload.icon, + color=payload.color, + ) + return {"board": meta} + + +@router.delete("/boards/{slug}") +def delete_board(slug: str, delete: bool = Query(False, description="Hard-delete instead of archive")): + """Archive (default) or hard-delete a board.""" + try: + res = kanban_db.remove_board(slug, archive=not delete) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) + return {"result": res, "current": kanban_db.get_current_board()} + + +@router.post("/boards/{slug}/switch") +def switch_board(slug: str): + """Persist ``slug`` as the active board for subsequent CLI / slash calls. + + Dashboard users pick boards via a client-side ``localStorage`` — this + endpoint is for ``/kanban boards switch`` parity so gateway slash + commands and the CLI share the same current-board pointer. 
+ """ + try: + normed = kanban_db._normalize_board_slug(slug) + except ValueError as exc: + raise HTTPException(status_code=400, detail=str(exc)) + if not normed or not kanban_db.board_exists(normed): + raise HTTPException(status_code=404, detail=f"board {slug!r} does not exist") + kanban_db.set_current_board(normed) + return {"current": normed} + + # --------------------------------------------------------------------------- # WebSocket: /events?since=<event_id> # --------------------------------------------------------------------------- @@ -802,8 +980,18 @@ async def stream_events(ws: WebSocket): except ValueError: cursor = 0 + # Board selection — pinned at the WS handshake; re-subscribe to + # switch boards. Changing boards mid-stream would require + # reconciling two cursors, so the UI just opens a new WS on + # board change. + ws_board_raw = ws.query_params.get("board") + try: + ws_board = kanban_db._normalize_board_slug(ws_board_raw) if ws_board_raw else None + except ValueError: + ws_board = None + def _fetch_new(cursor_val: int) -> tuple[int, list[dict]]: - conn = kanban_db.connect() + conn = kanban_db.connect(board=ws_board) try: rows = conn.execute( "SELECT id, task_id, run_id, kind, payload, created_at " diff --git a/plugins/platforms/teams/adapter.py b/plugins/platforms/teams/adapter.py index 945ffa0795..d0a2b7adbc 100644 --- a/plugins/platforms/teams/adapter.py +++ b/plugins/platforms/teams/adapter.py @@ -592,6 +592,8 @@ def interactive_setup() -> None: from hermes_cli.config import ( get_env_value, save_env_value, + ) + from hermes_cli.cli_output import ( prompt, prompt_yes_no, print_info, diff --git a/run_agent.py b/run_agent.py index cfcd325eb6..c8388bd0ae 100644 --- a/run_agent.py +++ b/run_agent.py @@ -3611,7 +3611,7 @@ class AIAgent: _parent_runtime = self._current_main_runtime() review_agent = AIAgent( model=self.model, - max_iterations=8, + max_iterations=16, quiet_mode=True, platform=self.platform, provider=self.provider, @@ -3629,6 +3629,14 
@@ class AIAgent: review_agent._user_profile_enabled = self._user_profile_enabled review_agent._memory_nudge_interval = 0 review_agent._skill_nudge_interval = 0 + # Suppress all status/warning emits from the fork so the + # user only sees the final successful-action summary. + # Without this, mid-review "Iteration budget exhausted", + # rate-limit retries, compression warnings, and other + # lifecycle messages bubble up through _emit_status -> + # _vprint and leak past the stdout redirect (they go via + # _print_fn/status_callback, which bypass sys.stdout). + review_agent.suppress_status_output = True review_agent.run_conversation( user_message=prompt, @@ -5056,6 +5064,23 @@ class AIAgent: return tc.get("call_id", "") or tc.get("id", "") or "" return getattr(tc, "call_id", "") or getattr(tc, "id", "") or "" + @staticmethod + def _get_tool_call_name_static(tc) -> str: + """Extract function name from a tool_call entry (dict or object). + + Gemini's OpenAI-compatibility endpoint requires every `role: tool` + message to carry the matching function name. OpenAI/Anthropic/ollama + tolerate its absence, so the field is best-effort: callers fall back + to "" and the message still works elsewhere. 
+ """ + if isinstance(tc, dict): + fn = tc.get("function") + if isinstance(fn, dict): + return fn.get("name", "") or "" + return "" + fn = getattr(tc, "function", None) + return getattr(fn, "name", "") or "" + _VALID_API_ROLES = frozenset({"system", "user", "assistant", "tool", "function", "developer"}) @staticmethod @@ -5118,6 +5143,7 @@ class AIAgent: if cid in missing_results: patched.append({ "role": "tool", + "name": AIAgent._get_tool_call_name_static(tc), "content": "[Result unavailable — see context summary above]", "tool_call_id": cid, }) @@ -5816,6 +5842,17 @@ class AIAgent: return primary_client with self._openai_client_lock(): request_kwargs = dict(self._client_kwargs) + # Per-request OpenAI-wire clients (used by both the non-streaming + # chat-completions path and the streaming chat-completions path + # in `_interruptible_api_call`) should not run the SDK's built-in + # retry loop: the agent's outer loop owns retries with credential + # rotation, provider fallback, and backoff that the SDK can't + # see. Leaving SDK retries on (default 2) compounds with our outer + # retries and lets a single hung provider request stretch to ~3x + # the per-call timeout before our stale detector reports it. + # Shared/primary clients and Anthropic / Bedrock paths are + # unaffected (they don't go through here). + request_kwargs["max_retries"] = 0 if ( base_url_host_matches(str(request_kwargs.get("base_url", "")), "api.githubcopilot.com") and self._api_kwargs_have_image_parts(api_kwargs or {}) @@ -8192,6 +8229,7 @@ class AIAgent: """True when using an anthropic-compatible endpoint that preserves dots in model names. Alibaba/DashScope keeps dots (e.g. qwen3.5-plus). MiniMax keeps dots (e.g. MiniMax-M2.7). + Xiaomi MiMo keeps dots (e.g. mimo-v2.5, mimo-v2.5-pro). OpenCode Go/Zen keeps dots for non-Claude models (e.g. minimax-m2.5-free). ZAI/Zhipu keeps dots (e.g. glm-4.7, glm-5.1). 
AWS Bedrock uses dotted inference-profile IDs @@ -8205,6 +8243,7 @@ class AIAgent: "alibaba", "minimax", "minimax-cn", "opencode-go", "opencode-zen", "zai", "bedrock", + "xiaomi", }: return True base = (getattr(self, "base_url", "") or "").lower() @@ -8214,6 +8253,7 @@ class AIAgent: or "minimax" in base or "opencode.ai/zen/" in base or "bigmodel.cn" in base + or "xiaomimimo.com" in base # AWS Bedrock runtime endpoints — defense-in-depth when # ``provider`` is unset but ``base_url`` still names Bedrock. or "bedrock-runtime." in base @@ -9008,6 +9048,7 @@ class AIAgent: insert_at, { "role": "tool", + "name": function_name if function_name != "?" else "", "tool_call_id": tool_call_id, "content": marker, }, @@ -9412,6 +9453,7 @@ class AIAgent: for tc in tool_calls: messages.append({ "role": "tool", + "name": tc.function.name, "content": f"[Tool execution cancelled — {tc.function.name} was skipped due to user interrupt]", "tool_call_id": tc.id, }) @@ -9753,6 +9795,7 @@ class AIAgent: tool_msg = { "role": "tool", + "name": name, "content": function_result, "tool_call_id": tc.id, } @@ -9790,6 +9833,7 @@ class AIAgent: skipped_name = skipped_tc.function.name skip_msg = { "role": "tool", + "name": skipped_name, "content": f"[Tool execution cancelled — {skipped_name} was skipped due to user interrupt]", "tool_call_id": skipped_tc.id, } @@ -10140,6 +10184,7 @@ class AIAgent: tool_msg = { "role": "tool", + "name": function_name, "content": function_result, "tool_call_id": tool_call.id } @@ -10166,6 +10211,7 @@ class AIAgent: skipped_name = skipped_tc.function.name skip_msg = { "role": "tool", + "name": skipped_name, "content": f"[Tool execution skipped — {skipped_name} was not started. 
User sent a new message]", "tool_call_id": skipped_tc.id } @@ -10300,7 +10346,10 @@ class AIAgent: provider_preferences["order"] = self.providers_order if self.provider_sort: provider_preferences["sort"] = self.provider_sort - if provider_preferences: + if provider_preferences and ( + (self.provider or "").strip().lower() == "openrouter" + or self._is_openrouter_url() + ): summary_extra_body["provider"] = provider_preferences if summary_extra_body: @@ -10418,6 +10467,15 @@ class AIAgent: from hermes_logging import set_session_context set_session_context(self.session_id) + # Bind the skill write-origin ContextVar for this thread so tool + # handlers (e.g. skill_manage create) can tell whether they are + # running inside the background self-improvement review fork vs. + # a foreground user-directed turn. Set at the top of each call; + # the review fork runs on its own thread with a fresh context, + # so the foreground value here does not leak into it. + from tools.skill_provenance import set_current_write_origin + set_current_write_origin(getattr(self, "_memory_write_origin", "assistant_tool")) + # If the previous turn activated fallback, restore the primary # runtime so this turn gets a fresh attempt with the preferred model. # No-op when _fallback_activated is False (gateway, first turn, etc.). @@ -10623,11 +10681,11 @@ class AIAgent: self.model, f"{self.context_compressor.context_length:,}", ) - if not self.quiet_mode: - self._safe_print( - f"📦 Preflight compression: ~{_preflight_tokens:,} tokens " - f">= {self.context_compressor.threshold_tokens:,} threshold" - ) + self._emit_status( + f"📦 Preflight compression: ~{_preflight_tokens:,} tokens " + f">= {self.context_compressor.threshold_tokens:,} threshold. " + "This may take a moment." + ) # May need multiple passes for very large sessions with small # context windows (each pass summarises the middle N turns). 
for _pass in range(3): @@ -13076,6 +13134,7 @@ class AIAgent: content = "Skipped: another tool call in this turn used an invalid name. Please retry this tool call." messages.append({ "role": "tool", + "name": tc.function.name, "tool_call_id": tc.id, "content": content, }) @@ -13167,6 +13226,7 @@ class AIAgent: tool_result = "Skipped: other tool call in this response had invalid JSON." messages.append({ "role": "tool", + "name": tc.function.name, "tool_call_id": tc.id, "content": tool_result, }) @@ -13415,9 +13475,22 @@ class AIAgent: m.get("role") == "tool" for m in messages[-5:] # check recent messages ) + # Detect Qwen3/Ollama-style in-content thinking blocks. + # Ollama puts <think> in the content field (not in + # reasoning_content), so _has_structured below would + # miss it. We check here so thinking-only responses + # after tool calls route to prefill instead of nudge. + _has_inline_thinking = bool( + re.search( + r'<think>|<thinking>|<reasoning>', + final_response or "", + re.IGNORECASE, + ) + ) if ( _prior_was_tool and not getattr(self, "_post_tool_empty_retried", False) + and not _has_inline_thinking # thinking model still working — let prefill handle ): self._post_tool_empty_retried = True # Clear stale narration so it doesn't resurface @@ -13457,10 +13530,13 @@ class AIAgent: # continue — the model will see its own reasoning # on the next turn and produce the text portion. # Inspired by clawdbot's "incomplete-text" recovery. + # Also covers Qwen3/Ollama in-content <think> blocks + # (detected above as _has_inline_thinking). 
_has_structured = bool( getattr(assistant_message, "reasoning", None) or getattr(assistant_message, "reasoning_content", None) or getattr(assistant_message, "reasoning_details", None) + or _has_inline_thinking ) if _has_structured and self._thinking_prefill_retries < 2: self._thinking_prefill_retries += 1 @@ -13667,6 +13743,7 @@ class AIAgent: if tc["id"] not in answered_ids: err_msg = { "role": "tool", + "name": AIAgent._get_tool_call_name_static(tc), "tool_call_id": tc["id"], "content": f"Error executing tool: {error_msg}", } diff --git a/scripts/release.py b/scripts/release.py index a752ffb98e..7197f3d833 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -48,6 +48,7 @@ AUTHOR_MAP = { "127238744+teknium1@users.noreply.github.com": "teknium1", "159539633+MottledShadow@users.noreply.github.com": "MottledShadow", "aludwin+gh@gmail.com": "adamludwin", + "ngusev@astralinux.ru": "NikolayGusev-astra", "2093036+exiao@users.noreply.github.com": "exiao", "rylen.anil@gmail.com": "rylena", "godnanijatin@gmail.com": "jatingodnani", @@ -67,6 +68,8 @@ AUTHOR_MAP = { "nbot@liizfq.top": "liizfq", "274096618+hermes-agent-dhabibi@users.noreply.github.com": "dhabibi", "dejie.guo@gmail.com": "JayGwod", + "133716830+0xKingBack@users.noreply.github.com": "0xKingBack", + "daixin1204@gmail.com": "SimbaKingjoe", "maxence@groine.fr": "MaxyMoos", "61830395+leprincep35700@users.noreply.github.com": "leprincep35700", # OpenViking viking_read salvage (April 2026) @@ -95,6 +98,7 @@ AUTHOR_MAP = { "252818347@qq.com": "hejuntt1014", "uzmpsk.dilekakbas@gmail.com": "dlkakbs", "beliefanx@gmail.com": "BeliefanX", + "changchun989@proton.me": "changchun989", "jefferson@heimdallstrategy.com": "Mind-Dragon", "44753291+Nanako0129@users.noreply.github.com": "Nanako0129", "steve.westerhouse@origami-analytics.com": "westers", @@ -339,6 +343,8 @@ AUTHOR_MAP = { "haileymarshall005@gmail.com": "haileymarshall", "greer.guthrie@gmail.com": "g-guthrie", "kennyx102@gmail.com": "bobashopcashier", + 
"77253505+bobashopcashier@users.noreply.github.com": "bobashopcashier", + "25355950+megastary@users.noreply.github.com": "megastary", # PR #18325 "shokatalishaikh95@gmail.com": "areu01or00", "bryan@intertwinesys.com": "bryanyoung", "christo.mitov@gmail.com": "christomitov", @@ -457,6 +463,7 @@ AUTHOR_MAP = { "centripetal-star@users.noreply.github.com": "centripetal-star", "LeonSGP43@users.noreply.github.com": "LeonSGP43", "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43", + "cine.dreamer.one@gmail.com": "LeonSGP43", "Lubrsy706@users.noreply.github.com": "Lubrsy706", "niyant@spicefi.xyz": "spniyant", "olafthiele@gmail.com": "olafthiele", @@ -510,6 +517,7 @@ AUTHOR_MAP = { "nftpoetrist@gmail.com": "nftpoetrist", # PR #18982 "millerc79@users.noreply.github.com": "millerc79", # PR #19033 "hermes@example.com": "shellybotmoyer", # PR #18915 (bot-committed) + "exx@example.com": "exxmen", # PR #19555 "hypnosis.mda@gmail.com": "Hypn0sis", "ywt000818@gmail.com": "OwenYWT", "dhandhalyabhavik@gmail.com": "v1k22", @@ -621,6 +629,84 @@ AUTHOR_MAP = { "2114364329@qq.com": "cuyua9", "2557058999@qq.com": "Disaster-Terminator", "cine.dreamer.one@gmail.com": "LeonSGP43", + "zyprothh@gmail.com": "Zyproth", + "amitgaur@gmail.com": "amitgaur", + "albuquerque.abner@gmail.com": "mrbob-git", + "kiala@users.noreply.github.com": "kiala9", + "alanxchen@gmail.com": "alanxchen85", + "clawbot@clawbots-Mac-mini.local": "John-tip", + "der@konsi.org": "konsisumer", + "cirwel@The-CIRWEL-Group.local": "CIRWEL", + "molvikar8@gmail.com": "molvikar", + "nftpoetrist@gmail.com": "nftpoetrist", + "dodofun@126.com": "colorcross", + "1615063567@qq.com": "zhao0112", + "ethanguo.2003@gmail.com": "EthanGuo-coder", + "dev0jsh@gmail.com": "tmdgusya", + "leavr@163.com": "leavrcn", + "17683456+wanazhar@users.noreply.github.com": "wanazhar", + "26782336+cixuuz@users.noreply.github.com": "cixuuz", + "aleksandr.pasevin@openzeppelin.com": "pasevin", + "ubuntu@localhost.localdomain": "holynn-q", + 
"holynn@placeholder.local": "holynn-q", + "agent@hermes.local": "jacdevos", + "sunsky.lau@gmail.com": "liuhao1024", + "qiuqfang98@qq.com": "keepcalmqqf", + "261867348+ai-ag2026@users.noreply.github.com": "ai-ag2026", + "yanzh.su@gmail.com": "YanzhongSu", + "wanderwang@users.noreply.github.com": "WanderWang", + "yueheime@gmail.com": "yuehei", + "emidomh@gmail.com": "Emidomenge", + "2642448440@qq.com": "BlackJulySnow", + "4317663+helix4u@users.noreply.github.com": "helix4u", + "floptopbot33@gmail.com": "flobo3", + "dpaluy@users.noreply.github.com": "dpaluy", + "psikonetik@gmail.com": "el-analista", + "chenb19870707@gmail.com": "ms-alan", + "hex-clawd@users.noreply.github.com": "hex-clawd", + "154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43", + "barteq@hacknotes.local": "barteqpl", + "pama0227@gmail.com": "pama0227", + "52785845+ee-blog@users.noreply.github.com": "ee-blog", + "simplenamebox@gmail.com": "simplenamebox-ops", + "balyan.sid@gmail.com": "alt-glitch", + "xdord@xdorddeMac-mini.local": "foreverxdord", + "k2767567815@gmail.com": "QifengKuang", + "88077783+jjjojoj@users.noreply.github.com": "jjjojoj", + "valda@underscore.jp": "valda", + "lling486@163.com": "M3RCUR2Y", + "buraysandro9@gmail.com": "ygd58", + "ideathinklab01-source@users.noreply.github.com": "ideathinklab01-source", + "27987889@qq.com": "zng8418", + "daniuxie88@proton.me": "DaniuXie", + "panchanler@gmail.com": "ChanlerDev", + "252620095+briandevans@users.noreply.github.com": "briandevans", + "141889580+h0tp-ftw@users.noreply.github.com": "h0tp-ftw", + "chinadbo@foxmail.com": "chinadbo", + "82637225+kshitijk4poor@users.noreply.github.com": "kshitijk4poor", + "xyywtt@gmail.com": "xyiy001", + "charliekerfoot@gmail.com": "CharlieKerfoot", + "grey0202@users.noreply.github.com": "Grey0202", + "vominh1919@gmail.com": "vominh1919", + "giwavictor9@gmail.com": "giwaov", + "yoimexex@gmail.com": "Yoimex", + "76803960+atongrun@users.noreply.github.com": "atongrun", + "michaeldanko@icloud.com": 
"MichaelWDanko", + "xudavid429@gmail.com": "YX234", + "kathy@Kathy.local": "julysir", + "274902531@qq.com": "JanCong", + "225304168+e-shizz@users.noreply.github.com": "e-shizz", + "vincent_hh@users.noreply.github.com": "VinVC", + "1243352777@qq.com": "zons-zhaozhy", + "dejie.guo@gmail.com": "JayGwod", + "52840391+swithek@users.noreply.github.com": "swithek", + "raipratik0101@gmail.com": "PratikRai0101", + "code@sasha.id": "sasha-id", + "chen.yunbo@xydigit.com": "chenyunbo411", + "openclaw@local": "Asce66", + "59465365+0xsir0000@users.noreply.github.com": "0xsir0000", + "lisanhu2014@hotmail.com": "lisanhu", + "0668001438@zte.com.cn": "chenyunbo411", "leozeli@qq.com": "leozeli", "linlehao@cuhk.edu.cn": "LehaoLin", "liutong@isacas.ac.cn": "I3eg1nner", @@ -679,6 +765,8 @@ AUTHOR_MAP = { "ztzheng@163.com": "chengoak", # PR #17467 "24110240104@m.fudan.edu.cn": "YuShu", # co-author only "charliekerfoot@gmail.com": "CharlieKerfoot", # PR #18951 + # Debug share upload-time redaction (May 2026) + "dhuysamen@gmail.com": "GodsBoy", # PR #19318 } diff --git a/tests/agent/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py index 2e676aef62..0bb607d741 100644 --- a/tests/agent/test_anthropic_adapter.py +++ b/tests/agent/test_anthropic_adapter.py @@ -1113,6 +1113,45 @@ class TestBuildAnthropicKwargs: assert _forbids_sampling_params("claude-opus-4-6") is False assert _forbids_sampling_params("claude-sonnet-4-5") is False + def test_supports_fast_mode_predicate(self): + """Fast mode is Opus 4.6 only — Opus 4.7 and others must be excluded.""" + from agent.anthropic_adapter import _supports_fast_mode + assert _supports_fast_mode("claude-opus-4-6") is True + assert _supports_fast_mode("anthropic/claude-opus-4-6") is True + assert _supports_fast_mode("claude-opus-4-7") is False + assert _supports_fast_mode("claude-sonnet-4-6") is False + assert _supports_fast_mode("claude-haiku-4-5") is False + assert _supports_fast_mode("") is False + + def 
test_fast_mode_omitted_for_unsupported_model(self): + """fast_mode=True on Opus 4.7 must NOT inject speed=fast (API 400s).""" + kwargs = build_anthropic_kwargs( + model="claude-opus-4-7", + messages=[{"role": "user", "content": "hi"}], + tools=None, + max_tokens=1024, + reasoning_config=None, + fast_mode=True, + ) + # extra_body either absent or doesn't carry "speed" + assert "speed" not in kwargs.get("extra_body", {}) + # No fast-mode beta header should be added either + beta_header = (kwargs.get("extra_headers") or {}).get("anthropic-beta", "") + assert "fast-mode-2026-02-01" not in beta_header + + def test_fast_mode_still_applied_on_opus_46(self): + """Regression guard — fast mode must still work on Opus 4.6.""" + kwargs = build_anthropic_kwargs( + model="claude-opus-4-6", + messages=[{"role": "user", "content": "hi"}], + tools=None, + max_tokens=1024, + reasoning_config=None, + fast_mode=True, + ) + assert kwargs.get("extra_body", {}).get("speed") == "fast" + assert "fast-mode-2026-02-01" in kwargs["extra_headers"]["anthropic-beta"] + def test_reasoning_disabled(self): kwargs = build_anthropic_kwargs( model="claude-sonnet-4-20250514", diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index c57a0b6372..43125554df 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -1893,3 +1893,53 @@ class TestOpenRouterExplicitApiKey: assert call_kwargs["api_key"] == "env-fallback-key", ( f"Expected env fallback key to be used when explicit_api_key is None, got: {call_kwargs['api_key']}" ) + + +class TestAnthropicExplicitApiKey: + """Test that explicit_api_key is correctly propagated to _try_anthropic(). + + Parity with the OpenRouter fix in #18768: resolve_provider_client() passes + explicit_api_key to _try_openrouter(), but the anthropic branch was not + updated — _try_anthropic() always fell back to resolve_anthropic_token() + even when an explicit key was supplied (e.g. 
from a fallback_model entry). + """ + + def test_try_anthropic_uses_explicit_api_key_over_env(self): + """_try_anthropic(explicit_api_key) must use the supplied key, not the env fallback.""" + with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="env-fallback-key"), \ + patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \ + patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)): + mock_build.return_value = MagicMock() + from agent.auxiliary_client import _try_anthropic + client, model = _try_anthropic("explicit-pool-key") + assert client is not None + assert mock_build.call_args.args[0] == "explicit-pool-key", ( + f"Expected explicit_api_key to be passed, got: {mock_build.call_args.args[0]}" + ) + assert mock_build.call_args.args[0] != "env-fallback-key" + + def test_try_anthropic_without_explicit_key_falls_back_to_resolve(self): + """Without explicit_api_key, _try_anthropic falls back to resolve_anthropic_token.""" + with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="env-fallback-key"), \ + patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \ + patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)): + mock_build.return_value = MagicMock() + from agent.auxiliary_client import _try_anthropic + client, model = _try_anthropic() + assert client is not None + assert mock_build.call_args.args[0] == "env-fallback-key" + + def test_resolve_provider_client_passes_explicit_api_key_to_anthropic(self): + """resolve_provider_client(provider='anthropic', explicit_api_key=...) 
must propagate the key.""" + with patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="env-key"), \ + patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \ + patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)): + mock_build.return_value = MagicMock() + client, model = resolve_provider_client( + provider="anthropic", + explicit_api_key="explicit-fallback-key", + ) + assert client is not None + assert mock_build.call_args.args[0] == "explicit-fallback-key", ( + "resolve_provider_client must forward explicit_api_key to _try_anthropic()" + ) diff --git a/tests/agent/test_bedrock_adapter.py b/tests/agent/test_bedrock_adapter.py index 2005a6c13c..27c55cb1e9 100644 --- a/tests/agent/test_bedrock_adapter.py +++ b/tests/agent/test_bedrock_adapter.py @@ -1283,18 +1283,21 @@ class TestIsStaleConnectionError: """Classifier that decides whether an exception warrants client eviction.""" def test_detects_botocore_connection_closed_error(self): + pytest.importorskip("botocore", reason="botocore required for Bedrock exception tests") from agent.bedrock_adapter import is_stale_connection_error from botocore.exceptions import ConnectionClosedError exc = ConnectionClosedError(endpoint_url="https://bedrock.example") assert is_stale_connection_error(exc) is True def test_detects_botocore_endpoint_connection_error(self): + pytest.importorskip("botocore", reason="botocore required for Bedrock exception tests") from agent.bedrock_adapter import is_stale_connection_error from botocore.exceptions import EndpointConnectionError exc = EndpointConnectionError(endpoint_url="https://bedrock.example") assert is_stale_connection_error(exc) is True def test_detects_botocore_read_timeout(self): + pytest.importorskip("botocore", reason="botocore required for Bedrock exception tests") from agent.bedrock_adapter import is_stale_connection_error from botocore.exceptions import ReadTimeoutError exc = 
ReadTimeoutError(endpoint_url="https://bedrock.example") @@ -1355,6 +1358,7 @@ class TestCallConverseInvalidatesOnStaleError: reconnects instead of reusing the dead socket.""" def test_converse_evicts_client_on_stale_error(self): + pytest.importorskip("botocore", reason="botocore required for Bedrock exception tests") from agent.bedrock_adapter import ( _bedrock_runtime_client_cache, call_converse, @@ -1381,6 +1385,7 @@ class TestCallConverseInvalidatesOnStaleError: ) def test_converse_stream_evicts_client_on_stale_error(self): + pytest.importorskip("botocore", reason="botocore required for Bedrock exception tests") from agent.bedrock_adapter import ( _bedrock_runtime_client_cache, call_converse_stream, @@ -1406,6 +1411,7 @@ class TestCallConverseInvalidatesOnStaleError: def test_converse_does_not_evict_on_non_stale_error(self): """Non-stale errors (e.g. ValidationException) leave the client cache alone.""" + pytest.importorskip("botocore", reason="botocore required for Bedrock exception tests") from agent.bedrock_adapter import ( _bedrock_runtime_client_cache, call_converse, diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 8d1de377b0..fd88cc7a96 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -1281,6 +1281,47 @@ class TestTokenBudgetTailProtection: assert isinstance(cut, int) assert 0 <= cut <= len(messages) + def test_generous_budget_protects_everything_floor_does_not_override( + self, budget_compressor + ): + """A budget that covers the whole transcript must prune nothing — + ``protect_tail_count`` is a minimum floor, not a ceiling.""" + c = budget_compressor + + # 100 alternating assistant/tool messages. Each tool result has + # *unique* content so the dedup pass (Pass 1, which is independent + # of prune_boundary) is a no-op and we isolate the boundary logic. 
+ messages = [] + for i in range(50): + messages.append({ + "role": "assistant", "content": None, + "tool_calls": [{ + "id": f"c{i}", + "type": "function", + "function": {"name": "noop", "arguments": "{}"}, + }], + }) + messages.append({ + "role": "tool", + "tool_call_id": f"c{i}", + "content": f"unique-tool-output-{i:03d}-" + ("x" * 250), + }) + + # Budget large enough to cover the whole transcript many times over, + # so the budget walk completes without hitting its break condition + # and the boundary lands at 0 ("protect everything"). + _, pruned = c._prune_old_tool_results( + messages, + protect_tail_count=20, + protect_tail_tokens=10_000_000, + ) + + assert pruned == 0, ( + "budget said protect everything, but the floor still pruned " + f"{pruned} messages — protect_tail_count is acting as a ceiling, " + "not a minimum floor" + ) + class TestUpdateModelBudgets: """Regression: update_model() must recalculate token budgets.""" diff --git a/tests/agent/test_curator.py b/tests/agent/test_curator.py index 2c1595a22b..87f15d03e8 100644 --- a/tests/agent/test_curator.py +++ b/tests/agent/test_curator.py @@ -154,6 +154,7 @@ def test_unused_skill_transitions_to_stale(curator_env): long_ago = (datetime.now(timezone.utc) - timedelta(days=45)).isoformat() data = u.load_usage() data["old-skill"] = u._empty_record() + data["old-skill"]["created_by"] = "agent" data["old-skill"]["last_used_at"] = long_ago data["old-skill"]["created_at"] = long_ago u.save_usage(data) @@ -172,6 +173,7 @@ def test_very_old_skill_gets_archived(curator_env): super_old = (datetime.now(timezone.utc) - timedelta(days=120)).isoformat() data = u.load_usage() data["ancient"] = u._empty_record() + data["ancient"]["created_by"] = "agent" data["ancient"]["last_used_at"] = super_old data["ancient"]["created_at"] = super_old u.save_usage(data) @@ -192,6 +194,7 @@ def test_pinned_skill_is_never_touched(curator_env): super_old = (datetime.now(timezone.utc) - timedelta(days=365)).isoformat() data = 
u.load_usage() data["precious"] = u._empty_record() + data["precious"]["created_by"] = "agent" data["precious"]["last_used_at"] = super_old data["precious"]["created_at"] = super_old data["precious"]["pinned"] = True @@ -214,6 +217,7 @@ def test_stale_skill_reactivates_on_recent_use(curator_env): recent = datetime.now(timezone.utc).isoformat() data = u.load_usage() data["revived"] = u._empty_record() + data["revived"]["created_by"] = "agent" data["revived"]["state"] = "stale" data["revived"]["last_used_at"] = recent data["revived"]["created_at"] = recent @@ -240,6 +244,27 @@ def test_new_skill_without_last_used_not_immediately_archived(curator_env): assert (skills_dir / "fresh").exists() +def test_manual_skill_is_not_auto_archived(curator_env): + """Manual skills can have usage records, but without the agent-created + marker they must stay out of curator transitions.""" + c = curator_env["curator"] + u = curator_env["usage"] + skills_dir = curator_env["home"] / "skills" + skill_dir = _write_skill(skills_dir, "manual") + + super_old = (datetime.now(timezone.utc) - timedelta(days=365)).isoformat() + data = u.load_usage() + data["manual"] = u._empty_record() + data["manual"]["last_used_at"] = super_old + data["manual"]["created_at"] = super_old + u.save_usage(data) + + counts = c.apply_automatic_transitions() + assert counts["checked"] == 0 + assert counts["archived"] == 0 + assert skill_dir.exists() + + def test_bundled_skill_not_touched_by_transitions(curator_env): c = curator_env["curator"] u = curator_env["usage"] @@ -267,8 +292,10 @@ def test_bundled_skill_not_touched_by_transitions(curator_env): def test_run_review_records_state(curator_env): c = curator_env["curator"] + u = curator_env["usage"] skills_dir = curator_env["home"] / "skills" _write_skill(skills_dir, "a") + u.mark_agent_created("a") result = c.run_curator_review(synchronous=True) assert "started_at" in result @@ -284,8 +311,10 @@ def test_dry_run_does_not_advance_state(curator_env, monkeypatch): 
`hermes curator status`. Fixes #18373. """ c = curator_env["curator"] + u = curator_env["usage"] skills_dir = curator_env["home"] / "skills" _write_skill(skills_dir, "a") + u.mark_agent_created("a") # Stub the LLM so the test doesn't need a provider. monkeypatch.setattr( @@ -311,8 +340,10 @@ def test_dry_run_injects_report_only_banner(curator_env, monkeypatch): skips automatic transitions — but the LLM prompt is the only guard against the model calling skill_manage directly.""" c = curator_env["curator"] + u = curator_env["usage"] skills_dir = curator_env["home"] / "skills" _write_skill(skills_dir, "a") + u.mark_agent_created("a") captured = {} def _stub(prompt): @@ -331,8 +362,10 @@ def test_dry_run_skips_automatic_transitions(curator_env, monkeypatch): archives skills deterministically, and a preview must not touch the filesystem.""" c = curator_env["curator"] + u = curator_env["usage"] skills_dir = curator_env["home"] / "skills" _write_skill(skills_dir, "a") + u.mark_agent_created("a") called = {"n": 0} def _explode(*_a, **_kw): @@ -351,8 +384,10 @@ def test_dry_run_skips_automatic_transitions(curator_env, monkeypatch): def test_run_review_synchronous_invokes_llm_stub(curator_env, monkeypatch): c = curator_env["curator"] + u = curator_env["usage"] skills_dir = curator_env["home"] / "skills" _write_skill(skills_dir, "a") + u.mark_agent_created("a") calls = [] def _stub(prompt): @@ -409,8 +444,10 @@ def test_maybe_run_curator_enforces_idle_gate(curator_env, monkeypatch): def test_maybe_run_curator_runs_when_eligible(curator_env, monkeypatch): c = curator_env["curator"] + u = curator_env["usage"] skills_dir = curator_env["home"] / "skills" _write_skill(skills_dir, "a") + u.mark_agent_created("a") # Seed last_run_at far in the past so the interval gate opens — the # "no state" path intentionally defers the first run now (#18373). 
long_ago = datetime.now(timezone.utc) - timedelta(hours=c.get_interval_hours() * 2) @@ -645,6 +682,86 @@ def test_review_model_honors_auxiliary_curator_slot(curator_env): ) +def test_review_runtime_passes_auxiliary_curator_credentials(curator_env): + """Per-slot api_key/base_url must ride into resolve_runtime_provider (not main-only creds).""" + curator = curator_env["curator"] + cfg = { + "model": {"provider": "openrouter", "default": "openai/gpt-5.5"}, + "auxiliary": { + "curator": { + "provider": "custom", + "model": "local-mini", + "api_key": "sk-curator-only", + "base_url": "http://localhost:11434/v1", + }, + }, + } + binding = curator._resolve_review_runtime(cfg) + assert binding.provider == "custom" + assert binding.model == "local-mini" + assert binding.explicit_api_key == "sk-curator-only" + assert binding.explicit_base_url == "http://localhost:11434/v1" + + +def test_review_runtime_strips_blank_aux_credentials(curator_env): + curator = curator_env["curator"] + cfg = { + "model": {"provider": "openrouter", "default": "openai/gpt-5.5"}, + "auxiliary": { + "curator": { + "provider": "openrouter", + "model": "x/y", + "api_key": " ", + "base_url": "", + }, + }, + } + binding = curator._resolve_review_runtime(cfg) + assert binding.explicit_api_key is None + assert binding.explicit_base_url is None + + +def test_review_runtime_ignores_auxiliary_credentials_when_using_main(curator_env): + """Falling through to main model must not pick up stray auxiliary.curator secrets.""" + curator = curator_env["curator"] + cfg = { + "model": {"provider": "openrouter", "default": "openai/gpt-5.5"}, + "auxiliary": { + "curator": { + "provider": "auto", + "model": "", + "api_key": "must-not-leak", + "base_url": "http://curator-slot-ignored/", + }, + }, + } + binding = curator._resolve_review_runtime(cfg) + assert (binding.provider, binding.model) == ("openrouter", "openai/gpt-5.5") + assert binding.explicit_api_key is None + assert binding.explicit_base_url is None + + +def 
test_review_runtime_legacy_auxiliary_carry_credentials(curator_env, caplog): + curator = curator_env["curator"] + cfg = { + "model": {"provider": "openrouter", "default": "openai/gpt-5.5"}, + "curator": { + "auxiliary": { + "provider": "custom", + "model": "m", + "api_key": "legacy-key", + "base_url": "http://legacy/v1", + }, + }, + } + import logging + with caplog.at_level(logging.INFO, logger="agent.curator"): + binding = curator._resolve_review_runtime(cfg) + assert binding.explicit_api_key == "legacy-key" + assert binding.explicit_base_url == "http://legacy/v1" + assert any("deprecated curator.auxiliary" in rec.message for rec in caplog.records) + + def test_review_model_auxiliary_curator_partial_override_falls_back(curator_env): """Only one of slot provider/model set → fall back to the main pair. diff --git a/tests/agent/test_curator_classification.py b/tests/agent/test_curator_classification.py index 031d66529b..625776f537 100644 --- a/tests/agent/test_curator_classification.py +++ b/tests/agent/test_curator_classification.py @@ -220,6 +220,81 @@ def test_classify_handles_malformed_arguments_string(curator_env): assert len(result["pruned"]) == 1 +def test_classify_no_false_positive_short_name_in_file_path(curator_env): + """Short skill name that is a substring of another filename = pruned, not consolidated.""" + # e.g. 
"api" should NOT match "references/api-design.md" + result = curator_env._classify_removed_skills( + removed=["api"], + added=[], + after_names={"conventions"}, + tool_calls=[ + { + "name": "skill_manage", + "arguments": json.dumps({ + "action": "write_file", + "name": "conventions", + "file_path": "references/api-design.md", + "file_content": "# API Design\n...", + }), + }, + ], + ) + assert result["consolidated"] == [], ( + f"Short name 'api' should NOT match file_path 'references/api-design.md'" + ) + assert len(result["pruned"]) == 1 + assert result["pruned"][0]["name"] == "api" + + +def test_classify_no_false_positive_short_name_in_content(curator_env): + """Short skill name embedded in longer word in content = pruned, not consolidated.""" + # e.g. "test" should NOT match content "running latest tests" + result = curator_env._classify_removed_skills( + removed=["test"], + added=[], + after_names={"umbrella"}, + tool_calls=[ + { + "name": "skill_manage", + "arguments": json.dumps({ + "action": "patch", + "name": "umbrella", + "old_string": "old", + "new_string": "running latest tests with pytest", + }), + }, + ], + ) + assert result["consolidated"] == [], ( + f"Short name 'test' should NOT match 'latest' via word boundary" + ) + assert len(result["pruned"]) == 1 + + +def test_classify_still_matches_exact_word_in_content(curator_env): + """Word-boundary match still works for exact word occurrences.""" + # "api" SHOULD match content "use the api gateway" + result = curator_env._classify_removed_skills( + removed=["api"], + added=[], + after_names={"gateway"}, + tool_calls=[ + { + "name": "skill_manage", + "arguments": json.dumps({ + "action": "edit", + "name": "gateway", + "content": "# Gateway\n\nUse the api gateway for all requests.\n", + }), + }, + ], + ) + assert len(result["consolidated"]) == 1, ( + f"'api' should match as a standalone word in content" + ) + assert result["consolidated"][0]["into"] == "gateway" + + def 
test_report_md_splits_consolidated_and_pruned_sections(curator_env): """End-to-end: REPORT.md shows both sections distinctly.""" curator = curator_env diff --git a/tests/agent/test_error_classifier.py b/tests/agent/test_error_classifier.py index 9d52c7bdf2..5a28797349 100644 --- a/tests/agent/test_error_classifier.py +++ b/tests/agent/test_error_classifier.py @@ -410,6 +410,24 @@ class TestClassifyApiError: result = classify_api_error(e, approx_tokens=1000, context_length=200000) assert result.reason == FailoverReason.format_error + def test_400_generic_many_messages_below_large_context_pressure_is_format_error(self): + """Large-context sessions should not overflow solely due to message count.""" + e = MockAPIError( + "Error", + status_code=400, + body={"error": {"message": "Error"}}, + ) + result = classify_api_error( + e, + provider="openai-codex", + model="gpt-5.5", + approx_tokens=74320, + context_length=1_000_000, + num_messages=432, + ) + assert result.reason == FailoverReason.format_error + assert result.should_compress is False + # ── Server disconnect + large session ── def test_disconnect_large_session_context_overflow(self): @@ -425,6 +443,20 @@ class TestClassifyApiError: result = classify_api_error(e, approx_tokens=5000, context_length=200000) assert result.reason == FailoverReason.timeout + def test_disconnect_many_messages_below_large_context_pressure_is_timeout(self): + """Large-context disconnects should not overflow solely due to message count.""" + e = Exception("server disconnected without sending complete message") + result = classify_api_error( + e, + provider="openai-codex", + model="gpt-5.5", + approx_tokens=74320, + context_length=1_000_000, + num_messages=432, + ) + assert result.reason == FailoverReason.timeout + assert result.should_compress is False + # ── Provider-specific: Anthropic thinking signature ── def test_anthropic_thinking_signature(self): diff --git a/tests/agent/test_vision_resolved_args.py 
b/tests/agent/test_vision_resolved_args.py index aace435784..6558effadd 100644 --- a/tests/agent/test_vision_resolved_args.py +++ b/tests/agent/test_vision_resolved_args.py @@ -13,16 +13,13 @@ def test_vision_call_uses_resolved_provider_args(): usage=MagicMock(prompt_tokens=10, completion_tokens=5), ) - with ( - patch( - "agent.auxiliary_client._resolve_task_provider_model", - return_value=("my-resolved-provider", "my-resolved-model", "http://resolved", "resolved-key", "chat_completions"), - ), - patch( - "agent.auxiliary_client.resolve_vision_provider_client", - return_value=("my-resolved-provider", fake_client, "my-resolved-model"), - ) as mock_vision, - ): + with patch( + "agent.auxiliary_client._resolve_task_provider_model", + return_value=("my-resolved-provider", "my-resolved-model", "http://resolved", "resolved-key", "chat_completions"), + ), patch( + "agent.auxiliary_client.resolve_vision_provider_client", + return_value=("my-resolved-provider", fake_client, "my-resolved-model"), + ) as mock_vision: call_llm( "vision", provider="raw-provider", @@ -38,3 +35,30 @@ def test_vision_call_uses_resolved_provider_args(): assert call_args.kwargs["model"] == "my-resolved-model" assert call_args.kwargs["base_url"] == "http://resolved" assert call_args.kwargs["api_key"] == "resolved-key" + + +def test_vision_base_url_override_keeps_explicit_provider(): + """Explicit provider should still drive credential resolution with custom base_url.""" + from agent.auxiliary_client import resolve_vision_provider_client + + fake_client = MagicMock() + with patch( + "agent.auxiliary_client._resolve_task_provider_model", + return_value=( + "zai", + "glm-4v", + "https://open.bigmodel.cn/api/paas/v4", + None, + "chat_completions", + ), + ), patch( + "agent.auxiliary_client.resolve_provider_client", + return_value=(fake_client, "glm-4v"), + ) as mock_resolve: + provider, client, model = resolve_vision_provider_client() + + assert provider == "zai" + assert client is fake_client + assert 
model == "glm-4v" + assert mock_resolve.call_args.args[0] == "zai" + assert mock_resolve.call_args.kwargs["explicit_base_url"] == "https://open.bigmodel.cn/api/paas/v4" diff --git a/tests/agent/transports/test_codex_transport.py b/tests/agent/transports/test_codex_transport.py index d9db3be7c3..26145660cc 100644 --- a/tests/agent/transports/test_codex_transport.py +++ b/tests/agent/transports/test_codex_transport.py @@ -126,6 +126,20 @@ class TestCodexBuildKwargs: ) assert kw.get("extra_headers", {}).get("x-grok-conv-id") == "conv-123" + def test_xai_headers_preserve_request_override_headers(self, transport): + messages = [{"role": "user", "content": "Hi"}] + kw = transport.build_kwargs( + model="grok-3", messages=messages, tools=[], + session_id="conv-123", + is_xai_responses=True, + request_overrides={"extra_headers": {"X-Test": "1", "X-Trace": "abc"}}, + ) + assert kw.get("extra_headers") == { + "X-Test": "1", + "X-Trace": "abc", + "x-grok-conv-id": "conv-123", + } + def test_minimal_effort_clamped(self, transport): messages = [{"role": "user", "content": "Hi"}] kw = transport.build_kwargs( diff --git a/tests/cli/test_cli_init.py b/tests/cli/test_cli_init.py index e0fa9e4c23..d2d6398b96 100644 --- a/tests/cli/test_cli_init.py +++ b/tests/cli/test_cli_init.py @@ -123,6 +123,13 @@ class TestBusyInputMode: cli.process_command("/queue follow up") assert cli._pending_input.get_nowait() == "follow up" + def test_q_alias_queues_prompt(self): + """The /q alias should resolve to /queue, not /quit.""" + cli = _make_cli() + cli._agent_running = False + assert cli.process_command("/q follow up") is True + assert cli._pending_input.get_nowait() == "follow up" + def test_queue_mode_routes_busy_enter_to_pending(self): """In queue mode, Enter while busy should go to _pending_input, not _interrupt_queue.""" cli = _make_cli(config_overrides={"display": {"busy_input_mode": "queue"}}) diff --git a/tests/cli/test_cli_markdown_rendering.py b/tests/cli/test_cli_markdown_rendering.py 
index 01f0bab6c6..032c8875b3 100644 --- a/tests/cli/test_cli_markdown_rendering.py +++ b/tests/cli/test_cli_markdown_rendering.py @@ -22,6 +22,23 @@ def test_final_assistant_content_uses_markdown_renderable(): assert "two" in output +def test_final_assistant_content_preserves_windows_hidden_dir_paths(): + renderable = _render_final_assistant_content( + r"D:\Projects\SourceCode\hermes-agent\.ai\skills" + "\\" + ) + + output = _render_to_text(renderable) + assert r"D:\Projects\SourceCode\hermes-agent\.ai\skills" + "\\" in output + + +def test_final_assistant_content_keeps_non_path_markdown_escapes(): + renderable = _render_final_assistant_content(r"1\. Not an ordered list") + + output = _render_to_text(renderable) + assert "1. Not an ordered list" in output + assert r"1\." not in output + + def test_final_assistant_content_strips_ansi_before_markdown_rendering(): renderable = _render_final_assistant_content("\x1b[31m# Title\x1b[0m") diff --git a/tests/cli/test_cli_new_session.py b/tests/cli/test_cli_new_session.py index 63d07d26d2..4f453fea32 100644 --- a/tests/cli/test_cli_new_session.py +++ b/tests/cli/test_cli_new_session.py @@ -5,7 +5,7 @@ from __future__ import annotations import importlib import os import sys -from datetime import timedelta +from datetime import datetime, timedelta from unittest.mock import MagicMock, patch from hermes_state import SessionDB @@ -219,3 +219,59 @@ def test_new_session_resets_token_counters(tmp_path): assert comp.last_total_tokens == 0 assert comp.compression_count == 0 assert comp._context_probed is False + + +def test_new_session_with_title(capsys): + """new_session(title=...) 
creates a session and sets the title.""" + cli = _make_cli() + cli._session_db = MagicMock() + cli.agent = _FakeAgent("old_session_id", datetime.now()) + cli.conversation_history = [] + + cli.new_session(title="My Test Session") + + # Assert set_session_title was called with the new session ID and sanitized title + cli._session_db.set_session_title.assert_called_once() + call_args = cli._session_db.set_session_title.call_args + assert call_args[0][0] == cli.session_id + assert call_args[0][1] == "My Test Session" + + captured = capsys.readouterr() + assert "My Test Session" in captured.out + + +def test_new_session_with_duplicate_title_surfaces_error(capsys): + """new_session(title=...) handles ValueError from a duplicate-title conflict. + + The session is still created; the title assignment fails; the success banner + must not claim the rejected title as the session name. + """ + cli = _make_cli() + cli._session_db = MagicMock() + cli._session_db.set_session_title.side_effect = ValueError( + "Title 'Dup' is already in use by session abc-123" + ) + cli.agent = _FakeAgent("old_session_id", datetime.now()) + cli.conversation_history = [] + + # Capture warnings printed via cli._cprint. After importlib.reload(), + # the method's __globals__ dict is the one from the live module — patch + # the exact dict the method will read. + warnings: list[str] = [] + method_globals = cli.new_session.__globals__ + original = method_globals["_cprint"] + method_globals["_cprint"] = lambda msg: warnings.append(msg) + try: + cli.new_session(title="Dup") + finally: + method_globals["_cprint"] = original + + cli._session_db.set_session_title.assert_called_once() + joined = "\n".join(warnings) + assert "already in use" in joined + assert "session started untitled" in joined + + # The success banner must NOT claim the rejected title as the session name. + captured = capsys.readouterr() + assert "New session started: Dup" not in captured.out + assert "New session started!" 
in captured.out diff --git a/tests/cli/test_cwd_env_respect.py b/tests/cli/test_cwd_env_respect.py index e9f3341d2a..04e62cc12f 100644 --- a/tests/cli/test_cwd_env_respect.py +++ b/tests/cli/test_cwd_env_respect.py @@ -1,107 +1,101 @@ -"""Tests that load_cli_config() guards against lazy-import TERMINAL_CWD clobbering. +"""Tests for CLI/TUI CWD resolution in load_cli_config(). -When the gateway resolves TERMINAL_CWD at startup and cli.py is later -imported lazily (via delegate_tool → CLI_CONFIG), load_cli_config() must -not overwrite the already-resolved value with os.getcwd(). - -config.yaml terminal.cwd is the canonical source of truth. -.env TERMINAL_CWD and MESSAGING_CWD are deprecated. -See issue #10817. +Rules: +- Local backend CLI/TUI: always os.getcwd(), ignoring config and inherited env. +- Non-local with placeholder: pop cwd for backend default. +- Non-local with explicit path: keep as-is. """ import os import pytest - -# The sentinel values that mean "resolve at runtime" _CWD_PLACEHOLDERS = (".", "auto", "cwd") -def _resolve_terminal_cwd(terminal_config: dict, defaults: dict, env: dict): - """Simulate the CWD resolution logic from load_cli_config(). +def _resolve_cwd(terminal_config: dict, defaults: dict, env: dict): + """Mirror the CWD resolution logic from cli.py load_cli_config().""" + effective_backend = terminal_config.get("env_type", "local") - This mirrors the code in cli.py that checks for a pre-resolved - TERMINAL_CWD before falling back to os.getcwd(). 
- """ - if terminal_config.get("cwd") in _CWD_PLACEHOLDERS: - _existing_cwd = env.get("TERMINAL_CWD", "") - if _existing_cwd and _existing_cwd not in _CWD_PLACEHOLDERS and os.path.isabs(_existing_cwd): - terminal_config["cwd"] = _existing_cwd - defaults["terminal"]["cwd"] = _existing_cwd - else: - effective_backend = terminal_config.get("env_type", "local") - if effective_backend == "local": - terminal_config["cwd"] = "/fake/getcwd" # stand-in for os.getcwd() - defaults["terminal"]["cwd"] = terminal_config["cwd"] - else: - terminal_config.pop("cwd", None) + if effective_backend == "local": + terminal_config["cwd"] = "/fake/getcwd" + defaults["terminal"]["cwd"] = terminal_config["cwd"] + elif terminal_config.get("cwd") in _CWD_PLACEHOLDERS: + terminal_config.pop("cwd", None) - # Simulate the bridging loop: write terminal_config["cwd"] to env - _file_has_terminal = defaults.get("_file_has_terminal", False) + # Bridge: TERMINAL_CWD always exported in CLI, skipped in gateway + _is_gateway = env.get("_HERMES_GATEWAY") == "1" if "cwd" in terminal_config: - if _file_has_terminal or "TERMINAL_CWD" not in env: + if _is_gateway: + pass # don't touch env + else: env["TERMINAL_CWD"] = str(terminal_config["cwd"]) return env.get("TERMINAL_CWD", "") -class TestLazyImportGuard: - """TERMINAL_CWD resolved by gateway must survive a lazy cli.py import.""" +class TestLocalBackendCli: + """Local backend always uses os.getcwd().""" - def test_gateway_resolved_cwd_survives(self): - """Gateway set TERMINAL_CWD → lazy cli import must not clobber.""" - env = {"TERMINAL_CWD": "/home/user/workspace"} - terminal_config = {"cwd": ".", "env_type": "local"} - defaults = {"terminal": {"cwd": "."}, "_file_has_terminal": False} - - result = _resolve_terminal_cwd(terminal_config, defaults, env) - assert result == "/home/user/workspace" - - def test_gateway_resolved_cwd_survives_with_file_terminal(self): - """Even when config.yaml has a terminal: section, resolved CWD survives.""" - env = 
{"TERMINAL_CWD": "/home/user/workspace"} - terminal_config = {"cwd": ".", "env_type": "local"} - defaults = {"terminal": {"cwd": "."}, "_file_has_terminal": True} - - result = _resolve_terminal_cwd(terminal_config, defaults, env) - assert result == "/home/user/workspace" - - -class TestConfigCwdResolution: - """config.yaml terminal.cwd is the canonical source of truth.""" - - def test_explicit_config_cwd_wins(self): - """terminal.cwd: /explicit/path always wins.""" - env = {"TERMINAL_CWD": "/old/gateway/value"} - terminal_config = {"cwd": "/explicit/path"} - defaults = {"terminal": {"cwd": "/explicit/path"}, "_file_has_terminal": True} - - result = _resolve_terminal_cwd(terminal_config, defaults, env) - assert result == "/explicit/path" - - def test_dot_cwd_resolves_to_getcwd_when_no_prior(self): - """With no pre-set TERMINAL_CWD, "." resolves to os.getcwd().""" + def test_explicit_config_ignored(self): env = {} - terminal_config = {"cwd": "."} - defaults = {"terminal": {"cwd": "."}, "_file_has_terminal": False} + tc = {"cwd": "/explicit/path", "env_type": "local"} + d = {"terminal": {"cwd": "/explicit/path"}} + assert _resolve_cwd(tc, d, env) == "/fake/getcwd" - result = _resolve_terminal_cwd(terminal_config, defaults, env) + def test_inherited_env_overwritten(self): + env = {"TERMINAL_CWD": "/parent/hermes"} + tc = {"cwd": "/home/user", "env_type": "local"} + d = {"terminal": {"cwd": "/home/user"}} + assert _resolve_cwd(tc, d, env) == "/fake/getcwd" + + def test_placeholder_resolved(self): + env = {} + tc = {"cwd": "."} + d = {"terminal": {"cwd": "."}} + assert _resolve_cwd(tc, d, env) == "/fake/getcwd" + + def test_env_and_no_config_file(self): + env = {"TERMINAL_CWD": "/stale/value"} + tc = {"cwd": ".", "env_type": "local"} + d = {"terminal": {"cwd": "."}} + assert _resolve_cwd(tc, d, env) == "/fake/getcwd" + + +class TestNonLocalBackends: + """Non-local backends use config or per-backend defaults.""" + + def test_placeholder_popped(self): + env = {} + tc = 
{"cwd": ".", "env_type": "docker"} + d = {"terminal": {"cwd": "."}} + assert _resolve_cwd(tc, d, env) == "" + + def test_explicit_path_kept(self): + env = {} + tc = {"cwd": "/srv/app", "env_type": "ssh"} + d = {"terminal": {"cwd": "/srv/app"}} + assert _resolve_cwd(tc, d, env) == "/srv/app" + + def test_auto_placeholder_popped(self): + env = {} + tc = {"cwd": "auto", "env_type": "modal"} + d = {"terminal": {"cwd": "auto"}} + assert _resolve_cwd(tc, d, env) == "" + + +class TestGatewayLazyImport: + """Gateway lazy import of cli.py must not clobber TERMINAL_CWD.""" + + def test_gateway_cwd_preserved(self): + env = {"_HERMES_GATEWAY": "1", "TERMINAL_CWD": "/home/user/project"} + tc = {"cwd": "/home/user", "env_type": "local"} + d = {"terminal": {"cwd": "/home/user"}} + result = _resolve_cwd(tc, d, env) + assert result == "/home/user/project" + + def test_cli_overwrites_stale_env(self): + env = {"TERMINAL_CWD": "/stale/from/dotenv"} + tc = {"cwd": "/home/user", "env_type": "local"} + d = {"terminal": {"cwd": "/home/user"}} + result = _resolve_cwd(tc, d, env) assert result == "/fake/getcwd" - - def test_remote_backend_pops_cwd(self): - """Remote backend + placeholder cwd → popped for backend default.""" - env = {} - terminal_config = {"cwd": ".", "env_type": "docker"} - defaults = {"terminal": {"cwd": "."}, "_file_has_terminal": False} - - result = _resolve_terminal_cwd(terminal_config, defaults, env) - assert result == "" # cwd popped, no env var set - - def test_remote_backend_with_prior_cwd_preserves(self): - """Remote backend + pre-resolved TERMINAL_CWD → adopted.""" - env = {"TERMINAL_CWD": "/project"} - terminal_config = {"cwd": ".", "env_type": "docker"} - defaults = {"terminal": {"cwd": "."}, "_file_has_terminal": False} - - result = _resolve_terminal_cwd(terminal_config, defaults, env) - assert result == "/project" diff --git a/tests/cli/test_fast_command.py b/tests/cli/test_fast_command.py index 343c05658c..a98ae75444 100644 --- 
a/tests/cli/test_fast_command.py +++ b/tests/cli/test_fast_command.py @@ -128,17 +128,34 @@ class TestPriorityProcessingModels(unittest.TestCase): assert model_supports_fast_mode(model), f"{model} should support fast mode" def test_all_anthropic_models_supported(self): + """Per Anthropic docs, fast mode is currently Opus 4.6 only. + + Sending speed=fast to Opus 4.7, Sonnet, or Haiku returns HTTP 400. + Pre-fix this test asserted all Claude variants supported fast mode, + which mirrored the bug rather than the API contract. + """ from hermes_cli.models import model_supports_fast_mode - # All Claude models support Anthropic Fast Mode — Opus, Sonnet, Haiku. + # Supported: Opus 4.6 in any form supported = [ - "claude-opus-4-7", "claude-opus-4-6", "claude-opus-4.6", - "claude-sonnet-4-6", "claude-sonnet-4.6", "claude-sonnet-4", - "claude-haiku-4-5", "claude-3-5-haiku", + "claude-opus-4-6", "claude-opus-4.6", + "anthropic/claude-opus-4-6", "anthropic/claude-opus-4.6", ] for model in supported: assert model_supports_fast_mode(model), f"{model} should support fast mode" + # Unsupported per Anthropic API: Opus 4.7, Sonnet, Haiku + unsupported = [ + "claude-opus-4-7", + "claude-sonnet-4-6", "claude-sonnet-4.6", "claude-sonnet-4", + "claude-haiku-4-5", "claude-3-5-haiku", + ] + for model in unsupported: + assert not model_supports_fast_mode(model), ( + f"{model} should NOT support fast mode — Anthropic restricts " + f"speed=fast to Opus 4.6" + ) + def test_codex_models_excluded(self): """Codex models route through Responses API and don't accept service_tier.""" from hermes_cli.models import model_supports_fast_mode @@ -257,18 +274,20 @@ class TestAnthropicFastMode(unittest.TestCase): assert model_supports_fast_mode("anthropic/claude-opus-4-6") is True assert model_supports_fast_mode("anthropic/claude-opus-4.6") is True - def test_anthropic_all_claude_models_supported(self): + def test_anthropic_non_opus46_models_excluded(self): + """Anthropic restricts fast mode to Opus 4.6 — 
others must be excluded. + + Per https://platform.claude.com/docs/en/build-with-claude/fast-mode, + sending speed=fast to Opus 4.7, Sonnet, or Haiku returns HTTP 400. + """ from hermes_cli.models import model_supports_fast_mode - # All Claude models support fast mode — Opus, Sonnet, Haiku. - # The anthropic adapter gates speed=fast on native Anthropic - # endpoints only, so third-party proxies that reject the beta - # are protected downstream (see _is_third_party_anthropic_endpoint). - assert model_supports_fast_mode("claude-sonnet-4-6") is True - assert model_supports_fast_mode("claude-sonnet-4.6") is True - assert model_supports_fast_mode("claude-haiku-4-5") is True - assert model_supports_fast_mode("claude-opus-4-7") is True - assert model_supports_fast_mode("anthropic/claude-sonnet-4.6") is True + assert model_supports_fast_mode("claude-sonnet-4-6") is False + assert model_supports_fast_mode("claude-sonnet-4.6") is False + assert model_supports_fast_mode("claude-haiku-4-5") is False + assert model_supports_fast_mode("claude-opus-4-7") is False + assert model_supports_fast_mode("anthropic/claude-sonnet-4.6") is False + assert model_supports_fast_mode("anthropic/claude-opus-4-7") is False def test_non_claude_models_not_anthropic_fast(self): """Non-Claude models should not be treated as Anthropic fast-mode.""" @@ -294,6 +313,17 @@ class TestAnthropicFastMode(unittest.TestCase): result = resolve_fast_mode_overrides("anthropic/claude-opus-4.6") assert result == {"speed": "fast"} + def test_resolve_overrides_returns_none_for_unsupported_claude(self): + """Opus 4.7 and other Claude models don't support fast mode (API 400s). + + Per Anthropic docs, fast mode is currently Opus 4.6 only. 
+ """ + from hermes_cli.models import resolve_fast_mode_overrides + + assert resolve_fast_mode_overrides("claude-opus-4-7") is None + assert resolve_fast_mode_overrides("claude-sonnet-4-6") is None + assert resolve_fast_mode_overrides("claude-haiku-4-5") is None + def test_resolve_overrides_returns_service_tier_for_openai(self): """OpenAI models should still get service_tier, not speed.""" from hermes_cli.models import resolve_fast_mode_overrides @@ -302,13 +332,21 @@ class TestAnthropicFastMode(unittest.TestCase): assert result == {"service_tier": "priority"} def test_is_anthropic_fast_model(self): + """Fast mode is currently Opus 4.6 only — other Claude variants must be excluded.""" from hermes_cli.models import _is_anthropic_fast_model + # Supported: Opus 4.6 in any form assert _is_anthropic_fast_model("claude-opus-4-6") is True assert _is_anthropic_fast_model("claude-opus-4.6") is True - assert _is_anthropic_fast_model("claude-sonnet-4-6") is True - assert _is_anthropic_fast_model("claude-haiku-4-5") is True assert _is_anthropic_fast_model("anthropic/claude-opus-4-6") is True + assert _is_anthropic_fast_model("claude-opus-4.6:fast") is True + + # Unsupported per Anthropic API contract — would 400 if we sent speed=fast + assert _is_anthropic_fast_model("claude-opus-4-7") is False + assert _is_anthropic_fast_model("claude-sonnet-4-6") is False + assert _is_anthropic_fast_model("claude-haiku-4-5") is False + + # Non-Claude assert _is_anthropic_fast_model("gpt-5.4") is False assert _is_anthropic_fast_model("") is False @@ -320,14 +358,23 @@ class TestAnthropicFastMode(unittest.TestCase): ) assert cli_mod.HermesCLI._fast_command_available(stub) is True - def test_fast_command_exposed_for_anthropic_sonnet(self): - """Sonnet now supports Anthropic Fast Mode — the adapter gates on base_url.""" + def test_fast_command_hidden_for_anthropic_sonnet(self): + """Sonnet doesn't support fast mode (Opus 4.6 only) — /fast must be hidden.""" cli_mod = _import_cli() stub = 
SimpleNamespace( provider="anthropic", requested_provider="anthropic", model="claude-sonnet-4-6", agent=None, ) - assert cli_mod.HermesCLI._fast_command_available(stub) is True + assert cli_mod.HermesCLI._fast_command_available(stub) is False + + def test_fast_command_hidden_for_anthropic_opus_47(self): + """Opus 4.7 doesn't support fast mode — /fast must be hidden.""" + cli_mod = _import_cli() + stub = SimpleNamespace( + provider="anthropic", requested_provider="anthropic", + model="claude-opus-4-7", agent=None, + ) + assert cli_mod.HermesCLI._fast_command_available(stub) is False def test_fast_command_hidden_for_non_claude_non_openai(self): """Non-Claude, non-OpenAI models should not expose /fast.""" diff --git a/tests/cron/test_jobs.py b/tests/cron/test_jobs.py index 30bd6b41d5..b9d34e1a5c 100644 --- a/tests/cron/test_jobs.py +++ b/tests/cron/test_jobs.py @@ -647,6 +647,74 @@ class TestGetDueJobs: assert get_due_jobs() == [] assert get_job("oneshot-stale")["next_run_at"] is None + def test_broken_cron_without_next_run_is_recovered(self, tmp_cron_dir, monkeypatch): + now = datetime(2026, 3, 18, 10, 0, 0, tzinfo=timezone.utc) + monkeypatch.setattr("cron.jobs._hermes_now", lambda: now) + + save_jobs( + [{ + "id": "cron-recover", + "name": "AI Daily Digest", + "prompt": "...", + "schedule": {"kind": "cron", "expr": "0 12 * * *", "display": "0 12 * * *"}, + "schedule_display": "0 12 * * *", + "repeat": {"times": None, "completed": 0}, + "enabled": True, + "state": "scheduled", + "paused_at": None, + "paused_reason": None, + "created_at": "2026-03-18T09:00:00+00:00", + "next_run_at": None, + "last_run_at": None, + "last_status": None, + "last_error": None, + "deliver": "local", + "origin": None, + }] + ) + + assert get_due_jobs() == [] + recovered = get_job("cron-recover")["next_run_at"] + assert recovered is not None + recovered_dt = datetime.fromisoformat(recovered) + if recovered_dt.tzinfo is None: + recovered_dt = recovered_dt.replace(tzinfo=timezone.utc) + assert 
recovered_dt > now + + def test_broken_interval_without_next_run_is_recovered(self, tmp_cron_dir, monkeypatch): + now = datetime(2026, 3, 18, 10, 0, 0, tzinfo=timezone.utc) + monkeypatch.setattr("cron.jobs._hermes_now", lambda: now) + + save_jobs( + [{ + "id": "interval-recover", + "name": "Hourly heartbeat", + "prompt": "...", + "schedule": {"kind": "interval", "minutes": 60, "display": "every 60m"}, + "schedule_display": "every 1h", + "repeat": {"times": None, "completed": 0}, + "enabled": True, + "state": "scheduled", + "paused_at": None, + "paused_reason": None, + "created_at": "2026-03-18T09:00:00+00:00", + "next_run_at": None, + "last_run_at": None, + "last_status": None, + "last_error": None, + "deliver": "local", + "origin": None, + }] + ) + + assert get_due_jobs() == [] + recovered = get_job("interval-recover")["next_run_at"] + assert recovered is not None + recovered_dt = datetime.fromisoformat(recovered) + if recovered_dt.tzinfo is None: + recovered_dt = recovered_dt.replace(tzinfo=timezone.utc) + assert recovered_dt > now + class TestEnabledToolsets: def test_enabled_toolsets_stored(self, tmp_cron_dir): diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index b12bb578a3..66df251a45 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -1857,6 +1857,54 @@ class TestBuildJobPromptMissingSkill: assert "go" in result +class TestBuildJobPromptBumpUse: + """Verify that cron jobs bump skill usage counters so the curator sees them as active.""" + + def test_bump_use_called_for_loaded_skill(self): + """bump_use is called for each successfully loaded skill.""" + + def _skill_view(name: str) -> str: + return json.dumps({"success": True, "content": f"Content for {name}."}) + + with patch("tools.skills_tool.skill_view", side_effect=_skill_view), \ + patch("tools.skill_usage.bump_use") as mock_bump: + _build_job_prompt({"skills": ["alpha", "beta"], "prompt": "go"}) + + assert mock_bump.call_count == 2 + calls = [c[0][0] 
for c in mock_bump.call_args_list] + assert "alpha" in calls + assert "beta" in calls + + def test_bump_use_not_called_for_missing_skill(self): + """bump_use is NOT called when a skill fails to load.""" + + def _missing_view(name: str) -> str: + return json.dumps({"success": False, "error": "not found"}) + + with patch("tools.skills_tool.skill_view", side_effect=_missing_view), \ + patch("tools.skill_usage.bump_use") as mock_bump: + _build_job_prompt({"skills": ["ghost"], "prompt": "go"}) + + assert mock_bump.call_count == 0 + + def test_bump_failure_does_not_break_prompt(self, caplog): + """If bump_use raises, the prompt still builds — error is logged at DEBUG.""" + + def _skill_view(name: str) -> str: + return json.dumps({"success": True, "content": "Works."}) + + with patch("tools.skills_tool.skill_view", side_effect=_skill_view), \ + patch("tools.skill_usage.bump_use", side_effect=RuntimeError("boom")), \ + caplog.at_level(logging.DEBUG, logger="cron.scheduler"): + result = _build_job_prompt({"skills": ["good-skill"], "prompt": "go"}) + + # Prompt should still contain the skill content and original instruction + assert "Works." 
in result + assert "go" in result + # The error should be logged at DEBUG level, not crash + assert any("failed to bump" in r.message for r in caplog.records) + + class TestSendMediaViaAdapter: """Unit tests for _send_media_via_adapter — routes files to typed adapter methods.""" diff --git a/tests/e2e/test_platform_commands.py b/tests/e2e/test_platform_commands.py index b891ea7372..4924eed6a9 100644 --- a/tests/e2e/test_platform_commands.py +++ b/tests/e2e/test_platform_commands.py @@ -138,6 +138,29 @@ class TestSlashCommands: response_text = send.call_args[1].get("content") or send.call_args[0][1] assert "compress" in response_text.lower() or "context" in response_text.lower() + @pytest.mark.asyncio + async def test_quick_command_alias_targets_builtin_command_with_args( + self, adapter, runner, platform + ): + """Alias targets with args must reach the built-in command handler.""" + runner.config.quick_commands = { + "s": {"type": "alias", "target": "/status extra-arg"} + } + async def _handle_status(event): + assert event.get_command_args() == "extra-arg" + return "status via alias" + + runner._handle_status_command = AsyncMock(side_effect=_handle_status) + + send = await send_and_capture(adapter, "/s", platform) + + send.assert_called_once() + response_text = send.call_args[1].get("content") or send.call_args[0][1] + assert response_text == "status via alias" + runner._handle_status_command.assert_awaited_once() + runner._handle_message_with_agent.assert_not_awaited() + + class TestSessionLifecycle: """Verify session state changes across command sequences.""" diff --git a/tests/gateway/test_api_server.py b/tests/gateway/test_api_server.py index 74a30541dc..d519eee278 100644 --- a/tests/gateway/test_api_server.py +++ b/tests/gateway/test_api_server.py @@ -240,6 +240,48 @@ class TestAdapterInit: "http://127.0.0.1:3000", ) + def test_invalid_port_from_env_falls_back_to_default(self, monkeypatch): + monkeypatch.setenv("API_SERVER_PORT", "not-a-port") + config = 
PlatformConfig(enabled=True) + adapter = APIServerAdapter(config) + assert adapter._port == 8642 + + def test_create_agent_forwards_config_reasoning_effort(self, monkeypatch): + captured = {} + + class FakeAgent: + def __init__(self, **kwargs): + captured.update(kwargs) + + monkeypatch.setattr("run_agent.AIAgent", FakeAgent) + monkeypatch.setattr( + "gateway.run._resolve_runtime_agent_kwargs", + lambda: { + "provider": "openai-codex", + "base_url": "https://example.test/v1", + "api_mode": "codex_responses", + }, + ) + monkeypatch.setattr("gateway.run._resolve_gateway_model", lambda: "gpt-5.5") + monkeypatch.setattr( + "gateway.run._load_gateway_config", + lambda: {"agent": {"reasoning_effort": "xhigh"}}, + ) + monkeypatch.setattr( + "gateway.run.GatewayRunner._load_reasoning_config", + staticmethod(lambda: {"enabled": True, "effort": "xhigh"}), + ) + monkeypatch.setattr("gateway.run.GatewayRunner._load_fallback_model", staticmethod(lambda: None)) + monkeypatch.setattr("hermes_cli.tools_config._get_platform_tools", lambda *_: set()) + + adapter = APIServerAdapter(PlatformConfig(enabled=True)) + monkeypatch.setattr(adapter, "_ensure_session_db", lambda: None) + + agent = adapter._create_agent(session_id="api-session") + + assert isinstance(agent, FakeAgent) + assert captured["reasoning_config"] == {"enabled": True, "effort": "xhigh"} + # --------------------------------------------------------------------------- # Auth checking diff --git a/tests/gateway/test_gateway_command_help.py b/tests/gateway/test_gateway_command_help.py new file mode 100644 index 0000000000..61d5d73de0 --- /dev/null +++ b/tests/gateway/test_gateway_command_help.py @@ -0,0 +1,78 @@ +"""Gateway command help rendering tests.""" + +import pytest + +from gateway.config import Platform +from gateway.platforms.base import MessageEvent +from gateway.session import SessionSource + + +def _make_event(text: str, platform: Platform) -> MessageEvent: + return MessageEvent( + text=text, + 
source=SessionSource( + platform=platform, + chat_id="chat-1", + user_id="user-1", + user_name="tester", + chat_type="dm", + ), + ) + + +def _make_runner(): + from gateway.run import GatewayRunner + + return object.__new__(GatewayRunner) + + +@pytest.mark.asyncio +async def test_help_sanitizes_slash_command_mentions_for_telegram(monkeypatch): + """Telegram help output must not expose invalid uppercase/hyphenated slashes.""" + monkeypatch.setattr( + "agent.skill_commands.get_skill_commands", + lambda: { + "/Linear": {"description": "Open Linear"}, + "/Custom-Thing": {"description": "Run a custom thing"}, + }, + ) + + result = await _make_runner()._handle_help_command( + _make_event("/help", Platform.TELEGRAM) + ) + + assert "`/linear`" in result + assert "`/custom_thing`" in result + assert "`/Linear`" not in result + assert "`/Custom-Thing`" not in result + + +@pytest.mark.asyncio +async def test_commands_sanitizes_slash_command_mentions_for_telegram(monkeypatch): + """Paginated Telegram /commands output uses Telegram-valid slash mentions.""" + monkeypatch.setattr( + "agent.skill_commands.get_skill_commands", + lambda: {"/Linear": {"description": "Open Linear"}}, + ) + + result = await _make_runner()._handle_commands_command( + _make_event("/commands 999", Platform.TELEGRAM) + ) + + assert "`/linear`" in result + assert "`/Linear`" not in result + + +@pytest.mark.asyncio +async def test_help_keeps_non_telegram_slash_command_mentions_unchanged(monkeypatch): + """Only Telegram needs slash mentions rewritten to Telegram command names.""" + monkeypatch.setattr( + "agent.skill_commands.get_skill_commands", + lambda: {"/Linear": {"description": "Open Linear"}}, + ) + + result = await _make_runner()._handle_help_command( + _make_event("/help", Platform.DISCORD) + ) + + assert "`/Linear`" in result diff --git a/tests/gateway/test_qqbot.py b/tests/gateway/test_qqbot.py index a5aeb62516..a01bb946ad 100644 --- a/tests/gateway/test_qqbot.py +++ b/tests/gateway/test_qqbot.py @@ 
-191,6 +191,50 @@ class TestVoiceAttachmentSSRFProtection: assert kwargs.get("follow_redirects") is True assert kwargs.get("event_hooks", {}).get("response") == [_ssrf_redirect_guard] + +# --------------------------------------------------------------------------- +# WebSocket proxy handling +# --------------------------------------------------------------------------- + +class TestQQWebSocketProxy: + @pytest.mark.asyncio + async def test_open_ws_honors_proxy_env(self, monkeypatch): + from gateway.platforms.qqbot import QQAdapter + + for key in ( + "WSS_PROXY", + "wss_proxy", + "HTTPS_PROXY", + "https_proxy", + "ALL_PROXY", + "all_proxy", + ): + monkeypatch.delenv(key, raising=False) + monkeypatch.setenv("HTTPS_PROXY", "http://127.0.0.1:7897") + + adapter = QQAdapter(_make_config(app_id="a", client_secret="b")) + + seen_session_kwargs = {} + seen_ws_kwargs = {} + + class FakeSession: + def __init__(self, **kwargs): + seen_session_kwargs.update(kwargs) + self.closed = False + + async def close(self): + self.closed = True + + async def ws_connect(self, *args, **kwargs): + seen_ws_kwargs.update(kwargs) + return mock.AsyncMock(closed=False) + + with mock.patch("gateway.platforms.qqbot.adapter.aiohttp.ClientSession", side_effect=FakeSession): + await adapter._open_ws("wss://api.sgroup.qq.com/websocket") + + assert seen_session_kwargs.get("trust_env") is True + assert seen_ws_kwargs.get("proxy") == "http://127.0.0.1:7897" + # --------------------------------------------------------------------------- # _strip_at_mention # --------------------------------------------------------------------------- diff --git a/tests/gateway/test_session_boundary_security_state.py b/tests/gateway/test_session_boundary_security_state.py index 47cf475275..57b5855070 100644 --- a/tests/gateway/test_session_boundary_security_state.py +++ b/tests/gateway/test_session_boundary_security_state.py @@ -124,6 +124,10 @@ async def test_resume_clears_session_scoped_approval_and_yolo_state(): runner, 
session_key = _make_resume_runner() other_key = "agent:main:telegram:dm:other-chat" + runner._pending_skills_reload_notes = { + session_key: "[USER INITIATED SKILLS RELOAD: target]", + other_key: "[USER INITIATED SKILLS RELOAD: other]", + } approve_session(session_key, "recursive delete") approve_session(other_key, "recursive delete") enable_session_yolo(session_key) @@ -140,10 +144,12 @@ async def test_resume_clears_session_scoped_approval_and_yolo_state(): assert is_session_yolo_enabled(session_key) is False assert session_key not in runner._pending_approvals assert session_key not in runner._update_prompt_pending + assert session_key not in runner._pending_skills_reload_notes assert is_approved(other_key, "recursive delete") is True assert is_session_yolo_enabled(other_key) is True assert other_key in runner._pending_approvals assert other_key in runner._update_prompt_pending + assert other_key in runner._pending_skills_reload_notes @pytest.mark.asyncio @@ -151,6 +157,10 @@ async def test_branch_clears_session_scoped_approval_and_yolo_state(): runner, session_key = _make_branch_runner() other_key = "agent:main:telegram:dm:other-chat" + runner._pending_skills_reload_notes = { + session_key: "[USER INITIATED SKILLS RELOAD: target]", + other_key: "[USER INITIATED SKILLS RELOAD: other]", + } approve_session(session_key, "recursive delete") approve_session(other_key, "recursive delete") enable_session_yolo(session_key) @@ -167,10 +177,12 @@ async def test_branch_clears_session_scoped_approval_and_yolo_state(): assert is_session_yolo_enabled(session_key) is False assert session_key not in runner._pending_approvals assert session_key not in runner._update_prompt_pending + assert session_key not in runner._pending_skills_reload_notes assert is_approved(other_key, "recursive delete") is True assert is_session_yolo_enabled(other_key) is True assert other_key in runner._pending_approvals assert other_key in runner._update_prompt_pending + assert other_key in 
runner._pending_skills_reload_notes @pytest.mark.asyncio @@ -216,6 +228,7 @@ def test_clear_session_boundary_security_state_is_scoped(): runner = object.__new__(GatewayRunner) runner._pending_approvals = {} runner._update_prompt_pending = {} + runner._pending_skills_reload_notes = {} source = _make_source() session_key = build_session_key(source) @@ -229,6 +242,12 @@ def test_clear_session_boundary_security_state_is_scoped(): runner._pending_approvals[other_key] = {"command": "rm -rf /tmp/other"} runner._update_prompt_pending[session_key] = True runner._update_prompt_pending[other_key] = True + runner._pending_skills_reload_notes[session_key] = ( + "[USER INITIATED SKILLS RELOAD: target]" + ) + runner._pending_skills_reload_notes[other_key] = ( + "[USER INITIATED SKILLS RELOAD: other]" + ) runner._clear_session_boundary_security_state(session_key) @@ -237,16 +256,19 @@ def test_clear_session_boundary_security_state_is_scoped(): assert is_session_yolo_enabled(session_key) is False assert session_key not in runner._pending_approvals assert session_key not in runner._update_prompt_pending + assert session_key not in runner._pending_skills_reload_notes # Other session untouched assert is_approved(other_key, "recursive delete") is True assert is_session_yolo_enabled(other_key) is True assert other_key in runner._pending_approvals assert other_key in runner._update_prompt_pending + assert other_key in runner._pending_skills_reload_notes # Empty session_key is a no-op runner._clear_session_boundary_security_state("") assert is_approved(other_key, "recursive delete") is True assert other_key in runner._update_prompt_pending + assert other_key in runner._pending_skills_reload_notes def test_clear_session_boundary_security_state_wakes_blocked_approvals(): diff --git a/tests/gateway/test_sms.py b/tests/gateway/test_sms.py index 524d540f81..e3ec86d90a 100644 --- a/tests/gateway/test_sms.py +++ b/tests/gateway/test_sms.py @@ -169,9 +169,9 @@ class TestSmsRequirements: class 
TestWebhookHostConfig: """Verify SMS_WEBHOOK_HOST env var and default.""" - def test_default_host_is_all_interfaces(self): + def test_default_host_is_localhost(self): from gateway.platforms.sms import DEFAULT_WEBHOOK_HOST - assert DEFAULT_WEBHOOK_HOST == "0.0.0.0" + assert DEFAULT_WEBHOOK_HOST == "127.0.0.1" def test_host_from_env(self): from gateway.platforms.sms import SmsAdapter @@ -242,6 +242,48 @@ class TestStartupGuard: result = await adapter.connect() assert result is False + @pytest.mark.asyncio + async def test_missing_webhook_url_is_non_retryable(self): + adapter = self._make_adapter() + await adapter.connect() + assert adapter.has_fatal_error is True + assert adapter.fatal_error_retryable is False + assert "sms_missing_webhook_url" == adapter.fatal_error_code + + @pytest.mark.asyncio + async def test_missing_phone_number_is_non_retryable(self): + from gateway.platforms.sms import SmsAdapter + + env = { + "TWILIO_ACCOUNT_SID": "ACtest", + "TWILIO_AUTH_TOKEN": "tok", + "TWILIO_PHONE_NUMBER": "", + "SMS_WEBHOOK_URL": "", + } + with patch.dict(os.environ, env, clear=True): + pc = PlatformConfig(enabled=True, api_key="tok") + adapter = SmsAdapter(pc) + await adapter.connect() + assert adapter.has_fatal_error is True + assert adapter.fatal_error_retryable is False + assert adapter.fatal_error_code == "sms_missing_phone_number" + + @pytest.mark.asyncio + async def test_insecure_flag_does_not_set_fatal_error(self): + mock_session = AsyncMock() + with patch.dict(os.environ, {"SMS_INSECURE_NO_SIGNATURE": "true"}), \ + patch("aiohttp.web.AppRunner") as mock_runner_cls, \ + patch("aiohttp.web.TCPSite") as mock_site_cls, \ + patch("aiohttp.ClientSession", return_value=mock_session): + mock_runner_cls.return_value.setup = AsyncMock() + mock_runner_cls.return_value.cleanup = AsyncMock() + mock_site_cls.return_value.start = AsyncMock() + adapter = self._make_adapter() + result = await adapter.connect() + assert result is True + assert adapter.has_fatal_error is False + 
await adapter.disconnect() + @pytest.mark.asyncio async def test_insecure_flag_allows_start_without_url(self): mock_session = AsyncMock() diff --git a/tests/gateway/test_teams.py b/tests/gateway/test_teams.py index 7a035142ed..2befceec94 100644 --- a/tests/gateway/test_teams.py +++ b/tests/gateway/test_teams.py @@ -313,9 +313,33 @@ class TestTeamsPluginRegistration: # --------------------------------------------------------------------------- -# Tests: Connect / Disconnect +# Tests: Interactive setup (import fix regression — #18325 / #19173) # --------------------------------------------------------------------------- +class TestTeamsInteractiveSetup: + def test_interactive_setup_persists_credentials(self, tmp_path, monkeypatch): + """Regression for #19173: interactive_setup must import prompt helpers + from hermes_cli.cli_output (not hermes_cli.config) and persist + credentials to .env without crashing. + """ + hermes_home = tmp_path / "hermes" + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + import hermes_cli.cli_output as cli_output_mod + + answers = iter(["client-id", "client-secret", "tenant-id", "aad-1, aad-2"]) + monkeypatch.setattr(cli_output_mod, "prompt", lambda *_a, **_kw: next(answers)) + monkeypatch.setattr(cli_output_mod, "prompt_yes_no", lambda *_a, **_kw: True) + monkeypatch.setattr(cli_output_mod, "print_info", lambda *_a, **_kw: None) + monkeypatch.setattr(cli_output_mod, "print_success", lambda *_a, **_kw: None) + monkeypatch.setattr(cli_output_mod, "print_warning", lambda *_a, **_kw: None) + + _teams_mod.interactive_setup() + + env_text = (hermes_home / ".env").read_text(encoding="utf-8") + assert "TEAMS_CLIENT_ID=client-id" in env_text + assert "TEAMS_TENANT_ID=tenant-id" in env_text + class TestTeamsConnect: @pytest.mark.asyncio async def test_connect_fails_without_sdk(self, monkeypatch): diff --git a/tests/gateway/test_telegram_group_gating.py b/tests/gateway/test_telegram_group_gating.py index a560d6cdd6..52e4a5e6d3 100644 --- 
a/tests/gateway/test_telegram_group_gating.py +++ b/tests/gateway/test_telegram_group_gating.py @@ -261,6 +261,57 @@ def test_group_allow_from_is_enforced_by_gateway_authorization_not_trigger_gate( assert adapter._should_process_message(_group_message("hello", from_user_id=333)) is True +def test_top_level_require_mention_bridges_to_telegram(monkeypatch, tmp_path): + """require_mention at the config.yaml top level (alongside group_sessions_per_user) + must behave identically to telegram.require_mention: true (#3979). + """ + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + # Intentionally no "telegram:" section — keys are at the top level. + (hermes_home / "config.yaml").write_text( + "require_mention: true\n" + "group_sessions_per_user: true\n", + encoding="utf-8", + ) + + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.delenv("TELEGRAM_REQUIRE_MENTION", raising=False) + + config = load_gateway_config() + + assert config is not None + assert __import__("os").environ.get("TELEGRAM_REQUIRE_MENTION") == "true" + + # The adapter's extra dict must also carry the setting so that + # _telegram_require_mention() works even without the env var. + tg_cfg = config.platforms.get(__import__("gateway.config", fromlist=["Platform"]).Platform.TELEGRAM) + if tg_cfg is not None: + assert tg_cfg.extra.get("require_mention") is True + + +def test_top_level_require_mention_does_not_override_telegram_section(monkeypatch, tmp_path): + """When telegram.require_mention is explicitly set, top-level require_mention + must not override it (platform-specific config takes precedence). 
+ """ + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + (hermes_home / "config.yaml").write_text( + "require_mention: true\n" + "telegram:\n" + " require_mention: false\n", + encoding="utf-8", + ) + + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.delenv("TELEGRAM_REQUIRE_MENTION", raising=False) + + config = load_gateway_config() + + assert config is not None + # The telegram-specific "false" must win over the top-level "true". + assert __import__("os").environ.get("TELEGRAM_REQUIRE_MENTION") == "false" + + def test_config_bridges_telegram_ignored_threads(monkeypatch, tmp_path): hermes_home = tmp_path / ".hermes" hermes_home.mkdir() diff --git a/tests/gateway/test_title_command.py b/tests/gateway/test_title_command.py index d5bad6c57a..c09a2202f4 100644 --- a/tests/gateway/test_title_command.py +++ b/tests/gateway/test_title_command.py @@ -5,11 +5,12 @@ across all gateway messenger platforms. """ import os -from unittest.mock import MagicMock, patch +from types import SimpleNamespace +from unittest.mock import AsyncMock, MagicMock, patch import pytest -from gateway.config import Platform +from gateway.config import GatewayConfig, Platform, PlatformConfig from gateway.platforms.base import MessageEvent from gateway.session import SessionSource @@ -206,3 +207,152 @@ class TestTitleInHelp: import inspect source = inspect.getsource(GatewayRunner._handle_message) assert '"title"' in source + + +# --------------------------------------------------------------------------- +# /new with title +# --------------------------------------------------------------------------- + + +class TestResetCommandWithTitle: + """Tests for GatewayRunner._handle_reset_command with a title argument.""" + + @pytest.mark.asyncio + async def test_reset_command_with_title(self): + """Sending /new <title> resets session and sets the title.""" + from datetime import datetime + + from gateway.run import GatewayRunner + from gateway.session import SessionEntry, 
SessionSource, build_session_key + + runner = object.__new__(GatewayRunner) + runner.config = GatewayConfig( + platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")} + ) + adapter = MagicMock() + adapter.send = AsyncMock() + runner.adapters = {Platform.TELEGRAM: adapter} + runner._voice_mode = {} + runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False) + runner._session_model_overrides = {} + runner._pending_model_notes = {} + runner._background_tasks = set() + + source = SessionSource( + platform=Platform.TELEGRAM, + user_id="12345", + chat_id="67890", + user_name="testuser", + ) + session_key = build_session_key(source) + new_session_entry = SessionEntry( + session_key=session_key, + session_id="sess-new", + created_at=datetime.now(), + updated_at=datetime.now(), + platform=Platform.TELEGRAM, + chat_type="dm", + ) + runner.session_store = MagicMock() + runner.session_store.get_or_create_session.return_value = new_session_entry + runner.session_store.reset_session.return_value = new_session_entry + runner.session_store._entries = {session_key: new_session_entry} + runner.session_store._generate_session_key.return_value = session_key + runner._running_agents = {} + runner._pending_messages = {} + runner._pending_approvals = {} + runner._session_db = MagicMock() + runner._agent_cache = {} + runner._agent_cache_lock = None + runner._is_user_authorized = lambda _source: True + runner._format_session_info = lambda: "" + + event = _make_event(text="/new Custom Name") + result = await runner._handle_reset_command(event) + + runner.session_store.reset_session.assert_called_once() + runner._session_db.set_session_title.assert_called_once_with( + "sess-new", "Custom Name" + ) + # Header reflects the applied title + assert "Custom Name" in str(result) + + @pytest.mark.asyncio + async def test_reset_command_duplicate_title_surfaces_warning(self): + """/new <title> with an already-in-use title returns a warning in the reply.""" + from datetime 
import datetime + + from gateway.run import GatewayRunner + from gateway.session import SessionEntry, SessionSource, build_session_key + + runner = object.__new__(GatewayRunner) + runner.config = GatewayConfig( + platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="***")} + ) + adapter = MagicMock() + adapter.send = AsyncMock() + runner.adapters = {Platform.TELEGRAM: adapter} + runner._voice_mode = {} + runner.hooks = SimpleNamespace(emit=AsyncMock(), loaded_hooks=False) + runner._session_model_overrides = {} + runner._pending_model_notes = {} + runner._background_tasks = set() + + source = SessionSource( + platform=Platform.TELEGRAM, + user_id="12345", + chat_id="67890", + user_name="testuser", + ) + session_key = build_session_key(source) + new_session_entry = SessionEntry( + session_key=session_key, + session_id="sess-new", + created_at=datetime.now(), + updated_at=datetime.now(), + platform=Platform.TELEGRAM, + chat_type="dm", + ) + runner.session_store = MagicMock() + runner.session_store.get_or_create_session.return_value = new_session_entry + runner.session_store.reset_session.return_value = new_session_entry + runner.session_store._entries = {session_key: new_session_entry} + runner.session_store._generate_session_key.return_value = session_key + runner._running_agents = {} + runner._pending_messages = {} + runner._pending_approvals = {} + runner._session_db = MagicMock() + runner._session_db.set_session_title.side_effect = ValueError( + "Title 'Dup' is already in use by session abc-123" + ) + runner._agent_cache = {} + runner._agent_cache_lock = None + runner._is_user_authorized = lambda _source: True + runner._format_session_info = lambda: "" + + event = _make_event(text="/new Dup") + result = await runner._handle_reset_command(event) + + runner._session_db.set_session_title.assert_called_once() + reply = str(result) + assert "already in use" in reply + assert "session started untitled" in reply + # Header must NOT claim the rejected title as 
the session name + assert "New session started: Dup" not in reply + + +# --------------------------------------------------------------------------- +# /new in help output +# --------------------------------------------------------------------------- + + +class TestNewInHelp: + """Verify /new appears in help text with the [name] args hint.""" + + def test_new_command_in_help_output(self): + """The gateway help output includes /new with the [name] hint.""" + from hermes_cli.commands import gateway_help_lines + lines = gateway_help_lines() + new_line = next((line for line in lines if line.startswith("`/new ")), None) + assert new_line is not None + assert "[name]" in new_line diff --git a/tests/gateway/test_voice_command.py b/tests/gateway/test_voice_command.py index 2e9c54608a..947d4904aa 100644 --- a/tests/gateway/test_voice_command.py +++ b/tests/gateway/test_voice_command.py @@ -954,6 +954,46 @@ class TestVoiceChannelCommands: assert "Test transcript" in msg assert "42" in msg # user_id in mention + @pytest.mark.asyncio + async def test_input_suppresses_duplicate_transcript(self, runner): + """Near-immediate duplicate STT output should not dispatch twice.""" + from gateway.config import Platform + + mock_adapter = AsyncMock() + mock_adapter._voice_text_channels = {111: 123} + mock_adapter._voice_sources = {} + mock_channel = AsyncMock() + mock_adapter._client = MagicMock() + mock_adapter._client.get_channel = MagicMock(return_value=mock_channel) + mock_adapter.handle_message = AsyncMock() + runner.adapters[Platform.DISCORD] = mock_adapter + + await runner._handle_voice_channel_input(111, 42, "Hello from VC") + await runner._handle_voice_channel_input(111, 42, "Hello from VC") + + mock_adapter.handle_message.assert_called_once() + mock_channel.send.assert_called_once() + + @pytest.mark.asyncio + async def test_input_suppresses_near_duplicate_transcript(self, runner): + """Small STT wording drift should still be treated as the same utterance.""" + from 
gateway.config import Platform + + mock_adapter = AsyncMock() + mock_adapter._voice_text_channels = {111: 123} + mock_adapter._voice_sources = {} + mock_channel = AsyncMock() + mock_adapter._client = MagicMock() + mock_adapter._client.get_channel = MagicMock(return_value=mock_channel) + mock_adapter.handle_message = AsyncMock() + runner.adapters[Platform.DISCORD] = mock_adapter + + await runner._handle_voice_channel_input(111, 42, "This is a test of the voice system") + await runner._handle_voice_channel_input(111, 42, "This is a test for the voice system") + + mock_adapter.handle_message.assert_called_once() + mock_channel.send.assert_called_once() + # -- _get_guild_id -- def test_get_guild_id_from_guild(self, runner): diff --git a/tests/gateway/test_wecom.py b/tests/gateway/test_wecom.py index 3c4ec357bc..18de405e39 100644 --- a/tests/gateway/test_wecom.py +++ b/tests/gateway/test_wecom.py @@ -36,6 +36,11 @@ class TestWeComRequirements: class TestWeComAdapterInit: + def test_declares_non_editable_message_capability(self): + from gateway.platforms.wecom import WeComAdapter + + assert WeComAdapter.SUPPORTS_MESSAGE_EDITING is False + def test_reads_config_from_extra(self): from gateway.platforms.wecom import WeComAdapter diff --git a/tests/gateway/test_weixin.py b/tests/gateway/test_weixin.py index 506936f711..8deccf18cb 100644 --- a/tests/gateway/test_weixin.py +++ b/tests/gateway/test_weixin.py @@ -5,7 +5,7 @@ import base64 import json import os from pathlib import Path -from unittest.mock import AsyncMock, patch +from unittest.mock import AsyncMock, Mock, patch from gateway.config import PlatformConfig from gateway.config import GatewayConfig, HomeChannel, Platform, _apply_env_overrides @@ -788,3 +788,43 @@ class TestIsStaleSessionRet: def test_success_codes_are_not_stale(self): assert weixin._is_stale_session_ret(0, 0, "") is False assert weixin._is_stale_session_ret(None, None, "unknown error") is False + + +class TestWeixinContentDedup: + """Regression tests 
for Issue #16182 — upstream API sends duplicate content + with different message_ids, bypassing message_id deduplication. + """ + + def test_duplicate_content_with_different_message_ids_is_dropped(self): + adapter = _make_adapter() + adapter._poll_session = object() + adapter.handle_message = AsyncMock() + + base_msg = { + "from_user_id": "wxid_user1", + "item_list": [{"type": 1, "text_item": {"text": "hello world"}}], + } + + asyncio.run(adapter._process_message({**base_msg, "message_id": "msg-1"})) + asyncio.run(adapter._process_message({**base_msg, "message_id": "msg-2"})) + + assert adapter.handle_message.await_count == 1 + event = adapter.handle_message.await_args[0][0] + assert event.text == "hello world" + + def test_content_dedup_not_called_for_messages_without_text(self): + adapter = _make_adapter() + adapter._poll_session = object() + adapter.handle_message = AsyncMock() + adapter._dedup.is_duplicate = Mock(return_value=False) + + empty_msg = { + "from_user_id": "wxid_user1", + "message_id": "msg-1", + "item_list": [], + } + asyncio.run(adapter._process_message(empty_msg)) + + assert adapter.handle_message.await_count == 0 + # is_duplicate should only be called for message_id, never for content + assert all("content:" not in str(call) for call in adapter._dedup.is_duplicate.call_args_list) diff --git a/tests/hermes_cli/test_auth_nous_provider.py b/tests/hermes_cli/test_auth_nous_provider.py index a8e337c1a0..d0e24aeaab 100644 --- a/tests/hermes_cli/test_auth_nous_provider.py +++ b/tests/hermes_cli/test_auth_nous_provider.py @@ -896,3 +896,286 @@ def test_refresh_non_reuse_error_keeps_original_description(): assert "Refresh session has been revoked" in str(exc_info.value) # Must not have been rewritten with the reuse message. 
assert "external process" not in str(exc_info.value).lower() + + +# ============================================================================= +# Shared Nous token store — cross-profile persistence (Codex-style auto-import) +# ============================================================================= + + +@pytest.fixture +def shared_store_env(tmp_path, monkeypatch): + """Redirect HERMES_SHARED_AUTH_DIR to a tmp_path. + + Required for every test that exercises the shared Nous store — the + in-auth.py seat belt refuses to touch the real user's shared store + under pytest, so tests that forget this fixture fail loudly instead + of corrupting real state. + """ + shared_dir = tmp_path / "shared" + monkeypatch.setenv("HERMES_SHARED_AUTH_DIR", str(shared_dir)) + return shared_dir + + +def test_shared_store_seat_belt_refuses_real_home_under_pytest(monkeypatch): + """Without HERMES_SHARED_AUTH_DIR override, the seat belt must trip. + + Mirrors the existing ``_auth_file_path`` seat belt: forgetting to + redirect this store in a test must fail loudly instead of silently + writing to the user's real ``~/.hermes/shared/`` across CI runs. 
+ """ + from hermes_cli.auth import _nous_shared_store_path + + monkeypatch.delenv("HERMES_SHARED_AUTH_DIR", raising=False) + + with pytest.raises(RuntimeError, match="shared Nous auth store"): + _nous_shared_store_path() + + +def test_shared_store_honors_env_override(tmp_path, monkeypatch): + """HERMES_SHARED_AUTH_DIR must redirect the path.""" + from hermes_cli.auth import _nous_shared_store_path, NOUS_SHARED_STORE_FILENAME + + custom_dir = tmp_path / "custom_shared" + monkeypatch.setenv("HERMES_SHARED_AUTH_DIR", str(custom_dir)) + + path = _nous_shared_store_path() + assert path == custom_dir / NOUS_SHARED_STORE_FILENAME + + +def test_shared_store_read_missing_returns_none(shared_store_env): + """Missing file → ``_read_shared_nous_state()`` returns None.""" + from hermes_cli.auth import _read_shared_nous_state + + assert _read_shared_nous_state() is None + + +def test_shared_store_read_malformed_returns_none(shared_store_env): + """Unreadable / non-JSON file → None, not an exception.""" + from hermes_cli.auth import _nous_shared_store_path, _read_shared_nous_state + + path = _nous_shared_store_path() + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("{ not json") + + assert _read_shared_nous_state() is None + + +def test_shared_store_read_missing_required_fields_returns_none(shared_store_env): + """Payload without refresh_token → None (nothing worth importing).""" + from hermes_cli.auth import _nous_shared_store_path, _read_shared_nous_state + + path = _nous_shared_store_path() + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps({"_schema": 1, "access_token": "abc"})) + + assert _read_shared_nous_state() is None + + +def test_shared_store_write_and_read_roundtrip(shared_store_env): + """Write → read must preserve refresh_token + OAuth URLs.""" + from hermes_cli.auth import ( + _nous_shared_store_path, + _read_shared_nous_state, + _write_shared_nous_state, + ) + + _write_shared_nous_state(_full_state_fixture()) + + path 
= _nous_shared_store_path() + assert path.is_file() + + # Permissions should be 0600 where the platform supports it. + mode = path.stat().st_mode & 0o777 + assert mode == 0o600 or mode == 0o644 # 0o644 on platforms without chmod + + loaded = _read_shared_nous_state() + assert loaded is not None + assert loaded["refresh_token"] == "refresh-tok" + assert loaded["access_token"] == "access-tok" + assert loaded["portal_base_url"] == "https://portal.example.com" + assert loaded["inference_base_url"] == "https://inference.example.com/v1" + # Volatile agent_key MUST NOT be persisted to the shared store + # (24h TTL, profile-specific — only long-lived OAuth tokens are + # cross-profile useful). + assert "agent_key" not in loaded + + +def test_shared_store_write_skips_when_refresh_token_missing(shared_store_env): + """Write is a no-op when refresh_token is absent (nothing to share).""" + from hermes_cli.auth import _nous_shared_store_path, _write_shared_nous_state + + state = dict(_full_state_fixture()) + state["refresh_token"] = "" + + _write_shared_nous_state(state) + + assert not _nous_shared_store_path().is_file() + + +def test_persist_nous_credentials_mirrors_to_shared_store( + tmp_path, monkeypatch, shared_store_env, +): + """persist_nous_credentials must populate BOTH per-profile auth.json + AND the shared store, so a future profile's `hermes auth add nous + --type oauth` can one-tap import instead of redoing device-code. 
+ """ + from hermes_cli.auth import ( + _nous_shared_store_path, + _read_shared_nous_state, + persist_nous_credentials, + ) + + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + (hermes_home / "auth.json").write_text( + json.dumps({"version": 1, "providers": {}}) + ) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + persist_nous_credentials(_full_state_fixture()) + + # Per-profile auth.json populated + payload = json.loads((hermes_home / "auth.json").read_text()) + assert "nous" in payload.get("providers", {}) + + # Shared store populated with the same refresh_token + shared = _read_shared_nous_state() + assert shared is not None + assert shared["refresh_token"] == "refresh-tok" + + # Shared file path lives under the tmp override, NOT the real home + assert str(_nous_shared_store_path()).startswith(str(shared_store_env)) + + +def test_try_import_shared_returns_none_when_store_missing(shared_store_env): + """No shared store → no rehydrate (fall through to device-code).""" + from hermes_cli.auth import _try_import_shared_nous_state + + assert _try_import_shared_nous_state() is None + + +def test_try_import_shared_returns_none_on_refresh_failure( + shared_store_env, monkeypatch, +): + """If the portal rejects the stored refresh_token (revoked, expired, + portal down), _try_import_shared_nous_state must return None so the + login flow falls back to a fresh device-code run. 
+ """ + from hermes_cli import auth as auth_mod + + # Seed the shared store + auth_mod._write_shared_nous_state(_full_state_fixture()) + + # Make refresh fail + def _boom(*_args, **_kwargs): + raise AuthError( + "Refresh session has been revoked", + provider="nous", + code="invalid_grant", + relogin_required=True, + ) + + monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _boom) + + assert auth_mod._try_import_shared_nous_state() is None + + +def test_try_import_shared_rehydrates_on_success(shared_store_env, monkeypatch): + """Happy path: stored refresh_token is accepted, forced refresh+mint + returns a fresh access_token + agent_key, and the returned dict has + every field persist_nous_credentials() needs. + """ + from hermes_cli import auth as auth_mod + + auth_mod._write_shared_nous_state(_full_state_fixture()) + + def _fake_refresh(state, **kwargs): + # Simulate portal returning fresh tokens + a new agent_key + assert kwargs.get("force_refresh") is True + assert kwargs.get("force_mint") is True + return { + **state, + "access_token": "fresh-access-tok", + "refresh_token": "fresh-refresh-tok", # rotated + "agent_key": "new-agent-key", + "agent_key_expires_at": "2026-04-19T22:00:00+00:00", + } + + monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _fake_refresh) + + result = auth_mod._try_import_shared_nous_state() + + assert result is not None + assert result["access_token"] == "fresh-access-tok" + assert result["refresh_token"] == "fresh-refresh-tok" + assert result["agent_key"] == "new-agent-key" + # Preserved from shared state + assert result["portal_base_url"] == "https://portal.example.com" + assert result["client_id"] == "hermes-cli" + + +def test_shared_store_survives_across_profile_switch( + tmp_path, monkeypatch, shared_store_env, +): + """End-to-end: profile A logs in → shared store populated → profile B + (different HERMES_HOME) sees the same shared state and can rehydrate + without re-running device-code. 
+ """ + from hermes_cli import auth as auth_mod + + # Profile A: login, which mirrors to shared store + profile_a = tmp_path / "profile_a" + profile_a.mkdir(parents=True, exist_ok=True) + (profile_a / "auth.json").write_text( + json.dumps({"version": 1, "providers": {}}) + ) + monkeypatch.setenv("HERMES_HOME", str(profile_a)) + auth_mod.persist_nous_credentials(_full_state_fixture()) + + # Profile A's auth.json has nous + a_payload = json.loads((profile_a / "auth.json").read_text()) + assert "nous" in a_payload.get("providers", {}) + + # Profile B: fresh HERMES_HOME, no auth yet, but the shared store + # persists — _read_shared_nous_state() must still return the tokens. + profile_b = tmp_path / "profile_b" + profile_b.mkdir(parents=True, exist_ok=True) + (profile_b / "auth.json").write_text( + json.dumps({"version": 1, "providers": {}}) + ) + monkeypatch.setenv("HERMES_HOME", str(profile_b)) + + # B's own auth.json has no nous + b_payload = json.loads((profile_b / "auth.json").read_text()) + assert "nous" not in b_payload.get("providers", {}) + + # But the shared store is visible + shared = auth_mod._read_shared_nous_state() + assert shared is not None + assert shared["refresh_token"] == "refresh-tok" + + # And a successful rehydrate + persist lands nous into profile B + def _fake_refresh(state, **kwargs): + return { + **state, + "access_token": "b-access-tok", + "refresh_token": "b-refresh-tok", + "agent_key": "b-agent-key", + "agent_key_expires_at": "2026-04-19T22:00:00+00:00", + } + + monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _fake_refresh) + result = auth_mod._try_import_shared_nous_state() + assert result is not None + + auth_mod.persist_nous_credentials(result) + + b_payload = json.loads((profile_b / "auth.json").read_text()) + assert "nous" in b_payload.get("providers", {}) + assert b_payload["providers"]["nous"]["refresh_token"] == "b-refresh-tok" + + # Shared store was updated with the rotated refresh_token too + shared_after = 
auth_mod._read_shared_nous_state() + assert shared_after is not None + assert shared_after["refresh_token"] == "b-refresh-tok" diff --git a/tests/hermes_cli/test_backup.py b/tests/hermes_cli/test_backup.py index 346c38dbe6..ab7ba21370 100644 --- a/tests/hermes_cli/test_backup.py +++ b/tests/hermes_cli/test_backup.py @@ -471,6 +471,32 @@ class TestImport: with pytest.raises(SystemExit): run_import(args) + @pytest.mark.skipif(os.name != "posix", reason="POSIX file permissions only") + def test_restores_secret_files_with_0600_perms(self, tmp_path, monkeypatch): + """Secret files must end up at 0600 after restore (zipfile drops mode bits).""" + hermes_home = tmp_path / ".hermes" + hermes_home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + + zip_path = tmp_path / "backup.zip" + self._make_backup_zip(zip_path, { + "config.yaml": "model: openrouter\n", + ".env": "OPENROUTER_API_KEY=sk-secret\n", + "auth.json": '{"providers": {"nous": "token"}}', + "state.db": b"SQLite format 3\x00", + "profiles/coder/.env": "ANTHROPIC_API_KEY=sk-ant-secret\n", + }) + + args = Namespace(zipfile=str(zip_path), force=True) + + from hermes_cli.backup import run_import + run_import(args) + + for rel in (".env", "auth.json", "state.db", "profiles/coder/.env"): + mode = (hermes_home / rel).stat().st_mode & 0o777 + assert mode == 0o600, f"{rel} restored with mode {oct(mode)}, expected 0o600" + # --------------------------------------------------------------------------- # Round-trip test @@ -1348,6 +1374,53 @@ class TestPreUpdateBackup: from hermes_cli.backup import create_pre_update_backup assert create_pre_update_backup(hermes_home=tmp_path / "does-not-exist") is None + def test_keep_zero_does_not_delete_freshly_created_backup(self, hermes_home): + """Regression: ``backup_keep: 0`` previously triggered ``backups[0:]`` + in the pruner — wiping the just-created zip and leaving the user + with no recovery point. 
The floor (keep>=1) preserves the new file + regardless of misconfiguration; users who don't want backups should + set ``pre_update_backup: false`` instead. + """ + from hermes_cli.backup import create_pre_update_backup + out = create_pre_update_backup(hermes_home=hermes_home, keep=0) + assert out is not None + assert out.exists(), ( + "keep=0 silently deleted the freshly-created backup; floor " + "should preserve the just-written file." + ) + + def test_keep_negative_does_not_delete_freshly_created_backup(self, hermes_home): + """Mirror coverage: any value <1 should be floored, not literally + applied as a slice index.""" + from hermes_cli.backup import create_pre_update_backup + out = create_pre_update_backup(hermes_home=hermes_home, keep=-3) + assert out is not None + assert out.exists() + + def test_keep_zero_still_prunes_older_backups(self, hermes_home): + """The floor preserves the new backup but should NOT regress the + rotation behaviour for older zips: a third call with keep=0 must + still remove pre-existing backups beyond the (floored) limit of 1. 
+ """ + import time as _t + from hermes_cli.backup import create_pre_update_backup + + first = create_pre_update_backup(hermes_home=hermes_home, keep=5) + _t.sleep(1.05) + second = create_pre_update_backup(hermes_home=hermes_home, keep=5) + _t.sleep(1.05) + third = create_pre_update_backup(hermes_home=hermes_home, keep=0) + + remaining = { + p.name for p in (hermes_home / "backups").iterdir() + if p.name.startswith("pre-update-") + } + assert third.name in remaining, "Floor must preserve the new backup" + assert first.name not in remaining and second.name not in remaining, ( + f"keep=0 floor of 1 should still prune older backups; " + f"remaining={remaining}" + ) + class TestRunPreUpdateBackup: """Tests for the ``_run_pre_update_backup`` wrapper in main.py — diff --git a/tests/hermes_cli/test_commands.py b/tests/hermes_cli/test_commands.py index d505c8a1a7..620611ad42 100644 --- a/tests/hermes_cli/test_commands.py +++ b/tests/hermes_cli/test_commands.py @@ -236,6 +236,13 @@ class TestTelegramBotCommands: tg_name = cmd.name.replace("-", "_") assert tg_name not in names + def test_excludes_commands_with_required_args(self): + names = {name for name, _ in telegram_bot_commands()} + assert "background" not in names + assert "queue" not in names + assert "steer" not in names + assert "background" in GATEWAY_KNOWN_COMMANDS + class TestSlackSubcommandMap: def test_returns_dict(self): @@ -1661,6 +1668,19 @@ class TestPluginCommandEnumeration: names = {name for name, _desc in telegram_bot_commands()} assert "metricas" in names + def test_plugin_command_with_required_args_excluded_from_telegram_menu(self, monkeypatch): + """Telegram BotCommand selections cannot supply required arguments.""" + self._patch_plugin_commands(monkeypatch, { + "background-job": { + "handler": lambda _a: "ok", + "description": "Run a background job", + "args_hint": "<prompt>", + "plugin": "jobs-plugin", + } + }) + names = {name for name, _desc in telegram_bot_commands()} + assert "background_job" not 
in names + def test_plugin_command_appears_in_slack_subcommand_map(self, monkeypatch): """/hermes metricas must route through the Slack subcommand map.""" self._patch_plugin_commands(monkeypatch, { diff --git a/tests/hermes_cli/test_curator_status.py b/tests/hermes_cli/test_curator_status.py index 3be5862592..b4c3548c42 100644 --- a/tests/hermes_cli/test_curator_status.py +++ b/tests/hermes_cli/test_curator_status.py @@ -114,6 +114,12 @@ def test_status_shows_most_and_least_used_sections(curator_status_env): env["make_skill"]("top-dog") env["make_skill"]("middling") env["make_skill"]("never-used") + # Mark all three as agent-created so they enter the curator's catalog. + # Under the provenance-marker semantics, skills must be explicitly opted + # into curator management (normally via the background-review fork when + # it creates a skill through skill_manage). + for n in ("top-dog", "middling", "never-used"): + env["skill_usage"].mark_agent_created(n) # Bump use_count differentially. All three counters (use/view/patch) feed # into activity_count, so bumping use alone is enough to make activity @@ -150,7 +156,9 @@ def test_status_hides_most_active_when_all_zero(curator_status_env): env = curator_status_env env["make_skill"]("a") env["make_skill"]("b") - # No bumps. + # Mark both as agent-created so the catalog lists them. No bumps. 
+ env["skill_usage"].mark_agent_created("a") + env["skill_usage"].mark_agent_created("b") out = _capture_status(env["curator_cli"]) diff --git a/tests/hermes_cli/test_custom_provider_model_switch.py b/tests/hermes_cli/test_custom_provider_model_switch.py index 454337592d..d123120ed8 100644 --- a/tests/hermes_cli/test_custom_provider_model_switch.py +++ b/tests/hermes_cli/test_custom_provider_model_switch.py @@ -56,7 +56,6 @@ class TestCustomProviderModelSwitch: "sk-test", "https://vllm.example.com/v1", timeout=8.0, - api_mode=None, ) def test_can_switch_to_different_model(self, config_home): @@ -141,12 +140,18 @@ class TestCustomProviderModelSwitch: "api_mode": "anthropic_messages", } - with patch("hermes_cli.models.fetch_api_models", return_value=["claude-3"]), \ + with patch("hermes_cli.models.fetch_api_models", return_value=["claude-3"]) as mock_fetch, \ patch.dict("sys.modules", {"simple_term_menu": None}), \ patch("builtins.input", return_value="1"), \ patch("builtins.print"): _model_flow_named_custom({}, provider_info) + mock_fetch.assert_called_once_with( + "***", + "https://proxy.example.com/anthropic", + timeout=8.0, + api_mode="anthropic_messages", + ) config = yaml.safe_load((config_home / "config.yaml").read_text()) or {} model = config.get("model") assert isinstance(model, dict) @@ -215,7 +220,6 @@ class TestCustomProviderModelSwitch: "sk-live-example-provider", "https://api.example-provider.test/v1", timeout=8.0, - api_mode=None, ) config = yaml.safe_load(config_path.read_text()) or {} assert config["model"]["api_key"] == "${EXAMPLE_PROVIDER_API_KEY}" diff --git a/tests/hermes_cli/test_debug.py b/tests/hermes_cli/test_debug.py index 4bba56867e..b83023a76a 100644 --- a/tests/hermes_cli/test_debug.py +++ b/tests/hermes_cli/test_debug.py @@ -273,6 +273,101 @@ class TestCaptureLogSnapshot: assert "rotated agent data" in snap.full_text +# --------------------------------------------------------------------------- +# Capture log redaction (force=True 
applies regardless of HERMES_REDACT_SECRETS) +# --------------------------------------------------------------------------- + +# A vendor-prefixed token used across redaction tests. Long enough to clear +# the redactor's `floor` parameter so it actually masks rather than fully blanks. +_REDACT_FIXTURE_TOKEN = "sk-proj-A1B2C3D4E5F6G7H8I9J0aA" + + +class TestCaptureLogSnapshotRedaction: + """Pin upload-time redaction at the _capture_log_snapshot boundary.""" + + @pytest.fixture + def hermes_home_with_secret(self, tmp_path, monkeypatch): + """Isolated HERMES_HOME whose agent.log contains a vendor-prefixed token.""" + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + # Critical: ensure the user has NOT opted in to redaction. The whole + # point of this PR is that share-time redaction works for users who + # never set this env var. + monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False) + + logs_dir = home / "logs" + logs_dir.mkdir() + (logs_dir / "agent.log").write_text( + f"2026-04-12 17:00:00 INFO config: api_key={_REDACT_FIXTURE_TOKEN} loaded\n" + ) + (logs_dir / "errors.log").write_text("") + (logs_dir / "gateway.log").write_text("") + return home + + def test_default_redacts_tail_and_full_text(self, hermes_home_with_secret): + from hermes_cli.debug import _capture_log_snapshot + + snap = _capture_log_snapshot("agent", tail_lines=10) + + # Both views the upload uses must be sanitized. + assert _REDACT_FIXTURE_TOKEN not in snap.tail_text + assert snap.full_text is not None + assert _REDACT_FIXTURE_TOKEN not in snap.full_text + + def test_redact_false_passes_through(self, hermes_home_with_secret): + from hermes_cli.debug import _capture_log_snapshot + + snap = _capture_log_snapshot("agent", tail_lines=10, redact=False) + + # Original token survives when the caller opts out. 
+ assert _REDACT_FIXTURE_TOKEN in snap.tail_text + assert _REDACT_FIXTURE_TOKEN in (snap.full_text or "") + + def test_force_true_overrides_unset_env_var(self, hermes_home_with_secret): + """Regression test: redact_sensitive_text short-circuits without force=True. + + If a future refactor drops `force=True` from `_redact_log_text`, this + test fails immediately. Without `force=True`, the redactor returns the + input unchanged when HERMES_REDACT_SECRETS is unset, and the feature + ships silently broken for its target audience. + """ + import os + + from hermes_cli.debug import _capture_log_snapshot + + # Belt-and-suspenders: confirm the env var is genuinely unset for this + # test so we know we're exercising the force=True path. + assert os.environ.get("HERMES_REDACT_SECRETS", "") == "" + + snap = _capture_log_snapshot("agent", tail_lines=10) + + assert _REDACT_FIXTURE_TOKEN not in snap.tail_text + assert snap.full_text is not None + assert _REDACT_FIXTURE_TOKEN not in snap.full_text + + def test_capture_default_log_snapshots_threads_redact( + self, hermes_home_with_secret + ): + from hermes_cli.debug import _capture_default_log_snapshots + + snaps = _capture_default_log_snapshots(50) + + # Default threads redact=True to all three captured logs. 
+ assert _REDACT_FIXTURE_TOKEN not in snaps["agent"].tail_text + assert _REDACT_FIXTURE_TOKEN not in (snaps["agent"].full_text or "") + + def test_capture_default_log_snapshots_no_redact_passes_through( + self, hermes_home_with_secret + ): + from hermes_cli.debug import _capture_default_log_snapshots + + snaps = _capture_default_log_snapshots(50, redact=False) + + assert _REDACT_FIXTURE_TOKEN in snaps["agent"].tail_text + assert _REDACT_FIXTURE_TOKEN in (snaps["agent"].full_text or "") + + # --------------------------------------------------------------------------- # Debug report collection # --------------------------------------------------------------------------- @@ -556,6 +651,124 @@ class TestRunDebugShare: assert "all failed" in out.err +# --------------------------------------------------------------------------- +# Share-time redaction wiring + visible banner +# --------------------------------------------------------------------------- + +class TestRunDebugShareRedaction: + """End-to-end: --no-redact flag, banner injection, default behavior.""" + + @pytest.fixture + def hermes_home_with_secret(self, tmp_path, monkeypatch): + """Isolated HERMES_HOME whose agent.log contains a vendor-prefixed token.""" + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False) + + logs_dir = home / "logs" + logs_dir.mkdir() + (logs_dir / "agent.log").write_text( + f"2026-04-12 17:00:00 INFO config: api_key={_REDACT_FIXTURE_TOKEN} loaded\n" + ) + (logs_dir / "errors.log").write_text("") + (logs_dir / "gateway.log").write_text( + f"2026-04-12 17:00:01 INFO gateway.run: token {_REDACT_FIXTURE_TOKEN}\n" + ) + return home + + def test_default_share_redacts_uploaded_content( + self, hermes_home_with_secret, capsys + ): + """The uploaded report and full-log pastes do not contain the raw token.""" + from hermes_cli.debug import run_debug_share + + args = MagicMock() + args.lines = 50 + 
args.expire = 7 + args.local = False + args.no_redact = False + + captured: list[str] = [] + + def fake_upload(content, expiry_days=7): + captured.append(content) + return f"https://paste.rs/{len(captured)}" + + with patch("hermes_cli.dump.run_dump"), \ + patch("hermes_cli.debug._sweep_expired_pastes", return_value=(0, 0)), \ + patch("hermes_cli.debug.upload_to_pastebin", side_effect=fake_upload): + run_debug_share(args) + + # At least the report plus one full log paste reached the upload path. + assert len(captured) >= 2 + for content in captured: + assert _REDACT_FIXTURE_TOKEN not in content, ( + "raw token leaked into upload-bound content" + ) + + def test_default_share_includes_redaction_banner( + self, hermes_home_with_secret, capsys + ): + """Each upload-bound paste carries the visible redaction banner.""" + from hermes_cli.debug import run_debug_share + + args = MagicMock() + args.lines = 50 + args.expire = 7 + args.local = False + args.no_redact = False + + captured: list[str] = [] + + def fake_upload(content, expiry_days=7): + captured.append(content) + return f"https://paste.rs/{len(captured)}" + + with patch("hermes_cli.dump.run_dump"), \ + patch("hermes_cli.debug._sweep_expired_pastes", return_value=(0, 0)), \ + patch("hermes_cli.debug.upload_to_pastebin", side_effect=fake_upload): + run_debug_share(args) + + for content in captured: + assert "redacted at upload time" in content, ( + "redaction banner missing from upload-bound content" + ) + + def test_no_redact_flag_disables_redaction_and_banner( + self, hermes_home_with_secret, capsys + ): + """--no-redact preserves original log content and omits the banner.""" + from hermes_cli.debug import run_debug_share + + args = MagicMock() + args.lines = 50 + args.expire = 7 + args.local = False + args.no_redact = True + + captured: list[str] = [] + + def fake_upload(content, expiry_days=7): + captured.append(content) + return f"https://paste.rs/{len(captured)}" + + with patch("hermes_cli.dump.run_dump"), \ + 
patch("hermes_cli.debug._sweep_expired_pastes", return_value=(0, 0)), \ + patch("hermes_cli.debug.upload_to_pastebin", side_effect=fake_upload): + run_debug_share(args) + + # The agent.log paste should now contain the raw token. + assert any(_REDACT_FIXTURE_TOKEN in c for c in captured), ( + "expected raw token in --no-redact upload" + ) + # No banner anywhere when redaction is disabled. + for content in captured: + assert "redacted at upload time" not in content, ( + "banner present with --no-redact" + ) + + # --------------------------------------------------------------------------- # run_debug router # --------------------------------------------------------------------------- diff --git a/tests/hermes_cli/test_doctor.py b/tests/hermes_cli/test_doctor.py index 4a5981c07a..de80e240d1 100644 --- a/tests/hermes_cli/test_doctor.py +++ b/tests/hermes_cli/test_doctor.py @@ -481,6 +481,46 @@ def test_run_doctor_accepts_hermes_provider_ids_that_catalog_aliases( ) + + +def test_run_doctor_accepts_kimi_coding_cn_provider(monkeypatch, tmp_path): + home = tmp_path / ".hermes" + home.mkdir(parents=True, exist_ok=True) + (home / ".env").write_text("KIMI_CN_API_KEY=***\n", encoding="utf-8") + (home / "config.yaml").write_text( + "model:\n" + " provider: kimi-coding-cn\n" + " default: kimi-k2.6\n", + encoding="utf-8", + ) + + monkeypatch.setattr(doctor_mod, "HERMES_HOME", home) + monkeypatch.setattr(doctor_mod, "PROJECT_ROOT", tmp_path / "project") + monkeypatch.setattr(doctor_mod, "_DHH", str(home)) + (tmp_path / "project").mkdir(exist_ok=True) + + fake_model_tools = types.SimpleNamespace( + check_tool_availability=lambda *a, **kw: ([], []), + TOOLSET_REQUIREMENTS={}, + ) + monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools) + + try: + from hermes_cli import auth as _auth_mod + monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {}) + monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {}) + monkeypatch.setattr(_auth_mod, 
"get_auth_status", lambda provider: {"logged_in": True}) + except Exception: + pass + + buf = io.StringIO() + with contextlib.redirect_stdout(buf): + doctor_mod.run_doctor(Namespace(fix=False)) + + out = buf.getvalue() + assert "model.provider 'kimi-coding-cn' is not a recognised provider" not in out + + def test_run_doctor_termux_does_not_mark_browser_available_without_agent_browser(monkeypatch, tmp_path): home = tmp_path / ".hermes" home.mkdir(parents=True, exist_ok=True) diff --git a/tests/hermes_cli/test_gateway.py b/tests/hermes_cli/test_gateway.py index 0a44ac9532..6dfbd636f4 100644 --- a/tests/hermes_cli/test_gateway.py +++ b/tests/hermes_cli/test_gateway.py @@ -310,6 +310,10 @@ def test_find_gateway_pids_falls_back_to_pid_file_when_process_scan_fails(monkey def fake_run(cmd, **kwargs): if cmd[:4] == ["ps", "-A", "eww", "-o"]: return SimpleNamespace(returncode=1, stdout="", stderr="ps failed") + if cmd[:3] == ["ps", "-o", "ppid="]: + # _get_ancestor_pids() walks up the tree; return "no parent" so + # the loop terminates cleanly. 
+ return SimpleNamespace(returncode=1, stdout="", stderr="") raise AssertionError(f"Unexpected command: {cmd}") monkeypatch.setattr(gateway.subprocess, "run", fake_run) diff --git a/tests/hermes_cli/test_gateway_service.py b/tests/hermes_cli/test_gateway_service.py index f2bfa8b870..210c9c144e 100644 --- a/tests/hermes_cli/test_gateway_service.py +++ b/tests/hermes_cli/test_gateway_service.py @@ -107,6 +107,61 @@ class TestSystemdServiceRefresh: ] + def test_run_gateway_refreshes_outdated_unit_on_boot(self, tmp_path, monkeypatch): + """run_gateway() should refresh the systemd unit on boot so that + restart settings take effect even when the process was respawned + via exit-code-75 (bypassing `hermes gateway restart`).""" + unit_path = tmp_path / "hermes-gateway.service" + unit_path.write_text("old unit\n", encoding="utf-8") + + monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path) + monkeypatch.setattr(gateway_cli, "generate_systemd_unit", lambda system=False, run_as_user=None: "new unit\n") + monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True) + + calls = [] + + def fake_run(cmd, check=True, **kwargs): + calls.append(cmd) + return SimpleNamespace(returncode=0, stdout="", stderr="") + + monkeypatch.setattr(gateway_cli.subprocess, "run", fake_run) + + # Prevent run_gateway from actually starting the gateway + def fake_start_gateway(**kwargs): + import asyncio + f = asyncio.Future() + f.set_result(True) + return f + + monkeypatch.setattr("gateway.run.start_gateway", fake_start_gateway) + + gateway_cli.run_gateway() + + assert unit_path.read_text(encoding="utf-8") == "new unit\n" + assert ["systemctl", "--user", "daemon-reload"] in calls + + +class TestRequireServiceInstalled: + def test_exits_with_install_hint_when_unit_missing(self, tmp_path, monkeypatch, capsys): + unit_path = tmp_path / "hermes-gateway.service" + monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path) + + 
with pytest.raises(SystemExit) as exc_info: + gateway_cli._require_service_installed("start") + + assert exc_info.value.code == 1 + out = capsys.readouterr().out + assert "not installed" in out + assert "hermes gateway install" in out + + def test_passes_when_unit_exists(self, tmp_path, monkeypatch): + unit_path = tmp_path / "hermes-gateway.service" + unit_path.write_text("[Unit]\n", encoding="utf-8") + monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: unit_path) + + gateway_cli._require_service_installed("start") + + class TestGeneratedSystemdUnits: def test_user_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self): unit = gateway_cli.generate_systemd_unit(system=False) @@ -487,6 +542,7 @@ class TestGatewaySystemServiceRouting: calls = [] monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False) + monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None) monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: calls.append(("refresh", system))) monkeypatch.setattr( "gateway.status.get_running_pid", @@ -541,6 +597,7 @@ class TestGatewaySystemServiceRouting: def test_systemd_restart_recovers_failed_planned_restart(self, monkeypatch, capsys): monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False) + monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None) monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: None) monkeypatch.setattr( "gateway.status.read_runtime_status", diff --git a/tests/hermes_cli/test_kanban_boards.py b/tests/hermes_cli/test_kanban_boards.py new file mode 100644 index 0000000000..a86a871330 --- /dev/null +++ b/tests/hermes_cli/test_kanban_boards.py @@ -0,0 +1,483 @@ +"""Tests for the multi-board kanban layer (``hermes kanban boards …``). 
+ +Covers the pieces added when boards became a first-class concept: + +* Slug validation and normalisation. +* Path resolution for ``default`` (legacy ``<root>/kanban.db``) vs + named boards (``<root>/kanban/boards/<slug>/kanban.db``). +* Current-board persistence via ``<root>/kanban/current`` and + ``HERMES_KANBAN_BOARD`` env var. +* ``connect(board=)`` isolation — writes on one board don't leak. +* ``create_board`` / ``list_boards`` / ``remove_board`` round trip. +* CLI surface: ``hermes kanban boards list/create/switch/rm``. +* ``_default_spawn`` injects ``HERMES_KANBAN_BOARD`` into worker env. +""" + +from __future__ import annotations + +import json +import os +import subprocess +import sys +from pathlib import Path + +import pytest + +# Ensure the worktree (not the stale global clone) is first on sys.path. +_WORKTREE = Path(__file__).resolve().parents[2] +if str(_WORKTREE) not in sys.path: + sys.path.insert(0, str(_WORKTREE)) + +from hermes_cli import kanban_db as kb + + +# --------------------------------------------------------------------------- +# Fixture +# --------------------------------------------------------------------------- + +@pytest.fixture +def fresh_home(tmp_path, monkeypatch): + """Isolated HERMES_HOME with no prior kanban state. + + The autouse hermetic conftest already nukes credentials + TZ; this + fixture layers a per-test HERMES_HOME plus a path-init cache reset + so each test sees a truly empty board set. + """ + home = tmp_path / "hermes_home" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + for var in ( + "HERMES_KANBAN_DB", + "HERMES_KANBAN_WORKSPACES_ROOT", + "HERMES_KANBAN_HOME", + "HERMES_KANBAN_BOARD", + ): + monkeypatch.delenv(var, raising=False) + # Also reset hermes_constants cache so get_default_hermes_root() re-reads. 
+ try: + import hermes_constants + hermes_constants._cached_default_hermes_root = None # type: ignore[attr-defined] + except Exception: + pass + # Kanban module-level init cache must not leak between tests. + kb._INITIALIZED_PATHS.clear() + return home + + +# --------------------------------------------------------------------------- +# Slug validation +# --------------------------------------------------------------------------- + +class TestSlugValidation: + @pytest.mark.parametrize("good", [ + "default", "atm10-server", "hermes-agent", "proj_1", "a", + "very-long-but-still-ok-slug-with-hyphens-and-numbers-1234", + ]) + def test_accepts_valid(self, good): + assert kb._normalize_board_slug(good) == good + + @pytest.mark.parametrize("bad", [ + "-leading-hyphen", "_leading_underscore", + "with/slash", "with space", + "has.dot", "has?question", + "..", "../etc", "foo\x00bar", + ]) + def test_rejects_invalid(self, bad): + with pytest.raises(ValueError): + kb._normalize_board_slug(bad) + + def test_empty_returns_none(self): + assert kb._normalize_board_slug(None) is None + assert kb._normalize_board_slug("") is None + assert kb._normalize_board_slug(" ") is None + + def test_auto_lowercases(self): + # Uppercase is auto-downcased (friendlier than rejecting). ``Default`` + # → ``default``, ``ATM10`` → ``atm10``. The on-disk slug is always + # lowercase regardless of what the user typed. 
+ assert kb._normalize_board_slug("Default") == "default" + assert kb._normalize_board_slug("ATM10-Server") == "atm10-server" + + +# --------------------------------------------------------------------------- +# Path resolution +# --------------------------------------------------------------------------- + +class TestPathResolution: + def test_default_board_legacy_path(self, fresh_home): + """The default board's DB lives at ``<root>/kanban.db`` for back-compat.""" + assert kb.kanban_db_path() == fresh_home / "kanban.db" + assert kb.kanban_db_path(board="default") == fresh_home / "kanban.db" + + def test_named_board_under_boards_dir(self, fresh_home): + p = kb.kanban_db_path(board="atm10-server") + assert p == fresh_home / "kanban" / "boards" / "atm10-server" / "kanban.db" + + def test_workspaces_per_board(self, fresh_home): + assert kb.workspaces_root() == fresh_home / "kanban" / "workspaces" + # Uppercase input gets auto-downcased to the on-disk slug. + assert kb.workspaces_root(board="projA") == ( + fresh_home / "kanban" / "boards" / "proja" / "workspaces" + ) + + def test_logs_per_board(self, fresh_home): + assert kb.worker_logs_dir() == fresh_home / "kanban" / "logs" + assert kb.worker_logs_dir(board="other") == ( + fresh_home / "kanban" / "boards" / "other" / "logs" + ) + + def test_env_var_db_override_still_wins(self, fresh_home, tmp_path, monkeypatch): + """``HERMES_KANBAN_DB`` pins the file regardless of board= arg.""" + forced = tmp_path / "custom.db" + monkeypatch.setenv("HERMES_KANBAN_DB", str(forced)) + assert kb.kanban_db_path() == forced + assert kb.kanban_db_path(board="ignored") == forced + + def test_env_var_workspaces_override(self, fresh_home, tmp_path, monkeypatch): + forced = tmp_path / "ws" + monkeypatch.setenv("HERMES_KANBAN_WORKSPACES_ROOT", str(forced)) + assert kb.workspaces_root(board="any") == forced + + +# --------------------------------------------------------------------------- +# Current-board resolution +# 
--------------------------------------------------------------------------- + +class TestCurrentBoard: + def test_default_when_unset(self, fresh_home): + assert kb.get_current_board() == "default" + + def test_env_var_takes_precedence(self, fresh_home, monkeypatch): + # Create the board so the env-var value is honoured (get_current_board + # trusts env-var validity, but the resolution chain doesn't require + # the board to exist; we just test that env trumps). + kb.create_board("envboard") + monkeypatch.setenv("HERMES_KANBAN_BOARD", "envboard") + assert kb.get_current_board() == "envboard" + + def test_file_pointer_honoured(self, fresh_home): + kb.create_board("filepick") + kb.set_current_board("filepick") + assert kb.get_current_board() == "filepick" + + def test_env_beats_file(self, fresh_home, monkeypatch): + kb.create_board("a") + kb.create_board("b") + kb.set_current_board("a") + monkeypatch.setenv("HERMES_KANBAN_BOARD", "b") + assert kb.get_current_board() == "b" + + def test_invalid_env_falls_through(self, fresh_home, monkeypatch): + monkeypatch.setenv("HERMES_KANBAN_BOARD", "!!bad!!") + # Should not crash — falls through to default. 
+ assert kb.get_current_board() == "default" + + def test_clear_current_board(self, fresh_home): + kb.create_board("x") + kb.set_current_board("x") + kb.clear_current_board() + assert kb.get_current_board() == "default" + + def test_kanban_db_path_reads_current(self, fresh_home): + """kanban_db_path() with no args respects the on-disk pointer.""" + kb.create_board("my-proj") + kb.set_current_board("my-proj") + expected = fresh_home / "kanban" / "boards" / "my-proj" / "kanban.db" + assert kb.kanban_db_path() == expected + + +# --------------------------------------------------------------------------- +# Board CRUD +# --------------------------------------------------------------------------- + +class TestBoardCRUD: + def test_create_and_list(self, fresh_home): + assert [b["slug"] for b in kb.list_boards()] == ["default"] + kb.create_board("foo", name="Foo Board", description="test") + slugs = [b["slug"] for b in kb.list_boards()] + assert slugs == ["default", "foo"] + + def test_create_is_idempotent(self, fresh_home): + kb.create_board("bar") + kb.create_board("bar") # no error + slugs = [b["slug"] for b in kb.list_boards()] + assert slugs == ["default", "bar"] + + def test_create_writes_metadata(self, fresh_home): + meta = kb.create_board( + "baz", + name="Baz", + description="desc", + icon="📦", + color="#abcdef", + ) + assert meta["slug"] == "baz" + assert meta["name"] == "Baz" + assert meta["icon"] == "📦" + # Round-trip via read_board_metadata. 
+ again = kb.read_board_metadata("baz") + assert again["name"] == "Baz" + assert again["description"] == "desc" + assert again["icon"] == "📦" + + def test_remove_archive(self, fresh_home): + kb.create_board("toremove") + res = kb.remove_board("toremove") + assert res["action"] == "archived" + assert Path(res["new_path"]).exists() + assert "toremove" not in [b["slug"] for b in kb.list_boards()] + + def test_remove_hard_delete(self, fresh_home): + kb.create_board("nuke") + d = kb.board_dir("nuke") + assert d.exists() + res = kb.remove_board("nuke", archive=False) + assert res["action"] == "deleted" + assert not d.exists() + + def test_remove_default_forbidden(self, fresh_home): + with pytest.raises(ValueError, match="default"): + kb.remove_board("default") + + def test_remove_nonexistent_raises(self, fresh_home): + with pytest.raises(ValueError, match="does not exist"): + kb.remove_board("nosuch") + + def test_remove_clears_current_pointer(self, fresh_home): + kb.create_board("pinned") + kb.set_current_board("pinned") + kb.remove_board("pinned") + assert kb.get_current_board() == "default" + + def test_rename_updates_metadata(self, fresh_home): + kb.create_board("slug-immutable") + kb.write_board_metadata("slug-immutable", name="New Display Name") + assert kb.read_board_metadata("slug-immutable")["name"] == "New Display Name" + # Slug must not change. 
+ assert kb.board_exists("slug-immutable") + + +# --------------------------------------------------------------------------- +# Connection isolation +# --------------------------------------------------------------------------- + +class TestConnectionIsolation: + def test_tasks_do_not_leak_across_boards(self, fresh_home): + kb.create_board("alpha") + kb.create_board("beta") + + with kb.connect(board="alpha") as conn: + kb.create_task(conn, title="alpha-task-1", assignee="dev") + kb.create_task(conn, title="alpha-task-2", assignee="dev") + + with kb.connect(board="beta") as conn: + kb.create_task(conn, title="beta-only", assignee="dev") + + with kb.connect(board="alpha") as conn: + a = kb.list_tasks(conn) + with kb.connect(board="beta") as conn: + b = kb.list_tasks(conn) + with kb.connect(board="default") as conn: + d = kb.list_tasks(conn) + + assert {t.title for t in a} == {"alpha-task-1", "alpha-task-2"} + assert {t.title for t in b} == {"beta-only"} + assert d == [] + + def test_connect_without_args_uses_current(self, fresh_home): + kb.create_board("curr") + kb.set_current_board("curr") + with kb.connect() as conn: + kb.create_task(conn, title="implicit", assignee="x") + with kb.connect(board="curr") as conn: + tasks = kb.list_tasks(conn) + assert [t.title for t in tasks] == ["implicit"] + + def test_connect_env_var_overrides_current(self, fresh_home, monkeypatch): + kb.create_board("persist") + kb.create_board("envwin") + kb.set_current_board("persist") + monkeypatch.setenv("HERMES_KANBAN_BOARD", "envwin") + with kb.connect() as conn: + kb.create_task(conn, title="via-env", assignee="x") + with kb.connect(board="envwin") as conn: + assert [t.title for t in kb.list_tasks(conn)] == ["via-env"] + with kb.connect(board="persist") as conn: + assert kb.list_tasks(conn) == [] + + +# --------------------------------------------------------------------------- +# Worker spawn env injection +# --------------------------------------------------------------------------- + 
+class TestWorkerSpawnEnv: + """Ensure the dispatcher pins ``HERMES_KANBAN_BOARD`` / DB / workspaces on spawn. + + We monkey-patch ``subprocess.Popen`` to capture the child env without + actually spawning anything. + """ + + def test_default_spawn_sets_env_vars(self, fresh_home, monkeypatch): + captured = {} + + class FakeProc: + pid = 12345 + + def fake_popen(cmd, *args, **kwargs): + captured["cmd"] = cmd + captured["env"] = kwargs.get("env", {}) + return FakeProc() + + monkeypatch.setattr(subprocess, "Popen", fake_popen) + kb.create_board("spawntest") + + task = kb.Task( + id="t_abc", + title="worker test", + body=None, + assignee="teknium", + status="ready", + priority=0, + created_by="user", + created_at=0, + started_at=None, + completed_at=None, + workspace_kind="scratch", + workspace_path=None, + claim_lock=None, + claim_expires=None, + tenant=None, + ) + + kb._default_spawn(task, str(fresh_home / "ws"), board="spawntest") + + env = captured["env"] + assert env["HERMES_KANBAN_BOARD"] == "spawntest" + assert env["HERMES_KANBAN_TASK"] == "t_abc" + # DB path should match the per-board DB, not the legacy default. 
+ expected_db = fresh_home / "kanban" / "boards" / "spawntest" / "kanban.db" + assert env["HERMES_KANBAN_DB"] == str(expected_db) + expected_ws = fresh_home / "kanban" / "boards" / "spawntest" / "workspaces" + assert env["HERMES_KANBAN_WORKSPACES_ROOT"] == str(expected_ws) + + def test_default_board_spawn_keeps_legacy_paths(self, fresh_home, monkeypatch): + captured = {} + + class FakeProc: + pid = 1 + + def fake_popen(cmd, *args, **kwargs): + captured["env"] = kwargs.get("env", {}) + return FakeProc() + + monkeypatch.setattr(subprocess, "Popen", fake_popen) + task = kb.Task( + id="t_def", + title="", + body=None, + assignee="teknium", + status="ready", + priority=0, + created_by=None, + created_at=0, + started_at=None, + completed_at=None, + workspace_kind="scratch", + workspace_path=None, + claim_lock=None, + claim_expires=None, + tenant=None, + ) + kb._default_spawn(task, str(fresh_home / "ws"), board=None) + env = captured["env"] + assert env["HERMES_KANBAN_BOARD"] == "default" + assert env["HERMES_KANBAN_DB"] == str(fresh_home / "kanban.db") + + +# --------------------------------------------------------------------------- +# CLI surface +# --------------------------------------------------------------------------- + +def _cli(args: list[str], env_extra: dict | None = None) -> subprocess.CompletedProcess: + """Run ``hermes kanban …`` with PYTHONPATH pinned to the worktree.""" + env = dict(os.environ) + env["PYTHONPATH"] = str(_WORKTREE) + if env_extra: + env.update(env_extra) + return subprocess.run( + [sys.executable, "-m", "hermes_cli.main", "kanban"] + args, + env=env, + capture_output=True, + text=True, + cwd=str(_WORKTREE), + timeout=30, + ) + + +class TestCLI: + def test_boards_list_default_only(self, tmp_path): + env = {"HERMES_HOME": str(tmp_path)} + res = _cli(["boards", "list", "--json"], env_extra=env) + assert res.returncode == 0, res.stderr + data = json.loads(res.stdout) + slugs = [b["slug"] for b in data] + assert slugs == ["default"] + assert 
data[0]["is_current"] is True + + def test_boards_create_and_switch(self, tmp_path): + env = {"HERMES_HOME": str(tmp_path)} + r1 = _cli( + ["boards", "create", "myproj", "--name", "My Project", "--switch"], + env_extra=env, + ) + assert r1.returncode == 0, r1.stderr + assert "created" in r1.stdout + assert "Switched" in r1.stdout + + r2 = _cli(["boards", "list", "--json"], env_extra=env) + data = json.loads(r2.stdout) + cur = [b for b in data if b["is_current"]][0] + assert cur["slug"] == "myproj" + + def test_per_board_task_isolation_via_cli(self, tmp_path): + env = {"HERMES_HOME": str(tmp_path)} + assert _cli(["boards", "create", "projA"], env_extra=env).returncode == 0 + assert _cli(["boards", "create", "projB"], env_extra=env).returncode == 0 + + # Create one task on each via --board. + r = _cli(["--board", "projA", "create", "Task A", "--assignee", "dev"], env_extra=env) + assert r.returncode == 0, r.stderr + r = _cli(["--board", "projB", "create", "Task B", "--assignee", "dev"], env_extra=env) + assert r.returncode == 0, r.stderr + + # list on each board only shows its own. + listA = _cli(["--board", "projA", "list", "--json"], env_extra=env) + listB = _cli(["--board", "projB", "list", "--json"], env_extra=env) + listD = _cli(["list", "--json"], env_extra=env) + + titlesA = [t["title"] for t in json.loads(listA.stdout)] + titlesB = [t["title"] for t in json.loads(listB.stdout)] + titlesD = [t["title"] for t in json.loads(listD.stdout)] + + assert titlesA == ["Task A"] + assert titlesB == ["Task B"] + assert titlesD == [] + + def test_board_flag_rejects_unknown(self, tmp_path): + env = {"HERMES_HOME": str(tmp_path)} + r = _cli(["--board", "ghost", "list"], env_extra=env) + # main.py's dispatcher doesn't propagate return codes today, so we + # assert the user-visible signal: a stderr error message. Whether + # the exit code stays 0 is a separate (pre-existing) issue. 
+ assert "does not exist" in r.stderr + + def test_boards_rm_archives(self, tmp_path): + env = {"HERMES_HOME": str(tmp_path)} + _cli(["boards", "create", "rmme"], env_extra=env) + r = _cli(["boards", "rm", "rmme"], env_extra=env) + assert r.returncode == 0, r.stderr + assert "archived" in r.stdout + # Default board list no longer shows it. + res = _cli(["boards", "list", "--json"], env_extra=env) + slugs = [b["slug"] for b in json.loads(res.stdout)] + assert "rmme" not in slugs diff --git a/tests/hermes_cli/test_kanban_core_functionality.py b/tests/hermes_cli/test_kanban_core_functionality.py index 551480ff72..a7896bf940 100644 --- a/tests/hermes_cli/test_kanban_core_functionality.py +++ b/tests/hermes_cli/test_kanban_core_functionality.py @@ -902,12 +902,13 @@ def test_list_profiles_on_disk(tmp_path, monkeypatch): """list_profiles_on_disk returns directories under ~/.hermes/profiles/ that contain a config.yaml.""" monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.delenv("HERMES_HOME", raising=False) profiles = tmp_path / ".hermes" / "profiles" profiles.mkdir(parents=True) - (profiles / "researcher").mkdir() - (profiles / "researcher" / "config.yaml").write_text("model: {}\n") - (profiles / "writer").mkdir() - (profiles / "writer" / "config.yaml").write_text("model: {}\n") + for name in ("researcher", "writer"): + d = profiles / name + d.mkdir() + (d / "config.yaml").write_text("model: {}\n") (profiles / "empty_dir").mkdir() # A stray file; should be ignored. 
(profiles / "stray.txt").write_text("noise") @@ -916,6 +917,20 @@ def test_list_profiles_on_disk(tmp_path, monkeypatch): assert names == ["researcher", "writer"] +def test_list_profiles_on_disk_custom_root(tmp_path, monkeypatch): + """list_profiles_on_disk respects a custom HERMES_HOME root.""" + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + profiles = tmp_path / "profiles" + profiles.mkdir(parents=True) + for name in ("researcher", "writer"): + d = profiles / name + d.mkdir() + (d / "config.yaml").write_text("model: {}\n") + + names = kb.list_profiles_on_disk() + assert names == ["researcher", "writer"] + + def test_known_assignees_merges_disk_and_board(tmp_path, monkeypatch): """known_assignees unions profiles on disk with currently-assigned names, and reports per-status counts.""" diff --git a/tests/hermes_cli/test_kanban_db.py b/tests/hermes_cli/test_kanban_db.py index fcc6396be4..1907938b42 100644 --- a/tests/hermes_cli/test_kanban_db.py +++ b/tests/hermes_cli/test_kanban_db.py @@ -252,6 +252,22 @@ def test_assign_reassigns_when_not_running(kanban_home): assert kb.get_task(conn, t).assignee == "b" +def test_assignee_normalized_to_lowercase_on_create_and_assign(kanban_home): + """Dashboard/CLI may pass title-cased profile labels; DB + spawn use canonical id.""" + with kb.connect() as conn: + tid = kb.create_task(conn, title="cased", assignee="Jules") + assert kb.get_task(conn, tid).assignee == "jules" + assert kb.assign_task(conn, tid, "Librarian") + assert kb.get_task(conn, tid).assignee == "librarian" + + +def test_list_tasks_assignee_filter_case_insensitive(kanban_home): + with kb.connect() as conn: + tid = kb.create_task(conn, title="q", assignee="jules") + found = kb.list_tasks(conn, assignee="Jules") + assert len(found) == 1 and found[0].id == tid + + def test_archive_hides_from_default_list(kanban_home): with kb.connect() as conn: t = kb.create_task(conn, title="x") @@ -436,3 +452,279 @@ def test_tenant_propagates_to_events(kanban_home): # The 
"created" event should have tenant in its payload. created = [e for e in events if e.kind == "created"] assert created and created[0].payload.get("tenant") == "biz-a" + + +# --------------------------------------------------------------------------- +# Shared-board path resolution (issue #19348) +# +# The kanban board is a cross-profile coordination primitive: a worker +# spawned with `hermes -p <profile>` must read/write the same kanban.db +# as the dispatcher that claimed the task. These tests exercise the +# path-resolution layer directly and would have caught the regression +# where `kanban_db_path()` resolved to the active profile's HERMES_HOME. +# --------------------------------------------------------------------------- + +class TestSharedBoardPaths: + """`kanban_home`/`kanban_db_path`/`workspaces_root`/`worker_log_path` + must anchor at the **shared root**, not the active profile's HERMES_HOME.""" + + def _set_home(self, monkeypatch, tmp_path, hermes_home): + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.delenv("HERMES_KANBAN_HOME", raising=False) + + def test_default_install_anchors_at_home_dot_hermes( + self, tmp_path, monkeypatch + ): + # Standard install: HERMES_HOME == ~/.hermes, no profile active. + default_home = tmp_path / ".hermes" + default_home.mkdir() + self._set_home(monkeypatch, tmp_path, default_home) + + assert kb.kanban_home() == default_home + assert kb.kanban_db_path() == default_home / "kanban.db" + assert kb.workspaces_root() == default_home / "kanban" / "workspaces" + assert ( + kb.worker_log_path("t_demo") + == default_home / "kanban" / "logs" / "t_demo.log" + ) + + def test_profile_worker_resolves_to_shared_root( + self, tmp_path, monkeypatch + ): + # Reproduces the bug: dispatcher uses ~/.hermes/kanban.db, + # worker spawned with -p <profile> previously resolved to + # ~/.hermes/profiles/<profile>/kanban.db. 
After the fix both + # converge on ~/.hermes/kanban.db. + default_home = tmp_path / ".hermes" + default_home.mkdir() + profile_home = default_home / "profiles" / "nehemiahkanban" + profile_home.mkdir(parents=True) + self._set_home(monkeypatch, tmp_path, profile_home) + + # All four resolvers must anchor at the shared root, not the + # profile-local HERMES_HOME. + assert kb.kanban_home() == default_home + assert kb.kanban_db_path() == default_home / "kanban.db" + assert kb.workspaces_root() == default_home / "kanban" / "workspaces" + assert ( + kb.worker_log_path("t_0d214f19") + == default_home / "kanban" / "logs" / "t_0d214f19.log" + ) + + # Sanity: the profile-local path that used to be returned is + # explicitly NOT what we resolve to anymore. + assert kb.kanban_db_path() != profile_home / "kanban.db" + + def test_dispatcher_and_profile_worker_converge( + self, tmp_path, monkeypatch + ): + # End-to-end convergence: resolve the path under each side's + # HERMES_HOME and confirm equality. This is the property the + # dispatcher/worker handoff actually depends on. + default_home = tmp_path / ".hermes" + default_home.mkdir() + profile_home = default_home / "profiles" / "coder" + profile_home.mkdir(parents=True) + + # Dispatcher's perspective. + self._set_home(monkeypatch, tmp_path, default_home) + dispatcher_db = kb.kanban_db_path() + dispatcher_ws = kb.workspaces_root() + dispatcher_log = kb.worker_log_path("t_handoff") + + # Worker's perspective (profile activated by `hermes -p coder`). + monkeypatch.setenv("HERMES_HOME", str(profile_home)) + worker_db = kb.kanban_db_path() + worker_ws = kb.workspaces_root() + worker_log = kb.worker_log_path("t_handoff") + + assert dispatcher_db == worker_db + assert dispatcher_ws == worker_ws + assert dispatcher_log == worker_log + + def test_docker_custom_hermes_home_uses_env_path_directly( + self, tmp_path, monkeypatch + ): + # Docker / custom deployment: HERMES_HOME points outside ~/.hermes. 
+ # `get_default_hermes_root()` returns env_home directly when it + # is not a `<root>/profiles/<name>` shape and not under + # `Path.home() / ".hermes"`. + custom_root = tmp_path / "opt" / "hermes" + custom_root.mkdir(parents=True) + self._set_home(monkeypatch, tmp_path, custom_root) + + assert kb.kanban_home() == custom_root + assert kb.kanban_db_path() == custom_root / "kanban.db" + + def test_docker_profile_layout_uses_grandparent( + self, tmp_path, monkeypatch + ): + # Docker profile shape: HERMES_HOME=/opt/hermes/profiles/coder; + # `get_default_hermes_root()` walks up to /opt/hermes because + # the immediate parent dir is named "profiles". + custom_root = tmp_path / "opt" / "hermes" + profile = custom_root / "profiles" / "coder" + profile.mkdir(parents=True) + self._set_home(monkeypatch, tmp_path, profile) + + assert kb.kanban_home() == custom_root + assert kb.kanban_db_path() == custom_root / "kanban.db" + + def test_explicit_override_via_hermes_kanban_home( + self, tmp_path, monkeypatch + ): + # Explicit override: HERMES_KANBAN_HOME beats every other + # resolution rule. + default_home = tmp_path / ".hermes" + profile_home = default_home / "profiles" / "any" + profile_home.mkdir(parents=True) + override = tmp_path / "shared-board" + override.mkdir() + + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("HERMES_HOME", str(profile_home)) + monkeypatch.setenv("HERMES_KANBAN_HOME", str(override)) + + assert kb.kanban_home() == override + assert kb.kanban_db_path() == override / "kanban.db" + assert kb.workspaces_root() == override / "kanban" / "workspaces" + + def test_empty_override_falls_through(self, tmp_path, monkeypatch): + # Empty/whitespace override is treated as unset. 
+ default_home = tmp_path / ".hermes" + default_home.mkdir() + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("HERMES_HOME", str(default_home)) + monkeypatch.setenv("HERMES_KANBAN_HOME", " ") + + assert kb.kanban_home() == default_home + + def test_dispatcher_and_worker_share_a_real_database( + self, tmp_path, monkeypatch + ): + # Belt-and-suspenders: round-trip a task across the two + # HERMES_HOME perspectives via a real SQLite file. Without the + # fix the worker would open a different file and see no rows. + default_home = tmp_path / ".hermes" + default_home.mkdir() + profile_home = default_home / "profiles" / "nehemiahkanban" + profile_home.mkdir(parents=True) + + # Dispatcher creates the board and a task. + self._set_home(monkeypatch, tmp_path, default_home) + kb.init_db() + with kb.connect() as conn: + task_id = kb.create_task(conn, title="cross-profile") + + # Worker switches to the profile HERMES_HOME and reads. + monkeypatch.setenv("HERMES_HOME", str(profile_home)) + with kb.connect() as conn: + task = kb.get_task(conn, task_id) + assert task is not None + assert task.title == "cross-profile" + + def test_hermes_kanban_db_pin_beats_kanban_home( + self, tmp_path, monkeypatch + ): + # HERMES_KANBAN_DB pins the file path directly and beats both + # HERMES_KANBAN_HOME and the `get_default_hermes_root()` path. + # This is the env the dispatcher injects into workers. + default_home = tmp_path / ".hermes" + default_home.mkdir() + umbrella = tmp_path / "umbrella" + umbrella.mkdir() + pinned_db = tmp_path / "pinned" / "board.db" + pinned_db.parent.mkdir() + + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("HERMES_HOME", str(default_home)) + monkeypatch.setenv("HERMES_KANBAN_HOME", str(umbrella)) + monkeypatch.setenv("HERMES_KANBAN_DB", str(pinned_db)) + + assert kb.kanban_db_path() == pinned_db + # workspaces_root still follows HERMES_KANBAN_HOME -- the pins + # are independent. 
+ assert kb.workspaces_root() == umbrella / "kanban" / "workspaces" + + def test_hermes_kanban_workspaces_root_pin_beats_kanban_home( + self, tmp_path, monkeypatch + ): + # HERMES_KANBAN_WORKSPACES_ROOT pins the workspaces root directly. + default_home = tmp_path / ".hermes" + default_home.mkdir() + umbrella = tmp_path / "umbrella" + umbrella.mkdir() + pinned_ws = tmp_path / "pinned-workspaces" + pinned_ws.mkdir() + + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("HERMES_HOME", str(default_home)) + monkeypatch.setenv("HERMES_KANBAN_HOME", str(umbrella)) + monkeypatch.setenv("HERMES_KANBAN_WORKSPACES_ROOT", str(pinned_ws)) + + assert kb.workspaces_root() == pinned_ws + # kanban_db_path still follows HERMES_KANBAN_HOME. + assert kb.kanban_db_path() == umbrella / "kanban.db" + + def test_empty_per_path_overrides_fall_through( + self, tmp_path, monkeypatch + ): + # Empty/whitespace pins are treated as unset, same as + # HERMES_KANBAN_HOME. + default_home = tmp_path / ".hermes" + default_home.mkdir() + monkeypatch.setattr(Path, "home", lambda: tmp_path) + monkeypatch.setenv("HERMES_HOME", str(default_home)) + monkeypatch.setenv("HERMES_KANBAN_DB", " ") + monkeypatch.setenv("HERMES_KANBAN_WORKSPACES_ROOT", "") + + assert kb.kanban_db_path() == default_home / "kanban.db" + assert kb.workspaces_root() == default_home / "kanban" / "workspaces" + + def test_dispatcher_spawn_injects_kanban_db_and_workspaces_root( + self, tmp_path, monkeypatch + ): + # The dispatcher's `_default_spawn` must inject HERMES_KANBAN_DB + # and HERMES_KANBAN_WORKSPACES_ROOT into the worker env so the + # worker converges on the dispatcher's paths even when the + # `-p <profile>` flag rewrites HERMES_HOME. 
+ default_home = tmp_path / ".hermes" + default_home.mkdir() + self._set_home(monkeypatch, tmp_path, default_home) + + captured = {} + + class _FakePopen: + def __init__(self, cmd, **kwargs): + captured["cmd"] = cmd + captured["env"] = kwargs.get("env", {}) + self.pid = 4242 + + monkeypatch.setattr("subprocess.Popen", _FakePopen) + + task = kb.Task( + id="t_dispatch_env", + title="x", + body=None, + assignee="coder", + status="ready", + priority=0, + created_by=None, + created_at=0, + started_at=None, + completed_at=None, + workspace_kind="scratch", + workspace_path=None, + claim_lock=None, + claim_expires=None, + tenant=None, + ) + kb._default_spawn(task, str(tmp_path / "ws")) + + env = captured["env"] + assert env["HERMES_KANBAN_DB"] == str(default_home / "kanban.db") + assert env["HERMES_KANBAN_WORKSPACES_ROOT"] == str( + default_home / "kanban" / "workspaces" + ) + assert env["HERMES_KANBAN_TASK"] == "t_dispatch_env" diff --git a/tests/hermes_cli/test_openai_codex_model_validation_fallback.py b/tests/hermes_cli/test_openai_codex_model_validation_fallback.py new file mode 100644 index 0000000000..e33dbe2ba4 --- /dev/null +++ b/tests/hermes_cli/test_openai_codex_model_validation_fallback.py @@ -0,0 +1,55 @@ +"""Regression tests for OpenAI Codex model validation when the listing lags behind +actually usable backend model IDs. + +The bug: `/model` and `switch_model()` reject `gpt-5.3-codex-spark` because the +OpenAI Codex listing omits it, even though direct runtime calls with +`--provider openai-codex -m gpt-5.3-codex-spark` succeed. +""" + +from unittest.mock import patch + +from hermes_cli.model_switch import switch_model +from hermes_cli.models import validate_requested_model + + +def test_openai_codex_unknown_but_plausible_model_is_accepted_with_warning(): + """If the Codex listing is incomplete, `/model` should soft-accept the model + with a warning instead of hard-rejecting it. 
+ """ + with patch( + "hermes_cli.models.provider_model_ids", + return_value=["gpt-5.5", "gpt-5.4", "gpt-5.3-codex"], + ): + result = validate_requested_model("gpt-5.3-codex-spark", "openai-codex") + + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is False + assert "gpt-5.3-codex-spark" in result["message"] + assert "OpenAI Codex model listing" in result["message"] + assert "Similar models" in result["message"] + assert "gpt-5.3-codex" in result["message"] + + +def test_switch_model_allows_openai_codex_model_missing_from_listing(): + """switch_model() should succeed for Codex models that the runtime accepts + even when the listing has not caught up yet. + """ + with patch( + "hermes_cli.models.provider_model_ids", + return_value=["gpt-5.5", "gpt-5.4", "gpt-5.3-codex"], + ): + result = switch_model( + "gpt-5.3-codex-spark", + current_provider="openai-codex", + current_model="gpt-5.4", + current_base_url="", + current_api_key="", + user_providers=None, + ) + + assert result.success is True + assert result.new_model == "gpt-5.3-codex-spark" + assert result.target_provider == "openai-codex" + assert result.warning_message + assert "OpenAI Codex model listing" in result.warning_message diff --git a/tests/hermes_cli/test_plugins_cmd.py b/tests/hermes_cli/test_plugins_cmd.py index 72b9bdde2c..11231350e1 100644 --- a/tests/hermes_cli/test_plugins_cmd.py +++ b/tests/hermes_cli/test_plugins_cmd.py @@ -508,7 +508,7 @@ class TestPromptPluginEnvVars: class TestCursesRadiolist: - """Test the curses_radiolist function (non-TTY fallback path).""" + """Test the curses_radiolist function.""" def test_non_tty_returns_default(self): from hermes_cli.curses_ui import curses_radiolist @@ -524,6 +524,14 @@ class TestCursesRadiolist: result = curses_radiolist("Pick", ["x", "y"], selected=0, cancel_returns=1) assert result == 1 + def test_keyboard_interrupt_returns_cancel_value(self): + from hermes_cli.curses_ui import curses_radiolist + 
+ with patch("sys.stdin") as mock_stdin, patch("curses.wrapper", side_effect=KeyboardInterrupt): + mock_stdin.isatty.return_value = True + result = curses_radiolist("Pick", ["x", "y"], selected=0, cancel_returns=-1) + assert result == -1 + # ── Provider discovery helpers ─────────────────────────────────────────── diff --git a/tests/hermes_cli/test_profiles.py b/tests/hermes_cli/test_profiles.py index 9177930f22..7ddb8fd20a 100644 --- a/tests/hermes_cli/test_profiles.py +++ b/tests/hermes_cli/test_profiles.py @@ -15,6 +15,7 @@ from unittest.mock import patch, MagicMock import pytest from hermes_cli.profiles import ( + normalize_profile_name, validate_profile_name, get_profile_dir, create_profile, @@ -58,6 +59,24 @@ def profile_env(tmp_path, monkeypatch): # TestValidateProfileName # =================================================================== +class TestNormalizeProfileName: + """Tests for normalize_profile_name().""" + + def test_title_case_normalized(self): + assert normalize_profile_name("Jules") == "jules" + assert normalize_profile_name(" Librarian ") == "librarian" + + def test_default_case_insensitive(self): + assert normalize_profile_name("Default") == "default" + assert normalize_profile_name("DEFAULT") == "default" + + def test_empty_raises(self): + with pytest.raises(ValueError, match="cannot be empty"): + normalize_profile_name("") + with pytest.raises(ValueError, match="cannot be empty"): + normalize_profile_name(" ") + + class TestValidateProfileName: """Tests for validate_profile_name().""" @@ -66,6 +85,11 @@ class TestValidateProfileName: # Should not raise validate_profile_name(name) + def test_uppercase_rejected(self): + # validate_profile_name is strict — callers normalize first, then validate. 
+ with pytest.raises(ValueError): + validate_profile_name("Jules") + @pytest.mark.parametrize("name", ["UPPER", "has space", ".hidden", "-leading"]) def test_invalid_names_rejected(self, name): with pytest.raises(ValueError): @@ -107,6 +131,10 @@ class TestGetProfileDir: result = get_profile_dir("coder") assert result == tmp_path / ".hermes" / "profiles" / "coder" + def test_named_profile_matching_is_case_insensitive(self, profile_env): + tmp_path = profile_env + assert get_profile_dir("Coder") == tmp_path / ".hermes" / "profiles" / "coder" + # =================================================================== # TestCreateProfile diff --git a/tests/hermes_cli/test_setup.py b/tests/hermes_cli/test_setup.py index 72adc27c0c..f7b491ddf3 100644 --- a/tests/hermes_cli/test_setup.py +++ b/tests/hermes_cli/test_setup.py @@ -613,3 +613,35 @@ def test_offer_launch_chat_falls_back_to_module(monkeypatch): setup_mod._offer_launch_chat() assert exec_calls == [(sys.executable, [sys.executable, "-m", "hermes_cli.main", "chat"])] + + +def test_setup_slack_saves_home_channel(monkeypatch): + """_setup_slack() saves SLACK_HOME_CHANNEL when the user provides one.""" + saved = {} + prompts = iter(["xoxb-test-token", "xapp-test-token", "", "C01ABC2DE3F"]) + + monkeypatch.setattr(setup_mod, "get_env_value", lambda key: "") + monkeypatch.setattr(setup_mod, "save_env_value", lambda k, v: saved.update({k: v})) + monkeypatch.setattr(setup_mod, "prompt", lambda *_a, **_kw: next(prompts)) + monkeypatch.setattr(setup_mod, "prompt_yes_no", lambda *_a, **_kw: False) + monkeypatch.setattr(setup_mod, "_write_slack_manifest_and_instruct", lambda: None) + + setup_mod._setup_slack() + + assert saved.get("SLACK_HOME_CHANNEL") == "C01ABC2DE3F" + + +def test_setup_slack_home_channel_empty_not_saved(monkeypatch): + """_setup_slack() does not save SLACK_HOME_CHANNEL when left blank.""" + saved = {} + prompts = iter(["xoxb-test-token", "xapp-test-token", "", ""]) + + monkeypatch.setattr(setup_mod, 
"get_env_value", lambda key: "") + monkeypatch.setattr(setup_mod, "save_env_value", lambda k, v: saved.update({k: v})) + monkeypatch.setattr(setup_mod, "prompt", lambda *_a, **_kw: next(prompts)) + monkeypatch.setattr(setup_mod, "prompt_yes_no", lambda *_a, **_kw: False) + monkeypatch.setattr(setup_mod, "_write_slack_manifest_and_instruct", lambda: None) + + setup_mod._setup_slack() + + assert "SLACK_HOME_CHANNEL" not in saved diff --git a/tests/hermes_cli/test_tools_config.py b/tests/hermes_cli/test_tools_config.py index abe211f4fb..0bde24fc74 100644 --- a/tests/hermes_cli/test_tools_config.py +++ b/tests/hermes_cli/test_tools_config.py @@ -2,10 +2,13 @@ from unittest.mock import patch +import pytest + from hermes_cli.tools_config import ( _DEFAULT_OFF_TOOLSETS, _apply_toolset_change, _configure_provider, + _reconfigure_provider, _get_platform_tools, _platform_toolset_summary, _reconfigure_tool, @@ -898,3 +901,27 @@ def test_get_effective_configurable_toolsets_dedupes_bundled_plugins(): assert len(spotify_rows) == 1, spotify_rows # Built-in label wins over the plugin label. 
assert spotify_rows[0][1] == "🎵 Spotify" + + +@pytest.mark.parametrize("provider,config_key,expected", [ + # managed provider → use_gateway True + ({"name": "T", "tts_provider": "elevenlabs", "managed_nous_feature": "tts", "env_vars": []}, "tts", True), + ({"name": "B", "browser_provider": "browserbase", "managed_nous_feature": "browser", "env_vars": []}, "browser", True), + ({"name": "W", "web_backend": "tavily", "managed_nous_feature": "web", "env_vars": []}, "web", True), + # self-hosted provider → use_gateway False + ({"name": "T", "tts_provider": "elevenlabs", "env_vars": []}, "tts", False), + ({"name": "B", "browser_provider": "browserbase", "env_vars": []}, "browser", False), + ({"name": "W", "web_backend": "tavily", "env_vars": []}, "web", False), +]) +def test_reconfigure_provider_syncs_use_gateway(provider, config_key, expected): + config = {} + _reconfigure_provider(provider, config) + assert config[config_key]["use_gateway"] is expected + + +def test_reconfigure_browser_provider_overwrites_stale_use_gateway(): + # Switching from managed (use_gateway=True) to self-hosted must clear the stale flag. + config = {"browser": {"cloud_provider": "managed-browser", "use_gateway": True}} + provider = {"name": "Browserbase", "browser_provider": "browserbase", "env_vars": []} + _reconfigure_provider(provider, config) + assert config["browser"]["use_gateway"] is False diff --git a/tests/hermes_cli/test_tui_npm_install.py b/tests/hermes_cli/test_tui_npm_install.py index e56196e07e..1dec625716 100644 --- a/tests/hermes_cli/test_tui_npm_install.py +++ b/tests/hermes_cli/test_tui_npm_install.py @@ -69,6 +69,39 @@ def test_no_install_when_only_optional_peer_package_missing_from_hidden_lock(tmp assert main_mod._tui_need_npm_install(tmp_path) is False +def test_no_install_when_only_peer_annotation_differs(tmp_path: Path, main_mod) -> None: + """npm 9 drops the ``peer`` flag from the hidden lock on dev-deps that are + *also* declared as peers. 
That's a cosmetic difference — the package is + installed at the requested version — so it must not trigger a reinstall. + Regression for the TUI-in-Docker failure where 16 such mismatches caused + `Installing TUI dependencies…` → EACCES on every launch. + """ + _touch_ink(tmp_path) + (tmp_path / "package-lock.json").write_text( + '{"packages":{' + '"node_modules/foo":{"version":"1.0.0","dev":true,"peer":true,"resolved":"https://x/foo.tgz"}' + '}}' + ) + (tmp_path / "node_modules" / ".package-lock.json").write_text( + '{"packages":{' + '"node_modules/foo":{"version":"1.0.0","dev":true,"resolved":"https://x/foo.tgz"}' + '}}' + ) + assert main_mod._tui_need_npm_install(tmp_path) is False + + +def test_install_when_version_differs_even_with_peer_drop(tmp_path: Path, main_mod) -> None: + """The peer-drop tolerance must not mask a real version skew.""" + _touch_ink(tmp_path) + (tmp_path / "package-lock.json").write_text( + '{"packages":{"node_modules/foo":{"version":"2.0.0","dev":true,"peer":true}}}' + ) + (tmp_path / "node_modules" / ".package-lock.json").write_text( + '{"packages":{"node_modules/foo":{"version":"1.0.0","dev":true}}}' + ) + assert main_mod._tui_need_npm_install(tmp_path) is True + + def test_no_install_when_lock_older_than_marker(tmp_path: Path, main_mod) -> None: _touch_ink(tmp_path) (tmp_path / "package-lock.json").write_text("{}") diff --git a/tests/plugins/image_gen/test_xai_provider.py b/tests/plugins/image_gen/test_xai_provider.py index ab1bf88345..0da46d43ec 100644 --- a/tests/plugins/image_gen/test_xai_provider.py +++ b/tests/plugins/image_gen/test_xai_provider.py @@ -172,6 +172,27 @@ class TestGenerate: assert result["success"] is False assert result["error_type"] == "api_error" + def test_api_error_preserves_real_response_status(self): + import requests as req_lib + from plugins.image_gen.xai import XAIImageGenProvider + + response = req_lib.Response() + response.status_code = 401 + response._content = json.dumps({"error": {"message": 
"Invalid API key"}}).encode() + response.headers["Content-Type"] = "application/json" + + response.raise_for_status = MagicMock( + side_effect=req_lib.HTTPError(response=response) + ) + + with patch("plugins.image_gen.xai.requests.post", return_value=response): + provider = XAIImageGenProvider() + result = provider.generate(prompt="test") + + assert result["success"] is False + assert result["error_type"] == "api_error" + assert "xAI image generation failed (401): Invalid API key" in result["error"] + def test_timeout(self): import requests as req_lib diff --git a/tests/plugins/test_kanban_dashboard_plugin.py b/tests/plugins/test_kanban_dashboard_plugin.py index 4bbc621f1a..0055fc80f0 100644 --- a/tests/plugins/test_kanban_dashboard_plugin.py +++ b/tests/plugins/test_kanban_dashboard_plugin.py @@ -253,6 +253,33 @@ def test_patch_invalid_status(client): assert r.status_code == 400 +def test_patch_status_running_rejected(client): + """Dashboard PATCH cannot transition a task directly to 'running'. + + The only legitimate path into 'running' is through the dispatcher's + ``claim_task`` — which atomically creates a ``task_runs`` row, + claim_lock, expiry, and worker-PID metadata. Allowing a direct set + creates orphaned 'running' tasks with no run row or claim, which + violate the board's run-history invariants. See issue #19535. + """ + t = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"] + r = client.patch( + f"/api/plugins/kanban/tasks/{t['id']}", + json={"status": "running"}, + ) + assert r.status_code == 400 + assert "running" in r.json()["detail"] + # Task's status should still be its pre-request value — the direct-set + # was rejected before any mutation. 
+ board = client.get("/api/plugins/kanban/board").json() + statuses = { + tt["id"]: col["name"] + for col in board["columns"] + for tt in col["tasks"] + } + assert statuses.get(t["id"]) != "running" + + # --------------------------------------------------------------------------- # Comments + Links # --------------------------------------------------------------------------- diff --git a/tests/run_agent/test_413_compression.py b/tests/run_agent/test_413_compression.py index 8bd357d3d2..5410f196e6 100644 --- a/tests/run_agent/test_413_compression.py +++ b/tests/run_agent/test_413_compression.py @@ -432,6 +432,8 @@ class TestPreflightCompression: ok_resp = _mock_response(content="After preflight", finish_reason="stop") agent.client.chat.completions.create.side_effect = [ok_resp] + status_messages = [] + agent.status_callback = lambda ev, msg: status_messages.append((ev, msg)) with ( patch.object(agent, "_compress_context") as mock_compress, @@ -460,6 +462,10 @@ class TestPreflightCompression: ) assert result["completed"] is True assert result["final_response"] == "After preflight" + assert any( + ev == "lifecycle" and "Preflight compression" in msg + for ev, msg in status_messages + ) def test_no_preflight_when_under_threshold(self, agent): """When history fits within context, no preflight compression needed.""" diff --git a/tests/run_agent/test_agent_guardrails.py b/tests/run_agent/test_agent_guardrails.py index 032057d59f..b222b3320e 100644 --- a/tests/run_agent/test_agent_guardrails.py +++ b/tests/run_agent/test_agent_guardrails.py @@ -263,3 +263,34 @@ class TestGetToolCallIdStatic: def test_object_without_id_attr(self): tc = types.SimpleNamespace() assert AIAgent._get_tool_call_id_static(tc) == "" + + +# --------------------------------------------------------------------------- +# _get_tool_call_name_static +# --------------------------------------------------------------------------- + +class TestGetToolCallNameStatic: + + def test_dict_with_valid_name(self): + 
assert AIAgent._get_tool_call_name_static( + {"id": "call_1", "function": {"name": "terminal", "arguments": "{}"}} + ) == "terminal" + + def test_dict_with_missing_function(self): + assert AIAgent._get_tool_call_name_static({"id": "call_1"}) == "" + + def test_dict_with_none_function(self): + assert AIAgent._get_tool_call_name_static({"id": "call_1", "function": None}) == "" + + def test_dict_with_none_name(self): + assert AIAgent._get_tool_call_name_static( + {"function": {"name": None, "arguments": "{}"}} + ) == "" + + def test_object_with_valid_name(self): + tc = make_tc("read_file") + assert AIAgent._get_tool_call_name_static(tc) == "read_file" + + def test_object_without_function_attr(self): + tc = types.SimpleNamespace(id="call_1") + assert AIAgent._get_tool_call_name_static(tc) == "" diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 55ce86e51a..d663805f8f 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -2181,6 +2181,83 @@ class TestHandleMaxIterations: kwargs = agent.client.chat.completions.create.call_args.kwargs assert "reasoning" not in kwargs.get("extra_body", {}) + def test_summary_request_removes_orphan_tool_result(self, agent): + """Regression: max-iterations summary request must NOT contain + orphan tool results (tool_call_id with no matching assistant tool_call).""" + resp = _mock_response(content="Summary of work done.") + agent.client.chat.completions.create.return_value = resp + agent._cached_system_prompt = "You are helpful." 
+ messages = [ + {"role": "user", "content": "Analyze finance-data-router"}, + {"role": "assistant", "content": "[Session Arc Summary] ..."}, + {"role": "tool", "tool_call_id": "call_cfedFhJjGmu1RvRc1OUC38j8", "content": "file content here"}, + {"role": "assistant", "tool_calls": [{"id": "call_8fXBXsT592Vpvm7wnW4obPEu", "function": {"name": "patch", "arguments": "{}"}}]}, + {"role": "tool", "tool_call_id": "call_8fXBXsT592Vpvm7wnW4obPEu", "content": "patch result"}, + {"role": "assistant", "content": "Done."}, + ] + + result = agent._handle_max_iterations(messages, 120) + + assert result == "Summary of work done." + kwargs = agent.client.chat.completions.create.call_args.kwargs + sent_msgs = kwargs.get("messages", []) + orphan_ids = [ + m.get("tool_call_id") for m in sent_msgs + if m.get("role") == "tool" and m.get("tool_call_id") == "call_cfedFhJjGmu1RvRc1OUC38j8" + ] + assert len(orphan_ids) == 0, f"Orphan tool result still present: {orphan_ids}" + + def test_summary_request_inserts_stub_for_missing_tool_result(self, agent): + """If an assistant tool_call has no matching tool result in the + summary request, a stub must be inserted to satisfy the API contract.""" + resp = _mock_response(content="Summary") + agent.client.chat.completions.create.return_value = resp + agent._cached_system_prompt = "You are helpful." 
+ messages = [ + {"role": "user", "content": "do stuff"}, + {"role": "assistant", "tool_calls": [{"id": "call_no_result", "function": {"name": "terminal", "arguments": "{}"}}]}, + {"role": "assistant", "content": "Continuing..."}, + ] + + result = agent._handle_max_iterations(messages, 60) + + assert result == "Summary" + kwargs = agent.client.chat.completions.create.call_args.kwargs + sent_msgs = kwargs.get("messages", []) + stub_ids = [ + m.get("tool_call_id") for m in sent_msgs + if m.get("role") == "tool" and m.get("tool_call_id") == "call_no_result" + ] + assert len(stub_ids) >= 1, f"No stub result for assistant tool_call: {stub_ids}" + + def test_summary_omits_provider_preferences_for_non_openrouter(self, agent): + agent.base_url = "https://api.openai.com/v1" + agent._base_url_lower = agent.base_url.lower() + agent.provider = "openai" + agent.providers_allowed = ["Anthropic"] + agent.client.chat.completions.create.return_value = _mock_response(content="Summary") + agent._cached_system_prompt = "You are helpful." + + result = agent._handle_max_iterations([{"role": "user", "content": "do stuff"}], 60) + + assert result == "Summary" + kwargs = agent.client.chat.completions.create.call_args.kwargs + assert "provider" not in kwargs.get("extra_body", {}) + + def test_summary_keeps_provider_preferences_for_openrouter(self, agent): + agent.base_url = "https://openrouter.ai/api/v1" + agent._base_url_lower = agent.base_url.lower() + agent.provider = "openrouter" + agent.providers_allowed = ["Anthropic"] + agent.client.chat.completions.create.return_value = _mock_response(content="Summary") + agent._cached_system_prompt = "You are helpful." 
+ + result = agent._handle_max_iterations([{"role": "user", "content": "do stuff"}], 60) + + assert result == "Summary" + kwargs = agent.client.chat.completions.create.call_args.kwargs + assert kwargs["extra_body"]["provider"]["only"] == ["Anthropic"] + def test_codex_summary_sanitizes_orphan_tool_results(self, agent): agent.api_mode = "codex_responses" agent.provider = "openai-codex" diff --git a/tests/run_agent/test_tool_arg_coercion.py b/tests/run_agent/test_tool_arg_coercion.py index 8a14da9ea2..d9ac5dd20f 100644 --- a/tests/run_agent/test_tool_arg_coercion.py +++ b/tests/run_agent/test_tool_arg_coercion.py @@ -64,10 +64,23 @@ class TestCoerceNumber: def test_scientific_notation(self): assert _coerce_number("1e5") == 100000 - def test_inf_stays_string_for_integer_only(self): - """Infinity should not be converted to int.""" + def test_inf_stays_string(self): + """Infinity is not JSON-serializable, so it should stay as string.""" result = _coerce_number("inf") assert result == "inf" + assert isinstance(result, str) + + def test_negative_inf_stays_string(self): + """Negative infinity should also stay as string.""" + result = _coerce_number("-inf") + assert result == "-inf" + assert isinstance(result, str) + + def test_nan_stays_string(self): + """NaN is not JSON-serializable, so it should stay as string.""" + result = _coerce_number("nan") + assert result == "nan" + assert isinstance(result, str) def test_negative_float(self): assert _coerce_number("-2.5") == -2.5 @@ -284,13 +297,69 @@ class TestCoerceToolArgs: result = coerce_tool_args("test_tool", args) assert result["stages"] is None - def test_invalid_json_array_preserved_as_string(self): - """If the string isn't valid JSON, pass it through — let the tool decide.""" + def test_invalid_json_array_wrapped_in_single_element_list(self): + """A bare string gets wrapped into ``[value]`` when the schema says array. 
+ + Open-weight models (DeepSeek, Qwen, GLM) sometimes emit + ``{"urls": "https://a.com"}`` when the tool expects a list. + Wrapping produces a valid dispatch rather than a confusing tool + failure. This supersedes the earlier "pass the string through" + behavior — no real tool handles a bare string as an array + gracefully. + """ schema = self._mock_schema({"items": {"type": "array"}}) with patch("model_tools.registry.get_schema", return_value=schema): args = {"items": "not-json"} result = coerce_tool_args("test_tool", args) - assert result["items"] == "not-json" + assert result["items"] == ["not-json"] + + def test_bare_string_wrapped_as_array(self): + """Bare string on array field → single-element list.""" + schema = self._mock_schema({"urls": {"type": "array", "items": {"type": "string"}}}) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"urls": "https://a.com"} + result = coerce_tool_args("test_tool", args) + assert result["urls"] == ["https://a.com"] + + def test_bare_int_wrapped_as_array(self): + """Bare non-string scalars (int, bool, float) also get wrapped.""" + schema = self._mock_schema({"ids": {"type": "array", "items": {"type": "integer"}}}) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"ids": 5} + result = coerce_tool_args("test_tool", args) + assert result["ids"] == [5] + + def test_bare_dict_wrapped_as_array(self): + """Bare dict on array field → single-element list.""" + schema = self._mock_schema({"items": {"type": "array"}}) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"items": {"a": 1}} + result = coerce_tool_args("test_tool", args) + assert result["items"] == [{"a": 1}] + + def test_none_on_array_field_preserved(self): + """``None`` is never wrapped — tools with defaults handle it.""" + schema = self._mock_schema({"items": {"type": "array"}}) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"items": None} + result 
= coerce_tool_args("test_tool", args) + assert result["items"] is None + + def test_existing_list_passthrough(self): + """An already-valid list is not touched.""" + schema = self._mock_schema({"items": {"type": "array"}}) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"items": ["a", "b"]} + result = coerce_tool_args("test_tool", args) + assert result["items"] == ["a", "b"] + + def test_json_encoded_array_still_parses(self): + """JSON-encoded strings still parse (not double-wrapped).""" + schema = self._mock_schema({"items": {"type": "array"}}) + with patch("model_tools.registry.get_schema", return_value=schema): + args = {"items": '["a","b"]'} + result = coerce_tool_args("test_tool", args) + assert result["items"] == ["a", "b"] def test_extra_args_without_schema_left_alone(self): """Args not in the schema properties are not touched.""" diff --git a/tests/run_agent/test_tool_call_args_sanitizer.py b/tests/run_agent/test_tool_call_args_sanitizer.py index 79f4d82c5a..57ba9839fa 100644 --- a/tests/run_agent/test_tool_call_args_sanitizer.py +++ b/tests/run_agent/test_tool_call_args_sanitizer.py @@ -96,6 +96,7 @@ def test_marker_message_inserted_when_missing(): assert repaired == 1 assert messages[1] == { "role": "tool", + "name": "read_file", "tool_call_id": "call_1", "content": marker, } diff --git a/tests/tools/test_base_environment.py b/tests/tools/test_base_environment.py index 28ce08e840..eb3661cafd 100644 --- a/tests/tools/test_base_environment.py +++ b/tests/tools/test_base_environment.py @@ -30,7 +30,7 @@ class TestWrapCommand: wrapped = env._wrap_command("echo hello", "/tmp") assert "source" in wrapped - assert "cd /tmp" in wrapped or "cd '/tmp'" in wrapped + assert "cd -- /tmp" in wrapped or "cd -- '/tmp'" in wrapped assert "eval 'echo hello'" in wrapped assert "__hermes_ec=$?" 
in wrapped assert "export -p >" in wrapped @@ -57,24 +57,31 @@ class TestWrapCommand: env._snapshot_ready = True wrapped = env._wrap_command("ls", "~") - assert "cd ~" in wrapped - assert "cd '~'" not in wrapped + assert "cd -- ~" in wrapped + assert "cd -- '~'" not in wrapped def test_tilde_subpath_with_spaces_uses_home_and_quotes_suffix(self): env = _TestableEnv() env._snapshot_ready = True wrapped = env._wrap_command("ls", "~/my repo") - assert "cd $HOME/'my repo'" in wrapped - assert "cd ~/my repo" not in wrapped + assert "cd -- $HOME/'my repo'" in wrapped + assert "cd -- ~/my repo" not in wrapped def test_tilde_slash_maps_to_home(self): env = _TestableEnv() env._snapshot_ready = True wrapped = env._wrap_command("ls", "~/") - assert "cd $HOME" in wrapped - assert "cd ~/" not in wrapped + assert "cd -- $HOME" in wrapped + assert "cd -- ~/" not in wrapped + + def test_hyphen_prefixed_workdir_is_passed_after_double_dash(self): + env = _TestableEnv() + env._snapshot_ready = True + wrapped = env._wrap_command("pwd", "-demo") + + assert "builtin cd -- -demo || exit 126" in wrapped def test_cd_failure_exit_126(self): env = _TestableEnv() diff --git a/tests/tools/test_browser_homebrew_paths.py b/tests/tools/test_browser_homebrew_paths.py index eb4a699851..221d2e6602 100644 --- a/tests/tools/test_browser_homebrew_paths.py +++ b/tests/tools/test_browser_homebrew_paths.py @@ -209,6 +209,13 @@ class TestFindAgentBrowser: class TestBrowserRequirements: + def test_cdp_override_does_not_require_agent_browser_cli(self, monkeypatch): + monkeypatch.setenv("BROWSER_CDP_URL", "ws://127.0.0.1:9222/devtools/browser/test") + monkeypatch.setattr("tools.browser_tool._is_camofox_mode", lambda: False) + monkeypatch.setattr("tools.browser_tool._find_agent_browser", lambda: (_ for _ in ()).throw(FileNotFoundError("not found"))) + + assert check_browser_requirements() is True + def test_termux_requires_real_agent_browser_install_not_npx_fallback(self, monkeypatch): 
monkeypatch.setenv("TERMUX_VERSION", "0.118.3") monkeypatch.setenv("PREFIX", "/data/data/com.termux/files/usr") diff --git a/tests/tools/test_delegate.py b/tests/tools/test_delegate.py index 1806a7e60f..c45de2a581 100644 --- a/tests/tools/test_delegate.py +++ b/tests/tools/test_delegate.py @@ -821,7 +821,9 @@ class TestDelegationCredentialResolution(unittest.TestCase): self.assertEqual(creds["api_key"], "local-key") self.assertEqual(creds["api_mode"], "chat_completions") - def test_direct_endpoint_falls_back_to_openai_api_key_env(self): + def test_direct_endpoint_returns_none_api_key_when_not_configured(self): + # When base_url is set without api_key, api_key should be None so + # _build_child_agent inherits the parent's key (effective_api_key = override or parent). parent = _make_mock_parent(depth=0) cfg = { "model": "qwen2.5-coder", @@ -829,10 +831,11 @@ class TestDelegationCredentialResolution(unittest.TestCase): } with patch.dict(os.environ, {"OPENAI_API_KEY": "env-openai-key"}, clear=False): creds = _resolve_delegation_credentials(cfg, parent) - self.assertEqual(creds["api_key"], "env-openai-key") + self.assertIsNone(creds["api_key"]) self.assertEqual(creds["provider"], "custom") - def test_direct_endpoint_does_not_fall_back_to_openrouter_api_key_env(self): + def test_direct_endpoint_no_raise_when_only_provider_env_key_present(self): + # Even if OPENAI_API_KEY is absent, no ValueError — _build_child_agent uses parent key. 
parent = _make_mock_parent(depth=0) cfg = { "model": "qwen2.5-coder", @@ -846,9 +849,9 @@ class TestDelegationCredentialResolution(unittest.TestCase): }, clear=False, ): - with self.assertRaises(ValueError) as ctx: - _resolve_delegation_credentials(cfg, parent) - self.assertIn("OPENAI_API_KEY", str(ctx.exception)) + creds = _resolve_delegation_credentials(cfg, parent) + self.assertIsNone(creds["api_key"]) + self.assertEqual(creds["provider"], "custom") @patch("hermes_cli.runtime_provider.resolve_runtime_provider") def test_nous_provider_resolves_nous_credentials(self, mock_resolve): @@ -977,6 +980,48 @@ class TestDelegationProviderIntegration(unittest.TestCase): self.assertNotEqual(kwargs["base_url"], parent.base_url) self.assertNotEqual(kwargs["api_key"], parent.api_key) + @patch("tools.delegate_tool._load_config") + @patch("tools.delegate_tool._resolve_delegation_credentials") + def test_provider_override_clears_parent_openrouter_filters( + self, mock_creds, mock_cfg + ): + """Delegated provider should not inherit parent provider-preference filters.""" + mock_cfg.return_value = { + "max_iterations": 45, + "model": "google/gemini-3-flash-preview", + "provider": "openrouter", + } + mock_creds.return_value = { + "model": "google/gemini-3-flash-preview", + "provider": "openrouter", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "sk-or-key", + "api_mode": "chat_completions", + } + parent = _make_mock_parent(depth=0) + parent.providers_allowed = ["anthropic/claude-3.5-sonnet"] + parent.providers_ignored = ["openai/gpt-4o-mini"] + parent.providers_order = ["google/gemini-2.5-pro"] + parent.provider_sort = "price" + + with patch("run_agent.AIAgent") as MockAgent: + mock_child = MagicMock() + mock_child.run_conversation.return_value = { + "final_response": "done", + "completed": True, + "api_calls": 1, + } + MockAgent.return_value = mock_child + + delegate_task(goal="Cross-provider test", parent_agent=parent) + + _, kwargs = MockAgent.call_args + 
self.assertEqual(kwargs["provider"], "openrouter") + self.assertIsNone(kwargs["providers_allowed"]) + self.assertIsNone(kwargs["providers_ignored"]) + self.assertIsNone(kwargs["providers_order"]) + self.assertIsNone(kwargs["provider_sort"]) + @patch("tools.delegate_tool._load_config") @patch("tools.delegate_tool._resolve_delegation_credentials") def test_direct_endpoint_credentials_reach_child_agent(self, mock_creds, mock_cfg): @@ -2403,5 +2448,52 @@ class TestSubagentApprovalCallback(unittest.TestCase): self.assertIsNone(_get_approval_callback()) +class TestFallbackModelInheritance(unittest.TestCase): + """Subagents must inherit the parent's fallback provider chain.""" + + def test_child_inherits_fallback_chain(self): + """_build_child_agent passes parent._fallback_chain as fallback_model.""" + parent = _make_mock_parent(depth=0) + fallback_entry = {"provider": "openrouter", "model": "gpt-4o-mini", "api_key": "sk-or-x"} + parent._fallback_chain = [fallback_entry] + + with patch("run_agent.AIAgent") as MockAgent: + MockAgent.return_value = MagicMock() + _build_child_agent( + task_index=0, + goal="test fallback inheritance", + context=None, + toolsets=None, + model=None, + max_iterations=10, + parent_agent=parent, + task_count=1, + ) + + _, kwargs = MockAgent.call_args + self.assertEqual(kwargs["fallback_model"], [fallback_entry]) + + def test_child_gets_no_fallback_when_parent_chain_empty(self): + """When parent._fallback_chain is empty, fallback_model is None.""" + parent = _make_mock_parent(depth=0) + parent._fallback_chain = [] + + with patch("run_agent.AIAgent") as MockAgent: + MockAgent.return_value = MagicMock() + _build_child_agent( + task_index=0, + goal="test no fallback", + context=None, + toolsets=None, + model=None, + max_iterations=10, + parent_agent=parent, + task_count=1, + ) + + _, kwargs = MockAgent.call_args + self.assertIsNone(kwargs["fallback_model"]) + + if __name__ == "__main__": unittest.main() diff --git a/tests/tools/test_file_operations.py 
b/tests/tools/test_file_operations.py index dfd54ba634..500cd6141a 100644 --- a/tests/tools/test_file_operations.py +++ b/tests/tools/test_file_operations.py @@ -271,6 +271,58 @@ class TestShellFileOpsHelpers: ops = ShellFileOperations(env) assert ops.cwd == "/" + def test_read_file_strips_leaked_terminal_fence_markers(self, mock_env): + leaked = ( + "'\x07__HERMES_FENCE_a9f7b3__\x1b]0;cat " + "'/tmp/test/a.py' 2> /dev/null\x07\n" + "print('ok')\n" + "__HERMES_FENCE_a9f7b3__\x07'\n" + ) + + def side_effect(command, **kwargs): + if command.startswith("wc -c"): + return {"output": "12\n", "returncode": 0} + if command.startswith("head -c"): + return {"output": "print('ok')\n", "returncode": 0} + if command.startswith("sed -n"): + return {"output": leaked, "returncode": 0} + if command.startswith("wc -l"): + return {"output": "1\n", "returncode": 0} + return {"output": "", "returncode": 0} + + mock_env.execute.side_effect = side_effect + ops = ShellFileOperations(mock_env) + result = ops.read_file("/tmp/test/a.py") + + assert result.error is None + assert "HERMES_FENCE" not in result.content + assert "\x1b]" not in result.content + assert "\x07" not in result.content + assert " 1|print('ok')" in result.content + + def test_read_file_raw_strips_leaked_terminal_fence_markers(self, mock_env): + leaked = ( + "__HERMES_FENCE_a9f7b3__\x07'\n" + "alpha\n" + "\x1b]0;cat '/tmp/test/a.txt'\x07__HERMES_FENCE_a9f7b3__\n" + ) + + def side_effect(command, **kwargs): + if command.startswith("wc -c"): + return {"output": "6\n", "returncode": 0} + if command.startswith("head -c"): + return {"output": "alpha\n", "returncode": 0} + if command.startswith("cat "): + return {"output": leaked, "returncode": 0} + return {"output": "", "returncode": 0} + + mock_env.execute.side_effect = side_effect + ops = ShellFileOperations(mock_env) + result = ops.read_file_raw("/tmp/test/a.txt") + + assert result.error is None + assert result.content == "alpha\n" + class TestSearchPathValidation: """Test 
that search() returns an error for non-existent paths.""" diff --git a/tests/tools/test_heartbeat_stale_thresholds.py b/tests/tools/test_heartbeat_stale_thresholds.py new file mode 100644 index 0000000000..fb7db68efb --- /dev/null +++ b/tests/tools/test_heartbeat_stale_thresholds.py @@ -0,0 +1,35 @@ +"""Tests for delegate heartbeat stale threshold configuration.""" + +import pytest + + +class TestHeartbeatStaleThresholds: + """Verify the heartbeat stale threshold constants are correct.""" + + def test_idle_cycles_value(self): + """IDLE stale cycles should be 15 (15 * 30s = 450s).""" + from tools.delegate_tool import _HEARTBEAT_STALE_CYCLES_IDLE + assert _HEARTBEAT_STALE_CYCLES_IDLE == 15 + + def test_in_tool_cycles_value(self): + """IN_TOOL stale cycles should be 40 (40 * 30s = 1200s).""" + from tools.delegate_tool import _HEARTBEAT_STALE_CYCLES_IN_TOOL + assert _HEARTBEAT_STALE_CYCLES_IN_TOOL == 40 + + def test_idle_timeout_seconds(self): + """Effective idle stale timeout: 15 * 30 = 450s (> typical LLM response time).""" + from tools.delegate_tool import _HEARTBEAT_STALE_CYCLES_IDLE, _HEARTBEAT_INTERVAL + effective = _HEARTBEAT_STALE_CYCLES_IDLE * _HEARTBEAT_INTERVAL + assert effective == 450 + assert effective > 300 # Must be > 5 minutes for slow LLM responses + + def test_in_tool_timeout_seconds(self): + """Effective in-tool stale timeout: 40 * 30 = 1200s (= 20 minutes).""" + from tools.delegate_tool import _HEARTBEAT_STALE_CYCLES_IN_TOOL, _HEARTBEAT_INTERVAL + effective = _HEARTBEAT_STALE_CYCLES_IN_TOOL * _HEARTBEAT_INTERVAL + assert effective == 1200 + + def test_interval_unchanged(self): + """Heartbeat interval should remain 30s.""" + from tools.delegate_tool import _HEARTBEAT_INTERVAL + assert _HEARTBEAT_INTERVAL == 30 diff --git a/tests/tools/test_kanban_tools.py b/tests/tools/test_kanban_tools.py index a7a8fda44d..9031d81d8e 100644 --- a/tests/tools/test_kanban_tools.py +++ b/tests/tools/test_kanban_tools.py @@ -467,8 +467,8 @@ def 
test_kanban_guidance_in_worker_prompt(monkeypatch, tmp_path): skip_memory=True, ) prompt = a._build_system_prompt() - # Header phrase - assert "You are a Kanban worker" in prompt + # Header phrase (identity-free — SOUL.md owns identity, layer 3 is protocol) + assert "Kanban task execution protocol" in prompt # Lifecycle signals assert "kanban_show()" in prompt assert "kanban_complete" in prompt @@ -492,3 +492,121 @@ def test_kanban_guidance_prompt_size_bounded(monkeypatch, tmp_path): assert 1_500 < len(KANBAN_GUIDANCE) < 4_096, ( f"KANBAN_GUIDANCE is {len(KANBAN_GUIDANCE)} chars — too short (missing?) or too long" ) + + +# --------------------------------------------------------------------------- +# Worker task-ownership enforcement (regression tests for #19534) +# --------------------------------------------------------------------------- +# +# A worker process has HERMES_KANBAN_TASK set to its own task id. The +# destructive tools (kanban_complete, kanban_block, kanban_heartbeat) +# must refuse to operate on any OTHER task id, even if the caller +# supplies an explicit `task_id` argument. Workers legitimately call +# kanban_show / kanban_comment / kanban_create / kanban_link on other +# tasks, so those are unrestricted. +# +# Orchestrator profiles (no HERMES_KANBAN_TASK in env) are intentionally +# exempt — their job is routing, and they sometimes close out child +# tasks on behalf of the child. 
+ + +def test_worker_complete_rejects_foreign_task_id(worker_env): + """A worker cannot complete a task that isn't its own (#19534).""" + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + other = kb.create_task(conn, title="sibling") + conn.execute("UPDATE tasks SET status='ready' WHERE id=?", (other,)) + conn.commit() + finally: + conn.close() + + from tools import kanban_tools as kt + out = kt._handle_complete({"task_id": other, "summary": "HIJACK"}) + d = json.loads(out) + assert d.get("ok") is not True + assert "refusing to mutate" in d.get("error", "") + + # Sibling task must be untouched. + conn = kb.connect() + try: + assert kb.get_task(conn, other).status == "ready" + finally: + conn.close() + + +def test_worker_block_rejects_foreign_task_id(worker_env): + """A worker cannot block a task that isn't its own (#19534).""" + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + other = kb.create_task(conn, title="sibling") + conn.execute("UPDATE tasks SET status='ready' WHERE id=?", (other,)) + conn.commit() + finally: + conn.close() + + from tools import kanban_tools as kt + out = kt._handle_block({"task_id": other, "reason": "evil"}) + d = json.loads(out) + assert "refusing to mutate" in d.get("error", "") + + conn = kb.connect() + try: + assert kb.get_task(conn, other).status == "ready" + finally: + conn.close() + + +def test_worker_heartbeat_rejects_foreign_task_id(worker_env): + """A worker cannot heartbeat a task that isn't its own (#19534).""" + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + other = kb.create_task(conn, title="sibling") + # Put sibling in running state so heartbeat would otherwise succeed. 
+ conn.execute("UPDATE tasks SET status='running' WHERE id=?", (other,)) + conn.commit() + finally: + conn.close() + + from tools import kanban_tools as kt + out = kt._handle_heartbeat({"task_id": other}) + d = json.loads(out) + assert "refusing to mutate" in d.get("error", "") + + +def test_worker_complete_own_task_still_works(worker_env): + """The ownership check doesn't break the normal own-task happy path.""" + from tools import kanban_tools as kt + # Both implicit (no task_id arg) and explicit (matching env) must work. + out = kt._handle_complete({"task_id": worker_env, "summary": "explicit own"}) + d = json.loads(out) + assert d.get("ok") is True and d.get("task_id") == worker_env + + +def test_orchestrator_complete_any_task_allowed(monkeypatch, tmp_path): + """Orchestrator profiles (no HERMES_KANBAN_TASK) can still complete + any task via explicit task_id. The check only applies to workers.""" + monkeypatch.delenv("HERMES_KANBAN_TASK", raising=False) + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + from pathlib import Path as _P + monkeypatch.setattr(_P, "home", lambda: tmp_path) + + from hermes_cli import kanban_db as kb + kb._INITIALIZED_PATHS.clear() + kb.init_db() + conn = kb.connect() + try: + tid = kb.create_task(conn, title="child to close out") + conn.execute("UPDATE tasks SET status='ready' WHERE id=?", (tid,)) + conn.commit() + finally: + conn.close() + + from tools import kanban_tools as kt + out = kt._handle_complete({"task_id": tid, "summary": "orchestrator close"}) + d = json.loads(out) + assert d.get("ok") is True and d.get("task_id") == tid diff --git a/tests/tools/test_mcp_oauth.py b/tests/tools/test_mcp_oauth.py index db0342e993..319620e412 100644 --- a/tests/tools/test_mcp_oauth.py +++ b/tests/tools/test_mcp_oauth.py @@ -440,6 +440,7 @@ class TestBuildOAuthAuthNonInteractive: def test_build_client_metadata_basic(): """_build_client_metadata returns metadata with expected defaults.""" + 
pytest.importorskip("mcp") from tools.mcp_oauth import _build_client_metadata, _configure_callback_port cfg = {"client_name": "Test Client"} @@ -453,6 +454,7 @@ def test_build_client_metadata_basic(): def test_build_client_metadata_without_secret_is_public(): """Without client_secret, token endpoint auth is 'none' (public client).""" + pytest.importorskip("mcp") from tools.mcp_oauth import _build_client_metadata, _configure_callback_port cfg = {} @@ -463,6 +465,7 @@ def test_build_client_metadata_without_secret_is_public(): def test_build_client_metadata_with_secret_is_confidential(): """With client_secret, token endpoint auth is 'client_secret_post'.""" + pytest.importorskip("mcp") from tools.mcp_oauth import _build_client_metadata, _configure_callback_port cfg = {"client_secret": "shh"} diff --git a/tests/tools/test_mcp_tool_session_expired.py b/tests/tools/test_mcp_tool_session_expired.py index 67e6e58741..4533282e70 100644 --- a/tests/tools/test_mcp_tool_session_expired.py +++ b/tests/tools/test_mcp_tool_session_expired.py @@ -46,6 +46,13 @@ def test_is_session_expired_detects_session_not_found(): assert _is_session_expired_error(RuntimeError("Unknown session: abc123")) is True +def test_is_session_expired_detects_session_terminated(): + """Remote Playwright MCP reports transport loss as ``Session terminated``.""" + from tools.mcp_tool import _is_session_expired_error + + assert _is_session_expired_error(RuntimeError("Session terminated")) is True + + def test_is_session_expired_is_case_insensitive(): """Match uses lower-cased comparison so servers that emit the message in different cases (SDK formatter quirks) still trigger.""" diff --git a/tests/tools/test_session_search.py b/tests/tools/test_session_search.py index 304387e1fe..468a492ad8 100644 --- a/tests/tools/test_session_search.py +++ b/tests/tools/test_session_search.py @@ -498,3 +498,65 @@ class TestSessionSearch: assert result["count"] == 0 assert result["results"] == [] assert 
result["sessions_searched"] == 0 + + def test_source_from_resolved_parent_not_fts5_child(self): + """source in output must reflect the resolved parent session, not the child that matched FTS5. + + Regression test for #15909: when a delegation child session (source='telegram') + resolves to a parent (source='api_server'), the result entry must report + 'api_server', not 'telegram'. + """ + from unittest.mock import MagicMock, AsyncMock, patch as _patch + from tools.session_search_tool import session_search + + mock_db = MagicMock() + # FTS5 hit is in the child delegation session which carries source='telegram' + mock_db.search_messages.return_value = [ + { + "session_id": "child_sid", + "content": "hello world", + "source": "telegram", # child session source — wrong value to surface + "session_started": 1709400000, + "model": "gpt-4o-mini", + }, + ] + + def _get_session(session_id): + if session_id == "child_sid": + return { + "id": "child_sid", + "parent_session_id": "parent_sid", + "source": "telegram", + "started_at": 1709400000, + "model": "gpt-4o-mini", + } + if session_id == "parent_sid": + return { + "id": "parent_sid", + "parent_session_id": None, + "source": "api_server", # correct parent source + "started_at": 1709300000, + "model": "gpt-4o-mini", + } + return None + + mock_db.get_session.side_effect = _get_session + mock_db.get_messages_as_conversation.return_value = [ + {"role": "user", "content": "hello world"}, + {"role": "assistant", "content": "hi there"}, + ] + + with _patch( + "tools.session_search_tool.async_call_llm", + new_callable=AsyncMock, + side_effect=RuntimeError("no provider"), + ): + result = json.loads(session_search(query="hello world", db=mock_db)) + + assert result["success"] is True + assert result["count"] == 1 + entry = result["results"][0] + assert entry["session_id"] == "parent_sid", "should report resolved parent session ID" + assert entry["source"] == "api_server", ( + f"source should be parent's 'api_server', got 
{entry['source']!r}" + ) diff --git a/tests/tools/test_skill_manager_tool.py b/tests/tools/test_skill_manager_tool.py index 004924b9f4..e24e19dea1 100644 --- a/tests/tools/test_skill_manager_tool.py +++ b/tests/tools/test_skill_manager_tool.py @@ -531,10 +531,41 @@ class TestSkillManageDispatcher: assert result["success"] is False def test_full_create_via_dispatcher(self, tmp_path): + """Foreground create does NOT mark the skill as agent-created. + + Skills created by user-directed foreground turns belong to the user; + only the background self-improvement review fork should mark its + own sediment as agent-created (so the curator can later consolidate + or prune it). + """ with _skill_dir(tmp_path): raw = skill_manage(action="create", name="test-skill", content=VALID_SKILL_CONTENT) + from tools.skill_usage import load_usage + usage = load_usage() result = json.loads(raw) assert result["success"] is True + # No provenance marker on a foreground create — record either missing + # entirely (telemetry best-effort) or present with created_by unset. 
+ rec = usage.get("test-skill") or {} + assert rec.get("created_by") in (None, "", False) + + def test_create_from_background_review_marks_agent_created(self, tmp_path): + """Background-review fork creates ARE marked as agent-created.""" + from tools.skill_provenance import set_current_write_origin, BACKGROUND_REVIEW + token = set_current_write_origin(BACKGROUND_REVIEW) + try: + with _skill_dir(tmp_path): + raw = skill_manage( + action="create", name="review-sediment", content=VALID_SKILL_CONTENT + ) + from tools.skill_usage import load_usage + usage = load_usage() + finally: + from tools.skill_provenance import reset_current_write_origin + reset_current_write_origin(token) + result = json.loads(raw) + assert result["success"] is True + assert usage["review-sediment"]["created_by"] == "agent" def test_delete_via_dispatcher_threads_absorbed_into(self, tmp_path): # Dispatcher must plumb absorbed_into through to _delete_skill so the diff --git a/tests/tools/test_skill_provenance.py b/tests/tools/test_skill_provenance.py new file mode 100644 index 0000000000..77f505bb86 --- /dev/null +++ b/tests/tools/test_skill_provenance.py @@ -0,0 +1,102 @@ +"""Tests for tools/skill_provenance.py — write-origin ContextVar.""" + +import contextvars + +import pytest + + +def test_default_origin_is_foreground(): + from tools.skill_provenance import get_current_write_origin + # In a fresh ContextVar context, default kicks in. 
+ ctx = contextvars.copy_context() + origin = ctx.run(get_current_write_origin) + assert origin == "foreground" + + +def test_set_and_get_origin(): + from tools.skill_provenance import ( + set_current_write_origin, + reset_current_write_origin, + get_current_write_origin, + ) + token = set_current_write_origin("background_review") + try: + assert get_current_write_origin() == "background_review" + finally: + reset_current_write_origin(token) + + +def test_reset_restores_prior_origin(): + from tools.skill_provenance import ( + set_current_write_origin, + reset_current_write_origin, + get_current_write_origin, + ) + outer = set_current_write_origin("assistant_tool") + try: + inner = set_current_write_origin("background_review") + try: + assert get_current_write_origin() == "background_review" + finally: + reset_current_write_origin(inner) + assert get_current_write_origin() == "assistant_tool" + finally: + reset_current_write_origin(outer) + + +def test_is_background_review_truthy_only_for_review(): + from tools.skill_provenance import ( + set_current_write_origin, + reset_current_write_origin, + is_background_review, + BACKGROUND_REVIEW, + ) + for origin, expected in ( + ("foreground", False), + ("assistant_tool", False), + ("random_other_value", False), + (BACKGROUND_REVIEW, True), + ): + token = set_current_write_origin(origin) + try: + assert is_background_review() is expected, ( + f"is_background_review() wrong for origin={origin!r}" + ) + finally: + reset_current_write_origin(token) + + +def test_empty_origin_falls_back_to_foreground(): + from tools.skill_provenance import ( + set_current_write_origin, + reset_current_write_origin, + get_current_write_origin, + ) + token = set_current_write_origin("") + try: + # Empty is coerced to "foreground" at the set() boundary. 
+ assert get_current_write_origin() == "foreground" + finally: + reset_current_write_origin(token) + + +def test_context_isolation_between_copies(): + """ContextVar scoping: modifications in one copy do not leak out.""" + from tools.skill_provenance import ( + set_current_write_origin, + get_current_write_origin, + BACKGROUND_REVIEW, + ) + + # Start at the module default. + original = get_current_write_origin() + + def _run_in_copy(): + set_current_write_origin(BACKGROUND_REVIEW) + return get_current_write_origin() + + ctx = contextvars.copy_context() + inside = ctx.run(_run_in_copy) + assert inside == BACKGROUND_REVIEW + # Parent context unaffected. + assert get_current_write_origin() == original diff --git a/tests/tools/test_skill_usage.py b/tests/tools/test_skill_usage.py index 7dd92eb18c..b66e2bba76 100644 --- a/tests/tools/test_skill_usage.py +++ b/tests/tools/test_skill_usage.py @@ -194,10 +194,11 @@ def test_forget_removes_record(skills_home): # --------------------------------------------------------------------------- def test_agent_created_excludes_bundled(skills_home): - from tools.skill_usage import list_agent_created_skill_names + from tools.skill_usage import list_agent_created_skill_names, mark_agent_created skills_dir = skills_home / "skills" _write_skill(skills_dir, "bundled-skill", category="github") _write_skill(skills_dir, "my-skill") + mark_agent_created("my-skill") # Seed a bundled manifest marking bundled-skill as upstream (skills_dir / ".bundled_manifest").write_text( "bundled-skill:abc123\n", encoding="utf-8", @@ -208,10 +209,11 @@ def test_agent_created_excludes_bundled(skills_home): def test_agent_created_excludes_hub_installed(skills_home): - from tools.skill_usage import list_agent_created_skill_names + from tools.skill_usage import list_agent_created_skill_names, mark_agent_created skills_dir = skills_home / "skills" _write_skill(skills_dir, "hub-skill") _write_skill(skills_dir, "my-skill") + mark_agent_created("my-skill") hub_dir = 
skills_dir / ".hub" hub_dir.mkdir() (hub_dir / "lock.json").write_text( @@ -238,9 +240,10 @@ def test_is_agent_created(skills_home): def test_agent_created_skips_archive_and_hub_dirs(skills_home): - from tools.skill_usage import list_agent_created_skill_names + from tools.skill_usage import list_agent_created_skill_names, mark_agent_created skills_dir = skills_home / "skills" _write_skill(skills_dir, "real-skill") + mark_agent_created("real-skill") # Dot-prefixed dirs must be ignored even if they contain SKILL.md archive = skills_dir / ".archive" / "old-skill" archive.mkdir(parents=True) @@ -368,27 +371,41 @@ def test_archive_collision_gets_suffix(skills_home): # Reporting # --------------------------------------------------------------------------- -def test_agent_created_report_includes_defaults(skills_home): - from tools.skill_usage import agent_created_report, bump_view +def test_agent_created_report_includes_marked_skills_with_defaults(skills_home): + from tools.skill_usage import agent_created_report, bump_view, mark_agent_created skills_dir = skills_home / "skills" _write_skill(skills_dir, "a") _write_skill(skills_dir, "b") + mark_agent_created("a") + mark_agent_created("b") bump_view("a") rows = agent_created_report() by_name = {r["name"]: r for r in rows} assert "a" in by_name and "b" in by_name assert by_name["a"]["view_count"] == 1 - # b has no usage record yet — must still appear with defaults + # b has only the provenance marker — activity fields still default. 
assert by_name["b"]["view_count"] == 0 assert by_name["b"]["state"] == "active" +def test_manual_skill_with_usage_is_not_curator_managed(skills_home): + from tools.skill_usage import agent_created_report, bump_view, list_agent_created_skill_names + skills_dir = skills_home / "skills" + _write_skill(skills_dir, "manual-skill") + + bump_view("manual-skill") + + assert "manual-skill" not in list_agent_created_skill_names() + assert "manual-skill" not in {r["name"] for r in agent_created_report()} + + def test_agent_created_report_excludes_bundled_and_hub(skills_home): - from tools.skill_usage import agent_created_report + from tools.skill_usage import agent_created_report, mark_agent_created skills_dir = skills_home / "skills" _write_skill(skills_dir, "mine") _write_skill(skills_dir, "bundled") _write_skill(skills_dir, "hubbed") + mark_agent_created("mine") (skills_dir / ".bundled_manifest").write_text("bundled:abc\n", encoding="utf-8") hub = skills_dir / ".hub" hub.mkdir() @@ -414,6 +431,7 @@ def test_agent_created_report_derives_activity_from_view_and_patch(skills_home, ]) monkeypatch.setattr(skill_usage, "_now_iso", lambda: next(timestamps)) + skill_usage.mark_agent_created("mine") skill_usage.bump_view("mine") skill_usage.bump_patch("mine") diff --git a/tests/tools/test_skills_hub.py b/tests/tools/test_skills_hub.py index 8e3453c04d..1969272411 100644 --- a/tests/tools/test_skills_hub.py +++ b/tests/tools/test_skills_hub.py @@ -901,6 +901,69 @@ class TestCheckForSkillUpdates: assert bundle_content_hash(bundle) == content_hash(skill_dir) + def test_bundle_content_hash_accepts_binary_files(self): + bundle = SkillBundle( + name="demo-binary-skill", + files={ + "SKILL.md": "# Demo\n", + "assets/logo.png": b"\x89PNG\r\n\x1a\nbinary", + }, + source="github", + identifier="owner/repo/demo-binary-skill", + trust_level="community", + ) + + digest = bundle_content_hash(bundle) + + assert digest.startswith("sha256:") + + def 
test_bundle_content_hash_bytes_matches_str_equivalent(self): + """Bytes content must hash identically to its str-decoded form.""" + text_bundle = SkillBundle( + name="demo-skill", + files={ + "SKILL.md": "same content", + "references/checklist.md": "- [ ] security\n", + }, + source="github", + identifier="owner/repo/demo-skill", + trust_level="community", + ) + bytes_bundle = SkillBundle( + name="demo-skill", + files={ + "SKILL.md": b"same content", + "references/checklist.md": b"- [ ] security\n", + }, + source="github", + identifier="owner/repo/demo-skill", + trust_level="community", + ) + + assert bundle_content_hash(bytes_bundle) == bundle_content_hash(text_bundle) + + def test_bundle_content_hash_mixed_matches_on_disk(self, tmp_path): + """In-memory bundle hash must equal on-disk content_hash for mixed bytes+str.""" + from tools.skills_guard import content_hash + + bundle = SkillBundle( + name="demo-skill", + files={ + "SKILL.md": b"# Demo Skill\n", + "references/checklist.md": "- [ ] security\n", + }, + source="github", + identifier="owner/repo/demo-skill", + trust_level="community", + ) + skill_dir = tmp_path / "demo-skill" + skill_dir.mkdir() + (skill_dir / "SKILL.md").write_bytes(b"# Demo Skill\n") + (skill_dir / "references").mkdir() + (skill_dir / "references" / "checklist.md").write_text("- [ ] security\n") + + assert bundle_content_hash(bundle) == content_hash(skill_dir) + def test_reports_update_when_remote_hash_differs(self): lock = MagicMock() lock.list_installed.return_value = [{ diff --git a/tests/tools/test_tool_result_storage.py b/tests/tools/test_tool_result_storage.py index 0bbb95bbd6..3cea3b59ff 100644 --- a/tests/tools/test_tool_result_storage.py +++ b/tests/tools/test_tool_result_storage.py @@ -516,12 +516,25 @@ class TestPerToolThresholds: except ImportError: pytest.skip("terminal_tool not importable in test env") - def test_read_file_never_persisted(self): + def test_read_file_result_size_cap(self): from tools.registry import registry 
try: import tools.file_tools # noqa: F401 val = registry.get_max_result_size("read_file") - assert val == float("inf") + assert val == 100_000 + except ImportError: + pytest.skip("file_tools not importable in test env") + + def test_read_file_registry_cap_is_100k(self): + """Regression test: read_file must have a 100_000 char registry cap (Layer 2 safety net).""" + from tools.registry import registry + try: + import tools.file_tools # noqa: F401 + val = registry.get_max_result_size("read_file") + assert val == 100_000, ( + f"read_file registry cap must be 100_000, got {val!r}. " + "float('inf') is not allowed — it disables the Layer 2 result-size guard." + ) except ImportError: pytest.skip("file_tools not importable in test env") diff --git a/tests/tools/test_transcription_tools.py b/tests/tools/test_transcription_tools.py index c075cfa9eb..00f5bf3542 100644 --- a/tests/tools/test_transcription_tools.py +++ b/tests/tools/test_transcription_tools.py @@ -415,6 +415,10 @@ class TestTranscribeLocalCommand: # _transcribe_local — additional tests # ============================================================================ +@pytest.mark.skipif( + not __import__("importlib").util.find_spec("faster_whisper"), + reason="faster_whisper not installed", +) class TestTranscribeLocalExtended: def test_model_reuse_on_second_call(self, tmp_path): """Second call with same model should NOT reload the model.""" diff --git a/tests/tools/test_video_analyze.py b/tests/tools/test_video_analyze.py new file mode 100644 index 0000000000..62987d96b2 --- /dev/null +++ b/tests/tools/test_video_analyze.py @@ -0,0 +1,337 @@ +"""Tests for video_analyze tool in tools/vision_tools.py.""" + +import asyncio +import json +import os +from pathlib import Path +from typing import Awaitable +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from tools.vision_tools import ( + _detect_video_mime_type, + _video_to_base64_data_url, + _handle_video_analyze, + _MAX_VIDEO_BASE64_BYTES, + 
_VIDEO_MIME_TYPES, + _VIDEO_SIZE_WARN_BYTES, + video_analyze_tool, + VIDEO_ANALYZE_SCHEMA, +) + + +# --------------------------------------------------------------------------- +# _detect_video_mime_type +# --------------------------------------------------------------------------- + + +class TestDetectVideoMimeType: + """Extension-based MIME detection for video files.""" + + def test_mp4(self, tmp_path): + p = tmp_path / "clip.mp4" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) == "video/mp4" + + def test_webm(self, tmp_path): + p = tmp_path / "clip.webm" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) == "video/webm" + + def test_mov(self, tmp_path): + p = tmp_path / "clip.mov" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) == "video/mov" + + def test_avi_fallback_mp4(self, tmp_path): + p = tmp_path / "clip.avi" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) == "video/mp4" + + def test_mkv_fallback_mp4(self, tmp_path): + p = tmp_path / "clip.mkv" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) == "video/mp4" + + def test_mpeg(self, tmp_path): + p = tmp_path / "clip.mpeg" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) == "video/mpeg" + + def test_mpg(self, tmp_path): + p = tmp_path / "clip.mpg" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) == "video/mpeg" + + def test_unsupported_extension(self, tmp_path): + p = tmp_path / "clip.flv" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) is None + + def test_case_insensitive(self, tmp_path): + p = tmp_path / "clip.MP4" + p.write_bytes(b"\x00" * 10) + assert _detect_video_mime_type(p) == "video/mp4" + + +# --------------------------------------------------------------------------- +# _video_to_base64_data_url +# --------------------------------------------------------------------------- + + +class TestVideoToBase64DataUrl: + """Base64 encoding of video files.""" + + def 
test_produces_data_url(self, tmp_path): + p = tmp_path / "test.mp4" + p.write_bytes(b"\x00\x01\x02\x03") + result = _video_to_base64_data_url(p) + assert result.startswith("data:video/mp4;base64,") + + def test_custom_mime_type(self, tmp_path): + p = tmp_path / "test.webm" + p.write_bytes(b"\x00\x01\x02\x03") + result = _video_to_base64_data_url(p, mime_type="video/webm") + assert result.startswith("data:video/webm;base64,") + + def test_default_mime_for_unknown_ext(self, tmp_path): + p = tmp_path / "test.xyz" + p.write_bytes(b"\x00\x01\x02\x03") + result = _video_to_base64_data_url(p) + # Falls back to video/mp4 + assert result.startswith("data:video/mp4;base64,") + + +# --------------------------------------------------------------------------- +# Schema validation +# --------------------------------------------------------------------------- + + +class TestVideoAnalyzeSchema: + """Schema structure is correct.""" + + def test_schema_name(self): + assert VIDEO_ANALYZE_SCHEMA["name"] == "video_analyze" + + def test_schema_has_required_fields(self): + params = VIDEO_ANALYZE_SCHEMA["parameters"] + assert "video_url" in params["properties"] + assert "question" in params["properties"] + assert params["required"] == ["video_url", "question"] + + def test_schema_description_mentions_video(self): + assert "video" in VIDEO_ANALYZE_SCHEMA["description"].lower() + + +# --------------------------------------------------------------------------- +# _handle_video_analyze handler +# --------------------------------------------------------------------------- + + +class TestHandleVideoAnalyze: + """Tests for the registry handler wrapper.""" + + def test_returns_awaitable(self, tmp_path, monkeypatch): + video_file = tmp_path / "test.mp4" + video_file.write_bytes(b"\x00" * 100) + monkeypatch.setenv("AUXILIARY_VIDEO_MODEL", "") + monkeypatch.setenv("AUXILIARY_VISION_MODEL", "") + + with patch("tools.vision_tools.video_analyze_tool", new_callable=AsyncMock) as mock_tool: + 
mock_tool.return_value = json.dumps({"success": True, "analysis": "test"}) + result = _handle_video_analyze({"video_url": str(video_file), "question": "what is this?"}) + # Should return an awaitable (coroutine) + assert asyncio.iscoroutine(result) + # Clean up the unawaited coroutine + result.close() + + def test_uses_auxiliary_video_model_env(self, tmp_path, monkeypatch): + monkeypatch.setenv("AUXILIARY_VIDEO_MODEL", "google/gemini-2.5-flash") + monkeypatch.setenv("AUXILIARY_VISION_MODEL", "other-model") + + with patch("tools.vision_tools.video_analyze_tool", new_callable=AsyncMock) as mock_tool: + mock_tool.return_value = json.dumps({"success": True, "analysis": "ok"}) + asyncio.get_event_loop().run_until_complete( + _handle_video_analyze({"video_url": "/tmp/test.mp4", "question": "test"}) + ) + args = mock_tool.call_args[0] + assert args[2] == "google/gemini-2.5-flash" + + def test_falls_back_to_vision_model_env(self, tmp_path, monkeypatch): + monkeypatch.setenv("AUXILIARY_VIDEO_MODEL", "") + monkeypatch.setenv("AUXILIARY_VISION_MODEL", "google/gemini-flash") + + with patch("tools.vision_tools.video_analyze_tool", new_callable=AsyncMock) as mock_tool: + mock_tool.return_value = json.dumps({"success": True, "analysis": "ok"}) + asyncio.get_event_loop().run_until_complete( + _handle_video_analyze({"video_url": "/tmp/test.mp4", "question": "test"}) + ) + args = mock_tool.call_args[0] + assert args[2] == "google/gemini-flash" + + +# --------------------------------------------------------------------------- +# video_analyze_tool — integration-style tests with mocked LLM +# --------------------------------------------------------------------------- + + +class TestVideoAnalyzeTool: + """Core video analysis function tests.""" + + def _run(self, coro): + return asyncio.get_event_loop().run_until_complete(coro) + + def test_local_file_success(self, tmp_path, monkeypatch): + """Analyze a local video file — happy path.""" + video = tmp_path / "demo.mp4" + 
video.write_bytes(b"\x00" * 1024) + + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = "A short video showing a demo." + + with patch("tools.vision_tools.async_call_llm", new_callable=AsyncMock, return_value=mock_response): + with patch("tools.vision_tools.extract_content_or_reasoning", return_value="A short video showing a demo."): + result = self._run(video_analyze_tool(str(video), "What is this?")) + + data = json.loads(result) + assert data["success"] is True + assert "demo" in data["analysis"].lower() + + def test_local_file_not_found(self, tmp_path): + """Non-existent file raises appropriate error.""" + result = self._run(video_analyze_tool("/nonexistent/video.mp4", "What?")) + data = json.loads(result) + assert data["success"] is False + assert "invalid video source" in data["analysis"].lower() + + def test_unsupported_format(self, tmp_path): + """Unsupported extension raises error.""" + video = tmp_path / "clip.flv" + video.write_bytes(b"\x00" * 100) + + result = self._run(video_analyze_tool(str(video), "What is this?")) + data = json.loads(result) + assert data["success"] is False + assert "unsupported video format" in data["analysis"].lower() + + def test_video_too_large(self, tmp_path, monkeypatch): + """Video exceeding max size is rejected.""" + video = tmp_path / "huge.mp4" + # Don't actually write 50MB — mock the stat + video.write_bytes(b"\x00" * 100) + + # Patch the base64 encoding to return something huge + with patch("tools.vision_tools._video_to_base64_data_url") as mock_encode: + mock_encode.return_value = "data:video/mp4;base64," + "A" * (_MAX_VIDEO_BASE64_BYTES + 1) + result = self._run(video_analyze_tool(str(video), "What?")) + + data = json.loads(result) + assert data["success"] is False + assert "too large" in data["analysis"].lower() + + def test_interrupt_check(self, tmp_path): + """Tool respects interrupt flag.""" + video = tmp_path / "test.mp4" + video.write_bytes(b"\x00" * 
100) + + with patch("tools.interrupt.is_interrupted", return_value=True): + result = self._run(video_analyze_tool(str(video), "What?")) + + data = json.loads(result) + assert data["success"] is False + + def test_empty_response_retries(self, tmp_path): + """Retries once on empty model response.""" + video = tmp_path / "test.mp4" + video.write_bytes(b"\x00" * 100) + + call_count = 0 + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = "Video analysis result." + + async def fake_llm(**kwargs): + nonlocal call_count + call_count += 1 + return mock_response + + with patch("tools.vision_tools.async_call_llm", side_effect=fake_llm): + with patch("tools.vision_tools.extract_content_or_reasoning", side_effect=["", "Video analysis result."]): + result = self._run(video_analyze_tool(str(video), "What?")) + + data = json.loads(result) + assert data["success"] is True + assert call_count == 2 # Initial call + retry + + def test_file_scheme_stripped(self, tmp_path): + """file:// prefix is stripped correctly.""" + video = tmp_path / "test.mp4" + video.write_bytes(b"\x00" * 100) + + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = "OK" + + with patch("tools.vision_tools.async_call_llm", new_callable=AsyncMock, return_value=mock_response): + with patch("tools.vision_tools.extract_content_or_reasoning", return_value="OK"): + result = self._run(video_analyze_tool(f"file://{video}", "What?")) + + data = json.loads(result) + assert data["success"] is True + + def test_api_message_format(self, tmp_path): + """Verify the message sent to LLM uses video_url content type.""" + video = tmp_path / "test.mp4" + video.write_bytes(b"\x00" * 100) + + captured_kwargs = {} + + async def capture_llm(**kwargs): + captured_kwargs.update(kwargs) + mock_response = MagicMock() + mock_response.choices = [MagicMock()] + mock_response.choices[0].message.content = "OK" + return 
mock_response + + with patch("tools.vision_tools.async_call_llm", side_effect=capture_llm): + with patch("tools.vision_tools.extract_content_or_reasoning", return_value="OK"): + self._run(video_analyze_tool(str(video), "Describe this")) + + messages = captured_kwargs["messages"] + assert len(messages) == 1 + content = messages[0]["content"] + assert len(content) == 2 + assert content[0]["type"] == "text" + assert content[1]["type"] == "video_url" + assert "video_url" in content[1] + assert content[1]["video_url"]["url"].startswith("data:video/mp4;base64,") + + +# --------------------------------------------------------------------------- +# Toolset registration +# --------------------------------------------------------------------------- + + +class TestVideoToolsetRegistration: + """Verify the tool is registered correctly.""" + + def test_registered_in_video_toolset(self): + from tools.registry import registry + entry = registry.get_entry("video_analyze") + assert entry is not None + assert entry.toolset == "video" + assert entry.is_async is True + assert entry.emoji == "🎬" + + def test_not_in_core_tools(self): + """video_analyze should NOT be in _HERMES_CORE_TOOLS (default disabled).""" + from toolsets import _HERMES_CORE_TOOLS + assert "video_analyze" not in _HERMES_CORE_TOOLS + + def test_in_video_toolset_definition(self): + """Toolset 'video' should contain video_analyze.""" + from toolsets import TOOLSETS + assert "video" in TOOLSETS + assert "video_analyze" in TOOLSETS["video"]["tools"] diff --git a/tests/tools/test_voice_cli_integration.py b/tests/tools/test_voice_cli_integration.py index e7d8811e02..93dffa649a 100644 --- a/tests/tools/test_voice_cli_integration.py +++ b/tests/tools/test_voice_cli_integration.py @@ -1040,6 +1040,25 @@ class TestDisableVoiceModeReal: class TestVoiceSpeakResponseReal: """Tests _voice_speak_response with real CLI instance.""" + def test_async_scheduling_clears_done_before_thread_start(self): + cli = 
_make_voice_cli(_voice_tts=True) + starts = [] + + class FakeThread: + def __init__(self, target=None, args=(), daemon=None): + self.target = target + self.args = args + self.daemon = daemon + + def start(self): + starts.append(cli._voice_tts_done.is_set()) + + with patch("cli.threading.Thread", FakeThread): + cli._voice_speak_response_async("Hello") + + assert starts == [False] + assert not cli._voice_tts_done.is_set() + @patch("cli._cprint") def test_early_return_when_tts_off(self, _cp): cli = _make_voice_cli(_voice_tts=False) diff --git a/tests/tui_gateway/test_protocol.py b/tests/tui_gateway/test_protocol.py index 88282b023c..549a2da633 100644 --- a/tests/tui_gateway/test_protocol.py +++ b/tests/tui_gateway/test_protocol.py @@ -479,6 +479,99 @@ def test_slash_exec_rejects_skill_commands(server): assert "skill command" in resp["error"]["message"] +def test_slash_exec_handles_plugin_commands_in_live_gateway(server): + """Plugin slash commands return normal slash.exec output without using the worker.""" + sid = "test-session" + + class Worker: + def __init__(self): + self.calls = [] + + def run(self, cmd): + self.calls.append(cmd) + return f"worker:{cmd}" + + worker = Worker() + server._sessions[sid] = {"session_key": sid, "agent": None, "slash_worker": worker} + + with patch( + "hermes_cli.plugins.get_plugin_command_handler", + lambda name: (lambda arg: f"plugin:{arg}") if name == "plugin-cmd" else None, + ): + resp = server.handle_request({ + "id": "r-plugin-slash", + "method": "slash.exec", + "params": {"command": "plugin-cmd hello", "session_id": sid}, + }) + + assert "error" not in resp + assert resp["result"] == {"output": "plugin:hello"} + assert worker.calls == [] + + +def test_slash_exec_plugin_lookup_failure_falls_back_to_worker(server): + """Plugin discovery failures must not break ordinary slash-worker commands.""" + sid = "test-session" + + class Worker: + def __init__(self): + self.calls = [] + + def run(self, cmd): + self.calls.append(cmd) + return 
f"worker:{cmd}" + + worker = Worker() + server._sessions[sid] = {"session_key": sid, "agent": None, "slash_worker": worker} + + with patch( + "hermes_cli.plugins.get_plugin_command_handler", + side_effect=RuntimeError("discovery boom"), + ): + resp = server.handle_request({ + "id": "r-plugin-lookup-failure", + "method": "slash.exec", + "params": {"command": "help", "session_id": sid}, + }) + + assert "error" not in resp + assert resp["result"] == {"output": "worker:help"} + assert worker.calls == ["help"] + + +def test_slash_exec_plugin_handler_error_returns_output(server): + """Plugin handler failures return slash output so the TUI does not redispatch.""" + sid = "test-session" + + class Worker: + def __init__(self): + self.calls = [] + + def run(self, cmd): + self.calls.append(cmd) + return f"worker:{cmd}" + + def handler(arg): + raise RuntimeError(f"handler boom: {arg}") + + worker = Worker() + server._sessions[sid] = {"session_key": sid, "agent": None, "slash_worker": worker} + + with patch( + "hermes_cli.plugins.get_plugin_command_handler", + lambda name: handler if name == "plugin-cmd" else None, + ): + resp = server.handle_request({ + "id": "r-plugin-handler-error", + "method": "slash.exec", + "params": {"command": "plugin-cmd hello", "session_id": sid}, + }) + + assert "error" not in resp + assert resp["result"] == {"output": "Plugin command error: handler boom: hello"} + assert worker.calls == [] + + @pytest.mark.parametrize("cmd", ["retry", "queue hello", "q hello", "steer fix the test", "plan"]) def test_slash_exec_rejects_pending_input_commands(server, cmd): """slash.exec must reject commands that use _pending_input in the CLI.""" diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 5cd431de31..f394e5b2f6 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -1482,6 +1482,34 @@ def _run_browser_command( if "AGENT_BROWSER_IDLE_TIMEOUT_MS" not in browser_env: idle_ms = str(BROWSER_SESSION_INACTIVITY_TIMEOUT * 1000) 
browser_env["AGENT_BROWSER_IDLE_TIMEOUT_MS"] = idle_ms + + # Inject --no-sandbox when needed (issue #15765): + # - Running as root: Chromium always refuses to start without it + # - Ubuntu 23.10+ / AppArmor systems: unprivileged user namespaces + # are restricted, causing Chromium to exit with "No usable sandbox" + # even for non-root users running under systemd or containers. + if "AGENT_BROWSER_CHROME_FLAGS" not in browser_env: + _needs_sandbox_bypass = False + if hasattr(os, "geteuid") and os.geteuid() == 0: + _needs_sandbox_bypass = True + logger.debug("browser: running as root — injecting --no-sandbox") + else: + # Detect AppArmor user namespace restrictions (Ubuntu 23.10+) + _userns_restrict = "/proc/sys/kernel/apparmor_restrict_unprivileged_userns" + try: + with open(_userns_restrict) as _f: + if _f.read().strip() == "1": + _needs_sandbox_bypass = True + logger.debug( + "browser: AppArmor userns restrictions detected — " + "injecting --no-sandbox" + ) + except OSError: + pass + if _needs_sandbox_bypass: + browser_env["AGENT_BROWSER_CHROME_FLAGS"] = ( + "--no-sandbox --disable-dev-shm-usage" + ) # Use temp files for stdout/stderr instead of pipes. # agent-browser starts a background daemon that inherits file @@ -2757,17 +2785,40 @@ def _chromium_search_roots() -> List[str]: def _chromium_installed() -> bool: """Return True when a usable Chromium (or headless-shell) build is on disk. + Checks, in order: + + 1. ``AGENT_BROWSER_EXECUTABLE_PATH`` env var — the official way to point + agent-browser at a pre-installed Chrome/Chromium. + 2. System Chrome/Chromium in PATH (``google-chrome``, ``chromium-browser``, + ``chrome``). + 3. Playwright's browser cache (current logic) — directories containing + ``chromium-*`` or ``chromium_headless_shell-*``. + agent-browser (0.26+) downloads Playwright's chromium / headless-shell - builds into ``PLAYWRIGHT_BROWSERS_PATH`` and won't start without them. 
- When the CLI is present but no browser build is, the first browser tool - call hangs for the full command timeout (often ~30s each) before - surfacing a useless error. Guarding the tool behind this check prevents - advertising a capability that will fail at runtime. + builds into ``PLAYWRIGHT_BROWSERS_PATH`` and won't start without at least + one of the three above being present. Without a browser binary the CLI + hangs on first use until the command timeout fires (often ~30s). Guarding + the tool behind this check prevents advertising a capability that will + fail at runtime. """ global _cached_chromium_installed if _cached_chromium_installed is not None: return _cached_chromium_installed + # 1. AGENT_BROWSER_EXECUTABLE_PATH — explicit user-configured browser + ab_path = os.environ.get("AGENT_BROWSER_EXECUTABLE_PATH", "").strip() + if ab_path: + if os.path.isfile(ab_path) or shutil.which(ab_path): + _cached_chromium_installed = True + return True + + # 2. System Chrome/Chromium in PATH (common names) + system_chrome = shutil.which("google-chrome") or shutil.which("chromium-browser") or shutil.which("chrome") + if system_chrome: + _cached_chromium_installed = True + return True + + # 3. Playwright browser cache (legacy — chromium-* / chromium_headless_shell-* dirs) for root in _chromium_search_roots(): if not root or not os.path.isdir(root): continue @@ -2817,7 +2868,12 @@ def check_browser_requirements() -> bool: if _is_camofox_mode(): return True - # The agent-browser CLI is always required + # CDP override mode can connect to an existing remote/local browser endpoint + # without requiring the local agent-browser binary on PATH. + if _get_cdp_override(): + return True + + # The agent-browser CLI is required for local launch and cloud-provider flows. 
try: browser_cmd = _find_agent_browser() except FileNotFoundError: diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index 53e778a7db..ec4b41b3c7 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -128,6 +128,15 @@ def _resolve_model_override(model_obj: Optional[Dict[str, Any]]) -> tuple: return (None, None) model_name = (model_obj.get("model") or "").strip() or None provider_name = (model_obj.get("provider") or "").strip() or None + # Bare "custom" is an incomplete spec — the canonical form is + # "custom:<name>" matching a custom_providers entry. LLMs frequently + # supply the bare type because the schema does not advertise the + # ":<name>" suffix, which used to bypass the pinning path below and + # leave the job stored with an unresolvable "custom" provider. Treat + # the bare value as "no provider supplied" so the current main + # provider gets pinned instead. + if provider_name == "custom": + provider_name = None if model_name and not provider_name: # Pin to the current main provider so the job is stable try: @@ -513,7 +522,7 @@ Important safety rule: cron-run sessions should not recursively schedule more cr "properties": { "provider": { "type": "string", - "description": "Provider name (e.g. 'openrouter', 'anthropic'). Omit to use and pin the current provider." + "description": "Provider name (e.g. 'openrouter', 'anthropic', or 'custom:<name>' for a provider defined in custom_providers config — always include the ':<name>' suffix, never pass the bare 'custom'). Omit to use and pin the current provider." }, "model": { "type": "string", diff --git a/tools/delegate_tool.py b/tools/delegate_tool.py index 844e7bdfb0..5c7c431b25 100644 --- a/tools/delegate_tool.py +++ b/tools/delegate_tool.py @@ -483,8 +483,8 @@ _HEARTBEAT_INTERVAL = 30 # seconds between parent activity heartbeats during de # The idle ceiling stays tight so genuinely stuck children don't mask the gateway # timeout. 
The in-tool ceiling is much higher so legit long-running tools get # time to finish; child_timeout_seconds (default 600s) is still the hard cap. -_HEARTBEAT_STALE_CYCLES_IDLE = 5 # 5 * 30s = 150s idle between turns → stale -_HEARTBEAT_STALE_CYCLES_IN_TOOL = 20 # 20 * 30s = 600s stuck on same tool → stale +_HEARTBEAT_STALE_CYCLES_IDLE = 15 # 15 * 30s = 450s idle between turns → stale +_HEARTBEAT_STALE_CYCLES_IN_TOOL = 40 # 40 * 30s = 1200s stuck on same tool → stale DEFAULT_TOOLSETS = ["terminal", "file", "web"] @@ -1026,6 +1026,29 @@ def _build_child_agent( except Exception as exc: logger.debug("Could not load delegation reasoning_effort: %s", exc) + # Inherit the parent's fallback provider chain so subagents can recover + # from rate-limits and credential exhaustion exactly like the top-level + # agent does. _fallback_chain is a list accepted by AIAgent's + # fallback_model parameter (which handles both list and dict forms). + parent_fallback = getattr(parent_agent, "_fallback_chain", None) or None + + # Inherit the parent's OpenRouter provider-preference filters by default + # (so subagents routed to the same provider honour the same routing + # constraints). BUT: when `delegation.provider` is set the user is + # explicitly asking the child to run on a different provider, and + # parent-level OpenRouter filters (e.g. `only=["Anthropic"]`) would + # silently force the child back onto the parent's provider. Clear the + # filters in that case so the delegated provider is honoured. 
+ child_providers_allowed = getattr(parent_agent, "providers_allowed", None) + child_providers_ignored = getattr(parent_agent, "providers_ignored", None) + child_providers_order = getattr(parent_agent, "providers_order", None) + child_provider_sort = getattr(parent_agent, "provider_sort", None) + if override_provider: + child_providers_allowed = None + child_providers_ignored = None + child_providers_order = None + child_provider_sort = None + child = AIAgent( base_url=effective_base_url, api_key=effective_api_key, @@ -1038,6 +1061,7 @@ def _build_child_agent( max_tokens=getattr(parent_agent, "max_tokens", None), reasoning_config=child_reasoning, prefill_messages=getattr(parent_agent, "prefill_messages", None), + fallback_model=parent_fallback, enabled_toolsets=child_toolsets, quiet_mode=True, ephemeral_system_prompt=child_prompt, @@ -1049,10 +1073,10 @@ def _build_child_agent( thinking_callback=child_thinking_cb, session_db=getattr(parent_agent, "_session_db", None), parent_session_id=getattr(parent_agent, "session_id", None), - providers_allowed=parent_agent.providers_allowed, - providers_ignored=parent_agent.providers_ignored, - providers_order=parent_agent.providers_order, - provider_sort=parent_agent.provider_sort, + providers_allowed=child_providers_allowed, + providers_ignored=child_providers_ignored, + providers_order=child_providers_order, + provider_sort=child_provider_sort, tool_progress_callback=child_progress_cb, iteration_budget=None, # fresh budget per subagent ) @@ -2230,11 +2254,17 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict: """Resolve credentials for subagent delegation. If ``delegation.base_url`` is configured, subagents use that direct - OpenAI-compatible endpoint. Otherwise, if ``delegation.provider`` is - configured, the full credential bundle (base_url, api_key, api_mode, - provider) is resolved via the runtime provider system — the same path used - by CLI/gateway startup. 
This lets subagents run on a completely different - provider:model pair. + OpenAI-compatible endpoint. ``delegation.api_key`` overrides the key; when + omitted, ``api_key`` is returned as ``None`` so ``_build_child_agent`` + inherits the parent agent's key (``effective_api_key = override_api_key or + parent_api_key``). This lets providers that store their key outside + ``OPENAI_API_KEY`` (e.g. ``MINIMAX_API_KEY``, ``DASHSCOPE_API_KEY``) work + without a duplicate config entry. + + Otherwise, if ``delegation.provider`` is configured, the full credential + bundle (base_url, api_key, api_mode, provider) is resolved via the runtime + provider system — the same path used by CLI/gateway startup. This lets + subagents run on a completely different provider:model pair. If neither base_url nor provider is configured, returns None values so the child inherits everything from the parent agent. @@ -2247,12 +2277,13 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict: configured_api_key = str(cfg.get("api_key") or "").strip() or None if configured_base_url: - api_key = configured_api_key or os.getenv("OPENAI_API_KEY", "").strip() - if not api_key: - raise ValueError( - "Delegation base_url is configured but no API key was found. " - "Set delegation.api_key or OPENAI_API_KEY." - ) + # When delegation.api_key is not set, return None so _build_child_agent + # falls back to the parent agent's API key via the credential inheritance + # path (effective_api_key = override_api_key or parent_api_key). This + # lets providers that store their key in a non-OPENAI_API_KEY env var + # (e.g. MINIMAX_API_KEY, DASHSCOPE_API_KEY) work without requiring + # callers to duplicate the key under delegation.api_key. 
+ api_key = configured_api_key # None → inherited from parent in _build_child_agent base_lower = configured_base_url.lower() provider = "custom" @@ -2292,7 +2323,7 @@ def _resolve_delegation_credentials(cfg: dict, parent_agent) -> dict: try: from hermes_cli.runtime_provider import resolve_runtime_provider - runtime = resolve_runtime_provider(requested=configured_provider) + runtime = resolve_runtime_provider(requested=configured_provider, target_model=configured_model) except Exception as exc: raise ValueError( f"Cannot resolve delegation provider '{configured_provider}': {exc}. " @@ -2330,7 +2361,7 @@ def _load_config() -> dict: try: from cli import CLI_CONFIG - cfg = CLI_CONFIG.get("delegation", {}) + cfg = CLI_CONFIG.get("delegation") or {} if cfg: return cfg except Exception: @@ -2339,7 +2370,7 @@ def _load_config() -> dict: from hermes_cli.config import load_config full = load_config() - return full.get("delegation", {}) + return full.get("delegation") or {} except Exception: return {} diff --git a/tools/environments/base.py b/tools/environments/base.py index 2f565fe5f8..3f21f1294b 100644 --- a/tools/environments/base.py +++ b/tools/environments/base.py @@ -405,7 +405,8 @@ class BaseEnvironment(ABC): # Preserve bare ``~`` expansion, but rewrite ``~/...`` through # ``$HOME`` so suffixes with spaces remain a single shell word. quoted_cwd = self._quote_cwd_for_cd(cwd) - parts.append(f"builtin cd {quoted_cwd} || exit 126") + # ``--`` keeps hyphen-prefixed directory names from being parsed as options. 
+ parts.append(f"builtin cd -- {quoted_cwd} || exit 126") # Run the actual command parts.append(f"eval '{escaped}'") diff --git a/tools/file_operations.py b/tools/file_operations.py index aa7a482509..73e739e730 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -53,6 +53,27 @@ WRITE_DENIED_PATHS = build_write_denied_paths(_HOME) WRITE_DENIED_PREFIXES = build_write_denied_prefixes(_HOME) +_OSC_SEQUENCE_RE = re.compile(r"\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)") +_FENCE_MARKER_RE = re.compile(r"'?\x07?__HERMES_FENCE_[A-Za-z0-9]+__\x07?'?") + + +def _strip_terminal_fence_leaks(text: str) -> str: + """Strip leaked terminal fence wrappers from file read output.""" + if not text: + return text + + cleaned_lines: List[str] = [] + for line in text.splitlines(keepends=True): + had_terminal_wrapper = "__HERMES_FENCE_" in line or "\x1b]" in line + cleaned = _OSC_SEQUENCE_RE.sub("", line) + cleaned = _FENCE_MARKER_RE.sub("", cleaned) + cleaned = cleaned.replace("\x07", "") + if had_terminal_wrapper and cleaned.strip("'\r\n\t ") == "": + continue + cleaned_lines.append(cleaned) + return "".join(cleaned_lines) + + def _get_safe_write_root() -> Optional[str]: """Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset. 
@@ -511,8 +532,9 @@ class ShellFileOperations(FileOperations): # File not found - try to suggest similar files return self._suggest_similar_files(path) + stat_output = _strip_terminal_fence_leaks(stat_result.stdout) try: - file_size = int(stat_result.stdout.strip()) + file_size = int(stat_output.strip()) except ValueError: file_size = 0 @@ -536,8 +558,9 @@ class ShellFileOperations(FileOperations): # Read a sample to check for binary content sample_cmd = f"head -c 1000 {self._escape_shell_arg(path)} 2>/dev/null" sample_result = self._exec(sample_cmd) + sample_output = _strip_terminal_fence_leaks(sample_result.stdout) - if self._is_likely_binary(path, sample_result.stdout): + if self._is_likely_binary(path, sample_output): return ReadResult( is_binary=True, file_size=file_size, @@ -551,12 +574,14 @@ class ShellFileOperations(FileOperations): if read_result.exit_code != 0: return ReadResult(error=f"Failed to read file: {read_result.stdout}") + read_output = _strip_terminal_fence_leaks(read_result.stdout) # Get total line count wc_cmd = f"wc -l < {self._escape_shell_arg(path)}" wc_result = self._exec(wc_cmd) + wc_output = _strip_terminal_fence_leaks(wc_result.stdout) try: - total_lines = int(wc_result.stdout.strip()) + total_lines = int(wc_output.strip()) except ValueError: total_lines = 0 @@ -567,7 +592,7 @@ class ShellFileOperations(FileOperations): hint = f"Use offset={end_line + 1} to continue reading (showing {offset}-{end_line} of {total_lines} lines)" return ReadResult( - content=self._add_line_numbers(read_result.stdout, offset), + content=self._add_line_numbers(read_output, offset), total_lines=total_lines, file_size=file_size, truncated=truncated, @@ -637,14 +662,16 @@ class ShellFileOperations(FileOperations): stat_result = self._exec(stat_cmd) if stat_result.exit_code != 0: return self._suggest_similar_files(path) + stat_output = _strip_terminal_fence_leaks(stat_result.stdout) try: - file_size = int(stat_result.stdout.strip()) + file_size = 
int(stat_output.strip()) except ValueError: file_size = 0 if self._is_image(path): return ReadResult(is_image=True, is_binary=True, file_size=file_size) sample_result = self._exec(f"head -c 1000 {self._escape_shell_arg(path)} 2>/dev/null") - if self._is_likely_binary(path, sample_result.stdout): + sample_output = _strip_terminal_fence_leaks(sample_result.stdout) + if self._is_likely_binary(path, sample_output): return ReadResult( is_binary=True, file_size=file_size, error="Binary file — cannot display as text." @@ -652,7 +679,10 @@ class ShellFileOperations(FileOperations): cat_result = self._exec(f"cat {self._escape_shell_arg(path)}") if cat_result.exit_code != 0: return ReadResult(error=f"Failed to read file: {cat_result.stdout}") - return ReadResult(content=cat_result.stdout, file_size=file_size) + return ReadResult( + content=_strip_terminal_fence_leaks(cat_result.stdout), + file_size=file_size, + ) def delete_file(self, path: str) -> WriteResult: """Delete a file via rm.""" diff --git a/tools/file_tools.py b/tools/file_tools.py index a4187b6aa9..106bd295be 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -570,7 +570,7 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = # ── Redact secrets (after guard check to skip oversized content) ── if result.content: - result.content = redact_sensitive_text(result.content) + result.content = redact_sensitive_text(result.content, code_file=True) result_dict["content"] = result.content # Large-file hint: if the file is big and the caller didn't ask @@ -993,7 +993,7 @@ def search_tool(pattern: str, target: str = "content", path: str = ".", if hasattr(result, 'matches'): for m in result.matches: if hasattr(m, 'content') and m.content: - m.content = redact_sensitive_text(m.content) + m.content = redact_sensitive_text(m.content, code_file=True) result_dict = result.to_dict() if count >= 3: @@ -1137,7 +1137,7 @@ def _handle_search_files(args, **kw): output_mode=args.get("output_mode", 
"content"), context=args.get("context", 0), task_id=tid) -registry.register(name="read_file", toolset="file", schema=READ_FILE_SCHEMA, handler=_handle_read_file, check_fn=_check_file_reqs, emoji="📖", max_result_size_chars=float('inf')) +registry.register(name="read_file", toolset="file", schema=READ_FILE_SCHEMA, handler=_handle_read_file, check_fn=_check_file_reqs, emoji="📖", max_result_size_chars=100_000) registry.register(name="write_file", toolset="file", schema=WRITE_FILE_SCHEMA, handler=_handle_write_file, check_fn=_check_file_reqs, emoji="✍️", max_result_size_chars=100_000) registry.register(name="patch", toolset="file", schema=PATCH_SCHEMA, handler=_handle_patch, check_fn=_check_file_reqs, emoji="🔧", max_result_size_chars=100_000) registry.register(name="search_files", toolset="file", schema=SEARCH_FILES_SCHEMA, handler=_handle_search_files, check_fn=_check_file_reqs, emoji="🔎", max_result_size_chars=100_000) diff --git a/tools/kanban_tools.py b/tools/kanban_tools.py index de5d180c83..1f99f6896c 100644 --- a/tools/kanban_tools.py +++ b/tools/kanban_tools.py @@ -40,13 +40,31 @@ logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- def _check_kanban_mode() -> bool: - """Tools are available iff the current process has ``HERMES_KANBAN_TASK`` - set in its env, which the dispatcher sets when spawning a worker. + """Tools are available when: - Humans running ``hermes chat`` see zero kanban tools. Workers spawned - by the kanban dispatcher (gateway-embedded by default) see all seven. + 1. ``HERMES_KANBAN_TASK`` is set (dispatcher-spawned worker), OR + 2. The current profile has ``kanban`` in its toolsets config + (orchestrator profiles like techlead that route work via Kanban). + + Humans running ``hermes chat`` without the kanban toolset see zero + kanban tools. 
Workers spawned by the kanban dispatcher (gateway- + embedded by default) and orchestrator profiles with the kanban + toolset enabled see all seven. """ - return bool(os.environ.get("HERMES_KANBAN_TASK")) + if os.environ.get("HERMES_KANBAN_TASK"): + return True + + # Check if the current profile has the kanban toolset enabled. + # Uses load_config() which has mtime-based caching, so this adds + # negligible overhead. The check_fn results are further TTL-cached + # (~30s) by the tool registry. + try: + from hermes_cli.config import load_config + cfg = load_config() + toolsets = cfg.get("toolsets", []) + return "kanban" in toolsets + except Exception: + return False # --------------------------------------------------------------------------- @@ -61,6 +79,38 @@ def _default_task_id(arg: Optional[str]) -> Optional[str]: return env_tid or None +def _enforce_worker_task_ownership(tid: str) -> Optional[str]: + """Reject worker-driven destructive calls on foreign task IDs. + + A process spawned by the dispatcher has ``HERMES_KANBAN_TASK`` set + to its own task id. Tools like ``kanban_complete`` / ``kanban_block`` + / ``kanban_heartbeat`` mutate run-lifecycle state, so a buggy or + prompt-injected worker that passed an explicit ``task_id`` for some + other task could corrupt sibling or cross-tenant runs (see #19534). + + Orchestrator profiles (kanban toolset enabled but **no** + ``HERMES_KANBAN_TASK`` in env) aren't subject to this check — their + job is routing, and they sometimes legitimately close out child + tasks or reopen blocked ones. Workers are narrowly scoped to their + one task. + + Returns ``None`` when the call is allowed, or a tool-error string + when it must be rejected. Callers should ``return`` the error + verbatim. + """ + env_tid = os.environ.get("HERMES_KANBAN_TASK") + if not env_tid: + # Orchestrator or CLI context — no task-scope restriction. 
+ return None + if tid != env_tid: + return tool_error( + f"worker is scoped to task {env_tid}; refusing to mutate " + f"{tid}. Use kanban_comment to hand off information to other " + f"tasks, or kanban_create to spawn follow-up work." + ) + return None + + def _connect(): """Import + connect lazily so the module imports cleanly in non-kanban contexts (e.g. test rigs that import every tool module).""" @@ -154,6 +204,9 @@ def _handle_complete(args: dict, **kw) -> str: return tool_error( "task_id is required (or set HERMES_KANBAN_TASK in the env)" ) + ownership_err = _enforce_worker_task_ownership(tid) + if ownership_err: + return ownership_err summary = args.get("summary") metadata = args.get("metadata") result = args.get("result") @@ -192,6 +245,9 @@ def _handle_block(args: dict, **kw) -> str: return tool_error( "task_id is required (or set HERMES_KANBAN_TASK in the env)" ) + ownership_err = _enforce_worker_task_ownership(tid) + if ownership_err: + return ownership_err reason = args.get("reason") if not reason or not str(reason).strip(): return tool_error("reason is required — explain what input you need") @@ -220,6 +276,9 @@ def _handle_heartbeat(args: dict, **kw) -> str: return tool_error( "task_id is required (or set HERMES_KANBAN_TASK in the env)" ) + ownership_err = _enforce_worker_task_ownership(tid) + if ownership_err: + return ownership_err note = args.get("note") try: kb, conn = _connect() diff --git a/tools/mcp_oauth.py b/tools/mcp_oauth.py index 51e243c6c1..80dacdc420 100644 --- a/tools/mcp_oauth.py +++ b/tools/mcp_oauth.py @@ -53,7 +53,7 @@ logger = logging.getLogger(__name__) # Lazy imports -- MCP SDK with OAuth support is optional # --------------------------------------------------------------------------- -_OAUTH_AVAILABLE = False +_OAUTH_AVAILABLE=False try: from mcp.client.auth import OAuthClientProvider from mcp.shared.auth import ( @@ -61,12 +61,16 @@ try: OAuthClientMetadata, OAuthToken, ) - from pydantic import AnyUrl - _OAUTH_AVAILABLE = True 
+ _OAUTH_AVAILABLE=True except ImportError: logger.debug("MCP OAuth types not available -- OAuth MCP auth disabled") +try: + from pydantic import AnyUrl +except ImportError: + AnyUrl = None # type: ignore[assignment, misc] + # --------------------------------------------------------------------------- # Exceptions diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index 2a0115ec85..21e935a12f 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -1667,6 +1667,7 @@ _SESSION_EXPIRED_MARKERS: tuple = ( "session expired", "session not found", "unknown session", + "session terminated", ) diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py index 62712e4581..938cb977b6 100644 --- a/tools/send_message_tool.py +++ b/tools/send_message_tool.py @@ -10,9 +10,10 @@ import json import logging import os import re -from typing import Dict, Optional import ssl import time +from email.utils import formatdate +from typing import Dict, Optional from agent.redact import redact_sensitive_text @@ -588,11 +589,28 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None, last_result = result return last_result + # --- Feishu: native media attachment support via adapter --- + if platform == Platform.FEISHU and media_files: + last_result = None + for i, chunk in enumerate(chunks): + is_last = (i == len(chunks) - 1) + result = await _send_feishu( + pconfig, + chat_id, + chunk, + media_files=media_files if is_last else None, + thread_id=thread_id, + ) + if isinstance(result, dict) and result.get("error"): + return result + last_result = result + return last_result + # --- Non-media platforms --- if media_files and not message.strip(): return { "error": ( - f"send_message MEDIA delivery is currently only supported for telegram, discord, matrix, weixin, signal and yuanbao; " + f"send_message MEDIA delivery is currently only supported for telegram, discord, matrix, weixin, signal, yuanbao and feishu; " f"target {platform.value} had only media 
attachments" ) } @@ -600,7 +618,7 @@ async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None, if media_files: warning = ( f"MEDIA attachments were omitted for {platform.value}; " - "native send_message media delivery is currently only supported for telegram, discord, matrix, weixin, signal and yuanbao" + "native send_message media delivery is currently only supported for telegram, discord, matrix, weixin, signal, yuanbao and feishu" ) last_result = None @@ -1652,8 +1670,8 @@ async def _send_qqbot(pconfig, chat_id, message): """Send via QQBot using the REST API directly (no WebSocket needed). Uses the QQ Bot Open Platform REST endpoints to get an access token - and post a message. Works for guild channels without requiring - a running gateway adapter. + and post a message. Supports guild channels, C2C (private) chats, + and group chats by trying the appropriate endpoints. """ try: import httpx @@ -1682,20 +1700,40 @@ async def _send_qqbot(pconfig, chat_id, message): return _error(f"QQBot: no access_token in response") # Step 2: Send message via REST + # QQ Bot API has separate endpoints for channels, C2C, and groups. + # We try them in order: channel first, then fallback to C2C. 
headers = { "Authorization": f"QQBot {access_token}", "Content-Type": "application/json", } - url = f"https://api.sgroup.qq.com/channels/{chat_id}/messages" payload = {"content": message[:4000], "msg_type": 0} + # Try channel endpoint first (works for guild channels) + url = f"https://api.sgroup.qq.com/channels/{chat_id}/messages" resp = await client.post(url, json=payload, headers=headers) if resp.status_code in (200, 201): data = resp.json() return {"success": True, "platform": "qqbot", "chat_id": chat_id, "message_id": data.get("id")} - else: - return _error(f"QQBot send failed: {resp.status_code} {resp.text}") + + # If channel endpoint failed (likely "频道不存在"), try C2C endpoint + url_c2c = f"https://api.sgroup.qq.com/v2/users/{chat_id}/messages" + resp_c2c = await client.post(url_c2c, json=payload, headers=headers) + if resp_c2c.status_code in (200, 201): + data = resp_c2c.json() + return {"success": True, "platform": "qqbot", "chat_id": chat_id, + "message_id": data.get("id")} + + # If C2C also failed, try group endpoint + url_group = f"https://api.sgroup.qq.com/v2/groups/{chat_id}/messages" + resp_group = await client.post(url_group, json=payload, headers=headers) + if resp_group.status_code in (200, 201): + data = resp_group.json() + return {"success": True, "platform": "qqbot", "chat_id": chat_id, + "message_id": data.get("id")} + + # All endpoints failed — return the most informative error + return _error(f"QQBot send failed: channel={resp.status_code} c2c={resp_c2c.status_code} group={resp_group.status_code}") except Exception as e: return _error(f"QQBot send failed: {e}") diff --git a/tools/session_search_tool.py b/tools/session_search_tool.py index f770fe8886..efc450b322 100644 --- a/tools/session_search_tool.py +++ b/tools/session_search_tool.py @@ -3,7 +3,9 @@ Session Search Tool - Long-Term Conversation Recall Searches past session transcripts in SQLite via FTS5, then summarizes the top -matching sessions using a cheap/fast model (same pattern as 
web_extract). +matching sessions using the configured auxiliary session_search model (same +pattern as web_extract). By default, auxiliary "auto" routing uses the main +chat provider/model unless the user overrides auxiliary.session_search. Returns focused summaries of past conversations rather than raw transcripts, keeping the main model's context window clean. @@ -11,7 +13,7 @@ Flow: 1. FTS5 search finds matching messages ranked by relevance 2. Groups by session, takes the top N unique sessions (default 3) 3. Loads each session's conversation, truncates to ~100k chars centered on matches - 4. Sends to Gemini Flash with a focused summarization prompt + 4. Sends to the configured auxiliary model with a focused summarization prompt 5. Returns per-session summaries with metadata """ @@ -330,7 +332,8 @@ def session_search( """ Search past sessions and return focused summaries of matching conversations. - Uses FTS5 to find matches, then summarizes the top sessions with Gemini Flash. + Uses FTS5 to find matches, then summarizes the top sessions with the + configured auxiliary session_search model. The current session is excluded from results since the agent already has that context. """ if db is None: @@ -483,7 +486,7 @@ def session_search( }, ensure_ascii=False) summaries = [] - for (session_id, match_info, conversation_text, _), result in zip(tasks, results): + for (session_id, match_info, conversation_text, session_meta), result in zip(tasks, results): if isinstance(result, Exception): logging.warning( "Failed to summarize session %s: %s", @@ -491,11 +494,18 @@ def session_search( ) result = None + # Prefer resolved parent session metadata over FTS5 match metadata. + # match_info carries source/model from the *child* session that contained + # the FTS5 hit; after _resolve_to_parent() the session_id points to the + # root, so session_meta has the authoritative platform/source for the + # session the user actually cares about (#15909). 
entry = { "session_id": session_id, - "when": _format_timestamp(match_info.get("session_started")), - "source": match_info.get("source", "unknown"), - "model": match_info.get("model"), + "when": _format_timestamp( + session_meta.get("started_at") or match_info.get("session_started") + ), + "source": session_meta.get("source") or match_info.get("source", "unknown"), + "model": session_meta.get("model") or match_info.get("model"), } if result: diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py index d8d44f1a8b..58c3fe3d2d 100644 --- a/tools/skill_manager_tool.py +++ b/tools/skill_manager_tool.py @@ -784,10 +784,17 @@ def skill_manage( pass # Curator telemetry: bump patch_count on edit/patch/write_file (the actions # that mutate an existing skill's guidance), drop the record on delete. - # Best-effort; telemetry failures never break the tool. + # Only mark a skill as agent-created when the background self-improvement + # review fork creates it — foreground `skill_manage(create)` calls are + # user-directed, and those skills belong to the user (the curator must + # not touch them). Best-effort; telemetry failures never break the tool. try: - from tools.skill_usage import bump_patch, forget - if action in ("patch", "edit", "write_file", "remove_file"): + from tools.skill_usage import bump_patch, forget, mark_agent_created + from tools.skill_provenance import is_background_review + if action == "create": + if is_background_review(): + mark_agent_created(name) + elif action in ("patch", "edit", "write_file", "remove_file"): bump_patch(name) elif action == "delete": forget(name) diff --git a/tools/skill_provenance.py b/tools/skill_provenance.py new file mode 100644 index 0000000000..9f43efc3fc --- /dev/null +++ b/tools/skill_provenance.py @@ -0,0 +1,78 @@ +"""Skill write-origin provenance — ContextVar for distinguishing agent-sediment skill writes from foreground user-directed writes. 
+ +The curator only consolidates/prunes skills it autonomously created via the +background self-improvement review fork. Skills a user asks a foreground +agent to write belong to the user and must never be auto-curated. + +This module exposes a ContextVar that run_agent.py sets before each tool +loop so tool handlers (e.g. skill_manage create) can check whether they +are executing inside the background-review fork. + +The signal piggybacks on AIAgent._memory_write_origin, which is already +set to "background_review" for review-fork instances (see +_spawn_background_review in run_agent.py) and defaults to "assistant_tool" +for normal (foreground) agents. + +Usage: + from tools.skill_provenance import ( + set_current_write_origin, + reset_current_write_origin, + get_current_write_origin, + ) + + token = set_current_write_origin("background_review") + try: + ... # tool runs here + finally: + reset_current_write_origin(token) + + # inside a tool: + if get_current_write_origin() == "background_review": + mark_agent_created(skill_name) +""" + +import contextvars + + +_write_origin: contextvars.ContextVar[str] = contextvars.ContextVar( + "skill_write_origin", + default="foreground", +) + +# The sentinel value the background review fork uses; mirrors +# run_agent.py's AIAgent._memory_write_origin override in +# _spawn_background_review(). +BACKGROUND_REVIEW = "background_review" + + +def set_current_write_origin(origin: str) -> contextvars.Token[str]: + """Bind the active write origin to the current context. + + Returns a Token the caller must pass to reset_current_write_origin + in a finally block. + """ + return _write_origin.set(origin or "foreground") + + +def reset_current_write_origin(token: contextvars.Token[str]) -> None: + """Restore the prior write origin context.""" + _write_origin.reset(token) + + +def get_current_write_origin() -> str: + """Return the active write origin. 
+ + Default: "foreground" — any tool call made by a regular (non-review) + agent, from the CLI, the gateway, cron, or a subagent. + + "background_review" — the self-improvement review fork; only skills + created under this origin should be marked agent-created for curator + management. + """ + return _write_origin.get() + + +def is_background_review() -> bool: + """Convenience: True iff the current write origin is the background + review fork.""" + return get_current_write_origin() == BACKGROUND_REVIEW diff --git a/tools/skill_usage.py b/tools/skill_usage.py index 8a4a1aa425..0491f1d8b1 100644 --- a/tools/skill_usage.py +++ b/tools/skill_usage.py @@ -11,8 +11,9 @@ Design notes: - Atomic writes via tempfile + os.replace (same pattern as .bundled_manifest). - All counter bumps are best-effort: failures log at DEBUG and return silently. A broken sidecar never breaks the underlying tool call. - - Provenance filter: "agent-created" == not in .bundled_manifest AND not in - .hub/lock.json. The curator only ever mutates agent-created skills. + - Provenance filter: curator-managed skills are explicitly marked when + created through skill_manage. Bundled / hub-installed skills stay + off-limits, and manually authored skills are not inferred from location. Lifecycle states: active -> default @@ -149,11 +150,13 @@ def _read_hub_installed_names() -> Set[str]: def list_agent_created_skill_names() -> List[str]: - """Enumerate skills that were authored by the agent (or user), NOT by a - bundled or hub-installed source. + """Enumerate skills explicitly authored by the agent. - The curator operates exclusively on this set. Bundled / hub skills are - maintained by their upstream sources and must never be pruned here. + The curator operates exclusively on this set. Skills are only eligible + after ``skill_manage(action="create")`` marks them in ``.usage.json``; + manually authored skills must not be inferred from filesystem location. 
+ Bundled / hub skills are maintained by their upstream sources and must + never be pruned here. """ base = _skills_dir() if not base.exists(): @@ -161,6 +164,7 @@ def list_agent_created_skill_names() -> List[str]: bundled = _read_bundled_manifest_names() hub = _read_hub_installed_names() off_limits = bundled | hub + usage = load_usage() names: List[str] = [] # Top-level SKILL.md files (flat layout) AND nested category/skill/SKILL.md @@ -176,6 +180,8 @@ def list_agent_created_skill_names() -> List[str]: name = _read_skill_name(skill_md, fallback=skill_md.parent.name) if name in off_limits: continue + if not _is_curator_managed_record(usage.get(name)): + continue names.append(name) return sorted(set(names)) @@ -207,12 +213,20 @@ def is_agent_created(skill_name: str) -> bool: return skill_name not in off_limits +def _is_curator_managed_record(record: Any) -> bool: + """Return True when a usage record opts a skill into curator management.""" + if not isinstance(record, dict): + return False + return record.get("created_by") == "agent" or record.get("agent_created") is True + + # --------------------------------------------------------------------------- # Sidecar I/O # --------------------------------------------------------------------------- def _empty_record() -> Dict[str, Any]: return { + "created_by": None, "use_count": 0, "view_count": 0, "last_used_at": None, @@ -287,9 +301,8 @@ def _mutate(skill_name: str, mutator) -> None: """Load, apply *mutator(record)* in place, save. Best-effort. Bundled and hub-installed skills are NEVER recorded in the sidecar. - This keeps .usage.json focused on agent-created skills (the only ones - the curator considers) and prevents stale counters from hanging around - for upstream-managed skills. + Local manual skills may still accrue usage telemetry, but they only + become curator-managed when ``created_by`` is explicitly marked. 
""" if not skill_name: return @@ -336,6 +349,17 @@ def bump_patch(skill_name: str) -> None: _mutate(skill_name, _apply) +def mark_agent_created(skill_name: str) -> None: + """Opt a skill created by skill_manage into curator management. + + Viewing or invoking a manually authored skill may still create telemetry, + but only this explicit marker makes it eligible for automatic curation. + """ + def _apply(rec: Dict[str, Any]) -> None: + rec["created_by"] = "agent" + _mutate(skill_name, _apply) + + def set_state(skill_name: str, state: str) -> None: """Set lifecycle state. No-op if *state* is invalid.""" if state not in _VALID_STATES: diff --git a/tools/skills_hub.py b/tools/skills_hub.py index 0ce1d9b34e..aaeabd2c28 100644 --- a/tools/skills_hub.py +++ b/tools/skills_hub.py @@ -2801,7 +2801,11 @@ def bundle_content_hash(bundle: SkillBundle) -> str: """Compute a deterministic hash for an in-memory skill bundle.""" h = hashlib.sha256() for rel_path in sorted(bundle.files): - h.update(bundle.files[rel_path].encode("utf-8")) + content = bundle.files[rel_path] + if isinstance(content, bytes): + h.update(content) + else: + h.update(content.encode("utf-8")) return f"sha256:{h.hexdigest()[:16]}" diff --git a/tools/vision_tools.py b/tools/vision_tools.py index 233b737272..611e6bcef6 100644 --- a/tools/vision_tools.py +++ b/tools/vision_tools.py @@ -440,6 +440,8 @@ async def vision_analyze_tool( - For local file paths, the file is used directly and NOT deleted - Supports common image formats (JPEG, PNG, GIF, WebP, etc.) """ + if not isinstance(user_prompt, str): + user_prompt = str(user_prompt) if user_prompt is not None else "" debug_call_data = { "parameters": { "image_url": image_url, @@ -801,3 +803,366 @@ registry.register( is_async=True, emoji="👁️", ) + + +# --------------------------------------------------------------------------- +# Video Analysis Tool +# --------------------------------------------------------------------------- + +# Extension → MIME. 
avi/mkv fall back to mp4. +_VIDEO_MIME_TYPES = { + ".mp4": "video/mp4", + ".webm": "video/webm", + ".mov": "video/mov", + ".avi": "video/mp4", + ".mkv": "video/mp4", + ".mpeg": "video/mpeg", + ".mpg": "video/mpeg", +} + +_MAX_VIDEO_BASE64_BYTES = 50 * 1024 * 1024 # 50 MB hard cap +_VIDEO_SIZE_WARN_BYTES = 20 * 1024 * 1024 + + +def _detect_video_mime_type(video_path: Path) -> Optional[str]: + """Return a video MIME type based on file extension, or None if unsupported.""" + ext = video_path.suffix.lower() + return _VIDEO_MIME_TYPES.get(ext) + + +def _video_to_base64_data_url(video_path: Path, mime_type: Optional[str] = None) -> str: + """Convert a video file to a base64-encoded data URL.""" + data = video_path.read_bytes() + encoded = base64.b64encode(data).decode("ascii") + mime = mime_type or _VIDEO_MIME_TYPES.get(video_path.suffix.lower(), "video/mp4") + return f"data:{mime};base64,{encoded}" + + +async def _download_video(video_url: str, destination: Path, max_retries: int = 3) -> Path: + """Download video from URL with SSRF protection and retry.""" + import asyncio + + destination.parent.mkdir(parents=True, exist_ok=True) + + async def _ssrf_redirect_guard(response): + if response.is_redirect and response.next_request: + redirect_url = str(response.next_request.url) + from tools.url_safety import is_safe_url + if not is_safe_url(redirect_url): + raise ValueError( + f"Blocked redirect to private/internal address: {redirect_url}" + ) + + last_error = None + for attempt in range(max_retries): + try: + blocked = check_website_access(video_url) + if blocked: + raise PermissionError(blocked["message"]) + + async with httpx.AsyncClient( + timeout=60.0, + follow_redirects=True, + event_hooks={"response": [_ssrf_redirect_guard]}, + ) as client: + response = await client.get( + video_url, + headers={ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "Accept": "video/*,*/*;q=0.8", + }, + ) + 
response.raise_for_status() + + cl = response.headers.get("content-length") + if cl and int(cl) > _MAX_VIDEO_BASE64_BYTES: + raise ValueError( + f"Video too large ({int(cl)} bytes, max {_MAX_VIDEO_BASE64_BYTES})" + ) + + final_url = str(response.url) + blocked = check_website_access(final_url) + if blocked: + raise PermissionError(blocked["message"]) + + body = response.content + if len(body) > _MAX_VIDEO_BASE64_BYTES: + raise ValueError( + f"Video too large ({len(body)} bytes, max {_MAX_VIDEO_BASE64_BYTES})" + ) + destination.write_bytes(body) + + return destination + except Exception as e: + last_error = e + if attempt < max_retries - 1: + wait_time = 2 ** (attempt + 1) + logger.warning("Video download failed (attempt %s/%s): %s", attempt + 1, max_retries, str(e)[:50]) + await asyncio.sleep(wait_time) + else: + logger.error( + "Video download failed after %s attempts: %s", + max_retries, str(e)[:100], exc_info=True, + ) + + if last_error is None: + raise RuntimeError( + f"_download_video exited retry loop without attempting (max_retries={max_retries})" + ) + raise last_error + + +async def video_analyze_tool( + video_url: str, + user_prompt: str, + model: str = None, +) -> str: + """Analyze a video via multimodal LLM. Returns JSON {success, analysis}.""" + if not isinstance(user_prompt, str): + user_prompt = str(user_prompt) if user_prompt is not None else "" + debug_call_data = { + "parameters": { + "video_url": video_url, + "user_prompt": user_prompt[:200] + "..." 
if len(user_prompt) > 200 else user_prompt, + "model": model, + }, + "error": None, + "success": False, + "analysis_length": 0, + "model_used": model, + "video_size_bytes": 0, + } + + temp_video_path = None + should_cleanup = True + + try: + from tools.interrupt import is_interrupted + if is_interrupted(): + return tool_error("Interrupted", success=False) + + logger.info("Analyzing video: %s", video_url[:60]) + logger.info("User prompt: %s", user_prompt[:100]) + + # Resolve local path vs remote URL + resolved_url = video_url + if resolved_url.startswith("file://"): + resolved_url = resolved_url[len("file://"):] + local_path = Path(os.path.expanduser(resolved_url)) + + if local_path.is_file(): + logger.info("Using local video file: %s", video_url) + temp_video_path = local_path + should_cleanup = False + elif _validate_image_url(video_url): + blocked = check_website_access(video_url) + if blocked: + raise PermissionError(blocked["message"]) + temp_dir = get_hermes_dir("cache/video", "temp_video_files") + temp_video_path = temp_dir / f"temp_video_{uuid.uuid4()}.mp4" + await _download_video(video_url, temp_video_path) + should_cleanup = True + else: + raise ValueError( + "Invalid video source. Provide an HTTP/HTTPS URL or a valid local file path." + ) + + video_size_bytes = temp_video_path.stat().st_size + video_size_mb = video_size_bytes / (1024 * 1024) + logger.info("Video ready (%.1f MB)", video_size_mb) + + detected_mime = _detect_video_mime_type(temp_video_path) + if not detected_mime: + raise ValueError( + f"Unsupported video format: '{temp_video_path.suffix}'. 
" + f"Supported: {', '.join(sorted(_VIDEO_MIME_TYPES.keys()))}" + ) + + if video_size_bytes > _VIDEO_SIZE_WARN_BYTES: + logger.warning("Video is %.1f MB — may be slow or rejected", video_size_mb) + + video_data_url = _video_to_base64_data_url(temp_video_path, mime_type=detected_mime) + data_size_mb = len(video_data_url) / (1024 * 1024) + + if len(video_data_url) > _MAX_VIDEO_BASE64_BYTES: + raise ValueError( + f"Video too large for API: base64 payload is {data_size_mb:.1f} MB " + f"(limit {_MAX_VIDEO_BASE64_BYTES / (1024 * 1024):.0f} MB). " + f"Compress or trim the video and retry." + ) + + debug_call_data["video_size_bytes"] = video_size_bytes + + messages = [ + { + "role": "user", + "content": [ + { + "type": "text", + "text": user_prompt, + }, + { + "type": "video_url", + "video_url": { + "url": video_data_url, + }, + }, + ], + } + ] + + vision_timeout = 180.0 + vision_temperature = 0.1 + try: + from hermes_cli.config import cfg_get, load_config + _cfg = load_config() + _vision_cfg = cfg_get(_cfg, "auxiliary", "vision", default={}) + _vt = _vision_cfg.get("timeout") + if _vt is not None: + vision_timeout = max(float(_vt), 180.0) + _vtemp = _vision_cfg.get("temperature") + if _vtemp is not None: + vision_temperature = float(_vtemp) + except Exception: + pass + + call_kwargs = { + "task": "vision", + "messages": messages, + "temperature": vision_temperature, + "max_tokens": 4000, + "timeout": vision_timeout, + } + if model: + call_kwargs["model"] = model + + response = await async_call_llm(**call_kwargs) + analysis = extract_content_or_reasoning(response) + + if not analysis: + logger.warning("Empty video response, retrying once") + response = await async_call_llm(**call_kwargs) + analysis = extract_content_or_reasoning(response) + + analysis_length = len(analysis) if analysis else 0 + logger.info("Video analysis completed (%s characters)", analysis_length) + + result = { + "success": True, + "analysis": analysis or "There was a problem with the request and the 
video could not be analyzed.", + } + + debug_call_data["success"] = True + debug_call_data["analysis_length"] = analysis_length + _debug.log_call("video_analyze_tool", debug_call_data) + _debug.save() + + return json.dumps(result, indent=2, ensure_ascii=False) + + except Exception as e: + error_msg = f"Error analyzing video: {str(e)}" + logger.error("%s", error_msg, exc_info=True) + + err_str = str(e).lower() + if any(hint in err_str for hint in ( + "402", "insufficient", "payment required", "credits", "billing", + )): + analysis = ( + "Insufficient credits or payment required. Please top up your " + f"API provider account and try again. Error: {e}" + ) + elif any(hint in err_str for hint in ( + "does not support", "not support video", + "content_policy", "multimodal", + "unrecognized request argument", "video input", + "video_url", + )): + analysis = ( + f"The model does not support video analysis or the request was " + f"rejected. Ensure you're using a video-capable model " + f"(e.g. google/gemini-2.5-flash). Error: {e}" + ) + elif any(hint in err_str for hint in ( + "too large", "payload", "413", "content_too_large", + "request_too_large", "exceeds", "size limit", + )): + analysis = ( + "The video is too large for the API. Try compressing or trimming " + f"the video (max ~50 MB). Error: {e}" + ) + else: + analysis = ( + "There was a problem with the request and the video could not " + f"be analyzed. 
Error: {e}" + ) + + result = { + "success": False, + "error": error_msg, + "analysis": analysis, + } + + debug_call_data["error"] = error_msg + _debug.log_call("video_analyze_tool", debug_call_data) + _debug.save() + + return json.dumps(result, indent=2, ensure_ascii=False) + + finally: + if should_cleanup and temp_video_path and temp_video_path.exists(): + try: + temp_video_path.unlink() + logger.debug("Cleaned up temporary video file") + except Exception as cleanup_error: + logger.warning( + "Could not delete temporary file: %s", cleanup_error, exc_info=True + ) + + +VIDEO_ANALYZE_SCHEMA = { + "name": "video_analyze", + "description": ( + "Analyze a video from a URL or local file path using a multimodal AI model. " + "Sends the video to a video-capable model (e.g. Gemini) for understanding. " + "Use this for video files — for images, use vision_analyze instead. " + "Supports mp4, webm, mov, avi, mkv, mpeg formats. " + "Note: large videos (>20 MB) may be slow; max ~50 MB." + ), + "parameters": { + "type": "object", + "properties": { + "video_url": { + "type": "string", + "description": "Video URL (http/https) or local file path to analyze.", + }, + "question": { + "type": "string", + "description": "Your specific question about the video. The AI will describe what happens in the video and answer your question.", + }, + }, + "required": ["video_url", "question"], + }, +} + + +def _handle_video_analyze(args: Dict[str, Any], **kw: Any) -> Awaitable[str]: + video_url = args.get("video_url", "") + question = args.get("question", "") + full_prompt = ( + "Fully describe and explain everything happening in this video, " + "including visual content, motion, audio cues, text overlays, and scene " + f"transitions. 
Then answer the following question:\n\n{question}" + ) + model = os.getenv("AUXILIARY_VIDEO_MODEL", "").strip() or os.getenv("AUXILIARY_VISION_MODEL", "").strip() or None + return video_analyze_tool(video_url, full_prompt, model) + + +registry.register( + name="video_analyze", + toolset="video", + schema=VIDEO_ANALYZE_SCHEMA, + handler=_handle_video_analyze, + check_fn=check_vision_requirements, + is_async=True, + emoji="🎬", +) diff --git a/toolsets.py b/toolsets.py index 57e226d3c0..2a77f615ce 100644 --- a/toolsets.py +++ b/toolsets.py @@ -89,6 +89,12 @@ TOOLSETS = { "tools": ["vision_analyze"], "includes": [] }, + + "video": { + "description": "Video analysis and understanding tools (opt-in, not in default toolset)", + "tools": ["video_analyze"], + "includes": [] + }, "image_gen": { "description": "Creative generation tools (images)", diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 605603e041..27fc3a5380 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -2722,6 +2722,12 @@ def _(rid, params: dict) -> dict: unregister_gateway_notify(session["session_key"]) except Exception: pass + try: + agent = session.get("agent") + if agent and hasattr(agent, "close"): + agent.close() + except Exception: + pass try: worker = session.get("slash_worker") if worker: @@ -5606,9 +5612,13 @@ def _(rid, params: dict) -> dict: return _err(rid, 4004, "empty command") # Skill slash commands and _pending_input commands must NOT go through the - # slash worker — see _PENDING_INPUT_COMMANDS definition above. - _cmd_parts = cmd.split() if not cmd.startswith("/") else cmd.lstrip("/").split() - _cmd_base = _cmd_parts[0] if _cmd_parts else "" + # slash worker — see _PENDING_INPUT_COMMANDS definition above. Plugin + # commands must also avoid the worker, but unlike skills/pending-input they + # still return normal slash.exec output so the TUI keeps the pager path. 
+ _cmd_text = cmd.lstrip("/") if cmd.startswith("/") else cmd + _cmd_parts = _cmd_text.split(maxsplit=1) + _cmd_base = (_cmd_parts[0] if _cmd_parts else "").lower() + _cmd_arg = _cmd_parts[1] if len(_cmd_parts) > 1 else "" if _cmd_base in _PENDING_INPUT_COMMANDS: return _err( @@ -5626,6 +5636,27 @@ def _(rid, params: dict) -> dict: except Exception: pass + plugin_handler = None + resolve_plugin_command_result = None + if _cmd_base: + try: + from hermes_cli.plugins import ( + get_plugin_command_handler, + resolve_plugin_command_result, + ) + + plugin_handler = get_plugin_command_handler(_cmd_base) + except Exception: + plugin_handler = None + resolve_plugin_command_result = None + + if plugin_handler and resolve_plugin_command_result: + try: + result = resolve_plugin_command_result(plugin_handler(_cmd_arg)) + return _ok(rid, {"output": str(result or "(no output)")}) + except Exception as e: + return _ok(rid, {"output": f"Plugin command error: {e}"}) + worker = session.get("slash_worker") if not worker: try: diff --git a/ui-tui/package-lock.json b/ui-tui/package-lock.json index 2efd64fe40..0677e8bdc1 100644 --- a/ui-tui/package-lock.json +++ b/ui-tui/package-lock.json @@ -12,6 +12,7 @@ "@nanostores/react": "^1.1.0", "ink": "^6.8.0", "ink-text-input": "^6.0.0", + "nanostores": "^1.2.0", "react": "^19.2.4", "unicode-animations": "^1.0.3" }, @@ -5303,7 +5304,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": "^20.0.0 || >=22.0.0" } diff --git a/ui-tui/package.json b/ui-tui/package.json index 061e3bc448..2bb1616a0a 100644 --- a/ui-tui/package.json +++ b/ui-tui/package.json @@ -21,6 +21,7 @@ "@nanostores/react": "^1.1.0", "ink": "^6.8.0", "ink-text-input": "^6.0.0", + "nanostores": "^1.2.0", "react": "^19.2.4", "unicode-animations": "^1.0.3" }, diff --git a/ui-tui/packages/hermes-ink/index.js b/ui-tui/packages/hermes-ink/index.js index 758fef3073..8c0fa9c5b5 100644 --- a/ui-tui/packages/hermes-ink/index.js +++ b/ui-tui/packages/hermes-ink/index.js @@ -1 +1 
@@ -export * from './dist/ink-bundle.js' +export * from './dist/entry-exports.js' diff --git a/ui-tui/packages/hermes-ink/package.json b/ui-tui/packages/hermes-ink/package.json index 8e23491310..8df3c02a4a 100644 --- a/ui-tui/packages/hermes-ink/package.json +++ b/ui-tui/packages/hermes-ink/package.json @@ -4,7 +4,7 @@ "private": true, "type": "module", "scripts": { - "build": "esbuild src/entry-exports.ts --bundle --platform=node --format=esm --packages=external --outfile=dist/ink-bundle.js" + "build": "esbuild src/entry-exports.ts --bundle --platform=node --format=esm --packages=external --outdir=dist" }, "sideEffects": true, "main": "./index.js", diff --git a/ui-tui/src/__tests__/messages.test.ts b/ui-tui/src/__tests__/messages.test.ts index 1da4bfd4ae..1ad2b788df 100644 --- a/ui-tui/src/__tests__/messages.test.ts +++ b/ui-tui/src/__tests__/messages.test.ts @@ -1,7 +1,13 @@ +import { renderSync } from '@hermes/ink' +import React from 'react' +import { PassThrough } from 'stream' import { describe, expect, it } from 'vitest' +import { MessageLine } from '../components/messageLine.js' import { toTranscriptMessages } from '../domain/messages.js' import { upsert } from '../lib/messages.js' +import { stripAnsi } from '../lib/text.js' +import { DEFAULT_THEME } from '../theme.js' describe('toTranscriptMessages', () => { it('preserves assistant tool-call rows so resume does not drop prior turns', () => { @@ -21,6 +27,50 @@ describe('toTranscriptMessages', () => { }) }) +describe('MessageLine', () => { + it('preserves a separator after compound user prompt glyphs in transcript rows', () => { + const stdout = new PassThrough() + const stdin = new PassThrough() + const stderr = new PassThrough() + let output = '' + + Object.assign(stdout, { columns: 80, isTTY: false, rows: 24 }) + Object.assign(stdin, { isTTY: false }) + Object.assign(stderr, { isTTY: false }) + stdout.on('data', chunk => { + output += chunk.toString() + }) + + const t = { + ...DEFAULT_THEME, + brand: { 
...DEFAULT_THEME.brand, prompt: 'Ψ >' } + } + + const instance = renderSync( + React.createElement(MessageLine, { + cols: 80, + msg: { role: 'user', text: 'Okay' }, + t + }), + { + patchConsole: false, + stderr: stderr as NodeJS.WriteStream, + stdin: stdin as NodeJS.ReadStream, + stdout: stdout as NodeJS.WriteStream + } + ) + + instance.unmount() + instance.cleanup() + + const renderedLine = stripAnsi(output) + .split('\n') + .find(line => line.includes('Okay')) + + expect(renderedLine).toContain('Ψ > Okay') + }) +}) + describe('upsert', () => { it('appends when last role differs', () => { expect(upsert([{ role: 'user', text: 'hi' }], 'assistant', 'hello')).toHaveLength(2) diff --git a/ui-tui/src/__tests__/virtualHeights.test.ts b/ui-tui/src/__tests__/virtualHeights.test.ts index 4b05aa3996..f407976db3 100644 --- a/ui-tui/src/__tests__/virtualHeights.test.ts +++ b/ui-tui/src/__tests__/virtualHeights.test.ts @@ -17,6 +17,13 @@ describe('virtual height estimates', () => { expect(estimatedMsgHeight(msg, 35, { compact: false, details: false })).toBeGreaterThan(5) }) + it('uses compound user prompt width when estimating user message wrapping', () => { + const msg: Msg = { role: 'user', text: 'x'.repeat(21) } + + expect(estimatedMsgHeight(msg, 26, { compact: false, details: false, userPrompt: '❯' })).toBe(3) + expect(estimatedMsgHeight(msg, 26, { compact: false, details: false, userPrompt: 'Ψ >' })).toBe(4) + }) + it('includes detail sections when visible', () => { const msg: Msg = { role: 'assistant', text: 'ok', thinking: 'line 1\nline 2', tools: ['Tool A', 'Tool B'] } diff --git a/ui-tui/src/app/useMainApp.ts b/ui-tui/src/app/useMainApp.ts index 9ec18337bb..218654f531 100644 --- a/ui-tui/src/app/useMainApp.ts +++ b/ui-tui/src/app/useMainApp.ts @@ -17,6 +17,7 @@ import type { import { useGitBranch } from '../hooks/useGitBranch.js' import { useVirtualHistory } from '../hooks/useVirtualHistory.js' import { appendTranscriptMessage } from '../lib/messages.js' +import { 
composerPromptWidth } from '../lib/inputMetrics.js' import { isMac } from '../lib/platform.js' import { asRpcResult, rpcErrorMessage } from '../lib/rpc.js' import { terminalParityHints } from '../lib/terminalParity.js' @@ -244,7 +245,8 @@ export function useMainApp(gw: GatewayClient) { }, [ui.detailsMode, ui.detailsModeCommandOverride, ui.sections]) const detailsVisible = detailsLayoutKey !== 'hidden:hidden' - const heightCacheKey = `${ui.sid ?? 'draft'}:${cols}:${ui.compact ? '1' : '0'}:${detailsLayoutKey}` + const userPromptWidth = composerPromptWidth(ui.theme.brand.prompt) + const heightCacheKey = `${ui.sid ?? 'draft'}:${cols}:${userPromptWidth}:${ui.compact ? '1' : '0'}:${detailsLayoutKey}` const heightCache = useMemo(() => { let cache = heightCachesRef.current.get(heightCacheKey) @@ -266,9 +268,10 @@ export function useMainApp(gw: GatewayClient) { estimatedMsgHeight(virtualRows[index]!.msg, cols, { compact: ui.compact, details: detailsVisible, - limitHistory: index < virtualRows.length - FULL_RENDER_TAIL_ITEMS + limitHistory: index < virtualRows.length - FULL_RENDER_TAIL_ITEMS, + userPrompt: ui.theme.brand.prompt }), - [cols, detailsVisible, ui.compact, virtualRows] + [cols, detailsVisible, ui.compact, ui.theme.brand.prompt, virtualRows] ) const syncHeightCache = useCallback( @@ -358,6 +361,13 @@ export function useMainApp(gw: GatewayClient) { const die = useCallback(() => { gw.kill() exit() + // Ink's exit() calls unmount() which resets terminal modes but does NOT + // call process.exit(). Without an explicit exit the Node process stays + // alive (stdin listener keeps the event loop open), so the process.on('exit') + // handler in entry.tsx — which sends the final resetTerminalModes() — never + // fires. This leaves kitty keyboard protocol, mouse modes, etc. enabled + // in the parent shell. See issue #19194. 
+ process.exit(0) }, [exit, gw]) const session = useSessionLifecycle({ diff --git a/ui-tui/src/components/messageLine.tsx b/ui-tui/src/components/messageLine.tsx index 0bf9ba6d9b..7bdfb443b7 100644 --- a/ui-tui/src/components/messageLine.tsx +++ b/ui-tui/src/components/messageLine.tsx @@ -5,6 +5,7 @@ import { LONG_MSG } from '../config/limits.js' import { sectionMode } from '../domain/details.js' import { userDisplay } from '../domain/messages.js' import { ROLE } from '../domain/roles.js' +import { transcriptBodyWidth, transcriptGutterWidth } from '../lib/inputMetrics.js' import { boundedHistoryRenderText, boundedLiveRenderText, @@ -95,6 +96,7 @@ export const MessageLine = memo(function MessageLine({ } const { body, glyph, prefix } = ROLE[msg.role](t) + const gutterWidth = transcriptGutterWidth(msg.role, t.brand.prompt) const showDetails = (toolsMode !== 'hidden' && Boolean(msg.tools?.length)) || (thinkingMode !== 'hidden' && Boolean(thinking)) @@ -163,13 +165,13 @@ export const MessageLine = memo(function MessageLine({ )} <Box> - <NoSelect flexShrink={0} fromLeftEdge width={3}> + <NoSelect flexShrink={0} fromLeftEdge width={gutterWidth}> <Text bold={msg.role === 'user'} color={prefix}> {glyph}{' '} </Text> </NoSelect> - <Box width={Math.max(20, cols - 5)}>{content}</Box> + <Box width={transcriptBodyWidth(cols, msg.role, t.brand.prompt)}>{content}</Box> </Box> </Box> ) diff --git a/ui-tui/src/lib/inputMetrics.ts b/ui-tui/src/lib/inputMetrics.ts index 245baae96f..b5645b4331 100644 --- a/ui-tui/src/lib/inputMetrics.ts +++ b/ui-tui/src/lib/inputMetrics.ts @@ -1,5 +1,7 @@ import { stringWidth } from '@hermes/ink' +import type { Role } from '../types.js' + export const COMPOSER_PROMPT_GAP_WIDTH = 1 let _seg: Intl.Segmenter | null = null @@ -162,6 +164,14 @@ export function composerPromptWidth(promptText: string) { return Math.max(1, stringWidth(promptText)) + COMPOSER_PROMPT_GAP_WIDTH } +export function transcriptGutterWidth(role: Role, userPrompt: string) { + return 
role === 'user' ? composerPromptWidth(userPrompt) : 3 +} + +export function transcriptBodyWidth(totalCols: number, role: Role, userPrompt: string) { + return Math.max(20, totalCols - transcriptGutterWidth(role, userPrompt) - 2) +} + export function stableComposerColumns(totalCols: number, promptWidth: number) { // Physical render/wrap width. Always reserve outer composer padding and // prompt prefix. Only reserve the transcript scrollbar gutter when the diff --git a/ui-tui/src/lib/virtualHeights.ts b/ui-tui/src/lib/virtualHeights.ts index 0c673fd93a..e9439d42dd 100644 --- a/ui-tui/src/lib/virtualHeights.ts +++ b/ui-tui/src/lib/virtualHeights.ts @@ -1,5 +1,6 @@ import type { Msg } from '../types.js' +import { transcriptBodyWidth } from './inputMetrics.js' import { boundedHistoryRenderText } from './text.js' const hashText = (text: string) => { @@ -38,7 +39,12 @@ export const wrappedLines = (text: string, width: number) => { export const estimatedMsgHeight = ( msg: Msg, cols: number, - { compact, details, limitHistory = false }: { compact: boolean; details: boolean; limitHistory?: boolean } + { + compact, + details, + limitHistory = false, + userPrompt = '' + }: { compact: boolean; details: boolean; limitHistory?: boolean; userPrompt?: string } ) => { if (msg.kind === 'intro') { return msg.info?.version ? 9 : 5 @@ -56,7 +62,7 @@ export const estimatedMsgHeight = ( return Math.max(2, msg.todos.length + 2) } - const bodyWidth = Math.max(20, cols - 5) + const bodyWidth = transcriptBodyWidth(cols, msg.role, userPrompt) const text = msg.role === 'assistant' && limitHistory ? 
boundedHistoryRenderText(msg.text) : msg.text let h = wrappedLines(text || ' ', bodyWidth) diff --git a/website/docs/developer-guide/adding-tools.md b/website/docs/developer-guide/adding-tools.md index f1ab79f31e..6bd4c7cca4 100644 --- a/website/docs/developer-guide/adding-tools.md +++ b/website/docs/developer-guide/adding-tools.md @@ -8,6 +8,18 @@ description: "How to add a new tool to Hermes Agent — schemas, handlers, regis Before writing a tool, ask yourself: **should this be a [skill](creating-skills.md) instead?** +:::warning Built-in Core Tools Only +This page is for adding a **built-in Hermes tool** to the repository itself. +If you want a personal, project-local, or otherwise custom tool without +modifying Hermes core, use the plugin route instead: + +- [Plugins](/docs/user-guide/features/plugins) +- [Build a Hermes Plugin](/docs/guides/build-a-hermes-plugin) + +Default to plugins for most custom tool creation. Only follow this page when +you explicitly want to ship a new built-in tool in `tools/` and `toolsets.py`. +::: + Make it a **Skill** when the capability can be expressed as instructions + shell commands + existing tools (arXiv search, git workflows, Docker management, PDF processing). Make it a **Tool** when it requires end-to-end integration with API keys, custom processing logic, binary data handling, or streaming (browser automation, TTS, vision analysis). @@ -21,7 +33,7 @@ Adding a tool touches **2 files**: Any `tools/*.py` file with a top-level `registry.register()` call is auto-discovered at startup — no manual import list required. 
-## Step 1: Create the Tool File +## Step 1: Create the Built-in Tool File Every tool file follows the same structure: @@ -106,7 +118,7 @@ registry.register( - The `handler` receives `(args: dict, **kwargs)` where `args` is the LLM's tool call arguments ::: -## Step 2: Add to a Toolset +## Step 2: Add the Built-in Tool to a Toolset In `toolsets.py`, add the tool name: @@ -192,6 +204,7 @@ OPTIONAL_ENV_VARS = { - [ ] Tool file created with handler, schema, check function, and registration - [ ] Added to appropriate toolset in `toolsets.py` +- [ ] Confirmed this really should be a built-in/core tool and not a plugin - [ ] Handler returns JSON strings, errors returned as `{"error": "..."}` - [ ] Optional: API key added to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` - [ ] Optional: Added to `toolset_distributions.py` for batch processing diff --git a/website/docs/developer-guide/contributing.md b/website/docs/developer-guide/contributing.md index f75fd85ebb..8cfa618ad6 100644 --- a/website/docs/developer-guide/contributing.md +++ b/website/docs/developer-guide/contributing.md @@ -22,7 +22,8 @@ We value contributions in this order: ## Common contribution paths -- Building a new tool? Start with [Adding Tools](./adding-tools.md) +- Building a custom/local tool without modifying Hermes core? Start with [Build a Hermes Plugin](../guides/build-a-hermes-plugin.md) +- Building a new built-in core tool for Hermes itself? Start with [Adding Tools](./adding-tools.md) - Building a new skill? Start with [Creating Skills](./creating-skills.md) - Building a new inference provider? 
Start with [Adding Providers](./adding-providers.md) diff --git a/website/docs/getting-started/learning-path.md b/website/docs/getting-started/learning-path.md index 41170ccccd..79953751a1 100644 --- a/website/docs/getting-started/learning-path.md +++ b/website/docs/getting-started/learning-path.md @@ -80,15 +80,18 @@ Cron jobs let Hermes Agent run tasks on a schedule — daily summaries, periodic Extend Hermes Agent with your own tools and reusable skill packages. -1. [Tools Overview](/docs/user-guide/features/tools) -2. [Skills Overview](/docs/user-guide/features/skills) -3. [MCP (Model Context Protocol)](/docs/user-guide/features/mcp) -4. [Architecture](/docs/developer-guide/architecture) -5. [Adding Tools](/docs/developer-guide/adding-tools) -6. [Creating Skills](/docs/developer-guide/creating-skills) +1. [Plugins](/docs/user-guide/features/plugins) +2. [Build a Hermes Plugin](/docs/guides/build-a-hermes-plugin) +3. [Tools Overview](/docs/user-guide/features/tools) +4. [Skills Overview](/docs/user-guide/features/skills) +5. [MCP (Model Context Protocol)](/docs/user-guide/features/mcp) +6. [Architecture](/docs/developer-guide/architecture) +7. [Adding Tools](/docs/developer-guide/adding-tools) +8. [Creating Skills](/docs/developer-guide/creating-skills) :::tip -Tools are individual functions the agent can call. Skills are bundles of tools, prompts, and configuration packaged together. Start with tools, graduate to skills. +For most custom tool creation, start with plugins. The [Adding Tools](/docs/developer-guide/adding-tools) +page is for built-in Hermes core development, not the usual user/custom-tool path. 
::: ### "I want to train models" diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index 862c51606e..4f307f15e7 100644 --- a/website/docs/reference/cli-commands.md +++ b/website/docs/reference/cli-commands.md @@ -340,32 +340,64 @@ hermes cron <list|create|edit|pause|resume|run|remove|status|tick> ## `hermes kanban` ```bash -hermes kanban <action> [options] +hermes kanban [--board <slug>] <action> [options] ``` -Multi-profile collaboration board. Tasks live in `~/.hermes/kanban.db` (WAL-mode SQLite); every profile reads and writes the same board. A `cron`-driven dispatcher (`hermes kanban dispatch`) atomically claims ready tasks and spawns the assigned profile as its own process with an isolated workspace. +Multi-profile, multi-project collaboration board. Each install can host many boards (one per project, repo, or domain); each board is a standalone queue with its own SQLite DB and dispatcher scope. New installs start with one board called `default`, whose DB is `~/.hermes/kanban.db` for back-compat; additional boards live at `~/.hermes/kanban/boards/<slug>/kanban.db`. The gateway-embedded dispatcher sweeps every board per tick. + +**Global flags (apply to every action below):** + +| Flag | Purpose | +|------|---------| +| `--board <slug>` | Operate on a specific board. Defaults to the current board (set via `hermes kanban boards switch`, the `HERMES_KANBAN_BOARD` env var, or `default`). | + +**This is the human / scripting surface.** Agent workers spawned by the dispatcher drive the board through a dedicated `kanban_*` [toolset](/docs/user-guide/features/kanban#how-workers-interact-with-the-board) (`kanban_show`, `kanban_complete`, `kanban_block`, `kanban_create`, `kanban_link`, `kanban_comment`, `kanban_heartbeat`) instead of shelling to `hermes kanban`. Workers have `HERMES_KANBAN_BOARD` pinned in their env so they physically cannot see other boards. 
| Action | Purpose | |--------|---------| | `init` | Create `kanban.db` if missing. Idempotent. | -| `create "<title>"` | Create a new task. Flags: `--body`, `--assignee`, `--parent` (repeatable), `--workspace scratch\|worktree\|dir:<path>`, `--tenant`, `--priority`. | -| `list` / `ls` | List tasks. Filter with `--mine`, `--assignee`, `--status`, `--tenant`, `--archived`, `--json`. | +| `boards list` / `boards ls` | List all boards with task counts. `--json`, `--all` (include archived). | +| `boards create <slug>` | Create a new board. Flags: `--name`, `--description`, `--icon`, `--color`, `--switch` (make active). Slug is kebab-case, auto-downcased. | +| `boards switch <slug>` / `boards use` | Persist `<slug>` as the active board (writes `~/.hermes/kanban/current`). | +| `boards show` / `boards current` | Print the currently-active board's name, DB path, and task counts. | +| `boards rename <slug> "<name>"` | Change a board's display name. Slug is immutable. | +| `boards rm <slug>` | Archive (default) or hard-delete a board. `--delete` skips the archive step. Archived boards move to `boards/_archived/<slug>-<ts>/`. Refused for `default`. | +| `create "<title>"` | Create a new task on the active board. Flags: `--body`, `--assignee`, `--parent` (repeatable), `--workspace scratch\|worktree\|dir:<path>`, `--tenant`, `--priority`, `--triage`, `--idempotency-key`, `--max-runtime`, `--skill` (repeatable). | +| `list` / `ls` | List tasks on the active board. Filter with `--mine`, `--assignee`, `--status`, `--tenant`, `--archived`, `--json`. | | `show <id>` | Show a task with comments and events. `--json` for machine output. | | `assign <id> <profile>` | Assign or reassign. Use `none` to unassign. Refused while task is running. | -| `link <parent> <child>` | Add a dependency. Cycle-detected. | +| `link <parent> <child>` | Add a dependency. Cycle-detected. Both tasks must be on the same board. | | `unlink <parent> <child>` | Remove a dependency. 
| | `claim <id>` | Atomically claim a ready task. Prints resolved workspace path. | -| `comment <id> "<text>"` | Append a comment. Visible to the next worker that runs the task. | -| `complete <id>` | Mark task done. Flag: `--result "<summary>"` (goes into children's parent-result context). | +| `comment <id> "<text>"` | Append a comment. The next worker that claims the task reads it as part of its `kanban_show()` response. | +| `complete <id>` | Mark task done. Flags: `--result`, `--summary`, `--metadata`. | | `block <id> "<reason>"` | Mark task blocked. Also appends the reason as a comment. | | `unblock <id>` | Return a blocked task to ready. | | `archive <id>` | Hide from default list. `gc` will remove scratch workspaces. | | `tail <id>` | Follow a task's event stream. | -| `dispatch` | One dispatcher pass. Flags: `--dry-run`, `--max N`, `--json`. | +| `dispatch` | One dispatcher pass on the active board. Flags: `--dry-run`, `--max N`, `--json`. | | `context <id>` | Print the full context a worker would see (title + body + parent results + comments). | | `gc` | Remove scratch workspaces for archived tasks. | -All actions are also available as a slash command in the gateway (`/kanban …`), with the same argument surface. +Examples: + +```bash +# Create a second board and put a task on it without switching away. +hermes kanban boards create atm10-server --name "ATM10 Server" --icon 🎮 +hermes kanban --board atm10-server create "Restart server" --assignee ops + +# Switch the active board for subsequent calls. +hermes kanban boards switch atm10-server +hermes kanban list # shows atm10-server tasks + +# Archive a board (recoverable) or hard-delete it. +hermes kanban boards rm atm10-server +hermes kanban boards rm atm10-server --delete +``` + +Board resolution order (highest precedence first): `--board <slug>` flag → `HERMES_KANBAN_BOARD` env var → `~/.hermes/kanban/current` file → `default`. 
+ +All actions are also available as a slash command in the gateway (`/kanban …`), with the same argument surface — including `boards` subcommands and the `--board` flag. For the full design — comparison with Cline Kanban / Paperclip / NanoClaw / Gemini Enterprise, eight collaboration patterns, four user stories, concurrency correctness proof — see `docs/hermes-kanban-v1-spec.pdf` in the repository or the [Kanban user guide](/docs/user-guide/features/kanban). diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 955f460014..9bcda5695e 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -88,6 +88,10 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config | `HERMES_LOCAL_STT_COMMAND` | Optional local speech-to-text command template. Supports `{input_path}`, `{output_dir}`, `{language}`, and `{model}` placeholders | | `HERMES_LOCAL_STT_LANGUAGE` | Default language passed to `HERMES_LOCAL_STT_COMMAND` or auto-detected local `whisper` CLI fallback (default: `en`) | | `HERMES_HOME` | Override Hermes config directory (default: `~/.hermes`). Also scopes the gateway PID file and systemd service name, so multiple installations can run concurrently | +| `HERMES_KANBAN_HOME` | Override the shared Hermes root that anchors the kanban board (db + workspaces + worker logs). Falls back to `get_default_hermes_root()` (the parent of any active profile). Useful for tests and unusual deployments | +| `HERMES_KANBAN_BOARD` | Pin the active kanban board for this process. Takes precedence over `~/.hermes/kanban/current`; the dispatcher injects this into worker subprocess env so workers physically cannot see tasks on other boards. Defaults to `default`. 
Slug validation: lowercase alphanumerics + hyphens + underscores, 1-64 chars | +| `HERMES_KANBAN_DB` | Pin the kanban database file path directly (highest precedence; beats `HERMES_KANBAN_BOARD` and `HERMES_KANBAN_HOME`). The dispatcher injects this into worker subprocess env so profile workers converge on the dispatcher's board | +| `HERMES_KANBAN_WORKSPACES_ROOT` | Pin the kanban workspaces root directly (highest precedence for workspaces; beats `HERMES_KANBAN_HOME`). The dispatcher injects this into worker subprocess env | ## Provider Auth (OAuth) @@ -184,7 +188,7 @@ These variables configure the [Tool Gateway](/docs/user-guide/features/tool-gate | `TERMINAL_VERCEL_RUNTIME` | Vercel Sandbox runtime (`node24`, `node22`, `python3.13`) | | `TERMINAL_TIMEOUT` | Command timeout in seconds | | `TERMINAL_LIFETIME_SECONDS` | Max lifetime for terminal sessions in seconds | -| `TERMINAL_CWD` | Working directory for all terminal sessions | +| `TERMINAL_CWD` | Working directory for terminal sessions (gateway/cron only; CLI uses launch dir) | | `SUDO_PASSWORD` | Enable sudo without interactive prompt | For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETIME_SECONDS` controls when Hermes cleans up an idle terminal session, and later resumes may recreate the sandbox rather than keep the same live processes running. diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md index ef566cd5ba..ceab9190b8 100644 --- a/website/docs/reference/slash-commands.md +++ b/website/docs/reference/slash-commands.md @@ -70,6 +70,7 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in | `/skills` | Search, install, inspect, or manage skills from online registries | | `/cron` | Manage scheduled tasks (list, add/create, edit, pause, resume, run, remove) | | `/curator` | Background skill maintenance — `status`, `run`, `pin`, `archive`. See [Curator](/docs/user-guide/features/curator). 
| +| `/kanban <action>` | Drive the multi-profile, multi-project collaboration board without leaving chat. Full `hermes kanban` surface is available: `/kanban list`, `/kanban show t_abc`, `/kanban create "title" --assignee X`, `/kanban comment t_abc "text"`, `/kanban unblock t_abc`, `/kanban dispatch`, etc. Multi-board support included: `/kanban boards list`, `/kanban boards create <slug>`, `/kanban boards switch <slug>`, `/kanban --board <slug> <action>`. See [Kanban slash command](/docs/user-guide/features/kanban#kanban-slash-command). | | `/reload-mcp` (alias: `/reload_mcp`) | Reload MCP servers from config.yaml | | `/reload` | Reload `.env` variables into the running session (picks up new API keys without restarting) | | `/plugins` | List installed plugins and their status | @@ -157,6 +158,7 @@ The messaging gateway supports the following built-in commands inside Telegram, | `/goal <text>` | Set a standing goal Hermes works toward across turns — our take on the Ralph loop. A judge model checks after each turn; if not done, Hermes auto-continues until it is, you pause/clear it, or the turn budget (default 20) is hit. Subcommands: `/goal status`, `/goal pause`, `/goal resume`, `/goal clear`. Safe to run mid-agent for status/pause/clear; setting a new goal requires `/stop` first. See [Persistent Goals](/docs/user-guide/features/goals). | | `/footer [on\|off\|status]` | Toggle the runtime-metadata footer on final replies (shows model, tool counts, timing). | | `/curator [status\|run\|pin\|archive]` | Background skill maintenance controls. | +| `/kanban <action>` | Drive the multi-profile, multi-project collaboration board from chat — identical argument surface to the CLI. Bypasses the running-agent guard, so `/kanban unblock t_abc`, `/kanban comment t_abc "…"`, `/kanban list --mine`, `/kanban boards switch <slug>`, etc. work mid-turn. `/kanban create …` auto-subscribes the originating chat to the new task's terminal events. 
See [Kanban slash command](/docs/user-guide/features/kanban#kanban-slash-command). | | `/reload-mcp` (alias: `/reload_mcp`) | Reload MCP servers from config. | | `/yolo` | Toggle YOLO mode — skip all dangerous command approval prompts. | | `/commands [page]` | Browse all commands and skills (paginated). | @@ -173,5 +175,5 @@ The messaging gateway supports the following built-in commands inside Telegram, - `/skin`, `/snapshot`, `/gquota`, `/reload`, `/tools`, `/toolsets`, `/browser`, `/config`, `/cron`, `/skills`, `/platforms`, `/paste`, `/image`, `/statusbar`, `/plugins`, `/busy`, `/indicator`, `/redraw`, `/clear`, `/history`, `/save`, `/copy`, and `/quit` are **CLI-only** commands. - `/verbose` is **CLI-only by default**, but can be enabled for messaging platforms by setting `display.tool_progress_command: true` in `config.yaml`. When enabled, it cycles the `display.tool_progress` mode and saves to config. - `/sethome`, `/update`, `/restart`, `/approve`, `/deny`, and `/commands` are **messaging-only** commands. -- `/status`, `/background`, `/queue`, `/steer`, `/voice`, `/reload-mcp`, `/rollback`, `/debug`, `/fast`, `/footer`, `/curator`, and `/yolo` work in **both** the CLI and the messaging gateway. +- `/status`, `/background`, `/queue`, `/steer`, `/voice`, `/reload-mcp`, `/rollback`, `/debug`, `/fast`, `/footer`, `/curator`, `/kanban`, and `/yolo` work in **both** the CLI and the messaging gateway. - `/voice join`, `/voice channel`, and `/voice leave` are only meaningful on Discord. diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 18c96b8b18..517cb2e988 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -88,7 +88,7 @@ Hermes supports seven terminal backends. Each determines where the agent's shell ```yaml terminal: backend: local # local | docker | ssh | modal | daytona | vercel_sandbox | singularity - cwd: "." # Working directory ("." 
Opening any port on an internet-facing machine is a security risk. You should no
| Environment variable | Description | Default | |---------------------|-------------|---------| -| `GATEWAY_HEALTH_URL` | Base URL of the gateway's API server, e.g. `http://gateway:8642` | *(unset — local PID check only)* | -| `GATEWAY_HEALTH_TIMEOUT` | Health probe timeout in seconds | `3` | +| `HERMES_DASHBOARD` | Set to `1` (or `true` / `yes`) to launch the dashboard alongside the main command | *(unset — dashboard not started)* | +| `HERMES_DASHBOARD_HOST` | Bind address for the dashboard HTTP server | `0.0.0.0` | +| `HERMES_DASHBOARD_PORT` | Port for the dashboard HTTP server | `9119` | +| `HERMES_DASHBOARD_TUI` | Set to `1` to expose the in-browser Chat tab (embedded `hermes --tui` via PTY/WebSocket) | *(unset)* | -Without `GATEWAY_HEALTH_URL`, the dashboard falls back to local process detection — which only works when the gateway runs in the same container or on the same host. +The default `HERMES_DASHBOARD_HOST=0.0.0.0` is required for the host to reach the dashboard through the published port; the entrypoint automatically passes `--insecure` to `hermes dashboard` in that case. Override to `127.0.0.1` if you want to restrict the dashboard to in-container access only (e.g. behind a reverse proxy in a sidecar). + +:::note +The dashboard side-process is **not supervised** — if it crashes, it stays down until the container restarts. Running it as a separate container is not supported: the dashboard's gateway-liveness detection requires a shared PID namespace with the gateway process. +::: ## Running interactively (CLI chat) @@ -102,7 +107,7 @@ The `/opt/data` volume is the single source of truth for all Hermes state. It ma | `skins/` | Custom CLI skins | :::warning -Never run two Hermes **gateway** containers against the same data directory simultaneously — session files and memory stores are not designed for concurrent write access. Running a dashboard container alongside the gateway is safe since the dashboard only reads data. 
+Never run two Hermes **gateway** containers against the same data directory simultaneously — session files and memory stores are not designed for concurrent write access. ::: ## Multi-profile support @@ -188,49 +193,24 @@ services: restart: unless-stopped command: gateway run ports: - - "8642:8642" + - "8642:8642" # gateway API + - "9119:9119" # dashboard (only reached when HERMES_DASHBOARD=1) volumes: - ~/.hermes:/opt/data - networks: - - hermes-net - # Uncomment to forward specific env vars instead of using .env file: - # environment: - # - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} - # - OPENAI_API_KEY=${OPENAI_API_KEY} - # - TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN} + environment: + - HERMES_DASHBOARD=1 + # Uncomment to forward specific env vars instead of using .env file: + # - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY} + # - OPENAI_API_KEY=${OPENAI_API_KEY} + # - TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN} deploy: resources: limits: memory: 4G cpus: "2.0" - - dashboard: - image: nousresearch/hermes-agent:latest - container_name: hermes-dashboard - restart: unless-stopped - command: dashboard --host 0.0.0.0 --insecure - ports: - - "9119:9119" - volumes: - - ~/.hermes:/opt/data - environment: - - GATEWAY_HEALTH_URL=http://hermes:8642 - networks: - - hermes-net - depends_on: - - hermes - deploy: - resources: - limits: - memory: 512M - cpus: "0.5" - -networks: - hermes-net: - driver: bridge ``` -Start with `docker compose up -d` and view logs with `docker compose logs -f`. +Start with `docker compose up -d` and view logs with `docker compose logs -f`. Dashboard output is prefixed with `[dashboard]` so it's easy to filter from gateway logs. 
## Resource limits @@ -273,6 +253,7 @@ The entrypoint script (`docker/entrypoint.sh`) bootstraps the data volume on fir - Copies default `config.yaml` if missing - Copies default `SOUL.md` if missing - Syncs bundled skills using a manifest-based approach (preserves user edits) +- Optionally launches `hermes dashboard` as a background side-process when `HERMES_DASHBOARD=1` (see [Running the dashboard](#running-the-dashboard)) - Then runs `hermes` with whatever arguments you pass ## Upgrading diff --git a/website/docs/user-guide/features/kanban-tutorial.md b/website/docs/user-guide/features/kanban-tutorial.md index 89b5c1c024..f8d9501cb2 100644 --- a/website/docs/user-guide/features/kanban-tutorial.md +++ b/website/docs/user-guide/features/kanban-tutorial.md @@ -10,7 +10,11 @@ hermes dashboard # opens http://127.0.0.1:9119 in your browser # click Kanban in the left nav ``` -The dashboard is the most comfortable place to learn the system. Everything you see here is also available via `hermes kanban <verb>` on the CLI — the two surfaces share the same SQLite database at `~/.hermes/kanban.db`. +The dashboard is the most comfortable place for **you** to watch the system. Agent workers the dispatcher spawns never see the dashboard or the CLI — they drive the board through a dedicated `kanban_*` [toolset](./kanban#how-workers-interact-with-the-board) (`kanban_show`, `kanban_complete`, `kanban_block`, `kanban_heartbeat`, `kanban_comment`, `kanban_create`, `kanban_link`). All three surfaces — dashboard, CLI, worker tools — route through the same per-board SQLite DB (`~/.hermes/kanban.db` for the default board, `~/.hermes/kanban/boards/<slug>/kanban.db` for any board you create later), so each board is consistent no matter which side of the fence a change came from. + +This tutorial uses the `default` board throughout. 
If you want multiple isolated queues (one per project / repo / domain), see [Boards (multi-project)](./kanban#boards-multi-project) in the overview — the same CLI / dashboard / worker flows apply per board, and workers physically cannot see tasks on other boards. + +Throughout the tutorial, **code blocks labelled `bash` are commands *you* run.** Code blocks labelled `# worker tool calls` are what the spawned worker's model emits as tool calls — shown here so you can see the loop end-to-end, not because you'd ever run them yourself. ## The board at a glance @@ -57,22 +61,32 @@ hermes kanban create "Write auth integration tests" \ Because `API` has `SCHEMA` as its parent, and `tests` has `API` as its parent, only `SCHEMA` starts in `ready`. The other two sit in `todo` until their parents complete. This is the dependency promotion engine doing its job — no other worker will pick up the test-writing until there's an API to test. -Claim the schema task, do the work, hand off: +On the next dispatcher tick (60s by default, or immediately if you hit **Nudge dispatcher**) the `backend-dev` profile spawns as a worker with `HERMES_KANBAN_TASK=$SCHEMA` in its env. Here's what the worker's tool-call loop looks like from inside the agent: -```bash -hermes kanban claim $SCHEMA +```python +# worker tool calls — NOT commands you run +kanban_show() +# → returns title, body, worker_context, parents, prior attempts, comments -# (you design the schema, commit, etc.) 
+# (worker reads worker_context, uses terminal/file tools to design the schema, +# write migrations, run its own checks, commit — the real work happens here) -hermes kanban complete $SCHEMA \ - --summary "users(id, email, pw_hash), sessions(id, user_id, jti, expires_at); refresh tokens stored as sessions with type='refresh'" \ - --metadata '{ +kanban_heartbeat(note="schema drafted, writing migrations now") + +kanban_complete( + summary="users(id, email, pw_hash), sessions(id, user_id, jti, expires_at); " + "refresh tokens stored as sessions with type='refresh'", + metadata={ "changed_files": ["migrations/001_users.sql", "migrations/002_sessions.sql"], - "decisions": ["bcrypt for hashing", "JWT for session tokens", "7-day refresh, 15-min access"] - }' + "decisions": ["bcrypt for hashing", "JWT for session tokens", + "7-day refresh, 15-min access"], + }, +) ``` -When `SCHEMA` hits `done`, the dependency engine promotes `API` to `ready` automatically. The API worker, when it picks up, will read `SCHEMA`'s summary and metadata in its context — so it knows the schema decisions without re-reading a long design doc. +`kanban_show` defaults `task_id` to `$HERMES_KANBAN_TASK`, so the worker doesn't need to know its own id. `kanban_complete` writes the summary + metadata onto the current `task_runs` row, closes that run, and transitions the task to `done` — all in one atomic hop through `kanban_db`. + +When `SCHEMA` hits `done`, the dependency engine promotes `API` to `ready` automatically. The API worker, when it picks up, will call `kanban_show()` and see `SCHEMA`'s summary and metadata attached to the parent handoff — so it knows the schema decisions without re-reading a long design doc. Click the completed schema task on the board and the drawer shows everything: @@ -80,7 +94,7 @@ Click the completed schema task on the board and the drawer shows everything: The Run History section at the bottom is the key addition. 
One attempt: outcome `completed`, worker `@backend-dev`, duration, timestamp, and the handoff summary in full. The metadata blob (`changed_files`, `decisions`) is stored on the run too and surfaced to any downstream worker that reads this parent. -On the CLI: +You can inspect the same data from your terminal at any time — these commands are **you** peeking at the board, not the worker: ```bash hermes kanban show $SCHEMA @@ -125,7 +139,7 @@ Now filter the board to `content-ops` (or just search for "Transcribe") and you Two transcribes done, one running, two ready waiting for the next dispatcher tick. The In Progress column is grouped by profile (the "Lanes by profile" default) so you see each worker's active task without scanning a mixed list. The dispatcher will promote the next ready task to running as soon as the current one completes. With three daemons working on three assignee pools in parallel, the whole content queue drains without further human input. -**Everything Story 1 said about structured handoff still applies here.** A translator worker completing a call can pass `--summary "translated 4 pages, style matched existing marketing voice"` and `--metadata '{"duration_seconds": 720, "tokens_used": 2100}'` — useful for analytics and for any downstream task that depends on this one. +**Everything Story 1 said about structured handoff still applies here.** A translator worker completing a call emits `kanban_complete(summary="translated 4 pages, style matched existing marketing voice", metadata={"duration_seconds": 720, "tokens_used": 2100})` — useful for analytics and for any downstream task that depends on this one. ## Story 3 — Role pipeline with retry @@ -137,32 +151,64 @@ The dashboard view, filtered by `auth-project`: Three-stage chain visible at once: `Spec: password reset flow` (DONE, pm), `Implement password reset flow` (DONE, backend-dev), `Review password reset PR` (READY, reviewer). 
Each has its parent in green at the bottom and children as dependencies. -The interesting one is the implementation task, because it was blocked and retried: +The interesting one is the implementation task, because it was blocked and retried. Here's the full three-agent choreography, shown as the tool calls each worker's model makes: -```bash -# PM completes the spec with acceptance criteria in metadata -hermes kanban complete $SPEC \ - --summary "spec approved; POST /forgot-password sends email, GET /reset/:token renders form, POST /reset applies new password" \ - --metadata '{"acceptance": [ +```python +# --- PM worker spawns on $SPEC and writes the acceptance criteria --- +# worker tool calls +kanban_show() +kanban_complete( + summary="spec approved; POST /forgot-password sends email, " + "GET /reset/:token renders form, POST /reset applies new password", + metadata={"acceptance": [ "expired token returns 410", "reused last-3 password returns 400 with message", - "successful reset invalidates all active sessions" - ]}' + "successful reset invalidates all active sessions", + ]}, +) +# → $SPEC is done; $IMPL auto-promotes from todo to ready -# Engineer claims + implements, but review blocks it for missing strength check -hermes kanban claim $IMPL -hermes kanban block $IMPL "Review: password strength check missing, reset link isn't single-use (can be replayed within 30min)" +# --- Engineer worker spawns on $IMPL (first attempt) --- +# worker tool calls +kanban_show() # reads $SPEC's summary + acceptance metadata in worker_context +# (engineer writes code, runs tests, opens PR) +# Reviewer feedback arrives — engineer decides the concerns are valid and blocks +kanban_block( + reason="Review: password strength check missing, reset link isn't " + "single-use (can be replayed within 30min)", +) +# → $IMPL transitions to blocked; run 1 closes with outcome='blocked' +``` -# Engineer iterates, resolves, completes +Now you (the human, or a separate reviewer profile) read 
the block reason, decide the fix direction is clear, and unblock from the dashboard's "Unblock" button — or from the CLI / slash command: + +```bash hermes kanban unblock $IMPL -hermes kanban claim $IMPL -hermes kanban complete $IMPL \ - --summary "added zxcvbn strength check, reset tokens are now single-use (stored + deleted on success)" \ - --metadata '{ - "changed_files": ["auth/reset.py", "auth/tests/test_reset.py", "migrations/003_single_use_reset_tokens.sql"], +# or from a chat: /kanban unblock $IMPL +``` + +The dispatcher promotes `$IMPL` back to `ready` and, on the next tick, respawns the `backend-dev` worker. This second spawn is a **new run** on the same task: + +```python +# --- Engineer worker spawns on $IMPL (second attempt) --- +# worker tool calls +kanban_show() +# → worker_context now includes the run 1 block reason, so this worker knows +# which two things to fix instead of re-reading the whole spec +# (engineer adds zxcvbn check, makes reset tokens single-use, re-runs tests) +kanban_complete( + summary="added zxcvbn strength check, reset tokens are now single-use " + "(stored + deleted on success)", + metadata={ + "changed_files": [ + "auth/reset.py", + "auth/tests/test_reset.py", + "migrations/003_single_use_reset_tokens.sql", + ], "tests_run": 11, - "review_iteration": 2 - }' + "review_iteration": 2, + }, +) ``` Click the implementation task. The drawer shows **two attempts**: @@ -178,7 +224,7 @@ The reviewer picks up next. When they open `Review password reset PR`, they see: ![Reviewer's drawer view of the pipeline](/img/kanban-tutorial/09-drawer-pipeline-review.png) -The parent link is the completed implementation. When the reviewer's worker calls `build_worker_context`, it pulls the parent's most-recent-completed-run summary + metadata — so the reviewer reads "added zxcvbn strength check, reset tokens are now single-use" and has the list of changed files in hand before looking at a diff. +The parent link is the completed implementation. 
When the reviewer's worker spawns on `Review password reset PR` and calls `kanban_show()`, the returned `worker_context` includes the parent's most-recent-completed-run summary + metadata — so the reviewer reads "added zxcvbn strength check, reset tokens are now single-use" and has the list of changed files in hand before looking at a diff. ## Story 4 — Circuit breaker and crash recovery @@ -234,18 +280,18 @@ The drawer shows the full two-attempt history: Run 1 — `crashed`, with the error `OOM kill at row 2.3M (process 99999 gone)`. Run 2 — `completed`, with `"strategy": "chunked with LIMIT + WHERE id > last_id"` in its metadata. The retrying worker saw the crash of run 1 in its context and picked a safer strategy; the metadata makes it obvious to a future observer (or postmortem writer) what changed. -## Structured handoff — why `--summary` and `--metadata` matter +## Structured handoff — why `summary` and `metadata` matter -In every story above, workers passed `--summary` and `--metadata` on completion. That's not decoration — it's the primary handoff channel between stages of a workflow. +In every story above, workers called `kanban_complete(summary=..., metadata=...)` at the end. That's not decoration — it's the primary handoff channel between stages of a workflow. -When a worker on task B reads its context, it gets: +When a worker on task B is spawned and calls `kanban_show()`, the `worker_context` it gets back includes: - B's **prior attempts** (previous runs: outcome, summary, error, metadata) so a retrying worker doesn't repeat a failed path. - **Parent task results** — for each parent, the most-recent completed run's summary and metadata — so downstream workers see why and how the upstream work was done. -This replaces the "dig through comments and the work output" dance that plagues flat kanban systems. A PM writes acceptance criteria in the spec's metadata, and the engineer's worker sees them structurally. 
An engineer records which tests they ran and how many passed, and the reviewer's worker has that list in hand before opening a diff. +This replaces the "dig through comments and the work output" dance that plagues flat kanban systems. A PM writes acceptance criteria in the spec's metadata, and the engineer's worker sees them structurally in the parent handoff. An engineer records which tests they ran and how many passed, and the reviewer's worker has that list in hand before opening a diff. -The bulk-close guard exists because this data is per-run. `hermes kanban complete a b c --summary X` is refused — copy-pasting the same summary to three tasks is almost always wrong. Bulk close without the handoff flags still works for the common "I finished a pile of admin tasks" case. +The bulk-close guard exists because this data is per-run. `hermes kanban complete a b c --summary X` (you, from the CLI) is refused — copy-pasting the same summary to three tasks is almost always wrong. Bulk close without the handoff flags still works for the common "I finished a pile of admin tasks" case. The tool surface doesn't expose a bulk variant at all; `kanban_complete` is always single-task-at-a-time for the same reason. ## Inspecting a task currently running diff --git a/website/docs/user-guide/features/kanban.md b/website/docs/user-guide/features/kanban.md index 7bbea34855..f1bad41a20 100644 --- a/website/docs/user-guide/features/kanban.md +++ b/website/docs/user-guide/features/kanban.md @@ -10,6 +10,15 @@ description: "Durable SQLite-backed task board for coordinating multiple Hermes Hermes Kanban is a durable task board, shared across all your Hermes profiles, that lets multiple named agents collaborate on work without fragile in-process subagent swarms. Every task is a row in `~/.hermes/kanban.db`; every handoff is a row anyone can read and write; every worker is a full OS process with its own identity. 
+### Two surfaces: the model talks through tools, you talk through the CLI + +The board has two front doors, both backed by the same `~/.hermes/kanban.db`: + +- **Agents drive the board through a dedicated `kanban_*` toolset** — `kanban_show`, `kanban_complete`, `kanban_block`, `kanban_heartbeat`, `kanban_comment`, `kanban_create`, `kanban_link`. The dispatcher spawns each worker with these tools already in its schema; the model reads its task and hands work off by calling them directly, *not* by shelling out to `hermes kanban`. See [How workers interact with the board](#how-workers-interact-with-the-board) below. +- **You (and scripts, and cron) drive the board through `hermes kanban …`** on the CLI, `/kanban …` as a slash command, or the dashboard. These are for humans and automation — the places without a tool-calling model behind them. + +Both surfaces route through the same `kanban_db` layer, so reads see a consistent view and writes can't drift. The rest of this page shows CLI examples because they're easy to copy-paste, but every CLI verb has a tool-call equivalent the model uses. + This is the shape that covers the workloads `delegate_task` can't: - **Research triage** — parallel researchers + analyst + writer, human-in-the-loop. @@ -45,36 +54,131 @@ They coexist: a kanban worker may call `delegate_task` internally during its run ## Core concepts +- **Board** — a standalone queue of tasks with its own SQLite DB, workspaces + directory, and dispatcher loop. A single install can have many boards + (e.g. one per project, repo, or domain); see [Boards (multi-project)](#boards-multi-project) + below. Single-project users stay on the `default` board and never see the + word "board" outside this docs section. - **Task** — a row with title, optional body, one assignee (a profile name), status (`triage | todo | ready | running | blocked | done | archived`), optional tenant namespace, optional idempotency key (dedup for retried automation). 
- **Link** — `task_links` row recording a parent → child dependency. The dispatcher promotes `todo → ready` when all parents are `done`. - **Comment** — the inter-agent protocol. Agents and humans append comments; when a worker is (re-)spawned it reads the full comment thread as part of its context. - **Workspace** — the directory a worker operates in. Three kinds: - - `scratch` (default) — fresh tmp dir under `~/.hermes/kanban/workspaces/<id>/`. + - `scratch` (default) — fresh tmp dir under `~/.hermes/kanban/workspaces/<id>/` (or `~/.hermes/kanban/boards/<slug>/workspaces/<id>/` on non-default boards). - `dir:<path>` — an existing shared directory (Obsidian vault, mail ops dir, per-account folder). **Must be an absolute path.** Relative paths like `dir:../tenants/foo/` are rejected at dispatch because they'd resolve against whatever CWD the dispatcher happens to be in, which is ambiguous and a confused-deputy escape vector. The path is otherwise trusted — it's your box, your filesystem, the worker runs with your uid. This is the trusted-local-user threat model; kanban is single-host by design. - `worktree` — a git worktree under `.worktrees/<id>/` for coding tasks. Worker-side `git worktree add` creates it. -- **Dispatcher** — a long-lived loop that, every N seconds (default 60): reclaims stale claims, reclaims crashed workers (PID gone but TTL not yet expired), promotes ready tasks, atomically claims, spawns assigned profiles. Runs **inside the gateway** by default (`kanban.dispatch_in_gateway: true`). After ~5 consecutive spawn failures on the same task the dispatcher auto-blocks it with the last error as the reason — prevents thrashing on tasks whose profile doesn't exist, workspace can't mount, etc. -- **Tenant** — optional string namespace. One specialist fleet can serve multiple businesses (`--tenant business-a`) with data isolation by workspace path and memory key prefix. 
+- **Dispatcher** — a long-lived loop that, every N seconds (default 60): reclaims stale claims, reclaims crashed workers (PID gone but TTL not yet expired), promotes ready tasks, atomically claims, spawns assigned profiles. Runs **inside the gateway** by default (`kanban.dispatch_in_gateway: true`). One dispatcher sweeps all boards per tick; workers are spawned with `HERMES_KANBAN_BOARD` pinned so they can't see other boards. After ~5 consecutive spawn failures on the same task the dispatcher auto-blocks it with the last error as the reason — prevents thrashing on tasks whose profile doesn't exist, workspace can't mount, etc. +- **Tenant** — optional string namespace *within* a board. One specialist fleet can serve multiple businesses (`--tenant business-a`) with data isolation by workspace path and memory key prefix. Tenants are a soft filter; boards are the hard isolation boundary. + +## Boards (multi-project) + +Boards let you separate unrelated streams of work — one per project, repo, +or domain — into isolated queues. A new install has exactly one board +called `default` (DB at `~/.hermes/kanban.db` for back-compat). Users who +only want one stream of work never need to know about boards; the feature +is opt-in. + +Per-board isolation is absolute: + +- Separate SQLite DB per board (`~/.hermes/kanban/boards/<slug>/kanban.db`). +- Separate `workspaces/` and `logs/` directories. +- Workers spawned for a task see **only** their board's tasks — the + dispatcher sets `HERMES_KANBAN_BOARD` in the child env and every + `kanban_*` tool the worker has access to reads it. +- Linking tasks across boards is not allowed (keeps the schema simple; if + you really need cross-project refs, use free-text mentions and look + them up by id manually). + +### Managing boards from the CLI + +```bash +# See what's on disk. Fresh installs show only "default". +hermes kanban boards list + +# Create a new board. 
chars, must start with an alphanumeric character. Uppercase input is auto-downcased.
+When the dispatcher picks up the newly created task (say it got the id `t_abcd`) and spawns the `researcher` profile, the very first thing that worker's model does is call `kanban_show()` to read its task. It doesn't run `hermes kanban show t_abcd`.
That env var is the gate for a dedicated **kanban toolset** — 7 tools that the normal agent schema never sees: +**Workers do not shell out to `hermes kanban`.** When the dispatcher spawns a worker it sets `HERMES_KANBAN_TASK=t_abcd` in the child's env, and that env var flips on a dedicated **kanban toolset** in the model's schema — seven tools that read and mutate the board directly via the Python `kanban_db` layer, same as the CLI does. A running worker calls these like any other tool; it never sees or needs the `hermes kanban` CLI. -| Tool | Purpose | -|---|---| -| `kanban_show` | Read the current task (title, body, prior attempts, parent handoffs, comments, full `worker_context`). Defaults to the env's task id. | -| `kanban_complete` | Finish with `summary` + `metadata` structured handoff. | -| `kanban_block` | Escalate for human input. | -| `kanban_heartbeat` | Signal liveness during long operations. | -| `kanban_comment` | Append to the task thread. | -| `kanban_create` | (Orchestrators) fan out into child tasks. | -| `kanban_link` | (Orchestrators) add dependency edges after the fact. | +| Tool | Purpose | Required params | +|---|---|---| +| `kanban_show` | Read the current task (title, body, prior attempts, parent handoffs, comments, full pre-formatted `worker_context`). Defaults to the env's task id. | — | +| `kanban_complete` | Finish with `summary` + `metadata` structured handoff. | at least one of `summary` / `result` | +| `kanban_block` | Escalate for human input with a `reason`. | `reason` | +| `kanban_heartbeat` | Signal liveness during long operations. Pure side-effect. | — | +| `kanban_comment` | Append a durable note to the task thread. | `task_id`, `body` | +| `kanban_create` | (Orchestrators) fan out into child tasks with an `assignee`, optional `parents`, `skills`, etc. | `title`, `assignee` | +| `kanban_link` | (Orchestrators) add a `parent_id → child_id` dependency edge after the fact. 
| `parent_id`, `child_id` | -**Why tools and not just shelling to `hermes kanban`?** Three reasons: +A typical worker turn looks like: -1. **Backend portability.** Workers whose terminal tool points at a remote backend (Docker / Modal / Singularity / SSH) would run `hermes kanban complete` inside the container where `hermes` isn't installed and the DB isn't mounted. The kanban tools run in the agent's own Python process and always reach `~/.hermes/kanban.db` regardless of terminal backend. -2. **No shell-quoting fragility.** Passing `--metadata '{"files": [...]}'` through shlex + argparse is a latent footgun. Structured tool args skip it. +``` +# Model's tool calls, in order: +kanban_show() # no args — uses HERMES_KANBAN_TASK +# (model reads the returned worker_context, does the work via terminal/file tools) +kanban_heartbeat(note="halfway through — 4 of 8 files transformed") +# (more work) +kanban_complete( + summary="migrated limiter.py to token-bucket; added 14 tests, all pass", + metadata={"changed_files": ["limiter.py", "tests/test_limiter.py"], "tests_run": 14}, +) +``` + +An **orchestrator** worker fans out instead: + +``` +kanban_show() +kanban_create( + title="research ICP funding 2024-2026", + assignee="researcher-a", + body="focus on seed + series A, North America, AI-adjacent", +) +# → returns {"task_id": "t_r1", ...} +kanban_create(title="research ICP funding — EU angle", assignee="researcher-b", body="…") +# → returns {"task_id": "t_r2", ...} +kanban_create( + title="synthesize findings into launch brief", + assignee="writer", + parents=["t_r1", "t_r2"], # promotes to ready when both complete + body="one-pager, 300 words, neutral tone", +) +kanban_complete(summary="decomposed into 2 research tasks + 1 writer; linked dependencies") +``` + +The three "(Orchestrators)" tools — `kanban_create`, `kanban_link`, and `kanban_comment` on foreign tasks — are available to every worker; the convention (enforced by the `kanban-orchestrator` skill) is that worker 
profiles don't fan out and orchestrator profiles don't execute. + +### Why tools instead of shelling to `hermes kanban` + +Three reasons: + +1. **Backend portability.** Workers whose terminal tool points at a remote backend (Docker / Modal / Singularity / SSH) would run `hermes kanban complete` *inside* the container, where `hermes` isn't installed and `~/.hermes/kanban.db` isn't mounted. The kanban tools run in the agent's own Python process and always reach `~/.hermes/kanban.db` regardless of terminal backend. +2. **No shell-quoting fragility.** Passing `--metadata '{"files": [...]}'` through shlex + argparse is a latent footgun. Structured tool args skip it entirely. 3. **Better errors.** Tool results are structured JSON the model can reason about, not stderr strings it has to parse. **Zero schema footprint on normal sessions.** A regular `hermes chat` session has zero `kanban_*` tools in its schema. The `check_fn` on each tool only returns True when `HERMES_KANBAN_TASK` is set, which only happens when the dispatcher spawned this process. No tool bloat for users who never touch kanban. @@ -151,14 +294,14 @@ The `kanban-worker` and `kanban-orchestrator` skills teach the model which tool ### The worker skill -Any profile that should be able to work kanban tasks must load the `kanban-worker` skill. It teaches the worker the full lifecycle: +Any profile that should be able to work kanban tasks must load the `kanban-worker` skill. It teaches the worker the full lifecycle in **tool calls**, not CLI commands: 1. On spawn, call `kanban_show()` to read title + body + parent handoffs + prior attempts + full comment thread. -2. `cd $HERMES_KANBAN_WORKSPACE` and do the work there. +2. `cd $HERMES_KANBAN_WORKSPACE` (via the terminal tool) and do the work there. 3. Call `kanban_heartbeat(note="...")` every few minutes during long operations. 4. Complete with `kanban_complete(summary="...", metadata={...})`, or `kanban_block(reason="...")` if stuck. 
-Load it with: +Load it with (this one is **you**, installing into a profile — not a tool call): ```bash hermes skills install devops/kanban-worker @@ -168,22 +311,9 @@ The dispatcher also auto-passes `--skills kanban-worker` when spawning every wor ### Pinning extra skills to a specific task -Sometimes a single task needs specialist context the assignee profile doesn't carry by default — a translation job that needs the `translation` skill, a review task that needs `github-code-review`, a security audit that needs `security-pr-audit`. Rather than editing the assignee's profile every time, attach the skills directly to the task: +Sometimes a single task needs specialist context the assignee profile doesn't carry by default — a translation job that needs the `translation` skill, a review task that needs `github-code-review`, a security audit that needs `security-pr-audit`. Rather than editing the assignee's profile every time, attach the skills directly to the task. -```bash -# CLI — repeat --skill for each extra skill -hermes kanban create "translate README to Japanese" \ - --assignee linguist \ - --skill translation - -# Multiple skills -hermes kanban create "audit auth flow" \ - --assignee reviewer \ - --skill security-pr-audit \ - --skill github-code-review -``` - -From the dashboard's inline create form, type the skills comma-separated into the **skills** field. 
From another agent (orchestrator pattern), use `kanban_create(skills=[...])`: +**From an orchestrator agent** (the usual case — one agent routing work to another), use the `kanban_create` tool's `skills` array: ``` kanban_create( @@ -191,13 +321,53 @@ kanban_create( assignee="linguist", skills=["translation"], ) + +kanban_create( + title="audit auth flow", + assignee="reviewer", + skills=["security-pr-audit", "github-code-review"], +) ``` +**From a human (CLI / slash command)**, repeat `--skill` for each one: + +```bash +hermes kanban create "translate README to Japanese" \ + --assignee linguist \ + --skill translation + +hermes kanban create "audit auth flow" \ + --assignee reviewer \ + --skill security-pr-audit \ + --skill github-code-review +``` + +**From the dashboard**, type the skills comma-separated into the **skills** field of the inline create form. + These skills are **additive** to the built-in `kanban-worker` — the dispatcher emits one `--skills <name>` flag for each (and for the built-in), so the worker spawns with all of them loaded. The skill names must match skills that are actually installed on the assignee's profile (run `hermes skills list` to see what's available); there's no runtime install. ### The orchestrator skill -A **well-behaved orchestrator does not do the work itself.** It decomposes the user's goal into tasks, links them, assigns each to a specialist, and steps back. The `kanban-orchestrator` skill encodes this: anti-temptation rules, a standard specialist roster (`researcher`, `writer`, `analyst`, `backend-eng`, `reviewer`, `ops`), and a decomposition playbook. +A **well-behaved orchestrator does not do the work itself.** It decomposes the user's goal into tasks, links them, assigns each to a specialist, and steps back. 
The `kanban-orchestrator` skill encodes this as tool-call patterns: anti-temptation rules, a standard specialist roster (`researcher`, `writer`, `analyst`, `backend-eng`, `reviewer`, `ops`), and a decomposition playbook keyed on `kanban_create` / `kanban_link` / `kanban_comment`. + +A canonical orchestrator turn (two parallel researchers handing off to a writer): + +``` +# Goal from user: "draft a launch post on the ICP funding landscape" +kanban_create(title="research ICP funding, NA angle", assignee="researcher-a", body="…") # → t_r1 +kanban_create(title="research ICP funding, EU angle", assignee="researcher-b", body="…") # → t_r2 +kanban_create( + title="synthesize ICP funding research into launch post draft", + assignee="writer", + parents=["t_r1", "t_r2"], # promoted to 'ready' when both researchers complete + body="one-pager, neutral tone, cite sources inline", +) # → t_w1 +# Optional: add cross-cutting deps discovered later without re-creating tasks +kanban_link(parent_id="t_r1", child_id="t_followup") +kanban_complete( + summary="decomposed into 2 parallel research tasks → 1 synthesis task; writer starts when both researchers finish", +) +``` Load it into your orchestrator profile: @@ -324,6 +494,8 @@ The GUI is deliberately thin. Everything the plugin does is reachable from the C ## CLI command reference +This is the surface **you** (or scripts, cron, the dashboard) use to drive the board. Workers running inside the dispatcher use the `kanban_*` [tool surface](#how-workers-interact-with-the-board) for the same operations — the CLI here and the tools there both route through `kanban_db`, so the two surfaces agree by construction. + ``` hermes kanban init # create kanban.db + print daemon hint hermes kanban create "<title>" [--body ...] 
[--assignee <profile>] @@ -369,7 +541,57 @@ hermes kanban gc [--event-retention-days N] # workspaces + old events [--log-retention-days N] ``` -All commands are also available as a slash command in the gateway (`/kanban list`, `/kanban comment t_abc "need docs"`, etc.). The slash command bypasses the running-agent guard, so you can `/kanban unblock` a stuck worker while the main agent is still chatting. +All commands are also available as a slash command in the interactive CLI and in the messaging gateway (see [`/kanban` slash command](#kanban-slash-command) below). + +## `/kanban` slash command {#kanban-slash-command} + +Every `hermes kanban <action>` verb is also reachable as `/kanban <action>` — from inside an interactive `hermes chat` session **and** from any gateway platform (Telegram, Discord, Slack, WhatsApp, Signal, Matrix, Mattermost, email, SMS). Both surfaces call the exact same `hermes_cli.kanban.run_slash()` entry point that reuses the `hermes kanban` argparse tree, so the argument surface, flags, and output format are identical across CLI, `/kanban`, and `hermes kanban`. You don't have to leave the chat to drive the board. + +``` +/kanban list +/kanban show t_abcd +/kanban create "write launch post" --assignee writer --parent t_research +/kanban comment t_abcd "looks good, ship it" +/kanban unblock t_abcd +/kanban dispatch --max 3 +``` + +Quote multi-word arguments the same way you would on a shell — `run_slash` parses the rest of the line with `shlex.split`, so `"..."` and `'...'` both work. + +### Mid-run usage: `/kanban` bypasses the running-agent guard + +The gateway normally queues slash commands and user messages while an agent is still thinking — that's what stops you from accidentally starting a second turn while the first is in flight. 
**`/kanban` is explicitly exempted from this guard.** The board lives in `~/.hermes/kanban.db`, not in the running agent's state, so reads (`list`, `show`, `context`, `tail`, `watch`, `stats`, `runs`) and writes (`comment`, `unblock`, `block`, `assign`, `archive`, `create`, `link`, …) all go through immediately, even mid-turn. + +This is the whole point of the separation: + +- A worker blocks waiting on a peer → you send `/kanban unblock t_abcd` from your phone and the dispatcher picks the peer up on its next tick. The blocked worker isn't interrupted — it just stops being blocked. +- You spot a card that needs human context → `/kanban comment t_xyz "use the 2026 schema, not 2025"` lands on the task thread and the *next* run of that task will read it in `kanban_show()`. +- You want to know what your fleet is doing without stopping the orchestrator → `/kanban list --mine` or `/kanban stats` inspects the board without touching your main conversation. + +### Auto-subscribe on `/kanban create` (gateway only) + +When you create a task from the gateway with `/kanban create "…"`, the originating chat (platform + chat id + thread id) is automatically subscribed to that task's terminal events (`completed`, `blocked`, `gave_up`, `crashed`, `timed_out`). You'll get one message back per terminal event — including the first line of the worker's result summary on `completed` — without having to poll or remember the task id. + +``` +you> /kanban create "transcribe today's podcast" --assignee transcriber +bot> Created t_9fc1a3 (ready, assignee=transcriber) + (subscribed — you'll be notified when t_9fc1a3 completes or blocks) + +… ~8 minutes later … + +bot> ✓ t_9fc1a3 completed by transcriber + transcribed 42 minutes, saved to podcast/2026-05-04.md +``` + +Subscriptions auto-remove themselves once the task reaches `done` or `archived`. 
If you script a create with `--json` (machine output) the auto-subscribe is skipped — the assumption is that scripted callers want to manage subscriptions explicitly via `/kanban notify-subscribe`.
+
+### Output truncation in messaging
+
+Gateway platforms have practical message-length caps. If `/kanban list`, `/kanban show`, or `/kanban tail` produce more than ~3800 characters of output, the response is truncated with a ``… (truncated; use `hermes kanban …` in your terminal for full output)`` footer. The CLI surface has no such cap.
+
+### Autocomplete
+
+In the interactive CLI, typing `/kanban ` and hitting Tab cycles through the built-in subcommand list (`list`, `ls`, `show`, `create`, `assign`, `link`, `unlink`, `claim`, `comment`, `complete`, `block`, `unblock`, `archive`, `tail`, `dispatch`, `context`, `init`, `gc`). The remaining verbs listed in the CLI reference above (`watch`, `stats`, `runs`, `log`, `assignees`, `heartbeat`, `notify-subscribe`, `notify-list`, `notify-unsubscribe`, `daemon`) also work — they're just not in the autocomplete hint list yet.
 
 ## Collaboration patterns
 
@@ -424,16 +646,26 @@ A task is a logical unit of work; a **run** is one attempt to execute it. When t
 Why two tables instead of just mutating the task: you need **full attempt history** for real-world postmortems ("the second reviewer attempt got to approve, the third merged"), and you need a clean place to hang per-attempt metadata — which files changed, which tests ran, which findings a reviewer noted. Those are run facts, not task facts.
 
-Runs are also where **structured handoff** lives. When a worker completes a task it can pass:
+Runs are also where **structured handoff** lives. When a worker completes a task (via `kanban_complete(...)`) it can pass:
 
-- `--result "<short log line>"` — goes on the task row as before (for back-compat).
-- `--summary "<human handoff>"` — goes on the run; downstream children see it in their `build_worker_context`.
-- `--metadata '{"changed_files": [...], "tests_run": 12}'` — JSON dict on the run; children see it serialized alongside the summary. +- `summary` (tool param) / `--summary` (CLI) — human handoff; goes on the run; downstream children see it in their `build_worker_context`. +- `metadata` (tool param) / `--metadata` (CLI) — free-form JSON dict on the run; children see it serialized alongside the summary. +- `result` (tool param) / `--result` (CLI) — short log line that goes on the task row (legacy field, kept for back-compat). Downstream children read the most recent completed run's summary + metadata for each parent. Retrying workers read the prior attempts on their own task (outcome, summary, error) so they don't repeat a path that already failed. +``` +# What a worker actually does — a tool call, from inside the agent loop: +kanban_complete( + summary="implemented token bucket, keys on user_id with IP fallback, all tests pass", + metadata={"changed_files": ["limiter.py", "tests/test_limiter.py"], "tests_run": 14}, + result="rate limiter shipped", +) +``` + +The same handoff is reachable from the CLI when you (the human) need to close out a task a worker can't — e.g. a task that was abandoned, or one you marked done manually from the dashboard: + ```bash -# Worker completes with a structured handoff: hermes kanban complete t_abcd \ --result "rate limiter shipped" \ --summary "implemented token bucket, keys on user_id with IP fallback, all tests pass" \ diff --git a/website/docs/user-guide/features/plugins.md b/website/docs/user-guide/features/plugins.md index 0e99fd12d2..ee19888225 100644 --- a/website/docs/user-guide/features/plugins.md +++ b/website/docs/user-guide/features/plugins.md @@ -9,6 +9,11 @@ description: "Extend Hermes with custom tools, hooks, and integrations via the p Hermes has a plugin system for adding custom tools, hooks, and integrations without modifying core code. 
+If you want to create a custom tool for yourself, your team, or one project, +this is usually the right path. The developer guide's +[Adding Tools](/docs/developer-guide/adding-tools) page is for built-in Hermes +core tools that live in `tools/` and `toolsets.py`. + **→ [Build a Hermes Plugin](/docs/guides/build-a-hermes-plugin)** — step-by-step guide with a complete working example. ## Quick overview @@ -42,6 +47,8 @@ description: A minimal example plugin ```python """Minimal Hermes plugin — registers a tool and a hook.""" +import json + def register(ctx): # --- Tool: hello_world --- @@ -60,11 +67,18 @@ def register(ctx): }, } - def handle_hello(params): + def handle_hello(params, **kwargs): + del kwargs name = params.get("name", "World") - return f"Hello, {name}! 👋 (from the hello-world plugin)" + return json.dumps({"success": True, "greeting": f"Hello, {name}!"}) - ctx.register_tool("hello_world", schema, handle_hello) + ctx.register_tool( + name="hello_world", + toolset="hello_world", + schema=schema, + handler=handle_hello, + description="Return a friendly greeting for the given name.", + ) # --- Hook: log every tool call --- def on_tool_call(tool_name, params, result): @@ -81,7 +95,7 @@ Project-local plugins under `./.hermes/plugins/` are disabled by default. 
Enable | Capability | How | |-----------|-----| -| Add tools | `ctx.register_tool(name, schema, handler)` | +| Add tools | `ctx.register_tool(name=..., toolset=..., schema=..., handler=...)` | | Add hooks | `ctx.register_hook("post_tool_call", callback)` | | Add slash commands | `ctx.register_command(name, handler, description)` — adds `/name` in CLI and gateway sessions | | Add CLI commands | `ctx.register_cli_command(name, help, setup_fn, handler_fn)` — adds `hermes <plugin> <subcommand>` | diff --git a/website/docs/user-guide/messaging/open-webui.md b/website/docs/user-guide/messaging/open-webui.md index efdf901371..9c90eb7998 100644 --- a/website/docs/user-guide/messaging/open-webui.md +++ b/website/docs/user-guide/messaging/open-webui.md @@ -26,11 +26,15 @@ Open WebUI talks to Hermes server-to-server, so you do not need `API_SERVER_CORS ### 1. Enable the API server -Add to `~/.hermes/.env`: +```bash +hermes config set API_SERVER_ENABLED true +hermes config set API_SERVER_KEY your-secret-key +``` + +`hermes config set` auto-routes the flag to `config.yaml` and the secret to `~/.hermes/.env`. If the gateway is already running, restart it so the change takes effect: ```bash -API_SERVER_ENABLED=true -API_SERVER_KEY=your-secret-key +hermes gateway stop && hermes gateway ``` ### 2. Start Hermes Agent gateway @@ -45,12 +49,25 @@ You should see: [API Server] API server listening on http://127.0.0.1:8642 ``` -### 3. Start Open WebUI +### 3. Verify the API server is reachable + +```bash +curl -s http://127.0.0.1:8642/health +# {"status": "ok", ...} + +curl -s -H "Authorization: Bearer your-secret-key" http://127.0.0.1:8642/v1/models +# {"object":"list","data":[{"id":"hermes-agent", ...}]} +``` + +If `/health` fails, the gateway didn't pick up `API_SERVER_ENABLED=true` — restart it. If `/v1/models` returns `401`, your `Authorization` header doesn't match `API_SERVER_KEY`. + +### 4. 
Start Open WebUI ```bash docker run -d -p 3000:8080 \ -e OPENAI_API_BASE_URL=http://host.docker.internal:8642/v1 \ -e OPENAI_API_KEY=your-secret-key \ + -e ENABLE_OLLAMA_API=false \ --add-host=host.docker.internal:host-gateway \ -v open-webui:/app/backend/data \ --name open-webui \ @@ -58,7 +75,11 @@ docker run -d -p 3000:8080 \ ghcr.io/open-webui/open-webui:main ``` -### 4. Open the UI +`ENABLE_OLLAMA_API=false` suppresses the default Ollama backend, which would otherwise show up empty and clutter the model picker. Omit it if you actually have Ollama running alongside. + +First launch takes 15–30 seconds: Open WebUI downloads sentence-transformer embedding models (~150MB) the first time it starts. Wait for `docker logs open-webui` to settle before opening the UI. + +### 5. Open the UI Go to **http://localhost:3000**. Create your admin account (the first user becomes admin). You should see your agent in the model dropdown (named after your profile, or **hermes-agent** for the default profile). Start chatting! @@ -77,6 +98,7 @@ services: environment: - OPENAI_API_BASE_URL=http://host.docker.internal:8642/v1 - OPENAI_API_KEY=your-secret-key + - ENABLE_OLLAMA_API=false extra_hosts: - "host.docker.internal:host-gateway" restart: always @@ -181,8 +203,9 @@ With streaming enabled (the default), you'll see brief inline indicators as tool - **Check the URL has `/v1` suffix**: `http://host.docker.internal:8642/v1` (not just `:8642`) - **Verify the gateway is running**: `curl http://localhost:8642/health` should return `{"status": "ok"}` -- **Check model listing**: `curl http://localhost:8642/v1/models` should return a list with `hermes-agent` +- **Check model listing**: `curl -H "Authorization: Bearer your-secret-key" http://localhost:8642/v1/models` should return a list with `hermes-agent` - **Docker networking**: From inside Docker, `localhost` means the container, not your host. Use `host.docker.internal` or `--network=host`. 
+- **Empty Ollama backend shadowing the picker**: If you omitted `ENABLE_OLLAMA_API=false`, Open WebUI shows an empty Ollama section above your Hermes models. Restart the container with `-e ENABLE_OLLAMA_API=false` or disable Ollama in **Admin Settings → Connections**. ### Connection test passes but no models load diff --git a/website/docs/user-guide/messaging/telegram.md b/website/docs/user-guide/messaging/telegram.md index 5873303a04..dd933aa2fd 100644 --- a/website/docs/user-guide/messaging/telegram.md +++ b/website/docs/user-guide/messaging/telegram.md @@ -293,9 +293,9 @@ Hermes Agent works in Telegram group chats with a few considerations: - `TELEGRAM_ALLOWED_USERS` still applies — only authorized users can trigger the bot, even in groups - You can keep the bot from responding to ordinary group chatter with `telegram.require_mention: true` - With `telegram.require_mention: true`, group messages are accepted when they are: - - slash commands - replies to one of the bot's messages - `@botusername` mentions + - `/command@botusername` (Telegram's bot-menu command form that includes the bot name) - matches for one of your configured regex wake words in `telegram.mention_patterns` - Use `telegram.ignored_threads` to keep Hermes silent in specific Telegram forum topics, even when the group would otherwise allow free responses or mention-triggered replies - If `telegram.require_mention` is left unset or false, Hermes keeps the previous open-group behavior and responds to normal group messages it can see diff --git a/website/static/api/model-catalog.json b/website/static/api/model-catalog.json index 0845f7339a..f19beab074 100644 --- a/website/static/api/model-catalog.json +++ b/website/static/api/model-catalog.json @@ -1,6 +1,6 @@ { "version": 1, - "updated_at": "2026-04-30T03:06:09Z", + "updated_at": "2026-05-04T09:41:25Z", "metadata": { "source": "hermes-agent repo", "docs": "https://hermes-agent.nousresearch.com/docs/reference/model-catalog" @@ -232,7 +232,7 @@ 
"id": "z-ai/glm-5-turbo" }, { - "id": "x-ai/grok-4.20-beta" + "id": "x-ai/grok-4.20" }, { "id": "nvidia/nemotron-3-super-120b-a12b"