diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py
index f3fba0e9be8..a9556e20468 100644
--- a/agent/prompt_builder.py
+++ b/agent/prompt_builder.py
@@ -182,6 +182,64 @@ SKILLS_GUIDANCE = (
     "Skills that aren't maintained become liabilities."
 )
 
+KANBAN_GUIDANCE = (
+    "# You are a Kanban worker\n"
+    "You were spawned by the Hermes Kanban dispatcher to execute ONE task from "
+    "the shared board at `~/.hermes/kanban.db`. Your task id is in "
+    "`$HERMES_KANBAN_TASK`; your workspace is `$HERMES_KANBAN_WORKSPACE`. "
+    "The `kanban_*` tools in your schema are your primary coordination surface — "
+    "they write directly to the shared SQLite DB and work regardless of terminal "
+    "backend (local/docker/modal/ssh).\n"
+    "\n"
+    "## Lifecycle\n"
+    "\n"
+    "1. **Orient.** Call `kanban_show()` first (no args — it defaults to your "
+    "task). The response includes title, body, parent-task handoffs (summary + "
+    "metadata), any prior attempts on this task if you're a retry, the full "
+    "comment thread, and a pre-formatted `worker_context` you can treat as "
+    "ground truth.\n"
+    "2. **Work inside the workspace.** `cd $HERMES_KANBAN_WORKSPACE` before "
+    "any file operations. The workspace is yours for this run. Don't modify "
+    "files outside it unless the task explicitly asks.\n"
+    "3. **Heartbeat on long operations.** Call `kanban_heartbeat(note=...)` "
+    "every few minutes during long subprocesses (training, encoding, crawling). "
+    "Skip heartbeats for short tasks.\n"
+    "4. **Block on genuine ambiguity.** If you need a human decision you cannot "
+    "infer (missing credentials, UX choice, paywalled source, peer output you "
+    "need first), call `kanban_block(reason=\"...\")` and stop. Don't guess. "
+    "The user will unblock with context and the dispatcher will respawn you.\n"
+    "5. **Complete with structured handoff.** Call `kanban_complete(summary=..., "
+    "metadata=...)`. `summary` is 1–3 human-readable sentences naming concrete "
+    "artifacts. `metadata` is machine-readable facts "
+    "(`{changed_files: [...], tests_run: N, decisions: [...]}`). Downstream "
+    "workers read both via their own `kanban_show`; a worked example appears "
+    "at the end of this prompt. Never put secrets / "
+    "tokens / raw PII in either field — run rows are durable forever.\n"
+    "6. **If follow-up work appears, create it; don't do it.** Use "
+    "`kanban_create(title=..., assignee=<profile>, parents=[your-task-id])` "
+    "to spawn a child task for the appropriate specialist profile instead of "
+    "scope-creeping into the next thing.\n"
+    "\n"
+    "## Orchestrator mode\n"
+    "\n"
+    "If your task is itself a decomposition task (e.g. a planner profile given "
+    "a high-level goal), use `kanban_create` to fan out into child tasks — one "
+    "per specialist, each with an explicit `assignee` and `parents=[...]` to "
+    "express dependencies. Then `kanban_complete` your own task with a summary "
+    "of the decomposition. Do NOT execute the work yourself; your job is "
+    "routing, not implementation.\n"
+    "\n"
+    "## Do NOT\n"
+    "\n"
+    "- Do not shell out to `hermes kanban <verb>` for board operations. Use "
+    "the `kanban_*` tools — they work across all terminal backends.\n"
+    "- Do not complete a task you didn't actually finish. Block it.\n"
+    "- Do not assign follow-up work to yourself. Assign it to the right "
+    "specialist profile.\n"
+    "- Do not call `delegate_task` as a board substitute. `delegate_task` is "
+    "for short reasoning subtasks inside your own run; board tasks are for "
+    "cross-agent handoffs that outlive one API loop."
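+    "\n"
+    "\n"
+    "## Example: completion handoff\n"
+    "\n"
+    # Illustrative sketch only: the summary and metadata values below are
+    # hypothetical; the call shape follows lifecycle step 5 above.
+    "```\n"
+    "kanban_complete(\n"
+    "    summary=\"Ported the crawler to httpx; 14/14 tests pass.\",\n"
+    "    metadata={\"changed_files\": [\"crawler/fetch.py\"], \"tests_run\": 14},\n"
+    ")\n"
+    "```"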
+)
+
 TOOL_USE_ENFORCEMENT_GUIDANCE = (
     "# Tool-use enforcement\n"
     "You MUST use your tools to take action — do not describe what you would do "
diff --git a/cli.py b/cli.py
index f11de7ffab2..48a71b16817 100644
--- a/cli.py
+++ b/cli.py
@@ -6087,6 +6087,27 @@ class HermesCLI:
         except Exception as exc:
             print(f"(._.) curator: {exc}")
 
+    def _handle_kanban_command(self, cmd: str):
+        """Handle the /kanban command — delegate to the shared kanban CLI.
+
+        The string form passed here is the user's full ``/kanban ...``
+        including the leading slash; we strip it and hand the remainder
+        to ``kanban.run_slash``, which returns a single formatted string.
+        """
+        from hermes_cli.kanban import run_slash
+
+        rest = cmd.strip()
+        if rest.startswith("/"):
+            rest = rest.lstrip("/")
+        if rest.startswith("kanban"):
+            rest = rest[len("kanban"):].lstrip()
+        try:
+            output = run_slash(rest)
+        except Exception as exc:  # pragma: no cover - defensive
+            output = f"(._.) kanban error: {exc}"
+        if output:
+            print(output)
+
     def _handle_skills_command(self, cmd: str):
         """Handle /skills slash command — delegates to hermes_cli.skills_hub."""
         from hermes_cli.skills_hub import handle_skills_slash
@@ -6332,6 +6353,8 @@ class HermesCLI:
             self._handle_cron_command(cmd_original)
         elif canonical == "curator":
             self._handle_curator_command(cmd_original)
+        elif canonical == "kanban":
+            self._handle_kanban_command(cmd_original)
         elif canonical == "skills":
             with self._busy_command(self._slow_command_status(cmd_original)):
                 self._handle_skills_command(cmd_original)
diff --git a/docs/hermes-kanban-v1-spec.pdf b/docs/hermes-kanban-v1-spec.pdf
new file mode 100644
index 00000000000..c7899cd12a9
Binary files /dev/null and b/docs/hermes-kanban-v1-spec.pdf differ
diff --git a/gateway/run.py b/gateway/run.py
index 9107f6c485e..a80f42650e8 100644
--- a/gateway/run.py
+++ b/gateway/run.py
@@ -2732,6 +2732,17 @@ class GatewayRunner:
         # Start background session expiry watcher to finalize expired sessions
         asyncio.create_task(self._session_expiry_watcher())
 
+        # Start background kanban notifier — delivers `completed`, `blocked`,
+        # `gave_up`, `crashed`, and `timed_out` events to gateway subscribers
+        # so human-in-the-loop workflows hear back without polling.
+        asyncio.create_task(self._kanban_notifier_watcher())
+
+        # Start background kanban dispatcher — spawns workers for ready
+        # tasks. Gated by `kanban.dispatch_in_gateway` (default True).
+        # When false, users run the standalone daemon (`hermes kanban
+        # daemon --force`) externally or simply don't use kanban; this
+        # loop becomes a no-op.
+        asyncio.create_task(self._kanban_dispatcher_watcher())
+
         # Start background reconnection watcher for platforms that failed at startup
         if self._failed_platforms:
             logger.info(
@@ -2907,6 +2918,399 @@ class GatewayRunner:
                 break
             await asyncio.sleep(1)
 
+    async def _kanban_notifier_watcher(self, interval: float = 5.0) -> None:
+        """Poll ``kanban_notify_subs`` and deliver terminal events to users.
+
+        For each subscription row, fetches ``task_events`` newer than the
+        stored cursor with kind in the terminal set (``completed``,
+        ``blocked``, ``gave_up``, ``crashed``, ``timed_out``). Sends one
+        message per new event to ``(platform, chat_id, thread_id)``,
+        then advances the cursor. When the task's status reaches a
+        terminal state (``done`` / ``archived``), or the last delivered
+        event is itself a terminal kind, the subscription is removed.
+
+        Runs in the gateway event loop; all SQLite work is pushed to a
+        thread via ``asyncio.to_thread`` so the loop never blocks on the
+        WAL lock. Failures in one tick don't stop subsequent ticks.
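+
+        Example delivered message (illustrative task id, assignee, and
+        summary; real values come from the task row and run payload):
+
+            ✔ @researcher Kanban t_1a2b done — Crawl arXiv listings
+            Fetched 412 abstracts; wrote data/arxiv.jsonl.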
+ """ + from gateway.config import Platform as _Platform + try: + from hermes_cli import kanban_db as _kb + except Exception: + logger.warning("kanban notifier: kanban_db not importable; notifier disabled") + return + + TERMINAL_KINDS = ("completed", "blocked", "gave_up", "crashed", "timed_out") + # Terminal event kinds trigger automatic unsubscription — the task + # is done, blocked, or in a retry-needed state that the human + # shouldn't keep pinging a stale chat for. Previously we only + # unsubbed when task.status in ('done', 'archived'), which left + # subscriptions on 'blocked' / 'gave_up' / 'crashed' / 'timed_out' + # tasks stranded forever. + TERMINAL_EVENT_KINDS = TERMINAL_KINDS + # Per-subscription send-failure counter. Adapter.send raising + # means the chat is dead (deleted, bot kicked, etc.) — after N + # consecutive send failures the sub is dropped so we don't spin + # against a dead chat every 5 seconds forever. + MAX_SEND_FAILURES = 3 + sub_fail_counts: dict[tuple, int] = getattr( + self, "_kanban_sub_fail_counts", {} + ) + self._kanban_sub_fail_counts = sub_fail_counts + + # Initial delay so the gateway can finish wiring adapters. + await asyncio.sleep(5) + + while self._running: + try: + def _collect(): + conn = _kb.connect() + try: + _kb.init_db() # idempotent; handles first-run + except Exception: + pass + try: + subs = _kb.list_notify_subs(conn) + deliveries: list[dict] = [] + for sub in subs: + cursor, events = _kb.unseen_events_for_sub( + conn, + task_id=sub["task_id"], + platform=sub["platform"], + chat_id=sub["chat_id"], + thread_id=sub.get("thread_id") or "", + kinds=TERMINAL_KINDS, + ) + if not events: + continue + task = _kb.get_task(conn, sub["task_id"]) + deliveries.append({ + "sub": sub, + "cursor": cursor, + "events": events, + "task": task, + }) + return deliveries + finally: + conn.close() + + deliveries = await asyncio.to_thread(_collect) + for d in deliveries: + sub = d["sub"] + task = d["task"] + platform_str = (sub["platform"] or "").lower() + try: + plat = _Platform(platform_str) + except ValueError: + # Unknown platform string; skip and advance cursor so + # we don't replay forever. + await asyncio.to_thread( + self._kanban_advance, sub, d["cursor"], + ) + continue + adapter = self.adapters.get(plat) + if adapter is None: + continue # platform not currently connected + title = (task.title if task else sub["task_id"])[:120] + for ev in d["events"]: + kind = ev.kind + # Identity prefix: attribute terminal pings to the + # worker that did the work. Makes fleets (where one + # chat subscribes to many tasks) legible at a glance. + who = (task.assignee if task and task.assignee else None) + tag = f"@{who} " if who else "" + if kind == "completed": + # Prefer the run's summary (the worker's + # intentional human-facing handoff, carried + # in the event payload), then fall back to + # task.result for legacy rows written before + # runs shipped. 
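+                            # Payload sketch this branch expects (hypothetical
+                            # values): {"summary": "Ported the crawler to httpx."}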
+ handoff = "" + payload_summary = None + if ev.payload and ev.payload.get("summary"): + payload_summary = str(ev.payload["summary"]) + if payload_summary: + h = payload_summary.strip().splitlines()[0][:200] + handoff = f"\n{h}" + elif task and task.result: + r = task.result.strip().splitlines()[0][:160] + handoff = f"\n{r}" + msg = ( + f"✔ {tag}Kanban {sub['task_id']} done" + f" — {title}{handoff}" + ) + elif kind == "blocked": + reason = "" + if ev.payload and ev.payload.get("reason"): + reason = f": {str(ev.payload['reason'])[:160]}" + msg = f"⏸ {tag}Kanban {sub['task_id']} blocked{reason}" + elif kind == "gave_up": + err = "" + if ev.payload and ev.payload.get("error"): + err = f"\n{str(ev.payload['error'])[:200]}" + msg = ( + f"✖ {tag}Kanban {sub['task_id']} gave up " + f"after repeated spawn failures{err}" + ) + elif kind == "crashed": + msg = ( + f"✖ {tag}Kanban {sub['task_id']} worker crashed " + f"(pid gone); dispatcher will retry" + ) + elif kind == "timed_out": + limit = 0 + if ev.payload and ev.payload.get("limit_seconds"): + limit = int(ev.payload["limit_seconds"]) + msg = ( + f"⏱ {tag}Kanban {sub['task_id']} timed out " + f"(max_runtime={limit}s); will retry" + ) + else: + continue + metadata: dict[str, Any] = {} + if sub.get("thread_id"): + metadata["thread_id"] = sub["thread_id"] + sub_key = ( + sub["task_id"], sub["platform"], + sub["chat_id"], sub.get("thread_id") or "", + ) + try: + await adapter.send( + sub["chat_id"], msg, metadata=metadata, + ) + # Reset the failure counter on success. + sub_fail_counts.pop(sub_key, None) + except Exception as exc: + fails = sub_fail_counts.get(sub_key, 0) + 1 + sub_fail_counts[sub_key] = fails + logger.warning( + "kanban notifier: send failed for %s on %s " + "(attempt %d/%d): %s", + sub["task_id"], platform_str, fails, + MAX_SEND_FAILURES, exc, + ) + if fails >= MAX_SEND_FAILURES: + logger.warning( + "kanban notifier: dropping subscription " + "%s on %s after %d consecutive send failures", + sub["task_id"], platform_str, fails, + ) + await asyncio.to_thread(self._kanban_unsub, sub) + sub_fail_counts.pop(sub_key, None) + # Don't advance cursor on send failure — retry next tick. + break + else: + # All events delivered; advance cursor + maybe unsub. + await asyncio.to_thread( + self._kanban_advance, sub, d["cursor"], + ) + # Unsubscribe when the LAST delivered event is a + # terminal kind (the task hit a "no further updates" + # state), not just on task.status in {done, archived}. + # Covers blocked / gave_up / crashed / timed_out which + # used to leak subs forever. + last_kind = d["events"][-1].kind if d["events"] else None + task_terminal = task and task.status in ("done", "archived") + event_terminal = last_kind in TERMINAL_EVENT_KINDS + if task_terminal or event_terminal: + await asyncio.to_thread( + self._kanban_unsub, sub, + ) + except Exception as exc: + logger.warning("kanban notifier tick failed: %s", exc) + # Sleep with cancellation checks. + for _ in range(int(max(1, interval))): + if not self._running: + return + await asyncio.sleep(1) + + def _kanban_advance(self, sub: dict, cursor: int) -> None: + """Sync helper: advance a subscription's cursor. 
Runs in to_thread.""" + from hermes_cli import kanban_db as _kb + conn = _kb.connect() + try: + _kb.advance_notify_cursor( + conn, + task_id=sub["task_id"], + platform=sub["platform"], + chat_id=sub["chat_id"], + thread_id=sub.get("thread_id") or "", + new_cursor=cursor, + ) + finally: + conn.close() + + def _kanban_unsub(self, sub: dict) -> None: + from hermes_cli import kanban_db as _kb + conn = _kb.connect() + try: + _kb.remove_notify_sub( + conn, + task_id=sub["task_id"], + platform=sub["platform"], + chat_id=sub["chat_id"], + thread_id=sub.get("thread_id") or "", + ) + finally: + conn.close() + + async def _kanban_dispatcher_watcher(self) -> None: + """Embedded kanban dispatcher — one tick every `dispatch_interval_seconds`. + + Gated by `kanban.dispatch_in_gateway` in config.yaml (default True). + When true, the gateway hosts the single dispatcher for this profile: + no separate `hermes kanban daemon` process needed. When false, the + loop exits immediately and an external daemon is expected. + + Each tick calls :func:`kanban_db.dispatch_once` inside + ``asyncio.to_thread`` so the SQLite WAL lock never blocks the + event loop. Failures in one tick don't stop subsequent ticks — + same pattern as `_kanban_notifier_watcher`. + + Shutdown: the loop checks ``self._running`` between ticks; gateway + stop() flips it to False and cancels pending tasks, and the + in-flight ``to_thread`` returns on its own after the current + ``dispatch_once`` call finishes (typically <1ms on an idle board). + """ + # Read config once at boot. If the user flips the flag later, they + # restart the gateway; same pattern as every other background + # watcher here. Honours HERMES_KANBAN_DISPATCH_IN_GATEWAY env var + # as an escape hatch (false-y value disables without editing YAML). + try: + from hermes_cli.config import load_config as _load_config + except Exception: + logger.warning("kanban dispatcher: config loader unavailable; disabled") + return + env_override = os.environ.get("HERMES_KANBAN_DISPATCH_IN_GATEWAY", "").strip().lower() + if env_override in ("0", "false", "no", "off"): + logger.info("kanban dispatcher: disabled via HERMES_KANBAN_DISPATCH_IN_GATEWAY env") + return + + try: + cfg = _load_config() + except Exception as exc: + logger.warning("kanban dispatcher: cannot load config (%s); disabled", exc) + return + kanban_cfg = cfg.get("kanban", {}) if isinstance(cfg, dict) else {} + if not kanban_cfg.get("dispatch_in_gateway", True): + logger.info( + "kanban dispatcher: disabled via config kanban.dispatch_in_gateway=false" + ) + return + + try: + from hermes_cli import kanban_db as _kb + except Exception: + logger.warning("kanban dispatcher: kanban_db not importable; dispatcher disabled") + return + + interval = float(kanban_cfg.get("dispatch_interval_seconds", 60) or 60) + if interval < 1.0: + interval = 1.0 # sanity floor — tighter than this is a footgun + + # Initial delay so the gateway finishes wiring adapters before the + # dispatcher spawns workers (those workers may hit gateway notify + # subscriptions etc.). Matches the notifier watcher's delay. + await asyncio.sleep(5) + + # Health telemetry mirrored from `_cmd_daemon`: warn when ready + # queue is non-empty but spawns are 0 for N consecutive ticks — + # usually means broken PATH, missing venv, or credential loss. + HEALTH_WINDOW = 6 + bad_ticks = 0 + last_warn_at = 0 + + def _tick_once() -> "Optional[object]": + """Run one dispatch_once; return result or None on error. 
+ + Runs in a worker thread via `asyncio.to_thread`.""" + conn = None + try: + conn = _kb.connect() + try: + _kb.init_db() # idempotent, handles first-run + except Exception: + pass + return _kb.dispatch_once(conn) + except Exception: + logger.exception("kanban dispatcher: tick failed") + return None + finally: + if conn is not None: + try: + conn.close() + except Exception: + pass + + def _ready_nonempty() -> bool: + """Cheap probe: is there at least one ready+assigned+unclaimed task?""" + conn = None + try: + conn = _kb.connect() + row = conn.execute( + "SELECT 1 FROM tasks " + "WHERE status = 'ready' AND assignee IS NOT NULL " + " AND claim_lock IS NULL LIMIT 1" + ).fetchone() + return row is not None + except Exception: + return False + finally: + if conn is not None: + try: + conn.close() + except Exception: + pass + + logger.info( + "kanban dispatcher: embedded in gateway (interval=%.1fs)", interval + ) + while self._running: + try: + res = await asyncio.to_thread(_tick_once) + if res is not None and getattr(res, "spawned", None): + # Quiet by default — only log when something actually + # happened, so an idle gateway stays silent. + logger.info( + "kanban dispatcher: tick spawned=%d reclaimed=%d " + "crashed=%d timed_out=%d promoted=%d auto_blocked=%d", + len(res.spawned), + res.reclaimed, + len(res.crashed) if hasattr(res.crashed, "__len__") else 0, + len(res.timed_out) if hasattr(res.timed_out, "__len__") else 0, + res.promoted, + len(res.auto_blocked) if hasattr(res.auto_blocked, "__len__") else 0, + ) + # Health telemetry + ready_pending = await asyncio.to_thread(_ready_nonempty) + spawned_any = bool(res and getattr(res, "spawned", None)) + if ready_pending and not spawned_any: + bad_ticks += 1 + else: + bad_ticks = 0 + if bad_ticks >= HEALTH_WINDOW: + now = int(time.time()) + if now - last_warn_at >= 300: + logger.warning( + "kanban dispatcher stuck: ready queue non-empty for " + "%d consecutive ticks but 0 workers spawned. Check " + "profile health (venv, PATH, credentials) and " + "`hermes kanban list --status ready`.", + bad_ticks, + ) + last_warn_at = now + except asyncio.CancelledError: + logger.debug("kanban dispatcher: cancelled") + raise + except Exception: + logger.exception("kanban dispatcher: unexpected watcher error") + + # Sleep in 1s slices so shutdown is snappy — otherwise a stop() + # waits up to `interval` seconds for the current sleep to finish. + slept = 0.0 + while slept < interval and self._running: + await asyncio.sleep(min(1.0, interval - slept)) + slept += 1.0 + async def _platform_reconnect_watcher(self) -> None: """Background task that periodically retries connecting failed platforms. @@ -4168,6 +4572,14 @@ class GatewayRunner: if _cmd_def_inner and _cmd_def_inner.name == "background": return await self._handle_background_command(event) + # /kanban must bypass the guard. It writes to a profile-agnostic + # DB (kanban.db), not to the running agent's state. In fact + # /kanban unblock is often the only way to free a worker that + # has blocked waiting for a peer — letting that be dispatched + # mid-run is the whole point of the board. + if _cmd_def_inner and _cmd_def_inner.name == "kanban": + return await self._handle_kanban_command(event) + # Session-level toggles that are safe to run mid-agent — # /yolo can unblock a pending approval prompt, /verbose cycles # the tool-progress display mode for the ongoing stream. 
@@ -4415,6 +4827,9 @@ class GatewayRunner: if canonical == "personality": return await self._handle_personality_command(event) + if canonical == "kanban": + return await self._handle_kanban_command(event) + if canonical == "retry": return await self._handle_retry_command(event) @@ -6031,6 +6446,84 @@ class GatewayRunner: return "\n".join(lines) + + async def _handle_kanban_command(self, event: MessageEvent) -> str: + """Handle /kanban — delegate to the shared kanban CLI. + + Run the potentially-blocking DB work in a thread pool so the + gateway event loop stays responsive. Read operations (list, + show, context, tail) are permitted while an agent is running; + mutations are allowed too because the board is profile-agnostic + and does not touch the running agent's state. + + For ``/kanban create`` invocations we also auto-subscribe the + originating gateway source (platform + chat + thread) to the new + task's terminal events, so the user hears back when the worker + completes / blocks / auto-blocks / crashes without having to poll. + """ + import asyncio + import re + from hermes_cli.kanban import run_slash + + text = (event.text or "").strip() + # Strip the leading "/kanban" (with or without slash), leaving args. + if text.startswith("/"): + text = text.lstrip("/") + if text.startswith("kanban"): + text = text[len("kanban"):].lstrip() + + is_create = text.split(None, 1)[:1] == ["create"] + + try: + output = await asyncio.to_thread(run_slash, text) + except Exception as exc: # pragma: no cover - defensive + return f"⚠ kanban error: {exc}" + + # Auto-subscribe on create. Parse the task id from the CLI's standard + # success line ("Created t_abcd (ready, assignee=...)"). If the user + # passed --json we don't subscribe; they're clearly scripting and + # can call /kanban notify-subscribe explicitly. + if is_create and output: + m = re.search(r"Created\s+(t_[0-9a-f]+)\b", output) + if m: + task_id = m.group(1) + try: + source = event.source + platform = getattr(source, "platform", None) + platform_str = ( + platform.value if hasattr(platform, "value") else str(platform or "") + ).lower() + chat_id = str(getattr(source, "chat_id", "") or "") + thread_id = str(getattr(source, "thread_id", "") or "") + user_id = str(getattr(source, "user_id", "") or "") or None + if platform_str and chat_id: + def _sub(): + from hermes_cli import kanban_db as _kb + conn = _kb.connect() + try: + _kb.add_notify_sub( + conn, task_id=task_id, + platform=platform_str, chat_id=chat_id, + thread_id=thread_id or None, + user_id=user_id, + ) + finally: + conn.close() + await asyncio.to_thread(_sub) + output = ( + output.rstrip() + + f"\n(subscribed — you'll be notified when {task_id} " + f"completes or blocks)" + ) + except Exception as exc: + logger.warning("kanban create auto-subscribe failed: %s", exc) + + # Gateway messages have practical length caps; truncate long + # listings to keep the UX reasonable. 
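+            # (For scale: Telegram caps a single message at 4096 chars,
+            # so 3800 leaves headroom for the truncation notice. Assumed
+            # rationale; the exact cap varies by platform.)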
+        if len(output) > 3800:
+            output = output[:3800] + "\n… (truncated; use `hermes kanban …` in your terminal for full output)"
+        return output or "(no output)"
+
     async def _handle_status_command(self, event: MessageEvent) -> str:
         """Handle /status command."""
         source = event.source
diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py
index 5ca562d87a2..15e211b997f 100644
--- a/hermes_cli/commands.py
+++ b/hermes_cli/commands.py
@@ -151,6 +151,11 @@ COMMAND_REGISTRY: list[CommandDef] = [
     CommandDef("curator", "Background skill maintenance (status, run, pin, archive)",
                "Tools & Skills", args_hint="[subcommand]",
                subcommands=("status", "run", "pause", "resume", "pin", "unpin", "restore")),
+    CommandDef("kanban", "Multi-profile collaboration board (tasks, links, comments)",
+               "Tools & Skills", args_hint="[subcommand]",
+               subcommands=("list", "ls", "show", "create", "assign", "link", "unlink",
+                            "claim", "comment", "complete", "block", "unblock", "archive",
+                            "tail", "dispatch", "context", "init", "gc")),
     CommandDef("reload", "Reload .env variables into the running session",
                "Tools & Skills", cli_only=True),
     CommandDef("reload-mcp", "Reload MCP servers from config",
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index e880e936ab4..153b9f5b2d4 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -1104,6 +1104,24 @@ DEFAULT_CONFIG = {
         "max_parallel_jobs": None,
     },
 
+    # Kanban multi-agent coordination — controls the dispatcher loop that
+    # spawns workers for ready tasks. The dispatcher ticks every N seconds
+    # (default 60), reclaims stale claims, promotes dependency-satisfied
+    # todos to ready, and fires `hermes -p <assignee> chat -q ...` for
+    # each claimable ready task. One dispatcher per board is sufficient;
+    # running more than one on the same kanban.db will race for claims.
+    "kanban": {
+        # Run the dispatcher inside the gateway process. On by default —
+        # the cost is ~300µs every `dispatch_interval_seconds` when idle,
+        # and gateway is the supervisor users already have. Set to false
+        # only if you run the dispatcher as a separate systemd unit or
+        # don't want the gateway to spawn workers.
+        "dispatch_in_gateway": True,
+        # Seconds between dispatcher ticks (idle or not). Lower = snappier
+        # pickup of newly-ready tasks; higher = less SQL pressure.
+        "dispatch_interval_seconds": 60,
+    },
+
     # execute_code settings — controls the tool used for programmatic tool calls.
     "code_execution": {
         # Execution mode:
diff --git a/hermes_cli/kanban.py b/hermes_cli/kanban.py
new file mode 100644
index 00000000000..e23a4923f6d
--- /dev/null
+++ b/hermes_cli/kanban.py
@@ -0,0 +1,1393 @@
+"""CLI for the Hermes Kanban board — ``hermes kanban …`` subcommand.
+
+Exposes the 15-verb surface documented in the design spec
+(``docs/hermes-kanban-v1-spec.pdf``), plus operational extras such as
+``daemon``, ``watch``, ``stats``, and the ``notify-*`` verbs. All DB
+work is delegated to ``kanban_db``. This module adds:
+
+  * Argparse subcommand construction (``build_parser``).
+  * Argument dispatch (``kanban_command``).
+  * Output formatting (plain text + ``--json``).
+  * A short shared helper that parses a single slash-style string
+    (used by ``/kanban …`` in CLI and gateway) and forwards it to the
+    argparse surface.
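+
+Typical session (illustrative task id):
+
+    $ hermes kanban create "Summarize the kanban spec" --assignee researcher
+    Created t_1a2b (ready, assignee=researcher)
+    $ hermes kanban show t_1a2b
+    $ hermes kanban complete t_1a2b --result "Summary posted as a comment."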
+""" + +from __future__ import annotations + +import argparse +import json +import os +import shlex +import sys +import time +from pathlib import Path +from typing import Any, Optional + +from hermes_cli import kanban_db as kb + + +# --------------------------------------------------------------------------- +# Small formatting helpers +# --------------------------------------------------------------------------- + +_STATUS_ICONS = { + "todo": "◻", + "ready": "▶", + "running": "●", + "blocked": "⊘", + "done": "✓", + "archived": "—", +} + + +def _fmt_ts(ts: Optional[int]) -> str: + if not ts: + return "" + return time.strftime("%Y-%m-%d %H:%M", time.localtime(ts)) + + +def _fmt_task_line(t: kb.Task) -> str: + icon = _STATUS_ICONS.get(t.status, "?") + assignee = t.assignee or "(unassigned)" + tenant = f" [{t.tenant}]" if t.tenant else "" + return f"{icon} {t.id} {t.status:8s} {assignee:20s}{tenant} {t.title}" + + +def _task_to_dict(t: kb.Task) -> dict[str, Any]: + return { + "id": t.id, + "title": t.title, + "body": t.body, + "assignee": t.assignee, + "status": t.status, + "priority": t.priority, + "tenant": t.tenant, + "workspace_kind": t.workspace_kind, + "workspace_path": t.workspace_path, + "created_by": t.created_by, + "created_at": t.created_at, + "started_at": t.started_at, + "completed_at": t.completed_at, + "result": t.result, + "skills": list(t.skills) if t.skills else [], + } + + +def _parse_workspace_flag(value: str) -> tuple[str, Optional[str]]: + """Parse ``--workspace`` into ``(kind, path|None)``. + + Accepts: ``scratch``, ``worktree``, ``dir:``. + """ + if not value: + return ("scratch", None) + v = value.strip() + if v in ("scratch", "worktree"): + return (v, None) + if v.startswith("dir:"): + path = v[len("dir:"):].strip() + if not path: + raise argparse.ArgumentTypeError( + "--workspace dir: requires a path after the colon" + ) + return ("dir", os.path.expanduser(path)) + raise argparse.ArgumentTypeError( + f"unknown --workspace value {value!r}: use scratch, worktree, or dir:" + ) + + +def _check_dispatcher_presence() -> tuple[bool, str]: + """Return ``(running, message)``. + + - ``running=True``: a gateway is alive for this HERMES_HOME and its + config has ``kanban.dispatch_in_gateway`` on (default). Message + is a short status line. + - ``running=False``: either no gateway is running, or the gateway + is running but the config flag is off. Message is human guidance + explaining the next step. + + Used by ``hermes kanban create`` (and callers) to warn when a task + will sit in ``ready`` because nothing is there to pick it up. + Defensive against import failures and config-read errors — if the + probe itself errors, we return ``(True, "")`` so we don't spam + false warnings (better to miss a warning than to cry wolf). + """ + try: + from gateway.status import get_running_pid # type: ignore + except Exception: + return (True, "") # can't probe — silent + try: + pid = get_running_pid() + except Exception: + return (True, "") # probe errored — silent + + # Even if the gateway is up, dispatch_in_gateway may be off. 
+    try:
+        from hermes_cli.config import load_config
+        cfg = load_config()
+        dispatch_on = bool(cfg.get("kanban", {}).get("dispatch_in_gateway", True))
+    except Exception:
+        dispatch_on = True  # can't tell — assume default
+
+    if pid and dispatch_on:
+        return (True, f"gateway pid={pid}, dispatch enabled")
+    if pid and not dispatch_on:
+        return (
+            False,
+            "Gateway is running but kanban.dispatch_in_gateway=false in "
+            "config.yaml — the task will sit in 'ready' until you flip it "
+            "back on and restart the gateway, OR run the legacy "
+            "standalone daemon (`hermes kanban daemon --force`)."
+        )
+    return (
+        False,
+        "No gateway is running — the task will sit in 'ready' until you "
+        "start it. Run:\n"
+        "  hermes gateway start\n"
+        "The gateway hosts an embedded dispatcher (tick interval 60s by "
+        "default); your task will be picked up on the next tick after "
+        "the gateway comes up."
+    )
+
+
+# ---------------------------------------------------------------------------
+# Argparse builder
+# ---------------------------------------------------------------------------
+
+def build_parser(parent_subparsers: argparse._SubParsersAction) -> argparse.ArgumentParser:
+    """Attach the ``kanban`` subcommand tree under an existing subparsers action.
+
+    Returns the top-level ``kanban`` parser so the caller can ``set_defaults``.
+    """
+    kanban_parser = parent_subparsers.add_parser(
+        "kanban",
+        help="Multi-profile collaboration board (tasks, links, comments)",
+        description=(
+            "Durable SQLite-backed task board shared across Hermes profiles. "
+            "Tasks are claimed atomically, can depend on other tasks, and "
+            "are executed by a named profile in an isolated workspace. "
+            "See https://hermes-agent.nousresearch.com/docs/user-guide/features/kanban "
+            "or docs/hermes-kanban-v1-spec.pdf for the full design."
+        ),
+    )
+    sub = kanban_parser.add_subparsers(dest="kanban_action")
+
+    # --- init ---
+    sub.add_parser("init", help="Create kanban.db if missing (idempotent)")
+
+    # --- create ---
+    p_create = sub.add_parser("create", help="Create a new task")
+    p_create.add_argument("title", help="Task title")
+    p_create.add_argument("--body", default=None, help="Optional opening post")
+    p_create.add_argument("--assignee", default=None, help="Profile name to assign")
+    p_create.add_argument("--parent", action="append", default=[],
+                          help="Parent task id (repeatable)")
+    p_create.add_argument("--workspace", default="scratch",
+                          help="scratch | worktree | dir:<path> (default: scratch)")
+    p_create.add_argument("--tenant", default=None, help="Tenant namespace")
+    p_create.add_argument("--priority", type=int, default=0, help="Priority tiebreaker")
+    p_create.add_argument("--triage", action="store_true",
+                          help="Park in triage — a specifier will flesh out the spec and promote to todo")
+    p_create.add_argument("--idempotency-key", default=None,
+                          help="Dedup key. If a non-archived task with this key exists, "
+                               "its id is returned instead of creating a duplicate.")
+    p_create.add_argument("--max-runtime", default=None,
+                          help="Per-task runtime cap. Accepts seconds (300) or "
+                               "durations (90s, 30m, 2h, 1d). When exceeded, "
+                               "the dispatcher SIGTERMs (then SIGKILLs) the worker "
+                               "and re-queues the task.")
+    p_create.add_argument("--created-by", default="user",
+                          help="Author name recorded on the task (default: user)")
+    p_create.add_argument("--skill", action="append", default=[], dest="skills",
+                          help="Skill to force-load into the worker "
+                               "(repeatable). Appended to the built-in "
+                               "kanban-worker skill.
Example: " + "--skill translation --skill github-code-review") + p_create.add_argument("--json", action="store_true", help="Emit JSON output") + + # --- list --- + p_list = sub.add_parser("list", aliases=["ls"], help="List tasks") + p_list.add_argument("--mine", action="store_true", + help="Filter by $HERMES_PROFILE as assignee") + p_list.add_argument("--assignee", default=None) + p_list.add_argument("--status", default=None, + choices=sorted(kb.VALID_STATUSES)) + p_list.add_argument("--tenant", default=None) + p_list.add_argument("--archived", action="store_true", + help="Include archived tasks") + p_list.add_argument("--json", action="store_true") + + # --- show --- + p_show = sub.add_parser("show", help="Show a task with comments + events") + p_show.add_argument("task_id") + p_show.add_argument("--json", action="store_true") + + # --- assign --- + p_assign = sub.add_parser("assign", help="Assign or reassign a task") + p_assign.add_argument("task_id") + p_assign.add_argument("profile", help="Profile name (or 'none' to unassign)") + + # --- link / unlink --- + p_link = sub.add_parser("link", help="Add a parent->child dependency") + p_link.add_argument("parent_id") + p_link.add_argument("child_id") + p_unlink = sub.add_parser("unlink", help="Remove a parent->child dependency") + p_unlink.add_argument("parent_id") + p_unlink.add_argument("child_id") + + # --- claim --- + p_claim = sub.add_parser( + "claim", + help="Atomically claim a ready task (prints resolved workspace path)", + ) + p_claim.add_argument("task_id") + p_claim.add_argument("--ttl", type=int, default=kb.DEFAULT_CLAIM_TTL_SECONDS, + help="Claim TTL in seconds (default: 900)") + + # --- comment / complete / block / unblock / archive --- + p_comment = sub.add_parser("comment", help="Append a comment") + p_comment.add_argument("task_id") + p_comment.add_argument("text", nargs="+", help="Comment body") + p_comment.add_argument("--author", default=None, + help="Author name (default: $HERMES_PROFILE or 'user')") + + p_complete = sub.add_parser("complete", help="Mark one or more tasks done") + p_complete.add_argument("task_ids", nargs="+", + help="One or more task ids (only --result applies to all of them)") + p_complete.add_argument("--result", default=None, help="Result summary") + p_complete.add_argument("--summary", default=None, + help="Structured handoff summary for downstream tasks. " + "Falls back to --result if omitted.") + p_complete.add_argument("--metadata", default=None, + help='JSON dict of structured facts (e.g. \'{"changed_files": [...], ' + '"tests_run": 12}\'). 
Stored on the closing run.') + + p_block = sub.add_parser("block", help="Mark one or more tasks blocked") + p_block.add_argument("task_id") + p_block.add_argument("reason", nargs="*", help="Reason (also appended as a comment)") + p_block.add_argument("--ids", nargs="+", default=None, + help="Additional task ids to block with the same reason (bulk mode)") + + p_unblock = sub.add_parser("unblock", help="Return one or more blocked tasks to ready") + p_unblock.add_argument("task_ids", nargs="+") + + p_archive = sub.add_parser("archive", help="Archive one or more tasks") + p_archive.add_argument("task_ids", nargs="+") + + # --- tail --- + p_tail = sub.add_parser("tail", help="Follow a task's event stream") + p_tail.add_argument("task_id") + p_tail.add_argument("--interval", type=float, default=1.0) + + # --- dispatch --- + p_disp = sub.add_parser( + "dispatch", + help="One dispatcher pass: reclaim stale, promote ready, spawn workers", + ) + p_disp.add_argument("--dry-run", action="store_true", + help="Don't actually spawn processes; just print what would happen") + p_disp.add_argument("--max", type=int, default=None, + help="Cap number of spawns this pass") + p_disp.add_argument("--failure-limit", type=int, + default=kb.DEFAULT_SPAWN_FAILURE_LIMIT, + help=f"Auto-block a task after this many consecutive spawn failures " + f"(default: {kb.DEFAULT_SPAWN_FAILURE_LIMIT})") + p_disp.add_argument("--json", action="store_true") + + # --- daemon (deprecated) --- + p_daemon = sub.add_parser( + "daemon", + help="DEPRECATED — dispatcher now runs in the gateway. Use `hermes gateway start`.", + ) + p_daemon.add_argument("--interval", type=float, default=60.0, + help="Seconds between dispatch ticks (default: 60)") + p_daemon.add_argument("--max", type=int, default=None, + help="Cap number of spawns per tick") + p_daemon.add_argument("--failure-limit", type=int, + default=kb.DEFAULT_SPAWN_FAILURE_LIMIT) + p_daemon.add_argument("--pidfile", default=None, + help="Write the daemon's PID to this file on start") + p_daemon.add_argument("--verbose", "-v", action="store_true", + help="Log each tick's outcome to stdout") + # Undocumented escape hatch for users who truly cannot run the gateway. + # Intentionally excluded from --help so nobody discovers it casually and + # keeps the old double-dispatcher pattern alive. + p_daemon.add_argument("--force", action="store_true", + help=argparse.SUPPRESS) + + # --- watch --- + p_watch = sub.add_parser( + "watch", + help="Live-stream task_events to the terminal (Ctrl+C to exit)", + ) + p_watch.add_argument("--assignee", default=None, + help="Only show events for tasks assigned to this profile") + p_watch.add_argument("--tenant", default=None, + help="Only show events from tasks in this tenant") + p_watch.add_argument("--kinds", default=None, + help="Comma-separated event kinds to include " + "(e.g. 
'completed,blocked,gave_up,crashed,timed_out')") + p_watch.add_argument("--interval", type=float, default=0.5, + help="Poll interval in seconds (default: 0.5)") + + # --- stats --- + p_stats = sub.add_parser( + "stats", help="Per-status + per-assignee counts + oldest-ready age", + ) + p_stats.add_argument("--json", action="store_true") + + # --- notify subscribe / list / remove --- + p_nsub = sub.add_parser( + "notify-subscribe", + help="Subscribe a gateway source to a task's terminal events " + "(used by /kanban subscribe in the gateway adapter)", + ) + p_nsub.add_argument("task_id") + p_nsub.add_argument("--platform", required=True) + p_nsub.add_argument("--chat-id", required=True) + p_nsub.add_argument("--thread-id", default=None) + p_nsub.add_argument("--user-id", default=None) + + p_nlist = sub.add_parser( + "notify-list", + help="List notification subscriptions (optionally for a single task)", + ) + p_nlist.add_argument("task_id", nargs="?", default=None) + p_nlist.add_argument("--json", action="store_true") + + p_nrm = sub.add_parser( + "notify-unsubscribe", + help="Remove a gateway subscription from a task", + ) + p_nrm.add_argument("task_id") + p_nrm.add_argument("--platform", required=True) + p_nrm.add_argument("--chat-id", required=True) + p_nrm.add_argument("--thread-id", default=None) + + # --- log --- + p_log = sub.add_parser( + "log", + help="Print the worker log for a task (from $HERMES_HOME/kanban/logs/)", + ) + p_log.add_argument("task_id") + p_log.add_argument("--tail", type=int, default=None, + help="Only print the last N bytes") + + # --- runs (per-attempt history for a task) --- + p_runs = sub.add_parser( + "runs", + help="Show attempt history for a task (one row per run: profile, " + "outcome, elapsed, summary)", + ) + p_runs.add_argument("task_id") + p_runs.add_argument("--json", action="store_true") + + # --- heartbeat (worker liveness signal) --- + p_hb = sub.add_parser( + "heartbeat", + help="Emit a heartbeat event for a running task (worker liveness signal)", + ) + p_hb.add_argument("task_id") + p_hb.add_argument("--note", default=None, + help="Optional short note attached to the heartbeat event") + + # --- assignees --- + p_asg = sub.add_parser( + "assignees", + help="List known profiles + per-profile task counts " + "(union of ~/.hermes/profiles/ and current assignees on the board)", + ) + p_asg.add_argument("--json", action="store_true") + + # --- context --- (for spawned workers) + p_ctx = sub.add_parser( + "context", + help="Print the full context a worker sees for a task " + "(title + body + parent results + comments).", + ) + p_ctx.add_argument("task_id") + + # --- gc --- + p_gc = sub.add_parser( + "gc", help="Garbage-collect archived-task workspaces, old events, and old logs", + ) + p_gc.add_argument("--event-retention-days", type=int, default=30, + help="Delete task_events older than N days for terminal tasks (default: 30)") + p_gc.add_argument("--log-retention-days", type=int, default=30, + help="Delete worker log files older than N days (default: 30)") + + kanban_parser.set_defaults(_kanban_parser=kanban_parser) + return kanban_parser + + +# --------------------------------------------------------------------------- +# Command dispatch +# --------------------------------------------------------------------------- + +def kanban_command(args: argparse.Namespace) -> int: + """Entry point from ``hermes kanban …`` argparse dispatch. + + Returns a shell-style exit code (0 on success, non-zero on error). 
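+
+    Wiring sketch (assumes the caller owns the top-level parser):
+
+        sub = parser.add_subparsers(dest="command")
+        build_parser(sub)
+        args = parser.parse_args()
+        if args.command == "kanban":
+            sys.exit(kanban_command(args))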
+    """
+    action = getattr(args, "kanban_action", None)
+    if not action:
+        # No subaction given: print help via the stored parser reference.
+        parser = getattr(args, "_kanban_parser", None)
+        if parser is not None:
+            parser.print_help()
+        else:
+            print(
+                "usage: hermes kanban <action> [options]\n"
+                "Run 'hermes kanban --help' for the full list of actions.",
+                file=sys.stderr,
+            )
+        return 0
+
+    # Auto-initialize the DB before dispatching any subcommand. init_db
+    # is idempotent, so running it every invocation is cheap (one
+    # SELECT against sqlite_master when tables already exist) and
+    # prevents "no such table: tasks" on first use from a fresh
+    # HERMES_HOME. Previously only `init` and `daemon` triggered
+    # schema creation; `create` / `list` / every other command would
+    # error out on a fresh install.
+    try:
+        kb.init_db()
+    except Exception as exc:
+        print(f"kanban: could not initialize database: {exc}", file=sys.stderr)
+        return 1
+
+    handlers = {
+        "init": _cmd_init,
+        "create": _cmd_create,
+        "list": _cmd_list,
+        "ls": _cmd_list,
+        "show": _cmd_show,
+        "assign": _cmd_assign,
+        "link": _cmd_link,
+        "unlink": _cmd_unlink,
+        "claim": _cmd_claim,
+        "comment": _cmd_comment,
+        "complete": _cmd_complete,
+        "block": _cmd_block,
+        "unblock": _cmd_unblock,
+        "archive": _cmd_archive,
+        "tail": _cmd_tail,
+        "dispatch": _cmd_dispatch,
+        "daemon": _cmd_daemon,
+        "watch": _cmd_watch,
+        "stats": _cmd_stats,
+        "log": _cmd_log,
+        "runs": _cmd_runs,
+        "heartbeat": _cmd_heartbeat,
+        "assignees": _cmd_assignees,
+        "notify-subscribe": _cmd_notify_subscribe,
+        "notify-list": _cmd_notify_list,
+        "notify-unsubscribe": _cmd_notify_unsubscribe,
+        "context": _cmd_context,
+        "gc": _cmd_gc,
+    }
+    handler = handlers.get(action)
+    if not handler:
+        print(f"kanban: unknown action {action!r}", file=sys.stderr)
+        return 2
+    try:
+        return int(handler(args) or 0)
+    except (ValueError, RuntimeError) as exc:
+        print(f"kanban: {exc}", file=sys.stderr)
+        return 1
+
+
+# ---------------------------------------------------------------------------
+# Handlers
+# ---------------------------------------------------------------------------
+
+def _profile_author() -> str:
+    """Best-effort author name for an interactive CLI call."""
+    for env in ("HERMES_PROFILE_NAME", "HERMES_PROFILE"):
+        v = os.environ.get(env)
+        if v:
+            return v
+    try:
+        from hermes_cli.profiles import get_active_profile_name
+        return get_active_profile_name() or "user"
+    except Exception:
+        return "user"
+
+
+def _parse_duration(val) -> Optional[int]:
+    """Parse ``30s`` / ``5m`` / ``2h`` / ``1d`` or a raw integer → seconds.
+
+    Returns None for empty input. Raises ValueError on malformed input so
+    the CLI can surface a usage error cleanly.
+    """
+    if val is None or val == "":
+        return None
+    s = str(val).strip().lower()
+    # Bare integer → seconds.
+    try:
+        return int(s)
+    except ValueError:
+        pass
+    # Suffixed form.
+    units = {"s": 1, "m": 60, "h": 3600, "d": 86400}
+    if s and s[-1] in units:
+        try:
+            n = float(s[:-1])
+        except ValueError as exc:
+            raise ValueError(f"malformed duration {val!r}") from exc
+        return int(n * units[s[-1]])
+    raise ValueError(f"malformed duration {val!r} (expected 30s, 5m, 2h, 1d, or a number)")
+
+
+def _cmd_init(args: argparse.Namespace) -> int:
+    path = kb.init_db()
+    print(f"Kanban DB initialized at {path}")
+    print()
+    # Enumerate profiles on disk so the user knows what assignees are
+    # already addressable.
+    # Multica does this auto-detection on its daemon start; we do it
+    # here at init time instead because our dispatcher doesn't need to
+    # enumerate — we just pass the name through to `hermes -p <assignee>`.
+    try:
+        profiles = kb.list_profiles_on_disk()
+    except Exception:
+        profiles = []
+    if profiles:
+        print(f"Discovered {len(profiles)} profile(s) on disk; any of these can "
+              f"be an --assignee:")
+        for name in profiles:
+            print(f"  {name}")
+    else:
+        print("No profiles found under ~/.hermes/profiles/.")
+        print("Create one with `hermes -p <name> setup` before assigning tasks.")
+    print()
+    print("Next step: start the gateway so ready tasks actually get picked up.")
+    print("  hermes gateway start")
+    print()
+    print(
+        "The gateway hosts an embedded dispatcher that ticks every 60 seconds\n"
+        "by default (config: kanban.dispatch_interval_seconds). Without a\n"
+        "running gateway, tasks stay in 'ready' forever."
+    )
+    return 0
+
+
+def _cmd_heartbeat(args: argparse.Namespace) -> int:
+    with kb.connect() as conn:
+        ok = kb.heartbeat_worker(conn, args.task_id, note=getattr(args, "note", None))
+    if not ok:
+        print(f"cannot heartbeat {args.task_id} (not running?)", file=sys.stderr)
+        return 1
+    print(f"Heartbeat recorded for {args.task_id}")
+    return 0
+
+
+def _cmd_assignees(args: argparse.Namespace) -> int:
+    with kb.connect() as conn:
+        data = kb.known_assignees(conn)
+    if getattr(args, "json", False):
+        print(json.dumps(data, indent=2, ensure_ascii=False))
+        return 0
+    if not data:
+        print("(no assignees — create a profile with `hermes -p <name> setup`)")
+        return 0
+    # Header
+    print(f"{'NAME':20s} {'ON DISK':8s} COUNTS")
+    for entry in data:
+        on_disk = "yes" if entry["on_disk"] else "no"
+        counts = entry["counts"] or {}
+        count_str = ", ".join(f"{k}={v}" for k, v in sorted(counts.items())) or "(idle)"
+        print(f"{entry['name']:20s} {on_disk:8s} {count_str}")
+    return 0
+
+
+def _cmd_create(args: argparse.Namespace) -> int:
+    ws_kind, ws_path = _parse_workspace_flag(args.workspace)
+    try:
+        max_runtime = _parse_duration(getattr(args, "max_runtime", None))
+    except ValueError as exc:
+        print(f"kanban: --max-runtime: {exc}", file=sys.stderr)
+        return 2
+    with kb.connect() as conn:
+        task_id = kb.create_task(
+            conn,
+            title=args.title,
+            body=args.body,
+            assignee=args.assignee,
+            created_by=args.created_by or _profile_author(),
+            workspace_kind=ws_kind,
+            workspace_path=ws_path,
+            tenant=args.tenant,
+            priority=args.priority,
+            parents=tuple(args.parent or ()),
+            triage=bool(getattr(args, "triage", False)),
+            idempotency_key=getattr(args, "idempotency_key", None),
+            max_runtime_seconds=max_runtime,
+            skills=getattr(args, "skills", None) or None,
+        )
+        task = kb.get_task(conn, task_id)
+    if getattr(args, "json", False):
+        print(json.dumps(_task_to_dict(task), indent=2, ensure_ascii=False))
+    else:
+        print(f"Created {task_id} ({task.status}, assignee={task.assignee or '-'})")
+
+        # Warn when the task would sit in `ready` because no dispatcher is
+        # present. Only warn on ready+assigned tasks — triage/todo are
+        # expected to sit idle until promoted, and unassigned tasks
+        # can't be dispatched. Skipped in --json mode so the stdout
+        # stream stays strictly machine-parseable for callers (the JSON
+        # response itself carries enough info for them to decide if
+        # they want to check dispatcher presence separately).
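+        # e.g. (stderr, illustrative): "⚠ No gateway is running — the task
+        # will sit in 'ready' until you start it. Run: hermes gateway start"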
+ if task.status == "ready" and task.assignee: + running, message = _check_dispatcher_presence() + if not running and message: + print(f"\n⚠ {message}", file=sys.stderr) + return 0 + + +def _cmd_list(args: argparse.Namespace) -> int: + assignee = args.assignee + if args.mine and not assignee: + assignee = _profile_author() + with kb.connect() as conn: + # Cheap "mini-dispatch": recompute ready so list output reflects + # dependencies that may have cleared since the last dispatcher tick. + kb.recompute_ready(conn) + tasks = kb.list_tasks( + conn, + assignee=assignee, + status=args.status, + tenant=args.tenant, + include_archived=args.archived, + ) + if getattr(args, "json", False): + print(json.dumps([_task_to_dict(t) for t in tasks], indent=2, ensure_ascii=False)) + return 0 + if not tasks: + print("(no matching tasks)") + return 0 + for t in tasks: + print(_fmt_task_line(t)) + return 0 + + +def _cmd_show(args: argparse.Namespace) -> int: + with kb.connect() as conn: + task = kb.get_task(conn, args.task_id) + if not task: + print(f"no such task: {args.task_id}", file=sys.stderr) + return 1 + comments = kb.list_comments(conn, args.task_id) + events = kb.list_events(conn, args.task_id) + parents = kb.parent_ids(conn, args.task_id) + children = kb.child_ids(conn, args.task_id) + runs = kb.list_runs(conn, args.task_id) + + if getattr(args, "json", False): + payload = { + "task": _task_to_dict(task), + "parents": parents, + "children": children, + "comments": [ + {"author": c.author, "body": c.body, "created_at": c.created_at} + for c in comments + ], + "events": [ + { + "kind": e.kind, + "payload": e.payload, + "created_at": e.created_at, + "run_id": e.run_id, + } + for e in events + ], + "runs": [ + { + "id": r.id, + "profile": r.profile, + "step_key": r.step_key, + "status": r.status, + "outcome": r.outcome, + "summary": r.summary, + "error": r.error, + "metadata": r.metadata, + "worker_pid": r.worker_pid, + "started_at": r.started_at, + "ended_at": r.ended_at, + } + for r in runs + ], + } + print(json.dumps(payload, indent=2, ensure_ascii=False)) + return 0 + + print(f"Task {task.id}: {task.title}") + print(f" status: {task.status}") + print(f" assignee: {task.assignee or '-'}") + if task.tenant: + print(f" tenant: {task.tenant}") + print(f" workspace: {task.workspace_kind}" + + (f" @ {task.workspace_path}" if task.workspace_path else "")) + if task.skills: + print(f" skills: {', '.join(task.skills)}") + print(f" created: {_fmt_ts(task.created_at)} by {task.created_by or '-'}") + if task.started_at: + print(f" started: {_fmt_ts(task.started_at)}") + if task.completed_at: + print(f" completed: {_fmt_ts(task.completed_at)}") + if parents: + print(f" parents: {', '.join(parents)}") + if children: + print(f" children: {', '.join(children)}") + if task.body: + print() + print("Body:") + print(task.body) + if task.result: + print() + print("Result:") + print(task.result) + if comments: + print() + print(f"Comments ({len(comments)}):") + for c in comments: + print(f" [{_fmt_ts(c.created_at)}] {c.author}: {c.body}") + if events: + print() + print(f"Events ({len(events)}):") + for e in events[-20:]: + pl = f" {e.payload}" if e.payload else "" + run_tag = f" [run {e.run_id}]" if e.run_id else "" + print(f" [{_fmt_ts(e.created_at)}]{run_tag} {e.kind}{pl}") + if runs: + print() + print(f"Runs ({len(runs)}):") + for r in runs: + # Clamp to 0 so NTP backward-jumps don't print negative seconds. 
+ elapsed = (max(0, r.ended_at - r.started_at) + if r.ended_at else None) + el = f"{elapsed}s" if elapsed is not None else "active" + outcome = r.outcome or r.status or "active" + print(f" #{r.id:<3} {outcome:<12} @{r.profile or '-'} {el} " + f"{_fmt_ts(r.started_at)}") + if r.summary: + print(f" → {r.summary.splitlines()[0][:160]}") + if r.error: + print(f" ! {r.error.splitlines()[0][:160]}") + return 0 + + +def _cmd_assign(args: argparse.Namespace) -> int: + profile = None if args.profile.lower() in ("none", "-", "null") else args.profile + with kb.connect() as conn: + ok = kb.assign_task(conn, args.task_id, profile) + if not ok: + print(f"no such task: {args.task_id}", file=sys.stderr) + return 1 + print(f"Assigned {args.task_id} to {profile or '(unassigned)'}") + return 0 + + +def _cmd_link(args: argparse.Namespace) -> int: + with kb.connect() as conn: + kb.link_tasks(conn, args.parent_id, args.child_id) + print(f"Linked {args.parent_id} -> {args.child_id}") + return 0 + + +def _cmd_unlink(args: argparse.Namespace) -> int: + with kb.connect() as conn: + ok = kb.unlink_tasks(conn, args.parent_id, args.child_id) + if not ok: + print(f"No such link: {args.parent_id} -> {args.child_id}", file=sys.stderr) + return 1 + print(f"Unlinked {args.parent_id} -> {args.child_id}") + return 0 + + +def _cmd_claim(args: argparse.Namespace) -> int: + with kb.connect() as conn: + task = kb.claim_task(conn, args.task_id, ttl_seconds=args.ttl) + if task is None: + # Report why + existing = kb.get_task(conn, args.task_id) + if existing is None: + print(f"no such task: {args.task_id}", file=sys.stderr) + return 1 + print( + f"cannot claim {args.task_id}: status={existing.status} " + f"lock={existing.claim_lock or '(none)'}", + file=sys.stderr, + ) + return 1 + workspace = kb.resolve_workspace(task) + kb.set_workspace_path(conn, task.id, str(workspace)) + print(f"Claimed {task.id}") + print(f"Workspace: {workspace}") + return 0 + + +def _cmd_comment(args: argparse.Namespace) -> int: + body = " ".join(args.text).strip() + author = args.author or _profile_author() + with kb.connect() as conn: + kb.add_comment(conn, args.task_id, author, body) + print(f"Comment added to {args.task_id}") + return 0 + + +def _cmd_complete(args: argparse.Namespace) -> int: + """Mark one or more tasks done. Supports a single id or a list.""" + ids = list(args.task_ids or []) + if not ids: + print("at least one task_id is required", file=sys.stderr) + return 1 + summary = getattr(args, "summary", None) + raw_meta = getattr(args, "metadata", None) + # Guard: structured handoff fields are per-run, so they'd be + # copy-pasted identically across N runs — almost always a footgun. + # Refuse instead of silently doing the wrong thing. + if len(ids) > 1 and (summary or raw_meta): + print( + "kanban: --summary / --metadata are per-task and can't be used " + "with multiple ids (would apply the same handoff to every task). 
" + "Complete tasks one at a time, or drop the flags for the bulk close.", + file=sys.stderr, + ) + return 2 + metadata = None + if raw_meta: + try: + metadata = json.loads(raw_meta) + if not isinstance(metadata, dict): + raise ValueError("must be a JSON object") + except (ValueError, json.JSONDecodeError) as exc: + print(f"kanban: --metadata: {exc}", file=sys.stderr) + return 2 + failed: list[str] = [] + with kb.connect() as conn: + for tid in ids: + if not kb.complete_task( + conn, tid, + result=args.result, + summary=summary, + metadata=metadata, + ): + failed.append(tid) + print(f"cannot complete {tid} (unknown id or terminal state)", file=sys.stderr) + else: + print(f"Completed {tid}") + return 0 if not failed else 1 + + +def _cmd_block(args: argparse.Namespace) -> int: + reason = " ".join(args.reason).strip() if args.reason else None + author = _profile_author() + ids = [args.task_id] + list(getattr(args, "ids", None) or []) + failed: list[str] = [] + with kb.connect() as conn: + for tid in ids: + if reason: + kb.add_comment(conn, tid, author, f"BLOCKED: {reason}") + if not kb.block_task(conn, tid, reason=reason): + failed.append(tid) + print(f"cannot block {tid}", file=sys.stderr) + else: + print(f"Blocked {tid}" + (f": {reason}" if reason else "")) + return 0 if not failed else 1 + + +def _cmd_unblock(args: argparse.Namespace) -> int: + ids = list(args.task_ids or []) + if not ids: + print("at least one task_id is required", file=sys.stderr) + return 1 + failed: list[str] = [] + with kb.connect() as conn: + for tid in ids: + if not kb.unblock_task(conn, tid): + failed.append(tid) + print(f"cannot unblock {tid} (not blocked?)", file=sys.stderr) + else: + print(f"Unblocked {tid}") + return 0 if not failed else 1 + + +def _cmd_archive(args: argparse.Namespace) -> int: + ids = list(args.task_ids or []) + if not ids: + print("at least one task_id is required", file=sys.stderr) + return 1 + failed: list[str] = [] + with kb.connect() as conn: + for tid in ids: + if not kb.archive_task(conn, tid): + failed.append(tid) + print(f"cannot archive {tid}", file=sys.stderr) + else: + print(f"Archived {tid}") + return 0 if not failed else 1 + + +def _cmd_tail(args: argparse.Namespace) -> int: + last_id = 0 + print(f"Tailing events for {args.task_id}. 
Ctrl-C to stop.") + try: + while True: + with kb.connect() as conn: + events = kb.list_events(conn, args.task_id) + for e in events: + if e.id > last_id: + pl = f" {e.payload}" if e.payload else "" + print(f"[{_fmt_ts(e.created_at)}] {e.kind}{pl}", flush=True) + last_id = e.id + time.sleep(max(0.1, args.interval)) + except KeyboardInterrupt: + print("\n(stopped)") + return 0 + + +def _cmd_dispatch(args: argparse.Namespace) -> int: + with kb.connect() as conn: + res = kb.dispatch_once( + conn, + dry_run=args.dry_run, + max_spawn=args.max, + failure_limit=getattr(args, "failure_limit", kb.DEFAULT_SPAWN_FAILURE_LIMIT), + ) + if getattr(args, "json", False): + print(json.dumps({ + "reclaimed": res.reclaimed, + "crashed": res.crashed, + "timed_out": res.timed_out, + "auto_blocked": res.auto_blocked, + "promoted": res.promoted, + "spawned": [ + {"task_id": tid, "assignee": who, "workspace": ws} + for (tid, who, ws) in res.spawned + ], + "skipped_unassigned": res.skipped_unassigned, + }, indent=2)) + return 0 + print(f"Reclaimed: {res.reclaimed}") + print(f"Crashed: {len(res.crashed)}") + if res.crashed: + print(f" {', '.join(res.crashed)}") + print(f"Timed out: {len(res.timed_out)}") + if res.timed_out: + print(f" {', '.join(res.timed_out)}") + print(f"Auto-blocked: {len(res.auto_blocked)}") + if res.auto_blocked: + print(f" {', '.join(res.auto_blocked)}") + print(f"Promoted: {res.promoted}") + print(f"Spawned: {len(res.spawned)}") + for tid, who, ws in res.spawned: + tag = " (dry)" if args.dry_run else "" + print(f" - {tid} -> {who} @ {ws or '-'}{tag}") + if res.skipped_unassigned: + print(f"Skipped (unassigned): {', '.join(res.skipped_unassigned)}") + return 0 + + +def _cmd_daemon(args: argparse.Namespace) -> int: + """Deprecated — the dispatcher now runs inside the gateway. + + Left in as a stub so users with the old command in scripts/systemd + units get a clear migration message instead of a cryptic + "no such command" error. A ``--force`` escape hatch keeps the old + standalone daemon alive for the rare edge case where someone truly + cannot run the gateway (e.g. running on a host that forbids + long-lived background services), but the default path exits 2 + with guidance so nobody accidentally keeps running two dispatchers + against the same kanban.db. + """ + # --force lets power users keep the standalone loop for one more + # release cycle. Undocumented in `--help` so nobody discovers it + # casually — intentional. + if not getattr(args, "force", False): + print( + "hermes kanban daemon: DEPRECATED — the dispatcher now runs\n" + "inside the gateway. To use kanban:\n" + "\n" + " hermes gateway start # starts the gateway + embedded dispatcher\n" + "\n" + "Ready tasks will be picked up on the next dispatcher tick\n" + "(default: every 60 seconds). Configure via config.yaml:\n" + "\n" + " kanban:\n" + " dispatch_in_gateway: true # default\n" + " dispatch_interval_seconds: 60\n" + "\n" + "Running both the gateway AND this standalone daemon will\n" + "race for claims. If you truly need the old standalone\n" + "daemon (no gateway available), rerun with --force.", + file=sys.stderr, + ) + return 2 + + # Legacy path — same logic as before, kept behind --force. + # Make sure the DB exists before printing "started" so the user sees the + # correct DB path and any init error surfaces immediately. 
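+    # For scripts still invoking this path, the supported migration is a
+    # one-line unit change (illustrative systemd snippet; unit names,
+    # paths, and the --interval value vary per install):
+    #
+    #     ExecStart=hermes kanban daemon --interval 5    # old
+    #     ExecStart=hermes gateway start                 # new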
+ kb.init_db() + + pidfile = getattr(args, "pidfile", None) + if pidfile: + try: + Path(pidfile).parent.mkdir(parents=True, exist_ok=True) + Path(pidfile).write_text(str(os.getpid()), encoding="utf-8") + except OSError as exc: + print(f"warning: could not write pidfile {pidfile}: {exc}", file=sys.stderr) + + verbose = bool(getattr(args, "verbose", False)) + print( + f"Kanban dispatcher running STANDALONE via --force " + f"(interval={args.interval}s, pid={os.getpid()}). " + f"Ctrl-C to stop. NOTE: if a gateway is also running with " + f"dispatch_in_gateway=true (default), you have two dispatchers " + f"racing for claims.", + file=sys.stderr, + ) + + # Health telemetry: warn when every tick finds ready work but fails to + # spawn any worker. Catches broken profiles, PATH drift, missing venv, + # credential loss — cases where the per-task circuit breaker auto-blocks + # each task quietly but the operator has no signal that the dispatcher + # itself is dysfunctional. + HEALTH_WINDOW = 6 # ticks (default 30s at interval=5) + health_state = {"bad_ticks": 0, "last_warn_at": 0} + + def _on_tick(res): + ready_pending = bool(res.skipped_unassigned) or _ready_queue_nonempty() + spawned_any = bool(res.spawned) + if ready_pending and not spawned_any: + health_state["bad_ticks"] += 1 + else: + health_state["bad_ticks"] = 0 + # Emit a warning once per HEALTH_WINDOW bad ticks (not every tick) + # so log volume stays bounded while the problem persists. + if health_state["bad_ticks"] >= HEALTH_WINDOW: + now = int(time.time()) + # Rate-limit repeats: at most one warning per 5 minutes. + if now - health_state["last_warn_at"] >= 300: + print( + f"[{_fmt_ts(now)}] WARN dispatcher stuck: " + f"ready queue non-empty for {health_state['bad_ticks']} " + f"consecutive ticks but 0 workers spawned successfully. " + f"Check profile health (venv, PATH, credentials) and " + f"`hermes kanban list --status ready` / " + f"`hermes kanban list --status blocked` for recent " + f"spawn_failed tasks.", + file=sys.stderr, flush=True, + ) + health_state["last_warn_at"] = now + if not verbose: + return + did_work = ( + res.reclaimed or res.crashed or res.timed_out or res.promoted + or res.spawned or res.auto_blocked + ) + if did_work: + print( + f"[{_fmt_ts(int(time.time()))}] " + f"reclaimed={res.reclaimed} crashed={len(res.crashed)} " + f"timed_out={len(res.timed_out)} " + f"promoted={res.promoted} spawned={len(res.spawned)} " + f"auto_blocked={len(res.auto_blocked)}", + flush=True, + ) + + def _ready_queue_nonempty() -> bool: + """Cheap SELECT — just asks whether there's at least one ready + task with an assignee that the dispatcher could have picked up.""" + try: + with kb.connect() as conn: + row = conn.execute( + "SELECT 1 FROM tasks " + "WHERE status = 'ready' AND assignee IS NOT NULL " + " AND claim_lock IS NULL LIMIT 1" + ).fetchone() + return row is not None + except Exception: + return False + + try: + kb.run_daemon( + interval=args.interval, + max_spawn=args.max, + failure_limit=getattr(args, "failure_limit", kb.DEFAULT_SPAWN_FAILURE_LIMIT), + on_tick=_on_tick, + ) + finally: + if pidfile: + try: + Path(pidfile).unlink() + except OSError: + pass + print("(dispatcher stopped)") + return 0 + + +def _cmd_watch(args: argparse.Namespace) -> int: + """Live-stream task_events to the terminal.""" + kinds = ( + {k.strip() for k in args.kinds.split(",") if k.strip()} + if args.kinds else None + ) + cursor = 0 + print("Watching kanban events. 
Ctrl-C to stop.", flush=True) + # Seed cursor at the latest id so we don't replay history. + with kb.connect() as conn: + row = conn.execute( + "SELECT COALESCE(MAX(id), 0) AS m FROM task_events" + ).fetchone() + cursor = int(row["m"]) + + try: + while True: + with kb.connect() as conn: + rows = conn.execute( + "SELECT e.id, e.task_id, e.kind, e.payload, e.created_at, " + " t.assignee, t.tenant " + "FROM task_events e LEFT JOIN tasks t ON t.id = e.task_id " + "WHERE e.id > ? ORDER BY e.id ASC LIMIT 200", + (cursor,), + ).fetchall() + for r in rows: + cursor = max(cursor, int(r["id"])) + if kinds and r["kind"] not in kinds: + continue + if args.assignee and r["assignee"] != args.assignee: + continue + if args.tenant and r["tenant"] != args.tenant: + continue + try: + payload = json.loads(r["payload"]) if r["payload"] else None + except Exception: + payload = None + pl = f" {payload}" if payload else "" + print( + f"[{_fmt_ts(r['created_at'])}] {r['task_id']:10s} " + f"{r['kind']:18s} (@{r['assignee'] or '-'}){pl}", + flush=True, + ) + time.sleep(max(0.1, args.interval)) + except KeyboardInterrupt: + print("\n(stopped)") + return 0 + + +def _cmd_stats(args: argparse.Namespace) -> int: + with kb.connect() as conn: + stats = kb.board_stats(conn) + if getattr(args, "json", False): + print(json.dumps(stats, indent=2, ensure_ascii=False)) + return 0 + print("By status:") + for k in ("triage", "todo", "ready", "running", "blocked", "done"): + print(f" {k:8s} {stats['by_status'].get(k, 0)}") + if stats["by_assignee"]: + print("\nBy assignee:") + for who, counts in sorted(stats["by_assignee"].items()): + parts = ", ".join(f"{k}={v}" for k, v in sorted(counts.items())) + print(f" {who:20s} {parts}") + age = stats["oldest_ready_age_seconds"] + if age is not None: + print(f"\nOldest ready task age: {int(age)}s") + return 0 + + +def _cmd_notify_subscribe(args: argparse.Namespace) -> int: + with kb.connect() as conn: + if kb.get_task(conn, args.task_id) is None: + print(f"no such task: {args.task_id}", file=sys.stderr) + return 1 + kb.add_notify_sub( + conn, task_id=args.task_id, + platform=args.platform, chat_id=args.chat_id, + thread_id=args.thread_id, user_id=args.user_id, + ) + print(f"Subscribed {args.platform}:{args.chat_id}" + + (f":{args.thread_id}" if args.thread_id else "") + + f" to {args.task_id}") + return 0 + + +def _cmd_notify_list(args: argparse.Namespace) -> int: + with kb.connect() as conn: + subs = kb.list_notify_subs(conn, args.task_id) + if getattr(args, "json", False): + print(json.dumps(subs, indent=2, ensure_ascii=False)) + return 0 + if not subs: + print("(no subscriptions)") + return 0 + for s in subs: + thr = f":{s['thread_id']}" if s.get("thread_id") else "" + print(f" {s['task_id']:10s} {s['platform']}:{s['chat_id']}{thr}" + f" (since event {s['last_event_id']})") + return 0 + + +def _cmd_notify_unsubscribe(args: argparse.Namespace) -> int: + with kb.connect() as conn: + ok = kb.remove_notify_sub( + conn, task_id=args.task_id, + platform=args.platform, chat_id=args.chat_id, + thread_id=args.thread_id, + ) + if not ok: + print("(no such subscription)", file=sys.stderr) + return 1 + print(f"Unsubscribed from {args.task_id}") + return 0 + + +def _cmd_log(args: argparse.Namespace) -> int: + content = kb.read_worker_log(args.task_id, tail_bytes=args.tail) + if content is None: + print(f"(no log for {args.task_id} — task may not have spawned yet)", + file=sys.stderr) + return 1 + sys.stdout.write(content) + if not content.endswith("\n"): + sys.stdout.write("\n") + return 0 + + +def 
_cmd_runs(args: argparse.Namespace) -> int: + """Show attempt history for a task.""" + with kb.connect() as conn: + runs = kb.list_runs(conn, args.task_id) + if getattr(args, "json", False): + print(json.dumps([ + { + "id": r.id, "profile": r.profile, "status": r.status, + "outcome": r.outcome, "started_at": r.started_at, + "ended_at": r.ended_at, "summary": r.summary, + "error": r.error, "metadata": r.metadata, + "worker_pid": r.worker_pid, "step_key": r.step_key, + } for r in runs + ], indent=2, ensure_ascii=False)) + return 0 + if not runs: + print(f"(no runs yet for {args.task_id})") + return 0 + print(f"{'#':3s} {'OUTCOME':12s} {'PROFILE':16s} {'ELAPSED':>8s} STARTED") + for i, r in enumerate(runs, 1): + end = r.ended_at or int(time.time()) + # Clamp to 0 so NTP backward-jumps don't print negative durations. + elapsed = max(0, end - r.started_at) + if elapsed < 60: + el = f"{elapsed}s" + elif elapsed < 3600: + el = f"{elapsed // 60}m" + else: + el = f"{elapsed / 3600:.1f}h" + outcome = r.outcome or ("(running)" if not r.ended_at else r.status) + print(f"{i:3d} {outcome:12s} {(r.profile or '-'):16s} {el:>8s} {_fmt_ts(r.started_at)}") + if r.summary: + # Indent and truncate long summaries to keep the table readable. + summary = r.summary.splitlines()[0][:100] + print(f" → {summary}") + if r.error: + print(f" ✖ {r.error[:100]}") + return 0 + + +def _cmd_context(args: argparse.Namespace) -> int: + with kb.connect() as conn: + text = kb.build_worker_context(conn, args.task_id) + print(text) + return 0 + + +def _cmd_gc(args: argparse.Namespace) -> int: + """Remove scratch workspaces of archived tasks, prune old events, and + delete old worker logs.""" + import shutil + scratch_root = kb.workspaces_root() + removed_ws = 0 + with kb.connect() as conn: + rows = conn.execute( + "SELECT id, workspace_kind, workspace_path FROM tasks WHERE status = 'archived'" + ).fetchall() + for row in rows: + if row["workspace_kind"] != "scratch": + continue + path = Path(row["workspace_path"] or (scratch_root / row["id"])) + try: + path = path.resolve() + except OSError: + continue + try: + path.relative_to(scratch_root.resolve()) + except ValueError: + # Safety: never delete outside the scratch root. + continue + if path.exists() and path.is_dir(): + shutil.rmtree(path, ignore_errors=True) + removed_ws += 1 + + event_days = getattr(args, "event_retention_days", 30) + log_days = getattr(args, "log_retention_days", 30) + with kb.connect() as conn: + removed_events = kb.gc_events( + conn, older_than_seconds=event_days * 24 * 3600, + ) + removed_logs = kb.gc_worker_logs( + older_than_seconds=log_days * 24 * 3600, + ) + print(f"GC complete: {removed_ws} workspace(s), " + f"{removed_events} event row(s), {removed_logs} log file(s) removed") + return 0 + + +# --------------------------------------------------------------------------- +# Slash-command entry point (used by /kanban from CLI and gateway) +# --------------------------------------------------------------------------- + +def run_slash(rest: str) -> str: + """Execute a ``/kanban …`` string and return captured stdout/stderr. + + ``rest`` is everything after ``/kanban`` (may be empty). Used from + both the interactive CLI (``self._handle_kanban_command``) and the + gateway (``_handle_kanban_command``) so formatting is identical. 
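+
+    A representative round-trip (board contents hypothetical; only the
+    shape of the return value is fixed)::
+
+        >>> run_slash("stats")        # doctest: +SKIP
+        'By status:\n  triage   0\n  todo     2\n...'
+        >>> run_slash("no-such-cmd")  # doctest: +SKIP
+        '(usage error: ...)'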
+    """
+    import io
+    import contextlib
+
+    tokens = shlex.split(rest) if rest and rest.strip() else []
+
+    # Reuse the shared argparse builder by hanging it off a wrapping
+    # top-level parser, then routing everything through the "kanban"
+    # subcommand.
+    wrap = argparse.ArgumentParser(prog="/", add_help=False)
+    wrap.exit_on_error = False  # type: ignore[attr-defined]
+    wrap_sub = wrap.add_subparsers(dest="_top")
+    build_parser(wrap_sub)
+
+    buf_out = io.StringIO()
+    buf_err = io.StringIO()
+    try:
+        # Prepend the "kanban" token so our top-level subparser routes here.
+        argv = ["kanban", *tokens] if tokens else ["kanban"]
+        args = wrap.parse_args(argv)
+    except SystemExit as exc:
+        return f"(usage error: {exc})"
+    except argparse.ArgumentError as exc:
+        return f"(usage error: {exc})"
+
+    with contextlib.redirect_stdout(buf_out), contextlib.redirect_stderr(buf_err):
+        try:
+            kanban_command(args)
+        except SystemExit:
+            pass
+        except Exception as exc:
+            print(f"error: {exc}", file=sys.stderr)
+
+    out = buf_out.getvalue().rstrip()
+    err = buf_err.getvalue().rstrip()
+    if err and out:
+        return f"{out}\n{err}"
+    return err if err else (out or "(no output)")
diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py
new file mode 100644
index 00000000000..1e8be214fb3
--- /dev/null
+++ b/hermes_cli/kanban_db.py
@@ -0,0 +1,2765 @@
+"""SQLite-backed Kanban board for multi-profile collaboration.
+
+The board lives at ``$HERMES_HOME/kanban.db`` (profile-agnostic on purpose:
+multiple profiles on the same machine all see the same board, which IS the
+coordination primitive).
+
+Schema is intentionally small: tasks, task_links, task_comments,
+task_events, task_runs, plus kanban_notify_subs for gateway
+notifications. The ``workspace_kind`` field decouples coordination from
+git worktrees so that research / ops / digital-twin workloads work
+alongside coding workloads. See ``docs/hermes-kanban-v1-spec.pdf`` for
+the full design specification.
+
+Concurrency strategy: WAL mode + ``BEGIN IMMEDIATE`` for write
+transactions + compare-and-swap (CAS) updates on ``tasks.status`` and
+``tasks.claim_lock``. SQLite serializes writers via its WAL lock, so at
+most one claimer can win any given task. Losers observe zero affected
+rows and move on -- no retry loops, no distributed-lock machinery.
+"""
+
+from __future__ import annotations
+
+import contextlib
+import json
+import os
+import secrets
+import sqlite3
+import sys
+import time
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Iterable, Optional
+
+
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+
+VALID_STATUSES = {"triage", "todo", "ready", "running", "blocked", "done", "archived"}
+VALID_WORKSPACE_KINDS = {"scratch", "worktree", "dir"}
+
+# A running task's claim is valid for 15 minutes; after that the next
+# dispatcher tick reclaims it. Workers that outlive this window should call
+# ``heartbeat_claim(task_id)`` periodically. In practice most kanban
+# workloads either finish within 15m or set a longer claim explicitly.
+DEFAULT_CLAIM_TTL_SECONDS = 15 * 60
+
+
+# Worker-context caps so build_worker_context() stays bounded on
+# pathological boards (retry-heavy tasks, comment storms, giant
+# summaries). Values chosen to fit a typical 100k-char LLM prompt with
+# plenty of headroom. 
Each constant is tuned independently so users +# who need to relax one don't have to relax all of them. +_CTX_MAX_PRIOR_ATTEMPTS = 10 # most recent N prior runs shown in full +_CTX_MAX_COMMENTS = 30 # most recent N comments shown in full +_CTX_MAX_FIELD_BYTES = 4 * 1024 # 4 KB per summary/error/metadata/result +_CTX_MAX_BODY_BYTES = 8 * 1024 # 8 KB per task.body (opening post) +_CTX_MAX_COMMENT_BYTES = 2 * 1024 # 2 KB per comment + + +# --------------------------------------------------------------------------- +# Paths +# --------------------------------------------------------------------------- + +def kanban_db_path() -> Path: + """Return the path to ``kanban.db`` inside the active HERMES_HOME.""" + from hermes_constants import get_hermes_home + return get_hermes_home() / "kanban.db" + + +def workspaces_root() -> Path: + """Return the directory under which ``scratch`` workspaces are created.""" + from hermes_constants import get_hermes_home + return get_hermes_home() / "kanban" / "workspaces" + + +# --------------------------------------------------------------------------- +# Data classes +# --------------------------------------------------------------------------- + +@dataclass +class Task: + """In-memory view of a row from the ``tasks`` table.""" + + id: str + title: str + body: Optional[str] + assignee: Optional[str] + status: str + priority: int + created_by: Optional[str] + created_at: int + started_at: Optional[int] + completed_at: Optional[int] + workspace_kind: str + workspace_path: Optional[str] + claim_lock: Optional[str] + claim_expires: Optional[int] + tenant: Optional[str] + result: Optional[str] = None + idempotency_key: Optional[str] = None + spawn_failures: int = 0 + worker_pid: Optional[int] = None + last_spawn_error: Optional[str] = None + max_runtime_seconds: Optional[int] = None + last_heartbeat_at: Optional[int] = None + current_run_id: Optional[int] = None + workflow_template_id: Optional[str] = None + current_step_key: Optional[str] = None + # Force-loaded skills for the worker on this task (appended to the + # dispatcher's built-in `kanban-worker` via --skills). Stored as a + # JSON array of skill names. None = use only the defaults; empty + # list = explicitly no extra skills. 
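+    # e.g. a stored value of '["translation", "ocr"]' parses to
+    # ["translation", "ocr"]; '[]' parses to [] (worker gets only the
+    # built-in skill); malformed JSON degrades to None in from_row.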
+ skills: Optional[list] = None + + @classmethod + def from_row(cls, row: sqlite3.Row) -> "Task": + keys = set(row.keys()) + # Parse skills JSON blob if present + skills_value: Optional[list] = None + if "skills" in keys and row["skills"]: + try: + parsed = json.loads(row["skills"]) + if isinstance(parsed, list): + skills_value = [str(s) for s in parsed if s] + except Exception: + skills_value = None + return cls( + id=row["id"], + title=row["title"], + body=row["body"], + assignee=row["assignee"], + status=row["status"], + priority=row["priority"], + created_by=row["created_by"], + created_at=row["created_at"], + started_at=row["started_at"], + completed_at=row["completed_at"], + workspace_kind=row["workspace_kind"], + workspace_path=row["workspace_path"], + claim_lock=row["claim_lock"], + claim_expires=row["claim_expires"], + tenant=row["tenant"] if "tenant" in keys else None, + result=row["result"] if "result" in keys else None, + idempotency_key=row["idempotency_key"] if "idempotency_key" in keys else None, + spawn_failures=row["spawn_failures"] if "spawn_failures" in keys else 0, + worker_pid=row["worker_pid"] if "worker_pid" in keys else None, + last_spawn_error=row["last_spawn_error"] if "last_spawn_error" in keys else None, + max_runtime_seconds=( + row["max_runtime_seconds"] if "max_runtime_seconds" in keys else None + ), + last_heartbeat_at=( + row["last_heartbeat_at"] if "last_heartbeat_at" in keys else None + ), + current_run_id=( + row["current_run_id"] if "current_run_id" in keys else None + ), + workflow_template_id=( + row["workflow_template_id"] if "workflow_template_id" in keys else None + ), + current_step_key=( + row["current_step_key"] if "current_step_key" in keys else None + ), + skills=skills_value, + ) + + +@dataclass +class Run: + """In-memory view of a ``task_runs`` row. + + A run is one attempt to execute a task — created on claim, closed + on complete/block/crash/timeout/spawn_failure/reclaim. Multiple runs + per task when retries happen. Carries the claim machinery, PID, + heartbeat, and the structured handoff summary that downstream workers + read via ``build_worker_context``. 
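+
+    A retried task might therefore show two rows (values illustrative)::
+
+        run 1: outcome='crashed'    error='worker exited 137'
+        run 2: outcome='completed'  summary='ported the parser; 12 tests pass'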
+ """ + + id: int + task_id: str + profile: Optional[str] + step_key: Optional[str] + status: str + claim_lock: Optional[str] + claim_expires: Optional[int] + worker_pid: Optional[int] + max_runtime_seconds: Optional[int] + last_heartbeat_at: Optional[int] + started_at: int + ended_at: Optional[int] + outcome: Optional[str] + summary: Optional[str] + metadata: Optional[dict] + error: Optional[str] + + @classmethod + def from_row(cls, row: sqlite3.Row) -> "Run": + try: + meta = json.loads(row["metadata"]) if row["metadata"] else None + except Exception: + meta = None + return cls( + id=int(row["id"]), + task_id=row["task_id"], + profile=row["profile"], + step_key=row["step_key"], + status=row["status"], + claim_lock=row["claim_lock"], + claim_expires=row["claim_expires"], + worker_pid=row["worker_pid"], + max_runtime_seconds=row["max_runtime_seconds"], + last_heartbeat_at=row["last_heartbeat_at"], + started_at=int(row["started_at"]), + ended_at=(int(row["ended_at"]) if row["ended_at"] is not None else None), + outcome=row["outcome"], + summary=row["summary"], + metadata=meta, + error=row["error"], + ) + + +@dataclass +class Comment: + id: int + task_id: str + author: str + body: str + created_at: int + + +@dataclass +class Event: + id: int + task_id: str + kind: str + payload: Optional[dict] + created_at: int + run_id: Optional[int] = None + + +# --------------------------------------------------------------------------- +# Schema +# --------------------------------------------------------------------------- + +SCHEMA_SQL = """ +CREATE TABLE IF NOT EXISTS tasks ( + id TEXT PRIMARY KEY, + title TEXT NOT NULL, + body TEXT, + assignee TEXT, + status TEXT NOT NULL, + priority INTEGER DEFAULT 0, + created_by TEXT, + created_at INTEGER NOT NULL, + started_at INTEGER, + completed_at INTEGER, + workspace_kind TEXT NOT NULL DEFAULT 'scratch', + workspace_path TEXT, + claim_lock TEXT, + claim_expires INTEGER, + tenant TEXT, + result TEXT, + idempotency_key TEXT, + spawn_failures INTEGER NOT NULL DEFAULT 0, + worker_pid INTEGER, + last_spawn_error TEXT, + max_runtime_seconds INTEGER, + last_heartbeat_at INTEGER, + -- Pointer into task_runs for the currently-active run (NULL if no + -- run is in-flight). Denormalised for cheap reads. + current_run_id INTEGER, + -- Forward-compat for v2 workflow routing. In v1 the kernel writes + -- these when the task is opted into a template but otherwise ignores + -- them; the dispatcher doesn't consult them for routing yet. + workflow_template_id TEXT, + current_step_key TEXT, + -- Force-loaded skills for the worker on this task, stored as JSON. + -- Appended to the dispatcher's built-in `--skills kanban-worker`. + -- NULL or empty array = no extras. + skills TEXT +); + +CREATE TABLE IF NOT EXISTS task_links ( + parent_id TEXT NOT NULL, + child_id TEXT NOT NULL, + PRIMARY KEY (parent_id, child_id) +); + +CREATE TABLE IF NOT EXISTS task_comments ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + task_id TEXT NOT NULL, + author TEXT NOT NULL, + body TEXT NOT NULL, + created_at INTEGER NOT NULL +); + +CREATE TABLE IF NOT EXISTS task_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + task_id TEXT NOT NULL, + run_id INTEGER, + kind TEXT NOT NULL, + payload TEXT, + created_at INTEGER NOT NULL +); + +-- Historical attempt record. Each time the dispatcher claims a task, a +-- new row is created here; claim state, PID, heartbeat, runtime cap, +-- and structured summary all live on the run, not the task. 
Multiple +-- rows per task id when the task was retried after crash/timeout/block. +-- v2 of the kanban schema will use ``step_key`` to drive per-stage +-- workflow routing; in v1 the column is nullable and unused (kernel +-- ignores it). +CREATE TABLE IF NOT EXISTS task_runs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + task_id TEXT NOT NULL, + profile TEXT, + step_key TEXT, + status TEXT NOT NULL, + -- status: running | done | blocked | crashed | timed_out | failed | released + claim_lock TEXT, + claim_expires INTEGER, + worker_pid INTEGER, + max_runtime_seconds INTEGER, + last_heartbeat_at INTEGER, + started_at INTEGER NOT NULL, + ended_at INTEGER, + outcome TEXT, + -- outcome: completed | blocked | crashed | timed_out | spawn_failed | + -- gave_up | reclaimed | (null while still running) + summary TEXT, + metadata TEXT, + error TEXT +); + +-- Subscription from a gateway source (platform + chat + thread) to a +-- task. The gateway's kanban-notifier watcher tails task_events and +-- pushes ``completed`` / ``blocked`` / ``spawn_auto_blocked`` events to +-- the original requester so human-in-the-loop workflows close the loop. +CREATE TABLE IF NOT EXISTS kanban_notify_subs ( + task_id TEXT NOT NULL, + platform TEXT NOT NULL, + chat_id TEXT NOT NULL, + thread_id TEXT NOT NULL DEFAULT '', + user_id TEXT, + created_at INTEGER NOT NULL, + last_event_id INTEGER NOT NULL DEFAULT 0, + PRIMARY KEY (task_id, platform, chat_id, thread_id) +); + +CREATE INDEX IF NOT EXISTS idx_tasks_assignee_status ON tasks(assignee, status); +CREATE INDEX IF NOT EXISTS idx_tasks_status ON tasks(status); +CREATE INDEX IF NOT EXISTS idx_tasks_tenant ON tasks(tenant); +CREATE INDEX IF NOT EXISTS idx_tasks_idempotency ON tasks(idempotency_key); +CREATE INDEX IF NOT EXISTS idx_links_child ON task_links(child_id); +CREATE INDEX IF NOT EXISTS idx_links_parent ON task_links(parent_id); +CREATE INDEX IF NOT EXISTS idx_comments_task ON task_comments(task_id, created_at); +CREATE INDEX IF NOT EXISTS idx_events_task ON task_events(task_id, created_at); +CREATE INDEX IF NOT EXISTS idx_events_run ON task_events(run_id, id); +CREATE INDEX IF NOT EXISTS idx_runs_task ON task_runs(task_id, started_at); +CREATE INDEX IF NOT EXISTS idx_runs_status ON task_runs(status); +CREATE INDEX IF NOT EXISTS idx_notify_task ON kanban_notify_subs(task_id); +""" + + +# --------------------------------------------------------------------------- +# Connection helpers +# --------------------------------------------------------------------------- + +_INITIALIZED_PATHS: set[str] = set() + + +def connect(db_path: Optional[Path] = None) -> sqlite3.Connection: + """Open (and initialize if needed) the kanban DB. + + WAL mode is enabled on every connection; it's a no-op after the first + time but keeps the code robust if the DB file is ever re-created. + + The first connection to a given path auto-runs :func:`init_db` so + fresh installs and test harnesses that construct `connect()` + directly don't have to remember a separate init step. Subsequent + connections skip the schema check via a module-level path cache. 
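+
+    Typical call pattern (title/assignee values illustrative)::
+
+        with connect() as conn:
+            tid = create_task(conn, title="crawl the source list",
+                              assignee="researcher")
+            task = get_task(conn, tid)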
+ """ + path = db_path or kanban_db_path() + path.parent.mkdir(parents=True, exist_ok=True) + resolved = str(path.resolve()) + needs_init = resolved not in _INITIALIZED_PATHS + conn = sqlite3.connect(str(path), isolation_level=None, timeout=30) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA journal_mode=WAL") + conn.execute("PRAGMA synchronous=NORMAL") + conn.execute("PRAGMA foreign_keys=ON") + if needs_init: + # Idempotent: runs CREATE TABLE IF NOT EXISTS + the additive + # migrations. Cached so subsequent connect() calls in the same + # process are cheap. + conn.executescript(SCHEMA_SQL) + _migrate_add_optional_columns(conn) + _INITIALIZED_PATHS.add(resolved) + return conn + + +def init_db(db_path: Optional[Path] = None) -> Path: + """Create the schema if it doesn't exist; return the path used. + + Kept as a public entry point so CLI ``hermes kanban init`` and the + daemon have something explicit to call. Unlike :func:`connect`'s + first-time auto-init (which caches by path), ``init_db`` always + re-runs the migration pass. Callers that know the on-disk schema + may have drifted — tests that write legacy event kinds directly, + external tools that upgrade an old DB file — can call this to + force re-migration. + """ + path = db_path or kanban_db_path() + path.parent.mkdir(parents=True, exist_ok=True) + resolved = str(path.resolve()) + # Clear the cache entry so the underlying connect() re-runs the + # schema + migration pass unconditionally. + _INITIALIZED_PATHS.discard(resolved) + with contextlib.closing(connect(path)): + pass + return path + + +def _migrate_add_optional_columns(conn: sqlite3.Connection) -> None: + """Add columns that were introduced after v1 release to legacy DBs. + + Called by ``init_db`` so opening an old DB is always safe. + """ + cols = {row["name"] for row in conn.execute("PRAGMA table_info(tasks)")} + if "tenant" not in cols: + conn.execute("ALTER TABLE tasks ADD COLUMN tenant TEXT") + if "result" not in cols: + conn.execute("ALTER TABLE tasks ADD COLUMN result TEXT") + if "idempotency_key" not in cols: + conn.execute("ALTER TABLE tasks ADD COLUMN idempotency_key TEXT") + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_tasks_idempotency " + "ON tasks(idempotency_key)" + ) + if "spawn_failures" not in cols: + conn.execute( + "ALTER TABLE tasks ADD COLUMN spawn_failures INTEGER NOT NULL DEFAULT 0" + ) + if "worker_pid" not in cols: + conn.execute("ALTER TABLE tasks ADD COLUMN worker_pid INTEGER") + if "last_spawn_error" not in cols: + conn.execute("ALTER TABLE tasks ADD COLUMN last_spawn_error TEXT") + if "max_runtime_seconds" not in cols: + conn.execute("ALTER TABLE tasks ADD COLUMN max_runtime_seconds INTEGER") + if "last_heartbeat_at" not in cols: + conn.execute("ALTER TABLE tasks ADD COLUMN last_heartbeat_at INTEGER") + if "current_run_id" not in cols: + conn.execute("ALTER TABLE tasks ADD COLUMN current_run_id INTEGER") + if "workflow_template_id" not in cols: + conn.execute("ALTER TABLE tasks ADD COLUMN workflow_template_id TEXT") + if "current_step_key" not in cols: + conn.execute("ALTER TABLE tasks ADD COLUMN current_step_key TEXT") + if "skills" not in cols: + # JSON array of skill names the dispatcher force-loads into the + # worker (additive to the built-in `kanban-worker`). NULL is fine + # for existing rows. + conn.execute("ALTER TABLE tasks ADD COLUMN skills TEXT") + + # task_events gained a run_id column; back-fill it as NULL for + # historical events (they predate runs and can't be attributed). 
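+    # e.g. a pre-runs DB reports table_info(task_events) columns
+    # (id, task_id, kind, payload, created_at); the missing run_id makes
+    # the ALTER below fire exactly once, and legacy rows read back NULL.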
+ ev_cols = {row["name"] for row in conn.execute("PRAGMA table_info(task_events)")} + if "run_id" not in ev_cols: + conn.execute("ALTER TABLE task_events ADD COLUMN run_id INTEGER") + conn.execute( + "CREATE INDEX IF NOT EXISTS idx_events_run " + "ON task_events(run_id, id)" + ) + + # One-shot backfill: any task that is 'running' before runs existed + # had its claim_lock / claim_expires / worker_pid on the task row. + # Synthesize a matching task_runs row so subsequent end-run / heartbeat + # calls have something to write to. Wrapped in write_txn to serialize + # against any concurrent dispatcher, and the per-row UPDATE uses + # ``current_run_id IS NULL`` as a CAS guard so a racing claim can't + # produce an orphaned row if it interleaves with the backfill pass. + runs_exist = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name='task_runs'" + ).fetchone() is not None + if runs_exist: + with write_txn(conn): + inflight = conn.execute( + "SELECT id, assignee, claim_lock, claim_expires, worker_pid, " + " max_runtime_seconds, last_heartbeat_at, started_at " + "FROM tasks " + "WHERE status = 'running' AND current_run_id IS NULL" + ).fetchall() + for row in inflight: + started = row["started_at"] or int(time.time()) + cur = conn.execute( + """ + INSERT INTO task_runs ( + task_id, profile, status, + claim_lock, claim_expires, worker_pid, + max_runtime_seconds, last_heartbeat_at, + started_at + ) VALUES (?, ?, 'running', ?, ?, ?, ?, ?, ?) + """, + ( + row["id"], row["assignee"], row["claim_lock"], + row["claim_expires"], row["worker_pid"], + row["max_runtime_seconds"], row["last_heartbeat_at"], + started, + ), + ) + # CAS: only install the pointer if nothing else claimed + # the task between our SELECT and here (shouldn't happen + # under the write_txn, but belt-and-suspenders). If the + # CAS fails we've got an orphan run_row — mark it + # reclaimed so it doesn't look in-flight. + upd = conn.execute( + "UPDATE tasks SET current_run_id = ? " + "WHERE id = ? AND current_run_id IS NULL", + (cur.lastrowid, row["id"]), + ) + if upd.rowcount != 1: + conn.execute( + "UPDATE task_runs SET status = 'reclaimed', " + " outcome = 'reclaimed', ended_at = ? " + "WHERE id = ?", + (int(time.time()), cur.lastrowid), + ) + + # One-shot event-kind rename pass. The old names ("ready", "priority", + # "spawn_auto_blocked") still worked but were awkward on the wire; + # rename them in-place so existing DBs migrate cleanly. Fires once + # per DB because after the UPDATE no rows match the old kinds. + _EVENT_RENAMES = ( + # (old, new) + ("ready", "promoted"), + ("priority", "reprioritized"), + ("spawn_auto_blocked", "gave_up"), + ) + for old, new in _EVENT_RENAMES: + conn.execute( + "UPDATE task_events SET kind = ? WHERE kind = ?", + (new, old), + ) + + +@contextlib.contextmanager +def write_txn(conn: sqlite3.Connection): + """Context manager for an IMMEDIATE write transaction. + + Use for any multi-statement write (creating a task + link, claiming a + task + recording an event, etc.). A claim CAS inside this context is + atomic -- at most one concurrent writer can succeed. + """ + conn.execute("BEGIN IMMEDIATE") + try: + yield conn + except Exception: + conn.execute("ROLLBACK") + raise + else: + conn.execute("COMMIT") + + +# --------------------------------------------------------------------------- +# ID generation +# --------------------------------------------------------------------------- + +def _new_task_id() -> str: + """Generate a short, URL-safe task id. 
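+
+    Example value: ``t_3f9c2a1b`` (hypothetical; ``t_`` plus 8 random hex
+    digits).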
+
+    4 hex bytes = ~4.3B possibilities. By the birthday approximation
+    p ~= n^2/2N, the collision probability is ~1.2e-4 at 1k tasks and
+    ~1.2e-2 at 10k. Previously we used 2 hex bytes (65k possibilities),
+    which hit the birthday paradox hard: ~50% collision probability by
+    ~300 tasks and near-certainty at 1k. The create path retries once on
+    collision, and callers that care about idempotency should pass
+    ``idempotency_key`` to :func:`create_task` rather than rely on id
+    uniqueness.
+    """
+    return "t_" + secrets.token_hex(4)
+
+
+def _claimer_id() -> str:
+    """Return a ``host:pid`` string that identifies this claimer."""
+    import socket
+    try:
+        host = socket.gethostname() or "unknown"
+    except Exception:
+        host = "unknown"
+    return f"{host}:{os.getpid()}"
+
+
+# ---------------------------------------------------------------------------
+# Task creation / mutation
+# ---------------------------------------------------------------------------
+
+def create_task(
+    conn: sqlite3.Connection,
+    *,
+    title: str,
+    body: Optional[str] = None,
+    assignee: Optional[str] = None,
+    created_by: Optional[str] = None,
+    workspace_kind: str = "scratch",
+    workspace_path: Optional[str] = None,
+    tenant: Optional[str] = None,
+    priority: int = 0,
+    parents: Iterable[str] = (),
+    triage: bool = False,
+    idempotency_key: Optional[str] = None,
+    max_runtime_seconds: Optional[int] = None,
+    skills: Optional[Iterable[str]] = None,
+) -> str:
+    """Create a new task and optionally link it under parent tasks.
+
+    Returns the new task id. Status is ``ready`` when there are no
+    parents (or all parents already ``done``), otherwise ``todo``.
+    If ``triage=True``, status is forced to ``triage`` regardless of
+    parents — a specifier/triager is expected to promote the task to
+    ``todo`` once the spec is fleshed out.
+
+    If ``idempotency_key`` is provided and a non-archived task with the
+    same key already exists, returns the existing task's id instead of
+    creating a duplicate. Useful for retried webhooks / automation that
+    should not double-write.
+
+    ``max_runtime_seconds`` caps how long a worker may run before the
+    dispatcher SIGTERMs (then SIGKILLs after a grace window) and
+    re-queues the task. ``None`` means no cap (default).
+
+    ``skills`` is an optional list of skill names to force-load into
+    the worker when dispatched. Stored as JSON; the dispatcher passes
+    each name to ``hermes --skills ...`` alongside the built-in
+    ``kanban-worker``. Use this to pin a task to a specialist skill
+    (e.g. ``skills=["translation"]`` so the worker loads the
+    translation skill regardless of the profile's default config).
+    """
+    if not title or not title.strip():
+        raise ValueError("title is required")
+    if workspace_kind not in VALID_WORKSPACE_KINDS:
+        raise ValueError(
+            f"workspace_kind must be one of {sorted(VALID_WORKSPACE_KINDS)}, "
+            f"got {workspace_kind!r}"
+        )
+    parents = tuple(p for p in parents if p)
+
+    # Normalise + validate skills: strip whitespace, drop empties, dedupe
+    # (preserving order). Refuse commas inside a single name so we don't
+    # invisibly splatter a comma-joined string into one argv slot — the
+    # `hermes --skills X,Y` comma syntax is handled in the dispatcher,
+    # not here. 
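+    # e.g. ["  translation", "translation", "", "ocr"] normalises to
+    # ["translation", "ocr"], while a comma-joined "translation,ocr"
+    # raises ValueError instead of being silently split here.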
+ skills_list: Optional[list[str]] = None + if skills is not None: + cleaned: list[str] = [] + seen: set[str] = set() + for s in skills: + if not s: + continue + name = str(s).strip() + if not name: + continue + if "," in name: + raise ValueError( + f"skill name cannot contain comma: {name!r} " + f"(pass a list of separate names instead of a comma-joined string)" + ) + if name in seen: + continue + seen.add(name) + cleaned.append(name) + skills_list = cleaned + + # Idempotency check — return the existing task instead of creating a + # duplicate. Done BEFORE entering write_txn to keep the fast path fast + # and to avoid holding a write lock during the lookup. Race is + # acceptable: two concurrent creators with the same key might both + # insert, at which point both rows exist but the next lookup stabilises. + if idempotency_key: + row = conn.execute( + "SELECT id FROM tasks WHERE idempotency_key = ? " + "AND status != 'archived' " + "ORDER BY created_at DESC LIMIT 1", + (idempotency_key,), + ).fetchone() + if row: + return row["id"] + + now = int(time.time()) + + # Retry once on the extremely unlikely id collision. + for attempt in range(2): + task_id = _new_task_id() + try: + with write_txn(conn): + # Determine initial status from parent status, unless the + # caller is parking this task in triage for a specifier. + if triage: + initial_status = "triage" + else: + initial_status = "ready" + if parents: + missing = _find_missing_parents(conn, parents) + if missing: + raise ValueError(f"unknown parent task(s): {', '.join(missing)}") + # If any parent is not yet done, we're todo. + rows = conn.execute( + "SELECT status FROM tasks WHERE id IN " + "(" + ",".join("?" * len(parents)) + ")", + parents, + ).fetchall() + if any(r["status"] != "done" for r in rows): + initial_status = "todo" + # Even in triage mode we still need to validate parent ids + # so the eventual link rows don't dangle. + if triage and parents: + missing = _find_missing_parents(conn, parents) + if missing: + raise ValueError(f"unknown parent task(s): {', '.join(missing)}") + + conn.execute( + """ + INSERT INTO tasks ( + id, title, body, assignee, status, priority, + created_by, created_at, workspace_kind, workspace_path, + tenant, idempotency_key, max_runtime_seconds, skills + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + task_id, + title.strip(), + body, + assignee, + initial_status, + priority, + created_by, + now, + workspace_kind, + workspace_path, + tenant, + idempotency_key, + int(max_runtime_seconds) if max_runtime_seconds else None, + json.dumps(skills_list) if skills_list is not None else None, + ), + ) + for pid in parents: + conn.execute( + "INSERT OR IGNORE INTO task_links (parent_id, child_id) VALUES (?, ?)", + (pid, task_id), + ) + _append_event( + conn, + task_id, + "created", + { + "assignee": assignee, + "status": initial_status, + "parents": list(parents), + "tenant": tenant, + "skills": list(skills_list) if skills_list else None, + }, + ) + return task_id + except sqlite3.IntegrityError: + if attempt == 1: + raise + # Retry with a fresh id. + continue + raise RuntimeError("unreachable") + + +def _find_missing_parents(conn: sqlite3.Connection, parents: Iterable[str]) -> list[str]: + parents = list(parents) + if not parents: + return [] + placeholders = ",".join("?" 
* len(parents)) + rows = conn.execute( + f"SELECT id FROM tasks WHERE id IN ({placeholders})", + parents, + ).fetchall() + present = {r["id"] for r in rows} + return [p for p in parents if p not in present] + + +def get_task(conn: sqlite3.Connection, task_id: str) -> Optional[Task]: + row = conn.execute("SELECT * FROM tasks WHERE id = ?", (task_id,)).fetchone() + return Task.from_row(row) if row else None + + +def list_tasks( + conn: sqlite3.Connection, + *, + assignee: Optional[str] = None, + status: Optional[str] = None, + tenant: Optional[str] = None, + include_archived: bool = False, + limit: Optional[int] = None, +) -> list[Task]: + query = "SELECT * FROM tasks WHERE 1=1" + params: list[Any] = [] + if assignee is not None: + query += " AND assignee = ?" + params.append(assignee) + if status is not None: + if status not in VALID_STATUSES: + raise ValueError(f"status must be one of {sorted(VALID_STATUSES)}") + query += " AND status = ?" + params.append(status) + if tenant is not None: + query += " AND tenant = ?" + params.append(tenant) + if not include_archived and status != "archived": + query += " AND status != 'archived'" + query += " ORDER BY priority DESC, created_at ASC" + if limit: + query += f" LIMIT {int(limit)}" + rows = conn.execute(query, params).fetchall() + return [Task.from_row(r) for r in rows] + + +def assign_task(conn: sqlite3.Connection, task_id: str, profile: Optional[str]) -> bool: + """Assign or reassign a task. Returns True on success. + + Refuses to reassign a task that's currently running (claim_lock set). + Reassign after the current run completes if needed. + """ + with write_txn(conn): + row = conn.execute( + "SELECT status, claim_lock FROM tasks WHERE id = ?", (task_id,) + ).fetchone() + if not row: + return False + if row["claim_lock"] is not None and row["status"] == "running": + raise RuntimeError( + f"cannot reassign {task_id}: currently running (claimed). " + "Wait for completion or reclaim the stale lock first." + ) + conn.execute("UPDATE tasks SET assignee = ? WHERE id = ?", (profile, task_id)) + _append_event(conn, task_id, "assigned", {"assignee": profile}) + return True + + +# --------------------------------------------------------------------------- +# Links +# --------------------------------------------------------------------------- + +def link_tasks(conn: sqlite3.Connection, parent_id: str, child_id: str) -> None: + if parent_id == child_id: + raise ValueError("a task cannot depend on itself") + with write_txn(conn): + missing = _find_missing_parents(conn, [parent_id, child_id]) + if missing: + raise ValueError(f"unknown task(s): {', '.join(missing)}") + if _would_cycle(conn, parent_id, child_id): + raise ValueError( + f"linking {parent_id} -> {child_id} would create a cycle" + ) + conn.execute( + "INSERT OR IGNORE INTO task_links (parent_id, child_id) VALUES (?, ?)", + (parent_id, child_id), + ) + # If child was ready but parent is not yet done, demote child to todo. + parent_status = conn.execute( + "SELECT status FROM tasks WHERE id = ?", (parent_id,) + ).fetchone()["status"] + if parent_status != "done": + conn.execute( + "UPDATE tasks SET status = 'todo' WHERE id = ? AND status = 'ready'", + (child_id,), + ) + _append_event( + conn, child_id, "linked", + {"parent": parent_id, "child": child_id}, + ) + + +def _would_cycle(conn: sqlite3.Connection, parent_id: str, child_id: str) -> bool: + """Return True if adding parent->child creates a cycle. 
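+
+    e.g. with existing links a -> b and b -> c, ``_would_cycle(conn, "c",
+    "a")`` returns True (c is already a descendant of a), so
+    ``link_tasks(conn, "c", "a")`` is refused. Ids are illustrative.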
+ + A cycle exists iff ``parent_id`` is already a descendant of + ``child_id`` via existing parent->child links. We walk downward + from ``child_id`` and check whether we reach ``parent_id``. + """ + seen = set() + stack = [child_id] + while stack: + node = stack.pop() + if node == parent_id: + return True + if node in seen: + continue + seen.add(node) + rows = conn.execute( + "SELECT child_id FROM task_links WHERE parent_id = ?", (node,) + ).fetchall() + stack.extend(r["child_id"] for r in rows) + return False + + +def unlink_tasks(conn: sqlite3.Connection, parent_id: str, child_id: str) -> bool: + with write_txn(conn): + cur = conn.execute( + "DELETE FROM task_links WHERE parent_id = ? AND child_id = ?", + (parent_id, child_id), + ) + if cur.rowcount: + _append_event( + conn, child_id, "unlinked", + {"parent": parent_id, "child": child_id}, + ) + return cur.rowcount > 0 + + +def parent_ids(conn: sqlite3.Connection, task_id: str) -> list[str]: + rows = conn.execute( + "SELECT parent_id FROM task_links WHERE child_id = ? ORDER BY parent_id", + (task_id,), + ).fetchall() + return [r["parent_id"] for r in rows] + + +def child_ids(conn: sqlite3.Connection, task_id: str) -> list[str]: + rows = conn.execute( + "SELECT child_id FROM task_links WHERE parent_id = ? ORDER BY child_id", + (task_id,), + ).fetchall() + return [r["child_id"] for r in rows] + + +def parent_results(conn: sqlite3.Connection, task_id: str) -> list[tuple[str, Optional[str]]]: + """Return ``(parent_id, result)`` for every done parent of ``task_id``.""" + rows = conn.execute( + """ + SELECT t.id AS id, t.result AS result + FROM tasks t + JOIN task_links l ON l.parent_id = t.id + WHERE l.child_id = ? AND t.status = 'done' + ORDER BY t.completed_at ASC + """, + (task_id,), + ).fetchall() + return [(r["id"], r["result"]) for r in rows] + + +# --------------------------------------------------------------------------- +# Comments & events +# --------------------------------------------------------------------------- + +def add_comment( + conn: sqlite3.Connection, task_id: str, author: str, body: str +) -> int: + if not body or not body.strip(): + raise ValueError("comment body is required") + if not author or not author.strip(): + raise ValueError("comment author is required") + now = int(time.time()) + with write_txn(conn): + if not conn.execute( + "SELECT 1 FROM tasks WHERE id = ?", (task_id,) + ).fetchone(): + raise ValueError(f"unknown task {task_id}") + cur = conn.execute( + "INSERT INTO task_comments (task_id, author, body, created_at) " + "VALUES (?, ?, ?, ?)", + (task_id, author.strip(), body.strip(), now), + ) + _append_event(conn, task_id, "commented", {"author": author, "len": len(body)}) + return int(cur.lastrowid or 0) + + +def list_comments(conn: sqlite3.Connection, task_id: str) -> list[Comment]: + rows = conn.execute( + "SELECT * FROM task_comments WHERE task_id = ? ORDER BY created_at ASC", + (task_id,), + ).fetchall() + return [ + Comment( + id=r["id"], + task_id=r["task_id"], + author=r["author"], + body=r["body"], + created_at=r["created_at"], + ) + for r in rows + ] + + +def list_events(conn: sqlite3.Connection, task_id: str) -> list[Event]: + rows = conn.execute( + "SELECT * FROM task_events WHERE task_id = ? 
ORDER BY created_at ASC, id ASC", + (task_id,), + ).fetchall() + out = [] + for r in rows: + try: + payload = json.loads(r["payload"]) if r["payload"] else None + except Exception: + payload = None + out.append( + Event( + id=r["id"], + task_id=r["task_id"], + kind=r["kind"], + payload=payload, + created_at=r["created_at"], + run_id=(int(r["run_id"]) if "run_id" in r.keys() and r["run_id"] is not None else None), + ) + ) + return out + + +def _append_event( + conn: sqlite3.Connection, + task_id: str, + kind: str, + payload: Optional[dict] = None, + *, + run_id: Optional[int] = None, +) -> None: + """Record an event row. Called from within an already-open txn. + + ``run_id`` is optional: pass the current run id so UIs can group + events by attempt. For events that aren't scoped to a single run + (task created/edited/archived, dependency promotion) leave it None + and the row carries NULL. + """ + now = int(time.time()) + pl = json.dumps(payload, ensure_ascii=False) if payload else None + conn.execute( + "INSERT INTO task_events (task_id, run_id, kind, payload, created_at) " + "VALUES (?, ?, ?, ?, ?)", + (task_id, run_id, kind, pl, now), + ) + + +def _end_run( + conn: sqlite3.Connection, + task_id: str, + *, + outcome: str, + summary: Optional[str] = None, + error: Optional[str] = None, + metadata: Optional[dict] = None, + status: Optional[str] = None, +) -> Optional[int]: + """Close the currently-active run for ``task_id`` and clear the pointer. + + ``outcome`` is the semantic result (completed / blocked / crashed / + timed_out / spawn_failed / gave_up / reclaimed). ``status`` is the + run-row status (usually just ``outcome``, but callers can pass it + explicitly). Returns the closed run_id or ``None`` if no active run + existed (e.g. a CLI user calling ``hermes kanban complete`` on a + task that was never claimed). + """ + now = int(time.time()) + row = conn.execute( + "SELECT current_run_id FROM tasks WHERE id = ?", (task_id,), + ).fetchone() + if not row or not row["current_run_id"]: + return None + run_id = int(row["current_run_id"]) + conn.execute( + """ + UPDATE task_runs + SET status = ?, + outcome = ?, + summary = ?, + error = ?, + metadata = ?, + ended_at = ?, + claim_lock = NULL, + claim_expires = NULL, + worker_pid = NULL + WHERE id = ? + AND ended_at IS NULL + """, + ( + status or outcome, + outcome, + summary, + error, + json.dumps(metadata, ensure_ascii=False) if metadata else None, + now, + run_id, + ), + ) + conn.execute( + "UPDATE tasks SET current_run_id = NULL WHERE id = ?", (task_id,), + ) + return run_id + + +def _current_run_id(conn: sqlite3.Connection, task_id: str) -> Optional[int]: + row = conn.execute( + "SELECT current_run_id FROM tasks WHERE id = ?", (task_id,), + ).fetchone() + return int(row["current_run_id"]) if row and row["current_run_id"] else None + + +def _synthesize_ended_run( + conn: sqlite3.Connection, + task_id: str, + *, + outcome: str, + summary: Optional[str] = None, + error: Optional[str] = None, + metadata: Optional[dict] = None, +) -> int: + """Insert a zero-duration, already-closed run row. + + Used when a terminal transition happens on a task that was never + claimed (CLI user calling ``hermes kanban complete + --summary X``, or dashboard "mark done" on a ready task). Without + this, the handoff fields (summary / metadata / error) would be + silently dropped: ``_end_run`` is a no-op because there's no + current run. 
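+
+    For example (task id hypothetical), ``hermes kanban complete
+    t_9f3c2a1b --summary "closed manually"`` on a never-claimed task
+    routes through here and records an instant run with
+    ``outcome='completed'`` and the summary preserved.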
+ + The synthetic run has ``started_at == ended_at == now`` so it + shows up in attempt history as "instant" and doesn't skew elapsed + stats. Caller is responsible for leaving ``current_run_id`` NULL + (or for clearing it elsewhere in the same txn) since this + function does NOT touch the tasks row. + """ + now = int(time.time()) + trow = conn.execute( + "SELECT assignee, current_step_key FROM tasks WHERE id = ?", + (task_id,), + ).fetchone() + profile = trow["assignee"] if trow else None + step_key = trow["current_step_key"] if trow else None + cur = conn.execute( + """ + INSERT INTO task_runs ( + task_id, profile, step_key, + status, outcome, + summary, error, metadata, + started_at, ended_at + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + """, + ( + task_id, profile, step_key, + outcome, outcome, + summary, error, + json.dumps(metadata, ensure_ascii=False) if metadata else None, + now, now, + ), + ) + return int(cur.lastrowid or 0) + + +# --------------------------------------------------------------------------- +# Dependency resolution (todo -> ready) +# --------------------------------------------------------------------------- + +def recompute_ready(conn: sqlite3.Connection) -> int: + """Promote ``todo`` tasks to ``ready`` when all parents are ``done``. + + Returns the number of tasks promoted. Safe to call inside or outside + an existing transaction; it opens its own IMMEDIATE txn. + """ + promoted = 0 + with write_txn(conn): + todo_rows = conn.execute( + "SELECT id FROM tasks WHERE status = 'todo'" + ).fetchall() + for row in todo_rows: + task_id = row["id"] + parents = conn.execute( + "SELECT t.status FROM tasks t " + "JOIN task_links l ON l.parent_id = t.id " + "WHERE l.child_id = ?", + (task_id,), + ).fetchall() + if all(p["status"] == "done" for p in parents): + conn.execute( + "UPDATE tasks SET status = 'ready' WHERE id = ? AND status = 'todo'", + (task_id,), + ) + _append_event(conn, task_id, "promoted", None) + promoted += 1 + return promoted + + +# --------------------------------------------------------------------------- +# Claim / complete / block +# --------------------------------------------------------------------------- + +def claim_task( + conn: sqlite3.Connection, + task_id: str, + *, + ttl_seconds: int = DEFAULT_CLAIM_TTL_SECONDS, + claimer: Optional[str] = None, +) -> Optional[Task]: + """Atomically transition ``ready -> running``. + + Returns the claimed ``Task`` on success, ``None`` if the task was + already claimed (or is not in ``ready`` status). + """ + now = int(time.time()) + lock = claimer or _claimer_id() + expires = now + int(ttl_seconds) + with write_txn(conn): + # Defensive: if a prior run somehow leaked (invariant violation from + # an unknown code path), close it as 'reclaimed' so we don't strand + # it when the CAS resets the pointer below. No-op when the invariant + # holds (the common case). + stale = conn.execute( + "SELECT current_run_id FROM tasks WHERE id = ? AND status = 'ready'", + (task_id,), + ).fetchone() + if stale and stale["current_run_id"]: + conn.execute( + """ + UPDATE task_runs + SET status = 'reclaimed', outcome = 'reclaimed', + summary = COALESCE(summary, 'invariant recovery on re-claim'), + ended_at = ?, + claim_lock = NULL, claim_expires = NULL, worker_pid = NULL + WHERE id = ? AND ended_at IS NULL + """, + (now, int(stale["current_run_id"])), + ) + cur = conn.execute( + """ + UPDATE tasks + SET status = 'running', + claim_lock = ?, + claim_expires = ?, + started_at = COALESCE(started_at, ?) + WHERE id = ? 
+                  AND status = 'ready'
+                  AND claim_lock IS NULL
+            """,
+            (lock, expires, now, task_id),
+        )
+        if cur.rowcount != 1:
+            return None
+        # Look up the current task row so we can populate the run with
+        # its assignee / step / runtime cap.
+        trow = conn.execute(
+            "SELECT assignee, max_runtime_seconds, current_step_key "
+            "FROM tasks WHERE id = ?",
+            (task_id,),
+        ).fetchone()
+        run_cur = conn.execute(
+            """
+            INSERT INTO task_runs (
+                task_id, profile, step_key, status,
+                claim_lock, claim_expires, max_runtime_seconds,
+                started_at
+            ) VALUES (?, ?, ?, 'running', ?, ?, ?, ?)
+            """,
+            (
+                task_id,
+                trow["assignee"] if trow else None,
+                trow["current_step_key"] if trow else None,
+                lock,
+                expires,
+                trow["max_runtime_seconds"] if trow else None,
+                now,
+            ),
+        )
+        run_id = run_cur.lastrowid
+        conn.execute(
+            "UPDATE tasks SET current_run_id = ? WHERE id = ?",
+            (run_id, task_id),
+        )
+        _append_event(
+            conn, task_id, "claimed",
+            {"lock": lock, "expires": expires, "run_id": run_id},
+            run_id=run_id,
+        )
+    return get_task(conn, task_id)
+
+
+def heartbeat_claim(
+    conn: sqlite3.Connection,
+    task_id: str,
+    *,
+    ttl_seconds: int = DEFAULT_CLAIM_TTL_SECONDS,
+    claimer: Optional[str] = None,
+) -> bool:
+    """Extend a running claim. Returns True if we still own it.
+
+    Workers that know they'll exceed the claim TTL (15 minutes by
+    default) should call this every few minutes to keep ownership.
+    """
+    expires = int(time.time()) + int(ttl_seconds)
+    lock = claimer or _claimer_id()
+    with write_txn(conn):
+        cur = conn.execute(
+            "UPDATE tasks SET claim_expires = ? "
+            "WHERE id = ? AND status = 'running' AND claim_lock = ?",
+            (expires, task_id, lock),
+        )
+        if cur.rowcount == 1:
+            run_id = _current_run_id(conn, task_id)
+            if run_id is not None:
+                conn.execute(
+                    "UPDATE task_runs SET claim_expires = ? WHERE id = ?",
+                    (expires, run_id),
+                )
+            return True
+    return False
+
+
+def release_stale_claims(conn: sqlite3.Connection) -> int:
+    """Reset any ``running`` task whose claim has expired.
+
+    Returns the number of stale claims reclaimed. Safe to call often.
+    """
+    now = int(time.time())
+    reclaimed = 0
+    with write_txn(conn):
+        stale = conn.execute(
+            "SELECT id, claim_lock FROM tasks "
+            "WHERE status = 'running' AND claim_expires IS NOT NULL AND claim_expires < ?",
+            (now,),
+        ).fetchall()
+        for row in stale:
+            conn.execute(
+                "UPDATE tasks SET status = 'ready', claim_lock = NULL, "
+                "claim_expires = NULL, worker_pid = NULL "
+                "WHERE id = ? AND status = 'running'",
+                (row["id"],),
+            )
+            run_id = _end_run(
+                conn, row["id"],
+                outcome="reclaimed", status="reclaimed",
+                error=f"stale_lock={row['claim_lock']}",
+            )
+            _append_event(
+                conn, row["id"], "reclaimed",
+                {"stale_lock": row["claim_lock"]},
+                run_id=run_id,
+            )
+            reclaimed += 1
+    return reclaimed
+
+
+def complete_task(
+    conn: sqlite3.Connection,
+    task_id: str,
+    *,
+    result: Optional[str] = None,
+    summary: Optional[str] = None,
+    metadata: Optional[dict] = None,
+) -> bool:
+    """Transition ``running|ready|blocked -> done`` and record ``result``.
+
+    Accepts a task that's merely ``ready`` or ``blocked`` too, so a
+    manual CLI completion (``hermes kanban complete <task-id>``) works
+    without requiring a claim/start/complete sequence.
+
+    ``summary`` and ``metadata`` are stored on the closing run (if any)
+    and surfaced to downstream children via :func:`build_worker_context`.
+    When ``summary`` is omitted we fall back to ``result`` so single-run
+    callers don't have to pass both. ``metadata`` is a free-form dict
+    (e.g. ``{"changed_files": [...], "tests_run": [...]}``) — workers
+    are encouraged to use it for structured handoff facts.
+    """
+    now = int(time.time())
+    with write_txn(conn):
+        cur = conn.execute(
+            """
+            UPDATE tasks
+            SET status       = 'done',
+                result       = ?,
+                completed_at = ?,
+                claim_lock   = NULL,
+                claim_expires= NULL,
+                worker_pid   = NULL
+            WHERE id = ?
+              AND status IN ('running', 'ready', 'blocked')
+            """,
+            (result, now, task_id),
+        )
+        if cur.rowcount != 1:
+            return False
+        run_id = _end_run(
+            conn, task_id,
+            outcome="completed", status="done",
+            summary=summary if summary is not None else result,
+            metadata=metadata,
+        )
+        # If complete_task was called on a never-claimed task (ready or
+        # blocked → done with no run in flight), synthesize a
+        # zero-duration run so the handoff fields are persisted in
+        # attempt history instead of silently lost.
+        if run_id is None and (summary or metadata or result):
+            run_id = _synthesize_ended_run(
+                conn, task_id,
+                outcome="completed",
+                summary=summary if summary is not None else result,
+                metadata=metadata,
+            )
+        # Carry the handoff summary in the event payload so gateway
+        # notifiers and dashboard WS consumers can render it without a
+        # second SQL round-trip. First line only, 400 char cap — the
+        # full summary stays on the run row. Strip before the emptiness
+        # check so a whitespace-only summary can't index an empty list.
+        ev_summary = ((summary if summary is not None else result) or "").strip()
+        ev_summary = ev_summary.splitlines()[0][:400] if ev_summary else ""
+        _append_event(
+            conn, task_id, "completed",
+            {
+                "result_len": len(result) if result else 0,
+                "summary": ev_summary or None,
+            },
+            run_id=run_id,
+        )
+    # Recompute ready status for dependents (separate txn so children see done).
+    recompute_ready(conn)
+    return True
+
+
+def block_task(
+    conn: sqlite3.Connection,
+    task_id: str,
+    *,
+    reason: Optional[str] = None,
+) -> bool:
+    """Transition ``running|ready -> blocked``."""
+    with write_txn(conn):
+        cur = conn.execute(
+            """
+            UPDATE tasks
+            SET status       = 'blocked',
+                claim_lock   = NULL,
+                claim_expires= NULL,
+                worker_pid   = NULL
+            WHERE id = ?
+              AND status IN ('running', 'ready')
+            """,
+            (task_id,),
+        )
+        if cur.rowcount != 1:
+            return False
+        run_id = _end_run(
+            conn, task_id,
+            outcome="blocked", status="blocked",
+            summary=reason,
+        )
+        # Synthesize a run when blocking a never-claimed task so the
+        # reason is preserved in attempt history.
+        if run_id is None and reason:
+            run_id = _synthesize_ended_run(
+                conn, task_id,
+                outcome="blocked",
+                summary=reason,
+            )
+        _append_event(conn, task_id, "blocked", {"reason": reason}, run_id=run_id)
+    return True
+
+
+def unblock_task(conn: sqlite3.Connection, task_id: str) -> bool:
+    """Transition ``blocked -> ready``.
+
+    Defensively closes any stale ``current_run_id`` pointer before flipping
+    status. In the common path (``block_task`` closed the run already) this
+    is a no-op. If a future or external write left the pointer dangling,
+    the leaked run is closed as ``reclaimed`` inside the same txn so the
+    runs invariant (``current_run_id IS NULL`` ⇔ run row in terminal
+    state) holds for the rest of this function's lifetime.
+    """
+    now = int(time.time())
+    with write_txn(conn):
+        stale = conn.execute(
+            "SELECT current_run_id FROM tasks WHERE id = ?"
+            " AND status = 'blocked'",
+            (task_id,),
+        ).fetchone()
+        if stale and stale["current_run_id"]:
+            conn.execute(
+                """
+                UPDATE task_runs
+                SET status = 'reclaimed', outcome = 'reclaimed',
+                    summary = COALESCE(summary, 'invariant recovery on unblock'),
+                    ended_at = ?,
+                    claim_lock = NULL, claim_expires = NULL, worker_pid = NULL
+                WHERE id = ? AND ended_at IS NULL
+                """,
+                (now, int(stale["current_run_id"])),
+            )
+        cur = conn.execute(
+            "UPDATE tasks SET status = 'ready', current_run_id = NULL "
+            "WHERE id = ? AND status = 'blocked'",
+            (task_id,),
+        )
+        if cur.rowcount != 1:
+            return False
+        _append_event(conn, task_id, "unblocked", None)
+    return True
+
+
+def archive_task(conn: sqlite3.Connection, task_id: str) -> bool:
+    with write_txn(conn):
+        cur = conn.execute(
+            "UPDATE tasks SET status = 'archived', "
+            "    claim_lock = NULL, claim_expires = NULL, worker_pid = NULL "
+            "WHERE id = ? AND status != 'archived'",
+            (task_id,),
+        )
+        if cur.rowcount != 1:
+            return False
+        # If archive happened while a run was still in flight (e.g. user
+        # archived a running task from the dashboard), close that run with
+        # outcome='reclaimed' so attempt history isn't orphaned.
+        run_id = _end_run(
+            conn, task_id,
+            outcome="reclaimed", status="reclaimed",
+            summary="task archived with run still active",
+        )
+        _append_event(conn, task_id, "archived", None, run_id=run_id)
+    return True
+
+
+# ---------------------------------------------------------------------------
+# Workspace resolution
+# ---------------------------------------------------------------------------
+
+def resolve_workspace(task: Task) -> Path:
+    """Resolve (and create if needed) the workspace for a task.
+
+    - ``scratch``: a fresh dir under
+      ``$HERMES_HOME/kanban/workspaces/<task-id>/``.
+    - ``dir:<path>``: the path stored in ``workspace_path``. Created
+      if missing. MUST be absolute — relative paths are rejected to
+      prevent confused-deputy traversal where ``../../../tmp/attacker``
+      resolves against the dispatcher's CWD instead of a meaningful
+      root. Users who want a HERMES_HOME-relative workspace should
+      compute the absolute path themselves.
+    - ``worktree``: a git worktree at ``workspace_path``. Not created
+      automatically in v1 — the kanban-worker skill documents
+      ``git worktree add`` as a worker-side step. Returns the intended path.
+
+    Persist the resolved path back to the task row via ``set_workspace_path``
+    so subsequent runs reuse the same directory.
+    """
+    kind = task.workspace_kind or "scratch"
+    if kind == "scratch":
+        if task.workspace_path:
+            # Legacy scratch tasks that were set to an explicit path get the
+            # same absolute-path guard as dir: — consistent with the
+            # threat model.
+            p = Path(task.workspace_path).expanduser()
+            if not p.is_absolute():
+                raise ValueError(
+                    f"task {task.id} has non-absolute workspace_path "
+                    f"{task.workspace_path!r}; workspace paths must be absolute"
+                )
+        else:
+            p = workspaces_root() / task.id
+        p.mkdir(parents=True, exist_ok=True)
+        return p
+    if kind == "dir":
+        if not task.workspace_path:
+            raise ValueError(
+                f"task {task.id} has workspace_kind=dir but no workspace_path"
+            )
+        p = Path(task.workspace_path).expanduser()
+        if not p.is_absolute():
+            raise ValueError(
+                f"task {task.id} has non-absolute workspace_path "
+                f"{task.workspace_path!r}; use an absolute path "
+                f"(relative paths are ambiguous against the dispatcher's CWD)"
+            )
+        p.mkdir(parents=True, exist_ok=True)
+        return p
+    if kind == "worktree":
+        if not task.workspace_path:
+            # Default: .worktrees/<task-id>/ under CWD; see the sketch below.
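+            # A minimal sketch of that worker-side step, assuming the
+            # task's repo is the worker's CWD (the branch naming is
+            # illustrative, not part of the v1 contract):
+            #
+            #     git worktree add -b kanban/<task-id> .worktrees/<task-id>
+            #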
+            # Worker skill creates it.
+            return Path.cwd() / ".worktrees" / task.id
+        p = Path(task.workspace_path).expanduser()
+        if not p.is_absolute():
+            raise ValueError(
+                f"task {task.id} has non-absolute worktree path "
+                f"{task.workspace_path!r}; use an absolute path"
+            )
+        return p
+    raise ValueError(f"unknown workspace_kind: {kind}")
+
+
+def set_workspace_path(
+    conn: sqlite3.Connection, task_id: str, path: Path | str
+) -> None:
+    with write_txn(conn):
+        conn.execute(
+            "UPDATE tasks SET workspace_path = ? WHERE id = ?",
+            (str(path), task_id),
+        )
+
+
+# ---------------------------------------------------------------------------
+# Dispatcher (one-shot pass)
+# ---------------------------------------------------------------------------
+
+# After this many consecutive `spawn_failed` events on a task, the dispatcher
+# stops retrying and parks the task in ``blocked`` with a reason so a human
+# can investigate. Prevents the dispatcher from thrashing forever on a task
+# whose profile doesn't exist, whose workspace is unmountable, etc.
+DEFAULT_SPAWN_FAILURE_LIMIT = 5
+
+# Max bytes to keep in a single worker log file. The dispatcher truncates
+# and rotates on spawn if the file is larger than this at spawn time.
+DEFAULT_LOG_ROTATE_BYTES = 2 * 1024 * 1024  # 2 MiB
+
+
+@dataclass
+class DispatchResult:
+    """Outcome of a single ``dispatch`` pass."""
+
+    reclaimed: int = 0
+    promoted: int = 0
+    spawned: list[tuple[str, str, str]] = field(default_factory=list)
+    """List of ``(task_id, assignee, workspace_path)`` triples."""
+    skipped_unassigned: list[str] = field(default_factory=list)
+    crashed: list[str] = field(default_factory=list)
+    """Task ids reclaimed because their worker PID disappeared."""
+    auto_blocked: list[str] = field(default_factory=list)
+    """Task ids auto-blocked by the spawn-failure circuit breaker."""
+    timed_out: list[str] = field(default_factory=list)
+    """Task ids whose workers exceeded ``max_runtime_seconds``."""
+
+
+def _pid_alive(pid: Optional[int]) -> bool:
+    """Return True if ``pid`` is still running on this host.
+
+    POSIX-only probe built on ``os.kill(pid, 0)``. On Windows,
+    ``os.kill(pid, 0)`` calls TerminateProcess rather than probing, so
+    there the check degrades to "assume alive" and stale workers are
+    left to the claim-TTL reclaim. Returns False for falsy PIDs or on
+    any OS error.
+
+    **Zombie handling (Linux):** ``os.kill(pid, 0)`` succeeds against
+    zombie processes (post-exit, pre-reap) because the process table
+    entry still exists. A worker that exits without being reaped by its
+    parent would stay "alive" to the dispatcher forever. Dispatcher
+    workers are started via ``start_new_session=True`` + intentional
+    Popen handle abandonment, so init reaps them quickly — but during
+    the window between exit and reap, we'd otherwise see stale "alive"
+    signals. On Linux we additionally peek at ``/proc/<pid>/status``
+    and treat ``State: Z`` as dead. On other POSIX or on Windows the
+    zombie check is a no-op.
+    """
+    if not pid or pid <= 0:
+        return False
+    if os.name != "posix":
+        # ``os.kill(pid, 0)`` on Windows calls TerminateProcess instead of
+        # probing, which would kill the worker we're checking. Degrade to
+        # "assume alive" and let the claim-TTL reclaim handle stale rows.
+        return True
+    try:
+        os.kill(int(pid), 0)
+    except ProcessLookupError:
+        return False
+    except PermissionError:
+        # Process exists, we just can't signal it.
+        return True
+    except OSError:
+        return False
+    # Still here → kill(0) succeeded. Check for zombie on Linux.
+    if sys.platform == "linux":
+        try:
+            with open(f"/proc/{int(pid)}/status", "r") as f:
+                for line in f:
+                    if line.startswith("State:"):
+                        # "State:\tZ (zombie)" → dead
+                        if "Z" in line.split(":", 1)[1]:
+                            return False
+                        break
+        except (FileNotFoundError, PermissionError, OSError):
+            # proc entry gone → already reaped; treat as dead.
+ # PermissionError shouldn't happen for our own children but + # be defensive. + pass + return True + + +def heartbeat_worker( + conn: sqlite3.Connection, + task_id: str, + *, + note: Optional[str] = None, +) -> bool: + """Record a ``heartbeat`` event + touch ``last_heartbeat_at``. + + Called by long-running workers as a liveness signal orthogonal to + the PID check. A worker that forks a long-lived child (train loop, + video encode, web crawl) can have its Python still alive while the + actual work process is stuck; periodic heartbeats catch that. + + Returns True on success, False if the task is not in a state that + should be heartbeating (not running, or claim expired). + """ + now = int(time.time()) + with write_txn(conn): + cur = conn.execute( + "UPDATE tasks SET last_heartbeat_at = ? " + "WHERE id = ? AND status = 'running'", + (now, task_id), + ) + if cur.rowcount != 1: + return False + run_id = _current_run_id(conn, task_id) + if run_id is not None: + conn.execute( + "UPDATE task_runs SET last_heartbeat_at = ? WHERE id = ?", + (now, run_id), + ) + _append_event( + conn, task_id, "heartbeat", + {"note": note} if note else None, + run_id=run_id, + ) + return True + + +def enforce_max_runtime( + conn: sqlite3.Connection, + *, + signal_fn=None, +) -> list[str]: + """Terminate workers whose per-task ``max_runtime_seconds`` has elapsed. + + Sends SIGTERM, waits a short grace window, then SIGKILL. Emits a + ``timed_out`` event and drops the task back to ``ready`` so the next + dispatcher tick re-spawns it — unless the spawn-failure circuit + breaker has already given up, in which case the task stays blocked + where ``_record_spawn_failure`` parked it. + + Runs host-local: only tasks claimed by this host are candidates + (same reasoning as ``detect_crashed_workers``). ``signal_fn`` is a + test hook; defaults to ``os.kill`` on POSIX. + """ + import signal + timed_out: list[str] = [] + now = int(time.time()) + host_prefix = f"{_claimer_id().split(':', 1)[0]}:" + + rows = conn.execute( + "SELECT id, worker_pid, started_at, max_runtime_seconds, claim_lock " + "FROM tasks " + "WHERE status = 'running' AND max_runtime_seconds IS NOT NULL " + " AND started_at IS NOT NULL AND worker_pid IS NOT NULL" + ).fetchall() + for row in rows: + lock = row["claim_lock"] or "" + if not lock.startswith(host_prefix): + continue + elapsed = now - int(row["started_at"]) + if elapsed < int(row["max_runtime_seconds"]): + continue + + pid = int(row["worker_pid"]) + tid = row["id"] + # SIGTERM then SIGKILL. Keep it simple: 5 s grace. Workers that + # want a cleaner shutdown can install their own SIGTERM handler + # before the grace expires. + killed = False + kill = signal_fn if signal_fn is not None else ( + os.kill if hasattr(os, "kill") else None + ) + if kill is not None: + try: + kill(pid, signal.SIGTERM) + except (ProcessLookupError, OSError): + pass + # Short polling wait — no time.sleep on the write txn. + for _ in range(10): + if not _pid_alive(pid): + break + time.sleep(0.5) + if _pid_alive(pid): + try: + kill(pid, signal.SIGKILL) + killed = True + except (ProcessLookupError, OSError): + pass + + with write_txn(conn): + cur = conn.execute( + "UPDATE tasks SET status = 'ready', claim_lock = NULL, " + "claim_expires = NULL, worker_pid = NULL, " + "last_heartbeat_at = NULL " + "WHERE id = ? 
AND status = 'running'", + (tid,), + ) + if cur.rowcount == 1: + payload = { + "pid": pid, + "elapsed_seconds": int(elapsed), + "limit_seconds": int(row["max_runtime_seconds"]), + "sigkill": killed, + } + run_id = _end_run( + conn, tid, + outcome="timed_out", status="timed_out", + error=f"elapsed {int(elapsed)}s > limit {int(row['max_runtime_seconds'])}s", + metadata=payload, + ) + _append_event( + conn, tid, "timed_out", payload, run_id=run_id, + ) + timed_out.append(tid) + return timed_out + + +def set_max_runtime( + conn: sqlite3.Connection, + task_id: str, + seconds: Optional[int], +) -> bool: + """Set or clear the per-task max_runtime_seconds. Returns True on + success.""" + with write_txn(conn): + cur = conn.execute( + "UPDATE tasks SET max_runtime_seconds = ? WHERE id = ?", + (int(seconds) if seconds is not None else None, task_id), + ) + return cur.rowcount == 1 + + +def detect_crashed_workers(conn: sqlite3.Connection) -> list[str]: + """Reclaim ``running`` tasks whose worker PID is no longer alive. + + Appends a ``crashed`` event and drops the task back to ``ready``. + Different from ``release_stale_claims``: this checks liveness + immediately rather than waiting for the claim TTL. + + Only considers tasks claimed by *this host* — PIDs from other hosts + are meaningless here. The host-local check is enough because + ``_default_spawn`` always runs the worker on the same host as the + dispatcher (the whole design is single-host). + """ + crashed: list[str] = [] + with write_txn(conn): + rows = conn.execute( + "SELECT id, worker_pid, claim_lock FROM tasks " + "WHERE status = 'running' AND worker_pid IS NOT NULL" + ).fetchall() + host_prefix = f"{_claimer_id().split(':', 1)[0]}:" + for row in rows: + # Only check liveness for claims owned by this host. + lock = row["claim_lock"] or "" + if not lock.startswith(host_prefix): + continue + if _pid_alive(row["worker_pid"]): + continue + cur = conn.execute( + "UPDATE tasks SET status = 'ready', claim_lock = NULL, " + "claim_expires = NULL, worker_pid = NULL " + "WHERE id = ? AND status = 'running'", + (row["id"],), + ) + if cur.rowcount == 1: + run_id = _end_run( + conn, row["id"], + outcome="crashed", status="crashed", + error=f"pid {int(row['worker_pid'])} not alive", + metadata={ + "pid": int(row["worker_pid"]), + "claimer": row["claim_lock"], + }, + ) + _append_event( + conn, row["id"], "crashed", + {"pid": int(row["worker_pid"]), "claimer": row["claim_lock"]}, + run_id=run_id, + ) + crashed.append(row["id"]) + return crashed + + +def _record_spawn_failure( + conn: sqlite3.Connection, + task_id: str, + error: str, + *, + failure_limit: int = DEFAULT_SPAWN_FAILURE_LIMIT, +) -> bool: + """Release the claim, increment the failure counter, maybe auto-block. + + Returns True when the task was auto-blocked (N failures exceeded), + False when it was just released back to ``ready`` for another try. + """ + blocked = False + with write_txn(conn): + row = conn.execute( + "SELECT spawn_failures FROM tasks WHERE id = ?", (task_id,), + ).fetchone() + failures = int(row["spawn_failures"]) + 1 if row else 1 + if failures >= failure_limit: + conn.execute( + "UPDATE tasks SET status = 'blocked', claim_lock = NULL, " + "claim_expires = NULL, worker_pid = NULL, " + "spawn_failures = ?, last_spawn_error = ? " + "WHERE id = ? 
AND status IN ('running', 'ready')", + (failures, error[:500], task_id), + ) + run_id = _end_run( + conn, task_id, + outcome="gave_up", status="gave_up", + error=error[:500], + metadata={"failures": failures}, + ) + _append_event( + conn, task_id, "gave_up", + {"failures": failures, "error": error[:500]}, + run_id=run_id, + ) + blocked = True + else: + conn.execute( + "UPDATE tasks SET status = 'ready', claim_lock = NULL, " + "claim_expires = NULL, worker_pid = NULL, " + "spawn_failures = ?, last_spawn_error = ? " + "WHERE id = ? AND status = 'running'", + (failures, error[:500], task_id), + ) + run_id = _end_run( + conn, task_id, + outcome="spawn_failed", status="spawn_failed", + error=error[:500], + metadata={"failures": failures}, + ) + _append_event( + conn, task_id, "spawn_failed", + {"error": error[:500], "failures": failures}, + run_id=run_id, + ) + return blocked + + +def _set_worker_pid(conn: sqlite3.Connection, task_id: str, pid: int) -> None: + """Record the spawned child's pid + emit a ``spawned`` event. + + The event's payload carries the pid so a human reading ``hermes kanban + tail`` can correlate log lines with OS-level traces without opening + the drawer. + """ + with write_txn(conn): + conn.execute( + "UPDATE tasks SET worker_pid = ? WHERE id = ?", + (int(pid), task_id), + ) + run_id = _current_run_id(conn, task_id) + if run_id is not None: + conn.execute( + "UPDATE task_runs SET worker_pid = ? WHERE id = ?", + (int(pid), run_id), + ) + _append_event(conn, task_id, "spawned", {"pid": int(pid)}, run_id=run_id) + + +def _clear_spawn_failures(conn: sqlite3.Connection, task_id: str) -> None: + """Reset the failure counter after a successful spawn.""" + with write_txn(conn): + conn.execute( + "UPDATE tasks SET spawn_failures = 0, last_spawn_error = NULL " + "WHERE id = ?", + (task_id,), + ) + + +def dispatch_once( + conn: sqlite3.Connection, + *, + spawn_fn=None, + ttl_seconds: int = DEFAULT_CLAIM_TTL_SECONDS, + dry_run: bool = False, + max_spawn: Optional[int] = None, + failure_limit: int = DEFAULT_SPAWN_FAILURE_LIMIT, +) -> DispatchResult: + """Run one dispatcher tick. + + Steps: + 1. Reclaim stale running tasks (TTL expired). + 2. Reclaim crashed running tasks (host-local PID no longer alive). + 3. Promote todo -> ready where all parents are done. + 4. For each ready task with an assignee, atomically claim and call + ``spawn_fn(task, workspace_path) -> Optional[int]``. The return + value (if any) is recorded as ``worker_pid`` so subsequent ticks + can detect crashes before the TTL expires. + + Spawn failures are counted per-task. After ``failure_limit`` consecutive + failures the task is auto-blocked with the last error as its reason — + prevents the dispatcher from thrashing forever on an unfixable task. + + ``spawn_fn`` defaults to ``_default_spawn``. Tests pass a stub. 
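+
+    A minimal driving loop (illustrative; ``connect`` is this module's
+    connection opener, and ``dry_run`` skips spawning but still reclaims
+    and promotes)::
+
+        with contextlib.closing(connect()) as conn:
+            res = dispatch_once(conn, dry_run=True)
+            for task_id, assignee, _ws in res.spawned:
+                print(f"would spawn {task_id} -> @{assignee}")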
+ """ + result = DispatchResult() + result.reclaimed = release_stale_claims(conn) + result.crashed = detect_crashed_workers(conn) + result.timed_out = enforce_max_runtime(conn) + result.promoted = recompute_ready(conn) + + ready_rows = conn.execute( + "SELECT id, assignee FROM tasks " + "WHERE status = 'ready' AND claim_lock IS NULL " + "ORDER BY priority DESC, created_at ASC" + ).fetchall() + spawned = 0 + for row in ready_rows: + if max_spawn is not None and spawned >= max_spawn: + break + if not row["assignee"]: + result.skipped_unassigned.append(row["id"]) + continue + if dry_run: + result.spawned.append((row["id"], row["assignee"], "")) + continue + claimed = claim_task(conn, row["id"], ttl_seconds=ttl_seconds) + if claimed is None: + continue + try: + workspace = resolve_workspace(claimed) + except Exception as exc: + auto = _record_spawn_failure( + conn, claimed.id, f"workspace: {exc}", + failure_limit=failure_limit, + ) + if auto: + result.auto_blocked.append(claimed.id) + continue + # Persist the resolved workspace path so the worker can cd there. + set_workspace_path(conn, claimed.id, str(workspace)) + _spawn = spawn_fn if spawn_fn is not None else _default_spawn + try: + pid = _spawn(claimed, str(workspace)) + if pid: + _set_worker_pid(conn, claimed.id, int(pid)) + _clear_spawn_failures(conn, claimed.id) + result.spawned.append((claimed.id, claimed.assignee or "", str(workspace))) + spawned += 1 + except Exception as exc: + auto = _record_spawn_failure( + conn, claimed.id, str(exc), + failure_limit=failure_limit, + ) + if auto: + result.auto_blocked.append(claimed.id) + return result + + +def _rotate_worker_log(log_path: Path, max_bytes: int) -> None: + """Rotate ```` to ``.1`` if it exceeds ``max_bytes``. + + Single-generation rotation — one old file kept, newer one replaces it. + Keeps disk usage bounded while still giving the user a chance to grab + the prior run's output. + """ + try: + if not log_path.exists(): + return + if log_path.stat().st_size <= max_bytes: + return + rotated = log_path.with_suffix(log_path.suffix + ".1") + try: + if rotated.exists(): + rotated.unlink() + except OSError: + pass + log_path.rename(rotated) + except OSError: + pass + + +def _default_spawn(task: Task, workspace: str) -> Optional[int]: + """Fire-and-forget ``hermes -p chat -q ...`` subprocess. + + Returns the spawned child's PID so the dispatcher can detect crashes + before the claim TTL expires. The child's completion is still observed + via the ``complete`` / ``block`` transitions the worker writes itself; + the PID check is a safety net for crashes, OOM kills, and Ctrl+C. + """ + import subprocess + if not task.assignee: + raise ValueError(f"task {task.id} has no assignee") + + prompt = f"work kanban task {task.id}" + env = dict(os.environ) + if task.tenant: + env["HERMES_TENANT"] = task.tenant + env["HERMES_KANBAN_TASK"] = task.id + env["HERMES_KANBAN_WORKSPACE"] = workspace + # HERMES_PROFILE is the author the kanban_comment tool defaults to. + # `hermes -p ` activates the profile, but the env var is + # what the tool reads — set it explicitly here so comments are + # attributed correctly regardless of how the child loads config. + env["HERMES_PROFILE"] = task.assignee + + cmd = [ + "hermes", + "-p", task.assignee, + # Auto-load the kanban-worker skill so every dispatched worker + # has the pattern library (good summary/metadata shapes, retry + # diagnostics, block-reason examples) in its context, even if + # the profile hasn't wired it into skills config. 
The MANDATORY + # lifecycle is already in the system prompt via KANBAN_GUIDANCE; + # this skill is the deeper reference. Users can point a profile + # at a different/additional skill via config if they want — + # --skills is additive to the profile's default skill set. + "--skills", "kanban-worker", + ] + # Per-task force-loaded skills. Each name goes in its own + # `--skills X` pair rather than a single comma-joined arg: the CLI + # accepts both forms (action='append' + comma-split), but + # per-name pairs are easier to read in `ps` output and avoid any + # quoting ambiguity if a skill name ever contains unusual chars. + # Dedupe against the built-in so we don't double-load kanban-worker + # if a task author asks for it explicitly. + if task.skills: + for sk in task.skills: + if sk and sk != "kanban-worker": + cmd.extend(["--skills", sk]) + cmd.extend([ + "chat", + "-q", prompt, + ]) + # Redirect output to a per-task log under HERMES_HOME/kanban/logs/. + from hermes_constants import get_hermes_home + log_dir = get_hermes_home() / "kanban" / "logs" + log_dir.mkdir(parents=True, exist_ok=True) + log_path = log_dir / f"{task.id}.log" + _rotate_worker_log(log_path, DEFAULT_LOG_ROTATE_BYTES) + + # Use 'a' so a re-run on unblock appends rather than overwrites. + log_f = open(log_path, "ab") + try: + proc = subprocess.Popen( # noqa: S603 -- argv is a fixed list built above + cmd, + cwd=workspace if os.path.isdir(workspace) else None, + stdin=subprocess.DEVNULL, + stdout=log_f, + stderr=subprocess.STDOUT, + env=env, + start_new_session=True, + ) + except FileNotFoundError: + log_f.close() + raise RuntimeError( + "`hermes` executable not found on PATH. " + "Install Hermes Agent or activate its venv before running the kanban dispatcher." + ) + # NOTE: we intentionally do NOT close log_f here — we want Popen's + # child process to keep writing after this function returns. The + # handle is kept alive by the child's inheritance. The parent's + # reference goes out of scope and is GC'd, but the OS-level FD stays + # open in the child until the child exits. + return proc.pid + + +# --------------------------------------------------------------------------- +# Long-lived dispatcher daemon +# --------------------------------------------------------------------------- + +def run_daemon( + *, + interval: float = 60.0, + max_spawn: Optional[int] = None, + failure_limit: int = DEFAULT_SPAWN_FAILURE_LIMIT, + stop_event=None, + on_tick=None, +) -> None: + """Run the dispatcher in a loop until interrupted. + + Calls :func:`dispatch_once` every ``interval`` seconds. Exits cleanly + on SIGINT / SIGTERM so ``hermes kanban daemon`` is systemd-friendly. + ``stop_event`` (a :class:`threading.Event`) and ``on_tick`` (a + callable receiving the :class:`DispatchResult`) are test hooks. + """ + import signal + import threading + + if stop_event is None: + stop_event = threading.Event() + + def _handle(_signum, _frame): + stop_event.set() + + # Install handlers only when running on the main thread — tests call + # this inline from worker threads and signal() would raise there. 
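+    # (An illustrative thread-based harness using this function's own
+    # test hooks:
+    #
+    #     stop = threading.Event()
+    #     threading.Thread(target=run_daemon, daemon=True,
+    #                      kwargs={"interval": 0.05, "stop_event": stop}).start()
+    #     ...  # assert on board state, then:
+    #     stop.set()
+    # )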
+ if threading.current_thread() is threading.main_thread(): + for sig_name in ("SIGINT", "SIGTERM"): + sig = getattr(signal, sig_name, None) + if sig is not None: + try: + signal.signal(sig, _handle) + except (ValueError, OSError): + pass + + while not stop_event.is_set(): + try: + with contextlib.closing(connect()) as conn: + res = dispatch_once( + conn, + max_spawn=max_spawn, + failure_limit=failure_limit, + ) + if on_tick is not None: + try: + on_tick(res) + except Exception: + pass + except Exception: + # Don't let any single tick kill the daemon. + import traceback + traceback.print_exc() + stop_event.wait(timeout=interval) + + +# --------------------------------------------------------------------------- +# Worker context builder (what a spawned worker sees) +# --------------------------------------------------------------------------- + +def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str: + """Return the full text a worker should read to understand its task. + + Order: + 1. Task title (mandatory). + 2. Task body (optional opening post, capped at 8 KB). + 3. Prior attempts on THIS task (most recent ``_CTX_MAX_PRIOR_ATTEMPTS`` + shown; older attempts collapsed into a one-line summary). + Each attempt's ``summary`` / ``error`` / ``metadata`` capped at + ``_CTX_MAX_FIELD_BYTES`` each. + 4. Structured handoff results of every done parent task. Prefers + ``run.summary`` / ``run.metadata`` when the parent was executed + via a run; falls back to ``task.result`` for older data. Same + per-field cap. + 5. Cross-task role history for the assignee (most recent 5 + completed runs on other tasks). + 6. Comment thread (most recent ``_CTX_MAX_COMMENTS`` shown, older + collapsed). + + All caps exist so worker prompts stay bounded even on pathological + boards (retry-heavy tasks, comment storms). The per-field char cap + prevents a single 1 MB summary from dominating context. + """ + task = get_task(conn, task_id) + if not task: + raise ValueError(f"unknown task {task_id}") + + def _cap(s: Optional[str], limit: int = _CTX_MAX_FIELD_BYTES) -> str: + """Truncate a string to `limit` chars with a visible ellipsis.""" + if not s: + return "" + s = s.strip() + if len(s) <= limit: + return s + return s[:limit] + f"… [truncated, {len(s) - limit} chars omitted]" + + lines: list[str] = [] + lines.append(f"# Kanban task {task.id}: {task.title}") + lines.append("") + lines.append(f"Assignee: {task.assignee or '(unassigned)'}") + lines.append(f"Status: {task.status}") + if task.tenant: + lines.append(f"Tenant: {task.tenant}") + lines.append(f"Workspace: {task.workspace_kind} @ {task.workspace_path or '(unresolved)'}") + lines.append("") + + if task.body and task.body.strip(): + lines.append("## Body") + lines.append(_cap(task.body, _CTX_MAX_BODY_BYTES)) + lines.append("") + + # Prior attempts — show closed runs so a retrying worker sees the + # history. Skip the currently-active run (that's this worker). + # Cap at _CTX_MAX_PRIOR_ATTEMPTS most-recent closed runs; older + # attempts get collapsed into a one-line marker so the worker knows + # more exist without bloating the prompt. 
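+        # Rendered attempt entries look like (values hypothetical):
+        #
+        #   ### Attempt 3 — timed_out (encoder, 2025-11-02 14:10)
+        #   _error_: elapsed 3700s > limit 3600s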
+ all_prior = [r for r in list_runs(conn, task_id) if r.ended_at is not None] + # list_runs returns ascending by started_at; "most recent" = last N + if len(all_prior) > _CTX_MAX_PRIOR_ATTEMPTS: + omitted = len(all_prior) - _CTX_MAX_PRIOR_ATTEMPTS + shown = all_prior[-_CTX_MAX_PRIOR_ATTEMPTS:] + first_shown_idx = omitted + 1 + else: + omitted = 0 + shown = all_prior + first_shown_idx = 1 + if shown: + lines.append("## Prior attempts on this task") + if omitted: + lines.append( + f"_({omitted} earlier attempt{'s' if omitted != 1 else ''} " + f"omitted; showing most recent {len(shown)})_" + ) + for offset, run in enumerate(shown): + idx = first_shown_idx + offset + ts = time.strftime("%Y-%m-%d %H:%M", time.localtime(run.started_at)) + profile = run.profile or "(unknown)" + outcome = run.outcome or run.status + lines.append(f"### Attempt {idx} — {outcome} ({profile}, {ts})") + if run.summary and run.summary.strip(): + lines.append(_cap(run.summary)) + if run.error and run.error.strip(): + lines.append(f"_error_: {_cap(run.error)}") + if run.metadata: + try: + meta_str = json.dumps(run.metadata, ensure_ascii=False, sort_keys=True) + lines.append(f"_metadata_: `{_cap(meta_str)}`") + except Exception: + pass + lines.append("") + + # Parents: prefer the most-recent 'completed' run's summary + metadata, + # fall back to ``task.result`` when no run rows exist (legacy DBs, + # or tasks completed before the runs table landed). + parent_rows = conn.execute( + "SELECT parent_id FROM task_links WHERE child_id = ? ORDER BY parent_id", + (task_id,), + ).fetchall() + parent_ids = [r["parent_id"] for r in parent_rows] + + if parent_ids: + wrote_header = False + for pid in parent_ids: + pt = get_task(conn, pid) + if not pt or pt.status != "done": + continue + runs = [r for r in list_runs(conn, pid) if r.outcome == "completed"] + runs.sort(key=lambda r: r.started_at, reverse=True) + run = runs[0] if runs else None + + if not wrote_header: + lines.append("## Parent task results") + wrote_header = True + lines.append(f"### {pid}") + + body_lines: list[str] = [] + if run is not None and run.summary and run.summary.strip(): + body_lines.append(_cap(run.summary)) + elif pt.result: + body_lines.append(_cap(pt.result)) + else: + body_lines.append("(no result recorded)") + + if run is not None and run.metadata: + try: + meta_str = json.dumps(run.metadata, ensure_ascii=False, sort_keys=True) + body_lines.append(f"_metadata_: `{_cap(meta_str)}`") + except Exception: + pass + lines.extend(body_lines) + lines.append("") + + # Cross-task role history: what else has THIS assignee completed + # recently? Gives the worker implicit continuity — "I'm the reviewer + # and my last three reviews focused on security" — without forcing + # the user to wire anything into SOUL.md / MEMORY.md. Bounded to the + # most recent 5 completed runs, excluding this task so the retry + # section above isn't duplicated. Safe on assignee=None (skipped). + if task.assignee: + role_rows = conn.execute( + "SELECT t.id, t.title, r.summary, r.ended_at " + "FROM task_runs r JOIN tasks t ON r.task_id = t.id " + "WHERE r.profile = ? AND r.task_id != ? 
" + " AND r.outcome = 'completed' " + "ORDER BY r.ended_at DESC LIMIT 5", + (task.assignee, task_id), + ).fetchall() + if role_rows: + lines.append(f"## Recent work by @{task.assignee}") + for row in role_rows: + ts = time.strftime( + "%Y-%m-%d %H:%M", time.localtime(int(row["ended_at"])) + ) + s = (row["summary"] or "").strip().splitlines() + first = s[0][:200] if s else "(no summary)" + lines.append(f"- {row['id']} — {row['title']} ({ts}): {first}") + lines.append("") + + # Comments: cap at the most-recent _CTX_MAX_COMMENTS so + # comment-storm tasks don't blow out the worker's prompt. Older + # comments summarised in a one-line marker like prior attempts. + all_comments = list_comments(conn, task_id) + if len(all_comments) > _CTX_MAX_COMMENTS: + omitted_c = len(all_comments) - _CTX_MAX_COMMENTS + shown_c = all_comments[-_CTX_MAX_COMMENTS:] + else: + omitted_c = 0 + shown_c = all_comments + if shown_c: + lines.append("## Comment thread") + if omitted_c: + lines.append( + f"_({omitted_c} earlier comment{'s' if omitted_c != 1 else ''} " + f"omitted; showing most recent {len(shown_c)})_" + ) + for c in shown_c: + ts = time.strftime("%Y-%m-%d %H:%M", time.localtime(c.created_at)) + lines.append(f"**{c.author}** ({ts}):") + lines.append(_cap(c.body, _CTX_MAX_COMMENT_BYTES)) + lines.append("") + + return "\n".join(lines).rstrip() + "\n" + + +# --------------------------------------------------------------------------- +# Stats + SLA helpers +# --------------------------------------------------------------------------- + +def board_stats(conn: sqlite3.Connection) -> dict: + """Per-status + per-assignee counts, plus the oldest ``ready`` age in + seconds (the clearest staleness signal for a router or HUD). + """ + by_status: dict[str, int] = {} + for row in conn.execute( + "SELECT status, COUNT(*) AS n FROM tasks " + "WHERE status != 'archived' GROUP BY status" + ): + by_status[row["status"]] = int(row["n"]) + + by_assignee: dict[str, dict[str, int]] = {} + for row in conn.execute( + "SELECT assignee, status, COUNT(*) AS n FROM tasks " + "WHERE status != 'archived' AND assignee IS NOT NULL " + "GROUP BY assignee, status" + ): + by_assignee.setdefault(row["assignee"], {})[row["status"]] = int(row["n"]) + + oldest_row = conn.execute( + "SELECT MIN(created_at) AS ts FROM tasks WHERE status = 'ready'" + ).fetchone() + now = int(time.time()) + oldest_ready_age = ( + (now - int(oldest_row["ts"])) + if oldest_row and oldest_row["ts"] is not None else None + ) + + return { + "by_status": by_status, + "by_assignee": by_assignee, + "oldest_ready_age_seconds": oldest_ready_age, + "now": now, + } + + +def task_age(task: Task) -> dict: + """Return age metrics for a single task. 
All values are seconds or None.""" + now = int(time.time()) + age_since_created = now - int(task.created_at) if task.created_at else None + age_since_started = ( + now - int(task.started_at) if task.started_at else None + ) + time_to_complete = ( + int(task.completed_at) - int(task.started_at or task.created_at) + if task.completed_at else None + ) + return { + "created_age_seconds": age_since_created, + "started_age_seconds": age_since_started, + "time_to_complete_seconds": time_to_complete, + } + + +# --------------------------------------------------------------------------- +# Notification subscriptions (used by the gateway kanban-notifier) +# --------------------------------------------------------------------------- + +def add_notify_sub( + conn: sqlite3.Connection, + *, + task_id: str, + platform: str, + chat_id: str, + thread_id: Optional[str] = None, + user_id: Optional[str] = None, +) -> None: + """Register a gateway source that wants terminal-state notifications + for ``task_id``. Idempotent on (task, platform, chat, thread).""" + now = int(time.time()) + with write_txn(conn): + conn.execute( + """ + INSERT OR IGNORE INTO kanban_notify_subs + (task_id, platform, chat_id, thread_id, user_id, created_at) + VALUES (?, ?, ?, ?, ?, ?) + """, + (task_id, platform, chat_id, thread_id or "", user_id, now), + ) + + +def list_notify_subs( + conn: sqlite3.Connection, task_id: Optional[str] = None, +) -> list[dict]: + if task_id is not None: + rows = conn.execute( + "SELECT * FROM kanban_notify_subs WHERE task_id = ?", (task_id,), + ).fetchall() + else: + rows = conn.execute("SELECT * FROM kanban_notify_subs").fetchall() + return [dict(r) for r in rows] + + +def remove_notify_sub( + conn: sqlite3.Connection, + *, + task_id: str, + platform: str, + chat_id: str, + thread_id: Optional[str] = None, +) -> bool: + with write_txn(conn): + cur = conn.execute( + "DELETE FROM kanban_notify_subs WHERE task_id = ? " + "AND platform = ? AND chat_id = ? AND thread_id = ?", + (task_id, platform, chat_id, thread_id or ""), + ) + return cur.rowcount > 0 + + +def unseen_events_for_sub( + conn: sqlite3.Connection, + *, + task_id: str, + platform: str, + chat_id: str, + thread_id: Optional[str] = None, + kinds: Optional[Iterable[str]] = None, +) -> tuple[int, list[Event]]: + """Return ``(new_cursor, events)`` for a given subscription. + + Only events with ``id > last_event_id`` are returned. The subscription's + cursor is NOT advanced here; call :func:`advance_notify_cursor` after + the gateway has successfully delivered the notifications. + """ + row = conn.execute( + "SELECT last_event_id FROM kanban_notify_subs " + "WHERE task_id = ? AND platform = ? AND chat_id = ? AND thread_id = ?", + (task_id, platform, chat_id, thread_id or ""), + ).fetchone() + if row is None: + return 0, [] + cursor = int(row["last_event_id"]) + kind_list = list(kinds) if kinds else None + q = ( + "SELECT * FROM task_events WHERE task_id = ? AND id > ? " + + ("AND kind IN (" + ",".join("?" 
* len(kind_list)) + ") " if kind_list else "") + + "ORDER BY id ASC" + ) + params: list[Any] = [task_id, cursor] + if kind_list: + params.extend(kind_list) + rows = conn.execute(q, params).fetchall() + out: list[Event] = [] + max_id = cursor + for r in rows: + try: + payload = json.loads(r["payload"]) if r["payload"] else None + except Exception: + payload = None + out.append(Event( + id=r["id"], task_id=r["task_id"], kind=r["kind"], + payload=payload, created_at=r["created_at"], + run_id=(int(r["run_id"]) if "run_id" in r.keys() and r["run_id"] is not None else None), + )) + max_id = max(max_id, int(r["id"])) + return max_id, out + + +def advance_notify_cursor( + conn: sqlite3.Connection, + *, + task_id: str, + platform: str, + chat_id: str, + thread_id: Optional[str] = None, + new_cursor: int, +) -> None: + with write_txn(conn): + conn.execute( + "UPDATE kanban_notify_subs SET last_event_id = ? " + "WHERE task_id = ? AND platform = ? AND chat_id = ? AND thread_id = ?", + (int(new_cursor), task_id, platform, chat_id, thread_id or ""), + ) + + +# --------------------------------------------------------------------------- +# Retention + garbage collection +# --------------------------------------------------------------------------- + +def gc_events( + conn: sqlite3.Connection, *, older_than_seconds: int = 30 * 24 * 3600, +) -> int: + """Delete task_events rows older than ``older_than_seconds`` for tasks + in a terminal state (``done`` or ``archived``). Returns the number of + rows deleted. Running / ready / blocked tasks keep their full event + history.""" + cutoff = int(time.time()) - int(older_than_seconds) + with write_txn(conn): + cur = conn.execute( + "DELETE FROM task_events WHERE created_at < ? AND task_id IN " + "(SELECT id FROM tasks WHERE status IN ('done', 'archived'))", + (cutoff,), + ) + return int(cur.rowcount or 0) + + +def gc_worker_logs( + *, older_than_seconds: int = 30 * 24 * 3600, +) -> int: + """Delete worker log files older than ``older_than_seconds``. Returns + the number of files removed. Kept separate from ``gc_events`` because + log files live on disk, not in SQLite.""" + from hermes_constants import get_hermes_home + log_dir = get_hermes_home() / "kanban" / "logs" + if not log_dir.exists(): + return 0 + cutoff = time.time() - older_than_seconds + removed = 0 + for p in log_dir.iterdir(): + try: + if p.is_file() and p.stat().st_mtime < cutoff: + p.unlink() + removed += 1 + except OSError: + continue + return removed + + +# --------------------------------------------------------------------------- +# Worker log accessor +# --------------------------------------------------------------------------- + +def worker_log_path(task_id: str) -> Path: + """Return the path to a worker's log file. The file may not exist + (task never spawned, or log already GC'd).""" + from hermes_constants import get_hermes_home + return get_hermes_home() / "kanban" / "logs" / f"{task_id}.log" + + +def read_worker_log( + task_id: str, *, tail_bytes: Optional[int] = None, +) -> Optional[str]: + """Read the worker log for ``task_id``. Returns None if the file + doesn't exist. 
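+
+    A bounded tail read, as the dashboard drawer issues it (task id
+    hypothetical)::
+
+        text = read_worker_log("tsk-42", tail_bytes=64 * 1024)
+        if text is None:
+            print("(no log on disk for this task)")
+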
If ``tail_bytes`` is set, only the last N bytes are + returned (useful for the dashboard drawer which shouldn't page megabytes).""" + path = worker_log_path(task_id) + if not path.exists(): + return None + try: + if tail_bytes is None: + return path.read_text(encoding="utf-8", errors="replace") + size = path.stat().st_size + with open(path, "rb") as f: + if size > tail_bytes: + f.seek(size - tail_bytes) + # Skip a partial line if we tailed mid-line. But if the + # window has no newline at all (one giant log line), + # readline() would eat everything — in that case don't + # skip and return the raw tail. + probe = f.tell() + partial = f.readline() + if not partial.endswith(b"\n") and f.tell() >= size: + f.seek(probe) + data = f.read() + return data.decode("utf-8", errors="replace") + except OSError: + return None + + +# --------------------------------------------------------------------------- +# Assignee enumeration (known profiles + per-profile board stats) +# --------------------------------------------------------------------------- + +def list_profiles_on_disk() -> list[str]: + """Return the set of named profiles discovered on disk. + + Reads ``~/.hermes/profiles/`` directly so this module has no import + dependency on ``hermes_cli.profiles`` (which pulls in a large chunk + of the CLI startup path). Only returns directories that contain a + ``config.yaml`` — a bare dir without config isn't a real profile. + """ + try: + home = Path.home() / ".hermes" / "profiles" + except Exception: + return [] + if not home.is_dir(): + return [] + names: list[str] = [] + try: + for entry in sorted(home.iterdir()): + if not entry.is_dir(): + continue + if (entry / "config.yaml").is_file(): + names.append(entry.name) + except OSError: + return names + return names + + +def known_assignees(conn: sqlite3.Connection) -> list[dict]: + """Return every assignee name known to the board or on disk. + + Each entry is ``{"name": str, "on_disk": bool, "counts": {status: n}}``. + A name is included when it's a configured profile on disk OR when + any non-archived task has it as the assignee. Used by: + + - ``hermes kanban assignees`` for the terminal. + - The dashboard assignee dropdown (so a fresh profile appears in + the picker even before it's been given any task). + - Router-profile heuristics ("who's overloaded?") without scanning + the whole board. + """ + on_disk = set(list_profiles_on_disk()) + + # Count tasks per (assignee, status), excluding archived. + counts: dict[str, dict[str, int]] = {} + for row in conn.execute( + "SELECT assignee, status, COUNT(*) AS n FROM tasks " + "WHERE status != 'archived' AND assignee IS NOT NULL " + "GROUP BY assignee, status" + ): + counts.setdefault(row["assignee"], {})[row["status"]] = int(row["n"]) + + names = sorted(on_disk | set(counts.keys())) + return [ + { + "name": name, + "on_disk": name in on_disk, + "counts": counts.get(name, {}), + } + for name in names + ] + + +# --------------------------------------------------------------------------- +# Runs (attempt history on a task) +# --------------------------------------------------------------------------- + +def list_runs( + conn: sqlite3.Connection, + task_id: str, + *, + include_active: bool = True, +) -> list[Run]: + """Return all runs for ``task_id`` in start order. + + ``include_active=True`` (default) includes the currently-running + attempt if any. Set False to return only closed runs (useful for + "how many prior attempts have there been?" checks). + """ + q = "SELECT * FROM task_runs WHERE task_id = ?" 
+ params: list[Any] = [task_id] + if not include_active: + q += " AND ended_at IS NOT NULL" + q += " ORDER BY started_at ASC, id ASC" + rows = conn.execute(q, params).fetchall() + return [Run.from_row(r) for r in rows] + + +def get_run(conn: sqlite3.Connection, run_id: int) -> Optional[Run]: + row = conn.execute( + "SELECT * FROM task_runs WHERE id = ?", (int(run_id),), + ).fetchone() + return Run.from_row(row) if row else None + + +def active_run(conn: sqlite3.Connection, task_id: str) -> Optional[Run]: + """Return the currently-open run for ``task_id`` (``ended_at IS NULL``).""" + row = conn.execute( + "SELECT * FROM task_runs WHERE task_id = ? AND ended_at IS NULL " + "ORDER BY started_at DESC LIMIT 1", + (task_id,), + ).fetchone() + return Run.from_row(row) if row else None + + +def latest_run(conn: sqlite3.Connection, task_id: str) -> Optional[Run]: + """Return the most recent run regardless of outcome (active or closed).""" + row = conn.execute( + "SELECT * FROM task_runs WHERE task_id = ? " + "ORDER BY started_at DESC, id DESC LIMIT 1", + (task_id,), + ).fetchone() + return Run.from_row(row) if row else None diff --git a/hermes_cli/main.py b/hermes_cli/main.py index bdbf0390a68..3ef85d45415 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -5041,6 +5041,13 @@ def cmd_slack(args): return 1 +def cmd_kanban(args): + """Multi-profile collaboration board.""" + from hermes_cli.kanban import kanban_command + + return kanban_command(args) + + def cmd_hooks(args): """Shell-hook inspection and management.""" from hermes_cli.hooks import hooks_command @@ -8640,6 +8647,13 @@ def main(): webhook_parser.set_defaults(func=cmd_webhook) + # ========================================================================= + # kanban command — multi-profile collaboration board + # ========================================================================= + from hermes_cli.kanban import build_parser as _build_kanban_parser + kanban_parser = _build_kanban_parser(subparsers) + kanban_parser.set_defaults(func=cmd_kanban) + # ========================================================================= # hooks command — shell-hook inspection and management # ========================================================================= diff --git a/plugins/kanban/dashboard/dist/index.js b/plugins/kanban/dashboard/dist/index.js new file mode 100644 index 00000000000..2f6aab07cf7 --- /dev/null +++ b/plugins/kanban/dashboard/dist/index.js @@ -0,0 +1,1601 @@ +/** + * Hermes Kanban — Dashboard Plugin + * + * Board view for the multi-agent collaboration board backed by + * ~/.hermes/kanban.db. Calls the plugin's backend at /api/plugins/kanban/ + * and tails task_events over a WebSocket for live updates. + * + * Plain IIFE, no build step. Uses window.__HERMES_PLUGIN_SDK__ for React + + * shadcn primitives; HTML5 drag-and-drop for card movement on desktop and + * a pointer-based fallback for touch. + */ +(function () { + "use strict"; + + const SDK = window.__HERMES_PLUGIN_SDK__; + if (!SDK) return; + + const { React } = SDK; + const h = React.createElement; + const { + Card, CardContent, + Badge, Button, Input, Label, Select, SelectOption, + } = SDK.components; + const { useState, useEffect, useCallback, useMemo, useRef } = SDK.hooks; + const { cn, timeAgo } = SDK.utils; + + // Order matches BOARD_COLUMNS in plugin_api.py. 
+  const COLUMN_ORDER = ["triage", "todo", "ready", "running", "blocked", "done"];
+  const COLUMN_LABEL = {
+    triage: "Triage",
+    todo: "Todo",
+    ready: "Ready",
+    running: "In Progress",
+    blocked: "Blocked",
+    done: "Done",
+    archived: "Archived",
+  };
+  const COLUMN_HELP = {
+    triage: "Raw ideas — a specifier will flesh out the spec",
+    todo: "Waiting on dependencies or unassigned",
+    ready: "Assigned and waiting for a dispatcher tick",
+    running: "Claimed by a worker — in-flight",
+    blocked: "Worker asked for human input",
+    done: "Completed",
+    archived: "Archived",
+  };
+  const COLUMN_DOT = {
+    triage: "hermes-kanban-dot-triage",
+    todo: "hermes-kanban-dot-todo",
+    ready: "hermes-kanban-dot-ready",
+    running: "hermes-kanban-dot-running",
+    blocked: "hermes-kanban-dot-blocked",
+    done: "hermes-kanban-dot-done",
+    archived: "hermes-kanban-dot-archived",
+  };
+
+  const DESTRUCTIVE_TRANSITIONS = {
+    done: "Mark this task as done? The worker's claim is released and dependent children become ready.",
+    archived: "Archive this task? It disappears from the default board view.",
+    blocked: "Mark this task as blocked? The worker's claim is released.",
+  };
+
+  const API = "/api/plugins/kanban";
+  const MIME_TASK = "text/x-hermes-task";
+
+  // -------------------------------------------------------------------------
+  // Minimal safe markdown renderer.
+  //
+  // Recognises a small subset (headings, bold, italic, inline code, fenced
+  // code, links, bullet lists, paragraphs). HTML escaping first, then
+  // inline replacements against the escaped string — no raw HTML from the
+  // user is ever executed.
+  // -------------------------------------------------------------------------
+
+  function escapeHtml(s) {
+    return String(s)
+      .replace(/&/g, "&amp;")
+      .replace(/</g, "&lt;")
+      .replace(/>/g, "&gt;")
+      .replace(/"/g, "&quot;")
+      .replace(/'/g, "&#39;");
+  }
+  function renderInline(esc) {
+    // Fenced code has already been extracted before this runs; process
+    // inline replacements on the escaped string.
+    return esc
+      // inline code
+      .replace(/`([^`\n]+)`/g, (_m, c) => `<code>${c}</code>`)
+      // bold
+      .replace(/\*\*([^*\n]+)\*\*/g, "<strong>$1</strong>")
+      // italic
+      .replace(/(^|[^*])\*([^*\n]+)\*/g, "$1<em>$2</em>")
+      // safe links — only http(s) and mailto
+      .replace(
+        /\[([^\]\n]+)\]\((https?:\/\/[^\s)]+|mailto:[^\s)]+)\)/g,
+        (_m, text, href) => `<a href="${href}">${text}</a>`,
+      );
+  }
+  function renderMarkdown(src) {
+    if (!src) return "";
+    // Split out fenced code blocks first so their contents aren't mangled.
+    const blocks = [];
+    let working = String(src).replace(/```([\s\S]*?)```/g, (_m, code) => {
+      blocks.push(code);
+      return `\u0000CODE${blocks.length - 1}\u0000`;
+    });
+    const escaped = escapeHtml(working);
+    const lines = escaped.split(/\r?\n/);
+    const out = [];
+    let inList = false;
+    for (const raw of lines) {
+      const line = raw;
+      const bullet = /^\s*[-*]\s+(.*)$/.exec(line);
+      const heading = /^(#{1,4})\s+(.*)$/.exec(line);
+      if (bullet) {
+        if (!inList) { out.push("<ul>"); inList = true; }
+        out.push(`<li>${renderInline(bullet[1])}</li>`);
+        continue;
+      }
+      if (inList) { out.push("</ul>"); inList = false; }
+      if (heading) {
+        const level = heading[1].length;
+        out.push(`<h${level}>${renderInline(heading[2])}</h${level}>`);
+      } else if (line.trim() === "") {
+        out.push("");
+      } else {
+        out.push(`<p>${renderInline(line)}</p>`);
+      }
+    }
+    if (inList) out.push("</ul>");
+    let html = out.join("\n");
+    // Re-insert fenced code blocks, escaping their raw contents now.
+    html = html.replace(/\u0000CODE(\d+)\u0000/g, (_m, i) =>
+      `<pre><code>${escapeHtml(blocks[Number(i)])}</code></pre>`,
+    );
+    return html;
+  }
+
+  function MarkdownBlock(props) {
+    const enabled = props.enabled !== false;
+    if (!enabled) {
+      return h("pre", { className: "hermes-kanban-pre" }, props.source || "");
+    }
+    return h("div", {
+      className: "hermes-kanban-md",
+      dangerouslySetInnerHTML: { __html: renderMarkdown(props.source || "") },
+    });
+  }
+
+  // -------------------------------------------------------------------------
+  // Touch drag-drop helper.
+  //
+  // HTML5 DnD is desktop-only. On touch devices we attach a pointerdown
+  // handler that simulates a drag proxy and fires a custom event on the
+  // column under the finger when released. Columns listen for both the
+  // standard `drop` event and our `hermes-kanban:drop` event.
+  // -------------------------------------------------------------------------
+
+  function attachTouchDrag(el, taskId) {
+    if (!el) return;
+    function onDown(e) {
+      if (e.pointerType !== "touch") return;
+      e.preventDefault();
+      const proxy = el.cloneNode(true);
+      proxy.classList.add("hermes-kanban-touch-proxy");
+      document.body.appendChild(proxy);
+      let lastTarget = null;
+
+      function move(ev) {
+        proxy.style.left = `${ev.clientX - proxy.offsetWidth / 2}px`;
+        proxy.style.top = `${ev.clientY - 24}px`;
+        proxy.style.display = "none";
+        const under = document.elementFromPoint(ev.clientX, ev.clientY);
+        proxy.style.display = "";
+        const col = under && under.closest && under.closest("[data-kanban-column]");
+        if (col !== lastTarget) {
+          if (lastTarget) lastTarget.classList.remove("hermes-kanban-column--drop");
+          if (col) col.classList.add("hermes-kanban-column--drop");
+          lastTarget = col;
+        }
+      }
+      function up() {
+        document.removeEventListener("pointermove", move);
+        document.removeEventListener("pointerup", up);
+        document.removeEventListener("pointercancel", up);
+        if (lastTarget) {
+          lastTarget.classList.remove("hermes-kanban-column--drop");
+          const status = lastTarget.getAttribute("data-kanban-column");
+          lastTarget.dispatchEvent(new CustomEvent("hermes-kanban:drop", {
+            detail: { taskId, status },
+            bubbles: true,
+          }));
+        }
+        proxy.remove();
+      }
+      // Kick off proxy at the pointer origin.
+ proxy.style.position = "fixed"; + proxy.style.pointerEvents = "none"; + proxy.style.opacity = "0.85"; + proxy.style.zIndex = "9999"; + proxy.style.width = `${el.offsetWidth}px`; + proxy.style.left = `${e.clientX - el.offsetWidth / 2}px`; + proxy.style.top = `${e.clientY - 24}px`; + document.addEventListener("pointermove", move); + document.addEventListener("pointerup", up); + document.addEventListener("pointercancel", up); + } + el.addEventListener("pointerdown", onDown); + return function () { el.removeEventListener("pointerdown", onDown); }; + } + + // ------------------------------------------------------------------------- + // Error boundary + // ------------------------------------------------------------------------- + + class ErrorBoundary extends React.Component { + constructor(props) { super(props); this.state = { error: null }; } + static getDerivedStateFromError(error) { return { error }; } + componentDidCatch(error, info) { + // eslint-disable-next-line no-console + console.error("Kanban plugin crashed:", error, info); + } + render() { + if (this.state.error) { + return h(Card, null, + h(CardContent, { className: "p-6 text-sm" }, + h("div", { className: "text-destructive font-semibold mb-1" }, + "Kanban tab hit a rendering error"), + h("div", { className: "text-muted-foreground text-xs mb-3" }, + String(this.state.error && this.state.error.message || this.state.error)), + h(Button, { + onClick: () => this.setState({ error: null }), + className: "h-7 px-3 text-xs border border-border hover:bg-foreground/10 cursor-pointer", + }, "Reload view"), + ), + ); + } + return this.props.children; + } + } + + // ------------------------------------------------------------------------- + // Root page + // ------------------------------------------------------------------------- + + function KanbanPage() { + const [board, setBoard] = useState(null); + const [config, setConfig] = useState(null); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + + const [tenantFilter, setTenantFilter] = useState(""); + const [assigneeFilter, setAssigneeFilter] = useState(""); + const [includeArchived, setIncludeArchived] = useState(false); + const [search, setSearch] = useState(""); + const [laneByProfile, setLaneByProfile] = useState(true); + const [configApplied, setConfigApplied] = useState(false); + + const [selectedTaskId, setSelectedTaskId] = useState(null); + const [selectedIds, setSelectedIds] = useState(() => new Set()); + // Per-task event counter incremented whenever the WS stream reports + // a new event for that task id. TaskDrawer useEffect-depends on its + // own task's counter so it reloads itself on live events instead of + // showing stale data. 
+ const [taskEventTick, setTaskEventTick] = useState({}); + + const cursorRef = useRef(0); + const reloadTimerRef = useRef(null); + const wsRef = useRef(null); + const wsBackoffRef = useRef(1000); + const wsClosedRef = useRef(false); + + // --- load config once --------------------------------------------------- + useEffect(function () { + SDK.fetchJSON(`${API}/config`) + .then(function (c) { + setConfig(c); + if (!configApplied) { + if (c.default_tenant) setTenantFilter(c.default_tenant); + if (typeof c.lane_by_profile === "boolean") setLaneByProfile(c.lane_by_profile); + if (typeof c.include_archived_by_default === "boolean") setIncludeArchived(c.include_archived_by_default); + setConfigApplied(true); + } + }) + .catch(function () { setConfig({ render_markdown: true }); }); + }, []); // eslint-disable-line react-hooks/exhaustive-deps + + // --- fetch full board --------------------------------------------------- + const loadBoard = useCallback(() => { + const qs = new URLSearchParams(); + if (tenantFilter) qs.set("tenant", tenantFilter); + if (includeArchived) qs.set("include_archived", "true"); + const url = qs.toString() ? `${API}/board?${qs}` : `${API}/board`; + return SDK.fetchJSON(url) + .then(function (data) { + setBoard(data); + cursorRef.current = data.latest_event_id || 0; + setError(null); + }) + .catch(function (err) { + setError(String(err && err.message ? err.message : err)); + }) + .finally(function () { setLoading(false); }); + }, [tenantFilter, includeArchived]); + + const scheduleReload = useCallback(function () { + if (reloadTimerRef.current) return; + reloadTimerRef.current = setTimeout(function () { + reloadTimerRef.current = null; + loadBoard(); + }, 250); + }, [loadBoard]); + + useEffect(function () { + loadBoard(); + return function () { + if (reloadTimerRef.current) { + clearTimeout(reloadTimerRef.current); + reloadTimerRef.current = null; + } + }; + }, [loadBoard]); + + // --- WebSocket --------------------------------------------------------- + useEffect(function () { + if (!board) return undefined; + wsClosedRef.current = false; + function openWs() { + if (wsClosedRef.current) return; + const token = window.__HERMES_SESSION_TOKEN__ || ""; + const proto = window.location.protocol === "https:" ? "wss:" : "ws:"; + const qs = new URLSearchParams({ + since: String(cursorRef.current || 0), + token: token, + }); + const url = `${proto}//${window.location.host}${API}/events?${qs}`; + let ws; + try { ws = new WebSocket(url); } catch (_e) { return; } + wsRef.current = ws; + ws.onopen = function () { wsBackoffRef.current = 1000; }; + ws.onmessage = function (ev) { + try { + const msg = JSON.parse(ev.data); + if (msg && Array.isArray(msg.events) && msg.events.length > 0) { + cursorRef.current = msg.cursor || cursorRef.current; + // Stamp per-task signal so the TaskDrawer can reload itself. 
+ setTaskEventTick(function (prev) { + const next = Object.assign({}, prev); + for (const e of msg.events) { + if (e && e.task_id) next[e.task_id] = (next[e.task_id] || 0) + 1; + } + return next; + }); + scheduleReload(); + } + } catch (_e) { /* ignore */ } + }; + ws.onclose = function (ev) { + if (wsClosedRef.current) return; + if (ev && ev.code === 1008) { + setError("WebSocket auth failed — reload the page to refresh the session token."); + return; + } + const delay = Math.min(wsBackoffRef.current, 30000); + wsBackoffRef.current = Math.min(wsBackoffRef.current * 2, 30000); + setTimeout(openWs, delay); + }; + } + openWs(); + return function () { + wsClosedRef.current = true; + try { wsRef.current && wsRef.current.close(); } catch (_e) { /* noop */ } + }; + }, [!!board, scheduleReload]); + + // --- filtering ---------------------------------------------------------- + const filteredBoard = useMemo(function () { + if (!board) return null; + const q = search.trim().toLowerCase(); + const filterTask = function (t) { + if (assigneeFilter && t.assignee !== assigneeFilter) return false; + if (q) { + const hay = `${t.id} ${t.title || ""} ${t.assignee || ""} ${t.tenant || ""}`.toLowerCase(); + if (hay.indexOf(q) === -1) return false; + } + return true; + }; + return Object.assign({}, board, { + columns: board.columns.map(function (col) { + return Object.assign({}, col, { tasks: col.tasks.filter(filterTask) }); + }), + }); + }, [board, assigneeFilter, search]); + + // --- actions ------------------------------------------------------------ + const moveTask = useCallback(function (taskId, newStatus) { + const confirmMsg = DESTRUCTIVE_TRANSITIONS[newStatus]; + if (confirmMsg && !window.confirm(confirmMsg)) return; + setBoard(function (b) { + if (!b) return b; + let moved = null; + const columns = b.columns.map(function (col) { + const next = col.tasks.filter(function (t) { + if (t.id === taskId) { moved = Object.assign({}, t, { status: newStatus }); return false; } + return true; + }); + return Object.assign({}, col, { tasks: next }); + }); + if (moved) { + const dest = columns.find(function (c) { return c.name === newStatus; }); + if (dest) dest.tasks = [moved].concat(dest.tasks); + } + return Object.assign({}, b, { columns }); + }); + SDK.fetchJSON(`${API}/tasks/${encodeURIComponent(taskId)}`, { + method: "PATCH", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ status: newStatus }), + }).catch(function (err) { + setError(`Move failed: ${err.message || err}`); + loadBoard(); + }); + }, [loadBoard]); + + const createTask = useCallback(function (body) { + return SDK.fetchJSON(`${API}/tasks`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }).then(function (res) { + // Surface dispatcher-presence warnings (e.g. "no gateway is + // running") via the existing error banner channel. Not fatal — + // the task was created successfully — but the user should know + // their ready task will sit idle until the gateway is up. + if (res && res.warning) { + setError("Task created, but: " + res.warning); + } + loadBoard(); + return res; + }); + }, [loadBoard]); + + const toggleSelected = useCallback(function (id, additive) { + setSelectedIds(function (prev) { + const next = new Set(additive ? 
prev : []); + if (prev.has(id)) next.delete(id); + else next.add(id); + return next; + }); + }, []); + const clearSelected = useCallback(function () { setSelectedIds(new Set()); }, []); + + const applyBulk = useCallback(function (patch, confirmMsg) { + if (selectedIds.size === 0) return; + if (confirmMsg && !window.confirm(confirmMsg)) return; + const body = Object.assign({ ids: Array.from(selectedIds) }, patch); + SDK.fetchJSON(`${API}/tasks/bulk`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(body), + }) + .then(function (res) { + const failed = (res.results || []).filter(function (r) { return !r.ok; }); + if (failed.length > 0) { + setError(`Bulk: ${failed.length} of ${res.results.length} failed: ` + + failed.slice(0, 3).map(function (f) { return `${f.id} (${f.error})`; }).join("; ")); + } + clearSelected(); + loadBoard(); + }) + .catch(function (e) { setError(String(e.message || e)); }); + }, [selectedIds, loadBoard, clearSelected]); + + // --- render ------------------------------------------------------------- + if (loading && !board) { + return h("div", { className: "p-8 text-sm text-muted-foreground" }, + "Loading Kanban board…"); + } + if (error && !board) { + return h(Card, null, + h(CardContent, { className: "p-6" }, + h("div", { className: "text-sm text-destructive" }, + "Failed to load Kanban board: ", error), + h("div", { className: "text-xs text-muted-foreground mt-2" }, + "The backend auto-creates kanban.db on first read. If this persists, check the dashboard logs."), + ), + ); + } + if (!filteredBoard) return null; + + const renderMd = !config || config.render_markdown !== false; + + return h(ErrorBoundary, null, + h("div", { className: "hermes-kanban flex flex-col gap-4" }, + h(BoardToolbar, { + board: board, + tenantFilter, setTenantFilter, + assigneeFilter, setAssigneeFilter, + includeArchived, setIncludeArchived, + laneByProfile, setLaneByProfile, + search, setSearch, + onNudgeDispatch: function () { + SDK.fetchJSON(`${API}/dispatch?max=8`, { method: "POST" }) + .then(loadBoard) + .catch(function (e) { setError(String(e.message || e)); }); + }, + onRefresh: loadBoard, + }), + selectedIds.size > 0 ? h(BulkActionBar, { + count: selectedIds.size, + assignees: (board && board.assignees) || [], + onApply: applyBulk, + onClear: clearSelected, + }) : null, + error ? h("div", { className: "text-xs text-destructive px-2" }, error) : null, + h(BoardColumns, { + board: filteredBoard, + laneByProfile, + selectedIds, + toggleSelected, + onMove: moveTask, + onOpen: setSelectedTaskId, + onCreate: createTask, + allTasks: board.columns.reduce(function (acc, c) { return acc.concat(c.tasks); }, []), + }), + selectedTaskId ? 
h(TaskDrawer, { + taskId: selectedTaskId, + onClose: function () { setSelectedTaskId(null); }, + onRefresh: loadBoard, + renderMarkdown: renderMd, + allTasks: board.columns.reduce(function (acc, c) { return acc.concat(c.tasks); }, []), + eventTick: taskEventTick[selectedTaskId] || 0, + }) : null, + ), + ); + } + + // ------------------------------------------------------------------------- + // Toolbar + // ------------------------------------------------------------------------- + + function BoardToolbar(props) { + const tenants = (props.board && props.board.tenants) || []; + const assignees = (props.board && props.board.assignees) || []; + return h("div", { className: "flex flex-wrap items-end gap-3" }, + h("div", { className: "flex flex-col gap-1" }, + h(Label, { className: "text-xs text-muted-foreground" }, "Search"), + h(Input, { + placeholder: "Filter cards…", + value: props.search, + onChange: function (e) { props.setSearch(e.target.value); }, + className: "w-56 h-8", + }), + ), + h("div", { className: "flex flex-col gap-1" }, + h(Label, { className: "text-xs text-muted-foreground" }, "Tenant"), + h(Select, { + value: props.tenantFilter, + onChange: function (e) { props.setTenantFilter(e.target.value); }, + className: "h-8", + }, + h(SelectOption, { value: "" }, "All tenants"), + tenants.map(function (t) { + return h(SelectOption, { key: t, value: t }, t); + }), + ), + ), + h("div", { className: "flex flex-col gap-1" }, + h(Label, { className: "text-xs text-muted-foreground" }, "Assignee"), + h(Select, { + value: props.assigneeFilter, + onChange: function (e) { props.setAssigneeFilter(e.target.value); }, + className: "h-8", + }, + h(SelectOption, { value: "" }, "All profiles"), + assignees.map(function (a) { + return h(SelectOption, { key: a, value: a }, a); + }), + ), + ), + h("label", { className: "flex items-center gap-2 text-xs" }, + h("input", { + type: "checkbox", + checked: props.includeArchived, + onChange: function (e) { props.setIncludeArchived(e.target.checked); }, + }), + "Show archived", + ), + h("label", { className: "flex items-center gap-2 text-xs", + title: "Group the Running column by assigned profile" }, + h("input", { + type: "checkbox", + checked: props.laneByProfile, + onChange: function (e) { props.setLaneByProfile(e.target.checked); }, + }), + "Lanes by profile", + ), + h("div", { className: "flex-1" }), + h(Button, { + onClick: props.onNudgeDispatch, + className: "h-8 px-3 text-xs border border-border hover:bg-foreground/10 cursor-pointer", + }, "Nudge dispatcher"), + h(Button, { + onClick: props.onRefresh, + className: "h-8 px-3 text-xs border border-border hover:bg-foreground/10 cursor-pointer", + }, "Refresh"), + ); + } + + // ------------------------------------------------------------------------- + // Bulk action bar (appears when >= 1 card is selected) + // ------------------------------------------------------------------------- + + function BulkActionBar(props) { + const [assignee, setAssignee] = useState(""); + return h("div", { className: "hermes-kanban-bulk" }, + h("span", { className: "hermes-kanban-bulk-count" }, + `${props.count} selected`), + h(Button, { + onClick: function () { props.onApply({ status: "ready" }); }, + className: "hermes-kanban-bulk-btn", + }, "→ ready"), + h(Button, { + onClick: function () { + props.onApply({ status: "done" }, + `Mark ${props.count} task(s) as done?`); + }, + className: "hermes-kanban-bulk-btn", + }, "Complete"), + h(Button, { + onClick: function () { + props.onApply({ archive: true }, + `Archive 
${props.count} task(s)?`); + }, + className: "hermes-kanban-bulk-btn", + }, "Archive"), + h("div", { className: "hermes-kanban-bulk-reassign" }, + h(Select, { + value: assignee, + onChange: function (e) { setAssignee(e.target.value); }, + className: "h-7 text-xs", + }, + h(SelectOption, { value: "" }, "— reassign —"), + h(SelectOption, { value: "__none__" }, "(unassign)"), + props.assignees.map(function (a) { + return h(SelectOption, { key: a, value: a }, a); + }), + ), + h(Button, { + onClick: function () { + if (!assignee) return; + props.onApply({ assignee: assignee === "__none__" ? "" : assignee }); + setAssignee(""); + }, + disabled: !assignee, + className: cn("hermes-kanban-bulk-btn", + !assignee ? "opacity-40 cursor-not-allowed" : ""), + }, "Apply"), + ), + h("div", { className: "flex-1" }), + h(Button, { + onClick: props.onClear, + className: "hermes-kanban-bulk-btn", + }, "Clear"), + ); + } + + // ------------------------------------------------------------------------- + // Columns + // ------------------------------------------------------------------------- + + function BoardColumns(props) { + return h("div", { className: "hermes-kanban-columns" }, + props.board.columns.map(function (col) { + return h(Column, { + key: col.name, + column: col, + laneByProfile: props.laneByProfile, + selectedIds: props.selectedIds, + toggleSelected: props.toggleSelected, + onMove: props.onMove, + onOpen: props.onOpen, + onCreate: props.onCreate, + allTasks: props.allTasks, + }); + }), + ); + } + + function Column(props) { + const [dragOver, setDragOver] = useState(false); + const [showCreate, setShowCreate] = useState(false); + const colRef = useRef(null); + + // Listen for our synthetic touch-drop events from attachTouchDrag(). + useEffect(function () { + if (!colRef.current) return undefined; + const el = colRef.current; + function onTouchDrop(e) { + if (e.detail && e.detail.status === props.column.name) { + props.onMove(e.detail.taskId, props.column.name); + } + } + el.addEventListener("hermes-kanban:drop", onTouchDrop); + return function () { el.removeEventListener("hermes-kanban:drop", onTouchDrop); }; + }, [props.column.name, props.onMove]); + + const handleDragOver = function (e) { + e.preventDefault(); + e.dataTransfer.dropEffect = "move"; + if (!dragOver) setDragOver(true); + }; + const handleDragLeave = function () { setDragOver(false); }; + const handleDrop = function (e) { + e.preventDefault(); + setDragOver(false); + const taskId = e.dataTransfer.getData(MIME_TASK); + if (taskId) props.onMove(taskId, props.column.name); + }; + + const lanes = useMemo(function () { + if (!props.laneByProfile || props.column.name !== "running") return null; + const byProfile = {}; + for (const t of props.column.tasks) { + const key = t.assignee || "(unassigned)"; + (byProfile[key] = byProfile[key] || []).push(t); + } + return Object.keys(byProfile).sort().map(function (k) { + return { assignee: k, tasks: byProfile[k] }; + }); + }, [props.column, props.laneByProfile]); + + return h("div", { + ref: colRef, + "data-kanban-column": props.column.name, + className: cn( + "hermes-kanban-column", + dragOver ? 
"hermes-kanban-column--drop" : "", + ), + onDragOver: handleDragOver, + onDragLeave: handleDragLeave, + onDrop: handleDrop, + }, + h("div", { className: "hermes-kanban-column-header" }, + h("span", { className: cn("hermes-kanban-dot", COLUMN_DOT[props.column.name]) }), + h("span", { className: "hermes-kanban-column-label" }, + COLUMN_LABEL[props.column.name] || props.column.name), + h("span", { className: "hermes-kanban-column-count" }, + props.column.tasks.length), + h("button", { + type: "button", + className: "hermes-kanban-column-add", + title: "Create task in this column", + onClick: function () { setShowCreate(function (v) { return !v; }); }, + }, showCreate ? "×" : "+"), + ), + h("div", { className: "hermes-kanban-column-sub" }, + COLUMN_HELP[props.column.name] || ""), + showCreate ? h(InlineCreate, { + columnName: props.column.name, + allTasks: props.allTasks, + onSubmit: function (body) { + props.onCreate(body).then(function () { setShowCreate(false); }); + }, + onCancel: function () { setShowCreate(false); }, + }) : null, + h("div", { className: "hermes-kanban-column-body" }, + props.column.tasks.length === 0 + ? h("div", { className: "hermes-kanban-empty" }, "— no tasks —") + : lanes + ? lanes.map(function (lane) { + return h("div", { key: lane.assignee, className: "hermes-kanban-lane" }, + h("div", { className: "hermes-kanban-lane-head" }, + h("span", { className: "hermes-kanban-lane-name" }, lane.assignee), + h("span", { className: "hermes-kanban-lane-count" }, lane.tasks.length), + ), + lane.tasks.map(function (t) { + return h(TaskCard, { + key: t.id, task: t, + selected: props.selectedIds.has(t.id), + toggleSelected: props.toggleSelected, + onOpen: props.onOpen, + }); + }), + ); + }) + : props.column.tasks.map(function (t) { + return h(TaskCard, { + key: t.id, task: t, + selected: props.selectedIds.has(t.id), + toggleSelected: props.toggleSelected, + onOpen: props.onOpen, + }); + }), + ), + ); + } + + // ------------------------------------------------------------------------- + // Card + // ------------------------------------------------------------------------- + + // Staleness tiers — amber after a grace window, red when clearly stuck. + // Values below are seconds. + const STALENESS = { + ready: { amber: 1 * 60 * 60, red: 24 * 60 * 60 }, + running: { amber: 10 * 60, red: 60 * 60 }, + blocked: { amber: 1 * 60 * 60, red: 24 * 60 * 60 }, + todo: { amber: 7 * 24 * 60 * 60, red: 30 * 24 * 60 * 60 }, + }; + + function stalenessClass(task) { + if (!task || !task.age) return ""; + const age = task.status === "running" + ? task.age.started_age_seconds + : task.age.created_age_seconds; + const tier = STALENESS[task.status]; + if (!tier || age == null) return ""; + if (age >= tier.red) return "hermes-kanban-card--stale-red"; + if (age >= tier.amber) return "hermes-kanban-card--stale-amber"; + return ""; + } + + function TaskCard(props) { + const t = props.task; + const cardRef = useRef(null); + + useEffect(function () { + return attachTouchDrag(cardRef.current, t.id); + }, [t.id]); + + const handleDragStart = function (e) { + e.dataTransfer.setData(MIME_TASK, t.id); + e.dataTransfer.effectAllowed = "move"; + }; + const handleClick = function (e) { + // Shift-click or ctrl/cmd-click toggles selection instead of opening. 
+ if (e.shiftKey || e.ctrlKey || e.metaKey) { + e.preventDefault(); + e.stopPropagation(); + props.toggleSelected(t.id, e.ctrlKey || e.metaKey); + return; + } + props.onOpen(t.id); + }; + const handleCheckbox = function (e) { + e.stopPropagation(); + props.toggleSelected(t.id, true); + }; + + const progress = t.progress; + + return h("div", { + ref: cardRef, + className: cn( + "hermes-kanban-card", + props.selected ? "hermes-kanban-card--selected" : "", + stalenessClass(t), + ), + draggable: true, + onDragStart: handleDragStart, + onClick: handleClick, + }, + h(Card, null, + h(CardContent, { className: "hermes-kanban-card-content" }, + h("div", { className: "hermes-kanban-card-row" }, + h("input", { + type: "checkbox", + className: "hermes-kanban-card-check", + checked: props.selected, + onChange: handleCheckbox, + onClick: function (e) { e.stopPropagation(); }, + title: "Select for bulk actions", + }), + h("span", { className: "hermes-kanban-card-id" }, t.id), + t.priority > 0 + ? h(Badge, { className: "hermes-kanban-priority" }, `P${t.priority}`) + : null, + t.tenant + ? h(Badge, { variant: "outline", className: "hermes-kanban-tag" }, t.tenant) + : null, + progress + ? h("span", { + className: cn( + "hermes-kanban-progress", + progress.done === progress.total ? "hermes-kanban-progress--full" : "", + ), + title: `${progress.done} of ${progress.total} child tasks done`, + }, `${progress.done}/${progress.total}`) + : null, + ), + h("div", { className: "hermes-kanban-card-title" }, t.title || "(untitled)"), + h("div", { className: "hermes-kanban-card-row hermes-kanban-card-meta" }, + t.assignee + ? h("span", { className: "hermes-kanban-assignee" }, "@", t.assignee) + : h("span", { className: "hermes-kanban-unassigned" }, "unassigned"), + t.comment_count > 0 + ? h("span", { className: "hermes-kanban-count" }, "💬 ", t.comment_count) + : null, + t.link_counts && (t.link_counts.parents + t.link_counts.children) > 0 + ? h("span", { className: "hermes-kanban-count" }, + "↔ ", t.link_counts.parents + t.link_counts.children) + : null, + h("span", { className: "hermes-kanban-ago" }, + timeAgo ? timeAgo(t.created_at) : ""), + ), + ), + ), + ); + } + + // ------------------------------------------------------------------------- + // Inline create (with parent selector) + // ------------------------------------------------------------------------- + + function InlineCreate(props) { + const [title, setTitle] = useState(""); + const [assignee, setAssignee] = useState(""); + const [priority, setPriority] = useState(0); + const [parent, setParent] = useState(""); + const [skills, setSkills] = useState(""); + + const submit = function () { + const trimmed = title.trim(); + if (!trimmed) return; + const body = { + title: trimmed, + assignee: assignee.trim() || null, + priority: Number(priority) || 0, + triage: props.columnName === "triage", + }; + if (parent) body.parents = [parent]; + // Parse comma-separated skills into a clean list. Blank = no + // extras (omit key so backend leaves it null). The dispatcher + // always auto-loads kanban-worker; these are extras on top. 
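+      // e.g. "translation, github-code-review" ->
+      // ["translation", "github-code-review"]; an all-whitespace value
+      // yields an empty list, so the `skills` key is omitted.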
+ const skillList = skills + .split(",") + .map(function (s) { return s.trim(); }) + .filter(function (s) { return s.length > 0; }); + if (skillList.length > 0) body.skills = skillList; + props.onSubmit(body); + setTitle(""); setAssignee(""); setPriority(0); setParent(""); setSkills(""); + }; + + return h("div", { className: "hermes-kanban-inline-create" }, + h(Input, { + value: title, + onChange: function (e) { setTitle(e.target.value); }, + onKeyDown: function (e) { + if (e.key === "Enter") { e.preventDefault(); submit(); } + if (e.key === "Escape") props.onCancel(); + }, + placeholder: props.columnName === "triage" + ? "Rough idea — AI will spec it…" + : "New task title…", + autoFocus: true, + className: "h-8 text-sm", + }), + h("div", { className: "flex gap-2" }, + h(Input, { + value: assignee, + onChange: function (e) { setAssignee(e.target.value); }, + placeholder: props.columnName === "triage" ? "specifier" : "assignee", + className: "h-7 text-xs flex-1", + }), + h(Input, { + type: "number", + value: priority, + onChange: function (e) { setPriority(e.target.value); }, + placeholder: "pri", + className: "h-7 text-xs w-16", + }), + ), + h(Input, { + value: skills, + onChange: function (e) { setSkills(e.target.value); }, + placeholder: "skills (optional, comma-separated): translation, github-code-review", + title: "Force-load these skills into the worker (in addition to the built-in kanban-worker).", + className: "h-7 text-xs", + }), + h(Select, { + value: parent, + onChange: function (e) { setParent(e.target.value); }, + className: "h-7 text-xs", + }, + h(SelectOption, { value: "" }, "— no parent —"), + (props.allTasks || []).map(function (t) { + return h(SelectOption, { key: t.id, value: t.id }, + `${t.id} — ${(t.title || "").slice(0, 50)}`); + }), + ), + h("div", { className: "flex gap-2" }, + h(Button, { + onClick: submit, + className: "h-7 px-2 text-xs border border-border hover:bg-foreground/10 cursor-pointer flex-1", + }, "Create"), + h(Button, { + onClick: props.onCancel, + className: "h-7 px-2 text-xs border border-border hover:bg-foreground/10 cursor-pointer", + }, "Cancel"), + ), + ); + } + + // ------------------------------------------------------------------------- + // Task drawer + // ------------------------------------------------------------------------- + + function TaskDrawer(props) { + const [data, setData] = useState(null); + const [loading, setLoading] = useState(true); + const [err, setErr] = useState(null); + const [newComment, setNewComment] = useState(""); + const [editing, setEditing] = useState(false); + + const load = useCallback(function () { + return SDK.fetchJSON(`${API}/tasks/${encodeURIComponent(props.taskId)}`) + .then(function (d) { setData(d); setErr(null); }) + .catch(function (e) { setErr(String(e.message || e)); }) + .finally(function () { setLoading(false); }); + }, [props.taskId]); + + // Reload when the WS stream reports new events for this task id + // (completion, block, crash, etc. — anything that'd make the drawer + // show stale data if we only loaded on mount). 
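+    // `props.eventTick` is the per-task counter KanbanPage bumps from the
+    // WS stream; having it in the deps below turns each live event into a
+    // re-fetch without the drawer needing a socket of its own.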
+ useEffect(function () { load(); }, [load, props.eventTick]); + useEffect(function () { + function onKey(e) { if (e.key === "Escape" && !editing) props.onClose(); } + window.addEventListener("keydown", onKey); + return function () { window.removeEventListener("keydown", onKey); }; + }, [props.onClose, editing]); + + const handleComment = function () { + const body = newComment.trim(); + if (!body) return; + SDK.fetchJSON(`${API}/tasks/${encodeURIComponent(props.taskId)}/comments`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ body }), + }).then(function () { + setNewComment(""); + load(); + props.onRefresh(); + }).catch(function (e) { setErr(String(e.message || e)); }); + }; + + const doPatch = function (patch, opts) { + if (opts && opts.confirm && !window.confirm(opts.confirm)) { + return Promise.resolve(); + } + return SDK.fetchJSON(`${API}/tasks/${encodeURIComponent(props.taskId)}`, { + method: "PATCH", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify(patch), + }).then(function () { load(); props.onRefresh(); }); + }; + + const addLink = function (parentId) { + return SDK.fetchJSON(`${API}/links`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ parent_id: parentId, child_id: props.taskId }), + }).then(function () { load(); props.onRefresh(); }) + .catch(function (e) { setErr(String(e.message || e)); }); + }; + const removeLink = function (parentId) { + const qs = new URLSearchParams({ parent_id: parentId, child_id: props.taskId }); + return SDK.fetchJSON(`${API}/links?${qs}`, { method: "DELETE" }) + .then(function () { load(); props.onRefresh(); }) + .catch(function (e) { setErr(String(e.message || e)); }); + }; + const addChild = function (childId) { + return SDK.fetchJSON(`${API}/links`, { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ parent_id: props.taskId, child_id: childId }), + }).then(function () { load(); props.onRefresh(); }) + .catch(function (e) { setErr(String(e.message || e)); }); + }; + const removeChild = function (childId) { + const qs = new URLSearchParams({ parent_id: props.taskId, child_id: childId }); + return SDK.fetchJSON(`${API}/links?${qs}`, { method: "DELETE" }) + .then(function () { load(); props.onRefresh(); }) + .catch(function (e) { setErr(String(e.message || e)); }); + }; + + return h("div", { className: "hermes-kanban-drawer-shade", onClick: props.onClose }, + h("div", { + className: "hermes-kanban-drawer", + onClick: function (e) { e.stopPropagation(); }, + }, + h("div", { className: "hermes-kanban-drawer-head" }, + h("span", { className: "text-xs text-muted-foreground" }, props.taskId), + h("button", { + type: "button", + onClick: props.onClose, + className: "hermes-kanban-drawer-close", + title: "Close (Esc)", + }, "×"), + ), + loading ? h("div", { className: "p-4 text-sm text-muted-foreground" }, "Loading…") : + err ? h("div", { className: "p-4 text-sm text-destructive" }, err) : + data ? h(TaskDetail, { + data, editing, setEditing, + renderMarkdown: props.renderMarkdown, + allTasks: props.allTasks, + onPatch: doPatch, + onAddParent: addLink, + onRemoveParent: removeLink, + onAddChild: addChild, + onRemoveChild: removeChild, + }) : null, + data ? 
h("div", { className: "hermes-kanban-drawer-comment-row" }, + h(Input, { + value: newComment, + onChange: function (e) { setNewComment(e.target.value); }, + onKeyDown: function (e) { + if (e.key === "Enter" && !e.shiftKey) { + e.preventDefault(); handleComment(); + } + }, + placeholder: "Add a comment… (Enter to submit)", + className: "h-8 text-sm flex-1", + }), + h(Button, { + onClick: handleComment, + className: "h-8 px-3 text-xs border border-border hover:bg-foreground/10 cursor-pointer", + }, "Comment"), + ) : null, + ), + ); + } + + function TaskDetail(props) { + const t = props.data.task; + const comments = props.data.comments || []; + const events = props.data.events || []; + const links = props.data.links || { parents: [], children: [] }; + + return h("div", { className: "hermes-kanban-drawer-body" }, + h("div", { className: "hermes-kanban-drawer-title" }, + h("span", { className: cn("hermes-kanban-dot", COLUMN_DOT[t.status]) }), + props.editing + ? h(TitleEditor, { + initial: t.title || "", + onSave: function (newTitle) { + return props.onPatch({ title: newTitle }).then(function () { props.setEditing(false); }); + }, + onCancel: function () { props.setEditing(false); }, + }) + : h("span", { + className: "hermes-kanban-drawer-title-text", + title: "Click to edit", + onClick: function () { props.setEditing(true); }, + }, t.title || "(untitled)"), + ), + h("div", { className: "hermes-kanban-drawer-meta" }, + h(MetaRow, { label: "Status", value: t.status }), + h(AssigneeEditor, { task: t, onPatch: props.onPatch }), + h(PriorityEditor, { task: t, onPatch: props.onPatch }), + t.tenant ? h(MetaRow, { label: "Tenant", value: t.tenant }) : null, + h(MetaRow, { + label: "Workspace", + value: `${t.workspace_kind}${t.workspace_path ? ": " + t.workspace_path : ""}`, + }), + (t.skills && t.skills.length > 0) ? h(MetaRow, { + label: "Skills", + value: t.skills.join(", "), + }) : null, + t.created_by ? h(MetaRow, { label: "Created by", value: t.created_by }) : null, + ), + h(StatusActions, { task: t, onPatch: props.onPatch }), + h(BodyEditor, { + task: t, + renderMarkdown: props.renderMarkdown, + onPatch: props.onPatch, + }), + h(DependencyEditor, { + task: t, + links, allTasks: props.allTasks, + onAddParent: props.onAddParent, + onRemoveParent: props.onRemoveParent, + onAddChild: props.onAddChild, + onRemoveChild: props.onRemoveChild, + }), + t.result ? h("div", { className: "hermes-kanban-section" }, + h("div", { className: "hermes-kanban-section-head" }, "Result"), + h(MarkdownBlock, { source: t.result, enabled: props.renderMarkdown }), + ) : null, + h("div", { className: "hermes-kanban-section" }, + h("div", { className: "hermes-kanban-section-head" }, `Comments (${comments.length})`), + comments.length === 0 + ? h("div", { className: "text-xs text-muted-foreground" }, "— no comments —") + : comments.map(function (c) { + return h("div", { key: c.id, className: "hermes-kanban-comment" }, + h("div", { className: "hermes-kanban-comment-head" }, + h("span", { className: "hermes-kanban-comment-author" }, c.author || "anon"), + h("span", { className: "hermes-kanban-comment-ago" }, + timeAgo ? 
timeAgo(c.created_at) : ""), + ), + h(MarkdownBlock, { source: c.body, enabled: props.renderMarkdown }), + ); + }), + ), + h("div", { className: "hermes-kanban-section" }, + h("div", { className: "hermes-kanban-section-head" }, `Events (${events.length})`), + events.slice().reverse().slice(0, 20).map(function (e) { + return h("div", { key: e.id, className: "hermes-kanban-event" }, + h("span", { className: "hermes-kanban-event-kind" }, e.kind), + h("span", { className: "hermes-kanban-event-ago" }, + timeAgo ? timeAgo(e.created_at) : ""), + e.payload + ? h("code", { className: "hermes-kanban-event-payload" }, + JSON.stringify(e.payload)) + : null, + ); + }), + ), + h(WorkerLogSection, { taskId: t.id }), + h(RunHistorySection, { runs: props.data.runs || [] }), + ); + } + + // Per-attempt history. Closed runs first (most recent last), then the + // active run if any. Each row shows profile / outcome / elapsed / + // summary. Collapsed by default when there are more than three runs. + function RunHistorySection(props) { + const runs = props.runs || []; + const [expanded, setExpanded] = useState(false); + if (runs.length === 0) return null; + const showAll = expanded || runs.length <= 3; + const visible = showAll ? runs : runs.slice(-3); + + const fmtElapsed = function (run) { + if (!run || !run.started_at) return ""; + const end = run.ended_at || Math.floor(Date.now() / 1000); + const secs = Math.max(0, end - run.started_at); + if (secs < 60) return `${secs}s`; + if (secs < 3600) return `${Math.round(secs / 60)}m`; + return `${(secs / 3600).toFixed(1)}h`; + }; + + return h("div", { className: "hermes-kanban-section" }, + h("div", { className: "hermes-kanban-section-head-row" }, + h("span", { className: "hermes-kanban-section-head" }, + `Run history (${runs.length})`), + !showAll + ? h("button", { + type: "button", + onClick: function () { setExpanded(true); }, + className: "hermes-kanban-edit-link", + title: "Show all attempts", + }, `+${runs.length - 3} earlier`) + : null, + ), + visible.map(function (r) { + const outcomeClass = r.ended_at + ? `hermes-kanban-run--${r.outcome || r.status || "ended"}` + : "hermes-kanban-run--active"; + return h("div", { key: r.id, className: cn("hermes-kanban-run", outcomeClass) }, + h("div", { className: "hermes-kanban-run-head" }, + h("span", { className: "hermes-kanban-run-outcome" }, + r.ended_at ? (r.outcome || r.status || "ended") : "active"), + h("span", { className: "hermes-kanban-run-profile" }, + r.profile ? `@${r.profile}` : "(no profile)"), + h("span", { className: "hermes-kanban-run-elapsed" }, fmtElapsed(r)), + h("span", { className: "hermes-kanban-run-ago" }, + timeAgo ? timeAgo(r.started_at) : ""), + ), + r.summary + ? h("div", { className: "hermes-kanban-run-summary" }, r.summary) + : null, + r.error + ? h("div", { className: "hermes-kanban-run-error" }, r.error) + : null, + r.metadata + ? h("code", { className: "hermes-kanban-run-meta" }, + JSON.stringify(r.metadata)) + : null, + ); + }), + ); + } + + // Worker log: loads lazily (one GET on mount), refresh button, tail cap. 
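+  // The /log response is read as (field values illustrative):
+  //   { exists: true, size_bytes: 18234, truncated: false,
+  //     path: "...", content: "..." }
+  // Only those five fields are consumed below.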
+ function WorkerLogSection(props) { + const [state, setState] = useState({ loading: false, data: null, err: null }); + const load = useCallback(function () { + setState({ loading: true, data: null, err: null }); + SDK.fetchJSON(`${API}/tasks/${encodeURIComponent(props.taskId)}/log?tail=100000`) + .then(function (d) { setState({ loading: false, data: d, err: null }); }) + .catch(function (e) { setState({ loading: false, data: null, err: String(e.message || e) }); }); + }, [props.taskId]); + + // Auto-load when the section mounts; the user opened the drawer so the + // cost is one small HTTP round-trip. + useEffect(function () { load(); }, [load]); + + const data = state.data; + let body; + if (state.loading) { + body = h("div", { className: "text-xs text-muted-foreground" }, "Loading log…"); + } else if (state.err) { + body = h("div", { className: "text-xs text-destructive" }, state.err); + } else if (!data || !data.exists) { + body = h("div", { className: "text-xs text-muted-foreground italic" }, + "— no worker log yet (task hasn't spawned or log was rotated away) —"); + } else { + body = h("pre", { className: "hermes-kanban-pre hermes-kanban-log" }, + data.content || "(empty)"); + } + + return h("div", { className: "hermes-kanban-section" }, + h("div", { className: "hermes-kanban-section-head-row" }, + h("span", { className: "hermes-kanban-section-head" }, + "Worker log" + (data && data.size_bytes ? ` (${data.size_bytes} B)` : "")), + h("button", { + type: "button", + onClick: load, + className: "hermes-kanban-edit-link", + title: "Refresh log", + }, "refresh"), + ), + body, + data && data.truncated + ? h("div", { className: "text-xs text-muted-foreground" }, + "(showing last 100 KB — full log at ", data.path, ")") + : null, + ); + } + + function MetaRow(props) { + return h("div", { className: "hermes-kanban-meta-row" }, + h("span", { className: "hermes-kanban-meta-label" }, props.label), + h("span", { className: "hermes-kanban-meta-value" }, props.value), + ); + } + + function TitleEditor(props) { + const [v, setV] = useState(props.initial); + const save = function () { + const t = v.trim(); + if (!t) return; + props.onSave(t); + }; + return h("div", { className: "hermes-kanban-edit-row" }, + h(Input, { + value: v, autoFocus: true, + onChange: function (e) { setV(e.target.value); }, + onKeyDown: function (e) { + if (e.key === "Enter") { e.preventDefault(); save(); } + if (e.key === "Escape") props.onCancel(); + }, + className: "h-8 text-sm flex-1", + }), + h(Button, { onClick: save, + className: "h-7 px-2 text-xs border border-border hover:bg-foreground/10 cursor-pointer", + }, "Save"), + h(Button, { onClick: props.onCancel, + className: "h-7 px-2 text-xs border border-border hover:bg-foreground/10 cursor-pointer", + }, "Cancel"), + ); + } + + function AssigneeEditor(props) { + const [editing, setEditing] = useState(false); + const [v, setV] = useState(props.task.assignee || ""); + useEffect(function () { setV(props.task.assignee || ""); }, [props.task.assignee]); + if (!editing) { + return h("div", { className: "hermes-kanban-meta-row" }, + h("span", { className: "hermes-kanban-meta-label" }, "Assignee"), + h("span", { + className: "hermes-kanban-meta-value hermes-kanban-editable", + onClick: function () { setEditing(true); }, + title: "Click to edit", + }, props.task.assignee || "unassigned"), + ); + } + const save = function () { + props.onPatch({ assignee: v.trim() || "" }).then(function () { setEditing(false); }); + }; + return h("div", { className: "hermes-kanban-meta-row" }, + 
h("span", { className: "hermes-kanban-meta-label" }, "Assignee"), + h(Input, { + value: v, autoFocus: true, + onChange: function (e) { setV(e.target.value); }, + onKeyDown: function (e) { + if (e.key === "Enter") { e.preventDefault(); save(); } + if (e.key === "Escape") setEditing(false); + }, + placeholder: "(empty = unassign)", + className: "h-7 text-xs flex-1", + }), + ); + } + + function PriorityEditor(props) { + const [editing, setEditing] = useState(false); + const [v, setV] = useState(String(props.task.priority || 0)); + useEffect(function () { setV(String(props.task.priority || 0)); }, [props.task.priority]); + if (!editing) { + return h("div", { className: "hermes-kanban-meta-row" }, + h("span", { className: "hermes-kanban-meta-label" }, "Priority"), + h("span", { + className: "hermes-kanban-meta-value hermes-kanban-editable", + onClick: function () { setEditing(true); }, + title: "Click to edit", + }, String(props.task.priority)), + ); + } + const save = function () { + props.onPatch({ priority: Number(v) || 0 }).then(function () { setEditing(false); }); + }; + return h("div", { className: "hermes-kanban-meta-row" }, + h("span", { className: "hermes-kanban-meta-label" }, "Priority"), + h(Input, { + type: "number", value: v, autoFocus: true, + onChange: function (e) { setV(e.target.value); }, + onKeyDown: function (e) { + if (e.key === "Enter") { e.preventDefault(); save(); } + if (e.key === "Escape") setEditing(false); + }, + className: "h-7 text-xs w-20", + }), + ); + } + + function BodyEditor(props) { + const [editing, setEditing] = useState(false); + const [v, setV] = useState(props.task.body || ""); + useEffect(function () { setV(props.task.body || ""); }, [props.task.body]); + const save = function () { + props.onPatch({ body: v }).then(function () { setEditing(false); }); + }; + return h("div", { className: "hermes-kanban-section" }, + h("div", { className: "hermes-kanban-section-head-row" }, + h("span", { className: "hermes-kanban-section-head" }, "Description"), + editing + ? h("div", { className: "flex gap-1" }, + h(Button, { onClick: save, + className: "h-6 px-2 text-xs border border-border hover:bg-foreground/10 cursor-pointer", + }, "Save"), + h(Button, { onClick: function () { setEditing(false); setV(props.task.body || ""); }, + className: "h-6 px-2 text-xs border border-border hover:bg-foreground/10 cursor-pointer", + }, "Cancel"), + ) + : h("button", { + type: "button", + onClick: function () { setEditing(true); }, + className: "hermes-kanban-edit-link", + title: "Edit description", + }, "edit"), + ), + editing + ? h("textarea", { + className: "hermes-kanban-textarea", + value: v, + rows: 8, + onChange: function (e) { setV(e.target.value); }, + }) + : props.task.body + ? h(MarkdownBlock, { source: props.task.body, enabled: props.renderMarkdown }) + : h("div", { className: "text-xs text-muted-foreground italic" }, "— no description —"), + ); + } + + function DependencyEditor(props) { + const { task, links, allTasks } = props; + const [newParent, setNewParent] = useState(""); + const [newChild, setNewChild] = useState(""); + // Filter out self + existing links when offering the "add" dropdown. 
+ const candidatesFor = function (excludeSet) { + return (allTasks || []).filter(function (t) { + return t.id !== task.id && !excludeSet.has(t.id); + }); + }; + const parentExclude = new Set([task.id, ...(links.parents || [])]); + const childExclude = new Set([task.id, ...(links.children || [])]); + + return h("div", { className: "hermes-kanban-section" }, + h("div", { className: "hermes-kanban-section-head" }, "Dependencies"), + h("div", { className: "hermes-kanban-deps-row" }, + h("span", { className: "hermes-kanban-deps-label" }, "Parents:"), + h("div", { className: "hermes-kanban-deps-chips" }, + (links.parents || []).length === 0 + ? h("span", { className: "hermes-kanban-deps-empty" }, "none") + : (links.parents || []).map(function (id) { + return h("span", { key: id, className: "hermes-kanban-dep-chip" }, + id, + h("button", { + type: "button", + className: "hermes-kanban-dep-chip-x", + onClick: function () { props.onRemoveParent(id); }, + title: "Remove dependency", + }, "×"), + ); + }), + ), + ), + h("div", { className: "hermes-kanban-deps-row" }, + h(Select, { + value: newParent, + onChange: function (e) { setNewParent(e.target.value); }, + className: "h-7 text-xs flex-1", + }, + h(SelectOption, { value: "" }, "— add parent —"), + candidatesFor(parentExclude).map(function (t) { + return h(SelectOption, { key: t.id, value: t.id }, + `${t.id} — ${(t.title || "").slice(0, 50)}`); + }), + ), + h(Button, { + onClick: function () { + if (!newParent) return; + props.onAddParent(newParent).then(function () { setNewParent(""); }); + }, + disabled: !newParent, + className: cn("h-7 px-2 text-xs border border-border cursor-pointer", + !newParent ? "opacity-40 cursor-not-allowed" : "hover:bg-foreground/10"), + }, "+ parent"), + ), + h("div", { className: "hermes-kanban-deps-row" }, + h("span", { className: "hermes-kanban-deps-label" }, "Children:"), + h("div", { className: "hermes-kanban-deps-chips" }, + (links.children || []).length === 0 + ? h("span", { className: "hermes-kanban-deps-empty" }, "none") + : (links.children || []).map(function (id) { + return h("span", { key: id, className: "hermes-kanban-dep-chip" }, + id, + h("button", { + type: "button", + className: "hermes-kanban-dep-chip-x", + onClick: function () { props.onRemoveChild(id); }, + title: "Remove dependency", + }, "×"), + ); + }), + ), + ), + h("div", { className: "hermes-kanban-deps-row" }, + h(Select, { + value: newChild, + onChange: function (e) { setNewChild(e.target.value); }, + className: "h-7 text-xs flex-1", + }, + h(SelectOption, { value: "" }, "— add child —"), + candidatesFor(childExclude).map(function (t) { + return h(SelectOption, { key: t.id, value: t.id }, + `${t.id} — ${(t.title || "").slice(0, 50)}`); + }), + ), + h(Button, { + onClick: function () { + if (!newChild) return; + props.onAddChild(newChild).then(function () { setNewChild(""); }); + }, + disabled: !newChild, + className: cn("h-7 px-2 text-xs border border-border cursor-pointer", + !newChild ? "opacity-40 cursor-not-allowed" : "hover:bg-foreground/10"), + }, "+ child"), + ), + ); + } + + function StatusActions(props) { + const t = props.task; + const b = function (label, patch, enabled, confirmMsg) { + return h(Button, { + onClick: function () { if (enabled !== false) props.onPatch(patch, { confirm: confirmMsg }); }, + disabled: enabled === false, + className: cn( + "h-7 px-2 text-xs border border-border cursor-pointer", + enabled === false ? 
"opacity-40 cursor-not-allowed" : "hover:bg-foreground/10", + ), + }, label); + }; + return h("div", { className: "hermes-kanban-actions" }, + b("→ triage", { status: "triage" }, t.status !== "triage"), + b("→ ready", { status: "ready" }, t.status !== "ready"), + b("→ running", { status: "running" }, t.status !== "running"), + b("Block", { status: "blocked" }, + t.status === "running" || t.status === "ready", + DESTRUCTIVE_TRANSITIONS.blocked), + b("Unblock", { status: "ready" }, t.status === "blocked"), + b("Complete", { status: "done" }, + t.status === "running" || t.status === "ready" || t.status === "blocked", + DESTRUCTIVE_TRANSITIONS.done), + b("Archive", { status: "archived" }, t.status !== "archived", + DESTRUCTIVE_TRANSITIONS.archived), + ); + } + + // ------------------------------------------------------------------------- + // Register + // ------------------------------------------------------------------------- + + if (window.__HERMES_PLUGINS__ && typeof window.__HERMES_PLUGINS__.register === "function") { + window.__HERMES_PLUGINS__.register("kanban", KanbanPage); + } +})(); diff --git a/plugins/kanban/dashboard/dist/style.css b/plugins/kanban/dashboard/dist/style.css new file mode 100644 index 00000000000..6ac7f5d4b44 --- /dev/null +++ b/plugins/kanban/dashboard/dist/style.css @@ -0,0 +1,752 @@ +/* + * Hermes Kanban — dashboard plugin styles. + * + * All colors reference theme CSS vars so the board reskins with the + * active dashboard theme. No hardcoded palette. + */ + +.hermes-kanban { + width: 100%; +} + +/* ---- Columns layout -------------------------------------------------- */ + +.hermes-kanban-columns { + display: grid; + grid-template-columns: repeat(auto-fit, minmax(260px, 1fr)); + gap: 0.75rem; + align-items: start; +} + +.hermes-kanban-column { + display: flex; + flex-direction: column; + background: color-mix(in srgb, var(--color-card) 85%, transparent); + border: 1px solid var(--color-border); + border-radius: var(--radius); + padding: 0.5rem; + min-height: 200px; + max-height: calc(100vh - 220px); + transition: border-color 120ms ease, background-color 120ms ease; +} + +.hermes-kanban-column--drop { + border-color: var(--color-ring); + background: color-mix(in srgb, var(--color-ring) 8%, var(--color-card)); +} + +.hermes-kanban-column-header { + display: flex; + align-items: center; + gap: 0.5rem; + padding: 0.25rem 0.25rem 0.35rem; + font-weight: 600; + font-size: 0.85rem; + color: var(--color-foreground); +} + +.hermes-kanban-column-label { + flex: 1; + letter-spacing: 0.01em; +} + +.hermes-kanban-column-count { + font-variant-numeric: tabular-nums; + color: var(--color-muted-foreground); + font-size: 0.75rem; + font-weight: 500; +} + +.hermes-kanban-column-add { + appearance: none; + background: transparent; + border: 1px solid var(--color-border); + color: var(--color-foreground); + border-radius: var(--radius-sm, 0.25rem); + width: 22px; + height: 22px; + line-height: 1; + font-size: 1rem; + cursor: pointer; +} +.hermes-kanban-column-add:hover { + background: color-mix(in srgb, var(--color-foreground) 8%, transparent); +} + +.hermes-kanban-column-sub { + padding: 0 0.25rem 0.5rem; + font-size: 0.7rem; + color: var(--color-muted-foreground); + border-bottom: 1px solid color-mix(in srgb, var(--color-border) 60%, transparent); + margin-bottom: 0.5rem; +} + +.hermes-kanban-column-body { + display: flex; + flex-direction: column; + gap: 0.45rem; + overflow-y: auto; + padding-right: 0.1rem; +} + +.hermes-kanban-empty { + padding: 1.5rem 0.5rem; + text-align: 
center; + font-size: 0.75rem; + color: var(--color-muted-foreground); + border: 1px dashed color-mix(in srgb, var(--color-border) 70%, transparent); + border-radius: var(--radius-sm, 0.25rem); +} + +/* ---- Status dots ----------------------------------------------------- */ + +.hermes-kanban-dot { + display: inline-block; + width: 0.5rem; + height: 0.5rem; + border-radius: 999px; + background: var(--color-muted-foreground); +} +.hermes-kanban-dot-triage { background: #b47dd6; } /* lilac — fresh/unspecified */ +.hermes-kanban-dot-todo { background: var(--color-muted-foreground); } +.hermes-kanban-dot-ready { background: #d4b348; } /* amber */ +.hermes-kanban-dot-running { background: #3fb97d; } /* green */ +.hermes-kanban-dot-blocked { background: var(--color-destructive, #d14a4a); } +.hermes-kanban-dot-done { background: #4a8cd1; } /* blue */ +.hermes-kanban-dot-archived { background: var(--color-border); } + +/* ---- Progress pill (N/M child tasks done) --------------------------- */ + +.hermes-kanban-progress { + font-family: var(--font-mono, ui-monospace, monospace); + font-size: 0.62rem; + padding: 0.05rem 0.35rem; + border-radius: 999px; + background: color-mix(in srgb, var(--color-foreground) 8%, transparent); + border: 1px solid color-mix(in srgb, var(--color-border) 80%, transparent); + color: var(--color-muted-foreground); + letter-spacing: 0.02em; +} +.hermes-kanban-progress--full { + background: color-mix(in srgb, #3fb97d 22%, transparent); + border-color: color-mix(in srgb, #3fb97d 45%, transparent); + color: var(--color-foreground); +} + +/* ---- Lanes (per-profile sub-grouping inside Running) ---------------- */ + +.hermes-kanban-lane { + display: flex; + flex-direction: column; + gap: 0.35rem; + padding: 0.25rem 0 0.35rem; + border-top: 1px dashed color-mix(in srgb, var(--color-border) 70%, transparent); +} +.hermes-kanban-lane:first-child { + border-top: 0; + padding-top: 0; +} +.hermes-kanban-lane-head { + display: flex; + align-items: center; + gap: 0.4rem; + font-size: 0.65rem; + text-transform: uppercase; + letter-spacing: 0.08em; + color: var(--color-muted-foreground); + padding: 0 0.1rem; +} +.hermes-kanban-lane-name { + font-weight: 600; + font-family: var(--font-mono, ui-monospace, monospace); +} +.hermes-kanban-lane-count { + margin-left: auto; + font-variant-numeric: tabular-nums; +} + +/* ---- Card ------------------------------------------------------------ */ + +.hermes-kanban-card { + cursor: grab; + transition: transform 100ms ease, box-shadow 100ms ease; +} +.hermes-kanban-card:hover { + box-shadow: 0 1px 0 0 var(--color-ring) inset, 0 0 0 1px var(--color-ring) inset; +} +.hermes-kanban-card:active { + cursor: grabbing; + transform: scale(0.995); +} + +.hermes-kanban-card-content { + padding: 0.5rem 0.6rem !important; + display: flex; + flex-direction: column; + gap: 0.3rem; +} + +.hermes-kanban-card-row { + display: flex; + align-items: center; + gap: 0.35rem; + flex-wrap: wrap; +} + +.hermes-kanban-card-id { + font-family: var(--font-mono, ui-monospace, monospace); + font-size: 0.65rem; + color: var(--color-muted-foreground); + letter-spacing: 0.03em; +} + +.hermes-kanban-card-title { + font-size: 0.85rem; + font-weight: 500; + line-height: 1.3; + color: var(--color-foreground); + word-break: break-word; +} + +.hermes-kanban-card-meta { + font-size: 0.7rem; + color: var(--color-muted-foreground); + gap: 0.55rem; +} + +.hermes-kanban-priority { + font-size: 0.6rem !important; + padding: 0.05rem 0.3rem !important; + background: color-mix(in srgb, 
var(--color-ring) 18%, transparent); + color: var(--color-foreground); + border: 1px solid color-mix(in srgb, var(--color-ring) 40%, transparent); +} + +.hermes-kanban-tag { + font-size: 0.6rem !important; + padding: 0.05rem 0.3rem !important; +} + +.hermes-kanban-assignee { + font-weight: 500; + color: color-mix(in srgb, var(--color-foreground) 80%, var(--color-muted-foreground)); +} +.hermes-kanban-unassigned { + font-style: italic; +} +.hermes-kanban-ago { + margin-left: auto; +} + +/* ---- Inline create --------------------------------------------------- */ + +.hermes-kanban-inline-create { + display: flex; + flex-direction: column; + gap: 0.35rem; + padding: 0.5rem; + margin-bottom: 0.5rem; + background: color-mix(in srgb, var(--color-card) 70%, transparent); + border: 1px dashed var(--color-border); + border-radius: var(--radius-sm, 0.25rem); +} + +/* ---- Drawer (task detail side panel) --------------------------------- */ + +.hermes-kanban-drawer-shade { + position: fixed; + inset: 0; + background: rgba(0, 0, 0, 0.45); + z-index: 60; + display: flex; + justify-content: flex-end; +} + +.hermes-kanban-drawer { + width: min(480px, 92vw); + height: 100vh; + background: var(--color-card); + border-left: 1px solid var(--color-border); + display: flex; + flex-direction: column; + box-shadow: -4px 0 18px rgba(0, 0, 0, 0.35); + animation: hermes-kanban-drawer-in 180ms ease-out; +} + +@keyframes hermes-kanban-drawer-in { + from { transform: translateX(100%); opacity: 0.3; } + to { transform: translateX(0); opacity: 1; } +} + +.hermes-kanban-drawer-head { + display: flex; + align-items: center; + justify-content: space-between; + padding: 0.6rem 0.8rem; + border-bottom: 1px solid var(--color-border); + font-family: var(--font-mono, ui-monospace, monospace); +} + +.hermes-kanban-drawer-close { + appearance: none; + background: transparent; + border: 0; + color: var(--color-muted-foreground); + font-size: 1.25rem; + line-height: 1; + cursor: pointer; + padding: 0 0.25rem; +} +.hermes-kanban-drawer-close:hover { color: var(--color-foreground); } + +.hermes-kanban-drawer-body { + flex: 1; + overflow-y: auto; + padding: 0.9rem; + display: flex; + flex-direction: column; + gap: 0.85rem; +} + +.hermes-kanban-drawer-title { + display: flex; + align-items: center; + gap: 0.5rem; + font-size: 1rem; + font-weight: 600; +} + +.hermes-kanban-drawer-meta { + display: flex; + flex-direction: column; + gap: 0.15rem; + padding: 0.5rem 0.6rem; + background: color-mix(in srgb, var(--color-foreground) 4%, transparent); + border: 1px solid var(--color-border); + border-radius: var(--radius-sm, 0.25rem); +} + +.hermes-kanban-meta-row { + display: flex; + gap: 0.5rem; + font-size: 0.72rem; +} +.hermes-kanban-meta-label { + width: 92px; + color: var(--color-muted-foreground); +} +.hermes-kanban-meta-value { + color: var(--color-foreground); + word-break: break-word; +} + +.hermes-kanban-actions { + display: flex; + flex-wrap: wrap; + gap: 0.3rem; +} + +.hermes-kanban-section { + display: flex; + flex-direction: column; + gap: 0.35rem; +} + +.hermes-kanban-section-head { + font-size: 0.72rem; + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.07em; + color: var(--color-muted-foreground); +} + +.hermes-kanban-pre { + margin: 0; + padding: 0.45rem 0.55rem; + white-space: pre-wrap; + word-break: break-word; + background: color-mix(in srgb, var(--color-foreground) 4%, transparent); + border: 1px solid var(--color-border); + border-radius: var(--radius-sm, 0.25rem); + font-family: var(--font-mono, 
ui-monospace, monospace); + font-size: 0.72rem; + color: var(--color-foreground); +} + +.hermes-kanban-comment { + border-left: 2px solid color-mix(in srgb, var(--color-ring) 35%, transparent); + padding-left: 0.5rem; + display: flex; + flex-direction: column; + gap: 0.2rem; +} + +.hermes-kanban-comment-head { + display: flex; + gap: 0.5rem; + font-size: 0.7rem; +} +.hermes-kanban-comment-author { + font-weight: 600; + color: var(--color-foreground); +} +.hermes-kanban-comment-ago { + color: var(--color-muted-foreground); +} + +.hermes-kanban-event { + display: flex; + gap: 0.5rem; + font-size: 0.7rem; + color: var(--color-muted-foreground); + font-family: var(--font-mono, ui-monospace, monospace); +} +.hermes-kanban-event-kind { + color: var(--color-foreground); + min-width: 6rem; +} +.hermes-kanban-event-payload { + color: var(--color-muted-foreground); + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + max-width: 280px; +} + +.hermes-kanban-drawer-comment-row { + display: flex; + gap: 0.4rem; + padding: 0.55rem 0.75rem; + border-top: 1px solid var(--color-border); + background: color-mix(in srgb, var(--color-card) 90%, transparent); +} + +.hermes-kanban-count { + display: inline-flex; + gap: 0.2rem; + align-items: center; +} + +/* ---- Selection chrome ----------------------------------------------- */ + +.hermes-kanban-card--selected :where(.hermes-kanban-card-content) { + box-shadow: 0 0 0 2px var(--color-ring) inset, + 0 0 0 1px var(--color-ring) inset; + background: color-mix(in srgb, var(--color-ring) 6%, var(--color-card)); +} + +.hermes-kanban-card-check { + width: 0.85rem; + height: 0.85rem; + margin: 0; + cursor: pointer; + accent-color: var(--color-ring); +} + +/* ---- Bulk action bar ------------------------------------------------ */ + +.hermes-kanban-bulk { + display: flex; + align-items: center; + gap: 0.5rem; + padding: 0.4rem 0.75rem; + background: color-mix(in srgb, var(--color-ring) 10%, var(--color-card)); + border: 1px solid color-mix(in srgb, var(--color-ring) 40%, var(--color-border)); + border-radius: var(--radius-sm, 0.25rem); + flex-wrap: wrap; +} +.hermes-kanban-bulk-count { + font-weight: 600; + font-size: 0.75rem; + padding-right: 0.25rem; +} +.hermes-kanban-bulk-btn { + height: 1.7rem !important; + padding: 0 0.5rem !important; + font-size: 0.7rem !important; + border: 1px solid var(--color-border); + cursor: pointer; +} +.hermes-kanban-bulk-btn:hover { + background: color-mix(in srgb, var(--color-foreground) 8%, transparent); +} +.hermes-kanban-bulk-reassign { + display: flex; + align-items: center; + gap: 0.25rem; + padding-left: 0.5rem; + border-left: 1px solid color-mix(in srgb, var(--color-border) 70%, transparent); +} + +/* ---- Dependency editor chips --------------------------------------- */ + +.hermes-kanban-deps-row { + display: flex; + align-items: center; + gap: 0.5rem; + margin-bottom: 0.4rem; +} +.hermes-kanban-deps-label { + font-size: 0.68rem; + text-transform: uppercase; + letter-spacing: 0.08em; + color: var(--color-muted-foreground); + min-width: 4rem; +} +.hermes-kanban-deps-chips { + display: flex; + gap: 0.3rem; + flex-wrap: wrap; + flex: 1; +} +.hermes-kanban-deps-empty { + font-size: 0.7rem; + color: var(--color-muted-foreground); + font-style: italic; +} +.hermes-kanban-dep-chip { + display: inline-flex; + align-items: center; + gap: 0.15rem; + padding: 0.1rem 0.35rem; + background: color-mix(in srgb, var(--color-foreground) 6%, transparent); + border: 1px solid var(--color-border); + border-radius: 
var(--radius-sm, 0.25rem); + font-family: var(--font-mono, ui-monospace, monospace); + font-size: 0.68rem; + color: var(--color-foreground); +} +.hermes-kanban-dep-chip-x { + appearance: none; + background: transparent; + border: 0; + color: var(--color-muted-foreground); + cursor: pointer; + font-size: 0.85rem; + line-height: 1; + padding: 0 0.15rem; +} +.hermes-kanban-dep-chip-x:hover { color: var(--color-destructive, #d14a4a); } + +/* ---- Inline edit affordances --------------------------------------- */ + +.hermes-kanban-editable { + cursor: pointer; + border-bottom: 1px dotted color-mix(in srgb, var(--color-border) 80%, transparent); +} +.hermes-kanban-editable:hover { + color: var(--color-foreground); + border-bottom-color: var(--color-ring); +} + +.hermes-kanban-drawer-title-text { + cursor: pointer; +} +.hermes-kanban-drawer-title-text:hover { + text-decoration: underline; + text-decoration-color: var(--color-ring); + text-decoration-style: dotted; + text-underline-offset: 3px; +} + +.hermes-kanban-edit-row { + display: flex; + align-items: center; + gap: 0.35rem; + width: 100%; +} + +.hermes-kanban-section-head-row { + display: flex; + align-items: center; + justify-content: space-between; + gap: 0.5rem; +} +.hermes-kanban-edit-link { + appearance: none; + background: transparent; + border: 0; + color: var(--color-muted-foreground); + font-size: 0.7rem; + text-transform: uppercase; + letter-spacing: 0.05em; + cursor: pointer; + padding: 0; +} +.hermes-kanban-edit-link:hover { color: var(--color-ring); } + +.hermes-kanban-textarea { + width: 100%; + min-height: 8rem; + background: var(--color-card); + color: var(--color-foreground); + border: 1px solid var(--color-border); + border-radius: var(--radius-sm, 0.25rem); + padding: 0.5rem 0.6rem; + font-family: var(--font-mono, ui-monospace, monospace); + font-size: 0.8rem; + line-height: 1.5; + resize: vertical; +} +.hermes-kanban-textarea:focus { + outline: none; + border-color: var(--color-ring); + box-shadow: 0 0 0 2px color-mix(in srgb, var(--color-ring) 30%, transparent); +} + +/* ---- Markdown rendering -------------------------------------------- */ + +.hermes-kanban-md { + font-size: 0.8rem; + line-height: 1.55; + color: var(--color-foreground); +} +.hermes-kanban-md p { margin: 0.25rem 0; } +.hermes-kanban-md h1, +.hermes-kanban-md h2, +.hermes-kanban-md h3, +.hermes-kanban-md h4 { + margin: 0.6rem 0 0.2rem; + line-height: 1.25; +} +.hermes-kanban-md h1 { font-size: 1.05rem; } +.hermes-kanban-md h2 { font-size: 0.95rem; } +.hermes-kanban-md h3 { font-size: 0.88rem; } +.hermes-kanban-md h4 { font-size: 0.82rem; } +.hermes-kanban-md ul { + margin: 0.25rem 0 0.25rem 1.1rem; + padding: 0; +} +.hermes-kanban-md li { margin: 0.1rem 0; } +.hermes-kanban-md a { + color: var(--color-ring); + text-decoration: underline; +} +.hermes-kanban-md code { + font-family: var(--font-mono, ui-monospace, monospace); + font-size: 0.75rem; + padding: 0.05rem 0.3rem; + background: color-mix(in srgb, var(--color-foreground) 8%, transparent); + border-radius: 3px; +} +.hermes-kanban-md-code { + margin: 0.35rem 0; + padding: 0.5rem 0.6rem; + background: color-mix(in srgb, var(--color-foreground) 5%, transparent); + border: 1px solid var(--color-border); + border-radius: var(--radius-sm, 0.25rem); + overflow-x: auto; +} +.hermes-kanban-md-code code { + background: transparent; + padding: 0; + font-size: 0.75rem; + white-space: pre; +} +.hermes-kanban-md strong { font-weight: 600; } + +/* ---- Touch-drag proxy 
---------------------------------------------- */ + +.hermes-kanban-touch-proxy { + pointer-events: none; + opacity: 0.85; + box-shadow: 0 8px 20px rgba(0, 0, 0, 0.35); + transform: scale(1.02); + transition: none; +} + + +/* ---- Staleness tiers ------------------------------------------------ */ + +.hermes-kanban-card--stale-amber :where(.hermes-kanban-card-content) { + box-shadow: 0 0 0 1px #d4b34888 inset; +} +.hermes-kanban-card--stale-amber:hover :where(.hermes-kanban-card-content) { + box-shadow: 0 0 0 2px #d4b348 inset; +} +.hermes-kanban-card--stale-red :where(.hermes-kanban-card-content) { + box-shadow: 0 0 0 1px var(--color-destructive, #d14a4a) inset, + 0 0 8px color-mix(in srgb, var(--color-destructive, #d14a4a) 30%, transparent); +} +.hermes-kanban-card--stale-red:hover :where(.hermes-kanban-card-content) { + box-shadow: 0 0 0 2px var(--color-destructive, #d14a4a) inset, + 0 0 10px color-mix(in srgb, var(--color-destructive, #d14a4a) 45%, transparent); +} + +/* ---- Worker log pane ------------------------------------------------ */ + +.hermes-kanban-log { + max-height: 340px; + overflow: auto; + white-space: pre; + font-size: 0.7rem; + line-height: 1.45; +} + + +/* ---- Run history (per-attempt log in the drawer) ------------------- */ + +.hermes-kanban-run { + border-left: 2px solid var(--color-border); + padding: 0.35rem 0.5rem; + margin-bottom: 0.4rem; + background: color-mix(in srgb, var(--color-foreground) 3%, transparent); + border-radius: var(--radius-sm, 0.25rem); +} +.hermes-kanban-run--active { border-left-color: #3fb97d; } +.hermes-kanban-run--completed { border-left-color: #4a8cd1; } +.hermes-kanban-run--ended { border-left-color: #6b7280; } /* generic fallback when outcome is unset */ +.hermes-kanban-run--blocked { border-left-color: var(--color-destructive, #d14a4a); } +.hermes-kanban-run--crashed, +.hermes-kanban-run--timed_out, +.hermes-kanban-run--gave_up, +.hermes-kanban-run--spawn_failed { + border-left-color: var(--color-destructive, #d14a4a); + background: color-mix(in srgb, var(--color-destructive, #d14a4a) 6%, transparent); +} +.hermes-kanban-run--reclaimed { border-left-color: #d4b348; } + +.hermes-kanban-run-head { + display: flex; + align-items: center; + gap: 0.6rem; + font-size: 0.7rem; +} +.hermes-kanban-run-outcome { + font-family: var(--font-mono, ui-monospace, monospace); + font-weight: 600; + text-transform: uppercase; + letter-spacing: 0.05em; + color: var(--color-foreground); +} +.hermes-kanban-run-profile { + color: var(--color-muted-foreground); +} +.hermes-kanban-run-elapsed { + font-variant-numeric: tabular-nums; + color: var(--color-muted-foreground); +} +.hermes-kanban-run-ago { + margin-left: auto; + color: var(--color-muted-foreground); +} +.hermes-kanban-run-summary { + font-size: 0.75rem; + padding: 0.2rem 0 0; + color: var(--color-foreground); +} +.hermes-kanban-run-error { + font-size: 0.7rem; + color: var(--color-destructive, #d14a4a); + padding: 0.15rem 0 0; + font-family: var(--font-mono, ui-monospace, monospace); +} +.hermes-kanban-run-meta { + display: block; + font-size: 0.65rem; + padding: 0.15rem 0 0; + color: var(--color-muted-foreground); + white-space: pre-wrap; + word-break: break-word; + font-family: var(--font-mono, ui-monospace, monospace); +} diff --git a/plugins/kanban/dashboard/manifest.json b/plugins/kanban/dashboard/manifest.json new file mode 100644 index 00000000000..8be4b8c4517 --- /dev/null +++ b/plugins/kanban/dashboard/manifest.json @@ -0,0 +1,14 @@ +{ + "name": "kanban", + "label": "Kanban", + 
"description": "Multi-agent collaboration board — drag-drop cards across columns, read comment threads, see which profile is running what", + "icon": "Package", + "version": "1.0.0", + "tab": { + "path": "/kanban", + "position": "after:skills" + }, + "entry": "dist/index.js", + "css": "dist/style.css", + "api": "plugin_api.py" +} diff --git a/plugins/kanban/dashboard/plugin_api.py b/plugins/kanban/dashboard/plugin_api.py new file mode 100644 index 00000000000..acccf63c9de --- /dev/null +++ b/plugins/kanban/dashboard/plugin_api.py @@ -0,0 +1,845 @@ +"""Kanban dashboard plugin — backend API routes. + +Mounted at /api/plugins/kanban/ by the dashboard plugin system. + +This layer is intentionally thin: every handler is a small wrapper around +``hermes_cli.kanban_db`` or a direct SQL query. Writes use the same code +paths the CLI and gateway ``/kanban`` command use, so the three surfaces +cannot drift. + +Live updates arrive via the ``/events`` WebSocket, which tails the +append-only ``task_events`` table on a short poll interval (WAL mode lets +reads run alongside the dispatcher's IMMEDIATE write transactions). + +Security note +------------- +The dashboard's HTTP auth middleware (``web_server.auth_middleware``) +explicitly skips ``/api/plugins/`` — plugin routes are unauthenticated by +design because the dashboard binds to localhost by default. For the +WebSocket we still require the session token as a ``?token=`` query +parameter (browsers cannot set the ``Authorization`` header on an upgrade +request), matching the established pattern used by the in-browser PTY +bridge in ``hermes_cli/web_server.py``. If you run the dashboard with +``--host 0.0.0.0``, every plugin route — kanban included — becomes +reachable from the network. Don't do that on a shared host. +""" + +from __future__ import annotations + +import asyncio +import hmac +import json +import logging +import sqlite3 +import time +from dataclasses import asdict +from typing import Any, Optional + +from fastapi import APIRouter, HTTPException, Query, WebSocket, WebSocketDisconnect, status as http_status +from pydantic import BaseModel, Field + +from hermes_cli import kanban_db + +log = logging.getLogger(__name__) + +router = APIRouter() + + +# --------------------------------------------------------------------------- +# Auth helper — WebSocket only (HTTP routes live behind the dashboard's +# existing plugin-bypass; this is documented above). +# --------------------------------------------------------------------------- + +def _check_ws_token(provided: Optional[str]) -> bool: + """Constant-time compare against the dashboard session token. + + Imported lazily so the plugin still loads in test contexts where the + dashboard web_server module isn't importable (e.g. the bare-FastAPI + test harness). + """ + if not provided: + return False + try: + from hermes_cli import web_server as _ws + except Exception: + # No dashboard context (tests). Accept so the tail loop is still + # testable; in production the dashboard module always imports + # cleanly because it's the caller. + return True + expected = getattr(_ws, "_SESSION_TOKEN", None) + if not expected: + return True + return hmac.compare_digest(str(provided), str(expected)) + + +def _conn(): + """Open a kanban_db connection, creating the schema on first use. + + Every handler that mutates the DB goes through this so the plugin + self-heals on a fresh install (no user-visible "no such table" + error if somebody hits POST /tasks before GET /board). + ``init_db`` is idempotent. 
+ """ + try: + kanban_db.init_db() + except Exception as exc: + log.warning("kanban init_db failed: %s", exc) + return kanban_db.connect() + + +# --------------------------------------------------------------------------- +# Serialization helpers +# --------------------------------------------------------------------------- + +# Columns shown by the dashboard, in left-to-right order. "archived" is +# available via a filter toggle rather than a visible column. +BOARD_COLUMNS: list[str] = [ + "triage", "todo", "ready", "running", "blocked", "done", +] + + +def _task_dict(task: kanban_db.Task) -> dict[str, Any]: + d = asdict(task) + # Add derived age metrics so the UI can colour stale cards without + # computing deltas client-side. + d["age"] = kanban_db.task_age(task) + # Keep body short on list endpoints; full body comes from /tasks/:id. + return d + + +def _event_dict(event: kanban_db.Event) -> dict[str, Any]: + return { + "id": event.id, + "task_id": event.task_id, + "kind": event.kind, + "payload": event.payload, + "created_at": event.created_at, + "run_id": event.run_id, + } + + +def _comment_dict(c: kanban_db.Comment) -> dict[str, Any]: + return { + "id": c.id, + "task_id": c.task_id, + "author": c.author, + "body": c.body, + "created_at": c.created_at, + } + + +def _run_dict(r: kanban_db.Run) -> dict[str, Any]: + """Serialise a Run for the drawer's Run history section.""" + return { + "id": r.id, + "task_id": r.task_id, + "profile": r.profile, + "step_key": r.step_key, + "status": r.status, + "claim_lock": r.claim_lock, + "claim_expires": r.claim_expires, + "worker_pid": r.worker_pid, + "max_runtime_seconds": r.max_runtime_seconds, + "last_heartbeat_at": r.last_heartbeat_at, + "started_at": r.started_at, + "ended_at": r.ended_at, + "outcome": r.outcome, + "summary": r.summary, + "metadata": r.metadata, + "error": r.error, + } + + +def _links_for(conn: sqlite3.Connection, task_id: str) -> dict[str, list[str]]: + """Return {'parents': [...], 'children': [...]} for a task.""" + parents = [ + r["parent_id"] + for r in conn.execute( + "SELECT parent_id FROM task_links WHERE child_id = ? ORDER BY parent_id", + (task_id,), + ) + ] + children = [ + r["child_id"] + for r in conn.execute( + "SELECT child_id FROM task_links WHERE parent_id = ? ORDER BY child_id", + (task_id,), + ) + ] + return {"parents": parents, "children": children} + + +# --------------------------------------------------------------------------- +# GET /board +# --------------------------------------------------------------------------- + +@router.get("/board") +def get_board( + tenant: Optional[str] = Query(None, description="Filter to a single tenant"), + include_archived: bool = Query(False), +): + """Return the full board grouped by status column. + + ``_conn()`` auto-initializes ``kanban.db`` on first call so a fresh + install doesn't surface a "failed to load" error on the plugin tab. + """ + conn = _conn() + try: + tasks = kanban_db.list_tasks( + conn, tenant=tenant, include_archived=include_archived + ) + # Pre-fetch link counts per task (cheap: one query). + link_counts: dict[str, dict[str, int]] = {} + for row in conn.execute( + "SELECT parent_id, child_id FROM task_links" + ).fetchall(): + link_counts.setdefault(row["parent_id"], {"parents": 0, "children": 0})[ + "children" + ] += 1 + link_counts.setdefault(row["child_id"], {"parents": 0, "children": 0})[ + "parents" + ] += 1 + + # Comment + event counts (both cheap aggregates). 
+ comment_counts: dict[str, int] = { + r["task_id"]: r["n"] + for r in conn.execute( + "SELECT task_id, COUNT(*) AS n FROM task_comments GROUP BY task_id" + ) + } + + # Progress rollup: for each parent, how many children are done / total. + # One pass over task_links joined with child status — cheaper than + # N per-task queries and the plugin uses it to render "N/M". + progress: dict[str, dict[str, int]] = {} + for row in conn.execute( + "SELECT l.parent_id AS pid, t.status AS cstatus " + "FROM task_links l JOIN tasks t ON t.id = l.child_id" + ).fetchall(): + p = progress.setdefault(row["pid"], {"done": 0, "total": 0}) + p["total"] += 1 + if row["cstatus"] == "done": + p["done"] += 1 + + latest_event_id = conn.execute( + "SELECT COALESCE(MAX(id), 0) AS m FROM task_events" + ).fetchone()["m"] + + columns: dict[str, list[dict]] = {c: [] for c in BOARD_COLUMNS} + if include_archived: + columns["archived"] = [] + + for t in tasks: + d = _task_dict(t) + d["link_counts"] = link_counts.get(t.id, {"parents": 0, "children": 0}) + d["comment_count"] = comment_counts.get(t.id, 0) + d["progress"] = progress.get(t.id) # None when the task has no children + col = t.status if t.status in columns else "todo" + columns[col].append(d) + + # Stable per-column ordering already applied by list_tasks + # (priority DESC, created_at ASC), keep as-is. + + # List of known tenants for the UI filter dropdown. + tenants = [ + r["tenant"] + for r in conn.execute( + "SELECT DISTINCT tenant FROM tasks WHERE tenant IS NOT NULL ORDER BY tenant" + ) + ] + # List of distinct assignees for the lane-by-profile sub-grouping. + assignees = [ + r["assignee"] + for r in conn.execute( + "SELECT DISTINCT assignee FROM tasks WHERE assignee IS NOT NULL " + "AND status != 'archived' ORDER BY assignee" + ) + ] + + return { + "columns": [ + {"name": name, "tasks": columns[name]} for name in columns.keys() + ], + "tenants": tenants, + "assignees": assignees, + "latest_event_id": int(latest_event_id), + "now": int(time.time()), + } + finally: + conn.close() + + +# --------------------------------------------------------------------------- +# GET /tasks/:id +# --------------------------------------------------------------------------- + +@router.get("/tasks/{task_id}") +def get_task(task_id: str): + conn = _conn() + try: + task = kanban_db.get_task(conn, task_id) + if task is None: + raise HTTPException(status_code=404, detail=f"task {task_id} not found") + return { + "task": _task_dict(task), + "comments": [_comment_dict(c) for c in kanban_db.list_comments(conn, task_id)], + "events": [_event_dict(e) for e in kanban_db.list_events(conn, task_id)], + "links": _links_for(conn, task_id), + "runs": [_run_dict(r) for r in kanban_db.list_runs(conn, task_id)], + } + finally: + conn.close() + + +# --------------------------------------------------------------------------- +# POST /tasks +# --------------------------------------------------------------------------- + +class CreateTaskBody(BaseModel): + title: str + body: Optional[str] = None + assignee: Optional[str] = None + tenant: Optional[str] = None + priority: int = 0 + workspace_kind: str = "scratch" + workspace_path: Optional[str] = None + parents: list[str] = Field(default_factory=list) + triage: bool = False + idempotency_key: Optional[str] = None + max_runtime_seconds: Optional[int] = None + skills: Optional[list[str]] = None + + +@router.post("/tasks") +def create_task(payload: CreateTaskBody): + conn = _conn() + try: + task_id = kanban_db.create_task( + conn, + title=payload.title, + 
body=payload.body, + assignee=payload.assignee, + created_by="dashboard", + workspace_kind=payload.workspace_kind, + workspace_path=payload.workspace_path, + tenant=payload.tenant, + priority=payload.priority, + parents=payload.parents, + triage=payload.triage, + idempotency_key=payload.idempotency_key, + max_runtime_seconds=payload.max_runtime_seconds, + skills=payload.skills, + ) + task = kanban_db.get_task(conn, task_id) + body: dict[str, Any] = {"task": _task_dict(task) if task else None} + # Surface a dispatcher-presence warning so the UI can show a + # banner when a `ready` task would otherwise sit idle because no + # gateway is running (or dispatch_in_gateway=false). Only emit + # for ready+assigned tasks; triage/todo are expected to wait, + # and unassigned tasks can't be dispatched regardless. + if task and task.status == "ready" and task.assignee: + try: + from hermes_cli.kanban import _check_dispatcher_presence + running, message = _check_dispatcher_presence() + if not running and message: + body["warning"] = message + except Exception: + # Probe failure must never block the create itself. + pass + return body + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + finally: + conn.close() + + +# --------------------------------------------------------------------------- +# PATCH /tasks/:id (status / assignee / priority / title / body) +# --------------------------------------------------------------------------- + +class UpdateTaskBody(BaseModel): + status: Optional[str] = None + assignee: Optional[str] = None + priority: Optional[int] = None + title: Optional[str] = None + body: Optional[str] = None + result: Optional[str] = None + block_reason: Optional[str] = None + # Structured handoff fields — forwarded to complete_task when status + # transitions to 'done'. Dashboard parity with ``hermes kanban + # complete --summary ... --metadata ...``. + summary: Optional[str] = None + metadata: Optional[dict] = None + + +@router.patch("/tasks/{task_id}") +def update_task(task_id: str, payload: UpdateTaskBody): + conn = _conn() + try: + task = kanban_db.get_task(conn, task_id) + if task is None: + raise HTTPException(status_code=404, detail=f"task {task_id} not found") + + # --- assignee ---------------------------------------------------- + if payload.assignee is not None: + try: + ok = kanban_db.assign_task( + conn, task_id, payload.assignee or None, + ) + except RuntimeError as e: + raise HTTPException(status_code=409, detail=str(e)) + if not ok: + raise HTTPException(status_code=404, detail="task not found") + + # --- status ------------------------------------------------------- + if payload.status is not None: + s = payload.status + ok = True + if s == "done": + ok = kanban_db.complete_task( + conn, task_id, + result=payload.result, + summary=payload.summary, + metadata=payload.metadata, + ) + elif s == "blocked": + ok = kanban_db.block_task(conn, task_id, reason=payload.block_reason) + elif s == "ready": + # Re-open a blocked task, or just an explicit status set. + current = kanban_db.get_task(conn, task_id) + if current and current.status == "blocked": + ok = kanban_db.unblock_task(conn, task_id) + else: + # Direct status write for drag-drop (todo -> ready etc). 
+ ok = _set_status_direct(conn, task_id, "ready") + elif s == "archived": + ok = kanban_db.archive_task(conn, task_id) + elif s in ("todo", "running", "triage"): + ok = _set_status_direct(conn, task_id, s) + else: + raise HTTPException(status_code=400, detail=f"unknown status: {s}") + if not ok: + raise HTTPException( + status_code=409, + detail=f"status transition to {s!r} not valid from current state", + ) + + # --- priority ----------------------------------------------------- + if payload.priority is not None: + with kanban_db.write_txn(conn): + conn.execute( + "UPDATE tasks SET priority = ? WHERE id = ?", + (int(payload.priority), task_id), + ) + conn.execute( + "INSERT INTO task_events (task_id, kind, payload, created_at) " + "VALUES (?, 'reprioritized', ?, ?)", + (task_id, json.dumps({"priority": int(payload.priority)}), + int(time.time())), + ) + + # --- title / body ------------------------------------------------- + if payload.title is not None or payload.body is not None: + with kanban_db.write_txn(conn): + sets, vals = [], [] + if payload.title is not None: + if not payload.title.strip(): + raise HTTPException(status_code=400, detail="title cannot be empty") + sets.append("title = ?") + vals.append(payload.title.strip()) + if payload.body is not None: + sets.append("body = ?") + vals.append(payload.body) + vals.append(task_id) + conn.execute( + f"UPDATE tasks SET {', '.join(sets)} WHERE id = ?", vals, + ) + conn.execute( + "INSERT INTO task_events (task_id, kind, payload, created_at) " + "VALUES (?, 'edited', NULL, ?)", + (task_id, int(time.time())), + ) + + updated = kanban_db.get_task(conn, task_id) + return {"task": _task_dict(updated) if updated else None} + finally: + conn.close() + + +def _set_status_direct( + conn: sqlite3.Connection, task_id: str, new_status: str, +) -> bool: + """Direct status write for drag-drop moves that aren't covered by the + structured complete/block/unblock/archive verbs (e.g. todo<->ready, + running<->ready). Appends a ``status`` event row for the live feed. + + When this transitions OFF ``running`` to anything other than the + terminal verbs above (which own their own run closing), we close the + active run with outcome='reclaimed' so attempt history isn't + orphaned. ``running -> ready`` via drag-drop is the common case + (user yanking a stuck worker back to the queue). + """ + with kanban_db.write_txn(conn): + # Snapshot current state so we know whether to close a run. + prev = conn.execute( + "SELECT status, current_run_id FROM tasks WHERE id = ?", + (task_id,), + ).fetchone() + if prev is None: + return False + was_running = prev["status"] == "running" + + cur = conn.execute( + "UPDATE tasks SET status = ?, " + " claim_lock = CASE WHEN ? = 'running' THEN claim_lock ELSE NULL END, " + " claim_expires = CASE WHEN ? = 'running' THEN claim_expires ELSE NULL END, " + " worker_pid = CASE WHEN ? 
= 'running' THEN worker_pid ELSE NULL END " + "WHERE id = ?", + (new_status, new_status, new_status, new_status, task_id), + ) + if cur.rowcount != 1: + return False + run_id = None + if was_running and new_status != "running" and prev["current_run_id"]: + run_id = kanban_db._end_run( + conn, task_id, + outcome="reclaimed", status="reclaimed", + summary=f"status changed to {new_status} (dashboard/direct)", + ) + conn.execute( + "INSERT INTO task_events (task_id, run_id, kind, payload, created_at) " + "VALUES (?, ?, 'status', ?, ?)", + (task_id, run_id, json.dumps({"status": new_status}), int(time.time())), + ) + # If we re-opened something, children may have gone stale. + if new_status in ("done", "ready"): + kanban_db.recompute_ready(conn) + return True + + +# --------------------------------------------------------------------------- +# Comments +# --------------------------------------------------------------------------- + +class CommentBody(BaseModel): + body: str + author: Optional[str] = "dashboard" + + +@router.post("/tasks/{task_id}/comments") +def add_comment(task_id: str, payload: CommentBody): + if not payload.body.strip(): + raise HTTPException(status_code=400, detail="body is required") + conn = _conn() + try: + if kanban_db.get_task(conn, task_id) is None: + raise HTTPException(status_code=404, detail=f"task {task_id} not found") + kanban_db.add_comment( + conn, task_id, author=payload.author or "dashboard", body=payload.body, + ) + return {"ok": True} + finally: + conn.close() + + +# --------------------------------------------------------------------------- +# Links +# --------------------------------------------------------------------------- + +class LinkBody(BaseModel): + parent_id: str + child_id: str + + +@router.post("/links") +def add_link(payload: LinkBody): + conn = _conn() + try: + kanban_db.link_tasks(conn, payload.parent_id, payload.child_id) + return {"ok": True} + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + finally: + conn.close() + + +@router.delete("/links") +def delete_link(parent_id: str = Query(...), child_id: str = Query(...)): + conn = _conn() + try: + ok = kanban_db.unlink_tasks(conn, parent_id, child_id) + return {"ok": bool(ok)} + finally: + conn.close() + + +# --------------------------------------------------------------------------- +# Bulk actions (multi-select on the board) +# --------------------------------------------------------------------------- + +class BulkTaskBody(BaseModel): + ids: list[str] + status: Optional[str] = None + assignee: Optional[str] = None # "" or None = unassign + priority: Optional[int] = None + archive: bool = False + + +@router.post("/tasks/bulk") +def bulk_update(payload: BulkTaskBody): + """Apply the same patch to every id in ``payload.ids``. + + This is an *independent* iteration — per-task failures don't abort + siblings. Returns per-id outcome so the UI can surface partials. 
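+
+    An illustrative exchange (the ids are made up; the response shape
+    mirrors the per-id loop below)::
+
+        POST /tasks/bulk  {"ids": ["t_aaa", "t_bbb"], "status": "ready"}
+        ->    {"results": [{"id": "t_aaa", "ok": true},
+                           {"id": "t_bbb", "ok": false, "error": "not found"}]}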
+ """ + ids = [i for i in (payload.ids or []) if i] + if not ids: + raise HTTPException(status_code=400, detail="ids is required") + results: list[dict] = [] + conn = _conn() + try: + for tid in ids: + entry: dict[str, Any] = {"id": tid, "ok": True} + try: + task = kanban_db.get_task(conn, tid) + if task is None: + entry.update(ok=False, error="not found") + results.append(entry) + continue + if payload.archive: + if not kanban_db.archive_task(conn, tid): + entry.update(ok=False, error="archive refused") + if payload.status is not None and not payload.archive: + s = payload.status + if s == "done": + ok = kanban_db.complete_task(conn, tid) + elif s == "blocked": + ok = kanban_db.block_task(conn, tid) + elif s == "ready": + cur = kanban_db.get_task(conn, tid) + if cur and cur.status == "blocked": + ok = kanban_db.unblock_task(conn, tid) + else: + ok = _set_status_direct(conn, tid, "ready") + elif s in ("todo", "running", "triage"): + ok = _set_status_direct(conn, tid, s) + else: + entry.update(ok=False, error=f"unknown status {s!r}") + results.append(entry) + continue + if not ok: + entry.update(ok=False, error=f"transition to {s!r} refused") + if payload.assignee is not None: + try: + if not kanban_db.assign_task( + conn, tid, payload.assignee or None, + ): + entry.update(ok=False, error="assign refused") + except RuntimeError as e: + entry.update(ok=False, error=str(e)) + if payload.priority is not None: + with kanban_db.write_txn(conn): + conn.execute( + "UPDATE tasks SET priority = ? WHERE id = ?", + (int(payload.priority), tid), + ) + conn.execute( + "INSERT INTO task_events (task_id, kind, payload, created_at) " + "VALUES (?, 'reprioritized', ?, ?)", + (tid, json.dumps({"priority": int(payload.priority)}), + int(time.time())), + ) + except Exception as e: # defensive — one bad id shouldn't kill the batch + entry.update(ok=False, error=str(e)) + results.append(entry) + return {"results": results} + finally: + conn.close() + + +# --------------------------------------------------------------------------- +# Plugin config (read dashboard.kanban.* defaults from config.yaml) +# --------------------------------------------------------------------------- + +@router.get("/config") +def get_config(): + """Return kanban dashboard preferences from ~/.hermes/config.yaml. + + Reads the ``dashboard.kanban`` section if present; defaults otherwise. + Used by the UI to pre-select tenant filters, toggle markdown rendering, + or set column-width preferences without a round-trip per page load. + """ + try: + from hermes_cli.config import load_config + cfg = load_config() or {} + except Exception: + cfg = {} + dash_cfg = (cfg.get("dashboard") or {}) + # dashboard.kanban may itself be a dict; fall back to {}. + k_cfg = dash_cfg.get("kanban") or {} + return { + "default_tenant": k_cfg.get("default_tenant") or "", + "lane_by_profile": bool(k_cfg.get("lane_by_profile", True)), + "include_archived_by_default": bool(k_cfg.get("include_archived_by_default", False)), + "render_markdown": bool(k_cfg.get("render_markdown", True)), + } + + +# --------------------------------------------------------------------------- +# Stats (per-profile / per-status counts + oldest-ready age) +# --------------------------------------------------------------------------- + +@router.get("/stats") +def get_stats(): + """Per-status + per-assignee counts + oldest-ready age. + + Designed for the dashboard HUD and for router profiles that need to + answer "is this specialist overloaded?" without scanning the whole + board themselves. 
+ """ + conn = _conn() + try: + return kanban_db.board_stats(conn) + finally: + conn.close() + + +@router.get("/assignees") +def get_assignees(): + """Known profiles + per-profile task counts. + + Returns the union of ``~/.hermes/profiles/*`` on disk and every + distinct assignee currently used on the board. The dashboard uses + this to populate its assignee dropdown so a freshly-created profile + appears in the picker before it's been given any task. + """ + conn = _conn() + try: + return {"assignees": kanban_db.known_assignees(conn)} + finally: + conn.close() + + +# --------------------------------------------------------------------------- +# Worker log (read-only; file written by _default_spawn) +# --------------------------------------------------------------------------- + +@router.get("/tasks/{task_id}/log") +def get_task_log(task_id: str, tail: Optional[int] = Query(None, ge=1, le=2_000_000)): + """Return the worker's stdout/stderr log. + + ``tail`` caps the response size (bytes) so the dashboard drawer + doesn't paginate megabytes into the browser. Returns 404 if the task + has never spawned. The on-disk log is rotated at 2 MiB per + ``_rotate_worker_log`` — a single ``.log.1`` is kept, no further + generations, so disk usage per task is bounded at ~4 MiB. + """ + conn = _conn() + try: + task = kanban_db.get_task(conn, task_id) + finally: + conn.close() + if task is None: + raise HTTPException(status_code=404, detail=f"task {task_id} not found") + content = kanban_db.read_worker_log(task_id, tail_bytes=tail) + log_path = kanban_db.worker_log_path(task_id) + size = log_path.stat().st_size if log_path.exists() else 0 + return { + "task_id": task_id, + "path": str(log_path), + "exists": content is not None, + "size_bytes": size, + "content": content or "", + # Truncated when the on-disk file was larger than the tail cap. + "truncated": bool(tail and size > tail), + } + + +# --------------------------------------------------------------------------- +# Dispatch nudge (optional quick-path so the UI doesn't wait 60 s) +# --------------------------------------------------------------------------- + +@router.post("/dispatch") +def dispatch(dry_run: bool = Query(False), max_n: int = Query(8, alias="max")): + conn = _conn() + try: + result = kanban_db.dispatch_once( + conn, dry_run=dry_run, max_spawn=max_n, + ) + # DispatchResult is a dataclass. + try: + return asdict(result) + except TypeError: + return {"result": str(result)} + finally: + conn.close() + + +# --------------------------------------------------------------------------- +# WebSocket: /events?since= +# --------------------------------------------------------------------------- + +# Poll interval for the event tail loop. SQLite WAL + 300 ms polling is +# the simplest and most robust approach; it adds a fraction of a percent +# of CPU and has no shared state to synchronize across workers. +_EVENT_POLL_SECONDS = 0.3 + + +@router.websocket("/events") +async def stream_events(ws: WebSocket): + # Enforce the dashboard session token as a query param — browsers can't + # set Authorization on a WS upgrade. This matches how the PTY bridge + # authenticates in hermes_cli/web_server.py. 
+ token = ws.query_params.get("token") + if not _check_ws_token(token): + await ws.close(code=http_status.WS_1008_POLICY_VIOLATION) + return + await ws.accept() + try: + since_raw = ws.query_params.get("since", "0") + try: + cursor = int(since_raw) + except ValueError: + cursor = 0 + + def _fetch_new(cursor_val: int) -> tuple[int, list[dict]]: + conn = kanban_db.connect() + try: + rows = conn.execute( + "SELECT id, task_id, run_id, kind, payload, created_at " + "FROM task_events WHERE id > ? ORDER BY id ASC LIMIT 200", + (cursor_val,), + ).fetchall() + out: list[dict] = [] + new_cursor = cursor_val + for r in rows: + try: + payload = json.loads(r["payload"]) if r["payload"] else None + except Exception: + payload = None + out.append({ + "id": r["id"], + "task_id": r["task_id"], + "run_id": r["run_id"], + "kind": r["kind"], + "payload": payload, + "created_at": r["created_at"], + }) + new_cursor = r["id"] + return new_cursor, out + finally: + conn.close() + + while True: + cursor, events = await asyncio.to_thread(_fetch_new, cursor) + if events: + await ws.send_json({"events": events, "cursor": cursor}) + await asyncio.sleep(_EVENT_POLL_SECONDS) + except WebSocketDisconnect: + return + except Exception as exc: # defensive: never crash the dashboard worker + log.warning("Kanban event stream error: %s", exc) + try: + await ws.close() + except Exception: + pass diff --git a/plugins/kanban/systemd/hermes-kanban-dispatcher.service b/plugins/kanban/systemd/hermes-kanban-dispatcher.service new file mode 100644 index 00000000000..299a0f17700 --- /dev/null +++ b/plugins/kanban/systemd/hermes-kanban-dispatcher.service @@ -0,0 +1,32 @@ +# DEPRECATED — the kanban dispatcher now runs inside the gateway by +# default (config key: kanban.dispatch_in_gateway, default true). To +# migrate: +# +# systemctl --user disable --now hermes-kanban-dispatcher.service +# # then make sure a gateway is running; e.g. a systemd user unit +# # for `hermes gateway start`. The gateway hosts the dispatcher. +# +# This unit is kept for users who truly cannot run the gateway (host +# policy forbids long-lived services, etc.). It now invokes the +# standalone dispatcher via the explicit --force flag, so nobody +# accidentally keeps two dispatchers racing against the same +# kanban.db. Running this unit AND a gateway with +# dispatch_in_gateway=true is NOT supported. + +[Unit] +Description=Hermes Kanban dispatcher (DEPRECATED standalone daemon — prefer gateway-embedded dispatch) +Documentation=https://hermes-agent.nousresearch.com/docs/user-guide/features/kanban +After=network.target + +[Service] +Type=simple +ExecStart=/usr/bin/env hermes kanban daemon --force --interval 60 --pidfile %t/hermes-kanban-dispatcher.pid +Restart=on-failure +RestartSec=5 +# Log to the journal via stdout/stderr; the dispatcher also writes per-task +# worker output to $HERMES_HOME/kanban/logs/.log. 
+StandardOutput=journal +StandardError=journal + +[Install] +WantedBy=default.target diff --git a/run_agent.py b/run_agent.py index 0f6755539db..0c64bfe50d0 100644 --- a/run_agent.py +++ b/run_agent.py @@ -133,6 +133,7 @@ from agent.prompt_builder import ( DEFAULT_AGENT_IDENTITY, PLATFORM_HINTS, MEMORY_GUIDANCE, SESSION_SEARCH_GUIDANCE, SKILLS_GUIDANCE, HERMES_AGENT_HELP_GUIDANCE, + KANBAN_GUIDANCE, build_nous_subscription_prompt, ) from agent.model_metadata import ( @@ -4823,6 +4824,12 @@ class AIAgent: tool_guidance.append(SESSION_SEARCH_GUIDANCE) if "skill_manage" in self.valid_tool_names: tool_guidance.append(SKILLS_GUIDANCE) + # Kanban worker/orchestrator lifecycle — only present when the + # dispatcher spawned this process (kanban_show check_fn gates on + # HERMES_KANBAN_TASK env var). Normal chat sessions never see + # this block. + if "kanban_show" in self.valid_tool_names: + tool_guidance.append(KANBAN_GUIDANCE) if tool_guidance: prompt_parts.append(" ".join(tool_guidance)) diff --git a/skills/devops/kanban-orchestrator/SKILL.md b/skills/devops/kanban-orchestrator/SKILL.md new file mode 100644 index 00000000000..8b1a8c3a4ff --- /dev/null +++ b/skills/devops/kanban-orchestrator/SKILL.md @@ -0,0 +1,152 @@ +--- +name: kanban-orchestrator +description: Decomposition playbook + specialist-roster conventions + anti-temptation rules for an orchestrator profile routing work through Kanban. The "don't do the work yourself" rule and the basic lifecycle are auto-injected into every kanban worker's system prompt; this skill is the deeper playbook when you're specifically playing the orchestrator role. +version: 2.0.0 +metadata: + hermes: + tags: [kanban, multi-agent, orchestration, routing] + related_skills: [kanban-worker] +--- + +# Kanban Orchestrator — Decomposition Playbook + +> The **core worker lifecycle** (including the `kanban_create` fan-out pattern and the "decompose, don't execute" rule) is auto-injected into every kanban process via the `KANBAN_GUIDANCE` system-prompt block. This skill is the deeper playbook when you're an orchestrator profile whose whole job is routing. + +## When to use the board (vs. just doing the work) + +Create Kanban tasks when any of these are true: + +1. **Multiple specialists are needed.** Research + analysis + writing is three profiles. +2. **The work should survive a crash or restart.** Long-running, recurring, or important. +3. **The user might want to interject.** Human-in-the-loop at any step. +4. **Multiple subtasks can run in parallel.** Fan-out for speed. +5. **Review / iteration is expected.** A reviewer profile loops on drafter output. +6. **The audit trail matters.** Board rows persist in SQLite forever. + +If *none* of those apply — it's a small one-shot reasoning task — use `delegate_task` instead or answer the user directly. + +## The anti-temptation rules + +Your job description says "route, don't execute." The rules that enforce that: + +- **Do not execute the work yourself.** Your restricted toolset usually doesn't even include terminal/file/code/web for implementation. If you find yourself "just fixing this quickly" — stop and create a task for the right specialist. +- **For any concrete task, create a Kanban task and assign it.** Every single time. +- **If no specialist fits, ask the user which profile to create.** Do not default to doing it yourself under "close enough." 
+- **Decompose, route, and summarize — that's the whole job.** + +## The standard specialist roster (convention) + +Unless the user's setup has customized profiles, assume these exist. Adjust to whatever the user actually has — ask if you're unsure. + +| Profile | Does | Typical workspace | +|---|---|---| +| `researcher` | Reads sources, gathers facts, writes findings | `scratch` | +| `analyst` | Synthesizes, ranks, de-dupes. Consumes multiple `researcher` outputs | `scratch` | +| `writer` | Drafts prose in the user's voice | `scratch` or `dir:` into their Obsidian vault | +| `reviewer` | Reads output, leaves findings, gates approval | `scratch` | +| `backend-eng` | Writes server-side code | `worktree` | +| `frontend-eng` | Writes client-side code | `worktree` | +| `ops` | Runs scripts, manages services, handles deployments | `dir:` into ops scripts repo | +| `pm` | Writes specs, acceptance criteria | `scratch` | + +## Decomposition playbook + +### Step 1 — Understand the goal + +Ask clarifying questions if the goal is ambiguous. Cheap to ask; expensive to spawn the wrong fleet. + +### Step 2 — Sketch the task graph + +Before creating anything, draft the graph out loud (in your response to the user). Example for "Analyze whether we should migrate to Postgres": + +``` +T1 researcher research: Postgres cost vs current +T2 researcher research: Postgres performance vs current +T3 analyst synthesize migration recommendation parents: T1, T2 +T4 writer draft decision memo parents: T3 +``` + +Show this to the user. Let them correct it before you create anything. + +### Step 3 — Create tasks and link + +```python +t1 = kanban_create( + title="research: Postgres cost vs current", + assignee="researcher", + body="Compare estimated infrastructure costs, migration costs, and ongoing ops costs over a 3-year window. Sources: AWS/GCP pricing, team time estimates, current Postgres bills from peers.", + tenant=os.environ.get("HERMES_TENANT"), +)["task_id"] + +t2 = kanban_create( + title="research: Postgres performance vs current", + assignee="researcher", + body="Compare query latency, throughput, and scaling characteristics at our expected data volume (~500GB, 10k QPS peak). Sources: benchmark papers, public case studies, pgbench results if easy.", +)["task_id"] + +t3 = kanban_create( + title="synthesize migration recommendation", + assignee="analyst", + body="Read the findings from T1 (cost) and T2 (performance). Produce a 1-page recommendation with explicit trade-offs and a go/no-go call.", + parents=[t1, t2], +)["task_id"] + +t4 = kanban_create( + title="draft decision memo", + assignee="writer", + body="Turn the analyst's recommendation into a 2-page memo for the CTO. Match the tone of previous decision memos in the team's knowledge base.", + parents=[t3], +)["task_id"] +``` + +`parents=[...]` gates promotion — children stay in `todo` until every parent reaches `done`, then auto-promote to `ready`. No manual coordination needed; the dispatcher and dependency engine handle it. + +### Step 4 — Complete your own task + +If you were spawned as a task yourself (e.g. 
`planner` profile was assigned `T0: "investigate Postgres migration"`), mark it done with a summary of what you created:
+
+```python
+kanban_complete(
+    summary="decomposed into T1-T4: 2 researchers parallel, 1 analyst on their outputs, 1 writer on the recommendation",
+    metadata={
+        "task_graph": {
+            "T1": {"assignee": "researcher", "parents": []},
+            "T2": {"assignee": "researcher", "parents": []},
+            "T3": {"assignee": "analyst", "parents": ["T1", "T2"]},
+            "T4": {"assignee": "writer", "parents": ["T3"]},
+        },
+    },
+)
+```
+
+### Step 5 — Report back to the user
+
+Tell them what you created in plain prose:
+
+> I've queued 4 tasks:
+> - **T1** (researcher): cost comparison
+> - **T2** (researcher): performance comparison, in parallel with T1
+> - **T3** (analyst): synthesizes T1 + T2 into a recommendation
+> - **T4** (writer): turns T3 into a CTO memo
+>
+> The dispatcher will pick up T1 and T2 now. T3 starts when both finish. You'll get a gateway ping when T4 completes. Use the dashboard or `hermes kanban tail <id>` to follow along.
+
+## Common patterns
+
+**Fan-out + fan-in (research → synthesize):** N `researcher` tasks with no parents, one `analyst` task with all of them as parents.
+
+**Pipeline with gates:** `pm → backend-eng → reviewer`. Each stage's `parents=[previous_task]`. Reviewer blocks or completes; if the reviewer blocks, the operator unblocks with feedback and respawns.
+
+**Same-profile queue:** 50 tasks, all assigned to `translator`, no dependencies between them. The dispatcher serializes — the translator processes them in priority order, accumulating experience in its own memory.
+
+**Human-in-the-loop:** Any task can `kanban_block()` to wait for input. The dispatcher respawns after `/unblock`. The comment thread carries the full context.
+
+## Pitfalls
+
+**Reassignment vs. new task.** If a reviewer blocks with "needs changes," create a NEW task linked from the reviewer's task — don't re-run the same task with a stern look. The new task is assigned to the original implementer profile.
+
+**Argument order for links.** `kanban_link(parent_id=..., child_id=...)` — parent first. Mixing them up demotes the wrong task to `todo`.
+
+**Don't pre-create the whole graph if the shape depends on intermediate findings.** If T3's structure depends on what T1 and T2 find, let T3 exist as a "synthesize findings" task whose own first step is to read parent handoffs and plan the rest. Orchestrators can spawn orchestrators.
+
+**Tenant inheritance.** If `HERMES_TENANT` is set in your env, pass `tenant=os.environ.get("HERMES_TENANT")` on every `kanban_create` call so child tasks stay in the same namespace.
diff --git a/skills/devops/kanban-worker/SKILL.md b/skills/devops/kanban-worker/SKILL.md
new file mode 100644
index 00000000000..36055d6ec32
--- /dev/null
+++ b/skills/devops/kanban-worker/SKILL.md
@@ -0,0 +1,134 @@
+---
+name: kanban-worker
+description: Pitfalls, examples, and edge cases for Hermes Kanban workers. The lifecycle itself is auto-injected into every worker's system prompt as KANBAN_GUIDANCE (from agent/prompt_builder.py); this skill is what you load when you want deeper detail on specific scenarios.
+version: 2.0.0
+metadata:
+  hermes:
+    tags: [kanban, multi-agent, collaboration, workflow, pitfalls]
+    related_skills: [kanban-orchestrator]
+---
+
+# Kanban Worker — Pitfalls and Examples
+
+> You're seeing this skill because the Hermes Kanban dispatcher spawned you as a worker with `--skills kanban-worker` — it's loaded automatically for every dispatched worker. The **lifecycle** (6 steps: orient → work → heartbeat → block → complete → spawn follow-ups) also lives in the `KANBAN_GUIDANCE` block that's auto-injected into your system prompt. This skill is the deeper detail: good handoff shapes, retry diagnostics, edge cases.
+
+## Workspace handling
+
+Your workspace kind determines how you should behave inside `$HERMES_KANBAN_WORKSPACE`:
+
+| Kind | What it is | How to work |
+|---|---|---|
+| `scratch` | Fresh tmp dir, yours alone | Read/write freely; it gets GC'd when the task is archived. |
+| `dir:<path>` | Shared persistent directory | Other runs will read what you write. Treat it like long-lived state. The path is guaranteed absolute (the kernel rejects relative paths). |
+| `worktree` | Git worktree at the resolved path | If `.git` doesn't exist, run `git worktree add <path>` from the main repo first, then cd and work normally. Commit work here. |
+
+## Tenant isolation
+
+If `$HERMES_TENANT` is set, the task belongs to a tenant namespace. When reading or writing persistent memory, prefix memory entries with the tenant so context doesn't leak across tenants:
+
+- Good: `business-a: Acme is our biggest customer`
+- Bad (leaks): `Acme is our biggest customer`
+
+## Good summary + metadata shapes
+
+The `kanban_complete(summary=..., metadata=...)` handoff is how downstream workers read what you did. Patterns that work:
+
+**Coding task:**
+```python
+kanban_complete(
+    summary="shipped rate limiter — token bucket, keys on user_id with IP fallback, 14 tests pass",
+    metadata={
+        "changed_files": ["rate_limiter.py", "tests/test_rate_limiter.py"],
+        "tests_run": 14,
+        "tests_passed": 14,
+        "decisions": ["user_id primary, IP fallback for unauthenticated requests"],
+    },
+)
+```
+
+**Research task:**
+```python
+kanban_complete(
+    summary="3 competing libraries reviewed; vLLM wins on throughput, SGLang on latency, TensorRT-LLM on memory efficiency",
+    metadata={
+        "sources_read": 12,
+        "recommendation": "vLLM",
+        "benchmarks": {"vllm": 1.0, "sglang": 0.87, "trtllm": 0.72},
+    },
+)
+```
+
+**Review task:**
+```python
+kanban_complete(
+    summary="reviewed PR #123; 2 blocking issues found (SQL injection in /search, missing CSRF on /settings)",
+    metadata={
+        "pr_number": 123,
+        "findings": [
+            {"severity": "critical", "file": "api/search.py", "line": 42, "issue": "raw SQL concat"},
+            {"severity": "high", "file": "api/settings.py", "issue": "missing CSRF middleware"},
+        ],
+        "approved": False,
+    },
+)
+```
+
+Shape `metadata` so downstream parsers (reviewers, aggregators, schedulers) can use it without re-reading your prose.
+
+## Block reasons that get answered fast
+
+Bad: `"stuck"` — the human has no context.
+
+Good: one sentence naming the specific decision you need. Leave longer context as a comment instead.
+
+```python
+kanban_comment(
+    task_id=os.environ["HERMES_KANBAN_TASK"],
+    body="Full context: I have user IPs from Cloudflare headers but some users are behind NATs with thousands of peers. Keying on IP alone causes false positives.",
+)
+kanban_block(reason="Rate limit key choice: IP (simple, NAT-unsafe) or user_id (requires auth, skips anonymous endpoints)?")
+```
+
+The block message is what appears in the dashboard / gateway notifier. The comment is the deeper context a human reads when they open the task.
+
+## Heartbeats worth sending
+
+Good heartbeats name progress: `"epoch 12/50, loss 0.31"`, `"scanned 1.2M/2.4M rows"`, `"uploaded 47/120 videos"`.
+
+Bad heartbeats: `"still working"`, empty notes, sub-second intervals. Every few minutes max; skip entirely for tasks under ~2 minutes.
+
+## Retry scenarios
+
+If you open the task and `kanban_show` returns `runs: [...]` with one or more closed runs, you're a retry. The prior runs' `outcome` / `summary` / `error` tell you what didn't work. Don't repeat that path. Typical retry diagnostics:
+
+- `outcome: "timed_out"` — the previous attempt hit `max_runtime_seconds`. You may need to chunk the work or shorten it.
+- `outcome: "crashed"` — OOM or segfault. Reduce memory footprint.
+- `outcome: "spawn_failed"` + `error: "..."` — usually a profile config issue (missing credential, bad PATH). Ask the human via `kanban_block` instead of retrying blindly.
+- `outcome: "reclaimed"` + `summary: "task archived..."` — an operator archived the task out from under the previous run; you probably shouldn't be running at all. Check the task's status carefully.
+- `outcome: "blocked"` — a previous attempt blocked; the unblock comment should be in the thread by now.
+
+## Do NOT
+
+- Call `delegate_task` as a substitute for `kanban_create`. `delegate_task` is for short reasoning subtasks inside YOUR run; `kanban_create` is for cross-agent handoffs that outlive one API loop.
+- Modify files outside `$HERMES_KANBAN_WORKSPACE` unless the task body says to.
+- Create follow-up tasks assigned to yourself — assign to the right specialist.
+- Complete a task you didn't actually finish. Block it instead.
+
+## Pitfalls
+
+**Task state can change between dispatch and your startup.** Between when the dispatcher claimed the task and when your process actually booted, the task may have been blocked, reassigned, or archived. Always `kanban_show` first. If it reports `blocked` or `archived`, stop — you shouldn't be running.
+
+**Workspace may have stale artifacts.** Especially `dir:` and `worktree` workspaces can have files from previous runs. Read the comment thread — it usually explains why you're running again and what state the workspace is in.
+
+**Don't rely on the CLI when the tools are available.** The `kanban_*` tools work across all terminal backends (Docker, Modal, SSH). `hermes kanban <subcommand>` from your terminal tool will fail in containerized backends because the CLI isn't installed there. When in doubt, use the tool.
+
+## CLI fallback (for scripting)
+
+Every tool has a CLI equivalent for human operators and scripts:
+- `kanban_show` ↔ `hermes kanban show <id> --json`
+- `kanban_complete` ↔ `hermes kanban complete <id> --summary "..." --metadata '{...}'`
+- `kanban_block` ↔ `hermes kanban block <id> "reason"`
+- `kanban_create` ↔ `hermes kanban create "title" --assignee <profile> [--parent <id>]`
+- etc.
+
+Use the tools from inside an agent; the CLI exists for the human at the terminal.
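+
+For example, an operator seeding and watching a queue from the shell (the title and profile here are just examples; the subcommands are the ones listed above):
+
+```bash
+hermes kanban create "triage crash reports" --assignee backend-eng
+hermes kanban list --status ready
+hermes kanban dispatch --dry-run   # preview what the dispatcher would spawn
+```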
diff --git a/tests/hermes_cli/test_kanban_cli.py b/tests/hermes_cli/test_kanban_cli.py new file mode 100644 index 00000000000..f7c84d5df8e --- /dev/null +++ b/tests/hermes_cli/test_kanban_cli.py @@ -0,0 +1,210 @@ +"""Tests for the kanban CLI surface (hermes_cli.kanban).""" + +from __future__ import annotations + +import argparse +import json +import os +from pathlib import Path + +import pytest + +from hermes_cli import kanban as kc +from hermes_cli import kanban_db as kb + + +@pytest.fixture +def kanban_home(tmp_path, monkeypatch): + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + kb.init_db() + return home + + +# --------------------------------------------------------------------------- +# Workspace flag parsing +# --------------------------------------------------------------------------- + +@pytest.mark.parametrize( + "value,expected", + [ + ("scratch", ("scratch", None)), + ("worktree", ("worktree", None)), + ("dir:/tmp/work", ("dir", "/tmp/work")), + ], +) +def test_parse_workspace_flag_valid(value, expected): + assert kc._parse_workspace_flag(value) == expected + + +def test_parse_workspace_flag_expands_user(): + kind, path = kc._parse_workspace_flag("dir:~/vault") + assert kind == "dir" + assert path.endswith("/vault") + assert not path.startswith("~") + + +@pytest.mark.parametrize("bad", ["cloud", "dir:", "", "worktree:/x"]) +def test_parse_workspace_flag_rejects(bad): + if not bad: + # Empty -> defaults; not an error. + assert kc._parse_workspace_flag(bad) == ("scratch", None) + return + with pytest.raises(argparse.ArgumentTypeError): + kc._parse_workspace_flag(bad) + + +# --------------------------------------------------------------------------- +# run_slash smoke tests (end-to-end via the same entry both CLI and gateway use) +# --------------------------------------------------------------------------- + +def test_run_slash_no_args_shows_usage(kanban_home): + out = kc.run_slash("") + assert "kanban" in out.lower() + assert "create" in out.lower() or "subcommand" in out.lower() or "action" in out.lower() + + +def test_run_slash_create_and_list(kanban_home): + out = kc.run_slash("create 'ship feature' --assignee alice") + assert "Created" in out + out = kc.run_slash("list") + assert "ship feature" in out + assert "alice" in out + + +def test_run_slash_create_with_parent_and_cascade(kanban_home): + # Parent then child via --parent + out1 = kc.run_slash("create 'parent' --assignee alice") + # Extract the "t_xxxx" id from "Created t_xxxx (ready, ...)" + import re + m = re.search(r"(t_[a-f0-9]+)", out1) + assert m + p = m.group(1) + out2 = kc.run_slash(f"create 'child' --assignee bob --parent {p}") + assert "todo" in out2 # child starts as todo + + # Complete parent; list should promote child to ready + kc.run_slash(f"complete {p}") + # Explicit filter: child should now be ready (was todo before complete). 
+ ready_list = kc.run_slash("list --status ready") + assert "child" in ready_list + + +def test_run_slash_show_includes_comments(kanban_home): + out = kc.run_slash("create 'x'") + import re + tid = re.search(r"(t_[a-f0-9]+)", out).group(1) + kc.run_slash(f"comment {tid} 'source is paywalled'") + show = kc.run_slash(f"show {tid}") + assert "source is paywalled" in show + + +def test_run_slash_block_unblock_cycle(kanban_home): + out = kc.run_slash("create 'x' --assignee alice") + import re + tid = re.search(r"(t_[a-f0-9]+)", out).group(1) + # Claim first so block() finds it running + kc.run_slash(f"claim {tid}") + assert "Blocked" in kc.run_slash(f"block {tid} 'need decision'") + assert "Unblocked" in kc.run_slash(f"unblock {tid}") + + +def test_run_slash_json_output(kanban_home): + out = kc.run_slash("create 'jsontask' --assignee alice --json") + payload = json.loads(out) + assert payload["title"] == "jsontask" + assert payload["assignee"] == "alice" + assert payload["status"] == "ready" + + +def test_run_slash_dispatch_dry_run_counts(kanban_home): + kc.run_slash("create 'a' --assignee alice") + kc.run_slash("create 'b' --assignee bob") + out = kc.run_slash("dispatch --dry-run") + assert "Spawned:" in out + + +def test_run_slash_context_output_format(kanban_home): + out = kc.run_slash("create 'tech spec' --assignee alice --body 'write an RFC'") + import re + tid = re.search(r"(t_[a-f0-9]+)", out).group(1) + kc.run_slash(f"comment {tid} 'remember to include performance section'") + ctx = kc.run_slash(f"context {tid}") + assert "tech spec" in ctx + assert "write an RFC" in ctx + assert "performance section" in ctx + + +def test_run_slash_tenant_filter(kanban_home): + kc.run_slash("create 'biz-a task' --tenant biz-a --assignee alice") + kc.run_slash("create 'biz-b task' --tenant biz-b --assignee alice") + a = kc.run_slash("list --tenant biz-a") + b = kc.run_slash("list --tenant biz-b") + assert "biz-a task" in a and "biz-b task" not in a + assert "biz-b task" in b and "biz-a task" not in b + + +def test_run_slash_usage_error_returns_message(kanban_home): + # Missing required argument for create + out = kc.run_slash("create") + assert "usage" in out.lower() or "error" in out.lower() + + +def test_run_slash_assign_reassigns(kanban_home): + out = kc.run_slash("create 'x' --assignee alice") + import re + tid = re.search(r"(t_[a-f0-9]+)", out).group(1) + assert "Assigned" in kc.run_slash(f"assign {tid} bob") + show = kc.run_slash(f"show {tid}") + assert "bob" in show + + +def test_run_slash_link_unlink(kanban_home): + a = kc.run_slash("create 'a'") + b = kc.run_slash("create 'b'") + import re + ta = re.search(r"(t_[a-f0-9]+)", a).group(1) + tb = re.search(r"(t_[a-f0-9]+)", b).group(1) + assert "Linked" in kc.run_slash(f"link {ta} {tb}") + # After link, b is todo + show = kc.run_slash(f"show {tb}") + assert "todo" in show + assert "Unlinked" in kc.run_slash(f"unlink {ta} {tb}") + + +# --------------------------------------------------------------------------- +# Integration with the COMMAND_REGISTRY +# --------------------------------------------------------------------------- + +def test_kanban_is_resolvable(): + from hermes_cli.commands import resolve_command + + cmd = resolve_command("kanban") + assert cmd is not None + assert cmd.name == "kanban" + + +def test_kanban_bypasses_active_session_guard(): + from hermes_cli.commands import should_bypass_active_session + + assert should_bypass_active_session("kanban") + + +def test_kanban_in_autocomplete_table(): + from hermes_cli.commands import 
COMMANDS, SUBCOMMANDS + + assert "/kanban" in COMMANDS + subs = SUBCOMMANDS.get("/kanban") or [] + assert "create" in subs + assert "dispatch" in subs + + +def test_kanban_not_gateway_only(): + # kanban is available in BOTH CLI and gateway surfaces. + from hermes_cli.commands import COMMAND_REGISTRY + + cmd = next(c for c in COMMAND_REGISTRY if c.name == "kanban") + assert not cmd.cli_only + assert not cmd.gateway_only diff --git a/tests/hermes_cli/test_kanban_core_functionality.py b/tests/hermes_cli/test_kanban_core_functionality.py new file mode 100644 index 00000000000..551480ff721 --- /dev/null +++ b/tests/hermes_cli/test_kanban_core_functionality.py @@ -0,0 +1,2713 @@ +"""Core-functionality tests for the kanban kernel + CLI additions. + +Complements tests/hermes_cli/test_kanban_db.py (schema + CAS atomicity) +and tests/hermes_cli/test_kanban_cli.py (end-to-end run_slash). The +tests here exercise the pieces added as part of the kanban hardening +pass: circuit breaker, crash detection, daemon loop, idempotency, +retention/gc, stats, notify subscriptions, worker log accessor, run_slash +parity across every registered verb. +""" + +from __future__ import annotations + +import argparse +import json +import os +import threading +import time +from pathlib import Path +from typing import Optional + +import pytest + +from hermes_cli import kanban_db as kb +from hermes_cli.kanban import run_slash + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + +@pytest.fixture +def kanban_home(tmp_path, monkeypatch): + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + kb.init_db() + return home + + +# --------------------------------------------------------------------------- +# Idempotency key +# --------------------------------------------------------------------------- + +def test_idempotency_key_returns_existing_task(kanban_home): + conn = kb.connect() + try: + a = kb.create_task(conn, title="first", idempotency_key="abc") + b = kb.create_task(conn, title="second attempt", idempotency_key="abc") + assert a == b, "same idempotency_key should return the same task id" + # And body wasn't overwritten — first create wins. 
+ task = kb.get_task(conn, a)
+ assert task.title == "first"
+ finally:
+ conn.close()
+
+
+def test_idempotency_key_ignored_for_archived(kanban_home):
+ conn = kb.connect()
+ try:
+ a = kb.create_task(conn, title="first", idempotency_key="abc")
+ kb.archive_task(conn, a)
+ b = kb.create_task(conn, title="second", idempotency_key="abc")
+ assert a != b, "archived task shouldn't block a fresh create with same key"
+ finally:
+ conn.close()
+
+
+def test_no_idempotency_key_never_collides(kanban_home):
+ conn = kb.connect()
+ try:
+ a = kb.create_task(conn, title="a")
+ b = kb.create_task(conn, title="b")
+ assert a != b
+ finally:
+ conn.close()
+
+
+# ---------------------------------------------------------------------------
+# Spawn-failure circuit breaker
+# ---------------------------------------------------------------------------
+
+def test_spawn_failure_auto_blocks_after_limit(kanban_home):
+ """N consecutive spawn failures on the same task → auto_blocked."""
+ def _bad_spawn(task, ws):
+ raise RuntimeError("no PATH")
+
+ conn = kb.connect()
+ try:
+ tid = kb.create_task(conn, title="x", assignee="worker")
+ # Three ticks below the default limit (5) → still ready, counter grows.
+ for _ in range(3):
+ res = kb.dispatch_once(conn, spawn_fn=_bad_spawn, failure_limit=5)
+ assert tid not in res.auto_blocked
+ task = kb.get_task(conn, tid)
+ assert task.status == "ready"
+ assert task.spawn_failures == 3
+
+ # Two more ticks → the fifth failure hits the limit and trips the breaker.
+ res1 = kb.dispatch_once(conn, spawn_fn=_bad_spawn, failure_limit=5)
+ assert tid not in res1.auto_blocked
+ res2 = kb.dispatch_once(conn, spawn_fn=_bad_spawn, failure_limit=5)
+ assert tid in res2.auto_blocked
+ task = kb.get_task(conn, tid)
+ assert task.status == "blocked"
+ assert task.spawn_failures >= 5
+ assert task.last_spawn_error and "no PATH" in task.last_spawn_error
+ finally:
+ conn.close()
+
+
+def test_successful_spawn_resets_failure_counter(kanban_home):
+ """A successful spawn clears the counter so past failures don't count
+ against future retries of the same task."""
+ calls = [0]
+ def _flaky_spawn(task, ws):
+ calls[0] += 1
+ if calls[0] <= 2:
+ raise RuntimeError("transient")
+ return 99999 # pid value — harmless; crash detection will clear it
+
+ conn = kb.connect()
+ try:
+ tid = kb.create_task(conn, title="x", assignee="worker")
+ # Two failures + one success.
+ kb.dispatch_once(conn, spawn_fn=_flaky_spawn, failure_limit=5)
+ kb.dispatch_once(conn, spawn_fn=_flaky_spawn, failure_limit=5)
+ task = kb.get_task(conn, tid)
+ assert task.spawn_failures == 2
+ kb.dispatch_once(conn, spawn_fn=_flaky_spawn, failure_limit=5)
+ task = kb.get_task(conn, tid)
+ assert task.spawn_failures == 0
+ assert task.last_spawn_error is None
+ # Task is now running with a pid.
+ assert task.status == "running"
+ assert task.worker_pid == 99999
+ finally:
+ conn.close()
+
+
+def test_workspace_resolution_failure_also_counts(kanban_home):
+ """`dir:` workspace with no path should fail workspace resolution AND
+ count against the failure budget — not just crash the tick."""
+ conn = kb.connect()
+ try:
+ # Manually insert a broken task: dir workspace but workspace_path is NULL
+ # after initial create. We achieve this by creating via kanban_db then
+ # UPDATE-ing workspace_path to NULL.
+ tid = kb.create_task( + conn, title="x", assignee="worker", + workspace_kind="dir", workspace_path="/tmp/kanban_e2e_dir", + ) + with kb.write_txn(conn): + conn.execute( + "UPDATE tasks SET workspace_path = NULL WHERE id = ?", (tid,), + ) + res = kb.dispatch_once(conn, failure_limit=3) + task = kb.get_task(conn, tid) + assert task.spawn_failures == 1 + assert task.status == "ready" + assert task.last_spawn_error and "workspace" in task.last_spawn_error + # Run twice more → auto-blocked. + kb.dispatch_once(conn, failure_limit=3) + res = kb.dispatch_once(conn, failure_limit=3) + assert tid in res.auto_blocked + task = kb.get_task(conn, tid) + assert task.status == "blocked" + finally: + conn.close() + + +# --------------------------------------------------------------------------- +# Worker aliveness / crash detection +# --------------------------------------------------------------------------- + +def test_pid_alive_helper(): + # Our own pid is alive. + assert kb._pid_alive(os.getpid()) + # PID 0 / None / negative. + assert not kb._pid_alive(0) + assert not kb._pid_alive(None) + # A clearly-dead pid (very large, extremely unlikely to exist). + assert not kb._pid_alive(2 ** 30) + + +def test_detect_crashed_workers_reclaims(kanban_home): + """A running task whose pid vanished gets dropped to ready with a + ``crashed`` event, independent of the claim TTL.""" + def _spawn_pid_that_exits(task, ws): + # Spawn a real child that exits instantly. + import subprocess + p = subprocess.Popen( + ["python3", "-c", "pass"], stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, stdin=subprocess.DEVNULL, + ) + p.wait() + return p.pid + + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + res = kb.dispatch_once(conn, spawn_fn=_spawn_pid_that_exits) + # Brief sleep to make sure the child's pid has been reaped; on + # busy CI the pid may be reused by another process, which would + # fool _pid_alive. If that happens we accept the test still + # passing as long as the dispatcher ran without error. + time.sleep(0.2) + res2 = kb.dispatch_once(conn) + task = kb.get_task(conn, tid) + # Either crashed was detected (preferred) or the TTL reclaim path + # will eventually fire; we accept either outcome but the worker_pid + # should no longer be set. + if res2.crashed: + assert tid in res2.crashed + events = kb.list_events(conn, tid) + assert any(e.kind == "crashed" for e in events) + finally: + conn.close() + + +# --------------------------------------------------------------------------- +# Daemon loop +# --------------------------------------------------------------------------- + +def test_daemon_runs_and_stops(kanban_home): + """run_daemon should execute at least one tick and exit cleanly on + stop_event.""" + ticks = [] + stop = threading.Event() + + def _runner(): + kb.run_daemon( + interval=0.05, + stop_event=stop, + on_tick=lambda res: ticks.append(res), + ) + + t = threading.Thread(target=_runner, daemon=True) + t.start() + # Give it a few ticks. 
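+ # (0.3 s at a 0.05 s interval is roughly six ticks, enough headroom for
+ # a slow CI box to land at least one.)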
+ time.sleep(0.3) + stop.set() + t.join(timeout=2.0) + assert not t.is_alive(), "daemon should exit on stop_event" + assert len(ticks) >= 1, "expected at least one tick" + + +def test_daemon_keeps_going_after_tick_exception(kanban_home, monkeypatch): + """A tick that raises shouldn't kill the loop.""" + calls = [0] + orig_dispatch = kb.dispatch_once + + def _boom(conn, **kw): + calls[0] += 1 + if calls[0] == 1: + raise RuntimeError("simulated tick failure") + return orig_dispatch(conn, **kw) + + monkeypatch.setattr(kb, "dispatch_once", _boom) + + stop = threading.Event() + def _runner(): + kb.run_daemon(interval=0.05, stop_event=stop) + + t = threading.Thread(target=_runner, daemon=True) + t.start() + time.sleep(0.3) + stop.set() + t.join(timeout=2.0) + # At minimum, second-tick+ should have run. + assert calls[0] >= 2 + + +# --------------------------------------------------------------------------- +# Stats + age +# --------------------------------------------------------------------------- + +def test_board_stats(kanban_home): + conn = kb.connect() + try: + a = kb.create_task(conn, title="a", assignee="x") + b = kb.create_task(conn, title="b", assignee="y") + kb.complete_task(conn, a, result="done") + stats = kb.board_stats(conn) + assert stats["by_status"]["ready"] == 1 + assert stats["by_status"]["done"] == 1 + assert stats["by_assignee"]["x"]["done"] == 1 + assert stats["by_assignee"]["y"]["ready"] == 1 + assert stats["oldest_ready_age_seconds"] is not None + finally: + conn.close() + + +def test_task_age_helper(kanban_home): + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x") + task = kb.get_task(conn, tid) + age = kb.task_age(task) + assert age["created_age_seconds"] is not None + assert age["started_age_seconds"] is None + assert age["time_to_complete_seconds"] is None + finally: + conn.close() + + +# --------------------------------------------------------------------------- +# Notify subscriptions +# --------------------------------------------------------------------------- + +def test_notify_sub_crud(kanban_home): + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x") + kb.add_notify_sub( + conn, task_id=tid, platform="telegram", chat_id="123", user_id="u1", + ) + subs = kb.list_notify_subs(conn, tid) + assert len(subs) == 1 + assert subs[0]["platform"] == "telegram" + # Duplicate add is a no-op. + kb.add_notify_sub( + conn, task_id=tid, platform="telegram", chat_id="123", + ) + assert len(kb.list_notify_subs(conn, tid)) == 1 + # Distinct thread is a new row. + kb.add_notify_sub( + conn, task_id=tid, platform="telegram", chat_id="123", + thread_id="5", + ) + assert len(kb.list_notify_subs(conn, tid)) == 2 + # Remove one. + ok = kb.remove_notify_sub( + conn, task_id=tid, platform="telegram", chat_id="123", + ) + assert ok is True + assert len(kb.list_notify_subs(conn, tid)) == 1 + finally: + conn.close() + + +def test_notify_cursor_advances(kanban_home): + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="w") + kb.add_notify_sub(conn, task_id=tid, platform="telegram", chat_id="123") + # Initial: one "created" event but we only want terminal kinds. + cursor, events = kb.unseen_events_for_sub( + conn, task_id=tid, platform="telegram", chat_id="123", + kinds=["completed", "blocked"], + ) + assert events == [] + # Complete the task → new `completed` event. 
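+ # (The kinds filter is why the initial read above came back empty; this
+ # `completed` event is the first one it matches.)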
+ kb.complete_task(conn, tid, result="ok") + cursor, events = kb.unseen_events_for_sub( + conn, task_id=tid, platform="telegram", chat_id="123", + kinds=["completed", "blocked"], + ) + assert len(events) == 1 + assert events[0].kind == "completed" + # Advance cursor — next call returns empty. + kb.advance_notify_cursor( + conn, task_id=tid, platform="telegram", chat_id="123", + new_cursor=cursor, + ) + _, events2 = kb.unseen_events_for_sub( + conn, task_id=tid, platform="telegram", chat_id="123", + kinds=["completed", "blocked"], + ) + assert events2 == [] + finally: + conn.close() + + +# --------------------------------------------------------------------------- +# GC + retention +# --------------------------------------------------------------------------- + +def test_gc_events_keeps_active_task_history(kanban_home): + """gc_events should only prune rows for terminal (done/archived) tasks.""" + conn = kb.connect() + try: + alive = kb.create_task(conn, title="a", assignee="w") + done_id = kb.create_task(conn, title="b", assignee="w") + kb.complete_task(conn, done_id) + + # Force all existing events to "old" by bumping created_at backwards. + with kb.write_txn(conn): + conn.execute( + "UPDATE task_events SET created_at = ?", + (int(time.time()) - 60 * 24 * 3600,), + ) + removed = kb.gc_events(conn, older_than_seconds=30 * 24 * 3600) + # At least the done task's "created" + "completed" events gone. + assert removed >= 2 + # Alive task's events survive. + alive_events = kb.list_events(conn, alive) + assert len(alive_events) >= 1 + finally: + conn.close() + + +def test_gc_worker_logs_deletes_old_files(kanban_home): + log_dir = kanban_home / "kanban" / "logs" + log_dir.mkdir(parents=True, exist_ok=True) + old = log_dir / "old.log" + young = log_dir / "young.log" + old.write_text("stale") + young.write_text("fresh") + # Age the old file by 100 days. + past = time.time() - 100 * 24 * 3600 + os.utime(old, (past, past)) + removed = kb.gc_worker_logs(older_than_seconds=30 * 24 * 3600) + assert removed == 1 + assert not old.exists() + assert young.exists() + + +# --------------------------------------------------------------------------- +# Log rotation + accessor +# --------------------------------------------------------------------------- + +def test_worker_log_rotation_keeps_one_generation(kanban_home, tmp_path): + log_dir = kanban_home / "kanban" / "logs" + log_dir.mkdir(parents=True, exist_ok=True) + target = log_dir / "t_aaaa.log" + target.write_bytes(b"x" * (3 * 1024 * 1024)) # 3 MiB, over 2 MiB threshold + kb._rotate_worker_log(target, kb.DEFAULT_LOG_ROTATE_BYTES) + assert not target.exists() + assert (log_dir / "t_aaaa.log.1").exists() + + +def test_read_worker_log_tail(kanban_home): + log_dir = kanban_home / "kanban" / "logs" + log_dir.mkdir(parents=True, exist_ok=True) + p = log_dir / "t_beef.log" + # 10 lines + p.write_text("\n".join(f"line {i}" for i in range(10))) + full = kb.read_worker_log("t_beef") + assert full is not None and "line 0" in full + tail = kb.read_worker_log("t_beef", tail_bytes=30) + assert tail is not None + # Tail should not include line 0. + assert "line 0" not in tail + # Missing log returns None. 
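+ # (None rather than an exception, so callers can treat a missing log,
+ # e.g. right after spawn, as an ordinary state.)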
+ assert kb.read_worker_log("t_missing") is None + + +# --------------------------------------------------------------------------- +# CLI bulk verbs +# --------------------------------------------------------------------------- + +def test_cli_complete_bulk(kanban_home): + conn = kb.connect() + try: + a = kb.create_task(conn, title="a") + b = kb.create_task(conn, title="b") + c = kb.create_task(conn, title="c") + finally: + conn.close() + out = run_slash(f"complete {a} {b} {c} --result all-done") + assert out.count("Completed") == 3 + conn = kb.connect() + try: + for tid in (a, b, c): + assert kb.get_task(conn, tid).status == "done" + finally: + conn.close() + + +def test_cli_archive_bulk(kanban_home): + conn = kb.connect() + try: + a = kb.create_task(conn, title="a") + b = kb.create_task(conn, title="b") + finally: + conn.close() + out = run_slash(f"archive {a} {b}") + assert "Archived" in out + conn = kb.connect() + try: + assert kb.get_task(conn, a).status == "archived" + assert kb.get_task(conn, b).status == "archived" + finally: + conn.close() + + +def test_cli_unblock_bulk(kanban_home): + conn = kb.connect() + try: + a = kb.create_task(conn, title="a") + b = kb.create_task(conn, title="b") + kb.block_task(conn, a) + kb.block_task(conn, b) + finally: + conn.close() + out = run_slash(f"unblock {a} {b}") + assert out.count("Unblocked") == 2 + + +def test_cli_block_bulk_via_ids_flag(kanban_home): + conn = kb.connect() + try: + a = kb.create_task(conn, title="a") + b = kb.create_task(conn, title="b") + finally: + conn.close() + out = run_slash(f"block {a} need input --ids {b}") + assert out.count("Blocked") == 2 + + +def test_cli_create_with_idempotency_key(kanban_home): + out1 = run_slash("create 'x' --idempotency-key abc --json") + tid1 = json.loads(out1)["id"] + out2 = run_slash("create 'y' --idempotency-key abc --json") + tid2 = json.loads(out2)["id"] + assert tid1 == tid2 + + +# --------------------------------------------------------------------------- +# CLI stats / watch / log / notify / daemon parity +# --------------------------------------------------------------------------- + +def test_cli_stats_json(kanban_home): + conn = kb.connect() + try: + kb.create_task(conn, title="a", assignee="r") + finally: + conn.close() + out = run_slash("stats --json") + data = json.loads(out) + assert "by_status" in data + assert "by_assignee" in data + assert "oldest_ready_age_seconds" in data + + +def test_cli_notify_subscribe_and_list(kanban_home): + tid = run_slash("create 'x' --json") + tid = json.loads(tid)["id"] + out = run_slash( + f"notify-subscribe {tid} --platform telegram --chat-id 999", + ) + assert "Subscribed" in out + lst = run_slash("notify-list --json") + subs = json.loads(lst) + assert any(s["task_id"] == tid and s["platform"] == "telegram" for s in subs) + rm = run_slash( + f"notify-unsubscribe {tid} --platform telegram --chat-id 999", + ) + assert "Unsubscribed" in rm + + +def test_cli_log_missing_task(kanban_home): + # No such task → exit-style (no log for...) message on stderr, returned + # in combined output. 
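+ # (run_slash merges stderr into its returned string, so the assertion
+ # below needs no capsys capture.)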
+ out = run_slash("log t_nope") + assert "no log" in out.lower() + + +def test_cli_gc_reports_counts(kanban_home): + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x") + kb.archive_task(conn, tid) + finally: + conn.close() + out = run_slash("gc") + assert "GC complete" in out + + +# --------------------------------------------------------------------------- +# run_slash parity — every verb returns a sensible, non-crashy string +# --------------------------------------------------------------------------- + +def test_run_slash_every_verb_returns_sensible_output(kanban_home): + """Smoke-test every verb with minimal args. None may raise, none may + return the empty string (must either succeed or report a usage error).""" + # Set up a pair of tasks to reference. + conn = kb.connect() + try: + tid_a = kb.create_task(conn, title="a") + tid_b = kb.create_task(conn, title="b", parents=[tid_a]) + finally: + conn.close() + + invocations = [ + "", # no subcommand → help text + "--help", + "init", + "create 'smoke'", + "list", + "ls", + f"show {tid_a}", + f"assign {tid_a} researcher", + f"link {tid_a} {tid_b}", + f"unlink {tid_a} {tid_b}", + f"claim {tid_a}", + f"comment {tid_a} hello", + f"complete {tid_a}", + f"block {tid_b} need input", + f"unblock {tid_b}", + f"archive {tid_a}", + "dispatch --dry-run --json", + "stats --json", + "notify-list", + f"log {tid_a}", + f"context {tid_b}", + "gc", + ] + for cmd in invocations: + out = run_slash(cmd) + assert out is not None + assert out.strip() != "", f"empty output for `/kanban {cmd}`" + + +# --------------------------------------------------------------------------- +# Max-runtime enforcement (item 1 from the Multica audit) +# --------------------------------------------------------------------------- + +def test_max_runtime_terminates_overrun_worker(kanban_home): + """A running task whose elapsed time exceeds max_runtime_seconds gets + SIGTERM'd, emits a ``timed_out`` event, and goes back to ready.""" + killed = [] + def _signal_fn(pid, sig): + killed.append((pid, sig)) + + # We bypass _pid_alive by stubbing it so the grace-poll exits fast. + import hermes_cli.kanban_db as _kb + original_alive = _kb._pid_alive + _kb._pid_alive = lambda pid: False # pretend SIGTERM worked immediately + + try: + conn = kb.connect() + try: + tid = kb.create_task( + conn, title="long job", assignee="worker", + max_runtime_seconds=1, # one second cap + ) + # Spawn by hand: claim + set pid + set started_at to the past. + kb.claim_task(conn, tid) + kb._set_worker_pid(conn, tid, os.getpid()) # any live pid works + # Backdate started_at so elapsed > limit. + with kb.write_txn(conn): + conn.execute( + "UPDATE tasks SET started_at = ? 
WHERE id = ?", + (int(time.time()) - 30, tid), + ) + + timed_out = kb.enforce_max_runtime(conn, signal_fn=_signal_fn) + assert tid in timed_out + assert killed and killed[0][0] == os.getpid() + + task = kb.get_task(conn, tid) + assert task.status == "ready", f"timed-out task should reset to ready, got {task.status}" + assert task.worker_pid is None + assert task.last_heartbeat_at is None + + events = kb.list_events(conn, tid) + assert any(e.kind == "timed_out" for e in events) + to_event = next(e for e in events if e.kind == "timed_out") + assert to_event.payload["limit_seconds"] == 1 + assert to_event.payload["elapsed_seconds"] >= 30 + finally: + conn.close() + finally: + _kb._pid_alive = original_alive + + +def test_max_runtime_none_means_no_cap(kanban_home): + """A task with max_runtime_seconds=None is never timed out regardless + of how long it runs.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="uncapped", assignee="worker") + kb.claim_task(conn, tid) + kb._set_worker_pid(conn, tid, os.getpid()) + # Backdate aggressively; no cap means we don't care. + with kb.write_txn(conn): + conn.execute( + "UPDATE tasks SET started_at = ? WHERE id = ?", + (int(time.time()) - 100_000, tid), + ) + timed_out = kb.enforce_max_runtime(conn) + assert timed_out == [] + task = kb.get_task(conn, tid) + assert task.status == "running" + finally: + conn.close() + + +def test_create_task_persists_max_runtime(kanban_home): + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", max_runtime_seconds=600) + task = kb.get_task(conn, tid) + assert task.max_runtime_seconds == 600 + finally: + conn.close() + + +def test_enforce_max_runtime_integrates_with_dispatch(kanban_home, monkeypatch): + """enforce_max_runtime + dispatch_once integrate cleanly — a timed-out + task goes through ``timed_out`` → ``ready`` and dispatch_once can then + re-spawn it without re-reporting the timeout.""" + import hermes_cli.kanban_db as _kb + # Leave _pid_alive=True so the crash detector doesn't steal the task + # before timeout enforcement runs. After SIGTERM in enforce_max_runtime, + # pretend the worker died so the grace wait exits fast. + state = {"sent_term": False} + def _alive(pid): + return not state["sent_term"] + def _signal(pid, sig): + import signal as _sig + if sig == _sig.SIGTERM: + state["sent_term"] = True + monkeypatch.setattr(_kb, "_pid_alive", _alive) + + conn = kb.connect() + try: + tid = kb.create_task( + conn, title="timeout-me", assignee="worker", + max_runtime_seconds=1, + ) + kb.claim_task(conn, tid) + kb._set_worker_pid(conn, tid, os.getpid()) + with kb.write_txn(conn): + conn.execute( + "UPDATE tasks SET started_at = ? WHERE id = ?", + (int(time.time()) - 30, tid), + ) + # Use enforce_max_runtime directly with our signal stub — dispatch_once + # uses the default os.kill, but integration-wise calling + # enforce_max_runtime directly proves the kernel wiring. For the + # dispatch_once assertion, rely on its own code path by calling it + # after forcing SIGTERM via enforce_max_runtime. + before = kb.enforce_max_runtime(conn, signal_fn=_signal) + assert tid in before, "kernel enforce_max_runtime should catch the overrun" + + # Now a second dispatch_once run should be a no-op on this task + # (already released). Confirm the loop doesn't re-report it. + res = kb.dispatch_once(conn, spawn_fn=lambda t, ws: None) + task = kb.get_task(conn, tid) + # After timeout, task is back in 'ready' and will be re-spawned + # by the same pass. That's the intended behaviour. 
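+ # (Hence the two-state assertion below: 'ready' if the stub spawn hasn't
+ # picked the task back up yet, 'running' if it already has.)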
+ assert task.status in ("ready", "running") + finally: + conn.close() + + +# --------------------------------------------------------------------------- +# Heartbeat (item 2 from the Multica audit) +# --------------------------------------------------------------------------- + +def test_heartbeat_on_running_task(kanban_home): + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + kb.claim_task(conn, tid) + ok = kb.heartbeat_worker(conn, tid, note="step 3/10") + assert ok is True + task = kb.get_task(conn, tid) + assert task.last_heartbeat_at is not None + events = kb.list_events(conn, tid) + hb = [e for e in events if e.kind == "heartbeat"] + assert len(hb) == 1 + assert hb[0].payload == {"note": "step 3/10"} + finally: + conn.close() + + +def test_heartbeat_refused_when_not_running(kanban_home): + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x") # lands in ready, not running + ok = kb.heartbeat_worker(conn, tid) + assert ok is False + task = kb.get_task(conn, tid) + assert task.last_heartbeat_at is None + finally: + conn.close() + + +def test_cli_heartbeat_verb(kanban_home): + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + kb.claim_task(conn, tid) + finally: + conn.close() + out = run_slash(f"heartbeat {tid}") + assert "Heartbeat recorded" in out + + # With --note. + out = run_slash(f"heartbeat {tid} --note 'step 42'") + assert "Heartbeat recorded" in out + conn = kb.connect() + try: + events = kb.list_events(conn, tid) + notes = [e.payload.get("note") for e in events if e.kind == "heartbeat" and e.payload] + assert "step 42" in notes + finally: + conn.close() + + +# --------------------------------------------------------------------------- +# Event vocab rename + spawned event (item 3 from Multica) +# --------------------------------------------------------------------------- + +def test_recompute_ready_emits_promoted_not_ready(kanban_home): + conn = kb.connect() + try: + parent = kb.create_task(conn, title="p") + child = kb.create_task(conn, title="c", parents=[parent]) + kb.complete_task(conn, parent, result="ok") + # recompute_ready runs inside complete_task too, but call it again + # defensively. + kb.recompute_ready(conn) + events = kb.list_events(conn, child) + kinds = [e.kind for e in events] + assert "promoted" in kinds + # Old name must not appear. 
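+ # ("ready" remains a status value only; the event kind was renamed to
+ # "promoted", which the migration test further down also verifies.)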
+ assert "ready" not in kinds + finally: + conn.close() + + +def test_spawn_failure_circuit_breaker_emits_gave_up(kanban_home): + def _bad(task, ws): + raise RuntimeError("nope") + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + for _ in range(5): + kb.dispatch_once(conn, spawn_fn=_bad, failure_limit=5) + events = kb.list_events(conn, tid) + kinds = [e.kind for e in events] + assert "gave_up" in kinds + assert "spawn_auto_blocked" not in kinds + finally: + conn.close() + + +def test_spawned_event_emitted_with_pid(kanban_home): + """Successful spawn must append a ``spawned`` event with the pid in + the payload so humans tailing events see pid tracking.""" + def _spawn_returns_pid(task, ws): + return 98765 + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + kb.dispatch_once(conn, spawn_fn=_spawn_returns_pid) + events = kb.list_events(conn, tid) + spawned = [e for e in events if e.kind == "spawned"] + assert len(spawned) == 1 + assert spawned[0].payload == {"pid": 98765} + finally: + conn.close() + + +def test_migration_renames_legacy_event_kinds(tmp_path, monkeypatch): + """A DB created with the old vocab must have its event rows renamed + in place on init_db().""" + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + # Init fresh. + kb.init_db() + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x") + # Inject legacy event kinds directly. + now = int(time.time()) + with kb.write_txn(conn): + for old in ("ready", "priority", "spawn_auto_blocked"): + conn.execute( + "INSERT INTO task_events (task_id, kind, payload, created_at) " + "VALUES (?, ?, NULL, ?)", + (tid, old, now), + ) + # Re-run init_db — the migration pass should rename them. + kb.init_db() + rows = conn.execute( + "SELECT kind FROM task_events WHERE task_id = ? ORDER BY id", (tid,), + ).fetchall() + kinds = [r["kind"] for r in rows] + assert "ready" not in kinds + assert "priority" not in kinds + assert "spawn_auto_blocked" not in kinds + assert "promoted" in kinds + assert "reprioritized" in kinds + assert "gave_up" in kinds + finally: + conn.close() + + +# --------------------------------------------------------------------------- +# Assignees (item 4 from Multica) +# --------------------------------------------------------------------------- + +def test_list_profiles_on_disk(tmp_path, monkeypatch): + """list_profiles_on_disk returns directories under ~/.hermes/profiles/ + that contain a config.yaml.""" + monkeypatch.setattr(Path, "home", lambda: tmp_path) + profiles = tmp_path / ".hermes" / "profiles" + profiles.mkdir(parents=True) + (profiles / "researcher").mkdir() + (profiles / "researcher" / "config.yaml").write_text("model: {}\n") + (profiles / "writer").mkdir() + (profiles / "writer" / "config.yaml").write_text("model: {}\n") + (profiles / "empty_dir").mkdir() + # A stray file; should be ignored. 
+ (profiles / "stray.txt").write_text("noise") + + names = kb.list_profiles_on_disk() + assert names == ["researcher", "writer"] + + +def test_known_assignees_merges_disk_and_board(tmp_path, monkeypatch): + """known_assignees unions profiles on disk with currently-assigned + names, and reports per-status counts.""" + monkeypatch.setattr(Path, "home", lambda: tmp_path) + profiles = tmp_path / ".hermes" / "profiles" + profiles.mkdir(parents=True) + monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes")) + + for name in ("researcher", "writer"): + d = profiles / name + d.mkdir() + (d / "config.yaml").write_text("model: {}\n") + + kb.init_db() + conn = kb.connect() + try: + # writer has a ready task; on_board_only has a task but no profile dir. + kb.create_task(conn, title="a", assignee="writer") + kb.create_task(conn, title="b", assignee="on_board_only") + data = kb.known_assignees(conn) + finally: + conn.close() + + by_name = {d["name"]: d for d in data} + assert by_name["researcher"]["on_disk"] is True + assert by_name["researcher"]["counts"] == {} + assert by_name["writer"]["on_disk"] is True + assert by_name["writer"]["counts"] == {"ready": 1} + assert by_name["on_board_only"]["on_disk"] is False + assert by_name["on_board_only"]["counts"] == {"ready": 1} + + +def test_cli_assignees_json(kanban_home): + conn = kb.connect() + try: + kb.create_task(conn, title="x", assignee="someone") + finally: + conn.close() + out = run_slash("assignees --json") + data = json.loads(out) + names = [e["name"] for e in data] + assert "someone" in names + + +# --------------------------------------------------------------------------- +# CLI --max-runtime flag + duration parser +# --------------------------------------------------------------------------- + +def test_parse_duration_accepts_formats(): + from hermes_cli.kanban import _parse_duration + assert _parse_duration(None) is None + assert _parse_duration("") is None + assert _parse_duration("42") == 42 + assert _parse_duration("30s") == 30 + assert _parse_duration("5m") == 300 + assert _parse_duration("2h") == 7200 + assert _parse_duration("1d") == 86400 + assert _parse_duration("1.5h") == 5400 + + +def test_parse_duration_rejects_garbage(): + from hermes_cli.kanban import _parse_duration + import pytest as _p + with _p.raises(ValueError): + _parse_duration("tenminutes") + with _p.raises(ValueError): + _parse_duration("fish") + + +def test_cli_create_max_runtime_via_duration(kanban_home): + """`hermes kanban create --max-runtime 2h` should persist 7200 seconds.""" + out = run_slash("create 'long task' --max-runtime 2h --json") + data = json.loads(out) + tid = data["id"] + conn = kb.connect() + try: + task = kb.get_task(conn, tid) + assert task.max_runtime_seconds == 7200 + finally: + conn.close() + + +def test_cli_create_max_runtime_bad_format_exits_nonzero(kanban_home): + out = run_slash("create 'bad' --max-runtime fish") + assert "max-runtime" in out.lower() or "malformed" in out.lower() + + +# --------------------------------------------------------------------------- +# Runs as first-class (vulcan-artivus RFC feedback) +# --------------------------------------------------------------------------- + +def test_run_created_on_claim(kanban_home): + """claim_task opens a new task_runs row and points current_run_id at it.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + assert kb.get_task(conn, tid).current_run_id is None + + claimed = kb.claim_task(conn, tid) + assert claimed is not None + + task = 
kb.get_task(conn, tid) + assert task.current_run_id is not None + + runs = kb.list_runs(conn, tid) + assert len(runs) == 1 + r = runs[0] + assert r.id == task.current_run_id + assert r.profile == "worker" + assert r.status == "running" + assert r.outcome is None + assert r.ended_at is None + assert r.claim_lock is not None and r.claim_expires is not None + finally: + conn.close() + + +def test_run_closed_on_complete_with_summary(kanban_home): + """complete_task ends the active run with outcome='completed' and + persists summary + metadata.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + kb.claim_task(conn, tid) + ok = kb.complete_task( + conn, tid, + result="shipped", + summary="implemented rate limiter, tests pass", + metadata={"changed_files": ["limiter.py"], "tests_run": 12}, + ) + assert ok is True + + task = kb.get_task(conn, tid) + assert task.current_run_id is None + assert task.result == "shipped" + + runs = kb.list_runs(conn, tid) + assert len(runs) == 1 + r = runs[0] + assert r.status == "done" + assert r.outcome == "completed" + assert r.summary == "implemented rate limiter, tests pass" + assert r.metadata == {"changed_files": ["limiter.py"], "tests_run": 12} + assert r.ended_at is not None + finally: + conn.close() + + +def test_run_summary_falls_back_to_result(kanban_home): + """If the caller doesn't pass summary, we fall back to result so + single-run workflows don't need to pass the same string twice.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + kb.claim_task(conn, tid) + kb.complete_task(conn, tid, result="only-arg") + r = kb.latest_run(conn, tid) + assert r.summary == "only-arg" + finally: + conn.close() + + +def test_multiple_attempts_preserved_as_runs(kanban_home): + """Crash / retry / complete flow produces one run per attempt, all + visible in list_runs in chronological order.""" + import hermes_cli.kanban_db as _kb + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + + # Attempt 1: claim then force the claim to be stale by backdating + # claim_expires, then let release_stale_claims reclaim it. + kb.claim_task(conn, tid) + with kb.write_txn(conn): + conn.execute( + "UPDATE tasks SET claim_expires = ? WHERE id = ?", + (int(time.time()) - 10, tid), + ) + conn.execute( + "UPDATE task_runs SET claim_expires = ? WHERE task_id = ?", + (int(time.time()) - 10, tid), + ) + kb.release_stale_claims(conn) + + # Attempt 2: claim then crash (simulated: pid dead). + kb.claim_task(conn, tid) + kb._set_worker_pid(conn, tid, 98765) + original_alive = _kb._pid_alive + _kb._pid_alive = lambda pid: False + try: + kb.detect_crashed_workers(conn) + finally: + _kb._pid_alive = original_alive + + # Attempt 3: claim then complete. 
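+ # (Each claim opens a fresh task_runs row, so the three attempts land
+ # as three runs: reclaimed, crashed, completed.)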
+ kb.claim_task(conn, tid) + kb.complete_task(conn, tid, result="finally") + + runs = kb.list_runs(conn, tid) + assert len(runs) == 3 + assert [r.outcome for r in runs] == ["reclaimed", "crashed", "completed"] + assert runs[-1].summary == "finally" + assert kb.get_task(conn, tid).current_run_id is None + finally: + conn.close() + + +def test_run_on_block_with_reason(kanban_home): + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + kb.claim_task(conn, tid) + kb.block_task(conn, tid, reason="needs API key") + + r = kb.latest_run(conn, tid) + assert r.outcome == "blocked" + assert r.summary == "needs API key" + assert r.ended_at is not None + assert kb.get_task(conn, tid).current_run_id is None + finally: + conn.close() + + +def test_run_on_spawn_failure_records_failed_runs(kanban_home): + """Each spawn_failed event closes a run with outcome='spawn_failed', + and the Nth failure closes a run with outcome='gave_up'.""" + def _bad(task, ws): + raise RuntimeError("no PATH") + + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + for _ in range(5): + kb.dispatch_once(conn, spawn_fn=_bad, failure_limit=5) + + runs = kb.list_runs(conn, tid) + # 5 claim attempts → 5 runs. Final one is gave_up, earlier ones + # are spawn_failed. + assert len(runs) == 5 + assert runs[-1].outcome == "gave_up" + assert all(r.outcome == "spawn_failed" for r in runs[:-1]) + assert runs[-1].error and "no PATH" in runs[-1].error + finally: + conn.close() + + +def test_event_rows_carry_run_id(kanban_home): + """task_events.run_id is populated for run-scoped kinds and NULL for + task-scoped ones.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + # task-scoped: 'created' — no run yet + # run-scoped: 'claimed' + 'completed' + kb.claim_task(conn, tid) + kb.complete_task(conn, tid, result="ok") + + rows = conn.execute( + "SELECT kind, run_id FROM task_events WHERE task_id = ? ORDER BY id", + (tid,), + ).fetchall() + by_kind = {r["kind"]: r["run_id"] for r in rows} + assert by_kind["created"] is None + assert by_kind["claimed"] is not None + assert by_kind["completed"] is not None + # Both belong to the same run. + assert by_kind["claimed"] == by_kind["completed"] + finally: + conn.close() + + +def test_build_worker_context_includes_prior_attempts(kanban_home): + """A worker spawned after a prior attempt sees that attempt's outcome + + summary in its context so it can skip the failed path.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="port x", assignee="worker") + + # Attempt 1: blocked with a reason. + kb.claim_task(conn, tid) + kb.block_task(conn, tid, reason="needs clarification on IP vs user_id") + kb.unblock_task(conn, tid) + + # Attempt 2: claim (but don't complete yet) and read the context + # as this worker would see it. 
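+ # (build_worker_context is presumably the same text a freshly spawned
+ # worker gets handed, so asserting on it asserts on attempt 2's view.)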
+ kb.claim_task(conn, tid) + ctx = kb.build_worker_context(conn, tid) + + assert "Prior attempts on this task" in ctx + assert "blocked" in ctx + assert "needs clarification on IP vs user_id" in ctx + finally: + conn.close() + + +def test_build_worker_context_uses_parent_run_summary(kanban_home): + """Downstream children read the parent's run.summary + metadata, not + just task.result.""" + conn = kb.connect() + try: + parent = kb.create_task(conn, title="research", assignee="researcher") + child = kb.create_task( + conn, title="write", assignee="writer", parents=[parent], + ) + + kb.claim_task(conn, parent) + kb.complete_task( + conn, parent, + result="done", + summary="three angles explored; B looks strongest", + metadata={"sources": ["paper A", "paper B", "paper C"]}, + ) + + # child becomes ready via recompute_ready (runs inside complete_task) + ctx = kb.build_worker_context(conn, child) + assert "Parent task results" in ctx + assert "three angles explored; B looks strongest" in ctx + assert '"sources"' in ctx # metadata JSON serialized + finally: + conn.close() + + +def test_migration_backfills_inflight_run_for_legacy_db(kanban_home): + """An existing 'running' task from before task_runs existed should + get a synthesized run row so subsequent operations (complete, + heartbeat) have something to write to.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="pre-migration", assignee="worker") + # Simulate legacy: set running + claim_lock directly, leave + # current_run_id NULL and delete the run row the claim created. + kb.claim_task(conn, tid) + with kb.write_txn(conn): + conn.execute("DELETE FROM task_runs WHERE task_id = ?", (tid,)) + conn.execute( + "UPDATE tasks SET current_run_id = NULL WHERE id = ?", + (tid,), + ) + + # Sanity: no runs, no pointer. + assert kb.list_runs(conn, tid) == [] + assert kb.get_task(conn, tid).current_run_id is None + + # Re-run init_db — migration backfill should kick in. + kb.init_db() + conn2 = kb.connect() + try: + runs = kb.list_runs(conn2, tid) + assert len(runs) == 1 + assert runs[0].status == "running" + assert runs[0].profile == "worker" + task = kb.get_task(conn2, tid) + assert task.current_run_id == runs[0].id + + # Subsequent complete closes the backfilled run cleanly. + kb.complete_task(conn2, tid, result="done", summary="ok") + r = kb.latest_run(conn2, tid) + assert r.outcome == "completed" + assert r.summary == "ok" + finally: + conn2.close() + finally: + conn.close() + + +def test_forward_compat_columns_writable(kanban_home): + """v2 will route by workflow_template_id + current_step_key. In v1 + these are nullable, kernel doesn't consult them for routing, but + they must be writable so a v2 client can populate them without + schema changes.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x") + with kb.write_txn(conn): + conn.execute( + "UPDATE tasks SET workflow_template_id = ?, current_step_key = ? 
" + "WHERE id = ?", + ("code-review-v1", "implement", tid), + ) + task = kb.get_task(conn, tid) + assert task.workflow_template_id == "code-review-v1" + assert task.current_step_key == "implement" + finally: + conn.close() + + +def test_cli_runs_verb(kanban_home): + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + kb.claim_task(conn, tid) + kb.complete_task(conn, tid, result="ok", summary="shipped") + finally: + conn.close() + out = run_slash(f"runs {tid}") + assert "completed" in out + assert "shipped" in out + assert "worker" in out + + +def test_cli_runs_json(kanban_home): + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + kb.claim_task(conn, tid) + kb.complete_task( + conn, tid, result="ok", summary="shipped", + metadata={"files": 1}, + ) + finally: + conn.close() + out = run_slash(f"runs {tid} --json") + data = json.loads(out) + assert len(data) == 1 + assert data[0]["outcome"] == "completed" + assert data[0]["metadata"] == {"files": 1} + + +def test_cli_complete_with_summary_and_metadata(kanban_home): + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + kb.claim_task(conn, tid) + finally: + conn.close() + # JSON metadata must round-trip through shlex + argparse. + meta = '{"files": 3}' + out = run_slash( + "complete " + tid + " --summary \"done it\" --metadata '" + meta + "'" + ) + assert "Completed" in out + conn = kb.connect() + try: + r = kb.latest_run(conn, tid) + finally: + conn.close() + assert r.summary == "done it" + assert r.metadata == {"files": 3} + + +def test_cli_complete_bad_metadata_exits_nonzero(kanban_home): + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + kb.claim_task(conn, tid) + finally: + conn.close() + out = run_slash(f"complete {tid} --metadata not-json") + assert "metadata" in out.lower() + + +# ------------------------------------------------------------------------- +# Integration hardening (Apr 2026 audit fixes) +# ------------------------------------------------------------------------- + +def test_archive_of_running_task_closes_run(kanban_home): + """Archiving a claimed task must close the in-flight run with + outcome='reclaimed', not orphan it.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + kb.claim_task(conn, tid) + run = kb.latest_run(conn, tid) + assert run.ended_at is None + open_run_id = run.id + + assert kb.archive_task(conn, tid) is True + + task = kb.get_task(conn, tid) + assert task.status == "archived" + assert task.current_run_id is None + # The previously-active run must now be closed. + closed = kb.get_run(conn, open_run_id) + assert closed.ended_at is not None + assert closed.outcome == "reclaimed" + finally: + conn.close() + + +def test_archive_of_ready_task_does_not_create_spurious_run(kanban_home): + """No active run → archive shouldn't synthesize one.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + # Never claimed. Move to ready (task starts in 'ready' here). + assert kb.archive_task(conn, tid) is True + runs = kb.list_runs(conn, tid) + assert runs == [] # No run was ever opened; archive didn't fabricate one. + finally: + conn.close() + + +def test_dashboard_direct_status_change_off_running_closes_run(kanban_home): + """Dashboard drag-drop running->ready must close the active run. + + Importing _set_status_direct directly to simulate the PATCH handler + without spinning up FastAPI. 
+ """ + from plugins.kanban.dashboard.plugin_api import _set_status_direct + + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + kb.claim_task(conn, tid) + open_run = kb.latest_run(conn, tid) + assert open_run.ended_at is None + prev_run_id = open_run.id + + # Simulate yanking the worker back to the queue. + assert _set_status_direct(conn, tid, "ready") is True + + task = kb.get_task(conn, tid) + assert task.status == "ready" + assert task.current_run_id is None + closed = kb.get_run(conn, prev_run_id) + assert closed.ended_at is not None + assert closed.outcome == "reclaimed" + finally: + conn.close() + + +def test_dashboard_direct_status_change_within_same_state_is_noop_for_runs(kanban_home): + """todo -> ready on an unclaimed task must not create any run rows.""" + from plugins.kanban.dashboard.plugin_api import _set_status_direct + + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x") + # Force to todo for the sake of the test. + conn.execute("UPDATE tasks SET status='todo' WHERE id=?", (tid,)) + conn.commit() + assert _set_status_direct(conn, tid, "ready") is True + assert kb.list_runs(conn, tid) == [] + finally: + conn.close() + + +def test_cli_bulk_complete_with_summary_rejects(kanban_home): + conn = kb.connect() + try: + a = kb.create_task(conn, title="a", assignee="worker") + b = kb.create_task(conn, title="b", assignee="worker") + kb.claim_task(conn, a); kb.claim_task(conn, b) + finally: + conn.close() + # Bulk + summary is refused (stderr message, no mutation). + # Note: hermes_cli.main doesn't propagate sub-command exit codes + # (args.func(args) discards the return value), so we check the side + # effects instead. + from subprocess import run as _run + import os, sys + env = os.environ.copy() + r = _run( + [sys.executable, "-m", "hermes_cli.main", "kanban", + "complete", a, b, "--summary", "oops"], + capture_output=True, text=True, env=env, + ) + assert "per-task" in r.stderr, r.stderr + # The tasks must still be running (no partial apply). + conn = kb.connect() + try: + assert kb.get_task(conn, a).status == "running" + assert kb.get_task(conn, b).status == "running" + finally: + conn.close() + + +def test_cli_bulk_complete_without_summary_still_works(kanban_home): + """Bulk close with no per-task handoff is allowed — the common case.""" + conn = kb.connect() + try: + a = kb.create_task(conn, title="a", assignee="worker") + b = kb.create_task(conn, title="b", assignee="worker") + kb.claim_task(conn, a); kb.claim_task(conn, b) + finally: + conn.close() + out = run_slash(f"complete {a} {b}") + assert f"Completed {a}" in out + assert f"Completed {b}" in out + + +def test_completed_event_payload_carries_summary(kanban_home): + """The 'completed' event must embed the run summary so gateway + notifiers render structured handoffs without a second SQL hit.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + kb.claim_task(conn, tid) + kb.complete_task(conn, tid, summary="handoff line 1\nextra", + metadata={"n": 3}) + events = kb.list_events(conn, tid) + comp = [e for e in events if e.kind == "completed"] + assert len(comp) == 1 + # First-line-only, within the 400-char cap, preserved verbatim. 
+ assert comp[0].payload["summary"] == "handoff line 1" + finally: + conn.close() + + +def test_completed_event_payload_summary_none_when_missing(kanban_home): + """If the caller passes no summary AND no result, payload.summary is None.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + kb.claim_task(conn, tid) + kb.complete_task(conn, tid) # no summary, no result + events = kb.list_events(conn, tid) + comp = [e for e in events if e.kind == "completed"][0] + assert comp.payload.get("summary") is None + finally: + conn.close() + + +# ------------------------------------------------------------------------- +# Deep-scan fixes (Apr 2026 second audit) +# ------------------------------------------------------------------------- + +def test_complete_never_claimed_task_synthesizes_run(kanban_home): + """complete_task on a ready (never-claimed) task must persist the + handoff instead of silently dropping summary/metadata.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="skip claim", assignee="worker") + # Task is in 'ready' state with no run opened. + assert kb.list_runs(conn, tid) == [] + ok = kb.complete_task( + conn, tid, + summary="did it manually", + metadata={"reason": "human intervention"}, + ) + assert ok is True + + runs = kb.list_runs(conn, tid) + assert len(runs) == 1, f"expected 1 synthetic run, got {len(runs)}" + r = runs[0] + assert r.outcome == "completed" + assert r.summary == "did it manually" + assert r.metadata == {"reason": "human intervention"} + # Zero-duration synthetic run. + assert r.started_at == r.ended_at + # Task pointer still NULL (we never claimed, never opened a run). + assert kb.get_task(conn, tid).current_run_id is None + + # Event carries the synthetic run_id. + evts = [e for e in kb.list_events(conn, tid) if e.kind == "completed"] + assert len(evts) == 1 + assert evts[0].run_id == r.id + finally: + conn.close() + + +def test_block_never_claimed_task_synthesizes_run(kanban_home): + """block_task on a ready task must persist --reason on a synthetic run.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="drop this", assignee="worker") + ok = kb.block_task(conn, tid, reason="deprioritized") + assert ok is True + + runs = kb.list_runs(conn, tid) + assert len(runs) == 1 + r = runs[0] + assert r.outcome == "blocked" + assert r.summary == "deprioritized" + assert r.started_at == r.ended_at + + evts = [e for e in kb.list_events(conn, tid) if e.kind == "blocked"] + assert evts[0].run_id == r.id + finally: + conn.close() + + +def test_complete_never_claimed_without_handoff_skips_synthesis(kanban_home): + """If a bulk-complete passes no summary/metadata/result, don't spam + the runs table with empty synthetic rows.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="simple", assignee="worker") + ok = kb.complete_task(conn, tid) # no handoff fields + assert ok is True + assert kb.list_runs(conn, tid) == [] # no synthetic row + finally: + conn.close() + + +def test_event_dataclass_carries_run_id(kanban_home): + """list_events and the Event dataclass must expose run_id so + downstream consumers (notifier, dashboard) can group by attempt.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="x", assignee="worker") + kb.claim_task(conn, tid) + run_id = kb.latest_run(conn, tid).id + kb.complete_task(conn, tid, summary="done") + + events = kb.list_events(conn, tid) + kinds_with_run = { + e.kind: e.run_id for e in events if e.run_id is not None + } + # 'created' should NOT 
have a run_id (task-scoped). + created = [e for e in events if e.kind == "created"][0] + assert created.run_id is None + # 'claimed' and 'completed' must have run_id. + assert kinds_with_run.get("claimed") == run_id + assert kinds_with_run.get("completed") == run_id + finally: + conn.close() + + +def test_unseen_events_for_sub_includes_run_id(kanban_home): + """Gateway notifier path must also surface run_id on events.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="notify test", assignee="worker") + kb.add_notify_sub( + conn, task_id=tid, platform="telegram", + chat_id="12345", thread_id="", + ) + kb.claim_task(conn, tid) + run_id = kb.latest_run(conn, tid).id + kb.complete_task(conn, tid, summary="notify-ready") + + cursor, events = kb.unseen_events_for_sub( + conn, task_id=tid, platform="telegram", + chat_id="12345", thread_id="", + kinds=("completed",), + ) + assert len(events) == 1 + assert events[0].run_id == run_id + finally: + conn.close() + + +def test_claim_task_recovers_from_invariant_leak(kanban_home): + """Belt-and-suspenders: if a prior run somehow leaked (stranded + current_run_id on a ready task), claim_task should recover rather + than strand it further.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="invariant test", assignee="worker") + # Manually engineer the invariant violation: create a run, then + # flip status back to 'ready' without closing the run. + kb.claim_task(conn, tid) + leaked_run_id = kb.latest_run(conn, tid).id + conn.execute( + "UPDATE tasks SET status = 'ready', claim_lock = NULL, " + "claim_expires = NULL " + "WHERE id = ?", (tid,), + ) + conn.commit() + # The leaked run is still open. + assert kb.get_run(conn, leaked_run_id).ended_at is None + + # Now re-claim — the defensive recovery must close the leak. + claimed = kb.claim_task(conn, tid) + assert claimed is not None + leaked = kb.get_run(conn, leaked_run_id) + assert leaked.ended_at is not None + assert leaked.outcome == "reclaimed" + # New run opened and pointed to. + new_run = kb.latest_run(conn, tid) + assert new_run.id != leaked_run_id + assert new_run.ended_at is None + finally: + conn.close() + + +# ------------------------------------------------------------------------- +# Live-test findings (Apr 2026 third pass: auto-init, show --json carries runs) +# ------------------------------------------------------------------------- + +def test_cli_create_on_fresh_home_auto_inits(tmp_path, monkeypatch): + """First CLI action on an empty HERMES_HOME must not error with + 'no such table: tasks' — init_db auto-runs now.""" + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + # Sanity: kanban.db does NOT exist yet. + import subprocess as _sp + import sys as _sys + worktree_root = Path(__file__).resolve().parents[2] + env = {**os.environ, "HERMES_HOME": str(home), + "PYTHONPATH": str(worktree_root)} + r = _sp.run( + [_sys.executable, "-m", "hermes_cli.main", "kanban", + "create", "smoke", "--assignee", "worker", "--json"], + capture_output=True, text=True, env=env, + ) + assert r.returncode == 0, f"rc={r.returncode} stderr={r.stderr}" + import json as _json + out = _json.loads(r.stdout) + assert out["status"] == "ready" + # DB file exists now. + assert (home / "kanban.db").exists() + + +def test_connect_auto_inits_fresh_db(tmp_path, monkeypatch): + """Calling connect() on a fresh HERMES_HOME must create the + schema. 
Previously callers had to remember kb.init_db() first."""
+ home = tmp_path / ".hermes"
+ home.mkdir()
+ monkeypatch.setenv("HERMES_HOME", str(home))
+ monkeypatch.setattr(Path, "home", lambda: tmp_path)
+ # Flush the module-level cache so this path looks fresh.
+ kb._INITIALIZED_PATHS.clear()
+
+ # Direct connect() without init_db() — used to raise "no such table".
+ conn = kb.connect()
+ try:
+ tid = kb.create_task(conn, title="x")
+ assert tid is not None
+ assert kb.get_task(conn, tid).title == "x"
+ finally:
+ conn.close()
+
+
+def test_cli_show_json_carries_runs(kanban_home):
+ """hermes kanban show --json must include runs[] so scripts that
+ inspect attempt history don't need a separate 'runs' call."""
+ conn = kb.connect()
+ try:
+ tid = kb.create_task(conn, title="show test", assignee="worker")
+ kb.claim_task(conn, tid)
+ kb.complete_task(conn, tid, summary="inspected")
+ finally:
+ conn.close()
+
+ out = run_slash(f"show {tid} --json")
+ # run_slash returns the JSON document as one combined string, and
+ # json.loads already tolerates surrounding whitespace, so no fallback
+ # parse is needed.
+ data = json.loads(out)
+
+ assert "runs" in data, f"show --json must include runs[], got keys: {list(data.keys())}"
+ assert len(data["runs"]) == 1
+ r = data["runs"][0]
+ assert r["outcome"] == "completed"
+ assert r["summary"] == "inspected"
+ # Events also carry run_id field.
+ for e in data["events"]:
+ assert "run_id" in e
+
+
+# -------------------------------------------------------------------------
+# Pre-merge audit by @erosika (issue #16102 comment 4331125835) — fixes
+# -------------------------------------------------------------------------
+
+def test_unblock_invariant_recovery(kanban_home):
+ """unblock_task must leave current_run_id NULL even if some other
+ code path left it dangling. Engineer the leak, verify recovery."""
+ conn = kb.connect()
+ try:
+ tid = kb.create_task(conn, title="unblock invariant", assignee="worker")
+ # Claim the task (which opens a run), then force status to 'blocked'
+ # while leaving current_run_id pointing at the still-open run; this
+ # is the invariant violation erosika flagged.
+ kb.claim_task(conn, tid)
+ leaked_run_id = kb.latest_run(conn, tid).id
+ # Force the bad state.
+ conn.execute(
+ "UPDATE tasks SET status = 'blocked' WHERE id = ?", (tid,),
+ )
+ conn.commit()
+ # current_run_id is still set; run is still open.
+ assert kb.get_task(conn, tid).current_run_id == leaked_run_id
+ assert kb.get_run(conn, leaked_run_id).ended_at is None
+
+ # Unblock — the defensive recovery must close the leaked run.
+ assert kb.unblock_task(conn, tid) is True
+ task = kb.get_task(conn, tid)
+ assert task.status == "ready"
+ assert task.current_run_id is None
+ leaked = kb.get_run(conn, leaked_run_id)
+ assert leaked.outcome == "reclaimed"
+ assert leaked.ended_at is not None
+ finally:
+ conn.close()
+
+
+def test_unblock_normal_path_no_spurious_run(kanban_home):
+ """Happy path: claim -> block -> unblock. Unblock must be a no-op
+ on runs (block_task already closed the run cleanly)."""
+ conn = kb.connect()
+ try:
+ tid = kb.create_task(conn, title="normal unblock", assignee="worker")
+ kb.claim_task(conn, tid)
+ kb.block_task(conn, tid, reason="pause")
+ runs_before = len(kb.list_runs(conn, tid))
+ assert kb.unblock_task(conn, tid) is True
+ runs_after = len(kb.list_runs(conn, tid))
+ # No new run created by the happy-path unblock.
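+ # (block_task already closed the run with outcome='blocked'; unblock
+ # only flips the status back to ready.)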
+        assert runs_after == runs_before
+        # Task in ready with cleared pointer.
+        t = kb.get_task(conn, tid)
+        assert t.status == "ready"
+        assert t.current_run_id is None
+    finally:
+        conn.close()
+
+
+def test_migration_backfill_idempotent_under_re_run(tmp_path, monkeypatch):
+    """init_db must be safe to re-run repeatedly: however many times the
+    backfill migration executes, each orphaned in-flight task must end up
+    with exactly one run row."""
+    home = tmp_path / ".hermes"
+    home.mkdir()
+    monkeypatch.setenv("HERMES_HOME", str(home))
+    monkeypatch.setattr(Path, "home", lambda: tmp_path)
+
+    # Fresh DB, one task left in 'running' with a claim but no run row.
+    # Simulates a pre-runs-era DB.
+    kb.init_db()
+    conn = kb.connect()
+    try:
+        tid = kb.create_task(conn, title="legacy inflight", assignee="worker")
+        now = int(time.time())
+        conn.execute(
+            "UPDATE tasks SET status='running', claim_lock='old', "
+            "claim_expires=?, started_at=?, current_run_id=NULL WHERE id=?",
+            (now + 900, now, tid),
+        )
+        # Drop any synthetic run the normal claim path would have made.
+        conn.execute("DELETE FROM task_runs WHERE task_id=?", (tid,))
+        conn.commit()
+
+        # Re-run init_db 3x — each should detect the orphan-inflight and
+        # install exactly ONE run row, not three.
+        for _ in range(3):
+            kb.init_db()
+
+        runs = kb.list_runs(conn, tid)
+        assert len(runs) == 1, f"expected exactly 1 backfilled run, got {len(runs)}"
+        # Pointer should be installed.
+        assert kb.get_task(conn, tid).current_run_id == runs[0].id
+    finally:
+        conn.close()
+
+
+def test_build_worker_context_includes_role_history(kanban_home):
+    """build_worker_context must surface recent completed runs for the
+    same assignee, giving cross-task continuity."""
+    conn = kb.connect()
+    try:
+        # Three completed tasks for 'reviewer'
+        for title, summary in [
+            ("Review security PR #1", "approved, focus on CSRF"),
+            ("Review security PR #2", "requested changes: SQL injection vector"),
+            ("Review security PR #3", "approved, rate-limit added"),
+        ]:
+            tid = kb.create_task(conn, title=title, assignee="reviewer")
+            kb.claim_task(conn, tid)
+            kb.complete_task(conn, tid, summary=summary)
+
+        # Now a NEW task for reviewer, not yet done
+        new_tid = kb.create_task(
+            conn, title="Review perf PR", assignee="reviewer",
+        )
+        ctx = kb.build_worker_context(conn, new_tid)
+
+        assert "## Recent work by @reviewer" in ctx
+        assert "Review security PR #3" in ctx
+        assert "approved, rate-limit added" in ctx
+        # Current task should be excluded from its own recent work list.
+        assert "Review perf PR" not in ctx.split("## Recent work by")[1]
+    finally:
+        conn.close()
+
+
+def test_build_worker_context_role_history_skipped_when_no_assignee(kanban_home):
+    """If task has no assignee, the role-history section is omitted."""
+    conn = kb.connect()
+    try:
+        tid = kb.create_task(conn, title="orphan task")
+        # No assignee passed; create_task defaults to None.
+        ctx = kb.build_worker_context(conn, tid)
+        assert "## Recent work by" not in ctx
+    finally:
+        conn.close()
+
+
+def test_build_worker_context_role_history_bounded_to_5(kanban_home):
+    """Role history must be capped at 5 entries even when the assignee
+    has many completed tasks."""
+    conn = kb.connect()
+    try:
+        for i in range(10):
+            tid = kb.create_task(
+                conn, title=f"prior #{i}", assignee="worker",
+            )
+            kb.claim_task(conn, tid)
+            kb.complete_task(conn, tid, summary=f"done #{i}")
+
+        new_tid = kb.create_task(conn, title="new", assignee="worker")
+        ctx = kb.build_worker_context(conn, new_tid)
+        # Section should exist and contain exactly 5 bullet lines.
+        section = ctx.split("## Recent work by @worker")[1]
+        bullets = [l for l in section.splitlines() if l.startswith("- ")]
+        assert len(bullets) == 5, f"expected 5 bullets, got {len(bullets)}"
+    finally:
+        conn.close()
+
+
+# -------------------------------------------------------------------------
+# Battle-test findings (May 2026: stress/ suite exposed zombie + id collision)
+# -------------------------------------------------------------------------

+@pytest.mark.skipif("linux" not in __import__("sys").platform,
+                    reason="zombie detection is Linux-specific")
+def test_pid_alive_detects_zombie(kanban_home):
+    """_pid_alive must return False for a zombie process.
+
+    Without the /proc check, kill(pid, 0) succeeds against zombies
+    (process table entry exists until parent reaps), so the dispatcher
+    would treat a dead-but-unreaped worker as alive. This catches a
+    worker that exited normally but whose parent hasn't called wait().
+    """
+    import subprocess as _sp
+    proc = _sp.Popen(
+        ["sleep", "3600"],
+        stdin=_sp.DEVNULL, stdout=_sp.DEVNULL, stderr=_sp.DEVNULL,
+    )
+    pid = proc.pid
+    try:
+        assert kb._pid_alive(pid) is True  # live non-zombie
+        os.kill(pid, 9)
+        time.sleep(0.3)
+        # Verify /proc reports zombie state so the test is actually
+        # exercising the zombie path and not some other liveness failure
+        with open(f"/proc/{pid}/status") as f:
+            state_line = next(
+                (l for l in f if l.startswith("State:")), ""
+            )
+        assert "Z" in state_line, f"expected zombie, got {state_line!r}"
+        # And _pid_alive must see through it.
+        assert kb._pid_alive(pid) is False
+    finally:
+        try:
+            proc.wait(timeout=1)
+        except Exception:
+            pass
+
+
+def test_task_ids_dont_collide_at_scale(kanban_home):
+    """ID generator must be wide enough that creating 10k tasks doesn't
+    hit a UNIQUE constraint violation.
+
+    Regression test for the 2-hex-byte ID (65k space), which crosses 50%
+    collision probability at roughly 300 tasks (birthday paradox) and
+    collides almost surely long before 10k. Current generator uses
+    4 hex bytes (4.3B space).
+    """
+    conn = kb.connect()
+    try:
+        # 500 is enough to exercise the generator diversity without
+        # making the test slow. At 2-hex-byte width (N = 65536) the
+        # collision chance over 500 creates was already
+        # 1 - exp(-500*499 / (2*65536)) ≈ 85%, so this size reliably
+        # caught the old generator; we don't need the full 10k run to
+        # prove the regression.
+        ids = [kb.create_task(conn, title=f"scale-{i}") for i in range(500)]
+        assert len(ids) == len(set(ids)), "ID collision at N=500"
+        # Sanity: every id matches the expected format
+        for tid in ids[:10]:
+            assert tid.startswith("t_")
+            assert len(tid) == 10  # "t_" + 8 hex chars
+    finally:
+        conn.close()
+
+
+def test_cli_show_clamps_negative_elapsed(kanban_home):
+    """When NTP jumps backward between claim and complete, started_at
+    can exceed ended_at. CLI display must clamp to 0, not print '-3600s'.
+ """ + conn = kb.connect() + try: + tid = kb.create_task(conn, title="time-skewed", assignee="worker") + kb.claim_task(conn, tid) + # Force a future started_at via raw SQL — simulates NTP jump. + future = int(time.time()) + 3600 + conn.execute( + "UPDATE task_runs SET started_at = ? WHERE task_id = ?", + (future, tid), + ) + conn.commit() + # Complete normally (ended_at < started_at now) + kb.complete_task(conn, tid, summary="after skew") + finally: + conn.close() + + # Both `show` and `runs` render this. Neither should display a + # negative elapsed token. We check specifically for the pattern + # `-s` (the elapsed column) rather than any minus sign, + # since timestamps legitimately contain dashes (2026-04-28). + out_show = run_slash(f"show {tid}") + out_runs = run_slash(f"runs {tid}") + import re as _re + neg_elapsed = _re.compile(r"-\d+s") + assert not neg_elapsed.search(out_show), ( + f"show output has negative elapsed: {out_show!r}" + ) + assert not neg_elapsed.search(out_runs), ( + f"runs output has negative elapsed: {out_runs!r}" + ) + # Should show "0s" for the clamped elapsed + assert "0s" in out_show or "0s" in out_runs + + +def test_resolve_workspace_rejects_relative_dir_path(kanban_home): + """dir: workspace_path must be absolute. A relative path like + '../../../tmp/attacker' would be resolved against the dispatcher's + CWD — a confused-deputy escape vector.""" + conn = kb.connect() + try: + tid = kb.create_task( + conn, title="path-trav", assignee="worker", + workspace_kind="dir", + workspace_path="../../../tmp/attacker", + ) + task = kb.get_task(conn, tid) + # Storage is verbatim — that's fine. + assert task.workspace_path == "../../../tmp/attacker" + # But resolution must refuse. + with pytest.raises(ValueError, match=r"non-absolute"): + kb.resolve_workspace(task) + finally: + conn.close() + + +def test_resolve_workspace_accepts_absolute_dir_path(kanban_home, tmp_path): + """Legitimate absolute paths are accepted and created.""" + conn = kb.connect() + try: + abs_path = str(tmp_path / "my-workspace") + tid = kb.create_task( + conn, title="legit", assignee="worker", + workspace_kind="dir", + workspace_path=abs_path, + ) + task = kb.get_task(conn, tid) + resolved = kb.resolve_workspace(task) + assert str(resolved) == abs_path + assert resolved.exists() + finally: + conn.close() + + +def test_resolve_workspace_rejects_relative_worktree_path(kanban_home): + """Worktree paths also must be absolute when explicitly set.""" + conn = kb.connect() + try: + tid = kb.create_task( + conn, title="wt", assignee="worker", + workspace_kind="worktree", + workspace_path="../escape", + ) + with pytest.raises(ValueError, match=r"non-absolute"): + kb.resolve_workspace(kb.get_task(conn, tid)) + finally: + conn.close() + + +def test_build_worker_context_caps_prior_attempts(kanban_home): + """When a task has more than _CTX_MAX_PRIOR_ATTEMPTS runs, only + the most recent N are shown in full; earlier attempts are summarised + in a one-line marker so the worker knows more exist without + blowing the prompt.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="retry", assignee="worker") + # Force 25 closed runs + for i in range(25): + kb.claim_task(conn, tid) + kb._end_run(conn, tid, outcome="reclaimed", + summary=f"attempt {i} summary") + conn.execute( + "UPDATE tasks SET status='ready', claim_lock=NULL, " + "claim_expires=NULL WHERE id=?", (tid,), + ) + conn.commit() + + ctx = kb.build_worker_context(conn, tid) + # Check: only _CTX_MAX_PRIOR_ATTEMPTS attempt headers present + 
+        attempt_count = ctx.count("### Attempt ")
+        assert attempt_count == kb._CTX_MAX_PRIOR_ATTEMPTS, (
+            f"expected {kb._CTX_MAX_PRIOR_ATTEMPTS} attempts shown, got {attempt_count}"
+        )
+        # And the "omitted" marker appears with the right count
+        omitted_count = 25 - kb._CTX_MAX_PRIOR_ATTEMPTS
+        assert f"{omitted_count} earlier attempt" in ctx, (
+            f"expected omitted-count marker, got ctx=\n{ctx[:2000]}"
+        )
+        # Total size is bounded — empirically we expect << 100KB even
+        # for 1000 attempts (capped to N * ~500 chars)
+        assert len(ctx) < 20_000, (
+            f"context should be bounded even at 25 runs, got {len(ctx)} chars"
+        )
+        # Attempt numbering starts at the real index (not renumbered)
+        assert "Attempt 16 " in ctx, (
+            "first-shown attempt should be numbered 16 (25 - 10 + 1)"
+        )
+    finally:
+        conn.close()
+
+
+def test_build_worker_context_caps_comments(kanban_home):
+    """Same cap for comments — comment-storm tasks stay bounded."""
+    conn = kb.connect()
+    try:
+        tid = kb.create_task(conn, title="chatty", assignee="worker")
+        for i in range(100):
+            kb.add_comment(conn, tid, author=f"u{i % 3}", body=f"comment {i}")
+        ctx = kb.build_worker_context(conn, tid)
+        # Only the _CTX_MAX_COMMENTS most-recent are shown in full.
+        # Counting author markers like "**u" is unreliable with three
+        # distinct authors (u0/u1/u2), so count the "comment N" body
+        # text instead.
+        body_count = sum(1 for line in ctx.splitlines() if line.startswith("comment "))
+        assert body_count == kb._CTX_MAX_COMMENTS, (
+            f"expected {kb._CTX_MAX_COMMENTS} comments shown, got {body_count}"
+        )
+        omitted = 100 - kb._CTX_MAX_COMMENTS
+        assert f"{omitted} earlier comment" in ctx
+    finally:
+        conn.close()
+
+
+def test_build_worker_context_caps_huge_summary(kanban_home):
+    """A 1 MB summary on a single prior run must not dominate the
+    worker prompt. Per-field cap truncates with a visible ellipsis."""
+    conn = kb.connect()
+    try:
+        tid = kb.create_task(conn, title="giant", assignee="worker")
+        kb.claim_task(conn, tid)
+        huge = "X" * (1024 * 1024)  # 1 MB
+        kb._end_run(conn, tid, outcome="reclaimed", summary=huge)
+        conn.execute(
+            "UPDATE tasks SET status='ready', claim_lock=NULL, "
+            "claim_expires=NULL WHERE id=?", (tid,),
+        )
+        conn.commit()
+
+        ctx = kb.build_worker_context(conn, tid)
+        # Much smaller than 1 MB
+        assert len(ctx) < 10_000, (
+            f"1 MB summary should be capped, got {len(ctx)} chars"
+        )
+        # Truncation marker present
+        assert "truncated" in ctx
+    finally:
+        conn.close()
+
+
+def test_default_spawn_auto_loads_kanban_worker_skill(kanban_home, monkeypatch):
+    """The dispatcher's _default_spawn must include --skills kanban-worker
+    in its argv so every worker loads the skill automatically, even if
+    the profile hasn't wired it into its default skills config.
+
+    We intercept Popen to capture the argv without actually spawning a
+    hermes subprocess (which would hang trying to call an LLM).
+ """ + captured = {} + + class FakeProc: + def __init__(self): + self.pid = 99999 + + def fake_popen(cmd, **kwargs): + captured["cmd"] = cmd + captured["env"] = kwargs.get("env", {}) + return FakeProc() + + monkeypatch.setattr("subprocess.Popen", fake_popen) + + conn = kb.connect() + try: + tid = kb.create_task(conn, title="skill-loading test", + assignee="some-profile") + task = kb.get_task(conn, tid) + workspace = kb.resolve_workspace(task) + pid = kb._default_spawn(task, str(workspace)) + assert pid == 99999 + finally: + conn.close() + + cmd = captured["cmd"] + assert "--skills" in cmd, f"spawn argv missing --skills: {cmd}" + idx = cmd.index("--skills") + assert cmd[idx + 1] == "kanban-worker", ( + f"expected 'kanban-worker', got {cmd[idx + 1]!r}" + ) + # Assignee + task env are still present + assert "some-profile" in cmd + env = captured["env"] + assert env.get("HERMES_KANBAN_TASK") == tid + assert env.get("HERMES_PROFILE") == "some-profile" + + + +# --------------------------------------------------------------------------- +# Per-task force-loaded skills +# --------------------------------------------------------------------------- + +def test_create_task_persists_skills(kanban_home): + """Task.skills round-trips through create -> get_task.""" + conn = kb.connect() + try: + tid = kb.create_task( + conn, + title="skilled task", + assignee="linguist", + skills=["translation", "github-code-review"], + ) + task = kb.get_task(conn, tid) + assert task is not None + assert task.skills == ["translation", "github-code-review"] + finally: + conn.close() + + +def test_create_task_skills_none_stays_none(kanban_home): + """Default behavior: no skills arg means Task.skills is None.""" + conn = kb.connect() + try: + tid = kb.create_task(conn, title="plain task", assignee="someone") + task = kb.get_task(conn, tid) + assert task is not None + assert task.skills is None + finally: + conn.close() + + +def test_create_task_skills_deduplicates_and_strips(kanban_home): + """Dup names collapse; whitespace is stripped; empties dropped.""" + conn = kb.connect() + try: + tid = kb.create_task( + conn, + title="dedupe", + assignee="x", + skills=[" translation ", "translation", "", None, "review"], + ) + task = kb.get_task(conn, tid) + assert task.skills == ["translation", "review"] + finally: + conn.close() + + +def test_create_task_skills_rejects_comma_embedded(kanban_home): + """Comma in a skill name is rejected — force caller to pass a list.""" + conn = kb.connect() + try: + with pytest.raises(ValueError, match="cannot contain comma"): + kb.create_task( + conn, + title="bad", + assignee="x", + skills=["a,b"], + ) + finally: + conn.close() + + +def test_default_spawn_appends_per_task_skills(kanban_home, monkeypatch): + """Dispatcher argv must carry one `--skills X` pair per task skill, + in addition to the built-in kanban-worker.""" + captured = {} + + class FakeProc: + def __init__(self): + self.pid = 42 + + def fake_popen(cmd, **kwargs): + captured["cmd"] = cmd + return FakeProc() + + monkeypatch.setattr("subprocess.Popen", fake_popen) + + conn = kb.connect() + try: + tid = kb.create_task( + conn, + title="multi-skill worker", + assignee="linguist", + skills=["translation", "github-code-review"], + ) + task = kb.get_task(conn, tid) + workspace = kb.resolve_workspace(task) + kb._default_spawn(task, str(workspace)) + finally: + conn.close() + + cmd = captured["cmd"] + # Count every --skills pair and gather the skill names. 
+    skill_names = []
+    for i, tok in enumerate(cmd):
+        if tok == "--skills" and i + 1 < len(cmd):
+            skill_names.append(cmd[i + 1])
+    # kanban-worker first (built-in), then per-task extras in order.
+    assert skill_names[0] == "kanban-worker", skill_names
+    assert "translation" in skill_names
+    assert "github-code-review" in skill_names
+    # --skills must appear BEFORE the `chat` subcommand so argparse
+    # attaches them to the top-level parser, not the subcommand.
+    chat_idx = cmd.index("chat")
+    last_skills_idx = max(
+        i for i, tok in enumerate(cmd) if tok == "--skills"
+    )
+    assert last_skills_idx < chat_idx, (
+        f"--skills must come before 'chat' in argv: {cmd}"
+    )
+
+
+def test_default_spawn_dedupes_kanban_worker_from_task_skills(kanban_home, monkeypatch):
+    """If a task explicitly lists 'kanban-worker', we don't double-pass it."""
+    captured = {}
+
+    class FakeProc:
+        pid = 1
+
+    def fake_popen(cmd, **kwargs):
+        captured["cmd"] = cmd
+        return FakeProc()
+
+    monkeypatch.setattr("subprocess.Popen", fake_popen)
+
+    conn = kb.connect()
+    try:
+        tid = kb.create_task(
+            conn, title="dup", assignee="x",
+            skills=["kanban-worker", "translation"],
+        )
+        task = kb.get_task(conn, tid)
+        workspace = kb.resolve_workspace(task)
+        kb._default_spawn(task, str(workspace))
+    finally:
+        conn.close()
+
+    cmd = captured["cmd"]
+    worker_pairs = [
+        i for i, tok in enumerate(cmd)
+        if tok == "--skills" and i + 1 < len(cmd) and cmd[i + 1] == "kanban-worker"
+    ]
+    assert len(worker_pairs) == 1, (
+        f"kanban-worker appeared {len(worker_pairs)} times in argv: {cmd}"
+    )
+
+
+def test_cli_create_skill_flag_repeatable(kanban_home):
+    """`hermes kanban create --skill a --skill b` persists the list."""
+    import json
+    out = run_slash(
+        "create 'multi-skill' --assignee linguist "
+        "--skill translation --skill github-code-review --json"
+    )
+    tid = json.loads(out)["id"]
+    with kb.connect() as conn:
+        task = kb.get_task(conn, tid)
+        assert task.skills == ["translation", "github-code-review"]
+
+
+def test_cli_create_without_skill_flag_leaves_none(kanban_home):
+    """No --skill on the CLI means Task.skills stays None (not []) —
+    we don't want to silently write [] when the user didn't opt in."""
+    import json
+    out = run_slash("create 'no-skill' --assignee x --json")
+    tid = json.loads(out)["id"]
+    with kb.connect() as conn:
+        task = kb.get_task(conn, tid)
+        assert task.skills is None
+
+
+def test_cli_show_renders_skills(kanban_home):
+    """`hermes kanban show <id>` prints a skills row when present."""
+    import json
+    out = run_slash(
+        "create 'show-test' --assignee x "
+        "--skill translation --json"
+    )
+    tid = json.loads(out)["id"]
+    shown = run_slash(f"show {tid}")
+    assert "skills:" in shown
+    assert "translation" in shown
+
+
+def test_legacy_db_without_skills_column_migrates(tmp_path):
+    """_migrate_add_optional_columns is idempotent and adds skills
+    when absent. Run it twice on a pared-down schema to confirm."""
+    import sqlite3
+    db_path = tmp_path / "legacy.db"
+    conn = sqlite3.connect(str(db_path))
+    conn.row_factory = sqlite3.Row
+    # Build a pared-down legacy tasks table that lacks all the
+    # optional columns _migrate_add_optional_columns knows how to
+    # add. We deliberately omit `skills` so we can observe its
+    # introduction.
+    conn.execute("""
+        CREATE TABLE tasks (
+            id TEXT PRIMARY KEY,
+            title TEXT NOT NULL,
+            status TEXT NOT NULL,
+            created_at INTEGER NOT NULL
+        )
+    """)
+    # task_events is also touched by the migrator for run_id backfill.
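+    # (Assumption: the migrator probes task_events unconditionally, so this
+    # pared-down schema must include it or the run would fail before ever
+    # reaching the tasks.skills ALTER.)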
+ conn.execute(""" + CREATE TABLE task_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + task_id TEXT NOT NULL, + kind TEXT NOT NULL, + payload TEXT, + created_at INTEGER NOT NULL + ) + """) + conn.execute( + "INSERT INTO tasks (id, title, status, created_at) " + "VALUES ('legacy', 'old task', 'ready', 1)" + ) + conn.commit() + + before = {r[1] for r in conn.execute("PRAGMA table_info(tasks)")} + assert "skills" not in before + + # Run the migrator directly — the same function connect() calls. + kb._migrate_add_optional_columns(conn) + after = {r[1] for r in conn.execute("PRAGMA table_info(tasks)")} + assert "skills" in after, f"migration did not add skills column: {after}" + + # Idempotent: running again must not raise. + kb._migrate_add_optional_columns(conn) + + # Legacy row has skills=NULL -> Task.skills=None. + row = conn.execute("SELECT * FROM tasks WHERE id = 'legacy'").fetchone() + # from_row needs additional columns; build a Task manually via the + # path from_row takes for a skills NULL/missing. + keys = set(row.keys()) + assert "skills" in keys + assert row["skills"] is None + conn.close() + + + +# --------------------------------------------------------------------------- +# Gateway-embedded dispatcher: config, CLI warnings, daemon deprecation stub +# --------------------------------------------------------------------------- + +def test_config_default_dispatch_in_gateway_is_true(): + """Default config must enable gateway-embedded dispatch out of the box. + Flipping this default to false is a user-visible behaviour change and + should require a conscious migration.""" + from hermes_cli.config import DEFAULT_CONFIG + kanban = DEFAULT_CONFIG.get("kanban", {}) + assert kanban.get("dispatch_in_gateway") is True, ( + "kanban.dispatch_in_gateway default should be True; got " + f"{kanban.get('dispatch_in_gateway')!r}" + ) + interval = kanban.get("dispatch_interval_seconds") + assert isinstance(interval, (int, float)) and interval >= 1, ( + f"dispatch_interval_seconds must be a positive number, got {interval!r}" + ) + + +def test_check_dispatcher_presence_silent_when_gateway_running(monkeypatch): + from hermes_cli import kanban as kb_cli + monkeypatch.setattr("gateway.status.get_running_pid", lambda: 12345) + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: {"kanban": {"dispatch_in_gateway": True}}, + ) + running, msg = kb_cli._check_dispatcher_presence() + assert running is True + # Either empty (if import failed defensively) or includes the pid. 
+ assert msg == "" or "12345" in msg + + +def test_check_dispatcher_presence_warns_when_no_gateway(monkeypatch): + from hermes_cli import kanban as kb_cli + monkeypatch.setattr("gateway.status.get_running_pid", lambda: None) + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: {"kanban": {"dispatch_in_gateway": True}}, + ) + running, msg = kb_cli._check_dispatcher_presence() + assert running is False + assert "hermes gateway start" in msg + + +def test_check_dispatcher_presence_warns_when_flag_off(monkeypatch): + """Gateway is up but dispatch_in_gateway=false -> warning.""" + from hermes_cli import kanban as kb_cli + monkeypatch.setattr("gateway.status.get_running_pid", lambda: 999) + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: {"kanban": {"dispatch_in_gateway": False}}, + ) + running, msg = kb_cli._check_dispatcher_presence() + assert running is False + assert "dispatch_in_gateway" in msg + + +def test_check_dispatcher_presence_silent_on_probe_error(monkeypatch): + """If the probe itself errors, we stay silent.""" + from hermes_cli import kanban as kb_cli + def _raise(): + raise RuntimeError("boom") + monkeypatch.setattr("gateway.status.get_running_pid", _raise) + running, msg = kb_cli._check_dispatcher_presence() + assert running is True + assert msg == "" + + +def _make_create_ns(**overrides): + """Build a Namespace suitable for kb_cli._cmd_create().""" + ns = argparse.Namespace( + title="x", body=None, assignee="worker", + created_by="user", workspace="scratch", tenant=None, + priority=0, parent=None, triage=False, + idempotency_key=None, max_runtime=None, skills=None, + json=False, + ) + for k, v in overrides.items(): + setattr(ns, k, v) + return ns + + +def test_cli_create_warns_when_no_gateway(kanban_home, monkeypatch, capsys): + """ready+assigned task + no gateway -> warning on stderr.""" + from hermes_cli import kanban as kb_cli + monkeypatch.setattr("gateway.status.get_running_pid", lambda: None) + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: {"kanban": {"dispatch_in_gateway": True}}, + ) + ns = _make_create_ns(title="warn-me", assignee="worker") + assert kb_cli._cmd_create(ns) == 0 + captured = capsys.readouterr() + # Stderr has the warning prefix + guidance. 
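+    # (The exact wording is illustrative, e.g. "no dispatcher is running;
+    # start one with `hermes gateway start`"; only the substring asserted
+    # below is contractual.)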
+ assert "hermes gateway start" in captured.err + + +def test_cli_create_silent_when_gateway_up(kanban_home, monkeypatch, capsys): + """gateway running + dispatch enabled -> no warning.""" + from hermes_cli import kanban as kb_cli + monkeypatch.setattr("gateway.status.get_running_pid", lambda: 4242) + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: {"kanban": {"dispatch_in_gateway": True}}, + ) + ns = _make_create_ns(title="silent", assignee="worker") + assert kb_cli._cmd_create(ns) == 0 + captured = capsys.readouterr() + assert "hermes gateway start" not in captured.err + + +def test_cli_create_no_warn_on_triage(kanban_home, monkeypatch, capsys): + """Triage tasks can't be dispatched -> no warning.""" + from hermes_cli import kanban as kb_cli + monkeypatch.setattr("gateway.status.get_running_pid", lambda: None) + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: {"kanban": {"dispatch_in_gateway": True}}, + ) + ns = _make_create_ns(title="triage-task", assignee=None, triage=True) + assert kb_cli._cmd_create(ns) == 0 + err = capsys.readouterr().err + assert "hermes gateway start" not in err + + +def test_cli_create_no_warn_unassigned(kanban_home, monkeypatch, capsys): + """Unassigned tasks can't be dispatched -> no warning.""" + from hermes_cli import kanban as kb_cli + monkeypatch.setattr("gateway.status.get_running_pid", lambda: None) + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: {"kanban": {"dispatch_in_gateway": True}}, + ) + ns = _make_create_ns(title="nobody", assignee=None) + assert kb_cli._cmd_create(ns) == 0 + err = capsys.readouterr().err + assert "hermes gateway start" not in err + + +def test_cli_daemon_without_force_prints_deprecation_exits_2(kanban_home, capsys): + """`hermes kanban daemon` (no --force) is a deprecation stub.""" + from hermes_cli import kanban as kb_cli + ns = argparse.Namespace( + force=False, interval=60.0, max=None, failure_limit=3, + pidfile=None, verbose=False, + ) + rc = kb_cli._cmd_daemon(ns) + assert rc == 2 + err = capsys.readouterr().err + assert "DEPRECATED" in err + assert "hermes gateway start" in err + + +def test_cli_daemon_help_marks_deprecated(): + """The argparse help string on `daemon` mentions deprecation so users + scanning `--help` see the migration before running the stub.""" + import argparse as _ap + from hermes_cli import kanban as kb_cli + root = _ap.ArgumentParser() + subs = root.add_subparsers() + kb_cli.build_parser(subs) + # Walk the subparser tree to find the daemon action. 
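+    # argparse keeps one _SubParsersAction per add_subparsers() call, and
+    # its _choices_actions pseudo-actions carry each subcommand's help
+    # string. Private API, but stable enough for a help-text assertion.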
+ daemon_help = None + for action in root._actions: + if isinstance(action, _ap._SubParsersAction): + for name, parser in action.choices.items(): + if name == "kanban": + for sub_action in parser._actions: + if isinstance(sub_action, _ap._SubParsersAction): + for sname, _ in sub_action.choices.items(): + if sname == "daemon": + daemon_help = sub_action._choices_actions + break + # _choices_actions is a list of _ChoicesPseudoAction-like objects with .help + found_deprecation = False + if daemon_help: + for act in daemon_help: + if getattr(act, "dest", "") == "daemon": + if "DEPRECATED" in (act.help or ""): + found_deprecation = True + break + assert found_deprecation, ( + "daemon subparser help should be marked DEPRECATED so users see " + "the migration guidance in `hermes kanban --help` output" + ) + + +# --------------------------------------------------------------------------- +# Gateway embedded dispatcher watcher +# --------------------------------------------------------------------------- + +def test_gateway_dispatcher_watcher_respects_config_flag_off(monkeypatch): + """dispatch_in_gateway=false -> watcher exits fast, no loop.""" + import asyncio + from gateway.run import GatewayRunner + import hermes_cli.config as _cfg_mod + + runner = object.__new__(GatewayRunner) + runner._running = True + + monkeypatch.setattr( + _cfg_mod, "load_config", + lambda: {"kanban": {"dispatch_in_gateway": False}}, + ) + asyncio.run( + asyncio.wait_for( + runner._kanban_dispatcher_watcher(), + timeout=3.0, + ) + ) + + +def test_gateway_dispatcher_watcher_respects_env_override(monkeypatch): + """HERMES_KANBAN_DISPATCH_IN_GATEWAY=0 disables without touching config.""" + import asyncio + from gateway.run import GatewayRunner + monkeypatch.setenv("HERMES_KANBAN_DISPATCH_IN_GATEWAY", "0") + + runner = object.__new__(GatewayRunner) + runner._running = True + asyncio.run( + asyncio.wait_for( + runner._kanban_dispatcher_watcher(), + timeout=3.0, + ) + ) + + +def test_gateway_dispatcher_watcher_env_truthy_uses_config(monkeypatch): + """Truthy env value doesn't force-enable — config still decides. + (We only treat explicit falses as an override; unset or truthy + defers to config.)""" + import asyncio + from gateway.run import GatewayRunner + import hermes_cli.config as _cfg_mod + + monkeypatch.setenv("HERMES_KANBAN_DISPATCH_IN_GATEWAY", "yes") + monkeypatch.setattr( + _cfg_mod, "load_config", + lambda: {"kanban": {"dispatch_in_gateway": False}}, + ) + + runner = object.__new__(GatewayRunner) + runner._running = True + # config says false, env is truthy — watcher should still exit + # (because config is authoritative when env isn't a falsey override). 
+ asyncio.run( + asyncio.wait_for( + runner._kanban_dispatcher_watcher(), + timeout=3.0, + ) + ) diff --git a/tests/hermes_cli/test_kanban_db.py b/tests/hermes_cli/test_kanban_db.py new file mode 100644 index 00000000000..fcc6396be40 --- /dev/null +++ b/tests/hermes_cli/test_kanban_db.py @@ -0,0 +1,438 @@ +"""Tests for the Kanban DB layer (hermes_cli.kanban_db).""" + +from __future__ import annotations + +import concurrent.futures +import os +import time +from pathlib import Path + +import pytest + +from hermes_cli import kanban_db as kb + + +@pytest.fixture +def kanban_home(tmp_path, monkeypatch): + """Isolated HERMES_HOME with an empty kanban DB.""" + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + kb.init_db() + return home + + +# --------------------------------------------------------------------------- +# Schema / init +# --------------------------------------------------------------------------- + +def test_init_db_is_idempotent(kanban_home): + # Second call should not error or drop data. + with kb.connect() as conn: + kb.create_task(conn, title="persisted") + kb.init_db() + with kb.connect() as conn: + tasks = kb.list_tasks(conn) + assert len(tasks) == 1 + assert tasks[0].title == "persisted" + + +def test_init_creates_expected_tables(kanban_home): + with kb.connect() as conn: + rows = conn.execute( + "SELECT name FROM sqlite_master WHERE type='table' ORDER BY name" + ).fetchall() + names = {r["name"] for r in rows} + assert {"tasks", "task_links", "task_comments", "task_events"} <= names + + +# --------------------------------------------------------------------------- +# Task creation + status inference +# --------------------------------------------------------------------------- + +def test_create_task_no_parents_is_ready(kanban_home): + with kb.connect() as conn: + tid = kb.create_task(conn, title="ship it", assignee="alice") + t = kb.get_task(conn, tid) + assert t is not None + assert t.status == "ready" + assert t.assignee == "alice" + assert t.workspace_kind == "scratch" + + +def test_create_task_with_parent_is_todo_until_parent_done(kanban_home): + with kb.connect() as conn: + p = kb.create_task(conn, title="parent") + c = kb.create_task(conn, title="child", parents=[p]) + assert kb.get_task(conn, c).status == "todo" + kb.complete_task(conn, p, result="ok") + assert kb.get_task(conn, c).status == "ready" + + +def test_create_task_unknown_parent_errors(kanban_home): + with kb.connect() as conn, pytest.raises(ValueError, match="unknown parent"): + kb.create_task(conn, title="orphan", parents=["t_ghost"]) + + +def test_workspace_kind_validation(kanban_home): + with kb.connect() as conn, pytest.raises(ValueError, match="workspace_kind"): + kb.create_task(conn, title="bad ws", workspace_kind="cloud") + + +# --------------------------------------------------------------------------- +# Links + dependency resolution +# --------------------------------------------------------------------------- + +def test_link_demotes_ready_child_to_todo_when_parent_not_done(kanban_home): + with kb.connect() as conn: + a = kb.create_task(conn, title="a") + b = kb.create_task(conn, title="b") + assert kb.get_task(conn, b).status == "ready" + kb.link_tasks(conn, a, b) + assert kb.get_task(conn, b).status == "todo" + + +def test_link_keeps_ready_child_when_parent_already_done(kanban_home): + with kb.connect() as conn: + a = kb.create_task(conn, title="a") + kb.complete_task(conn, a) + b = 
kb.create_task(conn, title="b") + assert kb.get_task(conn, b).status == "ready" + kb.link_tasks(conn, a, b) + assert kb.get_task(conn, b).status == "ready" + + +def test_link_rejects_self_loop(kanban_home): + with kb.connect() as conn: + a = kb.create_task(conn, title="a") + with pytest.raises(ValueError, match="itself"): + kb.link_tasks(conn, a, a) + + +def test_link_detects_cycle(kanban_home): + with kb.connect() as conn: + a = kb.create_task(conn, title="a") + b = kb.create_task(conn, title="b", parents=[a]) + c = kb.create_task(conn, title="c", parents=[b]) + with pytest.raises(ValueError, match="cycle"): + kb.link_tasks(conn, c, a) + with pytest.raises(ValueError, match="cycle"): + kb.link_tasks(conn, b, a) + + +def test_recompute_ready_cascades_through_chain(kanban_home): + with kb.connect() as conn: + a = kb.create_task(conn, title="a") + b = kb.create_task(conn, title="b", parents=[a]) + c = kb.create_task(conn, title="c", parents=[b]) + assert [kb.get_task(conn, x).status for x in (a, b, c)] == \ + ["ready", "todo", "todo"] + kb.complete_task(conn, a) + assert kb.get_task(conn, b).status == "ready" + kb.complete_task(conn, b) + assert kb.get_task(conn, c).status == "ready" + + +def test_recompute_ready_fan_in_waits_for_all_parents(kanban_home): + with kb.connect() as conn: + a = kb.create_task(conn, title="a") + b = kb.create_task(conn, title="b") + c = kb.create_task(conn, title="c", parents=[a, b]) + kb.complete_task(conn, a) + assert kb.get_task(conn, c).status == "todo" + kb.complete_task(conn, b) + assert kb.get_task(conn, c).status == "ready" + + +# --------------------------------------------------------------------------- +# Atomic claim (CAS) +# --------------------------------------------------------------------------- + +def test_claim_once_wins_second_loses(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x", assignee="a") + first = kb.claim_task(conn, t, claimer="host:1") + assert first is not None and first.status == "running" + second = kb.claim_task(conn, t, claimer="host:2") + assert second is None + + +def test_claim_fails_on_non_ready(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x") + # Move to todo by introducing an unsatisfied parent. + p = kb.create_task(conn, title="p") + kb.link_tasks(conn, p, t) + assert kb.get_task(conn, t).status == "todo" + assert kb.claim_task(conn, t) is None + + +def test_stale_claim_reclaimed(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x", assignee="a") + kb.claim_task(conn, t) + # Rewind claim_expires so it looks stale. + conn.execute( + "UPDATE tasks SET claim_expires = ? WHERE id = ?", + (int(time.time()) - 3600, t), + ) + reclaimed = kb.release_stale_claims(conn) + assert reclaimed == 1 + assert kb.get_task(conn, t).status == "ready" + + +def test_heartbeat_extends_claim(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x", assignee="a") + claimer = "host:hb" + kb.claim_task(conn, t, claimer=claimer, ttl_seconds=60) + original = kb.get_task(conn, t).claim_expires + # Rewind then heartbeat. + conn.execute("UPDATE tasks SET claim_expires = ? 
WHERE id = ?", (0, t)) + ok = kb.heartbeat_claim(conn, t, claimer=claimer, ttl_seconds=3600) + assert ok + new = kb.get_task(conn, t).claim_expires + assert new > int(time.time()) + 3000 + + +def test_concurrent_claims_only_one_wins(kanban_home): + """Fire N threads claiming the same task; exactly one must win.""" + with kb.connect() as conn: + t = kb.create_task(conn, title="race", assignee="a") + + def attempt(i): + with kb.connect() as c: + return kb.claim_task(c, t, claimer=f"host:{i}") + + n_workers = 8 + with concurrent.futures.ThreadPoolExecutor(max_workers=n_workers) as ex: + results = list(ex.map(attempt, range(n_workers))) + winners = [r for r in results if r is not None] + assert len(winners) == 1 + assert winners[0].status == "running" + + +# --------------------------------------------------------------------------- +# Complete / block / unblock / archive / assign +# --------------------------------------------------------------------------- + +def test_complete_records_result(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x") + assert kb.complete_task(conn, t, result="done and dusted") + task = kb.get_task(conn, t) + assert task.status == "done" + assert task.result == "done and dusted" + assert task.completed_at is not None + + +def test_block_then_unblock(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x", assignee="a") + kb.claim_task(conn, t) + assert kb.block_task(conn, t, reason="need input") + assert kb.get_task(conn, t).status == "blocked" + assert kb.unblock_task(conn, t) + assert kb.get_task(conn, t).status == "ready" + + +def test_assign_refuses_while_running(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x", assignee="a") + kb.claim_task(conn, t) + with pytest.raises(RuntimeError, match="currently running"): + kb.assign_task(conn, t, "b") + + +def test_assign_reassigns_when_not_running(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x", assignee="a") + assert kb.assign_task(conn, t, "b") + assert kb.get_task(conn, t).assignee == "b" + + +def test_archive_hides_from_default_list(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x") + kb.complete_task(conn, t) + assert kb.archive_task(conn, t) + assert len(kb.list_tasks(conn)) == 0 + assert len(kb.list_tasks(conn, include_archived=True)) == 1 + + +# --------------------------------------------------------------------------- +# Comments / events / worker context +# --------------------------------------------------------------------------- + +def test_comments_recorded_in_order(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x") + kb.add_comment(conn, t, "user", "first") + kb.add_comment(conn, t, "researcher", "second") + comments = kb.list_comments(conn, t) + assert [c.body for c in comments] == ["first", "second"] + assert [c.author for c in comments] == ["user", "researcher"] + + +def test_empty_comment_rejected(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x") + with pytest.raises(ValueError, match="body is required"): + kb.add_comment(conn, t, "user", "") + + +def test_events_capture_lifecycle(kanban_home): + with kb.connect() as conn: + t = kb.create_task(conn, title="x", assignee="a") + kb.claim_task(conn, t) + kb.complete_task(conn, t, result="ok") + events = kb.list_events(conn, t) + kinds = [e.kind for e in events] + assert "created" in kinds + assert "claimed" in kinds + assert "completed" in kinds + + 
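+
+# The worker-context assertions below probe for substring markers rather
+# than exact layout. From the context-cap tests elsewhere in this suite we
+# know build_worker_context renders headings like "## Recent work by
+# @<assignee>" and "### Attempt <n>"; the parent-result and comment
+# sections checked here are asserted by marker only, so their exact
+# headings stay unspecified on purpose.
+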
+def test_worker_context_includes_parent_results_and_comments(kanban_home):
+    with kb.connect() as conn:
+        p = kb.create_task(conn, title="p")
+        kb.complete_task(conn, p, result="PARENT_RESULT_MARKER")
+        c = kb.create_task(conn, title="child", parents=[p])
+        kb.add_comment(conn, c, "user", "CLARIFICATION_MARKER")
+        ctx = kb.build_worker_context(conn, c)
+        assert "PARENT_RESULT_MARKER" in ctx
+        assert "CLARIFICATION_MARKER" in ctx
+        assert c in ctx
+        assert "child" in ctx
+
+
+# ---------------------------------------------------------------------------
+# Dispatcher
+# ---------------------------------------------------------------------------

+def test_dispatch_dry_run_does_not_claim(kanban_home):
+    with kb.connect() as conn:
+        t1 = kb.create_task(conn, title="a", assignee="alice")
+        t2 = kb.create_task(conn, title="b", assignee="bob")
+        res = kb.dispatch_once(conn, dry_run=True)
+        assert {s[0] for s in res.spawned} == {t1, t2}
+    with kb.connect() as conn:
+        # Dry run must NOT mutate status.
+        assert kb.get_task(conn, t1).status == "ready"
+        assert kb.get_task(conn, t2).status == "ready"
+
+
+def test_dispatch_skips_unassigned(kanban_home):
+    with kb.connect() as conn:
+        t = kb.create_task(conn, title="floater")
+        res = kb.dispatch_once(conn, dry_run=True)
+        assert t in res.skipped_unassigned
+        assert not res.spawned
+
+
+def test_dispatch_promotes_ready_and_spawns(kanban_home):
+    spawns = []
+
+    def fake_spawn(task, workspace):
+        spawns.append((task.id, task.assignee, workspace))
+
+    with kb.connect() as conn:
+        p = kb.create_task(conn, title="p", assignee="alice")
+        c = kb.create_task(conn, title="c", assignee="bob", parents=[p])
+        # Finish parent outside dispatch; promotion happens inside.
+        kb.complete_task(conn, p)
+        res = kb.dispatch_once(conn, spawn_fn=fake_spawn)
+        # Spawned c only (p was already done when dispatch was called).
+        assert len(spawns) == 1
+        assert spawns[0][0] == c
+        assert spawns[0][1] == "bob"
+    # c is now running
+    with kb.connect() as conn:
+        assert kb.get_task(conn, c).status == "running"
+
+
+def test_dispatch_spawn_failure_releases_claim(kanban_home):
+    def boom(task, workspace):
+        raise RuntimeError("spawn failed")
+
+    with kb.connect() as conn:
+        t = kb.create_task(conn, title="boom", assignee="alice")
+        kb.dispatch_once(conn, spawn_fn=boom)
+        # Must return to ready so the next tick can retry.
+        assert kb.get_task(conn, t).status == "ready"
+        assert kb.get_task(conn, t).claim_lock is None
+
+
+def test_dispatch_reclaims_stale_before_spawning(kanban_home):
+    with kb.connect() as conn:
+        t = kb.create_task(conn, title="x", assignee="alice")
+        kb.claim_task(conn, t)
+        conn.execute(
+            "UPDATE tasks SET claim_expires = ? WHERE id = ?",
+            (int(time.time()) - 1, t),
+        )
+        res = kb.dispatch_once(conn, dry_run=True)
+        assert res.reclaimed == 1
+
+
+# ---------------------------------------------------------------------------
+# Workspace resolution
+# ---------------------------------------------------------------------------

+def test_scratch_workspace_created_under_hermes_home(kanban_home):
+    with kb.connect() as conn:
+        t = kb.create_task(conn, title="x")
+        task = kb.get_task(conn, t)
+        ws = kb.resolve_workspace(task)
+        assert ws.exists()
+        assert ws.is_dir()
+        assert "kanban" in str(ws)
+
+
+def test_dir_workspace_honors_given_path(kanban_home, tmp_path):
+    target = tmp_path / "my-vault"
+    with kb.connect() as conn:
+        t = kb.create_task(
+            conn, title="biz", workspace_kind="dir", workspace_path=str(target)
+        )
+        task = kb.get_task(conn, t)
+        ws = kb.resolve_workspace(task)
+        assert ws == target
+        assert ws.exists()
+
+
+def test_worktree_workspace_returns_intended_path(kanban_home, tmp_path):
+    target = str(tmp_path / ".worktrees" / "my-task")
+    with kb.connect() as conn:
+        t = kb.create_task(
+            conn, title="ship", workspace_kind="worktree", workspace_path=target
+        )
+        task = kb.get_task(conn, t)
+        ws = kb.resolve_workspace(task)
+        # We do NOT auto-create worktrees; the worker's skill handles that.
+        assert str(ws) == target
+
+
+# ---------------------------------------------------------------------------
+# Tenancy
+# ---------------------------------------------------------------------------

+def test_tenant_column_filters_listings(kanban_home):
+    with kb.connect() as conn:
+        kb.create_task(conn, title="a1", tenant="biz-a")
+        kb.create_task(conn, title="b1", tenant="biz-b")
+        kb.create_task(conn, title="shared")  # no tenant
+        biz_a = kb.list_tasks(conn, tenant="biz-a")
+        biz_b = kb.list_tasks(conn, tenant="biz-b")
+        assert [t.title for t in biz_a] == ["a1"]
+        assert [t.title for t in biz_b] == ["b1"]
+
+
+def test_tenant_propagates_to_events(kanban_home):
+    with kb.connect() as conn:
+        t = kb.create_task(conn, title="tenant-task", tenant="biz-a")
+        events = kb.list_events(conn, t)
+        # The "created" event should have tenant in its payload.
+        created = [e for e in events if e.kind == "created"]
+        assert created and created[0].payload.get("tenant") == "biz-a"
diff --git a/tests/plugins/test_kanban_dashboard_plugin.py b/tests/plugins/test_kanban_dashboard_plugin.py
new file mode 100644
index 00000000000..4bbc621f1aa
--- /dev/null
+++ b/tests/plugins/test_kanban_dashboard_plugin.py
@@ -0,0 +1,889 @@
+"""Tests for the Kanban dashboard plugin backend (plugins/kanban/dashboard/plugin_api.py).
+
+The plugin mounts as /api/plugins/kanban/ inside the dashboard's FastAPI app,
+but here we attach its router to a bare FastAPI instance so we can test the
+REST surface without spinning up the whole dashboard.
+""" + +from __future__ import annotations + +import importlib.util +import os +import sys +import time +from pathlib import Path + +import pytest +from fastapi import FastAPI +from fastapi.testclient import TestClient + +from hermes_cli import kanban_db as kb + + +# --------------------------------------------------------------------------- +# Fixtures +# --------------------------------------------------------------------------- + + +def _load_plugin_router(): + """Dynamically load plugins/kanban/dashboard/plugin_api.py and return its router.""" + repo_root = Path(__file__).resolve().parents[2] + plugin_file = repo_root / "plugins" / "kanban" / "dashboard" / "plugin_api.py" + assert plugin_file.exists(), f"plugin file missing: {plugin_file}" + + spec = importlib.util.spec_from_file_location( + "hermes_dashboard_plugin_kanban_test", plugin_file, + ) + assert spec is not None and spec.loader is not None + mod = importlib.util.module_from_spec(spec) + sys.modules[spec.name] = mod + spec.loader.exec_module(mod) + return mod.router + + +@pytest.fixture +def kanban_home(tmp_path, monkeypatch): + """Isolated HERMES_HOME with an empty kanban DB.""" + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + kb.init_db() + return home + + +@pytest.fixture +def client(kanban_home): + app = FastAPI() + app.include_router(_load_plugin_router(), prefix="/api/plugins/kanban") + return TestClient(app) + + +# --------------------------------------------------------------------------- +# GET /board on an empty DB +# --------------------------------------------------------------------------- + + +def test_board_empty(client): + r = client.get("/api/plugins/kanban/board") + assert r.status_code == 200 + data = r.json() + # All canonical columns present (triage + the rest), each empty. + names = [c["name"] for c in data["columns"]] + for expected in ("triage", "todo", "ready", "running", "blocked", "done"): + assert expected in names, f"missing column {expected}: {names}" + assert all(len(c["tasks"]) == 0 for c in data["columns"]) + assert data["tenants"] == [] + assert data["assignees"] == [] + assert data["latest_event_id"] == 0 + + +# --------------------------------------------------------------------------- +# POST /tasks then GET /board sees it +# --------------------------------------------------------------------------- + + +def test_create_task_appears_on_board(client): + r = client.post( + "/api/plugins/kanban/tasks", + json={ + "title": "Research LLM caching", + "assignee": "researcher", + "priority": 3, + "tenant": "acme", + }, + ) + assert r.status_code == 200, r.text + task = r.json()["task"] + assert task["title"] == "Research LLM caching" + assert task["assignee"] == "researcher" + assert task["status"] == "ready" # no parents -> immediately ready + assert task["priority"] == 3 + assert task["tenant"] == "acme" + task_id = task["id"] + + # Board now lists it under 'ready'. 
+ r = client.get("/api/plugins/kanban/board") + assert r.status_code == 200 + data = r.json() + ready = next(c for c in data["columns"] if c["name"] == "ready") + assert len(ready["tasks"]) == 1 + assert ready["tasks"][0]["id"] == task_id + assert "acme" in data["tenants"] + assert "researcher" in data["assignees"] + + +def test_tenant_filter(client): + client.post("/api/plugins/kanban/tasks", json={"title": "A", "tenant": "t1"}) + client.post("/api/plugins/kanban/tasks", json={"title": "B", "tenant": "t2"}) + + r = client.get("/api/plugins/kanban/board?tenant=t1") + counts = {c["name"]: len(c["tasks"]) for c in r.json()["columns"]} + total = sum(counts.values()) + assert total == 1 + + r = client.get("/api/plugins/kanban/board?tenant=t2") + total = sum(len(c["tasks"]) for c in r.json()["columns"]) + assert total == 1 + + +# --------------------------------------------------------------------------- +# GET /tasks/:id returns body + comments + events + links +# --------------------------------------------------------------------------- + + +def test_task_detail_includes_links_and_events(client): + parent = client.post( + "/api/plugins/kanban/tasks", json={"title": "parent"}, + ).json()["task"] + child = client.post( + "/api/plugins/kanban/tasks", + json={"title": "child", "parents": [parent["id"]]}, + ).json()["task"] + assert child["status"] == "todo" # parent not done yet + + # Detail for the child shows the parent link. + r = client.get(f"/api/plugins/kanban/tasks/{child['id']}") + assert r.status_code == 200 + data = r.json() + assert data["task"]["id"] == child["id"] + assert parent["id"] in data["links"]["parents"] + + # Detail for the parent shows the child. + r = client.get(f"/api/plugins/kanban/tasks/{parent['id']}") + assert child["id"] in r.json()["links"]["children"] + + # Events exist from creation. + assert len(data["events"]) >= 1 + + +def test_task_detail_404_on_unknown(client): + r = client.get("/api/plugins/kanban/tasks/does-not-exist") + assert r.status_code == 404 + + +# --------------------------------------------------------------------------- +# PATCH /tasks/:id — status transitions +# --------------------------------------------------------------------------- + + +def test_patch_status_complete(client): + t = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"] + r = client.patch( + f"/api/plugins/kanban/tasks/{t['id']}", + json={"status": "done", "result": "shipped"}, + ) + assert r.status_code == 200 + assert r.json()["task"]["status"] == "done" + + # Board reflects the move. + done = next( + c for c in client.get("/api/plugins/kanban/board").json()["columns"] + if c["name"] == "done" + ) + assert any(x["id"] == t["id"] for x in done["tasks"]) + + +def test_patch_block_then_unblock(client): + t = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"] + r = client.patch( + f"/api/plugins/kanban/tasks/{t['id']}", + json={"status": "blocked", "block_reason": "need input"}, + ) + assert r.status_code == 200 + assert r.json()["task"]["status"] == "blocked" + + r = client.patch( + f"/api/plugins/kanban/tasks/{t['id']}", + json={"status": "ready"}, + ) + assert r.status_code == 200 + assert r.json()["task"]["status"] == "ready" + + +def test_patch_drag_drop_move_todo_to_ready(client): + """Direct status write: the drag-drop path for statuses without a + dedicated verb (e.g. 
manually promoting todo -> ready).""" + parent = client.post("/api/plugins/kanban/tasks", json={"title": "p"}).json()["task"] + child = client.post( + "/api/plugins/kanban/tasks", + json={"title": "c", "parents": [parent["id"]]}, + ).json()["task"] + assert child["status"] == "todo" + + r = client.patch( + f"/api/plugins/kanban/tasks/{child['id']}", + json={"status": "ready"}, + ) + assert r.status_code == 200 + assert r.json()["task"]["status"] == "ready" + + +def test_patch_reassign(client): + t = client.post( + "/api/plugins/kanban/tasks", + json={"title": "x", "assignee": "a"}, + ).json()["task"] + r = client.patch( + f"/api/plugins/kanban/tasks/{t['id']}", + json={"assignee": "b"}, + ) + assert r.status_code == 200 + assert r.json()["task"]["assignee"] == "b" + + +def test_patch_priority_and_edit(client): + t = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"] + r = client.patch( + f"/api/plugins/kanban/tasks/{t['id']}", + json={"priority": 5, "title": "renamed"}, + ) + assert r.status_code == 200 + data = r.json()["task"] + assert data["priority"] == 5 + assert data["title"] == "renamed" + + +def test_patch_invalid_status(client): + t = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"] + r = client.patch( + f"/api/plugins/kanban/tasks/{t['id']}", + json={"status": "banana"}, + ) + assert r.status_code == 400 + + +# --------------------------------------------------------------------------- +# Comments + Links +# --------------------------------------------------------------------------- + + +def test_add_comment(client): + t = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"] + r = client.post( + f"/api/plugins/kanban/tasks/{t['id']}/comments", + json={"body": "how's progress?", "author": "teknium"}, + ) + assert r.status_code == 200 + + r = client.get(f"/api/plugins/kanban/tasks/{t['id']}") + comments = r.json()["comments"] + assert len(comments) == 1 + assert comments[0]["body"] == "how's progress?" 
+ assert comments[0]["author"] == "teknium" + + +def test_add_comment_empty_rejected(client): + t = client.post("/api/plugins/kanban/tasks", json={"title": "x"}).json()["task"] + r = client.post( + f"/api/plugins/kanban/tasks/{t['id']}/comments", + json={"body": " "}, + ) + assert r.status_code == 400 + + +def test_add_link_and_delete_link(client): + a = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"] + b = client.post("/api/plugins/kanban/tasks", json={"title": "b"}).json()["task"] + + r = client.post( + "/api/plugins/kanban/links", + json={"parent_id": a["id"], "child_id": b["id"]}, + ) + assert r.status_code == 200 + + r = client.get(f"/api/plugins/kanban/tasks/{b['id']}") + assert a["id"] in r.json()["links"]["parents"] + + r = client.delete( + "/api/plugins/kanban/links", + params={"parent_id": a["id"], "child_id": b["id"]}, + ) + assert r.status_code == 200 + assert r.json()["ok"] is True + + +def test_add_link_cycle_rejected(client): + a = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"] + b = client.post("/api/plugins/kanban/tasks", json={"title": "b"}).json()["task"] + client.post( + "/api/plugins/kanban/links", + json={"parent_id": a["id"], "child_id": b["id"]}, + ) + r = client.post( + "/api/plugins/kanban/links", + json={"parent_id": b["id"], "child_id": a["id"]}, + ) + assert r.status_code == 400 + + +# --------------------------------------------------------------------------- +# Dispatch nudge +# --------------------------------------------------------------------------- + + +def test_dispatch_dry_run(client): + client.post( + "/api/plugins/kanban/tasks", + json={"title": "work", "assignee": "researcher"}, + ) + r = client.post("/api/plugins/kanban/dispatch?dry_run=true&max=4") + assert r.status_code == 200 + body = r.json() + # DispatchResult is serialized as a dataclass dict. + assert isinstance(body, dict) + + +# --------------------------------------------------------------------------- +# Triage column (new v1 status) +# --------------------------------------------------------------------------- + + +def test_create_triage_lands_in_triage_column(client): + r = client.post( + "/api/plugins/kanban/tasks", + json={"title": "rough idea, spec me", "triage": True}, + ) + assert r.status_code == 200 + task = r.json()["task"] + assert task["status"] == "triage" + + r = client.get("/api/plugins/kanban/board") + triage = next(c for c in r.json()["columns"] if c["name"] == "triage") + assert len(triage["tasks"]) == 1 + assert triage["tasks"][0]["title"] == "rough idea, spec me" + + +def test_triage_task_not_promoted_to_ready(client): + """Triage tasks must stay in triage even when they have no parents.""" + client.post( + "/api/plugins/kanban/tasks", + json={"title": "must stay put", "triage": True}, + ) + # Run the dispatcher — it should NOT promote the triage task. + client.post("/api/plugins/kanban/dispatch?dry_run=false&max=4") + r = client.get("/api/plugins/kanban/board") + triage = next(c for c in r.json()["columns"] if c["name"] == "triage") + ready = next(c for c in r.json()["columns"] if c["name"] == "ready") + assert len(triage["tasks"]) == 1 + assert len(ready["tasks"]) == 0 + + +def test_patch_status_triage_works(client): + """A user (or specifier) can push a task back into triage, and out of it.""" + t = client.post( + "/api/plugins/kanban/tasks", json={"title": "x"}, + ).json()["task"] + # Normal creation is 'ready'; push to triage. 
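+    # (Triage is a pre-spec holding pen: the dispatch test above shows it
+    # is never auto-promoted, so moving in and out is always an explicit
+    # PATCH like this one.)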
+ r = client.patch( + f"/api/plugins/kanban/tasks/{t['id']}", json={"status": "triage"}, + ) + assert r.status_code == 200 + assert r.json()["task"]["status"] == "triage" + + # Now promote to todo. + r = client.patch( + f"/api/plugins/kanban/tasks/{t['id']}", json={"status": "todo"}, + ) + assert r.status_code == 200 + assert r.json()["task"]["status"] == "todo" + + +# --------------------------------------------------------------------------- +# Progress rollup (done children / total children) +# --------------------------------------------------------------------------- + + +def test_board_progress_rollup(client): + parent = client.post( + "/api/plugins/kanban/tasks", json={"title": "parent"}, + ).json()["task"] + child_a = client.post( + "/api/plugins/kanban/tasks", + json={"title": "a", "parents": [parent["id"]]}, + ).json()["task"] + child_b = client.post( + "/api/plugins/kanban/tasks", + json={"title": "b", "parents": [parent["id"]]}, + ).json()["task"] + # Children start as "todo" because the parent isn't done yet; promote + # them to "ready" so complete_task will accept the transition. + for cid in (child_a["id"], child_b["id"]): + r = client.patch( + f"/api/plugins/kanban/tasks/{cid}", json={"status": "ready"}, + ) + assert r.status_code == 200 + + # 0/2 done. + r = client.get("/api/plugins/kanban/board") + parent_row = next( + t for col in r.json()["columns"] for t in col["tasks"] + if t["id"] == parent["id"] + ) + assert parent_row["progress"] == {"done": 0, "total": 2} + + # Complete one child. 1/2. + r = client.patch( + f"/api/plugins/kanban/tasks/{child_a['id']}", + json={"status": "done"}, + ) + assert r.status_code == 200 + r = client.get("/api/plugins/kanban/board") + parent_row = next( + t for col in r.json()["columns"] for t in col["tasks"] + if t["id"] == parent["id"] + ) + assert parent_row["progress"] == {"done": 1, "total": 2} + + # Childless tasks report progress=None, not {0/0}. + assert next( + t for col in r.json()["columns"] for t in col["tasks"] + if t["id"] == child_b["id"] + )["progress"] is None + + +# --------------------------------------------------------------------------- +# Auto-init on first board read +# --------------------------------------------------------------------------- + + +def test_board_auto_initializes_missing_db(tmp_path, monkeypatch): + """If kanban.db doesn't exist yet, GET /board must create it, not 500.""" + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + # Deliberately DO NOT call kb.init_db(). + + app = FastAPI() + app.include_router(_load_plugin_router(), prefix="/api/plugins/kanban") + c = TestClient(app) + r = c.get("/api/plugins/kanban/board") + assert r.status_code == 200 + assert (home / "kanban.db").exists(), "init_db wasn't invoked by /board" + + +# --------------------------------------------------------------------------- +# WebSocket auth (query-param token) +# --------------------------------------------------------------------------- + + +def test_ws_events_rejects_when_token_required(tmp_path, monkeypatch): + """When _SESSION_TOKEN is set (normal dashboard context), a missing or + wrong ?token= query param must be rejected with policy-violation.""" + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.setattr(Path, "home", lambda: tmp_path) + kb.init_db() + + # Stub web_server so _check_ws_token has a token to compare against. 
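+    # (monkeypatch.setitem restores or removes the sys.modules entry at
+    # teardown, so the stub cannot leak into other tests.)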
+ import types + stub = types.SimpleNamespace(_SESSION_TOKEN="secret-xyz") + monkeypatch.setitem(sys.modules, "hermes_cli.web_server", stub) + + app = FastAPI() + app.include_router(_load_plugin_router(), prefix="/api/plugins/kanban") + c = TestClient(app) + + # No token → policy violation close. + from starlette.websockets import WebSocketDisconnect + with pytest.raises(WebSocketDisconnect) as exc: + with c.websocket_connect("/api/plugins/kanban/events"): + pass + assert exc.value.code == 1008 + + # Wrong token → policy violation close. + with pytest.raises(WebSocketDisconnect) as exc: + with c.websocket_connect("/api/plugins/kanban/events?token=nope"): + pass + assert exc.value.code == 1008 + + # Correct token → accepted (connect then close cleanly from our side). + with c.websocket_connect( + "/api/plugins/kanban/events?token=secret-xyz" + ) as ws: + assert ws is not None # handshake succeeded + + +# --------------------------------------------------------------------------- +# Bulk actions +# --------------------------------------------------------------------------- + + +def test_bulk_status_ready(client): + a = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"] + b = client.post("/api/plugins/kanban/tasks", json={"title": "b"}).json()["task"] + c2 = client.post("/api/plugins/kanban/tasks", json={"title": "c"}).json()["task"] + # Parent-less tasks land in "ready" already; push them to blocked first. + for tid in (a["id"], b["id"], c2["id"]): + client.patch(f"/api/plugins/kanban/tasks/{tid}", + json={"status": "blocked", "block_reason": "wait"}) + + r = client.post("/api/plugins/kanban/tasks/bulk", + json={"ids": [a["id"], b["id"], c2["id"]], "status": "ready"}) + assert r.status_code == 200 + results = r.json()["results"] + assert all(r["ok"] for r in results) + # All three are now ready. + board = client.get("/api/plugins/kanban/board").json() + ready = next(col for col in board["columns"] if col["name"] == "ready") + ids = {t["id"] for t in ready["tasks"]} + assert {a["id"], b["id"], c2["id"]}.issubset(ids) + + +def test_bulk_archive(client): + a = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"] + b = client.post("/api/plugins/kanban/tasks", json={"title": "b"}).json()["task"] + r = client.post("/api/plugins/kanban/tasks/bulk", + json={"ids": [a["id"], b["id"]], "archive": True}) + assert r.status_code == 200 + assert all(r["ok"] for r in r.json()["results"]) + # Default board (archived hidden) — both gone. 
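+    # (The rows themselves survive with status='archived'; the default
+    # board view just hides them.)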
+ board = client.get("/api/plugins/kanban/board").json() + ids = {t["id"] for col in board["columns"] for t in col["tasks"]} + assert a["id"] not in ids + assert b["id"] not in ids + + +def test_bulk_reassign(client): + a = client.post("/api/plugins/kanban/tasks", + json={"title": "a", "assignee": "old"}).json()["task"] + b = client.post("/api/plugins/kanban/tasks", + json={"title": "b", "assignee": "old"}).json()["task"] + r = client.post("/api/plugins/kanban/tasks/bulk", + json={"ids": [a["id"], b["id"]], "assignee": "new"}) + assert r.status_code == 200 + for tid in (a["id"], b["id"]): + t = client.get(f"/api/plugins/kanban/tasks/{tid}").json()["task"] + assert t["assignee"] == "new" + + +def test_bulk_unassign_via_empty_string(client): + a = client.post("/api/plugins/kanban/tasks", + json={"title": "a", "assignee": "x"}).json()["task"] + r = client.post("/api/plugins/kanban/tasks/bulk", + json={"ids": [a["id"]], "assignee": ""}) + assert r.status_code == 200 + t = client.get(f"/api/plugins/kanban/tasks/{a['id']}").json()["task"] + assert t["assignee"] is None + + +def test_bulk_partial_failure_doesnt_abort_siblings(client): + """One bad id in the middle of a batch must not prevent others from + applying.""" + a = client.post("/api/plugins/kanban/tasks", json={"title": "a"}).json()["task"] + c2 = client.post("/api/plugins/kanban/tasks", json={"title": "c"}).json()["task"] + r = client.post("/api/plugins/kanban/tasks/bulk", + json={"ids": [a["id"], "bogus-id", c2["id"]], "priority": 7}) + assert r.status_code == 200 + results = r.json()["results"] + assert len(results) == 3 + ok_ids = {r["id"] for r in results if r["ok"]} + assert a["id"] in ok_ids + assert c2["id"] in ok_ids + assert any(not r["ok"] and r["id"] == "bogus-id" for r in results) + # Good siblings actually got the priority bump. + for tid in (a["id"], c2["id"]): + t = client.get(f"/api/plugins/kanban/tasks/{tid}").json()["task"] + assert t["priority"] == 7 + + +def test_bulk_empty_ids_400(client): + r = client.post("/api/plugins/kanban/tasks/bulk", json={"ids": []}) + assert r.status_code == 400 + + +# --------------------------------------------------------------------------- +# /config endpoint +# --------------------------------------------------------------------------- + + +def test_config_returns_defaults_when_section_missing(client): + r = client.get("/api/plugins/kanban/config") + assert r.status_code == 200 + data = r.json() + # Defaults when dashboard.kanban is missing. 
+ assert data["default_tenant"] == "" + assert data["lane_by_profile"] is True + assert data["include_archived_by_default"] is False + assert data["render_markdown"] is True + + +def test_config_reads_dashboard_kanban_section(tmp_path, monkeypatch, client): + home = Path(os.environ["HERMES_HOME"]) + (home / "config.yaml").write_text( + "dashboard:\n" + " kanban:\n" + " default_tenant: acme\n" + " lane_by_profile: false\n" + " include_archived_by_default: true\n" + " render_markdown: false\n" + ) + r = client.get("/api/plugins/kanban/config") + assert r.status_code == 200 + data = r.json() + assert data["default_tenant"] == "acme" + assert data["lane_by_profile"] is False + assert data["include_archived_by_default"] is True + assert data["render_markdown"] is False + + +# --------------------------------------------------------------------------- +# Runs surfacing (vulcan-artivus RFC feedback) +# --------------------------------------------------------------------------- + +def test_task_detail_includes_runs(client): + """GET /tasks/:id carries a runs[] array with the attempt history.""" + r = client.post("/api/plugins/kanban/tasks", + json={"title": "port x", "assignee": "worker"}).json() + tid = r["task"]["id"] + + # Drive status running to force a run creation: PATCH to running + # doesn't call claim_task (the PATCH path uses _set_status_direct), + # so use the bulk/claim indirection via the kernel. + import hermes_cli.kanban_db as _kb + conn = _kb.connect() + try: + _kb.claim_task(conn, tid) + _kb.complete_task( + conn, tid, + result="done", + summary="tested on rate limiter", + metadata={"changed_files": ["limiter.py"]}, + ) + finally: + conn.close() + + d = client.get(f"/api/plugins/kanban/tasks/{tid}").json() + assert "runs" in d + assert len(d["runs"]) == 1 + run = d["runs"][0] + assert run["outcome"] == "completed" + assert run["profile"] == "worker" + assert run["summary"] == "tested on rate limiter" + assert run["metadata"] == {"changed_files": ["limiter.py"]} + assert run["ended_at"] is not None + + +def test_task_detail_runs_empty_before_claim(client): + """A task that's never been claimed has an empty runs[] list, not + a missing key.""" + r = client.post("/api/plugins/kanban/tasks", json={"title": "fresh"}).json() + d = client.get(f"/api/plugins/kanban/tasks/{r['task']['id']}").json() + assert d["runs"] == [] + + +def test_patch_status_done_with_summary_and_metadata(client): + """PATCH /tasks/:id with status=done + summary + metadata must + reach complete_task, so the dashboard has CLI parity.""" + # Create + claim. + r = client.post("/api/plugins/kanban/tasks", json={"title": "x", "assignee": "worker"}) + tid = r.json()["task"]["id"] + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + kb.claim_task(conn, tid) + finally: + conn.close() + + r = client.patch( + f"/api/plugins/kanban/tasks/{tid}", + json={ + "status": "done", + "summary": "shipped the thing", + "metadata": {"changed_files": ["a.py", "b.py"], "tests_run": 7}, + }, + ) + assert r.status_code == 200, r.text + + # The run must have the summary + metadata attached. 
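+    # (Read back through a fresh kernel connection so we assert what was
+    # actually persisted, not what the REST response echoed.)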
+ conn = kb.connect() + try: + run = kb.latest_run(conn, tid) + assert run.outcome == "completed" + assert run.summary == "shipped the thing" + assert run.metadata == {"changed_files": ["a.py", "b.py"], "tests_run": 7} + finally: + conn.close() + + +def test_patch_status_done_without_summary_still_works(client): + """Back-compat: PATCH without the new fields still completes.""" + r = client.post("/api/plugins/kanban/tasks", json={"title": "y", "assignee": "worker"}) + tid = r.json()["task"]["id"] + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + kb.claim_task(conn, tid) + finally: + conn.close() + r = client.patch( + f"/api/plugins/kanban/tasks/{tid}", + json={"status": "done", "result": "legacy shape"}, + ) + assert r.status_code == 200, r.text + conn = kb.connect() + try: + run = kb.latest_run(conn, tid) + assert run.outcome == "completed" + assert run.summary == "legacy shape" # falls back to result + finally: + conn.close() + + +def test_patch_status_archive_closes_running_run(client): + """PATCH to archived while running must close the in-flight run.""" + r = client.post("/api/plugins/kanban/tasks", json={"title": "z", "assignee": "worker"}) + tid = r.json()["task"]["id"] + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + kb.claim_task(conn, tid) + open_run = kb.latest_run(conn, tid) + assert open_run.ended_at is None + finally: + conn.close() + r = client.patch( + f"/api/plugins/kanban/tasks/{tid}", + json={"status": "archived"}, + ) + assert r.status_code == 200, r.text + conn = kb.connect() + try: + task = kb.get_task(conn, tid) + assert task.status == "archived" + assert task.current_run_id is None + assert kb.latest_run(conn, tid).outcome == "reclaimed" + finally: + conn.close() + + +def test_event_dict_includes_run_id(client): + """GET /tasks/:id returns events with run_id populated.""" + r = client.post("/api/plugins/kanban/tasks", json={"title": "e", "assignee": "worker"}) + tid = r.json()["task"]["id"] + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + kb.claim_task(conn, tid) + run_id = kb.latest_run(conn, tid).id + kb.complete_task(conn, tid, summary="wss") + finally: + conn.close() + + r = client.get(f"/api/plugins/kanban/tasks/{tid}") + assert r.status_code == 200 + events = r.json()["events"] + # Every event in the response must have a run_id key (None or int). + for e in events: + assert "run_id" in e, f"missing run_id in event: {e}" + # completed event must have the actual run_id. + comp = [e for e in events if e["kind"] == "completed"] + assert comp[0]["run_id"] == run_id + + + +# --------------------------------------------------------------------------- +# Per-task force-loaded skills via REST +# --------------------------------------------------------------------------- + +def test_create_task_with_skills_roundtrips(client): + """POST /tasks accepts `skills: [...]`, GET /tasks/:id returns it.""" + r = client.post( + "/api/plugins/kanban/tasks", + json={ + "title": "translate docs", + "assignee": "linguist", + "skills": ["translation", "github-code-review"], + }, + ) + assert r.status_code == 200, r.text + task = r.json()["task"] + assert task["skills"] == ["translation", "github-code-review"] + + # Fetch via GET /tasks/:id as the drawer does. 
+ got = client.get(f"/api/plugins/kanban/tasks/{task['id']}").json() + assert got["task"]["skills"] == ["translation", "github-code-review"] + + +def test_create_task_without_skills_defaults_to_empty_list(client): + """_task_dict serializes Task.skills=None as [] so the drawer can + always .length check without guarding against null.""" + r = client.post( + "/api/plugins/kanban/tasks", + json={"title": "no skills", "assignee": "x"}, + ) + assert r.status_code == 200, r.text + task = r.json()["task"] + # Task.skills is None in-memory; _task_dict serializes via + # dataclasses.asdict which keeps it None. The drawer's + # `t.skills && t.skills.length > 0` guard handles both null and []. + assert task.get("skills") in (None, []) + + + +# --------------------------------------------------------------------------- +# Dispatcher-presence warning in POST /tasks response +# --------------------------------------------------------------------------- + +def test_create_task_includes_warning_when_no_dispatcher(client, monkeypatch): + """ready+assigned task + no gateway -> response has `warning` field + so the dashboard UI can surface a banner.""" + # Force the dispatcher probe to report "not running". + monkeypatch.setattr( + "hermes_cli.kanban._check_dispatcher_presence", + lambda: (False, "No gateway is running — start `hermes gateway start`."), + ) + r = client.post( + "/api/plugins/kanban/tasks", + json={"title": "warn-me", "assignee": "worker"}, + ) + assert r.status_code == 200 + data = r.json() + assert data.get("warning") + assert "gateway" in data["warning"].lower() + + +def test_create_task_no_warning_when_dispatcher_up(client, monkeypatch): + """Dispatcher running -> no `warning` field in the response.""" + monkeypatch.setattr( + "hermes_cli.kanban._check_dispatcher_presence", + lambda: (True, ""), + ) + r = client.post( + "/api/plugins/kanban/tasks", + json={"title": "silent", "assignee": "worker"}, + ) + assert r.status_code == 200 + assert "warning" not in r.json() or not r.json()["warning"] + + +def test_create_task_no_warning_on_triage(client, monkeypatch): + """Triage tasks never get the warning (they can't be dispatched + anyway until promoted).""" + monkeypatch.setattr( + "hermes_cli.kanban._check_dispatcher_presence", + lambda: (False, "oh no"), + ) + r = client.post( + "/api/plugins/kanban/tasks", + json={"title": "triage-task", "assignee": "worker", "triage": True}, + ) + assert r.status_code == 200 + assert "warning" not in r.json() or not r.json()["warning"] + + +def test_create_task_probe_error_does_not_break_create(client, monkeypatch): + """Probe failure must never break task creation.""" + def _raise(): + raise RuntimeError("probe crashed") + monkeypatch.setattr( + "hermes_cli.kanban._check_dispatcher_presence", _raise, + ) + r = client.post( + "/api/plugins/kanban/tasks", + json={"title": "resilient", "assignee": "worker"}, + ) + assert r.status_code == 200 + assert r.json()["task"]["title"] == "resilient" diff --git a/tests/stress/README.md b/tests/stress/README.md new file mode 100644 index 00000000000..8f56f24f35c --- /dev/null +++ b/tests/stress/README.md @@ -0,0 +1,41 @@ +# Stress / battle-test suite + +Long-running tests that exercise the Kanban kernel under adversarial +conditions. **Not run by `scripts/run_tests.sh`** because they can +take 30+ seconds each and spawn real subprocesses. 
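+
+They are also opt-in when collected through pytest: `conftest.py`
+registers a `--run-stress` flag and skips collection without it, so a
+plain `pytest tests/` run stays fast.
+
+Benchmark results land in JSON (see `test_benchmarks.py`), so two runs
+can be diffed for regressions. A minimal sketch of such a diff, assuming
+the dump is the flat list of `bench()` result dicts (the filenames here
+are illustrative, not paths the suite guarantees):
+
+```python
+import json
+
+# Hypothetical filenames: point these at two saved benchmark result dumps.
+with open("bench_old.json") as f:
+    old = {r["label"]: r for r in json.load(f)}
+with open("bench_new.json") as f:
+    new = {r["label"]: r for r in json.load(f)}
+
+for label in sorted(old.keys() & new.keys()):
+    before, after = old[label]["median_ms"], new[label]["median_ms"]
+    if before > 0 and after > 2.0 * before:  # flag ~2x latency regressions
+        print(f"{label}: {before:.1f}ms -> {after:.1f}ms")
+```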
+
+Run manually:
+
+```bash
+./venv/bin/python -m pytest tests/stress/ --run-stress -v -s
+# or individual files:
+./venv/bin/python tests/stress/test_concurrency.py
+./venv/bin/python tests/stress/test_subprocess_e2e.py
+./venv/bin/python tests/stress/test_property_fuzzing.py
+./venv/bin/python tests/stress/test_benchmarks.py
+```
+
+## What's covered
+
+- **test_concurrency.py** — 5 workers, 100 tasks, race-for-claim. Asserts no
+  double-claims, no orphan runs, no SQLite errors escaping the retry wrapper.
+- **test_concurrency_mixed.py** — 10 workers + 1 reclaimer, 500 tasks,
+  random ops (claim/complete/block/unblock/archive). Same invariants
+  under adversarial scheduling.
+- **test_concurrency_reclaim_race.py** — TTL < work duration so the
+  reclaimer intentionally yanks tasks mid-work; verifies the worker's
+  late-complete is refused cleanly (the CAS guard works).
+- **test_subprocess_e2e.py** — dispatcher spawns real Python subprocess
+  workers that heartbeat + complete via the CLI; crash detection
+  against a real dead PID.
+- **test_property_fuzzing.py** — 500 random operation sequences,
+  ~40k operations total, 9 invariant checks after each step.
+- **test_atypical_scenarios.py** — 28 scenarios covering atypical
+  user inputs: unicode/emoji/RTL, 1 MB strings, SQL injection
+  attempts, cycles, self-parents, wide fan-in/out, clock skew,
+  HERMES_HOME with spaces/unicode/symlinks, 1000 runs on one
+  task, idempotency-key race across processes, terminal-state
+  resurrection attempts, dashboard REST with weird JSON.
+- **test_benchmarks.py** — latency at 100/1k/10k tasks for dispatch,
+  recompute_ready, list_tasks, build_worker_context, etc. Results saved
+  to JSON for regression diffing.
diff --git a/tests/stress/_fake_worker.py b/tests/stress/_fake_worker.py
new file mode 100644
index 00000000000..be05bcbedc7
--- /dev/null
+++ b/tests/stress/_fake_worker.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python3
+"""Fake worker process that exercises the real subprocess contract.
+
+Reads HERMES_KANBAN_TASK from env, heartbeats periodically, does short
+work, completes via the CLI. Designed to be spawned by the dispatcher
+exactly the way `hermes chat -q` would be, minus the LLM cost.
+"""
+
+import json
+import os
+import subprocess
+import time
+
+
+def main():
+    tid = os.environ["HERMES_KANBAN_TASK"]
+    workspace = os.environ.get("HERMES_KANBAN_WORKSPACE", "")
+
+    # Announce via CLI (goes through real argparse + init_db + etc)
+    subprocess.run(
+        ["hermes", "kanban", "heartbeat", tid, "--note", "started"],
+        check=True, capture_output=True,
+    )
+
+    # Simulate work with periodic heartbeats
+    for i in range(3):
+        time.sleep(0.3)
+        subprocess.run(
+            ["hermes", "kanban", "heartbeat", tid, "--note", f"progress {i+1}/3"],
+            check=True, capture_output=True,
+        )
+
+    # Complete with structured handoff
+    subprocess.run(
+        [
+            "hermes", "kanban", "complete", tid,
+            "--summary", f"real-subprocess worker finished {tid}",
+            "--metadata", json.dumps({
+                "workspace": workspace,
+                "worker_pid": os.getpid(),
+                "iterations": 3,
+            }),
+        ],
+        check=True, capture_output=True,
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/stress/conftest.py b/tests/stress/conftest.py
new file mode 100644
index 00000000000..4c72a0462d0
--- /dev/null
+++ b/tests/stress/conftest.py
@@ -0,0 +1,39 @@
+"""pytest config for the stress/ subdirectory.
+
+These tests are slow (30s+), spawn subprocesses, and are not run by
+default. Enable via `pytest --run-stress` or by running the scripts
+directly.
+
+The scripts are primarily __main__-executable entry points; pytest
+collects them only when --run-stress is given explicitly.
+"""
+import pytest
+
+
+def pytest_collection_modifyitems(config, items):
+    if config.getoption("--run-stress", default=False):
+        return
+    skip_stress = pytest.mark.skip(
+        reason="stress test (opt-in via --run-stress or run script directly)"
+    )
+    for item in items:
+        if "tests/stress" in str(item.fspath):
+            item.add_marker(skip_stress)
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--run-stress",
+        action="store_true",
+        default=False,
+        help="Run the stress/battle-test suite (slow, spawns subprocesses).",
+    )
+
+
+def pytest_ignore_collect(collection_path, config):
+    # The stress scripts have top-level code and hard-coded paths; they're
+    # meant to run as `python tests/stress/<name>.py`. Only let pytest
+    # collect them when the user explicitly opts in via --run-stress.
+    if not config.getoption("--run-stress", default=False):
+        return True
+    return None
diff --git a/tests/stress/test_atypical_scenarios.py b/tests/stress/test_atypical_scenarios.py
new file mode 100644
index 00000000000..2010049e14f
--- /dev/null
+++ b/tests/stress/test_atypical_scenarios.py
@@ -0,0 +1,1060 @@
+"""Atypical user scenarios and configurations.
+
+Exercises the kernel against user inputs and environments that the
+normal tests assume away:
+
+  - Data: unicode, emoji, RTL, huge strings, control chars, SQL
+    injection attempts, malformed JSON, newlines in summaries.
+  - Graph: cycles, self-parenting, diamonds, wide fan-out/fan-in.
+  - Workspace: non-existent paths, path-traversal attempts.
+  - Clock: skew (runs whose started_at lands after ended_at).
+  - Filesystem: HERMES_HOME with spaces / unicode / symlinks.
+  - Scale extremes: 5k tasks across 100 tenants, 1000 runs on one task.
+  - Concurrency: idempotency-key race across processes.
+  - Hostile: path traversal attempts, injection attempts.
+
+Each scenario is self-contained. Failures are collected and printed
+together at the end. Script exits 0 iff every scenario passed or was
+cleanly SKIPPED (with reason).
+"""
+
+import json
+import multiprocessing as mp
+import os
+import shutil
+import sqlite3
+import subprocess
+import sys
+import tempfile
+import time
+from pathlib import Path
+
+# Resolve the worktree path robustly.
+_THIS = Path(__file__).resolve()
+WT = _THIS.parents[2] if _THIS.parent.name == "stress" else Path.cwd()
+
+FAILURES: list[str] = []
+SKIPS: list[str] = []
+_REGISTERED: list = []
+
+
+def scenario(name):
+    """Decorator: run `fn` in its own HERMES_HOME, collect failures.
+
+    Each runner is renamed `_scenario_<name>` for readable tracebacks
+    and appended to `_REGISTERED`, which main() iterates in order.
+    """
+    def wrap(fn):
+        def run():
+            home = tempfile.mkdtemp(prefix=f"hermes_atyp_{name}_")
+            os.environ["HERMES_HOME"] = home
+            os.environ["HOME"] = home
+            for m in list(sys.modules.keys()):
+                if m.startswith(("hermes_cli", "plugins", "gateway")):
+                    del sys.modules[m]
+            sys.path.insert(0, str(WT))
+            from hermes_cli import kanban_db as kb
+            print(f"\n═══ {name} ═══")
+            try:
+                fn(home, kb)
+                print(f"  ✔ {name}")
+            except AssertionError as e:
+                msg = f"{name}: {e}"
+                FAILURES.append(msg)
+                print(f"  ✗ FAIL: {e}")
+            except Exception as e:
+                msg = f"{name}: unexpected {type(e).__name__}: {e}"
+                FAILURES.append(msg)
+                import traceback
+                traceback.print_exc()
+                print(f"  ✗ ERROR: {msg}")
+            finally:
+                try:
+                    shutil.rmtree(home)
+                except Exception:
+                    pass
+        run.__name__ = f"_scenario_{name}"
+        # Register in a module-level list so discovery is trivial.
+ _REGISTERED.append(run) + return run + return wrap + + +# ============================================================================= +# DATA WEIRDNESS +# ============================================================================= + +@scenario("unicode_and_emoji") +def _(home, kb): + kb.init_db() + conn = kb.connect() + try: + # Emoji, CJK, RTL, zero-width joiner + cases = [ + ("📋 buy groceries 🍎", "shopping"), + ("设计认证模式", "implement"), + ("אימות משתמש חדש", "auth-rtl"), # Hebrew RTL + ("مهمة تصحيح الأخطاء", "bug-arabic"), + ("👨‍👩‍👧‍👦 family emoji ZWJ sequences 🏳️‍🌈", "emoji-stress"), + ("control\x01chars\x02in\x03body", "ctrl"), + ("null\x00bytes", "nullbyte"), + ] + for title, kind in cases: + tid = kb.create_task(conn, title=title, assignee="w") + back = kb.get_task(conn, tid) + assert back.title == title, ( + f"[{kind}] round-trip mismatch: {title!r} → {back.title!r}" + ) + print(f" {len(cases)} unicode titles round-tripped") + + # Metadata with non-ASCII + emoji + tid = kb.create_task(conn, title="with meta", assignee="w") + kb.claim_task(conn, tid) + meta = { + "作者": "张三", + "summary_fr": "résumé avec des caractères accentués", + "emoji": "🎉🔥💯", + "mixed_list": ["normal", "日本語", "🇺🇸"], + } + kb.complete_task( + conn, tid, + summary="完成了 📝 résumé", + metadata=meta, + ) + run = kb.latest_run(conn, tid) + assert run.summary == "完成了 📝 résumé", f"summary round-trip failed" + assert run.metadata == meta, ( + f"metadata round-trip failed: {run.metadata} != {meta}" + ) + print(f" metadata with CJK + emoji round-tripped") + finally: + conn.close() + + +@scenario("huge_strings") +def _(home, kb): + """1MB body + 1MB summary + deeply nested metadata.""" + kb.init_db() + conn = kb.connect() + try: + huge_body = "x" * (1024 * 1024) # 1 MB + huge_summary = "y" * (1024 * 1024) + # Nested metadata: 50 levels deep + meta = "leaf" + for _ in range(50): + meta = {"nested": meta} + tid = kb.create_task( + conn, title="huge task", body=huge_body, assignee="w", + ) + kb.claim_task(conn, tid) + kb.complete_task(conn, tid, summary=huge_summary, metadata=meta) + + back = kb.get_task(conn, tid) + assert back.body == huge_body, f"body truncated: {len(back.body)} vs {len(huge_body)}" + run = kb.latest_run(conn, tid) + assert run.summary == huge_summary + assert run.metadata == meta + print(f" 1 MB body + 1 MB summary + 50-deep metadata OK") + finally: + conn.close() + + +@scenario("sql_injection_attempts") +def _(home, kb): + """SQLite parameterized queries should neutralize all of these, but + verify empirically across every string field.""" + kb.init_db() + conn = kb.connect() + try: + payloads = [ + "'; DROP TABLE tasks; --", + "\" OR 1=1 --", + "'; DELETE FROM task_runs; --", + "Robert'); DROP TABLE students;--", # Little Bobby Tables + "\\x00\\x01\\x02", + "' UNION SELECT * FROM kanban_notify_subs --", + ] + for p in payloads: + tid = kb.create_task( + conn, title=p, body=p, assignee=p, tenant=p, + ) + back = kb.get_task(conn, tid) + assert back.title == p + assert back.body == p + # Kernel should have stored, not executed + # Verify tasks table still has rows + count = conn.execute("SELECT COUNT(*) FROM tasks").fetchone()[0] + assert count == len(payloads), f"lost rows: {count} vs {len(payloads)}" + # tasks table wasn't dropped (we're still here) + print(f" {len(payloads)} injection payloads neutralized") + finally: + conn.close() + + +@scenario("newlines_in_summary") +def _(home, kb): + """Summaries with newlines, tabs, and shell metachars. 
+ + The notifier truncates to first line — verify that's right, not + that the kernel loses data.""" + kb.init_db() + conn = kb.connect() + try: + tid = kb.create_task(conn, title="multiline", assignee="w") + kb.claim_task(conn, tid) + multi = "line 1\nline 2\tindented\n\nline 4" + kb.complete_task(conn, tid, summary=multi) + run = kb.latest_run(conn, tid) + assert run.summary == multi, "full summary should survive in kernel" + # Event payload takes first line (for notifier brevity) + events = [e for e in kb.list_events(conn, tid) if e.kind == "completed"] + assert events[0].payload["summary"] == "line 1", ( + f"event payload should be first line, got {events[0].payload['summary']!r}" + ) + print(" multiline summary preserved on run; first line in event") + finally: + conn.close() + + +@scenario("malformed_metadata_via_cli") +def _(home, kb): + """CLI rejects malformed JSON and non-dict JSON cleanly.""" + kb.init_db() + conn = kb.connect() + try: + tid = kb.create_task(conn, title="meta test", assignee="w") + kb.claim_task(conn, tid) + finally: + conn.close() + + env = {**os.environ, "PYTHONPATH": str(WT), "HERMES_HOME": home, "HOME": home} + bad_metas = [ + "not-json", + "[1, 2, 3]", # array not dict + "42", # scalar + '{"unclosed', # truncated + ] + for bad in bad_metas: + r = subprocess.run( + [sys.executable, "-m", "hermes_cli.main", "kanban", + "complete", tid, "--metadata", bad], + capture_output=True, text=True, env=env, + ) + # Should print an error to stderr, exit non-zero, not touch the task + assert "metadata" in r.stderr.lower() or "json" in r.stderr.lower(), ( + f"bad metadata {bad!r} didn't produce a metadata error: " + f"stderr={r.stderr!r}" + ) + # Verify task is still running (no partial apply) + conn = kb.connect() + try: + assert kb.get_task(conn, tid).status == "running" + finally: + conn.close() + print(f" {len(bad_metas)} malformed --metadata values cleanly rejected") + + +# ============================================================================= +# DEPENDENCY GRAPH PATHOLOGIES +# ============================================================================= + +@scenario("dependency_cycle") +def _(home, kb): + """A → B → A should be refused. If it's allowed, recompute_ready + could infinite-loop or never promote.""" + kb.init_db() + conn = kb.connect() + try: + a = kb.create_task(conn, title="A", assignee="w") + b = kb.create_task(conn, title="B", assignee="w", parents=[a]) + # Try to link A back to B — creating the cycle + try: + kb.link_tasks(conn, parent_id=b, child_id=a) + # If that didn't raise, the kernel allowed a cycle. + # Verify recompute_ready at least doesn't hang. 
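+            # Run the recompute on a daemon thread with a timeout so a
+            # truly cyclic graph cannot wedge the whole suite.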
+ import threading + done = threading.Event() + result = [] + def run(): + try: + result.append(kb.recompute_ready(conn)) + except Exception as e: + result.append(e) + done.set() + t = threading.Thread(target=run, daemon=True) + t.start() + done.wait(timeout=5) + if not done.is_set(): + assert False, "recompute_ready HUNG on cyclic graph" + raise AssertionError( + "cycle creation was allowed; kernel should reject" + ) + except (ValueError, RuntimeError, sqlite3.IntegrityError) as e: + # Expected: kernel refuses the cycle + print(f" cycle correctly rejected: {e}") + finally: + conn.close() + + +@scenario("self_parent") +def _(home, kb): + """A task cannot be its own parent.""" + kb.init_db() + conn = kb.connect() + try: + tid = kb.create_task(conn, title="self", assignee="w") + try: + kb.link_tasks(conn, parent_id=tid, child_id=tid) + raise AssertionError("self-parenting should be rejected") + except (ValueError, RuntimeError, sqlite3.IntegrityError) as e: + print(f" self-parent rejected: {e}") + finally: + conn.close() + + +@scenario("diamond_dependency") +def _(home, kb): + """Root → (A, B) → leaf. Leaf should promote to ready only when + BOTH A and B are done.""" + kb.init_db() + conn = kb.connect() + try: + root = kb.create_task(conn, title="root", assignee="w") + kb.claim_task(conn, root) + kb.complete_task(conn, root, result="ready") + a = kb.create_task(conn, title="A", assignee="w", parents=[root]) + b = kb.create_task(conn, title="B", assignee="w", parents=[root]) + leaf = kb.create_task(conn, title="leaf", assignee="w", parents=[a, b]) + + # A done but B not → leaf stays todo + kb.claim_task(conn, a) + kb.complete_task(conn, a, result="a done") + kb.recompute_ready(conn) + assert kb.get_task(conn, leaf).status == "todo", ( + f"leaf should still be todo with B unfinished, got " + f"{kb.get_task(conn, leaf).status}" + ) + # Both done → leaf promotes + kb.claim_task(conn, b) + kb.complete_task(conn, b, result="b done") + kb.recompute_ready(conn) + assert kb.get_task(conn, leaf).status == "ready", ( + f"leaf should promote with both parents done, got " + f"{kb.get_task(conn, leaf).status}" + ) + print(f" diamond dependency resolved correctly") + finally: + conn.close() + + +@scenario("wide_fan_out") +def _(home, kb): + """One parent, 500 children. Completing the parent should promote + all 500 in its own recompute_ready pass (triggered by complete_task). + """ + kb.init_db() + conn = kb.connect() + try: + parent = kb.create_task(conn, title="root", assignee="w") + children = [ + kb.create_task(conn, title=f"c{i}", assignee="w", parents=[parent]) + for i in range(500) + ] + kb.claim_task(conn, parent) + t0 = time.monotonic() + kb.complete_task(conn, parent, result="done") + elapsed = (time.monotonic() - t0) * 1000 + # complete_task calls recompute_ready internally; check result. + ready_count = conn.execute( + "SELECT COUNT(*) FROM tasks WHERE status='ready' AND id != ?", + (parent,), + ).fetchone()[0] + assert ready_count == 500, f"expected 500 promoted, got {ready_count}" + for cid in children[:5]: + assert kb.get_task(conn, cid).status == "ready" + print(f" 500 children promoted in {elapsed:.0f}ms (via complete_task)") + finally: + conn.close() + + +@scenario("wide_fan_in") +def _(home, kb): + """500 parents, 1 child. 
Child should not promote until all 500 done.""" + kb.init_db() + conn = kb.connect() + try: + parents = [ + kb.create_task(conn, title=f"p{i}", assignee="w") for i in range(500) + ] + child = kb.create_task( + conn, title="leaf", assignee="w", parents=parents, + ) + # Complete 499 parents + for p in parents[:-1]: + kb.claim_task(conn, p) + kb.complete_task(conn, p) + kb.recompute_ready(conn) + assert kb.get_task(conn, child).status == "todo", ( + "child should still be todo with 1/500 parents incomplete" + ) + # Finish the last one + kb.claim_task(conn, parents[-1]) + kb.complete_task(conn, parents[-1]) + kb.recompute_ready(conn) + assert kb.get_task(conn, child).status == "ready" + print(f" 500 parents → 1 child promotion works") + finally: + conn.close() + + +# ============================================================================= +# WORKSPACE EDGE CASES +# ============================================================================= + +@scenario("workspace_path_traversal") +def _(home, kb): + """`workspace_path='../../../etc/passwd'` or absolute-outside-home + should not be silently accepted and then executed in the wrong place.""" + kb.init_db() + conn = kb.connect() + try: + # Direct kernel API — create with an attacker-ish path + tid = kb.create_task( + conn, title="path-traversal", + assignee="w", + workspace_kind="dir", + workspace_path="../../../tmp/attacker", + ) + task = kb.get_task(conn, tid) + # Document what actually happens — is the path stored verbatim? + # Is it resolved? Is it rejected? + print(f" stored workspace_path: {task.workspace_path!r}") + print(f" workspace_kind: {task.workspace_kind!r}") + # Verify resolve_workspace (which the dispatcher calls) doesn't + # allow escape. + try: + from hermes_cli.kanban_db import resolve_workspace + resolved = resolve_workspace(task) + # If resolve succeeded, check it's actually escape-safe. + resolved_abs = str(Path(resolved).resolve()) + home_abs = str(Path(os.environ["HERMES_HOME"]).resolve()) + if not resolved_abs.startswith(home_abs) and resolved_abs.startswith("/tmp"): + # This is escaping the home dir. Whether that's actually + # a problem depends on the threat model. Flag for attention. 
+ print(f" ⚠ workspace resolved OUTSIDE hermes_home: {resolved}") + print(f" (not necessarily a bug — dir: workspaces are intentionally arbitrary, but worth documenting)") + except Exception as e: + print(f" resolve_workspace rejected: {e}") + finally: + conn.close() + + +@scenario("workspace_nonexistent_path") +def _(home, kb): + """Dispatching a task whose workspace can't be resolved should go + through the spawn-failure circuit breaker, not crash.""" + kb.init_db() + conn = kb.connect() + try: + tid = kb.create_task( + conn, title="bad-workspace", assignee="w", + workspace_kind="dir", + workspace_path="/nonexistent/path/that/does/not/exist", + ) + # Run dispatch_once with a dummy spawn_fn + result = kb.dispatch_once(conn, spawn_fn=lambda *_: 99999) + # If the path was rejected, the task went through _record_spawn_failure + task = kb.get_task(conn, tid) + # Possible outcomes: + # - Task back in ready (workspace issue = spawn_failed, retries) + # - Task in running (kernel accepted the bogus path and spawned) + # - Task auto-blocked (after N retries, but we only ran 1 tick) + print(f" after 1 tick with nonexistent workspace: status={task.status}") + if task.status == "ready": + # Expected path: workspace failure led to release + spawn_failures = task.spawn_failures + print(f" spawn_failures counter: {spawn_failures}") + assert spawn_failures >= 1, "spawn_failures counter didn't increment" + elif task.status == "running": + # Workspace not checked before spawn — the worker would hit + # the bad path itself. Defensible for `dir:` workspaces that + # the user might create later. + print(" kernel accepted bogus path (deferred check to worker)") + finally: + conn.close() + + +# ============================================================================= +# CLOCK SKEW +# ============================================================================= + +@scenario("clock_skew_start_greater_than_end") +def _(home, kb): + """NTP jumps backward. Run.started_at gets written as 1234 but by + the time complete_task runs, time.time() returned 1230. A human + reading run history sees negative elapsed.""" + kb.init_db() + conn = kb.connect() + try: + tid = kb.create_task(conn, title="time-travel", assignee="w") + kb.claim_task(conn, tid) + # Force a future started_at via raw SQL + future = int(time.time()) + 3600 + conn.execute( + "UPDATE task_runs SET started_at = ? WHERE task_id = ?", + (future, tid), + ) + conn.commit() + # Complete normally — ended_at will be now, < started_at + kb.complete_task(conn, tid, summary="time-skewed") + run = kb.latest_run(conn, tid) + # Invariant I5 (from property fuzzer): started_at <= ended_at + # when ended_at is set. Verify this is enforced OR gracefully + # handled in display. + if run.ended_at < run.started_at: + # Kernel didn't reject the write; check that CLI display + # doesn't produce "-1800s" elapsed. + elapsed = run.ended_at - run.started_at + print(f" clock-skewed run: elapsed = {elapsed}s (negative)") + print(f" ⚠ kernel stores this; UI should clamp to 0 or handle") + # Don't fail — document the behavior. 
+ else: + print(" kernel normalized ended_at >= started_at") + finally: + conn.close() + + +# ============================================================================= +# FILESYSTEM WEIRDNESS +# ============================================================================= + +@scenario("hermes_home_with_spaces") +def _(home, kb): + """HERMES_HOME at a path with spaces — should work but catches + anyone doing string interpolation without quoting.""" + # Note: home was already created with a safe prefix. We need to + # reset to a weird one for this test. + weird = tempfile.mkdtemp(prefix="hermes with spaces ") + os.environ["HERMES_HOME"] = weird + os.environ["HOME"] = weird + kb._INITIALIZED_PATHS.clear() + kb.init_db() + conn = kb.connect() + try: + tid = kb.create_task(conn, title="spaced", assignee="w") + kb.claim_task(conn, tid) + kb.complete_task(conn, tid, summary="path has spaces") + runs = kb.list_runs(conn, tid) + assert len(runs) == 1 and runs[0].outcome == "completed" + # Verify the DB file is actually in the weird path + db_path = Path(weird) / "kanban.db" + assert db_path.exists(), f"DB not at {db_path}" + print(f" HERMES_HOME with spaces: OK at {weird}") + finally: + conn.close() + shutil.rmtree(weird, ignore_errors=True) + + +@scenario("hermes_home_with_unicode") +def _(home, kb): + """HERMES_HOME with non-ASCII chars.""" + # Pre-create directly since tempfile doesn't love unicode prefixes + weird = f"/tmp/hermes_héllo_émöji_{os.getpid()}" + os.makedirs(weird, exist_ok=True) + os.environ["HERMES_HOME"] = weird + os.environ["HOME"] = weird + kb._INITIALIZED_PATHS.clear() + kb.init_db() + conn = kb.connect() + try: + tid = kb.create_task(conn, title="unicode home", assignee="w") + kb.claim_task(conn, tid) + kb.complete_task(conn, tid, summary="ok") + assert (Path(weird) / "kanban.db").exists() + print(f" HERMES_HOME with unicode path: OK at {weird}") + finally: + conn.close() + shutil.rmtree(weird, ignore_errors=True) + + +@scenario("hermes_home_via_symlink") +def _(home, kb): + """HERMES_HOME is a symlink to the real dir. _INITIALIZED_PATHS + uses Path.resolve() — two different symlink names pointing at the + same dir should NOT double-init.""" + real = tempfile.mkdtemp(prefix="hermes_real_") + link1 = real + "_link1" + link2 = real + "_link2" + os.symlink(real, link1) + os.symlink(real, link2) + try: + os.environ["HERMES_HOME"] = link1 + os.environ["HOME"] = link1 + kb._INITIALIZED_PATHS.clear() + kb.init_db() + conn1 = kb.connect() + kb.create_task(conn1, title="t1", assignee="w") + conn1.close() + + # Switch to link2 pointing at the same dir + os.environ["HERMES_HOME"] = link2 + os.environ["HOME"] = link2 + conn2 = kb.connect() + # Should see the task we created via link1 + all_tasks = kb.list_tasks(conn2) + assert len(all_tasks) == 1, ( + f"symlinks to same dir should share DB, got {len(all_tasks)} tasks" + ) + conn2.close() + print(" symlinks to same HERMES_HOME share DB correctly") + finally: + for p in (link1, link2): + try: + os.remove(p) + except OSError: + pass + shutil.rmtree(real, ignore_errors=True) + + +# ============================================================================= +# SCALE EXTREMES +# ============================================================================= + +@scenario("huge_run_count_on_one_task") +def _(home, kb): + """1000 reclaim cycles on a single task → 1000 run rows. 
Verify + list_runs still performs, and build_worker_context isn't quadratic.""" + kb.init_db() + conn = kb.connect() + try: + tid = kb.create_task(conn, title="retry-heavy", assignee="w") + # Force reclaims by manually closing runs + for i in range(1000): + kb.claim_task(conn, tid) + # Force close the run directly so we can make another claim + rid = kb.latest_run(conn, tid).id + kb._end_run(conn, tid, outcome="reclaimed", summary=f"attempt {i}") + conn.execute( + "UPDATE tasks SET status='ready', claim_lock=NULL, " + "claim_expires=NULL WHERE id=?", (tid,), + ) + conn.commit() + runs = kb.list_runs(conn, tid) + assert len(runs) == 1000, f"expected 1000 runs, got {len(runs)}" + # build_worker_context should NOT take forever + t0 = time.monotonic() + ctx = kb.build_worker_context(conn, tid) + elapsed = (time.monotonic() - t0) * 1000 + # The "Prior attempts" section renders ALL closed runs. + # For 1000 runs this could produce a massive string. + # Fair question: is this bounded? Let's measure. + print(f" 1000 runs → list_runs OK; build_worker_context = {elapsed:.0f}ms, {len(ctx)} chars") + if len(ctx) > 200_000: + print(f" ⚠ build_worker_context unbounded on retry-heavy tasks " + f"({len(ctx)} chars) — worker context will be huge") + finally: + conn.close() + + +@scenario("hundred_tenants") +def _(home, kb): + """100 distinct tenants with 50 tasks each. board_stats + list_tasks + should still return quickly.""" + kb.init_db() + conn = kb.connect() + try: + for t in range(100): + for i in range(50): + kb.create_task( + conn, title=f"tenant-{t}-task-{i}", + tenant=f"tenant_{t:03d}", + assignee="w", + ) + t0 = time.monotonic() + stats = kb.board_stats(conn) + el_stats = (time.monotonic() - t0) * 1000 + t0 = time.monotonic() + tasks = kb.list_tasks(conn) + el_list = (time.monotonic() - t0) * 1000 + print(f" 5000 tasks / 100 tenants: stats={el_stats:.0f}ms, list={el_list:.0f}ms") + assert len(tasks) == 5000 + finally: + conn.close() + + +# ============================================================================= +# CONCURRENCY CORNERS +# ============================================================================= + +def _idempotency_race_worker(hermes_home: str, key: str, result_file: str, + barrier_path: str) -> None: + """Subprocess body for the idempotency race test.""" + os.environ["HERMES_HOME"] = hermes_home + os.environ["HOME"] = hermes_home + sys.path.insert(0, str(WT)) + from hermes_cli import kanban_db as kb + + # Spin until the barrier file exists (crude sync across processes) + while not os.path.exists(barrier_path): + time.sleep(0.001) + + conn = kb.connect() + try: + tid = kb.create_task( + conn, title=f"race pid={os.getpid()}", + assignee="w", idempotency_key=key, + ) + finally: + conn.close() + with open(result_file, "w") as f: + f.write(tid) + + +@scenario("idempotency_key_race") +def _(home, kb): + """Two processes concurrently call create_task with the same + idempotency_key — should both get back the SAME task id, not two + different ones.""" + kb.init_db() + # Spawn workers, then drop the barrier so they fire ~simultaneously. 
+ key = "race-key-12345" + barrier = os.path.join(home, "barrier") + results = [os.path.join(home, f"res_{i}") for i in range(2)] + ctx = mp.get_context("spawn") + procs = [ + ctx.Process( + target=_idempotency_race_worker, + args=(home, key, results[i], barrier), + ) + for i in range(2) + ] + for p in procs: + p.start() + time.sleep(0.1) # let them hit the spin + # Fire the gun + with open(barrier, "w") as f: + f.write("go") + for p in procs: + p.join(timeout=10) + + tids = [open(r).read().strip() for r in results if os.path.exists(r)] + assert len(tids) == 2, f"only {len(tids)} workers finished" + assert tids[0] == tids[1], ( + f"idempotency key race produced two different tasks: {tids}" + ) + # Also verify there's only ONE row in the DB + conn = kb.connect() + try: + count = conn.execute( + "SELECT COUNT(*) FROM tasks WHERE idempotency_key = ?", + (key,), + ).fetchone()[0] + assert count == 1, f"expected 1 task with key, got {count}" + finally: + conn.close() + print(f" idempotency race: both workers got {tids[0]}") + + + +# ============================================================================= +# MORE EDGE CASES +# ============================================================================= + +@scenario("assignee_with_special_chars") +def _(home, kb): + """Profile names can contain @-signs, dots, hyphens. Some users + might try nonsense. Kernel shouldn't break on any of them.""" + kb.init_db() + conn = kb.connect() + try: + assignees = [ + "normal-dev", + "dev.with.dots", + "backend@v2", + "日本語-dev", + "🤖-bot", + "x" * 200, # very long + "", # empty string + ] + for a in assignees: + tid = kb.create_task(conn, title=f"for {a!r}", assignee=a or None) + back = kb.get_task(conn, tid) + # Empty string is coerced to None by kernel, or stored verbatim? 
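+            # Either normalization is acceptable here; we assert the
+            # round-trip only for non-empty names.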
+ if a: + assert back.assignee == a, f"assignee round-trip: {a!r} → {back.assignee!r}" + print(f" {len(assignees)} weird assignee names round-tripped") + finally: + conn.close() + + +@scenario("completed_task_reclaim_attempt") +def _(home, kb): + """A task in 'done' should NOT be reclaimable — reclaim/claim paths + must refuse.""" + kb.init_db() + conn = kb.connect() + try: + tid = kb.create_task(conn, title="terminal", assignee="w") + kb.claim_task(conn, tid) + kb.complete_task(conn, tid, summary="all done") + # Try to re-claim a done task + claimed = kb.claim_task(conn, tid) + assert claimed is None, "done task should not be claimable" + # Try to complete it again + ok = kb.complete_task(conn, tid, summary="oops twice") + assert ok is False, "completing an already-done task should refuse" + # Try to block it + ok = kb.block_task(conn, tid, reason="trying") + assert ok is False, "blocking a done task should refuse" + print(" done task correctly resists re-claim/complete/block") + finally: + conn.close() + + +@scenario("archived_task_resurrection_attempt") +def _(home, kb): + """An archived task should be invisible to normal ops.""" + kb.init_db() + conn = kb.connect() + try: + tid = kb.create_task(conn, title="archive-me", assignee="w") + kb.archive_task(conn, tid) + # Archived task shouldn't appear in default list + tasks = kb.list_tasks(conn) + assert all(t.id != tid for t in tasks), "archived task leaked into default list" + # But it should still exist in the DB + row = conn.execute("SELECT status FROM tasks WHERE id = ?", (tid,)).fetchone() + assert row is not None + assert row["status"] == "archived" + # Trying to claim an archived task: should refuse + claimed = kb.claim_task(conn, tid) + assert claimed is None, "archived task should not be claimable" + # Archived can be un-archived via direct status? No API for that intentionally + # (archive is meant to be terminal). Verify this. + # complete/block/unblock on archived should all refuse. 
+ assert kb.complete_task(conn, tid) is False + assert kb.block_task(conn, tid, reason="no") is False + assert kb.unblock_task(conn, tid) is False + print(" archived task cannot be resurrected via normal APIs") + finally: + conn.close() + + +@scenario("unassigned_task_never_claims") +def _(home, kb): + """Task without an assignee should never be claimed by dispatch_once, + even though its status might be 'ready' if it has no parents.""" + kb.init_db() + conn = kb.connect() + try: + tid = kb.create_task(conn, title="orphan", assignee=None) + assert kb.get_task(conn, tid).status == "ready" + result = kb.dispatch_once(conn, spawn_fn=lambda *_: 42) + assert tid in result.skipped_unassigned + assert len(result.spawned) == 0 + # Task should still be ready, untouched + assert kb.get_task(conn, tid).status == "ready" + print(" unassigned ready task correctly skipped by dispatcher") + finally: + conn.close() + + +@scenario("comment_storm") +def _(home, kb): + """1000 comments on a single task — build_worker_context should still + be reasonable.""" + kb.init_db() + conn = kb.connect() + try: + tid = kb.create_task(conn, title="chatty", assignee="w") + for i in range(1000): + kb.add_comment(conn, tid, author=f"user{i % 5}", body=f"comment number {i}") + comments = kb.list_comments(conn, tid) + assert len(comments) == 1000 + t0 = time.monotonic() + ctx = kb.build_worker_context(conn, tid) + elapsed = (time.monotonic() - t0) * 1000 + print(f" 1000 comments: list in {elapsed:.0f}ms, context size = {len(ctx)} chars") + if len(ctx) > 200_000: + print(f" ⚠ comment thread unbounded in worker context") + finally: + conn.close() + + +@scenario("empty_string_fields") +def _(home, kb): + """Empty title should be rejected (we already do this). Empty body, + empty summary, etc. should be accepted.""" + kb.init_db() + conn = kb.connect() + try: + # Empty title → reject + try: + kb.create_task(conn, title="", assignee="w") + raise AssertionError("empty title should have been rejected") + except ValueError: + pass + # Whitespace-only title → reject + try: + kb.create_task(conn, title=" \t\n ", assignee="w") + raise AssertionError("whitespace-only title should have been rejected") + except ValueError: + pass + # Empty body → accept (legitimate: just title says it all) + tid = kb.create_task(conn, title="empty body ok", body="", assignee="w") + assert kb.get_task(conn, tid).body in ("", None) + # Empty summary on complete → accept + kb.claim_task(conn, tid) + kb.complete_task(conn, tid, summary="") + run = kb.latest_run(conn, tid) + # Empty summary falls back to result; both empty → None on run + print(f" empty body accepted, empty-title rejected") + finally: + conn.close() + + +@scenario("tenant_with_newlines") +def _(home, kb): + """Someone pastes a multi-line string into --tenant. Kernel should + store what it gets — but queries filtering by tenant should still + work against the raw value.""" + kb.init_db() + conn = kb.connect() + try: + weird_tenant = "line1\nline2\tindented" + tid = kb.create_task(conn, title="weird tenant", assignee="w", tenant=weird_tenant) + back = kb.get_task(conn, tid) + assert back.tenant == weird_tenant + # board_stats groups by tenant — verify it doesn't fall over + stats = kb.board_stats(conn) + print(f" multiline tenant stored and stats still work") + finally: + conn.close() + + +@scenario("parent_in_different_status_states") +def _(home, kb): + """recompute_ready promotes a todo child only if ALL parents are + in 'done'. 
Verify against parents in every non-done state.""" + kb.init_db() + conn = kb.connect() + try: + # Create one parent in each possible non-done state + p_ready = kb.create_task(conn, title="p-ready", assignee="w") + p_running = kb.create_task(conn, title="p-running", assignee="w") + kb.claim_task(conn, p_running) + p_blocked = kb.create_task(conn, title="p-blocked", assignee="w") + kb.block_task(conn, p_blocked, reason="stuck") + p_triage = kb.create_task(conn, title="p-triage", assignee="w", triage=True) + p_archived = kb.create_task(conn, title="p-archived", assignee="w") + kb.archive_task(conn, p_archived) + p_done = kb.create_task(conn, title="p-done", assignee="w") + kb.claim_task(conn, p_done) + kb.complete_task(conn, p_done) + + # Child with just one parent, cycle it through each state + for parent, expected in [ + (p_ready, "todo"), # parent not done → child stays todo + (p_running, "todo"), + (p_blocked, "todo"), + (p_triage, "todo"), + (p_archived, "todo"), # archived != done! + (p_done, "ready"), # only done parent unblocks child + ]: + child = kb.create_task( + conn, title=f"child-of-{parent}", assignee="w", parents=[parent], + ) + kb.recompute_ready(conn) + actual = kb.get_task(conn, child).status + assert actual == expected, ( + f"child of {parent} ({kb.get_task(conn, parent).status}): " + f"expected {expected}, got {actual}" + ) + print(" child promotion correctly gated on parent.status == 'done'") + finally: + conn.close() + + +@scenario("dashboard_rest_with_weird_inputs") +def _(home, kb): + """FastAPI TestClient POST /tasks with atypical JSON bodies.""" + kb.init_db() + # Set a session token so the ws check doesnt bomb on import + try: + from hermes_cli import web_server as ws # noqa + except Exception: + pass + + from fastapi import FastAPI + from fastapi.testclient import TestClient + from plugins.kanban.dashboard.plugin_api import router as kanban_router + app = FastAPI() + app.include_router(kanban_router, prefix="/api/plugins/kanban") + client = TestClient(app) + + # Empty title + r = client.post("/api/plugins/kanban/tasks", json={"title": ""}) + assert r.status_code in (400, 422), f"empty title should 4xx, got {r.status_code}" + + # Title only + r = client.post("/api/plugins/kanban/tasks", json={"title": "x"}) + assert r.status_code == 200, r.text + + # Huge title + r = client.post("/api/plugins/kanban/tasks", json={"title": "x" * 10000}) + # Should succeed — kernel doesn't cap title length + assert r.status_code == 200 + + # Unicode + emoji + r = client.post("/api/plugins/kanban/tasks", json={ + "title": "📋 deploy 🚀 to 生产", + "body": "日本語 body", + "assignee": "deploy-bot", + }) + assert r.status_code == 200 + tid = r.json()["task"]["id"] + assert r.json()["task"]["title"] == "📋 deploy 🚀 to 生产" + + # Invalid JSON schema — unknown field, pydantic should either ignore or 422 + r = client.post("/api/plugins/kanban/tasks", json={ + "title": "fine", "nonexistent_field": "whatever", + }) + assert r.status_code in (200, 422) + + # Priority as non-int + r = client.post("/api/plugins/kanban/tasks", json={"title": "prio", "priority": "high"}) + assert r.status_code == 422, f"string priority should 422, got {r.status_code}" + + # PATCH with empty body (no changes requested) + r = client.patch(f"/api/plugins/kanban/tasks/{tid}", json={}) + # Accept either success-no-op or 400 + assert r.status_code in (200, 400) + print(" dashboard REST handles weird inputs correctly") + +# ============================================================================= +# RUN ALL +# 
=============================================================================
+
+def main():
+ print(f"Running {len(_REGISTERED)} atypical-scenario tests...")
+ for fn in _REGISTERED:
+ fn()
+
+ print()
+ print("=" * 60)
+ print("SUMMARY")
+ print("=" * 60)
+ print(f" Ran: {len(_REGISTERED)}")
+ print(f" Failures: {len(FAILURES)}")
+ print(f" Skips: {len(SKIPS)}")
+ if FAILURES:
+ print()
+ for f in FAILURES:
+ print(f" ✗ {f}")
+ sys.exit(1)
+ else:
+ print("\n✔ ALL ATYPICAL SCENARIOS HANDLED CORRECTLY")
+
+
+if __name__ == "__main__":
+ main() diff --git a/tests/stress/test_benchmarks.py b/tests/stress/test_benchmarks.py new file mode 100644 index 00000000000..e092ed0fcc7 --- /dev/null +++ b/tests/stress/test_benchmarks.py @@ -0,0 +1,221 @@ +"""Scale benchmarks for the Kanban kernel.
+
+Measures:
+ - dispatch_once latency at 100, 1000, 10000 tasks
+ - recompute_ready latency at 100, 1000, 10000 todo tasks with wide parent graphs
+ - build_worker_context latency with 1, 10, 50 parent dependencies
+ - board list/stats query latency
+ - task_runs query latency at scale
+
+Results printed as a table. Saved to JSON for regression-diffing in CI
+or future reviews. Not a pass/fail test — records numbers so we know
+when a change regresses latency by 10x and can decide whether to care.
+"""
+
+import json
+import os
+import random
+import sys
+import tempfile
+import time
+from pathlib import Path
+
+WT = str(Path(__file__).resolve().parents[2])
+
+
+def bench(label, fn, iterations=5):
+ """Time fn over `iterations` runs; return a dict with the min, median,
+ and max latency in ms."""
+ times = []
+ for _ in range(iterations):
+ t0 = time.perf_counter()
+ fn()
+ times.append((time.perf_counter() - t0) * 1000)
+ times.sort()
+ mn = times[0]
+ md = times[len(times) // 2]
+ mx = times[-1]
+ return {"label": label, "iter": iterations, "min_ms": mn, "median_ms": md, "max_ms": mx}
+
+
+def seed_tasks(conn, kb, n, assignee="bench-worker", with_parents=False):
+ """Seed n tasks. 
Optionally give each task 5 parents.""" + ids = [] + for i in range(n): + if with_parents and i >= 5: + parents = random.sample(ids[:i], 5) + else: + parents = () + tid = kb.create_task( + conn, title=f"bench {i}", assignee=assignee, + tenant="bench", parents=parents, + ) + ids.append(tid) + return ids + + +def main(): + home = tempfile.mkdtemp(prefix="hermes_bench_") + os.environ["HERMES_HOME"] = home + os.environ["HOME"] = home + sys.path.insert(0, WT) + from hermes_cli import kanban_db as kb + + kb.init_db() + + results = [] + + # ============ dispatch_once latency ============ + for n in [100, 1000, 10000]: + print(f"\n== dispatch_once @ {n} tasks ==") + # Fresh DB each time so we're not measuring cumulative effects + import shutil + shutil.rmtree(home, ignore_errors=True) + os.makedirs(home) + kb._INITIALIZED_PATHS.clear() + kb.init_db() + conn = kb.connect() + seed_tasks(conn, kb, n, assignee=None) # no assignee → won't spawn + r = bench( + f"dispatch_once (n={n}, no spawn)", + lambda: kb.dispatch_once(conn, spawn_fn=lambda *_: None), + iterations=5, + ) + print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms") + r["n"] = n + results.append(r) + conn.close() + + # ============ recompute_ready at scale with parent graphs ============ + for n in [100, 1000, 10000]: + print(f"\n== recompute_ready @ {n} tasks (5 parents each) ==") + shutil.rmtree(home, ignore_errors=True) + os.makedirs(home) + kb._INITIALIZED_PATHS.clear() + kb.init_db() + conn = kb.connect() + ids = seed_tasks(conn, kb, n, assignee=None, with_parents=True) + # Complete the first 100 so some todo tasks might get promoted + for tid in ids[:min(100, n // 10)]: + kb.complete_task(conn, tid, result="bench") + r = bench( + f"recompute_ready (n={n}, with parents)", + lambda: kb.recompute_ready(conn), + iterations=5, + ) + print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms") + r["n"] = n + results.append(r) + conn.close() + + # ============ build_worker_context with N parents ============ + for parent_count in [1, 10, 50]: + print(f"\n== build_worker_context with {parent_count} parents ==") + shutil.rmtree(home, ignore_errors=True) + os.makedirs(home) + kb._INITIALIZED_PATHS.clear() + kb.init_db() + conn = kb.connect() + # Create parents, complete them with summaries+metadata + parent_ids = [] + for i in range(parent_count): + pid = kb.create_task(conn, title=f"parent {i}", assignee="p") + kb.claim_task(conn, pid) + kb.complete_task( + conn, pid, + summary=f"parent {i} result that is longer than a single token " + f"so we actually measure the IO", + metadata={"files": [f"file_{j}.py" for j in range(5)], "i": i}, + ) + parent_ids.append(pid) + child_id = kb.create_task( + conn, title="child", assignee="c", parents=parent_ids, + ) + r = bench( + f"build_worker_context (parents={parent_count})", + lambda: kb.build_worker_context(conn, child_id), + iterations=10, + ) + print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms") + r["parent_count"] = parent_count + results.append(r) + conn.close() + + # ============ list_tasks at scale ============ + for n in [100, 1000, 10000]: + print(f"\n== list_tasks @ {n} ==") + shutil.rmtree(home, ignore_errors=True) + os.makedirs(home) + kb._INITIALIZED_PATHS.clear() + kb.init_db() + conn = kb.connect() + seed_tasks(conn, kb, n) + r = bench( + f"list_tasks (n={n})", + lambda: kb.list_tasks(conn), + iterations=5, + ) + print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms") + 
r["n"] = n + results.append(r) + conn.close() + + # ============ board_stats at scale ============ + for n in [100, 1000, 10000]: + print(f"\n== board_stats @ {n} ==") + shutil.rmtree(home, ignore_errors=True) + os.makedirs(home) + kb._INITIALIZED_PATHS.clear() + kb.init_db() + conn = kb.connect() + seed_tasks(conn, kb, n) + r = bench( + f"board_stats (n={n})", + lambda: kb.board_stats(conn), + iterations=5, + ) + print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms") + r["n"] = n + results.append(r) + conn.close() + + # ============ list_runs at scale ============ + for n in [100, 1000]: + print(f"\n== list_runs for task with {n} attempts ==") + shutil.rmtree(home, ignore_errors=True) + os.makedirs(home) + kb._INITIALIZED_PATHS.clear() + kb.init_db() + conn = kb.connect() + tid = kb.create_task(conn, title="x", assignee="w") + # Create N attempts via claim/release + for i in range(n): + kb.claim_task(conn, tid, ttl_seconds=0) + kb.release_stale_claims(conn) + r = bench( + f"list_runs (runs={n})", + lambda: kb.list_runs(conn, tid), + iterations=10, + ) + print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms") + r["run_count"] = n + results.append(r) + conn.close() + + # ============ SUMMARY TABLE ============ + print() + print("=" * 60) + print("SUMMARY") + print("=" * 60) + print(f"{'Benchmark':<50} {'min':>8} {'median':>8} {'max':>8}") + for r in results: + print(f"{r['label']:<50} {r['min_ms']:>7.1f}ms {r['median_ms']:>7.1f}ms {r['max_ms']:>7.1f}ms") + + # Save for future diffing. + out_path = "/tmp/kanban_bench_results.json" + with open(out_path, "w") as f: + json.dump(results, f, indent=2) + print(f"\nResults saved to {out_path}") + + +if __name__ == "__main__": + main() diff --git a/tests/stress/test_concurrency.py b/tests/stress/test_concurrency.py new file mode 100644 index 00000000000..5cbe455cb02 --- /dev/null +++ b/tests/stress/test_concurrency.py @@ -0,0 +1,302 @@ +"""Multi-process concurrency stress test for the Kanban kernel. + +5 worker processes race for claims on a shared DB with 100 tasks. Each +worker loops: claim -> simulate work -> complete. Asserts the invariants +that make the system worth building: + + - No task claimed by two workers simultaneously + - No task completed twice + - Every claim produces exactly one run row + - Every completion closes exactly one run row + - Zero SQLite locking errors that escape the retry layer + - Total run count == total claim events == total completed events + +This test is the primary justification for WAL + CAS-based claim. If it +passes, the architecture holds. If it fails, we have a real bug to fix +before anyone runs this in anger. +""" + +import json +import multiprocessing as mp +import os +import random +import sqlite3 +import subprocess +import sys +import tempfile +import time +from pathlib import Path + + +NUM_WORKERS = 5 +NUM_TASKS = 100 +WORKER_TIMEOUT_S = 60 +WT = str(Path(__file__).resolve().parents[2]) + + +def worker_loop(worker_id: int, hermes_home: str, result_file: str) -> None: + """One worker's inner loop. Runs in a fresh Python process. + + Tries to claim a ready task, marks it done with a per-worker summary, + repeats until the ready pool is empty. Records every claim + complete + into its own JSON result file for later aggregation. 
+ """ + os.environ["HERMES_HOME"] = hermes_home + os.environ["HOME"] = hermes_home + sys.path.insert(0, WT) + + from hermes_cli import kanban_db as kb + + events = [] + empty_polls = 0 + start = time.monotonic() + + while time.monotonic() - start < WORKER_TIMEOUT_S: + conn = kb.connect() + try: + # Find any ready task (non-deterministic order intentional — we + # want workers to race on popular assignees). + row = conn.execute( + "SELECT id FROM tasks WHERE status = 'ready' " + "AND claim_lock IS NULL LIMIT 1" + ).fetchone() + if row is None: + empty_polls += 1 + if empty_polls > 20: + break # queue empty long enough, stop + time.sleep(0.01) + continue + empty_polls = 0 + + tid = row["id"] + try: + claimed = kb.claim_task( + conn, tid, claimer=f"worker-{worker_id}", + ) + except sqlite3.OperationalError as e: + events.append({"kind": "sqlite_err_on_claim", "task": tid, "err": str(e)}) + continue + if claimed is None: + # Someone else beat us — expected contention, not an error. + events.append({"kind": "lost_claim_race", "task": tid}) + continue + + run = kb.latest_run(conn, tid) + events.append({ + "kind": "claimed", + "task": tid, + "worker": worker_id, + "run_id": run.id, + "t": time.monotonic() - start, + }) + + # Simulate short, variable work + time.sleep(random.uniform(0.001, 0.05)) + + try: + kb.complete_task( + conn, tid, + result=f"done by worker-{worker_id}", + summary=f"worker-{worker_id} finished task {tid}", + metadata={"worker_id": worker_id, "run_id": run.id}, + ) + except sqlite3.OperationalError as e: + events.append({"kind": "sqlite_err_on_complete", "task": tid, "err": str(e)}) + continue + events.append({ + "kind": "completed", + "task": tid, + "worker": worker_id, + "run_id": run.id, + "t": time.monotonic() - start, + }) + finally: + conn.close() + + with open(result_file, "w") as f: + json.dump(events, f) + + +def main(): + home = tempfile.mkdtemp(prefix="hermes_concurrency_") + print(f"HERMES_HOME = {home}") + + # Seed. + os.environ["HERMES_HOME"] = home + os.environ["HOME"] = home + sys.path.insert(0, WT) + from hermes_cli import kanban_db as kb + + kb.init_db() + conn = kb.connect() + tids = [] + for i in range(NUM_TASKS): + tid = kb.create_task( + conn, title=f"task #{i}", assignee="shared", + tenant="concurrency-test", + ) + tids.append(tid) + conn.close() + print(f"Seeded {NUM_TASKS} tasks.") + + # Spawn workers. + ctx = mp.get_context("spawn") + result_files = [f"/tmp/concurrency_worker_{i}.json" for i in range(NUM_WORKERS)] + procs = [] + start = time.monotonic() + for i in range(NUM_WORKERS): + p = ctx.Process(target=worker_loop, args=(i, home, result_files[i])) + p.start() + procs.append(p) + + for p in procs: + p.join(timeout=WORKER_TIMEOUT_S + 30) + if p.is_alive(): + p.terminate() + p.join() + + elapsed = time.monotonic() - start + print(f"All workers done in {elapsed:.1f}s") + + # Aggregate worker events. 
+ all_events = [] + for i, f in enumerate(result_files): + if not os.path.isfile(f): + print(f" WORKER {i} produced no result file — died?") + continue + with open(f) as fh: + events = json.load(fh) + all_events.extend(events) + + # ============ INVARIANT CHECKS ============ + print() + print("=" * 60) + print("INVARIANT CHECKS") + print("=" * 60) + + failures = [] + + # Check 1: no task claimed by two different workers + claims_by_task = {} + for e in all_events: + if e["kind"] == "claimed": + if e["task"] in claims_by_task: + prev = claims_by_task[e["task"]] + if prev["worker"] != e["worker"]: + failures.append( + f"DOUBLE CLAIM: task {e['task']} claimed by " + f"worker {prev['worker']} AND worker {e['worker']}" + ) + claims_by_task[e["task"]] = e + + # Check 2: every completion has a matching claim from the same worker + for e in all_events: + if e["kind"] == "completed": + prev_claim = claims_by_task.get(e["task"]) + if prev_claim is None: + failures.append(f"COMPLETION WITHOUT CLAIM: task {e['task']}") + elif prev_claim["worker"] != e["worker"]: + failures.append( + f"WORKER MISMATCH: task {e['task']} claimed by " + f"{prev_claim['worker']} but completed by {e['worker']}" + ) + + # Check 3: DB state — every task should be in 'done', no dangling claims + conn = kb.connect() + try: + bad_status = conn.execute( + "SELECT id, status, claim_lock, current_run_id FROM tasks " + "WHERE status != 'done' OR claim_lock IS NOT NULL " + "OR current_run_id IS NOT NULL" + ).fetchall() + if bad_status: + for row in bad_status: + failures.append( + f"BAD FINAL STATE: task {row['id']} status={row['status']} " + f"claim_lock={row['claim_lock']} current_run_id={row['current_run_id']}" + ) + + # Check 4: exactly one run per task, all closed as completed + bad_runs = conn.execute( + "SELECT task_id, COUNT(*) as n FROM task_runs " + "GROUP BY task_id HAVING n != 1" + ).fetchall() + if bad_runs: + for row in bad_runs: + failures.append( + f"WRONG RUN COUNT: task {row['task_id']} has {row['n']} runs (expected 1)" + ) + + open_runs = conn.execute( + "SELECT id, task_id FROM task_runs WHERE ended_at IS NULL" + ).fetchall() + for row in open_runs: + failures.append(f"OPEN RUN: run {row['id']} on task {row['task_id']}") + + wrong_outcomes = conn.execute( + "SELECT task_id, outcome FROM task_runs " + "WHERE outcome IS NULL OR outcome != 'completed'" + ).fetchall() + for row in wrong_outcomes: + failures.append( + f"WRONG OUTCOME: task {row['task_id']} run outcome={row['outcome']}" + ) + + # Check 5: event counts — exactly NUM_TASKS completed events + completed_events = conn.execute( + "SELECT COUNT(*) as n FROM task_events WHERE kind='completed'" + ).fetchone()["n"] + if completed_events != NUM_TASKS: + failures.append( + f"EVENT COUNT MISMATCH: {completed_events} completed events " + f"expected {NUM_TASKS}" + ) + + # Check 6: count SQLite errors that escaped retry + sqlite_errs = sum( + 1 for e in all_events if e["kind"].startswith("sqlite_err") + ) + if sqlite_errs > 0: + failures.append(f"UNRETRIED SQLITE ERRORS: {sqlite_errs}") + + finally: + conn.close() + + # ============ STATS ============ + print() + total_claims = sum(1 for e in all_events if e["kind"] == "claimed") + total_completes = sum(1 for e in all_events if e["kind"] == "completed") + total_lost_races = sum(1 for e in all_events if e["kind"] == "lost_claim_race") + + per_worker = {} + for e in all_events: + if e["kind"] == "completed": + per_worker.setdefault(e["worker"], 0) + per_worker[e["worker"]] += 1 + + print(f"Total claims: {total_claims}") 
+ print(f"Total completes: {total_completes}") + print(f"Lost claim races: {total_lost_races} (expected contention; not a bug)") + print(f"Elapsed: {elapsed:.2f}s") + print(f"Throughput: {NUM_TASKS/elapsed:.1f} tasks/sec") + print(f"Per-worker completions:") + for w in sorted(per_worker.keys()): + print(f" worker-{w}: {per_worker[w]}") + + if failures: + print() + print("=" * 60) + print(f"FAILURES ({len(failures)}):") + print("=" * 60) + for f in failures[:20]: + print(f" {f}") + if len(failures) > 20: + print(f" ... and {len(failures) - 20} more") + sys.exit(1) + else: + print() + print("✔ ALL INVARIANTS HELD") + + +if __name__ == "__main__": + main() diff --git a/tests/stress/test_concurrency_mixed.py b/tests/stress/test_concurrency_mixed.py new file mode 100644 index 00000000000..8b6ef718667 --- /dev/null +++ b/tests/stress/test_concurrency_mixed.py @@ -0,0 +1,350 @@ +"""Harder concurrency stress: mixed operations + larger scale. + +Scales to 500 tasks, 10 workers, 60s runtime. Each worker randomly: + - claims + completes (70%) + - claims + blocks with a reason (15%) + - unblocks a random blocked task (10%) + - archives a random done task (5%) + +Adds a background "dispatcher" process that calls release_stale_claims +and detect_crashed_workers every 200ms, racing against the workers to +surface TTL + crash detection races. + +Pass criteria: runs invariant holds, no double-completions, no orphan +runs, no SQLite errors escape the retry layer. +""" + +import json +import multiprocessing as mp +import os +import random +import sqlite3 +import sys +import tempfile +import time +from pathlib import Path + +NUM_WORKERS = 10 +NUM_TASKS = 500 +RUN_DURATION_S = 30 +WT = str(Path(__file__).resolve().parents[2]) + + +def worker_loop(worker_id: int, hermes_home: str, result_file: str) -> None: + os.environ["HERMES_HOME"] = hermes_home + os.environ["HOME"] = hermes_home + sys.path.insert(0, WT) + from hermes_cli import kanban_db as kb + + events = [] + start = time.monotonic() + idle_rounds = 0 + + while time.monotonic() - start < RUN_DURATION_S: + conn = kb.connect() + try: + op = random.random() + + if op < 0.10: + # Try to unblock a blocked task. + row = conn.execute( + "SELECT id FROM tasks WHERE status='blocked' " + "ORDER BY RANDOM() LIMIT 1" + ).fetchone() + if row: + try: + ok = kb.unblock_task(conn, row["id"]) + events.append({"kind": "unblocked" if ok else "unblock_noop", + "task": row["id"], "worker": worker_id}) + except sqlite3.OperationalError as e: + events.append({"kind": "sqlite_err", "op": "unblock", + "task": row["id"], "err": str(e)[:100]}) + continue + + if op < 0.15: + # Try to archive a done task. + row = conn.execute( + "SELECT id FROM tasks WHERE status='done' " + "ORDER BY RANDOM() LIMIT 1" + ).fetchone() + if row: + try: + kb.archive_task(conn, row["id"]) + events.append({"kind": "archived", "task": row["id"], + "worker": worker_id}) + except sqlite3.OperationalError as e: + events.append({"kind": "sqlite_err", "op": "archive", + "task": row["id"], "err": str(e)[:100]}) + continue + + # Default: claim + complete-or-block. 
+ row = conn.execute( + "SELECT id FROM tasks WHERE status='ready' " + "AND claim_lock IS NULL LIMIT 1" + ).fetchone() + if row is None: + idle_rounds += 1 + if idle_rounds > 50: + break + time.sleep(0.02) + continue + idle_rounds = 0 + + tid = row["id"] + try: + claimed = kb.claim_task( + conn, tid, claimer=f"worker-{worker_id}", + ttl_seconds=5, # short TTL so reclaim races in + ) + except sqlite3.OperationalError as e: + events.append({"kind": "sqlite_err", "op": "claim", + "task": tid, "err": str(e)[:100]}) + continue + if claimed is None: + events.append({"kind": "lost_claim_race", "task": tid}) + continue + + run = kb.latest_run(conn, tid) + events.append({"kind": "claimed", "task": tid, "worker": worker_id, + "run_id": run.id, "t": time.monotonic() - start}) + + time.sleep(random.uniform(0.005, 0.05)) + + # 20% of the time, block instead of complete + if random.random() < 0.20: + try: + kb.block_task(conn, tid, + reason=f"blocked by worker-{worker_id}") + events.append({"kind": "blocked", "task": tid, + "worker": worker_id, "run_id": run.id}) + except sqlite3.OperationalError as e: + events.append({"kind": "sqlite_err", "op": "block", + "task": tid, "err": str(e)[:100]}) + else: + try: + kb.complete_task( + conn, tid, + result=f"done by worker-{worker_id}", + summary=f"worker-{worker_id} ok", + metadata={"worker_id": worker_id}, + ) + events.append({"kind": "completed", "task": tid, + "worker": worker_id, "run_id": run.id, + "t": time.monotonic() - start}) + except sqlite3.OperationalError as e: + events.append({"kind": "sqlite_err", "op": "complete", + "task": tid, "err": str(e)[:100]}) + finally: + conn.close() + + with open(result_file, "w") as f: + json.dump(events, f) + + +def reclaimer_loop(hermes_home: str, result_file: str) -> None: + """Background dispatcher-like loop that reclaims stale tasks.""" + os.environ["HERMES_HOME"] = hermes_home + os.environ["HOME"] = hermes_home + sys.path.insert(0, WT) + from hermes_cli import kanban_db as kb + + events = [] + start = time.monotonic() + while time.monotonic() - start < RUN_DURATION_S + 2: + conn = kb.connect() + try: + try: + reclaimed = kb.release_stale_claims(conn) + if reclaimed: + events.append({"kind": "reclaimed", "count": reclaimed, + "t": time.monotonic() - start}) + except sqlite3.OperationalError as e: + events.append({"kind": "sqlite_err", "op": "reclaim", + "err": str(e)[:100]}) + finally: + conn.close() + time.sleep(0.2) + + with open(result_file, "w") as f: + json.dump(events, f) + + +def main(): + home = tempfile.mkdtemp(prefix="hermes_mixed_stress_") + print(f"HERMES_HOME = {home}") + + os.environ["HERMES_HOME"] = home + os.environ["HOME"] = home + sys.path.insert(0, WT) + from hermes_cli import kanban_db as kb + + kb.init_db() + conn = kb.connect() + for i in range(NUM_TASKS): + kb.create_task( + conn, title=f"t#{i}", assignee="shared", tenant="mixed-stress", + ) + conn.close() + print(f"Seeded {NUM_TASKS} tasks, launching {NUM_WORKERS} workers + 1 reclaimer") + + ctx = mp.get_context("spawn") + worker_results = [f"/tmp/mixed_worker_{i}.json" for i in range(NUM_WORKERS)] + reclaim_result = "/tmp/mixed_reclaim.json" + + procs = [] + start = time.monotonic() + for i in range(NUM_WORKERS): + p = ctx.Process(target=worker_loop, args=(i, home, worker_results[i])) + p.start() + procs.append(p) + r = ctx.Process(target=reclaimer_loop, args=(home, reclaim_result)) + r.start() + procs.append(r) + + for p in procs: + p.join(timeout=RUN_DURATION_S + 30) + if p.is_alive(): + p.terminate() + p.join() + + elapsed = 
time.monotonic() - start
+ print(f"Done in {elapsed:.1f}s")
+
+ # Aggregate.
+ all_events = []
+ for i, f in enumerate(worker_results):
+ if os.path.isfile(f):
+ with open(f) as fh:
+ all_events.extend(json.load(fh))
+ else:
+ print(f" WORKER {i} died with no result file!")
+ reclaim_events = []
+ if os.path.isfile(reclaim_result):
+ with open(reclaim_result) as fh:
+ reclaim_events = json.load(fh)
+
+ # ============ INVARIANT CHECKS ============
+ print()
+ print("=" * 60)
+ print("INVARIANT CHECKS")
+ print("=" * 60)
+
+ failures = []
+
+ # Per-run attribution tracking
+ claims = [e for e in all_events if e["kind"] == "claimed"]
+ completions = [e for e in all_events if e["kind"] == "completed"]
+ blocks = [e for e in all_events if e["kind"] == "blocked"]
+
+ # Every completion must have a matching claim on the same run_id AND
+ # the same worker (workers don't steal each other's runs).
+ claims_by_run = {c["run_id"]: c for c in claims}
+ for comp in completions:
+ claim = claims_by_run.get(comp["run_id"])
+ if claim is None:
+ # A completion whose run_id never shows up in any claim event
+ # is a bug regardless of reclaims in between: a worker must
+ # never close a run it didn't open.
+ failures.append(
+ f"COMPLETION WITHOUT CLAIM: task {comp['task']} run {comp['run_id']} "
+ f"by worker {comp['worker']}"
+ )
+ elif claim["worker"] != comp["worker"]:
+ failures.append(
+ f"CROSS-WORKER COMPLETION: run {comp['run_id']} claimed by "
+ f"worker {claim['worker']} but completed by worker {comp['worker']}"
+ )
+
+ # SQLite errors that escaped the retry layer
+ sqlite_errs = [e for e in all_events if e["kind"] == "sqlite_err"]
+ if sqlite_errs:
+ for e in sqlite_errs[:5]:
+ failures.append(f"SQLITE ERROR: op={e.get('op')} err={e.get('err')}")
+ if len(sqlite_errs) > 5:
+ failures.append(f" ... and {len(sqlite_errs) - 5} more sqlite errs")
+
+ # DB final state — structural consistency (statuses legitimately vary
+ # under mixed ops, so we check run pointers, not terminal-ness).
+ conn = kb.connect()
+ try:
+ # Invariant: current_run_id NULL iff latest run is terminal
+ inconsistent = conn.execute("""
+ SELECT t.id, t.status, t.current_run_id
+ FROM tasks t
+ WHERE t.current_run_id IS NOT NULL
+ AND EXISTS (SELECT 1 FROM task_runs r
+ WHERE r.id = t.current_run_id AND r.ended_at IS NOT NULL)
+ """).fetchall()
+ for row in inconsistent:
+ failures.append(
+ f"INVARIANT VIOLATION: task {row['id']} status={row['status']} "
+ f"has current_run_id={row['current_run_id']} but run is ended"
+ )
+
+ # Invariant: no orphan open runs
+ orphans = conn.execute("""
+ SELECT r.id, r.task_id, r.status
+ FROM task_runs r
+ LEFT JOIN tasks t ON t.current_run_id = r.id
+ WHERE r.ended_at IS NULL AND t.id IS NULL
+ """).fetchall()
+ for row in orphans:
+ failures.append(
+ f"ORPHAN OPEN RUN: run {row['id']} on task {row['task_id']}"
+ )
+
+ # Counts — should roughly balance. 
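+ # Rough balance expected, not asserted exactly (workers terminated at
+ # the deadline lose their tail events):
+ # claimed events ≈ completed + blocked + reclaimed + runs still open.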
+ status_counts = dict(
+ conn.execute("SELECT status, COUNT(*) FROM tasks GROUP BY status").fetchall()
+ )
+ run_outcome_counts = dict(
+ conn.execute(
+ "SELECT outcome, COUNT(*) FROM task_runs "
+ "WHERE ended_at IS NOT NULL GROUP BY outcome"
+ ).fetchall()
+ )
+ active_runs = conn.execute(
+ "SELECT COUNT(*) FROM task_runs WHERE ended_at IS NULL"
+ ).fetchone()[0]
+
+ finally:
+ conn.close()
+
+ # ============ STATS ============
+ print()
+ print(f"Workers: {NUM_WORKERS}, Tasks: {NUM_TASKS}")
+ print(f"Elapsed: {elapsed:.1f}s")
+ print(f"Events collected: {len(all_events)} (+{len(reclaim_events)} reclaim)")
+ print()
+ print("Operations:")
+ op_counts = {}
+ for e in all_events:
+ op_counts[e["kind"]] = op_counts.get(e["kind"], 0) + 1
+ for k in sorted(op_counts.keys()):
+ print(f" {k:<25} {op_counts[k]}")
+
+ print()
+ print("Final task status:")
+ for s, n in sorted(status_counts.items()):
+ print(f" {s:<10} {n}")
+ print("Final run outcomes:")
+ for o, n in sorted(run_outcome_counts.items(), key=lambda x: (x[0] or '',)):
+ print(f" {o:<12} {n}")
+ print(f" active {active_runs}")
+
+ if failures:
+ print()
+ print("=" * 60)
+ print(f"FAILURES ({len(failures)}):")
+ print("=" * 60)
+ for f in failures[:30]:
+ print(f" {f}")
+ if len(failures) > 30:
+ print(f" ... and {len(failures) - 30} more")
+ sys.exit(1)
+ else:
+ print()
+ print("✔ ALL INVARIANTS HELD UNDER MIXED STRESS")
+
+
+if __name__ == "__main__":
+ main() diff --git a/tests/stress/test_concurrency_reclaim_race.py b/tests/stress/test_concurrency_reclaim_race.py new file mode 100644 index 00000000000..b468cd957ef --- /dev/null +++ b/tests/stress/test_concurrency_reclaim_race.py @@ -0,0 +1,241 @@ +"""Target the reclaim race specifically.
+
+Workers claim tasks with a 1s TTL but sleep 2s before completing. The
+reclaimer runs every 200ms. Scenario: worker claims, reclaimer expires
+the claim mid-work, worker tries to complete AFTER its run has been
+reclaimed.
+
+Expected behavior (per design): complete_task does not check
+claim_lock; it CAS-transitions status from 'running' to 'done'. If the
+reclaimer has already moved the task back to 'ready', the late
+worker's complete_task fails that CAS and is refused. There is no
+"grace" path for a late completion: an invalidated claim loses.
+
+Invariant being tested: the race between worker.complete and
+dispatcher.reclaim must not produce a double-run-close or any other
+inconsistency. 
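+
+Illustrative timeline for one contested task (assuming TTL=1s, work≈2s):
+
+ t=0.0 worker CAS-claims: ready -> running, run row opened
+ t≈1.2 reclaimer sees the expired TTL: run closed outcome='reclaimed',
+ task moved running -> ready
+ t=2.0 worker's complete_task runs its CAS on status='running',
+ finds 'ready', and is refused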
+""" + +import json +import multiprocessing as mp +import os +import random +import sqlite3 +import sys +import tempfile +import time +from pathlib import Path + +NUM_WORKERS = 5 +NUM_TASKS = 50 +TTL = 1 +WORK_DURATION_S = 2.0 # longer than TTL => reclaimer wins +WT = str(Path(__file__).resolve().parents[2]) + + +def worker_loop(worker_id: int, hermes_home: str, result_file: str) -> None: + os.environ["HERMES_HOME"] = hermes_home + os.environ["HOME"] = hermes_home + sys.path.insert(0, WT) + from hermes_cli import kanban_db as kb + + events = [] + start = time.monotonic() + idle = 0 + + while time.monotonic() - start < 40: + conn = kb.connect() + try: + row = conn.execute( + "SELECT id FROM tasks WHERE status='ready' AND claim_lock IS NULL LIMIT 1" + ).fetchone() + if row is None: + idle += 1 + if idle > 30: + break + time.sleep(0.05) + continue + idle = 0 + tid = row["id"] + try: + claimed = kb.claim_task(conn, tid, claimer=f"worker-{worker_id}", + ttl_seconds=TTL) + except sqlite3.OperationalError as e: + events.append({"kind": "sqlite_err", "op": "claim", "err": str(e)[:100]}) + continue + if claimed is None: + events.append({"kind": "lost_claim", "task": tid}) + continue + run = kb.latest_run(conn, tid) + events.append({"kind": "claimed", "task": tid, "worker": worker_id, + "run_id": run.id}) + + # Sleep longer than TTL so reclaimer has a chance to intervene + time.sleep(WORK_DURATION_S + random.uniform(-0.3, 0.3)) + + try: + ok = kb.complete_task( + conn, tid, + result=f"by worker-{worker_id}", + summary=f"worker-{worker_id} finished", + ) + events.append({"kind": "complete_ok" if ok else "complete_refused", + "task": tid, "worker": worker_id, "run_id": run.id}) + except sqlite3.OperationalError as e: + events.append({"kind": "sqlite_err", "op": "complete", "err": str(e)[:100]}) + finally: + conn.close() + + with open(result_file, "w") as f: + json.dump(events, f) + + +def reclaimer_loop(hermes_home: str, result_file: str) -> None: + os.environ["HERMES_HOME"] = hermes_home + os.environ["HOME"] = hermes_home + sys.path.insert(0, WT) + from hermes_cli import kanban_db as kb + + events = [] + start = time.monotonic() + while time.monotonic() - start < 42: + conn = kb.connect() + try: + try: + n = kb.release_stale_claims(conn) + if n: + events.append({"kind": "reclaimed", "count": n, + "t": time.monotonic() - start}) + except sqlite3.OperationalError as e: + events.append({"kind": "sqlite_err", "err": str(e)[:100]}) + finally: + conn.close() + time.sleep(0.2) + with open(result_file, "w") as f: + json.dump(events, f) + + +def main(): + home = tempfile.mkdtemp(prefix="hermes_reclaim_race_") + os.environ["HERMES_HOME"] = home + os.environ["HOME"] = home + sys.path.insert(0, WT) + from hermes_cli import kanban_db as kb + + kb.init_db() + conn = kb.connect() + for i in range(NUM_TASKS): + kb.create_task(conn, title=f"t{i}", assignee="shared", + tenant="reclaim-race") + conn.close() + print(f"Seeded {NUM_TASKS} tasks. 
TTL={TTL}s, work_duration={WORK_DURATION_S}s") + print(f"(worker work > TTL guarantees reclaims)") + + ctx = mp.get_context("spawn") + worker_results = [f"/tmp/rc_worker_{i}.json" for i in range(NUM_WORKERS)] + reclaim_result = "/tmp/rc_reclaim.json" + procs = [] + for i in range(NUM_WORKERS): + p = ctx.Process(target=worker_loop, args=(i, home, worker_results[i])) + p.start() + procs.append(p) + r = ctx.Process(target=reclaimer_loop, args=(home, reclaim_result)) + r.start() + procs.append(r) + + for p in procs: + p.join(timeout=60) + if p.is_alive(): + p.terminate() + p.join() + + # Aggregate. + all_events = [] + for f in worker_results: + if os.path.isfile(f): + with open(f) as fh: + all_events.extend(json.load(fh)) + reclaim_events = [] + if os.path.isfile(reclaim_result): + with open(reclaim_result) as fh: + reclaim_events = json.load(fh) + + op_counts = {} + for e in all_events: + op_counts[e["kind"]] = op_counts.get(e["kind"], 0) + 1 + total_reclaims = sum(e.get("count", 0) for e in reclaim_events) + print(f"\nReclaimer fired {len(reclaim_events)} times, total tasks reclaimed: {total_reclaims}") + print("Worker events:") + for k in sorted(op_counts): + print(f" {k:<25} {op_counts[k]}") + + # Invariant checks + failures = [] + conn = kb.connect() + try: + # Any task stuck with current_run_id pointing at a closed run? + bad = conn.execute(""" + SELECT t.id, t.status, t.current_run_id, r.ended_at, r.outcome + FROM tasks t + JOIN task_runs r ON r.id = t.current_run_id + WHERE r.ended_at IS NOT NULL + """).fetchall() + for row in bad: + failures.append( + f"INVARIANT VIOLATION: task {row['id']} status={row['status']} " + f"current_run_id={row['current_run_id']} but run ended " + f"outcome={row['outcome']}" + ) + # Every run with NULL ended_at should still have the task pointing at it + orphans = conn.execute(""" + SELECT r.id, r.task_id + FROM task_runs r + LEFT JOIN tasks t ON t.current_run_id = r.id + WHERE r.ended_at IS NULL AND t.id IS NULL + """).fetchall() + for row in orphans: + failures.append(f"ORPHAN OPEN RUN: run {row['id']} on task {row['task_id']}") + # Event counts + claim_evts = conn.execute( + "SELECT COUNT(*) FROM task_events WHERE kind='claimed'").fetchone()[0] + reclaim_evts = conn.execute( + "SELECT COUNT(*) FROM task_events WHERE kind='reclaimed'").fetchone()[0] + comp_evts = conn.execute( + "SELECT COUNT(*) FROM task_events WHERE kind='completed'").fetchone()[0] + print(f"\nDB event counts: claimed={claim_evts} reclaimed={reclaim_evts} completed={comp_evts}") + # Every reclaimed run must have ended_at set + unended_reclaims = conn.execute( + "SELECT COUNT(*) FROM task_runs WHERE outcome='reclaimed' AND ended_at IS NULL" + ).fetchone()[0] + if unended_reclaims: + failures.append(f"UNENDED RECLAIMED RUNS: {unended_reclaims}") + # Count of completed runs + comp_runs = conn.execute( + "SELECT COUNT(*) FROM task_runs WHERE outcome='completed'" + ).fetchone()[0] + reclaim_runs = conn.execute( + "SELECT COUNT(*) FROM task_runs WHERE outcome='reclaimed'" + ).fetchone()[0] + print(f"DB run outcomes: completed={comp_runs} reclaimed={reclaim_runs}") + finally: + conn.close() + + if reclaim_runs == 0: + failures.append("NO RECLAIMS HAPPENED — test didn't stress what it was supposed to") + + if failures: + print(f"\nFAILURES ({len(failures)}):") + for f in failures[:20]: + print(f" {f}") + sys.exit(1) + else: + print("\n✔ RECLAIM RACE INVARIANTS HELD") + + +if __name__ == "__main__": + main() diff --git a/tests/stress/test_property_fuzzing.py 
b/tests/stress/test_property_fuzzing.py new file mode 100644 index 00000000000..b8facc62493 --- /dev/null +++ b/tests/stress/test_property_fuzzing.py @@ -0,0 +1,283 @@ +"""Randomized property testing for the Kanban kernel.
+
+Generates NUM_SEQUENCES (500) random operation sequences of up to
+OPS_PER_SEQUENCE (100) ops each, on small task graphs. After each
+step, checks the full invariant set:
+
+ I1. If tasks.current_run_id IS NOT NULL, the run MUST exist AND
+ ended_at MUST be NULL (we never point at a closed run).
+ I2. If a run has ended_at NULL, SOME task MUST have current_run_id
+ pointing at it (no orphan open runs).
+ I3. task.status in the valid set {triage, todo, ready, running,
+ blocked, done, archived}.
+ I4. task.claim_lock NULL iff status not in (running,).
+ I5. Every run has started_at <= ended_at (or ended_at is NULL).
+ I6. If outcome is set, ended_at must also be set.
+ I7. Events are strictly monotonic in (created_at, id).
+ I8. task_events.run_id references a task_runs.id that exists
+ (or is NULL).
+ I9. Parent completion invariant: if all parents are 'done', the
+ child cannot be in 'todo' status (recompute_ready should have
+ promoted it). This is called out in the comment on
+ recompute_ready; verify it holds after every random seq.
+
+Not using the hypothesis library; plain Python random keeps it simple.
+"""
+
+import os
+import random
+import sys
+import tempfile
+import time
+from pathlib import Path
+
+WT = str(Path(__file__).resolve().parents[2])
+NUM_SEQUENCES = 500
+OPS_PER_SEQUENCE = 100
+TASK_POOL = 10
+
+OPS = [
+ "create", "create_child", "claim", "complete", "block", "unblock",
+ "archive", "heartbeat", "release_stale", "detect_crashed",
+ "recompute_ready", "reassign",
+]
+
+
+def assert_invariants(conn, kb, ops_log):
+ """Run all invariant checks; raise AssertionError with context on any."""
+ failures = []
+
+ # I1: current_run_id → run exists and not ended
+ bad_ptr = conn.execute("""
+ SELECT t.id, t.current_run_id, r.ended_at, r.outcome
+ FROM tasks t
+ LEFT JOIN task_runs r ON r.id = t.current_run_id
+ WHERE t.current_run_id IS NOT NULL
+ AND (r.id IS NULL OR r.ended_at IS NOT NULL)
+ """).fetchall()
+ for row in bad_ptr:
+ if row["ended_at"] is None and row["outcome"] is None:
+ detail = "missing"
+ else:
+ detail = f"closed ({row['outcome']})"
+ failures.append(
+ f"I1: task {row['id']} points at run {row['current_run_id']} "
+ f"which is {detail}"
+ )
+
+ # I2: open run → some task points at it
+ orphans = conn.execute("""
+ SELECT r.id, r.task_id
+ FROM task_runs r
+ WHERE r.ended_at IS NULL
+ AND NOT EXISTS (SELECT 1 FROM tasks t WHERE t.current_run_id = r.id)
+ """).fetchall()
+ for row in orphans:
+ failures.append(f"I2: open run {row['id']} on task {row['task_id']} has no pointer")
+
+ # I3: valid statuses
+ valid = {"triage", "todo", "ready", "running", "blocked", "done", "archived"}
+ bad_status = conn.execute("SELECT id, status FROM tasks").fetchall()
+ for row in bad_status:
+ if row["status"] not in valid:
+ failures.append(f"I3: task {row['id']} has invalid status {row['status']!r}")
+
+ # I4: claim_lock set only when running
+ bad_lock = conn.execute("""
+ SELECT id, status, claim_lock FROM tasks
+ WHERE (status != 'running' AND claim_lock IS NOT NULL)
+ """).fetchall()
+ for row in bad_lock:
+ failures.append(
+ f"I4: task {row['id']} status={row['status']} but claim_lock={row['claim_lock']!r}"
+ )
+
+ # I5: run started_at <= ended_at
+ bad_times = conn.execute("""
+ SELECT id, started_at, ended_at FROM task_runs
+ WHERE ended_at IS NOT NULL AND started_at > ended_at
+ 
""").fetchall() + for row in bad_times: + failures.append( + f"I5: run {row['id']} started_at={row['started_at']} > ended_at={row['ended_at']}" + ) + + # I6: outcome set → ended_at set + bad_outcome = conn.execute(""" + SELECT id, outcome, ended_at FROM task_runs + WHERE outcome IS NOT NULL AND ended_at IS NULL + """).fetchall() + for row in bad_outcome: + failures.append(f"I6: run {row['id']} outcome={row['outcome']} but ended_at NULL") + + # I7: events monotonic in id (always true for autoincrement) + # Skip — autoincrement guarantees it. + + # I8: event.run_id references existing run + bad_ev_fk = conn.execute(""" + SELECT e.id, e.run_id FROM task_events e + LEFT JOIN task_runs r ON r.id = e.run_id + WHERE e.run_id IS NOT NULL AND r.id IS NULL + """).fetchall() + for row in bad_ev_fk: + failures.append(f"I8: event {row['id']} references missing run {row['run_id']}") + + # I9: if all parents done → child not in todo + # (Only applies to children with at least one parent) + orphaned_todo = conn.execute(""" + SELECT c.id AS child_id, + COUNT(*) AS n_parents, + SUM(CASE WHEN p.status = 'done' THEN 1 ELSE 0 END) AS done_parents + FROM tasks c + JOIN task_links l ON l.child_id = c.id + JOIN tasks p ON p.id = l.parent_id + WHERE c.status = 'todo' + GROUP BY c.id + HAVING n_parents > 0 AND n_parents = done_parents + """).fetchall() + for row in orphaned_todo: + failures.append( + f"I9: task {row['child_id']} is todo but all {row['n_parents']} parents are done" + ) + + if failures: + print(f"\n!!! INVARIANT VIOLATION after {len(ops_log)} ops:") + for f in failures[:10]: + print(f" {f}") + if len(failures) > 10: + print(f" ... and {len(failures) - 10} more") + print("\nLast 10 ops:") + for op in ops_log[-10:]: + print(f" {op}") + return False + return True + + +def random_op(rng, conn, kb, task_pool): + op = rng.choice(OPS) + + if op == "create": + tid = kb.create_task( + conn, + title=f"rand {rng.randint(0, 1000)}", + assignee=rng.choice(["w1", "w2", "w3", None]), + ) + task_pool.append(tid) + return {"op": "create", "tid": tid} + + if op == "create_child" and task_pool: + parent = rng.choice(task_pool) + tid = kb.create_task( + conn, title=f"child of {parent}", + assignee=rng.choice(["w1", "w2", "w3", None]), + parents=[parent], + ) + task_pool.append(tid) + return {"op": "create_child", "tid": tid, "parent": parent} + + if not task_pool: + return None + + tid = rng.choice(task_pool) + task = kb.get_task(conn, tid) + if task is None: + task_pool.remove(tid) + return None + + if op == "claim": + claimed = kb.claim_task(conn, tid, ttl_seconds=rng.choice([1, 3, 10])) + return {"op": "claim", "tid": tid, "ok": claimed is not None} + if op == "complete": + summary = rng.choice([None, f"done via op {rng.randint(0, 1000)}"]) + ok = kb.complete_task(conn, tid, summary=summary) + return {"op": "complete", "tid": tid, "ok": ok} + if op == "block": + reason = rng.choice([None, "rand block"]) + ok = kb.block_task(conn, tid, reason=reason) + return {"op": "block", "tid": tid, "ok": ok} + if op == "unblock": + ok = kb.unblock_task(conn, tid) + return {"op": "unblock", "tid": tid, "ok": ok} + if op == "archive": + ok = kb.archive_task(conn, tid) + if ok: + task_pool.remove(tid) + return {"op": "archive", "tid": tid, "ok": ok} + if op == "heartbeat": + ok = kb.heartbeat_worker(conn, tid) + return {"op": "heartbeat", "tid": tid, "ok": ok} + if op == "release_stale": + n = kb.release_stale_claims(conn) + return {"op": "release_stale", "n": n} + if op == "detect_crashed": + # Force-kill a fake PID first so 
there's something to detect + crashed = kb.detect_crashed_workers(conn) + return {"op": "detect_crashed", "n": len(crashed)} + if op == "recompute_ready": + n = kb.recompute_ready(conn) + return {"op": "recompute_ready", "promoted": n} + if op == "reassign": + # Reassignment isn't a direct API; simulate via assign_task + new_a = rng.choice(["w1", "w2", "w3", None]) + try: + kb.assign_task(conn, tid, new_a) + return {"op": "reassign", "tid": tid, "to": new_a} + except Exception as e: + return {"op": "reassign", "tid": tid, "err": str(e)[:50]} + + return None + + +def main(): + total_ops = 0 + total_violations = 0 + + for seq_idx in range(NUM_SEQUENCES): + seed = random.randint(0, 10**9) + rng = random.Random(seed) + home = tempfile.mkdtemp(prefix=f"hermes_fuzz_{seq_idx}_") + os.environ["HERMES_HOME"] = home + os.environ["HOME"] = home + sys.path.insert(0, WT) + + # Fresh module state per sequence to avoid cached init paths. + for m in list(sys.modules.keys()): + if m.startswith("hermes_cli"): + del sys.modules[m] + from hermes_cli import kanban_db as kb + + kb.init_db() + conn = kb.connect() + task_pool = [] + ops_log = [] + + try: + for i in range(OPS_PER_SEQUENCE): + result = random_op(rng, conn, kb, task_pool) + if result is None: + continue + ops_log.append(result) + total_ops += 1 + if not assert_invariants(conn, kb, ops_log): + total_violations += 1 + print(f" sequence {seq_idx} (seed={seed}) failed at op {i}") + break + finally: + conn.close() + + if seq_idx % 10 == 0: + print(f" seq {seq_idx:3d}: {total_ops} ops so far, {total_violations} violations") + + print() + print("=" * 60) + print(f"Total sequences: {NUM_SEQUENCES}") + print(f"Total operations: {total_ops}") + print(f"Invariant violations: {total_violations}") + if total_violations == 0: + print("\n✔ ALL INVARIANTS HELD ACROSS RANDOMIZED SEQUENCES") + else: + print("\n✗ INVARIANT VIOLATIONS FOUND") + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/stress/test_subprocess_e2e.py b/tests/stress/test_subprocess_e2e.py new file mode 100644 index 00000000000..5dd27f25eee --- /dev/null +++ b/tests/stress/test_subprocess_e2e.py @@ -0,0 +1,228 @@ +"""E2E: dispatcher spawns real Python subprocess workers. + +This validates the IPC + lifecycle story that mocks can't: + - spawn_fn returns a real PID + - the child process resolves hermes_cli.kanban_db on its own + - the child writes heartbeats via the CLI (real argparse, real init_db) + - the child completes via the CLI with --summary + --metadata + - the dispatcher observes all of this through the DB only + - worker logs are captured to HERMES_HOME/kanban/logs/.log + - crash detection works against a real dead PID +""" + +import json +import os +import subprocess +import sys +import tempfile +import time + +WT = str(Path(__file__).resolve().parents[2]) +FAKE_WORKER = str(Path(__file__).parent / "_fake_worker.py") +PY = sys.executable + + +def make_spawn_fn(home: str): + """Return a spawn_fn the dispatcher can call. 
Launches the fake + worker as a detached subprocess.""" + + def _spawn(task, workspace): + log_path = os.path.join(home, f"worker_{task.id}.log") + env = { + **os.environ, + "HERMES_HOME": home, + "HOME": home, + "PYTHONPATH": WT, + "HERMES_KANBAN_TASK": task.id, + "HERMES_KANBAN_WORKSPACE": workspace, + "PATH": f"{os.path.dirname(PY)}:{os.environ.get('PATH','')}", + } + log_f = open(log_path, "ab") + proc = subprocess.Popen( + [PY, FAKE_WORKER], + stdin=subprocess.DEVNULL, + stdout=log_f, + stderr=subprocess.STDOUT, + env=env, + start_new_session=True, + ) + return proc.pid + + return _spawn + + +def main(): + home = tempfile.mkdtemp(prefix="hermes_e2e_") + os.environ["HERMES_HOME"] = home + os.environ["HOME"] = home + sys.path.insert(0, WT) + from hermes_cli import kanban_db as kb + + # Point the `hermes` CLI child processes will run at the worktree + # hermes_cli.main. We do this by putting a shim on PATH. + shim_dir = os.path.join(home, "bin") + os.makedirs(shim_dir, exist_ok=True) + shim_path = os.path.join(shim_dir, "hermes") + with open(shim_path, "w") as f: + f.write(f"""#!/bin/sh +exec {PY} -m hermes_cli.main "$@" +""") + os.chmod(shim_path, 0o755) + os.environ["PATH"] = f"{shim_dir}:{os.environ.get('PATH','')}" + + kb.init_db() + conn = kb.connect() + + # ============ SCENARIO A: happy path, 3 tasks ============ + print("=" * 60) + print("A. Real-subprocess happy path (3 tasks)") + print("=" * 60) + + tids = [] + for i in range(3): + tid = kb.create_task( + conn, title=f"real-e2e-{i}", assignee="worker", + ) + tids.append(tid) + + spawn_fn = make_spawn_fn(home) + result = kb.dispatch_once(conn, spawn_fn=spawn_fn) + print(f" dispatched: {len(result.spawned)} spawned") + spawned_pids = [] + # The dispatcher sets worker_pid on each claimed task via _set_worker_pid. + for tid in tids: + task = kb.get_task(conn, tid) + spawned_pids.append(task.worker_pid) + print(f" task {tid}: pid={task.worker_pid} status={task.status}") + + # Wait for all workers to complete (up to 10s). 
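+ # Observation stays DB-only on purpose: the parent never signals or
+ # polls the child processes directly, mirroring how the dispatcher
+ # monitors the workers it spawns.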
+ deadline = time.monotonic() + 10 + while time.monotonic() < deadline: + statuses = [kb.get_task(conn, tid).status for tid in tids] + if all(s == "done" for s in statuses): + break + time.sleep(0.2) + + print() + failures = [] + for tid in tids: + task = kb.get_task(conn, tid) + runs = kb.list_runs(conn, tid) + print(f" task {tid}: status={task.status}, current_run_id={task.current_run_id}, " + f"runs={[(r.id, r.outcome) for r in runs]}") + if task.status != "done": + failures.append(f"task {tid} not done: status={task.status}") + if task.current_run_id is not None: + failures.append(f"task {tid} has dangling current_run_id={task.current_run_id}") + if len(runs) != 1: + failures.append(f"task {tid} has {len(runs)} runs, expected 1") + else: + r = runs[0] + if r.outcome != "completed": + failures.append(f"task {tid} run outcome={r.outcome}, expected completed") + if not r.summary or "real-subprocess worker finished" not in r.summary: + failures.append(f"task {tid} summary missing: {r.summary!r}") + if not r.metadata or r.metadata.get("iterations") != 3: + failures.append(f"task {tid} metadata missing iterations: {r.metadata}") + # Heartbeat events should be present + events = kb.list_events(conn, tid) + heartbeats = [e for e in events if e.kind == "heartbeat"] + if len(heartbeats) < 3: # start + 3 progress + failures.append(f"task {tid} heartbeats={len(heartbeats)} expected >=3") + + if failures: + print("\nFAILURES:") + for f in failures: + print(f" {f}") + sys.exit(1) + + print("\n ✔ Scenario A: all 3 real-subprocess workers completed cleanly") + + # ============ SCENARIO B: crashed worker ============ + print() + print("=" * 60) + print("B. Crashed worker (kill -9 mid-heartbeat)") + print("=" * 60) + + crash_tid = kb.create_task( + conn, title="crash-e2e", assignee="worker", + ) + + # Spawn a worker that sleeps long enough for us to kill it. + # CRITICAL: spawn through a double-fork so when we kill the child it + # doesn't zombify under our pid (which would fool kill -0 liveness + # checks into thinking it's still alive). In production the + # dispatcher daemon is long-lived but its workers are reaped by init + # after exit; the test needs to match that orphaning behavior. 
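+ # (Why the double fork matters: os.kill(pid, 0) only checks that the
+ # pid still exists, and a zombie child keeps its pid entry until the
+ # parent reaps it, so a directly-spawned, killed child would still
+ # look "alive" to that probe.)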
+ def spawn_sleeper(task, workspace): + r, w = os.pipe() + middleman = subprocess.Popen( + [ + PY, "-c", + "import os,sys,subprocess;" + "p=subprocess.Popen(['sleep','30']," + "stdin=subprocess.DEVNULL," + "stdout=subprocess.DEVNULL,stderr=subprocess.DEVNULL," + "start_new_session=True);" + "os.write(int(sys.argv[1]), str(p.pid).encode());" + "sys.exit(0)", + str(w), + ], + pass_fds=(w,), + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + os.close(w) + middleman.wait() # middleman exits immediately, orphaning the sleep + grandchild_pid = int(os.read(r, 16)) + os.close(r) + return grandchild_pid + + result = kb.dispatch_once(conn, spawn_fn=spawn_sleeper) + task = kb.get_task(conn, crash_tid) + print(f" spawned sleeper pid={task.worker_pid} for {crash_tid}") + # Kill the sleeper forcibly + os.kill(task.worker_pid, 9) + # Give the OS a moment to reap + time.sleep(0.5) + + # Simulate next dispatcher tick — should detect the crashed PID + crashed = kb.detect_crashed_workers(conn) + print(f" detect_crashed_workers returned {len(crashed)} crashed (expected 1)") + + task = kb.get_task(conn, crash_tid) + runs = kb.list_runs(conn, crash_tid) + print(f" task status={task.status}, runs={[(r.id, r.outcome) for r in runs]}") + + if len(crashed) < 1: + print(" ✗ crash NOT detected") + sys.exit(1) + if task.status != "ready": + print(f" ✗ task should be back to ready, got {task.status}") + sys.exit(1) + if runs[0].outcome != "crashed": + print(f" ✗ run outcome should be 'crashed', got {runs[0].outcome!r}") + sys.exit(1) + print("\n ✔ Scenario B: crash detected, task re-queued, run outcome=crashed") + + # ============ SCENARIO C: worker log was captured ============ + print() + print("=" * 60) + print("C. Worker log captured to disk") + print("=" * 60) + # Scenario A workers wrote to /tmp/hermes_e2e_*/worker_*.log + import glob + logs = glob.glob(os.path.join(home, "worker_*.log")) + print(f" {len(logs)} worker log files") + for lp in logs[:3]: + size = os.path.getsize(lp) + print(f" {os.path.basename(lp)}: {size} bytes") + # Our fake worker is quiet (no prints); size=0 is fine + + conn.close() + print("\n✔ ALL E2E SCENARIOS PASS") + + +if __name__ == "__main__": + main() diff --git a/tests/tools/test_kanban_tools.py b/tests/tools/test_kanban_tools.py new file mode 100644 index 00000000000..a7a8fda44db --- /dev/null +++ b/tests/tools/test_kanban_tools.py @@ -0,0 +1,494 @@ +"""Tests for the Kanban tool surface (tools/kanban_tools.py). + +Verifies: + - Tools are gated on HERMES_KANBAN_TASK: a normal chat session sees + zero kanban tools in its schema; a worker session sees all seven. + - Each handler's happy path. + - Error paths (missing required args, bad metadata type, etc). 
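+
+Handlers return JSON strings. Illustrative shapes the assertions below
+lean on (a sketch, not an exhaustive schema):
+
+ kanban_complete -> {"ok": true, "task_id": "..."}
+ kanban_create -> {"ok": true, "task_id": "...", "status": "todo"}
+ error paths -> {"error": "<reason>"}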
+""" +from __future__ import annotations + +import json +import os + +import pytest + + +# --------------------------------------------------------------------------- +# Gating +# --------------------------------------------------------------------------- + +def test_kanban_tools_hidden_without_env_var(monkeypatch, tmp_path): + """Normal `hermes chat` sessions (no HERMES_KANBAN_TASK) must have + zero kanban_* tools in their schema.""" + monkeypatch.delenv("HERMES_KANBAN_TASK", raising=False) + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + + import tools.kanban_tools # ensure registered + from tools.registry import registry + from toolsets import resolve_toolset + + schema = registry.get_definitions(set(resolve_toolset("hermes-cli")), quiet=True) + names = {s["function"].get("name") for s in schema if "function" in s} + kanban = {n for n in names if n and n.startswith("kanban_")} + assert kanban == set(), ( + f"kanban tools leaked into normal chat schema: {kanban}" + ) + + +def test_kanban_tools_visible_with_env_var(monkeypatch, tmp_path): + """Worker sessions (HERMES_KANBAN_TASK set) must have all 7 tools.""" + monkeypatch.setenv("HERMES_KANBAN_TASK", "t_fake") + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + + import tools.kanban_tools # ensure registered + from tools.registry import registry + from toolsets import resolve_toolset + + schema = registry.get_definitions(set(resolve_toolset("hermes-cli")), quiet=True) + names = {s["function"].get("name") for s in schema if "function" in s} + kanban = {n for n in names if n and n.startswith("kanban_")} + expected = { + "kanban_show", "kanban_complete", "kanban_block", "kanban_heartbeat", + "kanban_comment", "kanban_create", "kanban_link", + } + assert kanban == expected, f"expected {expected}, got {kanban}" + + +# --------------------------------------------------------------------------- +# Handler happy paths +# --------------------------------------------------------------------------- + +@pytest.fixture +def worker_env(monkeypatch, tmp_path): + """Simulate being a worker: HERMES_HOME isolated, HERMES_KANBAN_TASK set + after we've created the task.""" + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + monkeypatch.setenv("HERMES_PROFILE", "test-worker") + from pathlib import Path as _Path + monkeypatch.setattr(_Path, "home", lambda: tmp_path) + + from hermes_cli import kanban_db as kb + kb._INITIALIZED_PATHS.clear() + kb.init_db() + conn = kb.connect() + try: + tid = kb.create_task(conn, title="worker-test", assignee="test-worker") + kb.claim_task(conn, tid) + finally: + conn.close() + monkeypatch.setenv("HERMES_KANBAN_TASK", tid) + return tid + + +def test_show_defaults_to_env_task_id(worker_env): + from tools import kanban_tools as kt + out = kt._handle_show({}) + d = json.loads(out) + assert "task" in d + assert d["task"]["id"] == worker_env + assert d["task"]["status"] == "running" + assert "worker_context" in d + assert "runs" in d + + +def test_show_explicit_task_id(worker_env): + """Peek at a different task than the one in env.""" + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + other = kb.create_task(conn, title="other task", assignee="peer") + finally: + conn.close() + from tools import kanban_tools as kt + out = kt._handle_show({"task_id": other}) + d = json.loads(out) + assert d["task"]["id"] == other + + +def test_complete_happy_path(worker_env): + from tools import 
kanban_tools as kt + out = kt._handle_complete({ + "summary": "got the thing done", + "metadata": {"files": 2}, + }) + d = json.loads(out) + assert d["ok"] is True + assert d["task_id"] == worker_env + # Verify via kernel + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + run = kb.latest_run(conn, worker_env) + assert run.outcome == "completed" + assert run.summary == "got the thing done" + assert run.metadata == {"files": 2} + finally: + conn.close() + + +def test_complete_with_result_only(worker_env): + """`result` alone (without summary) is accepted for legacy compat.""" + from tools import kanban_tools as kt + out = kt._handle_complete({"result": "legacy result"}) + d = json.loads(out) + assert d["ok"] is True + + +def test_complete_rejects_no_handoff(worker_env): + from tools import kanban_tools as kt + out = kt._handle_complete({}) + assert json.loads(out).get("error"), "should have errored" + + +def test_complete_rejects_non_dict_metadata(worker_env): + from tools import kanban_tools as kt + out = kt._handle_complete({"summary": "x", "metadata": [1, 2, 3]}) + assert json.loads(out).get("error") + + +def test_block_happy_path(worker_env): + from tools import kanban_tools as kt + out = kt._handle_block({"reason": "need clarification"}) + d = json.loads(out) + assert d["ok"] is True + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + assert kb.get_task(conn, worker_env).status == "blocked" + finally: + conn.close() + + +def test_block_rejects_empty_reason(worker_env): + from tools import kanban_tools as kt + for bad in ["", " ", None]: + out = kt._handle_block({"reason": bad}) + assert json.loads(out).get("error") + + +def test_heartbeat_happy_path(worker_env): + from tools import kanban_tools as kt + out = kt._handle_heartbeat({"note": "progress"}) + d = json.loads(out) + assert d["ok"] is True + + +def test_heartbeat_without_note(worker_env): + """note is optional.""" + from tools import kanban_tools as kt + out = kt._handle_heartbeat({}) + d = json.loads(out) + assert d["ok"] is True + + +def test_comment_happy_path(worker_env): + from tools import kanban_tools as kt + out = kt._handle_comment({ + "task_id": worker_env, + "body": "hello thread", + }) + d = json.loads(out) + assert d["ok"] is True + assert d["comment_id"] + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + comments = kb.list_comments(conn, worker_env) + assert len(comments) == 1 + # Author defaults to HERMES_PROFILE env we set in the fixture + assert comments[0].author == "test-worker" + assert comments[0].body == "hello thread" + finally: + conn.close() + + +def test_comment_rejects_empty_body(worker_env): + from tools import kanban_tools as kt + out = kt._handle_comment({"task_id": worker_env, "body": " "}) + assert json.loads(out).get("error") + + +def test_comment_custom_author(worker_env): + from tools import kanban_tools as kt + out = kt._handle_comment({ + "task_id": worker_env, "body": "hi", "author": "custom-bot", + }) + assert json.loads(out)["ok"] + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + comments = kb.list_comments(conn, worker_env) + assert comments[0].author == "custom-bot" + finally: + conn.close() + + +def test_create_happy_path(worker_env): + from tools import kanban_tools as kt + out = kt._handle_create({ + "title": "child task", + "assignee": "peer", + "parents": [worker_env], + }) + d = json.loads(out) + assert d["ok"] is True + assert d["task_id"] + assert d["status"] == "todo" # parent isn't done yet + 
from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + child = kb.get_task(conn, d["task_id"]) + assert child.title == "child task" + assert child.assignee == "peer" + finally: + conn.close() + + +def test_create_rejects_no_title(worker_env): + from tools import kanban_tools as kt + assert json.loads(kt._handle_create({"assignee": "x"})).get("error") + assert json.loads(kt._handle_create({"title": " ", "assignee": "x"})).get("error") + + +def test_create_rejects_no_assignee(worker_env): + from tools import kanban_tools as kt + assert json.loads(kt._handle_create({"title": "t"})).get("error") + + +def test_create_rejects_non_list_parents(worker_env): + from tools import kanban_tools as kt + out = kt._handle_create({"title": "t", "assignee": "a", "parents": 42}) + assert json.loads(out).get("error") + + +def test_create_accepts_string_parent(worker_env): + """Convenience: a single parent id as string is coerced to [id].""" + from tools import kanban_tools as kt + out = kt._handle_create({ + "title": "t", "assignee": "a", "parents": worker_env, + }) + assert json.loads(out)["ok"] + + +def test_create_accepts_skills_list(worker_env): + """Tool writes the per-task skills through to the kernel.""" + from tools import kanban_tools as kt + from hermes_cli import kanban_db as kb + out = kt._handle_create({ + "title": "skilled", + "assignee": "linguist", + "skills": ["translation", "github-code-review"], + }) + d = json.loads(out) + assert d["ok"] is True + with kb.connect() as conn: + task = kb.get_task(conn, d["task_id"]) + assert task.skills == ["translation", "github-code-review"] + + +def test_create_accepts_skills_string(worker_env): + """Convenience: a single skill name as string is coerced to [name].""" + from tools import kanban_tools as kt + from hermes_cli import kanban_db as kb + out = kt._handle_create({ + "title": "one-skill", + "assignee": "a", + "skills": "translation", + }) + d = json.loads(out) + assert d["ok"] is True + with kb.connect() as conn: + task = kb.get_task(conn, d["task_id"]) + assert task.skills == ["translation"] + + +def test_create_rejects_non_list_skills(worker_env): + """skills: 42 must be rejected, not silently dropped.""" + from tools import kanban_tools as kt + out = kt._handle_create({ + "title": "t", "assignee": "a", "skills": 42, + }) + assert json.loads(out).get("error") + + +def test_link_happy_path(worker_env): + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + a = kb.create_task(conn, title="A", assignee="x") + b = kb.create_task(conn, title="B", assignee="x") + finally: + conn.close() + from tools import kanban_tools as kt + out = kt._handle_link({"parent_id": a, "child_id": b}) + d = json.loads(out) + assert d["ok"] is True + + +def test_link_rejects_self_reference(worker_env): + from tools import kanban_tools as kt + out = kt._handle_link({"parent_id": worker_env, "child_id": worker_env}) + assert json.loads(out).get("error") + + +def test_link_rejects_missing_args(worker_env): + from tools import kanban_tools as kt + assert json.loads(kt._handle_link({"parent_id": "x"})).get("error") + assert json.loads(kt._handle_link({"child_id": "y"})).get("error") + + +def test_link_rejects_cycle(worker_env): + """A → B, then try to link B → A.""" + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + a = kb.create_task(conn, title="A", assignee="x") + b = kb.create_task(conn, title="B", assignee="x", parents=[a]) + finally: + conn.close() + from tools import kanban_tools as kt + out = 
kt._handle_link({"parent_id": b, "child_id": a}) + assert json.loads(out).get("error") + + +# --------------------------------------------------------------------------- +# End-to-end: simulate a full worker lifecycle through the tools +# --------------------------------------------------------------------------- + +def test_worker_lifecycle_through_tools(worker_env): + """Drive the full claim -> heartbeat -> comment -> complete lifecycle + exclusively through the tools, then verify the DB state matches what + the dispatcher/notifier expect.""" + from tools import kanban_tools as kt + + # 1. show — worker orientation + show = json.loads(kt._handle_show({})) + assert show["task"]["id"] == worker_env + + # 2. heartbeat during long op + assert json.loads(kt._handle_heartbeat({"note": "warming up"}))["ok"] + + # 3. comment for a future peer + assert json.loads(kt._handle_comment({ + "task_id": worker_env, + "body": "note: using stdlib sqlite3 bindings", + }))["ok"] + + # 4. spawn a child task for follow-up + child_out = json.loads(kt._handle_create({ + "title": "write integration test", + "assignee": "qa", + "parents": [worker_env], + })) + assert child_out["ok"] + + # 5. complete with structured handoff + comp = json.loads(kt._handle_complete({ + "summary": "implemented + spawned QA follow-up", + "metadata": {"child_task": child_out["task_id"]}, + })) + assert comp["ok"] + + # Verify final state + from hermes_cli import kanban_db as kb + conn = kb.connect() + try: + parent = kb.get_task(conn, worker_env) + assert parent.status == "done" + assert parent.current_run_id is None + run = kb.latest_run(conn, worker_env) + assert run.outcome == "completed" + assert run.metadata == {"child_task": child_out["task_id"]} + # Child is todo (parent just finished, but recompute_ready may + # have promoted it — complete_task runs recompute internally). 
+ child = kb.get_task(conn, child_out["task_id"]) + assert child.status == "ready", ( + f"child should be ready after parent done, got {child.status}" + ) + # Comment is visible + assert len(kb.list_comments(conn, worker_env)) == 1 + # Heartbeat event recorded + hb = [e for e in kb.list_events(conn, worker_env) if e.kind == "heartbeat"] + assert len(hb) == 1 + finally: + conn.close() + + +# --------------------------------------------------------------------------- +# System-prompt guidance injection +# --------------------------------------------------------------------------- + +def test_kanban_guidance_not_in_normal_prompt(monkeypatch, tmp_path): + """A normal chat session (no HERMES_KANBAN_TASK) must NOT have + KANBAN_GUIDANCE in its system prompt.""" + monkeypatch.delenv("HERMES_KANBAN_TASK", raising=False) + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + from pathlib import Path as _P + monkeypatch.setattr(_P, "home", lambda: tmp_path) + + from run_agent import AIAgent + a = AIAgent( + api_key="test", + base_url="https://openrouter.ai/api/v1", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + prompt = a._build_system_prompt() + assert "You are a Kanban worker" not in prompt + assert "kanban_show()" not in prompt + + +def test_kanban_guidance_in_worker_prompt(monkeypatch, tmp_path): + """A worker session (HERMES_KANBAN_TASK set) MUST have the full + lifecycle guidance in its system prompt.""" + monkeypatch.setenv("HERMES_KANBAN_TASK", "t_fake") + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + from pathlib import Path as _P + monkeypatch.setattr(_P, "home", lambda: tmp_path) + + from run_agent import AIAgent + a = AIAgent( + api_key="test", + base_url="https://openrouter.ai/api/v1", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + prompt = a._build_system_prompt() + # Header phrase + assert "You are a Kanban worker" in prompt + # Lifecycle signals + assert "kanban_show()" in prompt + assert "kanban_complete" in prompt + assert "kanban_block" in prompt + assert "kanban_create" in prompt + # Anti-shell guidance + assert "Do not shell out" in prompt or "tools — they work" in prompt + + +def test_kanban_guidance_prompt_size_bounded(monkeypatch, tmp_path): + """Sanity: the guidance block is under 4 KB so it doesn't blow + up the cached prompt.""" + monkeypatch.setenv("HERMES_KANBAN_TASK", "t_fake") + home = tmp_path / ".hermes" + home.mkdir() + monkeypatch.setenv("HERMES_HOME", str(home)) + from pathlib import Path as _P + monkeypatch.setattr(_P, "home", lambda: tmp_path) + + from agent.prompt_builder import KANBAN_GUIDANCE + assert 1_500 < len(KANBAN_GUIDANCE) < 4_096, ( + f"KANBAN_GUIDANCE is {len(KANBAN_GUIDANCE)} chars — too short (missing?) or too long" + ) diff --git a/tools/kanban_tools.py b/tools/kanban_tools.py new file mode 100644 index 00000000000..de5d180c833 --- /dev/null +++ b/tools/kanban_tools.py @@ -0,0 +1,726 @@ +"""Kanban tools — structured tool-call surface for worker + orchestrator agents. + +These tools are only registered into the model's schema when the agent is +running under the dispatcher (env var ``HERMES_KANBAN_TASK`` set). A +normal ``hermes chat`` session sees **zero** kanban tools in its schema. + +Why tools instead of just shelling out to ``hermes kanban``? + +1. 
**Backend portability.** A worker whose terminal tool points at Docker + / Modal / Singularity / SSH would run ``hermes kanban complete …`` + inside the container, where ``hermes`` isn't installed and the DB + isn't mounted. Tools run in the agent's Python process, so they + always reach ``~/.hermes/kanban.db`` regardless of terminal backend. + +2. **No shell-quoting footguns.** Passing ``--metadata '{"x": [...]}'`` + through shlex+argparse is fragile. Structured tool args skip it. + +3. **Better errors.** Tool-call failures return structured JSON the + model can reason about, not stderr strings it has to parse. + +Humans continue to use the CLI (``hermes kanban …``), the dashboard +(``hermes dashboard``), and the slash command (``/kanban …``) — all +three bypass the agent entirely. The tools are ONLY for the worker +agent's handoff back to the kernel. +""" +from __future__ import annotations + +import json +import logging +import os +from typing import Any, Optional + +from tools.registry import registry, tool_error + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Gating +# --------------------------------------------------------------------------- + +def _check_kanban_mode() -> bool: + """Tools are available iff the current process has ``HERMES_KANBAN_TASK`` + set in its env, which the dispatcher sets when spawning a worker. + + Humans running ``hermes chat`` see zero kanban tools. Workers spawned + by the kanban dispatcher (gateway-embedded by default) see all seven. + """ + return bool(os.environ.get("HERMES_KANBAN_TASK")) + + +# --------------------------------------------------------------------------- +# Shared helpers +# --------------------------------------------------------------------------- + +def _default_task_id(arg: Optional[str]) -> Optional[str]: + """Resolve ``task_id`` arg or fall back to the env var the dispatcher set.""" + if arg: + return arg + env_tid = os.environ.get("HERMES_KANBAN_TASK") + return env_tid or None + + +def _connect(): + """Import + connect lazily so the module imports cleanly in non-kanban + contexts (e.g. 
test rigs that import every tool module).""" + from hermes_cli import kanban_db as kb + return kb, kb.connect() + + +def _ok(**fields: Any) -> str: + return json.dumps({"ok": True, **fields}) + + +# --------------------------------------------------------------------------- +# Handlers +# --------------------------------------------------------------------------- + +def _handle_show(args: dict, **kw) -> str: + """Read a task's full state: task row, parents, children, comments, + runs (attempt history), and the last N events.""" + tid = _default_task_id(args.get("task_id")) + if not tid: + return tool_error( + "task_id is required (or set HERMES_KANBAN_TASK in the env)" + ) + try: + kb, conn = _connect() + try: + task = kb.get_task(conn, tid) + if task is None: + return tool_error(f"task {tid} not found") + comments = kb.list_comments(conn, tid) + events = kb.list_events(conn, tid) + runs = kb.list_runs(conn, tid) + parents = kb.parent_ids(conn, tid) + children = kb.child_ids(conn, tid) + + def _task_dict(t): + return { + "id": t.id, "title": t.title, "body": t.body, + "assignee": t.assignee, "status": t.status, + "tenant": t.tenant, "priority": t.priority, + "workspace_kind": t.workspace_kind, + "workspace_path": t.workspace_path, + "created_by": t.created_by, "created_at": t.created_at, + "started_at": t.started_at, + "completed_at": t.completed_at, + "result": t.result, + "current_run_id": t.current_run_id, + } + + def _run_dict(r): + return { + "id": r.id, "profile": r.profile, + "status": r.status, "outcome": r.outcome, + "summary": r.summary, "error": r.error, + "metadata": r.metadata, + "started_at": r.started_at, "ended_at": r.ended_at, + } + + return json.dumps({ + "task": _task_dict(task), + "parents": parents, + "children": children, + "comments": [ + {"author": c.author, "body": c.body, + "created_at": c.created_at} + for c in comments + ], + "events": [ + {"kind": e.kind, "payload": e.payload, + "created_at": e.created_at, "run_id": e.run_id} + for e in events[-50:] # cap; full log via CLI + ], + "runs": [_run_dict(r) for r in runs], + # Also surface the worker's own context block so the + # agent can include it directly if it wants. This is + # the same string build_worker_context returns to the + # dispatcher at spawn time. 
+ "worker_context": kb.build_worker_context(conn, tid), + }) + finally: + conn.close() + except Exception as e: + logger.exception("kanban_show failed") + return tool_error(f"kanban_show: {e}") + + +def _handle_complete(args: dict, **kw) -> str: + """Mark the current task done with a structured handoff.""" + tid = _default_task_id(args.get("task_id")) + if not tid: + return tool_error( + "task_id is required (or set HERMES_KANBAN_TASK in the env)" + ) + summary = args.get("summary") + metadata = args.get("metadata") + result = args.get("result") + if not (summary or result): + return tool_error( + "provide at least one of: summary (preferred), result" + ) + if metadata is not None and not isinstance(metadata, dict): + return tool_error( + f"metadata must be an object/dict, got {type(metadata).__name__}" + ) + try: + kb, conn = _connect() + try: + ok = kb.complete_task( + conn, tid, + result=result, summary=summary, metadata=metadata, + ) + if not ok: + return tool_error( + f"could not complete {tid} (unknown id or already terminal)" + ) + run = kb.latest_run(conn, tid) + return _ok(task_id=tid, run_id=run.id if run else None) + finally: + conn.close() + except Exception as e: + logger.exception("kanban_complete failed") + return tool_error(f"kanban_complete: {e}") + + +def _handle_block(args: dict, **kw) -> str: + """Transition the task to blocked with a reason a human will read.""" + tid = _default_task_id(args.get("task_id")) + if not tid: + return tool_error( + "task_id is required (or set HERMES_KANBAN_TASK in the env)" + ) + reason = args.get("reason") + if not reason or not str(reason).strip(): + return tool_error("reason is required — explain what input you need") + try: + kb, conn = _connect() + try: + ok = kb.block_task(conn, tid, reason=reason) + if not ok: + return tool_error( + f"could not block {tid} (unknown id or not in " + f"running/ready)" + ) + run = kb.latest_run(conn, tid) + return _ok(task_id=tid, run_id=run.id if run else None) + finally: + conn.close() + except Exception as e: + logger.exception("kanban_block failed") + return tool_error(f"kanban_block: {e}") + + +def _handle_heartbeat(args: dict, **kw) -> str: + """Signal that the worker is still alive during a long operation.""" + tid = _default_task_id(args.get("task_id")) + if not tid: + return tool_error( + "task_id is required (or set HERMES_KANBAN_TASK in the env)" + ) + note = args.get("note") + try: + kb, conn = _connect() + try: + ok = kb.heartbeat_worker(conn, tid, note=note) + if not ok: + return tool_error( + f"could not heartbeat {tid} (unknown id or not running)" + ) + return _ok(task_id=tid) + finally: + conn.close() + except Exception as e: + logger.exception("kanban_heartbeat failed") + return tool_error(f"kanban_heartbeat: {e}") + + +def _handle_comment(args: dict, **kw) -> str: + """Append a comment to a task's thread.""" + tid = args.get("task_id") + if not tid: + return tool_error( + "task_id is required (use the current task id if that's what " + "you mean — pulls from env but kept explicit here)" + ) + body = args.get("body") + if not body or not str(body).strip(): + return tool_error("body is required") + author = args.get("author") or os.environ.get("HERMES_PROFILE") or "worker" + try: + kb, conn = _connect() + try: + cid = kb.add_comment(conn, tid, author=author, body=str(body)) + return _ok(task_id=tid, comment_id=cid) + finally: + conn.close() + except Exception as e: + logger.exception("kanban_comment failed") + return tool_error(f"kanban_comment: {e}") + + +def _handle_create(args: 
dict, **kw) -> str: + """Create a child task. Orchestrator workers use this to fan out. + + ``parents`` can be a list of task ids; dependency-gated promotion + works as usual. + """ + title = args.get("title") + if not title or not str(title).strip(): + return tool_error("title is required") + assignee = args.get("assignee") + if not assignee: + return tool_error( + "assignee is required — name the profile that should execute this " + "task (the dispatcher will only spawn tasks with an assignee)" + ) + body = args.get("body") + parents = args.get("parents") or [] + tenant = args.get("tenant") or os.environ.get("HERMES_TENANT") + priority = args.get("priority") + workspace_kind = args.get("workspace_kind") or "scratch" + workspace_path = args.get("workspace_path") + triage = bool(args.get("triage")) + idempotency_key = args.get("idempotency_key") + max_runtime_seconds = args.get("max_runtime_seconds") + skills = args.get("skills") + if isinstance(skills, str): + # Accept a single skill name as a string for convenience. + skills = [skills] + if skills is not None and not isinstance(skills, (list, tuple)): + return tool_error( + f"skills must be a list of skill names, got {type(skills).__name__}" + ) + if isinstance(parents, str): + parents = [parents] + if not isinstance(parents, (list, tuple)): + return tool_error( + f"parents must be a list of task ids, got {type(parents).__name__}" + ) + try: + kb, conn = _connect() + try: + new_tid = kb.create_task( + conn, + title=str(title).strip(), + body=body, + assignee=str(assignee), + parents=tuple(parents), + tenant=tenant, + priority=int(priority) if priority is not None else 0, + workspace_kind=str(workspace_kind), + workspace_path=workspace_path, + triage=triage, + idempotency_key=idempotency_key, + max_runtime_seconds=( + int(max_runtime_seconds) + if max_runtime_seconds is not None else None + ), + skills=skills, + created_by=os.environ.get("HERMES_PROFILE") or "worker", + ) + new_task = kb.get_task(conn, new_tid) + return _ok( + task_id=new_tid, + status=new_task.status if new_task else None, + ) + finally: + conn.close() + except Exception as e: + logger.exception("kanban_create failed") + return tool_error(f"kanban_create: {e}") + + +def _handle_link(args: dict, **kw) -> str: + """Add a parent→child dependency edge after the fact.""" + parent_id = args.get("parent_id") + child_id = args.get("child_id") + if not parent_id or not child_id: + return tool_error("both parent_id and child_id are required") + try: + kb, conn = _connect() + try: + kb.link_tasks(conn, parent_id=parent_id, child_id=child_id) + return _ok(parent_id=parent_id, child_id=child_id) + finally: + conn.close() + except ValueError as e: + # Covers cycle + self-parent rejections + return tool_error(f"kanban_link: {e}") + except Exception as e: + logger.exception("kanban_link failed") + return tool_error(f"kanban_link: {e}") + + +# --------------------------------------------------------------------------- +# Schemas +# --------------------------------------------------------------------------- + +_DESC_TASK_ID_DEFAULT = ( + "Task id. If omitted, defaults to HERMES_KANBAN_TASK from the env " + "(the task the dispatcher spawned you to work on)." +) + +KANBAN_SHOW_SCHEMA = { + "name": "kanban_show", + "description": ( + "Read a task's full state — title, body, assignee, parent task " + "handoffs, your prior attempts on this task if any, comments, " + "and recent events. Use this to (re)orient yourself before " + "starting work, especially on retries. 
The response includes a " + "pre-formatted ``worker_context`` string suitable for inclusion " + "verbatim in your reasoning." + ), + "parameters": { + "type": "object", + "properties": { + "task_id": { + "type": "string", + "description": _DESC_TASK_ID_DEFAULT, + }, + }, + "required": [], + }, +} + +KANBAN_COMPLETE_SCHEMA = { + "name": "kanban_complete", + "description": ( + "Mark your current task done with a structured handoff for " + "downstream workers and humans. Prefer ``summary`` for a " + "human-readable 1-3 sentence description of what you did; put " + "machine-readable facts in ``metadata`` (changed_files, " + "tests_run, decisions, findings, etc). At least one of " + "``summary`` or ``result`` is required." + ), + "parameters": { + "type": "object", + "properties": { + "task_id": { + "type": "string", + "description": _DESC_TASK_ID_DEFAULT, + }, + "summary": { + "type": "string", + "description": ( + "Human-readable handoff, 1-3 sentences. Appears in " + "Run History on the dashboard and in downstream " + "workers' context." + ), + }, + "metadata": { + "type": "object", + "description": ( + "Free-form dict of structured facts about this " + "attempt — {\"changed_files\": [...], \"tests_run\": 12, " + "\"findings\": [...]}. Surfaced to downstream " + "workers alongside ``summary``." + ), + }, + "result": { + "type": "string", + "description": ( + "Short result log line (legacy field, maps to " + "task.result). Use ``summary`` instead when " + "possible; this exists for compatibility with " + "callers that still set --result on the CLI." + ), + }, + }, + "required": [], + }, +} + +KANBAN_BLOCK_SCHEMA = { + "name": "kanban_block", + "description": ( + "Transition the task to blocked because you need human input " + "to proceed. ``reason`` will be shown to the human on the " + "board and included in context when someone unblocks you. " + "Use for genuine blockers only — don't block on things you can " + "resolve yourself." + ), + "parameters": { + "type": "object", + "properties": { + "task_id": { + "type": "string", + "description": _DESC_TASK_ID_DEFAULT, + }, + "reason": { + "type": "string", + "description": ( + "What you need answered, in one or two sentences. " + "Don't paste the whole conversation; the human has " + "the board and can ask follow-ups via comments." + ), + }, + }, + "required": ["reason"], + }, +} + +KANBAN_HEARTBEAT_SCHEMA = { + "name": "kanban_heartbeat", + "description": ( + "Signal that you're still alive during a long operation " + "(training, encoding, large crawls). Call every few minutes so " + "humans see liveness separately from PID checks. Pure side " + "effect — no work changes." + ), + "parameters": { + "type": "object", + "properties": { + "task_id": { + "type": "string", + "description": _DESC_TASK_ID_DEFAULT, + }, + "note": { + "type": "string", + "description": ( + "Optional short note describing current progress. " + "Shown in the event log." + ), + }, + }, + "required": [], + }, +} + +KANBAN_COMMENT_SCHEMA = { + "name": "kanban_comment", + "description": ( + "Append a comment to a task's thread. Use for durable notes " + "that should outlive this run (questions for the next worker, " + "partial findings, rationale). Ephemeral reasoning doesn't " + "belong here — use your normal response instead." + ), + "parameters": { + "type": "object", + "properties": { + "task_id": { + "type": "string", + "description": ( + "Task id. Required (may be your own task or " + "another's — comment threads are per-task)." 
+ ), + }, + "body": { + "type": "string", + "description": "Markdown-supported comment body.", + }, + "author": { + "type": "string", + "description": ( + "Override author name. Defaults to the current " + "profile (HERMES_PROFILE env)." + ), + }, + }, + "required": ["task_id", "body"], + }, +} + +KANBAN_CREATE_SCHEMA = { + "name": "kanban_create", + "description": ( + "Create a new kanban task, optionally as a child of the current " + "one (pass the current task id in ``parents``). Used by " + "orchestrator workers to fan out — decompose work into child " + "tasks with specific assignees, link them into a pipeline, " + "then complete your own task. The dispatcher picks up the new " + "tasks on its next tick and spawns the assigned profiles." + ), + "parameters": { + "type": "object", + "properties": { + "title": { + "type": "string", + "description": "Short task title (required).", + }, + "assignee": { + "type": "string", + "description": ( + "Profile name that should execute this task " + "(e.g. 'researcher-a', 'reviewer', 'writer'). " + "Required — tasks without an assignee are never " + "dispatched." + ), + }, + "body": { + "type": "string", + "description": ( + "Opening post: full spec, acceptance criteria, " + "links. The assigned worker reads this as part of " + "its context." + ), + }, + "parents": { + "type": "array", + "items": {"type": "string"}, + "description": ( + "Parent task ids. The new task stays in 'todo' " + "until every parent reaches 'done'; then it " + "auto-promotes to 'ready'. Typical fan-in: list " + "all the researcher task ids when creating a " + "synthesizer task." + ), + }, + "tenant": { + "type": "string", + "description": ( + "Optional namespace for multi-project isolation. " + "Defaults to HERMES_TENANT env if set." + ), + }, + "priority": { + "type": "integer", + "description": ( + "Dispatcher tiebreaker. Higher = picked sooner " + "when multiple ready tasks share an assignee." + ), + }, + "workspace_kind": { + "type": "string", + "enum": ["scratch", "dir", "worktree"], + "description": ( + "Workspace flavor: 'scratch' (fresh tmp dir, " + "default), 'dir' (shared directory, requires " + "absolute workspace_path), 'worktree' (git worktree)." + ), + }, + "workspace_path": { + "type": "string", + "description": ( + "Absolute path for 'dir' or 'worktree' workspace. " + "Relative paths are rejected at dispatch." + ), + }, + "triage": { + "type": "boolean", + "description": ( + "If true, task lands in 'triage' instead of 'todo' " + "— a specifier profile is expected to flesh out " + "the body before work starts." + ), + }, + "idempotency_key": { + "type": "string", + "description": ( + "If a non-archived task with this key already " + "exists, return that task's id instead of creating " + "a duplicate. Useful for retry-safe automation." + ), + }, + "max_runtime_seconds": { + "type": "integer", + "description": ( + "Per-task runtime cap. When exceeded, the " + "dispatcher SIGTERMs the worker and re-queues the " + "task with outcome='timed_out'." + ), + }, + "skills": { + "type": "array", + "items": {"type": "string"}, + "description": ( + "Skill names to force-load into the dispatched " + "worker (in addition to the built-in kanban-worker " + "skill). Use this to pin a task to a specialist " + "context — e.g. ['translation'] for a translation " + "task, ['github-code-review'] for a reviewer task. " + "The names must match skills installed on the " + "assignee's profile." 
+ ), + }, + }, + "required": ["title", "assignee"], + }, +} + +KANBAN_LINK_SCHEMA = { + "name": "kanban_link", + "description": ( + "Add a parent→child dependency edge after both tasks already " + "exist. The child won't promote to 'ready' until all parents " + "are 'done'. Cycles and self-links are rejected." + ), + "parameters": { + "type": "object", + "properties": { + "parent_id": {"type": "string", "description": "Parent task id."}, + "child_id": {"type": "string", "description": "Child task id."}, + }, + "required": ["parent_id", "child_id"], + }, +} + + +# --------------------------------------------------------------------------- +# Registration +# --------------------------------------------------------------------------- + +registry.register( + name="kanban_show", + toolset="kanban", + schema=KANBAN_SHOW_SCHEMA, + handler=_handle_show, + check_fn=_check_kanban_mode, + emoji="📋", +) + +registry.register( + name="kanban_complete", + toolset="kanban", + schema=KANBAN_COMPLETE_SCHEMA, + handler=_handle_complete, + check_fn=_check_kanban_mode, + emoji="✔", +) + +registry.register( + name="kanban_block", + toolset="kanban", + schema=KANBAN_BLOCK_SCHEMA, + handler=_handle_block, + check_fn=_check_kanban_mode, + emoji="⏸", +) + +registry.register( + name="kanban_heartbeat", + toolset="kanban", + schema=KANBAN_HEARTBEAT_SCHEMA, + handler=_handle_heartbeat, + check_fn=_check_kanban_mode, + emoji="💓", +) + +registry.register( + name="kanban_comment", + toolset="kanban", + schema=KANBAN_COMMENT_SCHEMA, + handler=_handle_comment, + check_fn=_check_kanban_mode, + emoji="💬", +) + +registry.register( + name="kanban_create", + toolset="kanban", + schema=KANBAN_CREATE_SCHEMA, + handler=_handle_create, + check_fn=_check_kanban_mode, + emoji="➕", +) + +registry.register( + name="kanban_link", + toolset="kanban", + schema=KANBAN_LINK_SCHEMA, + handler=_handle_link, + check_fn=_check_kanban_mode, + emoji="🔗", +) diff --git a/toolsets.py b/toolsets.py index ee067aa13e3..57e226d3c08 100644 --- a/toolsets.py +++ b/toolsets.py @@ -60,6 +60,11 @@ _HERMES_CORE_TOOLS = [ "send_message", # Home Assistant smart home control (gated on HASS_TOKEN via check_fn) "ha_list_entities", "ha_get_state", "ha_list_services", "ha_call_service", + # Kanban multi-agent coordination — only in schema when the agent is + # spawned as a kanban worker (HERMES_KANBAN_TASK env set), otherwise + # zero schema footprint. Gated via check_fn in tools/kanban_tools.py. + "kanban_show", "kanban_complete", "kanban_block", "kanban_heartbeat", + "kanban_comment", "kanban_create", "kanban_link", ] @@ -202,6 +207,24 @@ TOOLSETS = { "includes": [] }, + "kanban": { + "description": ( + "Kanban multi-agent coordination — only active when the agent " + "is spawned by the kanban dispatcher (HERMES_KANBAN_TASK env " + "set). The dispatcher runs inside the gateway by default; see " + "`kanban.dispatch_in_gateway` in config.yaml. Lets workers mark " + "tasks done with structured handoffs, block for human input, " + "heartbeat during long ops, comment on threads, and (for " + "orchestrators) fan out into child tasks." 
+        ),
+        "tools": [
+            "kanban_show", "kanban_complete", "kanban_block",
+            "kanban_heartbeat", "kanban_comment",
+            "kanban_create", "kanban_link",
+        ],
+        "includes": [],
+    },
+
     "discord": {
         "description": "Discord read and participate tools (fetch messages, search members, create threads)",
         "tools": ["discord"],
diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md
index 933cb64732f..5ae38e255b7 100644
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@@ -47,6 +47,7 @@ hermes [global-options] [subcommand/options]
 | `hermes login` / `logout` | **Deprecated** — use `hermes auth` instead. |
 | `hermes status` | Show agent, auth, and platform status. |
 | `hermes cron` | Inspect and tick the cron scheduler. |
+| `hermes kanban` | Multi-profile collaboration board (tasks, links, dispatcher). |
 | `hermes webhook` | Manage dynamic webhook subscriptions for event-driven activation. |
 | `hermes hooks` | Inspect, approve, or remove shell-script hooks declared in `config.yaml`. |
 | `hermes doctor` | Diagnose config and dependency issues. |
@@ -336,6 +337,38 @@ hermes cron
 | `status` | Check whether the cron scheduler is running. |
 | `tick` | Run due jobs once and exit. |
+
+## `hermes kanban`
+
+```bash
+hermes kanban <action> [options]
+```
+
+Multi-profile collaboration board. Tasks live in `~/.hermes/kanban.db` (WAL-mode SQLite); every profile reads and writes the same board. The dispatcher (embedded in the gateway by default; `hermes kanban dispatch` runs a single cron-friendly pass) atomically claims ready tasks and spawns the assigned profile as its own process with an isolated workspace.
+
+| Action | Purpose |
+|--------|---------|
+| `init` | Create `kanban.db` if missing. Idempotent. |
+| `create "<title>"` | Create a new task. Flags: `--body`, `--assignee`, `--parent` (repeatable), `--workspace scratch\|worktree\|dir:<path>`, `--tenant`, `--priority`. |
+| `list` / `ls` | List tasks. Filter with `--mine`, `--assignee`, `--status`, `--tenant`, `--archived`, `--json`. |
+| `show <id>` | Show a task with comments and events. `--json` for machine output. |
+| `assign <id> <profile>` | Assign or reassign. Use `none` to unassign. Refused while task is running. |
+| `link <parent> <child>` | Add a dependency. Cycle-detected. |
+| `unlink <parent> <child>` | Remove a dependency. |
+| `claim <id>` | Atomically claim a ready task. Prints resolved workspace path. |
+| `comment <id> "<text>"` | Append a comment. Visible to the next worker that runs the task. |
+| `complete <id>` | Mark task done. Flags: `--summary "<text>"` and `--metadata '<json>'` (structured handoff, surfaced to children), plus legacy `--result "<summary>"` (goes into children's parent-result context). |
+| `block <id> "<reason>"` | Mark task blocked. Also appends the reason as a comment. |
+| `unblock <id>` | Return a blocked task to ready. |
+| `archive <id>` | Hide from default list. `gc` will remove scratch workspaces. |
+| `tail <id>` | Follow a task's event stream. |
+| `dispatch` | One dispatcher pass. Flags: `--dry-run`, `--max N`, `--json`. |
+| `context <id>` | Print the full context a worker would see (title + body + parent results + comments). |
+| `gc` | Remove scratch workspaces for archived tasks. |
+
+All actions are also available as a slash command in the gateway (`/kanban …`), with the same argument surface.
+
+For the full design — comparison with Cline Kanban / Paperclip / NanoClaw / Gemini Enterprise, eight collaboration patterns, four user stories, concurrency correctness proof — see `docs/hermes-kanban-v1-spec.pdf` in the repository or the [Kanban user guide](/docs/user-guide/features/kanban).
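+
+The `--json` flags above are the supported machine interface, but because the board is a plain SQLite file you can also peek at it read-only from a script. A minimal sketch, assuming a `tasks` table with `id`, `status`, `assignee`, and `title` columns (this page implies the schema but does not pin it down):
+
+```python
+import sqlite3
+from pathlib import Path
+
+# Open read-only via a URI so a stray script can never take the WAL write lock.
+# NOTE: the table/column names are assumptions for illustration; prefer
+# `hermes kanban list --json` for anything load-bearing.
+db = Path.home() / ".hermes" / "kanban.db"
+conn = sqlite3.connect(f"file:{db}?mode=ro", uri=True)
+try:
+    for task_id, status, assignee, title in conn.execute(
+        "SELECT id, status, assignee, title FROM tasks WHERE status != 'archived'"
+    ):
+        print(f"{task_id}  {status:<8}  {assignee or '-':<12}  {title}")
+finally:
+    conn.close()
+```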
+ ## `hermes webhook` ```bash diff --git a/website/docs/user-guide/features/kanban-tutorial.md b/website/docs/user-guide/features/kanban-tutorial.md new file mode 100644 index 00000000000..89b5c1c0247 --- /dev/null +++ b/website/docs/user-guide/features/kanban-tutorial.md @@ -0,0 +1,263 @@ +# Kanban tutorial + +A walkthrough of the four use-cases the Hermes Kanban system was designed for, with the dashboard open in a browser. If you haven't read the [Kanban overview](./kanban) yet, start there — this assumes you know what a task, run, assignee, and dispatcher are. + +## Setup + +```bash +hermes kanban init # optional; first `hermes kanban <anything>` auto-inits +hermes dashboard # opens http://127.0.0.1:9119 in your browser +# click Kanban in the left nav +``` + +The dashboard is the most comfortable place to learn the system. Everything you see here is also available via `hermes kanban <verb>` on the CLI — the two surfaces share the same SQLite database at `~/.hermes/kanban.db`. + +## The board at a glance + +![Kanban board overview](/img/kanban-tutorial/01-board-overview.png) + +Six columns, left to right: + +- **Triage** — raw ideas, a specifier will flesh out the spec before anyone works on them. +- **Todo** — created but waiting on dependencies, or not yet assigned. +- **Ready** — assigned and waiting for the dispatcher to claim. +- **In progress** — a worker is actively running the task. With "Lanes by profile" on (the default), this column sub-groups by assignee so you can see at a glance what each worker is doing. +- **Blocked** — a worker asked for human input, or the circuit breaker tripped. +- **Done** — completed. + +The top bar has filters for search, tenant, and assignee, plus a `Lanes by profile` toggle and a `Nudge dispatcher` button that runs one dispatch tick right now instead of waiting for the daemon's next interval. Clicking any card opens its drawer on the right. + +### Flat view + +If the profile lanes are noisy, toggle "Lanes by profile" off and the In Progress column collapses to a single flat list ordered by claim time: + +![Board with lanes by profile off](/img/kanban-tutorial/02-board-flat.png) + +## Story 1 — Solo dev shipping a feature + +You're building a feature. Classic flow: design a schema, implement the API, write the tests. Three tasks with parent→child dependencies. + +```bash +SCHEMA=$(hermes kanban create "Design auth schema" \ + --assignee backend-dev --tenant auth-project --priority 2 \ + --body "Design the user/session/token schema for the auth module." \ + --json | jq -r .id) + +API=$(hermes kanban create "Implement auth API endpoints" \ + --assignee backend-dev --tenant auth-project --priority 2 \ + --parent $SCHEMA \ + --body "POST /register, POST /login, POST /refresh, POST /logout." \ + --json | jq -r .id) + +hermes kanban create "Write auth integration tests" \ + --assignee qa-dev --tenant auth-project --priority 2 \ + --parent $API \ + --body "Cover happy path, wrong password, expired token, concurrent refresh." +``` + +Because `API` has `SCHEMA` as its parent, and `tests` has `API` as its parent, only `SCHEMA` starts in `ready`. The other two sit in `todo` until their parents complete. This is the dependency promotion engine doing its job — no other worker will pick up the test-writing until there's an API to test. + +Claim the schema task, do the work, hand off: + +```bash +hermes kanban claim $SCHEMA + +# (you design the schema, commit, etc.) 
+
+hermes kanban complete $SCHEMA \
+  --summary "users(id, email, pw_hash), sessions(id, user_id, jti, expires_at); refresh tokens stored as sessions with type='refresh'" \
+  --metadata '{
+    "changed_files": ["migrations/001_users.sql", "migrations/002_sessions.sql"],
+    "decisions": ["bcrypt for hashing", "JWT for session tokens", "7-day refresh, 15-min access"]
+  }'
+```
+
+When `SCHEMA` hits `done`, the dependency engine promotes `API` to `ready` automatically. The API worker, when it picks up, will read `SCHEMA`'s summary and metadata in its context — so it knows the schema decisions without re-reading a long design doc.
+
+Click the completed schema task on the board and the drawer shows everything:
+
+![Solo dev — completed schema task drawer](/img/kanban-tutorial/03-drawer-schema-task.png)
+
+The Run History section at the bottom is the key addition. One attempt: outcome `completed`, worker `@backend-dev`, duration, timestamp, and the handoff summary in full. The metadata blob (`changed_files`, `decisions`) is stored on the run too and surfaced to any downstream worker that reads this parent.
+
+On the CLI:
+
+```bash
+hermes kanban show $SCHEMA
+hermes kanban runs $SCHEMA
+#  #  OUTCOME    PROFILE      ELAPSED  STARTED
+#  1  completed  backend-dev  0s       2026-04-27 19:34
+#     → users(id, email, pw_hash), sessions(id, user_id, jti, expires_at); refresh tokens ...
+```
+
+## Story 2 — Fleet farming
+
+You have three workers (a translator, a transcriber, a copywriter) and a pile of independent tasks. You want all three pulling in parallel and making visible progress. This is the simplest kanban use-case and the one the original design optimized for.
+
+Create the work:
+
+```bash
+for lang in Spanish French German; do
+  hermes kanban create "Translate homepage to $lang" \
+    --assignee translator --tenant content-ops
+done
+for i in 1 2 3 4 5; do
+  hermes kanban create "Transcribe Q3 customer call #$i" \
+    --assignee transcriber --tenant content-ops
+done
+for sku in 1001 1002 1003 1004; do
+  hermes kanban create "Generate product description: SKU-$sku" \
+    --assignee copywriter --tenant content-ops
+done
+```
+
+Start the gateway and walk away — it hosts the embedded dispatcher
+that picks up all three specialist profiles' tasks on the same
+kanban.db:
+
+```bash
+hermes gateway start
+```
+
+Now filter the board to `content-ops` (or just search for "Transcribe") and you get this:
+
+![Fleet view filtered to transcribe tasks](/img/kanban-tutorial/07-fleet-transcribes.png)
+
+Two transcribes done, one running, two ready waiting for the next dispatcher tick. The In Progress column is grouped by profile (the "Lanes by profile" default) so you see each worker's active task without scanning a mixed list. The dispatcher will promote the next ready task to running as soon as the current one completes. With one dispatcher feeding three assignee pools in parallel, the whole content queue drains without further human input.
+
+**Everything Story 1 said about structured handoff still applies here.** A translator worker completing a task can pass `--summary "translated 4 pages, style matched existing marketing voice"` and `--metadata '{"duration_seconds": 720, "tokens_used": 2100}'` — useful for analytics and for any downstream task that depends on this one.
+
+## Story 3 — Role pipeline with retry
+
+This is where Kanban earns its keep over a flat TODO list. A PM writes a spec. An engineer implements it. A reviewer rejects the first attempt. The engineer tries again with changes. The reviewer approves.
+ +The dashboard view, filtered by `auth-project`: + +![Pipeline view for a multi-role feature](/img/kanban-tutorial/08-pipeline-auth.png) + +Three-stage chain visible at once: `Spec: password reset flow` (DONE, pm), `Implement password reset flow` (DONE, backend-dev), `Review password reset PR` (READY, reviewer). Each has its parent in green at the bottom and children as dependencies. + +The interesting one is the implementation task, because it was blocked and retried: + +```bash +# PM completes the spec with acceptance criteria in metadata +hermes kanban complete $SPEC \ + --summary "spec approved; POST /forgot-password sends email, GET /reset/:token renders form, POST /reset applies new password" \ + --metadata '{"acceptance": [ + "expired token returns 410", + "reused last-3 password returns 400 with message", + "successful reset invalidates all active sessions" + ]}' + +# Engineer claims + implements, but review blocks it for missing strength check +hermes kanban claim $IMPL +hermes kanban block $IMPL "Review: password strength check missing, reset link isn't single-use (can be replayed within 30min)" + +# Engineer iterates, resolves, completes +hermes kanban unblock $IMPL +hermes kanban claim $IMPL +hermes kanban complete $IMPL \ + --summary "added zxcvbn strength check, reset tokens are now single-use (stored + deleted on success)" \ + --metadata '{ + "changed_files": ["auth/reset.py", "auth/tests/test_reset.py", "migrations/003_single_use_reset_tokens.sql"], + "tests_run": 11, + "review_iteration": 2 + }' +``` + +Click the implementation task. The drawer shows **two attempts**: + +![Implementation task with two runs — blocked then completed](/img/kanban-tutorial/04b-drawer-retry-history-scrolled.png) + +- **Run 1** — `blocked` by `@backend-dev`. The review feedback sits right under the outcome: "password strength check missing, reset link isn't single-use (can be replayed within 30min)". +- **Run 2** — `completed` by `@backend-dev`. Fresh summary, fresh metadata. + +Each run is a row in `task_runs` with its own outcome, summary, and metadata. Retry history is not a conceptual afterthought layered on top of a "latest state" task — it's the primary representation. When a retrying worker opens the task, `build_worker_context` shows it the prior attempts, so the second-pass worker sees why the first pass was blocked and addresses those specific findings instead of re-running from scratch. + +The reviewer picks up next. When they open `Review password reset PR`, they see: + +![Reviewer's drawer view of the pipeline](/img/kanban-tutorial/09-drawer-pipeline-review.png) + +The parent link is the completed implementation. When the reviewer's worker calls `build_worker_context`, it pulls the parent's most-recent-completed-run summary + metadata — so the reviewer reads "added zxcvbn strength check, reset tokens are now single-use" and has the list of changed files in hand before looking at a diff. + +## Story 4 — Circuit breaker and crash recovery + +Real workers fail. Missing credentials, OOM kills, transient network errors. The dispatcher has two lines of defense: a **circuit breaker** that auto-blocks after N consecutive failures so the board doesn't thrash forever, and **crash detection** that reclaims a task whose worker PID went away before its TTL expired. 
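+
+Before the walkthroughs, a minimal sketch of what the crash-detection half boils down to. The real check lives inside the dispatcher loop; the helper name and the commented reclaim step are illustrative, not the actual API:
+
+```python
+import os
+
+def worker_alive(pid: int) -> bool:
+    """Probe in the style the dispatcher uses: signal 0 delivers nothing,
+    it only checks whether the process still exists."""
+    if pid <= 0:
+        return False
+    try:
+        os.kill(pid, 0)
+    except ProcessLookupError:
+        return False  # pid gone: the worker crashed or was killed
+    except PermissionError:
+        return True   # process exists but belongs to another uid
+    return True
+
+# Dispatcher tick (illustrative): a running claim whose pid is dead is
+# released before its TTL expires, so the task returns to `ready`.
+```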
+ +### Circuit breaker — permanent-looking failure + +A deploy task that can't spawn its worker because `AWS_ACCESS_KEY_ID` isn't set in the profile's environment: + +```bash +hermes kanban create "Deploy to staging (missing creds)" \ + --assignee deploy-bot --tenant ops +``` + +The dispatcher tries to spawn the worker. Spawn fails (`RuntimeError: AWS_ACCESS_KEY_ID not set`). The dispatcher releases the claim, increments a failure counter, and tries again next tick. After three consecutive failures (the default `failure_limit`), the circuit trips: the task goes to `blocked` with outcome `gave_up`. No more retries until a human unblocks it. + +Click the blocked task: + +![Circuit breaker — 2 spawn_failed + 1 gave_up](/img/kanban-tutorial/11-drawer-gave-up.png) + +Three runs, all with the same error on the `error` field. The first two are `spawn_failed` (retryable), the third is `gave_up` (terminal). The event log above shows the full sequence: `created → claimed → spawn_failed → claimed → spawn_failed → claimed → gave_up`. + +On the terminal: + +```bash +hermes kanban runs t_ef5d +# # OUTCOME PROFILE ELAPSED STARTED +# 1 spawn_failed deploy-bot 0s 2026-04-27 19:34 +# ! AWS_ACCESS_KEY_ID not set in deploy-bot env +# 2 spawn_failed deploy-bot 0s 2026-04-27 19:34 +# ! AWS_ACCESS_KEY_ID not set in deploy-bot env +# 3 gave_up deploy-bot 0s 2026-04-27 19:34 +# ! AWS_ACCESS_KEY_ID not set in deploy-bot env +``` + +If Telegram / Discord / Slack is wired in, a gateway notification fires on the `gave_up` event so you hear about the outage without having to check the board. + +### Crash recovery — worker dies mid-flight + +Sometimes the spawn succeeds but the worker process dies later — segfault, OOM, `systemctl stop`. The dispatcher polls `kill(pid, 0)` and detects the dead pid; the claim releases, the task goes back to `ready`, and the next tick gives it to a fresh worker. + +The example in the seed data is a migration that was running out of memory: + +```bash +# Worker claims, starts scanning 2.4M rows, OOM kills it at ~2.3M +# Dispatcher detects dead pid, releases claim, increments attempt counter +# Retry with a chunked strategy succeeds +``` + +The drawer shows the full two-attempt history: + +![Crash and recovery — 1 crashed + 1 completed](/img/kanban-tutorial/06-drawer-crash-recovery.png) + +Run 1 — `crashed`, with the error `OOM kill at row 2.3M (process 99999 gone)`. Run 2 — `completed`, with `"strategy": "chunked with LIMIT + WHERE id > last_id"` in its metadata. The retrying worker saw the crash of run 1 in its context and picked a safer strategy; the metadata makes it obvious to a future observer (or postmortem writer) what changed. + +## Structured handoff — why `--summary` and `--metadata` matter + +In every story above, workers passed `--summary` and `--metadata` on completion. That's not decoration — it's the primary handoff channel between stages of a workflow. + +When a worker on task B reads its context, it gets: + +- B's **prior attempts** (previous runs: outcome, summary, error, metadata) so a retrying worker doesn't repeat a failed path. +- **Parent task results** — for each parent, the most-recent completed run's summary and metadata — so downstream workers see why and how the upstream work was done. + +This replaces the "dig through comments and the work output" dance that plagues flat kanban systems. A PM writes acceptance criteria in the spec's metadata, and the engineer's worker sees them structurally. 
An engineer records which tests they ran and how many passed, and the reviewer's worker has that list in hand before opening a diff. + +The bulk-close guard exists because this data is per-run. `hermes kanban complete a b c --summary X` is refused — copy-pasting the same summary to three tasks is almost always wrong. Bulk close without the handoff flags still works for the common "I finished a pile of admin tasks" case. + +## Inspecting a task currently running + +For completeness — here's the drawer of a task still in flight (the API implementation from Story 1, claimed by `backend-dev` but not yet complete): + +![Claimed, in-flight task](/img/kanban-tutorial/10-drawer-in-flight.png) + +Status is `Running`. The active run appears in the Run History section with outcome `active` and no `ended_at`. If this worker dies or times out, the dispatcher closes this run with the appropriate outcome and opens a new one on the next claim — the attempt row never disappears. + +## Next steps + +- [Kanban overview](./kanban) — the full data model, event vocabulary, and CLI reference. +- `hermes kanban --help` — every subcommand, every flag. +- `hermes kanban watch --kinds completed,gave_up,timed_out` — live stream terminal events across the whole board. +- `hermes kanban notify-subscribe <task> --platform telegram --chat-id <id>` — get a gateway ping when a specific task finishes. diff --git a/website/docs/user-guide/features/kanban.md b/website/docs/user-guide/features/kanban.md new file mode 100644 index 00000000000..7bbea348552 --- /dev/null +++ b/website/docs/user-guide/features/kanban.md @@ -0,0 +1,510 @@ +--- +sidebar_position: 12 +title: "Kanban (Multi-Agent Board)" +description: "Durable SQLite-backed task board for coordinating multiple Hermes profiles" +--- + +# Kanban — Multi-Agent Profile Collaboration + +> **Want a walkthrough?** Read the [Kanban tutorial](./kanban-tutorial) — four user stories (solo dev, fleet farming, role pipeline with retry, circuit breaker) with dashboard screenshots of each. This page is the reference; the tutorial is the narrative. + +Hermes Kanban is a durable task board, shared across all your Hermes profiles, that lets multiple named agents collaborate on work without fragile in-process subagent swarms. Every task is a row in `~/.hermes/kanban.db`; every handoff is a row anyone can read and write; every worker is a full OS process with its own identity. + +This is the shape that covers the workloads `delegate_task` can't: + +- **Research triage** — parallel researchers + analyst + writer, human-in-the-loop. +- **Scheduled ops** — recurring daily briefs that build a journal over weeks. +- **Digital twins** — persistent named assistants (`inbox-triage`, `ops-review`) that accumulate memory over time. +- **Engineering pipelines** — decompose → implement in parallel worktrees → review → iterate → PR. +- **Fleet work** — one specialist managing N subjects (50 social accounts, 12 monitored services). + +For the full design rationale, comparative analysis against Cline Kanban / Paperclip / NanoClaw / Google Gemini Enterprise, and the eight canonical collaboration patterns, see `docs/hermes-kanban-v1-spec.pdf` in the repository. + +## Kanban vs. `delegate_task` + +They look similar; they are not the same primitive. 
+
+| | `delegate_task` | Kanban |
+|---|---|---|
+| Shape | RPC call (fork → join) | Durable message queue + state machine |
+| Parent | Blocks until child returns | Fire-and-forget after `create` |
+| Child identity | Anonymous subagent | Named profile with persistent memory |
+| Resumability | None — failed = failed | Block → unblock → re-run; crash → reclaim |
+| Human in the loop | Not supported | Comment / unblock at any point |
+| Agents per task | One call = one subagent | N agents over task's life (retry, review, follow-up) |
+| Audit trail | Lost on context compression | Durable rows in SQLite forever |
+| Coordination | Hierarchical (caller → callee) | Peer — any profile reads/writes any task |
+
+**One-sentence distinction:** `delegate_task` is a function call; Kanban is a work queue where every handoff is a row any profile (or human) can see and edit.
+
+**Use `delegate_task` when** the parent agent needs a short reasoning answer before continuing, no humans are involved, and the result goes back into the parent's context.
+
+**Use Kanban when** work crosses agent boundaries, needs to survive restarts, might need human input, might be picked up by a different role, or needs to be discoverable after the fact.
+
+They coexist: a kanban worker may call `delegate_task` internally during its run.
+
+## Core concepts
+
+- **Task** — a row with title, optional body, one assignee (a profile name), status (`triage | todo | ready | running | blocked | done | archived`), optional tenant namespace, optional idempotency key (dedup for retried automation).
+- **Link** — a `task_links` row recording a parent → child dependency. The dispatcher promotes `todo → ready` when all parents are `done`.
+- **Comment** — the inter-agent protocol. Agents and humans append comments; when a worker is (re-)spawned it reads the full comment thread as part of its context.
+- **Workspace** — the directory a worker operates in. Three kinds:
+  - `scratch` (default) — fresh tmp dir under `~/.hermes/kanban/workspaces/<id>/`.
+  - `dir:<path>` — an existing shared directory (Obsidian vault, mail ops dir, per-account folder). **Must be an absolute path.** Relative paths like `dir:../tenants/foo/` are rejected at dispatch because they'd resolve against whatever CWD the dispatcher happens to be in, which is ambiguous and a confused-deputy escape vector. The path is otherwise trusted — it's your box, your filesystem, the worker runs with your uid. This is the trusted-local-user threat model; kanban is single-host by design.
+  - `worktree` — a git worktree under `.worktrees/<id>/` for coding tasks. Worker-side `git worktree add` creates it.
+- **Dispatcher** — a long-lived loop that, every N seconds (default 60), reclaims stale claims, reclaims crashed workers (PID gone but TTL not yet expired), promotes dependency-satisfied tasks to `ready`, atomically claims them, and spawns the assigned profiles. Runs **inside the gateway** by default (`kanban.dispatch_in_gateway: true`). After `failure_limit` consecutive spawn failures on the same task (three by default) the dispatcher auto-blocks it with the last error as the reason — prevents thrashing on tasks whose profile doesn't exist, workspace can't mount, etc.
+- **Tenant** — optional string namespace. One specialist fleet can serve multiple businesses (`--tenant business-a`) with data isolation by workspace path and memory key prefix.
+
+## Quick start
+
+```bash
+# 1. Create the board
+hermes kanban init
+
+# 2. Start the gateway (hosts the embedded dispatcher)
+hermes gateway start
+
+# 3. 
Create a task +hermes kanban create "research AI funding landscape" --assignee researcher + +# 4. Watch activity live +hermes kanban watch + +# 5. See the board +hermes kanban list +hermes kanban stats +``` + +### Gateway-embedded dispatcher (default) + +The dispatcher runs inside the gateway process. Nothing to install, no +separate service to manage — if the gateway is up, ready tasks get picked +up on the next tick (60s by default). + +```yaml +# config.yaml +kanban: + dispatch_in_gateway: true # default + dispatch_interval_seconds: 60 # default +``` + +Override the config flag at runtime via `HERMES_KANBAN_DISPATCH_IN_GATEWAY=0` +for debugging. Standard gateway supervision applies: run `hermes gateway +start` directly, or wire the gateway up as a systemd user unit (see the +gateway docs). Without a running gateway, `ready` tasks stay where they are +until one comes up — `hermes kanban create` warns about this at creation +time. + +Running `hermes kanban daemon` as a separate process is **deprecated**; +use the gateway. If you truly cannot run the gateway (headless host +policy forbids long-lived services, etc.) a `--force` escape hatch keeps +the old standalone daemon alive for one release cycle, but running both +a gateway-embedded dispatcher AND a standalone daemon against the same +`kanban.db` causes claim races and is not supported. + +### Idempotent create (for automation / webhooks) + +```bash +# First call creates the task. Any subsequent call with the same key +# returns the existing task id instead of duplicating. +hermes kanban create "nightly ops review" \ + --assignee ops \ + --idempotency-key "nightly-ops-$(date -u +%Y-%m-%d)" \ + --json +``` + +### Bulk CLI verbs + +All the lifecycle verbs accept multiple ids so you can clean up a batch +in one command: + +```bash +hermes kanban complete t_abc t_def t_hij --result "batch wrap" +hermes kanban archive t_abc t_def t_hij +hermes kanban unblock t_abc t_def +hermes kanban block t_abc "need input" --ids t_def t_hij +``` + +## How workers interact with the board + +When the dispatcher spawns a worker, it sets `HERMES_KANBAN_TASK` in the child's env. That env var is the gate for a dedicated **kanban toolset** — 7 tools that the normal agent schema never sees: + +| Tool | Purpose | +|---|---| +| `kanban_show` | Read the current task (title, body, prior attempts, parent handoffs, comments, full `worker_context`). Defaults to the env's task id. | +| `kanban_complete` | Finish with `summary` + `metadata` structured handoff. | +| `kanban_block` | Escalate for human input. | +| `kanban_heartbeat` | Signal liveness during long operations. | +| `kanban_comment` | Append to the task thread. | +| `kanban_create` | (Orchestrators) fan out into child tasks. | +| `kanban_link` | (Orchestrators) add dependency edges after the fact. | + +**Why tools and not just shelling to `hermes kanban`?** Three reasons: + +1. **Backend portability.** Workers whose terminal tool points at a remote backend (Docker / Modal / Singularity / SSH) would run `hermes kanban complete` inside the container where `hermes` isn't installed and the DB isn't mounted. The kanban tools run in the agent's own Python process and always reach `~/.hermes/kanban.db` regardless of terminal backend. +2. **No shell-quoting fragility.** Passing `--metadata '{"files": [...]}'` through shlex + argparse is a latent footgun. Structured tool args skip it. +3. **Better errors.** Tool results are structured JSON the model can reason about, not stderr strings it has to parse. 
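+
+To make reason 3 concrete, here is the shape a worker gets back on each path. The success shape matches the handlers in `tools/kanban_tools.py`; the error envelope (a single `error` key) is inferred from the test suite, so treat this as a sketch rather than a contract:
+
+```python
+import json
+
+# Success: {"ok": true, ...} plus ids the model can chain on directly.
+ok = json.loads('{"ok": true, "task_id": "t_abc123", "run_id": "r_1"}')
+assert ok["ok"] and ok["task_id"] == "t_abc123"
+
+# Failure: a structured error field, not a stderr string to regex apart.
+err = json.loads('{"error": "metadata must be an object/dict, got list"}')
+assert "error" in err and not err.get("ok")
+```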

**Zero schema footprint on normal sessions.** A regular `hermes chat` session has zero `kanban_*` tools in its schema. The `check_fn` on each tool only returns True when `HERMES_KANBAN_TASK` is set, which only happens when the dispatcher spawned this process. No tool bloat for users who never touch kanban.

The `kanban-worker` and `kanban-orchestrator` skills teach the model which tool to call when and in what order.

### The worker skill

Any profile that should be able to work kanban tasks must load the `kanban-worker` skill. It teaches the worker the full lifecycle:

1. On spawn, call `kanban_show()` to read title + body + parent handoffs + prior attempts + full comment thread.
2. `cd $HERMES_KANBAN_WORKSPACE` and do the work there.
3. Call `kanban_heartbeat(note="...")` every few minutes during long operations.
4. Complete with `kanban_complete(summary="...", metadata={...})`, or `kanban_block(reason="...")` if stuck.

Load it with:

```bash
hermes skills install devops/kanban-worker
```

The dispatcher also auto-passes `--skills kanban-worker` when spawning every worker, so the worker always has the pattern library available even if a profile's default skills config doesn't include it.

### Pinning extra skills to a specific task

Sometimes a single task needs specialist context the assignee profile doesn't carry by default — a translation job that needs the `translation` skill, a review task that needs `github-code-review`, a security audit that needs `security-pr-audit`. Rather than editing the assignee's profile every time, attach the skills directly to the task:

```bash
# CLI — repeat --skill for each extra skill
hermes kanban create "translate README to Japanese" \
  --assignee linguist \
  --skill translation

# Multiple skills
hermes kanban create "audit auth flow" \
  --assignee reviewer \
  --skill security-pr-audit \
  --skill github-code-review
```

From the dashboard's inline create form, type the skills comma-separated into the **skills** field. From another agent (orchestrator pattern), use `kanban_create(skills=[...])`:

```python
kanban_create(
    title="translate README to Japanese",
    assignee="linguist",
    skills=["translation"],
)
```

These skills are **additive** to the built-in `kanban-worker` — the dispatcher emits one `--skills <name>` flag for each (and for the built-in), so the worker spawns with all of them loaded. The skill names must match skills that are actually installed on the assignee's profile (run `hermes skills list` to see what's available); there's no runtime install.

### The orchestrator skill

A **well-behaved orchestrator does not do the work itself.** It decomposes the user's goal into tasks, links them, assigns each to a specialist, and steps back. The `kanban-orchestrator` skill encodes this: anti-temptation rules, a standard specialist roster (`researcher`, `writer`, `analyst`, `backend-eng`, `reviewer`, `ops`), and a decomposition playbook.

Load it into your orchestrator profile:

```bash
hermes skills install devops/kanban-orchestrator
```

For best results, pair it with a profile whose toolsets are restricted to board operations (`kanban`, `gateway`, `memory`) so the orchestrator literally cannot execute implementation tasks even if it tries.
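As a concrete shape for that decomposition step, the sketch below wires a two-researcher fan-in. It is illustrative only: the `kanban_*` names are the documented tools rendered as Python calls (stubbed here so the wiring is executable), and the profile names come from the standard roster:

```python
import itertools

# Stubs standing in for the kanban_create / kanban_complete tool calls:
# they are agent tools, not an importable API, and these ids are fabricated.
_ids = itertools.count(1)
def kanban_create(**kwargs): return f"t_{next(_ids):03d}"
def kanban_complete(**kwargs): pass

# Two parallel research tasks feed one writer task (a P2/P3-style shape).
a = kanban_create(title="research pricing angle", assignee="researcher")
b = kanban_create(title="research competitor angle", assignee="researcher")
draft = kanban_create(
    title="draft the brief from both research threads",
    assignee="writer",
    parents=[a, b],  # dispatcher promotes todo → ready once both parents are done
)

# The orchestrator closes its own task with a routing summary;
# it does not write the brief itself.
kanban_complete(summary=f"decomposed into {a}, {b} feeding {draft}")
```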

## Dashboard (GUI)

The `hermes kanban` CLI and the `/kanban` slash command are enough to run the board headlessly, but a visual board is often the right interface for humans-in-the-loop: triage, cross-profile supervision, reading comment threads, and dragging cards between columns. Hermes ships this as a **bundled dashboard plugin** at `plugins/kanban/` — not a core feature, not a separate service — following the model laid out in [Extending the Dashboard](./extending-the-dashboard).

Open it with:

```bash
hermes kanban init   # one-time: create kanban.db if not already present
hermes dashboard     # "Kanban" tab appears in the nav, after "Skills"
```

### What the plugin gives you

- A **Kanban** tab showing one column per status: `triage`, `todo`, `ready`, `running`, `blocked`, `done` (plus `archived` when the toggle is on).
  - `triage` is the parking column for rough ideas a specifier is expected to flesh out. Tasks created with `hermes kanban create --triage` (or via the Triage column's inline create) land here and the dispatcher leaves them alone until a human or specifier promotes them to `todo` / `ready`.
- Cards show the task id, title, priority badge, tenant tag, assigned profile, comment/link counts, a **progress pill** (`N/M` children done when the task has dependents), and "created N ago". A per-card checkbox enables multi-select.
- **Per-profile lanes inside Running** — toolbar checkbox toggles sub-grouping of the Running column by assignee.
- **Live updates via WebSocket** — the plugin tails the append-only `task_events` table on a short poll interval; the board reflects changes the instant any profile (CLI, gateway, or another dashboard tab) acts. Reloads are debounced so a burst of events triggers a single refetch.
- **Drag-drop** cards between columns to change status. The drop sends `PATCH /api/plugins/kanban/tasks/:id` which routes through the same `kanban_db` code the CLI uses — the three surfaces can never drift. Moves into destructive statuses (`done`, `archived`, `blocked`) prompt for confirmation. Touch devices use a pointer-based fallback so the board is usable from a tablet.
- **Inline create** — click `+` on any column header to type a title, assignee, priority, and (optionally) a parent task from a dropdown over every existing task. Creating from the Triage column automatically parks the new task in triage.
- **Multi-select with bulk actions** — shift/ctrl-click a card or tick its checkbox to add it to the selection. A bulk action bar appears at the top with batch status transitions, archive, and reassign (by profile dropdown, or "(unassign)"). Destructive batches confirm first. Per-id partial failures are reported without aborting the rest.
- **Click a card** (without shift/ctrl) to open a side drawer (Escape or click-outside closes) with:
  - **Editable title** — click the heading to rename.
  - **Editable assignee / priority** — click the meta row to rewrite.
  - **Editable description** — markdown-rendered by default (headings, bold, italic, inline code, fenced code, `http(s)` / `mailto:` links, bullet lists), with an "edit" button that swaps in a textarea. Markdown rendering is a tiny, XSS-safe renderer — every substitution runs on HTML-escaped input, only `http(s)` / `mailto:` links pass through, and `target="_blank"` + `rel="noopener noreferrer"` are always set.
  - **Dependency editor** — chip list of parents and children, each with an `×` to unlink, plus dropdowns over every other task to add a new parent or child.
Cycle attempts are rejected server-side with a clear message.
  - **Status action row** (→ triage / → ready / → running / block / unblock / complete / archive) with confirm prompts for destructive transitions.
  - Result section (also markdown-rendered), comment thread with Enter-to-submit, the last 20 events.
- **Toolbar filters** — free-text search, tenant dropdown (defaults to `dashboard.kanban.default_tenant` from `config.yaml`), assignee dropdown, "show archived" toggle, "lanes by profile" toggle, and a **Nudge dispatcher** button so you don't have to wait for the next 60 s tick.

Visually the target is the familiar Linear / Fusion layout: dark theme, column headers with counts, coloured status dots, pill chips for priority and tenant. The plugin reads only theme CSS vars (`--color-*`, `--radius`, `--font-mono`, ...), so it reskins automatically with whichever dashboard theme is active.

### Architecture

The GUI is strictly a **read-through-the-DB + write-through-kanban_db** layer with no domain logic of its own:

```
┌──────────────────────────┐  WebSocket (tails task_events)
│ React SPA (plugin)       │ ◀─────────────────────────────┐
│ HTML5 drag-and-drop      │                               │
└──────────┬───────────────┘                               │
           │ REST over fetchJSON                           │
           ▼                                               │
┌──────────────────────────┐  writes call kanban_db.*      │
│ FastAPI router           │  directly — same code path    │
│ plugins/kanban/          │  the CLI /kanban verbs use    │
│ dashboard/plugin_api.py  │                               │
└──────────┬───────────────┘                               │
           │                                               │
           ▼                                               │
┌──────────────────────────┐                               │
│ ~/.hermes/kanban.db      │ ──── append task_events ──────┘
│ (WAL, shared)            │
└──────────────────────────┘
```

### REST surface

All routes are mounted under `/api/plugins/kanban/`; see the security model below for exactly what is and isn't authenticated:

| Method | Path | Purpose |
|---|---|---|
| `GET` | `/board?tenant=<name>&include_archived=…` | Full board grouped by status column, plus tenants + assignees for filter dropdowns |
| `GET` | `/tasks/:id` | Task + comments + events + links |
| `POST` | `/tasks` | Create (wraps `kanban_db.create_task`, accepts `triage: bool` and `parents: [id, …]`) |
| `PATCH` | `/tasks/:id` | Status / assignee / priority / title / body / result |
| `POST` | `/tasks/bulk` | Apply the same patch (status / archive / assignee / priority) to every id in `ids`. Per-id failures reported without aborting siblings |
| `POST` | `/tasks/:id/comments` | Append a comment |
| `POST` | `/links` | Add a dependency (`parent_id` → `child_id`) |
| `DELETE` | `/links?parent_id=…&child_id=…` | Remove a dependency |
| `POST` | `/dispatch?max=…&dry_run=…` | Nudge the dispatcher — skip the 60 s wait |
| `GET` | `/config` | Read `dashboard.kanban` preferences from `config.yaml` — `default_tenant`, `lane_by_profile`, `include_archived_by_default`, `render_markdown` |
| `WS` | `/events?since=<event_id>` | Live stream of `task_events` rows |

Every handler is a thin wrapper — the plugin is ~700 lines of Python (router + WebSocket tail + bulk batcher + config reader) and adds no new business logic. A tiny `_conn()` helper auto-initializes `kanban.db` on every read and write, so a fresh install works whether the user opened the dashboard first, hit the REST API directly, or ran `hermes kanban init`.
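Because the routes are plain JSON over HTTP, scripting against the board needs nothing beyond the standard library. Here is a minimal sketch assuming the default localhost bind; the port and the response shapes are placeholders rather than documented guarantees, so check what your `hermes dashboard` instance actually serves:

```python
import json
import urllib.request

BASE = "http://127.0.0.1:8787/api/plugins/kanban"  # placeholder port: use your dashboard's

def call(method, path, body=None):
    # Thin JSON-over-HTTP helper around the plugin routes listed above.
    data = json.dumps(body).encode() if body is not None else None
    req = urllib.request.Request(
        BASE + path,
        data=data,
        method=method,
        headers={"Content-Type": "application/json"},
    )
    with urllib.request.urlopen(req) as resp:
        return json.load(resp)

board = call("GET", "/board?include_archived=false")
# Assumed response shape: columns keyed by status, cards carrying an "id".
first_ready = board["ready"][0]["id"]
call("PATCH", f"/tasks/{first_ready}", {"priority": 1})
```

The `PATCH` above rides the same `kanban_db` code path as the CLI and drag-drop, so anything scripted this way stays consistent with the board's other surfaces.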
+ +### Dashboard config + +Any of these keys under `dashboard.kanban` in `~/.hermes/config.yaml` changes the tab's defaults — the plugin reads them at load time via `GET /config`: + +```yaml +dashboard: + kanban: + default_tenant: acme # preselects the tenant filter + lane_by_profile: true # default for the "lanes by profile" toggle + include_archived_by_default: false + render_markdown: true # set false for plain <pre> rendering +``` + +Each key is optional and falls back to the shown default. + +### Security model + +The dashboard's HTTP auth middleware [explicitly skips `/api/plugins/`](./extending-the-dashboard#backend-api-routes) — plugin routes are unauthenticated by design because the dashboard binds to localhost by default. That means the kanban REST surface is reachable from any process on the host. + +The WebSocket takes one additional step: it requires the dashboard's ephemeral session token as a `?token=…` query parameter (browsers can't set `Authorization` on an upgrade request), matching the pattern used by the in-browser PTY bridge. + +If you run `hermes dashboard --host 0.0.0.0`, every plugin route — kanban included — becomes reachable from the network. **Don't do that on a shared host.** The board contains task bodies, comments, and workspace paths; an attacker reaching these routes gets read access to your entire collaboration surface and can also create / reassign / archive tasks. + +Tasks in `~/.hermes/kanban.db` are profile-agnostic on purpose (that's the coordination primitive). If you open the dashboard with `hermes -p <profile> dashboard`, the board still shows tasks created by any other profile on the host. Same user owns all profiles, but this is worth knowing if multiple personas coexist. + +### Live updates + +`task_events` is an append-only SQLite table with a monotonic `id`. The WebSocket endpoint holds each client's last-seen event id and pushes new rows as they land. When a burst of events arrives, the frontend reloads the (very cheap) board endpoint — simpler and more correct than trying to patch local state from every event kind. WAL mode means the read loop never blocks the dispatcher's `BEGIN IMMEDIATE` claim transactions. + +### Extending it + +The plugin uses the standard Hermes dashboard plugin contract — see [Extending the Dashboard](./extending-the-dashboard) for the full manifest reference, shell slots, page-scoped slots, and the Plugin SDK. Extra columns, custom card chrome, tenant-filtered layouts, or full `tab.override` replacements are all expressible without forking this plugin. + +To disable without removing: add `dashboard.plugins.kanban.enabled: false` to `config.yaml` (or delete `plugins/kanban/dashboard/manifest.json`). + +### Scope boundary + +The GUI is deliberately thin. Everything the plugin does is reachable from the CLI; the plugin just makes it comfortable for humans. Auto-assignment, budgets, governance gates, and org-chart views remain user-space — a router profile, another plugin, or a reuse of `tools/approval.py` — exactly as listed in the out-of-scope section of the design spec. + +## CLI command reference + +``` +hermes kanban init # create kanban.db + print daemon hint +hermes kanban create "<title>" [--body ...] [--assignee <profile>] + [--parent <id>]... [--tenant <name>] + [--workspace scratch|worktree|dir:<path>] + [--priority N] [--triage] [--idempotency-key KEY] + [--max-runtime 30m|2h|1d|<seconds>] + [--skill <name>]... 
    [--json]
hermes kanban list [--mine] [--assignee P] [--status S] [--tenant T] [--archived] [--json]
hermes kanban show <id> [--json]
hermes kanban assign <id> <profile> # or 'none' to unassign
hermes kanban link <parent_id> <child_id>
hermes kanban unlink <parent_id> <child_id>
hermes kanban claim <id> [--ttl SECONDS]
hermes kanban comment <id> "<text>" [--author NAME]

# Bulk verbs — accept multiple ids:
hermes kanban complete <id>... [--result "..."]
hermes kanban block <id> "<reason>" [--ids <id>...]
hermes kanban unblock <id>...
hermes kanban archive <id>...

hermes kanban tail <id> # follow a single task's event stream
hermes kanban watch [--assignee P] [--tenant T] # live stream ALL events to the terminal
    [--kinds completed,blocked,…] [--interval SECS]
hermes kanban heartbeat <id> [--note "..."] # worker liveness signal for long ops
hermes kanban runs <id> [--json] # attempt history (one row per run)
hermes kanban assignees [--json] # profiles on disk + per-assignee task counts
hermes kanban dispatch [--dry-run] [--max N] # one-shot pass
    [--failure-limit N] [--json]
hermes kanban daemon --force # DEPRECATED — standalone dispatcher (use `hermes gateway start` instead)
    [--failure-limit N] [--pidfile PATH] [-v]
hermes kanban stats [--json] # per-status + per-assignee counts
hermes kanban log <id> [--tail BYTES] # worker log from ~/.hermes/kanban/logs/
hermes kanban notify-subscribe <id> # gateway bridge hook (used by /kanban in the gateway)
    --platform <name> --chat-id <id> [--thread-id <id>] [--user-id <id>]
hermes kanban notify-list [<id>] [--json]
hermes kanban notify-unsubscribe <id>
    --platform <name> --chat-id <id> [--thread-id <id>]
hermes kanban context <id> # what a worker sees
hermes kanban gc [--event-retention-days N] # workspaces + old events + old logs
    [--log-retention-days N]
```

All commands are also available as a slash command in the gateway (`/kanban list`, `/kanban comment t_abc "need docs"`, etc.). The slash command bypasses the running-agent guard, so you can `/kanban unblock` a stuck worker while the main agent is still chatting.

## Collaboration patterns

The board supports these nine patterns without any new primitives:

| Pattern | Shape | Example |
|---|---|---|
| **P1 Fan-out** | N siblings, same role | "research 5 angles in parallel" |
| **P2 Pipeline** | role chain: scout → editor → writer | daily brief assembly |
| **P3 Voting / quorum** | N siblings + 1 aggregator | 3 researchers → 1 reviewer picks |
| **P4 Long-running journal** | same profile + shared dir + cron | Obsidian vault |
| **P5 Human-in-the-loop** | worker blocks → user comments → unblock | ambiguous decisions |
| **P6 `@mention`** | inline routing from prose | `@reviewer look at this` |
| **P7 Thread-scoped workspace** | `/kanban here` in a thread | per-project gateway threads |
| **P8 Fleet farming** | one profile, N subjects | 50 social accounts |
| **P9 Triage specifier** | rough idea → `triage` → specifier expands body → `todo` | a "turn this one-liner into a spec" task |

For worked examples of each, see `docs/hermes-kanban-v1-spec.pdf`.

## Multi-tenant usage

When one specialist fleet serves multiple businesses, tag each task with a tenant:

```bash
hermes kanban create "monthly report" \
  --assignee researcher \
  --tenant business-a \
  --workspace dir:$HOME/tenants/business-a/data/ # dir: paths must be absolute; the shell expands $HOME
```

Workers receive `$HERMES_TENANT` and namespace their memory writes by prefix.
The board, the dispatcher, and the profile definitions are all shared; only the data is scoped. + +## Gateway notifications + +When you run `/kanban create …` from the gateway (Telegram, Discord, Slack, etc.), the originating chat is automatically subscribed to the new task. The gateway's background notifier polls `task_events` every few seconds and delivers one message per terminal event (`completed`, `blocked`, `gave_up`, `crashed`, `timed_out`) to that chat. Completed tasks also send the first line of the worker's `--result` so you see the outcome without having to `/kanban show`. + +You can manage subscriptions explicitly from the CLI — useful when a script / cron job wants to notify a chat it didn't originate from: + +```bash +hermes kanban notify-subscribe t_abcd \ + --platform telegram --chat-id 12345678 --thread-id 7 +hermes kanban notify-list +hermes kanban notify-unsubscribe t_abcd \ + --platform telegram --chat-id 12345678 --thread-id 7 +``` + +A subscription removes itself automatically once the task reaches `done` or `archived`; no cleanup needed. + +## Runs — one row per attempt + +A task is a logical unit of work; a **run** is one attempt to execute it. When the dispatcher claims a ready task it creates a row in `task_runs` and points `tasks.current_run_id` at it. When that attempt ends — completed, blocked, crashed, timed out, spawn-failed, reclaimed — the run row closes with an `outcome` and the task's pointer clears. A task that's been attempted three times has three `task_runs` rows. + +Why two tables instead of just mutating the task: you need **full attempt history** for real-world postmortems ("the second reviewer attempt got to approve, the third merged"), and you need a clean place to hang per-attempt metadata — which files changed, which tests ran, which findings a reviewer noted. Those are run facts, not task facts. + +Runs are also where **structured handoff** lives. When a worker completes a task it can pass: + +- `--result "<short log line>"` — goes on the task row as before (for back-compat). +- `--summary "<human handoff>"` — goes on the run; downstream children see it in their `build_worker_context`. +- `--metadata '{"changed_files": [...], "tests_run": 12}'` — JSON dict on the run; children see it serialized alongside the summary. + +Downstream children read the most recent completed run's summary + metadata for each parent. Retrying workers read the prior attempts on their own task (outcome, summary, error) so they don't repeat a path that already failed. + +```bash +# Worker completes with a structured handoff: +hermes kanban complete t_abcd \ + --result "rate limiter shipped" \ + --summary "implemented token bucket, keys on user_id with IP fallback, all tests pass" \ + --metadata '{"changed_files": ["limiter.py", "tests/test_limiter.py"], "tests_run": 14}' + +# Review the attempt history on a retried task: +hermes kanban runs t_abcd +# # OUTCOME PROFILE ELAPSED STARTED +# 1 blocked worker 12s 2026-04-27 14:02 +# → BLOCKED: need decision on rate-limit key +# 2 completed worker 8m 2026-04-27 15:18 +# → implemented token bucket, keys on user_id with IP fallback +``` + +Runs are exposed on the dashboard (Run History section in the drawer, one coloured row per attempt) and on the REST API (`GET /api/plugins/kanban/tasks/:id` returns a `runs[]` array). `PATCH /api/plugins/kanban/tasks/:id` with `{status: "done", summary, metadata}` forwards both to the kernel, so the dashboard's "mark done" button is CLI-equivalent. 
`task_events` rows carry the `run_id` they belong to so the UI can group them by attempt, and the `completed` event embeds the first-line summary in its payload (capped at 400 chars) so gateway notifiers can render structured handoffs without a second SQL round-trip. + +**Bulk close caveat.** `hermes kanban complete a b c --summary X` is refused — structured handoff is per-run, so copy-pasting the same summary to N tasks is almost always wrong. Bulk close *without* `--summary` / `--metadata` still works for the common "I finished a pile of admin tasks" case. + +**Reclaimed runs from status changes.** If you drag a running task off `running` in the dashboard (back to `ready`, or straight to `todo`), or archive a task that was still running, the in-flight run closes with `outcome='reclaimed'` rather than being orphaned. The `task_runs` row is always in a terminal state when `tasks.current_run_id` is `NULL`, and vice versa — that invariant holds across CLI, dashboard, dispatcher, and notifier. + +**Synthetic runs for never-claimed completions.** Completing or blocking a task that was never claimed (e.g. a human closes a `ready` task from the dashboard with a summary, or a CLI user runs `hermes kanban complete <ready-task> --summary X`) would otherwise drop the handoff. Instead the kernel inserts a zero-duration run row (`started_at == ended_at`) carrying the summary / metadata / reason so attempt history stays complete. The `completed` / `blocked` event's `run_id` points at that row. + +**Live drawer refresh.** When the dashboard's WebSocket event stream reports new events for the task the user is currently viewing, the drawer reloads itself (via a per-task event counter threaded into its `useEffect` dependency list). Closing and reopening is no longer required to see a run's new row or updated outcome. + +### Forward compatibility + +Two nullable columns on `tasks` are reserved for v2 workflow routing: `workflow_template_id` (which template this task belongs to) and `current_step_key` (which step in that template is active). The v1 kernel ignores them for routing but lets clients write them, so a v2 release can add the routing machinery without another schema migration. + +## Event reference + +Every transition appends a row to `task_events`. Each row carries an optional `run_id` so UIs can group events by attempt. Kinds group into three clusters so filtering is easy (`hermes kanban watch --kinds completed,gave_up,timed_out`): + +**Lifecycle** (what changed about the task as a logical unit): + +| Kind | Payload | When | +|---|---|---| +| `created` | `{assignee, status, parents, tenant}` | Task inserted. `run_id` is `NULL`. | +| `promoted` | — | `todo → ready` because all parents hit `done`. `run_id` is `NULL`. | +| `claimed` | `{lock, expires, run_id}` | Dispatcher atomically claimed a `ready` task for spawn. | +| `completed` | `{result_len, summary?}` | Worker wrote `--result` / `--summary` and task hit `done`. `summary` is the first-line handoff (400-char cap); full version lives on the run row. If `complete_task` is called on a never-claimed task with handoff fields, a zero-duration run is synthesized so `run_id` still points at something. | +| `blocked` | `{reason}` | Worker or human flipped the task to `blocked`. Synthesizes a zero-duration run when called on a never-claimed task with `--reason`. | +| `unblocked` | — | `blocked → ready`, either manually or via `/unblock`. `run_id` is `NULL`. | +| `archived` | — | Hidden from the default board. 
If the task was still running, carries the `run_id` of the run that was reclaimed as a side effect. |

**Edits** (human-driven changes that aren't transitions):

| Kind | Payload | When |
|---|---|---|
| `assigned` | `{assignee}` | Assignee changed (including unassignment). |
| `edited` | `{fields}` | Title or body updated. |
| `reprioritized` | `{priority}` | Priority changed. |
| `status` | `{status}` | Dashboard drag-drop wrote a status directly (e.g. `todo → ready`). Carries the `run_id` of the run that was reclaimed when dragging off `running`; otherwise `run_id` is NULL. |

**Worker telemetry** (about the execution process, not the logical task):

| Kind | Payload | When |
|---|---|---|
| `spawned` | `{pid}` | Dispatcher successfully started a worker process. |
| `heartbeat` | `{note?}` | Worker signalled liveness during a long operation, via the `kanban_heartbeat` tool or `hermes kanban heartbeat <id>`. |
| `reclaimed` | `{stale_lock}` | Claim TTL expired without a completion; task goes back to `ready`. |
| `crashed` | `{pid, claimer}` | Worker PID no longer alive but TTL hadn't expired yet. |
| `timed_out` | `{pid, elapsed_seconds, limit_seconds, sigkill}` | `max_runtime_seconds` exceeded; dispatcher SIGTERM'd (then SIGKILL'd after 5 s grace) and re-queued. |
| `spawn_failed` | `{error, failures}` | One spawn attempt failed (missing PATH, workspace unmountable, …). Counter increments; task returns to `ready` for retry. |
| `gave_up` | `{failures, error}` | Circuit breaker fired after N consecutive `spawn_failed`. Task auto-blocks with the last error. Default N = 5; override via `--failure-limit`. |

`hermes kanban tail <id>` shows these for a single task. `hermes kanban watch` streams them board-wide.

## Out of scope

Kanban is deliberately single-host. `~/.hermes/kanban.db` is a local SQLite file and the dispatcher spawns workers on the same machine. Running a shared board across two hosts is not supported — there's no coordination primitive for "worker X on host A, worker Y on host B," and the crash-detection path assumes PIDs are host-local. If you need multi-host, run an independent board per host and use `delegate_task` / a message queue to bridge them.

## Design spec

The complete design — architecture, concurrency correctness, comparison with other systems, implementation plan, risks, open questions — lives in `docs/hermes-kanban-v1-spec.pdf`. Read that before filing any behavior-change PR.
diff --git a/website/sidebars.ts b/website/sidebars.ts index 03093b50373..59219b31027 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -62,6 +62,8 @@ const sidebars: SidebarsConfig = { items: [ 'user-guide/features/cron', 'user-guide/features/delegation', + 'user-guide/features/kanban', + 'user-guide/features/kanban-tutorial', 'user-guide/features/code-execution', 'user-guide/features/hooks', 'user-guide/features/batch-processing', diff --git a/website/static/img/kanban-tutorial/01-board-overview.png b/website/static/img/kanban-tutorial/01-board-overview.png new file mode 100644 index 00000000000..aded26f09d9 Binary files /dev/null and b/website/static/img/kanban-tutorial/01-board-overview.png differ diff --git a/website/static/img/kanban-tutorial/02-board-flat.png b/website/static/img/kanban-tutorial/02-board-flat.png new file mode 100644 index 00000000000..621dc2f734e Binary files /dev/null and b/website/static/img/kanban-tutorial/02-board-flat.png differ diff --git a/website/static/img/kanban-tutorial/03-drawer-schema-task.png b/website/static/img/kanban-tutorial/03-drawer-schema-task.png new file mode 100644 index 00000000000..9c3da0f58c0 Binary files /dev/null and b/website/static/img/kanban-tutorial/03-drawer-schema-task.png differ diff --git a/website/static/img/kanban-tutorial/04b-drawer-retry-history-scrolled.png b/website/static/img/kanban-tutorial/04b-drawer-retry-history-scrolled.png new file mode 100644 index 00000000000..4b162eaab82 Binary files /dev/null and b/website/static/img/kanban-tutorial/04b-drawer-retry-history-scrolled.png differ diff --git a/website/static/img/kanban-tutorial/06-drawer-crash-recovery.png b/website/static/img/kanban-tutorial/06-drawer-crash-recovery.png new file mode 100644 index 00000000000..629c4e1c6f4 Binary files /dev/null and b/website/static/img/kanban-tutorial/06-drawer-crash-recovery.png differ diff --git a/website/static/img/kanban-tutorial/07-fleet-transcribes.png b/website/static/img/kanban-tutorial/07-fleet-transcribes.png new file mode 100644 index 00000000000..0f469612bad Binary files /dev/null and b/website/static/img/kanban-tutorial/07-fleet-transcribes.png differ diff --git a/website/static/img/kanban-tutorial/08-pipeline-auth.png b/website/static/img/kanban-tutorial/08-pipeline-auth.png new file mode 100644 index 00000000000..c7cbf4d510a Binary files /dev/null and b/website/static/img/kanban-tutorial/08-pipeline-auth.png differ diff --git a/website/static/img/kanban-tutorial/09-drawer-pipeline-review.png b/website/static/img/kanban-tutorial/09-drawer-pipeline-review.png new file mode 100644 index 00000000000..dac3ac6aeb3 Binary files /dev/null and b/website/static/img/kanban-tutorial/09-drawer-pipeline-review.png differ diff --git a/website/static/img/kanban-tutorial/10-drawer-in-flight.png b/website/static/img/kanban-tutorial/10-drawer-in-flight.png new file mode 100644 index 00000000000..467da920aad Binary files /dev/null and b/website/static/img/kanban-tutorial/10-drawer-in-flight.png differ diff --git a/website/static/img/kanban-tutorial/11-drawer-gave-up.png b/website/static/img/kanban-tutorial/11-drawer-gave-up.png new file mode 100644 index 00000000000..74d36abfa57 Binary files /dev/null and b/website/static/img/kanban-tutorial/11-drawer-gave-up.png differ