Files
hermes-agent/tests/hermes_cli/test_kanban_cli.py
Teknium de9238d37e feat(kanban): hallucination gate + recovery UX for worker-created-card claims (#20232)
Workers completing a kanban task can now claim the ids of cards they
created via an optional ``created_cards`` field on ``kanban_complete``.
The kernel verifies each id exists and was created by the completing
worker's profile; any phantom id blocks the completion with a
``HallucinatedCardsError`` and records a
``completion_blocked_hallucination`` event on the task so the rejected
attempt is auditable. Successful completions also get a non-blocking
prose-scan pass over their ``summary`` + ``result`` that emits a
``suspected_hallucinated_references`` event for any ``t_<hex>``
reference that doesn't resolve.

Closes #20017.

Recovery UX (kernel + CLI + dashboard)
--------------------------------------

A structural gate alone isn't enough — operators also need to see and
act on stuck workers, especially when a profile's model is the root
cause. This PR ships the full loop:

* ``kanban_db.reclaim_task(task_id)`` — operator-driven reclaim that
  releases an active worker claim immediately (unlike
  ``release_stale_claims`` which only acts after claim_expires has
  passed). Emits a ``reclaimed`` event with ``manual: True`` payload.
* ``kanban_db.reassign_task(task_id, profile, reclaim_first=...)`` —
  switch a task to a different profile, optionally reclaiming a stuck
  running worker in the same call.
* ``hermes kanban reclaim <id> [--reason ...]`` and
  ``hermes kanban reassign <id> <profile> [--reclaim] [--reason ...]``
  CLI subcommands wired through to the same helpers.
* ``POST /api/plugins/kanban/tasks/{id}/reclaim`` and
  ``POST /api/plugins/kanban/tasks/{id}/reassign`` endpoints on the
  dashboard plugin.

Dashboard surfacing
-------------------

* ⚠ **warning badge** on cards with active hallucination events.
* **attention strip** at the top of the board listing all flagged
  tasks; dismissible per session.
* **events callout** in the task drawer — hallucination events render
  with a red left border, amber icon, and phantom ids as styled chips.
* **recovery section** in the task drawer with three actions: Reclaim,
  Reassign (with profile picker + reclaim-first checkbox), and a
  copy-to-clipboard hint for ``hermes -p <profile> model`` since
  profile config lives on disk and can't be edited from the browser.
  Auto-opens when the task has warnings, collapsed otherwise.
  Keyed by task id so state doesn't leak between drawers.

Active-vs-stale rule: warnings clear when a clean ``completed`` or
``edited`` event supersedes the hallucination, so recovery is never
permanently stigmatising — the audit events persist for debugging but
the badge goes away once the worker succeeds.

Skill updates
-------------

* ``skills/devops/kanban-worker/SKILL.md`` documents the
  ``created_cards`` contract with good/bad examples.
* ``skills/devops/kanban-orchestrator/SKILL.md`` gains a "Recovering
  stuck workers" section with the three actions and when to use each.

Tests
-----

* Kernel gate: verified-cards manifest, phantom rejection + audit
  event, cross-worker rejection, prose scan positive + negative.
* Recovery helpers: reclaim on running task, reclaim on non-running
  returns False, reassign refuses running without reclaim_first,
  reassign with reclaim_first succeeds on running.
* API endpoints: warnings field present on /board and /tasks/:id,
  warnings cleared after clean completion, reclaim 200 + 409 paths,
  reassign 200 + 409 + reclaim_first paths.
* CLI smoke: reclaim + reassign subcommands.

Live-verified end-to-end on a dashboard with seeded scenarios:
attention strip renders, badges land on the right cards, drawer
callout shows phantom chips, Reclaim on a running task flips status to
ready + emits manual reclaimed event + refreshes the drawer,
Reassign swaps the assignee and triggers board refresh.

359/359 kanban-suite tests pass
(test_kanban_{db,cli,boards,core_functionality} + dashboard + tools).
2026-05-05 08:06:55 -07:00

289 lines
9.4 KiB
Python

"""Tests for the kanban CLI surface (hermes_cli.kanban)."""
from __future__ import annotations
import argparse
import json
import os
from pathlib import Path
import pytest
from hermes_cli import kanban as kc
from hermes_cli import kanban_db as kb
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
home = tmp_path / ".hermes"
home.mkdir()
monkeypatch.setenv("HERMES_HOME", str(home))
monkeypatch.setattr(Path, "home", lambda: tmp_path)
kb.init_db()
return home
# ---------------------------------------------------------------------------
# Workspace flag parsing
# ---------------------------------------------------------------------------
@pytest.mark.parametrize(
"value,expected",
[
("scratch", ("scratch", None)),
("worktree", ("worktree", None)),
("dir:/tmp/work", ("dir", "/tmp/work")),
],
)
def test_parse_workspace_flag_valid(value, expected):
assert kc._parse_workspace_flag(value) == expected
def test_parse_workspace_flag_expands_user():
kind, path = kc._parse_workspace_flag("dir:~/vault")
assert kind == "dir"
assert path.endswith("/vault")
assert not path.startswith("~")
@pytest.mark.parametrize("bad", ["cloud", "dir:", "", "worktree:/x"])
def test_parse_workspace_flag_rejects(bad):
if not bad:
# Empty -> defaults; not an error.
assert kc._parse_workspace_flag(bad) == ("scratch", None)
return
with pytest.raises(argparse.ArgumentTypeError):
kc._parse_workspace_flag(bad)
# ---------------------------------------------------------------------------
# run_slash smoke tests (end-to-end via the same entry both CLI and gateway use)
# ---------------------------------------------------------------------------
def test_run_slash_no_args_shows_usage(kanban_home):
out = kc.run_slash("")
assert "kanban" in out.lower()
assert "create" in out.lower() or "subcommand" in out.lower() or "action" in out.lower()
def test_run_slash_create_and_list(kanban_home):
out = kc.run_slash("create 'ship feature' --assignee alice")
assert "Created" in out
out = kc.run_slash("list")
assert "ship feature" in out
assert "alice" in out
def test_run_slash_create_with_parent_and_cascade(kanban_home):
# Parent then child via --parent
out1 = kc.run_slash("create 'parent' --assignee alice")
# Extract the "t_xxxx" id from "Created t_xxxx (ready, ...)"
import re
m = re.search(r"(t_[a-f0-9]+)", out1)
assert m
p = m.group(1)
out2 = kc.run_slash(f"create 'child' --assignee bob --parent {p}")
assert "todo" in out2 # child starts as todo
# Complete parent; list should promote child to ready
kc.run_slash(f"complete {p}")
# Explicit filter: child should now be ready (was todo before complete).
ready_list = kc.run_slash("list --status ready")
assert "child" in ready_list
def test_run_slash_show_includes_comments(kanban_home):
out = kc.run_slash("create 'x'")
import re
tid = re.search(r"(t_[a-f0-9]+)", out).group(1)
kc.run_slash(f"comment {tid} 'source is paywalled'")
show = kc.run_slash(f"show {tid}")
assert "source is paywalled" in show
def test_run_slash_block_unblock_cycle(kanban_home):
out = kc.run_slash("create 'x' --assignee alice")
import re
tid = re.search(r"(t_[a-f0-9]+)", out).group(1)
# Claim first so block() finds it running
kc.run_slash(f"claim {tid}")
assert "Blocked" in kc.run_slash(f"block {tid} 'need decision'")
assert "Unblocked" in kc.run_slash(f"unblock {tid}")
def test_run_slash_json_output(kanban_home):
out = kc.run_slash("create 'jsontask' --assignee alice --json")
payload = json.loads(out)
assert payload["title"] == "jsontask"
assert payload["assignee"] == "alice"
assert payload["status"] == "ready"
def test_run_slash_dispatch_dry_run_counts(kanban_home):
kc.run_slash("create 'a' --assignee alice")
kc.run_slash("create 'b' --assignee bob")
out = kc.run_slash("dispatch --dry-run")
assert "Spawned:" in out
def test_run_slash_context_output_format(kanban_home):
out = kc.run_slash("create 'tech spec' --assignee alice --body 'write an RFC'")
import re
tid = re.search(r"(t_[a-f0-9]+)", out).group(1)
kc.run_slash(f"comment {tid} 'remember to include performance section'")
ctx = kc.run_slash(f"context {tid}")
assert "tech spec" in ctx
assert "write an RFC" in ctx
assert "performance section" in ctx
def test_run_slash_tenant_filter(kanban_home):
kc.run_slash("create 'biz-a task' --tenant biz-a --assignee alice")
kc.run_slash("create 'biz-b task' --tenant biz-b --assignee alice")
a = kc.run_slash("list --tenant biz-a")
b = kc.run_slash("list --tenant biz-b")
assert "biz-a task" in a and "biz-b task" not in a
assert "biz-b task" in b and "biz-a task" not in b
def test_run_slash_usage_error_returns_message(kanban_home):
# Missing required argument for create
out = kc.run_slash("create")
assert "usage" in out.lower() or "error" in out.lower()
def test_run_slash_assign_reassigns(kanban_home):
out = kc.run_slash("create 'x' --assignee alice")
import re
tid = re.search(r"(t_[a-f0-9]+)", out).group(1)
assert "Assigned" in kc.run_slash(f"assign {tid} bob")
show = kc.run_slash(f"show {tid}")
assert "bob" in show
def test_run_slash_link_unlink(kanban_home):
a = kc.run_slash("create 'a'")
b = kc.run_slash("create 'b'")
import re
ta = re.search(r"(t_[a-f0-9]+)", a).group(1)
tb = re.search(r"(t_[a-f0-9]+)", b).group(1)
assert "Linked" in kc.run_slash(f"link {ta} {tb}")
# After link, b is todo
show = kc.run_slash(f"show {tb}")
assert "todo" in show
assert "Unlinked" in kc.run_slash(f"unlink {ta} {tb}")
# ---------------------------------------------------------------------------
# Integration with the COMMAND_REGISTRY
# ---------------------------------------------------------------------------
def test_kanban_is_resolvable():
from hermes_cli.commands import resolve_command
cmd = resolve_command("kanban")
assert cmd is not None
assert cmd.name == "kanban"
def test_kanban_bypasses_active_session_guard():
from hermes_cli.commands import should_bypass_active_session
assert should_bypass_active_session("kanban")
def test_kanban_in_autocomplete_table():
from hermes_cli.commands import COMMANDS, SUBCOMMANDS
assert "/kanban" in COMMANDS
subs = SUBCOMMANDS.get("/kanban") or []
assert "create" in subs
assert "dispatch" in subs
def test_kanban_not_gateway_only():
# kanban is available in BOTH CLI and gateway surfaces.
from hermes_cli.commands import COMMAND_REGISTRY
cmd = next(c for c in COMMAND_REGISTRY if c.name == "kanban")
assert not cmd.cli_only
assert not cmd.gateway_only
# ---------------------------------------------------------------------------
# reclaim + reassign CLI smoke tests
# ---------------------------------------------------------------------------
def test_run_slash_reclaim_running_task(kanban_home):
import re
import time
import secrets
from hermes_cli import kanban_db as kb
out1 = kc.run_slash("create 'stuck worker task' --assignee broken-model")
m = re.search(r"(t_[a-f0-9]+)", out1)
assert m
tid = m.group(1)
# Simulate a running claim outside TTL.
conn = kb.connect()
try:
lock = secrets.token_hex(4)
conn.execute(
"UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
"worker_pid=? WHERE id=?",
(lock, int(time.time()) + 3600, 4242, tid),
)
conn.execute(
"INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
"worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
(tid, lock, int(time.time()) + 3600, 4242, int(time.time())),
)
rid = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
conn.execute("UPDATE tasks SET current_run_id=? WHERE id=?", (rid, tid))
conn.commit()
finally:
conn.close()
out = kc.run_slash(f"reclaim {tid} --reason 'test'")
assert "Reclaimed" in out, out
# Status back to ready.
out2 = kc.run_slash(f"show {tid}")
assert "ready" in out2.lower()
def test_run_slash_reassign_with_reclaim_flag(kanban_home):
import re
import time
import secrets
from hermes_cli import kanban_db as kb
out1 = kc.run_slash("create 'switch model' --assignee orig")
m = re.search(r"(t_[a-f0-9]+)", out1)
tid = m.group(1)
# Simulate a running claim.
conn = kb.connect()
try:
lock = secrets.token_hex(4)
conn.execute(
"UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
"worker_pid=? WHERE id=?",
(lock, int(time.time()) + 3600, 4242, tid),
)
conn.execute(
"INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
"worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
(tid, lock, int(time.time()) + 3600, 4242, int(time.time())),
)
rid = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
conn.execute("UPDATE tasks SET current_run_id=? WHERE id=?", (rid, tid))
conn.commit()
finally:
conn.close()
out = kc.run_slash(f"reassign {tid} newbie --reclaim --reason 'switch'")
assert "Reassigned" in out, out
out2 = kc.run_slash(f"show {tid}")
assert "newbie" in out2