mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-06 18:57:21 +08:00
Workers completing a kanban task can now claim the ids of cards they created via an optional ``created_cards`` field on ``kanban_complete``. The kernel verifies each id exists and was created by the completing worker's profile; any phantom id blocks the completion with a ``HallucinatedCardsError`` and records a ``completion_blocked_hallucination`` event on the task so the rejected attempt is auditable. Successful completions also get a non-blocking prose-scan pass over their ``summary`` + ``result`` that emits a ``suspected_hallucinated_references`` event for any ``t_<hex>`` reference that doesn't resolve. Closes #20017. Recovery UX (kernel + CLI + dashboard) -------------------------------------- A structural gate alone isn't enough — operators also need to see and act on stuck workers, especially when a profile's model is the root cause. This PR ships the full loop: * ``kanban_db.reclaim_task(task_id)`` — operator-driven reclaim that releases an active worker claim immediately (unlike ``release_stale_claims`` which only acts after claim_expires has passed). Emits a ``reclaimed`` event with ``manual: True`` payload. * ``kanban_db.reassign_task(task_id, profile, reclaim_first=...)`` — switch a task to a different profile, optionally reclaiming a stuck running worker in the same call. * ``hermes kanban reclaim <id> [--reason ...]`` and ``hermes kanban reassign <id> <profile> [--reclaim] [--reason ...]`` CLI subcommands wired through to the same helpers. * ``POST /api/plugins/kanban/tasks/{id}/reclaim`` and ``POST /api/plugins/kanban/tasks/{id}/reassign`` endpoints on the dashboard plugin. Dashboard surfacing ------------------- * ⚠ **warning badge** on cards with active hallucination events. * **attention strip** at the top of the board listing all flagged tasks; dismissible per session. * **events callout** in the task drawer — hallucination events render with a red left border, amber icon, and phantom ids as styled chips. 
* **recovery section** in the task drawer with three actions: Reclaim, Reassign (with profile picker + reclaim-first checkbox), and a copy-to-clipboard hint for ``hermes -p <profile> model`` since profile config lives on disk and can't be edited from the browser. Auto-opens when the task has warnings, collapsed otherwise. Keyed by task id so state doesn't leak between drawers. Active-vs-stale rule: warnings clear when a clean ``completed`` or ``edited`` event supersedes the hallucination, so recovery is never permanently stigmatising — the audit events persist for debugging but the badge goes away once the worker succeeds. Skill updates ------------- * ``skills/devops/kanban-worker/SKILL.md`` documents the ``created_cards`` contract with good/bad examples. * ``skills/devops/kanban-orchestrator/SKILL.md`` gains a "Recovering stuck workers" section with the three actions and when to use each. Tests ----- * Kernel gate: verified-cards manifest, phantom rejection + audit event, cross-worker rejection, prose scan positive + negative. * Recovery helpers: reclaim on running task, reclaim on non-running returns False, reassign refuses running without reclaim_first, reassign with reclaim_first succeeds on running. * API endpoints: warnings field present on /board and /tasks/:id, warnings cleared after clean completion, reclaim 200 + 409 paths, reassign 200 + 409 + reclaim_first paths. * CLI smoke: reclaim + reassign subcommands. Live-verified end-to-end on a dashboard with seeded scenarios: attention strip renders, badges land on the right cards, drawer callout shows phantom chips, Reclaim on a running task flips status to ready + emits manual reclaimed event + refreshes the drawer, Reassign swaps the assignee and triggers board refresh. 359/359 kanban-suite tests pass (test_kanban_{db,cli,boards,core_functionality} + dashboard + tools).
289 lines
9.4 KiB
Python
289 lines
9.4 KiB
Python
"""Tests for the kanban CLI surface (hermes_cli.kanban)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from hermes_cli import kanban as kc
|
|
from hermes_cli import kanban_db as kb
|
|
|
|
|
|
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Provide an isolated Hermes home directory for kanban tests.

    Creates ``<tmp_path>/.hermes``, points the ``HERMES_HOME`` environment
    variable at it, patches ``Path.home`` to return ``tmp_path``, calls
    ``kb.init_db()``, and returns the created directory.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()

    # Redirect both lookup routes (env var and Path.home) into the temp tree.
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))

    kb.init_db()
    return hermes_dir
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Workspace flag parsing
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.mark.parametrize(
    "value,expected",
    [
        ("scratch", ("scratch", None)),
        ("worktree", ("worktree", None)),
        ("dir:/tmp/work", ("dir", "/tmp/work")),
    ],
)
def test_parse_workspace_flag_valid(value, expected):
    """Each accepted flag value parses to the expected ``(kind, path)`` pair."""
    parsed = kc._parse_workspace_flag(value)
    assert parsed == expected
|
|
|
|
|
|
def test_parse_workspace_flag_expands_user():
    """A ``dir:~/...`` value comes back with the leading ``~`` expanded."""
    parsed = kc._parse_workspace_flag("dir:~/vault")
    workspace_kind, workspace_path = parsed

    assert workspace_kind == "dir"
    # The tail of the path survives; only the "~" prefix is rewritten.
    assert workspace_path.endswith("/vault")
    assert not workspace_path.startswith("~")
|
|
|
|
|
|
@pytest.mark.parametrize("bad", ["cloud", "dir:", "", "worktree:/x"])
def test_parse_workspace_flag_rejects(bad):
    """Malformed flag values raise ``ArgumentTypeError``.

    The empty string is the one exception in the parameter list: it is
    expected to fall back to ``("scratch", None)`` instead of raising.
    """
    if bad:
        with pytest.raises(argparse.ArgumentTypeError):
            kc._parse_workspace_flag(bad)
    else:
        # Empty -> defaults; not an error.
        assert kc._parse_workspace_flag(bad) == ("scratch", None)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# run_slash smoke tests (end-to-end via the same entry both CLI and gateway use)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_run_slash_no_args_shows_usage(kanban_home):
    """An empty slash command returns text that looks like usage help."""
    text = kc.run_slash("").lower()

    assert "kanban" in text
    # Any one of these words is accepted as evidence of a usage message.
    assert any(word in text for word in ("create", "subcommand", "action"))
|
|
|
|
|
|
def test_run_slash_create_and_list(kanban_home):
    """A created task appears in ``list`` output with its title and assignee."""
    created = kc.run_slash("create 'ship feature' --assignee alice")
    assert "Created" in created

    listing = kc.run_slash("list")
    for needle in ("ship feature", "alice"):
        assert needle in listing
|
|
|
|
|
|
def test_run_slash_create_with_parent_and_cascade(kanban_home):
    """Completing a parent moves its ``--parent`` child from todo to ready."""
    import re

    # Create the parent and pull its "t_xxxx" id out of the "Created ..." line.
    parent_out = kc.run_slash("create 'parent' --assignee alice")
    found = re.search(r"(t_[a-f0-9]+)", parent_out)
    assert found
    parent_id = found.group(1)

    child_out = kc.run_slash(f"create 'child' --assignee bob --parent {parent_id}")
    assert "todo" in child_out  # a child with an open parent starts as todo

    # Finish the parent, then filter explicitly on status so the assertion
    # only passes if the child is now listed as ready.
    kc.run_slash(f"complete {parent_id}")
    assert "child" in kc.run_slash("list --status ready")
|
|
|
|
|
|
def test_run_slash_show_includes_comments(kanban_home):
    """``show`` output contains the text of a comment added to the task."""
    import re

    created = kc.run_slash("create 'x'")
    id_match = re.search(r"(t_[a-f0-9]+)", created)
    task_id = id_match.group(1)

    kc.run_slash(f"comment {task_id} 'source is paywalled'")
    assert "source is paywalled" in kc.run_slash(f"show {task_id}")
|
|
|
|
|
|
def test_run_slash_block_unblock_cycle(kanban_home):
    """A claimed task can be blocked and then unblocked via slash commands."""
    import re

    created = kc.run_slash("create 'x' --assignee alice")
    id_match = re.search(r"(t_[a-f0-9]+)", created)
    task_id = id_match.group(1)

    # Claim first so block() finds it running
    kc.run_slash(f"claim {task_id}")

    blocked = kc.run_slash(f"block {task_id} 'need decision'")
    assert "Blocked" in blocked

    unblocked = kc.run_slash(f"unblock {task_id}")
    assert "Unblocked" in unblocked
|
|
|
|
|
|
def test_run_slash_json_output(kanban_home):
    """``create --json`` emits a JSON object describing the new task."""
    raw = kc.run_slash("create 'jsontask' --assignee alice --json")
    task = json.loads(raw)

    expected_fields = {
        "title": "jsontask",
        "assignee": "alice",
        "status": "ready",
    }
    for field, wanted in expected_fields.items():
        assert task[field] == wanted
|
|
|
|
|
|
def test_run_slash_dispatch_dry_run_counts(kanban_home):
    """``dispatch --dry-run`` output includes a ``Spawned:`` line."""
    setup_commands = (
        "create 'a' --assignee alice",
        "create 'b' --assignee bob",
    )
    for command in setup_commands:
        kc.run_slash(command)

    report = kc.run_slash("dispatch --dry-run")
    assert "Spawned:" in report
|
|
|
|
|
|
def test_run_slash_context_output_format(kanban_home):
    """``context`` output carries the task title, body, and comment text."""
    import re

    created = kc.run_slash("create 'tech spec' --assignee alice --body 'write an RFC'")
    id_match = re.search(r"(t_[a-f0-9]+)", created)
    task_id = id_match.group(1)

    kc.run_slash(f"comment {task_id} 'remember to include performance section'")
    context_text = kc.run_slash(f"context {task_id}")

    for fragment in ("tech spec", "write an RFC", "performance section"):
        assert fragment in context_text
|
|
|
|
|
|
def test_run_slash_tenant_filter(kanban_home):
    """``list --tenant`` shows only the tasks created under that tenant."""
    kc.run_slash("create 'biz-a task' --tenant biz-a --assignee alice")
    kc.run_slash("create 'biz-b task' --tenant biz-b --assignee alice")

    listing_a = kc.run_slash("list --tenant biz-a")
    listing_b = kc.run_slash("list --tenant biz-b")

    # Each listing must contain its own task and exclude the other tenant's.
    assert "biz-a task" in listing_a
    assert "biz-b task" not in listing_a
    assert "biz-b task" in listing_b
    assert "biz-a task" not in listing_b
|
|
|
|
|
|
def test_run_slash_usage_error_returns_message(kanban_home):
    """``create`` with no title returns a usage/error message as text."""
    # Missing required argument for create
    message = kc.run_slash("create").lower()
    assert "usage" in message or "error" in message
|
|
|
|
|
|
def test_run_slash_assign_reassigns(kanban_home):
    """``assign`` reports success and ``show`` then mentions the new assignee."""
    import re

    created = kc.run_slash("create 'x' --assignee alice")
    id_match = re.search(r"(t_[a-f0-9]+)", created)
    task_id = id_match.group(1)

    assign_out = kc.run_slash(f"assign {task_id} bob")
    assert "Assigned" in assign_out

    details = kc.run_slash(f"show {task_id}")
    assert "bob" in details
|
|
|
|
|
|
def test_run_slash_link_unlink(kanban_home):
    """Two tasks can be linked (second shows ``todo``) and then unlinked."""
    import re

    first_out = kc.run_slash("create 'a'")
    second_out = kc.run_slash("create 'b'")
    first_id = re.search(r"(t_[a-f0-9]+)", first_out).group(1)
    second_id = re.search(r"(t_[a-f0-9]+)", second_out).group(1)

    link_out = kc.run_slash(f"link {first_id} {second_id}")
    assert "Linked" in link_out

    # After link, b is todo
    second_details = kc.run_slash(f"show {second_id}")
    assert "todo" in second_details

    unlink_out = kc.run_slash(f"unlink {first_id} {second_id}")
    assert "Unlinked" in unlink_out
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Integration with the COMMAND_REGISTRY
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def test_kanban_is_resolvable():
    """``resolve_command("kanban")`` returns a command named ``kanban``."""
    from hermes_cli.commands import resolve_command

    resolved = resolve_command("kanban")

    assert resolved is not None
    assert resolved.name == "kanban"
|
|
|
|
|
|
def test_kanban_bypasses_active_session_guard():
    """``should_bypass_active_session`` is truthy for the ``kanban`` command."""
    from hermes_cli.commands import should_bypass_active_session

    bypasses = should_bypass_active_session("kanban")
    assert bypasses
|
|
|
|
|
|
def test_kanban_in_autocomplete_table():
    """``/kanban`` and its core subcommands are present in the completion tables."""
    from hermes_cli.commands import COMMANDS, SUBCOMMANDS

    assert "/kanban" in COMMANDS

    # A missing or empty entry is normalised to [] so the membership checks
    # below fail as assertions rather than raising on None.
    kanban_subcommands = SUBCOMMANDS.get("/kanban") or []
    for subcommand in ("create", "dispatch"):
        assert subcommand in kanban_subcommands
|
|
|
|
|
|
def test_kanban_not_gateway_only():
    """The ``kanban`` registry entry is neither CLI-only nor gateway-only."""
    # kanban is available in BOTH CLI and gateway surfaces.
    from hermes_cli.commands import COMMAND_REGISTRY

    # Use a default so a missing entry fails as a readable assertion instead
    # of an uninformative StopIteration escaping from next().
    cmd = next((c for c in COMMAND_REGISTRY if c.name == "kanban"), None)
    assert cmd is not None, "kanban is not registered in COMMAND_REGISTRY"

    assert not cmd.cli_only
    assert not cmd.gateway_only
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# reclaim + reassign CLI smoke tests
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _simulate_running_claim(task_id):
    """Force *task_id* into ``running`` with a live (unexpired) worker claim.

    Writes directly to the database through ``kb.connect()``:

    * sets the task's status to ``running`` with a random claim lock,
      ``worker_pid`` 4242 and a ``claim_expires`` one hour in the future;
    * inserts a matching ``running`` row into ``task_runs``;
    * points the task's ``current_run_id`` at that new row.

    The claim is deliberately NOT expired, so the commands under test have
    to release an active claim rather than a stale one.
    """
    import secrets
    import time

    # Sample the clock once so the task row and the run row agree exactly.
    now = int(time.time())
    expires = now + 3600
    lock = secrets.token_hex(4)

    conn = kb.connect()
    try:
        conn.execute(
            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
            "worker_pid=? WHERE id=?",
            (lock, expires, 4242, task_id),
        )
        conn.execute(
            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
            (task_id, lock, expires, 4242, now),
        )
        run_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
        conn.execute(
            "UPDATE tasks SET current_run_id=? WHERE id=?", (run_id, task_id)
        )
        conn.commit()
    finally:
        conn.close()


def test_run_slash_reclaim_running_task(kanban_home):
    """``reclaim`` on a running task reports success and the task shows ready."""
    import re

    out1 = kc.run_slash("create 'stuck worker task' --assignee broken-model")
    m = re.search(r"(t_[a-f0-9]+)", out1)
    assert m, out1
    tid = m.group(1)

    # Simulate a running claim that is still inside its TTL.
    _simulate_running_claim(tid)

    out = kc.run_slash(f"reclaim {tid} --reason 'test'")
    assert "Reclaimed" in out, out
    # Status back to ready.
    out2 = kc.run_slash(f"show {tid}")
    assert "ready" in out2.lower()


def test_run_slash_reassign_with_reclaim_flag(kanban_home):
    """``reassign --reclaim`` on a running task switches it to the new profile."""
    import re

    out1 = kc.run_slash("create 'switch model' --assignee orig")
    m = re.search(r"(t_[a-f0-9]+)", out1)
    # Guard the match so a changed "Created ..." format fails with the actual
    # output instead of an AttributeError on None.
    assert m, out1
    tid = m.group(1)

    # Simulate a running claim.
    _simulate_running_claim(tid)

    out = kc.run_slash(f"reassign {tid} newbie --reclaim --reason 'switch'")
    assert "Reassigned" in out, out
    out2 = kc.run_slash(f"show {tid}")
    assert "newbie" in out2
|