mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-03 17:27:37 +08:00
Salvage of PR #16100 onto current main (after emozilla's #17514 fix that unblocks plugin Pydantic body validation). History preserved on the standing `feat/kanban-standing` branch; this squashes the 22 iterative commits into one clean landing. What this lands: - SQLite kernel (hermes_cli/kanban_db.py) — durable task board with tasks, task_links, task_runs, task_comments, task_events, kanban_notify_subs tables. WAL mode, atomic claim via CAS, tenant-namespaced, skills JSON array per task, max-runtime timeouts, worker heartbeats, idempotency keys, circuit breaker on repeated spawn failures, crash detection via /proc/<pid>/status, run history preserved across attempts. - Dispatcher — runs inside the gateway by default (`kanban.dispatch_in_gateway: true`). Ticks every 60s, reclaims stale claims, promotes ready tasks, spawns `hermes -p <assignee> chat -q "work kanban task <id>"` with HERMES_KANBAN_TASK + HERMES_KANBAN_WORKSPACE env. Auto-loads `--skills kanban-worker` plus any per-task skills. Health telemetry warns on stuck ready queue. - Structured tool surface (tools/kanban_tools.py) — 7 tools (kanban_show, kanban_complete, kanban_block, kanban_heartbeat, kanban_comment, kanban_create, kanban_link). Gated on HERMES_KANBAN_TASK via check_fn so zero schema footprint in normal sessions. - System-prompt guidance (agent/prompt_builder.py KANBAN_GUIDANCE) injected only when kanban tools are active. - Dashboard plugin (plugins/kanban/dashboard/) — Linear-style board UI: triage/todo/ready/running/blocked/done columns, drag-drop, inline create, task drawer with markdown, comments, run history, dependency editor, bulk ops, lanes-by-profile grouping, WS-driven live refresh. Matches active dashboard theme via CSS variables. - CLI — `hermes kanban init|create|list|show|assign|link|unlink|claim|comment|complete|block|unblock|archive|tail|dispatch|context|init|gc|watch|stats|notify|log|heartbeat|runs|assignees` + `/kanban` slash in-session. 
- Worker + orchestrator skills (skills/devops/kanban-worker + kanban-orchestrator) — pattern library for good summary/metadata shapes, retry diagnostics, block-reason examples, fan-out patterns. - Per-task force-loaded skills — `--skill <name>` (repeatable), stored as JSON, threaded through to dispatcher argv as one `--skills X` pair per skill alongside the built-in kanban-worker. Dashboard + CLI + tool parity. - Deprecation of standalone `hermes kanban daemon` — stub exits 2 with migration guidance; `--force` escape hatch for headless hosts. - Docs (website/docs/user-guide/features/kanban.md + kanban-tutorial.md) with 11 dashboard screenshots walking through four user stories (Solo Dev, Fleet Farming, Role Pipeline, Circuit Breaker). - Tests (251 passing): kernel schema + migration + CAS atomicity, dispatcher logic, circuit breaker, crash detection, max-runtime timeouts, claim lifecycle, tenant isolation, idempotency keys, per-task skills round-trip + validation + dispatcher argv, tool surface (7 tools × round-trip + error paths), dashboard REST (CRUD + bulk + links + warnings), gateway-embedded dispatcher (config gate, env override, graceful shutdown), CLI deprecation stub, migration from legacy schemas. Gateway integration: - GatewayRunner._kanban_dispatcher_watcher — new asyncio background task, symmetric with _kanban_notifier_watcher. Runs dispatch_once via asyncio.to_thread so SQLite WAL never blocks the loop. Sleeps in 1s slices for snappy shutdown. Respects HERMES_KANBAN_DISPATCH_IN_GATEWAY=0 env override for debugging. - Config: new `kanban` section in DEFAULT_CONFIG with `dispatch_in_gateway: true` (default) + `dispatch_interval_seconds: 60`. Additive — no _config_version bump needed. Forward-compat: - workflow_template_id / current_step_key columns on tasks (v1 writes NULL; v2 will use them for routing). - task_runs holds claim machinery (claim_lock, claim_expires, worker_pid, last_heartbeat_at) so multi-attempt history is first-class from day one. 
Closes #16102. Co-authored-by: emozilla <emozilla@nousresearch.com>
284 lines
9.8 KiB
Python
284 lines
9.8 KiB
Python
"""Randomized property testing for the Kanban kernel.
|
|
|
|
Generates NUM_SEQUENCES random operation sequences, each of up to
OPS_PER_SEQUENCE ops, on small task graphs. After each step, checks
the full invariant set:
|
|
|
|
I1. If tasks.current_run_id IS NOT NULL, the run MUST exist AND
|
|
ended_at MUST be NULL (we never point at a closed run).
|
|
I2. If a run has ended_at NULL, SOME task MUST have current_run_id
|
|
pointing at it (no orphan open runs).
|
|
I3. task.status in the valid set {triage, todo, ready, running,
|
|
blocked, done, archived}.
|
|
I4. task.claim_lock NULL iff status not in (running,). Only the
"not running => claim_lock IS NULL" direction is actually checked.
|
|
I5. Every run has started_at <= ended_at (or ended_at is NULL).
|
|
I6. If outcome is set, ended_at must also be set.
|
|
I7. Events are strictly monotonic in (created_at, id). Not checked
explicitly; the autoincrement id is relied on to guarantee it.
|
|
I8. task_events.run_id references a task_runs.id that exists
|
|
(or is NULL).
|
|
I9. Parent completion invariant: if all parents are 'done', the
|
|
child cannot be in 'todo' status (recompute_ready should have
|
|
promoted it). This is called out in the comment on
|
|
recompute_ready; verify it holds after every random seq.
|
|
|
|
Not using hypothesis the lib; just Python random for simplicity.
|
|
"""
|
|
|
|
import os
|
|
import random
|
|
import sys
|
|
import tempfile
|
|
import time
|
|
from pathlib import Path
|
|
|
|
# Directory two levels above the directory containing this file; main()
# puts it on sys.path before importing hermes_cli.
WT = str(Path(__file__).resolve().parents[2])

# Sizing of a fuzz run.
NUM_SEQUENCES = 500  # independent random sequences per run
OPS_PER_SEQUENCE = 100  # operation attempts per sequence
TASK_POOL = 10  # NOTE(review): not referenced by the code visible in this file

# Operation names random_op() chooses from.
OPS = [
    "create",
    "create_child",
    "claim",
    "complete",
    "block",
    "unblock",
    "archive",
    "heartbeat",
    "release_stale",
    "detect_crashed",
    "recompute_ready",
    "reassign",
]
|
|
|
|
|
|
def assert_invariants(conn, kb, ops_log):
    """Check the kernel invariants against the current database state.

    Despite the name, nothing is raised. When at least one check fails the
    first ten failures and the last ten entries of ``ops_log`` are printed
    and ``False`` is returned.

    Args:
        conn: Open database connection whose result rows can be indexed by
            column name.
        kb: Not used by this function.
        ops_log: Operations applied so far in the current sequence, oldest
            first; only used for the failure report.

    Returns:
        ``True`` when every checked invariant holds, ``False`` otherwise.
    """
    problems = []

    # I1: a task's current_run_id must reference an existing, still-open run.
    dangling = conn.execute("""
        SELECT t.id, t.current_run_id, r.ended_at, r.outcome
        FROM tasks t
        LEFT JOIN task_runs r ON r.id = t.current_run_id
        WHERE t.current_run_id IS NOT NULL
        AND (r.id IS NULL OR r.ended_at IS NOT NULL)
    """).fetchall()
    for rec in dangling:
        # The WHERE clause only lets through missing runs or ended runs, so
        # a NULL ended_at (with NULL outcome) means the join found no run.
        run_absent = rec["ended_at"] is None and rec["outcome"] is None
        state = "missing" if run_absent else f"closed ({rec['outcome']})"
        problems.append(
            f"I1: task {rec['id']} points at run {rec['current_run_id']} "
            f"which is {state}"
        )

    # I2: every open run must be some task's current run.
    unowned_runs = conn.execute("""
        SELECT r.id, r.task_id
        FROM task_runs r
        WHERE r.ended_at IS NULL
        AND NOT EXISTS (SELECT 1 FROM tasks t WHERE t.current_run_id = r.id)
    """).fetchall()
    problems.extend(
        f"I2: open run {rec['id']} on task {rec['task_id']} has no pointer"
        for rec in unowned_runs
    )

    # I3: status must be one of the known values.
    known_statuses = {"triage", "todo", "ready", "running", "blocked", "done", "archived"}
    all_tasks = conn.execute("SELECT id, status FROM tasks").fetchall()
    problems.extend(
        f"I3: task {rec['id']} has invalid status {rec['status']!r}"
        for rec in all_tasks
        if rec["status"] not in known_statuses
    )

    # I4: a task that is not running must not hold a claim_lock.
    stray_locks = conn.execute("""
        SELECT id, status, claim_lock FROM tasks
        WHERE (status != 'running' AND claim_lock IS NOT NULL)
    """).fetchall()
    problems.extend(
        f"I4: task {rec['id']} status={rec['status']} but claim_lock={rec['claim_lock']!r}"
        for rec in stray_locks
    )

    # I5: a finished run cannot end before it started.
    reversed_runs = conn.execute("""
        SELECT id, started_at, ended_at FROM task_runs
        WHERE ended_at IS NOT NULL AND started_at > ended_at
    """).fetchall()
    problems.extend(
        f"I5: run {rec['id']} started_at={rec['started_at']} > ended_at={rec['ended_at']}"
        for rec in reversed_runs
    )

    # I6: an outcome implies the run has ended.
    open_with_outcome = conn.execute("""
        SELECT id, outcome, ended_at FROM task_runs
        WHERE outcome IS NOT NULL AND ended_at IS NULL
    """).fetchall()
    problems.extend(
        f"I6: run {rec['id']} outcome={rec['outcome']} but ended_at NULL"
        for rec in open_with_outcome
    )

    # I7 (event ordering) is intentionally not queried: the autoincrement
    # id is relied on to guarantee it.

    # I8: an event's run_id, when set, must reference an existing run.
    broken_event_refs = conn.execute("""
        SELECT e.id, e.run_id FROM task_events e
        LEFT JOIN task_runs r ON r.id = e.run_id
        WHERE e.run_id IS NOT NULL AND r.id IS NULL
    """).fetchall()
    problems.extend(
        f"I8: event {rec['id']} references missing run {rec['run_id']}"
        for rec in broken_event_refs
    )

    # I9: a child with at least one parent, all of them done, must not
    # still be sitting in 'todo'.
    stuck_children = conn.execute("""
        SELECT c.id AS child_id,
        COUNT(*) AS n_parents,
        SUM(CASE WHEN p.status = 'done' THEN 1 ELSE 0 END) AS done_parents
        FROM tasks c
        JOIN task_links l ON l.child_id = c.id
        JOIN tasks p ON p.id = l.parent_id
        WHERE c.status = 'todo'
        GROUP BY c.id
        HAVING n_parents > 0 AND n_parents = done_parents
    """).fetchall()
    problems.extend(
        f"I9: task {rec['child_id']} is todo but all {rec['n_parents']} parents are done"
        for rec in stuck_children
    )

    if not problems:
        return True

    # Failure report: capped at ten violations plus the trailing ops.
    print(f"\n!!! INVARIANT VIOLATION after {len(ops_log)} ops:")
    for line in problems[:10]:
        print(f" {line}")
    if len(problems) > 10:
        print(f" ... and {len(problems) - 10} more")
    print("\nLast 10 ops:")
    for entry in ops_log[-10:]:
        print(f" {entry}")
    return False
|
|
|
|
|
|
def random_op(rng, conn, kb, task_pool):
    """Pick one operation name from ``OPS`` and apply it through ``kb``.

    Args:
        rng: Random source; every random decision made here goes through it.
        conn: Database connection handed to each ``kb`` call.
        kb: Object exposing the kernel operations (``create_task``,
            ``claim_task``, ``complete_task``, ...).
        task_pool: Mutable list of known task ids. Ids of newly created
            tasks are appended; ids that were archived, or whose task
            ``kb.get_task`` no longer returns, are removed.

    Returns:
        A dict describing the applied operation (it always carries an
        ``"op"`` key), or ``None`` when nothing was applied because the
        pool was empty or the selected task could not be fetched.
    """
    chosen = rng.choice(OPS)

    # Creation ops do not need an existing target task.
    if chosen == "create":
        new_id = kb.create_task(
            conn,
            title=f"rand {rng.randint(0, 1000)}",
            assignee=rng.choice(["w1", "w2", "w3", None]),
        )
        task_pool.append(new_id)
        return {"op": "create", "tid": new_id}

    if chosen == "create_child" and task_pool:
        parent = rng.choice(task_pool)
        owner = rng.choice(["w1", "w2", "w3", None])
        new_id = kb.create_task(
            conn,
            title=f"child of {parent}",
            assignee=owner,
            parents=[parent],
        )
        task_pool.append(new_id)
        return {"op": "create_child", "tid": new_id, "parent": parent}

    if not task_pool:
        return None

    # Every remaining op draws a target first, including the board-wide
    # ones that end up not using it.
    target = rng.choice(task_pool)
    if kb.get_task(conn, target) is None:
        task_pool.remove(target)
        return None

    if chosen == "claim":
        ttl = rng.choice([1, 3, 10])
        claimed = kb.claim_task(conn, target, ttl_seconds=ttl)
        return {"op": "claim", "tid": target, "ok": claimed is not None}

    if chosen == "complete":
        # Draw the number before choosing so the rng call order is fixed.
        suffix = rng.randint(0, 1000)
        summary = rng.choice([None, f"done via op {suffix}"])
        done = kb.complete_task(conn, target, summary=summary)
        return {"op": "complete", "tid": target, "ok": done}

    if chosen == "block":
        why = rng.choice([None, "rand block"])
        blocked = kb.block_task(conn, target, reason=why)
        return {"op": "block", "tid": target, "ok": blocked}

    if chosen == "unblock":
        return {"op": "unblock", "tid": target, "ok": kb.unblock_task(conn, target)}

    if chosen == "archive":
        archived = kb.archive_task(conn, target)
        if archived:
            # Archived tasks leave the pool so later ops stop targeting them.
            task_pool.remove(target)
        return {"op": "archive", "tid": target, "ok": archived}

    if chosen == "heartbeat":
        return {"op": "heartbeat", "tid": target, "ok": kb.heartbeat_worker(conn, target)}

    if chosen == "release_stale":
        return {"op": "release_stale", "n": kb.release_stale_claims(conn)}

    if chosen == "detect_crashed":
        # No worker is killed beforehand; detection just runs against
        # whatever state the previous ops left behind.
        dead = kb.detect_crashed_workers(conn)
        return {"op": "detect_crashed", "n": len(dead)}

    if chosen == "recompute_ready":
        return {"op": "recompute_ready", "promoted": kb.recompute_ready(conn)}

    if chosen == "reassign":
        # Reassignment isn't a direct API; simulate via assign_task.
        new_owner = rng.choice(["w1", "w2", "w3", None])
        try:
            kb.assign_task(conn, target, new_owner)
        except Exception as exc:
            # A rejected reassignment is recorded, not treated as fatal.
            return {"op": "reassign", "tid": target, "err": str(exc)[:50]}
        return {"op": "reassign", "tid": target, "to": new_owner}

    return None
|
|
|
|
|
|
def main():
    """Run the randomized sequences and report invariant violations.

    Each of the ``NUM_SEQUENCES`` sequences gets its own seed, its own
    temporary home directory (exported through ``HERMES_HOME`` and
    ``HOME``, which are overwritten and not restored) and a freshly
    imported ``hermes_cli.kanban_db``. Up to ``OPS_PER_SEQUENCE`` random
    operations are applied, with the invariants checked after each one;
    a sequence stops at its first violation.

    The temporary home of a sequence is deleted once that sequence
    finishes without a violation. It is kept after a violation or an
    unexpected exception so the database can be inspected; its name
    starts with ``hermes_fuzz_<sequence index>_``.

    A summary is printed at the end, and the process exits with status 1
    when at least one sequence violated an invariant.
    """
    import shutil  # function-scope: only needed for temp-dir cleanup here

    total_ops = 0
    total_violations = 0

    for seq_idx in range(NUM_SEQUENCES):
        seed = random.randint(0, 10**9)
        rng = random.Random(seed)
        home = tempfile.mkdtemp(prefix=f"hermes_fuzz_{seq_idx}_")
        os.environ["HERMES_HOME"] = home
        os.environ["HOME"] = home
        # Keep WT first on sys.path without adding a duplicate entry on
        # every sequence.
        if not sys.path or sys.path[0] != WT:
            sys.path.insert(0, WT)

        # Fresh module state per sequence to avoid cached init paths.
        for m in list(sys.modules):
            if m.startswith("hermes_cli"):
                del sys.modules[m]
        from hermes_cli import kanban_db as kb

        kb.init_db()
        conn = kb.connect()
        task_pool = []
        ops_log = []
        # Stays False on a violation or an unexpected exception so the
        # temp directory survives for post-mortem inspection.
        clean = False

        try:
            for i in range(OPS_PER_SEQUENCE):
                result = random_op(rng, conn, kb, task_pool)
                if result is None:
                    continue
                ops_log.append(result)
                total_ops += 1
                if not assert_invariants(conn, kb, ops_log):
                    total_violations += 1
                    print(f" sequence {seq_idx} (seed={seed}) failed at op {i}")
                    break
            else:
                clean = True
        finally:
            conn.close()
            if clean:
                # mkdtemp leaves deletion to the caller; without this every
                # sequence would leave a directory behind.
                shutil.rmtree(home, ignore_errors=True)

        if seq_idx % 10 == 0:
            print(f" seq {seq_idx:3d}: {total_ops} ops so far, {total_violations} violations")

    print()
    print("=" * 60)
    print(f"Total sequences: {NUM_SEQUENCES}")
    print(f"Total operations: {total_ops}")
    print(f"Invariant violations: {total_violations}")
    if total_violations == 0:
        print("\n✔ ALL INVARIANTS HELD ACROSS RANDOMIZED SEQUENCES")
    else:
        print("\n✗ INVARIANT VIOLATIONS FOUND")
        sys.exit(1)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|