mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-03 17:27:37 +08:00
Salvage of PR #16100 onto current main (after emozilla's #17514 fix that unblocks plugin Pydantic body validation). History preserved on the standing `feat/kanban-standing` branch; this squashes the 22 iterative commits into one clean landing. What this lands: - SQLite kernel (hermes_cli/kanban_db.py) — durable task board with tasks, task_links, task_runs, task_comments, task_events, kanban_notify_subs tables. WAL mode, atomic claim via CAS, tenant-namespaced, skills JSON array per task, max-runtime timeouts, worker heartbeats, idempotency keys, circuit breaker on repeated spawn failures, crash detection via /proc/<pid>/status, run history preserved across attempts. - Dispatcher — runs inside the gateway by default (`kanban.dispatch_in_gateway: true`). Ticks every 60s, reclaims stale claims, promotes ready tasks, spawns `hermes -p <assignee> chat -q "work kanban task <id>"` with HERMES_KANBAN_TASK + HERMES_KANBAN_WORKSPACE env. Auto-loads `--skills kanban-worker` plus any per-task skills. Health telemetry warns on stuck ready queue. - Structured tool surface (tools/kanban_tools.py) — 7 tools (kanban_show, kanban_complete, kanban_block, kanban_heartbeat, kanban_comment, kanban_create, kanban_link). Gated on HERMES_KANBAN_TASK via check_fn so zero schema footprint in normal sessions. - System-prompt guidance (agent/prompt_builder.py KANBAN_GUIDANCE) injected only when kanban tools are active. - Dashboard plugin (plugins/kanban/dashboard/) — Linear-style board UI: triage/todo/ready/running/blocked/done columns, drag-drop, inline create, task drawer with markdown, comments, run history, dependency editor, bulk ops, lanes-by-profile grouping, WS-driven live refresh. Matches active dashboard theme via CSS variables. - CLI — `hermes kanban init|create|list|show|assign|link|unlink| claim|comment|complete|block|unblock|archive|tail|dispatch|context| init|gc|watch|stats|notify|log|heartbeat|runs|assignees` + `/kanban` slash in-session. 
- Worker + orchestrator skills (skills/devops/kanban-worker + kanban-orchestrator) — pattern library for good summary/metadata shapes, retry diagnostics, block-reason examples, fan-out patterns. - Per-task force-loaded skills — `--skill <name>` (repeatable), stored as JSON, threaded through to dispatcher argv as one `--skills X` pair per skill alongside the built-in kanban-worker. Dashboard + CLI + tool parity. - Deprecation of standalone `hermes kanban daemon` — stub exits 2 with migration guidance; `--force` escape hatch for headless hosts. - Docs (website/docs/user-guide/features/kanban.md + kanban-tutorial.md) with 11 dashboard screenshots walking through four user stories (Solo Dev, Fleet Farming, Role Pipeline, Circuit Breaker). - Tests (251 passing): kernel schema + migration + CAS atomicity, dispatcher logic, circuit breaker, crash detection, max-runtime timeouts, claim lifecycle, tenant isolation, idempotency keys, per-task skills round-trip + validation + dispatcher argv, tool surface (7 tools × round-trip + error paths), dashboard REST (CRUD + bulk + links + warnings), gateway-embedded dispatcher (config gate, env override, graceful shutdown), CLI deprecation stub, migration from legacy schemas. Gateway integration: - GatewayRunner._kanban_dispatcher_watcher — new asyncio background task, symmetric with _kanban_notifier_watcher. Runs dispatch_once via asyncio.to_thread so SQLite WAL never blocks the loop. Sleeps in 1s slices for snappy shutdown. Respects HERMES_KANBAN_DISPATCH_IN_GATEWAY=0 env override for debugging. - Config: new `kanban` section in DEFAULT_CONFIG with `dispatch_in_gateway: true` (default) + `dispatch_interval_seconds: 60`. Additive — no _config_version bump needed. Forward-compat: - workflow_template_id / current_step_key columns on tasks (v1 writes NULL; v2 will use them for routing). - task_runs holds claim machinery (claim_lock, claim_expires, worker_pid, last_heartbeat_at) so multi-attempt history is first-class from day one.
Closes #16102. Co-authored-by: emozilla <emozilla@nousresearch.com>
222 lines
7.4 KiB
Python
222 lines
7.4 KiB
Python
"""Scale benchmarks for the Kanban kernel.

Measures:
- dispatch_once latency at 100, 1000, 10000 tasks
- recompute_ready latency at 100, 1000, 10000 todo tasks with wide parent graphs
- build_worker_context latency with 1, 10, 50 parent dependencies
- board list/stats query latency
- task_runs query latency at scale

Results are printed as a table and saved to JSON for regression-diffing in CI
or future reviews. Not a pass/fail test — it records numbers so we know
when a change regresses latency by 10x and can decide whether to care.
"""
|
|
|
|
import json
|
|
import os
|
|
import random
|
|
import sys
|
|
import tempfile
|
|
import time
|
|
from pathlib import Path
|
|
|
|
# Directory two levels above this file's own directory. main() prepends it to
# sys.path before importing hermes_cli, so it is presumably the repository /
# worktree root that contains the hermes_cli package — TODO confirm.
WT = str(Path(__file__).resolve().parents[2])
|
|
|
|
|
|
def bench(label, fn, iterations=5):
    """Time ``fn`` over ``iterations`` calls and summarise the latency.

    Args:
        label: Human-readable benchmark name, copied into the result.
        fn: Zero-argument callable to time.
        iterations: Number of timed calls; must be at least 1.

    Returns:
        A dict with the keys ``label``, ``iter``, ``min_ms``, ``median_ms``
        and ``max_ms``. All timings are wall-clock milliseconds measured
        with ``time.perf_counter``.

    Raises:
        ValueError: If ``iterations`` is less than 1 (no sample to summarise).
    """
    if iterations < 1:
        raise ValueError(f"iterations must be >= 1, got {iterations}")

    times = []
    for _ in range(iterations):
        t0 = time.perf_counter()
        fn()
        times.append((time.perf_counter() - t0) * 1000)
    times.sort()

    # True median: for an even sample count, average the two middle values
    # instead of reporting the upper one (which biases the number upwards).
    mid = len(times) // 2
    if len(times) % 2:
        md = times[mid]
    else:
        md = (times[mid - 1] + times[mid]) / 2

    return {
        "label": label,
        "iter": iterations,
        "min_ms": times[0],
        "median_ms": md,
        "max_ms": times[-1],
    }
|
|
|
|
|
|
def seed_tasks(conn, kb, n, assignee="bench-worker", with_parents=False,
               parents_per_task=5):
    """Create ``n`` benchmark tasks and return their ids in creation order.

    Args:
        conn: Connection handed through to ``kb.create_task``.
        kb: Object providing ``create_task`` (the kanban DB module in practice).
        n: Number of tasks to create.
        assignee: Assignee passed to every created task.
        with_parents: When true, each task created after the first
            ``parents_per_task`` tasks is linked to ``parents_per_task``
            distinct, randomly chosen earlier tasks.
        parents_per_task: Number of parents per task when ``with_parents``
            is set. Defaults to 5, the previously hard-coded value.

    Returns:
        List of the values returned by ``kb.create_task``, one per task.
    """
    ids = []
    for i in range(n):
        if with_parents and i >= parents_per_task:
            # ``ids`` holds exactly the i tasks created so far, so it can be
            # sampled directly; slicing it would copy the list every iteration.
            parents = random.sample(ids, parents_per_task)
        else:
            parents = ()
        tid = kb.create_task(
            conn, title=f"bench {i}", assignee=assignee,
            tenant="bench", parents=parents,
        )
        ids.append(tid)
    return ids
|
|
|
|
|
|
def _fresh_board(kb, home):
    """Wipe ``home``, re-initialise the Kanban DB and return a new connection.

    ``kb._INITIALIZED_PATHS`` is cleared before ``kb.init_db()`` runs;
    presumably it caches already-initialised DB paths, so clearing it forces
    a real re-init after the directory was deleted — TODO confirm.
    """
    import shutil  # function-scope: not part of the file-level imports

    shutil.rmtree(home, ignore_errors=True)
    os.makedirs(home)
    kb._INITIALIZED_PATHS.clear()
    kb.init_db()
    return kb.connect()


def _record(results, result, **extra):
    """Print one benchmark result, tag it with ``extra`` and append it to ``results``."""
    print(f" min={result['min_ms']:.1f} median={result['median_ms']:.1f} max={result['max_ms']:.1f} ms")
    result.update(extra)
    results.append(result)


def main():
    """Run all Kanban kernel benchmarks and report their timings.

    Points ``HERMES_HOME`` and ``HOME`` at a fresh temporary directory,
    imports ``hermes_cli.kanban_db`` from ``WT``, and runs each benchmark
    against a newly created board. Prints a per-benchmark line and a summary
    table, then writes all results to ``/tmp/kanban_bench_results.json``.

    The temporary home directory is removed when the benchmarks finish or
    fail, and each connection is closed even if its benchmark raises.
    """
    import shutil
    from contextlib import closing

    home = tempfile.mkdtemp(prefix="hermes_bench_")
    os.environ["HERMES_HOME"] = home
    os.environ["HOME"] = home
    sys.path.insert(0, WT)
    from hermes_cli import kanban_db as kb

    kb.init_db()

    results = []
    task_counts = [100, 1000, 10000]

    try:
        # ============ dispatch_once latency ============
        for n in task_counts:
            print(f"\n== dispatch_once @ {n} tasks ==")
            # Fresh DB each time so we're not measuring cumulative effects
            with closing(_fresh_board(kb, home)) as conn:
                seed_tasks(conn, kb, n, assignee=None)  # no assignee → won't spawn
                r = bench(
                    f"dispatch_once (n={n}, no spawn)",
                    lambda: kb.dispatch_once(conn, spawn_fn=lambda *_: None),
                    iterations=5,
                )
                _record(results, r, n=n)

        # ============ recompute_ready at scale with parent graphs ============
        for n in task_counts:
            print(f"\n== recompute_ready @ {n} tasks (5 parents each) ==")
            with closing(_fresh_board(kb, home)) as conn:
                ids = seed_tasks(conn, kb, n, assignee=None, with_parents=True)
                # Complete the first min(100, n // 10) tasks so some todo
                # tasks might get promoted
                for tid in ids[:min(100, n // 10)]:
                    kb.complete_task(conn, tid, result="bench")
                r = bench(
                    f"recompute_ready (n={n}, with parents)",
                    lambda: kb.recompute_ready(conn),
                    iterations=5,
                )
                _record(results, r, n=n)

        # ============ build_worker_context with N parents ============
        for parent_count in [1, 10, 50]:
            print(f"\n== build_worker_context with {parent_count} parents ==")
            with closing(_fresh_board(kb, home)) as conn:
                # Create parents, complete them with summaries+metadata
                parent_ids = []
                for i in range(parent_count):
                    pid = kb.create_task(conn, title=f"parent {i}", assignee="p")
                    kb.claim_task(conn, pid)
                    kb.complete_task(
                        conn, pid,
                        summary=f"parent {i} result that is longer than a single token "
                                f"so we actually measure the IO",
                        metadata={"files": [f"file_{j}.py" for j in range(5)], "i": i},
                    )
                    parent_ids.append(pid)
                child_id = kb.create_task(
                    conn, title="child", assignee="c", parents=parent_ids,
                )
                r = bench(
                    f"build_worker_context (parents={parent_count})",
                    lambda: kb.build_worker_context(conn, child_id),
                    iterations=10,
                )
                _record(results, r, parent_count=parent_count)

        # ============ list_tasks at scale ============
        for n in task_counts:
            print(f"\n== list_tasks @ {n} ==")
            with closing(_fresh_board(kb, home)) as conn:
                seed_tasks(conn, kb, n)
                r = bench(
                    f"list_tasks (n={n})",
                    lambda: kb.list_tasks(conn),
                    iterations=5,
                )
                _record(results, r, n=n)

        # ============ board_stats at scale ============
        for n in task_counts:
            print(f"\n== board_stats @ {n} ==")
            with closing(_fresh_board(kb, home)) as conn:
                seed_tasks(conn, kb, n)
                r = bench(
                    f"board_stats (n={n})",
                    lambda: kb.board_stats(conn),
                    iterations=5,
                )
                _record(results, r, n=n)

        # ============ list_runs at scale ============
        for n in [100, 1000]:
            print(f"\n== list_runs for task with {n} attempts ==")
            with closing(_fresh_board(kb, home)) as conn:
                tid = kb.create_task(conn, title="x", assignee="w")
                # Create N attempts via claim/release
                for _ in range(n):
                    kb.claim_task(conn, tid, ttl_seconds=0)
                    kb.release_stale_claims(conn)
                r = bench(
                    f"list_runs (runs={n})",
                    lambda: kb.list_runs(conn, tid),
                    iterations=10,
                )
                _record(results, r, run_count=n)
    finally:
        # Don't leave the throwaway HERMES_HOME (and its DB) behind.
        shutil.rmtree(home, ignore_errors=True)

    # ============ SUMMARY TABLE ============
    print()
    print("=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f"{'Benchmark':<50} {'min':>8} {'median':>8} {'max':>8}")
    for r in results:
        print(f"{r['label']:<50} {r['min_ms']:>7.1f}ms {r['median_ms']:>7.1f}ms {r['max_ms']:>7.1f}ms")

    # Save for future diffing.
    out_path = "/tmp/kanban_bench_results.json"
    with open(out_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)
    print(f"\nResults saved to {out_path}")
|
|
|
|
|
|
# Run the benchmark suite only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()
|