tests/stress/test_benchmarks.py

"""Scale benchmarks for the Kanban kernel.

Measures:
  - dispatch_once latency at 100, 1000, 10000 tasks
  - recompute_ready latency at 100, 1000, 10000 todo tasks with wide parent graphs
  - build_worker_context latency with 1, 10, 50 parent dependencies
  - board list/stats query latency
  - task_runs query latency at scale

Results printed as a table. Saved to JSON for regression-diffing in CI
or future reviews. Not a pass/fail test — records numbers so we know
when a change regresses latency by 10x and can decide whether to care.
"""

import json
import os
import random
import sys
import tempfile
import time
from pathlib import Path

# parents[2] of this file's resolved path, i.e. the repository root when the
# file lives at tests/stress/. main() prepends it to sys.path so that
# `hermes_cli` can be imported.
WT = str(Path(__file__).resolve().parents[2])


def bench(label, fn, iterations=5):
    """Time `fn` over `iterations` runs and summarise the wall-clock latency.

    Args:
        label: Human-readable name stored in the result under "label".
        fn: Zero-argument callable to time; its return value is discarded.
        iterations: Number of timed calls. Must be at least 1.

    Returns:
        A dict with keys "label", "iter", "min_ms", "median_ms" and "max_ms".
        All timings are in milliseconds.

    Raises:
        ValueError: If `iterations` is less than 1 (there would be no samples
            to summarise).
    """
    if iterations < 1:
        raise ValueError(f"iterations must be >= 1, got {iterations}")

    times = []
    for _ in range(iterations):
        t0 = time.perf_counter()
        fn()
        times.append((time.perf_counter() - t0) * 1000)
    times.sort()

    # True median: for an even sample count average the two middle values
    # instead of reporting the upper one, which would bias the figure high.
    mid = len(times) // 2
    if len(times) % 2:
        median = times[mid]
    else:
        median = (times[mid - 1] + times[mid]) / 2

    return {
        "label": label,
        "iter": iterations,
        "min_ms": times[0],
        "median_ms": median,
        "max_ms": times[-1],
    }


def seed_tasks(conn, kb, n, assignee="bench-worker", with_parents=False,
               parents_per_task=5):
    """Create `n` benchmark tasks through `kb.create_task` and return their ids.

    Args:
        conn: Connection object forwarded unchanged to `kb.create_task`.
        kb: Module/object exposing `create_task(conn, title=..., assignee=...,
            tenant=..., parents=...)`.
        n: Number of tasks to create.
        assignee: Assignee passed to every task (may be None).
        with_parents: When true, every task created after the first
            `parents_per_task` ones gets `parents_per_task` distinct parents
            drawn at random from the tasks created before it. Earlier tasks
            (and all tasks when false) get no parents.
        parents_per_task: Number of parents per task when `with_parents` is
            true. Defaults to 5.

    Returns:
        List of the ids returned by `kb.create_task`, in creation order.
    """
    ids = []
    for i in range(n):
        if with_parents and i >= parents_per_task:
            # `ids` holds exactly the i tasks created so far, so sample from
            # it directly; slicing a copy each iteration was O(n^2) overall.
            parents = random.sample(ids, parents_per_task)
        else:
            parents = ()
        tid = kb.create_task(
            conn, title=f"bench {i}", assignee=assignee,
            tenant="bench", parents=parents,
        )
        ids.append(tid)
    return ids


def _fresh_connection(kb, home):
    """Recreate an empty `home`, re-run `kb.init_db()` and return `kb.connect()`."""
    import shutil

    shutil.rmtree(home, ignore_errors=True)
    os.makedirs(home)
    # NOTE(review): presumably this makes init_db() forget that the (now
    # deleted) DB path was already initialised -- confirm against kanban_db.
    kb._INITIALIZED_PATHS.clear()
    kb.init_db()
    return kb.connect()


def _record(results, r, **tags):
    """Print one benchmark's timings, add `tags` to it and append it to `results`."""
    print(f"  min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms")
    r.update(tags)
    results.append(r)


def main():
    """Run every Kanban benchmark, print a summary table and save it as JSON.

    Each benchmark case runs against a freshly initialised database inside a
    temporary HERMES_HOME so measurements do not accumulate state from the
    previous case. The temporary directory is removed and the HERMES_HOME /
    HOME environment variables are restored when the run ends, whether it
    finishes normally or raises. Results are written to
    /tmp/kanban_bench_results.json.
    """
    # Function-scope import: only the script entry point needs shutil.
    import shutil

    home = tempfile.mkdtemp(prefix="hermes_bench_")
    saved_env = {key: os.environ.get(key) for key in ("HERMES_HOME", "HOME")}
    results = []

    try:
        os.environ["HERMES_HOME"] = home
        os.environ["HOME"] = home
        sys.path.insert(0, WT)
        from hermes_cli import kanban_db as kb

        kb.init_db()

        # ============ dispatch_once latency ============
        for n in [100, 1000, 10000]:
            print(f"\n== dispatch_once @ {n} tasks ==")
            # Fresh DB each time so we're not measuring cumulative effects
            conn = _fresh_connection(kb, home)
            try:
                seed_tasks(conn, kb, n, assignee=None)  # no assignee → won't spawn
                r = bench(
                    f"dispatch_once (n={n}, no spawn)",
                    lambda: kb.dispatch_once(conn, spawn_fn=lambda *_: None),
                    iterations=5,
                )
                _record(results, r, n=n)
            finally:
                conn.close()

        # ============ recompute_ready at scale with parent graphs ============
        for n in [100, 1000, 10000]:
            print(f"\n== recompute_ready @ {n} tasks (5 parents each) ==")
            conn = _fresh_connection(kb, home)
            try:
                ids = seed_tasks(conn, kb, n, assignee=None, with_parents=True)
                # Complete the first min(100, n // 10) tasks so some todo
                # tasks might get promoted
                for tid in ids[:min(100, n // 10)]:
                    kb.complete_task(conn, tid, result="bench")
                r = bench(
                    f"recompute_ready (n={n}, with parents)",
                    lambda: kb.recompute_ready(conn),
                    iterations=5,
                )
                _record(results, r, n=n)
            finally:
                conn.close()

        # ============ build_worker_context with N parents ============
        for parent_count in [1, 10, 50]:
            print(f"\n== build_worker_context with {parent_count} parents ==")
            conn = _fresh_connection(kb, home)
            try:
                # Create parents, complete them with summaries+metadata
                parent_ids = []
                for i in range(parent_count):
                    pid = kb.create_task(conn, title=f"parent {i}", assignee="p")
                    kb.claim_task(conn, pid)
                    kb.complete_task(
                        conn, pid,
                        summary=f"parent {i} result that is longer than a single token "
                                f"so we actually measure the IO",
                        metadata={"files": [f"file_{j}.py" for j in range(5)], "i": i},
                    )
                    parent_ids.append(pid)
                child_id = kb.create_task(
                    conn, title="child", assignee="c", parents=parent_ids,
                )
                r = bench(
                    f"build_worker_context (parents={parent_count})",
                    lambda: kb.build_worker_context(conn, child_id),
                    iterations=10,
                )
                _record(results, r, parent_count=parent_count)
            finally:
                conn.close()

        # ============ list_tasks at scale ============
        for n in [100, 1000, 10000]:
            print(f"\n== list_tasks @ {n} ==")
            conn = _fresh_connection(kb, home)
            try:
                seed_tasks(conn, kb, n)
                r = bench(
                    f"list_tasks (n={n})",
                    lambda: kb.list_tasks(conn),
                    iterations=5,
                )
                _record(results, r, n=n)
            finally:
                conn.close()

        # ============ board_stats at scale ============
        for n in [100, 1000, 10000]:
            print(f"\n== board_stats @ {n} ==")
            conn = _fresh_connection(kb, home)
            try:
                seed_tasks(conn, kb, n)
                r = bench(
                    f"board_stats (n={n})",
                    lambda: kb.board_stats(conn),
                    iterations=5,
                )
                _record(results, r, n=n)
            finally:
                conn.close()

        # ============ list_runs at scale ============
        for n in [100, 1000]:
            print(f"\n== list_runs for task with {n} attempts ==")
            conn = _fresh_connection(kb, home)
            try:
                tid = kb.create_task(conn, title="x", assignee="w")
                # Create N attempts via claim/release
                for _ in range(n):
                    kb.claim_task(conn, tid, ttl_seconds=0)
                    kb.release_stale_claims(conn)
                r = bench(
                    f"list_runs (runs={n})",
                    lambda: kb.list_runs(conn, tid),
                    iterations=10,
                )
                _record(results, r, run_count=n)
            finally:
                conn.close()
    finally:
        # mkdtemp() leaves deletion to the caller; also undo the environment
        # overrides so nothing keeps pointing at the removed directory.
        shutil.rmtree(home, ignore_errors=True)
        for key, value in saved_env.items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value

    # ============ SUMMARY TABLE ============
    print()
    print("=" * 60)
    print("SUMMARY")
    print("=" * 60)
    print(f"{'Benchmark':<50} {'min':>8} {'median':>8} {'max':>8}")
    for r in results:
        print(f"{r['label']:<50} {r['min_ms']:>7.1f}ms {r['median_ms']:>7.1f}ms {r['max_ms']:>7.1f}ms")

    # Save for future diffing.
    out_path = "/tmp/kanban_bench_results.json"
    with open(out_path, "w") as f:
        json.dump(results, f, indent=2)
    print(f"\nResults saved to {out_path}")


# Run the full benchmark suite only when this file is executed as a script;
# importing the module does not start the benchmarks.
if __name__ == "__main__":
    main()
feat(kanban): durable multi-profile collaboration board (#17805) Salvage of PR #16100 onto current main (after emozilla's #17514 fix that unblocks plugin Pydantic body validation). History preserved on the standing `feat/kanban-standing` branch; this squashes the 22 iterative commits into one clean landing. What this lands: - SQLite kernel (hermes_cli/kanban_db.py) — durable task board with tasks, task_links, task_runs, task_comments, task_events, kanban_notify_subs tables. WAL mode, atomic claim via CAS, tenant-namespaced, skills JSON array per task, max-runtime timeouts, worker heartbeats, idempotency keys, circuit breaker on repeated spawn failures, crash detection via /proc/<pid>/status, run history preserved across attempts. - Dispatcher — runs inside the gateway by default (`kanban.dispatch_in_gateway: true`). Ticks every 60s, reclaims stale claims, promotes ready tasks, spawns `hermes -p <assignee> chat -q "work kanban task <id>"` with HERMES_KANBAN_TASK + HERMES_KANBAN_WORKSPACE env. Auto-loads `--skills kanban-worker` plus any per-task skills. Health telemetry warns on stuck ready queue. - Structured tool surface (tools/kanban_tools.py) — 7 tools (kanban_show, kanban_complete, kanban_block, kanban_heartbeat, kanban_comment, kanban_create, kanban_link). Gated on HERMES_KANBAN_TASK via check_fn so zero schema footprint in normal sessions. - System-prompt guidance (agent/prompt_builder.py KANBAN_GUIDANCE) injected only when kanban tools are active. - Dashboard plugin (plugins/kanban/dashboard/) — Linear-style board UI: triage/todo/ready/running/blocked/done columns, drag-drop, inline create, task drawer with markdown, comments, run history, dependency editor, bulk ops, lanes-by-profile grouping, WS-driven live refresh. Matches active dashboard theme via CSS variables. 
- CLI — `hermes kanban init\|create\|list\|show\|assign\|link\|unlink\| claim\|comment\|complete\|block\|unblock\|archive\|tail\|dispatch\|context\| init\|gc\|watch\|stats\|notify\|log\|heartbeat\|runs\|assignees` + `/kanban` slash in-session. - Worker + orchestrator skills (skills/devops/kanban-worker + kanban-orchestrator) — pattern library for good summary/metadata shapes, retry diagnostics, block-reason examples, fan-out patterns. - Per-task force-loaded skills — `--skill <name>` (repeatable), stored as JSON, threaded through to dispatcher argv as one `--skills X` pair per skill alongside the built-in kanban-worker. Dashboard + CLI + tool parity. - Deprecation of standalone `hermes kanban daemon` — stub exits 2 with migration guidance; `--force` escape hatch for headless hosts. - Docs (website/docs/user-guide/features/kanban.md + kanban-tutorial.md) with 11 dashboard screenshots walking through four user stories (Solo Dev, Fleet Farming, Role Pipeline, Circuit Breaker). - Tests (251 passing): kernel schema + migration + CAS atomicity, dispatcher logic, circuit breaker, crash detection, max-runtime timeouts, claim lifecycle, tenant isolation, idempotency keys, per- task skills round-trip + validation + dispatcher argv, tool surface (7 tools × round-trip + error paths), dashboard REST (CRUD + bulk + links + warnings), gateway-embedded dispatcher (config gate, env override, graceful shutdown), CLI deprecation stub, migration from legacy schemas. Gateway integration: - GatewayRunner._kanban_dispatcher_watcher — new asyncio background task, symmetric with _kanban_notifier_watcher. Runs dispatch_once via asyncio.to_thread so SQLite WAL never blocks the loop. Sleeps in 1s slices for snappy shutdown. Respects HERMES_KANBAN_DISPATCH_IN_GATEWAY=0 env override for debugging. - Config: new `kanban` section in DEFAULT_CONFIG with `dispatch_in_gateway: true` (default) + `dispatch_interval_seconds: 60`. Additive — no \_config_version bump needed. 
Forward-compat: - workflow_template_id / current_step_key columns on tasks (v1 writes NULL; v2 will use them for routing). - task_runs holds claim machinery (claim_lock, claim_expires, worker_pid, last_heartbeat_at) so multi-attempt history is first- class from day one. Closes #16102. Co-authored-by: emozilla <emozilla@nousresearch.com> 2026-04-30 13:36:47 -07:00			`"""Scale benchmarks for the Kanban kernel.`

			`Measures:`
			`- dispatch_once latency at 100, 1000, 10000 tasks`
			`- recompute_ready latency at 100, 1000, 10000 todo tasks with wide parent graphs`
			`- build_worker_context latency with 1, 10, 50 parent dependencies`
			`- board list/stats query latency`
			`- task_runs query latency at scale`

			`Results printed as a table. Saved to JSON for regression-diffing in CI`
			`or future reviews. Not a pass/fail test — records numbers so we know`
			`when a change regresses latency by 10x and can decide whether to care.`
			`"""`

			`import json`
			`import os`
			`import random`
			`import sys`
			`import tempfile`
			`import time`
			`from pathlib import Path`

			`WT = str(Path(__file__).resolve().parents[2])`


			`def bench(label, fn, iterations=5):`
			"""Time fn over `iterations` runs, return (min, median, max) in ms."""
			`times = []`
			`for _ in range(iterations):`
			`t0 = time.perf_counter()`
			`fn()`
			`times.append((time.perf_counter() - t0) * 1000)`
			`times.sort()`
			`mn = times[0]`
			`md = times[len(times) // 2]`
			`mx = times[-1]`
			`return {"label": label, "iter": iterations, "min_ms": mn, "median_ms": md, "max_ms": mx}`


			`def seed_tasks(conn, kb, n, assignee="bench-worker", with_parents=False):`
			`"""Seed n tasks. Optionally give each task 5 parents."""`
			`ids = []`
			`for i in range(n):`
			`if with_parents and i >= 5:`
			`parents = random.sample(ids[:i], 5)`
			`else:`
			`parents = ()`
			`tid = kb.create_task(`
			`conn, title=f"bench {i}", assignee=assignee,`
			`tenant="bench", parents=parents,`
			`)`
			`ids.append(tid)`
			`return ids`


			`def main():`
			`home = tempfile.mkdtemp(prefix="hermes_bench_")`
			`os.environ["HERMES_HOME"] = home`
			`os.environ["HOME"] = home`
			`sys.path.insert(0, WT)`
			`from hermes_cli import kanban_db as kb`

			`kb.init_db()`

			`results = []`

			`# ============ dispatch_once latency ============`
			`for n in [100, 1000, 10000]:`
			`print(f"\n== dispatch_once @ {n} tasks ==")`
			`# Fresh DB each time so we're not measuring cumulative effects`
			`import shutil`
			`shutil.rmtree(home, ignore_errors=True)`
			`os.makedirs(home)`
			`kb._INITIALIZED_PATHS.clear()`
			`kb.init_db()`
			`conn = kb.connect()`
			`seed_tasks(conn, kb, n, assignee=None) # no assignee → won't spawn`
			`r = bench(`
			`f"dispatch_once (n={n}, no spawn)",`
			`lambda: kb.dispatch_once(conn, spawn_fn=lambda *_: None),`
			`iterations=5,`
			`)`
			`print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms")`
			`r["n"] = n`
			`results.append(r)`
			`conn.close()`

			`# ============ recompute_ready at scale with parent graphs ============`
			`for n in [100, 1000, 10000]:`
			`print(f"\n== recompute_ready @ {n} tasks (5 parents each) ==")`
			`shutil.rmtree(home, ignore_errors=True)`
			`os.makedirs(home)`
			`kb._INITIALIZED_PATHS.clear()`
			`kb.init_db()`
			`conn = kb.connect()`
			`ids = seed_tasks(conn, kb, n, assignee=None, with_parents=True)`
			`# Complete the first 100 so some todo tasks might get promoted`
			`for tid in ids[:min(100, n // 10)]:`
			`kb.complete_task(conn, tid, result="bench")`
			`r = bench(`
			`f"recompute_ready (n={n}, with parents)",`
			`lambda: kb.recompute_ready(conn),`
			`iterations=5,`
			`)`
			`print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms")`
			`r["n"] = n`
			`results.append(r)`
			`conn.close()`

			`# ============ build_worker_context with N parents ============`
			`for parent_count in [1, 10, 50]:`
			`print(f"\n== build_worker_context with {parent_count} parents ==")`
			`shutil.rmtree(home, ignore_errors=True)`
			`os.makedirs(home)`
			`kb._INITIALIZED_PATHS.clear()`
			`kb.init_db()`
			`conn = kb.connect()`
			`# Create parents, complete them with summaries+metadata`
			`parent_ids = []`
			`for i in range(parent_count):`
			`pid = kb.create_task(conn, title=f"parent {i}", assignee="p")`
			`kb.claim_task(conn, pid)`
			`kb.complete_task(`
			`conn, pid,`
			`summary=f"parent {i} result that is longer than a single token "`
			`f"so we actually measure the IO",`
			`metadata={"files": [f"file_{j}.py" for j in range(5)], "i": i},`
			`)`
			`parent_ids.append(pid)`
			`child_id = kb.create_task(`
			`conn, title="child", assignee="c", parents=parent_ids,`
			`)`
			`r = bench(`
			`f"build_worker_context (parents={parent_count})",`
			`lambda: kb.build_worker_context(conn, child_id),`
			`iterations=10,`
			`)`
			`print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms")`
			`r["parent_count"] = parent_count`
			`results.append(r)`
			`conn.close()`

			`# ============ list_tasks at scale ============`
			`for n in [100, 1000, 10000]:`
			`print(f"\n== list_tasks @ {n} ==")`
			`shutil.rmtree(home, ignore_errors=True)`
			`os.makedirs(home)`
			`kb._INITIALIZED_PATHS.clear()`
			`kb.init_db()`
			`conn = kb.connect()`
			`seed_tasks(conn, kb, n)`
			`r = bench(`
			`f"list_tasks (n={n})",`
			`lambda: kb.list_tasks(conn),`
			`iterations=5,`
			`)`
			`print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms")`
			`r["n"] = n`
			`results.append(r)`
			`conn.close()`

			`# ============ board_stats at scale ============`
			`for n in [100, 1000, 10000]:`
			`print(f"\n== board_stats @ {n} ==")`
			`shutil.rmtree(home, ignore_errors=True)`
			`os.makedirs(home)`
			`kb._INITIALIZED_PATHS.clear()`
			`kb.init_db()`
			`conn = kb.connect()`
			`seed_tasks(conn, kb, n)`
			`r = bench(`
			`f"board_stats (n={n})",`
			`lambda: kb.board_stats(conn),`
			`iterations=5,`
			`)`
			`print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms")`
			`r["n"] = n`
			`results.append(r)`
			`conn.close()`

			`# ============ list_runs at scale ============`
			`for n in [100, 1000]:`
			`print(f"\n== list_runs for task with {n} attempts ==")`
			`shutil.rmtree(home, ignore_errors=True)`
			`os.makedirs(home)`
			`kb._INITIALIZED_PATHS.clear()`
			`kb.init_db()`
			`conn = kb.connect()`
			`tid = kb.create_task(conn, title="x", assignee="w")`
			`# Create N attempts via claim/release`
			`for i in range(n):`
			`kb.claim_task(conn, tid, ttl_seconds=0)`
			`kb.release_stale_claims(conn)`
			`r = bench(`
			`f"list_runs (runs={n})",`
			`lambda: kb.list_runs(conn, tid),`
			`iterations=10,`
			`)`
			`print(f" min={r['min_ms']:.1f} median={r['median_ms']:.1f} max={r['max_ms']:.1f} ms")`
			`r["run_count"] = n`
			`results.append(r)`
			`conn.close()`

			`# ============ SUMMARY TABLE ============`
			`print()`
			`print("=" * 60)`
			`print("SUMMARY")`
			`print("=" * 60)`
			`print(f"{'Benchmark':<50} {'min':>8} {'median':>8} {'max':>8}")`
			`for r in results:`
			`print(f"{r['label']:<50} {r['min_ms']:>7.1f}ms {r['median_ms']:>7.1f}ms {r['max_ms']:>7.1f}ms")`

			`# Save for future diffing.`
			`out_path = "/tmp/kanban_bench_results.json"`
			`with open(out_path, "w") as f:`
			`json.dump(results, f, indent=2)`
			`print(f"\nResults saved to {out_path}")`


			`if __name__ == "__main__":`
			`main()`