diff --git a/scripts/profile-tui.py b/scripts/profile-tui.py new file mode 100755 index 0000000000..5ef987c652 --- /dev/null +++ b/scripts/profile-tui.py @@ -0,0 +1,335 @@ +#!/usr/bin/env python3 +"""Drive the Hermes TUI under HERMES_DEV_PERF and summarize the pipeline. + +Usage: + scripts/profile-tui.py [--session SID] [--hold KEY] [--seconds N] [--rate HZ] + +Defaults: picks the session with the most messages, holds PageUp for 8s at +~30 Hz (matching xterm key-repeat), summarizes ~/.hermes/perf.log on exit. + +The --tui build must exist (run `npm run build` in ui-tui first). This script +launches `node dist/entry.js` directly with HERMES_TUI_RESUME set so it +bypasses the hermes_cli wrapper — we want repeatable timing, not the CLI's +session-picker flow. + +Environment overrides: + HERMES_PERF_LOG (default ~/.hermes/perf.log) + HERMES_PERF_NODE (default node from $PATH) + HERMES_TUI_DIR (default /home/bb/hermes-agent/ui-tui) + +Exit code is 0 if the harness ran and parsed results, 2 if the TUI crashed +or produced no perf data (suggests HERMES_DEV_PERF wiring is broken). +""" + +from __future__ import annotations + +import argparse +import json +import os +import pty +import select +import signal +import sqlite3 +import statistics +import sys +import time +from pathlib import Path +from typing import Any + + +DEFAULT_TUI_DIR = Path(os.environ.get("HERMES_TUI_DIR", "/home/bb/hermes-agent/ui-tui")) +DEFAULT_LOG = Path(os.environ.get("HERMES_PERF_LOG", str(Path.home() / ".hermes" / "perf.log"))) +DEFAULT_STATE_DB = Path.home() / ".hermes" / "state.db" + +# Keystroke escape sequences. Matches what xterm/VT220 send when the +# terminal has bracketed-paste disabled and the key-repeat handler fires. +KEYS = { + "page_up": b"\x1b[5~", + "page_down": b"\x1b[6~", + "wheel_up": b"\x1b[M`!!", # mouse wheel up (SGR-less) — best-effort + "shift_up": b"\x1b[1;2A", + "shift_down": b"\x1b[1;2B", +} + + +def pick_longest_session(db: Path) -> str: + conn = sqlite3.connect(db) + row = conn.execute( + "SELECT id FROM sessions s ORDER BY " + "(SELECT COUNT(*) FROM messages m WHERE m.session_id = s.id) DESC LIMIT 1" + ).fetchone() + if not row: + sys.exit(f"no sessions in {db}") + return row[0] + + +def drain(fd: int, timeout: float) -> bytes: + """Read whatever's available from fd within `timeout`, then return.""" + chunks = [] + end = time.monotonic() + timeout + while time.monotonic() < end: + r, _, _ = select.select([fd], [], [], max(0.0, end - time.monotonic())) + if not r: + break + try: + data = os.read(fd, 4096) + except OSError: + break + if not data: + break + chunks.append(data) + return b"".join(chunks) + + +def hold_key(fd: int, seq: bytes, seconds: float, rate_hz: int) -> int: + """Write `seq` to fd at ~rate_hz for `seconds`. Returns keystrokes sent.""" + interval = 1.0 / max(1, rate_hz) + end = time.monotonic() + seconds + sent = 0 + while time.monotonic() < end: + try: + os.write(fd, seq) + sent += 1 + except OSError: + break + # Drain stdout to keep the PTY buffer flowing; ignore content. + drain(fd, 0) + time.sleep(interval) + return sent + + +def summarize(log: Path, since_ts_ms: int) -> dict[str, Any]: + """Parse perf.log, keep only events newer than since_ts_ms, return stats.""" + react_events: list[dict[str, Any]] = [] + frame_events: list[dict[str, Any]] = [] + if not log.exists(): + return {"error": f"no log at {log}", "react": [], "frame": []} + for line in log.read_text().splitlines(): + line = line.strip() + if not line: + continue + try: + row = json.loads(line) + except json.JSONDecodeError: + continue + if int(row.get("ts", 0)) < since_ts_ms: + continue + src = row.get("src") + if src == "react": + react_events.append(row) + elif src == "frame": + frame_events.append(row) + + return { + "react": react_events, + "frame": frame_events, + } + + +def pct(values: list[float], p: float) -> float: + if not values: + return 0.0 + s = sorted(values) + idx = min(len(s) - 1, int(len(s) * p)) + return s[idx] + + +def format_report(data: dict[str, Any]) -> str: + react = data.get("react") or [] + frames = data.get("frame") or [] + out = [] + + out.append("═══ React Profiler ═══") + if not react: + out.append(" (no react events — HERMES_DEV_PERF wired? threshold too high?)") + else: + by_id: dict[str, list[float]] = {} + for r in react: + by_id.setdefault(r["id"], []).append(r["actualMs"]) + out.append(f" {'pane':<14} {'count':>6} {'p50':>8} {'p95':>8} {'p99':>8} {'max':>8}") + for pid, ms in sorted(by_id.items(), key=lambda kv: -pct(kv[1], 0.99)): + out.append( + f" {pid:<14} {len(ms):>6} {pct(ms,0.50):>8.2f} {pct(ms,0.95):>8.2f} " + f"{pct(ms,0.99):>8.2f} {max(ms):>8.2f}" + ) + + out.append("") + out.append("═══ Ink pipeline ═══") + if not frames: + out.append(" (no frame events — onFrame wiring broken?)") + else: + dur = [f["durationMs"] for f in frames] + phases_present = any(f.get("phases") for f in frames) + out.append(f" frames captured: {len(frames)}") + out.append( + f" durationMs p50={pct(dur,0.50):.2f} p95={pct(dur,0.95):.2f} " + f"p99={pct(dur,0.99):.2f} max={max(dur):.2f}" + ) + # Effective FPS during the run: frames / elapsed seconds. + ts = sorted(f["ts"] for f in frames) + if len(ts) >= 2: + elapsed_s = (ts[-1] - ts[0]) / 1000.0 + fps = len(frames) / elapsed_s if elapsed_s > 0 else float("inf") + out.append(f" throughput: {len(frames)} frames / {elapsed_s:.2f}s = {fps:.1f} fps") + + if phases_present: + fields = ["yoga", "renderer", "diff", "optimize", "write", "commit"] + out.append("") + out.append(f" {'phase':<10} {'p50':>8} {'p95':>8} {'p99':>8} {'max':>8} (ms)") + for field in fields: + vals = [f["phases"][field] for f in frames if f.get("phases")] + if vals: + out.append( + f" {field:<10} {pct(vals,0.50):>8.2f} {pct(vals,0.95):>8.2f} " + f"{pct(vals,0.99):>8.2f} {max(vals):>8.2f}" + ) + # Derived: sum of phases vs durationMs (reveals hidden time). + sum_ps = [ + sum(f["phases"][k] for k in fields) + for f in frames if f.get("phases") + ] + if sum_ps: + dur_match = [f["durationMs"] for f in frames if f.get("phases")] + deltas = [d - s for d, s in zip(dur_match, sum_ps)] + out.append( + f" {'dur-Σphases':<10} {pct(deltas,0.50):>8.2f} {pct(deltas,0.95):>8.2f} " + f"{pct(deltas,0.99):>8.2f} {max(deltas):>8.2f} (unaccounted-for time)" + ) + + # Yoga counters + visited = [f["phases"]["yogaVisited"] for f in frames if f.get("phases")] + measured = [f["phases"]["yogaMeasured"] for f in frames if f.get("phases")] + cache_hits = [f["phases"]["yogaCacheHits"] for f in frames if f.get("phases")] + live = [f["phases"]["yogaLive"] for f in frames if f.get("phases")] + out.append("") + out.append(" Yoga counters (per frame):") + for name, vals in ( + ("visited", visited), + ("measured", measured), + ("cacheHits", cache_hits), + ("live", live), + ): + if vals: + out.append(f" {name:<11} p50={pct(vals,0.5):.0f} p99={pct(vals,0.99):.0f} max={max(vals)}") + + # Patch counts — proxy for "how much changed each frame" + patches = [f["phases"]["patches"] for f in frames if f.get("phases")] + if patches: + out.append( + f" patches p50={pct(patches,0.5):.0f} p99={pct(patches,0.99):.0f} " + f"max={max(patches)} total={sum(patches)}" + ) + + # Flickers + flicker_frames = [f for f in frames if f.get("flickers")] + if flicker_frames: + out.append("") + out.append(f" ⚠ flickers detected in {len(flicker_frames)} frames") + reasons: dict[str, int] = {} + for f in flicker_frames: + for fl in f["flickers"]: + reasons[fl["reason"]] = reasons.get(fl["reason"], 0) + 1 + for reason, n in sorted(reasons.items(), key=lambda kv: -kv[1]): + out.append(f" {reason}: {n}") + + return "\n".join(out) + + +def main() -> int: + p = argparse.ArgumentParser() + p.add_argument("--session", help="session id to resume (default: longest in db)") + p.add_argument("--hold", default="page_up", choices=sorted(KEYS.keys()), help="key to hold") + p.add_argument("--seconds", type=float, default=8.0, help="how long to hold the key") + p.add_argument("--rate", type=int, default=30, help="keystrokes per second") + p.add_argument("--warmup", type=float, default=3.0, help="seconds to wait after launch before input") + p.add_argument("--threshold-ms", type=float, default=0.0, help="HERMES_DEV_PERF_MS (0 = capture all)") + p.add_argument("--cols", type=int, default=120) + p.add_argument("--rows", type=int, default=40) + p.add_argument("--keep-log", action="store_true", help="don't wipe perf.log before run") + p.add_argument("--tui-dir", default=str(DEFAULT_TUI_DIR)) + p.add_argument("--log", default=str(DEFAULT_LOG)) + args = p.parse_args() + + tui_dir = Path(args.tui_dir).resolve() + entry = tui_dir / "dist" / "entry.js" + if not entry.exists(): + sys.exit(f"{entry} missing — run `npm run build` in {tui_dir} first") + + sid = args.session or pick_longest_session(DEFAULT_STATE_DB) + print(f"• session: {sid}") + print(f"• hold: {args.hold} x {args.rate}Hz for {args.seconds}s after {args.warmup}s warmup") + print(f"• terminal: {args.cols}x{args.rows}") + + log = Path(args.log) + if not args.keep_log and log.exists(): + log.unlink() + + since_ms = int(time.time() * 1000) + + env = os.environ.copy() + env["HERMES_DEV_PERF"] = "1" + env["HERMES_DEV_PERF_MS"] = str(args.threshold_ms) + env["HERMES_DEV_PERF_LOG"] = str(log) + env["HERMES_TUI_RESUME"] = sid + env["COLUMNS"] = str(args.cols) + env["LINES"] = str(args.rows) + env["TERM"] = env.get("TERM", "xterm-256color") + # Ensure bracketed-paste doesn't intercept our PageUp writes. + + node = os.environ.get("HERMES_PERF_NODE", "node") + + # Fork under a PTY so the TUI enters alt-screen / raw-mode cleanly. + pid, fd = pty.fork() + if pid == 0: + # Child: exec node. PTY makes stdin/stdout/stderr all TTY. + os.execvpe(node, [node, str(entry)], env) + + try: + # Set initial PTY size via ioctl (TIOCSWINSZ). + import fcntl, struct, termios + winsize = struct.pack("HHHH", args.rows, args.cols, 0, 0) + fcntl.ioctl(fd, termios.TIOCSWINSZ, winsize) + + print(f"• pid: {pid} fd: {fd}") + print(f"• warmup {args.warmup}s (drain startup output)…") + drain(fd, args.warmup) + + print(f"• holding {args.hold}…") + sent = hold_key(fd, KEYS[args.hold], args.seconds, args.rate) + print(f" sent {sent} keystrokes") + + # Small cooldown so trailing frames get written to the log. + drain(fd, 0.5) + finally: + # Kill TUI cleanly. SIGTERM first, SIGKILL if stubborn. + try: + os.kill(pid, signal.SIGTERM) + for _ in range(10): + pid_done, _ = os.waitpid(pid, os.WNOHANG) + if pid_done == pid: + break + time.sleep(0.1) + else: + os.kill(pid, signal.SIGKILL) + os.waitpid(pid, 0) + except (ProcessLookupError, ChildProcessError): + pass + try: + os.close(fd) + except OSError: + pass + + # Give the log a moment to flush. + time.sleep(0.2) + + data = summarize(log, since_ms) + print() + print(format_report(data)) + + if not data["react"] and not data["frame"]: + return 2 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/ui-tui/src/entry.tsx b/ui-tui/src/entry.tsx index 8fdf9f68fb..92ae4a71c0 100644 --- a/ui-tui/src/entry.tsx +++ b/ui-tui/src/entry.tsx @@ -41,6 +41,10 @@ if (process.env.HERMES_HEAPDUMP_ON_START === '1') { process.on('beforeExit', () => stopMemoryMonitor()) -const [{ render }, { App }] = await Promise.all([import('@hermes/ink'), import('./app.js')]) +const [{ render }, { App }, { logFrameEvent }] = await Promise.all([ + import('@hermes/ink'), + import('./app.js'), + import('./lib/perfPane.js') +]) -render(, { exitOnCtrlC: false }) +render(, { exitOnCtrlC: false, onFrame: logFrameEvent }) diff --git a/ui-tui/src/lib/perfPane.tsx b/ui-tui/src/lib/perfPane.tsx index 32b260b721..feae1f0b3b 100644 --- a/ui-tui/src/lib/perfPane.tsx +++ b/ui-tui/src/lib/perfPane.tsx @@ -1,27 +1,53 @@ -// Perf instrumentation: wraps React.Profiler around named panes and writes -// commit timings to a log file when HERMES_DEV_PERF is set. Enabled per-run -// via the env var; zero-cost (Profiler is replaced by a Fragment) when off. +// Perf instrumentation for the full render pipeline. // -// Log format: one JSON object per line, for easy `jq` filtering. We only -// log commits that exceed a threshold (default 2ms) so the file doesn't -// fill up with sub-millisecond idle renders. Tune via HERMES_DEV_PERF_MS. +// Two sources of timing: +// 1. React.Profiler wrapper (PerfPane) → per-pane commit times. Shows +// which subtree is reconciling and for how long. +// 2. Ink onFrame callback (logFrameEvent) → per-frame pipeline phases: +// yoga (calculateLayout), renderer (DOM → screen buffer), diff +// (prev vs current screen → patches), optimize (patch merge/dedupe), +// write (serialize → ANSI → stdout), plus yoga counters (visited, +// measured, cacheHits, live). Shows where the time goes BELOW React. // -// Usage in consumers: -// import { PerfPane } from './perfPane.js' -// ... +// Both sources gate on HERMES_DEV_PERF=1 and dump JSON-lines to the same +// log (default ~/.hermes/perf.log, override via HERMES_DEV_PERF_LOG). +// Events are tagged { src: 'react' | 'frame' } so jq can split them. // -// Inspect with: -// tail -f ~/.hermes/perf.log | jq -c 'select(.actualMs > 8)' -// jq -s 'group_by(.id) | map({id: .[0].id, count: length, p50: (sort_by(.actualMs) | .[length/2|floor].actualMs), p99: (sort_by(.actualMs) | .[length*0.99|floor].actualMs)})' ~/.hermes/perf.log +// Threshold HERMES_DEV_PERF_MS (default 2ms) skips sub-millisecond idle +// frames. For the 2fps-during-PageUp investigation, set +// HERMES_DEV_PERF_MS=0 to capture everything, then filter with jq. +// +// Zero cost when the env var is unset: PerfPane returns children +// directly (no Profiler fiber), logFrameEvent is a noop on the onFrame +// callback — the ink instance isn't given the callback at all. +// +// Usage: +// # entry.tsx wires logFrameEvent into render() +// import { logFrameEvent, PerfPane } from './lib/perfPane.js' +// render(, { onFrame: logFrameEvent }) +// +// Analysis helpers (once you've captured a session): +// tail -f ~/.hermes/perf.log | jq -c 'select(.src=="frame" and .durationMs > 16)' +// # p50/p99 per phase across frame events: +// jq -s '[.[] | select(.src=="frame")] | +// {n: length, +// dur_p50: (sort_by(.durationMs) | .[length/2|floor].durationMs), +// dur_p99: (sort_by(.durationMs) | .[length*0.99|floor].durationMs), +// yoga_p99: (sort_by(.phases.yoga) | .[length*0.99|floor].phases.yoga), +// write_p99: (sort_by(.phases.write) | .[length*0.99|floor].phases.write), +// diff_p99: (sort_by(.phases.diff) | .[length*0.99|floor].phases.diff), +// patches_p99: (sort_by(.phases.patches) | .[length*0.99|floor].phases.patches)}' \ +// ~/.hermes/perf.log import { appendFileSync, mkdirSync } from 'node:fs' import { homedir } from 'node:os' import { dirname, join } from 'node:path' +import type { FrameEvent } from '@hermes/ink' import { Profiler, type ProfilerOnRenderCallback, type ReactNode } from 'react' const ENABLED = /^(?:1|true|yes|on)$/i.test((process.env.HERMES_DEV_PERF ?? '').trim()) -const THRESHOLD_MS = Number(process.env.HERMES_DEV_PERF_MS ?? '2') || 2 +const THRESHOLD_MS = Number(process.env.HERMES_DEV_PERF_MS ?? '2') || 0 const LOG_PATH = process.env.HERMES_DEV_PERF_LOG?.trim() || join(homedir(), '.hermes', 'perf.log') let initialized = false @@ -42,23 +68,9 @@ const ensureLogDir = () => { } } -const onRender: ProfilerOnRenderCallback = (id, phase, actualMs, baseMs, startTime, commitTime) => { - if (actualMs < THRESHOLD_MS) { - return - } - +const writeRow = (row: Record) => { ensureLogDir() - const row = { - actualMs: Math.round(actualMs * 100) / 100, - baseMs: Math.round(baseMs * 100) / 100, - commitMs: Math.round(commitTime * 100) / 100, - id, - phase, - startMs: Math.round(startTime * 100) / 100, - ts: Date.now() - } - try { appendFileSync(LOG_PATH, `${JSON.stringify(row)}\n`) } catch { @@ -66,6 +78,25 @@ const onRender: ProfilerOnRenderCallback = (id, phase, actualMs, baseMs, startTi } } +const round2 = (n: number) => Math.round(n * 100) / 100 + +const onRender: ProfilerOnRenderCallback = (id, phase, actualMs, baseMs, startTime, commitTime) => { + if (actualMs < THRESHOLD_MS) { + return + } + + writeRow({ + actualMs: round2(actualMs), + baseMs: round2(baseMs), + commitMs: round2(commitTime), + id, + phase, + src: 'react', + startMs: round2(startTime), + ts: Date.now() + }) +} + export function PerfPane({ children, id }: { children: ReactNode; id: string }) { if (!ENABLED) { return children @@ -78,5 +109,42 @@ export function PerfPane({ children, id }: { children: ReactNode; id: string }) ) } +/** + * Ink onFrame handler. Captures the FULL render pipeline: yoga calculateLayout, + * DOM → screen buffer, screen diff, patch optimize, and stdout write. + * + * Returns `undefined` when disabled so `render()` doesn't attach the callback — + * ink only pays the timing cost when the callback is truthy. + */ +export const logFrameEvent = ENABLED + ? (event: FrameEvent) => { + if (event.durationMs < THRESHOLD_MS) { + return + } + + writeRow({ + durationMs: round2(event.durationMs), + flickers: event.flickers.length ? event.flickers : undefined, + phases: event.phases + ? { + commit: round2(event.phases.commit), + diff: round2(event.phases.diff), + optimize: round2(event.phases.optimize), + patches: event.phases.patches, + renderer: round2(event.phases.renderer), + write: round2(event.phases.write), + yoga: round2(event.phases.yoga), + yogaCacheHits: event.phases.yogaCacheHits, + yogaLive: event.phases.yogaLive, + yogaMeasured: event.phases.yogaMeasured, + yogaVisited: event.phases.yogaVisited + } + : undefined, + src: 'frame', + ts: Date.now() + }) + } + : undefined + export const PERF_ENABLED = ENABLED export const PERF_LOG_PATH = LOG_PATH diff --git a/ui-tui/src/types/hermes-ink.d.ts b/ui-tui/src/types/hermes-ink.d.ts index c878bdb4ea..762166af20 100644 --- a/ui-tui/src/types/hermes-ink.d.ts +++ b/ui-tui/src/types/hermes-ink.d.ts @@ -33,11 +33,35 @@ declare module '@hermes/ink' { export type InputHandler = (input: string, key: Key, event: InputEvent) => void + export type FrameEvent = { + readonly durationMs: number + readonly phases?: { + readonly renderer: number + readonly diff: number + readonly optimize: number + readonly write: number + readonly patches: number + readonly yoga: number + readonly commit: number + readonly yogaVisited: number + readonly yogaMeasured: number + readonly yogaCacheHits: number + readonly yogaLive: number + } + readonly flickers: ReadonlyArray<{ + readonly desiredHeight: number + readonly availableHeight: number + readonly reason: 'resize' | 'offscreen' | 'clear' + }> + } + export type RenderOptions = { readonly stdin?: NodeJS.ReadStream readonly stdout?: NodeJS.WriteStream readonly stderr?: NodeJS.WriteStream readonly exitOnCtrlC?: boolean + readonly patchConsole?: boolean + readonly onFrame?: (event: FrameEvent) => void } export type Instance = {