fix(tui): bound live render memory pressure

helix4u
2026-04-25 16:43:55 -06:00
committed by Teknium
parent 778fd1898e
commit 1d24cb0e6e
10 changed files with 236 additions and 18 deletions

View File

@@ -1,6 +1,7 @@
import { describe, expect, it } from 'vitest'
import {
boundedLiveRenderText,
edgePreview,
estimateRows,
estimateTokensRough,
@@ -106,3 +107,25 @@ describe('estimateRows', () => {
expect(estimateRows(snake, w)).toBe(estimateRows(plain, w))
})
})
describe('boundedLiveRenderText', () => {
it('keeps short text unchanged', () => {
expect(boundedLiveRenderText('alpha\nbeta', { maxChars: 50, maxLines: 5 })).toBe('alpha\nbeta')
})
it('keeps the tail of long live text', () => {
const text = Array.from({ length: 6 }, (_, i) => `line-${i + 1}`).join('\n')
const out = boundedLiveRenderText(text, { maxChars: 100, maxLines: 3 })
expect(out).toContain('omitted 3 lines')
expect(out.endsWith('line-4\nline-5\nline-6')).toBe(true)
expect(out).not.toContain('line-1')
})
it('bounds very long single-line text by chars', () => {
const out = boundedLiveRenderText('a'.repeat(60), { maxChars: 12, maxLines: 5 })
expect(out).toContain('omitted 48 chars')
expect(out.endsWith('a'.repeat(12))).toBe(true)
})
})
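For orientation, the shape these tests pin down — a hedged sketch, with exact counts assuming fmtK passes small numbers through unchanged (per the implementation later in this commit):

const text = Array.from({ length: 6 }, (_, i) => `line-${i + 1}`).join('\n')
boundedLiveRenderText(text, { maxChars: 100, maxLines: 3 })
// → '[showing live tail; omitted 3 lines / 21 chars]\nline-4\nline-5\nline-6'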

View File

@@ -2,6 +2,7 @@ import { REASONING_PULSE_MS, STREAM_BATCH_MS } from '../config/timing.js'
import type { SessionInterruptResponse, SubagentEventPayload } from '../gatewayTypes.js'
import { hasReasoningTag, splitReasoning } from '../lib/reasoning.js'
import {
boundedLiveRenderText,
buildToolTrailLine,
estimateTokensRough,
isTransientTrailLine,
@@ -492,7 +493,7 @@ class TurnController {
this.streamTimer = null
const raw = this.bufRef.trimStart()
const visible = hasReasoningTag(raw) ? splitReasoning(raw).text : raw
patchTurnState({ streaming: visible })
patchTurnState({ streaming: boundedLiveRenderText(visible) })
}, STREAM_BATCH_MS)
}
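The bound is applied inside the batched flush deliberately: bufRef keeps the full transcript for the final (non-live) render, and each STREAM_BATCH_MS tick hands the reconciler only a capped tail. A minimal free-function sketch of the class's pattern, reusing the imports shown in the hunk:

let streamTimer: ReturnType<typeof setTimeout> | null = null
const bufRef = { current: '' } // full stream text; retained so the finished message renders unbounded

function onStreamDelta(delta: string, patch: (streaming: string) => void) {
  bufRef.current += delta
  if (streamTimer) return // coalesce bursts: at most one state patch per batch window
  streamTimer = setTimeout(() => {
    streamTimer = null
    const raw = bufRef.current.trimStart()
    const visible = hasReasoningTag(raw) ? splitReasoning(raw).text : raw
    patch(boundedLiveRenderText(visible)) // the live renderer never holds more than the cap
  }, STREAM_BATCH_MS)
}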

View File

@@ -5,7 +5,7 @@ import { LONG_MSG } from '../config/limits.js'
import { sectionMode } from '../domain/details.js'
import { userDisplay } from '../domain/messages.js'
import { ROLE } from '../domain/roles.js'
import { compactPreview, hasAnsi, isPasteBackedText, stripAnsi } from '../lib/text.js'
import { boundedLiveRenderText, compactPreview, hasAnsi, isPasteBackedText, stripAnsi } from '../lib/text.js'
import type { Theme } from '../theme.js'
import type { DetailsMode, Msg, SectionVisibility } from '../types.js'
@@ -84,7 +84,11 @@ export const MessageLine = memo(function MessageLine({
}
if (msg.role === 'assistant') {
return isStreaming ? <Text color={body}>{msg.text}</Text> : <Md compact={compact} t={t} text={msg.text} />
return isStreaming ? (
<Text color={body}>{boundedLiveRenderText(msg.text)}</Text>
) : (
<Md compact={compact} t={t} text={msg.text} />
)
}
if (msg.role === 'user' && msg.text.length > LONG_MSG && isPasteBackedText(msg.text)) {

View File

@@ -16,6 +16,7 @@ import {
widthByDepth
} from '../lib/subagentTree.js'
import {
boundedLiveRenderText,
compactPreview,
estimateTokensRough,
fmtK,
@@ -633,7 +634,12 @@ export const Thinking = memo(function Thinking({
streaming?: boolean
t: Theme
}) {
const preview = useMemo(() => thinkingPreview(reasoning, mode, THINKING_COT_MAX), [mode, reasoning])
const preview = useMemo(() => {
const raw = thinkingPreview(reasoning, mode, THINKING_COT_MAX)
return mode === 'full' ? boundedLiveRenderText(raw) : raw
}, [mode, reasoning])
const lines = useMemo(() => preview.split('\n').map(line => line.replace(/\t/g, ' ')), [preview])
if (!preview && !active) {
@@ -868,8 +874,8 @@ export const ToolTrail = memo(function ToolTrail({
const hasTools = groups.length > 0
const hasSubagents = subagents.length > 0
const hasMeta = meta.length > 0
const hasThinking = !!cot || reasoningActive || busy
const thinkingLive = reasoningActive || reasoningStreaming
const hasThinking = !!cot || thinkingLive
const tokenCount =
reasoningTokens && reasoningTokens > 0 ? reasoningTokens : reasoning ? estimateTokensRough(reasoning) : 0
@@ -1002,7 +1008,7 @@ export const ToolTrail = memo(function ToolTrail({
open: openThinking,
render: rails => (
<Thinking
active={reasoningActive}
active={thinkingLive}
branch="last"
mode="full"
rails={rails}

View File

@@ -1,4 +1,6 @@
export const LARGE_PASTE = { chars: 8000, lines: 80 }
export const LIVE_RENDER_MAX_CHARS = 16_000
export const LIVE_RENDER_MAX_LINES = 240
export const LONG_MSG = 300
export const MAX_HISTORY = 800
export const THINKING_COT_MAX = 160
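The two caps bind independently: maxLines catches chatty multi-line streams, maxChars catches single-line walls of output. Callers that pass no options pick these up as defaults; a hedged sketch (liveText is hypothetical):

boundedLiveRenderText(liveText)                   // defaults: 16_000 chars, 240 lines
boundedLiveRenderText(liveText, { maxLines: 40 }) // tighter line cap; char cap unchanged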

View File

@@ -14,7 +14,9 @@ const gw = new GatewayClient()
gw.start()
const dumpNotice = (snap: MemorySnapshot, dump: HeapDumpResult | null) =>
`hermes-tui: ${snap.level} memory (${formatBytes(snap.heapUsed)}) — auto heap dump → ${dump?.heapPath ?? '(failed)'}\n`
snap.source === 'heap'
? `hermes-tui: ${snap.level} heap (${formatBytes(snap.heapUsed)}, rss ${formatBytes(snap.rss)}) — auto heap dump → ${dump?.heapPath ?? '(failed)'}\n`
: `hermes-tui: ${snap.level} rss (${formatBytes(snap.rss)}, native ${formatBytes(snap.nativeUsed)}) — auto diagnostics → ${dump?.diagPath ?? '(failed)'}\n`
setupGracefulExit({
cleanups: [() => gw.kill()],
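For reference, the two notice shapes the new branches produce (hypothetical byte values, paths elided; exact formatting comes from formatBytes and the template above):

hermes-tui: critical heap (2.7GB, rss 3.2GB) — auto heap dump → .../hermes-...-auto-critical.heapsnapshot
hermes-tui: high rss (4.3GB, native 4.1GB) — auto diagnostics → .../hermes-...-auto-high.diagnostics.json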

View File

@@ -145,11 +145,11 @@ export async function performHeapDump(trigger: MemoryTrigger = 'manual'): Promis
// Diagnostics first — heap-snapshot serialization can crash on very large
// heaps, and the JSON sidecar is the most actionable artifact if so.
const diagnostics = await captureMemoryDiagnostics(trigger)
const dir = process.env.HERMES_HEAPDUMP_DIR?.trim() || join(homedir() || tmpdir(), '.hermes', 'heapdumps')
const dir = memoryDumpDir()
await mkdir(dir, { recursive: true })
const base = `hermes-${new Date().toISOString().replace(/[:.]/g, '-')}-${process.pid}-${trigger}`
const base = memoryDumpBase(trigger)
const heapPath = join(dir, `${base}.heapsnapshot`)
const diagPath = join(dir, `${base}.diagnostics.json`)
@@ -162,6 +162,23 @@ export async function performHeapDump(trigger: MemoryTrigger = 'manual'): Promis
}
}
export async function performDiagnosticsDump(trigger: MemoryTrigger = 'manual'): Promise<HeapDumpResult> {
try {
const diagnostics = await captureMemoryDiagnostics(trigger)
const dir = memoryDumpDir()
await mkdir(dir, { recursive: true })
const diagPath = join(dir, `${memoryDumpBase(trigger)}.diagnostics.json`)
await writeFile(diagPath, JSON.stringify(diagnostics, null, 2), { mode: 0o600 })
return { diagPath, success: true }
} catch (e) {
return { error: e instanceof Error ? e.message : String(e), success: false }
}
}
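A hedged usage sketch of the new entry point (result fields as in the code above): the RSS path writes only the JSON sidecar, so a multi-gigabyte native allocation never goes through V8 heap-snapshot serialization.

const result = await performDiagnosticsDump('auto-high')
if (result.success) {
  console.error(`diagnostics → ${result.diagPath}`) // <dir>/<base>.diagnostics.json
} else {
  console.error(`diagnostics failed: ${result.error}`)
}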
export function formatBytes(bytes: number): string {
if (!Number.isFinite(bytes) || bytes <= 0) {
return '0B'
@@ -177,6 +194,11 @@ const UNITS = ['B', 'KB', 'MB', 'GB', 'TB']
const STARTED_AT = { rss: process.memoryUsage().rss, uptime: process.uptime() }
const memoryDumpDir = () => process.env.HERMES_HEAPDUMP_DIR?.trim() || join(homedir() || tmpdir(), '.hermes', 'heapdumps')
const memoryDumpBase = (trigger: MemoryTrigger) =>
`hermes-${new Date().toISOString().replace(/[:.]/g, '-')}-${process.pid}-${trigger}`
// Returns undefined when the probe isn't available (non-Linux paths, sandboxed FS).
const swallow = async <T>(fn: () => Promise<T>): Promise<T | undefined> => {
try {

View File

@@ -0,0 +1,74 @@
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'
const memory = vi.hoisted(() => ({
performDiagnosticsDump: vi.fn(async () => ({ diagPath: '/tmp/diag.json', success: true })),
performHeapDump: vi.fn(async () => ({ heapPath: '/tmp/heap.heapsnapshot', success: true }))
}))
vi.mock('./memory.js', () => memory)
import { type MemorySnapshot, startMemoryMonitor } from './memoryMonitor.js'
const GB = 1024 ** 3
const usage = (heapUsed: number, rss: number): NodeJS.MemoryUsage =>
({
arrayBuffers: 0,
external: 0,
heapTotal: heapUsed,
heapUsed,
rss
}) as NodeJS.MemoryUsage
describe('startMemoryMonitor', () => {
let memoryUsageSpy: ReturnType<typeof vi.spyOn>
beforeEach(() => {
vi.useFakeTimers()
memory.performDiagnosticsDump.mockClear()
memory.performHeapDump.mockClear()
})
afterEach(() => {
memoryUsageSpy?.mockRestore()
vi.useRealTimers()
})
it('captures diagnostics only for native RSS pressure', async () => {
memoryUsageSpy = vi.spyOn(process, 'memoryUsage').mockReturnValue(usage(100 * 1024 ** 2, 5 * GB))
const snaps: MemorySnapshot[] = []
const stop = startMemoryMonitor({
intervalMs: 1000,
onHigh: snap => snaps.push(snap),
rssHighBytes: 4 * GB
})
await vi.advanceTimersByTimeAsync(1000)
stop()
expect(memory.performDiagnosticsDump).toHaveBeenCalledWith('auto-high')
expect(memory.performHeapDump).not.toHaveBeenCalled()
expect(snaps[0]).toMatchObject({ level: 'high', source: 'rss' })
expect(snaps[0]?.nativeUsed).toBeGreaterThan(4 * GB)
})
it('keeps heap dumps for V8 heap pressure', async () => {
memoryUsageSpy = vi.spyOn(process, 'memoryUsage').mockReturnValue(usage(3 * GB, 3.5 * GB))
const snaps: MemorySnapshot[] = []
const stop = startMemoryMonitor({
intervalMs: 1000,
onCritical: snap => snaps.push(snap)
})
await vi.advanceTimersByTimeAsync(1000)
stop()
expect(memory.performHeapDump).toHaveBeenCalledWith('auto-critical')
expect(memory.performDiagnosticsDump).not.toHaveBeenCalled()
expect(snaps[0]).toMatchObject({ level: 'critical', source: 'heap' })
})
})

View File

@@ -1,11 +1,14 @@
import { type HeapDumpResult, performHeapDump } from './memory.js'
import { type HeapDumpResult, performDiagnosticsDump, performHeapDump } from './memory.js'
export type MemoryLevel = 'critical' | 'high' | 'normal'
export type MemoryTriggerSource = 'heap' | 'rss'
export interface MemorySnapshot {
heapUsed: number
level: MemoryLevel
nativeUsed: number
rss: number
source: MemoryTriggerSource
}
export interface MemoryMonitorOptions {
@@ -14,35 +17,61 @@ export interface MemoryMonitorOptions {
intervalMs?: number
onCritical?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void
onHigh?: (snap: MemorySnapshot, dump: HeapDumpResult | null) => void
rssCriticalBytes?: number
rssHighBytes?: number
}
const GB = 1024 ** 3
const maxLevel = (heapLevel: MemoryLevel, rssLevel: MemoryLevel): MemoryLevel => {
if (heapLevel === 'critical' || rssLevel === 'critical') {
return 'critical'
}
return heapLevel === 'high' || rssLevel === 'high' ? 'high' : 'normal'
}
export function startMemoryMonitor({
criticalBytes = 2.5 * GB,
highBytes = 1.5 * GB,
intervalMs = 10_000,
onCritical,
onHigh
onHigh,
rssCriticalBytes = 8 * GB,
rssHighBytes = 4 * GB
}: MemoryMonitorOptions = {}): () => void {
const dumped = new Set<Exclude<MemoryLevel, 'normal'>>()
const dumped = new Set<`${MemoryTriggerSource}:${Exclude<MemoryLevel, 'normal'>}`>()
const tick = async () => {
const { heapUsed, rss } = process.memoryUsage()
const level: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 'high' : 'normal'
const nativeUsed = Math.max(0, rss - heapUsed)
const heapLevel: MemoryLevel = heapUsed >= criticalBytes ? 'critical' : heapUsed >= highBytes ? 'high' : 'normal'
const rssLevel: MemoryLevel = rss >= rssCriticalBytes ? 'critical' : rss >= rssHighBytes ? 'high' : 'normal'
const level = maxLevel(heapLevel, rssLevel)
if (level === 'normal') {
return void dumped.clear()
}
if (dumped.has(level)) {
const source: MemoryTriggerSource =
heapLevel === level || (heapLevel !== 'normal' && rssLevel === level) ? 'heap' : 'rss'
const key = `${source}:${level}` as const
if (dumped.has(key)) {
return
}
dumped.add(level)
const dump = await performHeapDump(level === 'critical' ? 'auto-critical' : 'auto-high').catch(() => null)
dumped.add(key)
const snap: MemorySnapshot = { heapUsed, level, rss }
const trigger = level === 'critical' ? 'auto-critical' : 'auto-high'
const dump =
source === 'heap'
? await performHeapDump(trigger).catch(() => null)
: await performDiagnosticsDump(trigger).catch(() => null)
const snap: MemorySnapshot = { heapUsed, level, nativeUsed, rss, source }
;(level === 'critical' ? onCritical : onHigh)?.(snap, dump)
}
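Wiring it up, sketched under the defaults above; dumpNotice is the helper from the CLI entry point earlier in this commit. Because the dedup set is keyed by source:level, a heap-driven episode and a later rss-driven one each produce their own artifact, and a return to normal rearms every key:

const stop = startMemoryMonitor({
  onCritical: (snap, dump) => process.stderr.write(dumpNotice(snap, dump)),
  onHigh: (snap, dump) => process.stderr.write(dumpNotice(snap, dump)),
  rssHighBytes: 4 * 1024 ** 3 // 4 GB; rssCriticalBytes defaults to 8 GB
})
// later, on shutdown: stop()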

View File

@@ -1,4 +1,4 @@
import { THINKING_COT_MAX } from '../config/limits.js'
import { LIVE_RENDER_MAX_CHARS, LIVE_RENDER_MAX_LINES, THINKING_COT_MAX } from '../config/limits.js'
import type { ThinkingMode } from '../types.js'
const ESC = String.fromCharCode(27)
@@ -76,6 +76,61 @@ export const thinkingPreview = (reasoning: string, mode: ThinkingMode, max: numb
return !raw || mode === 'collapsed' ? '' : mode === 'full' ? raw : compactPreview(raw.replace(WS_RE, ' '), max)
}
export const boundedLiveRenderText = (
text: string,
{ maxChars = LIVE_RENDER_MAX_CHARS, maxLines = LIVE_RENDER_MAX_LINES } = {}
) => {
if (text.length <= maxChars && text.split('\n', maxLines + 1).length <= maxLines) {
return text
}
let start = 0
let idx = text.length
for (let seen = 0; seen < maxLines && idx > 0; seen++) {
idx = text.lastIndexOf('\n', idx - 1)
start = idx < 0 ? 0 : idx + 1
if (idx < 0) {
break
}
}
const lineStart = start
start = Math.max(lineStart, text.length - maxChars)
if (start > lineStart) {
const nextBreak = text.indexOf('\n', start)
if (nextBreak >= 0 && nextBreak < text.length - 1) {
start = nextBreak + 1
}
}
const tail = text.slice(start).trimStart()
const omittedLines = countNewlines(text, start)
const omittedChars = Math.max(0, text.length - tail.length)
const label =
omittedLines > 0
? `[showing live tail; omitted ${fmtK(omittedLines)} lines / ${fmtK(omittedChars)} chars]\n`
: `[showing live tail; omitted ${fmtK(omittedChars)} chars]\n`
return `${label}${tail}`
}
const countNewlines = (text: string, end: number) => {
let count = 0
for (let i = 0; i < end; i++) {
if (text.charCodeAt(i) === 10) {
count++
}
}
return count
}
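To make the slicing concrete, a worked trace checked against the code above:

const out = boundedLiveRenderText('aaaa\nbb\ncc', { maxChars: 4, maxLines: 2 })
// line walk → start = 5 (tail would be 'bb\ncc'); char clamp → start = 6,
// which is mid-line, so start snaps to the next '\n' + 1 → start = 8
// out === '[showing live tail; omitted 2 lines / 8 chars]\ncc'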
export const stripTrailingPasteNewlines = (text: string) => (/[^\n]/.test(text) ? text.replace(/\n+$/, '') : text)
export const toolTrailLabel = (name: string) =>