mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-10 12:18:44 +08:00
Compare commits
4 Commits
hermes/cur
...
bb/tui-ans
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b2f9088009 | ||
|
|
05d444fdf4 | ||
|
|
ef57e05f6a | ||
|
|
9fcfb5d08f |
143
ui-tui/src/__tests__/ansiHeightParity.test.ts
Normal file
143
ui-tui/src/__tests__/ansiHeightParity.test.ts
Normal file
@@ -0,0 +1,143 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import { stripAnsi } from '../lib/text.js'
|
||||
import { estimatedMsgHeight, messageHeightKey, wrappedLines } from '../lib/virtualHeights.js'
|
||||
import type { Msg } from '../types.js'
|
||||
|
||||
const ESC = String.fromCharCode(27)
|
||||
|
||||
// Heavy per-word truecolor SGR — the shape cli-highlight / Rich tool output
|
||||
// takes when it lands in assistant/system history. Visible text is short;
|
||||
// raw byte length is ~5-6x larger.
|
||||
const colorize = (line: string) =>
|
||||
line
|
||||
.split(' ')
|
||||
.map((w, i) => `${ESC}[38;2;${(i * 40) % 255};${(i * 17) % 255};${(i * 90) % 255}m${w}${ESC}[39m`)
|
||||
.join(' ')
|
||||
|
||||
const cols = 80
|
||||
const opts = { compact: false, details: false }
|
||||
|
||||
describe('ANSI message height estimation parity (regression for resume desync)', () => {
|
||||
it('wrappedLines must measure visible width, not raw escape bytes', () => {
|
||||
const visible = 'the quick brown fox jumps over the lazy dog and keeps going for a while'
|
||||
const ansi = colorize(visible)
|
||||
|
||||
// The estimator path strips ANSI before measuring; verify the helper a
|
||||
// caller would use produces the visible row count, not the byte count.
|
||||
expect(wrappedLines(stripAnsi(ansi), cols)).toBe(wrappedLines(visible, cols))
|
||||
// Guard the test fixture itself: the raw form really is much longer.
|
||||
expect(ansi.length).toBeGreaterThan(visible.length * 3)
|
||||
})
|
||||
|
||||
it('estimatedMsgHeight for an ANSI assistant message matches its visible height', () => {
|
||||
const lines = Array.from({ length: 20 }, (_, i) =>
|
||||
colorize(`line ${i} with several colored words that wrap when measured by raw byte length only`)
|
||||
)
|
||||
|
||||
const ansiMsg: Msg = { role: 'assistant', text: lines.join('\n') }
|
||||
const visibleMsg: Msg = { role: 'assistant', text: stripAnsi(lines.join('\n')) }
|
||||
|
||||
const estAnsi = estimatedMsgHeight(ansiMsg, cols, opts)
|
||||
const estVisible = estimatedMsgHeight(visibleMsg, cols, opts)
|
||||
|
||||
// Before the fix this was ~3x (120 vs 40). Allow ±2 for paragraph-gap
|
||||
// heuristics that key off blank lines.
|
||||
expect(Math.abs(estAnsi - estVisible)).toBeLessThanOrEqual(2)
|
||||
})
|
||||
|
||||
it('FAKE LONG SESSION: cold-mount offsets track visible heights so the right rows mount', () => {
|
||||
// Synthesize a long resumed session: alternating user prompts and
|
||||
// ANSI-heavy assistant replies (highlighted code / tool echoes), exactly
|
||||
// what `-c` rehydrates from the DB before any row is Yoga-measured.
|
||||
const items: { key: string; msg: Msg }[] = []
|
||||
|
||||
for (let turn = 0; turn < 120; turn++) {
|
||||
const user: Msg = { role: 'user', text: `question ${turn} about the codebase` }
|
||||
|
||||
const codeLines = Array.from({ length: 12 }, (_, i) =>
|
||||
colorize(` const result_${i} = doSomething(${i}, "a fairly long argument string here")`)
|
||||
)
|
||||
|
||||
const assistant: Msg = { role: 'assistant', text: `Here is turn ${turn}:\n\n${codeLines.join('\n')}` }
|
||||
items.push({ key: messageHeightKey(user), msg: user })
|
||||
items.push({ key: messageHeightKey(assistant), msg: assistant })
|
||||
}
|
||||
|
||||
// Build the prefix-sum offset array the way useVirtualHistory does on
|
||||
// cold mount (every height comes from the estimator — no Yoga yet).
|
||||
const estOffsets = new Float64Array(items.length + 1)
|
||||
const visibleOffsets = new Float64Array(items.length + 1)
|
||||
|
||||
for (let i = 0; i < items.length; i++) {
|
||||
estOffsets[i + 1] = estOffsets[i]! + estimatedMsgHeight(items[i]!.msg, cols, opts)
|
||||
const visMsg: Msg = { ...items[i]!.msg, text: stripAnsi(items[i]!.msg.text) }
|
||||
visibleOffsets[i + 1] = visibleOffsets[i]! + estimatedMsgHeight(visMsg, cols, opts)
|
||||
}
|
||||
|
||||
const estTotal = estOffsets[items.length]!
|
||||
const visTotal = visibleOffsets[items.length]!
|
||||
|
||||
// The estimated total height must track the visible total closely. Pre-fix
|
||||
// the ANSI estimate was several multiples larger (escape bytes counted as
|
||||
// width), so the binary search that maps scrollTop → mounted-row-index
|
||||
// landed on the wrong items → "jumbled / broken text" on resume.
|
||||
//
|
||||
// Tolerance note: the visible baseline runs estimatedMsgHeight on the
|
||||
// ANSI-stripped text, which (being non-ANSI) still earns the markdown
|
||||
// paragraph-gap bonus that the ANSI render path legitimately omits. That
|
||||
// asymmetry is a few rows across the whole synthetic, so allow 8% here —
|
||||
// the estimator-vs-REAL-render parity (within a couple rows) is asserted
|
||||
// in messageLineAnsiHeight.test.tsx, which is the truth source.
|
||||
const drift = Math.abs(estTotal - visTotal) / visTotal
|
||||
|
||||
expect(drift).toBeLessThan(0.08)
|
||||
})
|
||||
|
||||
it('upperBound on estimated offsets selects the same start index as visible offsets', () => {
|
||||
// Directly exercise the failure mechanism: with inflated offsets the
|
||||
// viewport's scrollTop maps to a different row than the one actually
|
||||
// visible there. This asserts they agree at several scroll positions.
|
||||
const items: Msg[] = []
|
||||
|
||||
for (let t = 0; t < 80; t++) {
|
||||
items.push({ role: 'user', text: `q${t}` })
|
||||
const code = Array.from({ length: 8 }, (_, i) => colorize(`token_${i} = highlight(${i})`)).join('\n')
|
||||
items.push({ role: 'assistant', text: code })
|
||||
}
|
||||
|
||||
const est = new Float64Array(items.length + 1)
|
||||
const vis = new Float64Array(items.length + 1)
|
||||
|
||||
for (let i = 0; i < items.length; i++) {
|
||||
est[i + 1] = est[i]! + estimatedMsgHeight(items[i]!, cols, opts)
|
||||
vis[i + 1] = vis[i]! + estimatedMsgHeight({ ...items[i]!, text: stripAnsi(items[i]!.text) }, cols, opts)
|
||||
}
|
||||
|
||||
const upperBound = (arr: Float64Array, target: number) => {
|
||||
let lo = 0
|
||||
let hi = arr.length
|
||||
|
||||
while (lo < hi) {
|
||||
const mid = (lo + hi) >> 1
|
||||
|
||||
arr[mid]! <= target ? (lo = mid + 1) : (hi = mid)
|
||||
}
|
||||
|
||||
return lo
|
||||
}
|
||||
|
||||
// Probe at 10%, 30%, 50%, 70%, 90% down the VISIBLE transcript. The row
|
||||
// index the estimator-derived offsets point to must match the visible
|
||||
// one, otherwise the wrong rows mount into the viewport.
|
||||
const visTotal = vis[items.length]!
|
||||
|
||||
for (const frac of [0.1, 0.3, 0.5, 0.7, 0.9]) {
|
||||
const scrollTop = visTotal * frac
|
||||
const visIdx = upperBound(vis, scrollTop) - 1
|
||||
const estIdx = upperBound(est, scrollTop) - 1
|
||||
|
||||
expect(estIdx).toBe(visIdx)
|
||||
}
|
||||
})
|
||||
})
|
||||
81
ui-tui/src/__tests__/boundedAnsiStrip.test.ts
Normal file
81
ui-tui/src/__tests__/boundedAnsiStrip.test.ts
Normal file
@@ -0,0 +1,81 @@
|
||||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import { stripAnsi } from '../lib/text.js'
|
||||
import { estimatedMsgHeight, wrappedLines } from '../lib/virtualHeights.js'
|
||||
import type { Msg } from '../types.js'
|
||||
|
||||
// Guards the Copilot perf concern on #35992: stripAnsi(msg.text) ran over the
|
||||
// FULL message before wrappedLines' byte budget could cap the work, so a
|
||||
// multi-megabyte ANSI-heavy message re-introduced O(text) cost at cold-mount.
|
||||
// The bounded strippedForEstimate slices to the wrap budget (× an ANSI overhead
|
||||
// factor) first. These tests assert (a) the bounded estimate still equals the
|
||||
// full-strip estimate even when the message saturates the row cap, including
|
||||
// for the densest realistic SGR, and (b) cold-mounting a giant ANSI message
|
||||
// stays fast (no full-text strip).
|
||||
|
||||
// Densest realistic SGR: per-word truecolor open + reset (~20 bytes wrapping a
|
||||
// short token, ~5.5× overhead) — the cli-highlight / Rich style that triggered
|
||||
// the original resume desync.
|
||||
const colorWord = (w: string) => `\u001b[38;2;200;120;40m${w}\u001b[39m`
|
||||
|
||||
const makeAnsiBlob = (lines: number, wordsPerLine: number) => {
|
||||
const row = Array.from({ length: wordsPerLine }, (_, i) => colorWord(`tok${i}`)).join(' ')
|
||||
|
||||
return Array.from({ length: lines }, () => row).join('\n')
|
||||
}
|
||||
|
||||
const asMsg = (text: string): Msg => ({ role: 'assistant', text }) as Msg
|
||||
const opts = { compact: false, details: false }
|
||||
|
||||
describe('strippedForEstimate (bounded ANSI strip)', () => {
|
||||
it('matches the full-strip estimate when a dense-SGR message saturates the row cap', () => {
|
||||
// Far longer than MAX_ESTIMATE_LINES (800) rows. The invariant: the bounded
|
||||
// strip yields the SAME estimate as stripping the whole string — the perf
|
||||
// fix must not change the number even for the densest SGR.
|
||||
const blob = makeAnsiBlob(2000, 12)
|
||||
const cols = 80
|
||||
|
||||
const bounded = estimatedMsgHeight(asMsg(blob), cols, opts)
|
||||
const fullStrip = estimatedMsgHeight(asMsg(stripAnsi(blob)), cols, opts)
|
||||
|
||||
expect(fullStrip).toBe(bounded)
|
||||
})
|
||||
|
||||
it('matches the full-strip estimate for a message that fits within the budget', () => {
|
||||
const blob = makeAnsiBlob(40, 10)
|
||||
const cols = 80
|
||||
|
||||
const bounded = estimatedMsgHeight(asMsg(blob), cols, opts)
|
||||
const fullStrip = estimatedMsgHeight(asMsg(stripAnsi(blob)), cols, opts)
|
||||
|
||||
expect(bounded).toBe(fullStrip)
|
||||
// sanity: small message, no clamping
|
||||
expect(bounded).toBe(wrappedLines(stripAnsi(blob), cols))
|
||||
})
|
||||
|
||||
it('cold-mounts a multi-megabyte ANSI message without an O(text) strip', () => {
|
||||
// ~23 MB of dense ANSI — unbounded, this would chew the whole buffer on
|
||||
// every offset rebuild. Bounded, it touches only ~520 KB (budget × 8).
|
||||
const blob = makeAnsiBlob(60_000, 14)
|
||||
expect(blob.length).toBeGreaterThan(3_000_000)
|
||||
|
||||
const cols = 100
|
||||
let h = 0
|
||||
const start = performance.now()
|
||||
|
||||
// Simulate repeated offset rebuilds (the hot path on cold-mount / resize).
|
||||
for (let i = 0; i < 50; i++) {
|
||||
h = estimatedMsgHeight(asMsg(blob), cols, opts)
|
||||
}
|
||||
|
||||
const elapsed = performance.now() - start
|
||||
|
||||
// Still clamps to the row cap (bounded slice reaches it on dense SGR).
|
||||
expect(h).toBe(800)
|
||||
// 50 estimates over a 23 MB message must stay well under a second. An
|
||||
// unbounded strip (multiple regex passes × 23 MB × 50) would blow past
|
||||
// this by orders of magnitude. Generous ceiling avoids CI flake while
|
||||
// still catching a regression to full-text stripping.
|
||||
expect(elapsed).toBeLessThan(500)
|
||||
})
|
||||
})
|
||||
109
ui-tui/src/__tests__/messageLineAnsiHeight.test.tsx
Normal file
109
ui-tui/src/__tests__/messageLineAnsiHeight.test.tsx
Normal file
@@ -0,0 +1,109 @@
|
||||
import { PassThrough } from 'stream'
|
||||
|
||||
import { Box, renderSync } from '@hermes/ink'
|
||||
import React from 'react'
|
||||
import { describe, expect, it } from 'vitest'
|
||||
|
||||
import { MessageLine } from '../components/messageLine.js'
|
||||
import { stripAnsi } from '../lib/text.js'
|
||||
import { estimatedMsgHeight } from '../lib/virtualHeights.js'
|
||||
import { DEFAULT_THEME } from '../theme.js'
|
||||
import type { Msg } from '../types.js'
|
||||
|
||||
const BEL = String.fromCharCode(7)
|
||||
const ESC = String.fromCharCode(27)
|
||||
const CSI_RE = new RegExp(`${ESC}\\[[0-?]*[ -/]*[@-~]`, 'g')
|
||||
const OSC_RE = new RegExp(`${ESC}\\][\\s\\S]*?(?:${BEL}|${ESC}\\\\)`, 'g')
|
||||
|
||||
const cols = 80
|
||||
|
||||
// Render a node at a fixed terminal width and return its visible line count
|
||||
// (blank trailing lines trimmed) — i.e. the real laid-out height Ink + Yoga
|
||||
// produce, which is what post-mount measurement writes into the height cache.
|
||||
const renderHeight = (node: React.ReactNode): number => {
|
||||
const stdout = new PassThrough()
|
||||
const stdin = new PassThrough()
|
||||
const stderr = new PassThrough()
|
||||
let output = ''
|
||||
|
||||
Object.assign(stdout, { columns: cols, isTTY: false, rows: 24 })
|
||||
Object.assign(stdin, { isTTY: false })
|
||||
Object.assign(stderr, { isTTY: false })
|
||||
stdout.on('data', chunk => {
|
||||
output += chunk.toString()
|
||||
})
|
||||
|
||||
const instance = renderSync(node, {
|
||||
patchConsole: false,
|
||||
stderr: stderr as unknown as NodeJS.WriteStream,
|
||||
stdin: stdin as unknown as NodeJS.ReadStream,
|
||||
stdout: stdout as unknown as NodeJS.WriteStream
|
||||
})
|
||||
|
||||
instance.unmount()
|
||||
instance.cleanup()
|
||||
|
||||
const lines = output
|
||||
.replace(OSC_RE, '')
|
||||
.split('\n')
|
||||
.map(line => stripAnsi(line).replace(CSI_RE, '').trimEnd())
|
||||
|
||||
// Drop trailing blank lines so we count only rows with content.
|
||||
while (lines.length && lines[lines.length - 1] === '') {
|
||||
lines.pop()
|
||||
}
|
||||
|
||||
return lines.length
|
||||
}
|
||||
|
||||
const colorize = (line: string) =>
|
||||
line
|
||||
.split(' ')
|
||||
.map((w, i) => `${ESC}[38;2;${(i * 40) % 255};${(i * 17) % 255};${(i * 90) % 255}m${w}${ESC}[39m`)
|
||||
.join(' ')
|
||||
|
||||
describe('MessageLine render height ↔ estimator parity (ANSI history)', () => {
|
||||
it('estimator predicts the real rendered height of an ANSI assistant message', () => {
|
||||
// A short visible message wrapped in heavy SGR — the cli-highlight shape.
|
||||
const visible = Array.from(
|
||||
{ length: 6 },
|
||||
(_, i) => `line ${i}: some highlighted code tokens that should wrap predictably here`
|
||||
).join('\n')
|
||||
|
||||
const msg: Msg = { role: 'assistant', text: colorize(visible) }
|
||||
|
||||
const rendered = renderHeight(
|
||||
React.createElement(Box, { width: cols }, React.createElement(MessageLine, { cols, msg, t: DEFAULT_THEME }))
|
||||
)
|
||||
|
||||
const estimated = estimatedMsgHeight(msg, cols, { compact: false, details: false })
|
||||
|
||||
// The estimate must be in the same ballpark as the real render. Before
|
||||
// the ANSI-strip fix the estimate counted escape bytes as width and ran
|
||||
// 3x high, which is what desynced the virtual list on resume. Allow a
|
||||
// small margin for gutter/separator chrome the estimator folds in.
|
||||
expect(Math.abs(estimated - rendered)).toBeLessThanOrEqual(4)
|
||||
})
|
||||
|
||||
it('the same message stripped of ANSI renders to the same height it estimates', () => {
|
||||
const visible = Array.from(
|
||||
{ length: 6 },
|
||||
(_, i) => `line ${i}: some highlighted code tokens that should wrap predictably here`
|
||||
).join('\n')
|
||||
|
||||
const ansiMsg: Msg = { role: 'assistant', text: colorize(visible) }
|
||||
const plainMsg: Msg = { role: 'assistant', text: visible }
|
||||
|
||||
const ansiRendered = renderHeight(
|
||||
React.createElement(Box, { width: cols }, React.createElement(MessageLine, { cols, msg: ansiMsg, t: DEFAULT_THEME }))
|
||||
)
|
||||
|
||||
const plainRendered = renderHeight(
|
||||
React.createElement(Box, { width: cols }, React.createElement(MessageLine, { cols, msg: plainMsg, t: DEFAULT_THEME }))
|
||||
)
|
||||
|
||||
// <Ansi> lays out only visible graphemes, so colored and plain forms of
|
||||
// the same text occupy the same number of rows.
|
||||
expect(Math.abs(ansiRendered - plainRendered)).toBeLessThanOrEqual(1)
|
||||
})
|
||||
})
|
||||
@@ -2,6 +2,7 @@ import { TERMUX_TUI_MODE } from '../config/env.js'
|
||||
import type { Msg } from '../types.js'
|
||||
|
||||
import { transcriptBodyWidth } from './inputMetrics.js'
|
||||
import { hasAnsi, stripAnsi } from './text.js'
|
||||
|
||||
const hashText = (text: string) => {
|
||||
let h = 5381
|
||||
@@ -66,6 +67,39 @@ export const wrappedLines = (text: string, width: number, maxLines: number = MAX
|
||||
return n
|
||||
}
|
||||
|
||||
// Strip ANSI from only as much of the raw text as wrappedLines could possibly
|
||||
// need to saturate its row cap, so a multi-megabyte ANSI-heavy message doesn't
|
||||
// re-introduce the O(text) cost the wrappedLines byte-budget exists to avoid.
|
||||
//
|
||||
// wrappedLines stops counting once it reaches `maxLines` rows; the most
|
||||
// VISIBLE characters it can consume before that is `maxLines * width + maxLines`
|
||||
// (one cell per column per row, plus a per-line slack). ANSI escape sequences
|
||||
// add non-visible bytes interspersed with that visible content, so we slice a
|
||||
// multiple of the visible budget before stripping — the ANSI_OVERHEAD factor is
|
||||
// the assumed worst-case ratio of total bytes to visible bytes. The densest
|
||||
// realistic SGR (per-word truecolor `\x1b[38;2;r;g;bm…\x1b[39m`, ~20 bytes
|
||||
// wrapping a short token) measures ~5.5× overhead; 8× leaves headroom so the
|
||||
// bounded estimate still reaches the row cap on such input. Even if a
|
||||
// pathological message exceeds it, the only consequence is the estimate
|
||||
// clipping below the true height past row 800 — already the documented
|
||||
// MAX_ESTIMATE_LINES behavior, and post-mount Yoga measurement converges
|
||||
// anyway. Slicing BEFORE stripping is safe because stripAnsi only removes
|
||||
// characters, so the stripped slice still contains at least as many visible
|
||||
// chars as wrappedLines needs.
|
||||
const ANSI_OVERHEAD = 8
|
||||
|
||||
const strippedForEstimate = (raw: string, width: number) => {
|
||||
if (!hasAnsi(raw)) {
|
||||
return raw
|
||||
}
|
||||
|
||||
const w = Math.max(1, width)
|
||||
const visibleBudget = MAX_ESTIMATE_LINES * w + MAX_ESTIMATE_LINES
|
||||
const sliceBudget = visibleBudget * ANSI_OVERHEAD
|
||||
|
||||
return stripAnsi(raw.length > sliceBudget ? raw.slice(0, sliceBudget) : raw)
|
||||
}
|
||||
|
||||
export const estimatedMsgHeight = (
|
||||
msg: Msg,
|
||||
cols: number,
|
||||
@@ -102,14 +136,37 @@ export const estimatedMsgHeight = (
|
||||
}
|
||||
|
||||
const bodyWidth = transcriptBodyWidth(cols, msg.role, userPrompt, TERMUX_TUI_MODE)
|
||||
const text = msg.text
|
||||
// Parity with MessageLine: any message carrying ANSI is rendered through
|
||||
// <Ansi>/sanitizeAnsiForRender (role !== 'user') or laid out by the
|
||||
// terminal on its VISIBLE width — the escape bytes never occupy columns.
|
||||
// Measuring the raw string makes wrappedLines count escape bytes as
|
||||
// width, inflating the estimate ~3-6x for SGR-heavy history (cli-highlight
|
||||
// tool output, Rich markup). On long resumed sessions that drift desyncs
|
||||
// the virtual list's offsets from the post-mount Yoga heights, which
|
||||
// shows up as blank gaps and — once the wrong rows mount into the
|
||||
// viewport — overlapping/jumbled text. Strip first so the estimate
|
||||
// tracks what actually renders. (/compress masks the bug by replacing
|
||||
// ANSI-laden history with clean summary text.) strippedForEstimate bounds
|
||||
// the strip to the wrap budget so huge ANSI messages stay O(budget), not
|
||||
// O(text) — see its comment.
|
||||
const text = strippedForEstimate(msg.text, bodyWidth)
|
||||
let h = wrappedLines(text || ' ', bodyWidth)
|
||||
|
||||
if (!compact && msg.role === 'assistant') {
|
||||
// Paragraph gaps add up to 6 extra rows of breathing room. Slice
|
||||
// first so the regex never walks more than the first ~16k chars of
|
||||
// a giant assistant message — post-mount Yoga measurement converges
|
||||
// to the real height regardless of how the estimate undercounts.
|
||||
if (!compact && msg.role === 'assistant' && !hasAnsi(msg.text)) {
|
||||
// Paragraph gaps add up to 6 extra rows of breathing room — but ONLY
|
||||
// when the message renders through <Md> (markdown), which inserts blank
|
||||
// rows between blocks. ANSI-bearing assistant messages render through
|
||||
// <Ansi> instead (see messageLine.tsx role!=='user' && hasAnsi branch),
|
||||
// which emits the text's own newlines 1:1 with no extra breathing room.
|
||||
// wrappedLines already counted those literal blank lines, so adding the
|
||||
// gap bonus here double-counts and overshoots ~6 rows on colored code
|
||||
// echoes — re-introducing virtual-list drift on resumed sessions full
|
||||
// of highlighted tool output. Gate the bonus on !hasAnsi to match what
|
||||
// actually renders.
|
||||
//
|
||||
// Slice first so the regex never walks more than the first ~16k chars of
|
||||
// a giant assistant message — post-mount Yoga measurement converges to
|
||||
// the real height regardless of how the estimate undercounts.
|
||||
const scan = text.length > 16_000 ? text.slice(0, 16_000) : text
|
||||
h += Math.min(6, (scan.match(/\n\s*\n/g) ?? []).length)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user