Compare commits

...

3 Commits

Author SHA1 Message Date
Brooklyn Nicholson
5e76cbe47d perf(tui): hoist mouse-leak alphabet to a charcode helper
Copilot review: the two burst-edge extension loops created a fresh
RegExp on every iteration via /[;\d<\[Mm]/.test(). Replace with an
isMouseLeakChar() charcode check so a long mouse burst doesn't allocate
N RegExp objects, and the alphabet has a single named definition.
2026-05-16 01:06:52 -05:00
Brooklyn Nicholson
de7284e131 fix(tui): require digit+separator in mouse-burst noise check
Copilot review: MOUSE_BURST_NOISE_RE as written would swallow plain text
like 'Mmm' or 'MMM' since the alphabet allowed [Mm]-only runs as long as
there were ≥3 terminators. Add lookaheads requiring at least one digit
and at least one ';' — real mouse reports always carry coordinate digits
and ';' param separators, so this discriminates them from English text
without changing the leak-detection behaviour.

Also sync the threshold comment in parseTextWithSgrMouseFragments to
match the ≥3 terminators in the regex (was stale at ≥2).
2026-05-16 00:59:43 -05:00
Brooklyn Nicholson
ff1dc2561f fix(tui): swallow degraded SGR mouse bursts instead of leaking to prompt
Windows Terminal during a fast wheel-scroll can produce stdin runs like
';76;50mM1M68;36M;73;35M...M0M0MM6MMMMM' where the ESC[< prefix and the
button code on follow-up events have been chewed off and no individual
fragment matches SGR_MOUSE_FRAGMENT_RE. Previously these survived the
recovery path, fell through to parseKeypress, and got typed into the
composer.

Two-part fix in parseTextWithSgrMouseFragments:
- When the whole text is mouse-leak alphabet ([;\d<\[Mm]) with ≥3
  terminators, drop it as noise.
- Around any confirmed mouse-fragment burst, extend the consumed window
  greedily over adjacent leak chars on both sides so chewed-off neighbours
  go away with the real events instead of trailing into the prompt.

Threshold of ≥3 terminators keeps the existing 'see 1;2;3M for details'
and '1234;56;78M9;10;11M' tests as plain text — real scroll bursts have
many more M/m.
2026-05-16 00:54:32 -05:00
2 changed files with 82 additions and 4 deletions

View File

@@ -133,4 +133,35 @@ describe('fragmented SGR mouse recovery', () => {
expect(key).toMatchObject({ kind: 'key', sequence: '1234;56;78M9;10;11M' })
})
it('swallows degraded windows-terminal mouse bursts without leaking to the prompt', () => {
  // Captured from Windows Terminal during a wheel-scroll storm. The ESC[<
  // prefixes and button codes are gone, so what remains is nothing but
  // mouse-leak alphabet with a long tail of M/m terminators.
  const degradedBurst =
    ';76;50mM1M68;36M;73;35M;74;38M74;41M75;41M66;38M37M2;38M;49;38M35;40;39M9M5;37;39M39M;32;39M29;39M0M0MM6MMMMM'

  const [parsedEvents] = parseMultipleKeypresses(INITIAL_STATE, degradedBurst)

  // Only non-pasted key events would show up as typed characters.
  const typedKeys = parsedEvents.filter(event => event.kind === 'key' && !event.isPasted)

  expect(typedKeys).toEqual([])
})
it('swallows a degraded mouse burst that surrounds a real fragment with leak chars', () => {
  // One genuine fragment (`<32;76;50M`) sits in the middle; everything on
  // either side is leak noise that must not come out as typed keys.
  const noisyBurst = ';12;34m<32;76;50M;78;52M;99;9M0MM'

  const [parsedEvents] = parseMultipleKeypresses(INITIAL_STATE, noisyBurst)

  // Only non-pasted key events would show up as typed characters.
  const typedKeys = parsedEvents.filter(event => event.kind === 'key' && !event.isPasted)

  expect(typedKeys).toEqual([])
})
it.each(['Mmm', 'MMM', 'Mmmmm', 'mmm yum'])(
  'keeps plain text %p that lacks digits/separators despite many M/m chars',
  plainText => {
    // M/m-heavy words carry no digits or `;`, so they must come back as a
    // single ordinary key event with the text intact.
    const [firstBatch] = parseMultipleKeypresses(INITIAL_STATE, plainText)
    const [firstKey] = firstBatch

    expect(firstKey).toMatchObject({ kind: 'key', sequence: plainText })
  }
)
})

View File

@@ -64,6 +64,31 @@ const XTVERSION_RE = /^\x1bP>\|(.*?)(?:\x07|\x1b\\)$/s
// SGR_MOUSE_RE matches one complete, anchored SGR mouse report:
// ESC [ < p1 ; p2 ; p3 followed by `M` or `m`. The three numeric
// parameters and the terminator are captured as groups 1-4.
// eslint-disable-next-line no-control-regex
const SGR_MOUSE_RE = /^\x1b\[<(\d+);(\d+);(\d+)([Mm])$/
// SGR_MOUSE_FRAGMENT_RE finds mouse reports embedded anywhere in a string
// when the ESC (and possibly `[`) has been lost: an optional `[<` or `<`
// prefix, a first parameter limited to 0-255 that is not preceded by another
// digit, then `;digits;digits` and an `M`/`m` terminator. The pattern is
// global (`g`), so it carries `lastIndex` state between uses.
const SGR_MOUSE_FRAGMENT_RE = /(?<!\d)(?:\[<|<)?(?:[0-9]|[1-9][0-9]|1\d{2}|2[0-4]\d|25[0-5]);\d+;\d+[Mm]/g
// Mouse-leak "alphabet": chars that can appear inside an SGR mouse burst
// (digits, `;` separators, `M`/`m` terminators, `[` / `<` prefix). Used to
// detect degraded bursts where the tokenizer or upstream stripped enough
// context that no single fragment matches SGR_MOUSE_FRAGMENT_RE but the run
// is clearly mouse noise.
//
// A string matches only when ALL of the following hold:
//   - every character belongs to the alphabet `[;\d<\[Mm]`;
//   - it contains at least one digit;
//   - it contains at least one `;`;
//   - it contains at least three `M`/`m` terminators.
//
// Plain text like `Mmm` / `MMM` (no digits, no separators) stays put;
// `1234;56;78M9;10;11M` (only 2 terminators) is ambiguous and stays put;
// real mouse bursts during scroll/drag/motion have many more terminators
// plus coordinate digits and `;` separators between params.
//
// The three requirements are expressed as lookaheads and the body is a single
// `+` run. The terminator count uses `(?:[^Mm]*[Mm]){3}`, whose two pieces
// are disjoint, so the engine never has to choose between ways of splitting
// the input. Chaining `[;\d<\[Mm]*[Mm]` segments instead makes every `M` a
// candidate split point and backtracks polynomially on a long alphabet-only
// run followed by one foreign character.
const MOUSE_BURST_NOISE_RE = /^(?=[^]*\d)(?=[^]*;)(?=(?:[^Mm]*[Mm]){3})[;\d<\[Mm]+$/
/**
 * Tells whether `c` starts with a mouse-leak alphabet character: a decimal
 * digit, `;`, `<`, `[`, `M` or `m`.
 *
 * Only the first UTF-16 code unit is inspected. Working on the char code
 * means callers that probe one character per loop iteration do not build a
 * RegExp each time.
 *
 * @param c - String whose first code unit is tested.
 * @returns `true` when that code unit is in the alphabet; `false` otherwise,
 *   including for the empty string.
 */
function isMouseLeakChar(c: string): boolean {
  const unit = c.charCodeAt(0)

  // '0'..'9'
  if (unit >= 0x30 && unit <= 0x39) {
    return true
  }

  switch (unit) {
    case 0x3b: // ;
    case 0x3c: // <
    case 0x5b: // [
    case 0x4d: // M
    case 0x6d: // m
      return true
    default:
      // Also reached for NaN (empty input), which equals no case label.
      return false
  }
}
function createPasteKey(content: string): ParsedKey {
return {
@@ -646,8 +671,14 @@ function parseTextWithSgrMouseFragments(text: string): ParsedInput[] | null {
SGR_MOUSE_FRAGMENT_RE.lastIndex = 0
const matches = [...text.matchAll(SGR_MOUSE_FRAGMENT_RE)]
// Degraded burst: no full fragment matched, but the entire text is mouse-leak
// alphabet with ≥3 terminators plus at least one digit and one `;`. Tokenizer
// or upstream stripped the button code (or the ESC[< prefix and the button),
// leaving e.g. `;col;rowM` or worse. Swallow rather than leak into the prompt.
// Threshold lives on MOUSE_BURST_NOISE_RE — keep this comment in sync.
if (matches.length === 0) {
return null
return MOUSE_BURST_NOISE_RE.test(text) ? [] : null
}
const parsed: ParsedInput[] = []
@@ -673,8 +704,21 @@ function parseTextWithSgrMouseFragments(text: string): ParsedInput[] | null {
continue
}
if (first.index! > cursor) {
parsed.push(parseKeypress(text.slice(cursor, first.index!)))
// Extend the burst over adjacent mouse-leak noise. The tokenizer or
// upstream may have dropped button codes / extra prefixes on follow-up
// events, so they don't match SGR_MOUSE_FRAGMENT_RE but they're still
// noise we want to swallow rather than type into the prompt.
let burstStart = first.index!
while (burstStart > cursor && isMouseLeakChar(text[burstStart - 1]!)) {
burstStart--
}
while (runEnd < text.length && isMouseLeakChar(text[runEnd]!)) {
runEnd++
}
if (burstStart > cursor) {
parsed.push(parseKeypress(text.slice(cursor, burstStart)))
}
for (const match of run) {
@@ -686,7 +730,10 @@ function parseTextWithSgrMouseFragments(text: string): ParsedInput[] | null {
}
if (!consumedAny) {
return null
// Matched fragments existed but none had enough evidence to be promoted
// to mouse events. If the entire text is mouse-leak alphabet anyway,
// swallow it as noise rather than typing it into the prompt.
return MOUSE_BURST_NOISE_RE.test(text) ? [] : null
}
if (cursor < text.length) {