fix(tui): don't italicize intraword underscores in markdown

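The inline markdown regex matched `_..._` / `__...__` anywhere in a line, so file paths like `browser_screenshot_ecc1c3feab.png` were rendered with the text between the underscores italicized. Require non-word flanking on underscore emphasis (`(?<!\w)` before the opening delimiter, `(?!\w)` after the closing one) so snake_case identifiers and paths render literally, in line with the CommonMark rule that `_` does not open or close intraword emphasis. `*` / `**` keep intraword semantics. The same guards are applied in `stripInlineMarkup`, and both it and `INLINE_RE` are now exported so the new tests can exercise them directly.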
2026-04-28 06:51:16 +08:00 · 2026-04-20 17:04:09 -05:00
parent 36e8435d3e
commit b17eb94907
2 changed files with 39 additions and 5 deletions
--- a/ui-tui/src/tests/markdown.test.ts
+++ b/ui-tui/src/tests/markdown.test.ts
@@ -0,0 +1,34 @@
+import { describe, expect, it } from 'vitest'
+
+import { INLINE_RE, stripInlineMarkup } from '../components/markdown.js'
+
+const matches = (text: string) => [...text.matchAll(INLINE_RE)].map(m => m[0])
+
+describe('INLINE_RE emphasis', () => {
+  it('matches word-boundary italic/bold', () => {
+    expect(matches('say _hi_ there')).toEqual(['_hi_'])
+    expect(matches('very __bold__ move')).toEqual(['__bold__'])
+    expect(matches('(_paren_) and [_bracket_]')).toEqual(['_paren_', '_bracket_'])
+  })
+
+  it('keeps intraword underscores literal', () => {
+    const path = '/home/me/.hermes/cache/screenshots/browser_screenshot_ecc1c3feab.png'
+
+    expect(matches(path)).toEqual([])
+    expect(matches('snake_case_var and MY_CONST')).toEqual([])
+    expect(matches('foo__bar__baz')).toEqual([])
+  })
+
+  it('still matches asterisk emphasis intraword', () => {
+    expect(matches('a*b*c')).toEqual(['*b*'])
+    expect(matches('a**bold**c')).toEqual(['**bold**'])
+  })
+})
+
+describe('stripInlineMarkup', () => {
+  it('strips word-boundary emphasis only', () => {
+    expect(stripInlineMarkup('say _hi_ there')).toBe('say hi there')
+    expect(stripInlineMarkup('browser_screenshot_ecc.png')).toBe('browser_screenshot_ecc.png')
+    expect(stripInlineMarkup('__bold__ and foo__bar__')).toBe('bold and foo__bar__')
+  })
+})
--- a/ui-tui/src/components/markdown.tsx
+++ b/ui-tui/src/components/markdown.tsx
@@ -12,8 +12,8 @@ const DEF_RE = /^\s*:\s+(.+)$/
 const TABLE_DIVIDER_CELL_RE = /^:?-{3,}:?$/
 const MD_URL_RE = '((?:[^\\s()]|\\([^\\s()]*\\))+?)'

-const INLINE_RE = new RegExp(
-  `(!\\[(.*?)\\]\\(${MD_URL_RE}\\)|\\[(.+?)\\]\\(${MD_URL_RE}\\)|<((?:https?:\\/\\/|mailto:)[^>\\s]+|[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,})>|~~(.+?)~~|\`([^\\\`]+)\`|\\*\\*(.+?)\\*\\*|__(.+?)__|\\*(.+?)\\*|_(.+?)_|==(.+?)==|\\[\\^([^\\]]+)\\]|\\^([^^\\s][^^]*?)\\^|~([^~\\s][^~]*?)~|(https?:\\/\\/[^\\s<]+))`,
+export const INLINE_RE = new RegExp(
+  `(!\\[(.*?)\\]\\(${MD_URL_RE}\\)|\\[(.+?)\\]\\(${MD_URL_RE}\\)|<((?:https?:\\/\\/|mailto:)[^>\\s]+|[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,})>|~~(.+?)~~|\`([^\\\`]+)\`|\\*\\*(.+?)\\*\\*|(?<!\\w)__(.+?)__(?!\\w)|\\*(.+?)\\*|(?<!\\w)_(.+?)_(?!\\w)|==(.+?)==|\\[\\^([^\\]]+)\\]|\\^([^^\\s][^^]*?)\\^|~([^~\\s][^~]*?)~|(https?:\\/\\/[^\\s<]+))`,
  'g'
 )

@@ -90,7 +90,7 @@ const isTableDivider = (row: string) => {
  return cells.length > 1 && cells.every(cell => TABLE_DIVIDER_CELL_RE.test(cell))
 }

-const stripInlineMarkup = (value: string) =>
+export const stripInlineMarkup = (value: string) =>
  value
    .replace(/!\[(.*?)\]\(((?:[^\s()]|\([^\s()]*\))+?)\)/g, '[image: $1] $2')
    .replace(/\[(.+?)\]\(((?:[^\s()]|\([^\s()]*\))+?)\)/g, '$1')
@@ -98,9 +98,9 @@ const stripInlineMarkup = (value: string) =>
    .replace(/~~(.+?)~~/g, '$1')
    .replace(/`([^`]+)`/g, '$1')
    .replace(/\*\*(.+?)\*\*/g, '$1')
-    .replace(/__(.+?)__/g, '$1')
+    .replace(/(?<!\w)__(.+?)__(?!\w)/g, '$1')
    .replace(/\*(.+?)\*/g, '$1')
-    .replace(/_(.+?)_/g, '$1')
+    .replace(/(?<!\w)_(.+?)_(?!\w)/g, '$1')
    .replace(/==(.+?)==/g, '$1')
    .replace(/\[\^([^\]]+)\]/g, '[$1]')
    .replace(/\^([^^\s][^^]*?)\^/g, '^$1')