fix(website): auto-wrap ASCII-art code blocks in generated skill pages (#16497)

Defensive: when the generator encounters a fenced code block containing
Unicode box-drawing characters, wrap it in `<!-- ascii-guard-ignore -->`
markers so the docs-site-checks lint (which scans inside code fences)
can't reject the page for a skill's own diagram.

Plain bash/python code blocks stay uncluttered — only blocks with box
chars get wrapped. Skill authors no longer have to remember to add the
ignore markers in every SKILL.md with ASCII art.

Fixes #15305.
This commit is contained in:
Teknium
2026-04-27 03:38:39 -07:00
committed by GitHub
parent 64a497bfa9
commit 65f648ee84
3 changed files with 134 additions and 1 deletions

View File

View File

@@ -0,0 +1,108 @@
"""Tests for website/scripts/generate-skill-docs.py.
The generator turns every `skills/**/SKILL.md` into a Docusaurus page before
the `docs-site-checks` CI workflow runs `ascii-guard lint` on the result. If
a SKILL.md contains ASCII diagrams (box-drawing chars in a fenced code block)
without its own `<!-- ascii-guard-ignore -->` markers, the generator must
add them defensively — otherwise every PR touching `website/**` fails lint
on unrelated skill content.
Regression for issue #15305.
"""
from __future__ import annotations
import importlib.util
from pathlib import Path
import pytest
# Repository root: two levels above the directory that contains this test file.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Path of the generator script under test. Its filename is hyphenated, so it
# has to be loaded by path rather than with a regular import statement.
GENERATOR = REPO_ROOT.joinpath("website", "scripts", "generate-skill-docs.py")
@pytest.fixture(scope="module")
def gen_module():
    """Import generate-skill-docs.py by file path and return the module object.

    The script's filename contains hyphens, so it cannot be imported with a
    normal ``import`` statement; it is loaded through ``importlib`` instead.
    The fixture is module-scoped, so the script is executed once per test
    module.
    """
    module_spec = importlib.util.spec_from_file_location(
        "generate_skill_docs", GENERATOR
    )
    # Fail loudly here if the spec (or its loader) could not be created.
    assert module_spec is not None and module_spec.loader is not None
    loaded = importlib.util.module_from_spec(module_spec)
    module_spec.loader.exec_module(loaded)
    return loaded
def test_code_block_without_box_chars_is_not_wrapped(gen_module):
    """A fence with no box-drawing characters gets no ignore markers."""
    source = "Intro.\n\n```bash\npip install foo\nfoo --run\n```\n\nOutro."
    rendered = gen_module.mdx_escape_body(source)
    # The code itself must survive, and no marker may have been injected.
    assert "pip install foo" in rendered
    assert "ascii-guard-ignore" not in rendered
def test_code_block_with_box_chars_gets_wrapped(gen_module):
    """A code fence containing Unicode box-drawing chars must be wrapped in
    ascii-guard-ignore comments so the docs-site-checks lint can't fail on
    a skill's own diagram (issue #15305).

    Both markers have to sit OUTSIDE the fence: the opening marker before the
    opening fence and the end marker after the closing fence, with the diagram
    in between.
    """
    body = (
        "Some text.\n\n"
        "```\n"
        "┌─────────┐\n"
        "│ diagram │\n"
        "└─────────┘\n"
        "```\n\n"
        "More text."
    )
    result = gen_module.mdx_escape_body(body)

    open_marker = "<!-- ascii-guard-ignore -->"
    end_marker = "<!-- ascii-guard-ignore-end -->"
    assert open_marker in result
    assert end_marker in result

    wrap_open = result.index(open_marker)
    wrap_end = result.index(end_marker)
    # Opening side: the marker precedes the first fence line.
    assert wrap_open < result.index("```\n")
    # Closing side: the last fence in the output is the closing one, and the
    # end marker must come after it rather than inside the code block.
    assert result.rindex("```") < wrap_end
    # The diagram itself is enclosed by the marker pair.
    assert wrap_open < result.index("┌") < wrap_end
def test_multiple_code_blocks_only_box_ones_wrapped(gen_module):
    """With three fences of which only one holds box art, exactly one marker
    pair is emitted and the plain blocks keep their content."""
    body = (
        "```bash\necho hi\n```\n\n"
        "```\n┌──┐\n│ │\n└──┘\n```\n\n"
        "```python\nprint('ok')\n```"
    )
    rendered = gen_module.mdx_escape_body(body)
    # Exactly one opening and one closing marker: only the box block is wrapped.
    for marker in ("<!-- ascii-guard-ignore -->", "<!-- ascii-guard-ignore-end -->"):
        assert rendered.count(marker) == 1
    # The two plain blocks still carry their code.
    for snippet in ("echo hi", "print('ok')"):
        assert snippet in rendered
def test_tilde_fenced_box_is_wrapped(gen_module):
    """Box art inside a ``~~~`` fence is wrapped just like a backtick fence."""
    tilde_fenced = "~~~\n│ box │\n~~~"
    rendered = gen_module.mdx_escape_body(tilde_fenced)
    assert "<!-- ascii-guard-ignore -->" in rendered
def test_already_wrapped_source_double_wraps_harmlessly(gen_module):
    """A SKILL.md that already carries its own ascii-guard-ignore markers must
    still be processed without crashing and without losing content.

    The generator may add a second marker pair around the fence; this test
    treats that as acceptable and only checks that the diagram and at least
    one marker pair are present in the output.
    """
    pre_wrapped = (
        "<!-- ascii-guard-ignore -->\n"
        "```\n┌─┐\n└─┘\n```\n"
        "<!-- ascii-guard-ignore-end -->"
    )
    output = gen_module.mdx_escape_body(pre_wrapped)
    # The diagram content is intact.
    assert "┌─┐" in output
    # Opening and closing markers are both still present.
    expected_markers = ("<!-- ascii-guard-ignore -->", "<!-- ascii-guard-ignore-end -->")
    assert all(marker in output for marker in expected_markers)
def test_box_drawing_detection_covers_common_chars(gen_module):
    """Every glyph sampled from real skill diagrams must be in the detection set."""
    # Sample from real SKILL.md diagrams (segment-anything, research-paper-writing, etc.)
    sampled_glyphs = "┌┐└┘─│├┤┬┴┼═║╔╗╚╝╭╮╯╰▶◀▲▼"
    detected = gen_module._BOX_DRAWING_CHARS
    for glyph in sampled_glyphs:
        assert glyph in detected, f"missing: {glyph!r}"

View File

@@ -38,6 +38,31 @@ HAND_WRITTEN = {"godmode.md", "google-workspace.md"}
# Matches a code-fence marker at the start of a line (MULTILINE mode): optional
# leading whitespace captured as "indent", then a run of three or more
# backticks or three or more tildes captured as "fence".
# NOTE: `\s` also matches newlines, so "indent" can span preceding blank lines.
_FENCE_RE = re.compile(r"^(?P<indent>\s*)(?P<fence>```+|~~~+)", re.MULTILINE)
# Characters that mark a fenced code block as a diagram. If a generated fenced
# code block contains any of these, it is wrapped in
# `<!-- ascii-guard-ignore -->` so the docs-site-checks lint (which scans inside
# code fences) can't reject the page for a skill's own ASCII diagram. Skill
# authors shouldn't need to remember to add the ignore markers in every
# SKILL.md — the generator handles it defensively.
#
# The set covers the whole Unicode "Box Drawing" block (U+2500–U+257F: light,
# heavy, double, dashed and rounded variants) plus the arrowhead triangles
# that show up in flow diagrams. It stays a frozenset so `ch in ...` lookups
# keep working for callers and tests.
_BOX_DRAWING_CHARS = frozenset(map(chr, range(0x2500, 0x2580))) | frozenset("▶◀▲▼")


def _wrap_ascii_art_code_blocks(code_segment: str) -> str:
    """Wrap a fenced code segment in ascii-guard-ignore markers if needed.

    Args:
        code_segment: The text of one fenced code block, fences included.

    Returns:
        ``code_segment`` unchanged when it contains no character from
        ``_BOX_DRAWING_CHARS`` (so plain bash/python blocks stay uncluttered);
        otherwise the segment preceded by an ``<!-- ascii-guard-ignore -->``
        line and followed by an ``<!-- ascii-guard-ignore-end -->`` line.

    Note:
        This function only sees the code segment, not the surrounding text,
        so it cannot tell whether the SKILL.md source already placed its own
        markers around the fence. Such a block is wrapped a second time.
    """
    # isdisjoint() iterates the string's characters and stops at the first hit.
    if _BOX_DRAWING_CHARS.isdisjoint(code_segment):
        return code_segment
    return (
        "<!-- ascii-guard-ignore -->\n"
        f"{code_segment}\n"
        "<!-- ascii-guard-ignore-end -->"
    )
def mdx_escape_body(body: str) -> str:
"""Escape MDX-dangerous characters in markdown body, leaving fenced code blocks alone.
@@ -194,7 +219,7 @@ def mdx_escape_body(body: str) -> str:
processed: list[str] = []
for kind, content in segments:
if kind == "code":
processed.append(content)
processed.append(_wrap_ascii_art_code_blocks(content))
else:
processed.append(escape_text(content))
return "\n".join(processed)