Files
hermes-agent/tests/test_yuanbao_markdown.py
Teknium ab6879634e yuanbao platform (#16298)
Co-authored-by: loongzhao <loongzhao@tencent.com>
2026-04-26 18:50:49 -07:00

325 lines
13 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
test_yuanbao_markdown.py - Unit tests for MarkdownProcessor (gateway.platforms.yuanbao)
Run (no pytest needed):
cd /path/to/hermes-agent  # repository root
python3 tests/test_yuanbao_markdown.py -v
Or with pytest if available:
python3 -m pytest tests/test_yuanbao_markdown.py -v
"""
import sys
import os
import unittest
# Ensure project root is on the path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from gateway.platforms.yuanbao import MarkdownProcessor
# ============ has_unclosed_fence ============
class TestHasUnclosedFence(unittest.TestCase):
    """Exercises MarkdownProcessor.has_unclosed_fence on fenced and unfenced text."""

    def test_unclosed_fence(self):
        # Opening fence with a language tag and no closing fence.
        snippet = "```python\ncode"
        self.assertTrue(MarkdownProcessor.has_unclosed_fence(snippet))

    def test_closed_fence(self):
        # Same block, this time terminated by a closing fence.
        snippet = "```python\ncode\n```"
        self.assertFalse(MarkdownProcessor.has_unclosed_fence(snippet))

    def test_empty(self):
        empty = ""
        self.assertFalse(MarkdownProcessor.has_unclosed_fence(empty))

    def test_no_fence(self):
        plain = "just some text\nno fences here"
        self.assertFalse(MarkdownProcessor.has_unclosed_fence(plain))

    def test_multiple_closed_fences(self):
        # Two complete blocks separated by a blank line.
        both_closed = (
            "```python\ncode1\n```\n\n"
            "```js\ncode2\n```"
        )
        self.assertFalse(MarkdownProcessor.has_unclosed_fence(both_closed))

    def test_second_fence_unclosed(self):
        # First block is complete, the second one is left open.
        second_open = (
            "```python\ncode1\n```\n\n"
            "```js\ncode2"
        )
        self.assertTrue(MarkdownProcessor.has_unclosed_fence(second_open))

    def test_fence_at_start(self):
        # Bare fence (no language tag) on the very first line.
        snippet = "```\nsome code"
        self.assertTrue(MarkdownProcessor.has_unclosed_fence(snippet))

    def test_inline_backtick_ignored(self):
        # Single-backtick inline code is expected not to count as a fence.
        inline = "`inline code` is fine"
        self.assertFalse(MarkdownProcessor.has_unclosed_fence(inline))
# ============ ends_with_table_row ============
class TestEndsWithTableRow(unittest.TestCase):
    """Exercises MarkdownProcessor.ends_with_table_row on table and non-table text."""

    def test_simple_table_row(self):
        row = "| col1 | col2 |"
        self.assertTrue(MarkdownProcessor.ends_with_table_row(row))

    def test_table_row_with_trailing_newline(self):
        # A trailing newline after the row is still expected to match.
        row = "| col1 | col2 |\n"
        self.assertTrue(MarkdownProcessor.ends_with_table_row(row))

    def test_table_row_in_middle(self):
        # The table row is followed by ordinary text, so the text does not end with a row.
        mixed = "| col1 | col2 |\nsome other text"
        self.assertFalse(MarkdownProcessor.ends_with_table_row(mixed))

    def test_empty(self):
        empty = ""
        self.assertFalse(MarkdownProcessor.ends_with_table_row(empty))

    def test_non_table(self):
        plain = "just a normal line"
        self.assertFalse(MarkdownProcessor.ends_with_table_row(plain))

    def test_only_pipe_start(self):
        # A leading pipe without a closing pipe is expected not to count as a row.
        half_row = "| just pipe at start"
        self.assertFalse(MarkdownProcessor.ends_with_table_row(half_row))

    def test_table_separator_row(self):
        # The header separator line is expected to count as a table row.
        separator = "| --- | --- |"
        self.assertTrue(MarkdownProcessor.ends_with_table_row(separator))

    def test_whitespace_only(self):
        blank = " \n "
        self.assertFalse(MarkdownProcessor.ends_with_table_row(blank))
# ============ split_at_paragraph_boundary ============
class TestSplitAtParagraphBoundary(unittest.TestCase):
    """Exercises MarkdownProcessor.split_at_paragraph_boundary.

    Each case checks that the head respects the length limit and that
    head + tail reproduces the input exactly.
    """

    def _check_split(self, source, limit):
        # Shared check: head fits within the limit and nothing is lost.
        first, rest = MarkdownProcessor.split_at_paragraph_boundary(source, limit)
        self.assertLessEqual(len(first), limit)
        self.assertEqual(first + rest, source)

    def test_split_at_empty_line(self):
        self._check_split(
            "paragraph one\n\nparagraph two\n\nparagraph three\nextra", 30
        )

    def test_split_at_sentence_end(self):
        self._check_split("This is a sentence.\nNext line.\nAnother line.", 25)

    def test_forced_split_no_boundary(self):
        # No newline or sentence end anywhere: the head must be exactly `limit` long.
        source = "a" * 100
        limit = 50
        first, rest = MarkdownProcessor.split_at_paragraph_boundary(source, limit)
        self.assertEqual(len(first), limit)
        self.assertEqual(first + rest, source)

    def test_split_at_newline(self):
        self._check_split("line one\nline two\nline three", 15)

    def test_chinese_sentence_boundary(self):
        # Sentences terminated by the full-width period.
        self._check_split("这是第一句话。\n这是第二句话。\n这是第三句话。", 15)
# ============ chunk_markdown_text ============
class TestChunkMarkdownText(unittest.TestCase):
    """Exercises MarkdownProcessor.chunk_markdown_text on plain text, code fences and tables."""

    def test_empty(self):
        self.assertEqual(MarkdownProcessor.chunk_markdown_text(""), [])

    def test_short_text_no_split(self):
        text = "hello world"
        self.assertEqual(MarkdownProcessor.chunk_markdown_text(text, 3000), [text])

    def test_exactly_max_chars(self):
        text = "a" * 3000
        result = MarkdownProcessor.chunk_markdown_text(text, 3000)
        self.assertEqual(len(result), 1)
        self.assertEqual(result[0], text)

    def test_plain_text_split(self):
        """x * 9000 should return 3 chunks of ~3000."""
        text = "x" * 9000
        result = MarkdownProcessor.chunk_markdown_text(text, 3000)
        self.assertEqual(len(result), 3)
        for chunk in result:
            self.assertLessEqual(len(chunk), 3000)
        self.assertEqual(''.join(result), text)

    def test_5000_chars_returns_2(self):
        """Acceptance criterion: 'a' * 5000 with max 3000 yields 2 chunks."""
        result = MarkdownProcessor.chunk_markdown_text("a" * 5000, 3000)
        self.assertEqual(len(result), 2)

    def test_code_fence_not_split(self):
        """Code blocks must not be cut apart."""
        code_lines = "\n".join([f" line_{i} = {i}" for i in range(200)])
        text = f"Some intro text.\n\n```python\n{code_lines}\n```\n\nSome outro text."
        result = MarkdownProcessor.chunk_markdown_text(text, 3000)
        for chunk in result:
            self.assertFalse(MarkdownProcessor.has_unclosed_fence(chunk),
                             f"Chunk has unclosed fence:\n{chunk[:200]}...")

    def test_table_not_split(self):
        """Table rows must not be cut in the middle of a row."""
        max_chars = 3000
        header = "| Name | Value | Description |\n| --- | --- | --- |"
        # 100 rows (about 4.5k characters) so the text really exceeds max_chars;
        # with fewer rows the whole text fits into one chunk and nothing is tested.
        rows = "\n".join([f"| item_{i} | {i * 100} | description for item {i} |"
                          for i in range(100)])
        table = f"{header}\n{rows}"
        text = "Some intro text.\n\n" + table + "\n\nSome outro text."
        self.assertGreater(len(text), max_chars)

        result = MarkdownProcessor.chunk_markdown_text(text, max_chars)

        expected_rows = set(table.split("\n"))
        seen_rows = set()
        for chunk in result:
            self.assertFalse(MarkdownProcessor.has_unclosed_fence(chunk))
            for line in chunk.split("\n"):
                candidate = line.strip()
                if candidate.startswith("|"):
                    # A row cut in half would not match any original row.
                    self.assertIn(candidate, expected_rows,
                                  f"Table row was cut: {candidate!r}")
                    seen_rows.add(candidate)
        # Every original row must survive intact in some chunk.
        self.assertEqual(seen_rows, expected_rows)

    def test_code_fence_200_lines_not_cut(self):
        """Text containing a 200-line code block: the block is not cut apart."""
        code_lines = "\n".join([f"x = {i}" for i in range(200)])
        text = f"Intro.\n\n```python\n{code_lines}\n```\n\nOutro."
        result = MarkdownProcessor.chunk_markdown_text(text, 3000)
        for chunk in result:
            self.assertFalse(MarkdownProcessor.has_unclosed_fence(chunk))

    def test_multiple_paragraphs(self):
        """Multi-paragraph text should be split at paragraph boundaries."""
        # Note: `". " * 50` binds tighter than `+`, so each paragraph is the
        # numbered sentence followed by fifty ". " repetitions.
        paragraphs = ["This is paragraph number " + str(i) + ". " * 50
                      for i in range(10)]
        text = "\n\n".join(paragraphs)
        result = MarkdownProcessor.chunk_markdown_text(text, 500)
        self.assertGreater(len(result), 1)
        total_content = ''.join(result)
        # Allow up to 5% loss relative to the input length.
        self.assertGreaterEqual(len(total_content), len(text) * 0.95)

    def test_single_long_line(self):
        """A single over-long line must be force-split."""
        text = "a" * 10000
        result = MarkdownProcessor.chunk_markdown_text(text, 3000)
        self.assertGreaterEqual(len(result), 3)
        for c in result:
            self.assertLessEqual(len(c), 3000)

    def test_fence_followed_by_text(self):
        """Text following a fenced block should be chunked normally."""
        text = "```python\nprint('hi')\n```\n\n" + "Normal text. " * 300
        result = MarkdownProcessor.chunk_markdown_text(text, 500)
        for chunk in result:
            self.assertFalse(MarkdownProcessor.has_unclosed_fence(chunk))

    def test_returns_non_empty_strings(self):
        """Every returned chunk must be a non-empty string."""
        text = "Hello world!\n\n" * 100
        result = MarkdownProcessor.chunk_markdown_text(text, 100)
        for chunk in result:
            self.assertGreater(len(chunk), 0)
# ============ Acceptance criteria ============
class TestAcceptanceCriteria(unittest.TestCase):
    """Acceptance checks for MarkdownProcessor chunking and fence detection."""

    def test_9000_x_returns_3_chunks(self):
        """Acceptance: chunk_markdown_text("x" * 9000, 3000) returns 3 chunks."""
        limit = 3000
        pieces = MarkdownProcessor.chunk_markdown_text("x" * 9000, limit)
        self.assertEqual(len(pieces), 3)
        for piece in pieces:
            self.assertLessEqual(len(piece), limit)

    def test_5000_a_returns_2_chunks(self):
        """Acceptance: the python -c check prints 2."""
        pieces = MarkdownProcessor.chunk_markdown_text("a" * 5000, 3000)
        self.assertEqual(len(pieces), 2)

    def test_has_unclosed_fence_true(self):
        """Acceptance: has_unclosed_fence("```python\\ncode") returns True."""
        open_block = "```python\ncode"
        self.assertTrue(MarkdownProcessor.has_unclosed_fence(open_block))

    def test_has_unclosed_fence_false(self):
        """Acceptance: has_unclosed_fence("```python\\ncode\\n```") returns False."""
        closed_block = "```python\ncode\n```"
        self.assertFalse(MarkdownProcessor.has_unclosed_fence(closed_block))

    def test_code_block_200_lines_not_broken(self):
        """Acceptance: text containing a 200-line code block keeps the block uncut."""
        body = "\n".join(f" result_{i} = compute({i})" for i in range(200))
        document = f"Introduction.\n\n```python\n{body}\n```\n\nConclusion."
        for chunk in MarkdownProcessor.chunk_markdown_text(document, 3000):
            self.assertFalse(
                MarkdownProcessor.has_unclosed_fence(chunk),
                f"Found unclosed fence in chunk:\n{chunk[:100]}...",
            )

    def test_table_rows_not_broken(self):
        """Acceptance: table rows are not cut (fences are complete in every chunk)."""
        body_rows = "\n".join(
            f"| Col A {i} | Col B {i} | Col C {i} |" for i in range(100)
        )
        document = f"Table:\n\n| A | B | C |\n| --- | --- | --- |\n{body_rows}\n\nDone."
        for piece in MarkdownProcessor.chunk_markdown_text(document, 500):
            self.assertFalse(MarkdownProcessor.has_unclosed_fence(piece))
# Script entry point: run the unittest.TestCase classes defined above.
# unittest.main() exits the interpreter when it finishes, so the plain
# functions defined after this guard are only exercised under pytest.
if __name__ == "__main__":
    unittest.main(verbosity=2)
# ============ pytest-style function tests (task specification) ============
def test_short_text_no_split():
    # Text shorter than the limit comes back as one unchanged chunk.
    short = "hello"
    chunks = MarkdownProcessor.chunk_markdown_text(short, 100)
    assert chunks == [short]
def test_plain_text_split():
    # 5000 characters cannot fit into a single 3000-character chunk.
    limit = 3000
    pieces = MarkdownProcessor.chunk_markdown_text("a" * 5000, limit)
    assert len(pieces) >= 2
    assert all(len(piece) <= limit for piece in pieces)
def test_fence_not_broken():
    """Code blocks must not be cut apart."""
    body = "x = 1\n" * 200
    fenced = f"```python\n{body}```"
    for c in MarkdownProcessor.chunk_markdown_text(fenced, 1000):
        fence_left_open = MarkdownProcessor.has_unclosed_fence(c)
        assert not fence_left_open, f"Chunk has unclosed fence: {c[:100]}"
def test_large_fence_kept_whole():
    """An oversized code block should be emitted whole even if it exceeds max_chars."""
    code_block = "```python\n" + "x = 1\n" * 200 + "```"
    chunks = MarkdownProcessor.chunk_markdown_text(code_block, 500)
    # The code block should sit in a single chunk (exceeding max_chars is allowed).
    fence_chunks = [c for c in chunks if "```python" in c]
    # Guard against a vacuous pass: with no matching chunk the loop below
    # would run zero times and the test would succeed without checking anything.
    assert fence_chunks, "No chunk contains the opening code fence"
    for c in fence_chunks:
        assert not MarkdownProcessor.has_unclosed_fence(c)
def test_mixed_content():
    """Plain text before and after a code block can be chunked normally."""
    document = "\n\n".join(
        ["intro paragraph", "```python\nx=1\n```", "outro paragraph"]
    )
    for piece in MarkdownProcessor.chunk_markdown_text(document, 100):
        assert not MarkdownProcessor.has_unclosed_fence(piece)
def test_table_not_broken():
    """Tables must not be cut in the middle of a row."""
    table = "| A | B |\n|---|---|\n| 1 | 2 |\n| 3 | 4 |"
    text = "before\n\n" + table + "\n\nafter"
    chunks = MarkdownProcessor.chunk_markdown_text(text, 30)

    expected_rows = set(table.split("\n"))
    seen_rows = set()
    for c in chunks:
        for line in c.split("\n"):
            candidate = line.strip()
            if candidate.startswith("|"):
                # At minimum, no table row may be cut halfway: a truncated
                # row would not match any of the original rows.
                assert candidate in expected_rows, f"Table row was cut: {candidate!r}"
                seen_rows.add(candidate)
    # Every original row must appear intact in some chunk.
    assert seen_rows == expected_rows
def test_has_unclosed_fence():
    # Truthiness asserts instead of `== True` / `== False` comparisons,
    # matching the assertTrue/assertFalse checks used by the TestCase classes.
    assert MarkdownProcessor.has_unclosed_fence("```python\ncode")
    assert not MarkdownProcessor.has_unclosed_fence("```python\ncode\n```")
    assert not MarkdownProcessor.has_unclosed_fence("no fence")
def test_ends_with_table_row():
    # Truthiness asserts instead of `== True` / `== False` comparisons,
    # matching the assertTrue/assertFalse checks used by the TestCase classes.
    assert MarkdownProcessor.ends_with_table_row("| a | b |")
    assert not MarkdownProcessor.ends_with_table_row("normal text")
def test_empty_text():
    # Empty input produces an empty list of chunks.
    chunks = MarkdownProcessor.chunk_markdown_text("", 100)
    assert chunks == []
def test_exact_limit():
    # Text whose length equals the limit must stay in one chunk.
    limit = 3000
    pieces = MarkdownProcessor.chunk_markdown_text("a" * limit, limit)
    assert len(pieces) == 1