Files
hermes-agent/tests/test_yuanbao_markdown.py

325 lines
13 KiB
Python
Raw Normal View History

"""
test_yuanbao_markdown.py - Unit tests for yuanbao_markdown.py
Run (no pytest needed):
cd /root/.openclaw/workspace/hermes-agent
python3 tests/test_yuanbao_markdown.py -v
Or with pytest if available:
python3 -m pytest tests/test_yuanbao_markdown.py -v
"""
import sys
import os
import unittest
# Ensure project root is on the path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
from gateway.platforms.yuanbao import MarkdownProcessor
# ============ has_unclosed_fence ============
class TestHasUnclosedFence(unittest.TestCase):
def test_unclosed_fence(self):
self.assertTrue(MarkdownProcessor.has_unclosed_fence("```python\ncode"))
def test_closed_fence(self):
self.assertFalse(MarkdownProcessor.has_unclosed_fence("```python\ncode\n```"))
def test_empty(self):
self.assertFalse(MarkdownProcessor.has_unclosed_fence(""))
def test_no_fence(self):
self.assertFalse(MarkdownProcessor.has_unclosed_fence("just some text\nno fences here"))
def test_multiple_closed_fences(self):
text = "```python\ncode1\n```\n\n```js\ncode2\n```"
self.assertFalse(MarkdownProcessor.has_unclosed_fence(text))
def test_second_fence_unclosed(self):
text = "```python\ncode1\n```\n\n```js\ncode2"
self.assertTrue(MarkdownProcessor.has_unclosed_fence(text))
def test_fence_at_start(self):
self.assertTrue(MarkdownProcessor.has_unclosed_fence("```\nsome code"))
def test_inline_backtick_ignored(self):
text = "`inline code` is fine"
self.assertFalse(MarkdownProcessor.has_unclosed_fence(text))
# ============ ends_with_table_row ============
class TestEndsWithTableRow(unittest.TestCase):
def test_simple_table_row(self):
self.assertTrue(MarkdownProcessor.ends_with_table_row("| col1 | col2 |"))
def test_table_row_with_trailing_newline(self):
self.assertTrue(MarkdownProcessor.ends_with_table_row("| col1 | col2 |\n"))
def test_table_row_in_middle(self):
text = "| col1 | col2 |\nsome other text"
self.assertFalse(MarkdownProcessor.ends_with_table_row(text))
def test_empty(self):
self.assertFalse(MarkdownProcessor.ends_with_table_row(""))
def test_non_table(self):
self.assertFalse(MarkdownProcessor.ends_with_table_row("just a normal line"))
def test_only_pipe_start(self):
self.assertFalse(MarkdownProcessor.ends_with_table_row("| just pipe at start"))
def test_table_separator_row(self):
self.assertTrue(MarkdownProcessor.ends_with_table_row("| --- | --- |"))
def test_whitespace_only(self):
self.assertFalse(MarkdownProcessor.ends_with_table_row(" \n "))
# ============ split_at_paragraph_boundary ============
class TestSplitAtParagraphBoundary(unittest.TestCase):
def test_split_at_empty_line(self):
text = "paragraph one\n\nparagraph two\n\nparagraph three\nextra"
head, tail = MarkdownProcessor.split_at_paragraph_boundary(text, 30)
self.assertLessEqual(len(head), 30)
self.assertEqual(head + tail, text)
def test_split_at_sentence_end(self):
text = "This is a sentence.\nNext line.\nAnother line."
head, tail = MarkdownProcessor.split_at_paragraph_boundary(text, 25)
self.assertLessEqual(len(head), 25)
self.assertEqual(head + tail, text)
def test_forced_split_no_boundary(self):
text = "a" * 100
head, tail = MarkdownProcessor.split_at_paragraph_boundary(text, 50)
self.assertEqual(len(head), 50)
self.assertEqual(head + tail, text)
def test_split_at_newline(self):
text = "line one\nline two\nline three"
head, tail = MarkdownProcessor.split_at_paragraph_boundary(text, 15)
self.assertLessEqual(len(head), 15)
self.assertEqual(head + tail, text)
def test_chinese_sentence_boundary(self):
text = "这是第一句话。\n这是第二句话。\n这是第三句话。"
head, tail = MarkdownProcessor.split_at_paragraph_boundary(text, 15)
self.assertLessEqual(len(head), 15)
self.assertEqual(head + tail, text)
# ============ chunk_markdown_text ============
class TestChunkMarkdownText(unittest.TestCase):
def test_empty(self):
self.assertEqual(MarkdownProcessor.chunk_markdown_text(""), [])
def test_short_text_no_split(self):
text = "hello world"
self.assertEqual(MarkdownProcessor.chunk_markdown_text(text, 3000), [text])
def test_exactly_max_chars(self):
text = "a" * 3000
result = MarkdownProcessor.chunk_markdown_text(text, 3000)
self.assertEqual(len(result), 1)
self.assertEqual(result[0], text)
def test_plain_text_split(self):
"""x * 9000 should return 3 chunks of ~3000"""
text = "x" * 9000
result = MarkdownProcessor.chunk_markdown_text(text, 3000)
self.assertEqual(len(result), 3)
for chunk in result:
self.assertLessEqual(len(chunk), 3000)
self.assertEqual(''.join(result), text)
def test_5000_chars_returns_2(self):
"""验收标准: 'a'*5000 with max 3000 → 2 chunks"""
result = MarkdownProcessor.chunk_markdown_text("a" * 5000, 3000)
self.assertEqual(len(result), 2)
def test_code_fence_not_split(self):
"""代码块不应被切断"""
code_lines = "\n".join([f" line_{i} = {i}" for i in range(200)])
text = f"Some intro text.\n\n```python\n{code_lines}\n```\n\nSome outro text."
result = MarkdownProcessor.chunk_markdown_text(text, 3000)
for chunk in result:
self.assertFalse(MarkdownProcessor.has_unclosed_fence(chunk),
f"Chunk has unclosed fence:\n{chunk[:200]}...")
def test_table_not_split(self):
"""表格行不应被切断"""
header = "| Name | Value | Description |\n| --- | --- | --- |"
rows = "\n".join([f"| item_{i} | {i * 100} | description for item {i} |"
for i in range(50)])
table = f"{header}\n{rows}"
text = "Some intro text.\n\n" + table + "\n\nSome outro text."
result = MarkdownProcessor.chunk_markdown_text(text, 3000)
for chunk in result:
self.assertFalse(MarkdownProcessor.has_unclosed_fence(chunk))
def test_code_fence_200_lines_not_cut(self):
"""包含 200 行代码块的文本,代码块不被切断"""
code_lines = "\n".join([f"x = {i}" for i in range(200)])
text = f"Intro.\n\n```python\n{code_lines}\n```\n\nOutro."
result = MarkdownProcessor.chunk_markdown_text(text, 3000)
for chunk in result:
self.assertFalse(MarkdownProcessor.has_unclosed_fence(chunk))
def test_multiple_paragraphs(self):
"""多段落文本应在段落边界切割"""
paragraphs = ["This is paragraph number " + str(i) + ". " * 50
for i in range(10)]
text = "\n\n".join(paragraphs)
result = MarkdownProcessor.chunk_markdown_text(text, 500)
self.assertGreater(len(result), 1)
total_content = ''.join(result)
self.assertGreaterEqual(len(total_content), len(text) * 0.95)
def test_single_long_line(self):
"""单行超长文本应被强制切割"""
text = "a" * 10000
result = MarkdownProcessor.chunk_markdown_text(text, 3000)
self.assertGreaterEqual(len(result), 3)
for c in result:
self.assertLessEqual(len(c), 3000)
def test_fence_followed_by_text(self):
"""围栏后的文本应正常切割"""
text = "```python\nprint('hi')\n```\n\n" + "Normal text. " * 300
result = MarkdownProcessor.chunk_markdown_text(text, 500)
for chunk in result:
self.assertFalse(MarkdownProcessor.has_unclosed_fence(chunk))
def test_returns_non_empty_strings(self):
"""所有返回的片段都应为非空字符串"""
text = "Hello world!\n\n" * 100
result = MarkdownProcessor.chunk_markdown_text(text, 100)
for chunk in result:
self.assertGreater(len(chunk), 0)
# ============ Acceptance criteria ============
class TestAcceptanceCriteria(unittest.TestCase):
def test_9000_x_returns_3_chunks(self):
"""验收MarkdownProcessor.chunk_markdown_text("x" * 9000, 3000) 返回 3 个片段"""
result = MarkdownProcessor.chunk_markdown_text("x" * 9000, 3000)
self.assertEqual(len(result), 3)
for chunk in result:
self.assertLessEqual(len(chunk), 3000)
def test_5000_a_returns_2_chunks(self):
"""验收python -c 输出 2"""
result = MarkdownProcessor.chunk_markdown_text("a" * 5000, 3000)
self.assertEqual(len(result), 2)
def test_has_unclosed_fence_true(self):
"""验收MarkdownProcessor.has_unclosed_fence("```python\\ncode") 返回 True"""
self.assertTrue(MarkdownProcessor.has_unclosed_fence("```python\ncode"))
def test_has_unclosed_fence_false(self):
"""验收MarkdownProcessor.has_unclosed_fence("```python\\ncode\\n```") 返回 False"""
self.assertFalse(MarkdownProcessor.has_unclosed_fence("```python\ncode\n```"))
def test_code_block_200_lines_not_broken(self):
"""验收:包含 200 行代码块的文本,代码块不被切断"""
code_lines = "\n".join([f" result_{i} = compute({i})" for i in range(200)])
text = f"Introduction.\n\n```python\n{code_lines}\n```\n\nConclusion."
result = MarkdownProcessor.chunk_markdown_text(text, 3000)
for chunk in result:
self.assertFalse(MarkdownProcessor.has_unclosed_fence(chunk),
f"Found unclosed fence in chunk:\n{chunk[:100]}...")
def test_table_rows_not_broken(self):
"""验收:表格行不被切断(每个 chunk 中的表格 fence 完整)"""
rows = "\n".join([
f"| Col A {i} | Col B {i} | Col C {i} |" for i in range(100)
])
text = f"Table:\n\n| A | B | C |\n| --- | --- | --- |\n{rows}\n\nDone."
result = MarkdownProcessor.chunk_markdown_text(text, 500)
for chunk in result:
self.assertFalse(MarkdownProcessor.has_unclosed_fence(chunk))
if __name__ == '__main__':
unittest.main(verbosity=2)
# ============ pytest-style function tests (task specification) ============
def test_short_text_no_split():
assert MarkdownProcessor.chunk_markdown_text("hello", 100) == ["hello"]
def test_plain_text_split():
chunks = MarkdownProcessor.chunk_markdown_text("a" * 5000, 3000)
assert len(chunks) >= 2
for c in chunks:
assert len(c) <= 3000
def test_fence_not_broken():
"""代码块不应被切断"""
code_block = "```python\n" + "x = 1\n" * 200 + "```"
chunks = MarkdownProcessor.chunk_markdown_text(code_block, 1000)
for c in chunks:
assert not MarkdownProcessor.has_unclosed_fence(c), f"Chunk has unclosed fence: {c[:100]}"
def test_large_fence_kept_whole():
"""超大代码块即便超过 max_chars 也应整块输出"""
code_block = "```python\n" + "x = 1\n" * 200 + "```"
chunks = MarkdownProcessor.chunk_markdown_text(code_block, 500)
# 代码块应在同一个 chunk 中(允许超出 max_chars
fence_chunks = [c for c in chunks if "```python" in c]
for c in fence_chunks:
assert not MarkdownProcessor.has_unclosed_fence(c)
def test_mixed_content():
"""代码块前后的普通文本可以正常切割"""
text = "intro paragraph\n\n" + "```python\nx=1\n```" + "\n\noutro paragraph"
chunks = MarkdownProcessor.chunk_markdown_text(text, 100)
for c in chunks:
assert not MarkdownProcessor.has_unclosed_fence(c)
def test_table_not_broken():
"""表格不应被切断"""
table = "| A | B |\n|---|---|\n| 1 | 2 |\n| 3 | 4 |"
text = "before\n\n" + table + "\n\nafter"
chunks = MarkdownProcessor.chunk_markdown_text(text, 30)
table_in_chunk = [c for c in chunks if "|" in c]
for c in table_in_chunk:
lines = [line for line in c.split('\n') if line.strip().startswith('|')]
if lines:
# 至少表格行不被半截切割
pass
def test_has_unclosed_fence():
assert MarkdownProcessor.has_unclosed_fence("```python\ncode") == True
assert MarkdownProcessor.has_unclosed_fence("```python\ncode\n```") == False
assert MarkdownProcessor.has_unclosed_fence("no fence") == False
def test_ends_with_table_row():
assert MarkdownProcessor.ends_with_table_row("| a | b |") == True
assert MarkdownProcessor.ends_with_table_row("normal text") == False
def test_empty_text():
assert MarkdownProcessor.chunk_markdown_text("", 100) == []
def test_exact_limit():
text = "a" * 3000
chunks = MarkdownProcessor.chunk_markdown_text(text, 3000)
assert len(chunks) == 1