mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
fix(compressor): use text char sum for multimodal token estimation in _find_tail_cut_by_tokens
_find_tail_cut_by_tokens called len(content) to estimate message tokens.
When content is a list of blocks (multimodal: text + image_url), len()
returns block count (e.g. 2) rather than character count, so a message
with 500 chars of text was counted as ~10 tokens instead of ~135.
This caused the backward walk to exhaust all messages before hitting the
budget ceiling; the head_end safeguard then forced cut = n - min_tail,
shrinking the protected tail to the bare minimum and preventing effective
compression of long multimodal conversations.
Fix mirrors the existing pattern in _prune_old_tool_results (line 487):
sum(len(p.get("text", "")) for p in raw_content)
if isinstance(raw_content, list) else len(raw_content)
Tests: 3 new cases in TestTokenBudgetTailProtection — a regression guard
(verified to fail when the bug is present), a plain-string regression guard,
and an image-only block edge case.
Fixes #16087.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -846,6 +846,82 @@ class TestTokenBudgetTailProtection:
|
||||
# so it might or might not be pruned depending on boundary
|
||||
assert isinstance(pruned, int)
|
||||
|
||||
def test_multimodal_message_accumulates_text_chars_not_block_count(self, budget_compressor):
    """Regression guard for #16087: _find_tail_cut_by_tokens must estimate
    multimodal content by summed text characters, never by block-list length.

    Layout: 6 messages, budget=80 (soft_ceiling=120). Index 1 carries a
    multimodal payload with 500 chars of text — 135 tokens when counted
    correctly, but only 10 under the bug (len of a 2-element block list).

    Fixed behaviour: the backward walk halts at the multimodal message
    (44 + 135 = 179 > 120), so cut stays at 2 and the tail keeps 4 messages.

    Buggy behaviour: the walk sees only 10 tokens, exhausts down to head_end,
    and the safeguard forces cut = n - min_tail = 3 — a 3-message tail.
    """
    compressor = budget_compressor
    # 500 text chars -> 500 // 4 + 10 = 135 tokens; the bug instead computes
    # len([text_block, image_block]) // 4 + 10 = 10 tokens.
    padding = "x" * 500
    blocks = [
        {"type": "text", "text": padding},
        {"type": "image_url", "image_url": {"url": "https://example.com/img.jpg"}},
    ]
    convo = [
        {"role": "user", "content": "head1"},       # 0
        {"role": "user", "content": blocks},        # 1: the oversized multimodal message
        {"role": "assistant", "content": "tail1"},  # 2
        {"role": "user", "content": "tail2"},       # 3
        {"role": "assistant", "content": "tail3"},  # 4
        {"role": "user", "content": "tail4"},       # 5
    ]
    compressor.tail_token_budget = 80  # soft_ceiling = 120
    cut = compressor._find_tail_cut_by_tokens(convo, 0)
    # Fix path: cut == 2 and four messages survive in the tail.
    # Bug path: the head_end safeguard fires, leaving only three.
    tail_size = len(convo) - cut
    assert tail_size >= 4, (
        f"Expected ≥4 messages in tail (got {tail_size}, cut={cut}). "
        "The multimodal message was underestimated — len(list) used instead of text chars."
    )
||||
def test_plain_string_content_unchanged(self, budget_compressor):
    """After the fix, plain-string content must still be counted via len(str).

    Same layout as the multimodal test, except index 1 holds a plain
    500-char string (135 tokens). Buggy and fixed code agree on plain
    strings, so both stop the backward walk at index 1: the tail keeps
    4 messages, matching the fixed multimodal path.
    """
    compressor = budget_compressor
    long_string = "x" * 500
    convo = [
        {"role": "user", "content": "head1"},
        {"role": "user", "content": long_string},  # index 1: 135 tokens, plain string
        {"role": "assistant", "content": "tail1"},
        {"role": "user", "content": "tail2"},
        {"role": "assistant", "content": "tail3"},
        {"role": "user", "content": "tail4"},
    ]
    compressor.tail_token_budget = 80
    cut = compressor._find_tail_cut_by_tokens(convo, 0)
    tail_size = len(convo) - cut
    assert tail_size >= 4, (
        f"Plain string regression: expected ≥4 messages in tail, got {tail_size}"
    )
||||
def test_image_only_block_contributes_zero_text_chars(self, budget_compressor):
    """A block list with no 'text' key sums to 0 chars plus the base overhead."""
    compressor = budget_compressor
    compressor.tail_token_budget = 500
    picture_only = [{"type": "image_url", "image_url": {"url": "https://example.com/x.jpg"}}]
    convo = [
        {"role": "user", "content": "a" * 4000},
        {"role": "user", "content": picture_only},  # no text chars -> only the 10-token overhead
        {"role": "assistant", "content": "ok"},
    ]
    cut = compressor._find_tail_cut_by_tokens(convo, 0)
    # The edge case must not crash and must yield a sane boundary.
    assert isinstance(cut, int)
    assert 0 <= cut <= len(convo)
||||
|
||||
class TestUpdateModelBudgets:
|
||||
"""Regression: update_model() must recalculate token budgets."""
|
||||
|
||||
Reference in New Issue
Block a user