From cfc8befe65b929e7e4d3d071c778894fcfba4e7f Mon Sep 17 00:00:00 2001
From: briandevans <252620095+briandevans@users.noreply.github.com>
Date: Sun, 26 Apr 2026 08:38:16 -0700
Subject: [PATCH] fix(compressor): use text char sum for multimodal token
 estimation in _find_tail_cut_by_tokens
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_find_tail_cut_by_tokens called len(content) to estimate message tokens.
When content is a list of blocks (multimodal: text + image_url), len()
returns the block count (e.g. 2) rather than the character count, so a
message with 500 chars of text was counted as 10 tokens (2 // 4 + 10)
instead of 135 tokens (500 // 4 + 10).

This caused the backward walk to exhaust all messages before hitting the
budget ceiling; the head_end safeguard then forced cut = n - min_tail,
shrinking the protected tail to the bare minimum and preventing effective
compression of long multimodal conversations.

The fix mirrors the existing pattern in _prune_old_tool_results (line 487):

    sum(len(p.get("text", "")) for p in raw_content)
    if isinstance(raw_content, list)
    else len(raw_content)

Tests: 3 new cases in TestTokenBudgetTailProtection — a regression guard
(confirmed to fail against the buggy code), a plain-string regression
guard, and an image-only block edge case.

Fixes #16087.
Co-Authored-By: Claude Sonnet 4.6 --- agent/context_compressor.py | 9 ++- tests/agent/test_context_compressor.py | 76 ++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 2 deletions(-) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 7a7a87ea11..306c07b216 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -1082,8 +1082,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio for i in range(n - 1, head_end - 1, -1): msg = messages[i] - content = msg.get("content") or "" - msg_tokens = len(content) // _CHARS_PER_TOKEN + 10 # +10 for role/metadata + raw_content = msg.get("content") or "" + content_len = ( + sum(len(p.get("text", "")) for p in raw_content) + if isinstance(raw_content, list) + else len(raw_content) + ) + msg_tokens = content_len // _CHARS_PER_TOKEN + 10 # +10 for role/metadata # Include tool call arguments in estimate for tc in msg.get("tool_calls") or []: if isinstance(tc, dict): diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 776dc0a0cf..ed0848b3c8 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -846,6 +846,82 @@ class TestTokenBudgetTailProtection: # so it might or might not be pruned depending on boundary assert isinstance(pruned, int) + def test_multimodal_message_accumulates_text_chars_not_block_count(self, budget_compressor): + """_find_tail_cut_by_tokens must use text char count, not list length, + for multimodal content. Regression guard for #16087. + + Setup: 6 messages, budget=80 (soft_ceiling=120). The multimodal message + at index 1 has 500 chars of text → 135 tokens (correct) or 10 tokens (bug). + + Fixed path: walk stops at the multimodal (44+135=179 > 120), cut stays at 2, + tail = messages[2:] = 4 messages. 
+ + Bug path: walk counts only 10 tokens for the multimodal, exhausts to head_end, + the head_end safeguard forces cut = n - min_tail = 3, tail = only 3 messages. + """ + c = budget_compressor + # 500 chars → 500//4 + 10 = 135 tokens; len([text, image]) // 4 + 10 = 10 (bug) + big_text = "x" * 500 + multimodal_content = [ + {"type": "text", "text": big_text}, + {"type": "image_url", "image_url": {"url": "https://example.com/img.jpg"}}, + ] + messages = [ + {"role": "user", "content": "head1"}, # 0 + {"role": "user", "content": multimodal_content}, # 1: BIG (index under test) + {"role": "assistant", "content": "tail1"}, # 2 + {"role": "user", "content": "tail2"}, # 3 + {"role": "assistant", "content": "tail3"}, # 4 + {"role": "user", "content": "tail4"}, # 5 + ] + c.tail_token_budget = 80 # soft_ceiling = 120 + head_end = 0 + cut = c._find_tail_cut_by_tokens(messages, head_end) + # With the fix: cut=2, tail has 4 messages (soft_ceiling not exceeded by tail1-4). + # With the bug: head_end safeguard fires → cut = n - min_tail = 3, only 3 in tail. + assert len(messages) - cut >= 4, ( + f"Expected ≥4 messages in tail (got {len(messages) - cut}, cut={cut}). " + "The multimodal message was underestimated — len(list) used instead of text chars." + ) + + def test_plain_string_content_unchanged(self, budget_compressor): + """Plain string content must still be estimated correctly after the fix.""" + c = budget_compressor + # Same layout as the multimodal test but with a plain 500-char string. + # Both buggy and fixed code count plain strings the same way (len(str)). + # With 135 tokens the plain string also exceeds soft_ceiling=120, so + # the walk stops at index 1 and tail has 4 messages — same as the fix path. 
+ big_plain = "x" * 500 + messages = [ + {"role": "user", "content": "head1"}, + {"role": "user", "content": big_plain}, # 1: 135 tokens, plain string + {"role": "assistant", "content": "tail1"}, + {"role": "user", "content": "tail2"}, + {"role": "assistant", "content": "tail3"}, + {"role": "user", "content": "tail4"}, + ] + c.tail_token_budget = 80 + head_end = 0 + cut = c._find_tail_cut_by_tokens(messages, head_end) + assert len(messages) - cut >= 4, ( + f"Plain string regression: expected ≥4 messages in tail, got {len(messages) - cut}" + ) + + def test_image_only_block_contributes_zero_text_chars(self, budget_compressor): + """Image-only content blocks (no 'text' key) contribute 0 chars + base overhead.""" + c = budget_compressor + c.tail_token_budget = 500 + image_only = [{"type": "image_url", "image_url": {"url": "https://example.com/x.jpg"}}] + messages = [ + {"role": "user", "content": "a" * 4000}, + {"role": "user", "content": image_only}, # 0 text chars → 10 tokens overhead + {"role": "assistant", "content": "ok"}, + ] + head_end = 0 + cut = c._find_tail_cut_by_tokens(messages, head_end) + assert isinstance(cut, int) + assert 0 <= cut <= len(messages) + class TestUpdateModelBudgets: """Regression: update_model() must recalculate token budgets."""