diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 7a7a87ea11..306c07b216 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -1082,8 +1082,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
 
         for i in range(n - 1, head_end - 1, -1):
             msg = messages[i]
-            content = msg.get("content") or ""
-            msg_tokens = len(content) // _CHARS_PER_TOKEN + 10  # +10 for role/metadata
+            raw_content = msg.get("content") or ""
+            content_len = (
+                sum(len(p.get("text", "")) for p in raw_content)
+                if isinstance(raw_content, list)
+                else len(raw_content)
+            )
+            msg_tokens = content_len // _CHARS_PER_TOKEN + 10  # +10 for role/metadata
             # Include tool call arguments in estimate
             for tc in msg.get("tool_calls") or []:
                 if isinstance(tc, dict):
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index 776dc0a0cf..ed0848b3c8 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -846,6 +846,82 @@ class TestTokenBudgetTailProtection:
         # so it might or might not be pruned depending on boundary
         assert isinstance(pruned, int)
+
+    def test_multimodal_message_accumulates_text_chars_not_block_count(self, budget_compressor):
+        """_find_tail_cut_by_tokens must use text char count, not list length,
+        for multimodal content. Regression guard for #16087.
+
+        Setup: 6 messages, budget=80 (soft_ceiling=120). The multimodal message
+        at index 1 has 500 chars of text → 135 tokens (correct) or 10 tokens (bug).
+
+        Fixed path: walk stops at the multimodal (44+135=179 > 120), cut stays at 2,
+        tail = messages[2:] = 4 messages.
+
+        Bug path: walk counts only 10 tokens for the multimodal, exhausts to head_end,
+        the head_end safeguard forces cut = n - min_tail = 3, tail = only 3 messages.
+        """
+        c = budget_compressor
+        # 500 chars → 500//4 + 10 = 135 tokens; len([text, image]) // 4 + 10 = 10 (bug)
+        big_text = "x" * 500
+        multimodal_content = [
+            {"type": "text", "text": big_text},
+            {"type": "image_url", "image_url": {"url": "https://example.com/img.jpg"}},
+        ]
+        messages = [
+            {"role": "user", "content": "head1"},  # 0
+            {"role": "user", "content": multimodal_content},  # 1: BIG (index under test)
+            {"role": "assistant", "content": "tail1"},  # 2
+            {"role": "user", "content": "tail2"},  # 3
+            {"role": "assistant", "content": "tail3"},  # 4
+            {"role": "user", "content": "tail4"},  # 5
+        ]
+        c.tail_token_budget = 80  # soft_ceiling = 120
+        head_end = 0
+        cut = c._find_tail_cut_by_tokens(messages, head_end)
+        # With the fix: cut=2, tail has 4 messages (soft_ceiling not exceeded by tail1-4).
+        # With the bug: head_end safeguard fires → cut = n - min_tail = 3, only 3 in tail.
+        assert len(messages) - cut >= 4, (
+            f"Expected ≥4 messages in tail (got {len(messages) - cut}, cut={cut}). "
+            "The multimodal message was underestimated — len(list) used instead of text chars."
+        )
+
+    def test_plain_string_content_unchanged(self, budget_compressor):
+        """Plain string content must still be estimated correctly after the fix."""
+        c = budget_compressor
+        # Same layout as the multimodal test but with a plain 500-char string.
+        # Both buggy and fixed code count plain strings the same way (len(str)).
+        # With 135 tokens the plain string also exceeds soft_ceiling=120, so
+        # the walk stops at index 1 and tail has 4 messages — same as the fix path.
+        big_plain = "x" * 500
+        messages = [
+            {"role": "user", "content": "head1"},
+            {"role": "user", "content": big_plain},  # 1: 135 tokens, plain string
+            {"role": "assistant", "content": "tail1"},
+            {"role": "user", "content": "tail2"},
+            {"role": "assistant", "content": "tail3"},
+            {"role": "user", "content": "tail4"},
+        ]
+        c.tail_token_budget = 80
+        head_end = 0
+        cut = c._find_tail_cut_by_tokens(messages, head_end)
+        assert len(messages) - cut >= 4, (
+            f"Plain string regression: expected ≥4 messages in tail, got {len(messages) - cut}"
+        )
+
+    def test_image_only_block_contributes_zero_text_chars(self, budget_compressor):
+        """Image-only content blocks (no 'text' key) contribute 0 chars + base overhead."""
+        c = budget_compressor
+        c.tail_token_budget = 500
+        image_only = [{"type": "image_url", "image_url": {"url": "https://example.com/x.jpg"}}]
+        messages = [
+            {"role": "user", "content": "a" * 4000},
+            {"role": "user", "content": image_only},  # 0 text chars → 10 tokens overhead
+            {"role": "assistant", "content": "ok"},
+        ]
+        head_end = 0
+        cut = c._find_tail_cut_by_tokens(messages, head_end)
+        assert isinstance(cut, int)
+        assert 0 <= cut <= len(messages)
 
 
 class TestUpdateModelBudgets:
     """Regression: update_model() must recalculate token budgets."""