fix(compressor): use text char sum for multimodal token estimation in _find_tail_cut_by_tokens

_find_tail_cut_by_tokens called len(content) to estimate message tokens.
When content is a list of blocks (multimodal: text + image_url), len()
returns block count (e.g. 2) rather than character count, so a message
with 500 chars of text was counted as ~10 tokens instead of ~135.

This caused the backward walk to exhaust all messages before hitting the
budget ceiling; the head_end safeguard then forced cut = n - min_tail,
shrinking the protected tail to the bare minimum and preventing effective
compression of long multimodal conversations.

Fix mirrors the existing pattern in _prune_old_tool_results (line 487):
  sum(len(p.get("text", "")) for p in raw_content)
  if isinstance(raw_content, list) else len(raw_content)

Tests: 3 new cases in TestTokenBudgetTailProtection — a multimodal
regression guard (fails without the fix), a plain-string guard confirming
estimation is unchanged for string content, and an image-only block edge case.

Fixes #16087.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
briandevans
2026-04-26 08:38:16 -07:00
committed by Teknium
parent 3e68809fe0
commit cfc8befe65
2 changed files with 83 additions and 2 deletions

View File

@@ -1082,8 +1082,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
for i in range(n - 1, head_end - 1, -1): for i in range(n - 1, head_end - 1, -1):
msg = messages[i] msg = messages[i]
content = msg.get("content") or "" raw_content = msg.get("content") or ""
msg_tokens = len(content) // _CHARS_PER_TOKEN + 10 # +10 for role/metadata content_len = (
sum(len(p.get("text", "")) for p in raw_content)
if isinstance(raw_content, list)
else len(raw_content)
)
msg_tokens = content_len // _CHARS_PER_TOKEN + 10 # +10 for role/metadata
# Include tool call arguments in estimate # Include tool call arguments in estimate
for tc in msg.get("tool_calls") or []: for tc in msg.get("tool_calls") or []:
if isinstance(tc, dict): if isinstance(tc, dict):

View File

@@ -846,6 +846,82 @@ class TestTokenBudgetTailProtection:
# so it might or might not be pruned depending on boundary # so it might or might not be pruned depending on boundary
assert isinstance(pruned, int) assert isinstance(pruned, int)
def test_multimodal_message_accumulates_text_chars_not_block_count(self, budget_compressor):
    """Regression guard for #16087: multimodal token estimation.

    _find_tail_cut_by_tokens must accumulate the character count of the
    text blocks inside a multimodal content list rather than taking
    len() of the list itself.

    Layout: 6 messages, budget=80 (soft ceiling 120). Message 1 carries
    500 chars of text in blocks → 135 tokens when counted correctly but
    only 10 under the bug. The fixed walk stops at message 1
    (44 + 135 = 179 > 120), leaving cut=2 and a 4-message tail; the
    buggy walk exhausts to head_end and the safeguard shrinks the tail
    to n - min_tail = 3 messages.
    """
    compressor = budget_compressor
    compressor.tail_token_budget = 80  # soft_ceiling = 120

    # 500 chars → 500//4 + 10 = 135 tokens; len([text, image]) // 4 + 10 = 10 (bug)
    filler = "x" * 500
    blocks = [
        {"type": "text", "text": filler},
        {"type": "image_url", "image_url": {"url": "https://example.com/img.jpg"}},
    ]
    messages = [
        {"role": "user", "content": "head1"},       # 0
        {"role": "user", "content": blocks},        # 1: BIG (index under test)
        {"role": "assistant", "content": "tail1"},  # 2
        {"role": "user", "content": "tail2"},       # 3
        {"role": "assistant", "content": "tail3"},  # 4
        {"role": "user", "content": "tail4"},       # 5
    ]

    cut = compressor._find_tail_cut_by_tokens(messages, 0)

    # Fixed code: cut=2 → 4 tail messages. Buggy code: safeguard forces cut=3.
    assert len(messages) - cut >= 4, (
        f"Expected ≥4 messages in tail (got {len(messages) - cut}, cut={cut}). "
        "The multimodal message was underestimated — len(list) used instead of text chars."
    )
def test_plain_string_content_unchanged(self, budget_compressor):
    """Plain string content must still be estimated correctly after the fix.

    Mirrors the multimodal test layout, but message 1 is a bare
    500-char string. Buggy and fixed code measure plain strings
    identically via len(str): 135 tokens exceed soft_ceiling=120, so
    the walk halts at index 1 and 4 messages remain in the tail.
    """
    compressor = budget_compressor
    compressor.tail_token_budget = 80

    messages = [
        {"role": "user", "content": "head1"},
        {"role": "user", "content": "x" * 500},  # 1: 135 tokens, plain string
    ]
    # tail1..tail4 with alternating assistant/user roles, as in the multimodal test
    for idx, role in enumerate(("assistant", "user", "assistant", "user"), start=1):
        messages.append({"role": role, "content": f"tail{idx}"})

    cut = compressor._find_tail_cut_by_tokens(messages, 0)

    assert len(messages) - cut >= 4, (
        f"Plain string regression: expected ≥4 messages in tail, got {len(messages) - cut}"
    )
def test_image_only_block_contributes_zero_text_chars(self, budget_compressor):
    """Image-only content blocks (no 'text' key) contribute 0 chars + base overhead."""
    compressor = budget_compressor
    compressor.tail_token_budget = 500

    picture_only = [
        {"type": "image_url", "image_url": {"url": "https://example.com/x.jpg"}},
    ]
    messages = [
        {"role": "user", "content": "a" * 4000},
        {"role": "user", "content": picture_only},  # 0 text chars → 10 tokens overhead
        {"role": "assistant", "content": "ok"},
    ]

    cut = compressor._find_tail_cut_by_tokens(messages, 0)

    # The call must not raise and must yield a cut index within bounds.
    assert isinstance(cut, int)
    assert 0 <= cut <= len(messages)
class TestUpdateModelBudgets: class TestUpdateModelBudgets:
"""Regression: update_model() must recalculate token budgets.""" """Regression: update_model() must recalculate token budgets."""