From cfc8befe65b929e7e4d3d071c778894fcfba4e7f Mon Sep 17 00:00:00 2001
From: briandevans <252620095+briandevans@users.noreply.github.com>
Date: Sun, 26 Apr 2026 08:38:16 -0700
Subject: [PATCH] fix(compressor): use text char sum for multimodal token
 estimation in _find_tail_cut_by_tokens
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

_find_tail_cut_by_tokens called len(content) to estimate message tokens.
When content is a list of blocks (multimodal: text + image_url), len()
returns block count (e.g. 2) rather than character count, so a message
with 500 chars of text was counted as ~10 tokens instead of ~135.

This caused the backward walk to exhaust all messages before hitting the
budget ceiling; the head_end safeguard then forced cut = n - min_tail,
shrinking the protected tail to the bare minimum and preventing effective
compression of long multimodal conversations.

Fix mirrors the existing pattern in _prune_old_tool_results (line 487):
  sum(len(p.get("text", "")) for p in raw_content)
  if isinstance(raw_content, list) else len(raw_content)

Tests: 3 new cases in TestTokenBudgetTailProtection — regression guard
(confirms the test fails with the bug), plain-string regression guard,
and image-only block edge case.

Fixes #16087.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 agent/context_compressor.py            |  9 ++-
 tests/agent/test_context_compressor.py | 76 ++++++++++++++++++++++++++
 2 files changed, 83 insertions(+), 2 deletions(-)

diff --git a/agent/context_compressor.py b/agent/context_compressor.py
index 7a7a87ea11..306c07b216 100644
--- a/agent/context_compressor.py
+++ b/agent/context_compressor.py
@@ -1082,8 +1082,13 @@ The user has requested that this compaction PRIORITISE preserving all informatio
 
         for i in range(n - 1, head_end - 1, -1):
             msg = messages[i]
-            content = msg.get("content") or ""
-            msg_tokens = len(content) // _CHARS_PER_TOKEN + 10  # +10 for role/metadata
+            raw_content = msg.get("content") or ""
+            content_len = (
+                sum(len(p.get("text", "")) for p in raw_content)
+                if isinstance(raw_content, list)
+                else len(raw_content)
+            )
+            msg_tokens = content_len // _CHARS_PER_TOKEN + 10  # +10 for role/metadata
             # Include tool call arguments in estimate
             for tc in msg.get("tool_calls") or []:
                 if isinstance(tc, dict):
diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py
index 776dc0a0cf..ed0848b3c8 100644
--- a/tests/agent/test_context_compressor.py
+++ b/tests/agent/test_context_compressor.py
@@ -846,6 +846,82 @@ class TestTokenBudgetTailProtection:
         # so it might or might not be pruned depending on boundary
         assert isinstance(pruned, int)
 
+    def test_multimodal_message_accumulates_text_chars_not_block_count(self, budget_compressor):
+        """_find_tail_cut_by_tokens must use text char count, not list length,
+        for multimodal content. Regression guard for #16087.
+
+        Setup: 6 messages, budget=80 (soft_ceiling=120).  The multimodal message
+        at index 1 has 500 chars of text → 135 tokens (correct) or 10 tokens (bug).
+
+        Fixed path: walk stops at the multimodal (44+135=179 > 120), cut stays at 2,
+        tail = messages[2:] = 4 messages.
+
+        Bug path: walk counts only 10 tokens for the multimodal, exhausts to head_end,
+        the head_end safeguard forces cut = n - min_tail = 3, tail = only 3 messages.
+        """
+        c = budget_compressor
+        # 500 chars → 500//4 + 10 = 135 tokens; len([text, image]) // 4 + 10 = 10 (bug)
+        big_text = "x" * 500
+        multimodal_content = [
+            {"type": "text", "text": big_text},
+            {"type": "image_url", "image_url": {"url": "https://example.com/img.jpg"}},
+        ]
+        messages = [
+            {"role": "user", "content": "head1"},               # 0
+            {"role": "user", "content": multimodal_content},    # 1: BIG (index under test)
+            {"role": "assistant", "content": "tail1"},           # 2
+            {"role": "user", "content": "tail2"},                # 3
+            {"role": "assistant", "content": "tail3"},           # 4
+            {"role": "user", "content": "tail4"},                # 5
+        ]
+        c.tail_token_budget = 80  # soft_ceiling = 120
+        head_end = 0
+        cut = c._find_tail_cut_by_tokens(messages, head_end)
+        # With the fix: cut=2, tail has 4 messages (soft_ceiling not exceeded by tail1-4).
+        # With the bug: head_end safeguard fires → cut = n - min_tail = 3, only 3 in tail.
+        assert len(messages) - cut >= 4, (
+            f"Expected ≥4 messages in tail (got {len(messages) - cut}, cut={cut}). "
+            "The multimodal message was underestimated — len(list) used instead of text chars."
+        )
+
+    def test_plain_string_content_unchanged(self, budget_compressor):
+        """Plain string content must still be estimated correctly after the fix."""
+        c = budget_compressor
+        # Same layout as the multimodal test but with a plain 500-char string.
+        # Both buggy and fixed code count plain strings the same way (len(str)).
+        # With 135 tokens the plain string also exceeds soft_ceiling=120, so
+        # the walk stops at index 1 and tail has 4 messages — same as the fix path.
+        big_plain = "x" * 500
+        messages = [
+            {"role": "user", "content": "head1"},
+            {"role": "user", "content": big_plain},   # 1: 135 tokens, plain string
+            {"role": "assistant", "content": "tail1"},
+            {"role": "user", "content": "tail2"},
+            {"role": "assistant", "content": "tail3"},
+            {"role": "user", "content": "tail4"},
+        ]
+        c.tail_token_budget = 80
+        head_end = 0
+        cut = c._find_tail_cut_by_tokens(messages, head_end)
+        assert len(messages) - cut >= 4, (
+            f"Plain string regression: expected ≥4 messages in tail, got {len(messages) - cut}"
+        )
+
+    def test_image_only_block_contributes_zero_text_chars(self, budget_compressor):
+        """Image-only content blocks (no 'text' key) contribute 0 chars + base overhead."""
+        c = budget_compressor
+        c.tail_token_budget = 500
+        image_only = [{"type": "image_url", "image_url": {"url": "https://example.com/x.jpg"}}]
+        messages = [
+            {"role": "user", "content": "a" * 4000},
+            {"role": "user", "content": image_only},   # 0 text chars → 10 tokens overhead
+            {"role": "assistant", "content": "ok"},
+        ]
+        head_end = 0
+        cut = c._find_tail_cut_by_tokens(messages, head_end)
+        assert isinstance(cut, int)
+        assert 0 <= cut <= len(messages)
+
 
 class TestUpdateModelBudgets:
     """Regression: update_model() must recalculate token budgets."""