From 943465235ee8e33f903e34b964ec6bfe5b7cffbf Mon Sep 17 00:00:00 2001 From: briandevans <252620095+briandevans@users.noreply.github.com> Date: Sun, 26 Apr 2026 09:09:03 -0700 Subject: [PATCH] fix(compressor): guard against bare-string items in multimodal content list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit raw_content from message["content"] can be a list that contains bare strings, not only dicts. The previous `p.get("text", "")` call raised AttributeError on string items, crashing context compression for any session that had a message with mixed content. Guard with isinstance checks: dict → .get("text"), str → len(p), fallback → len(str(p)). Adds a regression test covering the bare-string case that would have AttributeError'd on the pre-fix code. Co-Authored-By: Claude Opus 4.7 (1M context) --- agent/context_compressor.py | 9 ++++++++- tests/agent/test_context_compressor.py | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 306c07b216..9f90a96163 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -1084,7 +1084,14 @@ The user has requested that this compaction PRIORITISE preserving all informatio msg = messages[i] raw_content = msg.get("content") or "" content_len = ( - sum(len(p.get("text", "")) for p in raw_content) + sum( + len(p.get("text", "")) + if isinstance(p, dict) + else len(p) + if isinstance(p, str) + else len(str(p)) + for p in raw_content + ) if isinstance(raw_content, list) else len(raw_content) ) diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index ed0848b3c8..883745d6c8 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -922,6 +922,21 @@ class TestTokenBudgetTailProtection: assert isinstance(cut, int) assert 0 <= cut <= len(messages) + def test_mixed_list_with_bare_strings_does_not_crash(self, budget_compressor): + """Content list may contain bare strings (not dicts) — must not raise AttributeError.""" + c = budget_compressor + c.tail_token_budget = 500 + # Bare string item alongside a dict item — normalisation elsewhere allows this. + mixed_content = ["Hello, world!", {"type": "text", "text": "extra text"}] + messages = [ + {"role": "user", "content": mixed_content}, + {"role": "assistant", "content": "ok"}, + ] + head_end = 0 + cut = c._find_tail_cut_by_tokens(messages, head_end) + assert isinstance(cut, int) + assert 0 <= cut <= len(messages) + class TestUpdateModelBudgets: """Regression: update_model() must recalculate token budgets."""