From b7bbc62503d54cd95de413df7cda2e802fec0206 Mon Sep 17 00:00:00 2001 From: swithek <52840391+swithek@users.noreply.github.com> Date: Sun, 26 Apr 2026 22:50:40 +0200 Subject: [PATCH] fix(compressor): _prune_old_tool_results boundary direction --- agent/context_compressor.py | 11 ++++++- tests/agent/test_context_compressor.py | 41 ++++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index 44d54d530c..69151a117a 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -554,7 +554,16 @@ class ContextCompressor(ContextEngine): break accumulated += msg_tokens boundary = i - prune_boundary = max(boundary, len(result) - min_protect) + # Translate the budget walk into a "protected count", apply the + # floor in count-space (where `max` reads naturally: protect at + # least `min_protect` messages or whatever the budget reserved, + # whichever is more), then convert back to a prune boundary. + # Doing this in index-space with `max` would invert the direction + # (smaller index = MORE protected), so a generous budget would + # silently get truncated back down to `min_protect`. + budget_protect_count = len(result) - boundary + protected_count = max(budget_protect_count, min_protect) + prune_boundary = len(result) - protected_count else: prune_boundary = len(result) - protect_tail_count diff --git a/tests/agent/test_context_compressor.py b/tests/agent/test_context_compressor.py index 8d1de377b0..fd88cc7a96 100644 --- a/tests/agent/test_context_compressor.py +++ b/tests/agent/test_context_compressor.py @@ -1281,6 +1281,47 @@ class TestTokenBudgetTailProtection: assert isinstance(cut, int) assert 0 <= cut <= len(messages) + def test_generous_budget_protects_everything_floor_does_not_override( + self, budget_compressor + ): + """A budget that covers the whole transcript must prune nothing — + ``protect_tail_count`` is a minimum floor, not a ceiling.""" + c = budget_compressor + + # 100 alternating assistant/tool messages. Each tool result has + # *unique* content so the dedup pass (Pass 1, which is independent + # of prune_boundary) is a no-op and we isolate the boundary logic. + messages = [] + for i in range(50): + messages.append({ + "role": "assistant", "content": None, + "tool_calls": [{ + "id": f"c{i}", + "type": "function", + "function": {"name": "noop", "arguments": "{}"}, + }], + }) + messages.append({ + "role": "tool", + "tool_call_id": f"c{i}", + "content": f"unique-tool-output-{i:03d}-" + ("x" * 250), + }) + + # Budget large enough to cover the whole transcript many times over, + # so the budget walk completes without hitting its break condition + # and the boundary lands at 0 ("protect everything"). + _, pruned = c._prune_old_tool_results( + messages, + protect_tail_count=20, + protect_tail_tokens=10_000_000, + ) + + assert pruned == 0, ( + "budget said protect everything, but the floor still pruned " + f"{pruned} messages — protect_tail_count is acting as a ceiling, " + "not a minimum floor" + ) + class TestUpdateModelBudgets: """Regression: update_model() must recalculate token budgets."""