Compare commits

...

1 Commits

Author SHA1 Message Date
Teknium
9ebe435ca1 feat: supersede stale browser snapshots to reclaim context tokens
Port from google-gemini/gemini-cli#24440.

Browser snapshots (accessibility trees from browser_snapshot) are the
largest single tool outputs — each one can be 8,000+ characters.  Only
the most recent snapshot reflects the current page state; older ones
waste context-window tokens.

New pre-pass runs before every API call (zero LLM cost):
- Scans messages for browser_snapshot and browser_vision tool results
- Replaces all but the most recent with a compact placeholder
- Skips short outputs (200 chars or fewer, likely error messages)
- Idempotent — already-superseded snapshots are not re-processed

This complements the existing generic tool output pruning in
ContextCompressor._prune_old_tool_results(), which only triggers
during full compression.  The new function runs proactively every turn,
specifically targeting the highest-token-cost tool outputs.

Includes 11 tests covering all edge cases.
2026-04-01 17:16:42 -07:00
3 changed files with 236 additions and 1 deletions

View File

@@ -674,3 +674,66 @@ Write only the summary body. Do not include any preamble or prefix."""
logger.info("Compression #%d complete", self.compression_count)
return compressed
# ──────────────────────────────────────────────────────────────────────
# Stale browser snapshot superseding (cheap pre-pass, no LLM call)
# ──────────────────────────────────────────────────────────────────────
# Browser tool outputs that contain large page-state data. Only the most
# recent snapshot/vision result is meaningful — earlier ones describe a
# page state that no longer exists.
_BROWSER_SNAPSHOT_TOOLS = frozenset({"browser_snapshot", "browser_vision"})

_SNAPSHOT_SUPERSEDED_PLACEHOLDER = (
    "[Snapshot superseded — a newer snapshot exists later in this conversation. "
    "Call browser_snapshot for current page state.]"
)

# Tool results of this many characters or fewer are left untouched: they are
# most likely error messages or already-pruned outputs, so replacing them
# would reclaim next to nothing.
_SNAPSHOT_MIN_SUPERSEDE_CHARS = 200


def _record_tool_call_names(msg: Dict[str, Any], call_names: Dict[str, str]) -> None:
    """Add ``tool_call_id -> function name`` entries from one assistant message.

    Only dict-shaped ``tool_calls`` entries with a string ``id`` and a string
    ``function.name`` are recorded; anything else is ignored.
    """
    for call in msg.get("tool_calls") or ():
        if not isinstance(call, dict):
            continue
        call_id = call.get("id")
        function = call.get("function")
        if not isinstance(call_id, str) or not isinstance(function, dict):
            continue
        name = function.get("name")
        if isinstance(name, str):
            call_names[call_id] = name


def supersede_stale_browser_snapshots(messages: List[Dict[str, Any]]) -> int:
    """Replace stale browser snapshot tool results with a compact placeholder.

    Every ``role == "tool"`` message produced by one of
    ``_BROWSER_SNAPSHOT_TOOLS`` is located, and all but the last of them have
    their ``content`` swapped for ``_SNAPSHOT_SUPERSEDED_PLACEHOLDER``.  No LLM
    call is involved; this is plain list/string manipulation.

    The tool that produced a result is taken from the message's own ``name``
    key when present.  When it is missing or empty, the name is resolved
    through ``tool_call_id`` against the ``tool_calls`` of assistant messages
    that appear earlier in *messages*.

    A result is left untouched when its content is empty, is not a string,
    already equals the placeholder, or is no longer than
    ``_SNAPSHOT_MIN_SUPERSEDE_CHARS`` characters.

    Args:
        messages: Conversation messages.  The list is modified in place:
            each superseded entry is replaced by a *new* dict that copies all
            fields of the old one except ``content``.

    Returns:
        The number of tool results that were superseded by this call.

    Ported from google-gemini/gemini-cli#24440.
    """
    call_names: Dict[str, str] = {}
    snapshot_indices: List[int] = []

    for i, msg in enumerate(messages):
        role = msg.get("role")
        if role == "assistant":
            # Remember which tool each call id refers to so that later tool
            # results without a ``name`` key can still be attributed.
            _record_tool_call_names(msg, call_names)
            continue
        if role != "tool":
            continue

        tool_name = msg.get("name")
        if not tool_name:
            call_id = msg.get("tool_call_id")
            tool_name = call_names.get(call_id, "") if isinstance(call_id, str) else ""
        if tool_name in _BROWSER_SNAPSHOT_TOOLS:
            snapshot_indices.append(i)

    # Nothing to do if there are 0 or 1 snapshots.
    if len(snapshot_indices) < 2:
        return 0

    # Replace all but the last snapshot.
    superseded = 0
    for idx in snapshot_indices[:-1]:
        content = messages[idx].get("content", "")
        # Non-string content (e.g. a list of content parts) has no meaningful
        # character length, so it is never replaced.
        if not isinstance(content, str) or not content:
            continue
        if content == _SNAPSHOT_SUPERSEDED_PLACEHOLDER:
            continue
        if len(content) > _SNAPSHOT_MIN_SUPERSEDE_CHARS:
            # Build a fresh dict instead of mutating the existing one, so any
            # other reference to the original message keeps its full content.
            messages[idx] = {**messages[idx], "content": _SNAPSHOT_SUPERSEDED_PLACEHOLDER}
            superseded += 1

    if superseded:
        logger.info("Superseded %d stale browser snapshot(s)", superseded)
    return superseded

View File

@@ -86,7 +86,7 @@ from agent.model_metadata import (
get_next_probe_tier, parse_context_limit_from_error,
save_context_length,
)
from agent.context_compressor import ContextCompressor
from agent.context_compressor import ContextCompressor, supersede_stale_browser_snapshots
from agent.prompt_caching import apply_anthropic_cache_control
from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS
from agent.usage_pricing import estimate_usage_cost, normalize_usage
@@ -6710,6 +6710,14 @@ class AIAgent:
and "skill_manage" in self.valid_tool_names):
self._iters_since_skill += 1
# Supersede stale browser snapshots before building the API request.
# Each browser_snapshot returns a full accessibility tree (8,000+ chars);
# only the most recent one reflects the current page state. Older ones
# are replaced with a compact placeholder to reclaim context tokens.
# This is a cheap pre-pass (no LLM call) that runs every turn.
# Ported from google-gemini/gemini-cli#24440.
supersede_stale_browser_snapshots(messages)
# Prepare messages for API call
# If we have an ephemeral system prompt, prepend it to the messages
# Note: Reasoning is embedded in content via <think> tags for trajectory storage.

View File

@@ -562,3 +562,167 @@ class TestSummaryTargetRatio:
with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
c = ContextCompressor(model="test", quiet_mode=True)
assert c.protect_last_n == 20
# ──────────────────────────────────────────────────────────────────────
# Tests for supersede_stale_browser_snapshots
# ──────────────────────────────────────────────────────────────────────
from agent.context_compressor import (
supersede_stale_browser_snapshots,
_SNAPSHOT_SUPERSEDED_PLACEHOLDER,
)
def _tool_msg(name: str, content: str, call_id: str = "call_1") -> dict:
    """Build a tool-result message for tool *name* carrying *content*."""
    return dict(role="tool", name=name, content=content, tool_call_id=call_id)
def _assistant_msg(content: str = "ok") -> dict:
    """Build an assistant message; the text defaults to ``"ok"``."""
    return dict(role="assistant", content=content)
def _user_msg(content: str = "do something") -> dict:
    """Build a user message; the text defaults to ``"do something"``."""
    return dict(role="user", content=content)
class TestSupersedeStaleSnapshots:
    """Tests for the browser snapshot superseding pre-pass.

    Each test checks the returned count and, where it matters, that every
    message that should have been left alone really is unchanged.
    """

    def test_no_snapshots_noop(self):
        """No browser snapshots → nothing changes."""
        messages = [
            _user_msg("navigate to example.com"),
            _tool_msg("browser_navigate", '{"url": "https://example.com"}'),
            _assistant_msg("Navigated."),
        ]
        original = [m.copy() for m in messages]

        count = supersede_stale_browser_snapshots(messages)

        assert count == 0
        assert messages == original

    def test_single_snapshot_noop(self):
        """Only one snapshot → nothing to supersede."""
        messages = [
            _user_msg(),
            _tool_msg("browser_snapshot", "A" * 5000, "call_snap_1"),
            _assistant_msg(),
        ]

        count = supersede_stale_browser_snapshots(messages)

        assert count == 0
        assert messages[1]["content"] == "A" * 5000

    def test_two_snapshots_supersedes_first(self):
        """Two snapshots → first one gets replaced."""
        messages = [
            _user_msg(),
            _tool_msg("browser_snapshot", "A" * 5000, "call_1"),
            _assistant_msg("I see the page."),
            _user_msg("scroll down"),
            _tool_msg("browser_snapshot", "B" * 5000, "call_2"),
            _assistant_msg("Scrolled."),
        ]

        count = supersede_stale_browser_snapshots(messages)

        assert count == 1
        assert messages[1]["content"] == _SNAPSHOT_SUPERSEDED_PLACEHOLDER
        # Latest snapshot untouched
        assert messages[4]["content"] == "B" * 5000

    def test_three_snapshots_supersedes_first_two(self):
        """Three snapshots → first two get replaced, last one kept."""
        messages = [
            _tool_msg("browser_snapshot", "X" * 8000, "c1"),
            _tool_msg("browser_click", '{"clicked": true}', "c2"),
            _tool_msg("browser_snapshot", "Y" * 8000, "c3"),
            _tool_msg("browser_click", '{"clicked": true}', "c4"),
            _tool_msg("browser_snapshot", "Z" * 8000, "c5"),
        ]

        count = supersede_stale_browser_snapshots(messages)

        assert count == 2
        assert messages[0]["content"] == _SNAPSHOT_SUPERSEDED_PLACEHOLDER
        assert messages[2]["content"] == _SNAPSHOT_SUPERSEDED_PLACEHOLDER
        assert messages[4]["content"] == "Z" * 8000

    def test_non_snapshot_tools_untouched(self):
        """Other tool results are never modified."""
        messages = [
            _tool_msg("browser_snapshot", "A" * 1000, "c1"),
            _tool_msg("browser_click", "Clicked element @e5", "c2"),
            _tool_msg("browser_navigate", '{"url": "https://example.com"}', "c3"),
            _tool_msg("browser_snapshot", "B" * 1000, "c4"),
        ]

        count = supersede_stale_browser_snapshots(messages)

        assert count == 1
        assert messages[0]["content"] == _SNAPSHOT_SUPERSEDED_PLACEHOLDER
        # Click and navigate untouched
        assert messages[1]["content"] == "Clicked element @e5"
        assert messages[2]["content"] == '{"url": "https://example.com"}'
        assert messages[3]["content"] == "B" * 1000

    def test_already_superseded_noop(self):
        """Snapshots already replaced are not counted again."""
        messages = [
            _tool_msg("browser_snapshot", _SNAPSHOT_SUPERSEDED_PLACEHOLDER, "c1"),
            _tool_msg("browser_snapshot", "current page" * 100, "c2"),
        ]
        original = [m.copy() for m in messages]

        count = supersede_stale_browser_snapshots(messages)

        assert count == 0
        assert messages == original

    def test_short_content_not_superseded(self):
        """Snapshots with very short content (errors, etc.) are skipped."""
        messages = [
            _tool_msg("browser_snapshot", "Error: no session", "c1"),
            _tool_msg("browser_snapshot", "B" * 5000, "c2"),
        ]

        count = supersede_stale_browser_snapshots(messages)

        assert count == 0  # "Error: no session" is well under the threshold
        assert messages[0]["content"] == "Error: no session"
        assert messages[1]["content"] == "B" * 5000

    def test_length_threshold_boundary(self):
        """Exactly 200 chars is kept; 201 chars is superseded."""
        messages = [
            _tool_msg("browser_snapshot", "a" * 200, "c1"),
            _tool_msg("browser_snapshot", "b" * 201, "c2"),
            _tool_msg("browser_snapshot", "c" * 5000, "c3"),
        ]

        count = supersede_stale_browser_snapshots(messages)

        assert count == 1
        assert messages[0]["content"] == "a" * 200
        assert messages[1]["content"] == _SNAPSHOT_SUPERSEDED_PLACEHOLDER
        assert messages[2]["content"] == "c" * 5000

    def test_browser_vision_also_superseded(self):
        """browser_vision results are also superseded alongside browser_snapshot."""
        messages = [
            _tool_msg("browser_vision", "I see a login form with..." + "x" * 1000, "c1"),
            _tool_msg("browser_snapshot", "big tree" * 500, "c2"),
            _tool_msg("browser_vision", "Now the page shows..." + "y" * 1000, "c3"),
        ]

        count = supersede_stale_browser_snapshots(messages)

        assert count == 2
        assert messages[0]["content"] == _SNAPSHOT_SUPERSEDED_PLACEHOLDER
        assert messages[1]["content"] == _SNAPSHOT_SUPERSEDED_PLACEHOLDER
        # Last one (browser_vision) kept
        assert "Now the page shows" in messages[2]["content"]

    def test_preserves_other_message_fields(self):
        """Superseding preserves tool_call_id, name, role, and any extra fields."""
        messages = [
            {
                "role": "tool",
                "name": "browser_snapshot",
                "content": "A" * 5000,
                "tool_call_id": "call_abc",
                "custom_field": "preserved",
            },
            _tool_msg("browser_snapshot", "B" * 5000, "call_def"),
        ]

        count = supersede_stale_browser_snapshots(messages)

        assert count == 1
        assert messages[0]["tool_call_id"] == "call_abc"
        assert messages[0]["name"] == "browser_snapshot"
        assert messages[0]["role"] == "tool"
        assert messages[0]["custom_field"] == "preserved"
        assert messages[0]["content"] == _SNAPSHOT_SUPERSEDED_PLACEHOLDER

    def test_empty_messages_noop(self):
        """Empty message list doesn't crash."""
        messages = []

        count = supersede_stale_browser_snapshots(messages)

        assert count == 0
        assert messages == []

    def test_idempotent(self):
        """Running twice produces the same result."""
        messages = [
            _tool_msg("browser_snapshot", "A" * 5000, "c1"),
            _tool_msg("browser_snapshot", "B" * 5000, "c2"),
        ]

        count1 = supersede_stale_browser_snapshots(messages)
        assert count1 == 1
        after_first = [m.copy() for m in messages]

        count2 = supersede_stale_browser_snapshots(messages)
        assert count2 == 0  # Already superseded
        # The second pass must leave the messages exactly as the first did.
        assert messages == after_first