mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-02 16:57:36 +08:00
Compare commits
1 Commits
fix/plugin
...
gemini-cli
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9ebe435ca1 |
@@ -674,3 +674,66 @@ Write only the summary body. Do not include any preamble or prefix."""
|
||||
logger.info("Compression #%d complete", self.compression_count)
|
||||
|
||||
return compressed
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Stale browser snapshot superseding (cheap pre-pass, no LLM call)
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
# Browser tool outputs that contain large page-state data. Only the most
# recent snapshot/vision result is meaningful — earlier ones describe a
# page state that no longer exists.
_BROWSER_SNAPSHOT_TOOLS = frozenset({"browser_snapshot", "browser_vision"})

_SNAPSHOT_SUPERSEDED_PLACEHOLDER = (
    "[Snapshot superseded — a newer snapshot exists later in this conversation. "
    "Call browser_snapshot for current page state.]"
)

# Results at or below this many characters (short error messages, outputs that
# were already pruned) are left alone: replacing them saves nothing useful.
_SNAPSHOT_MIN_SUPERSEDE_CHARS = 200


def _index_tool_call_names(messages: List[Dict[str, Any]]) -> Dict[str, str]:
    """Map each ``tool_call_id`` to the tool name declared by the assistant.

    Only dict-shaped ``tool_calls`` entries of the form
    ``{"id": ..., "function": {"name": ...}}`` are indexed; anything else is
    ignored rather than raising.
    """
    names: Dict[str, str] = {}
    for msg in messages:
        if msg.get("role") != "assistant":
            continue
        for call in msg.get("tool_calls") or ():
            if not isinstance(call, dict):
                continue
            call_id = call.get("id")
            function = call.get("function")
            if not isinstance(call_id, str) or not isinstance(function, dict):
                continue
            name = function.get("name")
            if isinstance(name, str) and name:
                names[call_id] = name
    return names


def supersede_stale_browser_snapshots(messages: List[Dict[str, Any]]) -> int:
    """Replace stale browser snapshot tool results with a compact placeholder.

    Every ``role="tool"`` message produced by one of ``_BROWSER_SNAPSHOT_TOOLS``
    is treated as a snapshot. All snapshots except the last one in *messages*
    have their content swapped for ``_SNAPSHOT_SUPERSEDED_PLACEHOLDER``,
    provided the content is a string longer than
    ``_SNAPSHOT_MIN_SUPERSEDE_CHARS`` characters. No LLM call is involved —
    this is plain string replacement.

    The tool is identified by the message's ``name`` field. When that field
    is missing or empty, the name is looked up through ``tool_call_id`` in
    the ``tool_calls`` of the assistant messages in the same list.

    *messages* is updated in place at the list level: a superseded entry is
    replaced by a shallow copy with new ``content``, so the original message
    dict object is never mutated and all its other fields are carried over.

    Args:
        messages: Conversation history as a list of message dicts.

    Returns:
        The number of tool results that were superseded by this call.

    Ported from google-gemini/gemini-cli#24440.
    """
    # Built on first use only: histories whose tool messages all carry
    # ``name`` never pay for the extra pass over assistant messages.
    call_names = None

    # Collect indices of all browser snapshot tool results.
    snapshot_indices: List[int] = []
    for i, msg in enumerate(messages):
        if msg.get("role") != "tool":
            continue
        tool_name = msg.get("name")
        if not tool_name:
            if call_names is None:
                call_names = _index_tool_call_names(messages)
            call_id = msg.get("tool_call_id")
            tool_name = call_names.get(call_id, "") if isinstance(call_id, str) else ""
        if tool_name in _BROWSER_SNAPSHOT_TOOLS:
            snapshot_indices.append(i)

    # Nothing to do if there are 0 or 1 snapshots.
    if len(snapshot_indices) < 2:
        return 0

    # Replace all but the last snapshot.
    superseded = 0
    for idx in snapshot_indices[:-1]:
        content = messages[idx].get("content", "")
        # The threshold is expressed in characters, so only plain-string
        # content is measured; structured (non-str) content is left untouched.
        if not isinstance(content, str) or content == _SNAPSHOT_SUPERSEDED_PLACEHOLDER:
            continue
        if len(content) > _SNAPSHOT_MIN_SUPERSEDE_CHARS:
            messages[idx] = {**messages[idx], "content": _SNAPSHOT_SUPERSEDED_PLACEHOLDER}
            superseded += 1

    if superseded:
        logger.info("Superseded %d stale browser snapshot(s)", superseded)

    return superseded
|
||||
|
||||
10
run_agent.py
10
run_agent.py
@@ -86,7 +86,7 @@ from agent.model_metadata import (
|
||||
get_next_probe_tier, parse_context_limit_from_error,
|
||||
save_context_length,
|
||||
)
|
||||
from agent.context_compressor import ContextCompressor
|
||||
from agent.context_compressor import ContextCompressor, supersede_stale_browser_snapshots
|
||||
from agent.prompt_caching import apply_anthropic_cache_control
|
||||
from agent.prompt_builder import build_skills_system_prompt, build_context_files_prompt, load_soul_md, TOOL_USE_ENFORCEMENT_GUIDANCE, TOOL_USE_ENFORCEMENT_MODELS, DEVELOPER_ROLE_MODELS
|
||||
from agent.usage_pricing import estimate_usage_cost, normalize_usage
|
||||
@@ -6710,6 +6710,14 @@ class AIAgent:
|
||||
and "skill_manage" in self.valid_tool_names):
|
||||
self._iters_since_skill += 1
|
||||
|
||||
# Supersede stale browser snapshots before building the API request.
|
||||
# Each browser_snapshot returns a full accessibility tree (8,000+ chars);
|
||||
# only the most recent one reflects the current page state. Older ones
|
||||
# are replaced with a compact placeholder to reclaim context tokens.
|
||||
# This is a cheap pre-pass (no LLM call) that runs every turn.
|
||||
# Ported from google-gemini/gemini-cli#24440.
|
||||
supersede_stale_browser_snapshots(messages)
|
||||
|
||||
# Prepare messages for API call
|
||||
# If we have an ephemeral system prompt, prepend it to the messages
|
||||
# Note: Reasoning is embedded in content via <think> tags for trajectory storage.
|
||||
|
||||
@@ -562,3 +562,167 @@ class TestSummaryTargetRatio:
|
||||
with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
|
||||
c = ContextCompressor(model="test", quiet_mode=True)
|
||||
assert c.protect_last_n == 20
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
# Tests for supersede_stale_browser_snapshots
|
||||
# ──────────────────────────────────────────────────────────────────────
|
||||
|
||||
from agent.context_compressor import (
|
||||
supersede_stale_browser_snapshots,
|
||||
_SNAPSHOT_SUPERSEDED_PLACEHOLDER,
|
||||
)
|
||||
|
||||
|
||||
def _tool_msg(name: str, content: str, call_id: str = "call_1") -> dict:
    """Build a ``role="tool"`` result message for tool *name*.

    *content* becomes the message body and *call_id* its ``tool_call_id``.
    """
    return dict(role="tool", name=name, content=content, tool_call_id=call_id)
|
||||
|
||||
|
||||
def _assistant_msg(content: str = "ok") -> dict:
    """Build an assistant message whose text is *content*."""
    return dict(role="assistant", content=content)
|
||||
|
||||
|
||||
def _user_msg(content: str = "do something") -> dict:
    """Build a user message whose text is *content*."""
    return dict(role="user", content=content)
|
||||
|
||||
|
||||
class TestSupersedeStaleSnapshots:
    """Tests for the browser snapshot superseding pre-pass.

    Each test checks both the returned count and the resulting message
    contents, so a change that reports the right count while clobbering the
    wrong message is still caught.
    """

    def test_no_snapshots_noop(self):
        """No browser snapshots → nothing changes."""
        messages = [
            _user_msg("navigate to example.com"),
            _tool_msg("browser_navigate", '{"url": "https://example.com"}'),
            _assistant_msg("Navigated."),
        ]
        original = [m.copy() for m in messages]
        count = supersede_stale_browser_snapshots(messages)
        assert count == 0
        assert messages == original

    def test_single_snapshot_noop(self):
        """Only one snapshot → nothing to supersede."""
        messages = [
            _user_msg(),
            _tool_msg("browser_snapshot", "A" * 5000, "call_snap_1"),
            _assistant_msg(),
        ]
        count = supersede_stale_browser_snapshots(messages)
        assert count == 0
        # Compare the full text, not just its length.
        assert messages[1]["content"] == "A" * 5000

    def test_two_snapshots_supersedes_first(self):
        """Two snapshots → first one gets replaced."""
        messages = [
            _user_msg(),
            _tool_msg("browser_snapshot", "A" * 5000, "call_1"),
            _assistant_msg("I see the page."),
            _user_msg("scroll down"),
            _tool_msg("browser_snapshot", "B" * 5000, "call_2"),
            _assistant_msg("Scrolled."),
        ]
        count = supersede_stale_browser_snapshots(messages)
        assert count == 1
        assert messages[1]["content"] == _SNAPSHOT_SUPERSEDED_PLACEHOLDER
        # Latest snapshot untouched
        assert messages[4]["content"] == "B" * 5000

    def test_three_snapshots_supersedes_first_two(self):
        """Three snapshots → first two get replaced, last one kept."""
        messages = [
            _tool_msg("browser_snapshot", "X" * 8000, "c1"),
            _tool_msg("browser_click", '{"clicked": true}', "c2"),
            _tool_msg("browser_snapshot", "Y" * 8000, "c3"),
            _tool_msg("browser_click", '{"clicked": true}', "c4"),
            _tool_msg("browser_snapshot", "Z" * 8000, "c5"),
        ]
        count = supersede_stale_browser_snapshots(messages)
        assert count == 2
        assert messages[0]["content"] == _SNAPSHOT_SUPERSEDED_PLACEHOLDER
        assert messages[2]["content"] == _SNAPSHOT_SUPERSEDED_PLACEHOLDER
        assert messages[4]["content"] == "Z" * 8000

    def test_non_snapshot_tools_untouched(self):
        """Other tool results are never modified."""
        messages = [
            _tool_msg("browser_snapshot", "A" * 1000, "c1"),
            _tool_msg("browser_click", "Clicked element @e5", "c2"),
            _tool_msg("browser_navigate", '{"url": "https://example.com"}', "c3"),
            _tool_msg("browser_snapshot", "B" * 1000, "c4"),
        ]
        count = supersede_stale_browser_snapshots(messages)
        assert count == 1
        # The single superseded result is the older snapshot.
        assert messages[0]["content"] == _SNAPSHOT_SUPERSEDED_PLACEHOLDER
        # Click and navigate untouched
        assert messages[1]["content"] == "Clicked element @e5"
        assert messages[2]["content"] == '{"url": "https://example.com"}'
        # Latest snapshot untouched
        assert messages[3]["content"] == "B" * 1000

    def test_already_superseded_noop(self):
        """Snapshots already replaced are not counted again."""
        messages = [
            _tool_msg("browser_snapshot", _SNAPSHOT_SUPERSEDED_PLACEHOLDER, "c1"),
            _tool_msg("browser_snapshot", "current page" * 100, "c2"),
        ]
        count = supersede_stale_browser_snapshots(messages)
        assert count == 0
        assert messages[0]["content"] == _SNAPSHOT_SUPERSEDED_PLACEHOLDER
        assert messages[1]["content"] == "current page" * 100

    def test_short_content_not_superseded(self):
        """Snapshots with very short content (errors, etc.) are skipped."""
        messages = [
            _tool_msg("browser_snapshot", "Error: no session", "c1"),
            _tool_msg("browser_snapshot", "B" * 5000, "c2"),
        ]
        count = supersede_stale_browser_snapshots(messages)
        assert count == 0  # "Error: no session" is <200 chars
        # The short error text must survive verbatim, as must the latest snapshot.
        assert messages[0]["content"] == "Error: no session"
        assert messages[1]["content"] == "B" * 5000

    def test_browser_vision_also_superseded(self):
        """browser_vision results are also superseded alongside browser_snapshot."""
        messages = [
            _tool_msg("browser_vision", "I see a login form with..." + "x" * 1000, "c1"),
            _tool_msg("browser_snapshot", "big tree" * 500, "c2"),
            _tool_msg("browser_vision", "Now the page shows..." + "y" * 1000, "c3"),
        ]
        count = supersede_stale_browser_snapshots(messages)
        assert count == 2
        assert messages[0]["content"] == _SNAPSHOT_SUPERSEDED_PLACEHOLDER
        assert messages[1]["content"] == _SNAPSHOT_SUPERSEDED_PLACEHOLDER
        # Last one (browser_vision) kept
        assert "Now the page shows" in messages[2]["content"]

    def test_preserves_other_message_fields(self):
        """Superseding preserves tool_call_id, name, role, and any extra fields."""
        messages = [
            {
                "role": "tool",
                "name": "browser_snapshot",
                "content": "A" * 5000,
                "tool_call_id": "call_abc",
                "custom_field": "preserved",
            },
            _tool_msg("browser_snapshot", "B" * 5000, "call_def"),
        ]
        count = supersede_stale_browser_snapshots(messages)
        assert count == 1
        assert messages[0]["tool_call_id"] == "call_abc"
        assert messages[0]["name"] == "browser_snapshot"
        assert messages[0]["role"] == "tool"
        assert messages[0]["custom_field"] == "preserved"
        assert messages[0]["content"] == _SNAPSHOT_SUPERSEDED_PLACEHOLDER

    def test_empty_messages_noop(self):
        """Empty message list doesn't crash."""
        messages = []
        count = supersede_stale_browser_snapshots(messages)
        assert count == 0
        assert messages == []

    def test_idempotent(self):
        """Running twice produces the same result."""
        messages = [
            _tool_msg("browser_snapshot", "A" * 5000, "c1"),
            _tool_msg("browser_snapshot", "B" * 5000, "c2"),
        ]
        count1 = supersede_stale_browser_snapshots(messages)
        assert count1 == 1
        after_first = [dict(m) for m in messages]
        count2 = supersede_stale_browser_snapshots(messages)
        assert count2 == 0  # Already superseded
        # The second pass must leave every message exactly as the first left it.
        assert messages == after_first
|
||||
|
||||
Reference in New Issue
Block a user