mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 15:31:38 +08:00
Compare commits
3 Commits
fix/plugin
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2faa9c4c75 | ||
|
|
9f8312178f | ||
|
|
c97b36230e |
@@ -154,12 +154,15 @@ class ContextCompressor:
|
|||||||
|
|
||||||
def _prune_old_tool_results(
|
def _prune_old_tool_results(
|
||||||
self, messages: List[Dict[str, Any]], protect_tail_count: int,
|
self, messages: List[Dict[str, Any]], protect_tail_count: int,
|
||||||
|
protect_tail_tokens: int | None = None,
|
||||||
) -> tuple[List[Dict[str, Any]], int]:
|
) -> tuple[List[Dict[str, Any]], int]:
|
||||||
"""Replace old tool result contents with a short placeholder.
|
"""Replace old tool result contents with a short placeholder.
|
||||||
|
|
||||||
Walks backward from the end, protecting the most recent
|
Walks backward from the end, protecting the most recent messages that
|
||||||
``protect_tail_count`` messages. Older tool results get their
|
fall within ``protect_tail_tokens`` (when provided) OR the last
|
||||||
content replaced with a placeholder string.
|
``protect_tail_count`` messages (backward-compatible default).
|
||||||
|
When both are given, the token budget takes priority and the message
|
||||||
|
count acts as a hard minimum floor.
|
||||||
|
|
||||||
Returns (pruned_messages, pruned_count).
|
Returns (pruned_messages, pruned_count).
|
||||||
"""
|
"""
|
||||||
@@ -168,7 +171,29 @@ class ContextCompressor:
|
|||||||
|
|
||||||
result = [m.copy() for m in messages]
|
result = [m.copy() for m in messages]
|
||||||
pruned = 0
|
pruned = 0
|
||||||
prune_boundary = len(result) - protect_tail_count
|
|
||||||
|
# Determine the prune boundary
|
||||||
|
if protect_tail_tokens is not None and protect_tail_tokens > 0:
|
||||||
|
# Token-budget approach: walk backward accumulating tokens
|
||||||
|
accumulated = 0
|
||||||
|
boundary = len(result)
|
||||||
|
min_protect = min(protect_tail_count, len(result) - 1)
|
||||||
|
for i in range(len(result) - 1, -1, -1):
|
||||||
|
msg = result[i]
|
||||||
|
content_len = len(msg.get("content") or "")
|
||||||
|
msg_tokens = content_len // _CHARS_PER_TOKEN + 10
|
||||||
|
for tc in msg.get("tool_calls") or []:
|
||||||
|
if isinstance(tc, dict):
|
||||||
|
args = tc.get("function", {}).get("arguments", "")
|
||||||
|
msg_tokens += len(args) // _CHARS_PER_TOKEN
|
||||||
|
if accumulated + msg_tokens > protect_tail_tokens and (len(result) - i) >= min_protect:
|
||||||
|
boundary = i
|
||||||
|
break
|
||||||
|
accumulated += msg_tokens
|
||||||
|
boundary = i
|
||||||
|
prune_boundary = max(boundary, len(result) - min_protect)
|
||||||
|
else:
|
||||||
|
prune_boundary = len(result) - protect_tail_count
|
||||||
|
|
||||||
for i in range(prune_boundary):
|
for i in range(prune_boundary):
|
||||||
msg = result[i]
|
msg = result[i]
|
||||||
@@ -533,13 +558,20 @@ Write only the summary body. Do not include any preamble or prefix."""
|
|||||||
derived from ``summary_target_ratio * context_length``, so it
|
derived from ``summary_target_ratio * context_length``, so it
|
||||||
scales automatically with the model's context window.
|
scales automatically with the model's context window.
|
||||||
|
|
||||||
Never cuts inside a tool_call/result group. Falls back to the old
|
Token budget is the primary criterion. A hard minimum of 3 messages
|
||||||
``protect_last_n`` if the budget would protect fewer messages.
|
is always protected, but the budget is allowed to exceed by up to
|
||||||
|
1.5x to avoid cutting inside an oversized message (tool output, file
|
||||||
|
read, etc.). If even the minimum 3 messages exceed 1.5x the budget
|
||||||
|
the cut is placed right after the head so compression still runs.
|
||||||
|
|
||||||
|
Never cuts inside a tool_call/result group.
|
||||||
"""
|
"""
|
||||||
if token_budget is None:
|
if token_budget is None:
|
||||||
token_budget = self.tail_token_budget
|
token_budget = self.tail_token_budget
|
||||||
n = len(messages)
|
n = len(messages)
|
||||||
min_tail = self.protect_last_n
|
# Hard minimum: always keep at least 3 messages in the tail
|
||||||
|
min_tail = min(3, n - head_end - 1) if n - head_end > 1 else 0
|
||||||
|
soft_ceiling = int(token_budget * 1.5)
|
||||||
accumulated = 0
|
accumulated = 0
|
||||||
cut_idx = n # start from beyond the end
|
cut_idx = n # start from beyond the end
|
||||||
|
|
||||||
@@ -552,21 +584,21 @@ Write only the summary body. Do not include any preamble or prefix."""
|
|||||||
if isinstance(tc, dict):
|
if isinstance(tc, dict):
|
||||||
args = tc.get("function", {}).get("arguments", "")
|
args = tc.get("function", {}).get("arguments", "")
|
||||||
msg_tokens += len(args) // _CHARS_PER_TOKEN
|
msg_tokens += len(args) // _CHARS_PER_TOKEN
|
||||||
if accumulated + msg_tokens > token_budget and (n - i) >= min_tail:
|
# Stop once we exceed the soft ceiling (unless we haven't hit min_tail yet)
|
||||||
|
if accumulated + msg_tokens > soft_ceiling and (n - i) >= min_tail:
|
||||||
break
|
break
|
||||||
accumulated += msg_tokens
|
accumulated += msg_tokens
|
||||||
cut_idx = i
|
cut_idx = i
|
||||||
|
|
||||||
# Ensure we protect at least protect_last_n messages
|
# Ensure we protect at least min_tail messages
|
||||||
fallback_cut = n - min_tail
|
fallback_cut = n - min_tail
|
||||||
if cut_idx > fallback_cut:
|
if cut_idx > fallback_cut:
|
||||||
cut_idx = fallback_cut
|
cut_idx = fallback_cut
|
||||||
|
|
||||||
# If the token budget would protect everything (small conversations),
|
# If the token budget would protect everything (small conversations),
|
||||||
# fall back to the fixed protect_last_n approach so compression can
|
# force a cut after the head so compression can still remove middle turns.
|
||||||
# still remove middle turns.
|
|
||||||
if cut_idx <= head_end:
|
if cut_idx <= head_end:
|
||||||
cut_idx = fallback_cut
|
cut_idx = max(fallback_cut, head_end + 1)
|
||||||
|
|
||||||
# Align to avoid splitting tool groups
|
# Align to avoid splitting tool groups
|
||||||
cut_idx = self._align_boundary_backward(messages, cut_idx)
|
cut_idx = self._align_boundary_backward(messages, cut_idx)
|
||||||
@@ -591,12 +623,13 @@ Write only the summary body. Do not include any preamble or prefix."""
|
|||||||
up so the API never receives mismatched IDs.
|
up so the API never receives mismatched IDs.
|
||||||
"""
|
"""
|
||||||
n_messages = len(messages)
|
n_messages = len(messages)
|
||||||
if n_messages <= self.protect_first_n + self.protect_last_n + 1:
|
# Only need head + 3 tail messages minimum (token budget decides the real tail size)
|
||||||
|
_min_for_compress = self.protect_first_n + 3 + 1
|
||||||
|
if n_messages <= _min_for_compress:
|
||||||
if not self.quiet_mode:
|
if not self.quiet_mode:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
"Cannot compress: only %d messages (need > %d)",
|
"Cannot compress: only %d messages (need > %d)",
|
||||||
n_messages,
|
n_messages, _min_for_compress,
|
||||||
self.protect_first_n + self.protect_last_n + 1,
|
|
||||||
)
|
)
|
||||||
return messages
|
return messages
|
||||||
|
|
||||||
@@ -604,7 +637,8 @@ Write only the summary body. Do not include any preamble or prefix."""
|
|||||||
|
|
||||||
# Phase 1: Prune old tool results (cheap, no LLM call)
|
# Phase 1: Prune old tool results (cheap, no LLM call)
|
||||||
messages, pruned_count = self._prune_old_tool_results(
|
messages, pruned_count = self._prune_old_tool_results(
|
||||||
messages, protect_tail_count=self.protect_last_n * 3,
|
messages, protect_tail_count=self.protect_last_n,
|
||||||
|
protect_tail_tokens=self.tail_token_budget,
|
||||||
)
|
)
|
||||||
if pruned_count and not self.quiet_mode:
|
if pruned_count and not self.quiet_mode:
|
||||||
logger.info("Pre-compression: pruned %d old tool result(s)", pruned_count)
|
logger.info("Pre-compression: pruned %d old tool result(s)", pruned_count)
|
||||||
|
|||||||
@@ -324,7 +324,10 @@ class TestCompressWithClient:
|
|||||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||||
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
|
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
|
||||||
|
|
||||||
# Last head message (index 1) is "assistant" → summary should be "user"
|
# Last head message (index 1) is "assistant" → summary should be "user".
|
||||||
|
# With min_tail=3, tail = last 3 messages (indices 5-7).
|
||||||
|
# head_last=assistant, tail_first=assistant → summary_role="user", no collision.
|
||||||
|
# Need 8 messages: min_for_compress = 2+3+1 = 6, must have > 6.
|
||||||
msgs = [
|
msgs = [
|
||||||
{"role": "user", "content": "msg 0"},
|
{"role": "user", "content": "msg 0"},
|
||||||
{"role": "assistant", "content": "msg 1"},
|
{"role": "assistant", "content": "msg 1"},
|
||||||
@@ -332,6 +335,8 @@ class TestCompressWithClient:
|
|||||||
{"role": "assistant", "content": "msg 3"},
|
{"role": "assistant", "content": "msg 3"},
|
||||||
{"role": "user", "content": "msg 4"},
|
{"role": "user", "content": "msg 4"},
|
||||||
{"role": "assistant", "content": "msg 5"},
|
{"role": "assistant", "content": "msg 5"},
|
||||||
|
{"role": "user", "content": "msg 6"},
|
||||||
|
{"role": "assistant", "content": "msg 7"},
|
||||||
]
|
]
|
||||||
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||||
result = c.compress(msgs)
|
result = c.compress(msgs)
|
||||||
@@ -460,8 +465,10 @@ class TestCompressWithClient:
|
|||||||
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
|
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
|
||||||
|
|
||||||
# Head: [system, user] → last head = user
|
# Head: [system, user] → last head = user
|
||||||
# Tail: [assistant, user] → first tail = assistant
|
# Tail: [assistant, user, assistant] → first tail = assistant
|
||||||
# summary_role="assistant" collides with tail, "user" collides with head → merge
|
# summary_role="assistant" collides with tail, "user" collides with head → merge
|
||||||
|
# With min_tail=3, tail = last 3 messages (indices 5-7).
|
||||||
|
# Need 8 messages: min_for_compress = 2+3+1 = 6, must have > 6.
|
||||||
msgs = [
|
msgs = [
|
||||||
{"role": "system", "content": "system prompt"},
|
{"role": "system", "content": "system prompt"},
|
||||||
{"role": "user", "content": "msg 1"},
|
{"role": "user", "content": "msg 1"},
|
||||||
@@ -470,6 +477,7 @@ class TestCompressWithClient:
|
|||||||
{"role": "assistant", "content": "msg 4"}, # compressed
|
{"role": "assistant", "content": "msg 4"}, # compressed
|
||||||
{"role": "assistant", "content": "msg 5"}, # tail start
|
{"role": "assistant", "content": "msg 5"}, # tail start
|
||||||
{"role": "user", "content": "msg 6"},
|
{"role": "user", "content": "msg 6"},
|
||||||
|
{"role": "assistant", "content": "msg 7"},
|
||||||
]
|
]
|
||||||
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||||
result = c.compress(msgs)
|
result = c.compress(msgs)
|
||||||
@@ -481,7 +489,7 @@ class TestCompressWithClient:
|
|||||||
if r1 in ("user", "assistant") and r2 in ("user", "assistant"):
|
if r1 in ("user", "assistant") and r2 in ("user", "assistant"):
|
||||||
assert r1 != r2, f"consecutive {r1} at indices {i-1},{i}"
|
assert r1 != r2, f"consecutive {r1} at indices {i-1},{i}"
|
||||||
|
|
||||||
# The summary should be merged into the first tail message (assistant)
|
# The summary should be merged into the first tail message (assistant at index 5)
|
||||||
first_tail = [m for m in result if "msg 5" in (m.get("content") or "")]
|
first_tail = [m for m in result if "msg 5" in (m.get("content") or "")]
|
||||||
assert len(first_tail) == 1
|
assert len(first_tail) == 1
|
||||||
assert "summary text" in first_tail[0]["content"]
|
assert "summary text" in first_tail[0]["content"]
|
||||||
@@ -496,14 +504,18 @@ class TestCompressWithClient:
|
|||||||
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
with patch("agent.context_compressor.get_model_context_length", return_value=100000):
|
||||||
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
|
c = ContextCompressor(model="test", quiet_mode=True, protect_first_n=2, protect_last_n=2)
|
||||||
|
|
||||||
# Head=assistant, Tail=assistant → summary_role="user", no collision
|
# Head=assistant, Tail=assistant → summary_role="user", no collision.
|
||||||
|
# With min_tail=3, tail = last 3 messages (indices 5-7).
|
||||||
|
# Need 8 messages: min_for_compress = 2+3+1 = 6, must have > 6.
|
||||||
msgs = [
|
msgs = [
|
||||||
{"role": "user", "content": "msg 0"},
|
{"role": "user", "content": "msg 0"},
|
||||||
{"role": "assistant", "content": "msg 1"},
|
{"role": "assistant", "content": "msg 1"},
|
||||||
{"role": "user", "content": "msg 2"},
|
{"role": "user", "content": "msg 2"},
|
||||||
{"role": "assistant", "content": "msg 3"},
|
{"role": "assistant", "content": "msg 3"},
|
||||||
{"role": "assistant", "content": "msg 4"},
|
{"role": "user", "content": "msg 4"},
|
||||||
{"role": "user", "content": "msg 5"},
|
{"role": "assistant", "content": "msg 5"},
|
||||||
|
{"role": "user", "content": "msg 6"},
|
||||||
|
{"role": "assistant", "content": "msg 7"},
|
||||||
]
|
]
|
||||||
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
with patch("agent.context_compressor.call_llm", return_value=mock_response):
|
||||||
result = c.compress(msgs)
|
result = c.compress(msgs)
|
||||||
@@ -600,3 +612,158 @@ class TestSummaryTargetRatio:
|
|||||||
with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
|
with patch("agent.context_compressor.get_model_context_length", return_value=100_000):
|
||||||
c = ContextCompressor(model="test", quiet_mode=True)
|
c = ContextCompressor(model="test", quiet_mode=True)
|
||||||
assert c.protect_last_n == 20
|
assert c.protect_last_n == 20
|
||||||
|
|
||||||
|
|
||||||
|
class TestTokenBudgetTailProtection:
|
||||||
|
"""Tests for token-budget-based tail protection (PR #6240).
|
||||||
|
|
||||||
|
The core change: tail protection is now based on a token budget rather
|
||||||
|
than a fixed message count. This prevents large tool outputs from
|
||||||
|
blocking compaction.
|
||||||
|
"""
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def budget_compressor(self):
|
||||||
|
"""Compressor with known token budget for tail protection tests."""
|
||||||
|
with patch("agent.context_compressor.get_model_context_length", return_value=200_000):
|
||||||
|
c = ContextCompressor(
|
||||||
|
model="test/model",
|
||||||
|
threshold_percent=0.50, # 100K threshold
|
||||||
|
protect_first_n=2,
|
||||||
|
protect_last_n=20,
|
||||||
|
quiet_mode=True,
|
||||||
|
)
|
||||||
|
return c
|
||||||
|
|
||||||
|
def test_large_tool_outputs_no_longer_block_compaction(self, budget_compressor):
|
||||||
|
"""The motivating scenario: 20 messages with large tool outputs should
|
||||||
|
NOT prevent compaction. With message-count tail protection they would
|
||||||
|
all be protected, leaving nothing to summarize."""
|
||||||
|
c = budget_compressor
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "Start task"},
|
||||||
|
{"role": "assistant", "content": "On it"},
|
||||||
|
]
|
||||||
|
# Add 20 messages with large tool outputs (~5K chars each ≈ 1250 tokens)
|
||||||
|
for i in range(10):
|
||||||
|
messages.append({
|
||||||
|
"role": "assistant", "content": None,
|
||||||
|
"tool_calls": [{"function": {"name": f"tool_{i}", "arguments": "{}"}}],
|
||||||
|
})
|
||||||
|
messages.append({
|
||||||
|
"role": "tool", "content": "x" * 5000,
|
||||||
|
"tool_call_id": f"call_{i}",
|
||||||
|
})
|
||||||
|
# Add 3 recent small messages
|
||||||
|
messages.append({"role": "user", "content": "What's the status?"})
|
||||||
|
messages.append({"role": "assistant", "content": "Here's what I found..."})
|
||||||
|
messages.append({"role": "user", "content": "Continue"})
|
||||||
|
|
||||||
|
# The tail cut should NOT protect all 20 tool messages
|
||||||
|
head_end = c.protect_first_n
|
||||||
|
cut = c._find_tail_cut_by_tokens(messages, head_end)
|
||||||
|
tail_size = len(messages) - cut
|
||||||
|
# With token budget, the tail should be much smaller than 20+
|
||||||
|
assert tail_size < 20, f"Tail {tail_size} messages — large tool outputs are blocking compaction"
|
||||||
|
# But at least 3 (hard minimum)
|
||||||
|
assert tail_size >= 3
|
||||||
|
|
||||||
|
def test_min_tail_always_3_messages(self, budget_compressor):
|
||||||
|
"""Even with a tiny token budget, at least 3 messages are protected."""
|
||||||
|
c = budget_compressor
|
||||||
|
# Override to a tiny budget
|
||||||
|
c.tail_token_budget = 10
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "hello"},
|
||||||
|
{"role": "assistant", "content": "hi"},
|
||||||
|
{"role": "user", "content": "do something"},
|
||||||
|
{"role": "assistant", "content": "working on it"},
|
||||||
|
{"role": "user", "content": "more work"},
|
||||||
|
{"role": "assistant", "content": "done"},
|
||||||
|
{"role": "user", "content": "thanks"},
|
||||||
|
]
|
||||||
|
head_end = 2
|
||||||
|
cut = c._find_tail_cut_by_tokens(messages, head_end)
|
||||||
|
tail_size = len(messages) - cut
|
||||||
|
assert tail_size >= 3, f"Tail is only {tail_size} messages, min should be 3"
|
||||||
|
|
||||||
|
def test_soft_ceiling_allows_oversized_message(self, budget_compressor):
|
||||||
|
"""The 1.5x soft ceiling allows an oversized message to be included
|
||||||
|
rather than splitting it."""
|
||||||
|
c = budget_compressor
|
||||||
|
# Set a small budget — 500 tokens
|
||||||
|
c.tail_token_budget = 500
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "hello"},
|
||||||
|
{"role": "assistant", "content": "hi"},
|
||||||
|
{"role": "user", "content": "read the file"},
|
||||||
|
# This message is ~600 tokens (> budget of 500, but < 1.5x = 750)
|
||||||
|
{"role": "assistant", "content": "a" * 2400},
|
||||||
|
{"role": "user", "content": "short"},
|
||||||
|
{"role": "assistant", "content": "short reply"},
|
||||||
|
{"role": "user", "content": "continue"},
|
||||||
|
]
|
||||||
|
head_end = 2
|
||||||
|
cut = c._find_tail_cut_by_tokens(messages, head_end)
|
||||||
|
# The oversized message at index 3 should NOT be the cut point
|
||||||
|
# because 1.5x ceiling = 750 tokens and accumulated would be ~610
|
||||||
|
# (short msgs + oversized msg) which is < 750
|
||||||
|
tail_size = len(messages) - cut
|
||||||
|
assert tail_size >= 3
|
||||||
|
|
||||||
|
def test_small_conversation_still_compresses(self, budget_compressor):
|
||||||
|
"""With the new min of 8 messages (head=2 + 3 + 1 guard + 2 middle),
|
||||||
|
a small but compressible conversation should still compress."""
|
||||||
|
c = budget_compressor
|
||||||
|
# 9 messages: head(2) + 4 middle + 3 tail = compressible
|
||||||
|
messages = []
|
||||||
|
for i in range(9):
|
||||||
|
role = "user" if i % 2 == 0 else "assistant"
|
||||||
|
messages.append({"role": role, "content": f"Message {i}"})
|
||||||
|
|
||||||
|
# Should not early-return (needs > protect_first_n + 3 + 1 = 6)
|
||||||
|
# Mock the summary generation to avoid real API call
|
||||||
|
with patch.object(c, "_generate_summary", return_value="Summary of conversation"):
|
||||||
|
result = c.compress(messages, current_tokens=90_000)
|
||||||
|
# Should have compressed (fewer messages than original)
|
||||||
|
assert len(result) < len(messages)
|
||||||
|
|
||||||
|
def test_prune_with_token_budget(self, budget_compressor):
|
||||||
|
"""_prune_old_tool_results with protect_tail_tokens respects the budget."""
|
||||||
|
c = budget_compressor
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "start"},
|
||||||
|
{"role": "assistant", "content": None,
|
||||||
|
"tool_calls": [{"function": {"name": "read_file", "arguments": '{"path": "big.txt"}'}}]},
|
||||||
|
{"role": "tool", "content": "x" * 10000, "tool_call_id": "c1"}, # ~2500 tokens
|
||||||
|
{"role": "assistant", "content": None,
|
||||||
|
"tool_calls": [{"function": {"name": "read_file", "arguments": '{"path": "small.txt"}'}}]},
|
||||||
|
{"role": "tool", "content": "y" * 10000, "tool_call_id": "c2"}, # ~2500 tokens
|
||||||
|
{"role": "user", "content": "short recent message"},
|
||||||
|
{"role": "assistant", "content": "short reply"},
|
||||||
|
]
|
||||||
|
# With a 1000-token budget, only the last couple messages should be protected
|
||||||
|
result, pruned = c._prune_old_tool_results(
|
||||||
|
messages, protect_tail_count=2, protect_tail_tokens=1000,
|
||||||
|
)
|
||||||
|
# At least one old tool result should have been pruned
|
||||||
|
assert pruned >= 1
|
||||||
|
|
||||||
|
def test_prune_without_token_budget_uses_message_count(self, budget_compressor):
|
||||||
|
"""Without protect_tail_tokens, falls back to message-count behavior."""
|
||||||
|
c = budget_compressor
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": "start"},
|
||||||
|
{"role": "assistant", "content": None,
|
||||||
|
"tool_calls": [{"function": {"name": "tool", "arguments": "{}"}}]},
|
||||||
|
{"role": "tool", "content": "x" * 5000, "tool_call_id": "c1"},
|
||||||
|
{"role": "user", "content": "recent"},
|
||||||
|
{"role": "assistant", "content": "reply"},
|
||||||
|
]
|
||||||
|
# protect_tail_count=3 means last 3 messages protected
|
||||||
|
result, pruned = c._prune_old_tool_results(
|
||||||
|
messages, protect_tail_count=3,
|
||||||
|
)
|
||||||
|
# Tool at index 2 is inside the protected tail (last 3 = indices 2,3,4),
|
||||||
|
# so it falls outside the pruning range (only indices 0-1 are visited)
|
||||||
|
assert isinstance(pruned, int)
|
||||||
|
|||||||
Reference in New Issue
Block a user