mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
fix(file-tools): escalate to BLOCKED on repeated read_file dedup stubs (#16382)
read_file's dedup path returned a lightweight stub on re-reads of an unchanged file, then returned early — so the consecutive-read loop guard (hard block at count>=4) at the bottom of read_file_tool never ran for stub-looped calls. Weaker tool-following models (local Qwen3.6 variants in the reported case) ignore the passive 'refer to earlier result' hint and hammer the same read_file call until iteration budget runs out. Track per-key stub returns in task_data['dedup_hits'] and, on the second stub for the same (path, offset, limit), return a hard BLOCKED error mirroring the wording the real-read path already uses. A real read, an intervening non-read tool call (notify_other_tool_call), or reset_file_dedup (on context compression) all clear the counter so the guard never stays engaged longer than the actual loop. Closes #15759
This commit is contained in:
@@ -24,6 +24,7 @@ from tools.file_tools import (
|
||||
_get_max_read_chars,
|
||||
_DEFAULT_MAX_READ_CHARS,
|
||||
_read_tracker,
|
||||
notify_other_tool_call,
|
||||
)
|
||||
|
||||
|
||||
@@ -294,6 +295,153 @@ class TestFileDedup(unittest.TestCase):
|
||||
self.assertNotEqual(r2.get("dedup"), True)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dedup stub-loop guard (issue #15759)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestDedupStubLoopGuard(unittest.TestCase):
    """Repeated dedup stubs must escalate to a hard BLOCKED error so weak
    tool-following models don't burn iteration budget in an infinite loop
    of ``read_file → stub → read_file → stub → ...``

    Regression tests for issue #15759: the dedup fast-path returned a stub
    and exited early, so the consecutive-read hard block never fired.
    """

    def setUp(self):
        # Start each test with empty tracker state and a fresh on-disk file
        # (a real file is needed so os.path.getmtime succeeds in the dedup path).
        _read_tracker.clear()
        self._tmpdir = tempfile.mkdtemp()
        self._tmpfile = os.path.join(self._tmpdir, "loop_test.txt")
        with open(self._tmpfile, "w") as f:
            f.write("line one\nline two\n")

    def tearDown(self):
        # Clear shared tracker state and remove the temp file/dir; best-effort
        # cleanup since a test may already have unlinked the file.
        _read_tracker.clear()
        try:
            os.unlink(self._tmpfile)
            os.rmdir(self._tmpdir)
        except OSError:
            pass

    @patch("tools.file_tools._get_file_ops")
    def test_third_read_is_blocked(self, mock_ops):
        """read → stub → BLOCKED. Second stub escalates to hard error."""
        mock_ops.return_value = _make_fake_ops(
            content="line one\nline two\n", file_size=20,
        )
        # 1. Real read — full content
        r1 = json.loads(read_file_tool(self._tmpfile, task_id="loop"))
        self.assertNotIn("dedup", r1)
        self.assertNotIn("error", r1)

        # 2. Dedup stub (first hit)
        r2 = json.loads(read_file_tool(self._tmpfile, task_id="loop"))
        self.assertTrue(r2.get("dedup"))
        self.assertNotIn("error", r2)

        # 3. Dedup stub (second hit) — escalates to BLOCKED
        r3 = json.loads(read_file_tool(self._tmpfile, task_id="loop"))
        self.assertIn("error", r3, "Second dedup stub should be BLOCKED")
        self.assertIn("BLOCKED", r3["error"])
        self.assertIn("STOP", r3["error"])
        self.assertEqual(r3.get("already_read"), 3)
        # The loop-breaker must NOT be a dedup stub, or the model sees the
        # same passive message it has been ignoring.
        self.assertNotIn("dedup", r3)

    @patch("tools.file_tools._get_file_ops")
    def test_subsequent_reads_stay_blocked(self, mock_ops):
        """Once blocked, continued hammering keeps returning BLOCKED."""
        mock_ops.return_value = _make_fake_ops(
            content="line one\nline two\n", file_size=20,
        )
        read_file_tool(self._tmpfile, task_id="loop")  # read
        read_file_tool(self._tmpfile, task_id="loop")  # stub
        r3 = json.loads(read_file_tool(self._tmpfile, task_id="loop"))
        self.assertIn("error", r3)
        # 4th, 5th, ... calls must stay blocked, never revert to stub
        for _ in range(5):
            rN = json.loads(read_file_tool(self._tmpfile, task_id="loop"))
            self.assertIn("error", rN)
            self.assertIn("BLOCKED", rN["error"])

    @patch("tools.file_tools._get_file_ops")
    def test_file_modification_clears_block(self, mock_ops):
        """Real file change should break out of the block — new content
        is legitimately different and the agent should see it."""
        mock_ops.return_value = _make_fake_ops(
            content="line one\nline two\n", file_size=20,
        )
        read_file_tool(self._tmpfile, task_id="loop")
        read_file_tool(self._tmpfile, task_id="loop")
        r3 = json.loads(read_file_tool(self._tmpfile, task_id="loop"))
        self.assertIn("error", r3)

        # File changes — mtime updates
        # NOTE: the sleep guards against filesystems with coarse mtime
        # resolution, so the rewrite is guaranteed a newer timestamp.
        time.sleep(0.05)
        with open(self._tmpfile, "w") as f:
            f.write("brand new content\n")

        r4 = json.loads(read_file_tool(self._tmpfile, task_id="loop"))
        self.assertNotIn("error", r4)
        self.assertNotIn("dedup", r4)

    @patch("tools.file_tools._get_file_ops")
    def test_other_tool_call_clears_hits(self, mock_ops):
        """An intervening non-read tool call resets stub-hit counters,
        just like it resets the consecutive-read counter."""
        mock_ops.return_value = _make_fake_ops(
            content="line one\nline two\n", file_size=20,
        )
        read_file_tool(self._tmpfile, task_id="loop")
        read_file_tool(self._tmpfile, task_id="loop")  # 1st stub

        # Agent did something else — e.g. terminal, write_file — so the
        # stub-loop is broken. Counter should reset.
        notify_other_tool_call("loop")

        r3 = json.loads(read_file_tool(self._tmpfile, task_id="loop"))
        # Should be a stub again, NOT blocked
        self.assertTrue(r3.get("dedup"))
        self.assertNotIn("error", r3)

    @patch("tools.file_tools._get_file_ops")
    def test_different_ranges_tracked_independently(self, mock_ops):
        """Stub-hit counter is keyed by (path, offset, limit), so hammering
        one range shouldn't block reads of a different range."""
        mock_ops.return_value = _make_fake_ops(
            content="line one\nline two\n", file_size=20,
        )
        # Burn down one range
        read_file_tool(self._tmpfile, offset=1, limit=100, task_id="loop")
        read_file_tool(self._tmpfile, offset=1, limit=100, task_id="loop")
        r3 = json.loads(read_file_tool(
            self._tmpfile, offset=1, limit=100, task_id="loop",
        ))
        self.assertIn("error", r3)

        # Different range — fresh read, should go through
        r_other = json.loads(read_file_tool(
            self._tmpfile, offset=1, limit=200, task_id="loop",
        ))
        self.assertNotIn("error", r_other)

    @patch("tools.file_tools._get_file_ops")
    def test_reset_file_dedup_clears_hits(self, mock_ops):
        """Post-compression reset must clear stub-hit counters too,
        otherwise the agent stays blocked after compression."""
        mock_ops.return_value = _make_fake_ops(
            content="line one\nline two\n", file_size=20,
        )
        read_file_tool(self._tmpfile, task_id="loop")
        read_file_tool(self._tmpfile, task_id="loop")
        r3 = json.loads(read_file_tool(self._tmpfile, task_id="loop"))
        self.assertIn("error", r3)

        reset_file_dedup("loop")

        # Fresh session — real read, no stub, no block
        r4 = json.loads(read_file_tool(self._tmpfile, task_id="loop"))
        self.assertNotIn("error", r4)
        self.assertNotIn("dedup", r4)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Dedup reset on compression
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -253,6 +253,15 @@ def _cap_read_tracker_data(task_data: dict) -> None:
|
||||
except (StopIteration, KeyError):
|
||||
break
|
||||
|
||||
dedup_hits = task_data.get("dedup_hits")
|
||||
if dedup_hits is not None and len(dedup_hits) > _DEDUP_CAP:
|
||||
excess = len(dedup_hits) - _DEDUP_CAP
|
||||
for _ in range(excess):
|
||||
try:
|
||||
dedup_hits.pop(next(iter(dedup_hits)))
|
||||
except (StopIteration, KeyError):
|
||||
break
|
||||
|
||||
ts = task_data.get("read_timestamps")
|
||||
if ts is not None and len(ts) > _READ_TIMESTAMPS_CAP:
|
||||
excess = len(ts) - _READ_TIMESTAMPS_CAP
|
||||
@@ -479,13 +488,43 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
|
||||
task_data = _read_tracker.setdefault(task_id, {
|
||||
"last_key": None, "consecutive": 0,
|
||||
"read_history": set(), "dedup": {},
|
||||
"dedup_hits": {},
|
||||
})
|
||||
# Backward-compat for pre-existing tracker entries that predate
|
||||
# dedup_hits (long-lived task or crossed an upgrade boundary).
|
||||
if "dedup_hits" not in task_data:
|
||||
task_data["dedup_hits"] = {}
|
||||
cached_mtime = task_data.get("dedup", {}).get(dedup_key)
|
||||
|
||||
if cached_mtime is not None:
|
||||
try:
|
||||
current_mtime = os.path.getmtime(resolved_str)
|
||||
if current_mtime == cached_mtime:
|
||||
# Count repeated stub returns so weak tool-followers that
|
||||
# ignore the "refer to earlier result" hint don't burn
|
||||
# their iteration budget in an infinite read loop. After
|
||||
# 2 stubs for the same key we escalate to a hard block
|
||||
# mirroring the count>=4 path on real reads.
|
||||
with _read_tracker_lock:
|
||||
hits = task_data["dedup_hits"].get(dedup_key, 0) + 1
|
||||
task_data["dedup_hits"][dedup_key] = hits
|
||||
_cap_read_tracker_data(task_data)
|
||||
|
||||
if hits >= 2:
|
||||
return json.dumps({
|
||||
"error": (
|
||||
f"BLOCKED: You have called read_file on this "
|
||||
f"exact region {hits + 1} times and the file "
|
||||
"has NOT changed. STOP calling read_file for "
|
||||
"this path — the content from your earlier "
|
||||
"read_file result in this conversation is "
|
||||
"still current. Proceed with your task using "
|
||||
"the information you already have."
|
||||
),
|
||||
"path": path,
|
||||
"already_read": hits + 1,
|
||||
}, ensure_ascii=False)
|
||||
|
||||
return json.dumps({
|
||||
"status": "unchanged",
|
||||
"message": _READ_DEDUP_STATUS_MESSAGE,
|
||||
@@ -544,9 +583,16 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
|
||||
# ── Track for consecutive-loop detection ──────────────────────
|
||||
read_key = ("read", path, offset, limit)
|
||||
with _read_tracker_lock:
|
||||
# Ensure "dedup" key exists (backward compat with old tracker state)
|
||||
# Ensure "dedup" / "dedup_hits" keys exist (backward compat with
|
||||
# old tracker state from pre-dedup-guard sessions).
|
||||
if "dedup" not in task_data:
|
||||
task_data["dedup"] = {}
|
||||
if "dedup_hits" not in task_data:
|
||||
task_data["dedup_hits"] = {}
|
||||
# Real read succeeded — this key is no longer in a stub-loop, so
|
||||
# reset its hit counter. (File either changed or stat failed
|
||||
# earlier and we fell through.)
|
||||
task_data["dedup_hits"].pop(dedup_key, None)
|
||||
task_data["read_history"].add((path, offset, limit))
|
||||
if task_data["last_key"] == read_key:
|
||||
task_data["consecutive"] += 1
|
||||
@@ -622,12 +668,17 @@ def reset_file_dedup(task_id: str = None):
|
||||
with _read_tracker_lock:
|
||||
if task_id:
|
||||
task_data = _read_tracker.get(task_id)
|
||||
if task_data and "dedup" in task_data:
|
||||
task_data["dedup"].clear()
|
||||
if task_data:
|
||||
if "dedup" in task_data:
|
||||
task_data["dedup"].clear()
|
||||
if "dedup_hits" in task_data:
|
||||
task_data["dedup_hits"].clear()
|
||||
else:
|
||||
for task_data in _read_tracker.values():
|
||||
if "dedup" in task_data:
|
||||
task_data["dedup"].clear()
|
||||
if "dedup_hits" in task_data:
|
||||
task_data["dedup_hits"].clear()
|
||||
|
||||
|
||||
def notify_other_tool_call(task_id: str = "default"):
|
||||
@@ -644,6 +695,10 @@ def notify_other_tool_call(task_id: str = "default"):
|
||||
if task_data:
|
||||
task_data["last_key"] = None
|
||||
task_data["consecutive"] = 0
|
||||
# An intervening non-read tool call breaks any stub-loop in
|
||||
# progress, so clear per-key dedup hit counters too.
|
||||
if "dedup_hits" in task_data:
|
||||
task_data["dedup_hits"].clear()
|
||||
|
||||
|
||||
def _invalidate_dedup_for_path(filepath: str, task_id: str) -> None:
|
||||
|
||||
Reference in New Issue
Block a user