fix(file-tools): escalate to BLOCKED on repeated read_file dedup stubs (#16382)

read_file's dedup path returned a lightweight stub on re-reads of an
unchanged file, then returned early — so the consecutive-read loop
guard (hard block at count>=4) at the bottom of read_file_tool never
ran for stub-looped calls. Weaker tool-following models (local Qwen3.6
variants in the reported case) ignore the passive 'refer to earlier
result' hint and hammer the same read_file call until iteration budget
runs out.

Track per-key stub returns in task_data['dedup_hits'] and, on the
second stub for the same (path, offset, limit), return a hard BLOCKED
error mirroring the wording the real-read path already uses. A real
read, an intervening non-read tool call (notify_other_tool_call), or
reset_file_dedup (on context compression) all clear the counter so
the guard never stays engaged longer than the actual loop.

Closes #15759
This commit is contained in:
Teknium
2026-04-27 00:17:26 -07:00
committed by GitHub
parent 3ff3dfb5ac
commit a32d07529c
2 changed files with 206 additions and 3 deletions

View File

@@ -24,6 +24,7 @@ from tools.file_tools import (
_get_max_read_chars,
_DEFAULT_MAX_READ_CHARS,
_read_tracker,
notify_other_tool_call,
)
@@ -294,6 +295,153 @@ class TestFileDedup(unittest.TestCase):
self.assertNotEqual(r2.get("dedup"), True)
# ---------------------------------------------------------------------------
# Dedup stub-loop guard (issue #15759)
# ---------------------------------------------------------------------------
class TestDedupStubLoopGuard(unittest.TestCase):
    """Guard against infinite ``read_file`` stub loops (issue #15759).

    When a model re-issues an identical read_file call, the dedup path
    answers with a lightweight stub.  Weak tool-following models ignore
    that passive hint and keep hammering, so the *second* stub for the
    same (path, offset, limit) key must escalate to a hard BLOCKED error.
    These tests pin that escalation and every path that is supposed to
    clear it again: a real file change, an intervening non-read tool
    call, and the post-compression dedup reset.
    """

    def setUp(self):
        _read_tracker.clear()
        self._tmpdir = tempfile.mkdtemp()
        self._tmpfile = os.path.join(self._tmpdir, "loop_test.txt")
        with open(self._tmpfile, "w") as f:
            f.write("line one\nline two\n")

    def tearDown(self):
        _read_tracker.clear()
        try:
            os.unlink(self._tmpfile)
            os.rmdir(self._tmpdir)
        except OSError:
            pass

    # -- helpers -----------------------------------------------------------

    def _install_ops(self, mock_ops):
        """Point the patched _get_file_ops at the canned file content."""
        mock_ops.return_value = _make_fake_ops(
            content="line one\nline two\n", file_size=20,
        )

    def _read(self, **kwargs):
        """Call read_file_tool on the temp file and decode its JSON reply."""
        kwargs.setdefault("task_id", "loop")
        return json.loads(read_file_tool(self._tmpfile, **kwargs))

    # -- tests -------------------------------------------------------------

    @patch("tools.file_tools._get_file_ops")
    def test_third_read_is_blocked(self, mock_ops):
        """read → stub → BLOCKED: the second stub escalates to a hard error."""
        self._install_ops(mock_ops)
        # 1. Real read — full content, neither stub nor error.
        first = self._read()
        self.assertNotIn("dedup", first)
        self.assertNotIn("error", first)
        # 2. First dedup stub — still no error.
        second = self._read()
        self.assertTrue(second.get("dedup"))
        self.assertNotIn("error", second)
        # 3. Second stub for the same key — hard BLOCKED.
        third = self._read()
        self.assertIn("error", third, "Second dedup stub should be BLOCKED")
        self.assertIn("BLOCKED", third["error"])
        self.assertIn("STOP", third["error"])
        self.assertEqual(third.get("already_read"), 3)
        # The loop-breaker must not itself be a dedup stub, or the model
        # just sees the same passive message it has been ignoring.
        self.assertNotIn("dedup", third)

    @patch("tools.file_tools._get_file_ops")
    def test_subsequent_reads_stay_blocked(self, mock_ops):
        """Once blocked, continued hammering keeps returning BLOCKED."""
        self._install_ops(mock_ops)
        self._read()                    # real read
        self._read()                    # first stub
        blocked = self._read()
        self.assertIn("error", blocked)
        # Calls 4, 5, ... must stay hard-blocked, never revert to stubs.
        for _ in range(5):
            again = self._read()
            self.assertIn("error", again)
            self.assertIn("BLOCKED", again["error"])

    @patch("tools.file_tools._get_file_ops")
    def test_file_modification_clears_block(self, mock_ops):
        """A genuine file change lifts the block — new content is
        legitimately different and the agent should see it."""
        self._install_ops(mock_ops)
        self._read()
        self._read()
        self.assertIn("error", self._read())
        # Rewrite the file; the short sleep guarantees an mtime bump even
        # on coarse-resolution filesystems.
        time.sleep(0.05)
        with open(self._tmpfile, "w") as f:
            f.write("brand new content\n")
        fresh = self._read()
        self.assertNotIn("error", fresh)
        self.assertNotIn("dedup", fresh)

    @patch("tools.file_tools._get_file_ops")
    def test_other_tool_call_clears_hits(self, mock_ops):
        """An intervening non-read tool call resets stub-hit counters,
        mirroring the reset of the consecutive-read counter."""
        self._install_ops(mock_ops)
        self._read()
        self._read()                    # first stub
        # Agent did something else (terminal, write_file, ...) — the
        # stub-loop is broken, so the counter should reset.
        notify_other_tool_call("loop")
        after = self._read()
        # Back to a plain stub, NOT blocked.
        self.assertTrue(after.get("dedup"))
        self.assertNotIn("error", after)

    @patch("tools.file_tools._get_file_ops")
    def test_different_ranges_tracked_independently(self, mock_ops):
        """Hit counters are keyed by (path, offset, limit); hammering one
        range must not block reads of a different range."""
        self._install_ops(mock_ops)
        # Burn down one range to the blocked state.
        self._read(offset=1, limit=100)
        self._read(offset=1, limit=100)
        self.assertIn("error", self._read(offset=1, limit=100))
        # A different range is a fresh read and goes through untouched.
        other = self._read(offset=1, limit=200)
        self.assertNotIn("error", other)

    @patch("tools.file_tools._get_file_ops")
    def test_reset_file_dedup_clears_hits(self, mock_ops):
        """Post-compression reset must clear stub-hit counters too, or the
        agent would stay blocked after compression."""
        self._install_ops(mock_ops)
        self._read()
        self._read()
        self.assertIn("error", self._read())
        reset_file_dedup("loop")
        # Fresh session — real read, no stub, no block.
        fresh = self._read()
        self.assertNotIn("error", fresh)
        self.assertNotIn("dedup", fresh)
# ---------------------------------------------------------------------------
# Dedup reset on compression
# ---------------------------------------------------------------------------

View File

@@ -253,6 +253,15 @@ def _cap_read_tracker_data(task_data: dict) -> None:
except (StopIteration, KeyError):
break
dedup_hits = task_data.get("dedup_hits")
if dedup_hits is not None and len(dedup_hits) > _DEDUP_CAP:
excess = len(dedup_hits) - _DEDUP_CAP
for _ in range(excess):
try:
dedup_hits.pop(next(iter(dedup_hits)))
except (StopIteration, KeyError):
break
ts = task_data.get("read_timestamps")
if ts is not None and len(ts) > _READ_TIMESTAMPS_CAP:
excess = len(ts) - _READ_TIMESTAMPS_CAP
@@ -479,13 +488,43 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
task_data = _read_tracker.setdefault(task_id, {
"last_key": None, "consecutive": 0,
"read_history": set(), "dedup": {},
"dedup_hits": {},
})
# Backward-compat for pre-existing tracker entries that predate
# dedup_hits (long-lived task or crossed an upgrade boundary).
if "dedup_hits" not in task_data:
task_data["dedup_hits"] = {}
cached_mtime = task_data.get("dedup", {}).get(dedup_key)
if cached_mtime is not None:
try:
current_mtime = os.path.getmtime(resolved_str)
if current_mtime == cached_mtime:
# Count repeated stub returns so weak tool-followers that
# ignore the "refer to earlier result" hint don't burn
# their iteration budget in an infinite read loop. After
# 2 stubs for the same key we escalate to a hard block
# mirroring the count>=4 path on real reads.
with _read_tracker_lock:
hits = task_data["dedup_hits"].get(dedup_key, 0) + 1
task_data["dedup_hits"][dedup_key] = hits
_cap_read_tracker_data(task_data)
if hits >= 2:
return json.dumps({
"error": (
f"BLOCKED: You have called read_file on this "
f"exact region {hits + 1} times and the file "
"has NOT changed. STOP calling read_file for "
"this path — the content from your earlier "
"read_file result in this conversation is "
"still current. Proceed with your task using "
"the information you already have."
),
"path": path,
"already_read": hits + 1,
}, ensure_ascii=False)
return json.dumps({
"status": "unchanged",
"message": _READ_DEDUP_STATUS_MESSAGE,
@@ -544,9 +583,16 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
# ── Track for consecutive-loop detection ──────────────────────
read_key = ("read", path, offset, limit)
with _read_tracker_lock:
# Ensure "dedup" key exists (backward compat with old tracker state)
# Ensure "dedup" / "dedup_hits" keys exist (backward compat with
# old tracker state from pre-dedup-guard sessions).
if "dedup" not in task_data:
task_data["dedup"] = {}
if "dedup_hits" not in task_data:
task_data["dedup_hits"] = {}
# Real read succeeded — this key is no longer in a stub-loop, so
# reset its hit counter. (File either changed or stat failed
# earlier and we fell through.)
task_data["dedup_hits"].pop(dedup_key, None)
task_data["read_history"].add((path, offset, limit))
if task_data["last_key"] == read_key:
task_data["consecutive"] += 1
@@ -622,12 +668,17 @@ def reset_file_dedup(task_id: str = None):
with _read_tracker_lock:
if task_id:
task_data = _read_tracker.get(task_id)
if task_data and "dedup" in task_data:
task_data["dedup"].clear()
if task_data:
if "dedup" in task_data:
task_data["dedup"].clear()
if "dedup_hits" in task_data:
task_data["dedup_hits"].clear()
else:
for task_data in _read_tracker.values():
if "dedup" in task_data:
task_data["dedup"].clear()
if "dedup_hits" in task_data:
task_data["dedup_hits"].clear()
def notify_other_tool_call(task_id: str = "default"):
@@ -644,6 +695,10 @@ def notify_other_tool_call(task_id: str = "default"):
if task_data:
task_data["last_key"] = None
task_data["consecutive"] = 0
# An intervening non-read tool call breaks any stub-loop in
# progress, so clear per-key dedup hit counters too.
if "dedup_hits" in task_data:
task_data["dedup_hits"].clear()
def _invalidate_dedup_for_path(filepath: str, task_id: str) -> None: