From a32d07529cf554d3d849a49f9924b2574107ada4 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 27 Apr 2026 00:17:26 -0700 Subject: [PATCH] fix(file-tools): escalate to BLOCKED on repeated read_file dedup stubs (#16382) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit read_file's dedup path returned a lightweight stub on re-reads of an unchanged file, then returned early — so the consecutive-read loop guard (hard block at count>=4) at the bottom of read_file_tool never ran for stub-looped calls. Weaker tool-following models (local Qwen3.6 variants in the reported case) ignore the passive 'refer to earlier result' hint and hammer the same read_file call until iteration budget runs out. Track per-key stub returns in task_data['dedup_hits'] and, on the second stub for the same (path, offset, limit), return a hard BLOCKED error mirroring the wording the real-read path already uses. A real read, an intervening non-read tool call (notify_other_tool_call), or reset_file_dedup (on context compression) all clear the counter so the guard never stays engaged longer than the actual loop. Closes #15759 --- tests/tools/test_file_read_guards.py | 148 +++++++++++++++++++++++++++ tools/file_tools.py | 61 ++++++++++- 2 files changed, 206 insertions(+), 3 deletions(-) diff --git a/tests/tools/test_file_read_guards.py b/tests/tools/test_file_read_guards.py index b9548fbd05..ccb82daa73 100644 --- a/tests/tools/test_file_read_guards.py +++ b/tests/tools/test_file_read_guards.py @@ -24,6 +24,7 @@ from tools.file_tools import ( _get_max_read_chars, _DEFAULT_MAX_READ_CHARS, _read_tracker, + notify_other_tool_call, ) @@ -294,6 +295,153 @@ class TestFileDedup(unittest.TestCase): self.assertNotEqual(r2.get("dedup"), True) +# --------------------------------------------------------------------------- +# Dedup stub-loop guard (issue #15759) +# --------------------------------------------------------------------------- + +class TestDedupStubLoopGuard(unittest.TestCase): + """Repeated dedup stubs must escalate to a hard BLOCKED error so weak + tool-following models don't burn iteration budget in an infinite loop + of ``read_file → stub → read_file → stub → ...``""" + + def setUp(self): + _read_tracker.clear() + self._tmpdir = tempfile.mkdtemp() + self._tmpfile = os.path.join(self._tmpdir, "loop_test.txt") + with open(self._tmpfile, "w") as f: + f.write("line one\nline two\n") + + def tearDown(self): + _read_tracker.clear() + try: + os.unlink(self._tmpfile) + os.rmdir(self._tmpdir) + except OSError: + pass + + @patch("tools.file_tools._get_file_ops") + def test_third_read_is_blocked(self, mock_ops): + """read → stub → BLOCKED. Second stub escalates to hard error.""" + mock_ops.return_value = _make_fake_ops( + content="line one\nline two\n", file_size=20, + ) + # 1. Real read — full content + r1 = json.loads(read_file_tool(self._tmpfile, task_id="loop")) + self.assertNotIn("dedup", r1) + self.assertNotIn("error", r1) + + # 2. Dedup stub (first hit) + r2 = json.loads(read_file_tool(self._tmpfile, task_id="loop")) + self.assertTrue(r2.get("dedup")) + self.assertNotIn("error", r2) + + # 3. Dedup stub (second hit) — escalates to BLOCKED + r3 = json.loads(read_file_tool(self._tmpfile, task_id="loop")) + self.assertIn("error", r3, "Second dedup stub should be BLOCKED") + self.assertIn("BLOCKED", r3["error"]) + self.assertIn("STOP", r3["error"]) + self.assertEqual(r3.get("already_read"), 3) + # The loop-breaker must NOT be a dedup stub, or the model sees the + # same passive message it has been ignoring. + self.assertNotIn("dedup", r3) + + @patch("tools.file_tools._get_file_ops") + def test_subsequent_reads_stay_blocked(self, mock_ops): + """Once blocked, continued hammering keeps returning BLOCKED.""" + mock_ops.return_value = _make_fake_ops( + content="line one\nline two\n", file_size=20, + ) + read_file_tool(self._tmpfile, task_id="loop") # read + read_file_tool(self._tmpfile, task_id="loop") # stub + r3 = json.loads(read_file_tool(self._tmpfile, task_id="loop")) + self.assertIn("error", r3) + # 4th, 5th, ... calls must stay blocked, never revert to stub + for _ in range(5): + rN = json.loads(read_file_tool(self._tmpfile, task_id="loop")) + self.assertIn("error", rN) + self.assertIn("BLOCKED", rN["error"]) + + @patch("tools.file_tools._get_file_ops") + def test_file_modification_clears_block(self, mock_ops): + """Real file change should break out of the block — new content + is legitimately different and the agent should see it.""" + mock_ops.return_value = _make_fake_ops( + content="line one\nline two\n", file_size=20, + ) + read_file_tool(self._tmpfile, task_id="loop") + read_file_tool(self._tmpfile, task_id="loop") + r3 = json.loads(read_file_tool(self._tmpfile, task_id="loop")) + self.assertIn("error", r3) + + # File changes — mtime updates + time.sleep(0.05) + with open(self._tmpfile, "w") as f: + f.write("brand new content\n") + + r4 = json.loads(read_file_tool(self._tmpfile, task_id="loop")) + self.assertNotIn("error", r4) + self.assertNotIn("dedup", r4) + + @patch("tools.file_tools._get_file_ops") + def test_other_tool_call_clears_hits(self, mock_ops): + """An intervening non-read tool call resets stub-hit counters, + just like it resets the consecutive-read counter.""" + mock_ops.return_value = _make_fake_ops( + content="line one\nline two\n", file_size=20, + ) + read_file_tool(self._tmpfile, task_id="loop") + read_file_tool(self._tmpfile, task_id="loop") # 1st stub + + # Agent did something else — e.g. terminal, write_file — so the + # stub-loop is broken. Counter should reset. + notify_other_tool_call("loop") + + r3 = json.loads(read_file_tool(self._tmpfile, task_id="loop")) + # Should be a stub again, NOT blocked + self.assertTrue(r3.get("dedup")) + self.assertNotIn("error", r3) + + @patch("tools.file_tools._get_file_ops") + def test_different_ranges_tracked_independently(self, mock_ops): + """Stub-hit counter is keyed by (path, offset, limit), so hammering + one range shouldn't block reads of a different range.""" + mock_ops.return_value = _make_fake_ops( + content="line one\nline two\n", file_size=20, + ) + # Burn down one range + read_file_tool(self._tmpfile, offset=1, limit=100, task_id="loop") + read_file_tool(self._tmpfile, offset=1, limit=100, task_id="loop") + r3 = json.loads(read_file_tool( + self._tmpfile, offset=1, limit=100, task_id="loop", + )) + self.assertIn("error", r3) + + # Different range — fresh read, should go through + r_other = json.loads(read_file_tool( + self._tmpfile, offset=1, limit=200, task_id="loop", + )) + self.assertNotIn("error", r_other) + + @patch("tools.file_tools._get_file_ops") + def test_reset_file_dedup_clears_hits(self, mock_ops): + """Post-compression reset must clear stub-hit counters too, + otherwise the agent stays blocked after compression.""" + mock_ops.return_value = _make_fake_ops( + content="line one\nline two\n", file_size=20, + ) + read_file_tool(self._tmpfile, task_id="loop") + read_file_tool(self._tmpfile, task_id="loop") + r3 = json.loads(read_file_tool(self._tmpfile, task_id="loop")) + self.assertIn("error", r3) + + reset_file_dedup("loop") + + # Fresh session — real read, no stub, no block + r4 = json.loads(read_file_tool(self._tmpfile, task_id="loop")) + self.assertNotIn("error", r4) + self.assertNotIn("dedup", r4) + + # --------------------------------------------------------------------------- # Dedup reset on compression # --------------------------------------------------------------------------- diff --git a/tools/file_tools.py b/tools/file_tools.py index 21061eb8aa..38801362e9 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -253,6 +253,15 @@ def _cap_read_tracker_data(task_data: dict) -> None: except (StopIteration, KeyError): break + dedup_hits = task_data.get("dedup_hits") + if dedup_hits is not None and len(dedup_hits) > _DEDUP_CAP: + excess = len(dedup_hits) - _DEDUP_CAP + for _ in range(excess): + try: + dedup_hits.pop(next(iter(dedup_hits))) + except (StopIteration, KeyError): + break + ts = task_data.get("read_timestamps") if ts is not None and len(ts) > _READ_TIMESTAMPS_CAP: excess = len(ts) - _READ_TIMESTAMPS_CAP @@ -479,13 +488,43 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = task_data = _read_tracker.setdefault(task_id, { "last_key": None, "consecutive": 0, "read_history": set(), "dedup": {}, + "dedup_hits": {}, }) + # Backward-compat for pre-existing tracker entries that predate + # dedup_hits (long-lived task or crossed an upgrade boundary). + if "dedup_hits" not in task_data: + task_data["dedup_hits"] = {} cached_mtime = task_data.get("dedup", {}).get(dedup_key) if cached_mtime is not None: try: current_mtime = os.path.getmtime(resolved_str) if current_mtime == cached_mtime: + # Count repeated stub returns so weak tool-followers that + # ignore the "refer to earlier result" hint don't burn + # their iteration budget in an infinite read loop. After + # 2 stubs for the same key we escalate to a hard block + # mirroring the count>=4 path on real reads. + with _read_tracker_lock: + hits = task_data["dedup_hits"].get(dedup_key, 0) + 1 + task_data["dedup_hits"][dedup_key] = hits + _cap_read_tracker_data(task_data) + + if hits >= 2: + return json.dumps({ + "error": ( + f"BLOCKED: You have called read_file on this " + f"exact region {hits + 1} times and the file " + "has NOT changed. STOP calling read_file for " + "this path — the content from your earlier " + "read_file result in this conversation is " + "still current. Proceed with your task using " + "the information you already have." + ), + "path": path, + "already_read": hits + 1, + }, ensure_ascii=False) + return json.dumps({ "status": "unchanged", "message": _READ_DEDUP_STATUS_MESSAGE, @@ -544,9 +583,16 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = # ── Track for consecutive-loop detection ────────────────────── read_key = ("read", path, offset, limit) with _read_tracker_lock: - # Ensure "dedup" key exists (backward compat with old tracker state) + # Ensure "dedup" / "dedup_hits" keys exist (backward compat with + # old tracker state from pre-dedup-guard sessions). if "dedup" not in task_data: task_data["dedup"] = {} + if "dedup_hits" not in task_data: + task_data["dedup_hits"] = {} + # Real read succeeded — this key is no longer in a stub-loop, so + # reset its hit counter. (File either changed or stat failed + # earlier and we fell through.) + task_data["dedup_hits"].pop(dedup_key, None) task_data["read_history"].add((path, offset, limit)) if task_data["last_key"] == read_key: task_data["consecutive"] += 1 @@ -622,12 +668,17 @@ def reset_file_dedup(task_id: str = None): with _read_tracker_lock: if task_id: task_data = _read_tracker.get(task_id) - if task_data and "dedup" in task_data: - task_data["dedup"].clear() + if task_data: + if "dedup" in task_data: + task_data["dedup"].clear() + if "dedup_hits" in task_data: + task_data["dedup_hits"].clear() else: for task_data in _read_tracker.values(): if "dedup" in task_data: task_data["dedup"].clear() + if "dedup_hits" in task_data: + task_data["dedup_hits"].clear() def notify_other_tool_call(task_id: str = "default"): @@ -644,6 +695,10 @@ def notify_other_tool_call(task_id: str = "default"): if task_data: task_data["last_key"] = None task_data["consecutive"] = 0 + # An intervening non-read tool call breaks any stub-loop in + # progress, so clear per-key dedup hit counters too. + if "dedup_hits" in task_data: + task_data["dedup_hits"].clear() def _invalidate_dedup_for_path(filepath: str, task_id: str) -> None: