fix(file-tools): escalate to BLOCKED on repeated read_file dedup stubs (#16382)

read_file's dedup path returned a lightweight stub on re-reads of an
unchanged file, then returned early — so the consecutive-read loop
guard (hard block at count>=4) at the bottom of read_file_tool never
ran for stub-looped calls. Weaker tool-following models (local Qwen3.6
variants in the reported case) ignore the passive 'refer to earlier
result' hint and hammer the same read_file call until iteration budget
runs out.

Track per-key stub returns in task_data['dedup_hits'] and, on the
second stub for the same (path, offset, limit), return a hard BLOCKED
error mirroring the wording the real-read path already uses. A real
read, an intervening non-read tool call (notify_other_tool_call), or
reset_file_dedup (on context compression) all clear the counter so
the guard never stays engaged longer than the actual loop.

Closes #15759
This commit is contained in:
Teknium
2026-04-27 00:17:26 -07:00
committed by GitHub
parent 3ff3dfb5ac
commit a32d07529c
2 changed files with 206 additions and 3 deletions

View File

@@ -24,6 +24,7 @@ from tools.file_tools import (
_get_max_read_chars,
_DEFAULT_MAX_READ_CHARS,
_read_tracker,
notify_other_tool_call,
)
@@ -294,6 +295,153 @@ class TestFileDedup(unittest.TestCase):
self.assertNotEqual(r2.get("dedup"), True)
# ---------------------------------------------------------------------------
# Dedup stub-loop guard (issue #15759)
# ---------------------------------------------------------------------------
class TestDedupStubLoopGuard(unittest.TestCase):
    """Guard against infinite ``read_file`` stub loops (issue #15759).

    When a model re-issues an identical read_file call, the dedup path
    answers with a lightweight stub.  Weak tool-following models ignore
    that passive hint and keep hammering, so the *second* stub for the
    same (path, offset, limit) key must escalate to a hard BLOCKED error.
    These tests pin that escalation and every path that is supposed to
    clear it again: a real file change, an intervening non-read tool
    call, and the post-compression dedup reset.
    """

    def setUp(self):
        _read_tracker.clear()
        self._tmpdir = tempfile.mkdtemp()
        self._tmpfile = os.path.join(self._tmpdir, "loop_test.txt")
        with open(self._tmpfile, "w") as f:
            f.write("line one\nline two\n")

    def tearDown(self):
        _read_tracker.clear()
        try:
            os.unlink(self._tmpfile)
            os.rmdir(self._tmpdir)
        except OSError:
            pass

    # -- helpers -----------------------------------------------------------

    def _install_ops(self, mock_ops):
        """Point the patched _get_file_ops at the canned file content."""
        mock_ops.return_value = _make_fake_ops(
            content="line one\nline two\n", file_size=20,
        )

    def _read(self, **kwargs):
        """Call read_file_tool on the temp file and decode its JSON reply."""
        kwargs.setdefault("task_id", "loop")
        return json.loads(read_file_tool(self._tmpfile, **kwargs))

    # -- tests -------------------------------------------------------------

    @patch("tools.file_tools._get_file_ops")
    def test_third_read_is_blocked(self, mock_ops):
        """read → stub → BLOCKED: the second stub escalates to a hard error."""
        self._install_ops(mock_ops)
        # 1. Real read — full content, neither stub nor error.
        first = self._read()
        self.assertNotIn("dedup", first)
        self.assertNotIn("error", first)
        # 2. First dedup stub — still no error.
        second = self._read()
        self.assertTrue(second.get("dedup"))
        self.assertNotIn("error", second)
        # 3. Second stub for the same key — hard BLOCKED.
        third = self._read()
        self.assertIn("error", third, "Second dedup stub should be BLOCKED")
        self.assertIn("BLOCKED", third["error"])
        self.assertIn("STOP", third["error"])
        self.assertEqual(third.get("already_read"), 3)
        # The loop-breaker must not itself be a dedup stub, or the model
        # just sees the same passive message it has been ignoring.
        self.assertNotIn("dedup", third)

    @patch("tools.file_tools._get_file_ops")
    def test_subsequent_reads_stay_blocked(self, mock_ops):
        """Once blocked, continued hammering keeps returning BLOCKED."""
        self._install_ops(mock_ops)
        self._read()                    # real read
        self._read()                    # first stub
        blocked = self._read()
        self.assertIn("error", blocked)
        # Calls 4, 5, ... must stay hard-blocked, never revert to stubs.
        for _ in range(5):
            again = self._read()
            self.assertIn("error", again)
            self.assertIn("BLOCKED", again["error"])

    @patch("tools.file_tools._get_file_ops")
    def test_file_modification_clears_block(self, mock_ops):
        """A genuine file change lifts the block — new content is
        legitimately different and the agent should see it."""
        self._install_ops(mock_ops)
        self._read()
        self._read()
        self.assertIn("error", self._read())
        # Rewrite the file; the short sleep guarantees an mtime bump even
        # on coarse-resolution filesystems.
        time.sleep(0.05)
        with open(self._tmpfile, "w") as f:
            f.write("brand new content\n")
        fresh = self._read()
        self.assertNotIn("error", fresh)
        self.assertNotIn("dedup", fresh)

    @patch("tools.file_tools._get_file_ops")
    def test_other_tool_call_clears_hits(self, mock_ops):
        """An intervening non-read tool call resets stub-hit counters,
        mirroring the reset of the consecutive-read counter."""
        self._install_ops(mock_ops)
        self._read()
        self._read()                    # first stub
        # Agent did something else (terminal, write_file, ...) — the
        # stub-loop is broken, so the counter should reset.
        notify_other_tool_call("loop")
        after = self._read()
        # Back to a plain stub, NOT blocked.
        self.assertTrue(after.get("dedup"))
        self.assertNotIn("error", after)

    @patch("tools.file_tools._get_file_ops")
    def test_different_ranges_tracked_independently(self, mock_ops):
        """Hit counters are keyed by (path, offset, limit); hammering one
        range must not block reads of a different range."""
        self._install_ops(mock_ops)
        # Burn down one range to the blocked state.
        self._read(offset=1, limit=100)
        self._read(offset=1, limit=100)
        self.assertIn("error", self._read(offset=1, limit=100))
        # A different range is a fresh read and goes through untouched.
        other = self._read(offset=1, limit=200)
        self.assertNotIn("error", other)

    @patch("tools.file_tools._get_file_ops")
    def test_reset_file_dedup_clears_hits(self, mock_ops):
        """Post-compression reset must clear stub-hit counters too, or the
        agent would stay blocked after compression."""
        self._install_ops(mock_ops)
        self._read()
        self._read()
        self.assertIn("error", self._read())
        reset_file_dedup("loop")
        # Fresh session — real read, no stub, no block.
        fresh = self._read()
        self.assertNotIn("error", fresh)
        self.assertNotIn("dedup", fresh)
# ---------------------------------------------------------------------------
# Dedup reset on compression
# ---------------------------------------------------------------------------

View File

@@ -253,6 +253,15 @@ def _cap_read_tracker_data(task_data: dict) -> None:
except (StopIteration, KeyError):
break
dedup_hits = task_data.get("dedup_hits")
if dedup_hits is not None and len(dedup_hits) > _DEDUP_CAP:
excess = len(dedup_hits) - _DEDUP_CAP
for _ in range(excess):
try:
dedup_hits.pop(next(iter(dedup_hits)))
except (StopIteration, KeyError):
break
ts = task_data.get("read_timestamps")
if ts is not None and len(ts) > _READ_TIMESTAMPS_CAP:
excess = len(ts) - _READ_TIMESTAMPS_CAP
@@ -479,13 +488,43 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
task_data = _read_tracker.setdefault(task_id, {
"last_key": None, "consecutive": 0,
"read_history": set(), "dedup": {},
"dedup_hits": {},
})
# Backward-compat for pre-existing tracker entries that predate
# dedup_hits (long-lived task or crossed an upgrade boundary).
if "dedup_hits" not in task_data:
task_data["dedup_hits"] = {}
cached_mtime = task_data.get("dedup", {}).get(dedup_key)
if cached_mtime is not None:
try:
current_mtime = os.path.getmtime(resolved_str)
if current_mtime == cached_mtime:
# Count repeated stub returns so weak tool-followers that
# ignore the "refer to earlier result" hint don't burn
# their iteration budget in an infinite read loop. After
# 2 stubs for the same key we escalate to a hard block
# mirroring the count>=4 path on real reads.
with _read_tracker_lock:
hits = task_data["dedup_hits"].get(dedup_key, 0) + 1
task_data["dedup_hits"][dedup_key] = hits
_cap_read_tracker_data(task_data)
if hits >= 2:
return json.dumps({
"error": (
f"BLOCKED: You have called read_file on this "
f"exact region {hits + 1} times and the file "
"has NOT changed. STOP calling read_file for "
"this path — the content from your earlier "
"read_file result in this conversation is "
"still current. Proceed with your task using "
"the information you already have."
),
"path": path,
"already_read": hits + 1,
}, ensure_ascii=False)
return json.dumps({
"status": "unchanged",
"message": _READ_DEDUP_STATUS_MESSAGE,
@@ -544,9 +583,16 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
# ── Track for consecutive-loop detection ──────────────────────
read_key = ("read", path, offset, limit)
with _read_tracker_lock:
# Ensure "dedup" key exists (backward compat with old tracker state)
# Ensure "dedup" / "dedup_hits" keys exist (backward compat with
# old tracker state from pre-dedup-guard sessions).
if "dedup" not in task_data:
task_data["dedup"] = {}
if "dedup_hits" not in task_data:
task_data["dedup_hits"] = {}
# Real read succeeded — this key is no longer in a stub-loop, so
# reset its hit counter. (File either changed or stat failed
# earlier and we fell through.)
task_data["dedup_hits"].pop(dedup_key, None)
task_data["read_history"].add((path, offset, limit))
if task_data["last_key"] == read_key:
task_data["consecutive"] += 1
@@ -622,12 +668,17 @@ def reset_file_dedup(task_id: str = None):
with _read_tracker_lock:
if task_id:
task_data = _read_tracker.get(task_id)
if task_data and "dedup" in task_data:
task_data["dedup"].clear()
if task_data:
if "dedup" in task_data:
task_data["dedup"].clear()
if "dedup_hits" in task_data:
task_data["dedup_hits"].clear()
else:
for task_data in _read_tracker.values():
if "dedup" in task_data:
task_data["dedup"].clear()
if "dedup_hits" in task_data:
task_data["dedup_hits"].clear()
def notify_other_tool_call(task_id: str = "default"):
@@ -644,6 +695,10 @@ def notify_other_tool_call(task_id: str = "default"):
if task_data:
task_data["last_key"] = None
task_data["consecutive"] = 0
# An intervening non-read tool call breaks any stub-loop in
# progress, so clear per-key dedup hit counters too.
if "dedup_hits" in task_data:
task_data["dedup_hits"].clear()
def _invalidate_dedup_for_path(filepath: str, task_id: str) -> None: