From 3696c74bfbd8ba1761fb6a5f192003a50e8b5623 Mon Sep 17 00:00:00 2001 From: Teknium Date: Wed, 8 Apr 2026 01:45:51 -0700 Subject: [PATCH] fix: preserve existing thresholds, remove pre-read byte guard - DEFAULT_RESULT_SIZE_CHARS: 50K -> 100K (match current _LARGE_RESULT_CHARS) - DEFAULT_PREVIEW_SIZE_CHARS: 2K -> 1.5K (match current _LARGE_RESULT_PREVIEW_CHARS) - Per-tool overrides all set to 100K (terminal, execute_code, search_files) - Remove pre-read byte guard (no behavioral regression vs current main) - Revert limit signature change to int=500 (match current default) - Restore original read_file schema description - Update test assertions to match 100K thresholds --- tests/tools/test_tool_result_storage.py | 6 ++--- tools/budget_config.py | 4 ++-- tools/code_execution_tool.py | 2 +- tools/file_tools.py | 32 ++++--------------------- tools/terminal_tool.py | 2 +- 5 files changed, 11 insertions(+), 35 deletions(-) diff --git a/tests/tools/test_tool_result_storage.py b/tests/tools/test_tool_result_storage.py index 96b904a576..4e51fe7bb7 100644 --- a/tests/tools/test_tool_result_storage.py +++ b/tests/tools/test_tool_result_storage.py @@ -395,7 +395,7 @@ class TestEnforceTurnBudget: assert PERSISTED_OUTPUT_TAG in msgs[1]["content"] def test_medium_result_regression(self): - """6 results of 42K chars each (252K total) — each under 50K default + """6 results of 42K chars each (252K total) — each under 100K default threshold but aggregate exceeds 200K budget. L3 should persist.""" env = MagicMock() env.execute.return_value = {"output": "", "returncode": 0} @@ -449,7 +449,7 @@ class TestPerToolThresholds: try: import tools.terminal_tool # noqa: F401 val = registry.get_max_result_size("terminal") - assert val == 30_000 + assert val == 100_000 except ImportError: pytest.skip("terminal_tool not importable in test env") @@ -467,6 +467,6 @@ class TestPerToolThresholds: try: import tools.file_tools # noqa: F401 val = registry.get_max_result_size("search_files") - assert val == 20_000 + assert val == 100_000 except ImportError: pytest.skip("file_tools not importable in test env") diff --git a/tools/budget_config.py b/tools/budget_config.py index 52204cdf8e..577e59442e 100644 --- a/tools/budget_config.py +++ b/tools/budget_config.py @@ -15,9 +15,9 @@ PINNED_THRESHOLDS: Dict[str, float] = { # Defaults matching the current hardcoded values in tool_result_storage.py. # Kept here as the single source of truth; tool_result_storage.py imports these. -DEFAULT_RESULT_SIZE_CHARS: int = 50_000 +DEFAULT_RESULT_SIZE_CHARS: int = 100_000 DEFAULT_TURN_BUDGET_CHARS: int = 200_000 -DEFAULT_PREVIEW_SIZE_CHARS: int = 2_000 +DEFAULT_PREVIEW_SIZE_CHARS: int = 1_500 @dataclass(frozen=True) diff --git a/tools/code_execution_tool.py b/tools/code_execution_tool.py index f48c4b99ee..aa4cd0863f 100644 --- a/tools/code_execution_tool.py +++ b/tools/code_execution_tool.py @@ -1343,5 +1343,5 @@ registry.register( enabled_tools=kw.get("enabled_tools")), check_fn=check_sandbox_requirements, emoji="🐍", - max_result_size_chars=30_000, + max_result_size_chars=100_000, ) diff --git a/tools/file_tools.py b/tools/file_tools.py index 4ca10b2dcf..186a9d052c 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -26,8 +26,6 @@ _EXPECTED_WRITE_ERRNOS = {errno.EACCES, errno.EPERM, errno.EROFS} # Configurable via config.yaml: file_read_max_chars: 200000 # --------------------------------------------------------------------------- _DEFAULT_MAX_READ_CHARS = 100_000 -_PRE_READ_MAX_BYTES = 256_000 # reject full-file reads on files larger than this -_DEFAULT_READ_LIMIT = 500 _max_read_chars_cached: int | None = None @@ -279,7 +277,7 @@ def clear_file_ops_cache(task_id: str = None): _file_ops_cache.clear() -def read_file_tool(path: str, offset: int = 1, limit: int | None = None, task_id: str = "default") -> str: +def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = "default") -> str: """Read a file with pagination and line numbers.""" try: # ── Device path guard ───────────────────────────────────────── @@ -327,28 +325,6 @@ def read_file_tool(path: str, offset: int = 1, limit: int | None = None, task_id except ValueError: pass - # ── Pre-read file size guard ────────────────────────────────── - # Guard only when the caller omits limit; an explicit limit means - # the caller knows what slice it wants. - if limit is None: - try: - _fsize = os.path.getsize(str(_resolved)) - except OSError: - _fsize = 0 - if _fsize > _PRE_READ_MAX_BYTES: - return json.dumps({ - "error": ( - f"File is too large to read in full ({_fsize:,} bytes). " - f"Use offset and limit parameters to read specific sections " - f"(e.g. offset=1, limit=100 for the first 100 lines)." - ), - "path": path, - "file_size": _fsize, - }, ensure_ascii=False) - - if limit is None: - limit = _DEFAULT_READ_LIMIT - # ── Dedup check ─────────────────────────────────────────────── # If we already read this exact (path, offset, limit) and the # file hasn't been modified since, return a lightweight stub @@ -762,7 +738,7 @@ def _check_file_reqs(): READ_FILE_SCHEMA = { "name": "read_file", - "description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. When you already know which part of the file you need, only read that part using offset and limit — this is important for larger files. Files over 256KB will be rejected unless you provide a limit parameter. NOTE: Cannot read images or binary files — use vision_analyze for images.", + "description": "Read a text file with line numbers and pagination. Use this instead of cat/head/tail in terminal. Output format: 'LINE_NUM|CONTENT'. Suggests similar filenames if not found. Use offset and limit for large files. Reads exceeding ~100K characters are rejected; use offset and limit to read specific sections of large files. NOTE: Cannot read images or binary files — use vision_analyze for images.", "parameters": { "type": "object", "properties": { @@ -826,7 +802,7 @@ SEARCH_FILES_SCHEMA = { def _handle_read_file(args, **kw): tid = kw.get("task_id") or "default" - return read_file_tool(path=args.get("path", ""), offset=args.get("offset", 1), limit=args.get("limit"), task_id=tid) + return read_file_tool(path=args.get("path", ""), offset=args.get("offset", 1), limit=args.get("limit", 500), task_id=tid) def _handle_write_file(args, **kw): @@ -856,4 +832,4 @@ def _handle_search_files(args, **kw): registry.register(name="read_file", toolset="file", schema=READ_FILE_SCHEMA, handler=_handle_read_file, check_fn=_check_file_reqs, emoji="📖", max_result_size_chars=float('inf')) registry.register(name="write_file", toolset="file", schema=WRITE_FILE_SCHEMA, handler=_handle_write_file, check_fn=_check_file_reqs, emoji="✍️", max_result_size_chars=100_000) registry.register(name="patch", toolset="file", schema=PATCH_SCHEMA, handler=_handle_patch, check_fn=_check_file_reqs, emoji="🔧", max_result_size_chars=100_000) -registry.register(name="search_files", toolset="file", schema=SEARCH_FILES_SCHEMA, handler=_handle_search_files, check_fn=_check_file_reqs, emoji="🔎", max_result_size_chars=20_000) +registry.register(name="search_files", toolset="file", schema=SEARCH_FILES_SCHEMA, handler=_handle_search_files, check_fn=_check_file_reqs, emoji="🔎", max_result_size_chars=100_000) diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index ff9e064b83..6206c4aa69 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -1623,5 +1623,5 @@ registry.register( handler=_handle_terminal, check_fn=check_terminal_requirements, emoji="💻", - max_result_size_chars=30_000, + max_result_size_chars=100_000, )