Add file manipulation tools and enhance setup scripts

- Introduced file manipulation capabilities in `model_tools.py`, including functions for reading, writing, patching, and searching files. - Added a new `file` toolset in `toolsets.py` and updated distributions to include file tools. - Enhanced `setup-hermes.sh` and `install.sh` scripts to check for and optionally install `ripgrep` for faster file searching. - Implemented a new `file_operations.py` module to encapsulate file operations using shell commands. - Updated `doctor.py` and `install.ps1` to check for `ripgrep` and provide installation guidance if not found. - Added fuzzy matching and patch parsing capabilities to improve file manipulation accuracy and flexibility.
2026-04-28 06:51:16 +08:00 · 2026-02-05 03:49:46 -08:00
parent 5c3105b437
commit 533c064269
12 changed files with 2549 additions and 13 deletions
--- a/model_tools.py
+++ b/model_tools.py
@@ -33,6 +33,9 @@ from typing import Dict, Any, List, Optional, Tuple

 from tools.web_tools import web_search_tool, web_extract_tool, web_crawl_tool, check_firecrawl_api_key
 from tools.terminal_tool import terminal_tool, check_terminal_requirements, TERMINAL_TOOL_DESCRIPTION, cleanup_vm
+# File manipulation tools (read, write, patch, search)
+from tools.file_tools import read_file_tool, write_file_tool, patch_tool, search_tool
+from tools import check_file_requirements
 # Hecate/MorphCloud terminal tool (cloud VMs) - available as alternative backend
 from tools.terminal_hecate import terminal_hecate_tool, check_hecate_requirements, TERMINAL_HECATE_DESCRIPTION
 from tools.vision_tools import vision_analyze_tool, check_vision_requirements
@@ -155,6 +158,13 @@ TOOLSET_REQUIREMENTS = {
            "rl_list_runs", "rl_test_inference",
        ],
    },
+    "file": {
+        "name": "File Operations (read, write, patch, search)",
+        "env_vars": [],  # Uses terminal backend, no additional requirements
+        "check_fn": check_file_requirements,
+        "setup_url": None,
+        "tools": ["read_file", "write_file", "patch", "search"],
+    },
 }


@@ -675,6 +685,163 @@ def get_rl_tool_definitions() -> List[Dict[str, Any]]:
    ]


+def get_file_tool_definitions() -> List[Dict[str, Any]]:
+    """
+    Get tool definitions for file manipulation tools in OpenAI's expected format.
+    
+    File tools operate via the terminal backend and support any environment
+    (local, docker, singularity, ssh, modal).
+    
+    Returns:
+        List[Dict]: List of file tool definitions compatible with OpenAI API
+    """
+    return [
+        {
+            "type": "function",
+            "function": {
+                "name": "read_file",
+                "description": "Read a file with pagination support. Returns content with line numbers in 'LINE_NUM|CONTENT' format. For binary files (images), returns base64-encoded data. If file not found, suggests similar filenames.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "path": {
+                            "type": "string",
+                            "description": "Path to the file to read (absolute or relative)"
+                        },
+                        "offset": {
+                            "type": "integer",
+                            "description": "Line number to start reading from (1-indexed, default: 1)",
+                            "default": 1,
+                            "minimum": 1
+                        },
+                        "limit": {
+                            "type": "integer",
+                            "description": "Maximum number of lines to read (default: 500, max: 2000)",
+                            "default": 500,
+                            "maximum": 2000
+                        }
+                    },
+                    "required": ["path"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "write_file",
+                "description": "Write content to a file. Creates parent directories automatically. Returns bytes written and lint check results for supported languages.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "path": {
+                            "type": "string",
+                            "description": "Path to the file to write (will be created if doesn't exist)"
+                        },
+                        "content": {
+                            "type": "string",
+                            "description": "Content to write to the file"
+                        }
+                    },
+                    "required": ["path", "content"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "patch",
+                "description": "Modify files using either simple string replacement or V4A patch format. Mode 'replace' does find-and-replace with fuzzy matching. Mode 'patch' applies multi-file changes using V4A format (*** Begin/End Patch). Auto-runs syntax checks on modified files.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "mode": {
+                            "type": "string",
+                            "enum": ["replace", "patch"],
+                            "description": "Edit mode: 'replace' for string replacement, 'patch' for V4A patch format",
+                            "default": "replace"
+                        },
+                        "path": {
+                            "type": "string",
+                            "description": "File path (required for 'replace' mode)"
+                        },
+                        "old_string": {
+                            "type": "string",
+                            "description": "Text to find and replace (required for 'replace' mode). Must be unique in file unless replace_all=true"
+                        },
+                        "new_string": {
+                            "type": "string",
+                            "description": "Replacement text (required for 'replace' mode)"
+                        },
+                        "replace_all": {
+                            "type": "boolean",
+                            "description": "Replace all occurrences instead of requiring unique match (default: false)",
+                            "default": False
+                        },
+                        "patch": {
+                            "type": "string",
+                            "description": "V4A format patch content (required for 'patch' mode). Format: *** Begin Patch / *** Update File: path / @@ context @@ / -removed / +added / *** End Patch"
+                        }
+                    },
+                    "required": ["mode"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "search",
+                "description": "Search for content in files or search for files by name. Use target='content' to search inside files (like grep), or target='files' to find files by name pattern (like glob/find). Results sorted by modification time (newest first).",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "pattern": {
+                            "type": "string",
+                            "description": "For target='content': regex pattern to search for. For target='files': glob pattern (e.g., '*.py', '*config*')"
+                        },
+                        "target": {
+                            "type": "string",
+                            "enum": ["content", "files"],
+                            "description": "Search mode: 'content' searches inside files, 'files' searches for files by name",
+                            "default": "content"
+                        },
+                        "path": {
+                            "type": "string",
+                            "description": "Directory or file to search in (default: current directory)",
+                            "default": "."
+                        },
+                        "file_glob": {
+                            "type": "string",
+                            "description": "Filter files by pattern when target='content' (e.g., '*.py' to only search Python files)"
+                        },
+                        "limit": {
+                            "type": "integer",
+                            "description": "Maximum number of results (default: 50)",
+                            "default": 50
+                        },
+                        "offset": {
+                            "type": "integer",
+                            "description": "Skip first N results for pagination (default: 0)",
+                            "default": 0
+                        },
+                        "output_mode": {
+                            "type": "string",
+                            "enum": ["content", "files_only", "count"],
+                            "description": "For target='content': 'content' shows matches, 'files_only' shows file paths, 'count' shows match counts per file",
+                            "default": "content"
+                        },
+                        "context": {
+                            "type": "integer",
+                            "description": "Lines of context around matches (only for target='content', output_mode='content')",
+                            "default": 0
+                        }
+                    },
+                    "required": ["pattern"]
+                }
+            }
+        }
+    ]
+
+
 def get_all_tool_names() -> List[str]:
    """
    Get the names of all available tools across all toolsets.
@@ -733,6 +900,12 @@ def get_all_tool_names() -> List[str]:
            "rl_list_runs", "rl_test_inference"
        ])
    
+    # File manipulation tools (use terminal backend)
+    if check_file_requirements():
+        tool_names.extend([
+            "read_file", "write_file", "patch", "search"
+        ])
+    
    return tool_names


@@ -782,6 +955,11 @@ def get_toolset_for_tool(tool_name: str) -> str:
        "rl_stop_training": "rl_tools",
        "rl_get_results": "rl_tools",
        "rl_list_runs": "rl_tools",
+        # File manipulation tools
+        "read_file": "file_tools",
+        "write_file": "file_tools",
+        "patch": "file_tools",
+        "search": "file_tools",
    }
    
    return toolset_mapping.get(tool_name, "unknown")
@@ -864,6 +1042,11 @@ def get_tool_definitions(
        for tool in get_rl_tool_definitions():
            all_available_tools_map[tool["function"]["name"]] = tool
    
+    # File manipulation tools (use terminal backend)
+    if check_file_requirements():
+        for tool in get_file_tool_definitions():
+            all_available_tools_map[tool["function"]["name"]] = tool
+    
    # Determine which tools to include based on toolsets
    tools_to_include = set()
    
@@ -899,7 +1082,8 @@ def get_tool_definitions(
                            "rl_start_training", "rl_check_status",
                            "rl_stop_training", "rl_get_results",
                            "rl_list_runs", "rl_test_inference"
-                        ]
+                        ],
+                        "file_tools": ["read_file", "write_file", "patch", "search"]
                    }
                    legacy_tools = legacy_map.get(toolset_name, [])
                    tools_to_include.update(legacy_tools)
@@ -951,7 +1135,8 @@ def get_tool_definitions(
                            "rl_start_training", "rl_check_status",
                            "rl_stop_training", "rl_get_results",
                            "rl_list_runs", "rl_test_inference"
-                        ]
+                        ],
+                        "file_tools": ["read_file", "write_file", "patch", "search"]
                    }
                    legacy_tools = legacy_map.get(toolset_name, [])
                    tools_to_include.difference_update(legacy_tools)
@@ -1338,6 +1523,70 @@ def handle_rl_function_call(
    return json.dumps({"error": f"Unknown RL function: {function_name}"}, ensure_ascii=False)


+def handle_file_function_call(
+    function_name: str,
+    function_args: Dict[str, Any],
+    task_id: Optional[str] = None
+) -> str:
+    """
+    Handle function calls for file manipulation tools.
+    
+    These tools use the terminal backend for all operations, supporting
+    local, docker, singularity, ssh, and modal environments.
+    
+    Args:
+        function_name (str): Name of the file function to call
+        function_args (Dict): Arguments for the function
+        task_id (str): Task identifier for environment isolation
+    
+    Returns:
+        str: Function result as JSON string
+    """
+    # Determine task_id to use
+    tid = task_id or "default"
+    
+    if function_name == "read_file":
+        return read_file_tool(
+            path=function_args.get("path", ""),
+            offset=function_args.get("offset", 1),
+            limit=function_args.get("limit", 500),
+            task_id=tid
+        )
+    
+    elif function_name == "write_file":
+        return write_file_tool(
+            path=function_args.get("path", ""),
+            content=function_args.get("content", ""),
+            task_id=tid
+        )
+    
+    elif function_name == "patch":
+        return patch_tool(
+            mode=function_args.get("mode", "replace"),
+            path=function_args.get("path"),
+            old_string=function_args.get("old_string"),
+            new_string=function_args.get("new_string"),
+            replace_all=function_args.get("replace_all", False),
+            patch=function_args.get("patch"),
+            task_id=tid
+        )
+    
+    elif function_name == "search":
+        return search_tool(
+            pattern=function_args.get("pattern", ""),
+            target=function_args.get("target", "content"),
+            path=function_args.get("path", "."),
+            file_glob=function_args.get("file_glob"),
+            limit=function_args.get("limit", 50),
+            offset=function_args.get("offset", 0),
+            output_mode=function_args.get("output_mode", "content"),
+            context=function_args.get("context", 0),
+            task_id=tid
+        )
+    
+    return json.dumps({"error": f"Unknown file function: {function_name}"}, ensure_ascii=False)
+
+
 def handle_function_call(
    function_name: str, 
    function_args: Dict[str, Any], 
@@ -1411,6 +1660,10 @@ def handle_function_call(
        ]:
            return handle_rl_function_call(function_name, function_args)

+        # Route file manipulation tools
+        elif function_name in ["read_file", "write_file", "patch", "search"]:
+            return handle_file_function_call(function_name, function_args, task_id)
+
        else:
            error_msg = f"Unknown function: {function_name}"
            print(f"❌ {error_msg}")
@@ -1482,6 +1735,12 @@ def get_available_toolsets() -> Dict[str, Dict[str, Any]]:
            "tools": ["schedule_cronjob", "list_cronjobs", "remove_cronjob"],
            "description": "Schedule and manage automated tasks (cronjobs) - only available in interactive CLI mode",
            "requirements": ["HERMES_INTERACTIVE=1 (set automatically by cli.py)"]
+        },
+        "file_tools": {
+            "available": check_file_requirements(),
+            "tools": ["read_file", "write_file", "patch", "search"],
+            "description": "File manipulation tools: read/write files, search content/files, patch with fuzzy matching",
+            "requirements": ["Terminal backend available (local/docker/ssh/singularity/modal)"]
        }
    }
    
@@ -1502,7 +1761,8 @@ def check_toolset_requirements() -> Dict[str, bool]:
        "image_tools": check_image_generation_requirements(),
        "skills_tools": check_skills_requirements(),
        "browser_tools": check_browser_requirements(),
-        "cronjob_tools": check_cronjob_requirements()
+        "cronjob_tools": check_cronjob_requirements(),
+        "file_tools": check_file_requirements()
    }

 if __name__ == "__main__":