mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-06 10:47:12 +08:00
Compare commits
10 Commits
test
...
simplify-t
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ab7293bed6 | ||
|
|
1614c15bb1 | ||
|
|
f813959750 | ||
|
|
f957ec2267 | ||
|
|
92e3074c10 | ||
|
|
0c618482c4 | ||
|
|
2d8f6c46f1 | ||
|
|
c27787f09f | ||
|
|
d90fcd4e2b | ||
|
|
69fd0ca9aa |
11
.gitignore
vendored
11
.gitignore
vendored
@@ -20,4 +20,13 @@ logs/
|
|||||||
data/
|
data/
|
||||||
.pytest_cache/
|
.pytest_cache/
|
||||||
tmp/
|
tmp/
|
||||||
temp_vision_images/
|
temp_vision_images/
|
||||||
|
hermes-*/*
|
||||||
|
examples/
|
||||||
|
tests/quick_test_dataset.jsonl
|
||||||
|
tests/sample_dataset.jsonl
|
||||||
|
run_datagen_kimik2-thinking.sh
|
||||||
|
run_datagen_megascience_glm4-6.sh
|
||||||
|
run_datagen_sonnet.sh
|
||||||
|
source-data/*
|
||||||
|
run_datagen_megascience_glm4-6.sh
|
||||||
|
|||||||
@@ -98,10 +98,9 @@ def _extract_tool_stats(messages: List[Dict[str, Any]]) -> Dict[str, Dict[str, i
|
|||||||
# Terminal wraps its response in a "content" field
|
# Terminal wraps its response in a "content" field
|
||||||
if "content" in content_json and isinstance(content_json["content"], dict):
|
if "content" in content_json and isinstance(content_json["content"], dict):
|
||||||
inner_content = content_json["content"]
|
inner_content = content_json["content"]
|
||||||
# Check for actual error (non-null error field or non-zero exit code)
|
# Check for actual error (non-null error field)
|
||||||
has_error = (inner_content.get("error") is not None or
|
# Note: non-zero exit codes are not failures - the model can self-correct
|
||||||
inner_content.get("exit_code", 0) != 0)
|
if inner_content.get("error") is not None:
|
||||||
if has_error:
|
|
||||||
is_success = False
|
is_success = False
|
||||||
|
|
||||||
# Check for "success": false pattern used by some tools
|
# Check for "success": false pattern used by some tools
|
||||||
@@ -164,7 +163,8 @@ def _process_single_prompt(
|
|||||||
enabled_toolsets=selected_toolsets,
|
enabled_toolsets=selected_toolsets,
|
||||||
save_trajectories=False, # We handle saving ourselves
|
save_trajectories=False, # We handle saving ourselves
|
||||||
verbose_logging=config.get("verbose", False),
|
verbose_logging=config.get("verbose", False),
|
||||||
ephemeral_system_prompt=config.get("ephemeral_system_prompt")
|
ephemeral_system_prompt=config.get("ephemeral_system_prompt"),
|
||||||
|
log_prefix_chars=config.get("log_prefix_chars", 100)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Run the agent with task_id to ensure each task gets its own isolated VM
|
# Run the agent with task_id to ensure each task gets its own isolated VM
|
||||||
@@ -323,11 +323,12 @@ class BatchRunner:
|
|||||||
model: str = "claude-opus-4-20250514",
|
model: str = "claude-opus-4-20250514",
|
||||||
num_workers: int = 4,
|
num_workers: int = 4,
|
||||||
verbose: bool = False,
|
verbose: bool = False,
|
||||||
ephemeral_system_prompt: str = None
|
ephemeral_system_prompt: str = None,
|
||||||
|
log_prefix_chars: int = 100,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initialize the batch runner.
|
Initialize the batch runner.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
dataset_file (str): Path to the dataset JSONL file with 'prompt' field
|
dataset_file (str): Path to the dataset JSONL file with 'prompt' field
|
||||||
batch_size (int): Number of prompts per batch
|
batch_size (int): Number of prompts per batch
|
||||||
@@ -340,6 +341,7 @@ class BatchRunner:
|
|||||||
num_workers (int): Number of parallel workers
|
num_workers (int): Number of parallel workers
|
||||||
verbose (bool): Enable verbose logging
|
verbose (bool): Enable verbose logging
|
||||||
ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
|
ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
|
||||||
|
log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses (default: 20)
|
||||||
"""
|
"""
|
||||||
self.dataset_file = Path(dataset_file)
|
self.dataset_file = Path(dataset_file)
|
||||||
self.batch_size = batch_size
|
self.batch_size = batch_size
|
||||||
@@ -352,6 +354,7 @@ class BatchRunner:
|
|||||||
self.num_workers = num_workers
|
self.num_workers = num_workers
|
||||||
self.verbose = verbose
|
self.verbose = verbose
|
||||||
self.ephemeral_system_prompt = ephemeral_system_prompt
|
self.ephemeral_system_prompt = ephemeral_system_prompt
|
||||||
|
self.log_prefix_chars = log_prefix_chars
|
||||||
|
|
||||||
# Validate distribution
|
# Validate distribution
|
||||||
if not validate_distribution(distribution):
|
if not validate_distribution(distribution):
|
||||||
@@ -507,7 +510,8 @@ class BatchRunner:
|
|||||||
"base_url": self.base_url,
|
"base_url": self.base_url,
|
||||||
"api_key": self.api_key,
|
"api_key": self.api_key,
|
||||||
"verbose": self.verbose,
|
"verbose": self.verbose,
|
||||||
"ephemeral_system_prompt": self.ephemeral_system_prompt
|
"ephemeral_system_prompt": self.ephemeral_system_prompt,
|
||||||
|
"log_prefix_chars": self.log_prefix_chars
|
||||||
}
|
}
|
||||||
|
|
||||||
# Get completed prompts set
|
# Get completed prompts set
|
||||||
@@ -650,11 +654,12 @@ def main(
|
|||||||
resume: bool = False,
|
resume: bool = False,
|
||||||
verbose: bool = False,
|
verbose: bool = False,
|
||||||
list_distributions: bool = False,
|
list_distributions: bool = False,
|
||||||
ephemeral_system_prompt: str = None
|
ephemeral_system_prompt: str = None,
|
||||||
|
log_prefix_chars: int = 100,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Run batch processing of agent prompts from a dataset.
|
Run batch processing of agent prompts from a dataset.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
dataset_file (str): Path to JSONL file with 'prompt' field in each entry
|
dataset_file (str): Path to JSONL file with 'prompt' field in each entry
|
||||||
batch_size (int): Number of prompts per batch
|
batch_size (int): Number of prompts per batch
|
||||||
@@ -669,6 +674,7 @@ def main(
|
|||||||
verbose (bool): Enable verbose logging (default: False)
|
verbose (bool): Enable verbose logging (default: False)
|
||||||
list_distributions (bool): List available toolset distributions and exit
|
list_distributions (bool): List available toolset distributions and exit
|
||||||
ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
|
ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
|
||||||
|
log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses (default: 20)
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
# Basic usage
|
# Basic usage
|
||||||
@@ -729,9 +735,10 @@ def main(
|
|||||||
model=model,
|
model=model,
|
||||||
num_workers=num_workers,
|
num_workers=num_workers,
|
||||||
verbose=verbose,
|
verbose=verbose,
|
||||||
ephemeral_system_prompt=ephemeral_system_prompt
|
ephemeral_system_prompt=ephemeral_system_prompt,
|
||||||
|
log_prefix_chars=log_prefix_chars
|
||||||
)
|
)
|
||||||
|
|
||||||
runner.run(resume=resume)
|
runner.run(resume=resume)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -31,7 +31,9 @@ import asyncio
|
|||||||
from typing import Dict, Any, List, Optional
|
from typing import Dict, Any, List, Optional
|
||||||
|
|
||||||
from tools.web_tools import web_search_tool, web_extract_tool, web_crawl_tool, check_firecrawl_api_key
|
from tools.web_tools import web_search_tool, web_extract_tool, web_crawl_tool, check_firecrawl_api_key
|
||||||
from tools.terminal_tool import terminal_tool, check_hecate_requirements, TERMINAL_TOOL_DESCRIPTION
|
from tools.simple_terminal_tool import simple_terminal_tool, check_requirements as check_simple_terminal_requirements, SIMPLE_TERMINAL_TOOL_DESCRIPTION
|
||||||
|
# Keep old terminal tool for backwards compatibility if needed
|
||||||
|
# from tools.terminal_tool import terminal_tool, check_hecate_requirements, TERMINAL_TOOL_DESCRIPTION
|
||||||
from tools.vision_tools import vision_analyze_tool, check_vision_requirements
|
from tools.vision_tools import vision_analyze_tool, check_vision_requirements
|
||||||
from tools.mixture_of_agents_tool import mixture_of_agents_tool, check_moa_requirements
|
from tools.mixture_of_agents_tool import mixture_of_agents_tool, check_moa_requirements
|
||||||
from tools.image_generation_tool import image_generate_tool, check_image_generation_requirements
|
from tools.image_generation_tool import image_generate_tool, check_image_generation_requirements
|
||||||
@@ -111,7 +113,7 @@ def get_web_tool_definitions() -> List[Dict[str, Any]]:
|
|||||||
def get_terminal_tool_definitions() -> List[Dict[str, Any]]:
|
def get_terminal_tool_definitions() -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Get tool definitions for terminal tools in OpenAI's expected format.
|
Get tool definitions for terminal tools in OpenAI's expected format.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[Dict]: List of terminal tool definitions compatible with OpenAI API
|
List[Dict]: List of terminal tool definitions compatible with OpenAI API
|
||||||
"""
|
"""
|
||||||
@@ -120,7 +122,7 @@ def get_terminal_tool_definitions() -> List[Dict[str, Any]]:
|
|||||||
"type": "function",
|
"type": "function",
|
||||||
"function": {
|
"function": {
|
||||||
"name": "terminal",
|
"name": "terminal",
|
||||||
"description": TERMINAL_TOOL_DESCRIPTION,
|
"description": SIMPLE_TERMINAL_TOOL_DESCRIPTION,
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
@@ -128,28 +130,18 @@ def get_terminal_tool_definitions() -> List[Dict[str, Any]]:
|
|||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "The command to execute on the VM"
|
"description": "The command to execute on the VM"
|
||||||
},
|
},
|
||||||
"input_keys": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "Keystrokes to send to the most recent interactive session (e.g., 'hello\\n' for typing hello + Enter). If no active session exists, this will be ignored."
|
|
||||||
},
|
|
||||||
"background": {
|
"background": {
|
||||||
"type": "boolean",
|
"type": "boolean",
|
||||||
"description": "Whether to run the command in the background (default: false)",
|
"description": "Whether to run the command in the background (default: false)",
|
||||||
"default": False
|
"default": False
|
||||||
},
|
},
|
||||||
"idle_threshold": {
|
|
||||||
"type": "number",
|
|
||||||
"description": "Seconds to wait for output before considering session idle (default: 5.0)",
|
|
||||||
"default": 5.0,
|
|
||||||
"minimum": 0.1
|
|
||||||
},
|
|
||||||
"timeout": {
|
"timeout": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"description": "Command timeout in seconds (optional)",
|
"description": "Command timeout in seconds (optional)",
|
||||||
"minimum": 1
|
"minimum": 1
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"required": []
|
"required": ["command"]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -262,11 +254,11 @@ def get_all_tool_names() -> List[str]:
|
|||||||
# Web tools
|
# Web tools
|
||||||
if check_firecrawl_api_key():
|
if check_firecrawl_api_key():
|
||||||
tool_names.extend(["web_search", "web_extract", "web_crawl"])
|
tool_names.extend(["web_search", "web_extract", "web_crawl"])
|
||||||
|
|
||||||
# Terminal tools
|
# Terminal tools
|
||||||
if check_hecate_requirements():
|
if check_simple_terminal_requirements():
|
||||||
tool_names.extend(["terminal"])
|
tool_names.extend(["terminal"])
|
||||||
|
|
||||||
# Vision tools
|
# Vision tools
|
||||||
if check_vision_requirements():
|
if check_vision_requirements():
|
||||||
tool_names.extend(["vision_analyze"])
|
tool_names.extend(["vision_analyze"])
|
||||||
@@ -346,11 +338,11 @@ def get_tool_definitions(
|
|||||||
if check_firecrawl_api_key():
|
if check_firecrawl_api_key():
|
||||||
for tool in get_web_tool_definitions():
|
for tool in get_web_tool_definitions():
|
||||||
all_available_tools_map[tool["function"]["name"]] = tool
|
all_available_tools_map[tool["function"]["name"]] = tool
|
||||||
|
|
||||||
if check_hecate_requirements():
|
if check_simple_terminal_requirements():
|
||||||
for tool in get_terminal_tool_definitions():
|
for tool in get_terminal_tool_definitions():
|
||||||
all_available_tools_map[tool["function"]["name"]] = tool
|
all_available_tools_map[tool["function"]["name"]] = tool
|
||||||
|
|
||||||
if check_vision_requirements():
|
if check_vision_requirements():
|
||||||
for tool in get_vision_tool_definitions():
|
for tool in get_vision_tool_definitions():
|
||||||
all_available_tools_map[tool["function"]["name"]] = tool
|
all_available_tools_map[tool["function"]["name"]] = tool
|
||||||
@@ -494,12 +486,10 @@ def handle_terminal_function_call(function_name: str, function_args: Dict[str, A
|
|||||||
"""
|
"""
|
||||||
if function_name == "terminal":
|
if function_name == "terminal":
|
||||||
command = function_args.get("command")
|
command = function_args.get("command")
|
||||||
input_keys = function_args.get("input_keys")
|
|
||||||
background = function_args.get("background", False)
|
background = function_args.get("background", False)
|
||||||
idle_threshold = function_args.get("idle_threshold", 5.0)
|
|
||||||
timeout = function_args.get("timeout")
|
timeout = function_args.get("timeout")
|
||||||
|
|
||||||
return terminal_tool(command, input_keys, None, background, idle_threshold, timeout, task_id)
|
return simple_terminal_tool(command=command, background=background, timeout=timeout, task_id=task_id)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
return json.dumps({"error": f"Unknown terminal function: {function_name}"}, ensure_ascii=False)
|
return json.dumps({"error": f"Unknown terminal function: {function_name}"}, ensure_ascii=False)
|
||||||
@@ -681,10 +671,10 @@ def get_available_toolsets() -> Dict[str, Dict[str, Any]]:
|
|||||||
"requirements": ["FIRECRAWL_API_KEY environment variable"]
|
"requirements": ["FIRECRAWL_API_KEY environment variable"]
|
||||||
},
|
},
|
||||||
"terminal_tools": {
|
"terminal_tools": {
|
||||||
"available": check_hecate_requirements(),
|
"available": check_simple_terminal_requirements(),
|
||||||
"tools": ["terminal_tool"],
|
"tools": ["simple_terminal_tool"],
|
||||||
"description": "Execute commands with optional interactive session support on Linux VMs",
|
"description": "Execute commands on secure Linux VMs without session persistence",
|
||||||
"requirements": ["MORPH_API_KEY environment variable", "hecate package"]
|
"requirements": ["MORPH_API_KEY environment variable"]
|
||||||
},
|
},
|
||||||
"vision_tools": {
|
"vision_tools": {
|
||||||
"available": check_vision_requirements(),
|
"available": check_vision_requirements(),
|
||||||
@@ -711,13 +701,13 @@ def get_available_toolsets() -> Dict[str, Dict[str, Any]]:
|
|||||||
def check_toolset_requirements() -> Dict[str, bool]:
|
def check_toolset_requirements() -> Dict[str, bool]:
|
||||||
"""
|
"""
|
||||||
Check if all requirements for available toolsets are met.
|
Check if all requirements for available toolsets are met.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dict: Status of each toolset's requirements
|
Dict: Status of each toolset's requirements
|
||||||
"""
|
"""
|
||||||
return {
|
return {
|
||||||
"web_tools": check_firecrawl_api_key(),
|
"web_tools": check_firecrawl_api_key(),
|
||||||
"terminal_tools": check_hecate_requirements(),
|
"terminal_tools": check_simple_terminal_requirements(),
|
||||||
"vision_tools": check_vision_requirements(),
|
"vision_tools": check_vision_requirements(),
|
||||||
"moa_tools": check_moa_requirements(),
|
"moa_tools": check_moa_requirements(),
|
||||||
"image_tools": check_image_generation_requirements()
|
"image_tools": check_image_generation_requirements()
|
||||||
|
|||||||
67
run_agent.py
67
run_agent.py
@@ -65,7 +65,8 @@ class AIAgent:
|
|||||||
disabled_toolsets: List[str] = None,
|
disabled_toolsets: List[str] = None,
|
||||||
save_trajectories: bool = False,
|
save_trajectories: bool = False,
|
||||||
verbose_logging: bool = False,
|
verbose_logging: bool = False,
|
||||||
ephemeral_system_prompt: str = None
|
ephemeral_system_prompt: str = None,
|
||||||
|
log_prefix_chars: int = 100,
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Initialize the AI Agent.
|
Initialize the AI Agent.
|
||||||
@@ -81,6 +82,7 @@ class AIAgent:
|
|||||||
save_trajectories (bool): Whether to save conversation trajectories to JSONL files (default: False)
|
save_trajectories (bool): Whether to save conversation trajectories to JSONL files (default: False)
|
||||||
verbose_logging (bool): Enable verbose logging for debugging (default: False)
|
verbose_logging (bool): Enable verbose logging for debugging (default: False)
|
||||||
ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
|
ephemeral_system_prompt (str): System prompt used during agent execution but NOT saved to trajectories (optional)
|
||||||
|
log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses (default: 20)
|
||||||
"""
|
"""
|
||||||
self.model = model
|
self.model = model
|
||||||
self.max_iterations = max_iterations
|
self.max_iterations = max_iterations
|
||||||
@@ -88,6 +90,7 @@ class AIAgent:
|
|||||||
self.save_trajectories = save_trajectories
|
self.save_trajectories = save_trajectories
|
||||||
self.verbose_logging = verbose_logging
|
self.verbose_logging = verbose_logging
|
||||||
self.ephemeral_system_prompt = ephemeral_system_prompt
|
self.ephemeral_system_prompt = ephemeral_system_prompt
|
||||||
|
self.log_prefix_chars = log_prefix_chars
|
||||||
|
|
||||||
# Store toolset filtering options
|
# Store toolset filtering options
|
||||||
self.enabled_toolsets = enabled_toolsets
|
self.enabled_toolsets = enabled_toolsets
|
||||||
@@ -385,7 +388,7 @@ class AIAgent:
|
|||||||
|
|
||||||
while api_call_count < self.max_iterations:
|
while api_call_count < self.max_iterations:
|
||||||
api_call_count += 1
|
api_call_count += 1
|
||||||
print(f"\n🔄 Making API call #{api_call_count}...")
|
print(f"\n🔄 Making OpenAI-compatible API call #{api_call_count}...")
|
||||||
|
|
||||||
# Log request details if verbose
|
# Log request details if verbose
|
||||||
if self.verbose_logging:
|
if self.verbose_logging:
|
||||||
@@ -394,8 +397,8 @@ class AIAgent:
|
|||||||
|
|
||||||
api_start_time = time.time()
|
api_start_time = time.time()
|
||||||
retry_count = 0
|
retry_count = 0
|
||||||
max_retries = 3
|
max_retries = 6 # Increased to allow longer backoff periods
|
||||||
|
|
||||||
while retry_count <= max_retries:
|
while retry_count <= max_retries:
|
||||||
try:
|
try:
|
||||||
# Prepare messages for API call
|
# Prepare messages for API call
|
||||||
@@ -404,30 +407,30 @@ class AIAgent:
|
|||||||
if active_system_prompt:
|
if active_system_prompt:
|
||||||
# Insert system message at the beginning
|
# Insert system message at the beginning
|
||||||
api_messages = [{"role": "system", "content": active_system_prompt}] + api_messages
|
api_messages = [{"role": "system", "content": active_system_prompt}] + api_messages
|
||||||
|
|
||||||
# Make API call with tools
|
# Make API call with tools
|
||||||
response = self.client.chat.completions.create(
|
response = self.client.chat.completions.create(
|
||||||
model=self.model,
|
model=self.model,
|
||||||
messages=api_messages,
|
messages=api_messages,
|
||||||
tools=self.tools if self.tools else None,
|
tools=self.tools if self.tools else None,
|
||||||
timeout=60.0 # Add explicit timeout
|
timeout=300.0 # 5 minute timeout for long-running agent tasks
|
||||||
)
|
)
|
||||||
|
|
||||||
api_duration = time.time() - api_start_time
|
api_duration = time.time() - api_start_time
|
||||||
print(f"⏱️ API call completed in {api_duration:.2f}s")
|
print(f"⏱️ OpenAI-compatible API call completed in {api_duration:.2f}s")
|
||||||
|
|
||||||
if self.verbose_logging:
|
if self.verbose_logging:
|
||||||
logging.debug(f"API Response received - Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
|
logging.debug(f"API Response received - Usage: {response.usage if hasattr(response, 'usage') else 'N/A'}")
|
||||||
|
|
||||||
break # Success, exit retry loop
|
break # Success, exit retry loop
|
||||||
|
|
||||||
except Exception as api_error:
|
except Exception as api_error:
|
||||||
retry_count += 1
|
retry_count += 1
|
||||||
if retry_count > max_retries:
|
if retry_count > max_retries:
|
||||||
raise api_error
|
raise api_error
|
||||||
|
|
||||||
wait_time = min(2 ** retry_count, 10) # Exponential backoff, max 10s
|
wait_time = min(2 ** retry_count, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s, 60s
|
||||||
print(f"⚠️ API call failed (attempt {retry_count}/{max_retries}): {str(api_error)[:100]}")
|
print(f"⚠️ OpenAI-compatible API call failed (attempt {retry_count}/{max_retries}): {str(api_error)[:100]}")
|
||||||
print(f"⏳ Retrying in {wait_time}s...")
|
print(f"⏳ Retrying in {wait_time}s...")
|
||||||
logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}")
|
logging.warning(f"API retry {retry_count}/{max_retries} after error: {api_error}")
|
||||||
time.sleep(wait_time)
|
time.sleep(wait_time)
|
||||||
@@ -474,7 +477,10 @@ class AIAgent:
|
|||||||
print(f"❌ Invalid JSON in tool call arguments: {e}")
|
print(f"❌ Invalid JSON in tool call arguments: {e}")
|
||||||
function_args = {}
|
function_args = {}
|
||||||
|
|
||||||
print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())})")
|
# Preview tool call arguments
|
||||||
|
args_str = json.dumps(function_args, ensure_ascii=False)
|
||||||
|
args_preview = args_str[:self.log_prefix_chars] + "..." if len(args_str) > self.log_prefix_chars else args_str
|
||||||
|
print(f" 📞 Tool {i}: {function_name}({list(function_args.keys())}) - {args_preview}")
|
||||||
|
|
||||||
tool_start_time = time.time()
|
tool_start_time = time.time()
|
||||||
|
|
||||||
@@ -483,19 +489,21 @@ class AIAgent:
|
|||||||
|
|
||||||
tool_duration = time.time() - tool_start_time
|
tool_duration = time.time() - tool_start_time
|
||||||
result_preview = function_result[:200] if len(function_result) > 200 else function_result
|
result_preview = function_result[:200] if len(function_result) > 200 else function_result
|
||||||
|
|
||||||
if self.verbose_logging:
|
if self.verbose_logging:
|
||||||
logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
|
logging.debug(f"Tool {function_name} completed in {tool_duration:.2f}s")
|
||||||
logging.debug(f"Tool result preview: {result_preview}...")
|
logging.debug(f"Tool result preview: {result_preview}...")
|
||||||
|
|
||||||
# Add tool result to conversation
|
# Add tool result to conversation
|
||||||
messages.append({
|
messages.append({
|
||||||
"role": "tool",
|
"role": "tool",
|
||||||
"content": function_result,
|
"content": function_result,
|
||||||
"tool_call_id": tool_call.id
|
"tool_call_id": tool_call.id
|
||||||
})
|
})
|
||||||
|
|
||||||
print(f" ✅ Tool {i} completed in {tool_duration:.2f}s")
|
# Preview tool response
|
||||||
|
response_preview = function_result[:self.log_prefix_chars] + "..." if len(function_result) > self.log_prefix_chars else function_result
|
||||||
|
print(f" ✅ Tool {i} completed in {tool_duration:.2f}s - {response_preview}")
|
||||||
|
|
||||||
# Delay between tool calls
|
# Delay between tool calls
|
||||||
if self.tool_delay > 0 and i < len(assistant_message.tool_calls):
|
if self.tool_delay > 0 and i < len(assistant_message.tool_calls):
|
||||||
@@ -514,11 +522,11 @@ class AIAgent:
|
|||||||
"content": final_response
|
"content": final_response
|
||||||
})
|
})
|
||||||
|
|
||||||
print(f"🎉 Conversation completed after {api_call_count} API call(s)")
|
print(f"🎉 Conversation completed after {api_call_count} OpenAI-compatible API call(s)")
|
||||||
break
|
break
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_msg = f"Error during API call #{api_call_count}: {str(e)}"
|
error_msg = f"Error during OpenAI-compatible API call #{api_call_count}: {str(e)}"
|
||||||
print(f"❌ {error_msg}")
|
print(f"❌ {error_msg}")
|
||||||
|
|
||||||
if self.verbose_logging:
|
if self.verbose_logging:
|
||||||
@@ -577,7 +585,7 @@ class AIAgent:
|
|||||||
|
|
||||||
def main(
|
def main(
|
||||||
query: str = None,
|
query: str = None,
|
||||||
model: str = "claude-opus-4-20250514",
|
model: str = "claude-opus-4-20250514",
|
||||||
api_key: str = None,
|
api_key: str = None,
|
||||||
base_url: str = "https://api.anthropic.com/v1/",
|
base_url: str = "https://api.anthropic.com/v1/",
|
||||||
max_turns: int = 10,
|
max_turns: int = 10,
|
||||||
@@ -585,25 +593,27 @@ def main(
|
|||||||
disabled_toolsets: str = None,
|
disabled_toolsets: str = None,
|
||||||
list_tools: bool = False,
|
list_tools: bool = False,
|
||||||
save_trajectories: bool = False,
|
save_trajectories: bool = False,
|
||||||
verbose: bool = False
|
verbose: bool = False,
|
||||||
|
log_prefix_chars: int = 20
|
||||||
):
|
):
|
||||||
"""
|
"""
|
||||||
Main function for running the agent directly.
|
Main function for running the agent directly.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query (str): Natural language query for the agent. Defaults to Python 3.13 example.
|
query (str): Natural language query for the agent. Defaults to Python 3.13 example.
|
||||||
model (str): Model name to use. Defaults to claude-opus-4-20250514.
|
model (str): Model name to use. Defaults to claude-opus-4-20250514.
|
||||||
api_key (str): API key for authentication. Uses ANTHROPIC_API_KEY env var if not provided.
|
api_key (str): API key for authentication. Uses ANTHROPIC_API_KEY env var if not provided.
|
||||||
base_url (str): Base URL for the model API. Defaults to https://api.anthropic.com/v1/
|
base_url (str): Base URL for the model API. Defaults to https://api.anthropic.com/v1/
|
||||||
max_turns (int): Maximum number of API call iterations. Defaults to 10.
|
max_turns (int): Maximum number of API call iterations. Defaults to 10.
|
||||||
enabled_toolsets (str): Comma-separated list of toolsets to enable. Supports predefined
|
enabled_toolsets (str): Comma-separated list of toolsets to enable. Supports predefined
|
||||||
toolsets (e.g., "research", "development", "safe").
|
toolsets (e.g., "research", "development", "safe").
|
||||||
Multiple toolsets can be combined: "web,vision"
|
Multiple toolsets can be combined: "web,vision"
|
||||||
disabled_toolsets (str): Comma-separated list of toolsets to disable (e.g., "terminal")
|
disabled_toolsets (str): Comma-separated list of toolsets to disable (e.g., "terminal")
|
||||||
list_tools (bool): Just list available tools and exit
|
list_tools (bool): Just list available tools and exit
|
||||||
save_trajectories (bool): Save conversation trajectories to JSONL files. Defaults to False.
|
save_trajectories (bool): Save conversation trajectories to JSONL files. Defaults to False.
|
||||||
verbose (bool): Enable verbose logging for debugging. Defaults to False.
|
verbose (bool): Enable verbose logging for debugging. Defaults to False.
|
||||||
|
log_prefix_chars (int): Number of characters to show in log previews for tool calls/responses. Defaults to 20.
|
||||||
|
|
||||||
Toolset Examples:
|
Toolset Examples:
|
||||||
- "research": Web search, extract, crawl + vision tools
|
- "research": Web search, extract, crawl + vision tools
|
||||||
"""
|
"""
|
||||||
@@ -720,7 +730,8 @@ def main(
|
|||||||
enabled_toolsets=enabled_toolsets_list,
|
enabled_toolsets=enabled_toolsets_list,
|
||||||
disabled_toolsets=disabled_toolsets_list,
|
disabled_toolsets=disabled_toolsets_list,
|
||||||
save_trajectories=save_trajectories,
|
save_trajectories=save_trajectories,
|
||||||
verbose_logging=verbose
|
verbose_logging=verbose,
|
||||||
|
log_prefix_chars=log_prefix_chars
|
||||||
)
|
)
|
||||||
except RuntimeError as e:
|
except RuntimeError as e:
|
||||||
print(f"❌ Failed to initialize agent: {e}")
|
print(f"❌ Failed to initialize agent: {e}")
|
||||||
|
|||||||
@@ -161,11 +161,11 @@ def _construct_aggregator_prompt(system_prompt: str, responses: List[str]) -> st
|
|||||||
|
|
||||||
|
|
||||||
async def _run_reference_model_safe(
|
async def _run_reference_model_safe(
|
||||||
model: str,
|
model: str,
|
||||||
user_prompt: str,
|
user_prompt: str,
|
||||||
temperature: float = REFERENCE_TEMPERATURE,
|
temperature: float = REFERENCE_TEMPERATURE,
|
||||||
max_tokens: int = 32000,
|
max_tokens: int = 32000,
|
||||||
max_retries: int = 3
|
max_retries: int = 6
|
||||||
) -> tuple[str, str, bool]:
|
) -> tuple[str, str, bool]:
|
||||||
"""
|
"""
|
||||||
Run a single reference model with retry logic and graceful failure handling.
|
Run a single reference model with retry logic and graceful failure handling.
|
||||||
@@ -212,8 +212,8 @@ async def _run_reference_model_safe(
|
|||||||
print(f"⚠️ {model} unknown error (attempt {attempt + 1}): {error_str}")
|
print(f"⚠️ {model} unknown error (attempt {attempt + 1}): {error_str}")
|
||||||
|
|
||||||
if attempt < max_retries - 1:
|
if attempt < max_retries - 1:
|
||||||
# Exponential backoff for rate limiting
|
# Exponential backoff for rate limiting: 2s, 4s, 8s, 16s, 32s, 60s
|
||||||
sleep_time = 2 ** attempt
|
sleep_time = min(2 ** (attempt + 1), 60)
|
||||||
print(f" Retrying in {sleep_time}s...")
|
print(f" Retrying in {sleep_time}s...")
|
||||||
await asyncio.sleep(sleep_time)
|
await asyncio.sleep(sleep_time)
|
||||||
else:
|
else:
|
||||||
|
|||||||
395
tools/simple_terminal_tool.py
Normal file
395
tools/simple_terminal_tool.py
Normal file
@@ -0,0 +1,395 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Simple Terminal Tool Module
|
||||||
|
|
||||||
|
A simplified terminal tool that executes commands on MorphCloud VMs without tmux.
|
||||||
|
No session persistence, no interactive app support - just simple command execution.
|
||||||
|
|
||||||
|
Features:
|
||||||
|
- Direct SSH command execution
|
||||||
|
- Background task support
|
||||||
|
- VM lifecycle management with TTL
|
||||||
|
- Automatic cleanup after inactivity
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from simple_terminal_tool import simple_terminal_tool
|
||||||
|
|
||||||
|
# Execute a simple command
|
||||||
|
result = simple_terminal_tool("ls -la")
|
||||||
|
|
||||||
|
# Execute in background
|
||||||
|
result = simple_terminal_tool("python server.py", background=True)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import threading
|
||||||
|
import atexit
|
||||||
|
from typing import Optional, Dict, Any
|
||||||
|
|
||||||
|
# Tool description for LLM
|
||||||
|
SIMPLE_TERMINAL_TOOL_DESCRIPTION = """Execute commands on a secure Linux VM environment.
|
||||||
|
|
||||||
|
**Environment:**
|
||||||
|
- Minimal Debian-based OS with internet access
|
||||||
|
- Automatic VM lifecycle management (creates on-demand, reuses, cleans up)
|
||||||
|
- Filesystem is persisted between tool calls but environment variables, venvs, etc are reset.
|
||||||
|
|
||||||
|
**Command Execution:**
|
||||||
|
- Simple commands: Just provide the 'command' parameter
|
||||||
|
- Background processes: Set 'background': True for servers/long-running tasks
|
||||||
|
- Command timeout: Optional 'timeout' parameter in seconds
|
||||||
|
|
||||||
|
**Examples:**
|
||||||
|
- Run command: `{"command": "ls -la"}`
|
||||||
|
- Background task: `{"command": "source path/to/my/venv/bin/activate && python server.py", "background": True}`
|
||||||
|
- With timeout: `{"command": "long_task.sh", "timeout": 300}`
|
||||||
|
|
||||||
|
**Best Practices:**
|
||||||
|
- Run servers/long processes in background
|
||||||
|
- Monitor disk usage for large tasks
|
||||||
|
- Install whatever tools you need with sudo apt-get
|
||||||
|
- Do not be afraid to run pip with --break-system-packages
|
||||||
|
|
||||||
|
**Things to avoid**
|
||||||
|
- Do NOT use interactive tools such as tmux, vim, nano, python repl - you will get stuck. Even git sometimes becomes interactive if the output is large. If you're not sure pipe to cat.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Global state for VM lifecycle management
|
||||||
|
_active_instances: Dict[str, Any] = {}
|
||||||
|
_last_activity: Dict[str, float] = {}
|
||||||
|
_instance_lock = threading.Lock()
|
||||||
|
_cleanup_thread = None
|
||||||
|
_cleanup_running = False
|
||||||
|
|
||||||
|
|
||||||
|
def _cleanup_inactive_vms(vm_lifetime_seconds: int = 300) -> None:
    """Terminate VMs whose last activity is older than ``vm_lifetime_seconds``.

    Expired entries are claimed (popped) from the shared registries while
    holding ``_instance_lock``; the potentially slow terminate API calls then
    run outside the lock so other tasks are not blocked on network I/O.

    Fixes a leak in the previous version: registry entries were only deleted
    after a successful terminate, so a VM that always raised (e.g. the benign
    404 "already gone" case) was retried and logged on every sweep forever.

    Args:
        vm_lifetime_seconds: Inactivity threshold in seconds (default 300).
    """
    global _active_instances, _last_activity

    now = time.time()
    expired = []  # (task_id, instance-or-None) pairs claimed for shutdown

    with _instance_lock:
        for task_id, last_time in list(_last_activity.items()):
            if now - last_time > vm_lifetime_seconds:
                # Claim the entry under the lock so no other thread can reuse
                # or double-terminate this VM.
                instance = _active_instances.pop(task_id, None)
                del _last_activity[task_id]
                expired.append((task_id, instance))

    for task_id, instance in expired:
        if instance is None:
            continue
        try:
            # MorphCloud SDK versions differ in the shutdown method name.
            if hasattr(instance, 'terminate'):
                instance.terminate()
            elif hasattr(instance, 'stop'):
                instance.stop()
            elif hasattr(instance, 'delete'):
                instance.delete()
            print(f"[VM Cleanup] Terminated inactive VM for task: {task_id}")
        except Exception as e:
            # 404 errors are benign - VM already cleaned up by TTL
            error_str = str(e)
            if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
                print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
            else:
                print(f"[VM Cleanup] Error cleaning up VM for task {task_id}: {e}")
|
|
||||||
|
def _cleanup_thread_worker():
    """Reaper loop: periodically terminate VMs idle past their lifetime.

    Sleeps in one-second slices between sweeps so that clearing
    ``_cleanup_running`` (via _stop_cleanup_thread) interrupts the wait
    promptly instead of after a full minute.
    """
    global _cleanup_running

    while _cleanup_running:
        try:
            lifetime = int(os.getenv("HECATE_VM_LIFETIME_SECONDS", "300"))
            _cleanup_inactive_vms(lifetime)
        except Exception as e:
            print(f"[VM Cleanup] Error in cleanup thread: {e}")

        # Interruptible ~60s pause before the next sweep.
        elapsed = 0
        while _cleanup_running and elapsed < 60:
            time.sleep(1)
            elapsed += 1
||||||
|
|
||||||
|
def _start_cleanup_thread():
    """Launch the daemon reaper thread unless one is already alive."""
    global _cleanup_thread, _cleanup_running

    with _instance_lock:
        already_running = _cleanup_thread is not None and _cleanup_thread.is_alive()
        if not already_running:
            _cleanup_running = True
            worker = threading.Thread(target=_cleanup_thread_worker, daemon=True)
            _cleanup_thread = worker
            worker.start()
||||||
|
|
||||||
|
def _stop_cleanup_thread():
    """Signal the reaper loop to exit and wait briefly for the thread."""
    global _cleanup_running
    _cleanup_running = False
    worker = _cleanup_thread
    if worker is not None:
        # Bounded join: the worker wakes at least once per second.
        worker.join(timeout=5)
||||||
|
|
||||||
|
def cleanup_vm(task_id: str) -> None:
    """Manually terminate and forget the VM associated with ``task_id``.

    The registry entries are removed while holding ``_instance_lock``, but the
    terminate API call itself runs outside the lock: previously the (slow,
    networked) shutdown was performed under the global lock, stalling every
    other task's terminal calls for its duration. Unknown task_ids are a
    no-op. Registry entries are removed even if terminate fails, so a VM that
    is already gone (404/TTL) cannot linger in the registries.
    """
    global _active_instances, _last_activity

    with _instance_lock:
        instance = _active_instances.pop(task_id, None)
        _last_activity.pop(task_id, None)

    if instance is None:
        return

    try:
        # MorphCloud SDK versions differ in the shutdown method name.
        if hasattr(instance, 'terminate'):
            instance.terminate()
        elif hasattr(instance, 'stop'):
            instance.stop()
        elif hasattr(instance, 'delete'):
            instance.delete()
        print(f"[VM Cleanup] Manually terminated VM for task: {task_id}")
    except Exception as e:
        # 404 errors are benign - VM already cleaned up by TTL
        error_str = str(e)
        if "404" in error_str or "InstanceNotFoundError" in error_str or "not found" in error_str.lower():
            print(f"[VM Cleanup] VM for task {task_id} already cleaned up (likely TTL expiration)")
        else:
            print(f"[VM Cleanup] Error manually cleaning up VM for task {task_id}: {e}")
||||||
|
|
||||||
|
# Stop the background cleanup thread cleanly when the interpreter exits.
atexit.register(_stop_cleanup_thread)
|
||||||
|
|
||||||
|
|
||||||
|
def _execute_ssh_command(instance, command: str, timeout: Optional[int] = None) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Execute a command via SSH on the VM instance.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
instance: MorphVM instance
|
||||||
|
command: Command to execute
|
||||||
|
timeout: Optional timeout in seconds
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
dict with stdout, stderr, returncode
|
||||||
|
"""
|
||||||
|
ssh_context_manager = None
|
||||||
|
try:
|
||||||
|
# Use the instance's SSH context manager
|
||||||
|
ssh_context_manager = instance.ssh()
|
||||||
|
ssh_context = ssh_context_manager.__enter__()
|
||||||
|
|
||||||
|
# Execute the command
|
||||||
|
result = ssh_context.run(command, get_pty=False, timeout=timeout or 120)
|
||||||
|
|
||||||
|
# Close the SSH connection
|
||||||
|
if ssh_context_manager:
|
||||||
|
try:
|
||||||
|
ssh_context_manager.__exit__(None, None, None)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return {
|
||||||
|
"stdout": result.stdout or "",
|
||||||
|
"stderr": result.stderr or "",
|
||||||
|
"returncode": result.returncode
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
# Close connection on error
|
||||||
|
if ssh_context_manager:
|
||||||
|
try:
|
||||||
|
ssh_context_manager.__exit__(None, None, None)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Check if it's a timeout
|
||||||
|
error_str = str(e).lower()
|
||||||
|
if "timeout" in error_str:
|
||||||
|
return {
|
||||||
|
"stdout": "",
|
||||||
|
"stderr": f"Command timed out after {timeout or 120} seconds",
|
||||||
|
"returncode": 124
|
||||||
|
}
|
||||||
|
|
||||||
|
return {
|
||||||
|
"stdout": "",
|
||||||
|
"stderr": f"SSH execution failed: {str(e)}",
|
||||||
|
"returncode": -1
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def simple_terminal_tool(
    command: str,
    background: bool = False,
    timeout: Optional[int] = None,
    task_id: Optional[str] = None
) -> str:
    """Run a shell command on a per-task MorphCloud VM (no session state).

    A VM is started on first use for each ``task_id``, reused on subsequent
    calls, and reaped later by the background cleanup thread once idle.

    Args:
        command: The command to execute.
        background: Detach the command with nohup and return immediately.
        timeout: Command timeout in seconds (SSH layer defaults to 120).
        task_id: Key used to isolate VMs between tasks (defaults to "default").

    Returns:
        str: JSON string with "output", "exit_code" and "error" fields (plus
        a "status" field when the tool is unavailable or setup fails).

    Examples:
        >>> result = simple_terminal_tool(command="ls -la /tmp")
        >>> result = simple_terminal_tool(command="python server.py", background=True)
        >>> result = simple_terminal_tool(command="long_task.sh", timeout=300)
    """
    global _active_instances, _last_activity

    try:
        # MorphCloud is an optional dependency; degrade gracefully without it.
        try:
            from morphcloud.api import MorphCloudClient
        except ImportError as import_error:
            return json.dumps({
                "output": "",
                "exit_code": -1,
                "error": f"Terminal tool disabled: {import_error}",
                "status": "disabled"
            }, ensure_ascii=False)

        # Environment-driven configuration.
        ttl_seconds = int(os.getenv("HECATE_VM_TTL_SECONDS", "1200"))
        snapshot = os.getenv("HECATE_DEFAULT_SNAPSHOT_ID", "snapshot_defv9tjg")

        api_key = os.getenv("MORPH_API_KEY")
        if not api_key:
            return json.dumps({
                "output": "",
                "exit_code": -1,
                "error": "MORPH_API_KEY environment variable not set",
                "status": "disabled"
            }, ensure_ascii=False)

        effective_task_id = task_id or "default"

        # Make sure the idle-VM reaper is running.
        _start_cleanup_thread()

        # Get-or-create the VM for this task and record the activity time.
        with _instance_lock:
            if effective_task_id not in _active_instances:
                client = MorphCloudClient(api_key=api_key)
                _active_instances[effective_task_id] = client.instances.start(
                    snapshot_id=snapshot,
                    ttl_seconds=ttl_seconds,
                    ttl_action="stop"
                )
            _last_activity[effective_task_id] = time.time()
            instance = _active_instances[effective_task_id]

        instance.wait_until_ready()

        if background:
            # Detach with nohup; the VM keeps the output in a log file.
            detached = f"nohup {command} > /tmp/bg_output.log 2>&1 &"
            result = _execute_ssh_command(instance, detached, timeout=10)

            if result["returncode"] == 0:
                return json.dumps({
                    "output": "Background task started successfully",
                    "exit_code": 0,
                    "error": None
                }, ensure_ascii=False)
            return json.dumps({
                "output": result["stdout"],
                "exit_code": result["returncode"],
                "error": result["stderr"]
            }, ensure_ascii=False)

        # Foreground execution.
        result = _execute_ssh_command(instance, command, timeout=timeout)

        # On failure, surface stderr alongside stdout in the visible output.
        visible = result["stdout"]
        if result["stderr"] and result["returncode"] != 0:
            visible = f"{visible}\n{result['stderr']}" if visible else result["stderr"]

        return json.dumps({
            "output": visible.strip(),
            "exit_code": result["returncode"],
            "error": result["stderr"] if result["returncode"] != 0 else None
        }, ensure_ascii=False)

    except Exception as e:
        return json.dumps({
            "output": "",
            "exit_code": -1,
            "error": f"Failed to execute command: {str(e)}",
            "status": "error"
        }, ensure_ascii=False)
||||||
|
|
||||||
|
def check_requirements() -> bool:
    """Return True when MORPH_API_KEY is set and morphcloud imports cleanly."""
    missing_required = [name for name in ("MORPH_API_KEY",) if not os.getenv(name)]
    if missing_required:
        print(f"Missing required environment variables: {', '.join(missing_required)}")
        return False

    try:
        from morphcloud.api import MorphCloudClient
    except Exception as e:
        print(f"MorphCloud not available: {e}")
        return False
    return True
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    """Simple test when run directly."""
    # Smoke check: verify requirements, then print usage hints and the
    # current values of the configuration environment variables.
    print("Simple Terminal Tool Module")
    print("=" * 40)

    if not check_requirements():
        print("Requirements not met. Please check the messages above.")
        exit(1)

    print("All requirements met!")
    print("\nAvailable Tool:")
    print(" - simple_terminal_tool: Execute commands without session persistence")

    print("\nUsage Examples:")
    print(" # Execute a command")
    print(" result = simple_terminal_tool(command='ls -la')")
    print(" ")
    print(" # Run a background task")
    print(" result = simple_terminal_tool(command='python server.py', background=True)")

    print("\nEnvironment Variables:")
    print(f" MORPH_API_KEY: {'Set' if os.getenv('MORPH_API_KEY') else 'Not set'}")
    print(f" HECATE_VM_TTL_SECONDS: {os.getenv('HECATE_VM_TTL_SECONDS', '1200')} (default: 1200 / 20 minutes)")
    print(f" HECATE_VM_LIFETIME_SECONDS: {os.getenv('HECATE_VM_LIFETIME_SECONDS', '300')} (default: 300 / 5 minutes)")
    print(f" HECATE_DEFAULT_SNAPSHOT_ID: {os.getenv('HECATE_DEFAULT_SNAPSHOT_ID', 'snapshot_defv9tjg')}")
|
||||||
@@ -184,10 +184,10 @@ Your goal is to preserve ALL important information while reducing length. Never
|
|||||||
Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""
|
Create a markdown summary that captures all key information in a well-organized, scannable format. Include important quotes and code snippets in their original formatting. Focus on actionable information, specific details, and unique insights."""
|
||||||
|
|
||||||
# Call the LLM asynchronously with retry logic for flaky API
|
# Call the LLM asynchronously with retry logic for flaky API
|
||||||
max_retries = 3
|
max_retries = 6
|
||||||
retry_delay = 2 # Start with 2 seconds
|
retry_delay = 2 # Start with 2 seconds
|
||||||
last_error = None
|
last_error = None
|
||||||
|
|
||||||
for attempt in range(max_retries):
|
for attempt in range(max_retries):
|
||||||
try:
|
try:
|
||||||
response = await nous_client.chat.completions.create(
|
response = await nous_client.chat.completions.create(
|
||||||
@@ -206,7 +206,7 @@ Create a markdown summary that captures all key information in a well-organized,
|
|||||||
print(f"⚠️ LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
|
print(f"⚠️ LLM API call failed (attempt {attempt + 1}/{max_retries}): {str(api_error)[:100]}")
|
||||||
print(f" Retrying in {retry_delay}s...")
|
print(f" Retrying in {retry_delay}s...")
|
||||||
await asyncio.sleep(retry_delay)
|
await asyncio.sleep(retry_delay)
|
||||||
retry_delay *= 2 # Exponential backoff: 2s, 4s, 8s
|
retry_delay = min(retry_delay * 2, 60) # Exponential backoff: 2s, 4s, 8s, 16s, 32s, 60s
|
||||||
else:
|
else:
|
||||||
# All retries exhausted
|
# All retries exhausted
|
||||||
raise last_error
|
raise last_error
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ DISTRIBUTIONS = {
|
|||||||
"description": "Web research with vision analysis and reasoning",
|
"description": "Web research with vision analysis and reasoning",
|
||||||
"toolsets": {
|
"toolsets": {
|
||||||
"web": 94, # 90% chance of web tools
|
"web": 94, # 90% chance of web tools
|
||||||
"vision": 50, # 50% chance of vision tools
|
"vision": 65, # 50% chance of vision tools
|
||||||
"moa": 10, # 40% chance of reasoning tools
|
"moa": 10, # 40% chance of reasoning tools
|
||||||
"terminal": 94, # 10% chance of terminal tools
|
"terminal": 94, # 10% chance of terminal tools
|
||||||
"image_gen": 15 # 80% chance of image generation tools
|
"image_gen": 15 # 80% chance of image generation tools
|
||||||
|
|||||||
Reference in New Issue
Block a user