diff --git a/scripts/kill_modal.sh b/scripts/kill_modal.sh new file mode 100755 index 0000000000..aae3f63e27 --- /dev/null +++ b/scripts/kill_modal.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Kill all running Modal apps (sandboxes, deployments, etc.) +# +# Usage: +# bash scripts/kill_modal.sh # Stop swe-rex (the sandbox app) +# bash scripts/kill_modal.sh --all # Stop ALL Modal apps + +set -uo pipefail + +echo "Fetching Modal app list..." +APP_LIST=$(modal app list 2>/dev/null) + +if [[ "${1:-}" == "--all" ]]; then + echo "Stopping ALL Modal apps..." + echo "$APP_LIST" | grep -oE 'ap-[A-Za-z0-9]+' | sort -u | while read app_id; do + echo " Stopping $app_id" + modal app stop "$app_id" 2>/dev/null || true + done +else + echo "Stopping swe-rex sandboxes..." + APPS=$(echo "$APP_LIST" | grep 'swe-rex' | grep -oE 'ap-[A-Za-z0-9]+' || true) + if [[ -z "$APPS" ]]; then + echo " No swe-rex apps found." + else + echo "$APPS" | while read app_id; do + echo " Stopping $app_id" + modal app stop "$app_id" 2>/dev/null || true + done + fi +fi + +echo "" +echo "Current swe-rex status:" +modal app list 2>/dev/null | grep -E 'State|swe-rex' || echo " (none)" diff --git a/tools/file_tools.py b/tools/file_tools.py index 89c75eff58..35d4495078 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -30,62 +30,63 @@ def _get_file_ops(task_id: str = "default") -> ShellFileOperations: if task_id in _file_ops_cache: return _file_ops_cache[task_id] - # Check if we need to create a new environment + # Check if we need to create a new environment. + # Uses the same per-task creation locks as terminal_tool to prevent + # duplicate sandbox creation from concurrent tool calls. + from tools.terminal_tool import _creation_locks, _creation_locks_lock + needs_creation = False with _env_lock: if task_id not in _active_environments: needs_creation = True - # Create environment OUTSIDE locks so we don't block other rollouts - # during slow Modal/Docker startup (~10s) if needs_creation: - from tools.terminal_tool import _task_env_overrides - - config = _get_env_config() - env_type = config["env_type"] - - # Check per-task overrides (set by environments like TerminalBench2Env) - overrides = _task_env_overrides.get(task_id, {}) - - if env_type == "docker": - image = overrides.get("docker_image") or config["docker_image"] - elif env_type == "singularity": - image = overrides.get("singularity_image") or config["singularity_image"] - elif env_type == "modal": - image = overrides.get("modal_image") or config["modal_image"] - else: - image = "" - - cwd = overrides.get("cwd") or config["cwd"] - _check_disk_usage_warning() - if not os.getenv("HERMES_QUIET"): - print(f"[FileTools] Creating new {env_type} environment for task {task_id[:8]}...", flush=True) - - new_env = _create_environment( - env_type=env_type, - image=image, - cwd=cwd, - timeout=config["timeout"], - ) - - # Store under lock (brief) -- do NOT call _start_cleanup_thread inside - # the lock because it also acquires _env_lock (non-reentrant = deadlock) - created = False - with _env_lock: - if task_id not in _active_environments: - _active_environments[task_id] = new_env - created = True - else: - try: - if hasattr(new_env, 'stop'): - new_env.stop() - except Exception: - pass - - if created: - _start_cleanup_thread() - if not os.getenv("HERMES_QUIET"): - print(f"[FileTools] {env_type} environment ready for task {task_id[:8]}", flush=True) + # Per-task lock: only one thread creates the sandbox, others wait + with _creation_locks_lock: + if task_id not in _creation_locks: + _creation_locks[task_id] = __import__("threading").Lock() + task_lock = _creation_locks[task_id] + + with task_lock: + # Double-check after acquiring the per-task lock + with _env_lock: + if task_id in _active_environments: + needs_creation = False + + if needs_creation: + from tools.terminal_tool import _task_env_overrides + + config = _get_env_config() + env_type = config["env_type"] + overrides = _task_env_overrides.get(task_id, {}) + + if env_type == "docker": + image = overrides.get("docker_image") or config["docker_image"] + elif env_type == "singularity": + image = overrides.get("singularity_image") or config["singularity_image"] + elif env_type == "modal": + image = overrides.get("modal_image") or config["modal_image"] + else: + image = "" + + cwd = overrides.get("cwd") or config["cwd"] + if not os.getenv("HERMES_QUIET"): + print(f"[FileTools] Creating new {env_type} environment for task {task_id[:8]}...", flush=True) + + new_env = _create_environment( + env_type=env_type, + image=image, + cwd=cwd, + timeout=config["timeout"], + ) + + with _env_lock: + _active_environments[task_id] = new_env + _last_activity[task_id] = __import__("time").time() + + _start_cleanup_thread() + if not os.getenv("HERMES_QUIET"): + print(f"[FileTools] {env_type} environment ready for task {task_id[:8]}", flush=True) # Now get the environment and build file_ops with _env_lock: diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 654a4a2082..2de2997ff2 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -1132,6 +1132,8 @@ _active_environments: Dict[str, Any] = {} _task_workdirs: Dict[str, str] = {} # Maps task_id to working directory _last_activity: Dict[str, float] = {} _env_lock = threading.Lock() +_creation_locks: Dict[str, threading.Lock] = {} # Per-task locks for sandbox creation +_creation_locks_lock = threading.Lock() # Protects _creation_locks dict itself _cleanup_thread = None _cleanup_running = False @@ -1515,64 +1517,69 @@ def terminal_tool( # Start cleanup thread _start_cleanup_thread() - # Get or create environment - # Check under lock, but create OUTSIDE lock so we don't block - # other concurrent rollouts during slow Modal/Docker startup - needs_creation = False + # Get or create environment. + # Use a per-task creation lock so concurrent tool calls for the same + # task_id wait for the first one to finish creating the sandbox, + # instead of each creating their own (wasting Modal resources). with _env_lock: - if effective_task_id not in _active_environments: - needs_creation = True - else: + if effective_task_id in _active_environments: _last_activity[effective_task_id] = time.time() env = _active_environments[effective_task_id] + needs_creation = False + else: + needs_creation = True if needs_creation: - # Disk usage warning only relevant for local/singularity backends - if env_type in ("singularity", "local"): - _check_disk_usage_warning() - if not os.getenv("HERMES_QUIET"): - print(f"[Terminal] Creating new {env_type} environment for task {effective_task_id[:8]}...", flush=True) - try: - ssh_config = None - if env_type == "ssh": - ssh_config = { - "host": config.get("ssh_host", ""), - "user": config.get("ssh_user", ""), - "port": config.get("ssh_port", 22), - "key": config.get("ssh_key", ""), - } + # Per-task lock: only one thread creates the sandbox, others wait + with _creation_locks_lock: + if effective_task_id not in _creation_locks: + _creation_locks[effective_task_id] = threading.Lock() + task_lock = _creation_locks[effective_task_id] - new_env = _create_environment( - env_type=env_type, - image=image, - cwd=cwd, - timeout=effective_timeout, - ssh_config=ssh_config - ) - except ImportError as e: - return json.dumps({ - "output": "", - "exit_code": -1, - "error": f"Terminal tool disabled: mini-swe-agent not available ({e})", - "status": "disabled" - }, ensure_ascii=False) + with task_lock: + # Double-check after acquiring the per-task lock + with _env_lock: + if effective_task_id in _active_environments: + _last_activity[effective_task_id] = time.time() + env = _active_environments[effective_task_id] + needs_creation = False - # Store under lock (brief) - with _env_lock: - if effective_task_id not in _active_environments: - _active_environments[effective_task_id] = new_env - else: - # Another thread created it while we were building -- clean up ours + if needs_creation: + if env_type in ("singularity", "local"): + _check_disk_usage_warning() + if not os.getenv("HERMES_QUIET"): + print(f"[Terminal] Creating new {env_type} environment for task {effective_task_id[:8]}...", flush=True) try: - if hasattr(new_env, 'stop'): - new_env.stop() - except Exception: - pass + ssh_config = None + if env_type == "ssh": + ssh_config = { + "host": config.get("ssh_host", ""), + "user": config.get("ssh_user", ""), + "port": config.get("ssh_port", 22), + "key": config.get("ssh_key", ""), + } - _last_activity[effective_task_id] = time.time() - env = _active_environments[effective_task_id] - if not os.getenv("HERMES_QUIET"): - print(f"[Terminal] {env_type} environment ready for task {effective_task_id[:8]}", flush=True) + new_env = _create_environment( + env_type=env_type, + image=image, + cwd=cwd, + timeout=effective_timeout, + ssh_config=ssh_config + ) + except ImportError as e: + return json.dumps({ + "output": "", + "exit_code": -1, + "error": f"Terminal tool disabled: mini-swe-agent not available ({e})", + "status": "disabled" + }, ensure_ascii=False) + + with _env_lock: + _active_environments[effective_task_id] = new_env + _last_activity[effective_task_id] = time.time() + env = new_env + if not os.getenv("HERMES_QUIET"): + print(f"[Terminal] {env_type} environment ready for task {effective_task_id[:8]}", flush=True) # Check for dangerous commands (only for local/ssh in interactive modes) # Skip check if force=True (user has confirmed they want to run it)