mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-01 00:11:39 +08:00
1083 lines
40 KiB
Python
1083 lines
40 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Comprehensive Modal Integration Test Suite
|
|
|
|
Tests both:
|
|
1. terminal_tool.py Modal backend (CLI/agent use case)
|
|
2. atropos/backends/modal_backend.py (RL training use case)
|
|
|
|
Run with:
|
|
# All tests (requires Modal account)
|
|
python tests/test_modal_integration.py
|
|
|
|
# Dry run (no Modal, tests config/logic only)
|
|
python tests/test_modal_integration.py --dry-run
|
|
|
|
# Specific test category
|
|
python tests/test_modal_integration.py --category terminal
|
|
python tests/test_modal_integration.py --category atropos
|
|
python tests/test_modal_integration.py --category profiles
|
|
"""
|
|
|
|
import asyncio
|
|
import json
|
|
import os
|
|
import sys
|
|
import tempfile
|
|
import time
|
|
from pathlib import Path
|
|
from typing import Dict, Any, List, Optional
|
|
from dataclasses import dataclass
|
|
|
|
# Add parent to path for imports
|
|
sys.path.insert(0, str(Path(__file__).parent.parent))
|
|
|
|
|
|
# =============================================================================
|
|
# Atropos Import Helper
|
|
# =============================================================================
|
|
|
|
def _load_module_from_file(spec_name, file_path, register_as=None):
    """
    Execute the Python source at *file_path* as a module named *spec_name*.

    Args:
        spec_name: Name given to the module spec (becomes ``module.__name__``).
        file_path: Path of the ``.py`` file to execute.
        register_as: Optional ``sys.modules`` key. When given, the module is
            registered under that key before its code runs.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec/loader can be built for *file_path*.
        Exception: Whatever the module's own top-level code raises.
    """
    import importlib.util

    spec = importlib.util.spec_from_file_location(spec_name, file_path)
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot create an import spec for {file_path}")

    module = importlib.util.module_from_spec(spec)
    if register_as is not None:
        sys.modules[register_as] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Do not leave a half-initialised module behind for later imports.
        if register_as is not None and sys.modules.get(register_as) is module:
            del sys.modules[register_as]
        raise
    return module


def try_import_atropos_backend():
    """
    Try to import atropos backend directly, bypassing the atroposlib check.

    The regular package import is attempted first. If that fails, the source
    files under ``<repo>/atropos`` are executed directly, without going
    through ``atropos/__init__.py``. The slot, executor and base modules are
    registered in ``sys.modules`` under their dotted package names before the
    backend module is executed.

    Returns:
        A 4-tuple ``(ModalToolBackend, ModalSandboxConfig, Slot, SlotState)``.

    Raises:
        ImportError: If the package import fails and any of the source files
            needed for the direct load is missing.
    """
    try:
        # Try direct import first (works if atroposlib is installed)
        from atropos.backends.modal_backend import ModalToolBackend, ModalSandboxConfig
        from atropos.slots.slot import Slot, SlotState
        return ModalToolBackend, ModalSandboxConfig, Slot, SlotState
    except ImportError:
        # Try importing the modules directly without going through atropos/__init__.py
        atropos_dir = Path(__file__).parent.parent / "atropos"
        backend_path = atropos_dir / "backends" / "modal_backend.py"
        slot_path = atropos_dir / "slots" / "slot.py"
        executor_path = atropos_dir / "slots" / "executor.py"
        base_path = atropos_dir / "backends" / "base.py"

        if not backend_path.exists():
            raise ImportError(f"modal_backend.py not found at {backend_path}")

        # A missing dependency file would otherwise surface as FileNotFoundError
        # (an OSError), which callers do not treat as "atropos unavailable".
        for dependency_path in (slot_path, executor_path, base_path):
            if not dependency_path.exists():
                raise ImportError(f"{dependency_path.name} not found at {dependency_path}")

        # Load order: slot, executor, base, then the backend itself.
        slot_module = _load_module_from_file(
            "atropos_slots_slot", slot_path, register_as="atropos.slots.slot"
        )
        _load_module_from_file(
            "atropos_slots_executor", executor_path, register_as="atropos.slots.executor"
        )
        _load_module_from_file(
            "atropos_backends_base", base_path, register_as="atropos.backends.base"
        )
        # The backend module itself is not registered in sys.modules.
        backend_module = _load_module_from_file("atropos_backends_modal_backend", backend_path)

        return (
            backend_module.ModalToolBackend,
            backend_module.ModalSandboxConfig,
            slot_module.Slot,
            slot_module.SlotState,
        )
|
|
|
|
|
|
# =============================================================================
|
|
# Test Configuration
|
|
# =============================================================================
|
|
|
|
@dataclass
class TestConfig:
    """Run options passed to the individual test functions."""

    # When True, tests that need a live Modal sandbox record a skip instead of running.
    dry_run: bool = False

    # Verbosity switch (not read by the tests visible in this part of the file).
    verbose: bool = True

    # Category filter: None = all, or "terminal", "atropos", "profiles".
    category: Optional[str] = None
|
|
|
|
|
|
# =============================================================================
|
|
# Test Results Tracking
|
|
# =============================================================================
|
|
|
|
class TestResults:
    """Collects pass/fail/skip outcomes and prints each one as it is recorded."""

    def __init__(self):
        self.passed: List[str] = []
        self.failed: List[tuple] = []  # (name, error)
        self.skipped: List[tuple] = []  # (name, reason)

    def record_pass(self, name: str):
        """Store *name* as passed and print a one-line marker."""
        self.passed.append(name)
        print(f" ✅ {name}")

    def record_fail(self, name: str, error: str):
        """Store *name* with its *error* text as failed and print a marker."""
        self.failed.append((name, error))
        print(f" ❌ {name}: {error}")

    def record_skip(self, name: str, reason: str):
        """Store *name* with its *reason* as skipped and print a marker."""
        self.skipped.append((name, reason))
        print(f" ⏭️ {name}: {reason}")

    def summary(self):
        """
        Print the totals and the list of failures.

        Returns:
            True when no failure has been recorded, False otherwise.
        """
        n_passed = len(self.passed)
        n_failed = len(self.failed)
        n_skipped = len(self.skipped)
        total = n_passed + n_failed + n_skipped

        print("\n" + "=" * 60)
        print(f"TEST RESULTS: {n_passed}/{total} passed")
        print(f" Passed: {n_passed}")
        print(f" Failed: {n_failed}")
        print(f" Skipped: {n_skipped}")

        if self.failed:
            print("\nFailed tests:")
            for failed_name, failed_error in self.failed:
                print(f" - {failed_name}: {failed_error}")

        return not self.failed


# Shared collector used by every test function in this file.
results = TestResults()
|
|
|
|
|
|
# =============================================================================
|
|
# CATEGORY 1: Profile Configuration Tests
|
|
# =============================================================================
|
|
|
|
def test_profile_loading_from_env():
    """Check that ModalProfile.from_env() reads its settings from environment variables."""
    from tools.terminal_tool import ModalProfile

    test_name = "test_profile_loading_from_env"

    # Variable names are TERMINAL_MODAL_PROFILE_{profile_name}_{FIELD}; the
    # profile name is embedded exactly as written.
    env_prefix = "TERMINAL_MODAL_PROFILE_testenv_"
    raw_settings = {
        "IMAGE": "python:3.12",
        "GPU": "A100",
        "CPU": "4.0",
        "MEMORY": "32768",
        "SECRETS": "secret1,secret2",
        "ENV_VARS": "KEY1=val1;KEY2=val2",
    }
    for field_name, raw_value in raw_settings.items():
        os.environ[env_prefix + field_name] = raw_value

    try:
        profile = ModalProfile.from_env("testenv")

        assert profile.name == "testenv", f"Expected name 'testenv', got '{profile.name}'"
        assert profile.image == "python:3.12", f"Expected image 'python:3.12', got '{profile.image}'"
        assert profile.gpu == "A100", f"Expected GPU 'A100', got '{profile.gpu}'"
        assert profile.cpu == 4.0, f"Expected CPU 4.0, got {profile.cpu}"
        assert profile.memory == 32768, f"Expected memory 32768, got {profile.memory}"
        assert profile.secrets == ["secret1", "secret2"], f"Secrets mismatch: {profile.secrets}"
        assert profile.env_vars == {"KEY1": "val1", "KEY2": "val2"}, f"Env vars mismatch: {profile.env_vars}"

        results.record_pass(test_name)
    except Exception as exc:
        results.record_fail(test_name, str(exc))
    finally:
        # Remove every variable carrying the test prefix, whatever the outcome.
        leftover_keys = [key for key in os.environ.keys() if key.startswith(env_prefix)]
        for key in leftover_keys:
            del os.environ[key]
|
|
|
|
|
|
def test_profile_loading_from_yaml():
    """Check that ModalProfile.load_profiles() parses profiles from a YAML file."""
    from tools.terminal_tool import ModalProfile, YAML_AVAILABLE

    test_name = "test_profile_loading_from_yaml"

    if not YAML_AVAILABLE:
        results.record_skip(test_name, "PyYAML not installed")
        return

    # Two profiles: one fully specified, one relying mostly on defaults.
    yaml_content = """
profiles:
  test-yaml:
    image: pytorch/pytorch:2.0
    gpu: T4
    cpu: 2.0
    memory: 8192
    min_pool: 1
    max_pool: 3
    secrets:
      - hf-token
    env_vars:
      CUDA_VISIBLE_DEVICES: "0"
  test-yaml-2:
    image: node:20
    cpu: 1.0
"""

    # delete=False so the file survives the `with` and can be re-opened by path.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as handle:
        handle.write(yaml_content)
        yaml_path = handle.name

    try:
        profiles = ModalProfile.load_profiles(yaml_path)

        assert "test-yaml" in profiles, f"Profile 'test-yaml' not found in {list(profiles.keys())}"
        assert "test-yaml-2" in profiles, "Profile 'test-yaml-2' not found"

        first_profile = profiles["test-yaml"]
        expected_fields = (
            ("image", "pytorch/pytorch:2.0"),
            ("gpu", "T4"),
            ("cpu", 2.0),
            ("memory", 8192),
            ("secrets", ["hf-token"]),
            ("env_vars", {"CUDA_VISIBLE_DEVICES": "0"}),
        )
        for attr_name, expected_value in expected_fields:
            assert getattr(first_profile, attr_name) == expected_value

        results.record_pass(test_name)
    except Exception as exc:
        results.record_fail(test_name, str(exc))
    finally:
        os.remove(yaml_path)
|
|
|
|
|
|
def test_profile_defaults():
    """Check the field values of a ModalProfile built with only a name."""
    from tools.terminal_tool import ModalProfile

    test_name = "test_profile_defaults"

    try:
        minimal = ModalProfile(name="minimal")

        assert minimal.image == "python:3.11"
        assert minimal.gpu is None

        remaining_defaults = (
            ("cpu", 1.0),
            ("memory", 2048),
            ("min_pool", 1),
            ("max_pool", 5),
            ("idle_timeout", 120),
            ("secrets", []),
            ("env_vars", {}),
        )
        for attr_name, expected_value in remaining_defaults:
            assert getattr(minimal, attr_name) == expected_value

        results.record_pass(test_name)
    except Exception as exc:
        results.record_fail(test_name, str(exc))
|
|
|
|
|
|
def test_atropos_config_with_app_name():
    """Check that ModalSandboxConfig.with_app_name() prefixes the name and keeps the other fields."""
    test_name = "test_atropos_config_with_app_name"

    try:
        try:
            # Regular package import first.
            from atropos.backends.modal_backend import ModalSandboxConfig
        except (ImportError, ModuleNotFoundError):
            # Fall back to loading the module without atropos/__init__.py.
            _backend_cls, ModalSandboxConfig, _slot_cls, _slot_state_cls = try_import_atropos_backend()

        base_config = ModalSandboxConfig(
            name="test-convert",
            image="python:3.10",
            gpu="A10G",
            cpu=2.0,
            memory=4096,
            secrets=["secret1"],
            env_vars={"FOO": "bar"},
        )

        derived = base_config.with_app_name("my-app")

        assert derived.app_name == "my-app-test-convert"

        carried_over = (
            ("image", "python:3.10"),
            ("gpu", "A10G"),
            ("cpu", 2.0),
            ("memory", 4096),
            ("secrets", ["secret1"]),
            ("env_vars", {"FOO": "bar"}),
        )
        for attr_name, expected_value in carried_over:
            assert getattr(derived, attr_name) == expected_value

        results.record_pass(test_name)
    except ImportError:
        results.record_skip(test_name, "Requires atroposlib: pip install -e '.[atropos]'")
    except Exception as exc:
        results.record_fail(test_name, str(exc))
|
|
|
|
|
|
# =============================================================================
|
|
# CATEGORY 2: Terminal Tool Modal Tests
|
|
# =============================================================================
|
|
|
|
def test_terminal_modal_pool_manager_singleton():
    """Check that _ModalPoolManager.get_instance() returns one shared object."""
    from tools.terminal_tool import _ModalPoolManager

    test_name = "test_terminal_modal_pool_manager_singleton"

    try:
        # Clear any cached instance so the first call below creates a fresh one.
        _ModalPoolManager._instance = None

        first_call = _ModalPoolManager.get_instance()
        second_call = _ModalPoolManager.get_instance()

        assert first_call is second_call, "Pool manager should be singleton"

        results.record_pass(test_name)
    except Exception as exc:
        results.record_fail(test_name, str(exc))
|
|
|
|
|
|
def test_terminal_create_environment_modal():
    """Check that _create_environment() builds a Modal sandbox environment object."""
    from tools.terminal_tool import _create_environment

    test_name = "test_terminal_create_environment_modal"
    creation_kwargs = {
        "env_type": "modal",
        "image": "python:3.11",
        "cwd": "/workspace",
        "timeout": 60,
        "task_id": "test-task-123",
        "profile": "default",
    }

    try:
        env = _create_environment(**creation_kwargs)

        # The class is identified by name only; it is not imported here.
        assert env.__class__.__name__ == "_ModalSandboxEnvironment"
        assert env.profile == "default"
        assert env.task_id == "test-task-123"

        results.record_pass(test_name)
    except Exception as exc:
        results.record_fail(test_name, str(exc))
|
|
|
|
|
|
def test_terminal_tool_profile_parameter(config: TestConfig):
    """
    Test terminal_tool() accepts profile parameter.

    Runs a single echo command with ``profile="default"`` on the Modal
    backend and checks the exit code and output. Skipped in dry-run mode.

    Args:
        config: Run options; only ``dry_run`` is read.
    """
    test_name = "test_terminal_tool_profile_parameter"

    if config.dry_run:
        results.record_skip(test_name, "Dry run mode")
        return

    from tools.terminal_tool import terminal_tool, cleanup_vm

    # Save original env so it can be restored exactly (including an empty value).
    original_env = os.environ.get("TERMINAL_ENV")
    task_id = f"test-profile-param-{int(time.time())}"
    # Task ids whose VM has not been cleaned up yet.
    pending_cleanup = [task_id]

    try:
        os.environ["TERMINAL_ENV"] = "modal"

        # This should work without error (profile passed through)
        result = terminal_tool(
            "echo 'Hello from Modal'",
            task_id=task_id,
            profile="default",
        )

        result_data = json.loads(result)
        # terminal_tool returns {"output", "exit_code", "error"} not {"success"}
        assert result_data.get("exit_code") == 0, f"Command failed: {result_data}"
        assert "Hello from Modal" in result_data.get("output", "")

        # A cleanup error on the success path still fails the test.
        cleanup_vm(pending_cleanup.pop())
        results.record_pass(test_name)
    except Exception as e:
        results.record_fail(test_name, str(e))
    finally:
        # Best-effort cleanup when the test failed before reaching cleanup_vm,
        # so a failed assertion does not leave the sandbox running.
        for leftover_id in pending_cleanup:
            try:
                cleanup_vm(leftover_id)
            except Exception as cleanup_error:
                print(f" ⚠️ cleanup_vm({leftover_id!r}) failed: {cleanup_error}")

        if original_env is not None:
            os.environ["TERMINAL_ENV"] = original_env
        else:
            os.environ.pop("TERMINAL_ENV", None)
|
|
|
|
|
|
def test_terminal_modal_execute_simple(config: TestConfig):
    """
    Test basic command execution in Modal sandbox.

    Runs an echo, a ``pwd`` and a write-then-read of a file under one task id
    and checks exit codes and output. Skipped in dry-run mode.

    Args:
        config: Run options; only ``dry_run`` is read.
    """
    test_name = "test_terminal_modal_execute_simple"

    if config.dry_run:
        results.record_skip(test_name, "Dry run mode")
        return

    from tools.terminal_tool import terminal_tool, cleanup_vm

    original_env = os.environ.get("TERMINAL_ENV")
    task_id = f"test-simple-{int(time.time())}"
    # Task ids whose VM has not been cleaned up yet.
    pending_cleanup = [task_id]

    try:
        os.environ["TERMINAL_ENV"] = "modal"

        # Test echo
        result = json.loads(terminal_tool("echo 'test123'", task_id=task_id))
        assert result["exit_code"] == 0, f"Echo failed: {result}"
        assert "test123" in result["output"]

        # Test pwd
        result = json.loads(terminal_tool("pwd", task_id=task_id))
        assert result["exit_code"] == 0, f"pwd failed: {result}"

        # Test file creation and reading
        result = json.loads(terminal_tool("echo 'content' > test.txt && cat test.txt", task_id=task_id))
        assert result["exit_code"] == 0, f"File ops failed: {result}"
        assert "content" in result["output"]

        # A cleanup error on the success path still fails the test.
        cleanup_vm(pending_cleanup.pop())
        results.record_pass(test_name)
    except Exception as e:
        results.record_fail(test_name, str(e))
    finally:
        # Best-effort cleanup when the test failed before reaching cleanup_vm.
        for leftover_id in pending_cleanup:
            try:
                cleanup_vm(leftover_id)
            except Exception as cleanup_error:
                print(f" ⚠️ cleanup_vm({leftover_id!r}) failed: {cleanup_error}")

        # `is not None` so an originally empty TERMINAL_ENV is restored, not deleted.
        if original_env is not None:
            os.environ["TERMINAL_ENV"] = original_env
        else:
            os.environ.pop("TERMINAL_ENV", None)
|
|
|
|
|
|
def test_terminal_modal_persistence(config: TestConfig):
    """
    Test state persists within same task_id.

    Writes a file in one terminal_tool() call and reads it back in a second
    call that uses the same task id. Skipped in dry-run mode.

    Args:
        config: Run options; only ``dry_run`` is read.
    """
    test_name = "test_terminal_modal_persistence"

    if config.dry_run:
        results.record_skip(test_name, "Dry run mode")
        return

    from tools.terminal_tool import terminal_tool, cleanup_vm

    original_env = os.environ.get("TERMINAL_ENV")
    task_id = f"test-persist-{int(time.time())}"
    # Task ids whose VM has not been cleaned up yet.
    pending_cleanup = [task_id]

    try:
        os.environ["TERMINAL_ENV"] = "modal"

        # Create a file
        result1 = json.loads(terminal_tool("echo 'persistent data' > /workspace/persist.txt", task_id=task_id))
        assert result1["exit_code"] == 0, f"Create file failed: {result1}"

        # Read it in separate call (same task_id)
        result2 = json.loads(terminal_tool("cat /workspace/persist.txt", task_id=task_id))
        assert result2["exit_code"] == 0, f"Read file failed: {result2}"
        assert "persistent data" in result2["output"]

        # A cleanup error on the success path still fails the test.
        cleanup_vm(pending_cleanup.pop())
        results.record_pass(test_name)
    except Exception as e:
        results.record_fail(test_name, str(e))
    finally:
        # Best-effort cleanup when the test failed before reaching cleanup_vm.
        for leftover_id in pending_cleanup:
            try:
                cleanup_vm(leftover_id)
            except Exception as cleanup_error:
                print(f" ⚠️ cleanup_vm({leftover_id!r}) failed: {cleanup_error}")

        # `is not None` so an originally empty TERMINAL_ENV is restored, not deleted.
        if original_env is not None:
            os.environ["TERMINAL_ENV"] = original_env
        else:
            os.environ.pop("TERMINAL_ENV", None)
|
|
|
|
|
|
def test_terminal_modal_isolation(config: TestConfig):
    """
    Test different task_ids are isolated.

    Writes different content to the same path under two task ids and checks
    that each task id reads back its own content. Skipped in dry-run mode.

    Args:
        config: Run options; only ``dry_run`` is read.
    """
    test_name = "test_terminal_modal_isolation"

    if config.dry_run:
        results.record_skip(test_name, "Dry run mode")
        return

    from tools.terminal_tool import terminal_tool, cleanup_vm

    original_env = os.environ.get("TERMINAL_ENV")
    task_id_1 = f"test-iso-1-{int(time.time())}"
    task_id_2 = f"test-iso-2-{int(time.time())}"
    # Task ids whose VM has not been cleaned up yet, in cleanup order.
    pending_cleanup = [task_id_1, task_id_2]

    try:
        os.environ["TERMINAL_ENV"] = "modal"

        # Create file in task 1
        result1 = json.loads(terminal_tool("echo 'task1' > /workspace/iso.txt", task_id=task_id_1))
        assert result1["exit_code"] == 0, f"Task 1 create failed: {result1}"

        # Create different file in task 2
        result2 = json.loads(terminal_tool("echo 'task2' > /workspace/iso.txt", task_id=task_id_2))
        assert result2["exit_code"] == 0, f"Task 2 create failed: {result2}"

        # Verify task 1 still has its own content
        result3 = json.loads(terminal_tool("cat /workspace/iso.txt", task_id=task_id_1))
        assert result3["exit_code"] == 0, f"Task 1 read failed: {result3}"
        assert "task1" in result3["output"], f"Task 1 content corrupted: {result3['output']}"

        # Verify task 2 has its content
        result4 = json.loads(terminal_tool("cat /workspace/iso.txt", task_id=task_id_2))
        assert result4["exit_code"] == 0, f"Task 2 read failed: {result4}"
        assert "task2" in result4["output"], f"Task 2 content corrupted: {result4['output']}"

        # A cleanup error on the success path still fails the test; the id is
        # popped first so the finally block never retries it.
        while pending_cleanup:
            cleanup_vm(pending_cleanup.pop(0))
        results.record_pass(test_name)
    except Exception as e:
        results.record_fail(test_name, str(e))
    finally:
        # Best-effort cleanup of whatever was not cleaned above, so a failure
        # on one task id does not leave the other sandbox running.
        for leftover_id in pending_cleanup:
            try:
                cleanup_vm(leftover_id)
            except Exception as cleanup_error:
                print(f" ⚠️ cleanup_vm({leftover_id!r}) failed: {cleanup_error}")

        # `is not None` so an originally empty TERMINAL_ENV is restored, not deleted.
        if original_env is not None:
            os.environ["TERMINAL_ENV"] = original_env
        else:
            os.environ.pop("TERMINAL_ENV", None)
|
|
|
|
|
|
# =============================================================================
|
|
# CATEGORY 3: Atropos Modal Backend Tests
|
|
# =============================================================================
|
|
|
|
async def test_atropos_backend_lifecycle(config: TestConfig):
    """
    Test ModalToolBackend start/stop lifecycle.

    Starts a backend, checks the reported status, then stops it with
    ``purge=True``. Skipped in dry-run mode or when the atropos modules
    cannot be imported.

    Args:
        config: Run options; only ``dry_run`` is read.
    """
    test_name = "test_atropos_backend_lifecycle"

    if config.dry_run:
        results.record_skip(test_name, "Dry run mode")
        return

    try:
        try:
            from atropos.backends.modal_backend import ModalToolBackend, ModalSandboxConfig
        except ImportError:
            # The helper returns more than two values; only the first two are
            # needed here, so slice instead of unpacking a fixed arity.
            ModalToolBackend, ModalSandboxConfig = try_import_atropos_backend()[:2]

        config_obj = ModalSandboxConfig(
            app_name="test-lifecycle",
            min_sandboxes=1,
            max_sandboxes=2,
            slots_per_sandbox=3,
        )

        backend = ModalToolBackend(config_obj)

        # Start
        await backend.start()

        try:
            status = backend.get_status()
            assert status["sandboxes"] >= 1, f"Expected at least 1 sandbox, got {status}"
            assert status["slots_per_sandbox"] == 3
        finally:
            # Stop even when an assertion fails, so the backend is always shut down.
            await backend.stop(purge=True)

        results.record_pass(test_name)
    except ImportError:
        results.record_skip(test_name, "Requires atroposlib: pip install -e '.[atropos]'")
    except Exception as e:
        results.record_fail(test_name, str(e))
|
|
|
|
|
|
async def test_atropos_slot_acquire_release(config: TestConfig):
    """
    Test slot acquisition and release.

    Acquires one slot, checks its fields and that the status shows fewer
    available than total slots, then releases it. Skipped in dry-run mode or
    when the atropos modules cannot be imported.

    Args:
        config: Run options; only ``dry_run`` is read.
    """
    test_name = "test_atropos_slot_acquire_release"

    if config.dry_run:
        results.record_skip(test_name, "Dry run mode")
        return

    try:
        try:
            from atropos.backends.modal_backend import ModalToolBackend, ModalSandboxConfig
        except ImportError:
            # The helper returns more than two values; only the first two are
            # needed here, so slice instead of unpacking a fixed arity.
            ModalToolBackend, ModalSandboxConfig = try_import_atropos_backend()[:2]

        config_obj = ModalSandboxConfig(
            app_name="test-slots",
            min_sandboxes=1,
            max_sandboxes=2,
            slots_per_sandbox=5,
        )

        backend = ModalToolBackend(config_obj)
        await backend.start()

        try:
            # Acquire slot
            slot = await backend.acquire("trajectory-1")

            assert slot is not None
            assert slot.trajectory_id == "trajectory-1"
            assert "/data/" in slot.workspace_dir

            # Check status shows slot in use
            status = backend.get_status()
            assert status["available_slots"] < status["total_slots"]

            # Release slot
            await backend.release(slot)

            # Status is queried again after release but deliberately not
            # asserted on: it might need a small delay to update.
            backend.get_status()

            results.record_pass(test_name)
        finally:
            await backend.stop(purge=True)
    except ImportError:
        results.record_skip(test_name, "Requires atroposlib: pip install -e '.[atropos]'")
    except Exception as e:
        results.record_fail(test_name, str(e))
|
|
|
|
|
|
async def test_atropos_execute_in_slot(config: TestConfig):
    """
    Test command execution in acquired slot.

    Runs one bash echo through ``execute_batch`` and checks the single
    result. Skipped in dry-run mode or when the atropos modules cannot be
    imported.

    Args:
        config: Run options; only ``dry_run`` is read.
    """
    test_name = "test_atropos_execute_in_slot"

    if config.dry_run:
        results.record_skip(test_name, "Dry run mode")
        return

    try:
        try:
            from atropos.backends.modal_backend import ModalToolBackend, ModalSandboxConfig
        except ImportError:
            # The helper returns more than two values; only the first two are
            # needed here, so slice instead of unpacking a fixed arity.
            ModalToolBackend, ModalSandboxConfig = try_import_atropos_backend()[:2]

        config_obj = ModalSandboxConfig(
            app_name="test-execute",
            min_sandboxes=1,
            max_sandboxes=1,
            slots_per_sandbox=3,
        )

        backend = ModalToolBackend(config_obj)
        await backend.start()

        try:
            slot = await backend.acquire("test-exec")

            # Execute bash command; each batch item is (slot, tool name, arguments).
            results_list = await backend.execute_batch([
                (slot, "bash", {"command": "echo 'hello world'"})
            ])

            assert len(results_list) == 1
            result = results_list[0]
            assert result.success, f"Command failed: {result.error}"
            assert "hello world" in result.output

            await backend.release(slot)
            results.record_pass(test_name)
        finally:
            await backend.stop(purge=True)
    except ImportError:
        results.record_skip(test_name, "Requires atroposlib: pip install -e '.[atropos]'")
    except Exception as e:
        results.record_fail(test_name, str(e))
|
|
|
|
|
|
async def test_atropos_batched_execution(config: TestConfig):
    """
    Test batched parallel execution across multiple slots.

    Acquires three slots, runs a one-second sleep plus echo in each through a
    single ``execute_batch`` call, and checks both the outputs and that the
    whole batch finished in under five seconds. Skipped in dry-run mode or
    when the atropos modules cannot be imported.

    Args:
        config: Run options; only ``dry_run`` is read.
    """
    test_name = "test_atropos_batched_execution"

    if config.dry_run:
        results.record_skip(test_name, "Dry run mode")
        return

    try:
        try:
            from atropos.backends.modal_backend import ModalToolBackend, ModalSandboxConfig
        except ImportError:
            # The helper returns more than two values; only the first two are
            # needed here, so slice instead of unpacking a fixed arity.
            ModalToolBackend, ModalSandboxConfig = try_import_atropos_backend()[:2]

        config_obj = ModalSandboxConfig(
            app_name="test-batch",
            min_sandboxes=1,
            max_sandboxes=2,
            slots_per_sandbox=5,
        )

        backend = ModalToolBackend(config_obj)
        await backend.start()

        try:
            # Acquire multiple slots (one after another, in index order)
            slots = [await backend.acquire(f"batch-{i}") for i in range(3)]

            # Execute batch of commands; a monotonic clock keeps the elapsed
            # time immune to wall-clock adjustments.
            start_time = time.monotonic()
            results_list = await backend.execute_batch([
                (slots[0], "bash", {"command": "sleep 1 && echo 'slot0'"}),
                (slots[1], "bash", {"command": "sleep 1 && echo 'slot1'"}),
                (slots[2], "bash", {"command": "sleep 1 && echo 'slot2'"}),
            ])
            elapsed = time.monotonic() - start_time

            # All should succeed
            assert len(results_list) == 3
            for i, result in enumerate(results_list):
                assert result.success, f"Slot {i} failed: {result.error}"
                assert f"slot{i}" in result.output

            # Should be parallel - with Modal overhead, allow up to 5s for 3x 1-second sleeps
            # (If sequential, would take > 3s just for the sleeps)
            assert elapsed < 5.0, f"Batch execution took {elapsed}s, expected < 5.0s (parallel)"

            for slot in slots:
                await backend.release(slot)

            results.record_pass(test_name)
        finally:
            await backend.stop(purge=True)
    except ImportError:
        results.record_skip(test_name, "Requires atroposlib: pip install -e '.[atropos]'")
    except Exception as e:
        results.record_fail(test_name, str(e))
|
|
|
|
|
|
async def test_atropos_slot_workspace_isolation(config: TestConfig):
    """
    Test workspace isolation between slots.

    Writes different content to ``test.txt`` (a relative path) in two slots
    and checks that each slot reads back its own content. Skipped in dry-run
    mode or when the atropos modules cannot be imported.

    Args:
        config: Run options; only ``dry_run`` is read.
    """
    test_name = "test_atropos_slot_workspace_isolation"

    if config.dry_run:
        results.record_skip(test_name, "Dry run mode")
        return

    try:
        try:
            from atropos.backends.modal_backend import ModalToolBackend, ModalSandboxConfig
        except ImportError:
            # The helper returns more than two values; only the first two are
            # needed here, so slice instead of unpacking a fixed arity.
            ModalToolBackend, ModalSandboxConfig = try_import_atropos_backend()[:2]

        config_obj = ModalSandboxConfig(
            app_name="test-isolation",
            min_sandboxes=1,
            max_sandboxes=1,
            slots_per_sandbox=3,
        )

        backend = ModalToolBackend(config_obj)
        await backend.start()

        try:
            slot1 = await backend.acquire("iso-1")
            slot2 = await backend.acquire("iso-2")

            # Write different content to each slot
            await backend.execute_batch([
                (slot1, "bash", {"command": "echo 'content1' > test.txt"}),
                (slot2, "bash", {"command": "echo 'content2' > test.txt"}),
            ])

            # Read back and verify isolation
            results_list = await backend.execute_batch([
                (slot1, "bash", {"command": "cat test.txt"}),
                (slot2, "bash", {"command": "cat test.txt"}),
            ])

            assert "content1" in results_list[0].output, f"Slot 1 content wrong: {results_list[0].output}"
            assert "content2" in results_list[1].output, f"Slot 2 content wrong: {results_list[1].output}"

            await backend.release(slot1)
            await backend.release(slot2)

            results.record_pass(test_name)
        finally:
            await backend.stop(purge=True)
    except ImportError:
        results.record_skip(test_name, "Requires atroposlib: pip install -e '.[atropos]'")
    except Exception as e:
        results.record_fail(test_name, str(e))
|
|
|
|
|
|
async def test_atropos_workspace_reset(config: TestConfig):
    """
    Test workspace reset on slot release.

    Creates a file in a slot, releases the slot with ``reset_workspace=True``,
    acquires a slot again and, if it is the same slot, checks that the file
    is gone. Skipped in dry-run mode or when the atropos modules cannot be
    imported.

    Args:
        config: Run options; only ``dry_run`` is read.
    """
    test_name = "test_atropos_workspace_reset"

    if config.dry_run:
        results.record_skip(test_name, "Dry run mode")
        return

    try:
        try:
            from atropos.backends.modal_backend import ModalToolBackend, ModalSandboxConfig
        except ImportError:
            # The helper returns more than two values; only the first two are
            # needed here, so slice instead of unpacking a fixed arity.
            ModalToolBackend, ModalSandboxConfig = try_import_atropos_backend()[:2]

        config_obj = ModalSandboxConfig(
            app_name="test-reset",
            min_sandboxes=1,
            max_sandboxes=1,
            slots_per_sandbox=2,
        )

        backend = ModalToolBackend(config_obj)
        await backend.start()

        try:
            # Acquire, create file, release with reset
            slot = await backend.acquire("reset-test")
            slot_id = slot.slot_id

            await backend.execute_batch([
                (slot, "bash", {"command": "echo 'should be deleted' > test.txt"}),
            ])

            await backend.release(slot, reset_workspace=True)

            # Re-acquire (might get same slot)
            slot2 = await backend.acquire("reset-test-2")

            # Check file doesn't exist (or we got different slot)
            result = await backend.execute_batch([
                (slot2, "bash", {"command": "cat test.txt 2>/dev/null || echo 'file not found'"}),
            ])

            # The check only applies when the same slot was handed out again;
            # a different slot is accepted without inspecting the output.
            output = result[0].output
            if slot2.slot_id == slot_id:
                assert "file not found" in output or not result[0].success, f"File should be deleted: {output}"

            await backend.release(slot2)
            results.record_pass(test_name)
        finally:
            await backend.stop(purge=True)
    except ImportError:
        results.record_skip(test_name, "Requires atroposlib: pip install -e '.[atropos]'")
    except Exception as e:
        results.record_fail(test_name, str(e))
|
|
|
|
|
|
async def test_atropos_multi_profile(config: TestConfig):
    """
    Exercise a backend built from two profiles with different resources.

    Only the "default" profile is started up front; a slot is then acquired
    from each profile and a command is run on both. Skipped in dry-run mode
    or when the atropos modules cannot be imported.
    """
    test_name = "test_atropos_multi_profile"

    if config.dry_run:
        results.record_skip(test_name, "Dry run mode")
        return

    try:
        try:
            from atropos.backends.modal_backend import ModalToolBackend, ModalSandboxConfig
        except (ImportError, ModuleNotFoundError):
            ModalToolBackend, ModalSandboxConfig, _slot_cls, _slot_state_cls = try_import_atropos_backend()

        default_cfg = ModalSandboxConfig(
            name="default",
            image="python:3.11",
            cpu=1.0,
            memory=2048,
            min_sandboxes=1,
            max_sandboxes=2,
            slots_per_sandbox=3,
        )
        compute_cfg = ModalSandboxConfig(
            name="compute",
            image="python:3.11",
            cpu=2.0,
            memory=4096,
            min_sandboxes=0,  # Start on demand
            max_sandboxes=1,
            slots_per_sandbox=2,
        )

        # One backend serving both profiles.
        backend = ModalToolBackend.with_profiles(
            app_name="test-multiprofile",
            profiles={"default": default_cfg, "compute": compute_cfg},
            default_profile="default",
        )

        await backend.start(profiles_to_start=["default"])

        try:
            # Both profiles are listed, but only "default" is active so far.
            listed = backend.list_profiles()
            assert "default" in listed
            assert "compute" in listed
            assert listed["default"]["active"] == True
            assert listed["compute"]["active"] == False  # Not started yet

            # Slot from the profile that is already running.
            default_slot = await backend.acquire("traj-1", profile="default")
            assert default_slot is not None

            # Slot from the compute profile (should start it on demand).
            compute_slot = await backend.acquire("traj-2", profile="compute")
            assert compute_slot is not None

            # Same command on both slots in one batch.
            version_cmd = "python --version"
            batch_results = await backend.execute_batch([
                (default_slot, "bash", {"command": version_cmd}),
                (compute_slot, "bash", {"command": version_cmd}),
            ])

            assert batch_results[0].success
            assert batch_results[1].success

            await backend.release(default_slot)
            await backend.release(compute_slot)

            # The status report must contain a pool entry for each profile.
            pools = backend.get_status()["pools"]
            assert "default" in pools
            assert "compute" in pools

            results.record_pass(test_name)
        finally:
            await backend.stop(purge=True)
    except ImportError:
        results.record_skip(test_name, "Requires atroposlib: pip install -e '.[atropos]'")
    except Exception as exc:
        results.record_fail(test_name, str(exc))
|
|
|
|
|
|
async def test_atropos_cross_profile_batch(config: TestConfig):
    """
    Run one execute_batch call whose items belong to two different profiles.

    Skipped in dry-run mode or when the atropos modules cannot be imported.
    """
    test_name = "test_atropos_cross_profile_batch"

    if config.dry_run:
        results.record_skip(test_name, "Dry run mode")
        return

    try:
        try:
            from atropos.backends.modal_backend import ModalToolBackend, ModalSandboxConfig
        except (ImportError, ModuleNotFoundError):
            ModalToolBackend, ModalSandboxConfig, _slot_cls, _slot_state_cls = try_import_atropos_backend()

        # Two profiles that differ only in name.
        profile_names = ("profile-a", "profile-b")
        profile_configs = {
            profile_name: ModalSandboxConfig(
                name=profile_name,
                min_sandboxes=1,
                max_sandboxes=1,
                slots_per_sandbox=2,
            )
            for profile_name in profile_names
        }

        backend = ModalToolBackend.with_profiles(
            app_name="test-crossprofile",
            profiles=profile_configs,
            default_profile="profile-a",
        )

        await backend.start(profiles_to_start=list(profile_names))

        try:
            slot_a = await backend.acquire("traj-a", profile="profile-a")
            slot_b = await backend.acquire("traj-b", profile="profile-b")

            # A single batch spanning both profiles.
            batch_results = await backend.execute_batch([
                (slot_a, "bash", {"command": "echo 'from-a'"}),
                (slot_b, "bash", {"command": "echo 'from-b'"}),
            ])

            assert len(batch_results) == 2
            assert "from-a" in batch_results[0].output
            assert "from-b" in batch_results[1].output

            await backend.release(slot_a)
            await backend.release(slot_b)

            results.record_pass(test_name)
        finally:
            await backend.stop(purge=True)
    except ImportError:
        results.record_skip(test_name, "Requires atroposlib: pip install -e '.[atropos]'")
    except Exception as exc:
        results.record_fail(test_name, str(exc))
|
|
|
|
|
|
async def test_atropos_artifact_helpers(config: TestConfig):
    """Test read_artifact, list_artifacts, archive_artifacts.

    The test is recorded as skipped in dry-run mode and when the Atropos
    backend cannot be imported. Otherwise it starts a backend, acquires one
    slot, and runs a bash command in it that writes ``file1.txt``,
    ``file2.txt`` and ``subdir/file3.txt``. It then checks that:

    * ``read_artifact`` on ``file1.txt`` reports success and its content
      contains ``hello``;
    * a non-recursive ``list_artifacts`` on ``.`` reports success and has an
      entry mentioning ``file1``;
    * ``archive_artifacts`` on ``.`` with format ``tar.gz`` reports success
      and returns a non-empty ``archive_base64``.

    Any other exception is recorded as a failure. Once started, the backend
    is always stopped with ``purge=True``.
    """
    test_name = "test_atropos_artifact_helpers"

    if config.dry_run:
        results.record_skip(test_name, "Dry run mode")
        return

    try:
        try:
            from atropos.backends.modal_backend import ModalToolBackend, ModalSandboxConfig
        except (ImportError, ModuleNotFoundError):
            # The helper returns (ModalToolBackend, ModalSandboxConfig, Slot,
            # SlotState). Only the first two are needed here; star-unpacking
            # the rest avoids the ValueError that a fixed five-name unpack
            # raised on this four-item tuple.
            ModalToolBackend, ModalSandboxConfig, *_ = try_import_atropos_backend()

        config_obj = ModalSandboxConfig(
            app_name="test-artifacts",
            min_sandboxes=1,
            max_sandboxes=1,
            slots_per_sandbox=2,
        )

        backend = ModalToolBackend(config_obj)
        await backend.start()

        try:
            slot = await backend.acquire("artifact-test")

            # Create test files
            await backend.execute_batch([
                (slot, "bash", {"command": "echo 'hello' > file1.txt && echo 'world' > file2.txt && mkdir subdir && echo 'nested' > subdir/file3.txt"}),
            ])

            # Test read_artifact
            content = await backend.read_artifact(slot, "file1.txt")
            assert content["success"]
            assert "hello" in content["content"]

            # Test list_artifacts
            listing = await backend.list_artifacts(slot, ".", recursive=False)
            assert listing["success"]
            entries = listing["entries"]
            # Accept either an exact name match or any entry that mentions
            # the file, since the entry format is not fixed by this test.
            assert "file1.txt" in entries or any("file1" in entry for entry in entries)

            # Test archive_artifacts
            archive = await backend.archive_artifacts(slot, ".", archive_format="tar.gz")
            assert archive["success"]
            assert len(archive["archive_base64"]) > 0

            await backend.release(slot)
            results.record_pass(test_name)
        finally:
            await backend.stop(purge=True)
    except ImportError:
        results.record_skip(test_name, "Requires atroposlib: pip install -e '.[atropos]'")
    except Exception as e:
        results.record_fail(test_name, str(e))
|
|
|
|
|
|
# =============================================================================
|
|
# Test Runner
|
|
# =============================================================================
|
|
|
|
def run_sync_tests(config: TestConfig):
    """Execute the synchronous test groups selected by ``config.category``.

    A category of ``None`` runs every group; ``"profiles"`` or ``"terminal"``
    runs only the matching group. The section banner is printed regardless
    of which groups run.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("SYNCHRONOUS TESTS")
    print(rule)

    category = config.category
    run_everything = category is None

    if run_everything or category == "profiles":
        print("\n--- Profile Configuration Tests ---")
        for profile_test in (
            test_profile_loading_from_env,
            test_profile_loading_from_yaml,
            test_profile_defaults,
            test_atropos_config_with_app_name,
        ):
            profile_test()

    if run_everything or category == "terminal":
        print("\n--- Terminal Tool Modal Tests ---")
        # These two take no arguments.
        for standalone_test in (
            test_terminal_modal_pool_manager_singleton,
            test_terminal_create_environment_modal,
        ):
            standalone_test()
        # The remaining terminal tests receive the run configuration.
        for configured_test in (
            test_terminal_tool_profile_parameter,
            test_terminal_modal_execute_simple,
            test_terminal_modal_persistence,
            test_terminal_modal_isolation,
        ):
            configured_test(config)
|
|
|
|
|
|
async def run_async_tests(config: TestConfig):
    """Await the Atropos backend tests, one after another, section by section.

    The section banner is always printed. The tests themselves run only when
    ``config.category`` is ``None`` or ``"atropos"``.
    """
    rule = "=" * 60
    print("\n" + rule)
    print("ASYNCHRONOUS TESTS (Atropos Backend)")
    print(rule)

    if config.category not in (None, "atropos"):
        return

    # Each section is a heading followed by the tests to await in order.
    sections = (
        (
            "\n--- Backend Lifecycle Tests ---",
            (test_atropos_backend_lifecycle,),
        ),
        (
            "\n--- Slot Management Tests ---",
            (
                test_atropos_slot_acquire_release,
                test_atropos_execute_in_slot,
                test_atropos_batched_execution,
                test_atropos_slot_workspace_isolation,
                test_atropos_workspace_reset,
            ),
        ),
        (
            "\n--- Multi-Profile Tests ---",
            (
                test_atropos_multi_profile,
                test_atropos_cross_profile_batch,
            ),
        ),
        (
            "\n--- Artifact Helper Tests ---",
            (test_atropos_artifact_helpers,),
        ),
    )

    for heading, section_tests in sections:
        print(heading)
        for section_test in section_tests:
            await section_test(config)
|
|
|
|
|
|
def main():
    """Parse command-line options, run both test groups, and exit.

    Options:
        --dry-run: stored as ``TestConfig.dry_run``.
        --category: one of ``terminal``, ``atropos`` or ``profiles``; when
            omitted the category is ``None``.
        --verbose / --no-verbose: stored as ``TestConfig.verbose``. Verbose
            is on by default; ``--no-verbose`` turns it off.

    The synchronous tests run first, then the asynchronous ones under
    ``asyncio.run``. The process exits with status 0 when
    ``results.summary()`` is truthy and 1 otherwise.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Modal Integration Test Suite")
    parser.add_argument("--dry-run", action="store_true", help="Skip tests requiring Modal")
    parser.add_argument("--category", choices=["terminal", "atropos", "profiles"], help="Run specific category")
    # ``--verbose`` defaults to True, so on its own it could never change the
    # value. It is kept for backward compatibility and paired with
    # ``--no-verbose``, which writes False to the same destination.
    parser.add_argument("--verbose", action="store_true", default=True)
    parser.add_argument(
        "--no-verbose",
        dest="verbose",
        action="store_false",
        help="Disable verbose output",
    )
    args = parser.parse_args()

    config = TestConfig(
        dry_run=args.dry_run,
        verbose=args.verbose,
        category=args.category,
    )

    print("="*60)
    print("MODAL INTEGRATION TEST SUITE")
    print("="*60)
    print(f"Mode: {'DRY RUN' if config.dry_run else 'LIVE'}")
    print(f"Category: {config.category or 'ALL'}")

    # Run sync tests
    run_sync_tests(config)

    # Run async tests
    asyncio.run(run_async_tests(config))

    # Summary
    success = results.summary()
    sys.exit(0 if success else 1)
|
|
|
|
|
|
# Run the suite only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|