#!/usr/bin/env python3
"""
Modal Integration Stress Tests & Full Integration Tests
This test suite includes:
1. Stress tests for Modal sandbox pools (concurrent load, scaling)
2. Atropos backend tests (requires atroposlib)
3. mini-swe-agent integration tests
Prerequisites:
# Install dev dependencies
pip install -e '.[dev,modal]'
# Install atroposlib for Atropos tests
pip install -e '.[atropos]'
# Clone mini-swe-agent (if not present)
git clone https://github.com/anthropics/mini-swe-agent.git mini-swe-agent
# Or as submodule:
git submodule add https://github.com/anthropics/mini-swe-agent.git mini-swe-agent
Run with:
# All tests
python tests/test_modal_stress.py
# Stress tests only
python tests/test_modal_stress.py --category stress
# Atropos tests only
python tests/test_modal_stress.py --category atropos
# Mini-swe-agent tests only
python tests/test_modal_stress.py --category miniswe
# Dry run (no Modal calls)
python tests/test_modal_stress.py --dry-run
"""
import asyncio
import json
import os
import sys
import time
import traceback
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from typing import Dict, Any, List, Optional, Tuple
from dataclasses import dataclass
# Add parent to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
# =============================================================================
# Test Configuration
# =============================================================================
@dataclass
class StressTestConfig:
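    """Tunable parameters for the stress suite; CLI flags in main() override the defaults."""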
dry_run: bool = False
verbose: bool = True
category: Optional[str] = None
# Stress test parameters (reduced defaults for faster first-run)
concurrent_tasks: int = 3 # Start small - Modal cold starts are slow
total_operations: int = 10
max_sandboxes: int = 3
slots_per_sandbox: int = 3
# =============================================================================
# Test Results Tracking
# =============================================================================
class TestResults:
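    """Collects pass/fail/skip records plus per-test metrics for the final summary."""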
def __init__(self):
self.passed: List[str] = []
self.failed: List[Tuple[str, str]] = []
self.skipped: List[Tuple[str, str]] = []
self.metrics: Dict[str, Any] = {}
def record_pass(self, name: str, metrics: Optional[Dict] = None):
self.passed.append(name)
if metrics:
self.metrics[name] = metrics
print(f"{name}")
if metrics:
for k, v in metrics.items():
print(f" 📊 {k}: {v}")
def record_fail(self, name: str, error: str):
self.failed.append((name, error))
print(f"{name}: {error}")
def record_skip(self, name: str, reason: str):
self.skipped.append((name, reason))
print(f" ⏭️ {name}: {reason}")
def summary(self):
total = len(self.passed) + len(self.failed) + len(self.skipped)
print(f"\n{'='*70}")
print(f"STRESS TEST RESULTS: {len(self.passed)}/{total} passed")
print(f" Passed: {len(self.passed)}")
print(f" Failed: {len(self.failed)}")
print(f" Skipped: {len(self.skipped)}")
if self.failed:
print(f"\nFailed tests:")
for name, error in self.failed:
print(f" - {name}: {error}")
if self.metrics:
print(f"\nPerformance Metrics:")
for test, metrics in self.metrics.items():
print(f" {test}:")
for k, v in metrics.items():
print(f" - {k}: {v}")
return len(self.failed) == 0
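# Module-level collector shared by every test in this file.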
results = TestResults()
# =============================================================================
# Helper: Atropos Import
# =============================================================================
def try_import_atropos():
"""Try importing Atropos backend components."""
try:
from atropos.backends.modal_backend import (
ModalToolBackend, ModalSandboxConfig,
_ModalMultiProfileManager
)
from atropos.slots.slot import Slot, SlotState
return ModalToolBackend, ModalSandboxConfig, Slot, SlotState
    except ImportError:
        return None
def try_import_miniswe():
"""Try importing mini-swe-agent components."""
try:
# Check if mini-swe-agent path exists and has content
mini_swe_path = Path(__file__).parent.parent / "mini-swe-agent" / "src"
if mini_swe_path.exists() and list(mini_swe_path.iterdir()):
sys.path.insert(0, str(mini_swe_path))
import minisweagent
return minisweagent
return None
    except ImportError:
        return None
# =============================================================================
# CATEGORY 1: Stress Tests (Terminal Tool)
# =============================================================================
def test_stress_concurrent_tasks(config: StressTestConfig):
"""Stress test: Multiple concurrent task_ids hitting the pool."""
if config.dry_run:
results.record_skip("test_stress_concurrent_tasks", "Dry run mode")
return
from tools.terminal_tool import terminal_tool, cleanup_vm
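    # Snapshot TERMINAL_ENV so the finally block can restore it; every stress
    # test in this category forces the Modal backend using this same pattern.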
original_env = os.environ.get("TERMINAL_ENV")
os.environ["TERMINAL_ENV"] = "modal"
try:
num_tasks = config.concurrent_tasks
task_ids = [f"stress-concurrent-{i}-{int(time.time())}" for i in range(num_tasks)]
start_time = time.time()
errors = []
successes = 0
def run_task(task_id: str) -> Tuple[bool, str]:
try:
result = json.loads(terminal_tool(
f"echo 'Hello from {task_id}' && sleep 0.5",
task_id=task_id,
))
success = result["exit_code"] == 0
# IMPORTANT: Clean up immediately after task completes
# This releases the sandbox back to the pool for other tasks
try:
cleanup_vm(task_id)
                except Exception:
                    pass
if success:
return True, ""
# Include more details for debugging
error_detail = result.get("error", "no error message")
output = result.get("output", "")[:100] # First 100 chars
return False, f"Exit code: {result['exit_code']}, error: {error_detail}, output: {output}"
except Exception as e:
# Clean up even on failure
try:
cleanup_vm(task_id)
                except Exception:
                    pass
                return False, f"Exception: {str(e)}\n{traceback.format_exc()}"
# Run all tasks concurrently using threads
with ThreadPoolExecutor(max_workers=num_tasks) as executor:
futures = {executor.submit(run_task, tid): tid for tid in task_ids}
for future in as_completed(futures):
task_id = futures[future]
try:
success, error = future.result(timeout=60)
if success:
successes += 1
else:
errors.append(f"{task_id}: {error}")
except Exception as e:
errors.append(f"{task_id}: {str(e)}")
elapsed = time.time() - start_time
# No need for cleanup here - each task cleans up immediately
# Report
success_rate = successes / num_tasks * 100
if success_rate >= 90: # Allow 10% failure rate for stress test
results.record_pass("test_stress_concurrent_tasks", {
"concurrent_tasks": num_tasks,
"successes": successes,
"failures": len(errors),
"success_rate": f"{success_rate:.1f}%",
"total_time": f"{elapsed:.2f}s",
"avg_time_per_task": f"{elapsed/num_tasks:.2f}s",
})
else:
results.record_fail(
"test_stress_concurrent_tasks",
f"Success rate {success_rate:.1f}% < 90%. Errors: {errors[:3]}"
)
except Exception as e:
results.record_fail("test_stress_concurrent_tasks", str(e))
finally:
if original_env:
os.environ["TERMINAL_ENV"] = original_env
elif "TERMINAL_ENV" in os.environ:
del os.environ["TERMINAL_ENV"]
def test_stress_rapid_fire(config: StressTestConfig):
"""Stress test: Rapid sequential commands to same task_id."""
if config.dry_run:
results.record_skip("test_stress_rapid_fire", "Dry run mode")
return
from tools.terminal_tool import terminal_tool, cleanup_vm
original_env = os.environ.get("TERMINAL_ENV")
os.environ["TERMINAL_ENV"] = "modal"
try:
task_id = f"stress-rapid-{int(time.time())}"
num_commands = config.total_operations
start_time = time.time()
successes = 0
errors = []
for i in range(num_commands):
try:
result = json.loads(terminal_tool(f"echo {i}", task_id=task_id))
if result["exit_code"] == 0 and str(i) in result["output"]:
successes += 1
else:
errors.append(f"Command {i}: unexpected result")
except Exception as e:
errors.append(f"Command {i}: {str(e)}")
elapsed = time.time() - start_time
cleanup_vm(task_id)
success_rate = successes / num_commands * 100
commands_per_second = num_commands / elapsed
if success_rate >= 95:
results.record_pass("test_stress_rapid_fire", {
"total_commands": num_commands,
"successes": successes,
"success_rate": f"{success_rate:.1f}%",
"total_time": f"{elapsed:.2f}s",
"commands_per_second": f"{commands_per_second:.1f}",
})
else:
results.record_fail(
"test_stress_rapid_fire",
f"Success rate {success_rate:.1f}% < 95%"
)
except Exception as e:
results.record_fail("test_stress_rapid_fire", str(e))
finally:
if original_env:
os.environ["TERMINAL_ENV"] = original_env
elif "TERMINAL_ENV" in os.environ:
del os.environ["TERMINAL_ENV"]
def test_stress_pool_scaling(config: StressTestConfig):
"""Stress test: Force pool to scale up and down by running tasks in batches."""
if config.dry_run:
results.record_skip("test_stress_pool_scaling", "Dry run mode")
return
from tools.terminal_tool import terminal_tool, cleanup_vm, _ModalPoolManager
original_env = os.environ.get("TERMINAL_ENV")
os.environ["TERMINAL_ENV"] = "modal"
try:
# Run tasks in batches matching max_sandboxes to test pool reuse
# This verifies sandboxes can be acquired, used, released, and reused
batch_size = config.max_sandboxes
num_batches = 3
total_tasks = batch_size * num_batches
start_time = time.time()
successes = 0
for batch in range(num_batches):
task_ids = [f"stress-scale-{batch}-{i}-{int(time.time())}" for i in range(batch_size)]
def run_task(task_id: str):
try:
result = json.loads(terminal_tool(
"echo done", # Fast command to test scaling
task_id=task_id,
))
success = result["exit_code"] == 0
try:
cleanup_vm(task_id)
                    except Exception:
                        pass
return success
                except Exception:
                    try:
                        cleanup_vm(task_id)
                    except Exception:
                        pass
return False
# Run batch concurrently
with ThreadPoolExecutor(max_workers=batch_size) as executor:
batch_results = list(executor.map(run_task, task_ids))
successes += sum(batch_results)
elapsed = time.time() - start_time
# Check pool status
try:
manager = _ModalPoolManager.get_instance()
pool_status = manager.get_status() if hasattr(manager, 'get_status') else {}
        except Exception:
            pool_status = {}
success_rate = successes / total_tasks * 100
if success_rate >= 80: # Allow some tolerance
results.record_pass("test_stress_pool_scaling", {
"total_tasks": total_tasks,
"num_batches": num_batches,
"batch_size": batch_size,
"successes": successes,
"success_rate": f"{success_rate:.1f}%",
"total_time": f"{elapsed:.2f}s",
"pool_status": pool_status,
})
else:
results.record_fail(
"test_stress_pool_scaling",
f"Success rate {success_rate:.1f}% < 80%"
)
except Exception as e:
results.record_fail("test_stress_pool_scaling", str(e))
finally:
if original_env:
os.environ["TERMINAL_ENV"] = original_env
elif "TERMINAL_ENV" in os.environ:
del os.environ["TERMINAL_ENV"]
def test_stress_large_output(config: StressTestConfig):
"""Stress test: Commands producing large output."""
if config.dry_run:
results.record_skip("test_stress_large_output", "Dry run mode")
return
from tools.terminal_tool import terminal_tool, cleanup_vm
original_env = os.environ.get("TERMINAL_ENV")
os.environ["TERMINAL_ENV"] = "modal"
try:
task_id = f"stress-large-{int(time.time())}"
# First verify basic connectivity with simple command
warmup = json.loads(terminal_tool("echo warmup", task_id=task_id))
if warmup["exit_code"] != 0:
results.record_fail(
"test_stress_large_output",
f"Warmup failed: {warmup.get('error', 'unknown')}"
)
return
# Generate output - use seq which is more portable
start_time = time.time()
result = json.loads(terminal_tool(
'seq 1 500 | while read i; do echo "Line $i: This is test content for large output"; done',
task_id=task_id,
timeout=60,
))
elapsed = time.time() - start_time
cleanup_vm(task_id)
output_size = len(result.get("output", ""))
error_msg = result.get("error", "")
if result["exit_code"] == 0 and output_size > 5000:
results.record_pass("test_stress_large_output", {
"output_size": f"{output_size:,} bytes",
"time": f"{elapsed:.2f}s",
"throughput": f"{output_size/elapsed/1024:.1f} KB/s" if elapsed > 0 else "N/A",
})
else:
results.record_fail(
"test_stress_large_output",
f"Exit code: {result['exit_code']}, output size: {output_size}, error: {error_msg}"
)
except Exception as e:
        results.record_fail("test_stress_large_output", f"{str(e)}\n{traceback.format_exc()}")
finally:
try:
cleanup_vm(task_id)
        except Exception:
            pass
if original_env:
os.environ["TERMINAL_ENV"] = original_env
elif "TERMINAL_ENV" in os.environ:
del os.environ["TERMINAL_ENV"]
def test_stress_error_recovery(config: StressTestConfig):
"""Stress test: Commands that fail and verify sandbox continues working."""
if config.dry_run:
results.record_skip("test_stress_error_recovery", "Dry run mode")
return
from tools.terminal_tool import terminal_tool, cleanup_vm
original_env = os.environ.get("TERMINAL_ENV")
os.environ["TERMINAL_ENV"] = "modal"
try:
task_id = f"stress-error-{int(time.time())}"
# Run some failing commands
failing_commands = [
"exit 1",
"false",
"cat /nonexistent/file",
"command_that_does_not_exist",
]
for cmd in failing_commands:
result = json.loads(terminal_tool(cmd, task_id=task_id))
# These should fail but not crash
assert result["exit_code"] != 0 or result.get("error"), f"Expected failure for: {cmd}"
# Now run a command that should succeed
result = json.loads(terminal_tool("echo 'recovery success'", task_id=task_id))
cleanup_vm(task_id)
if result["exit_code"] == 0 and "recovery success" in result["output"]:
results.record_pass("test_stress_error_recovery", {
"failed_commands": len(failing_commands),
"recovery": "success",
})
else:
results.record_fail(
"test_stress_error_recovery",
f"Recovery failed: {result}"
)
except Exception as e:
results.record_fail("test_stress_error_recovery", str(e))
finally:
if original_env:
os.environ["TERMINAL_ENV"] = original_env
elif "TERMINAL_ENV" in os.environ:
del os.environ["TERMINAL_ENV"]
# =============================================================================
# CATEGORY 2: Atropos Backend Stress Tests
# =============================================================================
async def test_atropos_stress_slot_churn(config: StressTestConfig):
"""Atropos stress test: Rapid slot acquire/release cycles."""
if config.dry_run:
results.record_skip("test_atropos_stress_slot_churn", "Dry run mode")
return
imports = try_import_atropos()
if imports is None:
results.record_skip("test_atropos_stress_slot_churn", "Requires atroposlib")
return
ModalToolBackend, ModalSandboxConfig, _, _ = imports
try:
backend_config = ModalSandboxConfig(
app_name=f"stress-churn-{int(time.time())}",
min_sandboxes=1,
max_sandboxes=3,
slots_per_sandbox=5,
)
backend = ModalToolBackend(backend_config)
await backend.start()
try:
num_cycles = config.total_operations
start_time = time.time()
successes = 0
for i in range(num_cycles):
try:
slot = await backend.acquire(f"churn-{i}")
# Quick command
results_list = await backend.execute_batch([
(slot, "bash", {"command": f"echo {i}"})
])
if results_list[0].success:
successes += 1
await backend.release(slot, reset_workspace=(i % 5 == 0))
                except Exception:
                    pass  # count as a failure; the success-rate check below reports it
elapsed = time.time() - start_time
success_rate = successes / num_cycles * 100
if success_rate >= 90:
results.record_pass("test_atropos_stress_slot_churn", {
"cycles": num_cycles,
"successes": successes,
"success_rate": f"{success_rate:.1f}%",
"total_time": f"{elapsed:.2f}s",
"cycles_per_second": f"{num_cycles/elapsed:.1f}",
})
else:
results.record_fail(
"test_atropos_stress_slot_churn",
f"Success rate {success_rate:.1f}% < 90%"
)
finally:
await backend.stop(purge=True)
except Exception as e:
results.record_fail("test_atropos_stress_slot_churn", str(e))
async def test_atropos_stress_parallel_batches(config: StressTestConfig):
"""Atropos stress test: Multiple parallel batch executions."""
if config.dry_run:
results.record_skip("test_atropos_stress_parallel_batches", "Dry run mode")
return
imports = try_import_atropos()
if imports is None:
results.record_skip("test_atropos_stress_parallel_batches", "Requires atroposlib")
return
ModalToolBackend, ModalSandboxConfig, _, _ = imports
try:
backend_config = ModalSandboxConfig(
app_name=f"stress-batch-{int(time.time())}",
min_sandboxes=2,
max_sandboxes=4,
slots_per_sandbox=5,
)
backend = ModalToolBackend(backend_config)
await backend.start()
try:
num_slots = 10
slots = []
# Acquire multiple slots
for i in range(num_slots):
slot = await backend.acquire(f"batch-{i}")
slots.append(slot)
# Run multiple batches in parallel
start_time = time.time()
num_batches = 5
async def run_batch(batch_id: int):
requests = [
(slot, "bash", {"command": f"echo 'batch{batch_id}-slot{i}'"})
for i, slot in enumerate(slots)
]
return await backend.execute_batch(requests)
batch_tasks = [run_batch(i) for i in range(num_batches)]
all_results = await asyncio.gather(*batch_tasks)
elapsed = time.time() - start_time
# Count successes
total_commands = num_batches * num_slots
successes = sum(
1 for batch_result in all_results
for r in batch_result
if r.success
)
# Release slots
for slot in slots:
await backend.release(slot)
success_rate = successes / total_commands * 100
if success_rate >= 90:
results.record_pass("test_atropos_stress_parallel_batches", {
"batches": num_batches,
"slots": num_slots,
"total_commands": total_commands,
"successes": successes,
"success_rate": f"{success_rate:.1f}%",
"total_time": f"{elapsed:.2f}s",
"commands_per_second": f"{total_commands/elapsed:.1f}",
})
else:
results.record_fail(
"test_atropos_stress_parallel_batches",
f"Success rate {success_rate:.1f}% < 90%"
)
finally:
await backend.stop(purge=True)
except Exception as e:
results.record_fail("test_atropos_stress_parallel_batches", str(e))
async def test_atropos_stress_multi_profile_load(config: StressTestConfig):
"""Atropos stress test: Load across multiple profiles."""
if config.dry_run:
results.record_skip("test_atropos_stress_multi_profile_load", "Dry run mode")
return
imports = try_import_atropos()
if imports is None:
results.record_skip("test_atropos_stress_multi_profile_load", "Requires atroposlib")
return
ModalToolBackend, ModalSandboxConfig, _, _ = imports
try:
backend = ModalToolBackend.with_profiles(
app_name=f"stress-multiprofile-{int(time.time())}",
profiles={
"cpu-light": ModalSandboxConfig(
name="cpu-light",
cpu=0.5,
memory=1024,
min_sandboxes=1,
max_sandboxes=2,
slots_per_sandbox=5,
),
"cpu-heavy": ModalSandboxConfig(
name="cpu-heavy",
cpu=2.0,
memory=4096,
min_sandboxes=0,
max_sandboxes=2,
slots_per_sandbox=3,
),
}
)
await backend.start(profiles_to_start=["cpu-light", "cpu-heavy"])
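        # Both profiles are started up front; acquire() below selects each by name.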
try:
num_tasks_per_profile = 5
slots = []
# Acquire from both profiles
for i in range(num_tasks_per_profile):
light_slot = await backend.acquire(f"light-{i}", profile="cpu-light")
heavy_slot = await backend.acquire(f"heavy-{i}", profile="cpu-heavy")
slots.append((light_slot, "cpu-light"))
slots.append((heavy_slot, "cpu-heavy"))
# Execute batch across all profiles
start_time = time.time()
requests = [
(slot, "bash", {"command": f"echo 'profile={profile}'"})
for slot, profile in slots
]
batch_results = await backend.execute_batch(requests)
elapsed = time.time() - start_time
successes = sum(1 for r in batch_results if r.success)
# Release all
for slot, _ in slots:
await backend.release(slot)
status = backend.get_status()
success_rate = successes / len(slots) * 100
if success_rate >= 90:
results.record_pass("test_atropos_stress_multi_profile_load", {
"profiles": 2,
"tasks_per_profile": num_tasks_per_profile,
"total_tasks": len(slots),
"successes": successes,
"success_rate": f"{success_rate:.1f}%",
"time": f"{elapsed:.2f}s",
"status": status,
})
else:
results.record_fail(
"test_atropos_stress_multi_profile_load",
f"Success rate {success_rate:.1f}% < 90%"
)
finally:
await backend.stop(purge=True)
except Exception as e:
results.record_fail("test_atropos_stress_multi_profile_load", str(e))
# =============================================================================
# CATEGORY 3: Mini-SWE-Agent Integration Tests
# =============================================================================
def test_miniswe_environment_available():
"""Check if mini-swe-agent is properly set up."""
mini_swe_path = Path(__file__).parent.parent / "mini-swe-agent" / "src"
if not mini_swe_path.exists():
results.record_skip(
"test_miniswe_environment_available",
"mini-swe-agent not found. Run: git clone https://github.com/anthropics/mini-swe-agent.git mini-swe-agent"
)
return
if not list(mini_swe_path.iterdir()):
results.record_skip(
"test_miniswe_environment_available",
"mini-swe-agent directory is empty. Run: git submodule update --init"
)
return
miniswe = try_import_miniswe()
if miniswe is None:
results.record_fail(
"test_miniswe_environment_available",
"Failed to import minisweagent module"
)
return
results.record_pass("test_miniswe_environment_available", {
"path": str(mini_swe_path),
"module": miniswe.__name__,
})
def test_miniswe_modal_backend(config: StressTestConfig):
"""Test mini-swe-agent with Modal backend."""
if config.dry_run:
results.record_skip("test_miniswe_modal_backend", "Dry run mode")
return
miniswe = try_import_miniswe()
if miniswe is None:
results.record_skip(
"test_miniswe_modal_backend",
"mini-swe-agent not available"
)
return
try:
# Check if ModalEnvironment exists in minisweagent
if not hasattr(miniswe, 'ModalEnvironment'):
results.record_skip(
"test_miniswe_modal_backend",
"minisweagent.ModalEnvironment not found"
)
return
# Create Modal environment
env = miniswe.ModalEnvironment(
image="python:3.11",
timeout=60,
)
# Execute a command
result = env.execute("echo 'Hello from mini-swe-agent Modal'")
env.cleanup()
if "Hello from mini-swe-agent Modal" in str(result):
results.record_pass("test_miniswe_modal_backend")
else:
results.record_fail(
"test_miniswe_modal_backend",
f"Unexpected result: {result}"
)
except Exception as e:
results.record_fail("test_miniswe_modal_backend", str(e))
# =============================================================================
# Test Runner
# =============================================================================
def run_sync_tests(config: StressTestConfig):
"""Run synchronous tests."""
if config.category in (None, "stress"):
print("\n" + "="*70)
print("STRESS TESTS (Terminal Tool)")
print("="*70)
test_stress_concurrent_tasks(config)
test_stress_rapid_fire(config)
test_stress_pool_scaling(config)
test_stress_large_output(config)
test_stress_error_recovery(config)
if config.category in (None, "miniswe"):
print("\n" + "="*70)
print("MINI-SWE-AGENT INTEGRATION TESTS")
print("="*70)
test_miniswe_environment_available()
test_miniswe_modal_backend(config)
async def run_async_tests(config: StressTestConfig):
"""Run asynchronous tests."""
if config.category in (None, "atropos"):
print("\n" + "="*70)
print("ATROPOS BACKEND STRESS TESTS")
print("="*70)
await test_atropos_stress_slot_churn(config)
await test_atropos_stress_parallel_batches(config)
await test_atropos_stress_multi_profile_load(config)
def main():
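    """Parse CLI flags, run the selected test categories, and exit nonzero on any failure."""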
import argparse
parser = argparse.ArgumentParser(description="Modal Stress Test Suite")
parser.add_argument("--dry-run", action="store_true", help="Skip tests requiring Modal")
parser.add_argument("--category", choices=["stress", "atropos", "miniswe"], help="Run specific category")
parser.add_argument("--concurrent", type=int, default=10, help="Number of concurrent tasks")
parser.add_argument("--operations", type=int, default=50, help="Total operations for stress tests")
parser.add_argument("--verbose", action="store_true", default=True)
args = parser.parse_args()
config = StressTestConfig(
dry_run=args.dry_run,
verbose=args.verbose,
category=args.category,
concurrent_tasks=args.concurrent,
total_operations=args.operations,
)
print("="*70)
print("MODAL STRESS & INTEGRATION TEST SUITE")
print("="*70)
print(f"Mode: {'DRY RUN' if config.dry_run else 'LIVE'}")
print(f"Category: {config.category or 'ALL'}")
print(f"Concurrent tasks: {config.concurrent_tasks}")
print(f"Total operations: {config.total_operations}")
# Run sync tests
run_sync_tests(config)
# Run async tests
asyncio.run(run_async_tests(config))
# Summary
success = results.summary()
sys.exit(0 if success else 1)
if __name__ == "__main__":
main()