Files
hermes-agent/tests/tools/test_vercel_sandbox_environment.py
Scott Trinh 5a1d4f6804 feat: add Vercel Sandbox backend
Adds Vercel Sandbox as a supported Hermes terminal backend alongside
existing providers (Local, Docker, Modal, SSH, Daytona, Singularity).

Uses the Vercel Python SDK to create/manage cloud microVMs, supports
snapshot-based filesystem persistence keyed by task_id, and integrates
with the existing BaseEnvironment shell contract and FileSyncManager
for credential/skill syncing.

Based on #17127 by @scotttrinh, cherry-picked onto current main.
2026-04-29 07:22:33 -07:00

624 lines
21 KiB
Python

"""Unit tests for the Vercel Sandbox terminal backend."""
from __future__ import annotations
import importlib
import io
import re
import sys
import tarfile
import threading
import types
from dataclasses import dataclass
from enum import StrEnum
from pathlib import Path
from types import SimpleNamespace
import pytest
class _FakeRunResult:
def __init__(self, output: str | bytes = "", exit_code: int = 0):
self._output = output
self.exit_code = exit_code
def output(self) -> str | bytes:
return self._output
class _FakeSandboxStatus(StrEnum):
PENDING = "pending"
RUNNING = "running"
STOPPING = "stopping"
STOPPED = "stopped"
FAILED = "failed"
ABORTED = "aborted"
SNAPSHOTTING = "snapshotting"
@dataclass(frozen=True)
class _FakeSnapshot:
snapshot_id: str
class _FakeSandbox:
    """In-memory double for the sandbox object handed out by the fake SDK.

    Most methods append their arguments to a ``*_calls`` list so tests can
    inspect them afterwards (``refresh`` and the client's ``close`` only bump
    a counter). Methods with a matching ``*_side_effects`` queue pop its
    first entry on each call: an ``Exception`` instance is raised and a
    callable is invoked. What happens to any other queued value differs per
    method and is described on each method.
    """

    def __init__(
        self,
        *,
        cwd: str = "/vercel/sandbox",
        home: str = "/home/vercel",
        status: _FakeSandboxStatus = _FakeSandboxStatus.RUNNING,
    ):
        # Identity and observable state.
        self.sandbox = SimpleNamespace(cwd=cwd, id="sb-123")
        self.status = status
        self.home = home

        # ``client.close()`` only bumps a counter.
        self.closed = 0
        self.client = SimpleNamespace(close=self._close)

        # run_command bookkeeping.
        self.run_command_calls: list[tuple[str, list[str], dict]] = []
        self.run_command_side_effects: list[object] = []

        # write_files bookkeeping.
        self.write_files_calls: list[list[dict[str, object]]] = []
        self.write_files_side_effects: list[object] = []

        # download_file bookkeeping; ``download_file_content`` is what gets
        # written to the destination when no side effect intervenes.
        self.download_file_calls: list[tuple[str, Path]] = []
        self.download_file_side_effects: list[object] = []
        self.download_file_content = b""

        # stop / snapshot bookkeeping.
        self.stop_calls: list[tuple[tuple, dict]] = []
        self.snapshot_calls: list[tuple[tuple, dict]] = []
        self.snapshot_side_effects: list[object] = []
        self.snapshot_id = "snap_default"

        # refresh / wait_for_status bookkeeping.
        self.refresh_calls = 0
        self.wait_for_status_calls: list[tuple[object, object, object]] = []
        self.wait_for_status_side_effects: list[object] = []

    def _close(self) -> None:
        """Count one ``client.close()`` invocation."""
        self.closed += 1

    def refresh(self) -> None:
        """Count one refresh; the status itself is left untouched."""
        self.refresh_calls += 1

    def wait_for_status(self, status: _FakeSandboxStatus | str, *, timeout, poll_interval) -> None:
        """Record the wait, then either run a queued side effect or adopt ``status``.

        With an empty queue the sandbox's status becomes ``status``. With a
        queued entry the status is NOT changed: exceptions are raised,
        callables are called with ``(status, timeout, poll_interval)``, and
        anything else is discarded.
        """
        self.wait_for_status_calls.append((status, timeout, poll_interval))
        if not self.wait_for_status_side_effects:
            self.status = _FakeSandboxStatus(status)
            return
        queued = self.wait_for_status_side_effects.pop(0)
        if isinstance(queued, Exception):
            raise queued
        if callable(queued):
            queued(status, timeout, poll_interval)

    def run_command(self, cmd: str, args: list[str] | None = None, **kwargs):
        """Record the command and return a queued or default result.

        Queued exceptions are raised, queued callables are called with
        ``(cmd, args, kwargs)`` and their return value is passed through, and
        any other queued value is returned unchanged. Without a queued entry,
        a script (``args[1]``) containing ``printf %s "$HOME"`` yields
        ``self.home``; every other command yields empty output.
        """
        argv = list(args) if args else []
        self.run_command_calls.append((cmd, argv, kwargs))
        if self.run_command_side_effects:
            queued = self.run_command_side_effects.pop(0)
            if isinstance(queued, Exception):
                raise queued
            return queued(cmd, argv, kwargs) if callable(queued) else queued
        script = argv[1] if len(argv) > 1 else ""
        asks_for_home = 'printf %s "$HOME"' in script
        return _FakeRunResult(self.home if asks_for_home else "")

    def write_files(self, files: list[dict[str, object]]) -> None:
        """Record the upload and run a queued side effect, if any.

        Queued exceptions are raised, queued callables are called with
        ``files``, and any other queued value is discarded.
        """
        self.write_files_calls.append(files)
        if not self.write_files_side_effects:
            return
        queued = self.write_files_side_effects.pop(0)
        if isinstance(queued, Exception):
            raise queued
        if callable(queued):
            queued(files)

    def download_file(self, remote_path: str, local_path) -> str:
        """Record the download and materialise ``download_file_content`` locally.

        Queued exceptions are raised and queued callables take over entirely
        (their return value is returned). Any other queued value is consumed
        but ignored, so the default write still happens. Returns the resolved
        destination path as a string.
        """
        destination = Path(local_path)
        self.download_file_calls.append((remote_path, destination))
        if self.download_file_side_effects:
            queued = self.download_file_side_effects.pop(0)
            if isinstance(queued, Exception):
                raise queued
            if callable(queued):
                return queued(remote_path, destination)
        destination.write_bytes(self.download_file_content)
        return str(destination.resolve())

    def stop(self, *args, **kwargs) -> None:
        """Record a stop request; the status is not modified."""
        self.stop_calls.append((args, kwargs))

    def snapshot(self, *args, **kwargs):
        """Record the snapshot request and return a queued or default snapshot.

        Queued exceptions are raised, queued callables are called with the
        same arguments, queued strings are wrapped in ``_FakeSnapshot``, and
        any other queued value is returned unchanged. With an empty queue the
        result is ``_FakeSnapshot(self.snapshot_id)``.
        """
        self.snapshot_calls.append((args, kwargs))
        if not self.snapshot_side_effects:
            return _FakeSnapshot(self.snapshot_id)
        queued = self.snapshot_side_effects.pop(0)
        if isinstance(queued, Exception):
            raise queued
        if callable(queued):
            return queued(*args, **kwargs)
        return _FakeSnapshot(queued) if isinstance(queued, str) else queued
@dataclass(frozen=True)
class _FakeResources:
vcpus: float | None = None
memory: int | None = None
@dataclass(frozen=True)
class _FakeWriteFile:
path: str
content: bytes
class _FakeSDK:
def __init__(self):
self.create_kwargs: list[dict[str, object]] = []
self.create_side_effects: list[object] = []
self.sandboxes: list[_FakeSandbox] = []
@property
def current(self) -> _FakeSandbox:
return self.sandboxes[-1]
def create(self, **kwargs):
self.create_kwargs.append(kwargs)
if self.create_side_effects:
effect = self.create_side_effects.pop(0)
if isinstance(effect, Exception):
raise effect
if isinstance(effect, _FakeSandbox):
self.sandboxes.append(effect)
return effect
sandbox = _FakeSandbox()
self.sandboxes.append(sandbox)
return sandbox
def _cwd_result(body: str = "", *, cwd: str = "/vercel/sandbox", exit_code: int = 0):
def _result(_cmd: str, args: list[str], _kwargs: dict):
script = args[1] if len(args) > 1 else ""
match = re.search(r"__HERMES_CWD_[A-Za-z0-9]+__", script)
marker = match.group(0) if match else "__HERMES_CWD_MISSING__"
prefix = f"{body}\n\n" if body else "\n"
return _FakeRunResult(f"{prefix}{marker}{cwd}{marker}\n", exit_code)
return _result
def _tar_bytes(entries: dict[str, bytes]) -> bytes:
buffer = io.BytesIO()
with tarfile.open(fileobj=buffer, mode="w") as tar:
for name, content in entries.items():
info = tarfile.TarInfo(name)
info.size = len(content)
tar.addfile(info, io.BytesIO(content))
return buffer.getvalue()
@pytest.fixture()
def vercel_sdk(monkeypatch):
    """Install a fake ``vercel`` package in ``sys.modules`` and return its ``_FakeSDK``.

    The fake ``vercel.sandbox`` module exposes ``Sandbox.create`` (bound to
    the returned SDK), ``Resources``, ``WriteFile`` and ``SandboxStatus``.
    ``monkeypatch`` removes both ``sys.modules`` entries after the test.
    """
    fake_sdk = _FakeSDK()

    sandbox_mod = types.ModuleType("vercel.sandbox")
    exported = {
        "Sandbox": types.SimpleNamespace(create=fake_sdk.create),
        "Resources": _FakeResources,
        "WriteFile": _FakeWriteFile,
        "SandboxStatus": _FakeSandboxStatus,
    }
    for attr_name, stub in exported.items():
        setattr(sandbox_mod, attr_name, stub)

    vercel_mod = types.ModuleType("vercel")
    vercel_mod.sandbox = sandbox_mod

    # Register parent and submodule so both import spellings resolve.
    for dotted_name, module in (("vercel", vercel_mod), ("vercel.sandbox", sandbox_mod)):
        monkeypatch.setitem(sys.modules, dotted_name, module)
    return fake_sdk
@pytest.fixture()
def vercel_module(vercel_sdk, monkeypatch):
    """Return a freshly reloaded ``tools.environments.vercel_sandbox`` module.

    Depending on ``vercel_sdk`` ensures the fake SDK is in ``sys.modules``
    before the reload. Interrupt detection is forced to ``False`` and the
    three credential/skill/cache providers are patched to report nothing,
    so by default no files are reported for syncing.
    """
    monkeypatch.setattr("tools.environments.base.is_interrupted", lambda: False)
    monkeypatch.setattr("tools.credential_files.get_credential_file_mounts", lambda: [])
    for target in (
        "tools.credential_files.iter_skills_files",
        "tools.credential_files.iter_cache_files",
    ):
        monkeypatch.setattr(target, lambda **kwargs: [])
    loaded = importlib.import_module("tools.environments.vercel_sandbox")
    return importlib.reload(loaded)
@pytest.fixture()
def make_env(vercel_module, request):
    """Return a factory for ``VercelSandboxEnvironment`` with test defaults.

    The factory fills in ``runtime``, ``cwd``, ``timeout`` and ``task_id``
    unless the caller supplies them. Every environment it builds is cleaned
    up when the test finishes.
    """
    created = []

    def _factory(**kwargs):
        defaults = {
            "runtime": "node22",
            "cwd": vercel_module.DEFAULT_VERCEL_CWD,
            "timeout": 30,
            "task_id": "task-123",
        }
        for option, fallback in defaults.items():
            kwargs.setdefault(option, fallback)
        env = vercel_module.VercelSandboxEnvironment(**kwargs)
        created.append(env)
        return env

    def _cleanup_envs():
        for env in created:
            # NOTE(review): clearing the sync manager presumably keeps the
            # teardown cleanup from attempting a sync-back; confirm.
            env._sync_manager = None
            env.cleanup()

    request.addfinalizer(_cleanup_envs)
    return _factory
class TestStartup:
    """Behaviour of the environment constructor."""

    def test_default_cwd_tracks_remote_workspace_root(self, make_env, vercel_sdk):
        """With the default cwd, ``env.cwd`` follows the sandbox's reported cwd."""
        vercel_sdk.create_side_effects.append(_FakeSandbox(cwd="/workspace"))

        env = make_env()

        assert env.cwd == "/workspace"

    def test_tilde_cwd_resolves_against_remote_home(self, make_env, vercel_sdk):
        """``cwd="~"`` resolves to the home directory the sandbox reports."""
        vercel_sdk.create_side_effects.append(_FakeSandbox(home="/home/custom"))

        env = make_env(cwd="~")

        assert env.cwd == "/home/custom"

    def test_pending_sandbox_timeout_raises_descriptive_error(
        self, make_env, vercel_sdk
    ):
        """A timeout while waiting on a PENDING sandbox surfaces as ``RuntimeError``."""
        stuck = _FakeSandbox(status=_FakeSandboxStatus.PENDING)
        stuck.wait_for_status_side_effects.append(TimeoutError("still pending"))
        vercel_sdk.create_side_effects.append(stuck)

        with pytest.raises(RuntimeError, match="Sandbox did not reach running state"):
            make_env()
class TestFileSync:
    """Upload, re-sync and sync-back of managed credential files."""

    @staticmethod
    def _mount_token_file(monkeypatch, tmp_path, content):
        """Create ``tmp_path/token.txt`` and expose it as the only managed file.

        Patches ``get_credential_file_mounts`` to report the file at the
        container path ``/root/.hermes/credentials/token.txt`` and patches
        the skills/cache iterators to yield nothing.

        Returns the host-side ``Path`` of the token file.
        """
        src = tmp_path / "token.txt"
        src.write_text(content)
        monkeypatch.setattr(
            "tools.credential_files.get_credential_file_mounts",
            lambda: [
                {
                    "host_path": str(src),
                    "container_path": "/root/.hermes/credentials/token.txt",
                }
            ],
        )
        monkeypatch.setattr("tools.credential_files.iter_skills_files", lambda **kwargs: [])
        monkeypatch.setattr("tools.credential_files.iter_cache_files", lambda **kwargs: [])
        return src

    def test_initial_sync_uploads_managed_files_under_remote_home(
        self, make_env, vercel_sdk, monkeypatch, tmp_path
    ):
        """Creating the environment uploads the mount, re-rooted under the sandbox home."""
        self._mount_token_file(monkeypatch, tmp_path, "secret-token")

        make_env()

        # The mount is declared under /root/.hermes but must land under the
        # fake sandbox's home directory (/home/vercel).
        uploaded = vercel_sdk.current.write_files_calls[0]
        assert uploaded == [
            {
                "path": "/home/vercel/.hermes/credentials/token.txt",
                "content": b"secret-token",
            }
        ]

    def test_execute_resyncs_changed_managed_files(
        self, make_env, vercel_sdk, monkeypatch, tmp_path
    ):
        """A host-side change is re-uploaded before the next command runs."""
        src = self._mount_token_file(monkeypatch, tmp_path, "secret-token")
        env = make_env()

        src.write_text("updated-secret-token")
        monkeypatch.setenv("HERMES_FORCE_FILE_SYNC", "1")
        vercel_sdk.current.run_command_side_effects.append(_cwd_result("hello"))

        result = env.execute("echo hello")

        assert result == {"output": "hello\n", "returncode": 0}
        assert vercel_sdk.current.write_files_calls[-1] == [
            {
                "path": "/home/vercel/.hermes/credentials/token.txt",
                "content": b"updated-secret-token",
            }
        ]

    def test_cleanup_syncs_back_snapshots_closes_and_is_idempotent(
        self, make_env, vercel_module, vercel_sdk, monkeypatch, tmp_path
    ):
        """Cleanup pulls remote edits back, snapshots, stops and closes exactly once."""
        hermes_home = tmp_path / ".hermes"
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        src = self._mount_token_file(monkeypatch, tmp_path, "host-token")

        env = make_env()
        sandbox = vercel_sdk.current
        sandbox.snapshot_id = "snap_cleanup"
        # Archive served by the fake download: one changed file, one new file
        # next to it, and one file outside any mapped directory.
        sandbox.download_file_content = _tar_bytes(
            {
                "home/vercel/.hermes/credentials/token.txt": b"remote-token",
                "home/vercel/.hermes/credentials/new.txt": b"new-remote",
                "home/vercel/.hermes/unmapped/skip.txt": b"skip",
            }
        )

        env.cleanup()
        env.cleanup()  # second call must be a no-op

        assert src.read_text() == "remote-token"
        assert (tmp_path / "new.txt").read_text() == "new-remote"
        assert not (tmp_path / "skip.txt").exists()
        assert len(sandbox.snapshot_calls) == 1
        assert len(sandbox.stop_calls) == 1  # always stop after snapshot to avoid resource leaks
        assert sandbox.closed == 1
        assert vercel_module._load_snapshots() == {"task-123": "snap_cleanup"}

    def test_cleanup_sync_back_failure_from_download_does_not_block_snapshot(
        self, make_env, vercel_sdk, monkeypatch, tmp_path
    ):
        """A failing sync-back leaves the host file alone but cleanup still snapshots.

        The failure is injected at the remote command step (exit code 2), so
        ``download_file`` is never reached.
        """
        src = self._mount_token_file(monkeypatch, tmp_path, "host-token")

        env = make_env()
        sandbox = vercel_sdk.current
        # Three (failing command, empty follow-up) pairs.
        # NOTE(review): presumably one pair per sync-back retry attempt;
        # confirm against the file-sync retry policy.
        for _ in range(3):
            sandbox.run_command_side_effects.extend(
                [
                    _FakeRunResult("tar failed", exit_code=2),
                    _FakeRunResult(""),
                ]
            )
        # Neutralise sleeps inside the file-sync module so the test does not wait.
        monkeypatch.setattr("tools.environments.file_sync.time.sleep", lambda _delay: None)

        env.cleanup()

        assert src.read_text() == "host-token"
        assert len(sandbox.snapshot_calls) == 1
        assert sandbox.closed == 1
        assert len(sandbox.download_file_calls) == 0
class TestExecute:
    """Command execution, sandbox recreation and handle/sandbox binding."""

    def test_execute_runs_command_from_workspace_root_and_updates_cwd(
        self, make_env, vercel_sdk
    ):
        """``execute(cwd=...)`` changes directory inside the script, not via the SDK ``cwd``."""
        env = make_env()
        vercel_sdk.current.run_command_side_effects.append(
            _cwd_result("/tmp", cwd="/tmp")
        )

        result = env.execute("pwd", cwd="/tmp")

        assert result == {"output": "/tmp\n", "returncode": 0}
        assert env.cwd == "/tmp"
        cmd, args, kwargs = vercel_sdk.current.run_command_calls[-1]
        assert cmd == "bash"
        assert args[0] == "-c"
        assert "cd /tmp" in args[1]
        # The SDK-level cwd stays at the workspace root.
        assert kwargs["cwd"] == "/vercel/sandbox"

    @pytest.mark.parametrize(
        ("make_unhealthy", "label"),
        [
            (
                lambda sandbox: setattr(
                    sandbox, "status", _FakeSandboxStatus.STOPPED
                ),
                "terminal state",
            ),
            (
                # A lambda cannot contain ``raise``; throwing into an empty
                # generator raises from within an expression instead.
                lambda sandbox: setattr(
                    sandbox,
                    "refresh",
                    lambda: (_ for _ in ()).throw(RuntimeError("refresh failed")),
                ),
                "refresh failure",
            ),
        ],
        ids=["terminal-state", "refresh-failure"],
    )
    def test_execute_recreates_unhealthy_sandbox_before_running_command(
        self, make_env, vercel_sdk, make_unhealthy, label
    ):
        """An unhealthy sandbox is closed and replaced before the command runs.

        ``make_unhealthy`` mutates the original sandbox (terminal status, or a
        ``refresh`` that raises). ``label`` is used as the assertion message.
        """
        env = make_env()
        original = vercel_sdk.current
        make_unhealthy(original)

        replacement = _FakeSandbox()
        # NOTE(review): the first queued result presumably answers the $HOME
        # probe issued while the replacement is set up; confirm.
        replacement.run_command_side_effects.extend(
            [
                _FakeRunResult(replacement.home),
                _cwd_result("hello"),
            ]
        )
        vercel_sdk.create_side_effects.append(replacement)

        result = env.execute("echo hello")

        assert result == {"output": "hello\n", "returncode": 0}, label
        assert original.closed == 1
        assert vercel_sdk.current is replacement

    def test_run_bash_handle_uses_captured_sandbox_for_exec_and_cancel(
        self, make_env
    ):
        """A handle stays bound to the sandbox that was current when it started.

        Swapping ``env._sandbox`` while the command is in flight must not
        redirect ``kill()`` to the replacement sandbox.
        """
        env = make_env()
        original = env._sandbox
        assert original is not None
        replacement = _FakeSandbox()
        started = threading.Event()
        release = threading.Event()

        def blocking_command(_cmd: str, _args: list[str], _kwargs: dict):
            started.set()
            release.wait(timeout=5)
            return _FakeRunResult("done")

        original.run_command_side_effects.append(blocking_command)

        handle = env._run_bash("echo done")
        try:
            assert started.wait(timeout=1)
            env._sandbox = replacement
            handle.kill()
        finally:
            # Always unblock the in-flight command, even if a step above
            # failed, so the blocked call does not sit out its 5 s wait.
            release.set()

        assert handle.wait(timeout=2) == 0
        assert len(original.stop_calls) == 1
        assert replacement.stop_calls == []
        cmd, args, kwargs = original.run_command_calls[-1]
        assert cmd == "bash"
        assert args == ["-c", "echo done"]
        assert kwargs["cwd"] == "/vercel/sandbox"
class TestSnapshotPersistence:
    """Snapshot metadata handling on create and cleanup.

    Every test points ``HERMES_HOME`` at a temp directory before touching
    snapshot metadata.
    """

    def test_create_restores_from_saved_snapshot(
        self, make_env, vercel_module, vercel_sdk, monkeypatch, tmp_path
    ):
        """A stored snapshot id is passed to ``create`` as the sandbox source."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
        vercel_module._store_snapshot("task-123", "snap_saved")
        vercel_sdk.create_side_effects.append(_FakeSandbox(cwd="/restored"))

        env = make_env()

        first_create = vercel_sdk.create_kwargs[0]
        assert env.cwd == "/restored"
        assert first_create["source"] == {
            "type": "snapshot",
            "snapshot_id": "snap_saved",
        }
        assert vercel_module._load_snapshots() == {"task-123": "snap_saved"}

    def test_restore_failure_prunes_snapshot_and_falls_back_to_fresh_sandbox(
        self, make_env, vercel_module, vercel_sdk, monkeypatch, tmp_path
    ):
        """If restoring fails, the stale id is dropped and a fresh sandbox is made."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
        vercel_module._store_snapshot("task-123", "snap_stale")
        # First create (the restore attempt) fails; the second succeeds.
        vercel_sdk.create_side_effects.append(RuntimeError("snapshot missing"))
        vercel_sdk.create_side_effects.append(_FakeSandbox(cwd="/fresh"))

        env = make_env()

        restore_attempt, fallback = vercel_sdk.create_kwargs[0], vercel_sdk.create_kwargs[1]
        assert env.cwd == "/fresh"
        assert restore_attempt["source"] == {
            "type": "snapshot",
            "snapshot_id": "snap_stale",
        }
        assert "source" not in fallback
        assert vercel_module._load_snapshots() == {}

    def test_cleanup_stops_when_snapshot_fails_without_storing_metadata(
        self, make_env, vercel_module, vercel_sdk, monkeypatch, tmp_path
    ):
        """A failed snapshot still stops and closes the sandbox; nothing is stored."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
        env = make_env()
        sandbox = vercel_sdk.current
        sandbox.snapshot_side_effects.append(RuntimeError("snapshot failed"))

        env.cleanup()

        assert len(sandbox.snapshot_calls) == 1
        assert len(sandbox.stop_calls) == 1
        assert sandbox.closed == 1
        assert vercel_module._load_snapshots() == {}

    def test_non_persistent_cleanup_stops_without_snapshot(
        self, make_env, vercel_module, vercel_sdk, monkeypatch, tmp_path
    ):
        """With ``persistent_filesystem=False`` cleanup never snapshots."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
        env = make_env(persistent_filesystem=False)
        sandbox = vercel_sdk.current

        env.cleanup()

        assert sandbox.snapshot_calls == []
        assert len(sandbox.stop_calls) == 1
        assert sandbox.closed == 1
        assert vercel_module._load_snapshots() == {}

    def test_persistent_cleanup_without_task_id_stops_without_snapshot(
        self, make_env, vercel_module, vercel_sdk, monkeypatch, tmp_path
    ):
        """An empty ``task_id`` means cleanup takes no snapshot."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
        env = make_env(task_id="")
        sandbox = vercel_sdk.current

        env.cleanup()

        assert sandbox.snapshot_calls == []
        assert len(sandbox.stop_calls) == 1
        assert sandbox.closed == 1
        assert vercel_module._load_snapshots() == {}
class TestCleanup:
    """Cleanup must be resilient to failures in its own sub-steps."""

    def test_cleanup_continues_when_sync_back_raises(self, make_env, vercel_sdk):
        """An exception from ``sync_back`` does not stop snapshotting or closing."""

        class _RaisingSyncManager:
            # Replaces the real sync manager; its only method always fails.
            def sync_back(self):
                raise RuntimeError("download failed")

        env = make_env()
        sandbox = vercel_sdk.current
        env._sync_manager = _RaisingSyncManager()

        env.cleanup()

        assert len(sandbox.snapshot_calls) == 1
        assert sandbox.closed == 1