Files
hermes-agent/tests/tools/test_daytona_environment.py

448 lines
17 KiB
Python
Raw Normal View History

"""Unit tests for the Daytona cloud sandbox environment backend."""
import threading
from types import SimpleNamespace
from unittest.mock import MagicMock, patch, PropertyMock
import pytest
# ---------------------------------------------------------------------------
# Helpers to build mock Daytona SDK objects
# ---------------------------------------------------------------------------
def _make_exec_response(result="", exit_code=0):
    """Build a stand-in for the value returned by ``sandbox.process.exec``.

    Only the two attributes set here are available on the returned object:
    ``result`` and ``exit_code``.
    """
    response = SimpleNamespace()
    response.result = result
    response.exit_code = exit_code
    return response
def _make_sandbox(sandbox_id="sb-123", state="started"):
    """Return a ``MagicMock`` sandbox with ``id`` and ``state`` preset.

    ``process.exec`` on the mock returns a default exec response (empty
    result, exit code 0) until a test overrides it.
    """
    sandbox = MagicMock()
    sandbox.configure_mock(id=sandbox_id, state=state)
    sandbox.process.exec.return_value = _make_exec_response()
    return sandbox
def _patch_daytona_imports(monkeypatch):
    """Patch the daytona SDK so DaytonaEnvironment can be imported without it.

    A synthetic ``daytona`` module is registered in ``sys.modules`` through
    ``monkeypatch.setitem``, so the registration is undone when the
    monkeypatch fixture is torn down.

    The fake module exposes ``Daytona``, ``CreateSandboxFromImageParams``,
    ``DaytonaError``, ``Resources`` and ``SandboxState``.

    Returns:
        The fake module, so tests can reach ``DaytonaError`` / ``Resources``
        for raising and for assertions.
    """
    import enum
    import sys
    import types as _types

    class _SandboxState(str, enum.Enum):
        # ``str`` mixin: members compare equal to their plain string values.
        STARTED = "started"
        STOPPED = "stopped"
        ARCHIVED = "archived"
        ERROR = "error"

    daytona_mod = _types.ModuleType("daytona")
    daytona_mod.Daytona = MagicMock
    daytona_mod.CreateSandboxFromImageParams = MagicMock
    daytona_mod.DaytonaError = type("DaytonaError", (Exception,), {})
    daytona_mod.Resources = MagicMock(name="Resources")
    daytona_mod.SandboxState = _SandboxState

    monkeypatch.setitem(sys.modules, "daytona", daytona_mod)
    return daytona_mod
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture()
def daytona_sdk(monkeypatch):
    """Install the fake daytona SDK module and hand it to the test for assertions."""
    fake_sdk = _patch_daytona_imports(monkeypatch)
    return fake_sdk
@pytest.fixture()
def make_env(daytona_sdk, monkeypatch):
    """Provide a factory that builds a DaytonaEnvironment on top of the mocked SDK.

    The returned callable accepts:

    * ``sandbox`` - sandbox mock returned by ``client.create`` (a fresh one
      from ``_make_sandbox`` when falsy).
    * ``get_side_effect`` - side effect for ``client.get``; defaults to a
      ``DaytonaError("not found")``.
    * ``list_return`` - return value for ``client.list``; defaults to an
      object with an empty ``items`` list.
    * ``home_dir`` - text every ``sandbox.process.exec`` call returns as its
      ``result`` (used for the $HOME detection).
    * ``persistent`` - forwarded as ``persistent_filesystem``.
    * ``**kwargs`` - forwarded to ``DaytonaEnvironment``; ``disk`` defaults
      to 10240.

    The mock client is attached to the environment as ``_mock_client``.
    """
    # Neutralise collaborators: the interrupt flag must not interfere, and
    # skills/credential sync must not consume mock exec calls.
    stubs = {
        "tools.interrupt.is_interrupted": lambda: False,
        "tools.credential_files.get_credential_file_mounts": lambda: [],
        "tools.credential_files.get_skills_directory_mount": lambda **kw: None,
        "tools.credential_files.iter_skills_files": lambda **kw: [],
    }
    for target, replacement in stubs.items():
        monkeypatch.setattr(target, replacement)

    def _factory(
        sandbox=None,
        get_side_effect=None,
        list_return=None,
        home_dir="/root",
        persistent=True,
        **kwargs,
    ):
        sandbox = sandbox if sandbox else _make_sandbox()
        # Answer the $HOME detection. Note this replaces any return_value
        # the caller configured on the sandbox before passing it in.
        sandbox.process.exec.return_value = _make_exec_response(result=home_dir)

        if get_side_effect is None:
            # Default: no existing sandbox found via get()
            get_side_effect = daytona_sdk.DaytonaError("not found")
        if list_return is None:
            # Default: no legacy sandbox found via list()
            list_return = SimpleNamespace(items=[])

        mock_client = MagicMock()
        mock_client.create.return_value = sandbox
        mock_client.get.side_effect = get_side_effect
        mock_client.list.return_value = list_return
        daytona_sdk.Daytona = MagicMock(return_value=mock_client)

        # Imported only after the fake SDK and client are in place.
        from tools.environments.daytona import DaytonaEnvironment

        kwargs.setdefault("disk", 10240)
        env = DaytonaEnvironment(
            image="test-image:latest",
            persistent_filesystem=persistent,
            **kwargs,
        )
        env._mock_client = mock_client  # expose for assertions
        return env

    return _factory
# ---------------------------------------------------------------------------
# Constructor / cwd resolution
# ---------------------------------------------------------------------------
class TestCwdResolution:
    """Check how the constructor settles on ``env.cwd``."""

    def test_default_cwd_resolves_home(self, make_env):
        """With no cwd given, cwd becomes the detected home directory."""
        environment = make_env(home_dir="/home/testuser")
        assert environment.cwd == "/home/testuser"

    def test_tilde_cwd_resolves_home(self, make_env):
        """A ``~`` cwd is replaced by the detected home directory."""
        environment = make_env(cwd="~", home_dir="/home/testuser")
        assert environment.cwd == "/home/testuser"

    def test_explicit_cwd_not_overridden(self, make_env):
        """Explicit cwd should be set before init_session.

        After init_session(), the cwdfile may update cwd to whatever the
        login shell reports. The mock returns /workspace for every exec call
        (snapshot bootstrap and cwdfile reads included) so init_session
        doesn't override the explicit cwd.
        """
        sandbox = _make_sandbox()
        sandbox.process.exec.return_value = _make_exec_response(result="/workspace")
        environment = make_env(sandbox=sandbox, cwd="/workspace", home_dir="/workspace")
        assert environment.cwd == "/workspace"

    def test_home_detection_failure_keeps_default_cwd(self, make_env):
        """When $HOME detection fails, cwd falls back to constructor default.

        init_session() still runs but its cwdfile read returns empty,
        so cwd is not overwritten.
        """
        sandbox = _make_sandbox()
        seen_calls = []

        def _fail_first_exec(*args, **kwargs):
            seen_calls.append(args)
            if len(seen_calls) == 1:
                # First exec is the $HOME detection: make it blow up.
                raise RuntimeError("exec failed")
            # Later calls (init_session, cwdfile reads) succeed with empty
            # output so they don't override cwd.
            return _make_exec_response(result="", exit_code=0)

        sandbox.process.exec.side_effect = _fail_first_exec
        environment = make_env(sandbox=sandbox)
        assert environment.cwd == "/home/daytona"

    def test_empty_home_keeps_default_cwd(self, make_env):
        """An empty $HOME result leaves the constructor default in place."""
        environment = make_env(home_dir="")
        assert environment.cwd == "/home/daytona"  # keeps constructor default
# ---------------------------------------------------------------------------
# Sandbox persistence / resume
# ---------------------------------------------------------------------------
class TestPersistence:
    """Check sandbox lookup, resume and creation for persistent and ephemeral modes."""

    def test_persistent_resumes_via_get(self, make_env):
        """A sandbox returned by ``get`` is started and no new one is created."""
        resumed = _make_sandbox(sandbox_id="sb-existing")
        resumed.process.exec.return_value = _make_exec_response(result="/root")

        env = make_env(
            get_side_effect=lambda name: resumed,
            persistent=True,
            task_id="mytask",
        )

        client = env._mock_client
        resumed.start.assert_called_once()
        client.get.assert_called_once_with("hermes-mytask")
        client.create.assert_not_called()

    def test_persistent_resumes_legacy_via_list(self, make_env, daytona_sdk):
        """When ``get`` fails, a sandbox found through the label ``list`` is resumed."""
        legacy_sandbox = _make_sandbox(sandbox_id="sb-legacy")
        legacy_sandbox.process.exec.return_value = _make_exec_response(result="/root")

        env = make_env(
            get_side_effect=daytona_sdk.DaytonaError("not found"),
            list_return=SimpleNamespace(items=[legacy_sandbox]),
            persistent=True,
            task_id="mytask",
        )

        client = env._mock_client
        legacy_sandbox.start.assert_called_once()
        client.list.assert_called_once_with(
            labels={"hermes_task_id": "mytask"}, page=1, limit=1
        )
        client.create.assert_not_called()

    def test_persistent_creates_new_when_none_found(self, make_env, daytona_sdk):
        """If neither lookup finds a sandbox, exactly one is created."""
        env = make_env(
            get_side_effect=daytona_sdk.DaytonaError("not found"),
            persistent=True,
            task_id="mytask",
        )

        client = env._mock_client
        client.create.assert_called_once()
        # The sandbox name and label are checked indirectly, through the
        # arguments of the two lookups that precede creation.
        client.get.assert_called_with("hermes-mytask")
        client.list.assert_called_with(
            labels={"hermes_task_id": "mytask"}, page=1, limit=1
        )

    def test_non_persistent_skips_lookup(self, make_env):
        """Non-persistent environments create a sandbox without any lookup."""
        env = make_env(persistent=False)

        client = env._mock_client
        client.get.assert_not_called()
        client.list.assert_not_called()
        client.create.assert_called_once()
# ---------------------------------------------------------------------------
# Cleanup
# ---------------------------------------------------------------------------
class TestCleanup:
    """Check what ``cleanup()`` does to the sandbox in each persistence mode."""

    def test_persistent_cleanup_stops_sandbox(self, make_env):
        """Persistent environments stop their sandbox on cleanup."""
        env = make_env(persistent=True)
        # Grab the sandbox first: cleanup() clears env._sandbox.
        sandbox = env._sandbox
        env.cleanup()
        sandbox.stop.assert_called_once()

    def test_non_persistent_cleanup_deletes_sandbox(self, make_env):
        """Non-persistent environments delete their sandbox through the client."""
        env = make_env(persistent=False)
        sandbox = env._sandbox
        env.cleanup()
        env._mock_client.delete.assert_called_once_with(sandbox)

    def test_cleanup_idempotent(self, make_env):
        """Calling cleanup twice must not raise."""
        env = make_env(persistent=True)
        for _ in range(2):
            env.cleanup()

    def test_cleanup_swallows_errors(self, make_env):
        """A failing ``stop`` is swallowed and the sandbox reference is still cleared."""
        env = make_env(persistent=True)
        env._sandbox.stop.side_effect = RuntimeError("stop failed")
        env.cleanup()  # should not raise
        assert env._sandbox is None
# ---------------------------------------------------------------------------
# Execute
# ---------------------------------------------------------------------------
class TestExecute:
    """Check ``execute()`` against a started sandbox with a scripted ``process.exec``."""

    @staticmethod
    def _started_env(make_env, **env_kwargs):
        """Build an environment on a started sandbox and clear its exec history.

        During construction every exec call returns ``/root``. Afterwards the
        exec mock's call history is reset, so each test scripts and inspects
        only the calls made by its own ``execute()``.

        Returns:
            ``(env, sandbox)``.
        """
        sandbox = _make_sandbox()
        sandbox.process.exec.return_value = _make_exec_response(result="/root")
        sandbox.state = "started"
        env = make_env(sandbox=sandbox, **env_kwargs)
        sandbox.process.exec.reset_mock()
        return env, sandbox

    def test_basic_command(self, make_env):
        """Output and exit code of a successful command are passed through."""
        env, sandbox = self._started_env(make_env)
        sandbox.process.exec.return_value = _make_exec_response(result="hello", exit_code=0)

        outcome = env.execute("echo hello")

        assert "hello" in outcome["output"]
        assert outcome["returncode"] == 0

    def test_command_wrapped_with_shell_timeout(self, make_env):
        """The command handed to the SDK is wrapped as ``timeout N bash -c '...'``."""
        env, sandbox = self._started_env(make_env, timeout=42)
        sandbox.process.exec.return_value = _make_exec_response(result="ok", exit_code=0)

        env.execute("echo hello")

        # NOTE(review): test_stdin_data_wraps_heredoc warns that the final
        # exec call may be the cwdfile read; confirm the last call is the
        # wrapped command here.
        last_cmd = sandbox.process.exec.call_args_list[-1].args[0]
        assert "timeout 42 bash -c " in last_cmd

    def test_timeout_returns_exit_code_124(self, make_env):
        """Shell timeout utility returns exit code 124."""
        env, sandbox = self._started_env(make_env)
        sandbox.process.exec.return_value = _make_exec_response(result="", exit_code=124)

        outcome = env.execute("sleep 300", timeout=5)

        assert outcome["returncode"] == 124

    def test_nonzero_exit_code(self, make_env):
        """A non-zero exit code from the SDK is reported unchanged."""
        env, sandbox = self._started_env(make_env)
        sandbox.process.exec.return_value = _make_exec_response(
            result="not found", exit_code=127
        )

        outcome = env.execute("bad_cmd")

        assert outcome["returncode"] == 127

    def test_stdin_data_wraps_heredoc(self, make_env):
        """``stdin_data`` is embedded in the command through a heredoc."""
        env, sandbox = self._started_env(make_env)
        sandbox.process.exec.return_value = _make_exec_response(result="ok", exit_code=0)

        env.execute("python3", stdin_data="print('hi')")

        # The last call may be the cwdfile read, so scan every exec call
        # for the heredoc marker instead of looking only at the final one.
        all_cmds = [recorded.args[0] for recorded in sandbox.process.exec.call_args_list]
        heredoc_cmd = [c for c in all_cmds if "HERMES_EOF_" in c]
        assert heredoc_cmd, f"No heredoc found in exec calls: {all_cmds}"
        first_heredoc = heredoc_cmd[0]
        assert "print" in first_heredoc
        assert "hi" in first_heredoc

    def test_custom_cwd_passed_through(self, make_env):
        """A per-call ``cwd`` shows up in the command sent to the SDK."""
        env, sandbox = self._started_env(make_env)
        sandbox.process.exec.return_value = _make_exec_response(result="/tmp", exit_code=0)

        env.execute("pwd", cwd="/tmp")

        # In the unified model, cwd is embedded in the _wrap_command output
        # and the _ThreadedProcessHandle also passes cwd to the SDK, so the
        # wrapped command includes a cd to the cwd.
        # NOTE(review): as above, confirm the last exec call is the wrapped
        # command rather than the cwdfile read.
        last_cmd = sandbox.process.exec.call_args_list[-1].args[0]
        assert "/tmp" in last_cmd

    def test_daytona_error_returns_error_result(self, make_env, daytona_sdk):
        """In the unified model, SDK errors are caught by _ThreadedProcessHandle
        and returned as error results (no automatic retry)."""
        env, sandbox = self._started_env(make_env)
        sandbox.process.exec.side_effect = daytona_sdk.DaytonaError("transient")

        outcome = env.execute("echo retry")

        assert outcome["returncode"] == 1
        assert "transient" in outcome["output"]
# ---------------------------------------------------------------------------
# Resource conversion
# ---------------------------------------------------------------------------
class TestResourceConversion:
    """Check the ``memory`` / ``disk`` values handed to the SDK's ``Resources``."""

    def _get_resources_kwargs(self, daytona_sdk):
        """Return the keyword arguments of the most recent ``Resources(...)`` call.

        Fails with an explicit message when ``Resources`` was never called,
        rather than with an ``AttributeError`` on ``None.kwargs``.
        """
        recorded_call = daytona_sdk.Resources.call_args
        assert recorded_call is not None, "daytona.Resources was never called"
        return recorded_call.kwargs

    def test_memory_converted_to_gib(self, make_env, daytona_sdk):
        """memory=5120 reaches Resources as 5 (input presumably MiB - confirm)."""
        make_env(memory=5120)
        assert self._get_resources_kwargs(daytona_sdk)["memory"] == 5

    def test_disk_converted_to_gib(self, make_env, daytona_sdk):
        """disk=10240 reaches Resources as 10 (input presumably MiB - confirm)."""
        make_env(disk=10240)
        assert self._get_resources_kwargs(daytona_sdk)["disk"] == 10

    def test_small_values_clamped_to_1(self, make_env, daytona_sdk):
        """Values that would convert to less than 1 are raised to 1."""
        make_env(memory=100, disk=100)
        kw = self._get_resources_kwargs(daytona_sdk)
        assert kw["memory"] == 1
        assert kw["disk"] == 1
# ---------------------------------------------------------------------------
# Interrupt handling
# ---------------------------------------------------------------------------
class TestInterrupt:
    """Check that an interrupted command reports return code 130."""

    def test_interrupt_returns_130(self, make_env, monkeypatch):
        """In the unified model, interrupt is handled by BaseEnvironment._wait_for_process."""
        sb = _make_sandbox()
        sb.state = "started"
        sb.process.exec.return_value = _make_exec_response(result="/root")
        env = make_env(sandbox=sb)

        # Make the SDK exec block long enough for the interrupt check to
        # fire. Waiting on an Event (capped at 5 s) rather than sleeping
        # unconditionally lets the test unblock the fake exec as soon as
        # execute() has returned, so nothing stays parked for the full 5 s
        # if exec runs on a background thread (as the _ThreadedProcessHandle
        # name suggests).
        release = threading.Event()

        def slow_exec(*args, **kwargs):
            release.wait(timeout=5)
            return _make_exec_response(result="done", exit_code=0)

        sb.process.exec.reset_mock()
        sb.process.exec.side_effect = slow_exec

        # Patch is_interrupted in the base module where _wait_for_process uses it
        monkeypatch.setattr(
            "tools.environments.base.is_interrupted", lambda: True
        )

        try:
            result = env.execute("sleep 10")
        finally:
            # Always release the fake exec, even if execute() raised.
            release.set()

        assert result["returncode"] == 130
# ---------------------------------------------------------------------------
# SDK error handling
# ---------------------------------------------------------------------------
class TestSdkError:
    """Check how an SDK exception raised during exec is reported by ``execute()``."""

    def test_sdk_error_returns_error_result(self, make_env, daytona_sdk):
        """SDK errors in _ThreadedProcessHandle are caught and returned cleanly."""
        sandbox = _make_sandbox()
        sandbox.state = "started"
        sandbox.process.exec.return_value = _make_exec_response(result="/root")
        env = make_env(sandbox=sandbox)

        # From here on every exec call raises; history from construction is dropped.
        exec_mock = sandbox.process.exec
        exec_mock.reset_mock()
        exec_mock.side_effect = daytona_sdk.DaytonaError("fail")

        outcome = env.execute("echo x")

        assert outcome["returncode"] == 1
        assert "fail" in outcome["output"]
# ---------------------------------------------------------------------------
# Ensure sandbox ready
# ---------------------------------------------------------------------------
class TestEnsureSandboxReady:
    """Check ``_ensure_sandbox_ready`` for each refresh-flag / sandbox-state combination."""

    def test_restarts_stopped_sandbox(self, make_env):
        """A stopped sandbox is started when a refresh is pending."""
        environment = make_env()
        environment._needs_refresh = True
        environment._sandbox.state = "stopped"

        environment._ensure_sandbox_ready()

        environment._sandbox.start.assert_called()

    def test_no_restart_when_running(self, make_env):
        """A sandbox already in the ``started`` state is not started again."""
        environment = make_env()
        environment._needs_refresh = True
        environment._sandbox.state = "started"

        environment._ensure_sandbox_ready()

        environment._sandbox.start.assert_not_called()

    def test_skips_refresh_when_not_needed(self, make_env):
        """With no refresh pending, sandbox data is not refreshed."""
        environment = make_env()
        environment._needs_refresh = False

        environment._ensure_sandbox_ready()

        environment._sandbox.refresh_data.assert_not_called()