mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-03 01:07:31 +08:00
- Daytona: skip refresh_data() API call unless sandbox was interrupted/errored - Docker: cache _build_forward_env_args() to avoid re-reading .env every command - All remote backends: TTL-based sync skip (5s) to avoid redundant dir walks
448 lines
17 KiB
Python
448 lines
17 KiB
Python
"""Unit tests for the Daytona cloud sandbox environment backend."""
|
|
|
|
import threading
|
|
from types import SimpleNamespace
|
|
from unittest.mock import MagicMock, patch, PropertyMock
|
|
|
|
import pytest
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Helpers to build mock Daytona SDK objects
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _make_exec_response(result="", exit_code=0):
    """Build a lightweight stand-in for an SDK exec response.

    Args:
        result: Value stored on the ``result`` attribute.
        exit_code: Value stored on the ``exit_code`` attribute.

    Returns:
        A ``SimpleNamespace`` carrying ``result`` and ``exit_code``.
    """
    response = SimpleNamespace()
    response.result = result
    response.exit_code = exit_code
    return response
|
|
|
|
|
|
def _make_sandbox(sandbox_id="sb-123", state="started"):
    """Create a ``MagicMock`` posing as a Daytona sandbox.

    Args:
        sandbox_id: Value assigned to the mock's ``id`` attribute.
        state: Value assigned to the mock's ``state`` attribute.

    Returns:
        The mock, with ``process.exec`` pre-configured to return a default
        exec response (empty result, exit code 0).
    """
    sandbox = MagicMock()
    sandbox.process.exec.return_value = _make_exec_response()
    sandbox.state = state
    sandbox.id = sandbox_id
    return sandbox
|
|
|
|
|
|
def _patch_daytona_imports(monkeypatch):
    """Patch the daytona SDK so DaytonaEnvironment can be imported without it.

    A stub module named ``daytona`` is built and registered in
    ``sys.modules`` through ``monkeypatch.setitem``, so the registration is
    undone by whatever teardown the supplied ``monkeypatch`` object performs.

    Args:
        monkeypatch: Object exposing ``setitem(mapping, key, value)``
            (pytest's ``monkeypatch`` fixture in this test-suite).

    Returns:
        The stub module, exposing ``Daytona``,
        ``CreateSandboxFromImageParams``, ``DaytonaError``, ``Resources``
        and ``SandboxState``.
    """
    import enum
    import sys
    import types as _types

    class _SandboxState(str, enum.Enum):
        # str-based enum: members compare equal to their plain string values.
        STARTED = "started"
        STOPPED = "stopped"
        ARCHIVED = "archived"
        ERROR = "error"

    daytona_mod = _types.ModuleType("daytona")
    # Daytona / CreateSandboxFromImageParams are the MagicMock *class*, so
    # calling them yields a fresh mock each time.
    daytona_mod.Daytona = MagicMock
    daytona_mod.CreateSandboxFromImageParams = MagicMock
    daytona_mod.DaytonaError = type("DaytonaError", (Exception,), {})
    # Resources is a single mock *instance* so tests can inspect its call_args.
    daytona_mod.Resources = MagicMock(name="Resources")
    daytona_mod.SandboxState = _SandboxState

    monkeypatch.setitem(sys.modules, "daytona", daytona_mod)
    return daytona_mod
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Fixtures
|
|
# ---------------------------------------------------------------------------
|
|
|
|
@pytest.fixture()
def daytona_sdk(monkeypatch):
    """Install the stub ``daytona`` module and hand it to the test for assertions."""
    stub_module = _patch_daytona_imports(monkeypatch)
    return stub_module
|
|
|
|
|
|
@pytest.fixture()
def make_env(daytona_sdk, monkeypatch):
    """Return a factory that builds a ``DaytonaEnvironment`` on a mocked SDK.

    The factory accepts:
        sandbox: Sandbox mock to hand back from ``client.create``; a fresh
            one is made when omitted.
        get_side_effect: ``side_effect`` for ``client.get``; defaults to a
            ``DaytonaError("not found")``.
        list_return: Return value for ``client.list``; defaults to an object
            with an empty ``items`` list.
        home_dir: Result string every ``sandbox.process.exec`` call returns
            (used for the $HOME detection).
        persistent: Forwarded as ``persistent_filesystem``.
        **kwargs: Forwarded to ``DaytonaEnvironment``; ``disk`` defaults
            to 10240.

    The created environment exposes the mocked client as ``_mock_client``.
    """
    stubs = {
        # Prevent is_interrupted from interfering
        "tools.interrupt.is_interrupted": lambda: False,
        # Prevent skills/credential sync from consuming mock exec calls
        "tools.credential_files.get_credential_file_mounts": lambda: [],
        "tools.credential_files.get_skills_directory_mount": lambda **kw: None,
        "tools.credential_files.iter_skills_files": lambda **kw: [],
    }
    for target, replacement in stubs.items():
        monkeypatch.setattr(target, replacement)

    def _factory(
        sandbox=None,
        get_side_effect=None,
        list_return=None,
        home_dir="/root",
        persistent=True,
        **kwargs,
    ):
        if not sandbox:
            sandbox = _make_sandbox()
        # Mock the $HOME detection
        sandbox.process.exec.return_value = _make_exec_response(result=home_dir)

        client = MagicMock()
        client.create.return_value = sandbox
        # Default: no existing sandbox found via get()
        client.get.side_effect = (
            daytona_sdk.DaytonaError("not found")
            if get_side_effect is None
            else get_side_effect
        )
        # Default: no legacy sandbox found via list()
        client.list.return_value = (
            SimpleNamespace(items=[]) if list_return is None else list_return
        )

        daytona_sdk.Daytona = MagicMock(return_value=client)

        from tools.environments.daytona import DaytonaEnvironment

        kwargs.setdefault("disk", 10240)
        env = DaytonaEnvironment(
            image="test-image:latest",
            persistent_filesystem=persistent,
            **kwargs,
        )
        # expose for assertions
        env._mock_client = client
        return env

    return _factory
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Constructor / cwd resolution
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestCwdResolution:
    """Checks on the ``cwd`` an environment ends up with after construction."""

    def test_default_cwd_resolves_home(self, make_env):
        """With no cwd argument, cwd equals the home dir the sandbox reports."""
        reported_home = "/home/testuser"
        env = make_env(home_dir=reported_home)
        assert env.cwd == reported_home

    def test_tilde_cwd_resolves_home(self, make_env):
        """A ``~`` cwd is replaced by the home dir the sandbox reports."""
        reported_home = "/home/testuser"
        env = make_env(cwd="~", home_dir=reported_home)
        assert env.cwd == reported_home

    def test_explicit_cwd_not_overridden(self, make_env):
        """An explicit cwd survives construction.

        After init_session(), the cwdfile may update cwd to whatever the
        login shell reports, so every mocked exec call answers /workspace
        to keep init_session from replacing the explicit value.
        """
        workspace = "/workspace"
        sandbox = _make_sandbox()
        # Same answer for all exec calls, including init_session's snapshot
        # bootstrap and cwdfile reads.
        sandbox.process.exec.return_value = _make_exec_response(result=workspace)
        env = make_env(sandbox=sandbox, cwd=workspace, home_dir=workspace)
        assert env.cwd == workspace

    def test_home_detection_failure_keeps_default_cwd(self, make_env):
        """A failing $HOME probe leaves cwd at the constructor default.

        init_session() still runs, but its cwdfile read comes back empty,
        so cwd is not overwritten.
        """
        sandbox = _make_sandbox()
        calls_seen = 0

        def _fail_first_then_empty(*args, **kwargs):
            nonlocal calls_seen
            calls_seen += 1
            if calls_seen > 1:
                # Later calls (init_session, cwdfile reads) succeed but
                # return nothing, so they cannot override cwd.
                return _make_exec_response(result="", exit_code=0)
            # The very first call is the $HOME detection: make it fail.
            raise RuntimeError("exec failed")

        sandbox.process.exec.side_effect = _fail_first_then_empty
        env = make_env(sandbox=sandbox)
        assert env.cwd == "/home/daytona"

    def test_empty_home_keeps_default_cwd(self, make_env):
        """An empty $HOME result keeps the constructor default."""
        env = make_env(home_dir="")
        assert env.cwd == "/home/daytona"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Sandbox persistence / resume
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestPersistence:
    """Sandbox lookup / resume / creation depending on the persistence flag."""

    def test_persistent_resumes_via_get(self, make_env):
        """A sandbox returned by ``get()`` is started and nothing is created."""
        resumed = _make_sandbox(sandbox_id="sb-existing")
        resumed.process.exec.return_value = _make_exec_response(result="/root")

        def _lookup(name):
            return resumed

        env = make_env(get_side_effect=_lookup, persistent=True, task_id="mytask")
        client = env._mock_client
        resumed.start.assert_called_once()
        client.get.assert_called_once_with("hermes-mytask")
        client.create.assert_not_called()

    def test_persistent_resumes_legacy_via_list(self, make_env, daytona_sdk):
        """When ``get()`` fails, a sandbox found via ``list()`` is started instead."""
        legacy = _make_sandbox(sandbox_id="sb-legacy")
        legacy.process.exec.return_value = _make_exec_response(result="/root")
        not_found = daytona_sdk.DaytonaError("not found")
        listing = SimpleNamespace(items=[legacy])
        env = make_env(
            get_side_effect=not_found,
            list_return=listing,
            persistent=True,
            task_id="mytask",
        )
        client = env._mock_client
        legacy.start.assert_called_once()
        client.list.assert_called_once_with(
            labels={"hermes_task_id": "mytask"}, page=1, limit=1
        )
        client.create.assert_not_called()

    def test_persistent_creates_new_when_none_found(self, make_env, daytona_sdk):
        """With neither lookup succeeding, exactly one sandbox is created."""
        not_found = daytona_sdk.DaytonaError("not found")
        env = make_env(get_side_effect=not_found, persistent=True, task_id="mytask")
        client = env._mock_client
        client.create.assert_called_once()
        # The sandbox name and labels are checked indirectly, through the
        # arguments the lookups were called with.
        client.get.assert_called_with("hermes-mytask")
        client.list.assert_called_with(
            labels={"hermes_task_id": "mytask"}, page=1, limit=1
        )

    def test_non_persistent_skips_lookup(self, make_env):
        """Non-persistent environments create directly, without get()/list()."""
        env = make_env(persistent=False)
        client = env._mock_client
        client.get.assert_not_called()
        client.list.assert_not_called()
        client.create.assert_called_once()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Cleanup
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestCleanup:
    """Behaviour of ``cleanup()`` for persistent and throw-away sandboxes."""

    def test_persistent_cleanup_stops_sandbox(self, make_env):
        """Persistent environments stop their sandbox on cleanup."""
        env = make_env(persistent=True)
        # Grab the sandbox before cleanup() runs.
        sandbox = env._sandbox
        env.cleanup()
        sandbox.stop.assert_called_once()

    def test_non_persistent_cleanup_deletes_sandbox(self, make_env):
        """Non-persistent environments delete their sandbox through the client."""
        env = make_env(persistent=False)
        sandbox = env._sandbox
        env.cleanup()
        env._mock_client.delete.assert_called_once_with(sandbox)

    def test_cleanup_idempotent(self, make_env):
        """Calling cleanup() twice must not raise."""
        env = make_env(persistent=True)
        for _ in range(2):
            env.cleanup()

    def test_cleanup_swallows_errors(self, make_env):
        """A failing ``stop()`` is swallowed and the sandbox reference cleared."""
        env = make_env(persistent=True)
        env._sandbox.stop.side_effect = RuntimeError("stop failed")
        env.cleanup()  # should not raise
        assert env._sandbox is None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Execute
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestExecute:
    """Tests for ``execute()`` against a mocked sandbox ``process.exec``."""

    @staticmethod
    def _started_env(make_env, **env_kwargs):
        """Build an env on a started sandbox and clear the recorded exec calls.

        Construction-time exec calls ($HOME detection, init_session bootstrap
        and cwdfile read) all answer "/root"; the call log is reset afterwards
        so each test controls just the execute() calls.

        Returns:
            ``(env, sandbox)``.
        """
        sandbox = _make_sandbox()
        sandbox.process.exec.return_value = _make_exec_response(result="/root")
        sandbox.state = "started"
        env = make_env(sandbox=sandbox, **env_kwargs)
        sandbox.process.exec.reset_mock()
        return env, sandbox

    def test_basic_command(self, make_env):
        """Output and a zero return code are surfaced in the result dict."""
        env, sb = self._started_env(make_env)
        sb.process.exec.return_value = _make_exec_response(result="hello", exit_code=0)
        outcome = env.execute("echo hello")
        assert "hello" in outcome["output"]
        assert outcome["returncode"] == 0

    def test_command_wrapped_with_shell_timeout(self, make_env):
        """The command sent to the SDK is wrapped as ``timeout N bash -c '...'``."""
        env, sb = self._started_env(make_env, timeout=42)
        sb.process.exec.return_value = _make_exec_response(result="ok", exit_code=0)
        env.execute("echo hello")
        # Inspect the first positional argument of the most recent exec call.
        last_call = sb.process.exec.call_args_list[-1]
        sent = last_call[0][0]
        assert "timeout 42 bash -c " in sent

    def test_timeout_returns_exit_code_124(self, make_env):
        """Shell timeout utility returns exit code 124."""
        env, sb = self._started_env(make_env)
        sb.process.exec.return_value = _make_exec_response(result="", exit_code=124)
        outcome = env.execute("sleep 300", timeout=5)
        assert outcome["returncode"] == 124

    def test_nonzero_exit_code(self, make_env):
        """A non-zero exit code from the SDK is passed through unchanged."""
        env, sb = self._started_env(make_env)
        sb.process.exec.return_value = _make_exec_response(
            result="not found", exit_code=127
        )
        outcome = env.execute("bad_cmd")
        assert outcome["returncode"] == 127

    def test_stdin_data_wraps_heredoc(self, make_env):
        """``stdin_data`` shows up inside a heredoc in one of the exec calls."""
        env, sb = self._started_env(make_env)
        sb.process.exec.return_value = _make_exec_response(result="ok", exit_code=0)
        env.execute("python3", stdin_data="print('hi')")
        # The last call may be the cwdfile read, so scan every call for the
        # heredoc markers instead of looking only at the final one.
        all_cmds = [recorded[0][0] for recorded in sb.process.exec.call_args_list]
        heredoc_cmd = [candidate for candidate in all_cmds if "HERMES_EOF_" in candidate]
        assert heredoc_cmd, f"No heredoc found in exec calls: {all_cmds}"
        first_match = heredoc_cmd[0]
        assert "print" in first_match
        assert "hi" in first_match

    def test_custom_cwd_passed_through(self, make_env):
        """A per-call ``cwd`` appears in the command sent to the SDK."""
        env, sb = self._started_env(make_env)
        sb.process.exec.return_value = _make_exec_response(result="/tmp", exit_code=0)
        env.execute("pwd", cwd="/tmp")
        # In the unified model, cwd is embedded in the _wrap_command output
        # and the _ThreadedProcessHandle also passes cwd to the SDK; the
        # wrapped command therefore includes a cd to the cwd.
        last_call = sb.process.exec.call_args_list[-1]
        sent = last_call[0][0]
        assert "/tmp" in sent

    def test_daytona_error_returns_error_result(self, make_env, daytona_sdk):
        """In the unified model, SDK errors are caught by _ThreadedProcessHandle
        and returned as error results (no automatic retry)."""
        env, sb = self._started_env(make_env)
        sb.process.exec.side_effect = daytona_sdk.DaytonaError("transient")
        outcome = env.execute("echo retry")
        assert outcome["returncode"] == 1
        assert "transient" in outcome["output"]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Resource conversion
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestResourceConversion:
    """Checks the ``memory`` / ``disk`` values handed to ``daytona.Resources``.

    Going by the test names, the environment converts the constructor values
    to GiB and clamps small values to 1; the assertions below pin the exact
    numbers expected for each input.
    """

    def _get_resources_kwargs(self, daytona_sdk):
        """Return the keyword arguments of the latest ``Resources(...)`` call."""
        return daytona_sdk.Resources.call_args.kwargs

    def test_memory_converted_to_gib(self, make_env, daytona_sdk):
        """``memory=5120`` reaches ``Resources`` as ``memory=5``."""
        # Only the construction side effect matters; the env itself is unused.
        make_env(memory=5120)
        assert self._get_resources_kwargs(daytona_sdk)["memory"] == 5

    def test_disk_converted_to_gib(self, make_env, daytona_sdk):
        """``disk=10240`` reaches ``Resources`` as ``disk=10``."""
        make_env(disk=10240)
        assert self._get_resources_kwargs(daytona_sdk)["disk"] == 10

    def test_small_values_clamped_to_1(self, make_env, daytona_sdk):
        """Values of 100 for memory and disk both reach ``Resources`` as 1."""
        make_env(memory=100, disk=100)
        kw = self._get_resources_kwargs(daytona_sdk)
        assert kw["memory"] == 1
        assert kw["disk"] == 1
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Interrupt handling
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestInterrupt:
    """Interrupt handling while a command is running in the sandbox."""

    def test_interrupt_returns_130(self, make_env, monkeypatch):
        """In the unified model, interrupt is handled by BaseEnvironment._wait_for_process."""
        sb = _make_sandbox()
        sb.state = "started"
        sb.process.exec.return_value = _make_exec_response(result="/root")
        env = make_env(sandbox=sb)

        # Make the SDK exec block long enough for the interrupt check to fire.
        # An Event with a timeout is used instead of an unconditional sleep so
        # the blocked call can be released as soon as execute() has returned,
        # rather than lingering for the full 5 seconds.
        release = threading.Event()

        def slow_exec(*args, **kwargs):
            release.wait(timeout=5)
            return _make_exec_response(result="done", exit_code=0)

        sb.process.exec.reset_mock()
        sb.process.exec.side_effect = slow_exec

        # Patch is_interrupted in the base module where _wait_for_process uses it
        monkeypatch.setattr(
            "tools.environments.base.is_interrupted", lambda: True
        )
        try:
            result = env.execute("sleep 10")
        finally:
            # Unblock the fake exec call whether or not execute() succeeded.
            release.set()
        assert result["returncode"] == 130
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# SDK error handling
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestSdkError:
    """SDK exceptions raised during ``execute()``."""

    def test_sdk_error_returns_error_result(self, make_env, daytona_sdk):
        """SDK errors in _ThreadedProcessHandle are caught and returned cleanly."""
        sandbox = _make_sandbox()
        sandbox.process.exec.return_value = _make_exec_response(result="/root")
        sandbox.state = "started"
        env = make_env(sandbox=sandbox)

        # Forget the construction-time calls, then make every exec raise.
        exec_mock = sandbox.process.exec
        exec_mock.reset_mock()
        exec_mock.side_effect = daytona_sdk.DaytonaError("fail")

        outcome = env.execute("echo x")
        assert outcome["returncode"] == 1
        assert "fail" in outcome["output"]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Ensure sandbox ready
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class TestEnsureSandboxReady:
    """Tests for ``_ensure_sandbox_ready()`` and its ``_needs_refresh`` flag."""

    def test_restarts_stopped_sandbox(self, make_env):
        """With a refresh pending, a stopped sandbox gets started."""
        env = make_env()
        sandbox = env._sandbox
        sandbox.state = "stopped"
        env._needs_refresh = True
        env._ensure_sandbox_ready()
        sandbox.start.assert_called()

    def test_no_restart_when_running(self, make_env):
        """With a refresh pending, a sandbox already started is not restarted."""
        env = make_env()
        sandbox = env._sandbox
        sandbox.state = "started"
        env._needs_refresh = True
        env._ensure_sandbox_ready()
        sandbox.start.assert_not_called()

    def test_skips_refresh_when_not_needed(self, make_env):
        """Without a pending refresh, ``refresh_data()`` is never called."""
        env = make_env()
        env._needs_refresh = False
        env._ensure_sandbox_ready()
        env._sandbox.refresh_data.assert_not_called()
|