refactor: remove mini-swe-agent dependency — inline Docker/Modal backends (#2804)

Drop the mini-swe-agent git submodule. All terminal backends now use
hermes-agent's own environment implementations directly.

Docker backend:
- Inline the `docker run -d` container startup (was 15 lines in
  minisweagent's DockerEnvironment). Our wrapper already handled
  execute(), cleanup(), security hardening, volumes, and resource limits.

Modal backend:
- Import swe-rex's ModalDeployment directly instead of going through
  minisweagent's 90-line passthrough wrapper.
- Bake the _AsyncWorker pattern (from environments/patches.py) directly
  into ModalEnvironment for Atropos compatibility without monkey-patching.

Cleanup:
- Remove minisweagent_path.py (submodule path resolution helper)
- Remove submodule init/install from install.sh and setup-hermes.sh
- Remove mini-swe-agent from .gitmodules
- environments/patches.py is now a no-op (kept for backward compat)
- terminal_tool.py no longer does sys.path hacking for minisweagent
- mini_swe_runner.py guards imports (optional, for RL training only)
- Update all affected tests to mock the new direct subprocess calls
- Update README.md, CONTRIBUTING.md

No functionality change — all Docker, Modal, local, SSH, Singularity,
and Daytona backends behave identically. 6093 tests pass.
This commit is contained in:
Teknium
2026-03-24 07:30:25 -07:00
committed by GitHub
parent 2233f764af
commit 02b38b93cb
22 changed files with 283 additions and 591 deletions

View File

@@ -41,7 +41,6 @@ except ImportError:
# Add project root to path for imports
parent_dir = Path(__file__).parent.parent.parent
sys.path.insert(0, str(parent_dir))
sys.path.insert(0, str(parent_dir / "mini-swe-agent" / "src"))
# Import terminal_tool module directly using importlib to avoid tools/__init__.py
import importlib.util

View File

@@ -1,34 +1,5 @@
"""Tests for minisweagent_path.py."""
"""Tests for minisweagent_path.py — REMOVED.
from pathlib import Path
from minisweagent_path import discover_minisweagent_src
def test_discover_minisweagent_src_in_current_checkout(tmp_path):
repo = tmp_path / "repo"
src = repo / "mini-swe-agent" / "src"
src.mkdir(parents=True)
assert discover_minisweagent_src(repo) == src.resolve()
def test_discover_minisweagent_src_falls_back_from_worktree_to_main_checkout(tmp_path):
main_repo = tmp_path / "main-repo"
(main_repo / ".git" / "worktrees" / "wt1").mkdir(parents=True)
main_src = main_repo / "mini-swe-agent" / "src"
main_src.mkdir(parents=True)
worktree = tmp_path / "worktree"
worktree.mkdir()
(worktree / ".git").write_text(f"gitdir: {main_repo / '.git' / 'worktrees' / 'wt1'}\n", encoding="utf-8")
(worktree / "mini-swe-agent").mkdir() # empty placeholder, no src/
assert discover_minisweagent_src(worktree) == main_src.resolve()
def test_discover_minisweagent_src_returns_none_when_missing(tmp_path):
repo = tmp_path / "repo"
repo.mkdir()
assert discover_minisweagent_src(repo) is None
minisweagent_path.py was removed as part of dropping the mini-swe-agent
dependency. These tests are no longer applicable.
"""

View File

@@ -131,9 +131,9 @@ class TestExecuteCode(unittest.TestCase):
def test_repo_root_modules_are_importable(self):
"""Sandboxed scripts can import modules that live at the repo root."""
result = self._run('import minisweagent_path; print(minisweagent_path.__file__)')
result = self._run('import hermes_constants; print(hermes_constants.__file__)')
self.assertEqual(result["status"], "success")
self.assertIn("minisweagent_path.py", result["output"])
self.assertIn("hermes_constants.py", result["output"])
def test_single_tool_call(self):
"""Script calls terminal and prints the result."""

View File

@@ -9,25 +9,24 @@ import pytest
from tools.environments import docker as docker_env
def _install_fake_minisweagent(monkeypatch, captured_run_args):
class MockInnerDocker:
container_id = "fake-container"
config = type("Config", (), {"executable": "/usr/bin/docker", "forward_env": [], "env": {}})()
def _mock_subprocess_run(monkeypatch):
"""Mock subprocess.run to intercept docker run -d and docker version calls.
def __init__(self, **kwargs):
captured_run_args.extend(kwargs.get("run_args", []))
Returns a list of captured (cmd, kwargs) tuples for inspection.
"""
calls = []
def cleanup(self):
pass
def _run(cmd, **kwargs):
calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
if isinstance(cmd, list) and len(cmd) >= 2:
if cmd[1] == "version":
return subprocess.CompletedProcess(cmd, 0, stdout="Docker version", stderr="")
if cmd[1] == "run":
return subprocess.CompletedProcess(cmd, 0, stdout="fake-container-id\n", stderr="")
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
minisweagent_mod = types.ModuleType("minisweagent")
environments_mod = types.ModuleType("minisweagent.environments")
docker_mod = types.ModuleType("minisweagent.environments.docker")
docker_mod.DockerEnvironment = MockInnerDocker
monkeypatch.setitem(sys.modules, "minisweagent", minisweagent_mod)
monkeypatch.setitem(sys.modules, "minisweagent.environments", environments_mod)
monkeypatch.setitem(sys.modules, "minisweagent.environments.docker", docker_mod)
monkeypatch.setattr(docker_env.subprocess, "run", _run)
return calls
def _make_dummy_env(**kwargs):
@@ -49,7 +48,7 @@ def _make_dummy_env(**kwargs):
def test_ensure_docker_available_logs_and_raises_when_not_found(monkeypatch, caplog):
"""When docker cannot be found, raise a clear error before mini-swe setup."""
"""When docker cannot be found, raise a clear error before container setup."""
monkeypatch.setattr(docker_env, "find_docker", lambda: None)
monkeypatch.setattr(
@@ -118,14 +117,8 @@ def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path):
project_dir = tmp_path / "my-project"
project_dir.mkdir()
def _run_docker_version(*args, **kwargs):
return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="")
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version)
captured_run_args = []
_install_fake_minisweagent(monkeypatch, captured_run_args)
calls = _mock_subprocess_run(monkeypatch)
_make_dummy_env(
cwd="/workspace",
@@ -133,7 +126,10 @@ def test_auto_mount_host_cwd_adds_volume(monkeypatch, tmp_path):
auto_mount_cwd=True,
)
run_args_str = " ".join(captured_run_args)
# Find the docker run call and check its args
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
assert run_calls, "docker run should have been called"
run_args_str = " ".join(run_calls[0][0])
assert f"{project_dir}:/workspace" in run_args_str
@@ -142,14 +138,8 @@ def test_auto_mount_disabled_by_default(monkeypatch, tmp_path):
project_dir = tmp_path / "my-project"
project_dir.mkdir()
def _run_docker_version(*args, **kwargs):
return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="")
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version)
captured_run_args = []
_install_fake_minisweagent(monkeypatch, captured_run_args)
calls = _mock_subprocess_run(monkeypatch)
_make_dummy_env(
cwd="/root",
@@ -157,7 +147,9 @@ def test_auto_mount_disabled_by_default(monkeypatch, tmp_path):
auto_mount_cwd=False,
)
run_args_str = " ".join(captured_run_args)
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
assert run_calls, "docker run should have been called"
run_args_str = " ".join(run_calls[0][0])
assert f"{project_dir}:/workspace" not in run_args_str
@@ -168,14 +160,8 @@ def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path
other_dir = tmp_path / "other"
other_dir.mkdir()
def _run_docker_version(*args, **kwargs):
return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="")
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version)
captured_run_args = []
_install_fake_minisweagent(monkeypatch, captured_run_args)
calls = _mock_subprocess_run(monkeypatch)
_make_dummy_env(
cwd="/workspace",
@@ -184,7 +170,9 @@ def test_auto_mount_skipped_when_workspace_already_mounted(monkeypatch, tmp_path
volumes=[f"{other_dir}:/workspace"],
)
run_args_str = " ".join(captured_run_args)
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
assert run_calls, "docker run should have been called"
run_args_str = " ".join(run_calls[0][0])
assert f"{other_dir}:/workspace" in run_args_str
assert run_args_str.count(":/workspace") == 1
@@ -194,14 +182,8 @@ def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path):
project_dir = tmp_path / "my-project"
project_dir.mkdir()
def _run_docker_version(*args, **kwargs):
return subprocess.CompletedProcess(args[0], 0, stdout="Docker version", stderr="")
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env.subprocess, "run", _run_docker_version)
captured_run_args = []
_install_fake_minisweagent(monkeypatch, captured_run_args)
calls = _mock_subprocess_run(monkeypatch)
_make_dummy_env(
cwd="/workspace",
@@ -211,28 +193,23 @@ def test_auto_mount_replaces_persistent_workspace_bind(monkeypatch, tmp_path):
task_id="test-persistent-auto-mount",
)
run_args_str = " ".join(captured_run_args)
run_calls = [c for c in calls if isinstance(c[0], list) and len(c[0]) >= 2 and c[0][1] == "run"]
assert run_calls, "docker run should have been called"
run_args_str = " ".join(run_calls[0][0])
assert f"{project_dir}:/workspace" in run_args_str
assert "/sandboxes/docker/test-persistent-auto-mount/workspace:/workspace" not in run_args_str
def test_non_persistent_cleanup_removes_container(monkeypatch):
"""When container_persistent=false, cleanup() must run docker rm -f so the container is removed (Fixes #1679)."""
run_calls = []
def _run(cmd, **kwargs):
run_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
if cmd and getattr(cmd[0], "__str__", None) and "docker" in str(cmd[0]):
if len(cmd) >= 2 and cmd[1] == "run":
return subprocess.CompletedProcess(cmd, 0, stdout="abc123container\n", stderr="")
return subprocess.CompletedProcess(cmd, 0, stdout="", stderr="")
"""When persistent=false, cleanup() must schedule docker stop + rm."""
monkeypatch.setattr(docker_env, "find_docker", lambda: "/usr/bin/docker")
monkeypatch.setattr(docker_env.subprocess, "run", _run)
monkeypatch.setattr(docker_env.subprocess, "Popen", lambda *a, **k: type("P", (), {"poll": lambda: None, "wait": lambda **kw: None, "returncode": 0, "stdout": iter([]), "stdin": None})())
calls = _mock_subprocess_run(monkeypatch)
captured_run_args = []
_install_fake_minisweagent(monkeypatch, captured_run_args)
popen_cmds = []
monkeypatch.setattr(
docker_env.subprocess, "Popen",
lambda cmd, **kw: (popen_cmds.append(cmd), type("P", (), {"poll": lambda s: 0, "wait": lambda s, **k: None, "returncode": 0, "stdout": iter([]), "stdin": None})())[1],
)
env = _make_dummy_env(persistent_filesystem=False, task_id="ephemeral-task")
assert env._container_id
@@ -240,8 +217,9 @@ def test_non_persistent_cleanup_removes_container(monkeypatch):
env.cleanup()
rm_calls = [c for c in run_calls if isinstance(c[0], list) and len(c[0]) >= 4 and c[0][1:4] == ["rm", "-f", container_id]]
assert len(rm_calls) >= 1, "cleanup() should run docker rm -f <container_id> when container_persistent=false"
# Should have stop and rm calls via Popen
stop_cmds = [c for c in popen_cmds if container_id in str(c) and "stop" in str(c)]
assert len(stop_cmds) >= 1, f"cleanup() should schedule docker stop for {container_id}"
class _FakePopen:
@@ -263,10 +241,8 @@ def _make_execute_only_env(forward_env=None):
env._forward_env = forward_env or []
env._prepare_command = lambda command: (command, None)
env._timeout_result = lambda timeout: {"output": f"timed out after {timeout}", "returncode": 124}
env._inner = type("Inner", (), {
"container_id": "test-container",
"config": type("Cfg", (), {"executable": "/usr/bin/docker", "env": {}})(),
})()
env._container_id = "test-container"
env._docker_exe = "/usr/bin/docker"
return env
@@ -304,31 +280,3 @@ def test_execute_prefers_shell_env_over_hermes_dotenv(monkeypatch):
assert "GITHUB_TOKEN=value_from_shell" in popen_calls[0]
assert "GITHUB_TOKEN=value_from_dotenv" not in popen_calls[0]
def test_non_persistent_cleanup_removes_container(monkeypatch):
"""When container_persistent=false, cleanup() must run docker rm -f so the container is removed (Fixes #1679)."""
run_calls = []
def _run(cmd, **kwargs):
run_calls.append((list(cmd) if isinstance(cmd, list) else cmd, kwargs))
if cmd and getattr(cmd[0], '__str__', None) and 'docker' in str(cmd[0]):
if len(cmd) >= 2 and cmd[1] == 'run':
return subprocess.CompletedProcess(cmd, 0, stdout="abc123container\n", stderr="")
return subprocess.CompletedProcess(cmd, 0, stdout='', stderr='')
monkeypatch.setattr(docker_env, 'find_docker', lambda: '/usr/bin/docker')
monkeypatch.setattr(docker_env.subprocess, 'run', _run)
monkeypatch.setattr(docker_env.subprocess, 'Popen', lambda *a, **k: type('P', (), {'poll': lambda: None, 'wait': lambda **kw: None, 'returncode': 0, 'stdout': iter([]), 'stdin': None})())
captured_run_args = []
_install_fake_minisweagent(monkeypatch, captured_run_args)
env = _make_dummy_env(persistent_filesystem=False, task_id='ephemeral-task')
assert env._container_id
container_id = env._container_id
env.cleanup()
rm_calls = [c for c in run_calls if isinstance(c[0], list) and len(c[0]) >= 4 and c[0][1:4] == ['rm', '-f', container_id]]
assert len(rm_calls) >= 1, 'cleanup() should run docker rm -f <container_id> when container_persistent=false'

View File

@@ -1,11 +1,11 @@
"""Tests for Modal sandbox infrastructure fixes (TBLite baseline).
Covers the 9 bugs discovered while setting up TBLite evaluation:
1. Tool resolution — terminal + file tools load with minisweagent
Covers the bugs discovered while setting up TBLite evaluation:
1. Tool resolution — terminal + file tools load correctly
2. CWD fix — host paths get replaced with /root for container backends
3. ephemeral_disk version check
4. Tilde ~ replaced with /root for container backends
5. ensurepip fix in patches.py for Modal image builder
5. ensurepip fix in Modal image builder
6. install_pipx stays True for swerex-remote
7. /home/ added to host prefix check
"""
@@ -36,17 +36,8 @@ except ImportError:
class TestToolResolution:
"""Verify get_tool_definitions returns all expected tools for eval."""
def _has_minisweagent(self):
try:
import minisweagent # noqa: F401
return True
except ImportError:
return False
def test_terminal_and_file_toolsets_resolve_all_tools(self):
"""enabled_toolsets=['terminal', 'file'] should produce 6 tools."""
if not self._has_minisweagent():
pytest.skip("minisweagent not installed (git submodule update --init)")
from model_tools import get_tool_definitions
tools = get_tool_definitions(
enabled_toolsets=["terminal", "file"],
@@ -58,18 +49,13 @@ class TestToolResolution:
def test_terminal_tool_present(self):
"""The terminal tool must be present (not silently dropped)."""
if not self._has_minisweagent():
pytest.skip("minisweagent not installed (git submodule update --init)")
from model_tools import get_tool_definitions
tools = get_tool_definitions(
enabled_toolsets=["terminal", "file"],
quiet_mode=True,
)
names = [t["function"]["name"] for t in tools]
assert "terminal" in names, (
f"terminal tool missing! Only got: {names}. "
"Check that minisweagent is installed (git submodule update --init)."
)
assert "terminal" in names, f"terminal tool missing! Only got: {names}."
# =========================================================================
@@ -269,38 +255,37 @@ class TestModalEnvironmentDefaults:
# =========================================================================
class TestEnsurepipFix:
"""Verify the pip fix is applied in the patched Modal init."""
"""Verify the pip fix is applied in the ModalEnvironment init."""
def test_patched_init_creates_image_with_setup_commands(self):
"""The patched __init__ should create a modal.Image with pip fix."""
def test_modal_environment_creates_image_with_setup_commands(self):
"""ModalEnvironment.__init__ should create a modal.Image with pip fix."""
try:
from environments.patches import _patch_swerex_modal
from tools.environments.modal import ModalEnvironment
except ImportError:
pytest.skip("environments.patches not importable")
pytest.skip("tools.environments.modal not importable")
# Check that the patch code references ensurepip
import inspect
source = inspect.getsource(_patch_swerex_modal)
source = inspect.getsource(ModalEnvironment.__init__)
assert "ensurepip" in source, (
"patches._patch_swerex_modal should include ensurepip fix "
"ModalEnvironment should include ensurepip fix "
"for Modal's legacy image builder"
)
assert "setup_dockerfile_commands" in source, (
"patches._patch_swerex_modal should use setup_dockerfile_commands "
"ModalEnvironment should use setup_dockerfile_commands "
"to fix pip before Modal's bootstrap"
)
def test_patched_init_uses_install_pipx_from_config(self):
"""The patched init should respect install_pipx from config."""
def test_modal_environment_uses_install_pipx(self):
"""ModalEnvironment should pass install_pipx to ModalDeployment."""
try:
from environments.patches import _patch_swerex_modal
from tools.environments.modal import ModalEnvironment
except ImportError:
pytest.skip("environments.patches not importable")
pytest.skip("tools.environments.modal not importable")
import inspect
source = inspect.getsource(_patch_swerex_modal)
source = inspect.getsource(ModalEnvironment.__init__)
assert "install_pipx" in source, (
"patches._patch_swerex_modal should pass install_pipx to ModalDeployment"
"ModalEnvironment should pass install_pipx to ModalDeployment"
)

View File

@@ -18,9 +18,8 @@ def _clear_terminal_env(monkeypatch):
monkeypatch.delenv(key, raising=False)
def test_local_terminal_requirements_do_not_depend_on_minisweagent(monkeypatch, caplog):
"""Local backend uses Hermes' own LocalEnvironment wrapper and should not
be marked unavailable just because `minisweagent` isn't importable."""
def test_local_terminal_requirements(monkeypatch, caplog):
"""Local backend uses Hermes' own LocalEnvironment wrapper."""
_clear_terminal_env(monkeypatch)
monkeypatch.setenv("TERMINAL_ENV", "local")
@@ -64,7 +63,7 @@ def test_modal_backend_without_token_or_config_logs_specific_error(monkeypatch,
monkeypatch.setenv("TERMINAL_ENV", "modal")
monkeypatch.setenv("HOME", str(tmp_path))
monkeypatch.setenv("USERPROFILE", str(tmp_path))
monkeypatch.setattr(terminal_tool_module, "ensure_minisweagent_on_path", lambda *_args, **_kwargs: None)
# Pretend swerex is installed
monkeypatch.setattr(terminal_tool_module.importlib.util, "find_spec", lambda _name: object())
with caplog.at_level(logging.ERROR):

View File

@@ -8,7 +8,7 @@ terminal_tool_module = importlib.import_module("tools.terminal_tool")
class TestTerminalRequirements:
def test_local_backend_does_not_require_minisweagent_package(self, monkeypatch):
def test_local_backend_requirements(self, monkeypatch):
monkeypatch.setattr(
terminal_tool_module,
"_get_env_config",