refactor(environments): migrate Docker + SSH to unified model — Phase 2+4

DockerEnvironment: - Add _run_bash/_run_bash_login, extract _build_forward_env_args() - Remove execute() override with duplicate timeout/interrupt loop - Remove -w flag (CWD handled by wrapping template) - Call init_session() after container creation - 42/42 tests passing on debian:bookworm-slim (21.7s) SSHEnvironment: - Remove PersistentShellMixin inheritance entirely - Remove all IPC methods: _read_temp_files, _kill_shell_children, _cleanup_temp_files, _spawn_shell_process, _execute_oneshot - Add _run_bash/_run_bash_login with shlex.quote for SSH transport - Override _read_file_in_env with capture_output=True to suppress SSH connection warnings (post-quantum key exchange etc.) - Move _sync_skills_and_credentials to _before_execute hook - Remove persistent parameter - 42/42 tests passing on SSH (173s, no more polling overhead) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-05-03 09:17:09 +08:00 · 2026-04-02 13:45:53 +05:30
parent cd814392ae
commit 637bbbee7e
2 changed files with 121 additions and 293 deletions
--- a/tools/environments/docker.py
+++ b/tools/environments/docker.py
@@ -8,17 +8,13 @@ persistence via bind mounts.
 import logging
 import os
 import re
-import shlex
 import shutil
 import subprocess
 import sys
-import threading
-import time
 import uuid
 from typing import Optional

 from tools.environments.base import BaseEnvironment
-from tools.interrupt import is_interrupted

 logger = logging.getLogger(__name__)

@@ -61,36 +57,6 @@ def _normalize_forward_env_names(forward_env: list[str] | None) -> list[str]:
    return normalized


-def _normalize_env_dict(env: dict | None) -> dict[str, str]:
-    """Validate and normalize a docker_env dict to {str: str}.
-
-    Filters out entries with invalid variable names or non-string values.
-    """
-    if not env:
-        return {}
-    if not isinstance(env, dict):
-        logger.warning("docker_env is not a dict: %r", env)
-        return {}
-
-    normalized: dict[str, str] = {}
-    for key, value in env.items():
-        if not isinstance(key, str) or not _ENV_VAR_NAME_RE.match(key.strip()):
-            logger.warning("Ignoring invalid docker_env key: %r", key)
-            continue
-        key = key.strip()
-        if not isinstance(value, str):
-            # Coerce simple scalar types (int, bool, float) to string;
-            # reject complex types.
-            if isinstance(value, (int, float, bool)):
-                value = str(value)
-            else:
-                logger.warning("Ignoring non-string docker_env value for %r: %r", key, value)
-                continue
-        normalized[key] = value
-
-    return normalized
-
-
 def _load_hermes_env_vars() -> dict[str, str]:
    """Load ~/.hermes/.env values without failing Docker command execution."""
    try:
@@ -241,7 +207,6 @@ class DockerEnvironment(BaseEnvironment):
        task_id: str = "default",
        volumes: list = None,
        forward_env: list[str] | None = None,
-        env: dict | None = None,
        network: bool = True,
        host_cwd: str = None,
        auto_mount_cwd: bool = False,
@@ -253,7 +218,6 @@ class DockerEnvironment(BaseEnvironment):
        self._persistent = persistent_filesystem
        self._task_id = task_id
        self._forward_env = _normalize_forward_env_names(forward_env)
-        self._env = _normalize_env_dict(env)
        self._container_id: Optional[str] = None
        logger.info(f"DockerEnvironment volumes: {volumes}")
        # Ensure volumes is a list (config.yaml could be malformed)
@@ -348,11 +312,7 @@ class DockerEnvironment(BaseEnvironment):
        # Mount credential files (OAuth tokens, etc.) declared by skills.
        # Read-only so the container can authenticate but not modify host creds.
        try:
-            from tools.credential_files import (
-                get_credential_file_mounts,
-                get_skills_directory_mount,
-                get_cache_directory_mounts,
-            )
+            from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount

            for mount_entry in get_credential_file_mounts():
                volume_args.extend([
@@ -365,9 +325,10 @@ class DockerEnvironment(BaseEnvironment):
                    mount_entry["container_path"],
                )

-            # Mount skill directories (local + external) so skill
-            # scripts/templates are available inside the container.
-            for skills_mount in get_skills_directory_mount():
+            # Mount the skills directory so skill scripts/templates are
+            # available inside the container at the same relative path.
+            skills_mount = get_skills_directory_mount()
+            if skills_mount:
                volume_args.extend([
                    "-v",
                    f"{skills_mount['host_path']}:{skills_mount['container_path']}:ro",
@@ -377,32 +338,11 @@ class DockerEnvironment(BaseEnvironment):
                    skills_mount["host_path"],
                    skills_mount["container_path"],
                )
-
-            # Mount host-side cache directories (documents, images, audio,
-            # screenshots) so the agent can access uploaded files and other
-            # cached media from inside the container.  Read-only — the
-            # container reads these but the host gateway manages writes.
-            for cache_mount in get_cache_directory_mounts():
-                volume_args.extend([
-                    "-v",
-                    f"{cache_mount['host_path']}:{cache_mount['container_path']}:ro",
-                ])
-                logger.info(
-                    "Docker: mounting cache dir %s -> %s",
-                    cache_mount["host_path"],
-                    cache_mount["container_path"],
-                )
        except Exception as e:
            logger.debug("Docker: could not load credential file mounts: %s", e)

-        # Explicit environment variables (docker_env config) — set at container
-        # creation so they're available to all processes (including entrypoint).
-        env_args = []
-        for key in sorted(self._env):
-            env_args.extend(["-e", f"{key}={self._env[key]}"])
-
        logger.info(f"Docker volume_args: {volume_args}")
-        all_run_args = list(_SECURITY_ARGS) + writable_args + resource_args + volume_args + env_args
+        all_run_args = list(_SECURITY_ARGS) + writable_args + resource_args + volume_args
        logger.info(f"Docker run_args: {all_run_args}")

        # Resolve the docker executable once so it works even when
@@ -430,6 +370,9 @@ class DockerEnvironment(BaseEnvironment):
        self._container_id = result.stdout.strip()
        logger.info(f"Started container {container_name} ({self._container_id[:12]})")

+        # Capture login-shell environment into a snapshot for the unified model
+        self.init_session()
+
    @staticmethod
    def _storage_opt_supported() -> bool:
        """Check if Docker's storage driver supports --storage-opt size=.
@@ -470,110 +413,86 @@ class DockerEnvironment(BaseEnvironment):
        logger.debug("Docker --storage-opt support: %s", _storage_opt_ok)
        return _storage_opt_ok

-    def execute(self, command: str, cwd: str = "", *,
-                timeout: int | None = None,
-                stdin_data: str | None = None) -> dict:
-        exec_command, sudo_stdin = self._prepare_command(command)
-        work_dir = cwd or self.cwd
-        effective_timeout = timeout or self.timeout
+    # ------------------------------------------------------------------
+    # Unified execution primitives
+    # ------------------------------------------------------------------

-        # Merge sudo password (if any) with caller-supplied stdin_data.
-        if sudo_stdin is not None and stdin_data is not None:
-            effective_stdin = sudo_stdin + stdin_data
-        elif sudo_stdin is not None:
-            effective_stdin = sudo_stdin
-        else:
-            effective_stdin = stdin_data
-
-        # docker exec -w doesn't expand ~, so prepend a cd into the command.
-        # Keep ~ unquoted (for shell expansion) and quote only the subpath.
-        if work_dir == "~":
-            exec_command = f"cd ~ && {exec_command}"
-            work_dir = "/"
-        elif work_dir.startswith("~/"):
-            exec_command = f"cd ~/{shlex.quote(work_dir[2:])} && {exec_command}"
-            work_dir = "/"
-
-        assert self._container_id, "Container not started"
-        cmd = [self._docker_exe, "exec"]
-        if effective_stdin is not None:
-            cmd.append("-i")
-        cmd.extend(["-w", work_dir])
-        # Build the per-exec environment: start with explicit docker_env values
-        # (static config), then overlay docker_forward_env / skill env_passthrough
-        # (dynamic from host process).  Forward values take precedence.
-        exec_env: dict[str, str] = dict(self._env)
+    def _build_forward_env_args(self) -> list[str]:
+        """Build ``-e KEY=VALUE`` arguments for docker exec env forwarding.

+        Combines explicit ``docker_forward_env`` with skill-declared
+        ``env_passthrough`` vars so skills that declare
+        ``required_environment_variables`` (e.g. Notion) have their keys
+        forwarded into the container automatically.
+        """
        forward_keys = set(self._forward_env)
        try:
            from tools.env_passthrough import get_all_passthrough
            forward_keys |= get_all_passthrough()
        except Exception:
            pass
+
        hermes_env = _load_hermes_env_vars() if forward_keys else {}
+        args: list[str] = []
        for key in sorted(forward_keys):
            value = os.getenv(key)
            if value is None:
                value = hermes_env.get(key)
            if value is not None:
-                exec_env[key] = value
+                args.extend(["-e", f"{key}={value}"])
+        return args

-        for key in sorted(exec_env):
-            cmd.extend(["-e", f"{key}={exec_env[key]}"])
-        cmd.extend([self._container_id, "bash", "-lc", exec_command])
+    def _run_bash(self, cmd_string: str, *,
+                  stdin_data: str | None = None) -> subprocess.Popen:
+        """Spawn ``bash -c <cmd_string>`` inside the Docker container."""
+        assert self._container_id, "Container not started"
+        cmd = [self._docker_exe, "exec"]
+        if stdin_data is not None:
+            cmd.append("-i")
+        cmd.extend(self._build_forward_env_args())
+        cmd.extend([self._container_id, "bash", "-c", cmd_string])
+        proc = subprocess.Popen(
+            cmd,
+            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+            stdin=subprocess.PIPE if stdin_data is not None else subprocess.DEVNULL,
+            text=True,
+        )
+        if stdin_data:
+            try:
+                proc.stdin.write(stdin_data)
+                proc.stdin.close()
+            except (BrokenPipeError, OSError):
+                pass
+        return proc

-        try:
-            _output_chunks = []
-            proc = subprocess.Popen(
-                cmd,
-                stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-                stdin=subprocess.PIPE if effective_stdin else subprocess.DEVNULL,
-                text=True,
-            )
-            if effective_stdin:
-                try:
-                    proc.stdin.write(effective_stdin)
-                    proc.stdin.close()
-                except Exception:
-                    pass
-
-            def _drain():
-                try:
-                    for line in proc.stdout:
-                        _output_chunks.append(line)
-                except Exception:
-                    pass
-
-            reader = threading.Thread(target=_drain, daemon=True)
-            reader.start()
-            deadline = time.monotonic() + effective_timeout
-
-            while proc.poll() is None:
-                if is_interrupted():
-                    proc.terminate()
-                    try:
-                        proc.wait(timeout=1)
-                    except subprocess.TimeoutExpired:
-                        proc.kill()
-                    reader.join(timeout=2)
-                    return {
-                        "output": "".join(_output_chunks) + "\n[Command interrupted]",
-                        "returncode": 130,
-                    }
-                if time.monotonic() > deadline:
-                    proc.kill()
-                    reader.join(timeout=2)
-                    return self._timeout_result(effective_timeout)
-                time.sleep(0.2)
-
-            reader.join(timeout=5)
-            return {"output": "".join(_output_chunks), "returncode": proc.returncode}
-        except Exception as e:
-            return {"output": f"Docker execution error: {e}", "returncode": 1}
+    def _run_bash_login(self, cmd_string: str) -> subprocess.Popen:
+        """Spawn ``bash -l -c <cmd_string>`` for snapshot creation."""
+        assert self._container_id, "Container not started"
+        cmd = [self._docker_exe, "exec", self._container_id,
+               "bash", "-l", "-c", cmd_string]
+        return subprocess.Popen(
+            cmd,
+            stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+            stdin=subprocess.DEVNULL, text=True,
+        )

    def cleanup(self):
        """Stop and remove the container. Bind-mount dirs persist if persistent=True."""
        if self._container_id:
+            # Clean up snapshot and cwdfile inside the container
+            paths_to_rm = " ".join(
+                p for p in (self._snapshot_path, self._cwdfile_path) if p
+            )
+            if paths_to_rm:
+                try:
+                    subprocess.run(
+                        [self._docker_exe, "exec", self._container_id,
+                         "rm", "-f", *paths_to_rm.split()],
+                        capture_output=True, timeout=5,
+                    )
+                except Exception:
+                    pass
+
            try:
                # Stop in background so cleanup doesn't block
                stop_cmd = (