Files
hermes-agent/tools/environments/ssh.py
alt-glitch 637bbbee7e refactor(environments): migrate Docker + SSH to unified model — Phase 2+4
DockerEnvironment:
- Add _run_bash/_run_bash_login, extract _build_forward_env_args()
- Remove execute() override with duplicate timeout/interrupt loop
- Remove -w flag (CWD handled by wrapping template)
- Call init_session() after container creation
- 42/42 tests passing on debian:bookworm-slim (21.7s)

SSHEnvironment:
- Remove PersistentShellMixin inheritance entirely
- Remove all IPC methods: _read_temp_files, _kill_shell_children,
  _cleanup_temp_files, _spawn_shell_process, _execute_oneshot
- Add _run_bash/_run_bash_login with shlex.quote for SSH transport
- Override _read_file_in_env with capture_output=True to suppress
  SSH connection warnings (post-quantum key exchange etc.)
- Move _sync_skills_and_credentials to _before_execute hook
- Remove persistent parameter
- 42/42 tests passing on SSH (173s, no more polling overhead)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 01:31:34 +00:00

223 lines
9.3 KiB
Python

"""SSH remote execution environment with ControlMaster connection persistence."""
import logging
import shlex
import shutil
import subprocess
import tempfile
from pathlib import Path
from tools.environments.base import BaseEnvironment
logger = logging.getLogger(__name__)
def _ensure_ssh_available() -> None:
"""Fail fast with a clear error when the SSH client is unavailable."""
if not shutil.which("ssh"):
raise RuntimeError(
"SSH is not installed or not in PATH. Install OpenSSH client: apt install openssh-client"
)
class SSHEnvironment(BaseEnvironment):
"""Run commands on a remote machine over SSH.
Uses SSH ControlMaster for connection persistence so subsequent
commands are fast. Security benefit: the agent cannot modify its
own code since execution happens on a separate machine.
Foreground commands are interruptible: the local ssh process is killed
and a remote kill is attempted over the ControlMaster socket.
Uses the unified spawn-per-call model:
- bash -l once at session start to capture env snapshot on the remote
- bash -c for every subsequent command (fast, no shell init overhead)
- CWD tracked via cwdfile written after each command on the remote host
"""
def __init__(self, host: str, user: str, cwd: str = "~",
timeout: int = 60, port: int = 22, key_path: str = "",
**kwargs):
super().__init__(cwd=cwd, timeout=timeout)
self.host = host
self.user = user
self.port = port
self.key_path = key_path
self.control_dir = Path(tempfile.gettempdir()) / "hermes-ssh"
self.control_dir.mkdir(parents=True, exist_ok=True)
self.control_socket = self.control_dir / f"{user}@{host}:{port}.sock"
_ensure_ssh_available()
self._establish_connection()
self._remote_home = self._detect_remote_home()
self._sync_skills_and_credentials()
self.init_session()
def _build_ssh_command(self, extra_args: list | None = None) -> list:
cmd = ["ssh"]
cmd.extend(["-o", f"ControlPath={self.control_socket}"])
cmd.extend(["-o", "ControlMaster=auto"])
cmd.extend(["-o", "ControlPersist=300"])
cmd.extend(["-o", "BatchMode=yes"])
cmd.extend(["-o", "StrictHostKeyChecking=accept-new"])
cmd.extend(["-o", "ConnectTimeout=10"])
if self.port != 22:
cmd.extend(["-p", str(self.port)])
if self.key_path:
cmd.extend(["-i", self.key_path])
if extra_args:
cmd.extend(extra_args)
cmd.append(f"{self.user}@{self.host}")
return cmd
def _establish_connection(self):
cmd = self._build_ssh_command()
cmd.append("echo 'SSH connection established'")
try:
result = subprocess.run(cmd, capture_output=True, text=True, timeout=15)
if result.returncode != 0:
error_msg = result.stderr.strip() or result.stdout.strip()
raise RuntimeError(f"SSH connection failed: {error_msg}")
except subprocess.TimeoutExpired:
raise RuntimeError(f"SSH connection to {self.user}@{self.host} timed out")
def _detect_remote_home(self) -> str:
"""Detect the remote user's home directory."""
try:
cmd = self._build_ssh_command()
cmd.append("echo $HOME")
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
home = result.stdout.strip()
if home and result.returncode == 0:
logger.debug("SSH: remote home = %s", home)
return home
except Exception:
pass
# Fallback: guess from username
if self.user == "root":
return "/root"
return f"/home/{self.user}"
def _sync_skills_and_credentials(self) -> None:
"""Rsync skills directory and credential files to the remote host."""
try:
container_base = f"{self._remote_home}/.hermes"
from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount
rsync_base = ["rsync", "-az", "--timeout=30", "--safe-links"]
ssh_opts = f"ssh -o ControlPath={self.control_socket} -o ControlMaster=auto"
if self.port != 22:
ssh_opts += f" -p {self.port}"
if self.key_path:
ssh_opts += f" -i {self.key_path}"
rsync_base.extend(["-e", ssh_opts])
dest_prefix = f"{self.user}@{self.host}"
# Sync individual credential files (remap /root/.hermes to detected home)
for mount_entry in get_credential_file_mounts():
remote_path = mount_entry["container_path"].replace("/root/.hermes", container_base, 1)
parent_dir = str(Path(remote_path).parent)
mkdir_cmd = self._build_ssh_command()
mkdir_cmd.append(f"mkdir -p {parent_dir}")
subprocess.run(mkdir_cmd, capture_output=True, text=True, timeout=10)
cmd = rsync_base + [mount_entry["host_path"], f"{dest_prefix}:{remote_path}"]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
if result.returncode == 0:
logger.info("SSH: synced credential %s -> %s", mount_entry["host_path"], remote_path)
else:
logger.debug("SSH: rsync credential failed: %s", result.stderr.strip())
# Sync skills directory (remap to detected home)
skills_mount = get_skills_directory_mount(container_base=container_base)
if skills_mount:
remote_path = skills_mount["container_path"]
mkdir_cmd = self._build_ssh_command()
mkdir_cmd.append(f"mkdir -p {remote_path}")
subprocess.run(mkdir_cmd, capture_output=True, text=True, timeout=10)
cmd = rsync_base + [
skills_mount["host_path"].rstrip("/") + "/",
f"{dest_prefix}:{remote_path}/",
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
if result.returncode == 0:
logger.info("SSH: synced skills dir %s -> %s", skills_mount["host_path"], remote_path)
else:
logger.debug("SSH: rsync skills dir failed: %s", result.stderr.strip())
except Exception as e:
logger.debug("SSH: could not sync skills/credentials: %s", e)
# ------------------------------------------------------------------
# Unified execution hooks
# ------------------------------------------------------------------
def _before_execute(self):
"""Incremental sync before each command so mid-session credential
refreshes and skill updates are picked up."""
self._sync_skills_and_credentials()
def _run_bash(self, cmd_string, *, stdin_data=None):
cmd = self._build_ssh_command()
cmd.extend(["bash", "-c", shlex.quote(cmd_string)])
proc = subprocess.Popen(
cmd,
stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
stdin=subprocess.PIPE if stdin_data is not None else subprocess.DEVNULL,
text=True,
)
if stdin_data:
try:
proc.stdin.write(stdin_data)
proc.stdin.close()
except (BrokenPipeError, OSError):
pass
return proc
def _run_bash_login(self, cmd_string):
cmd = self._build_ssh_command()
cmd.extend(["bash", "-l", "-c", shlex.quote(cmd_string)])
return subprocess.Popen(
cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
stdin=subprocess.DEVNULL, text=True,
)
def _read_file_in_env(self, path: str) -> str:
"""SSH override: use subprocess.run for single-shot cat, suppress stderr.
SSH connection warnings (post-quantum, etc.) must not pollute
the cwdfile read — use separate stderr to discard them.
"""
cmd = self._build_ssh_command()
cmd.append(f"cat {shlex.quote(path)} 2>/dev/null")
try:
result = subprocess.run(
cmd, capture_output=True, text=True, timeout=10,
)
return result.stdout
except (subprocess.TimeoutExpired, OSError):
return ""
def cleanup(self):
# Clean up remote snapshot and cwdfile before closing ControlMaster
if self._snapshot_path or self._cwdfile_path:
paths = " ".join(p for p in (self._snapshot_path, self._cwdfile_path) if p)
try:
cmd = self._build_ssh_command()
cmd.append(f"rm -f {paths}")
subprocess.run(cmd, capture_output=True, timeout=5)
except Exception:
pass
super().cleanup()
if self.control_socket.exists():
try:
cmd = ["ssh", "-o", f"ControlPath={self.control_socket}",
"-O", "exit", f"{self.user}@{self.host}"]
subprocess.run(cmd, capture_output=True, timeout=5)
except (OSError, subprocess.SubprocessError):
pass
try:
self.control_socket.unlink()
except OSError:
pass