perf(ssh): add mtime-based caching to file sync

SSH _before_execute() ran rsync unconditionally before every command,
adding ~2.3s of overhead even when zero bytes were transferred. That rsync
pass accounted for ~80% of per-command latency (actual execution: ~0.6s).

Add (mtime, size) caching — matching the pattern Modal and Daytona
already use — to skip rsync when local files haven't changed:

- Per-file mtime+size check for credential files
- Directory fingerprint (set of relpath/mtime/size tuples) for skills
- --delete flag on skills rsync to prune uninstalled skills
- Track created remote dirs to avoid redundant mkdir -p calls
- Cache invalidation on rsync failure (remote may have been wiped)
- force=True parameter as escape hatch for debugging

Before: ~3s per SSH command (2.3s rsync + 0.6s execution)
After:  ~0.6s per SSH command (mtime check + execution)
SSH test suite: 134s → 50s
This commit is contained in:
alt-glitch
2026-04-02 23:33:59 +05:30
committed by Hermes Agent
parent 49d1390b40
commit 454edc7771

View File

@@ -48,6 +48,12 @@ class SSHEnvironment(BaseEnvironment):
self.control_dir = Path(tempfile.gettempdir()) / "hermes-ssh"
self.control_dir.mkdir(parents=True, exist_ok=True)
self.control_socket = self.control_dir / f"{user}@{host}:{port}.sock"
# Sync caches — skip rsync when local files haven't changed.
self._synced_files: dict[str, tuple] = {} # remote_path → (mtime, size)
self._skills_fingerprint: set | None = None # {(relpath, mtime, size), ...}
self._created_remote_dirs: set[str] = set()
_ensure_ssh_available()
self._establish_connection()
self._remote_home = self._detect_remote_home()
@@ -99,8 +105,12 @@ class SSHEnvironment(BaseEnvironment):
return "/root"
return f"/home/{self.user}"
def _sync_skills_and_credentials(self) -> None:
"""Rsync skills directory and credential files to the remote host."""
def _sync_skills_and_credentials(self, *, force: bool = False) -> None:
"""Rsync skills directory and credential files to the remote host.
Uses local mtime+size caching to skip rsync when nothing changed.
Pass force=True to bypass the cache (e.g. for debugging).
"""
try:
container_base = f"{self._remote_home}/.hermes"
from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount
@@ -114,39 +124,88 @@ class SSHEnvironment(BaseEnvironment):
rsync_base.extend(["-e", ssh_opts])
dest_prefix = f"{self.user}@{self.host}"
# Sync individual credential files (remap /root/.hermes to detected home)
# --- Credential files: per-file mtime check ---
cred_to_sync = []
for mount_entry in get_credential_file_mounts():
remote_path = mount_entry["container_path"].replace("/root/.hermes", container_base, 1)
parent_dir = str(Path(remote_path).parent)
hp = Path(mount_entry["host_path"])
remote_path = mount_entry["container_path"].replace(
"/root/.hermes", container_base, 1
)
try:
s = hp.stat()
key = (s.st_mtime, s.st_size)
except FileNotFoundError:
continue
if not force and self._synced_files.get(remote_path) == key:
continue
cred_to_sync.append((mount_entry["host_path"], remote_path, key))
# Ensure remote directories exist for any new credential paths.
# container_base is always included so skills rsync has its parent.
needed_dirs = {container_base}
for _, remote_path, _ in cred_to_sync:
needed_dirs.add(str(Path(remote_path).parent))
new_dirs = needed_dirs - self._created_remote_dirs
if new_dirs:
mkdir_cmd = self._build_ssh_command()
mkdir_cmd.append(f"mkdir -p {parent_dir}")
mkdir_cmd.append(f"mkdir -p {' '.join(shlex.quote(d) for d in new_dirs)}")
subprocess.run(mkdir_cmd, capture_output=True, text=True, timeout=10)
cmd = rsync_base + [mount_entry["host_path"], f"{dest_prefix}:{remote_path}"]
self._created_remote_dirs |= new_dirs
# Rsync changed credential files
for host_path, remote_path, key in cred_to_sync:
cmd = rsync_base + [host_path, f"{dest_prefix}:{remote_path}"]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
if result.returncode == 0:
logger.info("SSH: synced credential %s -> %s", mount_entry["host_path"], remote_path)
self._synced_files[remote_path] = key
logger.info("SSH: synced credential %s -> %s", host_path, remote_path)
else:
self._invalidate_sync_cache()
logger.debug("SSH: rsync credential failed: %s", result.stderr.strip())
# Sync skills directory (remap to detected home)
# --- Skills directory: fingerprint check + --delete for pruning ---
skills_mount = get_skills_directory_mount(container_base=container_base)
if skills_mount:
if skills_mount and (force or self._skills_dir_changed(skills_mount["host_path"])):
remote_path = skills_mount["container_path"]
mkdir_cmd = self._build_ssh_command()
mkdir_cmd.append(f"mkdir -p {remote_path}")
subprocess.run(mkdir_cmd, capture_output=True, text=True, timeout=10)
cmd = rsync_base + [
"--delete",
skills_mount["host_path"].rstrip("/") + "/",
f"{dest_prefix}:{remote_path}/",
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
if result.returncode == 0:
logger.info("SSH: synced skills dir %s -> %s", skills_mount["host_path"], remote_path)
logger.info("SSH: synced skills dir %s -> %s",
skills_mount["host_path"], remote_path)
else:
self._invalidate_sync_cache()
logger.debug("SSH: rsync skills dir failed: %s", result.stderr.strip())
except Exception as e:
logger.debug("SSH: could not sync skills/credentials: %s", e)
def _skills_dir_changed(self, host_path: str) -> bool:
"""Return True if any file in the skills dir has changed since last sync."""
root = Path(host_path)
if not root.is_dir():
return False
current: set[tuple] = set()
try:
for f in root.rglob("*"):
if f.is_file() and not f.is_symlink():
s = f.stat()
current.add((str(f.relative_to(root)), s.st_mtime, s.st_size))
except OSError:
return True
if current == self._skills_fingerprint:
return False
self._skills_fingerprint = current
return True
def _invalidate_sync_cache(self) -> None:
"""Clear sync caches — call on rsync failure or reconnect."""
self._synced_files.clear()
self._skills_fingerprint = None
self._created_remote_dirs.clear()
# ------------------------------------------------------------------
# Unified execution hooks
# ------------------------------------------------------------------