mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-03 01:07:31 +08:00
perf(ssh): add mtime-based caching to file sync
SSH _before_execute() ran rsync unconditionally before every command, adding ~2.3s of overhead even when zero bytes were transferred. This was 80% of per-command latency (actual execution: ~0.6s). Add (mtime, size) caching — matching the pattern Modal and Daytona already use — to skip rsync when local files haven't changed:

- Per-file mtime+size check for credential files
- Directory fingerprint (set of relpath/mtime/size tuples) for skills
- --delete flag on skills rsync to prune uninstalled skills
- Track created remote dirs to avoid redundant mkdir -p calls
- Cache invalidation on rsync failure (remote may have been wiped)
- force=True parameter as an escape hatch for debugging

Before: ~3s per SSH command (2.3s rsync + 0.6s execution)
After: ~0.6s per SSH command (mtime check + execution)

SSH test suite: 134s → 50s
This commit is contained in:
@@ -48,6 +48,12 @@ class SSHEnvironment(BaseEnvironment):
|
||||
self.control_dir = Path(tempfile.gettempdir()) / "hermes-ssh"
|
||||
self.control_dir.mkdir(parents=True, exist_ok=True)
|
||||
self.control_socket = self.control_dir / f"{user}@{host}:{port}.sock"
|
||||
|
||||
# Sync caches — skip rsync when local files haven't changed.
|
||||
self._synced_files: dict[str, tuple] = {} # remote_path → (mtime, size)
|
||||
self._skills_fingerprint: set | None = None # {(relpath, mtime, size), ...}
|
||||
self._created_remote_dirs: set[str] = set()
|
||||
|
||||
_ensure_ssh_available()
|
||||
self._establish_connection()
|
||||
self._remote_home = self._detect_remote_home()
|
||||
@@ -99,8 +105,12 @@ class SSHEnvironment(BaseEnvironment):
|
||||
return "/root"
|
||||
return f"/home/{self.user}"
|
||||
|
||||
def _sync_skills_and_credentials(self) -> None:
|
||||
"""Rsync skills directory and credential files to the remote host."""
|
||||
def _sync_skills_and_credentials(self, *, force: bool = False) -> None:
|
||||
"""Rsync skills directory and credential files to the remote host.
|
||||
|
||||
Uses local mtime+size caching to skip rsync when nothing changed.
|
||||
Pass force=True to bypass the cache (e.g. for debugging).
|
||||
"""
|
||||
try:
|
||||
container_base = f"{self._remote_home}/.hermes"
|
||||
from tools.credential_files import get_credential_file_mounts, get_skills_directory_mount
|
||||
@@ -114,39 +124,88 @@ class SSHEnvironment(BaseEnvironment):
|
||||
rsync_base.extend(["-e", ssh_opts])
|
||||
dest_prefix = f"{self.user}@{self.host}"
|
||||
|
||||
# Sync individual credential files (remap /root/.hermes to detected home)
|
||||
# --- Credential files: per-file mtime check ---
|
||||
cred_to_sync = []
|
||||
for mount_entry in get_credential_file_mounts():
|
||||
remote_path = mount_entry["container_path"].replace("/root/.hermes", container_base, 1)
|
||||
parent_dir = str(Path(remote_path).parent)
|
||||
hp = Path(mount_entry["host_path"])
|
||||
remote_path = mount_entry["container_path"].replace(
|
||||
"/root/.hermes", container_base, 1
|
||||
)
|
||||
try:
|
||||
s = hp.stat()
|
||||
key = (s.st_mtime, s.st_size)
|
||||
except FileNotFoundError:
|
||||
continue
|
||||
if not force and self._synced_files.get(remote_path) == key:
|
||||
continue
|
||||
cred_to_sync.append((mount_entry["host_path"], remote_path, key))
|
||||
|
||||
# Ensure remote directories exist for any new credential paths.
|
||||
# container_base is always included so skills rsync has its parent.
|
||||
needed_dirs = {container_base}
|
||||
for _, remote_path, _ in cred_to_sync:
|
||||
needed_dirs.add(str(Path(remote_path).parent))
|
||||
new_dirs = needed_dirs - self._created_remote_dirs
|
||||
if new_dirs:
|
||||
mkdir_cmd = self._build_ssh_command()
|
||||
mkdir_cmd.append(f"mkdir -p {parent_dir}")
|
||||
mkdir_cmd.append(f"mkdir -p {' '.join(shlex.quote(d) for d in new_dirs)}")
|
||||
subprocess.run(mkdir_cmd, capture_output=True, text=True, timeout=10)
|
||||
cmd = rsync_base + [mount_entry["host_path"], f"{dest_prefix}:{remote_path}"]
|
||||
self._created_remote_dirs |= new_dirs
|
||||
|
||||
# Rsync changed credential files
|
||||
for host_path, remote_path, key in cred_to_sync:
|
||||
cmd = rsync_base + [host_path, f"{dest_prefix}:{remote_path}"]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
|
||||
if result.returncode == 0:
|
||||
logger.info("SSH: synced credential %s -> %s", mount_entry["host_path"], remote_path)
|
||||
self._synced_files[remote_path] = key
|
||||
logger.info("SSH: synced credential %s -> %s", host_path, remote_path)
|
||||
else:
|
||||
self._invalidate_sync_cache()
|
||||
logger.debug("SSH: rsync credential failed: %s", result.stderr.strip())
|
||||
|
||||
# Sync skills directory (remap to detected home)
|
||||
# --- Skills directory: fingerprint check + --delete for pruning ---
|
||||
skills_mount = get_skills_directory_mount(container_base=container_base)
|
||||
if skills_mount:
|
||||
if skills_mount and (force or self._skills_dir_changed(skills_mount["host_path"])):
|
||||
remote_path = skills_mount["container_path"]
|
||||
mkdir_cmd = self._build_ssh_command()
|
||||
mkdir_cmd.append(f"mkdir -p {remote_path}")
|
||||
subprocess.run(mkdir_cmd, capture_output=True, text=True, timeout=10)
|
||||
cmd = rsync_base + [
|
||||
"--delete",
|
||||
skills_mount["host_path"].rstrip("/") + "/",
|
||||
f"{dest_prefix}:{remote_path}/",
|
||||
]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
|
||||
if result.returncode == 0:
|
||||
logger.info("SSH: synced skills dir %s -> %s", skills_mount["host_path"], remote_path)
|
||||
logger.info("SSH: synced skills dir %s -> %s",
|
||||
skills_mount["host_path"], remote_path)
|
||||
else:
|
||||
self._invalidate_sync_cache()
|
||||
logger.debug("SSH: rsync skills dir failed: %s", result.stderr.strip())
|
||||
except Exception as e:
|
||||
logger.debug("SSH: could not sync skills/credentials: %s", e)
|
||||
|
||||
def _skills_dir_changed(self, host_path: str) -> bool:
|
||||
"""Return True if any file in the skills dir has changed since last sync."""
|
||||
root = Path(host_path)
|
||||
if not root.is_dir():
|
||||
return False
|
||||
current: set[tuple] = set()
|
||||
try:
|
||||
for f in root.rglob("*"):
|
||||
if f.is_file() and not f.is_symlink():
|
||||
s = f.stat()
|
||||
current.add((str(f.relative_to(root)), s.st_mtime, s.st_size))
|
||||
except OSError:
|
||||
return True
|
||||
if current == self._skills_fingerprint:
|
||||
return False
|
||||
self._skills_fingerprint = current
|
||||
return True
|
||||
|
||||
def _invalidate_sync_cache(self) -> None:
|
||||
"""Clear sync caches — call on rsync failure or reconnect."""
|
||||
self._synced_files.clear()
|
||||
self._skills_fingerprint = None
|
||||
self._created_remote_dirs.clear()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Unified execution hooks
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user