mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
feat(checkpoints): auto-prune orphan and stale shadow repos at startup (#16303)
Every working dir hermes ever touches gets its own shadow git repo under
~/.hermes/checkpoints/{sha256(abs_dir)[:16]}/. The per-repo _prune is a
no-op (comment in CheckpointManager._prune says so), so abandoned repos
from deleted/moved projects or one-off tmp dirs pile up forever. Field
reports put the typical offender at 1000+ repos / ~12 GB on active
contributor machines.
Adds an opt-in startup sweep that mirrors the sessions.auto_prune
pattern from #13861 / #16286:
- tools/checkpoint_manager.py: new prune_checkpoints() and
maybe_auto_prune_checkpoints() helpers. Deletes shadow repos that
are orphaned (the HERMES_WORKDIR marker points to a path that no longer
exists) or stale (newest in-repo mtime older than retention_days).
Idempotent via a CHECKPOINT_BASE/.last_prune marker file so it only
runs once per min_interval_hours regardless of how many hermes
processes start up.
- hermes_cli/config.py: new checkpoints.auto_prune /
retention_days / delete_orphans / min_interval_hours knobs.
Default auto_prune: false so users who rely on /rollback against
long-ago sessions never lose data silently.
- cli.py / gateway/run.py: startup hooks gated on checkpoints.auto_prune,
called right next to the existing state.db maintenance block.
- Docs updated with the new config knobs.
- 11 regression tests: orphan/stale deletion, precedence, byte-freed
tracking, non-shadow dir skip, interval gating, corrupt marker
recovery.
Refs #3015 (session-file disk growth was fixed in #16286; this covers
the checkpoint side noted out-of-scope there).
This commit is contained in:
28
cli.py
28
cli.py
@@ -988,6 +988,29 @@ def _run_state_db_auto_maintenance(session_db) -> None:
|
||||
logger.debug("state.db auto-maintenance skipped: %s", exc)
|
||||
|
||||
|
||||
def _run_checkpoint_auto_maintenance() -> None:
    """Run the opt-in checkpoint sweep with settings taken from config.

    Loads the full config through :func:`hermes_cli.config.load_config`
    and looks at its ``checkpoints`` section.  When ``auto_prune`` is
    truthy, ``checkpoint_manager.maybe_auto_prune_checkpoints`` is called
    with ``retention_days`` (default 7), ``min_interval_hours``
    (default 24) and ``delete_orphans`` (default True) from that section.

    Any exception raised along the way is caught and logged at debug
    level, so startup is not blocked by a failed maintenance pass.
    """
    try:
        # Imported lazily so an import failure is handled by the same
        # catch-all below instead of breaking module import.
        from hermes_cli.config import load_config as _load_full_config

        settings = _load_full_config().get("checkpoints") or {}
        if settings.get("auto_prune", False):
            from tools.checkpoint_manager import maybe_auto_prune_checkpoints

            # Coerce config values up front; a malformed value raises here
            # and the sweep is skipped rather than run with bad settings.
            prune_kwargs = {
                "retention_days": int(settings.get("retention_days", 7)),
                "min_interval_hours": int(settings.get("min_interval_hours", 24)),
                "delete_orphans": bool(settings.get("delete_orphans", True)),
            }
            maybe_auto_prune_checkpoints(**prune_kwargs)
    except Exception as err:
        logger.debug("checkpoint auto-maintenance skipped: %s", err)
|
||||
|
||||
|
||||
def _prune_stale_worktrees(repo_root: str, max_age_hours: int = 24) -> None:
|
||||
"""Remove stale worktrees and orphaned branches on startup.
|
||||
|
||||
@@ -2054,6 +2077,11 @@ class HermesCLI:
|
||||
# Never blocks startup on failure.
|
||||
_run_state_db_auto_maintenance(self._session_db)
|
||||
|
||||
# Opportunistic shadow-repo cleanup — deletes orphan/stale
|
||||
# checkpoint repos under ~/.hermes/checkpoints/. Opt-in via
|
||||
# checkpoints.auto_prune, idempotent via .last_prune marker.
|
||||
_run_checkpoint_auto_maintenance()
|
||||
|
||||
# Deferred title: stored in memory until the session is created in the DB
|
||||
self._pending_title: Optional[str] = None
|
||||
|
||||
|
||||
Reference in New Issue
Block a user