diff --git a/cli.py b/cli.py index ae87c15c51..58e9d9c0af 100644 --- a/cli.py +++ b/cli.py @@ -974,6 +974,7 @@ def _run_state_db_auto_maintenance(session_db) -> None: return try: from hermes_cli.config import load_config as _load_full_config + from hermes_constants import get_hermes_home as _get_hermes_home cfg = (_load_full_config().get("sessions") or {}) if not cfg.get("auto_prune", False): return @@ -981,6 +982,7 @@ def _run_state_db_auto_maintenance(session_db) -> None: retention_days=int(cfg.get("retention_days", 90)), min_interval_hours=int(cfg.get("min_interval_hours", 24)), vacuum=bool(cfg.get("vacuum_after_prune", True)), + sessions_dir=_get_hermes_home() / "sessions", ) except Exception as exc: logger.debug("state.db auto-maintenance skipped: %s", exc) diff --git a/gateway/run.py b/gateway/run.py index fcab91b443..014278fabc 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -763,6 +763,7 @@ class GatewayRunner: retention_days=int(_sess_cfg.get("retention_days", 90)), min_interval_hours=int(_sess_cfg.get("min_interval_hours", 24)), vacuum=bool(_sess_cfg.get("vacuum_after_prune", True)), + sessions_dir=self.config.sessions_dir, ) except Exception as exc: logger.debug("state.db auto-maintenance skipped: %s", exc) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 1bca6f0e5f..9a3b59f0cc 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -9230,7 +9230,8 @@ Examples: ): print("Cancelled.") return - if db.delete_session(resolved_session_id): + sessions_dir = get_hermes_home() / "sessions" + if db.delete_session(resolved_session_id, sessions_dir=sessions_dir): print(f"Deleted session '{resolved_session_id}'.") else: print(f"Session '{args.session_id}' not found.") @@ -9244,7 +9245,9 @@ Examples: ): print("Cancelled.") return - count = db.prune_sessions(older_than_days=days, source=args.source) + sessions_dir = get_hermes_home() / "sessions" + count = db.prune_sessions(older_than_days=days, source=args.source, + sessions_dir=sessions_dir) print(f"Pruned {count} session(s).") elif action == "rename": diff --git a/hermes_state.py b/hermes_state.py index cc40313084..479ce47b5d 100644 --- a/hermes_state.py +++ b/hermes_state.py @@ -1512,12 +1512,45 @@ class SessionDB: ) self._execute_write(_do) - def delete_session(self, session_id: str) -> bool: + @staticmethod + def _remove_session_files(sessions_dir: Optional[Path], session_id: str) -> None: + """Remove on-disk transcript files for a session. + + Cleans up ``{session_id}.json``, ``{session_id}.jsonl``, and any + ``request_dump_{session_id}_*.json`` files left by the gateway. + Silently skips files that don't exist and swallows OSError so a + filesystem hiccup never blocks a DB operation. + """ + if sessions_dir is None: + return + for suffix in (".json", ".jsonl"): + p = sessions_dir / f"{session_id}{suffix}" + try: + p.unlink(missing_ok=True) + except OSError: + pass + # request_dump files use session_id as a prefix component + try: + for p in sessions_dir.glob(f"request_dump_{session_id}_*.json"): + try: + p.unlink(missing_ok=True) + except OSError: + pass + except OSError: + pass + + def delete_session( + self, + session_id: str, + sessions_dir: Optional[Path] = None, + ) -> bool: """Delete a session and all its messages. Child sessions are orphaned (parent_session_id set to NULL) rather than cascade-deleted, so they remain accessible independently. - Returns True if the session was found and deleted. + When *sessions_dir* is provided, also removes on-disk transcript + files (``.json`` / ``.jsonl`` / ``request_dump_*``) for the deleted + session. Returns True if the session was found and deleted. """ def _do(conn): cursor = conn.execute( @@ -1534,16 +1567,29 @@ class SessionDB: conn.execute("DELETE FROM messages WHERE session_id = ?", (session_id,)) conn.execute("DELETE FROM sessions WHERE id = ?", (session_id,)) return True - return self._execute_write(_do) - def prune_sessions(self, older_than_days: int = 90, source: str = None) -> int: + deleted = self._execute_write(_do) + if deleted: + self._remove_session_files(sessions_dir, session_id) + return deleted + + def prune_sessions( + self, + older_than_days: int = 90, + source: str = None, + sessions_dir: Optional[Path] = None, + ) -> int: """Delete sessions older than N days. Returns count of deleted sessions. Only prunes ended sessions (not active ones). Child sessions outside the prune window are orphaned (parent_session_id set to NULL) rather - than cascade-deleted. + than cascade-deleted. When *sessions_dir* is provided, also removes + on-disk transcript files (``.json`` / ``.jsonl`` / + ``request_dump_*``) for every pruned session, outside the DB + transaction. """ cutoff = time.time() - (older_than_days * 86400) + removed_ids: list[str] = [] def _do(conn): if source: @@ -1573,9 +1619,14 @@ class SessionDB: for sid in session_ids: conn.execute("DELETE FROM messages WHERE session_id = ?", (sid,)) conn.execute("DELETE FROM sessions WHERE id = ?", (sid,)) + removed_ids.append(sid) return len(session_ids) - return self._execute_write(_do) + count = self._execute_write(_do) + # Clean up on-disk files outside the DB transaction + for sid in removed_ids: + self._remove_session_files(sessions_dir, sid) + return count # ── Meta key/value (for scheduler bookkeeping) ── @@ -1629,6 +1680,7 @@ class SessionDB: retention_days: int = 90, min_interval_hours: int = 24, vacuum: bool = True, + sessions_dir: Optional[Path] = None, ) -> Dict[str, Any]: """Idempotent auto-maintenance: prune old sessions + optional VACUUM. @@ -1636,6 +1688,10 @@ class SessionDB: within ``min_interval_hours`` no-op. Designed to be called once at startup from long-lived entrypoints (CLI, gateway, cron scheduler). + When *sessions_dir* is provided, on-disk transcript files + (``.json`` / ``.jsonl`` / ``request_dump_*``) for pruned sessions + are removed as part of the same sweep (issue #3015). + Never raises. On any failure, logs a warning and returns a dict with ``"error"`` set. @@ -1659,7 +1715,10 @@ class SessionDB: except (TypeError, ValueError): pass # corrupt meta; treat as no prior run - pruned = self.prune_sessions(older_than_days=retention_days) + pruned = self.prune_sessions( + older_than_days=retention_days, + sessions_dir=sessions_dir, + ) result["pruned"] = pruned # Only VACUUM if we actually freed rows — VACUUM on a tight DB