mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
feat(state): auto-prune old sessions + VACUUM state.db at startup (#13861)
* feat(state): auto-prune old sessions + VACUUM state.db at startup state.db accumulates every session, message, and FTS5 index entry forever. A heavy user (gateway + cron) reported 384MB with 982 sessions / 68K messages causing slowdown; manual 'hermes sessions prune --older-than 7' + VACUUM brought it to 43MB. The prune command and VACUUM are not wired to run automatically anywhere — sessions grew unbounded until users noticed. Changes: - hermes_state.py: new state_meta key/value table, vacuum() method, and maybe_auto_prune_and_vacuum() — idempotent via last-run timestamp in state_meta so it only actually executes once per min_interval_hours across all Hermes processes for a given HERMES_HOME. Never raises. - hermes_cli/config.py: new 'sessions:' block in DEFAULT_CONFIG (auto_prune=True, retention_days=90, vacuum_after_prune=True, min_interval_hours=24). Added to _KNOWN_ROOT_KEYS. - cli.py: call maintenance once at HermesCLI init (shared helper _run_state_db_auto_maintenance reads config and delegates to DB). - gateway/run.py: call maintenance once at GatewayRunner init. - Docs: user-guide/sessions.md rewrites 'Automatic Cleanup' section. Why VACUUM matters: SQLite does NOT shrink the file on DELETE — freed pages get reused on next INSERT. Without VACUUM, a delete-heavy DB stays bloated forever. VACUUM only runs when the prune actually removed rows, so tight DBs don't pay the I/O cost. Tests: 10 new tests in tests/test_hermes_state.py covering state_meta, vacuum, idempotency, interval skipping, VACUUM-only-when-needed, corrupt-marker recovery. All 246 existing state/config/gateway tests still pass. Verified E2E with real imports + isolated HERMES_HOME: DEFAULT_CONFIG exposes the new block, load_config() returns it for fresh installs, first call prunes+vacuums, second call within min_interval_hours skips, and the state_meta marker persists across connection close/reopen. * sessions.auto_prune defaults to false (opt-in) Session history powers session_search recall across past conversations, so silently pruning on startup could surprise users. Ship the machinery disabled and let users opt in when they notice state.db is hurting performance. - DEFAULT_CONFIG.sessions.auto_prune: True → False - Call-site fallbacks in cli.py and gateway/run.py match the new default (so unmigrated configs still see off) - Docs: flip 'Enable in config.yaml' framing + tip explains the tradeoff
This commit is contained in:
@@ -893,6 +893,34 @@ DEFAULT_CONFIG = {
|
||||
"force_ipv4": False,
|
||||
},
|
||||
|
||||
# Session storage — controls automatic cleanup of ~/.hermes/state.db.
|
||||
# state.db accumulates every session, message, tool call, and FTS5 index
|
||||
# entry forever. Without auto-pruning, a heavy user (gateway + cron)
|
||||
# reports 384MB+ databases with 68K+ messages, which slows down FTS5
|
||||
# inserts, /resume listing, and insights queries.
|
||||
"sessions": {
|
||||
# When true, prune ended sessions older than retention_days once
|
||||
# per (roughly) min_interval_hours at CLI/gateway/cron startup.
|
||||
# Only touches ended sessions — active sessions are always preserved.
|
||||
# Default false: session history is valuable for search recall, and
|
||||
# silently deleting it could surprise users. Opt in explicitly.
|
||||
"auto_prune": False,
|
||||
# How many days of ended-session history to keep. Matches the
|
||||
# default of ``hermes sessions prune``.
|
||||
"retention_days": 90,
|
||||
# VACUUM after a prune that actually deleted rows. SQLite does not
|
||||
# reclaim disk space on DELETE — freed pages are just reused on
|
||||
# subsequent INSERTs — so without VACUUM the file stays bloated
|
||||
# even after pruning. VACUUM blocks writes for a few seconds per
|
||||
# 100MB, so it only runs at startup, and only when prune deleted
|
||||
# ≥1 session.
|
||||
"vacuum_after_prune": True,
|
||||
# Minimum hours between auto-maintenance runs (avoids repeating
|
||||
# the sweep on every CLI invocation). Tracked via state_meta in
|
||||
# state.db itself, so it's shared across all processes.
|
||||
"min_interval_hours": 24,
|
||||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 22,
|
||||
}
|
||||
@@ -2118,6 +2146,7 @@ _KNOWN_ROOT_KEYS = {
|
||||
"fallback_providers", "credential_pool_strategies", "toolsets",
|
||||
"agent", "terminal", "display", "compression", "delegation",
|
||||
"auxiliary", "custom_providers", "context", "memory", "gateway",
|
||||
"sessions",
|
||||
}
|
||||
|
||||
# Valid fields inside a custom_providers list entry
|
||||
|
||||
Reference in New Issue
Block a user