feat: add persistent memory system + SQLite session store

Two-part implementation: Part A - Curated Bounded Memory: - New memory tool (tools/memory_tool.py) with MEMORY.md + USER.md stores - Character-limited (2200/1375 chars), § delimited entries - Frozen snapshot injected into system prompt at session start - Model manages pruning via replace/remove with substring matching - Usage indicator shown in system prompt header Part B - SQLite Session Store: - New hermes_state.py with SessionDB class, FTS5 full-text search - Gateway session.py rewritten to dual-write SQLite + legacy JSONL - Compression-triggered session splitting with parent_session_id chains - New session_search tool with Gemini Flash summarization of matched sessions - CLI session lifecycle (create on launch, close on exit) Also: - System prompt now cached per session, only rebuilt on compression (fixes prefix cache invalidation from date/time changes every turn) - Config version bumped to 3, hermes doctor checks for new artifacts - Disabled in batch_runner and RL environments
2026-04-28 06:51:16 +08:00 · 2026-02-19 00:57:31 -08:00
parent 655303f2f1
commit 440c244cac
19 changed files with 2397 additions and 327 deletions
--- a/tools/memory_tool.py
+++ b/tools/memory_tool.py
@@ -0,0 +1,385 @@
+#!/usr/bin/env python3
+"""
+Memory Tool Module - Persistent Curated Memory
+
+Provides bounded, file-backed memory that persists across sessions. Two stores:
+  - MEMORY.md: agent's personal notes and observations (environment facts, project
+    conventions, tool quirks, things learned)
+  - USER.md: what the agent knows about the user (preferences, communication style,
+    expectations, workflow habits)
+
+Both are injected into the system prompt as a frozen snapshot at session start.
+Mid-session writes update files on disk immediately (durable) but do NOT change
+the system prompt -- this preserves the prefix cache for the entire session.
+The snapshot refreshes on the next session start.
+
+Entry delimiter: § (section sign). Entries can be multiline.
+Character limits (not tokens) because char counts are model-independent.
+
+Design:
+- Single `memory` tool with action parameter: add, replace, remove, read
+- replace/remove use short unique substring matching (not full text or IDs)
+- Behavioral guidance lives in the tool schema description
+- Frozen snapshot pattern: system prompt is stable, tool responses show live state
+"""
+
+import json
+import os
+import fcntl
+from pathlib import Path
+from typing import Dict, Any, List, Optional
+
+# Directory holding the persisted memory files: "<HERMES_HOME>/memories",
+# where HERMES_HOME falls back to "~/.hermes" when the variable is not set.
+MEMORY_DIR = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"), "memories")
+
+# Text placed between entries when a store is serialized: a section sign on its own line.
+ENTRY_DELIMITER = "\n§\n"
+
+
+class MemoryStore:
+    """
+    Bounded curated memory with file persistence. One instance per AIAgent.
+
+    Maintains two parallel states:
+      - _system_prompt_snapshot: frozen at load time, used for system prompt injection.
+        Never mutated mid-session. Keeps prefix cache stable.
+      - memory_entries / user_entries: live state, mutated by tool calls, persisted to disk.
+        Tool responses always reflect this live state.
+
+    Targets are the strings "memory" (MEMORY.md) and "user" (USER.md). The
+    in-memory accessors treat any target other than "user" as "memory".
+    """
+
+    def __init__(self, memory_char_limit: int = 2200, user_char_limit: int = 1375):
+        """
+        Create an empty store; call load_from_disk() to populate it.
+
+        Args:
+            memory_char_limit: Maximum serialized size, in characters and
+                including delimiters, of the "memory" store.
+            user_char_limit: The same limit for the "user" store.
+        """
+        self.memory_entries: List[str] = []
+        self.user_entries: List[str] = []
+        self.memory_char_limit = memory_char_limit
+        self.user_char_limit = user_char_limit
+        # Frozen snapshot for system prompt -- set once at load_from_disk()
+        self._system_prompt_snapshot: Dict[str, str] = {"memory": "", "user": ""}
+
+    def load_from_disk(self):
+        """Load entries from MEMORY.md and USER.md, capture system prompt snapshot."""
+        MEMORY_DIR.mkdir(parents=True, exist_ok=True)
+
+        self.memory_entries = self._read_file(MEMORY_DIR / "MEMORY.md")
+        self.user_entries = self._read_file(MEMORY_DIR / "USER.md")
+
+        # Capture frozen snapshot for system prompt injection
+        self._system_prompt_snapshot = {
+            "memory": self._render_block("memory", self.memory_entries),
+            "user": self._render_block("user", self.user_entries),
+        }
+
+    def save_to_disk(self, target: str):
+        """
+        Persist the live entries of `target` to its file.
+
+        Called after every mutation. A target other than "memory" or "user"
+        writes nothing.
+
+        Raises:
+            RuntimeError: If the memory file cannot be written.
+        """
+        MEMORY_DIR.mkdir(parents=True, exist_ok=True)
+
+        if target == "memory":
+            self._write_file(MEMORY_DIR / "MEMORY.md", self.memory_entries)
+        elif target == "user":
+            self._write_file(MEMORY_DIR / "USER.md", self.user_entries)
+
+    def _entries_for(self, target: str) -> List[str]:
+        """Return the live entry list for `target` ("user", else memory)."""
+        if target == "user":
+            return self.user_entries
+        return self.memory_entries
+
+    def _set_entries(self, target: str, entries: List[str]):
+        """Rebind the live entry list for `target` ("user", else memory)."""
+        if target == "user":
+            self.user_entries = entries
+        else:
+            self.memory_entries = entries
+
+    def _char_count(self, target: str) -> int:
+        """Return the serialized size of `target`, delimiters included."""
+        entries = self._entries_for(target)
+        if not entries:
+            return 0
+        return len(ENTRY_DELIMITER.join(entries))
+
+    def _char_limit(self, target: str) -> int:
+        """Return the configured character limit for `target`."""
+        if target == "user":
+            return self.user_char_limit
+        return self.memory_char_limit
+
+    def add(self, target: str, content: str) -> Dict[str, Any]:
+        """
+        Append a new entry to `target` and persist it.
+
+        Returns a success response, or an error response that leaves the store
+        untouched when the content is blank or would exceed the char limit.
+        Exceptions from save_to_disk() propagate after the live state has been
+        restored.
+        """
+        content = content.strip()
+        if not content:
+            return {"success": False, "error": "Content cannot be empty."}
+
+        entries = self._entries_for(target)
+        limit = self._char_limit(target)
+
+        # Build a new list instead of appending in place, so the live state is
+        # only swapped once the entry is known to fit.
+        new_entries = entries + [content]
+        new_total = len(ENTRY_DELIMITER.join(new_entries))
+
+        if new_total > limit:
+            current = self._char_count(target)
+            return {
+                "success": False,
+                "error": (
+                    f"Memory at {current:,}/{limit:,} chars. "
+                    f"Adding this entry ({len(content)} chars) would exceed the limit. "
+                    f"Replace or remove existing entries first."
+                ),
+                "current_entries": entries,
+                "usage": f"{current:,}/{limit:,}",
+            }
+
+        self._commit(target, new_entries)
+
+        return self._success_response(target, "Entry added.")
+
+    def replace(self, target: str, old_text: str, new_content: str) -> Dict[str, Any]:
+        """
+        Replace the single entry containing `old_text` with `new_content`.
+
+        Returns an error response (store untouched) when either argument is
+        blank, when zero or several entries contain `old_text`, or when the
+        replacement would exceed the char limit.
+        """
+        old_text = old_text.strip()
+        new_content = new_content.strip()
+        if not old_text:
+            return {"success": False, "error": "old_text cannot be empty."}
+        if not new_content:
+            return {"success": False, "error": "new_content cannot be empty. Use 'remove' to delete entries."}
+
+        idx, error = self._locate(target, old_text)
+        if error is not None:
+            return error
+
+        limit = self._char_limit(target)
+
+        # Check that replacement doesn't blow the budget
+        new_entries = self._entries_for(target).copy()
+        new_entries[idx] = new_content
+        new_total = len(ENTRY_DELIMITER.join(new_entries))
+
+        if new_total > limit:
+            return {
+                "success": False,
+                "error": (
+                    f"Replacement would put memory at {new_total:,}/{limit:,} chars. "
+                    f"Shorten the new content or remove other entries first."
+                ),
+            }
+
+        self._commit(target, new_entries)
+
+        return self._success_response(target, "Entry replaced.")
+
+    def remove(self, target: str, old_text: str) -> Dict[str, Any]:
+        """
+        Remove the single entry containing `old_text`.
+
+        Returns an error response (store untouched) when `old_text` is blank or
+        when zero or several entries contain it.
+        """
+        old_text = old_text.strip()
+        if not old_text:
+            return {"success": False, "error": "old_text cannot be empty."}
+
+        idx, error = self._locate(target, old_text)
+        if error is not None:
+            return error
+
+        entries = self._entries_for(target)
+        self._commit(target, entries[:idx] + entries[idx + 1:])
+
+        return self._success_response(target, "Entry removed.")
+
+    def read(self, target: str) -> Dict[str, Any]:
+        """Return live current entries and usage stats."""
+        return self._success_response(target)
+
+    def format_for_system_prompt(self, target: str) -> Optional[str]:
+        """
+        Return the frozen snapshot for system prompt injection.
+
+        This returns the state captured at load_from_disk() time, NOT the live
+        state. Mid-session writes do not affect this. This keeps the system
+        prompt stable across all turns, preserving the prefix cache.
+
+        Returns None if the snapshot is empty (no entries at load time).
+        """
+        block = self._system_prompt_snapshot.get(target, "")
+        return block if block else None
+
+    # -- Internal helpers --
+
+    def _locate(self, target: str, old_text: str) -> tuple:
+        """
+        Find the one entry of `target` that contains `old_text`.
+
+        Returns (index, None) on a unique match, otherwise (None, error_response)
+        where error_response is the dict to hand back to the caller.
+        """
+        entries = self._entries_for(target)
+        hits = [i for i, entry in enumerate(entries) if old_text in entry]
+
+        if not hits:
+            return None, {"success": False, "error": f"No entry matched '{old_text}'."}
+
+        if len(hits) > 1:
+            # Show at most 80 chars of each candidate so the caller can pick a
+            # more specific snippet.
+            previews = [
+                entries[i][:80] + ("..." if len(entries[i]) > 80 else "")
+                for i in hits
+            ]
+            return None, {
+                "success": False,
+                "error": f"Multiple entries matched '{old_text}'. Be more specific.",
+                "matches": previews,
+            }
+
+        return hits[0], None
+
+    def _commit(self, target: str, entries: List[str]):
+        """
+        Make `entries` the live state of `target` and persist it.
+
+        If persisting fails, the previous live list is restored before the
+        exception propagates, so memory never claims more than the disk holds.
+        """
+        previous = self._entries_for(target)
+        self._set_entries(target, entries)
+        try:
+            self.save_to_disk(target)
+        except Exception:
+            self._set_entries(target, previous)
+            raise
+
+    def _success_response(self, target: str, message: Optional[str] = None) -> Dict[str, Any]:
+        """Build the success payload: live entries, usage string, entry count, optional message."""
+        entries = self._entries_for(target)
+        current = self._char_count(target)
+        limit = self._char_limit(target)
+        pct = int((current / limit) * 100) if limit > 0 else 0
+
+        resp = {
+            "success": True,
+            "target": target,
+            "entries": entries,
+            "usage": f"{pct}% — {current:,}/{limit:,} chars",
+            "entry_count": len(entries),
+        }
+        if message:
+            resp["message"] = message
+        return resp
+
+    def _render_block(self, target: str, entries: List[str]) -> str:
+        """Render a system prompt block with header and usage indicator ("" when empty)."""
+        if not entries:
+            return ""
+
+        limit = self._char_limit(target)
+        content = ENTRY_DELIMITER.join(entries)
+        current = len(content)
+        pct = int((current / limit) * 100) if limit > 0 else 0
+
+        if target == "user":
+            header = f"USER PROFILE (who the user is) [{pct}% — {current:,}/{limit:,} chars]"
+        else:
+            header = f"MEMORY (your personal notes) [{pct}% — {current:,}/{limit:,} chars]"
+
+        separator = "═" * 46
+        return f"{separator}\n{header}\n{separator}\n{content}"
+
+    @staticmethod
+    def _read_file(path: Path) -> List[str]:
+        """
+        Read a memory file and split it into entries.
+
+        Entries are separated by a line holding only the delimiter character,
+        which is how _write_file() serializes them. A "§" appearing inside a
+        line of text is ordinary content and does not split the entry.
+
+        Returns an empty list when the file is missing, unreadable or blank.
+        """
+        try:
+            with open(path, "r", encoding="utf-8") as f:
+                fcntl.flock(f, fcntl.LOCK_SH)
+                try:
+                    raw = f.read()
+                finally:
+                    fcntl.flock(f, fcntl.LOCK_UN)
+        except (OSError, IOError):
+            # Best effort: a missing or unreadable file means "no entries".
+            return []
+
+        marker = ENTRY_DELIMITER.strip()
+        entries: List[str] = []
+        current: List[str] = []
+        for line in raw.split("\n"):
+            if line.strip() == marker:
+                entries.append("\n".join(current).strip())
+                current = []
+            else:
+                current.append(line)
+        entries.append("\n".join(current).strip())
+
+        return [e for e in entries if e]
+
+    @staticmethod
+    def _write_file(path: Path, entries: List[str]):
+        """
+        Write entries to a memory file under an exclusive lock.
+
+        Raises:
+            RuntimeError: If the file cannot be opened, locked or written.
+        """
+        content = ENTRY_DELIMITER.join(entries) if entries else ""
+        try:
+            # Mode "a" creates the file when needed but, unlike "w", does not
+            # truncate it on open. The old content is only discarded once the
+            # exclusive lock is held, so a reader holding the shared lock never
+            # observes an empty file.
+            with open(path, "a", encoding="utf-8") as f:
+                fcntl.flock(f, fcntl.LOCK_EX)
+                try:
+                    f.truncate(0)
+                    f.write(content)
+                    # Push buffered data out while the lock is still held;
+                    # otherwise it would only be written at close, after unlock.
+                    f.flush()
+                finally:
+                    fcntl.flock(f, fcntl.LOCK_UN)
+        except (OSError, IOError) as e:
+            raise RuntimeError(f"Failed to write memory file {path}: {e}") from e
+
+
+def memory_tool(
+    action: str,
+    target: str = "memory",
+    content: str = None,
+    old_text: str = None,
+    store: Optional[MemoryStore] = None,
+) -> str:
+    """
+    Entry point of the memory tool: validate arguments, then delegate to the store.
+
+    Args:
+        action: One of "add", "replace", "remove", "read".
+        target: "memory" or "user".
+        content: Entry text; needed by "add" and "replace".
+        old_text: Substring identifying the entry; needed by "replace" and "remove".
+        store: The MemoryStore to operate on; None means memory is unavailable.
+
+    Returns:
+        A JSON string: the store's response, or {"success": false, "error": ...}
+        when validation fails.
+    """
+
+    def _fail(reason: str) -> str:
+        # Every validation failure shares this payload shape.
+        return json.dumps({"success": False, "error": reason}, ensure_ascii=False)
+
+    if store is None:
+        return _fail("Memory is not available. It may be disabled in config or this environment.")
+
+    if target not in ("memory", "user"):
+        return _fail(f"Invalid target '{target}'. Use 'memory' or 'user'.")
+
+    if action == "read":
+        outcome = store.read(target)
+    elif action == "add":
+        if not content:
+            return _fail("Content is required for 'add' action.")
+        outcome = store.add(target, content)
+    elif action == "remove":
+        if not old_text:
+            return _fail("old_text is required for 'remove' action.")
+        outcome = store.remove(target, old_text)
+    elif action == "replace":
+        # old_text is checked before content, so a call missing both reports old_text.
+        if not old_text:
+            return _fail("old_text is required for 'replace' action.")
+        if not content:
+            return _fail("content is required for 'replace' action.")
+        outcome = store.replace(target, old_text, content)
+    else:
+        return _fail(f"Unknown action '{action}'. Use: add, replace, remove, read")
+
+    return json.dumps(outcome, ensure_ascii=False)
+
+
+def check_memory_requirements() -> bool:
+    """Report whether the memory tool can be used; it needs nothing external, so this is always True."""
+    return True
+
+
+# =============================================================================
+# OpenAI Function-Calling Schema
+# =============================================================================
+
+# Function-calling schema for the `memory` tool. The description doubles as the
+# behavioral guidance for the model, so its wording is part of the tool's behavior.
+MEMORY_SCHEMA = {
+    "name": "memory",
+    "description": (
+        "Manage persistent memory (visible in system prompt). Targets: "
+        "'memory' (your notes) or 'user' (user profile).\n"
+        "Actions: add, replace, remove, read. For replace/remove, old_text "
+        "is a short unique snippet to identify the entry.\n"
+        "Usage indicator in system prompt shows capacity. When >80%, "
+        "consolidate/replace before adding. Prefer replacing over removing.\n"
+        "Write: non-obvious facts, user preferences, tool quirks. "
+        "Skip: trivial info, things in skills, re-discoverable content."
+    ),
+    "parameters": {
+        "type": "object",
+        "properties": {
+            # Mirrors the actions dispatched by memory_tool().
+            "action": {
+                "type": "string",
+                "enum": ["add", "replace", "remove", "read"],
+                "description": "The action to perform."
+            },
+            # Mirrors the targets accepted by memory_tool().
+            "target": {
+                "type": "string",
+                "enum": ["memory", "user"],
+                "description": "Which memory store: 'memory' for personal notes, 'user' for user profile."
+            },
+            "content": {
+                "type": "string",
+                "description": "The entry content. Required for 'add' and 'replace'."
+            },
+            "old_text": {
+                "type": "string",
+                "description": "Short unique substring identifying the entry to replace or remove."
+            },
+        },
+        # content / old_text are only needed by some actions, so they are not
+        # listed here; memory_tool() checks them per action.
+        "required": ["action", "target"],
+    },
+}