Files
hermes-agent/plugins/cognitive-memory/__init__.py

379 lines
14 KiB
Python
Raw Normal View History

"""Cognitive memory plugin — MemoryProvider interface.
Semantic memory with vector embeddings (via litellm), auto-classification,
contradiction detection, importance decay, and time-based forgetting.
Local SQLite storage with binary-packed float32 embeddings.
Original PR #727 by 0xbyt4, adapted to MemoryProvider ABC.
Requires: litellm (for embeddings via any provider — OpenAI, Cohere, etc.)
Config via environment: uses litellm's standard env vars (OPENAI_API_KEY, etc.)
"""
from __future__ import annotations
import json
import logging
import math
import os
import re
import sqlite3
import struct
import time
from pathlib import Path
from typing import Any, Dict, List, Optional
from agent.memory_provider import MemoryProvider
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Directory for the plugin's on-disk data: "cognitive_memory" under
# $HERMES_HOME, falling back to ~/.hermes when the variable is unset.
_DB_DIR = Path(os.environ.get("HERMES_HOME", os.path.expanduser("~/.hermes"))) / "cognitive_memory"
# Default vector size of text-embedding-3-small.
# NOTE(review): not referenced anywhere in the visible code.
_EMBEDDING_DIM = 1536
# Cosine similarity above which a new memory is treated as a duplicate of an
# existing one and rejected by the store action.
_SIMILARITY_DEDUP_THRESHOLD = 0.95
# ---------------------------------------------------------------------------
# Embedding helper
# ---------------------------------------------------------------------------
def _get_embedding(text: str) -> Optional[List[float]]:
"""Get embedding via litellm."""
try:
import litellm
resp = litellm.embedding(model="text-embedding-3-small", input=[text])
return resp.data[0]["embedding"]
except Exception as e:
logger.debug("Embedding failed: %s", e)
return None
def _cosine_similarity(a: List[float], b: List[float]) -> float:
dot = sum(x * y for x, y in zip(a, b))
mag_a = math.sqrt(sum(x * x for x in a))
mag_b = math.sqrt(sum(x * x for x in b))
if mag_a == 0 or mag_b == 0:
return 0.0
return dot / (mag_a * mag_b)
def _pack_embedding(emb: List[float]) -> bytes:
return struct.pack(f"{len(emb)}f", *emb)
def _unpack_embedding(data: bytes) -> List[float]:
n = len(data) // 4
return list(struct.unpack(f"{n}f", data))
# ---------------------------------------------------------------------------
# Classification
# ---------------------------------------------------------------------------
_CATEGORY_PATTERNS = {
"preference": [r"\b(?:prefer|like|love|hate|dislike|favorite)\b"],
"correction": [r"\b(?:actually|no,|wrong|incorrect|not right)\b"],
"fact": [r"\b(?:is|are|was|were|has|have)\b"],
"procedure": [r"\b(?:first|then|step|always|never|usually)\b"],
"environment": [r"\b(?:running|using|installed|version|os|platform)\b"],
}
def _classify(content: str) -> str:
content_lower = content.lower()
for category, patterns in _CATEGORY_PATTERNS.items():
for pattern in patterns:
if re.search(pattern, content_lower):
return category
return "general"
def _estimate_importance(content: str, category: str) -> float:
base = {"correction": 0.9, "preference": 0.7, "procedure": 0.6}.get(category, 0.5)
# Longer content slightly more important
length_bonus = min(len(content) / 500, 0.2)
return min(base + length_bonus, 1.0)
# ---------------------------------------------------------------------------
# Tool schema
# ---------------------------------------------------------------------------
# Schema of the single "cognitive_recall" tool. "action" is the only required
# parameter and selects the operation; the description pairs the remaining
# parameters with actions: "query" with recall, "content" (and optionally
# "category") with store, and "memory_id" with forget.
COGNITIVE_RECALL_SCHEMA = {
    "name": "cognitive_recall",
    "description": (
        "Semantic memory with automatic classification and importance scoring. "
        "Actions: recall (search by meaning), store (add a fact), "
        "forget (remove by ID), status (memory stats)."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "action": {
                "type": "string",
                "enum": ["recall", "store", "forget", "status"],
                "description": "Action to perform.",
            },
            "query": {"type": "string", "description": "Search query (for 'recall')."},
            "content": {"type": "string", "description": "Fact to store (for 'store')."},
            "category": {
                "type": "string",
                "enum": ["preference", "fact", "procedure", "environment", "correction", "general"],
                "description": "Category (auto-detected if omitted).",
            },
            "memory_id": {"type": "integer", "description": "Memory ID (for 'forget')."},
        },
        "required": ["action"],
    },
}
# ---------------------------------------------------------------------------
# MemoryProvider implementation
# ---------------------------------------------------------------------------
class CognitiveMemoryProvider(MemoryProvider):
    """Semantic memory with embeddings, classification, and forgetting.

    Memories are rows in a local SQLite database (``cognitive.db`` under
    ``_DB_DIR``). Each row holds the text, a category, an importance score
    that decays over time, and an optional packed embedding used for
    similarity search. Rows are soft-deleted via the ``deleted`` flag.
    """

    # Only the most important rows are scored per query, which bounds the cost
    # of the pure-Python similarity scan.
    _CANDIDATE_LIMIT = 50
    # Days over which the recency part of a score falls from 1 to 0.
    _PREFETCH_RECENCY_DAYS = 30
    _RECALL_RECENCY_DAYS = 90
    # Minimum number of seconds between two decay passes.
    _DECAY_INTERVAL = 3600
    # Key of the ``memory_meta`` row holding the time of the last decay pass.
    _LAST_DECAY_KEY = "last_decay"

    def __init__(self):
        self._conn: Optional[sqlite3.Connection] = None
        self._decay_half_life = 30  # days
        # In-process cache of the last decay time, used as a cheap throttle.
        # The authoritative value is persisted in the database.
        self._last_decay = 0

    @property
    def name(self) -> str:
        """Provider identifier."""
        return "cognitive"

    def is_available(self) -> bool:
        """Return True when litellm can be imported."""
        try:
            import litellm  # noqa: F401
            return True
        except ImportError:
            return False

    def initialize(self, session_id: str, **kwargs) -> None:
        """Open the SQLite store, creating the schema on first use.

        ``session_id`` and ``kwargs`` are accepted for the provider interface
        and are not used here.
        """
        if self._conn:
            # Re-initialization: do not leak the previous connection.
            self._conn.close()
            self._conn = None
        _DB_DIR.mkdir(parents=True, exist_ok=True)
        db_path = _DB_DIR / "cognitive.db"
        self._conn = sqlite3.connect(str(db_path))
        self._conn.execute("PRAGMA journal_mode=WAL")
        self._conn.executescript("""
            CREATE TABLE IF NOT EXISTS memories (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                content TEXT NOT NULL,
                category TEXT DEFAULT 'general',
                importance REAL DEFAULT 0.5,
                embedding BLOB,
                retrieval_count INTEGER DEFAULT 0,
                helpful_count INTEGER DEFAULT 0,
                created_at REAL,
                updated_at REAL,
                deleted INTEGER DEFAULT 0
            );
            CREATE INDEX IF NOT EXISTS idx_mem_importance ON memories(importance DESC);
            CREATE INDEX IF NOT EXISTS idx_mem_category ON memories(category);
            CREATE TABLE IF NOT EXISTS memory_meta (
                key TEXT PRIMARY KEY,
                value REAL
            );
        """)
        self._conn.commit()
        # Decay is measured against a persisted baseline so that restarting
        # the process neither triggers an extra decay pass nor loses elapsed
        # time. A database without a baseline starts its clock now.
        baseline = self._read_last_decay()
        if baseline is None:
            baseline = time.time()
            with self._conn:
                self._write_last_decay(baseline)
        self._last_decay = baseline

    def system_prompt_block(self) -> str:
        """Return a short status block for the system prompt.

        Returns an empty string when the store is not open, is empty, or the
        count query fails.
        """
        if not self._conn:
            return ""
        try:
            count = self._conn.execute(
                "SELECT COUNT(*) FROM memories WHERE deleted = 0"
            ).fetchone()[0]
        except Exception as e:
            logger.debug("Cognitive memory count failed: %s", e)
            count = 0
        if count == 0:
            return ""
        return (
            f"# Cognitive Memory\n"
            f"Active. {count} memories with semantic recall and importance scoring.\n"
            f"Use cognitive_recall to search, store facts, or check status.\n"
            f"Memories decay over time — frequently used facts persist, unused ones fade."
        )

    def prefetch(self, query: str) -> str:
        """Return up to five memories relevant to *query* as a markdown list.

        Best-effort: returns an empty string when the store is not open, the
        query is empty, no embedding is available, nothing is similar enough
        (similarity must exceed 0.3), or any error occurs.
        """
        if not self._conn or not query:
            return ""
        emb = _get_embedding(query)
        if not emb:
            return ""
        try:
            hits = self._score_candidates(
                emb, min_similarity=0.3, recency_days=self._PREFETCH_RECENCY_DAYS
            )
        except Exception as e:
            logger.debug("Cognitive prefetch failed: %s", e)
            return ""
        if not hits:
            return ""
        lines = [f"- {hit['content']}" for hit in hits[:5]]
        return "## Cognitive Memory\n" + "\n".join(lines)

    def sync_turn(self, user_content: str, assistant_content: str) -> None:
        """Per-turn hook; only used to run the periodic decay cycle."""
        self._maybe_decay()

    def get_tool_schemas(self) -> List[Dict[str, Any]]:
        """Return the schemas of the tools this provider exposes."""
        return [COGNITIVE_RECALL_SCHEMA]

    def handle_tool_call(self, tool_name: str, args: dict, **kwargs) -> str:
        """Dispatch a ``cognitive_recall`` call and return a JSON string.

        Errors (unknown tool or action, store not initialized, database
        failure) are reported as ``{"error": ...}`` rather than raised.
        """
        if tool_name != "cognitive_recall":
            return json.dumps({"error": f"Unknown tool: {tool_name}"})
        action = args.get("action", "")
        if action not in ("store", "recall", "forget", "status"):
            return json.dumps({"error": f"Unknown action: {action}"})
        if not self._conn:
            # Without this guard every action fails with AttributeError on None.
            return json.dumps({"error": "Cognitive memory is not initialized"})
        try:
            if action == "store":
                return self._store(args)
            if action == "recall":
                return self._recall(args)
            if action == "forget":
                return self._forget(args)
            return self._status()
        except sqlite3.Error as e:
            logger.debug("Cognitive %s failed: %s", action, e)
            return json.dumps({"error": f"Memory store error: {e}"})

    def on_memory_write(self, action: str, target: str, content: str) -> None:
        """Mirror an ``add`` memory write into the cognitive store.

        Writes targeting ``"user"`` are stored as preferences; everything else
        is auto-classified. Other actions are ignored.
        """
        if action != "add" or not self._conn or not content:
            return
        category = "preference" if target == "user" else _classify(content)
        importance = _estimate_importance(content, category)
        self._insert(content, category, importance, _get_embedding(content))

    def shutdown(self) -> None:
        """Close the database connection, if open."""
        if self._conn:
            self._conn.close()
            self._conn = None

    # -- Internal methods ----------------------------------------------------

    def _insert(self, content: str, category: str, importance: float,
                emb: Optional[List[float]]) -> int:
        """Insert one memory row, commit, and return its id."""
        now = time.time()
        cur = self._conn.execute(
            "INSERT INTO memories (content, category, importance, embedding, created_at, updated_at) "
            "VALUES (?, ?, ?, ?, ?, ?)",
            (content, category, importance, _pack_embedding(emb) if emb else None, now, now),
        )
        self._conn.commit()
        return cur.lastrowid

    def _find_duplicate(self, emb: List[float]) -> Optional[int]:
        """Return the id of an active memory nearly identical to *emb*, if any."""
        rows = self._conn.execute(
            "SELECT id, embedding FROM memories WHERE deleted = 0 AND embedding IS NOT NULL"
        ).fetchall()
        for mem_id, blob in rows:
            if _cosine_similarity(emb, _unpack_embedding(blob)) > _SIMILARITY_DEDUP_THRESHOLD:
                return mem_id
        return None

    def _score_candidates(self, query_emb: List[float], *, min_similarity: float,
                          recency_days: float) -> List[Dict[str, Any]]:
        """Score the most important active memories against *query_emb*.

        Score = 0.5 * similarity + 0.3 * importance + 0.2 * recency, where
        recency falls linearly from 1 to 0 over *recency_days* since creation.
        Only memories whose similarity exceeds *min_similarity* are returned,
        sorted best first.
        """
        rows = self._conn.execute(
            "SELECT id, content, category, importance, embedding, created_at FROM memories "
            "WHERE deleted = 0 AND embedding IS NOT NULL "
            "ORDER BY importance DESC LIMIT ?",
            (self._CANDIDATE_LIMIT,),
        ).fetchall()
        now = time.time()
        scored = []
        for mem_id, content, category, importance, blob, created_at in rows:
            sim = _cosine_similarity(query_emb, _unpack_embedding(blob))
            if sim <= min_similarity:
                continue
            # Age comes from created_at (the row id is not a timestamp);
            # rows without one are treated as brand new.
            days_old = (now - (created_at or now)) / 86400
            recency = max(0, 1 - days_old / recency_days)
            scored.append({
                "id": mem_id,
                "content": content,
                "category": category,
                "similarity": sim,
                "score": 0.5 * sim + 0.3 * importance + 0.2 * recency,
            })
        scored.sort(key=lambda hit: hit["score"], reverse=True)
        return scored

    def _store(self, args: dict) -> str:
        """Handle the ``store`` action: classify, de-duplicate, and insert."""
        content = args.get("content", "")
        if not content:
            return json.dumps({"error": "content is required"})
        category = args.get("category") or _classify(content)
        importance = _estimate_importance(content, category)
        emb = _get_embedding(content)
        # Dedup check (only possible when an embedding is available).
        if emb:
            existing_id = self._find_duplicate(emb)
            if existing_id is not None:
                return json.dumps({"error": "Very similar memory already exists", "existing_id": existing_id})
        new_id = self._insert(content, category, importance, emb)
        return json.dumps({"id": new_id, "category": category, "importance": round(importance, 2)})

    def _recall(self, args: dict) -> str:
        """Handle the ``recall`` action: return up to ten best matches.

        Also increments ``retrieval_count`` on every returned memory.
        """
        query = args.get("query", "")
        if not query:
            return json.dumps({"error": "query is required"})
        emb = _get_embedding(query)
        if not emb:
            return json.dumps({"error": "Embedding generation failed"})
        top = self._score_candidates(
            emb, min_similarity=0.2, recency_days=self._RECALL_RECENCY_DAYS
        )[:10]
        # Bump retrieval counts
        self._conn.executemany(
            "UPDATE memories SET retrieval_count = retrieval_count + 1 WHERE id = ?",
            [(hit["id"],) for hit in top],
        )
        self._conn.commit()
        results = [
            {
                "id": hit["id"], "content": hit["content"], "category": hit["category"],
                "score": round(hit["score"], 3), "similarity": round(hit["similarity"], 3),
            }
            for hit in top
        ]
        return json.dumps({"results": results, "count": len(results)})

    def _forget(self, args: dict) -> str:
        """Handle the ``forget`` action: soft-delete one memory by id."""
        memory_id = args.get("memory_id")
        if memory_id is None:
            return json.dumps({"error": "memory_id is required"})
        try:
            row_id = int(memory_id)
        except (TypeError, ValueError, OverflowError):
            return json.dumps({"error": "memory_id must be an integer"})
        cur = self._conn.execute(
            "UPDATE memories SET deleted = 1 WHERE id = ? AND deleted = 0", (row_id,)
        )
        self._conn.commit()
        if cur.rowcount == 0:
            # Do not claim success for an unknown or already-forgotten id.
            return json.dumps({
                "forgotten": False, "id": memory_id,
                "error": "No active memory with that id",
            })
        return json.dumps({"forgotten": True, "id": memory_id})

    def _status(self) -> str:
        """Handle the ``status`` action: totals overall and per category."""
        total = self._conn.execute("SELECT COUNT(*) FROM memories WHERE deleted = 0").fetchone()[0]
        by_cat = self._conn.execute(
            "SELECT category, COUNT(*) FROM memories WHERE deleted = 0 GROUP BY category"
        ).fetchall()
        return json.dumps({
            "total": total,
            "by_category": {row[0]: row[1] for row in by_cat},
            "decay_half_life_days": self._decay_half_life,
        })

    def _read_last_decay(self) -> Optional[float]:
        """Return the persisted time of the last decay pass, or None."""
        row = self._conn.execute(
            "SELECT value FROM memory_meta WHERE key = ?", (self._LAST_DECAY_KEY,)
        ).fetchone()
        if row is None or row[0] is None:
            return None
        return float(row[0])

    def _write_last_decay(self, when: float) -> None:
        """Persist the time of the last decay pass (caller commits)."""
        self._conn.execute(
            "INSERT OR REPLACE INTO memory_meta (key, value) VALUES (?, ?)",
            (self._LAST_DECAY_KEY, when),
        )

    def _maybe_decay(self) -> None:
        """Decay importance by real elapsed time, at most about once an hour.

        Importance is multiplied by ``0.5 ** (elapsed_days / half_life_days)``
        so that it halves once per ``_decay_half_life`` days regardless of how
        often this runs or how often the process restarts. Memories whose
        importance drops below 0.05 are soft-deleted. Failures are logged and
        rolled back, never raised.
        """
        if not self._conn or self._decay_half_life <= 0:
            return
        now = time.time()
        if now - self._last_decay < self._DECAY_INTERVAL:
            return
        try:
            # The connection context manager commits on success and rolls
            # back on error, keeping the decay and its timestamp atomic.
            with self._conn:
                baseline = self._read_last_decay()
                if baseline is None:
                    # No baseline recorded yet: start the clock, decay nothing.
                    self._write_last_decay(now)
                    self._last_decay = now
                    return
                elapsed = now - baseline
                if elapsed < self._DECAY_INTERVAL:
                    # Decay already ran recently (e.g. in another process).
                    self._last_decay = baseline
                    return
                factor = 0.5 ** (elapsed / 86400 / self._decay_half_life)
                self._conn.execute(
                    "UPDATE memories SET importance = importance * ? WHERE deleted = 0",
                    (factor,),
                )
                # Prune very low importance
                self._conn.execute(
                    "UPDATE memories SET deleted = 1 WHERE deleted = 0 AND importance < 0.05"
                )
                self._write_last_decay(now)
            self._last_decay = now
        except Exception as e:
            logger.debug("Cognitive decay failed: %s", e)
def register(ctx) -> None:
    """Plugin entry point: create a cognitive memory provider and register it on *ctx*."""
    provider = CognitiveMemoryProvider()
    ctx.register_memory_provider(provider)