mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-18 16:10:27 +08:00
Compare commits
30 Commits
docs/execu
...
feat/sessi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ce0f4838b0 | ||
|
|
2ecad49113 | ||
|
|
8245173d61 | ||
|
|
327e577acf | ||
|
|
b5996b6451 | ||
|
|
ef10d2e7c9 | ||
|
|
af1ea1f4ed | ||
|
|
29575b3712 | ||
|
|
71558e753d | ||
|
|
4f7e64c845 | ||
|
|
2cbf0631a5 | ||
|
|
659af123c3 | ||
|
|
f4c43f0886 | ||
|
|
b54b246071 | ||
|
|
1a00d730eb | ||
|
|
76f40e6449 | ||
|
|
2bed2124a4 | ||
|
|
8709e1ebec | ||
|
|
54d817f882 | ||
|
|
74fdfe6b50 | ||
|
|
02a54e01ce | ||
|
|
8a31985e4f | ||
|
|
41c13ba71d | ||
|
|
36c5b188b5 | ||
|
|
1e29fa8865 | ||
|
|
e74a682b0f | ||
|
|
2b606d20e2 | ||
|
|
3ac750ec07 | ||
|
|
aa2d3e2ee1 | ||
|
|
7d628eaa3d |
@@ -444,6 +444,10 @@ prompt_caching:
|
|||||||
# model: ""
|
# model: ""
|
||||||
# timeout: 30
|
# timeout: 30
|
||||||
# max_concurrency: 3 # Limit parallel summaries to reduce request-burst 429s
|
# max_concurrency: 3 # Limit parallel summaries to reduce request-burst 429s
|
||||||
|
# default_mode: "fast" # 'fast' | 'summary' — mode used when caller passes none.
|
||||||
|
# # fast: FTS5 snippet hits, no LLM call. Default.
|
||||||
|
# # summary: LLM-generated prose synthesis across hits.
|
||||||
|
# # guided requires anchors and cannot be a default.
|
||||||
# extra_body: {} # Provider-specific OpenAI-compatible request fields
|
# extra_body: {} # Provider-specific OpenAI-compatible request fields
|
||||||
# # Example for providers that support request-body
|
# # Example for providers that support request-body
|
||||||
# # reasoning controls:
|
# # reasoning controls:
|
||||||
|
|||||||
@@ -846,6 +846,7 @@ DEFAULT_CONFIG = {
|
|||||||
"timeout": 30,
|
"timeout": 30,
|
||||||
"extra_body": {},
|
"extra_body": {},
|
||||||
"max_concurrency": 3, # Clamp parallel summaries to avoid request-burst 429s on small providers
|
"max_concurrency": 3, # Clamp parallel summaries to avoid request-burst 429s on small providers
|
||||||
|
"default_mode": "fast", # 'fast' | 'summary' — which mode session_search uses when caller passes none
|
||||||
},
|
},
|
||||||
"skills_hub": {
|
"skills_hub": {
|
||||||
"provider": "auto",
|
"provider": "auto",
|
||||||
|
|||||||
227
hermes_state.py
227
hermes_state.py
@@ -25,7 +25,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
from agent.memory_manager import sanitize_context
|
from agent.memory_manager import sanitize_context
|
||||||
from hermes_constants import get_hermes_home
|
from hermes_constants import get_hermes_home
|
||||||
from typing import Any, Callable, Dict, List, Optional, TypeVar
|
from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -1618,6 +1618,185 @@ class SessionDB:
|
|||||||
result.append(msg)
|
result.append(msg)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def get_messages_around(
    self,
    session_id: str,
    around_message_id: int,
    window: int = 5,
) -> List[Dict[str, Any]]:
    """Load a window of messages anchored on a specific message id.

    Returns up to ``window`` messages before the anchor, the anchor itself,
    and up to ``window`` messages after it, all from ``session_id`` and
    ordered by id ascending. Near the start or end of the session fewer
    messages are returned on the truncated side.

    Args:
        session_id: Session the anchor must belong to.
        around_message_id: Id of the anchor message.
        window: Number of neighbours to load on each side. Negative values
            are treated as 0. Values that cannot be converted to an int
            (e.g. ``None`` from an untyped tool-call payload) fall back to
            the default of 5 instead of raising.

    Returns:
        A list of message dicts with ``content`` passed through
        ``self._decode_content`` and ``tool_calls`` JSON-decoded (``[]`` when
        the stored value is not valid JSON). An empty list when
        ``around_message_id`` is not a message of ``session_id``; callers
        decide whether to surface that as an error.
    """
    # Tool-call arguments arrive untyped: ``None < 0`` raises TypeError and a
    # fractional float is rejected by SQLite as a LIMIT value, so normalise
    # to a non-negative int before touching the database.
    try:
        window = max(0, int(window))
    except (TypeError, ValueError, OverflowError):
        window = 5

    with self._lock:
        # Confirm the anchor exists in this session: a cheap guard against
        # cross-session contamination if a caller mixes up session/message
        # ids.
        anchor_exists = self._conn.execute(
            "SELECT 1 FROM messages WHERE id = ? AND session_id = ? LIMIT 1",
            (around_message_id, session_id),
        ).fetchone()
        if not anchor_exists:
            return []

        # Two queries: anchor + before (DESC, take window+1), and after
        # (ASC, take window). Final order is id ASC.
        before_rows = self._conn.execute(
            "SELECT * FROM messages "
            "WHERE session_id = ? AND id <= ? "
            "ORDER BY id DESC LIMIT ?",
            (session_id, around_message_id, window + 1),
        ).fetchall()
        after_rows = self._conn.execute(
            "SELECT * FROM messages "
            "WHERE session_id = ? AND id > ? "
            "ORDER BY id ASC LIMIT ?",
            (session_id, around_message_id, window),
        ).fetchall()

    # Rows are fully materialised by fetchall(), so decoding does not need
    # the connection lock. before_rows is DESC; flip it to ASC first.
    rows = list(reversed(before_rows)) + list(after_rows)
    result = []
    for row in rows:
        msg = dict(row)
        if "content" in msg:
            msg["content"] = self._decode_content(msg["content"])
        if msg.get("tool_calls"):
            try:
                msg["tool_calls"] = json.loads(msg["tool_calls"])
            except (json.JSONDecodeError, TypeError):
                logger.warning(
                    "Failed to deserialize tool_calls in get_messages_around, falling back to []"
                )
                msg["tool_calls"] = []
        result.append(msg)
    return result
|
||||||
|
|
||||||
|
def get_anchored_view(
    self,
    session_id: str,
    around_message_id: int,
    window: int = 5,
    bookend: int = 3,
    keep_roles: Optional[Tuple[str, ...]] = ("user", "assistant"),
) -> Dict[str, Any]:
    """Return an anchored window plus session bookends for guided recall.

    Built on top of ``get_messages_around``. The result has three keys:

    - ``window``: messages immediately surrounding the anchor, filtered to
      ``keep_roles``. The anchor itself is always included regardless of
      role, because callers may have anchored on a tool message.
    - ``bookend_start``: up to ``bookend`` messages from the start of the
      session whose id is strictly before the first message of the
      (unfiltered) window. Empty when the window already reaches the
      session start.
    - ``bookend_end``: up to ``bookend`` messages from the end of the
      session whose id is strictly after the last message of the window,
      in ascending id order.

    Bookends are always restricted to rows with non-empty ``content`` so
    tool-call-only turns do not crowd out prose openings and closings. This
    applies even when ``keep_roles`` is ``None``.

    Args:
        session_id: Session the anchor must belong to.
        around_message_id: Id of the anchor message.
        window: Neighbours per side, forwarded to ``get_messages_around``.
        bookend: Maximum messages per bookend; values below 1 disable both
            bookend queries.
        keep_roles: Roles to keep. ``None`` disables role filtering. A bare
            string is treated as a single role rather than iterated
            character by character.

    Returns:
        ``{"window": [...], "bookend_start": [...], "bookend_end": [...]}``.
        All three lists are empty when the anchor is not in the session;
        the caller decides how to surface that.
    """
    bookend = max(bookend, 0)

    # A lone string would otherwise be iterated per character by set() and
    # by the placeholder builder below.
    if isinstance(keep_roles, str):
        keep_roles = (keep_roles,)
    elif keep_roles is not None:
        keep_roles = tuple(keep_roles)

    # Reuse the primitive: it performs the anchor-existence check, window
    # clamping, content decoding, and tool_calls deserialisation.
    window_rows = self.get_messages_around(
        session_id, around_message_id, window=window
    )
    if not window_rows:
        return {"window": [], "bookend_start": [], "bookend_end": []}

    # Apply the role filter to the window, but never drop the anchor itself.
    if keep_roles is None:
        filtered_window = window_rows
    else:
        keep_set = set(keep_roles)
        filtered_window = [
            m for m in window_rows
            if m.get("id") == around_message_id or m.get("role") in keep_set
        ]

    # Bookend boundaries come from the unfiltered window so a dropped tool
    # message at the edge cannot reappear inside a bookend.
    window_min_id = window_rows[0]["id"]
    window_max_id = window_rows[-1]["id"]

    bookend_start_rows: List[Any] = []
    bookend_end_rows: List[Any] = []
    if bookend > 0:
        role_clause = ""
        role_params: list = []
        if keep_roles is not None:
            # Only "?" placeholders are interpolated; role values are bound.
            role_placeholders = ",".join("?" * len(keep_roles))
            role_clause = f" AND role IN ({role_placeholders})"
            role_params = list(keep_roles)

        head_sql = (
            "SELECT * FROM messages "
            f"WHERE session_id = ? AND id < ?{role_clause} "
            "AND length(content) > 0 "
            "ORDER BY id ASC LIMIT ?"
        )
        tail_sql = (
            "SELECT * FROM messages "
            f"WHERE session_id = ? AND id > ?{role_clause} "
            "AND length(content) > 0 "
            "ORDER BY id DESC LIMIT ?"
        )
        with self._lock:
            bookend_start_rows = self._conn.execute(
                head_sql,
                (session_id, window_min_id, *role_params, bookend),
            ).fetchall()
            bookend_end_rows = self._conn.execute(
                tail_sql,
                (session_id, window_max_id, *role_params, bookend),
            ).fetchall()
        # Tail rows came back DESC so LIMIT keeps the last ones; flip to ASC.
        bookend_end_rows = list(reversed(bookend_end_rows))

    def _hydrate(row) -> Dict[str, Any]:
        """Convert a raw row to a message dict with decoded content/tool_calls."""
        msg = dict(row)
        if "content" in msg:
            msg["content"] = self._decode_content(msg["content"])
        if msg.get("tool_calls"):
            try:
                msg["tool_calls"] = json.loads(msg["tool_calls"])
            except (json.JSONDecodeError, TypeError):
                logger.warning(
                    "Failed to deserialize tool_calls in get_anchored_view, falling back to []"
                )
                msg["tool_calls"] = []
        return msg

    return {
        "window": filtered_window,
        "bookend_start": [_hydrate(r) for r in bookend_start_rows],
        "bookend_end": [_hydrate(r) for r in bookend_end_rows],
    }
|
||||||
|
|
||||||
def resolve_resume_session_id(self, session_id: str) -> str:
|
def resolve_resume_session_id(self, session_id: str) -> str:
|
||||||
"""Redirect a resume target to the descendant session that holds the messages.
|
"""Redirect a resume target to the descendant session that holds the messages.
|
||||||
|
|
||||||
@@ -1885,6 +2064,7 @@ class SessionDB:
|
|||||||
role_filter: List[str] = None,
|
role_filter: List[str] = None,
|
||||||
limit: int = 20,
|
limit: int = 20,
|
||||||
offset: int = 0,
|
offset: int = 0,
|
||||||
|
sort: str = None,
|
||||||
) -> List[Dict[str, Any]]:
|
) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Full-text search across session messages using FTS5.
|
Full-text search across session messages using FTS5.
|
||||||
@@ -1897,6 +2077,19 @@ class SessionDB:
|
|||||||
|
|
||||||
Returns matching messages with session metadata, content snippet,
|
Returns matching messages with session metadata, content snippet,
|
||||||
and surrounding context (1 message before and after the match).
|
and surrounding context (1 message before and after the match).
|
||||||
|
|
||||||
|
``sort`` controls temporal ordering of results:
|
||||||
|
- ``None`` (default): FTS5 BM25 relevance only. Time-neutral, but
|
||||||
|
ties between equally-relevant messages are broken arbitrarily.
|
||||||
|
- ``"newest"``: order by message timestamp DESC, then by rank.
|
||||||
|
Recent matches surface first; rank breaks same-timestamp ties.
|
||||||
|
- ``"oldest"``: order by message timestamp ASC, then by rank.
|
||||||
|
For "how did this start" / "what was the original X" questions.
|
||||||
|
|
||||||
|
The LIKE fallback path (short CJK queries) has no rank to combine with,
|
||||||
|
so it orders by timestamp only: ASC when ``sort`` is ``"oldest"`` and
|
||||||
|
DESC otherwise (including the default). The trigram CJK path honours
|
||||||
|
``sort`` like the main FTS5 path.
|
||||||
"""
|
"""
|
||||||
if not query or not query.strip():
|
if not query or not query.strip():
|
||||||
return []
|
return []
|
||||||
@@ -1905,6 +2098,25 @@ class SessionDB:
|
|||||||
if not query:
|
if not query:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
# Normalise sort. Anything not in the allowed set falls back to None
|
||||||
|
# (FTS5 rank-only) — be forgiving to callers who pass empty string or
|
||||||
|
# an unexpected value rather than failing the search.
|
||||||
|
if isinstance(sort, str):
|
||||||
|
sort_norm = sort.strip().lower()
|
||||||
|
if sort_norm not in ("newest", "oldest"):
|
||||||
|
sort_norm = None
|
||||||
|
else:
|
||||||
|
sort_norm = None
|
||||||
|
|
||||||
|
# ORDER BY shared by both FTS5 paths. With sort set, timestamp is
|
||||||
|
# primary and rank is the tiebreaker; otherwise rank alone.
|
||||||
|
if sort_norm == "newest":
|
||||||
|
order_by_sql = "ORDER BY m.timestamp DESC, rank"
|
||||||
|
elif sort_norm == "oldest":
|
||||||
|
order_by_sql = "ORDER BY m.timestamp ASC, rank"
|
||||||
|
else:
|
||||||
|
order_by_sql = "ORDER BY rank"
|
||||||
|
|
||||||
# Build WHERE clauses dynamically
|
# Build WHERE clauses dynamically
|
||||||
where_clauses = ["messages_fts MATCH ?"]
|
where_clauses = ["messages_fts MATCH ?"]
|
||||||
params: list = [query]
|
params: list = [query]
|
||||||
@@ -1943,7 +2155,7 @@ class SessionDB:
|
|||||||
JOIN messages m ON m.id = messages_fts.rowid
|
JOIN messages m ON m.id = messages_fts.rowid
|
||||||
JOIN sessions s ON s.id = m.session_id
|
JOIN sessions s ON s.id = m.session_id
|
||||||
WHERE {where_sql}
|
WHERE {where_sql}
|
||||||
ORDER BY rank
|
{order_by_sql}
|
||||||
LIMIT ? OFFSET ?
|
LIMIT ? OFFSET ?
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -2012,7 +2224,7 @@ class SessionDB:
|
|||||||
JOIN messages m ON m.id = messages_fts_trigram.rowid
|
JOIN messages m ON m.id = messages_fts_trigram.rowid
|
||||||
JOIN sessions s ON s.id = m.session_id
|
JOIN sessions s ON s.id = m.session_id
|
||||||
WHERE {' AND '.join(tri_where)}
|
WHERE {' AND '.join(tri_where)}
|
||||||
ORDER BY rank
|
{order_by_sql}
|
||||||
LIMIT ? OFFSET ?
|
LIMIT ? OFFSET ?
|
||||||
"""
|
"""
|
||||||
tri_params.extend([limit, offset])
|
tri_params.extend([limit, offset])
|
||||||
@@ -2051,6 +2263,13 @@ class SessionDB:
|
|||||||
if role_filter:
|
if role_filter:
|
||||||
like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
|
like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
|
||||||
like_params.extend(role_filter)
|
like_params.extend(role_filter)
|
||||||
|
# LIKE fallback has no rank to combine with — just timestamp
|
||||||
|
# direction. Default/"newest" → DESC; "oldest" → ASC.
|
||||||
|
like_order_sql = (
|
||||||
|
"ORDER BY m.timestamp ASC"
|
||||||
|
if sort_norm == "oldest"
|
||||||
|
else "ORDER BY m.timestamp DESC"
|
||||||
|
)
|
||||||
like_sql = f"""
|
like_sql = f"""
|
||||||
SELECT m.id, m.session_id, m.role,
|
SELECT m.id, m.session_id, m.role,
|
||||||
substr(m.content,
|
substr(m.content,
|
||||||
@@ -2061,7 +2280,7 @@ class SessionDB:
|
|||||||
FROM messages m
|
FROM messages m
|
||||||
JOIN sessions s ON s.id = m.session_id
|
JOIN sessions s ON s.id = m.session_id
|
||||||
WHERE {' AND '.join(like_where)}
|
WHERE {' AND '.join(like_where)}
|
||||||
ORDER BY m.timestamp DESC
|
{like_order_sql}
|
||||||
LIMIT ? OFFSET ?
|
LIMIT ? OFFSET ?
|
||||||
"""
|
"""
|
||||||
like_params.extend([limit, offset])
|
like_params.extend([limit, offset])
|
||||||
|
|||||||
10
run_agent.py
10
run_agent.py
@@ -10689,6 +10689,11 @@ class AIAgent:
|
|||||||
limit=function_args.get("limit", 3),
|
limit=function_args.get("limit", 3),
|
||||||
db=session_db,
|
db=session_db,
|
||||||
current_session_id=self.session_id,
|
current_session_id=self.session_id,
|
||||||
|
mode=function_args.get("mode"),
|
||||||
|
session_id=function_args.get("session_id"),
|
||||||
|
around_message_id=function_args.get("around_message_id"),
|
||||||
|
window=function_args.get("window", 5),
|
||||||
|
anchors=function_args.get("anchors"),
|
||||||
)
|
)
|
||||||
elif function_name == "memory":
|
elif function_name == "memory":
|
||||||
target = function_args.get("target", "memory")
|
target = function_args.get("target", "memory")
|
||||||
@@ -11321,6 +11326,11 @@ class AIAgent:
|
|||||||
limit=function_args.get("limit", 3),
|
limit=function_args.get("limit", 3),
|
||||||
db=session_db,
|
db=session_db,
|
||||||
current_session_id=self.session_id,
|
current_session_id=self.session_id,
|
||||||
|
mode=function_args.get("mode"),
|
||||||
|
session_id=function_args.get("session_id"),
|
||||||
|
around_message_id=function_args.get("around_message_id"),
|
||||||
|
window=function_args.get("window", 5),
|
||||||
|
anchors=function_args.get("anchors"),
|
||||||
)
|
)
|
||||||
tool_duration = time.time() - tool_start_time
|
tool_duration = time.time() - tool_start_time
|
||||||
if self._should_emit_quiet_tool_messages():
|
if self._should_emit_quiet_tool_messages():
|
||||||
|
|||||||
@@ -1051,6 +1051,7 @@ AUTHOR_MAP = {
|
|||||||
"openclaw@agent.local": "29206394", # PR #22194 salvage (sudo -S brute-force guard, #9590)
|
"openclaw@agent.local": "29206394", # PR #22194 salvage (sudo -S brute-force guard, #9590)
|
||||||
"freedemon@gmail.com": "fr33d3m0n", # PR #21128 salvage (sudo stdin/askpass DANGEROUS, #17873 cat 4)
|
"freedemon@gmail.com": "fr33d3m0n", # PR #21128 salvage (sudo stdin/askpass DANGEROUS, #17873 cat 4)
|
||||||
"zhaowh3613@outlook.com": "VinceZcrikl", # PR #23647 salvage (npm UTF-8 decode on GBK Windows)
|
"zhaowh3613@outlook.com": "VinceZcrikl", # PR #23647 salvage (npm UTF-8 decode on GBK Windows)
|
||||||
|
"abcdjmm970703@gmail.com": "JabberELF", # PR #20238 salvage (session_search fast/summary dual-mode)
|
||||||
"anton.kuenzi@gmail.com": "ZeterMordio", # PR #11754 salvage (zsh completion compdef + _arguments syntax)
|
"anton.kuenzi@gmail.com": "ZeterMordio", # PR #11754 salvage (zsh completion compdef + _arguments syntax)
|
||||||
"23yntong@stu.edu.cn": "iuyup", # PR #6155 salvage (shell=True hardening)
|
"23yntong@stu.edu.cn": "iuyup", # PR #6155 salvage (shell=True hardening)
|
||||||
"86501179+1RB@users.noreply.github.com": "1RB", # PR #25462 salvage (discord forwarded messages)
|
"86501179+1RB@users.noreply.github.com": "1RB", # PR #25462 salvage (discord forwarded messages)
|
||||||
|
|||||||
3
skills/memory/DESCRIPTION.md
Normal file
3
skills/memory/DESCRIPTION.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
---
|
||||||
|
description: Primitives for searching, recalling, and reasoning over Hermes' own session history and stored memory.
|
||||||
|
---
|
||||||
112
skills/memory/session-recall/SKILL.md
Normal file
112
skills/memory/session-recall/SKILL.md
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
---
|
||||||
|
name: session-recall
|
||||||
|
description: Use session_search effectively for finding and reading prior Hermes sessions.
|
||||||
|
metadata:
|
||||||
|
hermes:
|
||||||
|
category: memory
|
||||||
|
---
|
||||||
|
|
||||||
|
# session-recall
|
||||||
|
|
||||||
|
session_search is the tool. Three modes — fast, guided, summary — answer different question shapes. Picking the wrong mode costs latency, money, or correctness.
|
||||||
|
|
||||||
|
## Pre-flight
|
||||||
|
|
||||||
|
1. If the user asks about prior work ("find the session where X", "catch me up on Y", "we drafted Z"), your first move is session_search. Not filesystem search, not a different tool.
|
||||||
|
2. If the user names an artefact, search the literal name first. No OR-expansion.
|
||||||
|
3. Default to fast → guided. Reach for summary only when you need cross-session synthesis prose in one shot.
|
||||||
|
|
||||||
|
## Mode picker
|
||||||
|
|
||||||
|
| Question shape | Mode | Why |
|
||||||
|
|---|---|---|
|
||||||
|
| Catch me up / where did we get to / what did we decide | fast → guided | FTS5 finds sessions; guided reads the transcript. SQL-only. |
|
||||||
|
| Find an artefact by name / which session mentions X | fast | Snippets only, no LLM. |
|
||||||
|
| Read around a specific message in a known session | guided | Raw window around anchor. |
|
||||||
|
| Cross-session prose synthesis in one shot | summary | LLM call per hit (aux model if configured, else main). Opt-in. |
|
||||||
|
|
||||||
|
## Levers
|
||||||
|
|
||||||
|
| Lever | Default | When to change |
|
||||||
|
|---|---|---|
|
||||||
|
| `limit` (fast) | 3 | 5–10 when topic spans sessions or user wants to pick from a list |
|
||||||
|
| `sort` (fast) | unset (relevance) | `newest` for "where did we leave X"; `oldest` for "how did X start" |
|
||||||
|
| `role_filter` (fast) | user,assistant | Add `tool` only when debugging tool output specifically |
|
||||||
|
| `window` (guided) | 5 | Bump for long resolutions; shrink if response truncates |
|
||||||
|
| anchor count (guided) | 1 | 2–3 anchors when topic spans recent sessions |
|
||||||
|
| `limit` (summary) | 3 | Bump cautiously; cost scales directly |
|
||||||
|
|
||||||
|
## Composition patterns
|
||||||
|
|
||||||
|
1. **Discover → drill.** fast first, drill the top hit with guided. Widen `window` or re-anchor if the resolution isn't covered.
|
||||||
|
2. **Multi-anchor for arcs.** When fast returns 2–3 relevant hits on the same topic, pass them all to guided in one call.
|
||||||
|
3. **Bookend-first reading.** For "what was the conclusion" questions, read `bookend_end` before `messages`.
|
||||||
|
4. **Delegate when transcripts are big.** If you're about to pull 30K+ chars of transcript into your context just to summarise it, hand the dumps to a subagent and ask for a digest.
|
||||||
|
5. **Verify before quoting.** High-stakes recall does two passes: fast with the literal term (does the hit list contain the right session?) → guided (does the transcript confirm the outcome?).
|
||||||
|
|
||||||
|
## Worked examples
|
||||||
|
|
||||||
|
### A — find a named artefact
|
||||||
|
|
||||||
|
User: "we drafted a deployment plan in a session yesterday, find it"
|
||||||
|
|
||||||
|
Right: `session_search(query="deployment plan", limit=5)`. The user named it — search the name. Drill the top hit if you need details.
|
||||||
|
|
||||||
|
Wrong: `session_search(query="deploy OR deployment OR rollout OR plan")`. OR-expansion drowns the hit in unrelated sessions.
|
||||||
|
|
||||||
|
### B — catch up on a multi-session arc
|
||||||
|
|
||||||
|
User: "where did we get to with the auth refactor?"
|
||||||
|
|
||||||
|
Right: fast with `sort='newest'`, then multi-anchor guided across the top 2–3 hits:
|
||||||
|
|
||||||
|
```
|
||||||
|
session_search(query="auth refactor", limit=5, sort='newest')
|
||||||
|
session_search(mode='guided', anchors=[
|
||||||
|
{'session_id': hit_1.session_id, 'around_message_id': hit_1.match_message_id},
|
||||||
|
{'session_id': hit_2.session_id, 'around_message_id': hit_2.match_message_id},
|
||||||
|
{'session_id': hit_3.session_id, 'around_message_id': hit_3.match_message_id},
|
||||||
|
])
|
||||||
|
```
|
||||||
|
|
||||||
|
Read all three slices (bookend_start / messages / bookend_end) on each window and the arc reconstructs.
|
||||||
|
|
||||||
|
Wrong: `session_search(query="auth refactor", mode='summary')`. Summary launders FTS5 hits through an LLM and can confabulate when the right session isn't in the hit list.
|
||||||
|
|
||||||
|
### C — drill into a known session for a conclusion
|
||||||
|
|
||||||
|
User: "in the session about the caching layer, what did we decide?"
|
||||||
|
|
||||||
|
fast to locate, guided to drill, read `bookend_end` first:
|
||||||
|
|
||||||
|
```
|
||||||
|
session_search(query="caching layer", limit=3)
|
||||||
|
session_search(mode='guided', anchors=[
|
||||||
|
{'session_id': <top>, 'around_message_id': <match_id>}
|
||||||
|
])
|
||||||
|
```
|
||||||
|
|
||||||
|
Conclusions ("decided X", "shipped Y") usually live in `bookend_end`.
|
||||||
|
|
||||||
|
## Reading guided responses
|
||||||
|
|
||||||
|
Every guided window has three slices:
|
||||||
|
|
||||||
|
- `bookend_start` — opening prose (kickoff, goal)
|
||||||
|
- `messages` — the anchored window (FTS5 hit + neighbours)
|
||||||
|
- `bookend_end` — closing prose (resolution, decisions, commits)
|
||||||
|
|
||||||
|
Read all three. Bookends are prose that summarises; snippets and the middle window can be noisy when sessions are *about* the search term.
|
||||||
|
|
||||||
|
## Pitfalls
|
||||||
|
|
||||||
|
- **Manual-archaeology trap.** If fast snippets look noisy, drill the top hit with guided. Don't pivot to find / grep / raw SQL.
|
||||||
|
- **Summary confabulation.** Summary will produce confident prose even when FTS5 missed the right session. Verify by re-querying in fast mode and checking the hit list.
|
||||||
|
- **FTS5 is AND by default.** Multi-word queries require all terms; use OR or quoted phrases deliberately.
|
||||||
|
- **Anchor mismatch.** `around_message_id` must exist in the named session. Re-anchor from a fresh fast result if guided rejects.
|
||||||
|
- **Window truncation.** Re-call with a smaller window if a dump truncates.
|
||||||
|
- **Compaction lineage.** A fast hit with `parent_session_id` set means the session was split by compaction; its `bookend_start` is a handoff summary, not the original opener.
|
||||||
|
|
||||||
|
## Note on skill limits
|
||||||
|
|
||||||
|
This skill teaches composition but cannot enforce it. If your default behaviour drifts — composing paraphrase queries instead of drilling, reaching for summary when fast → guided would do, pivoting to filesystem search when fast returned hits — the skill is being ignored, not failing. When in doubt: fast first, then drill.
|
||||||
189
tests/hermes_state/test_get_anchored_view.py
Normal file
189
tests/hermes_state/test_get_anchored_view.py
Normal file
@@ -0,0 +1,189 @@
|
|||||||
|
"""Unit tests for SessionDB.get_anchored_view() — window + bookends + role filter.
|
||||||
|
|
||||||
|
Used by ``session_search`` mode='guided'. Builds on ``get_messages_around``
|
||||||
|
and adds:
|
||||||
|
- opinionated default role filter (drops tool messages from the window,
|
||||||
|
but never drops the anchor itself)
|
||||||
|
- session-head and session-tail bookends (default 3 messages each) so an
|
||||||
|
FTS5 hit anywhere in a long session still yields the goal + resolution
|
||||||
|
- bookends are skipped when the main window already overlaps the head or tail
|
||||||
|
|
||||||
|
These properties are the reason guided is useful for state recall on long
|
||||||
|
sessions, so the suite below pins them all down.
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from hermes_state import SessionDB
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def db(tmp_path):
    """Provide a SessionDB backed by a file in pytest's per-test tmp dir."""
    state_file = tmp_path / "state.db"
    return SessionDB(state_file)
|
||||||
|
|
||||||
|
|
||||||
|
def _seed(db: SessionDB, session_id: str, roles: list[str]) -> list[int]:
    """Create ``session_id`` and append one message per entry of ``roles``.

    Message number ``i`` gets the content ``"<role>-<i>"``. Returns the
    values returned by ``append_message``, in insertion order.
    """
    db.create_session(session_id, source="cli")
    return [
        db.append_message(session_id, role=role, content=f"{role}-{position}")
        for position, role in enumerate(roles)
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def test_window_filters_tool_messages_but_keeps_anchor_when_tool(db):
    """A tool-role anchor survives the role filter; other tool rows do not."""
    role_sequence = [
        "user", "assistant", "tool",   # indexes 0..2
        "user", "tool",                # indexes 3..4, anchor is index 4 (tool)
        "tool", "assistant", "user",   # indexes 5..7
    ]
    ids = _seed(db, "s1", role_sequence)
    anchor_id = ids[4]

    window = db.get_anchored_view("s1", anchor_id, window=3, bookend=0)["window"]

    # The anchor is present and still carries its tool role.
    assert "tool" in [m["role"] for m in window]
    anchor = next(m for m in window if m["id"] == anchor_id)
    assert anchor["role"] == "tool"

    # It is the only tool message left; its tool neighbours were filtered.
    surviving_tools = [m for m in window if m["role"] == "tool"]
    assert len(surviving_tools) == 1 and surviving_tools[0]["id"] == anchor_id
|
||||||
|
|
||||||
|
|
||||||
|
def test_window_keeps_user_and_assistant_by_default(db):
    """With only user/assistant rows, the default filter drops nothing."""
    ids = _seed(db, "s1", ["user", "assistant"] * 6)

    window = db.get_anchored_view("s1", ids[5], window=2, bookend=0)["window"]

    seen_roles = {m["role"] for m in window}
    assert seen_roles == {"user", "assistant"}
    # 2 before + anchor + 2 after
    assert len(window) == 5
|
||||||
|
|
||||||
|
|
||||||
|
def test_bookends_returned_when_window_in_middle(db):
    """A mid-session anchor yields full bookends taken from the real head/tail."""
    ids = _seed(db, "s1", ["user", "assistant"] * 10)  # 20 messages

    view = db.get_anchored_view("s1", ids[10], window=2, bookend=3)
    head, tail = view["bookend_start"], view["bookend_end"]

    assert len(head) == 3
    assert len(tail) == 3
    # Bookends are the actual first and last messages of the session.
    assert [m["id"] for m in head] == ids[:3]
    assert [m["id"] for m in tail] == ids[-3:]
|
||||||
|
|
||||||
|
|
||||||
|
def test_bookend_start_empty_when_window_covers_session_head(db):
    """No start bookend when the window already includes the first message."""
    ids = _seed(db, "s1", ["user", "assistant"] * 5)  # 10 messages

    # Anchor ids[1] with window=3 spans ids[0..4], so the head is covered.
    view = db.get_anchored_view("s1", ids[1], window=3, bookend=3)

    assert view["bookend_start"] == []
    # There is still room after the window, so the end bookend is filled.
    assert len(view["bookend_end"]) == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_bookend_end_empty_when_window_covers_session_tail(db):
    """No end bookend when the window already includes the last message."""
    ids = _seed(db, "s1", ["user", "assistant"] * 5)  # 10 messages

    view = db.get_anchored_view("s1", ids[-2], window=3, bookend=3)

    assert view["bookend_end"] == []
    assert len(view["bookend_start"]) == 3
|
||||||
|
|
||||||
|
|
||||||
|
def test_bookends_skip_tool_messages(db):
    """Bookends contain only user/assistant rows under the default filter.

    Exact ids are pinned in addition to the role check: ``all(...)`` over an
    empty list is true, so the role assertions alone would still pass if the
    bookends came back empty.
    """
    ids = _seed(db, "s1", [
        "tool", "tool", "user", "assistant",       # 0..3: head has 2 user/assistant
        "user", "assistant", "user", "assistant",  # 4..7
        "tool", "user", "assistant", "tool",       # 8..11: tail has tool noise
    ])
    # Anchor in the middle: window=1 covers ids[4..6].
    view = db.get_anchored_view("s1", ids[5], window=1, bookend=3)

    assert all(m["role"] in ("user", "assistant") for m in view["bookend_start"])
    assert all(m["role"] in ("user", "assistant") for m in view["bookend_end"])

    # Only two non-tool messages precede the window; three follow it.
    assert [m["id"] for m in view["bookend_start"]] == [ids[2], ids[3]]
    assert [m["id"] for m in view["bookend_end"]] == [ids[7], ids[9], ids[10]]
|
||||||
|
|
||||||
|
|
||||||
|
def test_bookend_zero_returns_empty_bookends(db):
    """``bookend=0`` yields empty bookends even when the session has room."""
    ids = _seed(db, "s1", ["user", "assistant"] * 10)

    view = db.get_anchored_view("s1", ids[10], window=2, bookend=0)

    for key in ("bookend_start", "bookend_end"):
        assert view[key] == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_anchor_not_in_session_returns_empty_view(db):
    """An anchor id not found in the named session yields an all-empty view."""
    for session_name in ("s1", "s2"):
        _seed(db, session_name, ["user", "assistant"] * 5)
    empty_view = {"window": [], "bookend_start": [], "bookend_end": []}
    # 999999 is far beyond any id the two small seeded sessions produce.
    assert db.get_anchored_view("s1", 999999, window=3, bookend=3) == empty_view
|
||||||
|
|
||||||
|
|
||||||
|
def test_keep_roles_none_disables_filtering(db):
    """With keep_roles=None, tool rows remain in the returned window."""
    role_pattern = ["user", "tool", "assistant", "tool", "user"] * 3
    seeded_ids = _seed(db, "s1", role_pattern)
    unfiltered = db.get_anchored_view(
        "s1", seeded_ids[7], window=2, bookend=3, keep_roles=None
    )
    # Filtering is off, so at least one tool row must survive in the window.
    window_roles = [row["role"] for row in unfiltered["window"]]
    assert "tool" in window_roles
|
||||||
|
|
||||||
|
|
||||||
|
def test_keep_roles_can_include_tool_when_caller_wants_it(db):
    """Listing 'tool' in keep_roles lets tool rows appear in the window."""
    seeded_ids = _seed(db, "s1", ["user", "tool", "assistant"] * 5)
    wanted_roles = ("user", "assistant", "tool")
    anchored = db.get_anchored_view(
        "s1", seeded_ids[7], window=2, bookend=3, keep_roles=wanted_roles
    )
    # All three roles are allowed, so a tool row should be present.
    assert any(row["role"] == "tool" for row in anchored["window"])
|
||||||
|
|
||||||
|
|
||||||
|
def test_negative_bookend_treated_as_zero(db):
    """A negative bookend count yields empty bookends, same as bookend=0."""
    seeded_ids = _seed(db, "s1", ["user", "assistant"] * 10)
    anchored = db.get_anchored_view("s1", seeded_ids[10], window=2, bookend=-3)
    for side in ("bookend_start", "bookend_end"):
        assert anchored[side] == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_bookends_do_not_leak_across_sessions(db):
    """Bookends stay inside the requested session.

    Two sessions are seeded back to back; no message id from the second one
    may show up in either bookend of a view anchored in the first.
    """
    first_ids = _seed(db, "s1", ["user", "assistant"] * 4)
    second_ids = _seed(db, "s2", ["user", "assistant"] * 4)
    anchored = db.get_anchored_view("s1", first_ids[3], window=1, bookend=3)
    returned_ids = {
        row["id"]
        for side in ("bookend_start", "bookend_end")
        for row in anchored[side]
    }
    assert returned_ids.isdisjoint(set(second_ids))
|
||||||
|
|
||||||
|
|
||||||
|
def test_bookends_skip_empty_content_assistant_turns(db):
    """Assistant rows with empty content must not occupy bookend slots.

    The session mixes prose turns with bursts of empty-content assistant rows
    followed by tool output. Both bookends must contain only rows with
    content, the first bookend_start row must be the opening user message, and
    the final prose assistant message must appear in bookend_end.
    """
    session = "s1"
    db.create_session(session, source="cli")

    def say(role, content):
        return db.append_message(session, role=role, content=content)

    # Genuine opener.
    open_id = say("user", "kick off the work")
    say("assistant", "on it")
    # Burst of content-free assistant rows, each followed by tool output.
    for _ in range(5):
        say("assistant", "")
        say("tool", "some output")
    # Prose in the middle of the session; the user row is the anchor.
    mid_id = say("user", "status?")
    say("assistant", "midway")
    # Tail: more content-free assistant rows before the prose closer.
    for _ in range(3):
        say("assistant", "")
        say("tool", "poll")
    close_id = say("assistant", "Done. Final summary here.")

    view = db.get_anchored_view(session, mid_id, window=1, bookend=3)
    head_rows = view["bookend_start"]

    # The head bookend carries only rows with content ...
    assert all(row["content"] for row in head_rows), \
        "bookend_start leaked an empty-content row"
    # ... and starts with the real opener.
    assert head_rows[0]["id"] == open_id

    # Same for the tail bookend, which must also include the closer.
    tail_rows = view["bookend_end"]
    assert all(row["content"] for row in tail_rows), \
        "bookend_end leaked an empty-content row"
    assert any(row["id"] == close_id for row in tail_rows), \
        "actual session closer must survive into bookend_end"
|
||||||
|
|
||||||
137
tests/hermes_state/test_get_messages_around.py
Normal file
137
tests/hermes_state/test_get_messages_around.py
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
"""Unit tests for SessionDB.get_messages_around() — anchored message windows.
|
||||||
|
|
||||||
|
The method is used by ``session_search`` mode='guided' for anchored drill-down.
|
||||||
|
It must:
|
||||||
|
- Return an ordered window: up to ``window`` messages before the anchor,
|
||||||
|
the anchor itself, then up to ``window`` after, all id-ascending.
|
||||||
|
- Honour session boundaries (fewer messages returned at start / end).
|
||||||
|
- Honour session isolation (same id range, different session = nothing).
|
||||||
|
- Return an empty list when the anchor is not in the named session.
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from hermes_state import SessionDB
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def db(tmp_path):
    """Provide a SessionDB backed by a file inside pytest's tmp_path."""
    database_file = tmp_path / "state.db"
    return SessionDB(database_file)
|
||||||
|
|
||||||
|
|
||||||
|
def _seed_session(db: SessionDB, session_id: str, n_messages: int):
    """Create a session and fill it with alternating user/assistant messages.

    Message ``i`` has content ``"msg {i}"``; even positions are ``user`` rows
    and odd positions are ``assistant`` rows.

    Returns the ids handed back by ``append_message``, in append order.
    """
    db.create_session(session_id, source="cli")
    speakers = ("user", "assistant")
    return [
        db.append_message(
            session_id, role=speakers[position % 2], content=f"msg {position}"
        )
        for position in range(n_messages)
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def test_returns_window_around_anchor_in_middle(db):
    """A mid-session anchor returns window rows on each side plus the anchor."""
    seeded_ids = _seed_session(db, "s1", 11)
    middle_anchor = seeded_ids[5]

    rows = db.get_messages_around("s1", middle_anchor, window=3)

    # Three before + the anchor + three after.
    assert len(rows) == 7
    # Every row belongs to the requested session.
    assert all(row["session_id"] == "s1" for row in rows)
    # Rows come back id-ascending and contiguous.
    assert [row["id"] for row in rows] == seeded_ids[2:9]
|
||||||
|
|
||||||
|
|
||||||
|
def test_anchor_at_first_message_returns_only_after_slice(db):
    """Anchoring on the first message returns the anchor and what follows."""
    seeded_ids = _seed_session(db, "s1", 8)

    rows = db.get_messages_around("s1", seeded_ids[0], window=3)

    # Nothing precedes the anchor, so: anchor + three after.
    assert len(rows) == 4
    assert [row["id"] for row in rows] == seeded_ids[0:4]
|
||||||
|
|
||||||
|
|
||||||
|
def test_anchor_at_last_message_returns_only_before_slice(db):
    """Anchoring on the last message returns what precedes it and the anchor."""
    seeded_ids = _seed_session(db, "s1", 8)

    rows = db.get_messages_around("s1", seeded_ids[-1], window=3)

    # Nothing follows the anchor, so: three before + anchor.
    assert len(rows) == 4
    assert [row["id"] for row in rows] == seeded_ids[-4:]
|
||||||
|
|
||||||
|
|
||||||
|
def test_anchor_not_in_session_returns_empty_list(db):
    """An anchor id that belongs to another session yields an empty list."""
    s1_ids = _seed_session(db, "s1", 5)
    _seed_session(db, "s2", 5)

    # Query session s2, but anchor on an id that exists only in s1.
    result = db.get_messages_around("s2", s1_ids[2], window=3)

    assert result == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_does_not_leak_across_sessions(db):
    """Rows from a neighbouring session never enter the window."""
    # Seed two sessions one after the other.
    first_ids = _seed_session(db, "s1", 5)
    second_ids = _seed_session(db, "s2", 5)

    # Anchor on the last s1 message: s2 rows were appended afterwards but
    # must still be excluded from the window.
    rows = db.get_messages_around("s1", first_ids[-1], window=3)

    assert all(row["session_id"] == "s1" for row in rows)
    # Every returned id is an s1 id and none is an s2 id.
    assert {row["id"] for row in rows}.issubset(set(first_ids))
    assert {row["id"] for row in rows}.isdisjoint(set(second_ids))
|
||||||
|
|
||||||
|
|
||||||
|
def test_window_larger_than_session_returns_full_session(db):
    """An oversized window returns the whole session in id order."""
    seeded_ids = _seed_session(db, "s1", 4)

    rows = db.get_messages_around("s1", seeded_ids[1], window=100)

    # The full session comes back, ascending.
    assert [row["id"] for row in rows] == seeded_ids
|
||||||
|
|
||||||
|
|
||||||
|
def test_window_zero_returns_only_anchor(db):
    """window=0 returns exactly one row: the anchor."""
    seeded_ids = _seed_session(db, "s1", 5)
    target = seeded_ids[2]

    rows = db.get_messages_around("s1", target, window=0)

    assert len(rows) == 1
    assert rows[0]["id"] == target
|
||||||
|
|
||||||
|
|
||||||
|
def test_negative_window_treated_as_zero(db):
    """A negative window returns only the anchor, same as window=0."""
    seeded_ids = _seed_session(db, "s1", 5)
    target = seeded_ids[2]

    rows = db.get_messages_around("s1", target, window=-3)

    assert len(rows) == 1
    assert rows[0]["id"] == target
|
||||||
|
|
||||||
|
|
||||||
|
def test_decodes_content_like_get_messages(db):
    """Content from get_messages_around matches what get_messages returns.

    Callers switching between the two methods should see the same content
    for the same rows.
    """
    seeded_ids = _seed_session(db, "s1", 3)

    # window=1 around the middle of a three-message session covers all of it.
    windowed = db.get_messages_around("s1", seeded_ids[1], window=1)
    everything = db.get_messages("s1")

    windowed_content = [row["content"] for row in windowed]
    assert windowed_content == [row["content"] for row in everything]
|
||||||
@@ -2494,6 +2494,103 @@ class TestExcludeSources:
|
|||||||
sources = [r["source"] for r in results]
|
sources = [r["source"] for r in results]
|
||||||
assert sources == ["cli"]
|
assert sources == ["cli"]
|
||||||
|
|
||||||
|
def test_search_messages_sort_newest_orders_by_timestamp_desc(self, db):
    """``sort='newest'`` returns matches newest-first.

    Three sessions hold the same matching text, so only the timestamps
    differ; the results must come back in descending timestamp order.
    """
    session_names = ("old_sid", "mid_sid", "new_sid")
    for name in session_names:
        db.create_session(name, "cli")
    # Identical content in every session, so relevance cannot separate them.
    oldest_id, middle_id, newest_id = [
        db.append_message(name, "user", "matchword discussion")
        for name in session_names
    ]
    # Overwrite the stored timestamps with explicit, well-separated values.
    restamps = (
        ("UPDATE messages SET timestamp=1000 WHERE id=?", oldest_id),
        ("UPDATE messages SET timestamp=2000 WHERE id=?", middle_id),
        ("UPDATE messages SET timestamp=3000 WHERE id=?", newest_id),
    )
    with db._lock:
        for statement, message_id in restamps:
            db._conn.execute(statement, (message_id,))
        db._conn.commit()

    hits = db.search_messages("matchword", sort="newest")
    session_order = [hit["session_id"] for hit in hits]
    assert session_order == ["new_sid", "mid_sid", "old_sid"], (
        f"sort=newest must return newest first; got {session_order}"
    )
|
||||||
|
|
||||||
|
def test_search_messages_sort_oldest_orders_by_timestamp_asc(self, db):
    """``sort='oldest'`` returns matches earliest-first.

    The timestamps are assigned out of insertion order so that a pass cannot
    come from insertion order alone.
    """
    session_names = ("a", "b", "c")
    for name in session_names:
        db.create_session(name, "cli")
    id_a, id_b, id_c = [
        db.append_message(name, "user", "matchword") for name in session_names
    ]
    # b becomes the earliest row, then c, then a.
    restamps = (
        ("UPDATE messages SET timestamp=3000 WHERE id=?", id_a),
        ("UPDATE messages SET timestamp=1000 WHERE id=?", id_b),
        ("UPDATE messages SET timestamp=2000 WHERE id=?", id_c),
    )
    with db._lock:
        for statement, message_id in restamps:
            db._conn.execute(statement, (message_id,))
        db._conn.commit()

    hits = db.search_messages("matchword", sort="oldest")
    session_order = [hit["session_id"] for hit in hits]
    assert session_order == ["b", "c", "a"], (
        f"sort=oldest must return earliest first; got {session_order}"
    )
|
||||||
|
|
||||||
|
def test_search_messages_sort_unset_preserves_rank_ordering(self, db):
    """Omitting ``sort`` keeps the existing relevance ordering.

    A short message and a heavily padded one both contain the keyword. The
    short one is given the older timestamp, so it can only come first if the
    ordering is by rank rather than by recency.
    """
    for name in ("short_sid", "long_sid"):
        db.create_session(name, "cli")
    # The same keyword, once in a tiny message and once buried in padding.
    short_id = db.append_message("short_sid", "user", "matchword.")
    padded_text = "matchword " + ("padding " * 200)
    long_id = db.append_message("long_sid", "user", padded_text)
    # Make the short message the older of the two.
    restamps = (
        ("UPDATE messages SET timestamp=1000 WHERE id=?", short_id),
        ("UPDATE messages SET timestamp=2000 WHERE id=?", long_id),
    )
    with db._lock:
        for statement, message_id in restamps:
            db._conn.execute(statement, (message_id,))
        db._conn.commit()

    results = db.search_messages("matchword")  # no sort argument
    assert len(results) == 2
    # The short message must lead even though it is older.
    assert results[0]["session_id"] == "short_sid", (
        "Default (no sort) must use FTS5 rank — short_sid should outrank "
        f"the longer message. Got order: {[r['session_id'] for r in results]}"
    )
|
||||||
|
|
||||||
|
def test_search_messages_sort_invalid_value_falls_back_to_rank(self, db):
    """An unrecognised ``sort`` value orders results like no ``sort`` at all.

    The call with ``sort='sideways'`` must not raise and must return the
    sessions in the same order as the default call.
    """
    for name in ("short_sid", "long_sid"):
        db.create_session(name, "cli")
    short_id = db.append_message("short_sid", "user", "matchword.")
    padded_text = "matchword " + ("padding " * 200)
    long_id = db.append_message("long_sid", "user", padded_text)
    restamps = (
        ("UPDATE messages SET timestamp=1000 WHERE id=?", short_id),
        ("UPDATE messages SET timestamp=2000 WHERE id=?", long_id),
    )
    with db._lock:
        for statement, message_id in restamps:
            db._conn.execute(statement, (message_id,))
        db._conn.commit()

    # The default call runs first, then the call with a junk sort value.
    default_order = [hit["session_id"] for hit in db.search_messages("matchword")]
    garbage_order = [
        hit["session_id"]
        for hit in db.search_messages("matchword", sort="sideways")
    ]
    assert default_order == garbage_order
|
||||||
|
|
||||||
|
|
||||||
class TestResolveSessionByNameOrId:
|
class TestResolveSessionByNameOrId:
|
||||||
"""Tests for the main.py helper that resolves names or IDs."""
|
"""Tests for the main.py helper that resolves names or IDs."""
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -2,19 +2,16 @@
|
|||||||
"""
|
"""
|
||||||
Session Search Tool - Long-Term Conversation Recall
|
Session Search Tool - Long-Term Conversation Recall
|
||||||
|
|
||||||
Searches past session transcripts in SQLite via FTS5, then summarizes the top
|
Searches past session transcripts in SQLite via FTS5. Keyword search defaults
|
||||||
matching sessions using the configured auxiliary session_search model (same
|
to fast snippet/context hits without any LLM call; callers can opt into focused
|
||||||
pattern as web_extract). By default, auxiliary "auto" routing uses the main
|
LLM summaries with mode="summary" when deeper recall is worth the latency.
|
||||||
chat provider/model unless the user overrides auxiliary.session_search.
|
|
||||||
Returns focused summaries of past conversations rather than raw transcripts,
|
|
||||||
keeping the main model's context window clean.
|
|
||||||
|
|
||||||
Flow:
|
Flow:
|
||||||
1. FTS5 search finds matching messages ranked by relevance
|
1. FTS5 search finds matching messages ranked by relevance
|
||||||
2. Groups by session, takes the top N unique sessions (default 3)
|
2. Groups by session, takes the top N unique sessions (default 3)
|
||||||
3. Loads each session's conversation, truncates to ~100k chars centered on matches
|
3. Fast mode returns snippets and nearby context immediately
|
||||||
4. Sends to the configured auxiliary model with a focused summarization prompt
|
4. Summary mode loads each session, truncates around matches, and calls an LLM
|
||||||
5. Returns per-session summaries with metadata
|
5. Returns per-session hits/summaries with metadata
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
@@ -26,6 +23,62 @@ from typing import Dict, Any, List, Optional, Union
|
|||||||
|
|
||||||
from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
|
from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
|
||||||
MAX_SESSION_CHARS = 100_000
|
MAX_SESSION_CHARS = 100_000
|
||||||
|
|
||||||
|
|
||||||
|
# ``session_search`` runs in fast mode unless the user sets
# ``auxiliary.session_search.default_mode`` in ~/.hermes/config.yaml. Only
# ``fast`` and ``summary`` are valid there — guided requires anchors, so it
# cannot be a default.
_VALID_DEFAULT_MODES = ("fast", "summary")
_FALLBACK_DEFAULT_MODE = "fast"

import functools  # noqa: E402 — local to the cache wrap


def _lookup_configured_default_mode(config):
    """Return the raw ``auxiliary.session_search.default_mode`` value, or ``None``.

    Walks the nested config one level at a time and stops as soon as a level
    has no callable ``.get`` (``None``, a scalar, a list). YAML loads a section
    header whose children are all commented out as ``None``, so chaining
    ``.get`` calls straight through would raise ``AttributeError`` on such a
    config.
    """
    node = config
    for key in ("auxiliary", "session_search", "default_mode"):
        getter = getattr(node, "get", None)
        if not callable(getter):
            return None
        node = getter(key)
    return node


# Process-level cache so repeated session_search calls don't re-read YAML:
# the config is read at most once per process, so restart to pick up config
# changes. Tests can reset it with _resolve_user_default_mode.cache_clear().
@functools.lru_cache(maxsize=1)
def _resolve_user_default_mode() -> str:
    """Look up ``auxiliary.session_search.default_mode`` from ~/.hermes/config.yaml.

    Returns:
        ``"fast"`` or ``"summary"``. ``_FALLBACK_DEFAULT_MODE`` (``"fast"``) is
        returned when the setting is unset, when any enclosing section is
        missing or empty, when the value is not a string or not one of
        ``_VALID_DEFAULT_MODES``, or when the config loader cannot be imported
        or fails (e.g. tests, tools loaded outside the CLI).

    Invalid values are reported with a warning so users get feedback when
    they typo their config. Because the result is cached, the warning is
    emitted once per process (or once per ``cache_clear()``).
    """
    try:
        from hermes_cli.config import load_config
        config = load_config() or {}
    except ImportError:
        logging.debug("hermes_cli.config not available; default_mode falls back to %r", _FALLBACK_DEFAULT_MODE)
        return _FALLBACK_DEFAULT_MODE
    except Exception as e:
        # Best effort: a broken config must not break session_search itself.
        logging.debug("Failed to load config for session_search default_mode: %s", e, exc_info=True)
        return _FALLBACK_DEFAULT_MODE

    raw = _lookup_configured_default_mode(config)
    if raw is None:
        return _FALLBACK_DEFAULT_MODE
    if not isinstance(raw, str):
        logging.warning(
            "auxiliary.session_search.default_mode in config.yaml must be a string, got %r — falling back to %r",
            raw, _FALLBACK_DEFAULT_MODE,
        )
        return _FALLBACK_DEFAULT_MODE
    # Accept values regardless of case or surrounding whitespace.
    normalised = raw.strip().lower()
    if normalised not in _VALID_DEFAULT_MODES:
        logging.warning(
            "auxiliary.session_search.default_mode=%r is not one of %s — falling back to %r. "
            "(guided requires anchors and cannot be a default.)",
            raw, _VALID_DEFAULT_MODES, _FALLBACK_DEFAULT_MODE,
        )
        return _FALLBACK_DEFAULT_MODE
    return normalised
|
||||||
MAX_SUMMARY_TOKENS = 10000
|
MAX_SUMMARY_TOKENS = 10000
|
||||||
|
|
||||||
|
|
||||||
@@ -197,8 +250,16 @@ def _truncate_around_matches(
|
|||||||
|
|
||||||
async def _summarize_session(
|
async def _summarize_session(
|
||||||
conversation_text: str, query: str, session_meta: Dict[str, Any]
|
conversation_text: str, query: str, session_meta: Dict[str, Any]
|
||||||
) -> Optional[str]:
|
) -> tuple[Optional[str], Optional[Dict[str, Any]]]:
|
||||||
"""Summarize a single session conversation focused on the search query."""
|
"""Summarize a single session conversation focused on the search query.
|
||||||
|
|
||||||
|
Returns ``(content, usage)`` where ``usage`` is a dict with
|
||||||
|
``{model, input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens}``
|
||||||
|
parsed from the aux LLM response, or ``None`` when the model didn't surface
|
||||||
|
usage data. The usage dict lets callers attribute the cost of summary-mode
|
||||||
|
aux calls back to the parent session — without this, summary-mode spend is
|
||||||
|
invisible to per-session accounting.
|
||||||
|
"""
|
||||||
system_prompt = (
|
system_prompt = (
|
||||||
"You are reviewing a past conversation transcript to help recall what happened. "
|
"You are reviewing a past conversation transcript to help recall what happened. "
|
||||||
"Summarize the conversation with a focus on the search topic. Include:\n"
|
"Summarize the conversation with a focus on the search topic. Include:\n"
|
||||||
@@ -235,17 +296,18 @@ async def _summarize_session(
|
|||||||
max_tokens=MAX_SUMMARY_TOKENS,
|
max_tokens=MAX_SUMMARY_TOKENS,
|
||||||
)
|
)
|
||||||
content = extract_content_or_reasoning(response)
|
content = extract_content_or_reasoning(response)
|
||||||
|
usage = _extract_aux_usage(response)
|
||||||
if content:
|
if content:
|
||||||
return content
|
return content, usage
|
||||||
# Reasoning-only / empty — let the retry loop handle it
|
# Reasoning-only / empty — let the retry loop handle it
|
||||||
logging.warning("Session search LLM returned empty content (attempt %d/%d)", attempt + 1, max_retries)
|
logging.warning("Session search LLM returned empty content (attempt %d/%d)", attempt + 1, max_retries)
|
||||||
if attempt < max_retries - 1:
|
if attempt < max_retries - 1:
|
||||||
await asyncio.sleep(1 * (attempt + 1))
|
await asyncio.sleep(1 * (attempt + 1))
|
||||||
continue
|
continue
|
||||||
return content
|
return content, usage
|
||||||
except RuntimeError:
|
except RuntimeError:
|
||||||
logging.warning("No auxiliary model available for session summarization")
|
logging.warning("No auxiliary model available for session summarization")
|
||||||
return None
|
return None, None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if attempt < max_retries - 1:
|
if attempt < max_retries - 1:
|
||||||
await asyncio.sleep(1 * (attempt + 1))
|
await asyncio.sleep(1 * (attempt + 1))
|
||||||
@@ -256,7 +318,48 @@ async def _summarize_session(
|
|||||||
e,
|
e,
|
||||||
exc_info=True,
|
exc_info=True,
|
||||||
)
|
)
|
||||||
return None
|
return None, None
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_aux_usage(response: Any) -> Optional[Dict[str, Any]]:
    """Read token usage off an aux LLM response in a provider-neutral shape.

    Returns ``None`` when ``response.usage`` is missing or falsy. Otherwise
    returns a dict with ``model``, ``input_tokens``, ``output_tokens``,
    ``cache_read_tokens`` and ``cache_creation_tokens``; every count is an
    ``int`` and defaults to 0 when the attribute is absent or falsy.

    Attribute names tried, first truthy value wins:
      * input:  ``input_tokens``, then ``prompt_tokens``
      * output: ``output_tokens``, then ``completion_tokens``
      * cache read: ``cache_read_input_tokens``, then
        ``prompt_tokens_details.cached_tokens``
      * cache creation: ``cache_creation_input_tokens``
    """
    usage_obj = getattr(response, "usage", None)
    if not usage_obj:
        return None

    def first_truthy(source, *attr_names):
        # Attributes are read lazily, left to right; 0 means "none populated".
        for attr_name in attr_names:
            candidate = getattr(source, attr_name, None)
            if candidate:
                return candidate
        return 0

    prompt_count = first_truthy(usage_obj, "input_tokens", "prompt_tokens")
    completion_count = first_truthy(usage_obj, "output_tokens", "completion_tokens")
    cached_reads = first_truthy(usage_obj, "cache_read_input_tokens")
    cached_writes = first_truthy(usage_obj, "cache_creation_input_tokens")

    # Fall back to the nested details object only when the top-level
    # cache-read field gave nothing.
    if not cached_reads:
        prompt_details = getattr(usage_obj, "prompt_tokens_details", None)
        if prompt_details:
            cached_reads = first_truthy(prompt_details, "cached_tokens")

    return {
        "model": getattr(response, "model", None),
        "input_tokens": int(prompt_count),
        "output_tokens": int(completion_count),
        "cache_read_tokens": int(cached_reads),
        "cache_creation_tokens": int(cached_writes),
    }
|
||||||
|
|
||||||
|
|
||||||
# Sources that are excluded from session browsing/searching by default.
|
# Sources that are excluded from session browsing/searching by default.
|
||||||
@@ -322,19 +425,380 @@ def _list_recent_sessions(db, limit: int, current_session_id: str = None) -> str
|
|||||||
return tool_error(f"Failed to list recent sessions: {e}", success=False)
|
return tool_error(f"Failed to list recent sessions: {e}", success=False)
|
||||||
|
|
||||||
|
|
||||||
|
def _guided_drill_down(
|
||||||
|
db,
|
||||||
|
session_id: str,
|
||||||
|
around_message_id,
|
||||||
|
window: int,
|
||||||
|
current_session_id: str = None,
|
||||||
|
anchors: Optional[List[Dict[str, Any]]] = None,
|
||||||
|
) -> str:
|
||||||
|
"""Anchored drill-down for ``mode='guided'`` of ``session_search``.
|
||||||
|
|
||||||
|
Returns a JSON string carrying one or more windows of messages — each
|
||||||
|
centred on a specific message id in a specific session. No FTS5, no
|
||||||
|
auxiliary LLM, no 100k-char truncation — N indexed DB lookups (where
|
||||||
|
N = number of anchors).
|
||||||
|
|
||||||
|
Two input shapes (use one):
|
||||||
|
|
||||||
|
* **Single anchor** (back-compat): pass ``session_id`` and
|
||||||
|
``around_message_id`` directly. Internally normalised to a single-
|
||||||
|
element ``anchors`` list. Response always carries ``windows``
|
||||||
|
as a list, plus the legacy single-anchor fields at the top level
|
||||||
|
when there's exactly one anchor.
|
||||||
|
|
||||||
|
* **Multi-anchor**: pass ``anchors=[{"session_id":..., "around_message_id":...}, ...]``.
|
||||||
|
The agent picks the most promising K hits from a wider fast call
|
||||||
|
and drills into all of them at once — same conversation in the
|
||||||
|
steering loop, more context per turn.
|
||||||
|
|
||||||
|
Each anchor is validated independently. Per-anchor failures (missing
|
||||||
|
session, anchor not in session, current-lineage rejection) become
|
||||||
|
error entries inside the response's ``windows`` list rather than
|
||||||
|
aborting the whole call. ``window`` is shared across all anchors
|
||||||
|
and clamped to ``[1, 20]`` (silent, matches the existing limit-clamp
|
||||||
|
pattern).
|
||||||
|
"""
|
||||||
|
# 1. Normalise inputs into a single ``anchors`` list. Three shapes:
|
||||||
|
# (a) anchors= parameter is set (preferred for multi-anchor)
|
||||||
|
# (b) session_id + around_message_id (single-anchor back-compat)
|
||||||
|
# (c) neither set → user-facing error
|
||||||
|
if anchors:
|
||||||
|
if not isinstance(anchors, list):
|
||||||
|
return tool_error(
|
||||||
|
"guided mode: 'anchors' must be a list of {session_id, around_message_id} dicts",
|
||||||
|
success=False,
|
||||||
|
)
|
||||||
|
normalised_anchors = anchors
|
||||||
|
elif session_id or around_message_id is not None:
|
||||||
|
normalised_anchors = [{
|
||||||
|
"session_id": session_id,
|
||||||
|
"around_message_id": around_message_id,
|
||||||
|
}]
|
||||||
|
else:
|
||||||
|
return tool_error(
|
||||||
|
"guided mode requires either anchors=[...] or session_id+around_message_id "
|
||||||
|
"(use match_message_id+session_id from a prior fast-mode hit)",
|
||||||
|
success=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
if len(normalised_anchors) == 0:
|
||||||
|
return tool_error(
|
||||||
|
"guided mode: anchors list is empty (pass at least one {session_id, around_message_id})",
|
||||||
|
success=False,
|
||||||
|
)
|
||||||
|
|
||||||
|
# 2. Window clamp (shared across all anchors). Matches the existing
|
||||||
|
# limit-clamp pattern (silent).
|
||||||
|
if not isinstance(window, int):
|
||||||
|
try:
|
||||||
|
window = int(window)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
window = 5
|
||||||
|
window = max(1, min(window, 20))
|
||||||
|
|
||||||
|
# 3. Helper: resolve to lineage root (used by the current-lineage
|
||||||
|
# rejection check below).
|
||||||
|
def _resolve_to_parent(sid: str) -> str:
|
||||||
|
visited = set()
|
||||||
|
cur = sid
|
||||||
|
while cur and cur not in visited:
|
||||||
|
visited.add(cur)
|
||||||
|
try:
|
||||||
|
meta = db.get_session(cur)
|
||||||
|
if not meta:
|
||||||
|
break
|
||||||
|
parent = meta.get("parent_session_id")
|
||||||
|
if parent:
|
||||||
|
cur = parent
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
except Exception as e:
|
||||||
|
logging.debug("Error resolving parent for %s: %s", cur, e, exc_info=True)
|
||||||
|
break
|
||||||
|
return cur
|
||||||
|
|
||||||
|
current_root = _resolve_to_parent(current_session_id) if current_session_id else None
|
||||||
|
|
||||||
|
# 4. Drill into each anchor. Per-anchor errors are recorded inline
|
||||||
|
# rather than aborting the whole call — the agent can still use
|
||||||
|
# successful drills even if one anchor was malformed.
|
||||||
|
windows_out: List[Dict[str, Any]] = []
|
||||||
|
for raw_anchor in normalised_anchors:
|
||||||
|
if not isinstance(raw_anchor, dict):
|
||||||
|
windows_out.append({
|
||||||
|
"success": False,
|
||||||
|
"error": "anchor must be a dict with session_id + around_message_id",
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
|
||||||
|
a_sid = raw_anchor.get("session_id")
|
||||||
|
a_msg = raw_anchor.get("around_message_id")
|
||||||
|
|
||||||
|
if not a_sid or not isinstance(a_sid, str) or not a_sid.strip():
|
||||||
|
windows_out.append({
|
||||||
|
"success": False,
|
||||||
|
"error": "anchor missing session_id",
|
||||||
|
"anchor": raw_anchor,
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
a_sid = a_sid.strip()
|
||||||
|
|
||||||
|
try:
|
||||||
|
a_msg_id = int(a_msg)
|
||||||
|
except (TypeError, ValueError):
|
||||||
|
windows_out.append({
|
||||||
|
"success": False,
|
||||||
|
"error": "anchor missing or non-integer around_message_id",
|
||||||
|
"anchor": raw_anchor,
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Current-lineage rejection: per-anchor, so other valid anchors
|
||||||
|
# in a multi-anchor call still drill.
|
||||||
|
if current_root:
|
||||||
|
target_root = _resolve_to_parent(a_sid)
|
||||||
|
if target_root and target_root == current_root:
|
||||||
|
windows_out.append({
|
||||||
|
"success": False,
|
||||||
|
"error": "anchor rejects drill-down into the current session lineage — those messages are already in your active context",
|
||||||
|
"session_id": a_sid,
|
||||||
|
"around_message_id": a_msg_id,
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Session existence check.
|
||||||
|
try:
|
||||||
|
session_meta = db.get_session(a_sid) or {}
|
||||||
|
except Exception as e:
|
||||||
|
logging.debug("get_session failed for %s: %s", a_sid, e, exc_info=True)
|
||||||
|
session_meta = {}
|
||||||
|
if not session_meta:
|
||||||
|
windows_out.append({
|
||||||
|
"success": False,
|
||||||
|
"error": f"session_id not found: {a_sid}",
|
||||||
|
"session_id": a_sid,
|
||||||
|
"around_message_id": a_msg_id,
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Fetch the window + bookends. ``get_anchored_view`` filters tool-response
|
||||||
|
# noise from the window (anchor itself is preserved regardless of role)
|
||||||
|
# and returns up to ``bookend`` user/assistant messages from the session
|
||||||
|
# head and tail — but only when those slices don't overlap the window.
|
||||||
|
# See SessionDB.get_anchored_view for the contract.
|
||||||
|
try:
|
||||||
|
view = db.get_anchored_view(a_sid, a_msg_id, window=window, bookend=3)
|
||||||
|
messages = view.get("window") or []
|
||||||
|
bookend_start = view.get("bookend_start") or []
|
||||||
|
bookend_end = view.get("bookend_end") or []
|
||||||
|
except Exception as e:
|
||||||
|
logging.debug("get_anchored_view failed: %s", e, exc_info=True)
|
||||||
|
windows_out.append({
|
||||||
|
"success": False,
|
||||||
|
"error": f"failed to load messages around {a_msg_id} in {a_sid}: {e}",
|
||||||
|
"session_id": a_sid,
|
||||||
|
"around_message_id": a_msg_id,
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Safety net: the agent (or memory, or a legacy caller) may pair a
|
||||||
|
# parent/lineage-root session_id with a message_id that actually
|
||||||
|
# lives in a descendant (child) session. Before this commit, fast
|
||||||
|
# mode returned exactly that broken pair. We now emit the matching
|
||||||
|
# raw sid in fast mode, but guided should remain forgiving for
|
||||||
|
# callers that haven't updated yet.
|
||||||
|
#
|
||||||
|
# Recovery rule: locate the real owning session by message id; if
|
||||||
|
# that session is in the same lineage as ``a_sid``, transparently
|
||||||
|
# rebind and refetch. Record a warning so the rebind is visible.
|
||||||
|
rebind_warning = None
|
||||||
|
if not messages:
|
||||||
|
owning = None
|
||||||
|
# Prefer a helper if SessionDB exposes one (forward-compat).
|
||||||
|
try:
|
||||||
|
if hasattr(db, "get_session_id_for_message"):
|
||||||
|
owning = db.get_session_id_for_message(a_msg_id)
|
||||||
|
except Exception as e:
|
||||||
|
logging.debug("get_session_id_for_message failed: %s", e, exc_info=True)
|
||||||
|
owning = None
|
||||||
|
# Fallback: query through SessionDB._conn (the canonical connection).
|
||||||
|
if not owning:
|
||||||
|
try:
|
||||||
|
conn = getattr(db, "_conn", None)
|
||||||
|
if conn is not None:
|
||||||
|
row = conn.execute(
|
||||||
|
"SELECT session_id FROM messages WHERE id = ?",
|
||||||
|
(a_msg_id,),
|
||||||
|
).fetchone()
|
||||||
|
# sqlite3.Row supports indexing; tuple fallback works too.
|
||||||
|
owning = row[0] if row else None
|
||||||
|
except Exception as e:
|
||||||
|
logging.debug("owning-session lookup failed: %s", e, exc_info=True)
|
||||||
|
owning = None
|
||||||
|
|
||||||
|
if owning and owning != a_sid:
|
||||||
|
# Check same lineage (walk both up to roots).
|
||||||
|
a_root = _resolve_to_parent(a_sid)
|
||||||
|
o_root = _resolve_to_parent(owning)
|
||||||
|
if a_root and o_root and a_root == o_root:
|
||||||
|
try:
|
||||||
|
rebind_view = db.get_anchored_view(
|
||||||
|
owning, a_msg_id, window=window, bookend=3
|
||||||
|
)
|
||||||
|
messages = rebind_view.get("window") or []
|
||||||
|
bookend_start = rebind_view.get("bookend_start") or []
|
||||||
|
bookend_end = rebind_view.get("bookend_end") or []
|
||||||
|
except Exception as e:
|
||||||
|
logging.debug("rebind get_anchored_view failed: %s", e, exc_info=True)
|
||||||
|
messages = []
|
||||||
|
if messages:
|
||||||
|
rebind_warning = (
|
||||||
|
f"around_message_id {a_msg_id} lives in {owning} "
|
||||||
|
f"(child of {a_sid}); rebound transparently"
|
||||||
|
)
|
||||||
|
# Re-fetch session_meta for the actual owning session.
|
||||||
|
try:
|
||||||
|
session_meta = db.get_session(owning) or session_meta
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
a_sid = owning
|
||||||
|
|
||||||
|
if not messages:
|
||||||
|
windows_out.append({
|
||||||
|
"success": False,
|
||||||
|
"error": f"around_message_id {a_msg_id} not in session_id {a_sid}",
|
||||||
|
"session_id": a_sid,
|
||||||
|
"around_message_id": a_msg_id,
|
||||||
|
})
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Wrap with anchor flag + boundary counts.
|
||||||
|
out_messages = []
|
||||||
|
messages_before = 0
|
||||||
|
messages_after = 0
|
||||||
|
for m in messages:
|
||||||
|
is_anchor = m.get("id") == a_msg_id
|
||||||
|
if not is_anchor and m.get("id", 0) < a_msg_id:
|
||||||
|
messages_before += 1
|
||||||
|
elif not is_anchor:
|
||||||
|
messages_after += 1
|
||||||
|
entry = {
|
||||||
|
"id": m.get("id"),
|
||||||
|
"role": m.get("role"),
|
||||||
|
"content": m.get("content"),
|
||||||
|
"tool_name": m.get("tool_name"),
|
||||||
|
"tool_calls": m.get("tool_calls") or None,
|
||||||
|
"tool_call_id": m.get("tool_call_id"),
|
||||||
|
"timestamp": m.get("timestamp"),
|
||||||
|
}
|
||||||
|
if is_anchor:
|
||||||
|
entry["anchor"] = True
|
||||||
|
# Strip None-valued optional fields to keep payload tight (keep
|
||||||
|
# 'content' even if None, since absent-content is meaningful).
|
||||||
|
entry = {k: v for k, v in entry.items() if v is not None or k in ("content",)}
|
||||||
|
out_messages.append(entry)
|
||||||
|
|
||||||
|
def _shape_bookend(m: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
entry = {
|
||||||
|
"id": m.get("id"),
|
||||||
|
"role": m.get("role"),
|
||||||
|
"content": m.get("content"),
|
||||||
|
"timestamp": m.get("timestamp"),
|
||||||
|
}
|
||||||
|
return {k: v for k, v in entry.items() if v is not None or k in ("content",)}
|
||||||
|
|
||||||
|
out_bookend_start = [_shape_bookend(m) for m in bookend_start]
|
||||||
|
out_bookend_end = [_shape_bookend(m) for m in bookend_end]
|
||||||
|
|
||||||
|
success_entry = {
|
||||||
|
"success": True,
|
||||||
|
"session_id": a_sid,
|
||||||
|
"around_message_id": a_msg_id,
|
||||||
|
"session_meta": {
|
||||||
|
"when": _format_timestamp(session_meta.get("started_at")),
|
||||||
|
"source": session_meta.get("source"),
|
||||||
|
"model": session_meta.get("model"),
|
||||||
|
"title": session_meta.get("title"),
|
||||||
|
},
|
||||||
|
"messages": out_messages,
|
||||||
|
"messages_before": messages_before,
|
||||||
|
"messages_after": messages_after,
|
||||||
|
"bookend_start": out_bookend_start,
|
||||||
|
"bookend_end": out_bookend_end,
|
||||||
|
}
|
||||||
|
if rebind_warning:
|
||||||
|
success_entry["warning"] = rebind_warning
|
||||||
|
windows_out.append(success_entry)
|
||||||
|
|
||||||
|
# 5. Top-level response shape. ``windows`` is always a list. For
|
||||||
|
# single-anchor calls (the common case), we mirror the legacy fields
|
||||||
|
# at the top level so existing callers / tests continue to work
|
||||||
|
# without branching on len(windows).
|
||||||
|
response: Dict[str, Any] = {
|
||||||
|
"success": True,
|
||||||
|
"mode": "guided",
|
||||||
|
"window": window,
|
||||||
|
"windows": windows_out,
|
||||||
|
"anchor_count": len(windows_out),
|
||||||
|
}
|
||||||
|
if len(windows_out) == 1:
|
||||||
|
only = windows_out[0]
|
||||||
|
if only.get("success"):
|
||||||
|
response.update({
|
||||||
|
"session_id": only["session_id"],
|
||||||
|
"around_message_id": only["around_message_id"],
|
||||||
|
"session_meta": only["session_meta"],
|
||||||
|
"messages": only["messages"],
|
||||||
|
"messages_before": only["messages_before"],
|
||||||
|
"messages_after": only["messages_after"],
|
||||||
|
"bookend_start": only.get("bookend_start", []),
|
||||||
|
"bookend_end": only.get("bookend_end", []),
|
||||||
|
})
|
||||||
|
if only.get("warning"):
|
||||||
|
response["warning"] = only["warning"]
|
||||||
|
else:
|
||||||
|
# Single-anchor failure: surface as a top-level tool_error so
|
||||||
|
# callers don't have to dig into the windows array for the
|
||||||
|
# error string. Keeps the legacy single-anchor failure shape.
|
||||||
|
return tool_error(only.get("error", "guided drill-down failed"), success=False)
|
||||||
|
|
||||||
|
return json.dumps(response, ensure_ascii=False)
|
||||||
|
|
||||||
|
|
||||||
def session_search(
|
def session_search(
|
||||||
query: str,
|
query: str = "",
|
||||||
role_filter: str = None,
|
role_filter: str = None,
|
||||||
limit: int = 3,
|
limit: int = 3,
|
||||||
db=None,
|
db=None,
|
||||||
current_session_id: str = None,
|
current_session_id: str = None,
|
||||||
|
mode: str = None,
|
||||||
|
# Guided-mode-only parameters: anchored drill-down into one or more
|
||||||
|
# session+message pairs. Required when mode='guided', ignored otherwise.
|
||||||
|
# Use either the single-anchor pair (session_id + around_message_id) or
|
||||||
|
# the multi-anchor list (anchors=[{session_id, around_message_id}, ...]).
|
||||||
|
session_id: str = None,
|
||||||
|
around_message_id: int = None,
|
||||||
|
window: int = 5,
|
||||||
|
anchors: list = None,
|
||||||
|
# Fast-mode-only temporal bias for ranking. ``None`` keeps FTS5's BM25
|
||||||
|
# ordering (time-neutral); ``"newest"`` / ``"oldest"`` make timestamp
|
||||||
|
# the primary key with rank as the tiebreaker. Silently ignored in
|
||||||
|
# other modes — see schema description.
|
||||||
|
sort: str = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Search past sessions and return focused summaries of matching conversations.
|
Search past sessions, or drill into a specific one.
|
||||||
|
|
||||||
Uses FTS5 to find matches, then summarizes the top sessions with the
|
Modes:
|
||||||
configured auxiliary session_search model.
|
* fast — FTS5 snippets + ±1 message context. Cheap discovery.
|
||||||
The current session is excluded from results since the agent already has that context.
|
* summary — fetch full session(s), truncate to 100k chars, run aux LLM
|
||||||
|
recap. Cross-session synthesis at ~30s tool-side cost.
|
||||||
|
* guided — anchored drill-down. Caller supplies session_id +
|
||||||
|
around_message_id (typically from a prior fast hit's
|
||||||
|
match_message_id field) and gets a window of messages
|
||||||
|
around the anchor with no LLM call and no truncation.
|
||||||
"""
|
"""
|
||||||
if db is None:
|
if db is None:
|
||||||
try:
|
try:
|
||||||
@@ -346,6 +810,52 @@ def session_search(
|
|||||||
from hermes_state import format_session_db_unavailable
|
from hermes_state import format_session_db_unavailable
|
||||||
return tool_error(format_session_db_unavailable(), success=False)
|
return tool_error(format_session_db_unavailable(), success=False)
|
||||||
|
|
||||||
|
# Mode normalisation. ``None`` / empty string / non-string → fall back to
|
||||||
|
# the user's configured default (via ~/.hermes/config.yaml, see
|
||||||
|
# ``_resolve_user_default_mode``). Defaults to "fast" if unset. An explicit
|
||||||
|
# "fast" / "summary" / "guided" wins regardless of config. An unknown
|
||||||
|
# string also falls back to the resolved user default rather than silently
|
||||||
|
# coercing to a hard-coded mode — silent coercion of typos would otherwise
|
||||||
|
# mask user errors.
|
||||||
|
if not isinstance(mode, str) or not mode.strip():
|
||||||
|
mode = _resolve_user_default_mode()
|
||||||
|
else:
|
||||||
|
mode = mode.strip().lower()
|
||||||
|
if mode in ("summarized", "summarise", "summarize", "deep"):
|
||||||
|
mode = "summary"
|
||||||
|
if mode in ("drill", "drilldown", "drill-down", "anchor", "around"):
|
||||||
|
mode = "guided"
|
||||||
|
if mode not in ("fast", "summary", "guided"):
|
||||||
|
mode = _resolve_user_default_mode()
|
||||||
|
|
||||||
|
# Normalise sort — only "newest"/"oldest" are accepted; anything else
|
||||||
|
# collapses to None (FTS5 rank-only). Sort affects fast mode only; logged
|
||||||
|
# and ignored elsewhere so misuse is visible but non-fatal.
|
||||||
|
sort_norm: Optional[str] = None
|
||||||
|
if isinstance(sort, str):
|
||||||
|
candidate = sort.strip().lower()
|
||||||
|
if candidate in ("newest", "oldest"):
|
||||||
|
sort_norm = candidate
|
||||||
|
if sort_norm and mode != "fast":
|
||||||
|
logging.debug(
|
||||||
|
"session_search: sort=%r is fast-mode only; ignored for mode=%s",
|
||||||
|
sort_norm, mode,
|
||||||
|
)
|
||||||
|
sort_norm = None
|
||||||
|
|
||||||
|
# Guided mode is a different shape: it doesn't search, it drills. Branch
|
||||||
|
# before FTS5 so we don't pay for anything we don't use, and so missing-arg
|
||||||
|
# validation happens up front.
|
||||||
|
if mode == "guided":
|
||||||
|
return _guided_drill_down(
|
||||||
|
db=db,
|
||||||
|
session_id=session_id,
|
||||||
|
around_message_id=around_message_id,
|
||||||
|
window=window,
|
||||||
|
current_session_id=current_session_id,
|
||||||
|
anchors=anchors,
|
||||||
|
)
|
||||||
|
|
||||||
# Defensive: models (especially open-source) may send non-int limit values
|
# Defensive: models (especially open-source) may send non-int limit values
|
||||||
# (None when JSON null, string "int", or even a type object). Coerce to a
|
# (None when JSON null, string "int", or even a type object). Coerce to a
|
||||||
# safe integer before any arithmetic/comparison to prevent TypeError.
|
# safe integer before any arithmetic/comparison to prevent TypeError.
|
||||||
@@ -354,7 +864,7 @@ def session_search(
|
|||||||
limit = int(limit)
|
limit = int(limit)
|
||||||
except (TypeError, ValueError):
|
except (TypeError, ValueError):
|
||||||
limit = 3
|
limit = 3
|
||||||
limit = max(1, min(limit, 5)) # Clamp to [1, 5]
|
limit = max(1, min(limit, 10)) # Clamp to [1, 10]
|
||||||
|
|
||||||
# Recent sessions mode: when query is empty, return metadata for recent sessions.
|
# Recent sessions mode: when query is empty, return metadata for recent sessions.
|
||||||
# No LLM calls — just DB queries for titles, previews, timestamps.
|
# No LLM calls — just DB queries for titles, previews, timestamps.
|
||||||
@@ -364,23 +874,30 @@ def session_search(
|
|||||||
query = query.strip()
|
query = query.strip()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Parse role filter
|
# Parse role filter. Defaults to user+assistant; tool messages are
|
||||||
|
# usually noisy and rarely the signal. Caller opts back in via
|
||||||
|
# role_filter='user,assistant,tool' or 'tool'.
|
||||||
role_list = None
|
role_list = None
|
||||||
if role_filter and role_filter.strip():
|
if role_filter and role_filter.strip():
|
||||||
role_list = [r.strip() for r in role_filter.split(",") if r.strip()]
|
role_list = [r.strip() for r in role_filter.split(",") if r.strip()]
|
||||||
|
else:
|
||||||
|
role_list = ["user", "assistant"]
|
||||||
|
|
||||||
# FTS5 search -- get matches ranked by relevance
|
# FTS5 search -- get matches ranked by relevance (with optional
|
||||||
|
# temporal bias when sort is set; see param docs).
|
||||||
raw_results = db.search_messages(
|
raw_results = db.search_messages(
|
||||||
query=query,
|
query=query,
|
||||||
role_filter=role_list,
|
role_filter=role_list,
|
||||||
exclude_sources=list(_HIDDEN_SESSION_SOURCES),
|
exclude_sources=list(_HIDDEN_SESSION_SOURCES),
|
||||||
limit=50, # Get more matches to find unique sessions
|
limit=50, # Get more matches to find unique sessions
|
||||||
offset=0,
|
offset=0,
|
||||||
|
sort=sort_norm,
|
||||||
)
|
)
|
||||||
|
|
||||||
if not raw_results:
|
if not raw_results:
|
||||||
return json.dumps({
|
return json.dumps({
|
||||||
"success": True,
|
"success": True,
|
||||||
|
"mode": mode,
|
||||||
"query": query,
|
"query": query,
|
||||||
"results": [],
|
"results": [],
|
||||||
"count": 0,
|
"count": 0,
|
||||||
@@ -421,6 +938,13 @@ def session_search(
|
|||||||
# Group by resolved (parent) session_id, dedup, skip the current
|
# Group by resolved (parent) session_id, dedup, skip the current
|
||||||
# session lineage. Compression and delegation create child sessions
|
# session lineage. Compression and delegation create child sessions
|
||||||
# that still belong to the same active conversation.
|
# that still belong to the same active conversation.
|
||||||
|
#
|
||||||
|
# IMPORTANT: group BY parent (one entry per conversation lineage), but
|
||||||
|
# preserve the raw FTS5 session_id on the surviving result. Only the
|
||||||
|
# raw sid pairs validly with ``match_message_id``; rewriting it to the
|
||||||
|
# parent produces a {parent_sid, child_message_id} handle that guided
|
||||||
|
# mode cannot resolve. ``parent_session_id`` is exposed separately for
|
||||||
|
# the lineage-root link the user expects to see.
|
||||||
seen_sessions = {}
|
seen_sessions = {}
|
||||||
for result in raw_results:
|
for result in raw_results:
|
||||||
raw_sid = result["session_id"]
|
raw_sid = result["session_id"]
|
||||||
@@ -433,11 +957,61 @@ def session_search(
|
|||||||
continue
|
continue
|
||||||
if resolved_sid not in seen_sessions:
|
if resolved_sid not in seen_sessions:
|
||||||
result = dict(result)
|
result = dict(result)
|
||||||
result["session_id"] = resolved_sid
|
# Keep raw_sid as session_id; expose lineage root separately.
|
||||||
|
result["session_id"] = raw_sid
|
||||||
|
if resolved_sid and resolved_sid != raw_sid:
|
||||||
|
result["parent_session_id"] = resolved_sid
|
||||||
seen_sessions[resolved_sid] = result
|
seen_sessions[resolved_sid] = result
|
||||||
if len(seen_sessions) >= limit:
|
if len(seen_sessions) >= limit:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
if mode == "fast":
|
||||||
|
results = []
|
||||||
|
for lineage_root, match_info in seen_sessions.items():
|
||||||
|
# Emit (raw_sid + match_message_id) so the agent's follow-up
|
||||||
|
# guided call has a valid {session_id, around_message_id}.
|
||||||
|
# ``parent_session_id`` (if different) carries the lineage root.
|
||||||
|
hit_sid = match_info.get("session_id") or lineage_root
|
||||||
|
try:
|
||||||
|
session_meta = db.get_session(lineage_root) or {}
|
||||||
|
except Exception:
|
||||||
|
session_meta = {}
|
||||||
|
snippet = match_info.get("snippet") or ""
|
||||||
|
context = match_info.get("context") or []
|
||||||
|
if not isinstance(context, list):
|
||||||
|
context = []
|
||||||
|
entry = {
|
||||||
|
"session_id": hit_sid,
|
||||||
|
"when": _format_timestamp(
|
||||||
|
session_meta.get("started_at") or match_info.get("session_started")
|
||||||
|
),
|
||||||
|
"source": session_meta.get("source") or match_info.get("source", "unknown"),
|
||||||
|
"model": session_meta.get("model") or match_info.get("model") or "unknown",
|
||||||
|
"matched_role": match_info.get("role"),
|
||||||
|
"match_message_id": match_info.get("id"),
|
||||||
|
"title": session_meta.get("title") or None,
|
||||||
|
"snippet": snippet,
|
||||||
|
"context": context,
|
||||||
|
"summary": "[Search hit — summary not generated in fast mode] Use snippet/context fields, or set mode='summary' for LLM-generated recall.",
|
||||||
|
}
|
||||||
|
# Only emit parent_session_id when the FTS5 row lives in a
|
||||||
|
# child of the displayed lineage — keeps the common case
|
||||||
|
# (no delegation/compression) tidy.
|
||||||
|
parent_sid = match_info.get("parent_session_id")
|
||||||
|
if parent_sid and parent_sid != hit_sid:
|
||||||
|
entry["parent_session_id"] = parent_sid
|
||||||
|
results.append(entry)
|
||||||
|
|
||||||
|
return json.dumps({
|
||||||
|
"success": True,
|
||||||
|
"mode": "fast",
|
||||||
|
"query": query,
|
||||||
|
"results": results,
|
||||||
|
"count": len(results),
|
||||||
|
"sessions_searched": len(seen_sessions),
|
||||||
|
"message": "Fast search returned FTS snippets without LLM summarization. Use mode='summary' for focused summaries when needed.",
|
||||||
|
}, ensure_ascii=False)
|
||||||
|
|
||||||
# Prepare all sessions for parallel summarization
|
# Prepare all sessions for parallel summarization
|
||||||
tasks = []
|
tasks = []
|
||||||
for session_id, match_info in seen_sessions.items():
|
for session_id, match_info in seen_sessions.items():
|
||||||
@@ -458,12 +1032,12 @@ def session_search(
|
|||||||
)
|
)
|
||||||
|
|
||||||
# Summarize all sessions in parallel
|
# Summarize all sessions in parallel
|
||||||
async def _summarize_all() -> List[Union[str, Exception]]:
|
async def _summarize_all() -> List[Union[tuple, Exception]]:
|
||||||
"""Summarize all sessions with bounded concurrency."""
|
"""Summarize all sessions with bounded concurrency."""
|
||||||
max_concurrency = min(_get_session_search_max_concurrency(), max(1, len(tasks)))
|
max_concurrency = min(_get_session_search_max_concurrency(), max(1, len(tasks)))
|
||||||
semaphore = asyncio.Semaphore(max_concurrency)
|
semaphore = asyncio.Semaphore(max_concurrency)
|
||||||
|
|
||||||
async def _bounded_summary(text: str, meta: Dict[str, Any]) -> Optional[str]:
|
async def _bounded_summary(text: str, meta: Dict[str, Any]):
|
||||||
async with semaphore:
|
async with semaphore:
|
||||||
return await _summarize_session(text, query, meta)
|
return await _summarize_session(text, query, meta)
|
||||||
|
|
||||||
@@ -493,13 +1067,27 @@ def session_search(
|
|||||||
}, ensure_ascii=False)
|
}, ensure_ascii=False)
|
||||||
|
|
||||||
summaries = []
|
summaries = []
|
||||||
|
aux_total = {
|
||||||
|
"model": None,
|
||||||
|
"input_tokens": 0,
|
||||||
|
"output_tokens": 0,
|
||||||
|
"cache_read_tokens": 0,
|
||||||
|
"cache_creation_tokens": 0,
|
||||||
|
"call_count": 0,
|
||||||
|
}
|
||||||
for (session_id, match_info, conversation_text, session_meta), result in zip(tasks, results):
|
for (session_id, match_info, conversation_text, session_meta), result in zip(tasks, results):
|
||||||
|
usage: Optional[Dict[str, Any]] = None
|
||||||
if isinstance(result, Exception):
|
if isinstance(result, Exception):
|
||||||
logging.warning(
|
logging.warning(
|
||||||
"Failed to summarize session %s: %s",
|
"Failed to summarize session %s: %s",
|
||||||
session_id, result, exc_info=True,
|
session_id, result, exc_info=True,
|
||||||
)
|
)
|
||||||
result = None
|
summary_text = None
|
||||||
|
elif isinstance(result, tuple):
|
||||||
|
summary_text, usage = result
|
||||||
|
else:
|
||||||
|
# Defensive: a future code path might still return a bare string.
|
||||||
|
summary_text, usage = result, None
|
||||||
|
|
||||||
# Prefer resolved parent session metadata over FTS5 match metadata.
|
# Prefer resolved parent session metadata over FTS5 match metadata.
|
||||||
# match_info carries source/model from the *child* session that contained
|
# match_info carries source/model from the *child* session that contained
|
||||||
@@ -515,23 +1103,39 @@ def session_search(
|
|||||||
"model": session_meta.get("model") or match_info.get("model"),
|
"model": session_meta.get("model") or match_info.get("model"),
|
||||||
}
|
}
|
||||||
|
|
||||||
if result:
|
if summary_text:
|
||||||
entry["summary"] = result
|
entry["summary"] = summary_text
|
||||||
else:
|
else:
|
||||||
# Fallback: raw preview so matched sessions aren't silently
|
# Fallback: raw preview so matched sessions aren't silently
|
||||||
# dropped when the summarizer is unavailable (fixes #3409).
|
# dropped when the summarizer is unavailable (fixes #3409).
|
||||||
preview = (conversation_text[:500] + "\n…[truncated]") if conversation_text else "No preview available."
|
preview = (conversation_text[:500] + "\n…[truncated]") if conversation_text else "No preview available."
|
||||||
entry["summary"] = f"[Raw preview — summarization unavailable]\n{preview}"
|
entry["summary"] = f"[Raw preview — summarization unavailable]\n{preview}"
|
||||||
|
|
||||||
|
if usage:
|
||||||
|
entry["aux_usage"] = usage
|
||||||
|
aux_total["model"] = aux_total["model"] or usage.get("model")
|
||||||
|
aux_total["input_tokens"] += usage["input_tokens"]
|
||||||
|
aux_total["output_tokens"] += usage["output_tokens"]
|
||||||
|
aux_total["cache_read_tokens"] += usage["cache_read_tokens"]
|
||||||
|
aux_total["cache_creation_tokens"] += usage["cache_creation_tokens"]
|
||||||
|
aux_total["call_count"] += 1
|
||||||
|
|
||||||
summaries.append(entry)
|
summaries.append(entry)
|
||||||
|
|
||||||
return json.dumps({
|
payload = {
|
||||||
"success": True,
|
"success": True,
|
||||||
|
"mode": "summary",
|
||||||
"query": query,
|
"query": query,
|
||||||
"results": summaries,
|
"results": summaries,
|
||||||
"count": len(summaries),
|
"count": len(summaries),
|
||||||
"sessions_searched": len(seen_sessions),
|
"sessions_searched": len(seen_sessions),
|
||||||
}, ensure_ascii=False)
|
}
|
||||||
|
# Only surface aux_usage_total when we actually captured any (test mocks
|
||||||
|
# and providers that don't report usage produce an all-zero/empty dict —
|
||||||
|
# don't pollute the payload in that case).
|
||||||
|
if aux_total["call_count"]:
|
||||||
|
payload["aux_usage_total"] = aux_total
|
||||||
|
return json.dumps(payload, ensure_ascii=False)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error("Session search failed: %s", e, exc_info=True)
|
logging.error("Session search failed: %s", e, exc_info=True)
|
||||||
@@ -539,7 +1143,7 @@ def session_search(
|
|||||||
|
|
||||||
|
|
||||||
def check_session_search_requirements() -> bool:
|
def check_session_search_requirements() -> bool:
|
||||||
"""Requires SQLite state database and an auxiliary text model."""
|
"""Requires SQLite state database; summary mode also needs an auxiliary model."""
|
||||||
try:
|
try:
|
||||||
from hermes_state import DEFAULT_DB_PATH
|
from hermes_state import DEFAULT_DB_PATH
|
||||||
return DEFAULT_DB_PATH.parent.exists()
|
return DEFAULT_DB_PATH.parent.exists()
|
||||||
@@ -550,44 +1154,101 @@ def check_session_search_requirements() -> bool:
|
|||||||
SESSION_SEARCH_SCHEMA = {
|
SESSION_SEARCH_SCHEMA = {
|
||||||
"name": "session_search",
|
"name": "session_search",
|
||||||
"description": (
|
"description": (
|
||||||
"Search your long-term memory of past conversations, or browse recent sessions. This is your recall -- "
|
"Search past sessions stored in the local session DB. Three modes plus a default "
|
||||||
"every past session is searchable, and this tool summarizes what happened.\n\n"
|
"browsing mode when no arguments are passed. All three modes operate on the same "
|
||||||
"TWO MODES:\n"
|
"FTS5-indexed message store; they differ in what they return and at what cost.\n\n"
|
||||||
"1. Recent sessions (no query): Call with no arguments to see what was worked on recently. "
|
"MODES\n\n"
|
||||||
"Returns titles, previews, and timestamps. Zero LLM cost, instant. "
|
" • mode='fast' — FTS5 snippets across matched sessions. No LLM call. Returns one "
|
||||||
"Start here when the user asks what were we working on or what did we do recently.\n"
|
"entry per matched session with session_id, match_message_id, a one-message context "
|
||||||
"2. Keyword search (with query): Search for specific topics across all past sessions. "
|
"window, and metadata. Use this as the starting move for any recall question — "
|
||||||
"Returns LLM-generated summaries of matching sessions.\n\n"
|
"discovery and state reconstruction both. The match_message_id is the anchor you "
|
||||||
"USE THIS PROACTIVELY when:\n"
|
"pass to guided.\n\n"
|
||||||
"- The user says 'we did this before', 'remember when', 'last time', 'as I mentioned'\n"
|
" • mode='guided' — REQUIRES anchors from a prior fast call. Returns a window of "
|
||||||
"- The user asks about a topic you worked on before but don't have in current context\n"
|
"raw messages around each anchor plus session bookends (bookend_start, bookend_end). "
|
||||||
"- The user references a project, person, or concept that seems familiar but isn't in memory\n"
|
"No LLM call, no truncation. Single or multi-anchor: pass "
|
||||||
"- You want to check if you've solved a similar problem before\n"
|
"``anchors=[{session_id, around_message_id}, ...]``. Each anchor returns its own "
|
||||||
"- The user asks 'what did we do about X?' or 'how did we fix Y?'\n\n"
|
"window in the response's ``windows`` array. Bookends are the first/last "
|
||||||
"Don't hesitate to search when it is actually cross-session -- it's fast and cheap. "
|
"user+assistant messages of the session, empty when the window already overlaps "
|
||||||
"Better to search and confirm than to guess or ask the user to repeat themselves.\n\n"
|
"the session head/tail. Tool messages are filtered from the window (the anchor "
|
||||||
"Search syntax: keywords joined with OR for broad recall (elevenlabs OR baseten OR funding), "
|
"itself is preserved even if role='tool').\n\n"
|
||||||
"phrases for exact match (\"docker networking\"), boolean (python NOT java), prefix (deploy*). "
|
" • mode='summary' — LLM-generated prose synthesis across matched sessions. Issues "
|
||||||
"IMPORTANT: Use OR between keywords for best results — FTS5 defaults to AND which misses "
|
"one auxiliary-model call per session in the hit list, so cost scales with whatever "
|
||||||
"sessions that only mention some terms. If a broad OR query returns nothing, try individual "
|
"auxiliary model (or main model fallback) is configured. Returns aux token usage in "
|
||||||
"keyword searches in parallel. Returns summaries of the top matching sessions."
|
"the response (``aux_usage`` per call, ``aux_usage_total`` per batch). Reach for "
|
||||||
|
"this when you genuinely need cross-session prose synthesis in one shot.\n\n"
|
||||||
|
" • No query, no mode — browses recent sessions chronologically. Returns titles, "
|
||||||
|
"previews, timestamps. No LLM call.\n\n"
|
||||||
|
"DEFAULT MODE\n\n"
|
||||||
|
" When ``mode=`` is unset, the resolver checks ``auxiliary.session_search.default_mode`` "
|
||||||
|
"in ~/.hermes/config.yaml (accepted values: ``fast`` | ``summary``). If the user "
|
||||||
|
"has set a default, honour it on the first call. With no config, the default is "
|
||||||
|
"``fast``. An explicit ``mode=`` argument always wins.\n\n"
|
||||||
|
"ANCHOR CONTRACT\n\n"
|
||||||
|
" An anchor is the pair (session_id, around_message_id). The session_id MUST be "
|
||||||
|
"the raw owning session of around_message_id — guided rejects anchors where the "
|
||||||
|
"message_id does not exist in the named session. Fast results return both "
|
||||||
|
"session_id (raw owning) and parent_session_id (when different, for display "
|
||||||
|
"context only). Pair session_id with match_message_id from the same fast hit; do "
|
||||||
|
"not substitute parent_session_id.\n\n"
|
||||||
|
"FTS5 SYNTAX\n\n"
|
||||||
|
" FTS5 defaults to AND across terms — multi-word queries require all terms to "
|
||||||
|
"match. Use OR explicitly for broader recall (``alpha OR beta OR gamma``), quoted "
|
||||||
|
"phrases for exact match (``\"docker networking\"``), boolean (``python NOT java``), "
|
||||||
|
"or prefix wildcards (``deploy*``).\n\n"
|
||||||
|
"WHEN TO USE\n\n"
|
||||||
|
" Before reaching for ``gh``, web search, or filesystem inspection on questions "
|
||||||
|
"about prior work — what was discussed, what was decided, where an artefact was "
|
||||||
|
"created. The session DB carries what was said when; external tools show current "
|
||||||
|
"world state."
|
||||||
),
|
),
|
||||||
"parameters": {
|
"parameters": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"query": {
|
"query": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Search query — keywords, phrases, or boolean expressions to find in past sessions. Omit this parameter entirely to browse recent sessions instead (returns titles, previews, timestamps with no LLM cost).",
|
"description": "Search query (modes 'fast' and 'summary'). Keywords, phrases, or boolean expressions to find in past sessions. Omit this parameter entirely to browse recent sessions instead. Ignored when mode='guided'.",
|
||||||
},
|
},
|
||||||
"role_filter": {
|
"role_filter": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Optional: only search messages from specific roles (comma-separated). E.g. 'user,assistant' to skip tool outputs.",
|
"description": "Optional: only search messages from specific roles (comma-separated). Defaults to 'user,assistant' for fast/summary modes — tool messages are usually noisy (large outputs, serialised tool calls). Pass 'user,assistant,tool' to include tool output (debugging tool behaviour) or 'tool' to search tool output only. Ignored when mode='guided'.",
|
||||||
},
|
},
|
||||||
"limit": {
|
"limit": {
|
||||||
"type": "integer",
|
"type": "integer",
|
||||||
"description": "Max sessions to summarize (default: 3, max: 5).",
|
"description": "Max sessions to return (default: 3, max: 10). Bump higher (5–10) when the user wants to be in the retrieval loop and pick the right anchor for a guided drill-down. Ignored when mode='guided' (which returns one anchored window per anchor).",
|
||||||
"default": 3,
|
"default": 3,
|
||||||
},
|
},
|
||||||
|
"mode": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["fast", "summary", "guided"],
|
||||||
|
"description": (
|
||||||
|
"fast — FTS5 snippets, no LLM. Default. "
|
||||||
|
"guided — requires anchors from a prior fast call; returns raw message window per anchor. "
|
||||||
|
"summary — LLM synthesis across matched sessions; opt-in, costs per aux-model call."
|
||||||
|
),
|
||||||
|
"default": "fast",
|
||||||
|
},
|
||||||
|
"anchors": {
|
||||||
|
"type": "array",
|
||||||
|
"description": "Required for mode='guided'. List of {session_id, around_message_id} dicts to drill into. Copy session_id and match_message_id verbatim from prior fast-mode results — they pair as a single self-consistent handle. Do NOT substitute parent_session_id (shown for display context only; pairs incorrectly with match_message_id). One anchor is fine when the topic lives in a single session; for multi-session catch-up (topic touched across several recent sessions), pass the top 2–3 fast hits as separate anchors in ONE call — each gets its own window + bookends in the response's 'windows' array.",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"session_id": {"type": "string"},
|
||||||
|
"around_message_id": {"type": "integer"},
|
||||||
|
},
|
||||||
|
"required": ["session_id", "around_message_id"],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
"window": {
|
||||||
|
"type": "integer",
|
||||||
|
"description": "Mode='guided' only. Number of messages to return on each side of each anchor (the anchor itself is always included). Shared across all anchors in a multi-anchor call. Clamped to [1, 20]. Default 5.",
|
||||||
|
"default": 5,
|
||||||
|
},
|
||||||
|
"sort": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["newest", "oldest"],
|
||||||
|
"description": "Mode='fast' only. Temporal bias on top of FTS5 ranking. Omit to keep relevance-only ordering (the default, suitable for exploratory recall — 'what do we know about X'). Set 'newest' for recency-shaped questions ('where did we leave X', 'latest status of Y') so recent matches surface first with rank as the tiebreaker. Set 'oldest' for origin-shaped questions ('how did X start', 'first time we discussed Y') so the earliest matches surface first. Silently ignored in summary / guided / recent modes — for temporal narrative across sessions, drive fast with sort, then drill the right anchors with guided.",
|
||||||
|
},
|
||||||
},
|
},
|
||||||
"required": [],
|
"required": [],
|
||||||
},
|
},
|
||||||
@@ -605,6 +1266,12 @@ registry.register(
|
|||||||
query=args.get("query") or "",
|
query=args.get("query") or "",
|
||||||
role_filter=args.get("role_filter"),
|
role_filter=args.get("role_filter"),
|
||||||
limit=args.get("limit", 3),
|
limit=args.get("limit", 3),
|
||||||
|
mode=args.get("mode"),
|
||||||
|
session_id=args.get("session_id"),
|
||||||
|
around_message_id=args.get("around_message_id"),
|
||||||
|
window=args.get("window", 5),
|
||||||
|
anchors=args.get("anchors"),
|
||||||
|
sort=args.get("sort"),
|
||||||
db=kw.get("db"),
|
db=kw.get("db"),
|
||||||
current_session_id=kw.get("current_session_id")),
|
current_session_id=kw.get("current_session_id")),
|
||||||
check_fn=check_session_search_requirements,
|
check_fn=check_session_search_requirements,
|
||||||
|
|||||||
Reference in New Issue
Block a user