mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-10 12:18:44 +08:00
Compare commits
30 Commits
feat/deskt
...
feat/sessi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ce0f4838b0 | ||
|
|
2ecad49113 | ||
|
|
8245173d61 | ||
|
|
327e577acf | ||
|
|
b5996b6451 | ||
|
|
ef10d2e7c9 | ||
|
|
af1ea1f4ed | ||
|
|
29575b3712 | ||
|
|
71558e753d | ||
|
|
4f7e64c845 | ||
|
|
2cbf0631a5 | ||
|
|
659af123c3 | ||
|
|
f4c43f0886 | ||
|
|
b54b246071 | ||
|
|
1a00d730eb | ||
|
|
76f40e6449 | ||
|
|
2bed2124a4 | ||
|
|
8709e1ebec | ||
|
|
54d817f882 | ||
|
|
74fdfe6b50 | ||
|
|
02a54e01ce | ||
|
|
8a31985e4f | ||
|
|
41c13ba71d | ||
|
|
36c5b188b5 | ||
|
|
1e29fa8865 | ||
|
|
e74a682b0f | ||
|
|
2b606d20e2 | ||
|
|
3ac750ec07 | ||
|
|
aa2d3e2ee1 | ||
|
|
7d628eaa3d |
@@ -444,6 +444,10 @@ prompt_caching:
|
||||
# model: ""
|
||||
# timeout: 30
|
||||
# max_concurrency: 3 # Limit parallel summaries to reduce request-burst 429s
|
||||
# default_mode: "fast" # 'fast' | 'summary' — mode used when caller passes none.
|
||||
# # fast: FTS5 snippet hits, no LLM call. Default.
|
||||
# # summary: LLM-generated prose synthesis across hits.
|
||||
# # guided requires anchors and cannot be a default.
|
||||
# extra_body: {} # Provider-specific OpenAI-compatible request fields
|
||||
# # Example for providers that support request-body
|
||||
# # reasoning controls:
|
||||
|
||||
@@ -846,6 +846,7 @@ DEFAULT_CONFIG = {
|
||||
"timeout": 30,
|
||||
"extra_body": {},
|
||||
"max_concurrency": 3, # Clamp parallel summaries to avoid request-burst 429s on small providers
|
||||
"default_mode": "fast", # 'fast' | 'summary' — which mode session_search uses when caller passes none
|
||||
},
|
||||
"skills_hub": {
|
||||
"provider": "auto",
|
||||
|
||||
227
hermes_state.py
227
hermes_state.py
@@ -25,7 +25,7 @@ from pathlib import Path
|
||||
|
||||
from agent.memory_manager import sanitize_context
|
||||
from hermes_constants import get_hermes_home
|
||||
from typing import Any, Callable, Dict, List, Optional, TypeVar
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -1618,6 +1618,185 @@ class SessionDB:
|
||||
result.append(msg)
|
||||
return result
|
||||
|
||||
def get_messages_around(
    self,
    session_id: str,
    around_message_id: int,
    window: int = 5,
) -> List[Dict[str, Any]]:
    """Load a window of messages anchored on a specific message id.

    Returns up to ``window`` messages before the anchor, the anchor itself,
    and up to ``window`` messages after — all from the same session,
    ordered by id ascending. Boundaries are honoured: if the anchor is
    near the start or end of the session, fewer messages are returned on
    the truncated side.

    Args:
        session_id: Session the anchor must belong to.
        around_message_id: Id of the anchor message.
        window: Maximum number of neighbours to return on each side.
            Negative values are clamped to 0. ``None`` or a value that
            cannot be converted to an int falls back to the default of 5
            instead of raising.

    Returns:
        A list of message dicts with ``content`` decoded and ``tool_calls``
        parsed from JSON (``[]`` when the stored JSON is malformed). An
        empty list when ``around_message_id`` is not a message id within
        ``session_id`` — callers decide whether to surface that as an error.

    Used by ``session_search`` mode='guided' to provide anchored
    drill-down into a specific session at a specific message — without
    the cost of summarisation or the risk of 100k-char truncation.
    """
    # Tool-call arguments are forwarded with ``function_args.get("window", 5)``,
    # which yields None for an explicit null; ``None < 0`` would raise
    # TypeError. Coerce and clamp once, up front.
    try:
        window = max(0, int(window))
    except (TypeError, ValueError):
        window = 5

    with self._lock:
        # Confirm the anchor exists in this session — cheap guard against
        # cross-session contamination if a caller mixes up session/message
        # ids.
        anchor_exists = self._conn.execute(
            "SELECT 1 FROM messages WHERE id = ? AND session_id = ? LIMIT 1",
            (around_message_id, session_id),
        ).fetchone()
        if not anchor_exists:
            return []

        # Two queries: anchor + before (DESC, take window+1), and after
        # (ASC, take window). Final order is id ASC.
        before_rows = self._conn.execute(
            "SELECT * FROM messages "
            "WHERE session_id = ? AND id <= ? "
            "ORDER BY id DESC LIMIT ?",
            (session_id, around_message_id, window + 1),
        ).fetchall()
        after_rows = self._conn.execute(
            "SELECT * FROM messages "
            "WHERE session_id = ? AND id > ? "
            "ORDER BY id ASC LIMIT ?",
            (session_id, around_message_id, window),
        ).fetchall()

    # Rows are fully fetched, so decoding does not need the lock.
    # before_rows is DESC; reverse so it's ASC, then concatenate after_rows.
    rows = list(reversed(before_rows)) + list(after_rows)
    result = []
    for row in rows:
        msg = dict(row)
        if "content" in msg:
            msg["content"] = self._decode_content(msg["content"])
        if msg.get("tool_calls"):
            try:
                msg["tool_calls"] = json.loads(msg["tool_calls"])
            except (json.JSONDecodeError, TypeError):
                logger.warning(
                    "Failed to deserialize tool_calls in get_messages_around, falling back to []"
                )
                msg["tool_calls"] = []
        result.append(msg)
    return result
|
||||
|
||||
def get_anchored_view(
    self,
    session_id: str,
    around_message_id: int,
    window: int = 5,
    bookend: int = 3,
    keep_roles: Optional[Tuple[str, ...]] = ("user", "assistant"),
) -> Dict[str, Any]:
    """Return an anchored window plus session bookends, opinionated for guided recall.

    Built on top of ``get_messages_around``:
    - ``window``: messages immediately surrounding the anchor. Filtered to
      ``keep_roles`` (tool-response noise dropped by default), EXCEPT the
      anchor itself is always included regardless of role — callers may
      have anchored on a tool message and dropping it would break the
      contract.
    - ``bookend_start``: first ``bookend`` messages of the session
      (filtered to ``keep_roles``), but ONLY those whose id sits strictly
      before the window's first message id. If the window already covers
      the session start, ``bookend_start`` is an empty list.
    - ``bookend_end``: last ``bookend`` messages of the session (same
      filter + non-overlap rule applied at the tail).

    Bookends exist so an FTS5 hit anywhere in a long session still yields
    the goal (opening) and the resolution (closing) on a single guided
    call — without the cost of fetching the whole transcript.

    Args:
        session_id: Session the anchor must belong to.
        around_message_id: Id of the anchor message.
        window: Neighbours per side, forwarded to ``get_messages_around``.
        bookend: Maximum messages per bookend; negative is treated as 0.
        keep_roles: Roles kept in the window and bookends. ``None``
            disables role filtering entirely (raw window + raw bookends).
            Pass an explicit tuple to override the default.

    Returns:
        A dict with keys ``window``, ``bookend_start`` and ``bookend_end``,
        each a list of message dicts in id-ascending order. All three lists
        are empty when the anchor isn't in the session — caller decides how
        to surface that.
    """
    if bookend < 0:
        bookend = 0

    # Reuse the primitive — it already handles the anchor-existence check,
    # window clamping, content decoding, and tool_calls deserialisation.
    window_rows = self.get_messages_around(
        session_id, around_message_id, window=window
    )
    if not window_rows:
        return {"window": [], "bookend_start": [], "bookend_end": []}

    # The anchor may arrive as a numeric string (tool-call arguments). SQL
    # can still match it, but a Python ``==`` against the row's id would
    # not, silently dropping a tool-role anchor. Normalise before comparing.
    # NOTE(review): assumes messages.id has INTEGER affinity — schema is not
    # visible here; confirm.
    try:
        anchor_id = int(around_message_id)
    except (TypeError, ValueError):
        anchor_id = around_message_id

    # Apply role filter to the window, but never drop the anchor itself.
    if keep_roles is not None:
        keep_set = set(keep_roles)
        filtered_window = [
            m for m in window_rows
            if m.get("id") == anchor_id or m.get("role") in keep_set
        ]
    else:
        filtered_window = window_rows

    # Bounds come from the unfiltered window so bookends never overlap the
    # raw span, even where the role filter removed its edge messages.
    window_min_id = window_rows[0]["id"]
    window_max_id = window_rows[-1]["id"]

    # Fetch bookends only if there's space outside the window. SQL filters
    # by id range, role, and non-empty content — tool-call-only assistant
    # turns (content='' with tool_calls populated) are excluded so they
    # don't crowd out the actual prose openings/closings. ``bookend=0``
    # short-circuits both queries.
    bookend_start_rows: List[Any] = []
    bookend_end_rows: List[Any] = []
    if bookend > 0:
        with self._lock:
            role_clause = ""
            role_params: list = []
            if keep_roles is not None:
                # Only "?" placeholders are interpolated; role values are bound.
                role_placeholders = ",".join("?" for _ in keep_roles)
                role_clause = f" AND role IN ({role_placeholders})"
                role_params = list(keep_roles)

            bookend_start_rows = self._conn.execute(
                f"SELECT * FROM messages "
                f"WHERE session_id = ? AND id < ?{role_clause} "
                f"AND length(content) > 0 "
                f"ORDER BY id ASC LIMIT ?",
                (session_id, window_min_id, *role_params, bookend),
            ).fetchall()

            bookend_end_rows = self._conn.execute(
                f"SELECT * FROM messages "
                f"WHERE session_id = ? AND id > ?{role_clause} "
                f"AND length(content) > 0 "
                f"ORDER BY id DESC LIMIT ?",
                (session_id, window_max_id, *role_params, bookend),
            ).fetchall()
            # End rows came back DESC for the LIMIT cap; flip to ASC.
            bookend_end_rows = list(reversed(bookend_end_rows))

    def _hydrate(row) -> Dict[str, Any]:
        # Same decoding that get_messages_around applies to window rows.
        msg = dict(row)
        if "content" in msg:
            msg["content"] = self._decode_content(msg["content"])
        if msg.get("tool_calls"):
            try:
                msg["tool_calls"] = json.loads(msg["tool_calls"])
            except (json.JSONDecodeError, TypeError):
                logger.warning(
                    "Failed to deserialize tool_calls in get_anchored_view, falling back to []"
                )
                msg["tool_calls"] = []
        return msg

    return {
        "window": filtered_window,
        "bookend_start": [_hydrate(r) for r in bookend_start_rows],
        "bookend_end": [_hydrate(r) for r in bookend_end_rows],
    }
|
||||
|
||||
def resolve_resume_session_id(self, session_id: str) -> str:
|
||||
"""Redirect a resume target to the descendant session that holds the messages.
|
||||
|
||||
@@ -1885,6 +2064,7 @@ class SessionDB:
|
||||
role_filter: List[str] = None,
|
||||
limit: int = 20,
|
||||
offset: int = 0,
|
||||
sort: str = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Full-text search across session messages using FTS5.
|
||||
@@ -1897,6 +2077,19 @@ class SessionDB:
|
||||
|
||||
Returns matching messages with session metadata, content snippet,
|
||||
and surrounding context (1 message before and after the match).
|
||||
|
||||
``sort`` controls temporal ordering of results:
|
||||
- ``None`` (default): FTS5 BM25 relevance only. Time-neutral, but
|
||||
ties between equally-relevant messages are broken arbitrarily.
|
||||
- ``"newest"``: order by message timestamp DESC, then by rank.
|
||||
Recent matches surface first; rank breaks same-timestamp ties.
|
||||
- ``"oldest"``: order by message timestamp ASC, then by rank.
|
||||
For "how did this start" / "what was the original X" questions.
|
||||
|
||||
The LIKE fallback path (short CJK queries) has no rank to combine
|
||||
with, so it orders by timestamp alone: ASC for ``"oldest"``, DESC
|
||||
otherwise (including the default). The trigram CJK path honours
|
||||
``sort`` like the main FTS5 path.
|
||||
"""
|
||||
if not query or not query.strip():
|
||||
return []
|
||||
@@ -1905,6 +2098,25 @@ class SessionDB:
|
||||
if not query:
|
||||
return []
|
||||
|
||||
# Normalise sort. Anything not in the allowed set falls back to None
|
||||
# (FTS5 rank-only) — be forgiving to callers who pass empty string or
|
||||
# an unexpected value rather than failing the search.
|
||||
if isinstance(sort, str):
|
||||
sort_norm = sort.strip().lower()
|
||||
if sort_norm not in ("newest", "oldest"):
|
||||
sort_norm = None
|
||||
else:
|
||||
sort_norm = None
|
||||
|
||||
# ORDER BY shared by both FTS5 paths. With sort set, timestamp is
|
||||
# primary and rank is the tiebreaker; otherwise rank alone.
|
||||
if sort_norm == "newest":
|
||||
order_by_sql = "ORDER BY m.timestamp DESC, rank"
|
||||
elif sort_norm == "oldest":
|
||||
order_by_sql = "ORDER BY m.timestamp ASC, rank"
|
||||
else:
|
||||
order_by_sql = "ORDER BY rank"
|
||||
|
||||
# Build WHERE clauses dynamically
|
||||
where_clauses = ["messages_fts MATCH ?"]
|
||||
params: list = [query]
|
||||
@@ -1943,7 +2155,7 @@ class SessionDB:
|
||||
JOIN messages m ON m.id = messages_fts.rowid
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE {where_sql}
|
||||
ORDER BY rank
|
||||
{order_by_sql}
|
||||
LIMIT ? OFFSET ?
|
||||
"""
|
||||
|
||||
@@ -2012,7 +2224,7 @@ class SessionDB:
|
||||
JOIN messages m ON m.id = messages_fts_trigram.rowid
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE {' AND '.join(tri_where)}
|
||||
ORDER BY rank
|
||||
{order_by_sql}
|
||||
LIMIT ? OFFSET ?
|
||||
"""
|
||||
tri_params.extend([limit, offset])
|
||||
@@ -2051,6 +2263,13 @@ class SessionDB:
|
||||
if role_filter:
|
||||
like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
|
||||
like_params.extend(role_filter)
|
||||
# LIKE fallback has no rank to combine with — just timestamp
|
||||
# direction. Default/"newest" → DESC; "oldest" → ASC.
|
||||
like_order_sql = (
|
||||
"ORDER BY m.timestamp ASC"
|
||||
if sort_norm == "oldest"
|
||||
else "ORDER BY m.timestamp DESC"
|
||||
)
|
||||
like_sql = f"""
|
||||
SELECT m.id, m.session_id, m.role,
|
||||
substr(m.content,
|
||||
@@ -2061,7 +2280,7 @@ class SessionDB:
|
||||
FROM messages m
|
||||
JOIN sessions s ON s.id = m.session_id
|
||||
WHERE {' AND '.join(like_where)}
|
||||
ORDER BY m.timestamp DESC
|
||||
{like_order_sql}
|
||||
LIMIT ? OFFSET ?
|
||||
"""
|
||||
like_params.extend([limit, offset])
|
||||
|
||||
10
run_agent.py
10
run_agent.py
@@ -10689,6 +10689,11 @@ class AIAgent:
|
||||
limit=function_args.get("limit", 3),
|
||||
db=session_db,
|
||||
current_session_id=self.session_id,
|
||||
mode=function_args.get("mode"),
|
||||
session_id=function_args.get("session_id"),
|
||||
around_message_id=function_args.get("around_message_id"),
|
||||
window=function_args.get("window", 5),
|
||||
anchors=function_args.get("anchors"),
|
||||
)
|
||||
elif function_name == "memory":
|
||||
target = function_args.get("target", "memory")
|
||||
@@ -11321,6 +11326,11 @@ class AIAgent:
|
||||
limit=function_args.get("limit", 3),
|
||||
db=session_db,
|
||||
current_session_id=self.session_id,
|
||||
mode=function_args.get("mode"),
|
||||
session_id=function_args.get("session_id"),
|
||||
around_message_id=function_args.get("around_message_id"),
|
||||
window=function_args.get("window", 5),
|
||||
anchors=function_args.get("anchors"),
|
||||
)
|
||||
tool_duration = time.time() - tool_start_time
|
||||
if self._should_emit_quiet_tool_messages():
|
||||
|
||||
@@ -1051,6 +1051,7 @@ AUTHOR_MAP = {
|
||||
"openclaw@agent.local": "29206394", # PR #22194 salvage (sudo -S brute-force guard, #9590)
|
||||
"freedemon@gmail.com": "fr33d3m0n", # PR #21128 salvage (sudo stdin/askpass DANGEROUS, #17873 cat 4)
|
||||
"zhaowh3613@outlook.com": "VinceZcrikl", # PR #23647 salvage (npm UTF-8 decode on GBK Windows)
|
||||
"abcdjmm970703@gmail.com": "JabberELF", # PR #20238 salvage (session_search fast/summary dual-mode)
|
||||
"anton.kuenzi@gmail.com": "ZeterMordio", # PR #11754 salvage (zsh completion compdef + _arguments syntax)
|
||||
"23yntong@stu.edu.cn": "iuyup", # PR #6155 salvage (shell=True hardening)
|
||||
"86501179+1RB@users.noreply.github.com": "1RB", # PR #25462 salvage (discord forwarded messages)
|
||||
|
||||
3
skills/memory/DESCRIPTION.md
Normal file
3
skills/memory/DESCRIPTION.md
Normal file
@@ -0,0 +1,3 @@
|
||||
---
|
||||
description: Primitives for searching, recalling, and reasoning over Hermes' own session history and stored memory.
|
||||
---
|
||||
112
skills/memory/session-recall/SKILL.md
Normal file
112
skills/memory/session-recall/SKILL.md
Normal file
@@ -0,0 +1,112 @@
|
||||
---
|
||||
name: session-recall
|
||||
description: Use session_search effectively for finding and reading prior Hermes sessions.
|
||||
metadata:
|
||||
hermes:
|
||||
category: memory
|
||||
---
|
||||
|
||||
# session-recall
|
||||
|
||||
session_search is the tool. Three modes — fast, guided, summary — answer different question shapes. Picking the wrong mode costs latency, money, or correctness.
|
||||
|
||||
## Pre-flight
|
||||
|
||||
1. If the user asks about prior work ("find the session where X", "catch me up on Y", "we drafted Z"), your first move is session_search. Not filesystem search, not a different tool.
|
||||
2. If the user names an artefact, search the literal name first. No OR-expansion.
|
||||
3. Default to fast → guided. Reach for summary only when you need cross-session synthesis prose in one shot.
|
||||
|
||||
## Mode picker
|
||||
|
||||
| Question shape | Mode | Why |
|
||||
|---|---|---|
|
||||
| Catch me up / where did we get to / what did we decide | fast → guided | FTS5 finds sessions; guided reads the transcript. SQL-only. |
|
||||
| Find an artefact by name / which session mentions X | fast | Snippets only, no LLM. |
|
||||
| Read around a specific message in a known session | guided | Raw window around anchor. |
|
||||
| Cross-session prose synthesis in one shot | summary | LLM call per hit (aux model if configured, else main). Opt-in. |
|
||||
|
||||
## Levers
|
||||
|
||||
| Lever | Default | When to change |
|
||||
|---|---|---|
|
||||
| `limit` (fast) | 3 | 5–10 when topic spans sessions or user wants to pick from a list |
|
||||
| `sort` (fast) | unset (relevance) | `newest` for "where did we leave X"; `oldest` for "how did X start" |
|
||||
| `role_filter` (fast) | user,assistant | Add `tool` only when debugging tool output specifically |
|
||||
| `window` (guided) | 5 | Bump for long resolutions; shrink if response truncates |
|
||||
| anchor count (guided) | 1 | 2–3 anchors when topic spans recent sessions |
|
||||
| `limit` (summary) | 3 | Bump cautiously; cost scales directly |
|
||||
|
||||
## Composition patterns
|
||||
|
||||
1. **Discover → drill.** fast first, drill the top hit with guided. Widen `window` or re-anchor if the resolution isn't covered.
|
||||
2. **Multi-anchor for arcs.** When fast returns 2–3 relevant hits on the same topic, pass them all to guided in one call.
|
||||
3. **Bookend-first reading.** For "what was the conclusion" questions, read `bookend_end` before `messages`.
|
||||
4. **Delegate when transcripts are big.** If you're about to pull 30K+ chars of transcript into your context just to summarise it, hand the dumps to a subagent and ask for a digest.
|
||||
5. **Verify before quoting.** High-stakes recall does two passes: fast with the literal term (does the hit list contain the right session?) → guided (does the transcript confirm the outcome?).
|
||||
|
||||
## Worked examples
|
||||
|
||||
### A — find a named artefact
|
||||
|
||||
User: "we drafted a deployment plan in a session yesterday, find it"
|
||||
|
||||
Right: `session_search(query="deployment plan", limit=5)`. The user named it — search the name. Drill the top hit if you need details.
|
||||
|
||||
Wrong: `session_search(query="deploy OR deployment OR rollout OR plan")`. OR-expansion drowns the hit in unrelated sessions.
|
||||
|
||||
### B — catch up on a multi-session arc
|
||||
|
||||
User: "where did we get to with the auth refactor?"
|
||||
|
||||
Right: fast with `sort='newest'`, then multi-anchor guided across the top 2–3 hits:
|
||||
|
||||
```
|
||||
session_search(query="auth refactor", limit=5, sort='newest')
|
||||
session_search(mode='guided', anchors=[
|
||||
{'session_id': hit_1.session_id, 'around_message_id': hit_1.match_message_id},
|
||||
{'session_id': hit_2.session_id, 'around_message_id': hit_2.match_message_id},
|
||||
{'session_id': hit_3.session_id, 'around_message_id': hit_3.match_message_id},
|
||||
])
|
||||
```
|
||||
|
||||
Read all three slices (bookend_start / messages / bookend_end) on each window and the arc reconstructs.
|
||||
|
||||
Wrong: `session_search(query="auth refactor", mode='summary')`. Summary launders FTS5 hits through an LLM and can confabulate when the right session isn't in the hit list.
|
||||
|
||||
### C — drill into a known session for a conclusion
|
||||
|
||||
User: "in the session about the caching layer, what did we decide?"
|
||||
|
||||
fast to locate, guided to drill, read `bookend_end` first:
|
||||
|
||||
```
|
||||
session_search(query="caching layer", limit=3)
|
||||
session_search(mode='guided', anchors=[
|
||||
{'session_id': <top>, 'around_message_id': <match_id>}
|
||||
])
|
||||
```
|
||||
|
||||
Conclusions ("decided X", "shipped Y") usually live in `bookend_end`.
|
||||
|
||||
## Reading guided responses
|
||||
|
||||
Every guided window has three slices:
|
||||
|
||||
- `bookend_start` — opening prose (kickoff, goal)
|
||||
- `messages` — the anchored window (FTS5 hit + neighbours)
|
||||
- `bookend_end` — closing prose (resolution, decisions, commits)
|
||||
|
||||
Read all three. Bookends are prose that summarises; snippets and the middle window can be noisy when sessions are *about* the search term.
|
||||
|
||||
## Pitfalls
|
||||
|
||||
- **Manual-archaeology trap.** If fast snippets look noisy, drill the top hit with guided. Don't pivot to find / grep / raw SQL.
|
||||
- **Summary confabulation.** Summary will produce confident prose even when FTS5 missed the right session. Verify by re-querying in fast mode and checking the hit list.
|
||||
- **FTS5 is AND by default.** Multi-word queries require all terms; use OR or quoted phrases deliberately.
|
||||
- **Anchor mismatch.** `around_message_id` must exist in the named session. Re-anchor from a fresh fast result if guided rejects.
|
||||
- **Window truncation.** Re-call with a smaller window if a dump truncates.
|
||||
- **Compaction lineage.** A fast hit with `parent_session_id` set means the session was split by compaction; its `bookend_start` is a handoff summary, not the original opener.
|
||||
|
||||
## Note on skill limits
|
||||
|
||||
This skill teaches composition but cannot enforce it. If your default behaviour drifts — composing paraphrase queries instead of drilling, reaching for summary when fast → guided would do, pivoting to filesystem search when fast returned hits — the skill is being ignored, not failing. When in doubt: fast first, then drill.
|
||||
189
tests/hermes_state/test_get_anchored_view.py
Normal file
189
tests/hermes_state/test_get_anchored_view.py
Normal file
@@ -0,0 +1,189 @@
|
||||
"""Unit tests for SessionDB.get_anchored_view() — window + bookends + role filter.
|
||||
|
||||
Used by ``session_search`` mode='guided'. Builds on ``get_messages_around``
|
||||
and adds:
|
||||
- opinionated default role filter (drops tool messages from the window,
|
||||
but never drops the anchor itself)
|
||||
- session-head and session-tail bookends (default 3 messages each) so an
|
||||
FTS5 hit anywhere in a long session still yields the goal + resolution
|
||||
- bookends are skipped when the main window already overlaps the head or tail
|
||||
|
||||
These properties are the reason guided is useful for state recall on long
|
||||
sessions, so the suite below pins them all down.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from hermes_state import SessionDB
|
||||
|
||||
|
||||
@pytest.fixture
def db(tmp_path):
    """Provide a SessionDB backed by a file inside pytest's per-test temp dir."""
    database_path = tmp_path / "state.db"
    return SessionDB(database_path)
|
||||
|
||||
|
||||
def _seed(db: SessionDB, session_id: str, roles: list[str]) -> list[int]:
    """Create ``session_id`` and append one message per entry in ``roles``.

    Each message's content is ``"<role>-<index>"``. Returns the ids handed
    back by ``append_message``, in append order.
    """
    db.create_session(session_id, source="cli")
    return [
        db.append_message(session_id, role=role, content=f"{role}-{position}")
        for position, role in enumerate(roles)
    ]
|
||||
|
||||
|
||||
def test_window_filters_tool_messages_but_keeps_anchor_when_tool(db):
    """A tool-role anchor survives the default role filter; neighbouring
    tool messages inside the window do not."""
    role_sequence = [
        "user", "assistant", "tool",   # indices 0..2
        "user", "tool",                # indices 3..4 — index 4 is the anchor
        "tool", "assistant", "user",   # indices 5..7
    ]
    ids = _seed(db, "s1", role_sequence)
    anchor_id = ids[4]

    window = db.get_anchored_view("s1", anchor_id, window=3, bookend=0)["window"]

    # The anchor keeps its place even though its role is filtered by default.
    assert "tool" in [m["role"] for m in window]
    anchor = next(m for m in window if m["id"] == anchor_id)
    assert anchor["role"] == "tool"

    # It is the only tool message left in the window.
    surviving_tool_ids = [m["id"] for m in window if m["role"] == "tool"]
    assert surviving_tool_ids == [anchor_id]
|
||||
|
||||
|
||||
def test_window_keeps_user_and_assistant_by_default(db):
    """A window made only of user/assistant turns passes the default filter intact."""
    ids = _seed(db, "s1", ["user", "assistant"] * 6)

    window = db.get_anchored_view("s1", ids[5], window=2, bookend=0)["window"]

    seen_roles = set()
    for message in window:
        seen_roles.add(message["role"])
    assert seen_roles == {"user", "assistant"}
    # 2 before + anchor + 2 after
    assert len(window) == 5
|
||||
|
||||
|
||||
def test_bookends_returned_when_window_in_middle(db):
    """With the window mid-session, both bookends are full and are the true head/tail."""
    ids = _seed(db, "s1", ["user", "assistant"] * 10)  # 20 messages

    view = db.get_anchored_view("s1", ids[10], window=2, bookend=3)
    head = view["bookend_start"]
    tail = view["bookend_end"]

    assert len(head) == 3
    assert len(tail) == 3
    # The bookends are the session's first and last three messages.
    assert [m["id"] for m in head] == ids[:3]
    assert [m["id"] for m in tail] == ids[-3:]
|
||||
|
||||
|
||||
def test_bookend_start_empty_when_window_covers_session_head(db):
    """No start bookend is emitted when the window already reaches the first message."""
    ids = _seed(db, "s1", ["user", "assistant"] * 5)  # 10 messages

    # Anchoring on ids[1] with window=3 spans ids[0]..ids[4], i.e. the head.
    view = db.get_anchored_view("s1", ids[1], window=3, bookend=3)

    assert view["bookend_start"] == []
    # There is still room after the window, so the tail bookend is full.
    assert len(view["bookend_end"]) == 3
|
||||
|
||||
|
||||
def test_bookend_end_empty_when_window_covers_session_tail(db):
    """No end bookend is emitted when the window already reaches the last message."""
    ids = _seed(db, "s1", ["user", "assistant"] * 5)  # 10 messages
    near_tail_anchor = ids[-2]

    view = db.get_anchored_view("s1", near_tail_anchor, window=3, bookend=3)

    assert view["bookend_end"] == []
    assert len(view["bookend_start"]) == 3
|
||||
|
||||
|
||||
def test_bookends_skip_tool_messages(db):
    """Bookends contain only user/assistant messages under the default filter.

    Exact ids are pinned as well as roles: the role-only ``all(...)``
    checks pass vacuously if a regression makes the bookends come back empty.
    """
    ids = _seed(db, "s1", [
        "tool", "tool", "user", "assistant",     # head: only 2 user/assistant
        "user", "assistant", "user", "assistant",
        "tool", "user", "assistant", "tool",     # tail: 2 user/assistant + tool
    ])
    # Anchor in the middle; bookends should pull only user/assistant.
    view = db.get_anchored_view("s1", ids[5], window=1, bookend=3)
    assert all(m["role"] in ("user", "assistant") for m in view["bookend_start"])
    assert all(m["role"] in ("user", "assistant") for m in view["bookend_end"])

    # The window spans ids[4]..ids[6]. Before it only ids[2] and ids[3] are
    # user/assistant; after it the last three such messages are ids[7],
    # ids[9] and ids[10] (ids[8] and ids[11] are tool output).
    assert [m["id"] for m in view["bookend_start"]] == [ids[2], ids[3]]
    assert [m["id"] for m in view["bookend_end"]] == [ids[7], ids[9], ids[10]]
|
||||
|
||||
|
||||
def test_bookend_zero_returns_empty_bookends(db):
    """``bookend=0`` yields no bookends even when the window sits mid-session."""
    ids = _seed(db, "s1", ["user", "assistant"] * 10)

    view = db.get_anchored_view("s1", ids[10], window=2, bookend=0)

    for side in ("bookend_start", "bookend_end"):
        assert view[side] == []
|
||||
|
||||
|
||||
def test_anchor_not_in_session_returns_empty_view(db):
    """An anchor that does not belong to the named session yields an empty view.

    Covers both an id that exists nowhere and an id that exists only in a
    different session, so session isolation is actually exercised.
    """
    _seed(db, "s1", ["user", "assistant"] * 5)
    s2_ids = _seed(db, "s2", ["user", "assistant"] * 5)
    empty_view = {"window": [], "bookend_start": [], "bookend_end": []}

    # Id that exists in no session.
    assert db.get_anchored_view("s1", 999999, window=3, bookend=3) == empty_view
    # Id that exists, but in s2 rather than s1.
    assert db.get_anchored_view("s1", s2_ids[2], window=3, bookend=3) == empty_view
|
||||
|
||||
|
||||
def test_keep_roles_none_disables_filtering(db):
    """Pass keep_roles=None to get raw window + raw bookends including tool."""
    ids = _seed(db, "s1", ["user", "tool", "assistant", "tool", "user"] * 3)
    view = db.get_anchored_view(
        "s1", ids[7], window=2, bookend=3, keep_roles=None
    )
    # Tool messages in the window survive when filtering is disabled.
    roles_in_window = [m["role"] for m in view["window"]]
    assert "tool" in roles_in_window

    # The bookends are unfiltered too. The window spans ids[5]..ids[9], so
    # the head is ids[0..2] and the tail is ids[12..14], tool rows included.
    assert [m["role"] for m in view["bookend_start"]] == ["user", "tool", "assistant"]
    assert [m["role"] for m in view["bookend_end"]] == ["assistant", "tool", "user"]
|
||||
|
||||
|
||||
def test_keep_roles_can_include_tool_when_caller_wants_it(db):
    """Listing ``tool`` in ``keep_roles`` lets tool messages into the window."""
    ids = _seed(db, "s1", ["user", "tool", "assistant"] * 5)
    every_role = ("user", "assistant", "tool")

    view = db.get_anchored_view(
        "s1", ids[7], window=2, bookend=3, keep_roles=every_role
    )

    # With all three roles allowed, at least one tool message must be present.
    window_roles = [m["role"] for m in view["window"]]
    assert any(role == "tool" for role in window_roles)
|
||||
|
||||
|
||||
def test_negative_bookend_treated_as_zero(db):
    """A negative ``bookend`` behaves like ``bookend=0``: both bookends are empty."""
    ids = _seed(db, "s1", ["user", "assistant"] * 10)

    view = db.get_anchored_view("s1", ids[10], window=2, bookend=-3)

    for side in ("bookend_start", "bookend_end"):
        assert view[side] == []
|
||||
|
||||
|
||||
def test_bookends_do_not_leak_across_sessions(db):
    """Bookends stay inside the requested session: no id from a second,
    adjacent session may appear in either bookend."""
    s1_ids = _seed(db, "s1", ["user", "assistant"] * 4)
    s2_ids = _seed(db, "s2", ["user", "assistant"] * 4)

    view = db.get_anchored_view("s1", s1_ids[3], window=1, bookend=3)

    bookend_ids = set()
    for side in ("bookend_start", "bookend_end"):
        bookend_ids.update(m["id"] for m in view[side])
    assert bookend_ids.isdisjoint(set(s2_ids))
|
||||
|
||||
|
||||
def test_bookends_skip_empty_content_assistant_turns(db):
    """Assistant turns with empty content (tool-call-only turns) must not
    take up bookend slots.

    Bookends are meant to surface the session's prose opening and
    resolution, so empty-content rows carry no signal here.
    """
    session = "s1"
    db.create_session(session, source="cli")

    # Prose opener.
    open_id = db.append_message(session, role="user", content="kick off the work")
    db.append_message(session, role="assistant", content="on it")

    # Five rounds of empty assistant turn + tool output.
    for _ in range(5):
        db.append_message(session, role="assistant", content="")
        db.append_message(session, role="tool", content="some output")

    # Prose in the middle; the anchor is the user turn.
    mid_id = db.append_message(session, role="user", content="status?")
    db.append_message(session, role="assistant", content="midway")

    # Three more empty rounds, then the prose closer.
    for _ in range(3):
        db.append_message(session, role="assistant", content="")
        db.append_message(session, role="tool", content="poll")
    close_id = db.append_message(
        session, role="assistant", content="Done. Final summary here."
    )

    view = db.get_anchored_view(session, mid_id, window=1, bookend=3)
    head = view["bookend_start"]
    tail = view["bookend_end"]

    # Head: every row has content, and the first row is the real opener.
    assert all(m["content"] for m in head), \
        "bookend_start leaked an empty-content row"
    assert head[0]["id"] == open_id

    # Tail: every row has content, and the closer is among them.
    assert all(m["content"] for m in tail), \
        "bookend_end leaked an empty-content row"
    assert any(m["id"] == close_id for m in tail), \
        "actual session closer must survive into bookend_end"
|
||||
|
||||
137
tests/hermes_state/test_get_messages_around.py
Normal file
137
tests/hermes_state/test_get_messages_around.py
Normal file
@@ -0,0 +1,137 @@
|
||||
"""Unit tests for SessionDB.get_messages_around() — anchored message windows.
|
||||
|
||||
The method is used by ``session_search`` mode='guided' for anchored drill-down.
|
||||
It must:
|
||||
- Return an ordered window: up to ``window`` messages before the anchor,
|
||||
the anchor itself, then up to ``window`` after, all id-ascending.
|
||||
- Honour session boundaries (fewer messages returned at start / end).
|
||||
- Honour session isolation (same id range, different session = nothing).
|
||||
- Return an empty list when the anchor is not in the named session.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from hermes_state import SessionDB
|
||||
|
||||
|
||||
@pytest.fixture
def db(tmp_path):
    """Provide a SessionDB backed by a file inside pytest's per-test temp dir."""
    database_path = tmp_path / "state.db"
    return SessionDB(database_path)
|
||||
|
||||
|
||||
def _seed_session(db: SessionDB, session_id: str, n_messages: int):
    """Create ``session_id`` and populate it with ``n_messages`` messages.

    Roles alternate, starting with ``user`` at index 0; each message body is
    ``"msg <index>"``.

    Returns the ids handed back by ``append_message``, in append order.
    """
    db.create_session(session_id, source="cli")
    roles = ("user", "assistant")
    return [
        db.append_message(session_id, role=roles[index % 2], content=f"msg {index}")
        for index in range(n_messages)
    ]
|
||||
|
||||
|
||||
def test_returns_window_around_anchor_in_middle(db):
    """A mid-session anchor yields ``window`` rows on each side, id-ascending."""
    seeded = _seed_session(db, "s1", 11)
    middle = seeded[5]  # index 5 of 11: five messages on either side

    rows = db.get_messages_around("s1", middle, window=3)

    # 3 before + the anchor + 3 after
    assert len(rows) == 7
    # Every row belongs to the requested session
    assert all(row["session_id"] == "s1" for row in rows)
    # Ascending by id with no gaps
    returned_ids = [row["id"] for row in rows]
    assert returned_ids == seeded[2:9]
|
||||
|
||||
|
||||
def test_anchor_at_first_message_returns_only_after_slice(db):
    """Anchoring on the opening message leaves nothing to return before it."""
    seeded = _seed_session(db, "s1", 8)
    first = seeded[0]

    rows = db.get_messages_around("s1", first, window=3)

    # The anchor plus the 3 messages that follow it; the "before" side is empty
    returned_ids = [row["id"] for row in rows]
    assert len(rows) == 4
    assert returned_ids == seeded[0:4]
|
||||
|
||||
|
||||
def test_anchor_at_last_message_returns_only_before_slice(db):
    """Anchoring on the closing message leaves nothing to return after it."""
    seeded = _seed_session(db, "s1", 8)
    last = seeded[-1]

    rows = db.get_messages_around("s1", last, window=3)

    # The 3 messages that precede the anchor plus the anchor; "after" is empty
    returned_ids = [row["id"] for row in rows]
    assert len(rows) == 4
    assert returned_ids == seeded[-4:]
|
||||
|
||||
|
||||
def test_anchor_not_in_session_returns_empty_list(db):
    """An anchor id that exists, but in a different session, yields ``[]``."""
    s1_ids = _seed_session(db, "s1", 5)
    _seed_session(db, "s2", 5)

    # Query session s2 with a message id that exists, but only in s1.
    result = db.get_messages_around("s2", s1_ids[2], window=3)

    assert result == []
|
||||
|
||||
|
||||
def test_does_not_leak_across_sessions(db):
    """Rows from a neighbouring session never enter the window.

    s2 is seeded right after s1, so its message ids directly follow s1's.
    Anchoring on s1's last message must still return s1 rows only.
    """
    # Two sessions with adjacent message id ranges
    s1_ids = _seed_session(db, "s1", 5)
    s2_ids = _seed_session(db, "s2", 5)

    # Anchor on s1's last message — even though s2 ids are "after", they must
    # not appear in the window
    result = db.get_messages_around("s1", s1_ids[-1], window=3)
    result_ids = [m["id"] for m in result]

    # Pin the exact window first: the isolation checks below are all vacuously
    # true for an empty result, so on their own they could never fail.
    assert result_ids == s1_ids[-4:]

    assert all(m["session_id"] == "s1" for m in result)
    # All result ids belong to s1, not s2
    assert set(result_ids).issubset(set(s1_ids))
    assert set(result_ids).isdisjoint(set(s2_ids))
|
||||
|
||||
|
||||
def test_window_larger_than_session_returns_full_session(db):
    """An oversized window is bounded by the session, not padded or failed."""
    seeded = _seed_session(db, "s1", 4)
    pivot = seeded[1]

    rows = db.get_messages_around("s1", pivot, window=100)

    # Every message of the session comes back, in ascending id order
    returned_ids = [row["id"] for row in rows]
    assert returned_ids == seeded
|
||||
|
||||
|
||||
def test_window_zero_returns_only_anchor(db):
    """``window=0`` returns the anchor row and nothing around it."""
    seeded = _seed_session(db, "s1", 5)
    pivot = seeded[2]

    rows = db.get_messages_around("s1", pivot, window=0)

    assert len(rows) == 1
    only_row = rows[0]
    assert only_row["id"] == pivot
|
||||
|
||||
|
||||
def test_negative_window_treated_as_zero(db):
    """A negative window behaves like ``window=0``: just the anchor row."""
    seeded = _seed_session(db, "s1", 5)
    pivot = seeded[2]

    rows = db.get_messages_around("s1", pivot, window=-3)

    assert len(rows) == 1
    only_row = rows[0]
    assert only_row["id"] == pivot
|
||||
|
||||
|
||||
def test_decodes_content_like_get_messages(db):
    """``get_messages_around`` and ``get_messages`` agree on content.

    Callers switching between the two methods should see the same content
    values for the same rows.
    """
    seeded = _seed_session(db, "s1", 3)
    pivot = seeded[1]

    # window=1 around the middle of three messages covers the whole session
    windowed = db.get_messages_around("s1", pivot, window=1)
    everything = db.get_messages("s1")

    # Same rows, same content shape
    windowed_content = [row["content"] for row in windowed]
    full_content = [row["content"] for row in everything]
    assert windowed_content == full_content
|
||||
@@ -2494,6 +2494,103 @@ class TestExcludeSources:
|
||||
sources = [r["source"] for r in results]
|
||||
assert sources == ["cli"]
|
||||
|
||||
def test_search_messages_sort_newest_orders_by_timestamp_desc(self, db):
    """``sort='newest'`` orders hits by timestamp, newest first.

    Three sessions hold the same text, so only the timestamps stamped below
    distinguish the hits; the expected order is purely newest-to-oldest.
    """
    for sid in ("old_sid", "mid_sid", "new_sid"):
        db.create_session(sid, "cli")
    # Identical content in every session: timestamps are the only difference.
    id_old = db.append_message("old_sid", "user", "matchword discussion")
    id_mid = db.append_message("mid_sid", "user", "matchword discussion")
    id_new = db.append_message("new_sid", "user", "matchword discussion")
    # Overwrite the stored timestamps with explicit, well-separated values.
    with db._lock:
        conn = db._conn
        conn.execute("UPDATE messages SET timestamp=1000 WHERE id=?", (id_old,))
        conn.execute("UPDATE messages SET timestamp=2000 WHERE id=?", (id_mid,))
        conn.execute("UPDATE messages SET timestamp=3000 WHERE id=?", (id_new,))
        conn.commit()

    hits = db.search_messages("matchword", sort="newest")
    session_order = [hit["session_id"] for hit in hits]
    assert session_order == ["new_sid", "mid_sid", "old_sid"], (
        f"sort=newest must return newest first; got {session_order}"
    )
|
||||
|
||||
def test_search_messages_sort_oldest_orders_by_timestamp_asc(self, db):
    """``sort='oldest'`` orders hits by timestamp, earliest first.

    Timestamps are deliberately assigned out of insertion order (a=3000,
    b=1000, c=2000) so the expected result cannot come from id order.
    """
    for sid in ("a", "b", "c"):
        db.create_session(sid, "cli")
    id_a = db.append_message("a", "user", "matchword")
    id_b = db.append_message("b", "user", "matchword")
    id_c = db.append_message("c", "user", "matchword")
    with db._lock:
        conn = db._conn
        conn.execute("UPDATE messages SET timestamp=3000 WHERE id=?", (id_a,))
        conn.execute("UPDATE messages SET timestamp=1000 WHERE id=?", (id_b,))
        conn.execute("UPDATE messages SET timestamp=2000 WHERE id=?", (id_c,))
        conn.commit()

    hits = db.search_messages("matchword", sort="oldest")
    session_order = [hit["session_id"] for hit in hits]
    assert session_order == ["b", "c", "a"], (
        f"sort=oldest must return earliest first; got {session_order}"
    )
|
||||
|
||||
def test_search_messages_sort_unset_preserves_rank_ordering(self, db):
    """Omitting ``sort`` keeps the pre-existing rank-based ordering.

    The short message is stamped as the *older* one, so it can only come
    first if relevance rank, not recency, decides the order.
    """
    db.create_session("short_sid", "cli")
    db.create_session("long_sid", "cli")
    # The same keyword: once in a tiny message, once buried in a long one.
    padded_body = "matchword " + ("padding " * 200)
    id_short = db.append_message("short_sid", "user", "matchword.")
    id_long = db.append_message("long_sid", "user", padded_body)
    # short_sid gets the older timestamp so a recency ordering would lose.
    with db._lock:
        conn = db._conn
        conn.execute("UPDATE messages SET timestamp=1000 WHERE id=?", (id_short,))
        conn.execute("UPDATE messages SET timestamp=2000 WHERE id=?", (id_long,))
        conn.commit()

    results = db.search_messages("matchword")  # no sort argument
    assert len(results) == 2
    # The short message must lead even though it is the older of the two.
    top_hit = results[0]
    assert top_hit["session_id"] == "short_sid", (
        "Default (no sort) must use FTS5 rank — short_sid should outrank "
        f"the longer message. Got order: {[r['session_id'] for r in results]}"
    )
|
||||
|
||||
def test_search_messages_sort_invalid_value_falls_back_to_rank(self, db):
    """An unrecognised ``sort`` value behaves exactly like no ``sort`` at all.

    The call must not raise; its session ordering must equal the ordering of
    the default (sort-omitted) search over the same data.
    """
    db.create_session("short_sid", "cli")
    db.create_session("long_sid", "cli")
    padded_body = "matchword " + ("padding " * 200)
    id_short = db.append_message("short_sid", "user", "matchword.")
    id_long = db.append_message("long_sid", "user", padded_body)
    with db._lock:
        conn = db._conn
        conn.execute("UPDATE messages SET timestamp=1000 WHERE id=?", (id_short,))
        conn.execute("UPDATE messages SET timestamp=2000 WHERE id=?", (id_long,))
        conn.commit()

    # A nonsense sort value should be indistinguishable from omitting sort.
    baseline = db.search_messages("matchword")
    with_garbage = db.search_messages("matchword", sort="sideways")
    baseline_order = [hit["session_id"] for hit in baseline]
    garbage_order = [hit["session_id"] for hit in with_garbage]
    assert (
        baseline_order
        == garbage_order
    )
|
||||
|
||||
|
||||
class TestResolveSessionByNameOrId:
|
||||
"""Tests for the main.py helper that resolves names or IDs."""
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -2,19 +2,16 @@
|
||||
"""
|
||||
Session Search Tool - Long-Term Conversation Recall
|
||||
|
||||
Searches past session transcripts in SQLite via FTS5, then summarizes the top
|
||||
matching sessions using the configured auxiliary session_search model (same
|
||||
pattern as web_extract). By default, auxiliary "auto" routing uses the main
|
||||
chat provider/model unless the user overrides auxiliary.session_search.
|
||||
Returns focused summaries of past conversations rather than raw transcripts,
|
||||
keeping the main model's context window clean.
|
||||
Searches past session transcripts in SQLite via FTS5. Keyword search defaults
|
||||
to fast snippet/context hits without any LLM call; callers can opt into focused
|
||||
LLM summaries with mode="summary" when deeper recall is worth the latency.
|
||||
|
||||
Flow:
|
||||
1. FTS5 search finds matching messages ranked by relevance
|
||||
2. Groups by session, takes the top N unique sessions (default 3)
|
||||
3. Loads each session's conversation, truncates to ~100k chars centered on matches
|
||||
4. Sends to the configured auxiliary model with a focused summarization prompt
|
||||
5. Returns per-session summaries with metadata
|
||||
3. Fast mode returns snippets and nearby context immediately
|
||||
4. Summary mode loads each session, truncates around matches, and calls an LLM
|
||||
5. Returns per-session hits/summaries with metadata
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
@@ -26,6 +23,62 @@ from typing import Dict, Any, List, Optional, Union
|
||||
|
||||
from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
|
||||
MAX_SESSION_CHARS = 100_000
|
||||
|
||||
|
||||
# Default mode is fast unless the user sets ``auxiliary.session_search.default_mode``
|
||||
# in ~/.hermes/config.yaml. Only ``fast`` and ``summary`` are valid — guided
|
||||
# requires anchors. Resolver is lru_cache-wrapped so the YAML read happens at
|
||||
# most once per process; restart to pick up config changes.
|
||||
_VALID_DEFAULT_MODES = ("fast", "summary")
|
||||
_FALLBACK_DEFAULT_MODE = "fast"
|
||||
|
||||
|
||||
def _resolve_user_default_mode() -> str:
|
||||
"""Look up ``auxiliary.session_search.default_mode`` from ~/.hermes/config.yaml.
|
||||
|
||||
Returns ``_FALLBACK_DEFAULT_MODE`` (``"fast"``) if unset, invalid, or the
|
||||
config loader is unavailable (e.g. tests, tools loaded outside the CLI).
|
||||
Logs a one-time warning on invalid values so users get feedback when they
|
||||
typo their config.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
config = load_config() or {}
|
||||
except ImportError:
|
||||
logging.debug("hermes_cli.config not available; default_mode falls back to %r", _FALLBACK_DEFAULT_MODE)
|
||||
return _FALLBACK_DEFAULT_MODE
|
||||
except Exception as e:
|
||||
logging.debug("Failed to load config for session_search default_mode: %s", e, exc_info=True)
|
||||
return _FALLBACK_DEFAULT_MODE
|
||||
|
||||
raw = (
|
||||
config.get("auxiliary", {})
|
||||
.get("session_search", {})
|
||||
.get("default_mode")
|
||||
)
|
||||
if raw is None:
|
||||
return _FALLBACK_DEFAULT_MODE
|
||||
if not isinstance(raw, str):
|
||||
logging.warning(
|
||||
"auxiliary.session_search.default_mode in config.yaml must be a string, got %r — falling back to %r",
|
||||
raw, _FALLBACK_DEFAULT_MODE,
|
||||
)
|
||||
return _FALLBACK_DEFAULT_MODE
|
||||
normalised = raw.strip().lower()
|
||||
if normalised not in _VALID_DEFAULT_MODES:
|
||||
logging.warning(
|
||||
"auxiliary.session_search.default_mode=%r is not one of %s — falling back to %r. "
|
||||
"(guided requires anchors and cannot be a default.)",
|
||||
raw, _VALID_DEFAULT_MODES, _FALLBACK_DEFAULT_MODE,
|
||||
)
|
||||
return _FALLBACK_DEFAULT_MODE
|
||||
return normalised
|
||||
|
||||
|
||||
# Process-level cache so repeated session_search calls don't re-read YAML.
|
||||
# Cleared by tests via _resolve_user_default_mode.cache_clear() when needed.
|
||||
import functools # noqa: E402 — local to the cache wrap
|
||||
_resolve_user_default_mode = functools.lru_cache(maxsize=1)(_resolve_user_default_mode)
|
||||
MAX_SUMMARY_TOKENS = 10000
|
||||
|
||||
|
||||
@@ -197,8 +250,16 @@ def _truncate_around_matches(
|
||||
|
||||
async def _summarize_session(
|
||||
conversation_text: str, query: str, session_meta: Dict[str, Any]
|
||||
) -> Optional[str]:
|
||||
"""Summarize a single session conversation focused on the search query."""
|
||||
) -> tuple[Optional[str], Optional[Dict[str, Any]]]:
|
||||
"""Summarize a single session conversation focused on the search query.
|
||||
|
||||
Returns ``(content, usage)`` where ``usage`` is a dict with
|
||||
``{model, input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens}``
|
||||
parsed from the aux LLM response, or ``None`` when the model didn't surface
|
||||
usage data. The usage dict lets callers attribute the cost of summary-mode
|
||||
aux calls back to the parent session — without this, summary-mode spend is
|
||||
invisible to per-session accounting.
|
||||
"""
|
||||
system_prompt = (
|
||||
"You are reviewing a past conversation transcript to help recall what happened. "
|
||||
"Summarize the conversation with a focus on the search topic. Include:\n"
|
||||
@@ -235,17 +296,18 @@ async def _summarize_session(
|
||||
max_tokens=MAX_SUMMARY_TOKENS,
|
||||
)
|
||||
content = extract_content_or_reasoning(response)
|
||||
usage = _extract_aux_usage(response)
|
||||
if content:
|
||||
return content
|
||||
return content, usage
|
||||
# Reasoning-only / empty — let the retry loop handle it
|
||||
logging.warning("Session search LLM returned empty content (attempt %d/%d)", attempt + 1, max_retries)
|
||||
if attempt < max_retries - 1:
|
||||
await asyncio.sleep(1 * (attempt + 1))
|
||||
continue
|
||||
return content
|
||||
return content, usage
|
||||
except RuntimeError:
|
||||
logging.warning("No auxiliary model available for session summarization")
|
||||
return None
|
||||
return None, None
|
||||
except Exception as e:
|
||||
if attempt < max_retries - 1:
|
||||
await asyncio.sleep(1 * (attempt + 1))
|
||||
@@ -256,7 +318,48 @@ async def _summarize_session(
|
||||
e,
|
||||
exc_info=True,
|
||||
)
|
||||
return None
|
||||
return None, None
|
||||
|
||||
|
||||
def _extract_aux_usage(response: Any) -> Optional[Dict[str, Any]]:
|
||||
"""Pull usage data off an aux LLM response, normalising provider variants.
|
||||
|
||||
Returns ``None`` when the response carries no usage info (test mocks,
|
||||
providers that don't surface it). Returns a dict with the fields we care
|
||||
about for cost attribution otherwise. Reads both OpenAI-style
|
||||
(``prompt_tokens``/``completion_tokens``) and Anthropic-style
|
||||
(``input_tokens``/``output_tokens``) usage shapes.
|
||||
"""
|
||||
usage = getattr(response, "usage", None)
|
||||
if not usage:
|
||||
return None
|
||||
# Provider variants — read whichever is populated.
|
||||
input_tokens = (
|
||||
getattr(usage, "input_tokens", None)
|
||||
or getattr(usage, "prompt_tokens", None)
|
||||
or 0
|
||||
)
|
||||
output_tokens = (
|
||||
getattr(usage, "output_tokens", None)
|
||||
or getattr(usage, "completion_tokens", None)
|
||||
or 0
|
||||
)
|
||||
# Anthropic prompt-caching fields.
|
||||
cache_read = getattr(usage, "cache_read_input_tokens", None) or 0
|
||||
cache_create = getattr(usage, "cache_creation_input_tokens", None) or 0
|
||||
# OpenAI-style cached tokens may live under prompt_tokens_details.
|
||||
if not cache_read:
|
||||
details = getattr(usage, "prompt_tokens_details", None)
|
||||
if details:
|
||||
cache_read = getattr(details, "cached_tokens", 0) or 0
|
||||
model = getattr(response, "model", None)
|
||||
return {
|
||||
"model": model,
|
||||
"input_tokens": int(input_tokens or 0),
|
||||
"output_tokens": int(output_tokens or 0),
|
||||
"cache_read_tokens": int(cache_read or 0),
|
||||
"cache_creation_tokens": int(cache_create or 0),
|
||||
}
|
||||
|
||||
|
||||
# Sources that are excluded from session browsing/searching by default.
|
||||
@@ -322,19 +425,380 @@ def _list_recent_sessions(db, limit: int, current_session_id: str = None) -> str
|
||||
return tool_error(f"Failed to list recent sessions: {e}", success=False)
|
||||
|
||||
|
||||
def _guided_drill_down(
|
||||
db,
|
||||
session_id: str,
|
||||
around_message_id,
|
||||
window: int,
|
||||
current_session_id: str = None,
|
||||
anchors: Optional[List[Dict[str, Any]]] = None,
|
||||
) -> str:
|
||||
"""Anchored drill-down for ``mode='guided'`` of ``session_search``.
|
||||
|
||||
Returns a JSON string carrying one or more windows of messages — each
|
||||
centred on a specific message id in a specific session. No FTS5, no
|
||||
auxiliary LLM, no 100k-char truncation — N indexed DB lookups (where
|
||||
N = number of anchors).
|
||||
|
||||
Two input shapes (use one):
|
||||
|
||||
* **Single anchor** (back-compat): pass ``session_id`` and
|
||||
``around_message_id`` directly. Internally normalised to a single-
|
||||
element ``anchors`` list. Response always carries ``windows``
|
||||
as a list, plus the legacy single-anchor fields at the top level
|
||||
when there's exactly one anchor.
|
||||
|
||||
* **Multi-anchor**: pass ``anchors=[{"session_id":..., "around_message_id":...}, ...]``.
|
||||
The agent picks the most promising K hits from a wider fast call
|
||||
and drills into all of them at once — same conversation in the
|
||||
steering loop, more context per turn.
|
||||
|
||||
Each anchor is validated independently. Per-anchor failures (missing
|
||||
session, anchor not in session, current-lineage rejection) become
|
||||
error entries inside the response's ``windows`` list rather than
|
||||
aborting the whole call. ``window`` is shared across all anchors
|
||||
and clamped to ``[1, 20]`` (silent, matches the existing limit-clamp
|
||||
pattern).
|
||||
"""
|
||||
# 1. Normalise inputs into a single ``anchors`` list. Three shapes:
|
||||
# (a) anchors= parameter is set (preferred for multi-anchor)
|
||||
# (b) session_id + around_message_id (single-anchor back-compat)
|
||||
# (c) neither set → user-facing error
|
||||
if anchors:
|
||||
if not isinstance(anchors, list):
|
||||
return tool_error(
|
||||
"guided mode: 'anchors' must be a list of {session_id, around_message_id} dicts",
|
||||
success=False,
|
||||
)
|
||||
normalised_anchors = anchors
|
||||
elif session_id or around_message_id is not None:
|
||||
normalised_anchors = [{
|
||||
"session_id": session_id,
|
||||
"around_message_id": around_message_id,
|
||||
}]
|
||||
else:
|
||||
return tool_error(
|
||||
"guided mode requires either anchors=[...] or session_id+around_message_id "
|
||||
"(use match_message_id+session_id from a prior fast-mode hit)",
|
||||
success=False,
|
||||
)
|
||||
|
||||
if len(normalised_anchors) == 0:
|
||||
return tool_error(
|
||||
"guided mode: anchors list is empty (pass at least one {session_id, around_message_id})",
|
||||
success=False,
|
||||
)
|
||||
|
||||
# 2. Window clamp (shared across all anchors). Matches the existing
|
||||
# limit-clamp pattern (silent).
|
||||
if not isinstance(window, int):
|
||||
try:
|
||||
window = int(window)
|
||||
except (TypeError, ValueError):
|
||||
window = 5
|
||||
window = max(1, min(window, 20))
|
||||
|
||||
# 3. Helper: resolve to lineage root (used by the current-lineage
|
||||
# rejection check below).
|
||||
def _resolve_to_parent(sid: str) -> str:
|
||||
visited = set()
|
||||
cur = sid
|
||||
while cur and cur not in visited:
|
||||
visited.add(cur)
|
||||
try:
|
||||
meta = db.get_session(cur)
|
||||
if not meta:
|
||||
break
|
||||
parent = meta.get("parent_session_id")
|
||||
if parent:
|
||||
cur = parent
|
||||
else:
|
||||
break
|
||||
except Exception as e:
|
||||
logging.debug("Error resolving parent for %s: %s", cur, e, exc_info=True)
|
||||
break
|
||||
return cur
|
||||
|
||||
current_root = _resolve_to_parent(current_session_id) if current_session_id else None
|
||||
|
||||
# 4. Drill into each anchor. Per-anchor errors are recorded inline
|
||||
# rather than aborting the whole call — the agent can still use
|
||||
# successful drills even if one anchor was malformed.
|
||||
windows_out: List[Dict[str, Any]] = []
|
||||
for raw_anchor in normalised_anchors:
|
||||
if not isinstance(raw_anchor, dict):
|
||||
windows_out.append({
|
||||
"success": False,
|
||||
"error": "anchor must be a dict with session_id + around_message_id",
|
||||
})
|
||||
continue
|
||||
|
||||
a_sid = raw_anchor.get("session_id")
|
||||
a_msg = raw_anchor.get("around_message_id")
|
||||
|
||||
if not a_sid or not isinstance(a_sid, str) or not a_sid.strip():
|
||||
windows_out.append({
|
||||
"success": False,
|
||||
"error": "anchor missing session_id",
|
||||
"anchor": raw_anchor,
|
||||
})
|
||||
continue
|
||||
a_sid = a_sid.strip()
|
||||
|
||||
try:
|
||||
a_msg_id = int(a_msg)
|
||||
except (TypeError, ValueError):
|
||||
windows_out.append({
|
||||
"success": False,
|
||||
"error": "anchor missing or non-integer around_message_id",
|
||||
"anchor": raw_anchor,
|
||||
})
|
||||
continue
|
||||
|
||||
# Current-lineage rejection: per-anchor, so other valid anchors
|
||||
# in a multi-anchor call still drill.
|
||||
if current_root:
|
||||
target_root = _resolve_to_parent(a_sid)
|
||||
if target_root and target_root == current_root:
|
||||
windows_out.append({
|
||||
"success": False,
|
||||
"error": "anchor rejects drill-down into the current session lineage — those messages are already in your active context",
|
||||
"session_id": a_sid,
|
||||
"around_message_id": a_msg_id,
|
||||
})
|
||||
continue
|
||||
|
||||
# Session existence check.
|
||||
try:
|
||||
session_meta = db.get_session(a_sid) or {}
|
||||
except Exception as e:
|
||||
logging.debug("get_session failed for %s: %s", a_sid, e, exc_info=True)
|
||||
session_meta = {}
|
||||
if not session_meta:
|
||||
windows_out.append({
|
||||
"success": False,
|
||||
"error": f"session_id not found: {a_sid}",
|
||||
"session_id": a_sid,
|
||||
"around_message_id": a_msg_id,
|
||||
})
|
||||
continue
|
||||
|
||||
# Fetch the window + bookends. ``get_anchored_view`` filters tool-response
|
||||
# noise from the window (anchor itself is preserved regardless of role)
|
||||
# and returns up to ``bookend`` user/assistant messages from the session
|
||||
# head and tail — but only when those slices don't overlap the window.
|
||||
# See SessionDB.get_anchored_view for the contract.
|
||||
try:
|
||||
view = db.get_anchored_view(a_sid, a_msg_id, window=window, bookend=3)
|
||||
messages = view.get("window") or []
|
||||
bookend_start = view.get("bookend_start") or []
|
||||
bookend_end = view.get("bookend_end") or []
|
||||
except Exception as e:
|
||||
logging.debug("get_anchored_view failed: %s", e, exc_info=True)
|
||||
windows_out.append({
|
||||
"success": False,
|
||||
"error": f"failed to load messages around {a_msg_id} in {a_sid}: {e}",
|
||||
"session_id": a_sid,
|
||||
"around_message_id": a_msg_id,
|
||||
})
|
||||
continue
|
||||
|
||||
# Safety net: the agent (or memory, or a legacy caller) may pair a
|
||||
# parent/lineage-root session_id with a message_id that actually
|
||||
# lives in a descendant (child) session. Before this commit, fast
|
||||
# mode returned exactly that broken pair. We now emit the matching
|
||||
# raw sid in fast mode, but guided should remain forgiving for
|
||||
# callers that haven't updated yet.
|
||||
#
|
||||
# Recovery rule: locate the real owning session by message id; if
|
||||
# that session is in the same lineage as ``a_sid``, transparently
|
||||
# rebind and refetch. Record a warning so the rebind is visible.
|
||||
rebind_warning = None
|
||||
if not messages:
|
||||
owning = None
|
||||
# Prefer a helper if SessionDB exposes one (forward-compat).
|
||||
try:
|
||||
if hasattr(db, "get_session_id_for_message"):
|
||||
owning = db.get_session_id_for_message(a_msg_id)
|
||||
except Exception as e:
|
||||
logging.debug("get_session_id_for_message failed: %s", e, exc_info=True)
|
||||
owning = None
|
||||
# Fallback: query through SessionDB._conn (the canonical connection).
|
||||
if not owning:
|
||||
try:
|
||||
conn = getattr(db, "_conn", None)
|
||||
if conn is not None:
|
||||
row = conn.execute(
|
||||
"SELECT session_id FROM messages WHERE id = ?",
|
||||
(a_msg_id,),
|
||||
).fetchone()
|
||||
# sqlite3.Row supports indexing; tuple fallback works too.
|
||||
owning = row[0] if row else None
|
||||
except Exception as e:
|
||||
logging.debug("owning-session lookup failed: %s", e, exc_info=True)
|
||||
owning = None
|
||||
|
||||
if owning and owning != a_sid:
|
||||
# Check same lineage (walk both up to roots).
|
||||
a_root = _resolve_to_parent(a_sid)
|
||||
o_root = _resolve_to_parent(owning)
|
||||
if a_root and o_root and a_root == o_root:
|
||||
try:
|
||||
rebind_view = db.get_anchored_view(
|
||||
owning, a_msg_id, window=window, bookend=3
|
||||
)
|
||||
messages = rebind_view.get("window") or []
|
||||
bookend_start = rebind_view.get("bookend_start") or []
|
||||
bookend_end = rebind_view.get("bookend_end") or []
|
||||
except Exception as e:
|
||||
logging.debug("rebind get_anchored_view failed: %s", e, exc_info=True)
|
||||
messages = []
|
||||
if messages:
|
||||
rebind_warning = (
|
||||
f"around_message_id {a_msg_id} lives in {owning} "
|
||||
f"(child of {a_sid}); rebound transparently"
|
||||
)
|
||||
# Re-fetch session_meta for the actual owning session.
|
||||
try:
|
||||
session_meta = db.get_session(owning) or session_meta
|
||||
except Exception:
|
||||
pass
|
||||
a_sid = owning
|
||||
|
||||
if not messages:
|
||||
windows_out.append({
|
||||
"success": False,
|
||||
"error": f"around_message_id {a_msg_id} not in session_id {a_sid}",
|
||||
"session_id": a_sid,
|
||||
"around_message_id": a_msg_id,
|
||||
})
|
||||
continue
|
||||
|
||||
# Wrap with anchor flag + boundary counts.
|
||||
out_messages = []
|
||||
messages_before = 0
|
||||
messages_after = 0
|
||||
for m in messages:
|
||||
is_anchor = m.get("id") == a_msg_id
|
||||
if not is_anchor and m.get("id", 0) < a_msg_id:
|
||||
messages_before += 1
|
||||
elif not is_anchor:
|
||||
messages_after += 1
|
||||
entry = {
|
||||
"id": m.get("id"),
|
||||
"role": m.get("role"),
|
||||
"content": m.get("content"),
|
||||
"tool_name": m.get("tool_name"),
|
||||
"tool_calls": m.get("tool_calls") or None,
|
||||
"tool_call_id": m.get("tool_call_id"),
|
||||
"timestamp": m.get("timestamp"),
|
||||
}
|
||||
if is_anchor:
|
||||
entry["anchor"] = True
|
||||
# Strip None-valued optional fields to keep payload tight (keep
|
||||
# 'content' even if None, since absent-content is meaningful).
|
||||
entry = {k: v for k, v in entry.items() if v is not None or k in ("content",)}
|
||||
out_messages.append(entry)
|
||||
|
||||
def _shape_bookend(m: Dict[str, Any]) -> Dict[str, Any]:
|
||||
entry = {
|
||||
"id": m.get("id"),
|
||||
"role": m.get("role"),
|
||||
"content": m.get("content"),
|
||||
"timestamp": m.get("timestamp"),
|
||||
}
|
||||
return {k: v for k, v in entry.items() if v is not None or k in ("content",)}
|
||||
|
||||
out_bookend_start = [_shape_bookend(m) for m in bookend_start]
|
||||
out_bookend_end = [_shape_bookend(m) for m in bookend_end]
|
||||
|
||||
success_entry = {
|
||||
"success": True,
|
||||
"session_id": a_sid,
|
||||
"around_message_id": a_msg_id,
|
||||
"session_meta": {
|
||||
"when": _format_timestamp(session_meta.get("started_at")),
|
||||
"source": session_meta.get("source"),
|
||||
"model": session_meta.get("model"),
|
||||
"title": session_meta.get("title"),
|
||||
},
|
||||
"messages": out_messages,
|
||||
"messages_before": messages_before,
|
||||
"messages_after": messages_after,
|
||||
"bookend_start": out_bookend_start,
|
||||
"bookend_end": out_bookend_end,
|
||||
}
|
||||
if rebind_warning:
|
||||
success_entry["warning"] = rebind_warning
|
||||
windows_out.append(success_entry)
|
||||
|
||||
# 5. Top-level response shape. ``windows`` is always a list. For
|
||||
# single-anchor calls (the common case), we mirror the legacy fields
|
||||
# at the top level so existing callers / tests continue to work
|
||||
# without branching on len(windows).
|
||||
response: Dict[str, Any] = {
|
||||
"success": True,
|
||||
"mode": "guided",
|
||||
"window": window,
|
||||
"windows": windows_out,
|
||||
"anchor_count": len(windows_out),
|
||||
}
|
||||
if len(windows_out) == 1:
|
||||
only = windows_out[0]
|
||||
if only.get("success"):
|
||||
response.update({
|
||||
"session_id": only["session_id"],
|
||||
"around_message_id": only["around_message_id"],
|
||||
"session_meta": only["session_meta"],
|
||||
"messages": only["messages"],
|
||||
"messages_before": only["messages_before"],
|
||||
"messages_after": only["messages_after"],
|
||||
"bookend_start": only.get("bookend_start", []),
|
||||
"bookend_end": only.get("bookend_end", []),
|
||||
})
|
||||
if only.get("warning"):
|
||||
response["warning"] = only["warning"]
|
||||
else:
|
||||
# Single-anchor failure: surface as a top-level tool_error so
|
||||
# callers don't have to dig into the windows array for the
|
||||
# error string. Keeps the legacy single-anchor failure shape.
|
||||
return tool_error(only.get("error", "guided drill-down failed"), success=False)
|
||||
|
||||
return json.dumps(response, ensure_ascii=False)
|
||||
|
||||
|
||||
def session_search(
|
||||
query: str,
|
||||
query: str = "",
|
||||
role_filter: str = None,
|
||||
limit: int = 3,
|
||||
db=None,
|
||||
current_session_id: str = None,
|
||||
mode: str = None,
|
||||
# Guided-mode-only parameters: anchored drill-down into one or more
|
||||
# session+message pairs. Required when mode='guided', ignored otherwise.
|
||||
# Use either the single-anchor pair (session_id + around_message_id) or
|
||||
# the multi-anchor list (anchors=[{session_id, around_message_id}, ...]).
|
||||
session_id: str = None,
|
||||
around_message_id: int = None,
|
||||
window: int = 5,
|
||||
anchors: list = None,
|
||||
# Fast-mode-only temporal bias for ranking. ``None`` keeps FTS5's BM25
|
||||
# ordering (time-neutral); ``"newest"`` / ``"oldest"`` make timestamp
|
||||
# the primary key with rank as the tiebreaker. Silently ignored in
|
||||
# other modes — see schema description.
|
||||
sort: str = None,
|
||||
) -> str:
|
||||
"""
|
||||
Search past sessions and return focused summaries of matching conversations.
|
||||
Search past sessions, or drill into a specific one.
|
||||
|
||||
Uses FTS5 to find matches, then summarizes the top sessions with the
|
||||
configured auxiliary session_search model.
|
||||
The current session is excluded from results since the agent already has that context.
|
||||
Modes:
|
||||
* fast — FTS5 snippets + ±1 message context. Cheap discovery.
|
||||
* summary — fetch full session(s), truncate to 100k chars, run aux LLM
|
||||
recap. Cross-session synthesis at ~30s tool-side cost.
|
||||
* guided — anchored drill-down. Caller supplies session_id +
|
||||
around_message_id (typically from a prior fast hit's
|
||||
match_message_id field) and gets a window of messages
|
||||
around the anchor with no LLM call and no truncation.
|
||||
"""
|
||||
if db is None:
|
||||
try:
|
||||
@@ -346,6 +810,52 @@ def session_search(
|
||||
from hermes_state import format_session_db_unavailable
|
||||
return tool_error(format_session_db_unavailable(), success=False)
|
||||
|
||||
# Mode normalisation. ``None`` / empty string / non-string → fall back to
|
||||
# the user's configured default (via ~/.hermes/config.yaml, see
|
||||
# ``_resolve_user_default_mode``). Defaults to "fast" if unset. An explicit
|
||||
# "fast" / "summary" / "guided" wins regardless of config. An unknown
|
||||
# string also falls back to the resolved user default rather than to a
|
||||
# hard-coded mode, so a typo still lands on the user's configured
|
||||
# preference (no error is raised for the unknown value).
|
||||
if not isinstance(mode, str) or not mode.strip():
|
||||
mode = _resolve_user_default_mode()
|
||||
else:
|
||||
mode = mode.strip().lower()
|
||||
if mode in ("summarized", "summarise", "summarize", "deep"):
|
||||
mode = "summary"
|
||||
if mode in ("drill", "drilldown", "drill-down", "anchor", "around"):
|
||||
mode = "guided"
|
||||
if mode not in ("fast", "summary", "guided"):
|
||||
mode = _resolve_user_default_mode()
|
||||
|
||||
# Normalise sort — only "newest"/"oldest" are accepted; anything else
|
||||
# collapses to None (FTS5 rank-only). Sort affects fast mode only; elsewhere
|
||||
# it is logged at debug level and ignored, so misuse is traceable but non-fatal.
|
||||
sort_norm: Optional[str] = None
|
||||
if isinstance(sort, str):
|
||||
candidate = sort.strip().lower()
|
||||
if candidate in ("newest", "oldest"):
|
||||
sort_norm = candidate
|
||||
if sort_norm and mode != "fast":
|
||||
logging.debug(
|
||||
"session_search: sort=%r is fast-mode only; ignored for mode=%s",
|
||||
sort_norm, mode,
|
||||
)
|
||||
sort_norm = None
|
||||
|
||||
# Guided mode is a different shape: it doesn't search, it drills. Branch
|
||||
# before FTS5 so we don't pay for anything we don't use, and so missing-arg
|
||||
# validation happens up front.
|
||||
if mode == "guided":
|
||||
return _guided_drill_down(
|
||||
db=db,
|
||||
session_id=session_id,
|
||||
around_message_id=around_message_id,
|
||||
window=window,
|
||||
current_session_id=current_session_id,
|
||||
anchors=anchors,
|
||||
)
|
||||
|
||||
# Defensive: models (especially open-source) may send non-int limit values
|
||||
# (None when JSON null, string "int", or even a type object). Coerce to a
|
||||
# safe integer before any arithmetic/comparison to prevent TypeError.
|
||||
@@ -354,7 +864,7 @@ def session_search(
|
||||
limit = int(limit)
|
||||
except (TypeError, ValueError):
|
||||
limit = 3
|
||||
limit = max(1, min(limit, 5)) # Clamp to [1, 5]
|
||||
limit = max(1, min(limit, 10)) # Clamp to [1, 10]
|
||||
|
||||
# Recent sessions mode: when query is empty, return metadata for recent sessions.
|
||||
# No LLM calls — just DB queries for titles, previews, timestamps.
|
||||
@@ -364,23 +874,30 @@ def session_search(
|
||||
query = query.strip()
|
||||
|
||||
try:
|
||||
# Parse role filter
|
||||
# Parse role filter. Defaults to user+assistant; tool messages are
|
||||
# usually noisy and rarely the signal. Caller opts back in via
|
||||
# role_filter='user,assistant,tool' or 'tool'.
|
||||
role_list = None
|
||||
if role_filter and role_filter.strip():
|
||||
role_list = [r.strip() for r in role_filter.split(",") if r.strip()]
|
||||
else:
|
||||
role_list = ["user", "assistant"]
|
||||
|
||||
# FTS5 search -- get matches ranked by relevance
|
||||
# FTS5 search -- get matches ranked by relevance (with optional
|
||||
# temporal bias when sort is set; see param docs).
|
||||
raw_results = db.search_messages(
|
||||
query=query,
|
||||
role_filter=role_list,
|
||||
exclude_sources=list(_HIDDEN_SESSION_SOURCES),
|
||||
limit=50, # Get more matches to find unique sessions
|
||||
offset=0,
|
||||
sort=sort_norm,
|
||||
)
|
||||
|
||||
if not raw_results:
|
||||
return json.dumps({
|
||||
"success": True,
|
||||
"mode": mode,
|
||||
"query": query,
|
||||
"results": [],
|
||||
"count": 0,
|
||||
@@ -421,6 +938,13 @@ def session_search(
|
||||
# Group by resolved (parent) session_id, dedup, skip the current
|
||||
# session lineage. Compression and delegation create child sessions
|
||||
# that still belong to the same active conversation.
|
||||
#
|
||||
# IMPORTANT: group BY parent (one entry per conversation lineage), but
|
||||
# preserve the raw FTS5 session_id on the surviving result. Only the
|
||||
# raw sid pairs validly with ``match_message_id``; rewriting it to the
|
||||
# parent produces a {parent_sid, child_message_id} handle that guided
|
||||
# mode cannot resolve. ``parent_session_id`` is exposed separately for
|
||||
# the lineage-root link the user expects to see.
|
||||
seen_sessions = {}
|
||||
for result in raw_results:
|
||||
raw_sid = result["session_id"]
|
||||
@@ -433,11 +957,61 @@ def session_search(
|
||||
continue
|
||||
if resolved_sid not in seen_sessions:
|
||||
result = dict(result)
|
||||
result["session_id"] = resolved_sid
|
||||
# Keep raw_sid as session_id; expose lineage root separately.
|
||||
result["session_id"] = raw_sid
|
||||
if resolved_sid and resolved_sid != raw_sid:
|
||||
result["parent_session_id"] = resolved_sid
|
||||
seen_sessions[resolved_sid] = result
|
||||
if len(seen_sessions) >= limit:
|
||||
break
|
||||
|
||||
if mode == "fast":
|
||||
results = []
|
||||
for lineage_root, match_info in seen_sessions.items():
|
||||
# Emit (raw_sid + match_message_id) so the agent's follow-up
|
||||
# guided call has a valid {session_id, around_message_id}.
|
||||
# ``parent_session_id`` (if different) carries the lineage root.
|
||||
hit_sid = match_info.get("session_id") or lineage_root
|
||||
try:
|
||||
session_meta = db.get_session(lineage_root) or {}
|
||||
except Exception:
|
||||
session_meta = {}
|
||||
snippet = match_info.get("snippet") or ""
|
||||
context = match_info.get("context") or []
|
||||
if not isinstance(context, list):
|
||||
context = []
|
||||
entry = {
|
||||
"session_id": hit_sid,
|
||||
"when": _format_timestamp(
|
||||
session_meta.get("started_at") or match_info.get("session_started")
|
||||
),
|
||||
"source": session_meta.get("source") or match_info.get("source", "unknown"),
|
||||
"model": session_meta.get("model") or match_info.get("model") or "unknown",
|
||||
"matched_role": match_info.get("role"),
|
||||
"match_message_id": match_info.get("id"),
|
||||
"title": session_meta.get("title") or None,
|
||||
"snippet": snippet,
|
||||
"context": context,
|
||||
"summary": "[Search hit — summary not generated in fast mode] Use snippet/context fields, or set mode='summary' for LLM-generated recall.",
|
||||
}
|
||||
# Only emit parent_session_id when the FTS5 row lives in a
|
||||
# child of the displayed lineage — keeps the common case
|
||||
# (no delegation/compression) tidy.
|
||||
parent_sid = match_info.get("parent_session_id")
|
||||
if parent_sid and parent_sid != hit_sid:
|
||||
entry["parent_session_id"] = parent_sid
|
||||
results.append(entry)
|
||||
|
||||
return json.dumps({
|
||||
"success": True,
|
||||
"mode": "fast",
|
||||
"query": query,
|
||||
"results": results,
|
||||
"count": len(results),
|
||||
"sessions_searched": len(seen_sessions),
|
||||
"message": "Fast search returned FTS snippets without LLM summarization. Use mode='summary' for focused summaries when needed.",
|
||||
}, ensure_ascii=False)
|
||||
|
||||
# Prepare all sessions for parallel summarization
|
||||
tasks = []
|
||||
for session_id, match_info in seen_sessions.items():
|
||||
@@ -458,12 +1032,12 @@ def session_search(
|
||||
)
|
||||
|
||||
# Summarize all sessions in parallel
|
||||
async def _summarize_all() -> List[Union[str, Exception]]:
|
||||
async def _summarize_all() -> List[Union[tuple, Exception]]:
|
||||
"""Summarize all sessions with bounded concurrency."""
|
||||
max_concurrency = min(_get_session_search_max_concurrency(), max(1, len(tasks)))
|
||||
semaphore = asyncio.Semaphore(max_concurrency)
|
||||
|
||||
async def _bounded_summary(text: str, meta: Dict[str, Any]) -> Optional[str]:
|
||||
async def _bounded_summary(text: str, meta: Dict[str, Any]):
|
||||
async with semaphore:
|
||||
return await _summarize_session(text, query, meta)
|
||||
|
||||
@@ -493,13 +1067,27 @@ def session_search(
|
||||
}, ensure_ascii=False)
|
||||
|
||||
summaries = []
|
||||
aux_total = {
|
||||
"model": None,
|
||||
"input_tokens": 0,
|
||||
"output_tokens": 0,
|
||||
"cache_read_tokens": 0,
|
||||
"cache_creation_tokens": 0,
|
||||
"call_count": 0,
|
||||
}
|
||||
for (session_id, match_info, conversation_text, session_meta), result in zip(tasks, results):
|
||||
usage: Optional[Dict[str, Any]] = None
|
||||
if isinstance(result, Exception):
|
||||
logging.warning(
|
||||
"Failed to summarize session %s: %s",
|
||||
session_id, result, exc_info=True,
|
||||
)
|
||||
result = None
|
||||
summary_text = None
|
||||
elif isinstance(result, tuple):
|
||||
summary_text, usage = result
|
||||
else:
|
||||
# Defensive: a future code path might still return a bare string.
|
||||
summary_text, usage = result, None
|
||||
|
||||
# Prefer resolved parent session metadata over FTS5 match metadata.
|
||||
# match_info carries source/model from the *child* session that contained
|
||||
@@ -515,23 +1103,39 @@ def session_search(
|
||||
"model": session_meta.get("model") or match_info.get("model"),
|
||||
}
|
||||
|
||||
if result:
|
||||
entry["summary"] = result
|
||||
if summary_text:
|
||||
entry["summary"] = summary_text
|
||||
else:
|
||||
# Fallback: raw preview so matched sessions aren't silently
|
||||
# dropped when the summarizer is unavailable (fixes #3409).
|
||||
preview = (conversation_text[:500] + "\n…[truncated]") if conversation_text else "No preview available."
|
||||
entry["summary"] = f"[Raw preview — summarization unavailable]\n{preview}"
|
||||
|
||||
if usage:
|
||||
entry["aux_usage"] = usage
|
||||
aux_total["model"] = aux_total["model"] or usage.get("model")
|
||||
aux_total["input_tokens"] += usage["input_tokens"]
|
||||
aux_total["output_tokens"] += usage["output_tokens"]
|
||||
aux_total["cache_read_tokens"] += usage["cache_read_tokens"]
|
||||
aux_total["cache_creation_tokens"] += usage["cache_creation_tokens"]
|
||||
aux_total["call_count"] += 1
|
||||
|
||||
summaries.append(entry)
|
||||
|
||||
return json.dumps({
|
||||
payload = {
|
||||
"success": True,
|
||||
"mode": "summary",
|
||||
"query": query,
|
||||
"results": summaries,
|
||||
"count": len(summaries),
|
||||
"sessions_searched": len(seen_sessions),
|
||||
}, ensure_ascii=False)
|
||||
}
|
||||
# Only surface aux_usage_total when we actually captured any (test mocks
|
||||
# and providers that don't report usage produce an all-zero/empty dict —
|
||||
# don't pollute the payload in that case).
|
||||
if aux_total["call_count"]:
|
||||
payload["aux_usage_total"] = aux_total
|
||||
return json.dumps(payload, ensure_ascii=False)
|
||||
|
||||
except Exception as e:
|
||||
logging.error("Session search failed: %s", e, exc_info=True)
|
||||
@@ -539,7 +1143,7 @@ def session_search(
|
||||
|
||||
|
||||
def check_session_search_requirements() -> bool:
|
||||
"""Requires SQLite state database and an auxiliary text model."""
|
||||
"""Requires SQLite state database; summary mode also needs an auxiliary model."""
|
||||
try:
|
||||
from hermes_state import DEFAULT_DB_PATH
|
||||
return DEFAULT_DB_PATH.parent.exists()
|
||||
@@ -550,44 +1154,101 @@ def check_session_search_requirements() -> bool:
|
||||
SESSION_SEARCH_SCHEMA = {
|
||||
"name": "session_search",
|
||||
"description": (
|
||||
"Search your long-term memory of past conversations, or browse recent sessions. This is your recall -- "
|
||||
"every past session is searchable, and this tool summarizes what happened.\n\n"
|
||||
"TWO MODES:\n"
|
||||
"1. Recent sessions (no query): Call with no arguments to see what was worked on recently. "
|
||||
"Returns titles, previews, and timestamps. Zero LLM cost, instant. "
|
||||
"Start here when the user asks what were we working on or what did we do recently.\n"
|
||||
"2. Keyword search (with query): Search for specific topics across all past sessions. "
|
||||
"Returns LLM-generated summaries of matching sessions.\n\n"
|
||||
"USE THIS PROACTIVELY when:\n"
|
||||
"- The user says 'we did this before', 'remember when', 'last time', 'as I mentioned'\n"
|
||||
"- The user asks about a topic you worked on before but don't have in current context\n"
|
||||
"- The user references a project, person, or concept that seems familiar but isn't in memory\n"
|
||||
"- You want to check if you've solved a similar problem before\n"
|
||||
"- The user asks 'what did we do about X?' or 'how did we fix Y?'\n\n"
|
||||
"Don't hesitate to search when it is actually cross-session -- it's fast and cheap. "
|
||||
"Better to search and confirm than to guess or ask the user to repeat themselves.\n\n"
|
||||
"Search syntax: keywords joined with OR for broad recall (elevenlabs OR baseten OR funding), "
|
||||
"phrases for exact match (\"docker networking\"), boolean (python NOT java), prefix (deploy*). "
|
||||
"IMPORTANT: Use OR between keywords for best results — FTS5 defaults to AND which misses "
|
||||
"sessions that only mention some terms. If a broad OR query returns nothing, try individual "
|
||||
"keyword searches in parallel. Returns summaries of the top matching sessions."
|
||||
"Search past sessions stored in the local session DB. Three modes plus a default "
|
||||
"browsing mode when no arguments are passed. All three modes operate on the same "
|
||||
"FTS5-indexed message store; they differ in what they return and at what cost.\n\n"
|
||||
"MODES\n\n"
|
||||
" • mode='fast' — FTS5 snippets across matched sessions. No LLM call. Returns one "
|
||||
"entry per matched session with session_id, match_message_id, a one-message context "
|
||||
"window, and metadata. Use this as the starting move for any recall question — "
|
||||
"discovery and state reconstruction both. The match_message_id is the anchor you "
|
||||
"pass to guided.\n\n"
|
||||
" • mode='guided' — REQUIRES anchors from a prior fast call. Returns a window of "
|
||||
"raw messages around each anchor plus session bookends (bookend_start, bookend_end). "
|
||||
"No LLM call, no truncation. Single or multi-anchor: pass "
|
||||
"``anchors=[{session_id, around_message_id}, ...]``. Each anchor returns its own "
|
||||
"window in the response's ``windows`` array. Bookends are the first/last "
|
||||
"user+assistant messages of the session, empty when the window already overlaps "
|
||||
"the session head/tail. Tool messages are filtered from the window (the anchor "
|
||||
"itself is preserved even if role='tool').\n\n"
|
||||
" • mode='summary' — LLM-generated prose synthesis across matched sessions. Issues "
|
||||
"one auxiliary-model call per session in the hit list, so cost scales with whatever "
|
||||
"auxiliary model (or main model fallback) is configured. Returns aux token usage in "
|
||||
"the response (``aux_usage`` per call, ``aux_usage_total`` per batch). Reach for "
|
||||
"this when you genuinely need cross-session prose synthesis in one shot.\n\n"
|
||||
" • No query, no mode — browses recent sessions chronologically. Returns titles, "
|
||||
"previews, timestamps. No LLM call.\n\n"
|
||||
"DEFAULT MODE\n\n"
|
||||
" When ``mode=`` is unset, the resolver checks ``auxiliary.session_search.default_mode`` "
|
||||
"in ~/.hermes/config.yaml (accepted values: ``fast`` | ``summary``). If the user "
|
||||
"has set a default, honour it on the first call. With no config, the default is "
|
||||
"``fast``. An explicit ``mode=`` argument always wins.\n\n"
|
||||
"ANCHOR CONTRACT\n\n"
|
||||
" An anchor is the pair (session_id, around_message_id). The session_id MUST be "
|
||||
"the raw owning session of around_message_id — guided rejects anchors where the "
|
||||
"message_id does not exist in the named session. Fast results return both "
|
||||
"session_id (raw owning) and parent_session_id (when different, for display "
|
||||
"context only). Pair session_id with match_message_id from the same fast hit; do "
|
||||
"not substitute parent_session_id.\n\n"
|
||||
"FTS5 SYNTAX\n\n"
|
||||
" FTS5 defaults to AND across terms — multi-word queries require all terms to "
|
||||
"match. Use OR explicitly for broader recall (``alpha OR beta OR gamma``), quoted "
|
||||
"phrases for exact match (``\"docker networking\"``), boolean (``python NOT java``), "
|
||||
"or prefix wildcards (``deploy*``).\n\n"
|
||||
"WHEN TO USE\n\n"
|
||||
" Before reaching for ``gh``, web search, or filesystem inspection on questions "
|
||||
"about prior work — what was discussed, what was decided, where an artefact was "
|
||||
"created. The session DB carries what was said when; external tools show current "
|
||||
"world state."
|
||||
),
|
||||
"parameters": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"query": {
|
||||
"type": "string",
|
||||
"description": "Search query — keywords, phrases, or boolean expressions to find in past sessions. Omit this parameter entirely to browse recent sessions instead (returns titles, previews, timestamps with no LLM cost).",
|
||||
"description": "Search query (modes 'fast' and 'summary'). Keywords, phrases, or boolean expressions to find in past sessions. Omit this parameter entirely to browse recent sessions instead. Ignored when mode='guided'.",
|
||||
},
|
||||
"role_filter": {
|
||||
"type": "string",
|
||||
"description": "Optional: only search messages from specific roles (comma-separated). E.g. 'user,assistant' to skip tool outputs.",
|
||||
"description": "Optional: only search messages from specific roles (comma-separated). Defaults to 'user,assistant' for fast/summary modes — tool messages are usually noisy (large outputs, serialised tool calls). Pass 'user,assistant,tool' to include tool output (debugging tool behaviour) or 'tool' to search tool output only. Ignored when mode='guided'.",
|
||||
},
|
||||
"limit": {
|
||||
"type": "integer",
|
||||
"description": "Max sessions to summarize (default: 3, max: 5).",
|
||||
"description": "Max sessions to return (default: 3, max: 10). Bump higher (5–10) when the user wants to be in the retrieval loop and pick the right anchor for a guided drill-down. Ignored when mode='guided' (which returns one anchored window per anchor).",
|
||||
"default": 3,
|
||||
},
|
||||
"mode": {
|
||||
"type": "string",
|
||||
"enum": ["fast", "summary", "guided"],
|
||||
"description": (
|
||||
"fast — FTS5 snippets, no LLM. Default. "
|
||||
"guided — requires anchors from a prior fast call; returns raw message window per anchor. "
|
||||
"summary — LLM synthesis across matched sessions; opt-in, costs per aux-model call."
|
||||
),
|
||||
"default": "fast",
|
||||
},
|
||||
"anchors": {
|
||||
"type": "array",
|
||||
"description": "Required for mode='guided'. List of {session_id, around_message_id} dicts to drill into. Copy session_id and match_message_id verbatim from prior fast-mode results — they pair as a single self-consistent handle. Do NOT substitute parent_session_id (shown for display context only; pairs incorrectly with match_message_id). One anchor is fine when the topic lives in a single session; for multi-session catch-up (topic touched across several recent sessions), pass the top 2–3 fast hits as separate anchors in ONE call — each gets its own window + bookends in the response's 'windows' array.",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"session_id": {"type": "string"},
|
||||
"around_message_id": {"type": "integer"},
|
||||
},
|
||||
"required": ["session_id", "around_message_id"],
|
||||
},
|
||||
},
|
||||
"window": {
|
||||
"type": "integer",
|
||||
"description": "Mode='guided' only. Number of messages to return on each side of each anchor (the anchor itself is always included). Shared across all anchors in a multi-anchor call. Clamped to [1, 20]. Default 5.",
|
||||
"default": 5,
|
||||
},
|
||||
"sort": {
|
||||
"type": "string",
|
||||
"enum": ["newest", "oldest"],
|
||||
"description": "Mode='fast' only. Temporal bias on top of FTS5 ranking. Omit to keep relevance-only ordering (the default, suitable for exploratory recall — 'what do we know about X'). Set 'newest' for recency-shaped questions ('where did we leave X', 'latest status of Y') so recent matches surface first with rank as the tiebreaker. Set 'oldest' for origin-shaped questions ('how did X start', 'first time we discussed Y') so the earliest matches surface first. Silently ignored in summary / guided / recent modes — for temporal narrative across sessions, drive fast with sort, then drill the right anchors with guided.",
|
||||
},
|
||||
},
|
||||
"required": [],
|
||||
},
|
||||
@@ -605,6 +1266,12 @@ registry.register(
|
||||
query=args.get("query") or "",
|
||||
role_filter=args.get("role_filter"),
|
||||
limit=args.get("limit", 3),
|
||||
mode=args.get("mode"),
|
||||
session_id=args.get("session_id"),
|
||||
around_message_id=args.get("around_message_id"),
|
||||
window=args.get("window", 5),
|
||||
anchors=args.get("anchors"),
|
||||
sort=args.get("sort"),
|
||||
db=kw.get("db"),
|
||||
current_session_id=kw.get("current_session_id")),
|
||||
check_fn=check_session_search_requirements,
|
||||
|
||||
Reference in New Issue
Block a user