style(session_search): tighten verbose inline comments

Pass over comments added during the iterative development of this PR, trimming where they restated the code, repeated themselves, or read as journal-style narration. Net -22 comment lines; behaviour unchanged, 123 tests still passing. Notable trims: - DEFAULT_CONFIG module header: 9 lines → 4. Dropped the 'auxiliary started as aux-LLM routing but in practice groups per-tool config' digression — irrelevant to readers of this module. - get_anchored_view bookend-SQL filter block: 8 lines → 5. The 'let me check…-shaped assistant messages' over-narration is gone; the SQL filter rationale survives. - Fast-mode lineage-grouping IMPORTANT block: 12 lines → 8. The '#regression introduced by the original match_message_id rollout' meta-note removed (the comment now states the contract directly). - Fast-mode result-emission comment: 8 lines → 3. The 'lineage_root is the dict key…' explanation was restating the variables; the load-bearing one-liner (emit raw_sid + match_message_id) stays. - sort normalisation comment: 4 lines → 3. - role_filter parse comment: 5 lines → 3. - ORDER BY comment in search_messages: 3 lines → 2. - LIKE fallback ordering comment: 4 lines → 2.
docs(session_search): document default_mode in cli-config.yaml.example
2026-06-18 16:10:27 +08:00 · 2026-05-15 18:31:21 +02:00 · 2026-05-15 16:48:34 +02:00 · 2026-05-15 16:43:52 +02:00 · 2026-05-15 16:34:08 +02:00 · 2026-05-15 16:30:12 +02:00
12 changed files with 3075 additions and 65 deletions
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -444,6 +444,10 @@ prompt_caching:
 #     model: ""
 #     timeout: 30
 #     max_concurrency: 3    # Limit parallel summaries to reduce request-burst 429s
 #     default_mode: "fast"  # 'fast' | 'summary' — mode used when caller passes none.
 #                           # fast: FTS5 snippet hits, no LLM call. Default.
 #                           # summary: LLM-generated prose synthesis across hits.
 #                           # guided requires anchors and cannot be a default.
 #     extra_body: {}        # Provider-specific OpenAI-compatible request fields
 #                           # Example for providers that support request-body
 #                           # reasoning controls:
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -846,6 +846,7 @@ DEFAULT_CONFIG = {
            "timeout": 30,
            "extra_body": {},
            "max_concurrency": 3,  # Clamp parallel summaries to avoid request-burst 429s on small providers
            "default_mode": "fast",  # 'fast' | 'summary' — which mode session_search uses when caller passes none
        },
        "skills_hub": {
            "provider": "auto",
--- a/hermes_state.py
+++ b/hermes_state.py
@@ -25,7 +25,7 @@ from pathlib import Path
 from agent.memory_manager import sanitize_context
 from hermes_constants import get_hermes_home
-from typing import Any, Callable, Dict, List, Optional, TypeVar
+from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar
 logger = logging.getLogger(__name__)
@@ -1618,6 +1618,185 @@ class SessionDB:
            result.append(msg)
        return result
    def get_messages_around(
        self,
        session_id: str,
        around_message_id: int,
        window: int = 5,
    ) -> List[Dict[str, Any]]:
        """Return the messages surrounding one anchor message of a session.

        The result holds at most ``window`` messages preceding the anchor,
        the anchor row itself, and at most ``window`` messages following it,
        sorted by ascending id. Only rows of ``session_id`` are considered,
        so an anchor close to either end of the session simply yields a
        shorter slice on that side. A negative ``window`` is treated as ``0``
        (anchor only).

        Args:
            session_id: Session the anchor must belong to.
            around_message_id: ``messages.id`` of the anchor row.
            window: Maximum number of neighbours fetched on each side.

        Returns:
            Message dicts with ``content`` passed through
            ``_decode_content`` and a truthy ``tool_calls`` value parsed from
            JSON (``[]`` when parsing fails). An empty list when the anchor
            id does not exist inside ``session_id``; surfacing that as an
            error is left to the caller.
        """
        span = 0 if window < 0 else window
        with self._lock:
            conn = self._conn
            # Probe on (id, session_id) together so an id that belongs to a
            # different session is rejected instead of silently anchoring there.
            probe = conn.execute(
                "SELECT 1 FROM messages WHERE id = ? AND session_id = ? LIMIT 1",
                (around_message_id, session_id),
            ).fetchone()
            if not probe:
                return []
            # Anchor plus predecessors, read newest-first so LIMIT keeps the
            # rows closest to the anchor (hence span + 1).
            leading = conn.execute(
                "SELECT * FROM messages "
                "WHERE session_id = ? AND id <= ? "
                "ORDER BY id DESC LIMIT ?",
                (session_id, around_message_id, span + 1),
            ).fetchall()
            # Successors are already in ascending order.
            trailing = conn.execute(
                "SELECT * FROM messages "
                "WHERE session_id = ? AND id > ? "
                "ORDER BY id ASC LIMIT ?",
                (session_id, around_message_id, span),
            ).fetchall()
        messages: List[Dict[str, Any]] = []
        # Flip the leading slice back to ascending before appending the tail.
        for raw in [*reversed(leading), *trailing]:
            entry = dict(raw)
            if "content" in entry:
                entry["content"] = self._decode_content(entry["content"])
            serialized_calls = entry.get("tool_calls")
            if serialized_calls:
                try:
                    entry["tool_calls"] = json.loads(serialized_calls)
                except (json.JSONDecodeError, TypeError):
                    logger.warning(
                        "Failed to deserialize tool_calls in get_messages_around, falling back to []"
                    )
                    entry["tool_calls"] = []
            messages.append(entry)
        return messages
    def get_anchored_view(
        self,
        session_id: str,
        around_message_id: int,
        window: int = 5,
        bookend: int = 3,
        keep_roles: Optional[Tuple[str, ...]] = ("user", "assistant"),
    ) -> Dict[str, Any]:
        """Build a guided-recall view: an anchored window plus session bookends.

        The window is fetched with ``get_messages_around`` and then narrowed
        to ``keep_roles``. The anchor message is kept whatever its role, since
        a caller may deliberately anchor on e.g. a tool message.

        Returned keys:
          - ``window``: the role-filtered messages around the anchor.
          - ``bookend_start``: up to ``bookend`` of the earliest session
            messages whose id is strictly below the first id of the
            *unfiltered* window; empty when the window already reaches the
            start of the session.
          - ``bookend_end``: the mirror image at the tail, in ascending id
            order.

        Bookend rows are limited to ``keep_roles`` and to rows with non-empty
        content, so the opening goal and the closing resolution of a long
        session are available without loading the whole transcript.

        ``keep_roles=None`` switches the role filter off for both the window
        and the bookends; the non-empty-content filter on bookends still
        applies. A ``bookend`` of zero or less skips both bookend queries.

        When the anchor is not part of ``session_id`` all three keys are
        present and each maps to an empty list; the caller decides how to
        surface that.
        """
        edge_count = 0 if bookend < 0 else bookend
        # The primitive already does the anchor-existence check, window
        # clamping, content decoding and tool_calls deserialisation.
        raw_window = self.get_messages_around(
            session_id, around_message_id, window=window
        )
        if not raw_window:
            return {"window": [], "bookend_start": [], "bookend_end": []}
        if keep_roles is None:
            visible = raw_window
        else:
            allowed = set(keep_roles)
            # The anchor is exempt from the role filter.
            visible = [
                m for m in raw_window
                if m.get("id") == around_message_id or m.get("role") in allowed
            ]
        # Non-overlap bounds come from the unfiltered window so a bookend can
        # never repeat a row the window query already covered.
        first_id = raw_window[0]["id"]
        last_id = raw_window[-1]["id"]
        head_rows: List[Any] = []
        tail_rows: List[Any] = []
        if edge_count > 0:
            if keep_roles is None:
                role_sql = ""
                role_args: list = []
            else:
                role_args = list(keep_roles)
                role_sql = " AND role IN ({})".format(
                    ",".join(["?"] * len(role_args))
                )
            # length(content) > 0 keeps tool-call-only assistant turns
            # (content='' with tool_calls populated) from using up the
            # limited bookend slots.
            with self._lock:
                head_rows = self._conn.execute(
                    f"SELECT * FROM messages "
                    f"WHERE session_id = ? AND id < ?{role_sql} "
                    f"AND length(content) > 0 "
                    f"ORDER BY id ASC LIMIT ?",
                    (session_id, first_id, *role_args, edge_count),
                ).fetchall()
                tail_rows = self._conn.execute(
                    f"SELECT * FROM messages "
                    f"WHERE session_id = ? AND id > ?{role_sql} "
                    f"AND length(content) > 0 "
                    f"ORDER BY id DESC LIMIT ?",
                    (session_id, last_id, *role_args, edge_count),
                ).fetchall()
            # DESC was only needed so LIMIT keeps the last rows; emit ASC.
            tail_rows = tail_rows[::-1]
        view: Dict[str, Any] = {"window": visible}
        for key, rows in (("bookend_start", head_rows), ("bookend_end", tail_rows)):
            hydrated: List[Dict[str, Any]] = []
            for record in rows:
                item = dict(record)
                if "content" in item:
                    item["content"] = self._decode_content(item["content"])
                if item.get("tool_calls"):
                    try:
                        item["tool_calls"] = json.loads(item["tool_calls"])
                    except (json.JSONDecodeError, TypeError):
                        logger.warning(
                            "Failed to deserialize tool_calls in get_anchored_view, falling back to []"
                        )
                        item["tool_calls"] = []
                hydrated.append(item)
            view[key] = hydrated
        return view
    def resolve_resume_session_id(self, session_id: str) -> str:
        """Redirect a resume target to the descendant session that holds the messages.
@@ -1885,6 +2064,7 @@ class SessionDB:
        role_filter: List[str] = None,
        limit: int = 20,
        offset: int = 0,
        sort: str = None,
    ) -> List[Dict[str, Any]]:
        """
        Full-text search across session messages using FTS5.
@@ -1897,6 +2077,19 @@ class SessionDB:
        Returns matching messages with session metadata, content snippet,
        and surrounding context (1 message before and after the match).
        ``sort`` controls temporal ordering of results:
          - ``None`` (default): FTS5 BM25 relevance only. Time-neutral, but
            ties between equally-relevant messages are broken arbitrarily.
          - ``"newest"``: order by message timestamp DESC, then by rank.
            Recent matches surface first; rank breaks same-timestamp ties.
          - ``"oldest"``: order by message timestamp ASC, then by rank.
            For "how did this start" / "what was the original X" questions.
        The LIKE fallback path (short CJK queries) has no rank to combine
        with, so it orders by timestamp alone: ASC for ``"oldest"``, DESC for
        ``"newest"`` and for the default. The trigram CJK path honours
        ``sort`` like the main FTS5 path.
        """
        if not query or not query.strip():
            return []
@@ -1905,6 +2098,25 @@ class SessionDB:
        if not query:
            return []
        # Normalise sort. Anything not in the allowed set falls back to None
        # (FTS5 rank-only) — be forgiving to callers who pass empty string or
        # an unexpected value rather than failing the search.
        if isinstance(sort, str):
            sort_norm = sort.strip().lower()
            if sort_norm not in ("newest", "oldest"):
                sort_norm = None
        else:
            sort_norm = None
        # ORDER BY shared by both FTS5 paths. With sort set, timestamp is
        # primary and rank is the tiebreaker; otherwise rank alone.
        if sort_norm == "newest":
            order_by_sql = "ORDER BY m.timestamp DESC, rank"
        elif sort_norm == "oldest":
            order_by_sql = "ORDER BY m.timestamp ASC, rank"
        else:
            order_by_sql = "ORDER BY rank"
        # Build WHERE clauses dynamically
        where_clauses = ["messages_fts MATCH ?"]
        params: list = [query]
@@ -1943,7 +2155,7 @@ class SessionDB:
            JOIN messages m ON m.id = messages_fts.rowid
            JOIN sessions s ON s.id = m.session_id
            WHERE {where_sql}
-            ORDER BY rank
+            {order_by_sql}
            LIMIT ? OFFSET ?
        """
@@ -2012,7 +2224,7 @@ class SessionDB:
                    JOIN messages m ON m.id = messages_fts_trigram.rowid
                    JOIN sessions s ON s.id = m.session_id
                    WHERE {' AND '.join(tri_where)}
-                    ORDER BY rank
+                    {order_by_sql}
                    LIMIT ? OFFSET ?
                """
                tri_params.extend([limit, offset])
@@ -2051,6 +2263,13 @@ class SessionDB:
                if role_filter:
                    like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
                    like_params.extend(role_filter)
                # LIKE fallback has no rank to combine with — just timestamp
                # direction. Default/"newest" → DESC; "oldest" → ASC.
                like_order_sql = (
                    "ORDER BY m.timestamp ASC"
                    if sort_norm == "oldest"
                    else "ORDER BY m.timestamp DESC"
                )
                like_sql = f"""
                    SELECT m.id, m.session_id, m.role,
                           substr(m.content,
@@ -2061,7 +2280,7 @@ class SessionDB:
                    FROM messages m
                    JOIN sessions s ON s.id = m.session_id
                    WHERE {' AND '.join(like_where)}
-                    ORDER BY m.timestamp DESC
+                    {like_order_sql}
                    LIMIT ? OFFSET ?
                """
                like_params.extend([limit, offset])
--- a/run_agent.py
+++ b/run_agent.py
@@ -10689,6 +10689,11 @@ class AIAgent:
                limit=function_args.get("limit", 3),
                db=session_db,
                current_session_id=self.session_id,
                mode=function_args.get("mode"),
                session_id=function_args.get("session_id"),
                around_message_id=function_args.get("around_message_id"),
                window=function_args.get("window", 5),
                anchors=function_args.get("anchors"),
            )
        elif function_name == "memory":
            target = function_args.get("target", "memory")
@@ -11321,6 +11326,11 @@ class AIAgent:
                        limit=function_args.get("limit", 3),
                        db=session_db,
                        current_session_id=self.session_id,
                        mode=function_args.get("mode"),
                        session_id=function_args.get("session_id"),
                        around_message_id=function_args.get("around_message_id"),
                        window=function_args.get("window", 5),
                        anchors=function_args.get("anchors"),
                    )
                tool_duration = time.time() - tool_start_time
                if self._should_emit_quiet_tool_messages():
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -1051,6 +1051,7 @@ AUTHOR_MAP = {
    "openclaw@agent.local": "29206394",  # PR #22194 salvage (sudo -S brute-force guard, #9590)
    "freedemon@gmail.com": "fr33d3m0n",  # PR #21128 salvage (sudo stdin/askpass DANGEROUS, #17873 cat 4)
    "zhaowh3613@outlook.com": "VinceZcrikl",  # PR #23647 salvage (npm UTF-8 decode on GBK Windows)
    "abcdjmm970703@gmail.com": "JabberELF",  # PR #20238 salvage (session_search fast/summary dual-mode)
    "anton.kuenzi@gmail.com": "ZeterMordio",  # PR #11754 salvage (zsh completion compdef + _arguments syntax)
    "23yntong@stu.edu.cn": "iuyup",  # PR #6155 salvage (shell=True hardening)
    "86501179+1RB@users.noreply.github.com": "1RB",  # PR #25462 salvage (discord forwarded messages)
--- a/skills/memory/DESCRIPTION.md
+++ b/skills/memory/DESCRIPTION.md
@@ -0,0 +1,3 @@
 ---
 description: Primitives for searching, recalling, and reasoning over Hermes' own session history and stored memory.
 ---
--- a/skills/memory/session-recall/SKILL.md
+++ b/skills/memory/session-recall/SKILL.md
@@ -0,0 +1,112 @@
 ---
 name: session-recall
 description: Use session_search effectively for finding and reading prior Hermes sessions.
 metadata:
  hermes:
    category: memory
 ---
 # session-recall
 session_search is the tool. Three modes — fast, guided, summary — answer different question shapes. Picking the wrong mode costs latency, money, or correctness.
 ## Pre-flight
 1. If the user asks about prior work ("find the session where X", "catch me up on Y", "we drafted Z"), your first move is session_search. Not filesystem search, not a different tool.
 2. If the user names an artefact, search the literal name first. No OR-expansion.
 3. Default to fast → guided. Reach for summary only when you need cross-session synthesis prose in one shot.
 ## Mode picker
 | Question shape | Mode | Why |
 |---|---|---|
 | Catch me up / where did we get to / what did we decide | fast → guided | FTS5 finds sessions; guided reads the transcript. SQL-only. |
 | Find an artefact by name / which session mentions X | fast | Snippets only, no LLM. |
 | Read around a specific message in a known session | guided | Raw window around anchor. |
 | Cross-session prose synthesis in one shot | summary | LLM call per hit (aux model if configured, else main). Opt-in. |
 ## Levers
 | Lever | Default | When to change |
 |---|---|---|
 | `limit` (fast) | 3 | 5–10 when topic spans sessions or user wants to pick from a list |
 | `sort` (fast) | unset (relevance) | `newest` for "where did we leave X"; `oldest` for "how did X start" |
 | `role_filter` (fast) | user,assistant | Add `tool` only when debugging tool output specifically |
 | `window` (guided) | 5 | Bump for long resolutions; shrink if response truncates |
 | anchor count (guided) | 1 | 2–3 anchors when topic spans recent sessions |
 | `limit` (summary) | 3 | Bump cautiously; cost scales directly |
 ## Composition patterns
 1. **Discover → drill.** fast first, drill the top hit with guided. Widen `window` or re-anchor if the resolution isn't covered.
 2. **Multi-anchor for arcs.** When fast returns 2–3 relevant hits on the same topic, pass them all to guided in one call.
 3. **Bookend-first reading.** For "what was the conclusion" questions, read `bookend_end` before `messages`.
 4. **Delegate when transcripts are big.** If you're about to pull 30K+ chars of transcript into your context just to summarise it, hand the dumps to a subagent and ask for a digest.
 5. **Verify before quoting.** High-stakes recall does two passes: fast with the literal term (does the hit list contain the right session?) → guided (does the transcript confirm the outcome?).
 ## Worked examples
 ### A — find a named artefact
 User: "we drafted a deployment plan in a session yesterday, find it"
 Right: `session_search(query="deployment plan", limit=5)`. The user named it — search the name. Drill the top hit if you need details.
 Wrong: `session_search(query="deploy OR deployment OR rollout OR plan")`. OR-expansion drowns the hit in unrelated sessions.
 ### B — catch up on a multi-session arc
 User: "where did we get to with the auth refactor?"
 Right: fast with `sort='newest'`, then multi-anchor guided across the top 2–3 hits:
 ```
 session_search(query="auth refactor", limit=5, sort='newest')
 session_search(mode='guided', anchors=[
  {'session_id': hit_1.session_id, 'around_message_id': hit_1.match_message_id},
  {'session_id': hit_2.session_id, 'around_message_id': hit_2.match_message_id},
  {'session_id': hit_3.session_id, 'around_message_id': hit_3.match_message_id},
 ])
 ```
 Read all three slices (bookend_start / messages / bookend_end) on each window and the arc reconstructs.
 Wrong: `session_search(query="auth refactor", mode='summary')`. Summary launders FTS5 hits through an LLM and can confabulate when the right session isn't in the hit list.
 ### C — drill into a known session for a conclusion
 User: "in the session about the caching layer, what did we decide?"
 fast to locate, guided to drill, read `bookend_end` first:
 ```
 session_search(query="caching layer", limit=3)
 session_search(mode='guided', anchors=[
  {'session_id': <top>, 'around_message_id': <match_id>}
 ])
 ```
 Conclusions ("decided X", "shipped Y") usually live in `bookend_end`.
 ## Reading guided responses
 Every guided window has three slices:
 - `bookend_start` — opening prose (kickoff, goal)
 - `messages` — the anchored window (FTS5 hit + neighbours)
 - `bookend_end` — closing prose (resolution, decisions, commits)
 Read all three. Bookends are prose that summarises; snippets and the middle window can be noisy when sessions are *about* the search term.
 ## Pitfalls
 - **Manual-archaeology trap.** If fast snippets look noisy, drill the top hit with guided. Don't pivot to find / grep / raw SQL.
 - **Summary confabulation.** Summary will produce confident prose even when FTS5 missed the right session. Verify by re-querying in fast mode and checking the hit list.
 - **FTS5 is AND by default.** Multi-word queries require all terms; use OR or quoted phrases deliberately.
 - **Anchor mismatch.** `around_message_id` must exist in the named session. Re-anchor from a fresh fast result if guided rejects.
 - **Window truncation.** Re-call with a smaller window if a dump truncates.
 - **Compaction lineage.** A fast hit with `parent_session_id` set means the session was split by compaction; its `bookend_start` is a handoff summary, not the original opener.
 ## Note on skill limits
 This skill teaches composition but cannot enforce it. If your default behaviour drifts — composing paraphrase queries instead of drilling, reaching for summary when fast → guided would do, pivoting to filesystem search when fast returned hits — the skill is being ignored, not failing. When in doubt: fast first, then drill.
--- a/tests/hermes_state/test_get_anchored_view.py
+++ b/tests/hermes_state/test_get_anchored_view.py
@@ -0,0 +1,189 @@
 """Unit tests for SessionDB.get_anchored_view() — window + bookends + role filter.
 Used by ``session_search`` mode='guided'. Builds on ``get_messages_around``
 and adds:
  - opinionated default role filter (drops tool messages from the window,
    but never drops the anchor itself)
  - session-head and session-tail bookends (default 3 messages each) so an
    FTS5 hit anywhere in a long session still yields the goal + resolution
  - bookends are skipped when the main window already overlaps the head or tail
 These properties are the reason guided is useful for state recall on long
 sessions, so the suite below pins them all down.
 """
 import pytest
 from hermes_state import SessionDB
@pytest.fixture
 def db(tmp_path):
    return SessionDB(tmp_path / "state.db")
 def _seed(db: SessionDB, session_id: str, roles: list[str]) -> list[int]:
    """Create ``session_id`` and append one message per entry of ``roles``.

    Each message's content is ``"<role>-<index>"``. Returns the values that
    ``append_message`` handed back, in append order.
    """
    db.create_session(session_id, source="cli")
    return [
        db.append_message(session_id, role=role, content=f"{role}-{position}")
        for position, role in enumerate(roles)
    ]
 def test_window_filters_tool_messages_but_keeps_anchor_when_tool(db):
    """A tool-role anchor survives the default role filter; the other tool
    messages inside the window are removed."""
    role_sequence = [
        "user", "assistant", "tool",     # 0..2
        "user", "tool",                  # 3..4  ← anchor on a tool (idx 4)
        "tool", "assistant", "user",     # 5..7
    ]
    ids = _seed(db, "s1", role_sequence)
    anchor_id = ids[4]
    window_msgs = db.get_anchored_view("s1", anchor_id, window=3, bookend=0)["window"]
    # The tool role is still represented, and it is the anchor that carries it.
    assert "tool" in [m["role"] for m in window_msgs]
    anchor = next(m for m in window_msgs if m["id"] == anchor_id)
    assert anchor["role"] == "tool"
    # Exactly one tool row is left, and that row is the anchor.
    surviving_tools = [m for m in window_msgs if m["role"] == "tool"]
    assert len(surviving_tools) == 1 and surviving_tools[0]["id"] == anchor_id
 def test_window_keeps_user_and_assistant_by_default(db):
    """With only user/assistant rows the default role filter drops nothing."""
    ids = _seed(db, "s1", ["user", "assistant"] * 6)
    window_msgs = db.get_anchored_view("s1", ids[5], window=2, bookend=0)["window"]
    # 2 before + anchor + 2 after, with both roles still present.
    assert len(window_msgs) == 5
    assert {m["role"] for m in window_msgs} == {"user", "assistant"}
 def test_bookends_returned_when_window_in_middle(db):
    """A mid-session window leaves room for full bookends on both sides."""
    ids = _seed(db, "s1", ["user", "assistant"] * 10)  # 20 messages
    view = db.get_anchored_view("s1", ids[10], window=2, bookend=3)
    head_ids = [m["id"] for m in view["bookend_start"]]
    tail_ids = [m["id"] for m in view["bookend_end"]]
    assert len(head_ids) == 3
    assert len(tail_ids) == 3
    # The bookends are the real first and last three messages of the session.
    assert head_ids == ids[:3]
    assert tail_ids == ids[-3:]
 def test_bookend_start_empty_when_window_covers_session_head(db):
    """No start bookend is produced once the window includes the first message."""
    ids = _seed(db, "s1", ["user", "assistant"] * 5)  # 10 messages
    # window=3 around ids[1] spans ids[0..4], so the head is already covered.
    view = db.get_anchored_view("s1", ids[1], window=3, bookend=3)
    start, end = view["bookend_start"], view["bookend_end"]
    assert start == []
    # There is still room after the window, so the end bookend is full.
    assert len(end) == 3
 def test_bookend_end_empty_when_window_covers_session_tail(db):
    """No end bookend is produced once the window includes the last message."""
    ids = _seed(db, "s1", ["user", "assistant"] * 5)  # 10 messages
    # Anchoring on the second-to-last message with window=3 reaches the tail.
    view = db.get_anchored_view("s1", ids[-2], window=3, bookend=3)
    start, end = view["bookend_start"], view["bookend_end"]
    assert end == []
    assert len(start) == 3
 def test_bookends_skip_tool_messages(db):
    """Bookends contain only user/assistant rows under the default role filter.

    The expected ids are pinned as well as the roles: a bare ``all(...)``
    over the roles is vacuously true for an empty list, so it would not
    notice bookends that wrongly come back empty.
    """
    ids = _seed(db, "s1", [
        "tool", "tool", "user", "assistant",     # head: only 2 user/assistant
        "user", "assistant", "user", "assistant",
        "tool", "user", "assistant", "tool",     # tail: 2 user/assistant + tool
    ])
    # Anchor in the middle; window=1 covers ids[4..6].
    view = db.get_anchored_view("s1", ids[5], window=1, bookend=3)
    assert all(m["role"] in ("user", "assistant") for m in view["bookend_start"])
    assert all(m["role"] in ("user", "assistant") for m in view["bookend_end"])
    # Head: the two leading tool rows are skipped, leaving only two candidates
    # before the window.
    assert [m["id"] for m in view["bookend_start"]] == [ids[2], ids[3]]
    # Tail: the tool rows at idx 8 and 11 are skipped; the last three
    # user/assistant rows after the window remain, in ascending order.
    assert [m["id"] for m in view["bookend_end"]] == [ids[7], ids[9], ids[10]]
 def test_bookend_zero_returns_empty_bookends(db):
    """``bookend=0`` yields empty lists for both bookends."""
    ids = _seed(db, "s1", ["user", "assistant"] * 10)
    view = db.get_anchored_view("s1", ids[10], window=2, bookend=0)
    for side in ("bookend_start", "bookend_end"):
        assert view[side] == []
 def test_anchor_not_in_session_returns_empty_view(db):
    """An anchor outside the named session produces an all-empty view.

    Two cases are covered: an id that exists nowhere, and an id that exists
    but belongs to a different session.
    """
    empty_view = {"window": [], "bookend_start": [], "bookend_end": []}
    _seed(db, "s1", ["user", "assistant"] * 5)
    s2_ids = _seed(db, "s2", ["user", "assistant"] * 5)
    # Id that no message carries.
    assert db.get_anchored_view("s1", 999999, window=3, bookend=3) == empty_view
    # Real message id, but from s2: it must not anchor a view of s1.
    assert db.get_anchored_view("s1", s2_ids[0], window=3, bookend=3) == empty_view
 def test_keep_roles_none_disables_filtering(db):
    """Pass keep_roles=None to get raw window + raw bookends including tool.

    Both halves of that claim are asserted: tool rows must survive in the
    window and in each bookend.
    """
    ids = _seed(db, "s1", ["user", "tool", "assistant", "tool", "user"] * 3)
    # window=2 around ids[7] covers ids[5..9].
    view = db.get_anchored_view(
        "s1", ids[7], window=2, bookend=3, keep_roles=None
    )
    # Tool messages in the window survive when filtering is disabled.
    roles_in_window = [m["role"] for m in view["window"]]
    assert "tool" in roles_in_window
    # Bookends are the plain first/last three rows, tool rows included.
    assert [m["id"] for m in view["bookend_start"]] == ids[:3]
    assert [m["id"] for m in view["bookend_end"]] == ids[-3:]
    assert "tool" in [m["role"] for m in view["bookend_start"]]
    assert "tool" in [m["role"] for m in view["bookend_end"]]
 def test_keep_roles_can_include_tool_when_caller_wants_it(db):
    """Listing ``tool`` in ``keep_roles`` lets tool messages into the window."""
    ids = _seed(db, "s1", ["user", "tool", "assistant"] * 5)
    allowed = ("user", "assistant", "tool")
    view = db.get_anchored_view(
        "s1", ids[7], window=2, bookend=3, keep_roles=allowed
    )
    # With all three roles allowed, a tool row must show up in the window.
    window_roles = {m["role"] for m in view["window"]}
    assert "tool" in window_roles
 def test_negative_bookend_treated_as_zero(db):
    """A negative ``bookend`` behaves like ``bookend=0``: both bookends empty."""
    ids = _seed(db, "s1", ["user", "assistant"] * 10)
    view = db.get_anchored_view("s1", ids[10], window=2, bookend=-3)
    for side in ("bookend_start", "bookend_end"):
        assert view[side] == []
 def test_bookends_do_not_leak_across_sessions(db):
    """Bookends stay inside their session: ids of a second session seeded
    right after the first must never show up in the first session's bookends."""
    s1_ids = _seed(db, "s1", ["user", "assistant"] * 4)
    s2_ids = _seed(db, "s2", ["user", "assistant"] * 4)
    view = db.get_anchored_view("s1", s1_ids[3], window=1, bookend=3)
    seen_ids = {
        m["id"]
        for side in ("bookend_start", "bookend_end")
        for m in view[side]
    }
    assert seen_ids.isdisjoint(set(s2_ids))
 def test_bookends_skip_empty_content_assistant_turns(db):
    """Assistant turns with empty content (tool-call-only turns) must not
    take up bookend slots; the prose opener and the prose closer have to make
    it into ``bookend_start`` / ``bookend_end``."""
    db.create_session("s1", source="cli")

    def add(role, content):
        return db.append_message("s1", role=role, content=content)

    # Real opener
    open_id = add("user", "kick off the work")
    add("assistant", "on it")
    # A burst of empty-content assistants, each followed by tool output
    for _ in range(5):
        add("assistant", "")
        add("tool", "some output")
    # Middle prose; the view is anchored on this user message
    mid_id = add("user", "status?")
    add("assistant", "midway")
    # Tail: more empty assistants, then the prose closer
    for _ in range(3):
        add("assistant", "")
        add("tool", "poll")
    close_id = add("assistant", "Done. Final summary here.")
    view = db.get_anchored_view("s1", mid_id, window=1, bookend=3)
    head, tail = view["bookend_start"], view["bookend_end"]
    # Every bookend_start row carries content, and the first is the opener.
    assert all(m["content"] for m in head), \
        "bookend_start leaked an empty-content row"
    assert head[0]["id"] == open_id
    # Same for bookend_end, which must also contain the closer.
    assert all(m["content"] for m in tail), \
        "bookend_end leaked an empty-content row"
    assert any(m["id"] == close_id for m in tail), \
        "actual session closer must survive into bookend_end"
--- a/tests/hermes_state/test_get_messages_around.py
+++ b/tests/hermes_state/test_get_messages_around.py
@@ -0,0 +1,137 @@
 """Unit tests for SessionDB.get_messages_around() — anchored message windows.
 The method is used by ``session_search`` mode='guided' for anchored drill-down.
 It must:
  - Return an ordered window: up to ``window`` messages before the anchor,
    the anchor itself, then up to ``window`` after, all id-ascending.
  - Honour session boundaries (fewer messages returned at start / end).
  - Honour session isolation (same id range, different session = nothing).
  - Return an empty list when the anchor is not in the named session.
 """
 import pytest
 from hermes_state import SessionDB
@pytest.fixture
 def db(tmp_path):
    return SessionDB(tmp_path / "state.db")
 def _seed_session(db: SessionDB, session_id: str, n_messages: int):
    """Create ``session_id`` and fill it with ``n_messages`` messages.

    Roles alternate, starting with ``user`` at index 0 and ``assistant`` at
    index 1; message ``i`` carries the content ``"msg i"``. Returns the ids
    handed back by ``append_message``, in append order.
    """
    db.create_session(session_id, source="cli")
    roles = ("user", "assistant")
    return [
        db.append_message(session_id, role=roles[i % 2], content=f"msg {i}")
        for i in range(n_messages)
    ]
 def test_returns_window_around_anchor_in_middle(db):
    """A mid-session anchor yields ``window`` rows on each side plus itself."""
    seeded = _seed_session(db, "s1", 11)
    rows = db.get_messages_around("s1", seeded[5], window=3)
    # 3 before + the anchor + 3 after.
    assert len(rows) == 7
    # No row may come from another session.
    assert not [m for m in rows if m["session_id"] != "s1"]
    # Ids ascend and form the contiguous slice centred on index 5.
    assert [m["id"] for m in rows] == seeded[2:9]
 def test_anchor_at_first_message_returns_only_after_slice(db):
    """Anchoring on the first message leaves nothing on the "before" side."""
    seeded = _seed_session(db, "s1", 8)
    rows = db.get_messages_around("s1", seeded[0], window=3)
    # The anchor plus the 3 messages that follow it.
    assert len(rows) == 4
    assert [m["id"] for m in rows] == seeded[0:4]
 def test_anchor_at_last_message_returns_only_before_slice(db):
    """Anchoring on the last message leaves nothing on the "after" side."""
    seeded = _seed_session(db, "s1", 8)
    rows = db.get_messages_around("s1", seeded[-1], window=3)
    # The 3 messages preceding the anchor, then the anchor itself.
    assert len(rows) == 4
    assert [m["id"] for m in rows] == seeded[-4:]
 def test_anchor_not_in_session_returns_empty_list(db):
    """An anchor id that belongs to a different session yields no rows."""
    first_ids = _seed_session(db, "s1", 5)
    _seed_session(db, "s2", 5)
    # Query session s2 with a message id that exists, but only in s1.
    foreign_anchor = first_ids[2]
    assert db.get_messages_around("s2", foreign_anchor, window=3) == []
 def test_does_not_leak_across_sessions(db):
    """The window never spills into a neighbouring session's id range."""
    # s2 is seeded right after s1, so its message ids follow s1's.
    first_ids = _seed_session(db, "s1", 5)
    second_ids = _seed_session(db, "s2", 5)
    # Anchoring on s1's final message must not pull in the s2 rows "after" it.
    rows = db.get_messages_around("s1", first_ids[-1], window=3)
    assert not [m for m in rows if m["session_id"] != "s1"]
    returned_ids = {m["id"] for m in rows}
    assert returned_ids <= set(first_ids)
    assert not (returned_ids & set(second_ids))
 def test_window_larger_than_session_returns_full_session(db):
    """An oversized window simply returns the whole session, id-ascending."""
    seeded = _seed_session(db, "s1", 4)
    rows = db.get_messages_around("s1", seeded[1], window=100)
    returned_ids = [m["id"] for m in rows]
    assert returned_ids == seeded
 def test_window_zero_returns_only_anchor(db):
    """``window=0`` returns exactly one row: the anchor."""
    seeded = _seed_session(db, "s1", 5)
    anchor_id = seeded[2]
    rows = db.get_messages_around("s1", anchor_id, window=0)
    assert [m["id"] for m in rows] == [anchor_id]
 def test_negative_window_treated_as_zero(db):
    """A negative window behaves like ``window=0``: only the anchor returns."""
    seeded = _seed_session(db, "s1", 5)
    anchor_id = seeded[2]
    rows = db.get_messages_around("s1", anchor_id, window=-3)
    assert [m["id"] for m in rows] == [anchor_id]
 def test_decodes_content_like_get_messages(db):
    """``get_messages_around`` hands back content in the same shape as
    ``get_messages``, so callers can swap between the two without surprises."""
    seeded = _seed_session(db, "s1", 3)
    # window=1 around the middle of three messages covers the whole session.
    windowed_content = [
        m["content"] for m in db.get_messages_around("s1", seeded[1], window=1)
    ]
    full_content = [m["content"] for m in db.get_messages("s1")]
    assert windowed_content == full_content
--- a/tests/test_hermes_state.py
+++ b/tests/test_hermes_state.py
@@ -2494,6 +2494,103 @@ class TestExcludeSources:
        sources = [r["source"] for r in results]
        assert sources == ["cli"]
    def test_search_messages_sort_newest_orders_by_timestamp_desc(self, db):
        """With ``sort='newest'`` results come back newest-first.

        All three messages carry identical text, so the only thing that
        separates them is the explicit timestamp written below.
        """
        session_names = ("old_sid", "mid_sid", "new_sid")
        for name in session_names:
            db.create_session(name, "cli")
        # Identical content everywhere; ids are collected in append order.
        oldest_id, middle_id, newest_id = [
            db.append_message(name, "user", "matchword discussion")
            for name in session_names
        ]
        # Overwrite the stored timestamps with well-separated values.
        restamps = (
            ("UPDATE messages SET timestamp=1000 WHERE id=?", oldest_id),
            ("UPDATE messages SET timestamp=2000 WHERE id=?", middle_id),
            ("UPDATE messages SET timestamp=3000 WHERE id=?", newest_id),
        )
        with db._lock:
            for statement, message_id in restamps:
                db._conn.execute(statement, (message_id,))
            db._conn.commit()
        hits = db.search_messages("matchword", sort="newest")
        session_order = [hit["session_id"] for hit in hits]
        assert session_order == ["new_sid", "mid_sid", "old_sid"], (
            f"sort=newest must return newest first; got {session_order}"
        )
    def test_search_messages_sort_oldest_orders_by_timestamp_asc(self, db):
        """With ``sort='oldest'`` the earliest match comes first.

        This is the mirror of ``sort='newest'`` and serves "how did X start"
        questions. Timestamps are deliberately assigned out of creation order
        so the result cannot be explained by insertion order.
        """
        session_names = ("a", "b", "c")
        for name in session_names:
            db.create_session(name, "cli")
        id_a, id_b, id_c = [
            db.append_message(name, "user", "matchword")
            for name in session_names
        ]
        restamps = (
            ("UPDATE messages SET timestamp=3000 WHERE id=?", id_a),
            ("UPDATE messages SET timestamp=1000 WHERE id=?", id_b),
            ("UPDATE messages SET timestamp=2000 WHERE id=?", id_c),
        )
        with db._lock:
            for statement, message_id in restamps:
                db._conn.execute(statement, (message_id,))
            db._conn.commit()
        hits = db.search_messages("matchword", sort="oldest")
        session_order = [hit["session_id"] for hit in hits]
        assert session_order == ["b", "c", "a"], (
            f"sort=oldest must return earliest first; got {session_order}"
        )
    def test_search_messages_sort_unset_preserves_rank_ordering(self, db):
        """Omitting ``sort`` keeps the pre-existing rank-based ordering.

        The same keyword appears once in a short message and once in a much
        longer one; the short message is expected first even though it is
        given the older timestamp, showing that recency is not what decides.
        """
        for name in ("short_sid", "long_sid"):
            db.create_session(name, "cli")
        short_id = db.append_message("short_sid", "user", "matchword.")
        padded_text = "matchword " + ("padding " * 200)
        long_id = db.append_message("long_sid", "user", padded_text)
        # The short message gets the OLDER timestamp on purpose.
        restamps = (
            ("UPDATE messages SET timestamp=1000 WHERE id=?", short_id),
            ("UPDATE messages SET timestamp=2000 WHERE id=?", long_id),
        )
        with db._lock:
            for statement, message_id in restamps:
                db._conn.execute(statement, (message_id,))
            db._conn.commit()
        results = db.search_messages("matchword")  # sort omitted
        assert len(results) == 2
        top_session = results[0]["session_id"]
        assert top_session == "short_sid", (
            "Default (no sort) must use FTS5 rank — short_sid should outrank "
            f"the longer message. Got order: {[r['session_id'] for r in results]}"
        )
    def test_search_messages_sort_invalid_value_falls_back_to_rank(self, db):
        """An unrecognised ``sort`` value (e.g. 'sideways') must not raise and
        must order results exactly as if ``sort`` had been omitted. This gives
        callers that reach SessionDB directly the same forgiveness as the
        tool-layer normalisation."""
        db.create_session("short_sid", "cli")
        db.create_session("long_sid", "cli")
        m_short = db.append_message("short_sid", "user", "matchword.")
        m_long = db.append_message(
            "long_sid", "user", "matchword " + ("padding " * 200)
        )
        with db._lock:
            db._conn.execute("UPDATE messages SET timestamp=1000 WHERE id=?", (m_short,))
            db._conn.execute("UPDATE messages SET timestamp=2000 WHERE id=?", (m_long,))
            db._conn.commit()
        # Garbage sort should behave the same as no sort.
        default_order = [r["session_id"] for r in db.search_messages("matchword")]
        garbage_order = [
            r["session_id"] for r in db.search_messages("matchword", sort="sideways")
        ]
        # Guard against a vacuous pass: if both searches came back empty the
        # equality below would still hold, so require both seeded hits first.
        assert sorted(default_order) == ["long_sid", "short_sid"], (
            f"expected both seeded sessions to match; got {default_order}"
        )
        assert garbage_order == default_order
 class TestResolveSessionByNameOrId:
    """Tests for the main.py helper that resolves names or IDs."""
--- a/tests/tools/test_session_search.py
+++ b/tests/tools/test_session_search.py
--- a/tools/session_search_tool.py
+++ b/tools/session_search_tool.py
@@ -2,19 +2,16 @@
 """
 Session Search Tool - Long-Term Conversation Recall
-Searches past session transcripts in SQLite via FTS5, then summarizes the top
+Searches past session transcripts in SQLite via FTS5. Keyword search defaults
-matching sessions using the configured auxiliary session_search model (same
+to fast snippet/context hits without any LLM call; callers can opt into focused
-pattern as web_extract). By default, auxiliary "auto" routing uses the main
+LLM summaries with mode="summary" when deeper recall is worth the latency.
 Summary mode's auxiliary "auto" routing uses the main chat provider/model unless the user overrides auxiliary.session_search.
 Returns focused summaries of past conversations rather than raw transcripts,
 keeping the main model's context window clean.
 Flow:
  1. FTS5 search finds matching messages ranked by relevance
  2. Groups by session, takes the top N unique sessions (default 3)
-  3. Loads each session's conversation, truncates to ~100k chars centered on matches
+  3. Fast mode returns snippets and nearby context immediately
-  4. Sends to the configured auxiliary model with a focused summarization prompt
+  4. Summary mode loads each session, truncates around matches, and calls an LLM
-  5. Returns per-session summaries with metadata
+  5. Returns per-session hits/summaries with metadata
 """
 import asyncio
@@ -26,6 +23,62 @@ from typing import Dict, Any, List, Optional, Union
 from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
 # Character budget for one session transcript. The docstrings in this module
 # describe summary mode truncating to ~100k chars; the code that applies the
 # limit is outside this view — NOTE(review): confirm against the caller.
 MAX_SESSION_CHARS = 100_000
 # Default mode is fast unless the user sets ``auxiliary.session_search.default_mode``
 # in ~/.hermes/config.yaml. Only ``fast`` and ``summary`` are valid — guided
 # requires anchors. Resolver is lru_cache-wrapped so the YAML read happens at
 # most once per process; restart to pick up config changes.
 _VALID_DEFAULT_MODES = ("fast", "summary")
 _FALLBACK_DEFAULT_MODE = "fast"
 def _resolve_user_default_mode() -> str:
    """Look up ``auxiliary.session_search.default_mode`` from ~/.hermes/config.yaml.
    Returns ``_FALLBACK_DEFAULT_MODE`` (``"fast"``) if unset, invalid, or the
    config loader is unavailable (e.g. tests, tools loaded outside the CLI).
    Logs a one-time warning on invalid values so users get feedback when they
    typo their config.
    """
    try:
        from hermes_cli.config import load_config
        config = load_config() or {}
    except ImportError:
        logging.debug("hermes_cli.config not available; default_mode falls back to %r", _FALLBACK_DEFAULT_MODE)
        return _FALLBACK_DEFAULT_MODE
    except Exception as e:
        logging.debug("Failed to load config for session_search default_mode: %s", e, exc_info=True)
        return _FALLBACK_DEFAULT_MODE
    raw = (
        config.get("auxiliary", {})
        .get("session_search", {})
        .get("default_mode")
    )
    if raw is None:
        return _FALLBACK_DEFAULT_MODE
    if not isinstance(raw, str):
        logging.warning(
            "auxiliary.session_search.default_mode in config.yaml must be a string, got %r — falling back to %r",
            raw, _FALLBACK_DEFAULT_MODE,
        )
        return _FALLBACK_DEFAULT_MODE
    normalised = raw.strip().lower()
    if normalised not in _VALID_DEFAULT_MODES:
        logging.warning(
            "auxiliary.session_search.default_mode=%r is not one of %s — falling back to %r. "
            "(guided requires anchors and cannot be a default.)",
            raw, _VALID_DEFAULT_MODES, _FALLBACK_DEFAULT_MODE,
        )
        return _FALLBACK_DEFAULT_MODE
    return normalised
 # Memoise the resolver for the lifetime of the process so repeated
 # session_search calls don't re-read the YAML config. The module-level name is
 # rebound to the cached wrapper, so every later lookup of
 # _resolve_user_default_mode goes through the cache. Tests can reset it with
 # _resolve_user_default_mode.cache_clear() when needed.
 import functools  # noqa: E402  — local to the cache wrap
 _resolve_user_default_mode = functools.lru_cache(maxsize=1)(_resolve_user_default_mode)
 # Passed as ``max_tokens`` to the summarisation LLM call in _summarize_session.
 MAX_SUMMARY_TOKENS = 10000
@@ -197,8 +250,16 @@ def _truncate_around_matches(
 async def _summarize_session(
    conversation_text: str, query: str, session_meta: Dict[str, Any]
-) -> Optional[str]:
+) -> tuple[Optional[str], Optional[Dict[str, Any]]]:
-    """Summarize a single session conversation focused on the search query."""
+    """Summarize a single session conversation focused on the search query.
    Returns ``(content, usage)`` where ``usage`` is a dict with
    ``{model, input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens}``
    parsed from the aux LLM response, or ``None`` when the model didn't surface
    usage data. The usage dict lets callers attribute the cost of summary-mode
    aux calls back to the parent session — without this, summary-mode spend is
    invisible to per-session accounting.
    """
    system_prompt = (
        "You are reviewing a past conversation transcript to help recall what happened. "
        "Summarize the conversation with a focus on the search topic. Include:\n"
@@ -235,17 +296,18 @@ async def _summarize_session(
                max_tokens=MAX_SUMMARY_TOKENS,
            )
            content = extract_content_or_reasoning(response)
            usage = _extract_aux_usage(response)
            if content:
-                return content
+                return content, usage
            # Reasoning-only / empty — let the retry loop handle it
            logging.warning("Session search LLM returned empty content (attempt %d/%d)", attempt + 1, max_retries)
            if attempt < max_retries - 1:
                await asyncio.sleep(1 * (attempt + 1))
                continue
-            return content
+            return content, usage
        except RuntimeError:
            logging.warning("No auxiliary model available for session summarization")
-            return None
+            return None, None
        except Exception as e:
            if attempt < max_retries - 1:
                await asyncio.sleep(1 * (attempt + 1))
@@ -256,7 +318,48 @@ async def _summarize_session(
                    e,
                    exc_info=True,
                )
-                return None
+                return None, None
 def _extract_aux_usage(response: Any) -> Optional[Dict[str, Any]]:
    """Pull usage data off an aux LLM response, normalising provider variants.
    Returns ``None`` when the response carries no usage info (test mocks,
    providers that don't surface it). Returns a dict with the fields we care
    about for cost attribution otherwise. Reads both OpenAI-style
    (``prompt_tokens``/``completion_tokens``) and Anthropic-style
    (``input_tokens``/``output_tokens``) usage shapes.
    """
    usage = getattr(response, "usage", None)
    if not usage:
        return None
    # Provider variants — read whichever is populated.
    input_tokens = (
        getattr(usage, "input_tokens", None)
        or getattr(usage, "prompt_tokens", None)
        or 0
    )
    output_tokens = (
        getattr(usage, "output_tokens", None)
        or getattr(usage, "completion_tokens", None)
        or 0
    )
    # Anthropic prompt-caching fields.
    cache_read = getattr(usage, "cache_read_input_tokens", None) or 0
    cache_create = getattr(usage, "cache_creation_input_tokens", None) or 0
    # OpenAI-style cached tokens may live under prompt_tokens_details.
    if not cache_read:
        details = getattr(usage, "prompt_tokens_details", None)
        if details:
            cache_read = getattr(details, "cached_tokens", 0) or 0
    model = getattr(response, "model", None)
    return {
        "model": model,
        "input_tokens": int(input_tokens or 0),
        "output_tokens": int(output_tokens or 0),
        "cache_read_tokens": int(cache_read or 0),
        "cache_creation_tokens": int(cache_create or 0),
    }
 # Sources that are excluded from session browsing/searching by default.
@@ -322,19 +425,380 @@ def _list_recent_sessions(db, limit: int, current_session_id: str = None) -> str
        return tool_error(f"Failed to list recent sessions: {e}", success=False)
 def _guided_drill_down(
    db,
    session_id: str,
    around_message_id,
    window: int,
    current_session_id: str = None,
    anchors: Optional[List[Dict[str, Any]]] = None,
 ) -> str:
    """Anchored drill-down for ``mode='guided'`` of ``session_search``.
    Returns a JSON string carrying one or more windows of messages — each
    centred on a specific message id in a specific session. No FTS5, no
    auxiliary LLM, no 100k-char truncation — N indexed DB lookups (where
    N = number of anchors).
    Two input shapes (use one):
      * **Single anchor** (back-compat): pass ``session_id`` and
        ``around_message_id`` directly. Internally normalised to a single-
        element ``anchors`` list. Response always carries ``windows``
        as a list, plus the legacy single-anchor fields at the top level
        when there's exactly one anchor.
      * **Multi-anchor**: pass ``anchors=[{"session_id":..., "around_message_id":...}, ...]``.
        The agent picks the most promising K hits from a wider fast call
        and drills into all of them at once — same conversation in the
        steering loop, more context per turn.
    Each anchor is validated independently. Per-anchor failures (missing
    session, anchor not in session, current-lineage rejection) become
    error entries inside the response's ``windows`` list rather than
    aborting the whole call. ``window`` is shared across all anchors
    and clamped to ``[1, 20]`` (silent, matches the existing limit-clamp
    pattern).
    """
    # 1. Normalise inputs into a single ``anchors`` list. Three shapes:
    #    (a) anchors= parameter is set (preferred for multi-anchor)
    #    (b) session_id + around_message_id (single-anchor back-compat)
    #    (c) neither set → user-facing error
    if anchors:
        if not isinstance(anchors, list):
            return tool_error(
                "guided mode: 'anchors' must be a list of {session_id, around_message_id} dicts",
                success=False,
            )
        normalised_anchors = anchors
    elif session_id or around_message_id is not None:
        normalised_anchors = [{
            "session_id": session_id,
            "around_message_id": around_message_id,
        }]
    else:
        return tool_error(
            "guided mode requires either anchors=[...] or session_id+around_message_id "
            "(use match_message_id+session_id from a prior fast-mode hit)",
            success=False,
        )
    if len(normalised_anchors) == 0:
        return tool_error(
            "guided mode: anchors list is empty (pass at least one {session_id, around_message_id})",
            success=False,
        )
    # 2. Window clamp (shared across all anchors). Matches the existing
    #    limit-clamp pattern (silent).
    if not isinstance(window, int):
        try:
            window = int(window)
        except (TypeError, ValueError):
            window = 5
    window = max(1, min(window, 20))
    # 3. Helper: resolve to lineage root (used by the current-lineage
    #    rejection check below).
    def _resolve_to_parent(sid: str) -> str:
        visited = set()
        cur = sid
        while cur and cur not in visited:
            visited.add(cur)
            try:
                meta = db.get_session(cur)
                if not meta:
                    break
                parent = meta.get("parent_session_id")
                if parent:
                    cur = parent
                else:
                    break
            except Exception as e:
                logging.debug("Error resolving parent for %s: %s", cur, e, exc_info=True)
                break
        return cur
    current_root = _resolve_to_parent(current_session_id) if current_session_id else None
    # 4. Drill into each anchor. Per-anchor errors are recorded inline
    #    rather than aborting the whole call — the agent can still use
    #    successful drills even if one anchor was malformed.
    windows_out: List[Dict[str, Any]] = []
    for raw_anchor in normalised_anchors:
        if not isinstance(raw_anchor, dict):
            windows_out.append({
                "success": False,
                "error": "anchor must be a dict with session_id + around_message_id",
            })
            continue
        a_sid = raw_anchor.get("session_id")
        a_msg = raw_anchor.get("around_message_id")
        if not a_sid or not isinstance(a_sid, str) or not a_sid.strip():
            windows_out.append({
                "success": False,
                "error": "anchor missing session_id",
                "anchor": raw_anchor,
            })
            continue
        a_sid = a_sid.strip()
        try:
            a_msg_id = int(a_msg)
        except (TypeError, ValueError):
            windows_out.append({
                "success": False,
                "error": "anchor missing or non-integer around_message_id",
                "anchor": raw_anchor,
            })
            continue
        # Current-lineage rejection: per-anchor, so other valid anchors
        # in a multi-anchor call still drill.
        if current_root:
            target_root = _resolve_to_parent(a_sid)
            if target_root and target_root == current_root:
                windows_out.append({
                    "success": False,
                    "error": "anchor rejects drill-down into the current session lineage — those messages are already in your active context",
                    "session_id": a_sid,
                    "around_message_id": a_msg_id,
                })
                continue
        # Session existence check.
        try:
            session_meta = db.get_session(a_sid) or {}
        except Exception as e:
            logging.debug("get_session failed for %s: %s", a_sid, e, exc_info=True)
            session_meta = {}
        if not session_meta:
            windows_out.append({
                "success": False,
                "error": f"session_id not found: {a_sid}",
                "session_id": a_sid,
                "around_message_id": a_msg_id,
            })
            continue
        # Fetch the window + bookends. ``get_anchored_view`` filters tool-response
        # noise from the window (anchor itself is preserved regardless of role)
        # and returns up to ``bookend`` user/assistant messages from the session
        # head and tail — but only when those slices don't overlap the window.
        # See SessionDB.get_anchored_view for the contract.
        try:
            view = db.get_anchored_view(a_sid, a_msg_id, window=window, bookend=3)
            messages = view.get("window") or []
            bookend_start = view.get("bookend_start") or []
            bookend_end = view.get("bookend_end") or []
        except Exception as e:
            logging.debug("get_anchored_view failed: %s", e, exc_info=True)
            windows_out.append({
                "success": False,
                "error": f"failed to load messages around {a_msg_id} in {a_sid}: {e}",
                "session_id": a_sid,
                "around_message_id": a_msg_id,
            })
            continue
        # Safety net: the agent (or memory, or a legacy caller) may pair a
        # parent/lineage-root session_id with a message_id that actually
        # lives in a descendant (child) session. Before this commit, fast
        # mode returned exactly that broken pair. We now emit the matching
        # raw sid in fast mode, but guided should remain forgiving for
        # callers that haven't updated yet.
        #
        # Recovery rule: locate the real owning session by message id; if
        # that session is in the same lineage as ``a_sid``, transparently
        # rebind and refetch. Record a warning so the rebind is visible.
        rebind_warning = None
        if not messages:
            owning = None
            # Prefer a helper if SessionDB exposes one (forward-compat).
            try:
                if hasattr(db, "get_session_id_for_message"):
                    owning = db.get_session_id_for_message(a_msg_id)
            except Exception as e:
                logging.debug("get_session_id_for_message failed: %s", e, exc_info=True)
                owning = None
            # Fallback: query through SessionDB._conn (the canonical connection).
            if not owning:
                try:
                    conn = getattr(db, "_conn", None)
                    if conn is not None:
                        row = conn.execute(
                            "SELECT session_id FROM messages WHERE id = ?",
                            (a_msg_id,),
                        ).fetchone()
                        # sqlite3.Row supports indexing; tuple fallback works too.
                        owning = row[0] if row else None
                except Exception as e:
                    logging.debug("owning-session lookup failed: %s", e, exc_info=True)
                    owning = None
            if owning and owning != a_sid:
                # Check same lineage (walk both up to roots).
                a_root = _resolve_to_parent(a_sid)
                o_root = _resolve_to_parent(owning)
                if a_root and o_root and a_root == o_root:
                    try:
                        rebind_view = db.get_anchored_view(
                            owning, a_msg_id, window=window, bookend=3
                        )
                        messages = rebind_view.get("window") or []
                        bookend_start = rebind_view.get("bookend_start") or []
                        bookend_end = rebind_view.get("bookend_end") or []
                    except Exception as e:
                        logging.debug("rebind get_anchored_view failed: %s", e, exc_info=True)
                        messages = []
                    if messages:
                        rebind_warning = (
                            f"around_message_id {a_msg_id} lives in {owning} "
                            f"(child of {a_sid}); rebound transparently"
                        )
                        # Re-fetch session_meta for the actual owning session.
                        try:
                            session_meta = db.get_session(owning) or session_meta
                        except Exception:
                            pass
                        a_sid = owning
        if not messages:
            windows_out.append({
                "success": False,
                "error": f"around_message_id {a_msg_id} not in session_id {a_sid}",
                "session_id": a_sid,
                "around_message_id": a_msg_id,
            })
            continue
        # Wrap with anchor flag + boundary counts.
        out_messages = []
        messages_before = 0
        messages_after = 0
        for m in messages:
            is_anchor = m.get("id") == a_msg_id
            if not is_anchor and m.get("id", 0) < a_msg_id:
                messages_before += 1
            elif not is_anchor:
                messages_after += 1
            entry = {
                "id": m.get("id"),
                "role": m.get("role"),
                "content": m.get("content"),
                "tool_name": m.get("tool_name"),
                "tool_calls": m.get("tool_calls") or None,
                "tool_call_id": m.get("tool_call_id"),
                "timestamp": m.get("timestamp"),
            }
            if is_anchor:
                entry["anchor"] = True
            # Strip None-valued optional fields to keep payload tight (keep
            # 'content' even if None, since absent-content is meaningful).
            entry = {k: v for k, v in entry.items() if v is not None or k in ("content",)}
            out_messages.append(entry)
        def _shape_bookend(m: Dict[str, Any]) -> Dict[str, Any]:
            entry = {
                "id": m.get("id"),
                "role": m.get("role"),
                "content": m.get("content"),
                "timestamp": m.get("timestamp"),
            }
            return {k: v for k, v in entry.items() if v is not None or k in ("content",)}
        out_bookend_start = [_shape_bookend(m) for m in bookend_start]
        out_bookend_end = [_shape_bookend(m) for m in bookend_end]
        success_entry = {
            "success": True,
            "session_id": a_sid,
            "around_message_id": a_msg_id,
            "session_meta": {
                "when": _format_timestamp(session_meta.get("started_at")),
                "source": session_meta.get("source"),
                "model": session_meta.get("model"),
                "title": session_meta.get("title"),
            },
            "messages": out_messages,
            "messages_before": messages_before,
            "messages_after": messages_after,
            "bookend_start": out_bookend_start,
            "bookend_end": out_bookend_end,
        }
        if rebind_warning:
            success_entry["warning"] = rebind_warning
        windows_out.append(success_entry)
    # 5. Top-level response shape. ``windows`` is always a list. For
    #    single-anchor calls (the common case), we mirror the legacy fields
    #    at the top level so existing callers / tests continue to work
    #    without branching on len(windows).
    response: Dict[str, Any] = {
        "success": True,
        "mode": "guided",
        "window": window,
        "windows": windows_out,
        "anchor_count": len(windows_out),
    }
    if len(windows_out) == 1:
        only = windows_out[0]
        if only.get("success"):
            response.update({
                "session_id": only["session_id"],
                "around_message_id": only["around_message_id"],
                "session_meta": only["session_meta"],
                "messages": only["messages"],
                "messages_before": only["messages_before"],
                "messages_after": only["messages_after"],
                "bookend_start": only.get("bookend_start", []),
                "bookend_end": only.get("bookend_end", []),
            })
            if only.get("warning"):
                response["warning"] = only["warning"]
        else:
            # Single-anchor failure: surface as a top-level tool_error so
            # callers don't have to dig into the windows array for the
            # error string. Keeps the legacy single-anchor failure shape.
            return tool_error(only.get("error", "guided drill-down failed"), success=False)
    return json.dumps(response, ensure_ascii=False)
 def session_search(
-    query: str,
+    query: str = "",
    role_filter: str = None,
    limit: int = 3,
    db=None,
    current_session_id: str = None,
    mode: str = None,
    # Guided-mode-only parameters: anchored drill-down into one or more
    # session+message pairs. Required when mode='guided', ignored otherwise.
    # Use either the single-anchor pair (session_id + around_message_id) or
    # the multi-anchor list (anchors=[{session_id, around_message_id}, ...]).
    session_id: str = None,
    around_message_id: int = None,
    window: int = 5,
    anchors: list = None,
    # Fast-mode-only temporal bias for ranking. ``None`` keeps FTS5's BM25
    # ordering (time-neutral); ``"newest"`` / ``"oldest"`` make timestamp
    # the primary key with rank as the tiebreaker. Silently ignored in
    # other modes — see schema description.
    sort: str = None,
 ) -> str:
    """
-    Search past sessions and return focused summaries of matching conversations.
+    Search past sessions, or drill into a specific one.
-    Uses FTS5 to find matches, then summarizes the top sessions with the
+    Modes:
-    configured auxiliary session_search model.
+      * fast    — FTS5 snippets + ±1 message context. Cheap discovery.
-    The current session is excluded from results since the agent already has that context.
+      * summary — fetch full session(s), truncate to 100k chars, run aux LLM
                  recap. Cross-session synthesis at ~30s tool-side cost.
      * guided  — anchored drill-down. Caller supplies session_id +
                  around_message_id (typically from a prior fast hit's
                  match_message_id field) and gets a window of messages
                  around the anchor with no LLM call and no truncation.
    """
    if db is None:
        try:
@@ -346,6 +810,52 @@ def session_search(
            from hermes_state import format_session_db_unavailable
            return tool_error(format_session_db_unavailable(), success=False)
    # Mode normalisation. ``None`` / empty string / non-string → fall back to
    # the user's configured default (via ~/.hermes/config.yaml, see
    # ``_resolve_user_default_mode``). Defaults to "fast" if unset. An explicit
    # "fast" / "summary" / "guided" (or one of the aliases mapped below) wins
    # regardless of config. An unknown string also resolves to the user
    # default rather than to a hard-coded mode, so a typo degrades to the
    # user's configured preference instead of overriding it.
    if not isinstance(mode, str) or not mode.strip():
        mode = _resolve_user_default_mode()
    else:
        mode = mode.strip().lower()
    if mode in ("summarized", "summarise", "summarize", "deep"):
        mode = "summary"
    if mode in ("drill", "drilldown", "drill-down", "anchor", "around"):
        mode = "guided"
    if mode not in ("fast", "summary", "guided"):
        mode = _resolve_user_default_mode()
    # Normalise sort — only "newest"/"oldest" are accepted; anything else
    # collapses to None (FTS5 rank-only). Sort affects fast mode only; logged
    # and ignored elsewhere so misuse is visible but non-fatal.
    sort_norm: Optional[str] = None
    if isinstance(sort, str):
        candidate = sort.strip().lower()
        if candidate in ("newest", "oldest"):
            sort_norm = candidate
    if sort_norm and mode != "fast":
        logging.debug(
            "session_search: sort=%r is fast-mode only; ignored for mode=%s",
            sort_norm, mode,
        )
        sort_norm = None
    # Guided mode is a different shape: it doesn't search, it drills. Branch
    # before FTS5 so we don't pay for anything we don't use, and so missing-arg
    # validation happens up front.
    if mode == "guided":
        return _guided_drill_down(
            db=db,
            session_id=session_id,
            around_message_id=around_message_id,
            window=window,
            current_session_id=current_session_id,
            anchors=anchors,
        )
    # Defensive: models (especially open-source) may send non-int limit values
    # (None when JSON null, string "int", or even a type object).  Coerce to a
    # safe integer before any arithmetic/comparison to prevent TypeError.
@@ -354,7 +864,7 @@ def session_search(
            limit = int(limit)
        except (TypeError, ValueError):
            limit = 3
-    limit = max(1, min(limit, 5))  # Clamp to [1, 5]
+    limit = max(1, min(limit, 10))  # Clamp to [1, 10]
    # Recent sessions mode: when query is empty, return metadata for recent sessions.
    # No LLM calls — just DB queries for titles, previews, timestamps.
@@ -364,23 +874,30 @@ def session_search(
    query = query.strip()
    try:
-        # Parse role filter
+        # Parse role filter. Defaults to user+assistant; tool messages are
        # usually noisy and rarely the signal. Caller opts back in via
        # role_filter='user,assistant,tool' or 'tool'.
        role_list = None
        if role_filter and role_filter.strip():
            role_list = [r.strip() for r in role_filter.split(",") if r.strip()]
        else:
            role_list = ["user", "assistant"]
-        # FTS5 search -- get matches ranked by relevance
+        # FTS5 search -- get matches ranked by relevance (with optional
        # temporal bias when sort is set; see param docs).
        raw_results = db.search_messages(
            query=query,
            role_filter=role_list,
            exclude_sources=list(_HIDDEN_SESSION_SOURCES),
            limit=50,  # Get more matches to find unique sessions
            offset=0,
            sort=sort_norm,
        )
        if not raw_results:
            return json.dumps({
                "success": True,
                "mode": mode,
                "query": query,
                "results": [],
                "count": 0,
@@ -421,6 +938,13 @@ def session_search(
        # Group by resolved (parent) session_id, dedup, skip the current
        # session lineage. Compression and delegation create child sessions
        # that still belong to the same active conversation.
        #
        # IMPORTANT: group BY parent (one entry per conversation lineage), but
        # preserve the raw FTS5 session_id on the surviving result. Only the
        # raw sid pairs validly with ``match_message_id``; rewriting it to the
        # parent produces a {parent_sid, child_message_id} handle that guided
        # mode cannot resolve. ``parent_session_id`` is exposed separately for
        # the lineage-root link the user expects to see.
        seen_sessions = {}
        for result in raw_results:
            raw_sid = result["session_id"]
@@ -433,11 +957,61 @@ def session_search(
                continue
            if resolved_sid not in seen_sessions:
                result = dict(result)
-                result["session_id"] = resolved_sid
+                # Keep raw_sid as session_id; expose lineage root separately.
                result["session_id"] = raw_sid
                if resolved_sid and resolved_sid != raw_sid:
                    result["parent_session_id"] = resolved_sid
                seen_sessions[resolved_sid] = result
            if len(seen_sessions) >= limit:
                break
        if mode == "fast":
            results = []
            for lineage_root, match_info in seen_sessions.items():
                # Emit (raw_sid + match_message_id) so the agent's follow-up
                # guided call has a valid {session_id, around_message_id}.
                # ``parent_session_id`` (if different) carries the lineage root.
                hit_sid = match_info.get("session_id") or lineage_root
                try:
                    session_meta = db.get_session(lineage_root) or {}
                except Exception:
                    session_meta = {}
                snippet = match_info.get("snippet") or ""
                context = match_info.get("context") or []
                if not isinstance(context, list):
                    context = []
                entry = {
                    "session_id": hit_sid,
                    "when": _format_timestamp(
                        session_meta.get("started_at") or match_info.get("session_started")
                    ),
                    "source": session_meta.get("source") or match_info.get("source", "unknown"),
                    "model": session_meta.get("model") or match_info.get("model") or "unknown",
                    "matched_role": match_info.get("role"),
                    "match_message_id": match_info.get("id"),
                    "title": session_meta.get("title") or None,
                    "snippet": snippet,
                    "context": context,
                    "summary": "[Search hit — summary not generated in fast mode] Use snippet/context fields, or set mode='summary' for LLM-generated recall.",
                }
                # Only emit parent_session_id when the FTS5 row lives in a
                # child of the displayed lineage — keeps the common case
                # (no delegation/compression) tidy.
                parent_sid = match_info.get("parent_session_id")
                if parent_sid and parent_sid != hit_sid:
                    entry["parent_session_id"] = parent_sid
                results.append(entry)
            return json.dumps({
                "success": True,
                "mode": "fast",
                "query": query,
                "results": results,
                "count": len(results),
                "sessions_searched": len(seen_sessions),
                "message": "Fast search returned FTS snippets without LLM summarization. Use mode='summary' for focused summaries when needed.",
            }, ensure_ascii=False)
        # Prepare all sessions for parallel summarization
        tasks = []
        for session_id, match_info in seen_sessions.items():
@@ -458,12 +1032,12 @@ def session_search(
                )
        # Summarize all sessions in parallel
-        async def _summarize_all() -> List[Union[str, Exception]]:
+        async def _summarize_all() -> List[Union[tuple, Exception]]:
            """Summarize all sessions with bounded concurrency."""
            max_concurrency = min(_get_session_search_max_concurrency(), max(1, len(tasks)))
            semaphore = asyncio.Semaphore(max_concurrency)
-            async def _bounded_summary(text: str, meta: Dict[str, Any]) -> Optional[str]:
+            async def _bounded_summary(text: str, meta: Dict[str, Any]):
                async with semaphore:
                    return await _summarize_session(text, query, meta)
@@ -493,13 +1067,27 @@ def session_search(
            }, ensure_ascii=False)
        summaries = []
        aux_total = {
            "model": None,
            "input_tokens": 0,
            "output_tokens": 0,
            "cache_read_tokens": 0,
            "cache_creation_tokens": 0,
            "call_count": 0,
        }
        for (session_id, match_info, conversation_text, session_meta), result in zip(tasks, results):
            usage: Optional[Dict[str, Any]] = None
            if isinstance(result, Exception):
                logging.warning(
                    "Failed to summarize session %s: %s",
                    session_id, result, exc_info=True,
                )
-                result = None
+                summary_text = None
            elif isinstance(result, tuple):
                summary_text, usage = result
            else:
                # Defensive: a future code path might still return a bare string.
                summary_text, usage = result, None
            # Prefer resolved parent session metadata over FTS5 match metadata.
            # match_info carries source/model from the *child* session that contained
@@ -515,23 +1103,39 @@ def session_search(
                "model": session_meta.get("model") or match_info.get("model"),
            }
-            if result:
+            if summary_text:
-                entry["summary"] = result
+                entry["summary"] = summary_text
            else:
                # Fallback: raw preview so matched sessions aren't silently
                # dropped when the summarizer is unavailable (fixes #3409).
                preview = (conversation_text[:500] + "\n…[truncated]") if conversation_text else "No preview available."
                entry["summary"] = f"[Raw preview — summarization unavailable]\n{preview}"
            if usage:
                entry["aux_usage"] = usage
                aux_total["model"] = aux_total["model"] or usage.get("model")
                aux_total["input_tokens"] += usage["input_tokens"]
                aux_total["output_tokens"] += usage["output_tokens"]
                aux_total["cache_read_tokens"] += usage["cache_read_tokens"]
                aux_total["cache_creation_tokens"] += usage["cache_creation_tokens"]
                aux_total["call_count"] += 1
            summaries.append(entry)
-        return json.dumps({
+        payload = {
            "success": True,
            "mode": "summary",
            "query": query,
            "results": summaries,
            "count": len(summaries),
            "sessions_searched": len(seen_sessions),
-        }, ensure_ascii=False)
+        }
        # Only surface aux_usage_total when we actually captured any (test mocks
        # and providers that don't report usage produce an all-zero/empty dict —
        # don't pollute the payload in that case).
        if aux_total["call_count"]:
            payload["aux_usage_total"] = aux_total
        return json.dumps(payload, ensure_ascii=False)
    except Exception as e:
        logging.error("Session search failed: %s", e, exc_info=True)
@@ -539,7 +1143,7 @@ def session_search(
 def check_session_search_requirements() -> bool:
-    """Requires SQLite state database and an auxiliary text model."""
+    """Requires SQLite state database; summary mode also needs an auxiliary model."""
    try:
        from hermes_state import DEFAULT_DB_PATH
        return DEFAULT_DB_PATH.parent.exists()
@@ -550,44 +1154,101 @@ def check_session_search_requirements() -> bool:
 SESSION_SEARCH_SCHEMA = {
    "name": "session_search",
    "description": (
-        "Search your long-term memory of past conversations, or browse recent sessions. This is your recall -- "
+        "Search past sessions stored in the local session DB. Three modes plus a default "
-        "every past session is searchable, and this tool summarizes what happened.\n\n"
+        "browsing mode when no arguments are passed. All three modes operate on the same "
-        "TWO MODES:\n"
+        "FTS5-indexed message store; they differ in what they return and at what cost.\n\n"
-        "1. Recent sessions (no query): Call with no arguments to see what was worked on recently. "
+        "MODES\n\n"
-        "Returns titles, previews, and timestamps. Zero LLM cost, instant. "
+        "  • mode='fast' — FTS5 snippets across matched sessions. No LLM call. Returns one "
-        "Start here when the user asks what were we working on or what did we do recently.\n"
+        "entry per matched session with session_id, match_message_id, a one-message context "
-        "2. Keyword search (with query): Search for specific topics across all past sessions. "
+        "window, and metadata. Use this as the starting move for any recall question — "
-        "Returns LLM-generated summaries of matching sessions.\n\n"
+        "discovery and state reconstruction both. The match_message_id is the anchor you "
-        "USE THIS PROACTIVELY when:\n"
+        "pass to guided.\n\n"
-        "- The user says 'we did this before', 'remember when', 'last time', 'as I mentioned'\n"
+        "  • mode='guided' — REQUIRES anchors from a prior fast call. Returns a window of "
-        "- The user asks about a topic you worked on before but don't have in current context\n"
+        "raw messages around each anchor plus session bookends (bookend_start, bookend_end). "
-        "- The user references a project, person, or concept that seems familiar but isn't in memory\n"
+        "No LLM call, no truncation. Single or multi-anchor: pass "
-        "- You want to check if you've solved a similar problem before\n"
+        "``anchors=[{session_id, around_message_id}, ...]``. Each anchor returns its own "
-        "- The user asks 'what did we do about X?' or 'how did we fix Y?'\n\n"
+        "window in the response's ``windows`` array. Bookends are the first/last "
-        "Don't hesitate to search when it is actually cross-session -- it's fast and cheap. "
+        "user+assistant messages of the session, empty when the window already overlaps "
-        "Better to search and confirm than to guess or ask the user to repeat themselves.\n\n"
+        "the session head/tail. Tool messages are filtered from the window (the anchor "
-        "Search syntax: keywords joined with OR for broad recall (elevenlabs OR baseten OR funding), "
+        "itself is preserved even if role='tool').\n\n"
-        "phrases for exact match (\"docker networking\"), boolean (python NOT java), prefix (deploy*). "
+        "  • mode='summary' — LLM-generated prose synthesis across matched sessions. Issues "
-        "IMPORTANT: Use OR between keywords for best results — FTS5 defaults to AND which misses "
+        "one auxiliary-model call per session in the hit list, so cost scales with whatever "
-        "sessions that only mention some terms. If a broad OR query returns nothing, try individual "
+        "auxiliary model (or main model fallback) is configured. Returns aux token usage in "
-        "keyword searches in parallel. Returns summaries of the top matching sessions."
+        "the response (``aux_usage`` per call, ``aux_usage_total`` per batch). Reach for "
        "this when you genuinely need cross-session prose synthesis in one shot.\n\n"
        "  • No query, no mode — browses recent sessions chronologically. Returns titles, "
        "previews, timestamps. No LLM call.\n\n"
        "DEFAULT MODE\n\n"
        "  When ``mode=`` is unset, the resolver checks ``auxiliary.session_search.default_mode`` "
        "in ~/.hermes/config.yaml (accepted values: ``fast`` | ``summary``). If the user "
        "has set a default, honour it on the first call. With no config, the default is "
        "``fast``. An explicit ``mode=`` argument always wins.\n\n"
        "ANCHOR CONTRACT\n\n"
        "  An anchor is the pair (session_id, around_message_id). The session_id MUST be "
        "the raw owning session of around_message_id — guided rejects anchors where the "
        "message_id does not exist in the named session. Fast results return both "
        "session_id (raw owning) and parent_session_id (when different, for display "
        "context only). Pair session_id with match_message_id from the same fast hit; do "
        "not substitute parent_session_id.\n\n"
        "FTS5 SYNTAX\n\n"
        "  FTS5 defaults to AND across terms — multi-word queries require all terms to "
        "match. Use OR explicitly for broader recall (``alpha OR beta OR gamma``), quoted "
        "phrases for exact match (``\"docker networking\"``), boolean (``python NOT java``), "
        "or prefix wildcards (``deploy*``).\n\n"
        "WHEN TO USE\n\n"
        "  Before reaching for ``gh``, web search, or filesystem inspection on questions "
        "about prior work — what was discussed, what was decided, where an artefact was "
        "created. The session DB carries what was said when; external tools show current "
        "world state."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
-                "description": "Search query — keywords, phrases, or boolean expressions to find in past sessions. Omit this parameter entirely to browse recent sessions instead (returns titles, previews, timestamps with no LLM cost).",
+                "description": "Search query (modes 'fast' and 'summary'). Keywords, phrases, or boolean expressions to find in past sessions. Omit this parameter entirely to browse recent sessions instead. Ignored when mode='guided'.",
            },
            "role_filter": {
                "type": "string",
-                "description": "Optional: only search messages from specific roles (comma-separated). E.g. 'user,assistant' to skip tool outputs.",
+                "description": "Optional: only search messages from specific roles (comma-separated). Defaults to 'user,assistant' for fast/summary modes — tool messages are usually noisy (large outputs, serialised tool calls). Pass 'user,assistant,tool' to include tool output (debugging tool behaviour) or 'tool' to search tool output only. Ignored when mode='guided'.",
            },
            "limit": {
                "type": "integer",
-                "description": "Max sessions to summarize (default: 3, max: 5).",
+                "description": "Max sessions to return (default: 3, max: 10). Bump higher (5–10) when the user wants to be in the retrieval loop and pick the right anchor for a guided drill-down. Ignored when mode='guided' (which returns one anchored window per anchor).",
                "default": 3,
            },
            "mode": {
                "type": "string",
                "enum": ["fast", "summary", "guided"],
                "description": (
                    "fast — FTS5 snippets, no LLM. Default. "
                    "guided — requires anchors from a prior fast call; returns raw message window per anchor. "
                    "summary — LLM synthesis across matched sessions; opt-in, costs per aux-model call."
                ),
                "default": "fast",
            },
            "anchors": {
                "type": "array",
                "description": "Required for mode='guided'. List of {session_id, around_message_id} dicts to drill into. Copy session_id and match_message_id verbatim from prior fast-mode results — they pair as a single self-consistent handle. Do NOT substitute parent_session_id (shown for display context only; pairs incorrectly with match_message_id). One anchor is fine when the topic lives in a single session; for multi-session catch-up (topic touched across several recent sessions), pass the top 2–3 fast hits as separate anchors in ONE call — each gets its own window + bookends in the response's 'windows' array.",
                "items": {
                    "type": "object",
                    "properties": {
                        "session_id": {"type": "string"},
                        "around_message_id": {"type": "integer"},
                    },
                    "required": ["session_id", "around_message_id"],
                },
            },
            "window": {
                "type": "integer",
                "description": "Mode='guided' only. Number of messages to return on each side of each anchor (the anchor itself is always included). Shared across all anchors in a multi-anchor call. Clamped to [1, 20]. Default 5.",
                "default": 5,
            },
            "sort": {
                "type": "string",
                "enum": ["newest", "oldest"],
                "description": "Mode='fast' only. Temporal bias on top of FTS5 ranking. Omit to keep relevance-only ordering (the default, suitable for exploratory recall — 'what do we know about X'). Set 'newest' for recency-shaped questions ('where did we leave X', 'latest status of Y') so recent matches surface first with rank as the tiebreaker. Set 'oldest' for origin-shaped questions ('how did X start', 'first time we discussed Y') so the earliest matches surface first. Silently ignored in summary / guided / recent modes — for temporal narrative across sessions, drive fast with sort, then drill the right anchors with guided.",
            },
        },
        "required": [],
    },
@@ -605,6 +1266,12 @@ registry.register(
        query=args.get("query") or "",
        role_filter=args.get("role_filter"),
        limit=args.get("limit", 3),
        mode=args.get("mode"),
        session_id=args.get("session_id"),
        around_message_id=args.get("around_message_id"),
        window=args.get("window", 5),
        anchors=args.get("anchors"),
        sort=args.get("sort"),
        db=kw.get("db"),
        current_session_id=kw.get("current_session_id")),
    check_fn=check_session_search_requirements,