style(session_search): tighten verbose inline comments

Pass over comments added during the iterative development of this PR, trimming where they restated the code, repeated themselves, or read as journal-style narration. Net -22 comment lines; behaviour unchanged, 123 tests still passing. Notable trims: - DEFAULT_CONFIG module header: 9 lines → 4. Dropped the 'auxiliary started as aux-LLM routing but in practice groups per-tool config' digression — irrelevant to readers of this module. - get_anchored_view bookend-SQL filter block: 8 lines → 5. The 'let me check…-shaped assistant messages' over-narration is gone; the SQL filter rationale survives. - Fast-mode lineage-grouping IMPORTANT block: 12 lines → 8. The '#regression introduced by the original match_message_id rollout' meta-note removed (the comment now states the contract directly). - Fast-mode result-emission comment: 8 lines → 3. The 'lineage_root is the dict key…' explanation was restating the variables; the load-bearing one-liner (emit raw_sid + match_message_id) stays. - sort normalisation comment: 4 lines → 3. - role_filter parse comment: 5 lines → 3. - ORDER BY comment in search_messages: 3 lines → 2. - LIKE fallback ordering comment: 4 lines → 2.
docs(session_search): document default_mode in cli-config.yaml.example
2026-06-10 12:18:44 +08:00 · 2026-05-15 18:31:21 +02:00 · 2026-05-15 16:48:34 +02:00 · 2026-05-15 16:43:52 +02:00 · 2026-05-15 16:34:08 +02:00 · 2026-05-15 16:30:12 +02:00
12 changed files with 3075 additions and 65 deletions
--- a/cli-config.yaml.example
+++ b/cli-config.yaml.example
@@ -444,6 +444,10 @@ prompt_caching:
 #     model: ""
 #     timeout: 30
 #     max_concurrency: 3    # Limit parallel summaries to reduce request-burst 429s
+#     default_mode: "fast"  # 'fast' | 'summary' — mode used when caller passes none.
+#                           # fast: FTS5 snippet hits, no LLM call. Default.
+#                           # summary: LLM-generated prose synthesis across hits.
+#                           # guided requires anchors and cannot be a default.
 #     extra_body: {}        # Provider-specific OpenAI-compatible request fields
 #                           # Example for providers that support request-body
 #                           # reasoning controls:
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -846,6 +846,7 @@ DEFAULT_CONFIG = {
            "timeout": 30,
            "extra_body": {},
            "max_concurrency": 3,  # Clamp parallel summaries to avoid request-burst 429s on small providers
+            "default_mode": "fast",  # 'fast' | 'summary' — which mode session_search uses when caller passes none
        },
        "skills_hub": {
            "provider": "auto",
--- a/hermes_state.py
+++ b/hermes_state.py
@@ -25,7 +25,7 @@ from pathlib import Path

 from agent.memory_manager import sanitize_context
 from hermes_constants import get_hermes_home
-from typing import Any, Callable, Dict, List, Optional, TypeVar
+from typing import Any, Callable, Dict, List, Optional, Tuple, TypeVar

 logger = logging.getLogger(__name__)

@@ -1618,6 +1618,185 @@ class SessionDB:
            result.append(msg)
        return result

+    def get_messages_around(
+        self,
+        session_id: str,
+        around_message_id: int,
+        window: int = 5,
+    ) -> List[Dict[str, Any]]:
+        """Load a window of messages anchored on a specific message id.
+
+        Returns up to ``window`` messages before the anchor, the anchor itself,
+        and up to ``window`` messages after — all from the same session,
+        ordered by id ascending. Boundaries are honoured: if the anchor is
+        near the start or end of the session, fewer messages are returned on
+        the truncated side.
+
+        If ``around_message_id`` is not a message id within ``session_id``,
+        returns an empty list. Callers decide whether to surface that as an
+        error.
+
+        Used by ``session_search`` mode='guided' to provide anchored
+        drill-down into a specific session at a specific message — without
+        the cost of summarisation or the risk of 100k-char truncation.
+        """
+        if window < 0:
+            window = 0
+        with self._lock:
+            # Confirm the anchor exists in this session — cheap guard against
+            # cross-session contamination if a caller mixes up session/message
+            # ids.
+            anchor_exists = self._conn.execute(
+                "SELECT 1 FROM messages WHERE id = ? AND session_id = ? LIMIT 1",
+                (around_message_id, session_id),
+            ).fetchone()
+            if not anchor_exists:
+                return []
+
+            # Two queries: anchor + before (DESC, take window+1), and after
+            # (ASC, take window). Final order is id ASC.
+            before_rows = self._conn.execute(
+                "SELECT * FROM messages "
+                "WHERE session_id = ? AND id <= ? "
+                "ORDER BY id DESC LIMIT ?",
+                (session_id, around_message_id, window + 1),
+            ).fetchall()
+            after_rows = self._conn.execute(
+                "SELECT * FROM messages "
+                "WHERE session_id = ? AND id > ? "
+                "ORDER BY id ASC LIMIT ?",
+                (session_id, around_message_id, window),
+            ).fetchall()
+
+        # before_rows is DESC; reverse so it's ASC, then concatenate after_rows.
+        rows = list(reversed(before_rows)) + list(after_rows)
+        result = []
+        for row in rows:
+            msg = dict(row)
+            if "content" in msg:
+                msg["content"] = self._decode_content(msg["content"])
+            if msg.get("tool_calls"):
+                try:
+                    msg["tool_calls"] = json.loads(msg["tool_calls"])
+                except (json.JSONDecodeError, TypeError):
+                    logger.warning(
+                        "Failed to deserialize tool_calls in get_messages_around, falling back to []"
+                    )
+                    msg["tool_calls"] = []
+            result.append(msg)
+        return result
+
+    def get_anchored_view(
+        self,
+        session_id: str,
+        around_message_id: int,
+        window: int = 5,
+        bookend: int = 3,
+        keep_roles: Optional[Tuple[str, ...]] = ("user", "assistant"),
+    ) -> Dict[str, Any]:
+        """Return an anchored window plus session bookends, opinionated for guided recall.
+
+        Built on top of ``get_messages_around``:
+          - ``window``: messages immediately surrounding the anchor. Filtered to
+            ``keep_roles`` (tool-response noise dropped by default), EXCEPT the
+            anchor itself is always included regardless of role — callers may
+            have anchored on a tool message and dropping it would break the
+            contract.
+          - ``bookend_start``: first ``bookend`` messages of the session
+            (filtered to ``keep_roles``), but ONLY those whose id sits strictly
+            before the window's first message id. If the window already covers
+            the session start, ``bookend_start`` is an empty list.
+          - ``bookend_end``: last ``bookend`` messages of the session (same
+            filter + non-overlap rule applied at the tail).
+
+        Bookends exist so an FTS5 hit anywhere in a long session still yields
+        the goal (opening) and the resolution (closing) on a single guided
+        call — without the cost of fetching the whole transcript.
+
+        Returns ``{"window": [], "bookend_start": [], "bookend_end": []}``
+        when the anchor isn't in the session — caller decides how to surface that.
+
+        ``keep_roles=None`` disables role filtering entirely; bookends still
+        skip empty-content rows. Pass an explicit tuple to override the default.
+        """
+        if bookend < 0:
+            bookend = 0
+
+        # Reuse the primitive — it already handles the anchor-existence check,
+        # window clamping, content decoding, and tool_calls deserialisation.
+        window_rows = self.get_messages_around(
+            session_id, around_message_id, window=window
+        )
+        if not window_rows:
+            return {"window": [], "bookend_start": [], "bookend_end": []}
+
+        # Apply role filter to the window, but never drop the anchor itself.
+        if keep_roles is not None:
+            keep_set = set(keep_roles)
+            filtered_window = [
+                m for m in window_rows
+                if m.get("id") == around_message_id or m.get("role") in keep_set
+            ]
+        else:
+            filtered_window = window_rows
+
+        window_min_id = window_rows[0]["id"]
+        window_max_id = window_rows[-1]["id"]
+
+        # Fetch bookends only if there's space outside the window. SQL filters
+        # by id range, role, and non-empty content — tool-call-only assistant
+        # turns (content='' with tool_calls populated) are excluded so they
+        # don't crowd out the actual prose openings/closings. ``bookend=0``
+        # short-circuits both queries.
+        bookend_start_rows: List[Any] = []
+        bookend_end_rows: List[Any] = []
+        if bookend > 0:
+            with self._lock:
+                role_clause = ""
+                role_params: list = []
+                if keep_roles is not None:
+                    role_placeholders = ",".join("?" for _ in keep_roles)
+                    role_clause = f" AND role IN ({role_placeholders})"
+                    role_params = list(keep_roles)
+
+                bookend_start_rows = self._conn.execute(
+                    f"SELECT * FROM messages "
+                    f"WHERE session_id = ? AND id < ?{role_clause} "
+                    f"AND length(content) > 0 "
+                    f"ORDER BY id ASC LIMIT ?",
+                    (session_id, window_min_id, *role_params, bookend),
+                ).fetchall()
+
+                bookend_end_rows = self._conn.execute(
+                    f"SELECT * FROM messages "
+                    f"WHERE session_id = ? AND id > ?{role_clause} "
+                    f"AND length(content) > 0 "
+                    f"ORDER BY id DESC LIMIT ?",
+                    (session_id, window_max_id, *role_params, bookend),
+                ).fetchall()
+                # End rows came back DESC for the LIMIT cap; flip to ASC.
+                bookend_end_rows = list(reversed(bookend_end_rows))
+
+        def _hydrate(row) -> Dict[str, Any]:
+            msg = dict(row)
+            if "content" in msg:
+                msg["content"] = self._decode_content(msg["content"])
+            if msg.get("tool_calls"):
+                try:
+                    msg["tool_calls"] = json.loads(msg["tool_calls"])
+                except (json.JSONDecodeError, TypeError):
+                    logger.warning(
+                        "Failed to deserialize tool_calls in get_anchored_view, falling back to []"
+                    )
+                    msg["tool_calls"] = []
+            return msg
+
+        return {
+            "window": filtered_window,
+            "bookend_start": [_hydrate(r) for r in bookend_start_rows],
+            "bookend_end": [_hydrate(r) for r in bookend_end_rows],
+        }
+
    def resolve_resume_session_id(self, session_id: str) -> str:
        """Redirect a resume target to the descendant session that holds the messages.

@@ -1885,6 +2064,7 @@ class SessionDB:
        role_filter: List[str] = None,
        limit: int = 20,
        offset: int = 0,
+        sort: str = None,
    ) -> List[Dict[str, Any]]:
        """
        Full-text search across session messages using FTS5.
@@ -1897,6 +2077,19 @@ class SessionDB:

        Returns matching messages with session metadata, content snippet,
        and surrounding context (1 message before and after the match).
+
+        ``sort`` controls temporal ordering of results:
+          - ``None`` (default): FTS5 BM25 relevance only. Time-neutral, but
+            ties between equally-relevant messages are broken arbitrarily.
+          - ``"newest"``: order by message timestamp DESC, then by rank.
+            Recent matches surface first; rank breaks same-timestamp ties.
+          - ``"oldest"``: order by message timestamp ASC, then by rank.
+            For "how did this start" / "what was the original X" questions.
+
+        The LIKE fallback path (short CJK queries) has no rank to combine
+        with, so it orders by timestamp alone: DESC by default and for
+        ``"newest"``, ASC for ``"oldest"``. The trigram CJK path honours
+        ``sort`` like the main FTS5 path.
        """
        if not query or not query.strip():
            return []
@@ -1905,6 +2098,25 @@ class SessionDB:
        if not query:
            return []

+        # Normalise sort. Anything not in the allowed set falls back to None
+        # (FTS5 rank-only) — be forgiving to callers who pass empty string or
+        # an unexpected value rather than failing the search.
+        if isinstance(sort, str):
+            sort_norm = sort.strip().lower()
+            if sort_norm not in ("newest", "oldest"):
+                sort_norm = None
+        else:
+            sort_norm = None
+
+        # ORDER BY shared by both FTS5 paths. With sort set, timestamp is
+        # primary and rank is the tiebreaker; otherwise rank alone.
+        if sort_norm == "newest":
+            order_by_sql = "ORDER BY m.timestamp DESC, rank"
+        elif sort_norm == "oldest":
+            order_by_sql = "ORDER BY m.timestamp ASC, rank"
+        else:
+            order_by_sql = "ORDER BY rank"
+
        # Build WHERE clauses dynamically
        where_clauses = ["messages_fts MATCH ?"]
        params: list = [query]
@@ -1943,7 +2155,7 @@ class SessionDB:
            JOIN messages m ON m.id = messages_fts.rowid
            JOIN sessions s ON s.id = m.session_id
            WHERE {where_sql}
-            ORDER BY rank
+            {order_by_sql}
            LIMIT ? OFFSET ?
        """

@@ -2012,7 +2224,7 @@ class SessionDB:
                    JOIN messages m ON m.id = messages_fts_trigram.rowid
                    JOIN sessions s ON s.id = m.session_id
                    WHERE {' AND '.join(tri_where)}
-                    ORDER BY rank
+                    {order_by_sql}
                    LIMIT ? OFFSET ?
                """
                tri_params.extend([limit, offset])
@@ -2051,6 +2263,13 @@ class SessionDB:
                if role_filter:
                    like_where.append(f"m.role IN ({','.join('?' for _ in role_filter)})")
                    like_params.extend(role_filter)
+                # LIKE fallback has no rank to combine with — just timestamp
+                # direction. Default/"newest" → DESC; "oldest" → ASC.
+                like_order_sql = (
+                    "ORDER BY m.timestamp ASC"
+                    if sort_norm == "oldest"
+                    else "ORDER BY m.timestamp DESC"
+                )
                like_sql = f"""
                    SELECT m.id, m.session_id, m.role,
                           substr(m.content,
@@ -2061,7 +2280,7 @@ class SessionDB:
                    FROM messages m
                    JOIN sessions s ON s.id = m.session_id
                    WHERE {' AND '.join(like_where)}
-                    ORDER BY m.timestamp DESC
+                    {like_order_sql}
                    LIMIT ? OFFSET ?
                """
                like_params.extend([limit, offset])
--- a/run_agent.py
+++ b/run_agent.py
@@ -10689,6 +10689,11 @@ class AIAgent:
                limit=function_args.get("limit", 3),
                db=session_db,
                current_session_id=self.session_id,
+                mode=function_args.get("mode"),
+                session_id=function_args.get("session_id"),
+                around_message_id=function_args.get("around_message_id"),
+                window=function_args.get("window", 5),
+                anchors=function_args.get("anchors"),
            )
        elif function_name == "memory":
            target = function_args.get("target", "memory")
@@ -11321,6 +11326,11 @@ class AIAgent:
                        limit=function_args.get("limit", 3),
                        db=session_db,
                        current_session_id=self.session_id,
+                        mode=function_args.get("mode"),
+                        session_id=function_args.get("session_id"),
+                        around_message_id=function_args.get("around_message_id"),
+                        window=function_args.get("window", 5),
+                        anchors=function_args.get("anchors"),
                    )
                tool_duration = time.time() - tool_start_time
                if self._should_emit_quiet_tool_messages():
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -1051,6 +1051,7 @@ AUTHOR_MAP = {
    "openclaw@agent.local": "29206394",  # PR #22194 salvage (sudo -S brute-force guard, #9590)
    "freedemon@gmail.com": "fr33d3m0n",  # PR #21128 salvage (sudo stdin/askpass DANGEROUS, #17873 cat 4)
    "zhaowh3613@outlook.com": "VinceZcrikl",  # PR #23647 salvage (npm UTF-8 decode on GBK Windows)
+    "abcdjmm970703@gmail.com": "JabberELF",  # PR #20238 salvage (session_search fast/summary dual-mode)
    "anton.kuenzi@gmail.com": "ZeterMordio",  # PR #11754 salvage (zsh completion compdef + _arguments syntax)
    "23yntong@stu.edu.cn": "iuyup",  # PR #6155 salvage (shell=True hardening)
    "86501179+1RB@users.noreply.github.com": "1RB",  # PR #25462 salvage (discord forwarded messages)
--- a/skills/memory/DESCRIPTION.md
+++ b/skills/memory/DESCRIPTION.md
@@ -0,0 +1,3 @@
+---
+description: Primitives for searching, recalling, and reasoning over Hermes' own session history and stored memory.
+---
--- a/skills/memory/session-recall/SKILL.md
+++ b/skills/memory/session-recall/SKILL.md
@@ -0,0 +1,112 @@
+---
+name: session-recall
+description: Use session_search effectively for finding and reading prior Hermes sessions.
+metadata:
+  hermes:
+    category: memory
+---
+
+# session-recall
+
+session_search is the tool. Three modes — fast, guided, summary — answer different question shapes. Picking the wrong mode costs latency, money, or correctness.
+
+## Pre-flight
+
+1. If the user asks about prior work ("find the session where X", "catch me up on Y", "we drafted Z"), your first move is session_search. Not filesystem search, not a different tool.
+2. If the user names an artefact, search the literal name first. No OR-expansion.
+3. Default to fast → guided. Reach for summary only when you need cross-session synthesis prose in one shot.
+
+## Mode picker
+
+| Question shape | Mode | Why |
+|---|---|---|
+| Catch me up / where did we get to / what did we decide | fast → guided | FTS5 finds sessions; guided reads the transcript. SQL-only. |
+| Find an artefact by name / which session mentions X | fast | Snippets only, no LLM. |
+| Read around a specific message in a known session | guided | Raw window around anchor. |
+| Cross-session prose synthesis in one shot | summary | LLM call per hit (aux model if configured, else main). Opt-in. |
+
+## Levers
+
+| Lever | Default | When to change |
+|---|---|---|
+| `limit` (fast) | 3 | 5–10 when topic spans sessions or user wants to pick from a list |
+| `sort` (fast) | unset (relevance) | `newest` for "where did we leave X"; `oldest` for "how did X start" |
+| `role_filter` (fast) | user,assistant | Add `tool` only when debugging tool output specifically |
+| `window` (guided) | 5 | Bump for long resolutions; shrink if response truncates |
+| anchor count (guided) | 1 | 2–3 anchors when topic spans recent sessions |
+| `limit` (summary) | 3 | Bump cautiously; cost scales directly |
+
+## Composition patterns
+
+1. **Discover → drill.** fast first, drill the top hit with guided. Widen `window` or re-anchor if the resolution isn't covered.
+2. **Multi-anchor for arcs.** When fast returns 2–3 relevant hits on the same topic, pass them all to guided in one call.
+3. **Bookend-first reading.** For "what was the conclusion" questions, read `bookend_end` before `messages`.
+4. **Delegate when transcripts are big.** If you're about to pull 30K+ chars of transcript into your context just to summarise it, hand the dumps to a subagent and ask for a digest.
+5. **Verify before quoting.** High-stakes recall does two passes: fast with the literal term (does the hit list contain the right session?) → guided (does the transcript confirm the outcome?).
+
+## Worked examples
+
+### A — find a named artefact
+
+User: "we drafted a deployment plan in a session yesterday, find it"
+
+Right: `session_search(query="deployment plan", limit=5)`. The user named it — search the name. Drill the top hit if you need details.
+
+Wrong: `session_search(query="deploy OR deployment OR rollout OR plan")`. OR-expansion drowns the hit in unrelated sessions.
+
+### B — catch up on a multi-session arc
+
+User: "where did we get to with the auth refactor?"
+
+Right: fast with `sort='newest'`, then multi-anchor guided across the top 2–3 hits:
+
+```
+session_search(query="auth refactor", limit=5, sort='newest')
+session_search(mode='guided', anchors=[
+  {'session_id': hit_1.session_id, 'around_message_id': hit_1.match_message_id},
+  {'session_id': hit_2.session_id, 'around_message_id': hit_2.match_message_id},
+  {'session_id': hit_3.session_id, 'around_message_id': hit_3.match_message_id},
+])
+```
+
+Read all three slices (bookend_start / messages / bookend_end) on each window and the arc reconstructs.
+
+Wrong: `session_search(query="auth refactor", mode='summary')`. Summary launders FTS5 hits through an LLM and can confabulate when the right session isn't in the hit list.
+
+### C — drill into a known session for a conclusion
+
+User: "in the session about the caching layer, what did we decide?"
+
+fast to locate, guided to drill, read `bookend_end` first:
+
+```
+session_search(query="caching layer", limit=3)
+session_search(mode='guided', anchors=[
+  {'session_id': <top>, 'around_message_id': <match_id>}
+])
+```
+
+Conclusions ("decided X", "shipped Y") usually live in `bookend_end`.
+
+## Reading guided responses
+
+Every guided window has three slices:
+
+- `bookend_start` — opening prose (kickoff, goal)
+- `messages` — the anchored window (FTS5 hit + neighbours)
+- `bookend_end` — closing prose (resolution, decisions, commits)
+
+Read all three. Bookends are prose that summarises; snippets and the middle window can be noisy when sessions are *about* the search term.
+
+## Pitfalls
+
+- **Manual-archaeology trap.** If fast snippets look noisy, drill the top hit with guided. Don't pivot to find / grep / raw SQL.
+- **Summary confabulation.** Summary will produce confident prose even when FTS5 missed the right session. Verify by re-querying in fast mode and checking the hit list.
+- **FTS5 is AND by default.** Multi-word queries require all terms; use OR or quoted phrases deliberately.
+- **Anchor mismatch.** `around_message_id` must exist in the named session. If guided rejects the anchor, re-anchor from a fresh fast result.
+- **Window truncation.** Re-call with a smaller window if a dump truncates.
+- **Compaction lineage.** A fast hit with `parent_session_id` set means the session was split by compaction; its `bookend_start` is a handoff summary, not the original opener.
+
+## Note on skill limits
+
+This skill teaches composition but cannot enforce it. If your default behaviour drifts — composing paraphrase queries instead of drilling, reaching for summary when fast → guided would do, pivoting to filesystem search when fast returned hits — the skill is being ignored, not failing. When in doubt: fast first, then drill.
--- a/tests/hermes_state/test_get_anchored_view.py
+++ b/tests/hermes_state/test_get_anchored_view.py
@@ -0,0 +1,189 @@
+"""Unit tests for SessionDB.get_anchored_view() — window + bookends + role filter.
+
+Used by ``session_search`` mode='guided'. Builds on ``get_messages_around``
+and adds:
+  - opinionated default role filter (drops tool messages from the window,
+    but never drops the anchor itself)
+  - session-head and session-tail bookends (default 3 messages each) so an
+    FTS5 hit anywhere in a long session still yields the goal + resolution
+  - bookends are skipped when the main window already overlaps the head or tail
+
+These properties are the reason guided is useful for state recall on long
+sessions, so the suite below pins them all down.
+"""
+import pytest
+
+from hermes_state import SessionDB
+
+
+@pytest.fixture
+def db(tmp_path):
+    return SessionDB(tmp_path / "state.db")
+
+
+def _seed(db: SessionDB, session_id: str, roles: list[str]) -> list[int]:
+    """Append messages with the given role sequence. Returns message ids."""
+    db.create_session(session_id, source="cli")
+    ids = []
+    for i, role in enumerate(roles):
+        ids.append(db.append_message(session_id, role=role, content=f"{role}-{i}"))
+    return ids
+
+
+def test_window_filters_tool_messages_but_keeps_anchor_when_tool(db):
+    """The anchor is preserved even when its role is tool. Other tool
+    messages in the window are dropped."""
+    ids = _seed(db, "s1", [
+        "user", "assistant", "tool",     # 0..2
+        "user", "tool",                  # 3..4  ← anchor on a tool (idx 4)
+        "tool", "assistant", "user",     # 5..7
+    ])
+    view = db.get_anchored_view("s1", ids[4], window=3, bookend=0)
+    roles = [m["role"] for m in view["window"]]
+    # Anchor (tool) preserved; surrounding tool messages dropped.
+    assert "tool" in roles
+    anchor = next(m for m in view["window"] if m["id"] == ids[4])
+    assert anchor["role"] == "tool"
+    # Only the anchor tool message remains — other tools filtered.
+    tool_rows = [m for m in view["window"] if m["role"] == "tool"]
+    assert len(tool_rows) == 1 and tool_rows[0]["id"] == ids[4]
+
+
+def test_window_keeps_user_and_assistant_by_default(db):
+    ids = _seed(db, "s1", ["user", "assistant"] * 6)
+    view = db.get_anchored_view("s1", ids[5], window=2, bookend=0)
+    # All user/assistant → all should survive the filter.
+    assert {m["role"] for m in view["window"]} == {"user", "assistant"}
+    assert len(view["window"]) == 5  # 2 before + anchor + 2 after
+
+
+def test_bookends_returned_when_window_in_middle(db):
+    ids = _seed(db, "s1", ["user", "assistant"] * 10)  # 20 messages
+    view = db.get_anchored_view("s1", ids[10], window=2, bookend=3)
+    assert len(view["bookend_start"]) == 3
+    assert len(view["bookend_end"]) == 3
+    # Bookends are the actual session head/tail.
+    assert [m["id"] for m in view["bookend_start"]] == ids[:3]
+    assert [m["id"] for m in view["bookend_end"]] == ids[-3:]
+
+
+def test_bookend_start_empty_when_window_covers_session_head(db):
+    ids = _seed(db, "s1", ["user", "assistant"] * 5)  # 10 messages
+    # Anchor on id ids[1]; window=3 → covers ids[0..4]. Head overlaps.
+    view = db.get_anchored_view("s1", ids[1], window=3, bookend=3)
+    assert view["bookend_start"] == []
+    # Tail still has space → returns bookend_end.
+    assert len(view["bookend_end"]) == 3
+
+
+def test_bookend_end_empty_when_window_covers_session_tail(db):
+    ids = _seed(db, "s1", ["user", "assistant"] * 5)  # 10 messages
+    view = db.get_anchored_view("s1", ids[-2], window=3, bookend=3)
+    assert view["bookend_end"] == []
+    assert len(view["bookend_start"]) == 3
+
+
+def test_bookends_skip_tool_messages(db):
+    ids = _seed(db, "s1", [
+        "tool", "tool", "user", "assistant",     # head: only 2 user/assistant
+        "user", "assistant", "user", "assistant",
+        "tool", "user", "assistant", "tool",     # tail: 2 user/assistant + tool
+    ])
+    # Anchor in the middle; bookends should pull only user/assistant.
+    view = db.get_anchored_view("s1", ids[5], window=1, bookend=3)
+    assert all(m["role"] in ("user", "assistant") for m in view["bookend_start"])
+    assert all(m["role"] in ("user", "assistant") for m in view["bookend_end"])
+
+
+def test_bookend_zero_returns_empty_bookends(db):
+    ids = _seed(db, "s1", ["user", "assistant"] * 10)
+    view = db.get_anchored_view("s1", ids[10], window=2, bookend=0)
+    assert view["bookend_start"] == []
+    assert view["bookend_end"] == []
+
+
+def test_anchor_not_in_session_returns_empty_view(db):
+    ids = _seed(db, "s1", ["user", "assistant"] * 5)
+    _seed(db, "s2", ["user", "assistant"] * 5)
+    view = db.get_anchored_view("s1", 999999, window=3, bookend=3)
+    assert view == {"window": [], "bookend_start": [], "bookend_end": []}
+
+
+def test_keep_roles_none_disables_filtering(db):
+    """Pass keep_roles=None to get raw window + raw bookends including tool."""
+    ids = _seed(db, "s1", ["user", "tool", "assistant", "tool", "user"] * 3)
+    view = db.get_anchored_view(
+        "s1", ids[7], window=2, bookend=3, keep_roles=None
+    )
+    # Tool messages in the window survive when filtering is disabled.
+    roles_in_window = [m["role"] for m in view["window"]]
+    assert "tool" in roles_in_window
+
+
+def test_keep_roles_can_include_tool_when_caller_wants_it(db):
+    ids = _seed(db, "s1", ["user", "tool", "assistant"] * 5)
+    view = db.get_anchored_view(
+        "s1", ids[7], window=2, bookend=3, keep_roles=("user", "assistant", "tool")
+    )
+    # All three roles allowed → tool messages should now appear in the window.
+    assert any(m["role"] == "tool" for m in view["window"])
+
+
+def test_negative_bookend_treated_as_zero(db):
+    ids = _seed(db, "s1", ["user", "assistant"] * 10)
+    view = db.get_anchored_view("s1", ids[10], window=2, bookend=-3)
+    assert view["bookend_start"] == []
+    assert view["bookend_end"] == []
+
+
+def test_bookends_do_not_leak_across_sessions(db):
+    """Bookends are session-scoped. A second session with adjacent ids must
+    never appear in the first session's bookends."""
+    s1_ids = _seed(db, "s1", ["user", "assistant"] * 4)
+    s2_ids = _seed(db, "s2", ["user", "assistant"] * 4)
+    view = db.get_anchored_view("s1", s1_ids[3], window=1, bookend=3)
+    bookend_ids = (
+        [m["id"] for m in view["bookend_start"]]
+        + [m["id"] for m in view["bookend_end"]]
+    )
+    assert set(bookend_ids).isdisjoint(set(s2_ids))
+
+
+def test_bookends_skip_empty_content_assistant_turns(db):
+    """Tool-call-only assistant turns (content='' with tool_calls populated)
+    must NOT eat bookend slots. Bookends exist to surface the session's
+    spoken opening + resolution; 'let me check...'-shaped no-content
+    assistants are signal-free here."""
+    db.create_session("s1", source="cli")
+    # Real opener
+    open_id = db.append_message("s1", role="user", content="kick off the work")
+    db.append_message("s1", role="assistant", content="on it")
+    # A burst of tool-call-only assistants (orchestration heartbeats)
+    for _ in range(5):
+        db.append_message("s1", role="assistant", content="")
+        db.append_message("s1", role="tool", content="some output")
+    # Middle prose
+    mid_id = db.append_message("s1", role="user", content="status?")
+    db.append_message("s1", role="assistant", content="midway")
+    # Tail: more empty assistants interleaved with prose closer
+    for _ in range(3):
+        db.append_message("s1", role="assistant", content="")
+        db.append_message("s1", role="tool", content="poll")
+    close_id = db.append_message(
+        "s1", role="assistant", content="Done. Final summary here."
+    )
+
+    view = db.get_anchored_view("s1", mid_id, window=1, bookend=3)
+
+    # bookend_start should contain prose user/assistant, never empty content
+    assert all(m["content"] for m in view["bookend_start"]), \
+        "bookend_start leaked an empty-content row"
+    # First message must be the actual opener
+    assert view["bookend_start"][0]["id"] == open_id
+
+    # bookend_end likewise — and the closer prose must appear
+    assert all(m["content"] for m in view["bookend_end"]), \
+        "bookend_end leaked an empty-content row"
+    assert any(m["id"] == close_id for m in view["bookend_end"]), \
+        "actual session closer must survive into bookend_end"
+
--- a/tests/hermes_state/test_get_messages_around.py
+++ b/tests/hermes_state/test_get_messages_around.py
@@ -0,0 +1,137 @@
+"""Unit tests for SessionDB.get_messages_around() — anchored message windows.
+
+The method is used by ``session_search`` mode='guided' for anchored drill-down.
+It must:
+  - Return an ordered window: up to ``window`` messages before the anchor,
+    the anchor itself, then up to ``window`` after, all id-ascending.
+  - Honour session boundaries (fewer messages returned at start / end).
+  - Honour session isolation (same id range, different session = nothing).
+  - Return an empty list when the anchor is not in the named session.
+"""
+import pytest
+
+from hermes_state import SessionDB
+
+
+@pytest.fixture
+def db(tmp_path):
+    """Provide a SessionDB stored at ``state.db`` under pytest's ``tmp_path``."""
+    db_path = tmp_path / "state.db"
+    return SessionDB(db_path)
+
+
+def _seed_session(db: SessionDB, session_id: str, n_messages: int):
+    """Create ``session_id`` and fill it with ``n_messages`` messages.
+
+    Roles alternate starting with "user"; message ``i`` has the content
+    ``"msg {i}"``. Returns the appended message ids in insertion order.
+    """
+    db.create_session(session_id, source="cli")
+    roles = ("user", "assistant")
+    return [
+        db.append_message(session_id, role=roles[i % 2], content=f"msg {i}")
+        for i in range(n_messages)
+    ]
+
+
+def test_returns_window_around_anchor_in_middle(db):
+    """Anchoring mid-session yields ``window`` rows on each side of the anchor."""
+    ids = _seed_session(db, "s1", 11)
+
+    rows = db.get_messages_around("s1", ids[5], window=3)  # ids[5] = midpoint
+
+    # 3 before + the anchor + 3 after.
+    assert len(rows) == 7
+    # Nothing from any other session.
+    assert all(row["session_id"] == "s1" for row in rows)
+    # Ascending, contiguous ids centred on the anchor.
+    assert [row["id"] for row in rows] == ids[2:9]
+
+
+def test_anchor_at_first_message_returns_only_after_slice(db):
+    """With nothing before the anchor, the window is the anchor plus what follows."""
+    ids = _seed_session(db, "s1", 8)
+
+    rows = db.get_messages_around("s1", ids[0], window=3)
+
+    # Anchor + 3 following messages; the "before" side is empty.
+    assert len(rows) == 4
+    assert [row["id"] for row in rows] == ids[:4]
+
+
+def test_anchor_at_last_message_returns_only_before_slice(db):
+    """With nothing after the anchor, the window is what precedes it plus the anchor."""
+    ids = _seed_session(db, "s1", 8)
+
+    rows = db.get_messages_around("s1", ids[-1], window=3)
+
+    # 3 preceding messages + anchor; the "after" side is empty.
+    assert len(rows) == 4
+    assert [row["id"] for row in rows] == ids[-4:]
+
+
+def test_anchor_not_in_session_returns_empty_list(db):
+    """An anchor id that exists, but in a different session, yields ``[]``."""
+    s1_ids = _seed_session(db, "s1", 5)
+    s2_ids = _seed_session(db, "s2", 5)
+
+    # Name s2 as the session while anchoring on an id that lives in s1 ...
+    assert db.get_messages_around("s2", s1_ids[2], window=3) == []
+    # ... and the mirror image: name s1, anchor on an id that lives in s2.
+    assert db.get_messages_around("s1", s2_ids[2], window=3) == []
+
+
+def test_does_not_leak_across_sessions(db):
+    """A window at the tail of one session must not spill into the next one."""
+    # Two sessions seeded back to back, so s2's ids directly follow s1's.
+    s1_ids = _seed_session(db, "s1", 5)
+    s2_ids = _seed_session(db, "s2", 5)
+
+    # Anchor on s1's last message — even though s2 ids are "after", they must
+    # not appear in the window.
+    result = db.get_messages_around("s1", s1_ids[-1], window=3)
+    result_ids = [m["id"] for m in result]
+
+    # Pin the exact window first: every isolation check below is trivially
+    # true for an empty list, so on their own they would let a method that
+    # returns nothing pass.
+    assert result_ids == s1_ids[-4:]
+    assert all(m["session_id"] == "s1" for m in result)
+    # All result ids belong to s1, not s2.
+    assert set(result_ids).issubset(set(s1_ids))
+    assert set(result_ids).isdisjoint(set(s2_ids))
+
+
+def test_window_larger_than_session_returns_full_session(db):
+    """An oversized window is bounded by the session rather than rejected."""
+    ids = _seed_session(db, "s1", 4)
+
+    rows = db.get_messages_around("s1", ids[1], window=100)
+
+    # Every message of the session, ascending by id.
+    assert [row["id"] for row in rows] == ids
+
+
+def test_window_zero_returns_only_anchor(db):
+    """``window=0`` asks for no neighbours: just the anchor row comes back."""
+    ids = _seed_session(db, "s1", 5)
+    target = ids[2]
+
+    rows = db.get_messages_around("s1", target, window=0)
+
+    assert len(rows) == 1
+    assert rows[0]["id"] == target
+
+
+def test_negative_window_treated_as_zero(db):
+    """A negative window behaves like ``window=0``: only the anchor row."""
+    ids = _seed_session(db, "s1", 5)
+    target = ids[2]
+
+    rows = db.get_messages_around("s1", target, window=-3)
+
+    assert len(rows) == 1
+    assert rows[0]["id"] == target
+
+
+def test_decodes_content_like_get_messages(db):
+    """The anchored window must expose ``content`` the same way get_messages
+    does, so callers can move between the two methods without surprises."""
+    ids = _seed_session(db, "s1", 3)
+
+    # window=1 around the middle of three messages spans the whole session,
+    # which makes the two result sets directly comparable.
+    windowed = db.get_messages_around("s1", ids[1], window=1)
+    everything = db.get_messages("s1")
+
+    windowed_content = [row["content"] for row in windowed]
+    full_content = [row["content"] for row in everything]
+    assert windowed_content == full_content
--- a/tests/test_hermes_state.py
+++ b/tests/test_hermes_state.py
@@ -2494,6 +2494,103 @@ class TestExcludeSources:
        sources = [r["source"] for r in results]
        assert sources == ["cli"]

+    def test_search_messages_sort_newest_orders_by_timestamp_desc(self, db):
+        """With ``sort='newest'`` the timestamp (DESC) is the primary sort key
+        and FTS5 rank only breaks ties, so three equally-scored matches at
+        distinct timestamps come back newest-first."""
+        session_ids = ("old_sid", "mid_sid", "new_sid")
+        for sid in session_ids:
+            db.create_session(sid, "cli")
+        # Identical content → identical BM25 score; only timestamps will differ.
+        old_id, mid_id, new_id = (
+            db.append_message(sid, "user", "matchword discussion")
+            for sid in session_ids
+        )
+        # Overwrite the stored timestamps with explicit, well-separated values.
+        stamp_statements = (
+            ("UPDATE messages SET timestamp=1000 WHERE id=?", old_id),
+            ("UPDATE messages SET timestamp=2000 WHERE id=?", mid_id),
+            ("UPDATE messages SET timestamp=3000 WHERE id=?", new_id),
+        )
+        with db._lock:
+            for sql, message_id in stamp_statements:
+                db._conn.execute(sql, (message_id,))
+            db._conn.commit()
+
+        hits = db.search_messages("matchword", sort="newest")
+        session_order = [hit["session_id"] for hit in hits]
+        assert session_order == ["new_sid", "mid_sid", "old_sid"], (
+            f"sort=newest must return newest first; got {session_order}"
+        )
+
+    def test_search_messages_sort_oldest_orders_by_timestamp_asc(self, db):
+        """``sort='oldest'`` mirrors 'newest': the earliest match leads. This
+        matters for 'how did X start' questions, where rank-only ordering
+        would bury the origin under later revisits."""
+        session_ids = ("a", "b", "c")
+        for sid in session_ids:
+            db.create_session(sid, "cli")
+        id_a, id_b, id_c = (
+            db.append_message(sid, "user", "matchword") for sid in session_ids
+        )
+        # Timestamps deliberately disagree with insertion order: b < c < a.
+        stamp_statements = (
+            ("UPDATE messages SET timestamp=3000 WHERE id=?", id_a),
+            ("UPDATE messages SET timestamp=1000 WHERE id=?", id_b),
+            ("UPDATE messages SET timestamp=2000 WHERE id=?", id_c),
+        )
+        with db._lock:
+            for sql, message_id in stamp_statements:
+                db._conn.execute(sql, (message_id,))
+            db._conn.commit()
+
+        hits = db.search_messages("matchword", sort="oldest")
+        session_order = [hit["session_id"] for hit in hits]
+        assert session_order == ["b", "c", "a"], (
+            f"sort=oldest must return earliest first; got {session_order}"
+        )
+
+    def test_search_messages_sort_unset_preserves_rank_ordering(self, db):
+        """Omitting ``sort`` keeps ``ORDER BY rank`` (FTS5 BM25). For the same
+        single keyword in messages of different length BM25 favours the
+        shorter, denser one — the pre-existing default, which must not
+        regress now that the parameter exists."""
+        for sid in ("short_sid", "long_sid"):
+            db.create_session(sid, "cli")
+        # The keyword alone outscores the same keyword buried in a far longer
+        # message (BM25 length normalisation).
+        short_id = db.append_message("short_sid", "user", "matchword.")
+        long_id = db.append_message(
+            "long_sid", "user", "matchword " + ("padding " * 200)
+        )
+        # short_sid gets the older timestamp, so a win for it proves that rank
+        # decided the order rather than recency.
+        stamp_statements = (
+            ("UPDATE messages SET timestamp=1000 WHERE id=?", short_id),
+            ("UPDATE messages SET timestamp=2000 WHERE id=?", long_id),
+        )
+        with db._lock:
+            for sql, message_id in stamp_statements:
+                db._conn.execute(sql, (message_id,))
+            db._conn.commit()
+
+        hits = db.search_messages("matchword")  # sort omitted
+        assert len(hits) == 2
+        # The short message must lead even though it is the older one.
+        assert hits[0]["session_id"] == "short_sid", (
+            "Default (no sort) must use FTS5 rank — short_sid should outrank "
+            f"the longer message. Got order: {[r['session_id'] for r in hits]}"
+        )
+
+    def test_search_messages_sort_invalid_value_falls_back_to_rank(self, db):
+        """Passing a value outside the allowed set (e.g. 'sideways') silently
+        falls back to FTS5 rank-only ordering rather than raising. Same
+        forgiveness as the tool-layer normalisation, in case callers reach
+        SessionDB directly."""
+        db.create_session("short_sid", "cli")
+        db.create_session("long_sid", "cli")
+        m_short = db.append_message("short_sid", "user", "matchword.")
+        m_long = db.append_message(
+            "long_sid", "user", "matchword " + ("padding " * 200)
+        )
+        with db._lock:
+            db._conn.execute("UPDATE messages SET timestamp=1000 WHERE id=?", (m_short,))
+            db._conn.execute("UPDATE messages SET timestamp=2000 WHERE id=?", (m_long,))
+            db._conn.commit()
+
+        # Garbage sort should behave the same as no sort.
+        results_default = db.search_messages("matchword")
+        results_garbage = db.search_messages("matchword", sort="sideways")
+        default_order = [r["session_id"] for r in results_default]
+        garbage_order = [r["session_id"] for r in results_garbage]
+        # Two empty result lists would also compare equal, so require both
+        # seeded messages to be found before comparing the orderings.
+        assert len(results_garbage) == 2, (
+            f"sort='sideways' must still return both matches; got {garbage_order}"
+        )
+        assert garbage_order == default_order
+        # Rank ordering puts the short, dense message first although it
+        # carries the older timestamp.
+        assert garbage_order[0] == "short_sid"
+

 class TestResolveSessionByNameOrId:
    """Tests for the main.py helper that resolves names or IDs."""
--- a/tests/tools/test_session_search.py
+++ b/tests/tools/test_session_search.py
--- a/tools/session_search_tool.py
+++ b/tools/session_search_tool.py
@@ -2,19 +2,16 @@
 """
 Session Search Tool - Long-Term Conversation Recall

-Searches past session transcripts in SQLite via FTS5, then summarizes the top
-matching sessions using the configured auxiliary session_search model (same
-pattern as web_extract). By default, auxiliary "auto" routing uses the main
-chat provider/model unless the user overrides auxiliary.session_search.
-Returns focused summaries of past conversations rather than raw transcripts,
-keeping the main model's context window clean.
+Searches past session transcripts in SQLite via FTS5. Keyword search defaults
+to fast snippet/context hits without any LLM call; callers can opt into focused
+LLM summaries with mode="summary" when deeper recall is worth the latency.

 Flow:
  1. FTS5 search finds matching messages ranked by relevance
  2. Groups by session, takes the top N unique sessions (default 3)
-  3. Loads each session's conversation, truncates to ~100k chars centered on matches
-  4. Sends to the configured auxiliary model with a focused summarization prompt
-  5. Returns per-session summaries with metadata
+  3. Fast mode returns snippets and nearby context immediately
+  4. Summary mode loads each session, truncates around matches, and calls an LLM
+  5. Returns per-session hits/summaries with metadata
 """

 import asyncio
@@ -26,6 +23,62 @@ from typing import Dict, Any, List, Optional, Union

 from agent.auxiliary_client import async_call_llm, extract_content_or_reasoning
 MAX_SESSION_CHARS = 100_000
+
+
+# Default mode is fast unless the user sets ``auxiliary.session_search.default_mode``
+# in ~/.hermes/config.yaml. Only ``fast`` and ``summary`` are valid — guided
+# requires anchors. Resolver is lru_cache-wrapped so the YAML read happens at
+# most once per process; restart to pick up config changes.
+_VALID_DEFAULT_MODES = ("fast", "summary")
+_FALLBACK_DEFAULT_MODE = "fast"
+
+import functools  # noqa: E402  — local to the cache wrap
+
+
+# Process-level cache so repeated session_search calls don't re-read YAML.
+# Cleared by tests via _resolve_user_default_mode.cache_clear() when needed.
+@functools.lru_cache(maxsize=1)
+def _resolve_user_default_mode() -> str:
+    """Look up ``auxiliary.session_search.default_mode`` from ~/.hermes/config.yaml.
+
+    Returns:
+        ``"fast"`` or ``"summary"``. ``_FALLBACK_DEFAULT_MODE`` (``"fast"``)
+        is returned when the key is unset, when any level of the path is
+        missing / null / not a mapping, when the value is not one of
+        ``_VALID_DEFAULT_MODES``, or when the config loader cannot be
+        imported or raises (e.g. tests, tools loaded outside the CLI).
+
+    The result is cached for the life of the process, so the warning for an
+    invalid value is logged once per process (or once per ``cache_clear()``).
+    """
+    try:
+        from hermes_cli.config import load_config
+        config = load_config() or {}
+    except ImportError:
+        logging.debug("hermes_cli.config not available; default_mode falls back to %r", _FALLBACK_DEFAULT_MODE)
+        return _FALLBACK_DEFAULT_MODE
+    except Exception as e:
+        logging.debug("Failed to load config for session_search default_mode: %s", e, exc_info=True)
+        return _FALLBACK_DEFAULT_MODE
+
+    # Walk the path one level at a time. A YAML key with no value
+    # (``auxiliary:`` with every child commented out) loads as None, and a
+    # user can put a scalar where a mapping belongs; chaining
+    # ``.get(key, {})`` would raise AttributeError on either, because the
+    # default only applies when the key is absent.
+    node = config
+    for key in ("auxiliary", "session_search", "default_mode"):
+        getter = getattr(node, "get", None)
+        node = getter(key) if callable(getter) else None
+    raw = node
+
+    if raw is None:
+        return _FALLBACK_DEFAULT_MODE
+    if not isinstance(raw, str):
+        logging.warning(
+            "auxiliary.session_search.default_mode in config.yaml must be a string, got %r — falling back to %r",
+            raw, _FALLBACK_DEFAULT_MODE,
+        )
+        return _FALLBACK_DEFAULT_MODE
+    normalised = raw.strip().lower()
+    if normalised not in _VALID_DEFAULT_MODES:
+        logging.warning(
+            "auxiliary.session_search.default_mode=%r is not one of %s — falling back to %r. "
+            "(guided requires anchors and cannot be a default.)",
+            raw, _VALID_DEFAULT_MODES, _FALLBACK_DEFAULT_MODE,
+        )
+        return _FALLBACK_DEFAULT_MODE
+    return normalised
 MAX_SUMMARY_TOKENS = 10000


@@ -197,8 +250,16 @@ def _truncate_around_matches(

 async def _summarize_session(
    conversation_text: str, query: str, session_meta: Dict[str, Any]
-) -> Optional[str]:
-    """Summarize a single session conversation focused on the search query."""
+) -> tuple[Optional[str], Optional[Dict[str, Any]]]:
+    """Summarize a single session conversation focused on the search query.
+
+    Returns ``(content, usage)`` where ``usage`` is a dict with
+    ``{model, input_tokens, output_tokens, cache_read_tokens, cache_creation_tokens}``
+    parsed from the aux LLM response, or ``None`` when the model didn't surface
+    usage data. The usage dict lets callers attribute the cost of summary-mode
+    aux calls back to the parent session — without this, summary-mode spend is
+    invisible to per-session accounting.
+    """
    system_prompt = (
        "You are reviewing a past conversation transcript to help recall what happened. "
        "Summarize the conversation with a focus on the search topic. Include:\n"
@@ -235,17 +296,18 @@ async def _summarize_session(
                max_tokens=MAX_SUMMARY_TOKENS,
            )
            content = extract_content_or_reasoning(response)
+            usage = _extract_aux_usage(response)
            if content:
-                return content
+                return content, usage
            # Reasoning-only / empty — let the retry loop handle it
            logging.warning("Session search LLM returned empty content (attempt %d/%d)", attempt + 1, max_retries)
            if attempt < max_retries - 1:
                await asyncio.sleep(1 * (attempt + 1))
                continue
-            return content
+            return content, usage
        except RuntimeError:
            logging.warning("No auxiliary model available for session summarization")
-            return None
+            return None, None
        except Exception as e:
            if attempt < max_retries - 1:
                await asyncio.sleep(1 * (attempt + 1))
@@ -256,7 +318,48 @@ async def _summarize_session(
                    e,
                    exc_info=True,
                )
-                return None
+                return None, None
+
+
+def _extract_aux_usage(response: Any) -> Optional[Dict[str, Any]]:
+    """Pull usage data off an aux LLM response, normalising provider variants.
+
+    Reads both OpenAI-style (``prompt_tokens``/``completion_tokens``, cached
+    tokens under ``prompt_tokens_details.cached_tokens``) and Anthropic-style
+    (``input_tokens``/``output_tokens``, ``cache_read_input_tokens``,
+    ``cache_creation_input_tokens``) shapes. The response, its ``usage`` and
+    ``prompt_tokens_details`` may each be an attribute-style object or a
+    plain dict.
+
+    Returns:
+        ``None`` when the response carries no usage info (test mocks,
+        providers that don't surface it); otherwise a dict with ``model``,
+        ``input_tokens``, ``output_tokens``, ``cache_read_tokens`` and
+        ``cache_creation_tokens``. Token counts are always ints; a missing
+        or non-numeric count becomes 0.
+
+    Never raises on malformed usage data: the caller extracts usage in the
+    same ``try`` that guards the LLM call, so an exception here would throw
+    away an otherwise successful summary.
+    """
+
+    def _field(obj: Any, name: str) -> Any:
+        # Dict-shaped payloads need key access; getattr would report every
+        # field as missing and silently turn the counts into zeros.
+        if isinstance(obj, dict):
+            return obj.get(name)
+        return getattr(obj, name, None)
+
+    def _count(value: Any) -> int:
+        try:
+            return int(value or 0)
+        except (TypeError, ValueError):
+            return 0
+
+    usage = _field(response, "usage")
+    if not usage:
+        return None
+    # Provider variants — read whichever is populated.
+    input_tokens = _field(usage, "input_tokens") or _field(usage, "prompt_tokens")
+    output_tokens = _field(usage, "output_tokens") or _field(usage, "completion_tokens")
+    # Anthropic prompt-caching fields.
+    cache_read = _field(usage, "cache_read_input_tokens")
+    cache_create = _field(usage, "cache_creation_input_tokens")
+    # OpenAI-style cached tokens may live under prompt_tokens_details.
+    if not cache_read:
+        details = _field(usage, "prompt_tokens_details")
+        if details:
+            cache_read = _field(details, "cached_tokens")
+    return {
+        "model": _field(response, "model"),
+        "input_tokens": _count(input_tokens),
+        "output_tokens": _count(output_tokens),
+        "cache_read_tokens": _count(cache_read),
+        "cache_creation_tokens": _count(cache_create),
+    }


 # Sources that are excluded from session browsing/searching by default.
@@ -322,19 +425,380 @@ def _list_recent_sessions(db, limit: int, current_session_id: str = None) -> str
        return tool_error(f"Failed to list recent sessions: {e}", success=False)


+def _guided_drill_down(
+    db,
+    session_id: str,
+    around_message_id,
+    window: int,
+    current_session_id: str = None,
+    anchors: Optional[List[Dict[str, Any]]] = None,
+) -> str:
+    """Anchored drill-down for ``mode='guided'`` of ``session_search``.
+
+    Returns a JSON string carrying one or more windows of messages — each
+    centred on a specific message id in a specific session. No FTS5 query
+    and no auxiliary LLM call are made here; every anchor is served by
+    ``db.get_session`` / ``db.get_anchored_view`` lookups.
+
+    Two input shapes (use one):
+
+      * **Single anchor** (back-compat): pass ``session_id`` and
+        ``around_message_id`` directly. Internally normalised to a single-
+        element ``anchors`` list. Response always carries ``windows``
+        as a list, plus the legacy single-anchor fields at the top level
+        when there's exactly one anchor.
+
+      * **Multi-anchor**: pass ``anchors=[{"session_id":..., "around_message_id":...}, ...]``.
+        The agent picks the most promising K hits from a wider fast call
+        and drills into all of them at once. A non-empty ``anchors`` takes
+        precedence over ``session_id`` / ``around_message_id``.
+
+    Each anchor is validated independently. Per-anchor failures (missing
+    session, anchor not in session, current-lineage rejection) become
+    error entries inside the response's ``windows`` list rather than
+    aborting the whole call — except when there is exactly one anchor and
+    it fails, in which case the error is returned via ``tool_error``.
+    ``window`` is shared across all anchors, coerced with ``int()``
+    (falling back to 5 when that fails) and clamped to ``[1, 20]`` (silent,
+    matches the existing limit-clamp pattern).
+    """
+    # 1. Normalise inputs into a single ``anchors`` list. Three shapes:
+    #    (a) anchors= parameter is set (preferred for multi-anchor)
+    #    (b) session_id + around_message_id (single-anchor back-compat)
+    #    (c) neither set → user-facing error
+    #    The truthiness test means an empty ``anchors`` ([] or None) is
+    #    treated as "not set" and falls through to (b) / (c).
+    if anchors:
+        if not isinstance(anchors, list):
+            return tool_error(
+                "guided mode: 'anchors' must be a list of {session_id, around_message_id} dicts",
+                success=False,
+            )
+        normalised_anchors = anchors
+    elif session_id or around_message_id is not None:
+        normalised_anchors = [{
+            "session_id": session_id,
+            "around_message_id": around_message_id,
+        }]
+    else:
+        return tool_error(
+            "guided mode requires either anchors=[...] or session_id+around_message_id "
+            "(use match_message_id+session_id from a prior fast-mode hit)",
+            success=False,
+        )
+
+    # NOTE(review): this guard looks unreachable — branch (a) only accepts a
+    # truthy (hence non-empty) list and branch (b) builds a one-element list.
+    # Kept as a defensive check; confirm before removing.
+    if len(normalised_anchors) == 0:
+        return tool_error(
+            "guided mode: anchors list is empty (pass at least one {session_id, around_message_id})",
+            success=False,
+        )
+
+    # 2. Window clamp (shared across all anchors). Matches the existing
+    #    limit-clamp pattern (silent). Non-int input is coerced; anything
+    #    that cannot be coerced becomes 5 before clamping.
+    if not isinstance(window, int):
+        try:
+            window = int(window)
+        except (TypeError, ValueError):
+            window = 5
+    window = max(1, min(window, 20))
+
+    # 3. Helper: resolve to lineage root (used by the current-lineage
+    #    rejection check and the rebind safety net below). Follows
+    #    ``parent_session_id`` links; ``visited`` stops a cyclic chain from
+    #    looping forever, and any lookup failure ends the walk at the last
+    #    session id reached.
+    def _resolve_to_parent(sid: str) -> str:
+        visited = set()
+        cur = sid
+        while cur and cur not in visited:
+            visited.add(cur)
+            try:
+                meta = db.get_session(cur)
+                if not meta:
+                    break
+                parent = meta.get("parent_session_id")
+                if parent:
+                    cur = parent
+                else:
+                    break
+            except Exception as e:
+                logging.debug("Error resolving parent for %s: %s", cur, e, exc_info=True)
+                break
+        return cur
+
+    current_root = _resolve_to_parent(current_session_id) if current_session_id else None
+
+    # 4. Drill into each anchor. Per-anchor errors are recorded inline
+    #    rather than aborting the whole call — the agent can still use
+    #    successful drills even if one anchor was malformed.
+    windows_out: List[Dict[str, Any]] = []
+    for raw_anchor in normalised_anchors:
+        if not isinstance(raw_anchor, dict):
+            windows_out.append({
+                "success": False,
+                "error": "anchor must be a dict with session_id + around_message_id",
+            })
+            continue
+
+        a_sid = raw_anchor.get("session_id")
+        a_msg = raw_anchor.get("around_message_id")
+
+        if not a_sid or not isinstance(a_sid, str) or not a_sid.strip():
+            windows_out.append({
+                "success": False,
+                "error": "anchor missing session_id",
+                "anchor": raw_anchor,
+            })
+            continue
+        a_sid = a_sid.strip()
+
+        # int() accepts numeric strings, so ids that arrive as "123" work.
+        try:
+            a_msg_id = int(a_msg)
+        except (TypeError, ValueError):
+            windows_out.append({
+                "success": False,
+                "error": "anchor missing or non-integer around_message_id",
+                "anchor": raw_anchor,
+            })
+            continue
+
+        # Current-lineage rejection: per-anchor, so other valid anchors
+        # in a multi-anchor call still drill.
+        if current_root:
+            target_root = _resolve_to_parent(a_sid)
+            if target_root and target_root == current_root:
+                windows_out.append({
+                    "success": False,
+                    "error": "anchor rejects drill-down into the current session lineage — those messages are already in your active context",
+                    "session_id": a_sid,
+                    "around_message_id": a_msg_id,
+                })
+                continue
+
+        # Session existence check. A lookup failure is treated the same as
+        # a missing session.
+        try:
+            session_meta = db.get_session(a_sid) or {}
+        except Exception as e:
+            logging.debug("get_session failed for %s: %s", a_sid, e, exc_info=True)
+            session_meta = {}
+        if not session_meta:
+            windows_out.append({
+                "success": False,
+                "error": f"session_id not found: {a_sid}",
+                "session_id": a_sid,
+                "around_message_id": a_msg_id,
+            })
+            continue
+
+        # Fetch the window + bookends. ``get_anchored_view`` filters tool-response
+        # noise from the window (anchor itself is preserved regardless of role)
+        # and returns up to ``bookend`` user/assistant messages from the session
+        # head and tail — but only when those slices don't overlap the window.
+        # See SessionDB.get_anchored_view for the contract.
+        try:
+            view = db.get_anchored_view(a_sid, a_msg_id, window=window, bookend=3)
+            messages = view.get("window") or []
+            bookend_start = view.get("bookend_start") or []
+            bookend_end = view.get("bookend_end") or []
+        except Exception as e:
+            logging.debug("get_anchored_view failed: %s", e, exc_info=True)
+            windows_out.append({
+                "success": False,
+                "error": f"failed to load messages around {a_msg_id} in {a_sid}: {e}",
+                "session_id": a_sid,
+                "around_message_id": a_msg_id,
+            })
+            continue
+
+        # Safety net: a caller (the agent, memory, or a legacy client) may
+        # pair a parent/lineage-root session_id with a message_id that
+        # actually lives in a different session of the same lineage. Fast
+        # mode emits the raw owning sid, but guided stays forgiving for
+        # callers that still pass the mismatched pair.
+        #
+        # Recovery rule: locate the real owning session by message id; if
+        # that session is in the same lineage as ``a_sid``, transparently
+        # rebind and refetch. Record a warning so the rebind is visible.
+        rebind_warning = None
+        if not messages:
+            owning = None
+            # Prefer a helper if SessionDB exposes one (forward-compat).
+            try:
+                if hasattr(db, "get_session_id_for_message"):
+                    owning = db.get_session_id_for_message(a_msg_id)
+            except Exception as e:
+                logging.debug("get_session_id_for_message failed: %s", e, exc_info=True)
+                owning = None
+            # Fallback: query through SessionDB._conn (the canonical connection).
+            # NOTE(review): this reads ``_conn`` without taking ``db._lock`` —
+            # confirm that is safe for the connection's threading mode.
+            if not owning:
+                try:
+                    conn = getattr(db, "_conn", None)
+                    if conn is not None:
+                        row = conn.execute(
+                            "SELECT session_id FROM messages WHERE id = ?",
+                            (a_msg_id,),
+                        ).fetchone()
+                        # sqlite3.Row supports indexing; tuple fallback works too.
+                        owning = row[0] if row else None
+                except Exception as e:
+                    logging.debug("owning-session lookup failed: %s", e, exc_info=True)
+                    owning = None
+
+            if owning and owning != a_sid:
+                # Check same lineage (walk both up to roots).
+                a_root = _resolve_to_parent(a_sid)
+                o_root = _resolve_to_parent(owning)
+                if a_root and o_root and a_root == o_root:
+                    try:
+                        rebind_view = db.get_anchored_view(
+                            owning, a_msg_id, window=window, bookend=3
+                        )
+                        messages = rebind_view.get("window") or []
+                        bookend_start = rebind_view.get("bookend_start") or []
+                        bookend_end = rebind_view.get("bookend_end") or []
+                    except Exception as e:
+                        logging.debug("rebind get_anchored_view failed: %s", e, exc_info=True)
+                        messages = []
+                    if messages:
+                        # NOTE(review): the text says "child of", but the check
+                        # above only establishes a shared lineage root — the
+                        # owning session could also be an ancestor or sibling.
+                        rebind_warning = (
+                            f"around_message_id {a_msg_id} lives in {owning} "
+                            f"(child of {a_sid}); rebound transparently"
+                        )
+                        # Re-fetch session_meta for the actual owning session;
+                        # on failure the requested session's meta is kept.
+                        try:
+                            session_meta = db.get_session(owning) or session_meta
+                        except Exception:
+                            pass
+                        a_sid = owning
+
+        if not messages:
+            windows_out.append({
+                "success": False,
+                "error": f"around_message_id {a_msg_id} not in session_id {a_sid}",
+                "session_id": a_sid,
+                "around_message_id": a_msg_id,
+            })
+            continue
+
+        # Wrap with anchor flag + boundary counts. Rows with an id below the
+        # anchor count as "before"; every other non-anchor row as "after".
+        out_messages = []
+        messages_before = 0
+        messages_after = 0
+        for m in messages:
+            is_anchor = m.get("id") == a_msg_id
+            if not is_anchor and m.get("id", 0) < a_msg_id:
+                messages_before += 1
+            elif not is_anchor:
+                messages_after += 1
+            entry = {
+                "id": m.get("id"),
+                "role": m.get("role"),
+                "content": m.get("content"),
+                "tool_name": m.get("tool_name"),
+                "tool_calls": m.get("tool_calls") or None,
+                "tool_call_id": m.get("tool_call_id"),
+                "timestamp": m.get("timestamp"),
+            }
+            if is_anchor:
+                entry["anchor"] = True
+            # Strip None-valued optional fields to keep payload tight (keep
+            # 'content' even if None, since absent-content is meaningful).
+            entry = {k: v for k, v in entry.items() if v is not None or k in ("content",)}
+            out_messages.append(entry)
+
+        # Bookend rows carry a reduced field set (no tool_* fields, no anchor
+        # flag); 'content' is again kept even when None.
+        def _shape_bookend(m: Dict[str, Any]) -> Dict[str, Any]:
+            entry = {
+                "id": m.get("id"),
+                "role": m.get("role"),
+                "content": m.get("content"),
+                "timestamp": m.get("timestamp"),
+            }
+            return {k: v for k, v in entry.items() if v is not None or k in ("content",)}
+
+        out_bookend_start = [_shape_bookend(m) for m in bookend_start]
+        out_bookend_end = [_shape_bookend(m) for m in bookend_end]
+
+        success_entry = {
+            "success": True,
+            "session_id": a_sid,
+            "around_message_id": a_msg_id,
+            "session_meta": {
+                "when": _format_timestamp(session_meta.get("started_at")),
+                "source": session_meta.get("source"),
+                "model": session_meta.get("model"),
+                "title": session_meta.get("title"),
+            },
+            "messages": out_messages,
+            "messages_before": messages_before,
+            "messages_after": messages_after,
+            "bookend_start": out_bookend_start,
+            "bookend_end": out_bookend_end,
+        }
+        if rebind_warning:
+            success_entry["warning"] = rebind_warning
+        windows_out.append(success_entry)
+
+    # 5. Top-level response shape. ``windows`` is always a list. For
+    #    single-anchor calls (the common case), we mirror the legacy fields
+    #    at the top level so existing callers / tests continue to work
+    #    without branching on len(windows). Top-level ``success`` is True
+    #    even when some windows of a multi-anchor call failed; callers must
+    #    inspect each window's own ``success`` flag.
+    response: Dict[str, Any] = {
+        "success": True,
+        "mode": "guided",
+        "window": window,
+        "windows": windows_out,
+        "anchor_count": len(windows_out),
+    }
+    if len(windows_out) == 1:
+        only = windows_out[0]
+        if only.get("success"):
+            response.update({
+                "session_id": only["session_id"],
+                "around_message_id": only["around_message_id"],
+                "session_meta": only["session_meta"],
+                "messages": only["messages"],
+                "messages_before": only["messages_before"],
+                "messages_after": only["messages_after"],
+                "bookend_start": only.get("bookend_start", []),
+                "bookend_end": only.get("bookend_end", []),
+            })
+            if only.get("warning"):
+                response["warning"] = only["warning"]
+        else:
+            # Single-anchor failure: surface as a top-level tool_error so
+            # callers don't have to dig into the windows array for the
+            # error string. Keeps the legacy single-anchor failure shape.
+            return tool_error(only.get("error", "guided drill-down failed"), success=False)
+
+    return json.dumps(response, ensure_ascii=False)
+
+
 def session_search(
-    query: str,
+    query: str = "",
    role_filter: str = None,
    limit: int = 3,
    db=None,
    current_session_id: str = None,
+    mode: str = None,
+    # Guided-mode-only parameters: anchored drill-down into one or more
+    # session+message pairs. Required when mode='guided', ignored otherwise.
+    # Use either the single-anchor pair (session_id + around_message_id) or
+    # the multi-anchor list (anchors=[{session_id, around_message_id}, ...]).
+    session_id: str = None,
+    around_message_id: int = None,
+    window: int = 5,
+    anchors: list = None,
+    # Fast-mode-only temporal bias for ranking. ``None`` keeps FTS5's BM25
+    # ordering (time-neutral); ``"newest"`` / ``"oldest"`` make timestamp
+    # the primary key with rank as the tiebreaker. Silently ignored in
+    # other modes — see schema description.
+    sort: str = None,
 ) -> str:
    """
-    Search past sessions and return focused summaries of matching conversations.
+    Search past sessions, or drill into a specific one.

-    Uses FTS5 to find matches, then summarizes the top sessions with the
-    configured auxiliary session_search model.
-    The current session is excluded from results since the agent already has that context.
+    Modes:
+      * fast    — FTS5 snippets + ±1 message context. Cheap discovery.
+      * summary — fetch full session(s), truncate to 100k chars, run aux LLM
+                  recap. Cross-session synthesis at ~30s tool-side cost.
+      * guided  — anchored drill-down. Caller supplies session_id +
+                  around_message_id (typically from a prior fast hit's
+                  match_message_id field) and gets a window of messages
+                  around the anchor with no LLM call and no truncation.
    """
    if db is None:
        try:
@@ -346,6 +810,52 @@ def session_search(
            from hermes_state import format_session_db_unavailable
            return tool_error(format_session_db_unavailable(), success=False)

+    # Mode normalisation. ``None`` / empty string / non-string → fall back to
+    # the user's configured default (via ~/.hermes/config.yaml, see
+    # ``_resolve_user_default_mode``). Defaults to "fast" if unset. An explicit
+    # "fast" / "summary" / "guided" (or a known alias such as "summarize" or
+    # "drilldown") wins regardless of config. Any other string also falls
+    # back to the resolved user default rather than to a hard-coded mode, so
+    # a typo still honours the user's configuration.
+    if not isinstance(mode, str) or not mode.strip():
+        mode = _resolve_user_default_mode()
+    else:
+        mode = mode.strip().lower()
+    if mode in ("summarized", "summarise", "summarize", "deep"):
+        mode = "summary"
+    if mode in ("drill", "drilldown", "drill-down", "anchor", "around"):
+        mode = "guided"
+    if mode not in ("fast", "summary", "guided"):
+        mode = _resolve_user_default_mode()
+
+    # Normalise sort — only "newest"/"oldest" are accepted; anything else
+    # collapses to None (FTS5 rank-only). Sort affects fast mode only; logged
+    # and ignored elsewhere so misuse is visible but non-fatal.
+    sort_norm: Optional[str] = None
+    if isinstance(sort, str):
+        candidate = sort.strip().lower()
+        if candidate in ("newest", "oldest"):
+            sort_norm = candidate
+    if sort_norm and mode != "fast":
+        logging.debug(
+            "session_search: sort=%r is fast-mode only; ignored for mode=%s",
+            sort_norm, mode,
+        )
+        sort_norm = None
+
+    # Guided mode is a different shape: it doesn't search, it drills. Branch
+    # before FTS5 so we don't pay for anything we don't use, and so missing-arg
+    # validation happens up front.
+    if mode == "guided":
+        return _guided_drill_down(
+            db=db,
+            session_id=session_id,
+            around_message_id=around_message_id,
+            window=window,
+            current_session_id=current_session_id,
+            anchors=anchors,
+        )
+
    # Defensive: models (especially open-source) may send non-int limit values
    # (None when JSON null, string "int", or even a type object).  Coerce to a
    # safe integer before any arithmetic/comparison to prevent TypeError.
@@ -354,7 +864,7 @@ def session_search(
            limit = int(limit)
        except (TypeError, ValueError):
            limit = 3
-    limit = max(1, min(limit, 5))  # Clamp to [1, 5]
+    limit = max(1, min(limit, 10))  # Clamp to [1, 10]

    # Recent sessions mode: when query is empty, return metadata for recent sessions.
    # No LLM calls — just DB queries for titles, previews, timestamps.
@@ -364,23 +874,30 @@ def session_search(
    query = query.strip()

    try:
-        # Parse role filter
+        # Parse role filter. Defaults to user+assistant; tool messages are
+        # usually noisy and rarely the signal. Caller opts back in via
+        # role_filter='user,assistant,tool' or 'tool'.
        role_list = None
        if role_filter and role_filter.strip():
            role_list = [r.strip() for r in role_filter.split(",") if r.strip()]
+        else:
+            role_list = ["user", "assistant"]

-        # FTS5 search -- get matches ranked by relevance
+        # FTS5 search -- get matches ranked by relevance (with optional
+        # temporal bias when sort is set; see param docs).
        raw_results = db.search_messages(
            query=query,
            role_filter=role_list,
            exclude_sources=list(_HIDDEN_SESSION_SOURCES),
            limit=50,  # Get more matches to find unique sessions
            offset=0,
+            sort=sort_norm,
        )

        if not raw_results:
            return json.dumps({
                "success": True,
+                "mode": mode,
                "query": query,
                "results": [],
                "count": 0,
@@ -421,6 +938,13 @@ def session_search(
        # Group by resolved (parent) session_id, dedup, skip the current
        # session lineage. Compression and delegation create child sessions
        # that still belong to the same active conversation.
+        #
+        # IMPORTANT: group BY parent (one entry per conversation lineage), but
+        # preserve the raw FTS5 session_id on the surviving result. Only the
+        # raw sid pairs validly with ``match_message_id``; rewriting it to the
+        # parent produces a {parent_sid, child_message_id} handle that guided
+        # mode cannot resolve. ``parent_session_id`` is exposed separately for
+        # the lineage-root link the user expects to see.
        seen_sessions = {}
        for result in raw_results:
            raw_sid = result["session_id"]
@@ -433,11 +957,61 @@ def session_search(
                continue
            if resolved_sid not in seen_sessions:
                result = dict(result)
-                result["session_id"] = resolved_sid
+                # Keep raw_sid as session_id; expose lineage root separately.
+                result["session_id"] = raw_sid
+                if resolved_sid and resolved_sid != raw_sid:
+                    result["parent_session_id"] = resolved_sid
                seen_sessions[resolved_sid] = result
            if len(seen_sessions) >= limit:
                break

+        if mode == "fast":
+            results = []
+            for lineage_root, match_info in seen_sessions.items():
+                # Emit (raw_sid + match_message_id) so the agent's follow-up
+                # guided call has a valid {session_id, around_message_id}.
+                # ``parent_session_id`` (if different) carries the lineage root.
+                hit_sid = match_info.get("session_id") or lineage_root
+                try:
+                    session_meta = db.get_session(lineage_root) or {}
+                except Exception:
+                    session_meta = {}
+                snippet = match_info.get("snippet") or ""
+                context = match_info.get("context") or []
+                if not isinstance(context, list):
+                    context = []
+                entry = {
+                    "session_id": hit_sid,
+                    "when": _format_timestamp(
+                        session_meta.get("started_at") or match_info.get("session_started")
+                    ),
+                    "source": session_meta.get("source") or match_info.get("source", "unknown"),
+                    "model": session_meta.get("model") or match_info.get("model") or "unknown",
+                    "matched_role": match_info.get("role"),
+                    "match_message_id": match_info.get("id"),
+                    "title": session_meta.get("title") or None,
+                    "snippet": snippet,
+                    "context": context,
+                    "summary": "[Search hit — summary not generated in fast mode] Use snippet/context fields, or set mode='summary' for LLM-generated recall.",
+                }
+                # Only emit parent_session_id when the FTS5 row lives in a
+                # child of the displayed lineage — keeps the common case
+                # (no delegation/compression) tidy.
+                parent_sid = match_info.get("parent_session_id")
+                if parent_sid and parent_sid != hit_sid:
+                    entry["parent_session_id"] = parent_sid
+                results.append(entry)
+
+            return json.dumps({
+                "success": True,
+                "mode": "fast",
+                "query": query,
+                "results": results,
+                "count": len(results),
+                "sessions_searched": len(seen_sessions),
+                "message": "Fast search returned FTS snippets without LLM summarization. Use mode='summary' for focused summaries when needed.",
+            }, ensure_ascii=False)
+
        # Prepare all sessions for parallel summarization
        tasks = []
        for session_id, match_info in seen_sessions.items():
@@ -458,12 +1032,12 @@ def session_search(
                )

        # Summarize all sessions in parallel
-        async def _summarize_all() -> List[Union[str, Exception]]:
+        async def _summarize_all() -> List[Union[tuple, Exception]]:
            """Summarize all sessions with bounded concurrency."""
            max_concurrency = min(_get_session_search_max_concurrency(), max(1, len(tasks)))
            semaphore = asyncio.Semaphore(max_concurrency)

-            async def _bounded_summary(text: str, meta: Dict[str, Any]) -> Optional[str]:
+            async def _bounded_summary(text: str, meta: Dict[str, Any]):
                async with semaphore:
                    return await _summarize_session(text, query, meta)

@@ -493,13 +1067,27 @@ def session_search(
            }, ensure_ascii=False)

        summaries = []
+        aux_total = {
+            "model": None,
+            "input_tokens": 0,
+            "output_tokens": 0,
+            "cache_read_tokens": 0,
+            "cache_creation_tokens": 0,
+            "call_count": 0,
+        }
        for (session_id, match_info, conversation_text, session_meta), result in zip(tasks, results):
+            usage: Optional[Dict[str, Any]] = None
            if isinstance(result, Exception):
                logging.warning(
                    "Failed to summarize session %s: %s",
                    session_id, result, exc_info=True,
                )
-                result = None
+                summary_text = None
+            elif isinstance(result, tuple):
+                summary_text, usage = result
+            else:
+                # Defensive: tolerate a bare string/None rather than a (text, usage) tuple.
+                summary_text, usage = result, None

            # Prefer resolved parent session metadata over FTS5 match metadata.
            # match_info carries source/model from the *child* session that contained
@@ -515,23 +1103,39 @@ def session_search(
                "model": session_meta.get("model") or match_info.get("model"),
            }

-            if result:
-                entry["summary"] = result
+            if summary_text:
+                entry["summary"] = summary_text
            else:
                # Fallback: raw preview so matched sessions aren't silently
                # dropped when the summarizer is unavailable (fixes #3409).
                preview = (conversation_text[:500] + "\n…[truncated]") if conversation_text else "No preview available."
                entry["summary"] = f"[Raw preview — summarization unavailable]\n{preview}"

+            if usage:
+                entry["aux_usage"] = usage
+                aux_total["model"] = aux_total["model"] or usage.get("model")
+                aux_total["input_tokens"] += usage["input_tokens"]
+                aux_total["output_tokens"] += usage["output_tokens"]
+                aux_total["cache_read_tokens"] += usage["cache_read_tokens"]
+                aux_total["cache_creation_tokens"] += usage["cache_creation_tokens"]
+                aux_total["call_count"] += 1
+
            summaries.append(entry)

-        return json.dumps({
+        payload = {
            "success": True,
+            "mode": "summary",
            "query": query,
            "results": summaries,
            "count": len(summaries),
            "sessions_searched": len(seen_sessions),
-        }, ensure_ascii=False)
+        }
+        # Only surface aux_usage_total when at least one call reported usage.
+        # Test mocks and providers that report none leave aux_total all-zero,
+        # so omit it rather than pollute the payload.
+        if aux_total["call_count"]:
+            payload["aux_usage_total"] = aux_total
+        return json.dumps(payload, ensure_ascii=False)

    except Exception as e:
        logging.error("Session search failed: %s", e, exc_info=True)
@@ -539,7 +1143,7 @@ def session_search(


 def check_session_search_requirements() -> bool:
-    """Requires SQLite state database and an auxiliary text model."""
+    """Requires SQLite state database; summary mode also needs an auxiliary model."""
    try:
        from hermes_state import DEFAULT_DB_PATH
        return DEFAULT_DB_PATH.parent.exists()
@@ -550,44 +1154,101 @@ def check_session_search_requirements() -> bool:
 SESSION_SEARCH_SCHEMA = {
    "name": "session_search",
    "description": (
-        "Search your long-term memory of past conversations, or browse recent sessions. This is your recall -- "
-        "every past session is searchable, and this tool summarizes what happened.\n\n"
-        "TWO MODES:\n"
-        "1. Recent sessions (no query): Call with no arguments to see what was worked on recently. "
-        "Returns titles, previews, and timestamps. Zero LLM cost, instant. "
-        "Start here when the user asks what were we working on or what did we do recently.\n"
-        "2. Keyword search (with query): Search for specific topics across all past sessions. "
-        "Returns LLM-generated summaries of matching sessions.\n\n"
-        "USE THIS PROACTIVELY when:\n"
-        "- The user says 'we did this before', 'remember when', 'last time', 'as I mentioned'\n"
-        "- The user asks about a topic you worked on before but don't have in current context\n"
-        "- The user references a project, person, or concept that seems familiar but isn't in memory\n"
-        "- You want to check if you've solved a similar problem before\n"
-        "- The user asks 'what did we do about X?' or 'how did we fix Y?'\n\n"
-        "Don't hesitate to search when it is actually cross-session -- it's fast and cheap. "
-        "Better to search and confirm than to guess or ask the user to repeat themselves.\n\n"
-        "Search syntax: keywords joined with OR for broad recall (elevenlabs OR baseten OR funding), "
-        "phrases for exact match (\"docker networking\"), boolean (python NOT java), prefix (deploy*). "
-        "IMPORTANT: Use OR between keywords for best results — FTS5 defaults to AND which misses "
-        "sessions that only mention some terms. If a broad OR query returns nothing, try individual "
-        "keyword searches in parallel. Returns summaries of the top matching sessions."
+        "Search past sessions stored in the local session DB. Three modes plus a default "
+        "browsing mode when no arguments are passed. All three modes operate on the same "
+        "FTS5-indexed message store; they differ in what they return and at what cost.\n\n"
+        "MODES\n\n"
+        "  • mode='fast' — FTS5 snippets across matched sessions. No LLM call. Returns one "
+        "entry per matched session with session_id, match_message_id, a one-message context "
+        "window, and metadata. Use this as the starting move for any recall question — "
+        "discovery and state reconstruction both. The match_message_id is the anchor you "
+        "pass to guided.\n\n"
+        "  • mode='guided' — REQUIRES anchors from a prior fast call. Returns a window of "
+        "raw messages around each anchor plus session bookends (bookend_start, bookend_end). "
+        "No LLM call, no truncation. Single or multi-anchor: pass "
+        "``anchors=[{session_id, around_message_id}, ...]``. Each anchor returns its own "
+        "window in the response's ``windows`` array. Bookends are the first/last "
+        "user+assistant messages of the session, empty when the window already overlaps "
+        "the session head/tail. Tool messages are filtered from the window (the anchor "
+        "itself is preserved even if role='tool').\n\n"
+        "  • mode='summary' — LLM-generated prose synthesis across matched sessions. Issues "
+        "one auxiliary-model call per session in the hit list, so cost scales with whatever "
+        "auxiliary model (or main model fallback) is configured. Returns aux token usage in "
+        "the response (``aux_usage`` per call, ``aux_usage_total`` per batch). Reach for "
+        "this when you genuinely need cross-session prose synthesis in one shot.\n\n"
+        "  • No query, no mode — browses recent sessions chronologically. Returns titles, "
+        "previews, timestamps. No LLM call.\n\n"
+        "DEFAULT MODE\n\n"
+        "  When ``mode=`` is unset, the resolver checks ``auxiliary.session_search.default_mode`` "
+        "in ~/.hermes/config.yaml (accepted values: ``fast`` | ``summary``). If the user "
+        "has set a default, honour it on the first call. With no config, the default is "
+        "``fast``. An explicit ``mode=`` argument always wins.\n\n"
+        "ANCHOR CONTRACT\n\n"
+        "  An anchor is the pair (session_id, around_message_id). The session_id MUST be "
+        "the raw owning session of around_message_id — guided rejects anchors where the "
+        "message_id does not exist in the named session. Fast results return both "
+        "session_id (raw owning) and parent_session_id (when different, for display "
+        "context only). Pair session_id with match_message_id from the same fast hit; do "
+        "not substitute parent_session_id.\n\n"
+        "FTS5 SYNTAX\n\n"
+        "  FTS5 defaults to AND across terms — multi-word queries require all terms to "
+        "match. Use OR explicitly for broader recall (``alpha OR beta OR gamma``), quoted "
+        "phrases for exact match (``\"docker networking\"``), boolean (``python NOT java``), "
+        "or prefix wildcards (``deploy*``).\n\n"
+        "WHEN TO USE\n\n"
+        "  Before reaching for ``gh``, web search, or filesystem inspection on questions "
+        "about prior work — what was discussed, what was decided, where an artefact was "
+        "created. The session DB carries what was said when; external tools show current "
+        "world state."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
-                "description": "Search query — keywords, phrases, or boolean expressions to find in past sessions. Omit this parameter entirely to browse recent sessions instead (returns titles, previews, timestamps with no LLM cost).",
+                "description": "Search query (modes 'fast' and 'summary'). Keywords, phrases, or boolean expressions to find in past sessions. Omit this parameter entirely to browse recent sessions instead. Ignored when mode='guided'.",
            },
            "role_filter": {
                "type": "string",
-                "description": "Optional: only search messages from specific roles (comma-separated). E.g. 'user,assistant' to skip tool outputs.",
+                "description": "Optional: only search messages from specific roles (comma-separated). Defaults to 'user,assistant' for fast/summary modes — tool messages are usually noisy (large outputs, serialised tool calls). Pass 'user,assistant,tool' to include tool output (debugging tool behaviour) or 'tool' to search tool output only. Ignored when mode='guided'.",
            },
            "limit": {
                "type": "integer",
-                "description": "Max sessions to summarize (default: 3, max: 5).",
+                "description": "Max sessions to return (default: 3, max: 10). Bump higher (5–10) when the user wants to be in the retrieval loop and pick the right anchor for a guided drill-down. Ignored when mode='guided' (which returns one anchored window per anchor).",
                "default": 3,
            },
+            "mode": {
+                "type": "string",
+                "enum": ["fast", "summary", "guided"],
+                "description": (
+                    "fast — FTS5 snippets, no LLM. Default. "
+                    "guided — requires anchors from a prior fast call; returns raw message window per anchor. "
+                    "summary — LLM synthesis across matched sessions; opt-in, costs per aux-model call."
+                ),
+                "default": "fast",
+            },
+            "anchors": {
+                "type": "array",
+                "description": "Required for mode='guided'. List of {session_id, around_message_id} dicts to drill into. Copy session_id and match_message_id verbatim from prior fast-mode results — they pair as a single self-consistent handle. Do NOT substitute parent_session_id (shown for display context only; pairs incorrectly with match_message_id). One anchor is fine when the topic lives in a single session; for multi-session catch-up (topic touched across several recent sessions), pass the top 2–3 fast hits as separate anchors in ONE call — each gets its own window + bookends in the response's 'windows' array.",
+                "items": {
+                    "type": "object",
+                    "properties": {
+                        "session_id": {"type": "string"},
+                        "around_message_id": {"type": "integer"},
+                    },
+                    "required": ["session_id", "around_message_id"],
+                },
+            },
+            "window": {
+                "type": "integer",
+                "description": "Mode='guided' only. Number of messages to return on each side of each anchor (the anchor itself is always included). Shared across all anchors in a multi-anchor call. Clamped to [1, 20]. Default 5.",
+                "default": 5,
+            },
+            "sort": {
+                "type": "string",
+                "enum": ["newest", "oldest"],
+                "description": "Mode='fast' only. Temporal bias on top of FTS5 ranking. Omit to keep relevance-only ordering (the default, suitable for exploratory recall — 'what do we know about X'). Set 'newest' for recency-shaped questions ('where did we leave X', 'latest status of Y') so recent matches surface first with rank as the tiebreaker. Set 'oldest' for origin-shaped questions ('how did X start', 'first time we discussed Y') so the earliest matches surface first. Silently ignored in summary / guided / recent modes — for temporal narrative across sessions, drive fast with sort, then drill the right anchors with guided.",
+            },
        },
        "required": [],
    },
@@ -605,6 +1266,12 @@ registry.register(
        query=args.get("query") or "",
        role_filter=args.get("role_filter"),
        limit=args.get("limit", 3),
+        mode=args.get("mode"),
+        session_id=args.get("session_id"),
+        around_message_id=args.get("around_message_id"),
+        window=args.get("window", 5),
+        anchors=args.get("anchors"),
+        sort=args.get("sort"),
        db=kw.get("db"),
        current_session_id=kw.get("current_session_id")),
    check_fn=check_session_search_requirements,