agent/think_scrubber.py

"""Stateful scrubber for reasoning/thinking blocks in streamed assistant text.

``run_agent._strip_think_blocks`` is regex-based and correct for a complete
string, but when it runs *per-delta* in ``_fire_stream_delta`` it destroys
the state that downstream consumers (CLI ``_stream_delta``, gateway
``GatewayStreamConsumer._filter_and_accumulate``) rely on.

Concretely, when MiniMax-M2.7 streams

    delta1 = "<think>"
    delta2 = "Let me check their config"
    delta3 = "</think>"

the per-delta regex erases delta1 entirely (case 2: unterminated-open at
boundary matches ``^<think>...``), so the downstream state machine never
sees the open tag, treats delta2 as regular content, and leaks reasoning
to the user.  Consumers that don't run their own state machine (ACP,
api_server, TTS) never had any defence at all — they just emitted
whatever survived the upstream regex.

This module centralises the tag-suppression state machine at the
upstream layer so every stream_delta_callback sees text that has
already had reasoning blocks removed.  Partial tags at delta
boundaries are held back until the next delta resolves them, and
end-of-stream flushing surfaces any held-back prose that turned out
not to be a real tag.

Usage::

    scrubber = StreamingThinkScrubber()
    for delta in stream:
        visible = scrubber.feed(delta)
        if visible:
            emit(visible)
    tail = scrubber.flush()  # at end of stream
    if tail:
        emit(tail)

The scrubber is re-entrant per agent instance.  Call ``reset()`` at
the top of each new turn so a hung block from an interrupted prior
stream cannot taint the next turn's output.

Tag variants handled (case-insensitive):
  ``<think>``, ``<thinking>``, ``<reasoning>``, ``<thought>``,
  ``<REASONING_SCRATCHPAD>``.

Block-boundary rule for opens: an opening tag is only treated as a
reasoning-block opener when it appears at the start of the stream,
after a newline (optionally followed by whitespace), or when only
whitespace has been emitted on the current line.  This prevents prose
that *mentions* the tag name (e.g. ``"use <think> tags here"``) from
being incorrectly suppressed.  Closed pairs (``<think>X</think>``) are
always suppressed regardless of boundary; a closed pair is an
intentional, bounded construct.
"""

from __future__ import annotations

from typing import Tuple

__all__ = ["StreamingThinkScrubber"]


class StreamingThinkScrubber:
    """Stateful scrubber for streaming reasoning/thinking blocks.

    State machine:
      - ``_in_block``: True while inside an opened block, waiting for
        a close tag.  All text inside is discarded.
      - ``_buf``: held-back partial-tag tail.  Emitted / discarded on
        the next ``feed()`` call or by ``flush()``.
      - ``_last_emitted_ended_newline``: True iff the most recent
        emission to the consumer ended with ``\\n``, or nothing has
        been emitted yet (start-of-stream counts as a boundary).  Used
        to decide whether an open tag at buffer position 0 is at a
        block boundary.

    Tag matching is case-insensitive.  All index arithmetic is done on
    a *length-preserving* case-folded copy of the text (see ``_fold``)
    so that offsets found in the folded copy are always valid offsets
    into the original text.
    """

    _OPEN_TAG_NAMES: Tuple[str, ...] = (
        "think",
        "thinking",
        "reasoning",
        "thought",
        "REASONING_SCRATCHPAD",
    )

    # Materialise literal tag strings so the hot path does string
    # operations, not regex compilation per feed().
    _OPEN_TAGS: Tuple[str, ...] = tuple(f"<{name}>" for name in _OPEN_TAG_NAMES)
    _CLOSE_TAGS: Tuple[str, ...] = tuple(f"</{name}>" for name in _OPEN_TAG_NAMES)

    # Pre-compute the longest tag (for partial-tag hold-back bound).
    _MAX_TAG_LEN: int = max(len(tag) for tag in _OPEN_TAGS + _CLOSE_TAGS)

    def __init__(self) -> None:
        self._in_block: bool = False
        self._buf: str = ""
        self._last_emitted_ended_newline: bool = True

    def reset(self) -> None:
        """Reset all state.  Call at the top of every new turn."""
        self._in_block = False
        self._buf = ""
        self._last_emitted_ended_newline = True

    def feed(self, text: str) -> str:
        """Feed one delta; return the scrubbed visible portion.

        May return an empty string when the entire delta is reasoning
        content or is being held back pending resolution of a partial
        tag at the boundary.
        """
        if not text:
            return ""
        buf = self._buf + text
        self._buf = ""
        out: list[str] = []

        while buf:
            if self._in_block:
                # Hunt for the earliest close tag.
                close_idx, close_len = self._find_first_tag(
                    buf, self._CLOSE_TAGS,
                )
                if close_idx == -1:
                    # No close yet — hold back a potential partial
                    # close-tag prefix; discard everything else.
                    held = self._max_partial_suffix(buf, self._CLOSE_TAGS)
                    self._buf = buf[-held:] if held else ""
                    break
                # Found close: discard block content + tag, continue.
                buf = buf[close_idx + close_len:]
                self._in_block = False
                continue

            # Priority 1 — closed <tag>X</tag> pair anywhere in buf.
            # Closed pairs are always an intentional, bounded
            # construct (even mid-line prose containing an open/close
            # pair is almost certainly a model leaking reasoning
            # inline), so no boundary gating.
            pair = self._find_earliest_closed_pair(buf)
            # Priority 2 — unterminated open tag at a block boundary.
            # Boundary-gated so prose that mentions '<think>' isn't
            # over-stripped.
            open_idx, open_len = self._find_open_at_boundary(buf, out)

            # Pick whichever match comes earliest in the buffer.
            if pair is not None and (open_idx == -1 or pair[0] <= open_idx):
                start_idx, end_idx = pair
                self._emit_visible(out, buf[:start_idx])
                buf = buf[end_idx:]
                continue

            if open_idx != -1:
                # Unterminated open at boundary — emit preceding,
                # enter block, continue loop with remainder.
                self._emit_visible(out, buf[:open_idx])
                self._in_block = True
                buf = buf[open_idx + open_len:]
                continue

            # No resolvable tag structure in buf.  Hold back any
            # partial-tag prefix at the tail so a split tag across
            # deltas isn't missed, then emit the rest.
            held = max(
                self._max_partial_suffix(buf, self._OPEN_TAGS),
                self._max_partial_suffix(buf, self._CLOSE_TAGS),
            )
            if held:
                self._emit_visible(out, buf[:-held])
                self._buf = buf[-held:]
            else:
                self._emit_visible(out, buf)
            break

        return "".join(out)

    def flush(self) -> str:
        """End-of-stream flush.

        If still inside an unterminated block, held-back content is
        discarded — leaking partial reasoning is worse than a
        truncated answer.  Otherwise the held-back partial-tag tail is
        emitted (after orphan-close stripping); it turned out not to
        be a real tag prefix.
        """
        if self._in_block:
            self._buf = ""
            self._in_block = False
            return ""
        tail = self._buf
        self._buf = ""
        if not tail:
            return ""
        tail = self._strip_orphan_close_tags(tail)
        if tail:
            self._last_emitted_ended_newline = tail.endswith("\n")
        return tail

    # ── internal helpers ───────────────────────────────────────────────

    @staticmethod
    def _fold(text: str) -> str:
        """Return a lower-cased copy of *text* with the SAME length.

        ``str.lower()`` can change the string length (U+0130 lowers to
        two code points), which would make indices found in the
        lowered copy invalid for slicing the original.  ``lower()``
        never shortens a string, so an unchanged length means every
        character mapped one-to-one and the fast path is safe.
        Otherwise fall back to per-character lowering and leave any
        character whose lowercase form is not a single code point
        untouched (such characters can never be part of a tag).
        """
        lowered = text.lower()
        if len(lowered) == len(text):
            return lowered
        return "".join(
            ch.lower() if len(ch.lower()) == 1 else ch for ch in text
        )

    def _emit_visible(self, out: list[str], text: str) -> None:
        """Append *text* to *out* after orphan-close stripping.

        No-op when *text* is empty before or after stripping.  On a
        real emission the cross-feed newline flag is refreshed.
        """
        if not text:
            return
        text = self._strip_orphan_close_tags(text)
        if text:
            out.append(text)
            self._last_emitted_ended_newline = text.endswith("\n")

    @staticmethod
    def _find_first_tag(
        buf: str, tags: Tuple[str, ...],
    ) -> Tuple[int, int]:
        """Return (earliest_index, tag_length) over *tags*, or (-1, 0).

        Case-insensitive match.  The returned index is valid for
        slicing *buf* itself.
        """
        fold = StreamingThinkScrubber._fold
        folded = fold(buf)
        best_idx = -1
        best_len = 0
        for tag in tags:
            idx = folded.find(fold(tag))
            if idx != -1 and (best_idx == -1 or idx < best_idx):
                best_idx = idx
                best_len = len(tag)
        return best_idx, best_len

    def _find_earliest_closed_pair(
        self, buf: str,
    ) -> "Tuple[int, int] | None":
        """Return (start_idx, end_idx) of the earliest closed pair, else None.

        A closed pair is ``<tag>...</tag>`` of any variant.  Matches are
        case-insensitive and non-greedy (the closest close tag after
        an open tag wins), matching the regex ``<tag>.*?</tag>``
        semantics of ``_strip_think_blocks`` case 1.  When two tag
        variants could both match, the one whose open tag appears
        earlier wins.  Only the first occurrence of each open tag is
        considered.
        """
        folded = self._fold(buf)
        best: "Tuple[int, int] | None" = None
        for open_tag, close_tag in zip(self._OPEN_TAGS, self._CLOSE_TAGS):
            open_folded = self._fold(open_tag)
            close_folded = self._fold(close_tag)
            open_idx = folded.find(open_folded)
            if open_idx == -1:
                continue
            close_idx = folded.find(
                close_folded, open_idx + len(open_folded),
            )
            if close_idx == -1:
                continue
            if best is None or open_idx < best[0]:
                best = (open_idx, close_idx + len(close_folded))
        return best

    def _find_open_at_boundary(
        self, buf: str, already_emitted: list[str],
    ) -> Tuple[int, int]:
        """Return the earliest block-boundary open-tag (idx, len).

        Returns (-1, 0) if no boundary-legal opener is present.
        """
        folded = self._fold(buf)
        best_idx = -1
        best_len = 0
        for tag in self._OPEN_TAGS:
            tag_folded = self._fold(tag)
            search_start = 0
            while True:
                idx = folded.find(tag_folded, search_start)
                if idx == -1:
                    break
                if self._is_block_boundary(buf, idx, already_emitted):
                    if best_idx == -1 or idx < best_idx:
                        best_idx = idx
                        best_len = len(tag)
                    break  # first boundary hit for this tag is enough
                search_start = idx + 1
        return best_idx, best_len

    def _is_block_boundary(
        self, buf: str, idx: int, already_emitted: list[str],
    ) -> bool:
        """True iff position *idx* in *buf* is a block boundary.

        A block boundary is:
          - buf position 0 AND the most recent emission ended with
            a newline (or nothing has been emitted yet)
          - any position whose preceding text on the current line
            (since the last newline in buf) is whitespace-only, AND
            if there is no newline in the preceding buf portion, the
            most recent prior emission ended with a newline
        """
        # Prefer the last chunk emitted in THIS feed() call; otherwise
        # fall back to the cross-feed flag.
        if already_emitted:
            prior_newline = already_emitted[-1].endswith("\n")
        else:
            prior_newline = self._last_emitted_ended_newline
        if idx == 0:
            return prior_newline
        preceding = buf[:idx]
        last_nl = preceding.rfind("\n")
        if last_nl == -1:
            # No newline in buf before the tag — boundary only if the
            # prior emission ended with a newline AND everything since
            # is whitespace.
            return prior_newline and preceding.strip() == ""
        # Newline present — text between it and the tag must be
        # whitespace-only.
        return preceding[last_nl + 1:].strip() == ""

    @classmethod
    def _max_partial_suffix(
        cls, buf: str, tags: Tuple[str, ...],
    ) -> int:
        """Return the longest buf-suffix that is a prefix of any tag.

        Only prefixes strictly shorter than the tag itself count
        (full-length suffixes are the tag and are handled as matches,
        not held-back partials).  Case-insensitive.
        """
        if not buf:
            return 0
        # Only the last (_MAX_TAG_LEN - 1) characters can form a
        # strict tag prefix, so fold just that window.
        window = cls._fold(buf[-(cls._MAX_TAG_LEN - 1):])
        folded_tags = [cls._fold(tag) for tag in tags]
        for size in range(len(window), 0, -1):
            suffix = window[-size:]
            if any(
                len(tag) > size and tag.startswith(suffix)
                for tag in folded_tags
            ):
                return size
        return 0

    @classmethod
    def _strip_orphan_close_tags(cls, text: str) -> str:
        """Remove any close tags from *text* (orphan-close handling).

        An orphan close tag has no matching open in the current
        scrubber state; it's always noise, stripped with any trailing
        whitespace so the surrounding prose flows naturally.
        """
        if "</" not in text:
            return text
        folded = cls._fold(text)
        close_tags = tuple(cls._fold(tag) for tag in cls._CLOSE_TAGS)
        kept: list[str] = []
        pos = 0
        end = len(text)
        while pos < end:
            hit = folded.find("</", pos)
            if hit == -1:
                kept.append(text[pos:])
                break
            kept.append(text[pos:hit])
            tag_len = next(
                (len(tag) for tag in close_tags if folded.startswith(tag, hit)),
                0,
            )
            if tag_len:
                # Skip the tag and any trailing whitespace,
                # matching _strip_think_blocks case 3.
                pos = hit + tag_len
                while pos < end and text[pos] in " \t\n\r":
                    pos += 1
            else:
                # '</' that is not one of our close tags: keep the
                # '<' and resume scanning right after it.
                kept.append(text[hit])
                pos = hit + 1
        return "".join(kept)
fix(agent): stateful streaming scrubber for reasoning-block leaks (#17924) (#20184) * revert(gateway): remove stale-code self-check and auto-restart Removes the _detect_stale_code / _trigger_stale_code_restart mechanism introduced in #17648 and iterated in #19740. On every incoming message the gateway compared the boot-time git HEAD SHA to the current SHA on disk, and if they differed it would reply with Gateway code was updated in the background -- restarting this gateway so your next message runs on the new code. Please retry in a moment. and then kick off a graceful restart. This is unwanted behaviour: users who run a long-lived gateway and do their own ad-hoc git operations on the checkout end up with their chat interrupted and the current message dropped every time HEAD moves, with no way to opt out. If an operator really needs the old protection against stale sys.modules after "hermes update", the SIGKILL-survivor sweep in hermes update (hermes_cli/main.py, also tagged #17648) already handles the supervisor-respawn case on its own. Removed: gateway/run.py: - _STALE_CODE_SENTINELS, _GIT_SHA_CACHE_TTL_SECS - _read_git_head_sha(), _compute_repo_mtime() module helpers - class-level _boot_wall_time / _boot_repo_mtime / _boot_git_sha / _stale_code_restart_triggered defaults - __init__ boot-snapshot block (_boot_, _cached_current_sha, _repo_root_for_staleness, _stale_code_notified) - _current_git_sha_cached(), _detect_stale_code(), _trigger_stale_code_restart() methods - stale-code check + user-facing restart notice at the top of _handle_message() tests/gateway/test_stale_code_self_check.py (deleted, 412 lines) No new logic added. Zero remaining references to any removed symbol. Gateway test suite passes the same 4589 tests it passed before; the 3 pre-existing unrelated failures (discord free-channel, feishu bot admission, teams typing) are unchanged by this commit. 
* fix(agent): stateful streaming scrubber for reasoning-block leaks (#17924) Per-delta _strip_think_blocks ran at _fire_stream_delta and destroyed downstream state. When MiniMax-M2.7 / DeepSeek / Qwen3 streamed a tag split across deltas (delta1='<think>', delta2='Let me check'), the regex case-2 match erased delta1 entirely, so CLI/gateway state machines never learned a block was open and leaked delta2 as content. Raw consumers (ACP, api_server, TTS) had no downstream defense at all. Replace the per-delta regex with a stateful StreamingThinkScrubber that survives delta boundaries: - Closed <tag>X</tag> pairs always stripped (matches _strip_think_blocks case 1). - Unterminated open at block boundary enters a block; content discarded until close tag arrives. At end-of-stream, held content is dropped. - Orphan close tags stripped without boundary gating. - Partial tags at delta boundaries held back until resolved. - Block-boundary rule (start-of-stream, after \n, or whitespace-only since last \n) preserves prose that mentions tag names. Reset at turn start alongside the existing context scrubber; flush at turn end so a benign '<' held back at end-of-stream reaches the UI. E2E-verified on live OpenRouter->MiniMax-m2 streams: closed pairs strip cleanly, first word of post-block content is preserved, pure content passes through unchanged. Stefan's screenshot case (#17924) — 'Let me check' getting chopped to ' me check' — no longer happens. Final _strip_think_blocks calls on completed strings (final_response, replay, compression) are preserved; only the streaming per-delta call site switched to the scrubber. 2026-05-05 04:33:38 -07:00			`"""Stateful scrubber for reasoning/thinking blocks in streamed assistant text.`

			``run_agent._strip_think_blocks`` is regex-based and correct for a complete
			string, but when it runs per-delta in ``_fire_stream_delta`` it destroys
			the state that downstream consumers (CLI ``_stream_delta``, gateway
			``GatewayStreamConsumer._filter_and_accumulate``) rely on.

			`Concretely, when MiniMax-M2.7 streams`

			`delta1 = "<think>"`
			`delta2 = "Let me check their config"`
			`delta3 = "</think>"`

			`the per-delta regex erases delta1 entirely (case 2: unterminated-open at`
			boundary matches ``^<think>...``), so the downstream state machine never
			`sees the open tag, treats delta2 as regular content, and leaks reasoning`
			`to the user. Consumers that don't run their own state machine (ACP,`
			`api_server, TTS) never had any defence at all — they just emitted`
			`whatever survived the upstream regex.`

			`This module centralises the tag-suppression state machine at the`
			`upstream layer so every stream_delta_callback sees text that has`
			`already had reasoning blocks removed. Partial tags at delta`
			`boundaries are held back until the next delta resolves them, and`
			`end-of-stream flushing surfaces any held-back prose that turned out`
			`not to be a real tag.`

			`Usage::`

			`scrubber = StreamingThinkScrubber()`
			`for delta in stream:`
			`visible = scrubber.feed(delta)`
			`if visible:`
			`emit(visible)`
			`tail = scrubber.flush() # at end of stream`
			`if tail:`
			`emit(tail)`

			The scrubber is re-entrant per agent instance. Call ``reset()`` at
			`the top of each new turn so a hung block from an interrupted prior`
			`stream cannot taint the next turn's output.`

			`Tag variants handled (case-insensitive):`
			``<think>``, ``<thinking>``, ``<reasoning>``, ``<thought>``,
			``<REASONING_SCRATCHPAD>``.

			`Block-boundary rule for opens: an opening tag is only treated as a`
			`reasoning-block opener when it appears at the start of the stream,`
			`after a newline (optionally followed by whitespace), or when only`
			`whitespace has been emitted on the current line. This prevents prose`
			that mentions the tag name (e.g. ``"use <think> tags here"``) from
			being incorrectly suppressed. Closed pairs (``<think>X</think>``) are
			`always suppressed regardless of boundary; a closed pair is an`
			`intentional, bounded construct.`
			`"""`

			`from __future__ import annotations`

			`from typing import Tuple`

			`__all__ = ["StreamingThinkScrubber"]`


			`class StreamingThinkScrubber:`
			`"""Stateful scrubber for streaming reasoning/thinking blocks.`

			`State machine:`
			- ``_in_block``: True while inside an opened block, waiting for
			`a close tag. All text inside is discarded.`
			- ``_buf``: held-back partial-tag tail. Emitted / discarded on
			the next ``feed()`` call or by ``flush()``.
			- ``_last_emitted_ended_newline``: True iff the most recent
			emission to the consumer ended with ``\\n``, or nothing has
			`been emitted yet (start-of-stream counts as a boundary). Used`
			`to decide whether an open tag at buffer position 0 is at a`
			`block boundary.`
			`"""`

			`_OPEN_TAG_NAMES: Tuple[str, ...] = (`
			`"think",`
			`"thinking",`
			`"reasoning",`
			`"thought",`
			`"REASONING_SCRATCHPAD",`
			`)`

			`# Materialise literal tag strings so the hot path does string`
			`# operations, not regex compilation per feed().`
			`_OPEN_TAGS: Tuple[str, ...] = tuple(f"<{name}>" for name in _OPEN_TAG_NAMES)`
			`_CLOSE_TAGS: Tuple[str, ...] = tuple(f"</{name}>" for name in _OPEN_TAG_NAMES)`

			`# Pre-compute the longest tag (for partial-tag hold-back bound).`
			`_MAX_TAG_LEN: int = max(len(tag) for tag in _OPEN_TAGS + _CLOSE_TAGS)`

			`def __init__(self) -> None:`
			`self._in_block: bool = False`
			`self._buf: str = ""`
			`self._last_emitted_ended_newline: bool = True`

			`def reset(self) -> None:`
			`"""Reset all state. Call at the top of every new turn."""`
			`self._in_block = False`
			`self._buf = ""`
			`self._last_emitted_ended_newline = True`

			`def feed(self, text: str) -> str:`
			`"""Feed one delta; return the scrubbed visible portion.`

			`May return an empty string when the entire delta is reasoning`
			`content or is being held back pending resolution of a partial`
			`tag at the boundary.`
			`"""`
			`if not text:`
			`return ""`
			`buf = self._buf + text`
			`self._buf = ""`
			`out: list[str] = []`

			`while buf:`
			`if self._in_block:`
			`# Hunt for the earliest close tag.`
			`close_idx, close_len = self._find_first_tag(`
			`buf, self._CLOSE_TAGS,`
			`)`
			`if close_idx == -1:`
			`# No close yet — hold back a potential partial`
			`# close-tag prefix; discard everything else.`
			`held = self._max_partial_suffix(buf, self._CLOSE_TAGS)`
			`self._buf = buf[-held:] if held else ""`
			`return "".join(out)`
			`# Found close: discard block content + tag, continue.`
			`buf = buf[close_idx + close_len:]`
			`self._in_block = False`
			`else:`
			`# Priority 1 — closed <tag>X</tag> pair anywhere in`
			`# buf. Closed pairs are always an intentional,`
			`# bounded construct (even mid-line prose containing`
			`# an open/close pair is almost certainly a model`
			`# leaking reasoning inline), so no boundary gating.`
			`pair = self._find_earliest_closed_pair(buf)`
			`# Priority 2 — unterminated open tag at a block`
			`# boundary. Boundary-gated so prose that mentions`
			`# '<think>' isn't over-stripped.`
			`open_idx, open_len = self._find_open_at_boundary(`
			`buf, out,`
			`)`

			`# Pick whichever match comes earliest in the buffer.`
			`if pair is not None and (`
			`open_idx == -1 or pair[0] <= open_idx`
			`):`
			`start_idx, end_idx = pair`
			`preceding = buf[:start_idx]`
			`if preceding:`
			`preceding = self._strip_orphan_close_tags(preceding)`
			`if preceding:`
			`out.append(preceding)`
			`self._last_emitted_ended_newline = (`
			`preceding.endswith("\n")`
			`)`
			`buf = buf[end_idx:]`
			`continue`

			`if open_idx != -1:`
			`# Unterminated open at boundary — emit preceding,`
			`# enter block, continue loop with remainder.`
			`preceding = buf[:open_idx]`
			`if preceding:`
			`preceding = self._strip_orphan_close_tags(preceding)`
			`if preceding:`
			`out.append(preceding)`
			`self._last_emitted_ended_newline = (`
			`preceding.endswith("\n")`
			`)`
			`self._in_block = True`
			`buf = buf[open_idx + open_len:]`
			`continue`

			`# No resolvable tag structure in buf. Hold back any`
			`# partial-tag prefix at the tail so a split tag`
			`# across deltas isn't missed, then emit the rest.`
			`held = self._max_partial_suffix(buf, self._OPEN_TAGS)`
			`held_close = self._max_partial_suffix(`
			`buf, self._CLOSE_TAGS,`
			`)`
			`held = max(held, held_close)`
			`if held:`
			`emit_text = buf[:-held]`
			`self._buf = buf[-held:]`
			`else:`
			`emit_text = buf`
			`self._buf = ""`
			`if emit_text:`
			`emit_text = self._strip_orphan_close_tags(emit_text)`
			`if emit_text:`
			`out.append(emit_text)`
			`self._last_emitted_ended_newline = (`
			`emit_text.endswith("\n")`
			`)`
			`return "".join(out)`

			`return "".join(out)`

			`def flush(self) -> str:`
			`"""End-of-stream flush.`

			`If still inside an unterminated block, held-back content is`
			`discarded — leaking partial reasoning is worse than a`
			`truncated answer. Otherwise the held-back partial-tag tail is`
			`emitted verbatim (it turned out not to be a real tag prefix).`
			`"""`
			`if self._in_block:`
			`self._buf = ""`
			`self._in_block = False`
			`return ""`
			`tail = self._buf`
			`self._buf = ""`
			`if not tail:`
			`return ""`
			`tail = self._strip_orphan_close_tags(tail)`
			`if tail:`
			`self._last_emitted_ended_newline = tail.endswith("\n")`
			`return tail`

			`# ── internal helpers ───────────────────────────────────────────────`

			`@staticmethod`
			`def _find_first_tag(`
			`buf: str, tags: Tuple[str, ...],`
			`) -> Tuple[int, int]:`
			`"""Return (earliest_index, tag_length) over tags, or (-1, 0).`

			`Case-insensitive match.`
			`"""`
			`buf_lower = buf.lower()`
			`best_idx = -1`
			`best_len = 0`
			`for tag in tags:`
			`idx = buf_lower.find(tag.lower())`
			`if idx != -1 and (best_idx == -1 or idx < best_idx):`
			`best_idx = idx`
			`best_len = len(tag)`
			`return best_idx, best_len`

			`def _find_earliest_closed_pair(self, buf: str):`
			`"""Return (start_idx, end_idx) of the earliest closed pair, else None.`

			A closed pair is ``<tag>...</tag>`` of any variant. Matches are
			`case-insensitive and non-greedy (the closest close tag after`
			an open tag wins), matching the regex ``<tag>.*?</tag>``
			semantics of ``_strip_think_blocks`` case 1. When two tag
			`variants could both match, the one whose open tag appears`
			`earlier wins.`
			`"""`
			`buf_lower = buf.lower()`
			`best: "tuple[int, int] | None" = None`
			`for open_tag, close_tag in zip(self._OPEN_TAGS, self._CLOSE_TAGS):`
			`open_lower = open_tag.lower()`
			`close_lower = close_tag.lower()`
			`open_idx = buf_lower.find(open_lower)`
			`if open_idx == -1:`
			`continue`
			`close_idx = buf_lower.find(`
			`close_lower, open_idx + len(open_lower),`
			`)`
			`if close_idx == -1:`
			`continue`
			`end_idx = close_idx + len(close_lower)`
			`if best is None or open_idx < best[0]:`
			`best = (open_idx, end_idx)`
			`return best`

			`def _find_open_at_boundary(`
			`self, buf: str, already_emitted: list[str],`
			`) -> Tuple[int, int]:`
			`"""Return the earliest block-boundary open-tag (idx, len).`

			`Returns (-1, 0) if no boundary-legal opener is present.`
			`"""`
			`buf_lower = buf.lower()`
			`best_idx = -1`
			`best_len = 0`
			`for tag in self._OPEN_TAGS:`
			`tag_lower = tag.lower()`
			`search_start = 0`
			`while True:`
			`idx = buf_lower.find(tag_lower, search_start)`
			`if idx == -1:`
			`break`
			`if self._is_block_boundary(buf, idx, already_emitted):`
			`if best_idx == -1 or idx < best_idx:`
			`best_idx = idx`
			`best_len = len(tag)`
			`break # first boundary hit for this tag is enough`
			`search_start = idx + 1`
			`return best_idx, best_len`

			`def _is_block_boundary(`
			`self, buf: str, idx: int, already_emitted: list[str],`
			`) -> bool:`
			`"""True iff position idx in buf is a block boundary.`

			`A block boundary is:`
			`- buf position 0 AND the most recent emission ended with`
			`a newline (or nothing has been emitted yet)`
			`- any position whose preceding text on the current line`
			`(since the last newline in buf) is whitespace-only, AND`
			`if there is no newline in the preceding buf portion, the`
			`most recent prior emission ended with a newline`
			`"""`
			`if idx == 0:`
			`# Check whether the last already-emitted chunk in THIS`
			`# feed() call ended with a newline, otherwise fall back`
			`# to the cross-feed flag.`
			`if already_emitted:`
			`return already_emitted[-1].endswith("\n")`
			`return self._last_emitted_ended_newline`
			`preceding = buf[:idx]`
			`last_nl = preceding.rfind("\n")`
			`if last_nl == -1:`
			`# No newline in buf before the tag — boundary only if the`
			`# prior emission ended with a newline AND everything since`
			`# is whitespace.`
			`if already_emitted:`
			`prior_newline = already_emitted[-1].endswith("\n")`
			`else:`
			`prior_newline = self._last_emitted_ended_newline`
			`return prior_newline and preceding.strip() == ""`
			`# Newline present — text between it and the tag must be`
			`# whitespace-only.`
			`return preceding[last_nl + 1:].strip() == ""`

			`@classmethod`
			`def _max_partial_suffix(`
			`cls, buf: str, tags: Tuple[str, ...],`
			`) -> int:`
			`"""Return the longest buf-suffix that is a prefix of any tag.`

			`Only prefixes strictly shorter than the tag itself count`
			`(full-length suffixes are the tag and are handled as matches,`
			`not held-back partials). Case-insensitive.`
			`"""`
			`if not buf:`
			`return 0`
			`buf_lower = buf.lower()`
			`max_check = min(len(buf_lower), cls._MAX_TAG_LEN - 1)`
			`for i in range(max_check, 0, -1):`
			`suffix = buf_lower[-i:]`
			`for tag in tags:`
			`tag_lower = tag.lower()`
			`if len(tag_lower) > i and tag_lower.startswith(suffix):`
			`return i`
			`return 0`

			`@classmethod`
			`def _strip_orphan_close_tags(cls, text: str) -> str:`
			`"""Remove any close tags from text (orphan-close handling).`

			`An orphan close tag has no matching open in the current`
			`scrubber state; it's always noise, stripped with any trailing`
			`whitespace so the surrounding prose flows naturally.`
			`"""`
			`if "</" not in text:`
			`return text`
			`text_lower = text.lower()`
			`out: list[str] = []`
			`i = 0`
			`while i < len(text):`
			`matched = False`
			`if text_lower[i:i + 2] == "</":`
			`for tag in cls._CLOSE_TAGS:`
			`tag_lower = tag.lower()`
			`tag_len = len(tag_lower)`
			`if text_lower[i:i + tag_len] == tag_lower:`
			`# Skip the tag and any trailing whitespace,`
			`# matching _strip_think_blocks case 3.`
			`j = i + tag_len`
			`while j < len(text) and text[j] in " \t\n\r":`
			`j += 1`
			`i = j`
			`matched = True`
			`break`
			`if not matched:`
			`out.append(text[i])`
			`i += 1`
			`return "".join(out)`