diff --git a/run_agent.py b/run_agent.py index 0051fce63f..33635ef2fe 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2172,17 +2172,49 @@ class AIAgent: return bool(cleaned.strip()) def _strip_think_blocks(self, content: str) -> str: - """Remove reasoning/thinking blocks from content, returning only visible text.""" + """Remove reasoning/thinking blocks from content, returning only visible text. + + Handles four cases: + 1. Closed tag pairs (````) — the common path when + the provider emits complete reasoning blocks. + 2. Unterminated open tag at a block boundary (start of text or + after a newline) — e.g. MiniMax M2.7 / NIM endpoints where the + closing tag is dropped. Everything from the open tag to end + of string is stripped. The block-boundary check mirrors + ``gateway/stream_consumer.py``'s filter so models that mention + ```` in prose aren't over-stripped. + 3. Stray orphan open/close tags that slip through. + 4. Tag variants: ````, ````, ````, + ````, ```` (Gemma 4), all + case-insensitive. + """ if not content: return "" - # Strip all reasoning tag variants: , , , - # , , (Gemma 4) - content = re.sub(r'.*?', '', content, flags=re.DOTALL) + # 1. Closed tag pairs — case-insensitive for all variants so + # mixed-case tags (, ) don't slip through to + # the unterminated-tag pass and take trailing content with them. + content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) - content = re.sub(r'.*?', '', content, flags=re.DOTALL) - content = re.sub(r'.*?', '', content, flags=re.DOTALL) + content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) + content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) content = re.sub(r'.*?', '', content, flags=re.DOTALL | re.IGNORECASE) - content = re.sub(r'\s*', '', content, flags=re.IGNORECASE) + # 2. Unterminated reasoning block — open tag at a block boundary + # (start of text, or after a newline) with no matching close. + # Strip from the tag to end of string. Fixes #8878 / #9568 + # (MiniMax M2.7 leaking raw reasoning into assistant content). + content = re.sub( + r'(?:^|\n)[ \t]*<(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)\b[^>]*>.*$', + '', + content, + flags=re.DOTALL | re.IGNORECASE, + ) + # 3. Stray orphan open/close tags that slipped through. + content = re.sub( + r'\s*', + '', + content, + flags=re.IGNORECASE, + ) return content @staticmethod diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 86f95580f0..bde5ed5aae 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -317,6 +317,60 @@ class TestStripThinkBlocks: result = agent._strip_think_blocks("orphaned reasoning without close") assert "" not in result + # ─── Unterminated-block coverage (#8878, #9568, #10408) ────────────── + # Reasoning models served via NIM / MiniMax M2.7 frequently drop the + # closing tag, leaking raw reasoning into assistant content. The open + # tag appears at a block boundary (start of text or after a newline); + # everything from that tag to end-of-string is stripped. + + def test_unterminated_think_block_content_stripped(self, agent): + """Content after unterminated is fully stripped.""" + result = agent._strip_think_blocks("orphaned reasoning without close") + assert "orphaned reasoning" not in result + assert result.strip() == "" + + def test_unterminated_thought_block_content_stripped(self, agent): + """Gemma-style with no close is fully stripped.""" + result = agent._strip_think_blocks("orphaned reasoning without close") + assert "orphaned reasoning" not in result + assert result.strip() == "" + + def test_unterminated_multiline_block_stripped(self, agent): + """Multi-line unterminated blocks are stripped in full.""" + result = agent._strip_think_blocks( + "\nmulti\nline\nreasoning\nthat never closes" + ) + assert "multi" not in result + assert "never closes" not in result + + def test_unterminated_block_after_answer_preserves_prefix(self, agent): + """Visible answer before a line-starting unterminated tag is kept.""" + result = agent._strip_think_blocks( + "Answer is 42.\nactually let me reconsider" + ) + assert "Answer is 42." in result + assert "reconsider" not in result + + def test_inline_think_mention_in_prose_not_over_stripped(self, agent): + """Mid-line `` mentioned in prose must not swallow the rest + of the content (the block-boundary check prevents this).""" + text = "Use the tag like this in your prose." + result = agent._strip_think_blocks(text) + # Block-boundary check prevents unterminated-strip from firing + assert "prose" in result + assert "Use the" in result + + def test_mixed_case_closed_pair_stripped(self, agent): + """Mixed-case variants , are + handled by case-insensitive closed-pair regex, so the trailing + content is preserved.""" + result = agent._strip_think_blocks("upperfinal") + assert "upper" not in result + assert "final" in result + result = agent._strip_think_blocks("mixedfinal") + assert "mixed" not in result + assert "final" in result + class TestExtractReasoning: def test_reasoning_field(self, agent):