fix(agent): route structured-reasoning empties to prefill, not nudge

The post-tool empty-response nudge fired before the prefill branch for thinking models that emit reasoning via structured API fields (OpenRouter reasoning / reasoning_details, e.g. qwen3-vl-8b-thinking). The nudge guard only checked _has_inline_thinking (<think> tags in content), so every tool-using turn on these models hit the nudge path — one wasted LLM round-trip (~3-5s, ~400 tokens) and a spurious warning — before self-recovering. Hoist the _has_structured computation above the nudge guard and widen the guard from 'not _has_inline_thinking' to 'not _has_structured' (_has_structured already includes _has_inline_thinking, so inline-thinking turns still skip the nudge). Nudge and prefill are now disjoint on _has_structured; the empty-retry branch's existing _prefill_exhausted guard already handles always-reasoning models falling through after prefill. Closes #34655. Reported by @sawtdakhili.
2026-06-12 13:18:54 +08:00 · 2026-05-29 12:23:21 -07:00
1 changed file with 18 additions and 7 deletions
--- a/agent/conversation_loop.py
+++ b/agent/conversation_loop.py
@@ -3981,10 +3981,25 @@ def run_conversation(
                            re.IGNORECASE,
                        )
                    )
+                    # Detect reasoning emitted via structured API fields
+                    # (OpenRouter `reasoning` / `reasoning_details`, or the
+                    # streaming-accumulated `reasoning_content`) or as inline
+                    # <think> tags (_has_inline_thinking).  Thinking models like
+                    # qwen3-vl-8b-thinking return structured reasoning with
+                    # empty content after tool calls — that's the model still
+                    # working, not a genuine empty response.  Compute this
+                    # BEFORE the nudge guard so those turns route to the prefill
+                    # branch below instead of wasting an LLM round-trip on a nudge.
+                    _has_structured = bool(
+                        getattr(assistant_message, "reasoning", None)
+                        or getattr(assistant_message, "reasoning_content", None)
+                        or getattr(assistant_message, "reasoning_details", None)
+                        or _has_inline_thinking
+                    )
                    if (
                        _prior_was_tool
                        and not getattr(agent, "_post_tool_empty_retried", False)
-                        and not _has_inline_thinking  # thinking model still working — let prefill handle
+                        and not _has_structured  # thinking model still working — let prefill handle
                    ):
                        agent._post_tool_empty_retried = True
                        # Clear stale narration so it doesn't resurface
@@ -4028,12 +4043,8 @@ def run_conversation(
                    # Inspired by clawdbot's "incomplete-text" recovery.
                    # Also covers Qwen3/Ollama in-content <think> blocks
                    # (detected above as _has_inline_thinking).
-                    _has_structured = bool(
-                        getattr(assistant_message, "reasoning", None)
-                        or getattr(assistant_message, "reasoning_content", None)
-                        or getattr(assistant_message, "reasoning_details", None)
-                        or _has_inline_thinking
-                    )
+                    # _has_structured was computed above the nudge guard so
+                    # both branches share the same definition.
                    if _has_structured and agent._thinking_prefill_retries < 2:
                        agent._thinking_prefill_retries += 1
                        logger.info(