Compare commits

...

1 Commit

Author SHA1 Message Date
teknium1
daa0a938e4 fix(agent): route structured-reasoning empties to prefill, not nudge
Post-tool empty-response nudge fired before the prefill branch for thinking
models that emit reasoning via structured API fields (OpenRouter reasoning /
reasoning_details, e.g. qwen3-vl-8b-thinking). The nudge guard only checked
_has_inline_thinking (<think> tags in content), so every tool-using turn on
these models hit the nudge path — one wasted LLM round-trip (~3-5s, ~400
tokens) and a spurious warning, before self-recovering.

Hoist the _has_structured computation above the nudge guard and widen the
guard from 'not _has_inline_thinking' to 'not _has_structured'. Nudge and
prefill are now disjoint on _has_structured; the empty-retry branch's
existing _prefill_exhausted guard already handles always-reasoning models
falling through after prefill.

Closes #34655. Reported by @sawtdakhili.
2026-05-29 12:23:21 -07:00

View File

@@ -3981,10 +3981,25 @@ def run_conversation(
re.IGNORECASE,
)
)
# Detect structured reasoning emitted via API fields
# (OpenRouter `reasoning` / `reasoning_details`, or the
# streaming-accumulated `reasoning_content`). Thinking
# models like qwen3-vl-8b-thinking return reasoning here
# with empty content after tool calls — that's the model
# still working, not a genuine empty response. Compute
# this BEFORE the nudge guard so those turns route to the
# prefill branch below instead of wasting an LLM round-trip
# on a nudge.
_has_structured = bool(
getattr(assistant_message, "reasoning", None)
or getattr(assistant_message, "reasoning_content", None)
or getattr(assistant_message, "reasoning_details", None)
or _has_inline_thinking
)
if (
_prior_was_tool
and not getattr(agent, "_post_tool_empty_retried", False)
and not _has_inline_thinking # thinking model still working — let prefill handle
and not _has_structured # thinking model still working — let prefill handle
):
agent._post_tool_empty_retried = True
# Clear stale narration so it doesn't resurface
@@ -4028,12 +4043,8 @@ def run_conversation(
# Inspired by clawdbot's "incomplete-text" recovery.
# Also covers Qwen3/Ollama in-content <think> blocks
# (detected above as _has_inline_thinking).
_has_structured = bool(
getattr(assistant_message, "reasoning", None)
or getattr(assistant_message, "reasoning_content", None)
or getattr(assistant_message, "reasoning_details", None)
or _has_inline_thinking
)
# _has_structured was computed above the nudge guard so
# both branches share the same definition.
if _has_structured and agent._thinking_prefill_retries < 2:
agent._thinking_prefill_retries += 1
logger.info(