fix(streaming): silent retry when stream dies mid tool-call (#14151)

When the streaming connection dropped AFTER user-visible text was
delivered but a tool call was in flight, we stubbed the turn with a
'⚠ Stream stalled mid tool-call; Ask me to retry' warning — costing
an iteration and breaking the flow.  Users report this happening
increasingly often on long SSE streams through flaky provider routes.

Fix: in the existing inner stream-retry loop, relax the
deltas_were_sent short-circuit.  If a tool call was in flight
(partial_tool_names populated) AND the error is a transient connection
error (timeout, RemoteProtocolError, SSE 'connection lost', etc.),
silently retry instead of bailing out.  Fire a brief 'Connection
dropped mid tool-call; reconnecting…' marker so the user understands
the preamble is about to be re-streamed.

Researched how Claude Code (tombstone + non-streaming fallback),
OpenCode (blind Effect.retry wrapping whole stream), and Clawdbot
(4-way gate: stopReason==error + output==0 + !hadPotentialSideEffects)
handle this.  Chose the narrow Clawdbot-style gate: retry only when
(a) a tool call was actually in flight (otherwise the existing
stub-with-recovered-text is correct for pure-text stalls) and
(b) the error is transient.  Side-effect safety is automatic — no
tool has been dispatched within this single API call yet.

UX trade-off: user sees preamble text twice on retry (OpenCode-style).
Strictly better than a lost action with a 'retry manually' message.
If retries exhaust, falls through to the existing stub-with-warning
path so the user isn't left with zero signal.

Tests: 3 new tests in TestSilentRetryMidToolCall covering
(1) silent retry recovers tool call; (2) exhausted retries fall back
to stub; (3) text-only stalls don't trigger retry.  30/30 pass.
This commit is contained in:
Teknium
2026-04-22 13:47:33 -07:00
committed by GitHub
parent 88564ad8bc
commit ea67e49574
2 changed files with 339 additions and 10 deletions

View File

@@ -1133,3 +1133,225 @@ class TestPartialToolCallWarning:
f"Unexpected warning on text-only partial stream: {content!r}"
)
class TestSilentRetryMidToolCall:
    """Regression: when the stream dies mid tool-call JSON after text was
    already delivered, we previously stubbed the turn with a "retry manually"
    warning. Now: if the error is a transient connection error AND a tool
    call was in flight, silently retry the stream (the user sees a brief
    reconnect marker + duplicated preamble, which is strictly better than
    a lost action). If no tool call was in flight, or the error isn't
    transient, the existing stub-with-warning behaviour is preserved.
    """

    @staticmethod
    def _build_agent(mock_create, mock_client):
        """Build a chat-completions AIAgent whose request client is *mock_client*.

        Shared fixture for all three tests: wires ``mock_create`` to return the
        mock client, constructs the agent with the standard test kwargs, and
        puts it in streaming chat-completions mode with no pending interrupt.
        """
        from run_agent import AIAgent
        mock_create.return_value = mock_client
        agent = AIAgent(
            api_key="test-key",
            base_url="https://openrouter.ai/api/v1",
            model="test/model",
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
        )
        agent.api_mode = "chat_completions"
        agent._interrupt_requested = False
        return agent

    @patch("run_agent.AIAgent._replace_primary_openai_client")
    @patch("run_agent.AIAgent._create_request_openai_client")
    @patch("run_agent.AIAgent._close_request_openai_client")
    def test_silent_retry_recovers_tool_call(
        self, mock_close, mock_create, mock_replace,
    ):
        """First attempt: text + partial tool-call + connection drop.
        Second attempt: text + complete tool-call. Response should contain
        the recovered tool call; no warning stub should be returned."""
        import os as _os
        import httpx as _httpx

        attempts = {"n": 0}

        def _first_stream():
            # Preamble text, a tool-call header, partial JSON args — then
            # the transport dies mid tool-call (the regression scenario).
            yield _make_stream_chunk(content="Let me write the audit: ")
            yield _make_stream_chunk(tool_calls=[
                _make_tool_call_delta(index=0, tc_id="call_1", name="write_file"),
            ])
            yield _make_stream_chunk(tool_calls=[
                _make_tool_call_delta(index=0, arguments='{"path": "/tmp/x", '),
            ])
            raise _httpx.RemoteProtocolError("peer closed connection")

        def _second_stream():
            # Clean replay: same preamble, complete args, proper finish.
            yield _make_stream_chunk(content="Let me write the audit: ")
            yield _make_stream_chunk(tool_calls=[
                _make_tool_call_delta(index=0, tc_id="call_1", name="write_file"),
            ])
            yield _make_stream_chunk(tool_calls=[
                _make_tool_call_delta(
                    index=0, arguments='{"path": "/tmp/x", "content": "hi"}',
                ),
            ])
            yield _make_stream_chunk(finish_reason="tool_calls")

        def _pick_stream(*a, **kw):
            # First create() call fails mid-stream; second succeeds.
            attempts["n"] += 1
            return _first_stream() if attempts["n"] == 1 else _second_stream()

        mock_client = MagicMock()
        mock_client.chat.completions.create.side_effect = _pick_stream
        agent = self._build_agent(mock_create, mock_client)

        fired_deltas: list = []
        agent._fire_stream_delta = lambda text: fired_deltas.append(text)

        # patch.dict sets the retry budget for the call and restores (or
        # removes) the prior value on exit — no manual save/restore needed.
        with patch.dict(_os.environ, {"HERMES_STREAM_RETRIES": "2"}):
            response = agent._interruptible_streaming_api_call({})

        assert attempts["n"] == 2, (
            f"Expected silent retry (2 attempts), got {attempts['n']}"
        )
        # Response should carry the recovered tool call, not a warning stub.
        msg = response.choices[0].message
        tool_calls = getattr(msg, "tool_calls", None)
        assert tool_calls, (
            f"Silent retry should recover the tool call, got tool_calls={tool_calls!r} "
            f"content={getattr(msg, 'content', None)!r}"
        )
        # Tool-call entries may be dicts or objects depending on the SDK path.
        _tc0 = tool_calls[0]
        _name = (
            _tc0["function"]["name"] if isinstance(_tc0, dict)
            else _tc0.function.name
        )
        assert _name == "write_file"
        # User saw a reconnect marker between attempts.
        assert any("reconnecting" in d.lower() for d in fired_deltas), (
            f"Expected a reconnect marker delta, fired_deltas={fired_deltas}"
        )
        # Stub-path warning must NOT appear (this was the whole point).
        joined = "".join(fired_deltas)
        assert "Stream stalled" not in joined, (
            f"Stub-path warning leaked into silent-retry path: {joined!r}"
        )

    @patch("run_agent.AIAgent._replace_primary_openai_client")
    @patch("run_agent.AIAgent._create_request_openai_client")
    @patch("run_agent.AIAgent._close_request_openai_client")
    def test_silent_retry_exhausted_falls_back_to_stub(
        self, mock_close, mock_create, mock_replace,
    ):
        """When all retry attempts fail with connection errors, fall back
        to the original stub-with-warning behaviour so the user isn't left
        with zero signal."""
        import os as _os
        import httpx as _httpx

        def _always_fails():
            # Every attempt: preamble + tool-call header, then the drop.
            yield _make_stream_chunk(content="Let me write the audit: ")
            yield _make_stream_chunk(tool_calls=[
                _make_tool_call_delta(index=0, tc_id="call_1", name="write_file"),
            ])
            raise _httpx.RemoteProtocolError("peer closed connection")

        mock_client = MagicMock()
        # Fresh generator per create() call so every attempt fails the same way.
        mock_client.chat.completions.create.side_effect = lambda *a, **kw: _always_fails()
        agent = self._build_agent(mock_create, mock_client)

        fired_deltas: list = []
        agent._fire_stream_delta = lambda text: fired_deltas.append(text)

        # Budget of 1: the single retry also fails, exhausting the loop.
        with patch.dict(_os.environ, {"HERMES_STREAM_RETRIES": "1"}):
            response = agent._interruptible_streaming_api_call({})

        # After retries exhaust, the stub-with-warning path must engage.
        content = response.choices[0].message.content or ""
        assert "Stream stalled mid tool-call" in content, (
            f"Exhausted-retry fallback dropped the user-visible warning: {content!r}"
        )
        assert response.choices[0].message.tool_calls is None

    @patch("run_agent.AIAgent._replace_primary_openai_client")
    @patch("run_agent.AIAgent._create_request_openai_client")
    @patch("run_agent.AIAgent._close_request_openai_client")
    def test_no_silent_retry_for_text_only_stall(
        self, mock_close, mock_create, mock_replace,
    ):
        """Text-only stall (no tool call in flight) must NOT trigger silent
        retry — that's the case where the user saw the model's text reply
        and retrying would duplicate it with no benefit."""
        import os as _os
        import httpx as _httpx

        attempts = {"n": 0}

        def _text_stall(*a, **kw):
            # Only text is streamed before the drop — no tool call in flight.
            attempts["n"] += 1

            def _gen():
                yield _make_stream_chunk(content="Here's my answer so far")
                raise _httpx.RemoteProtocolError("peer closed connection")
            return _gen()

        mock_client = MagicMock()
        mock_client.chat.completions.create.side_effect = _text_stall
        agent = self._build_agent(mock_create, mock_client)
        # Simulate the already-delivered text the stub path recovers from.
        agent._current_streamed_assistant_text = "Here's my answer so far"

        # Budget of 2 would allow a retry — the gate must refuse it anyway.
        with patch.dict(_os.environ, {"HERMES_STREAM_RETRIES": "2"}):
            response = agent._interruptible_streaming_api_call({})

        # Only one attempt: text-only stall short-circuits retry.
        assert attempts["n"] == 1, (
            f"Text-only stall should not silent-retry, got {attempts['n']} attempts"
        )
        content = response.choices[0].message.content or ""
        assert content == "Here's my answer so far", (
            f"Text-only stall regressed: {content!r}"
        )
        assert "Stream stalled" not in content, (
            f"Text-only stall should not emit tool-call warning: {content!r}"
        )