fix: recover from partial stream delivery instead of duplicating

When streaming fails after tokens have already been delivered to the platform, the agent now attempts to continue the response instead of duplicating it. Option A: append the partial content as an assistant message and make a non-streaming API call; the model sees its previous partial output and naturally continues from where it left off. Option B (fallback): if the trailing assistant message is rejected, inject a user 'continue' instruction and retry, explicitly asking the model to resume without repeating itself. Last resort: if both fail, return the partial content as the final response (the user sees what was delivered, with no duplicate). Tested with real Sonnet and Opus models via both the Anthropic native API and OpenRouter; continuation works seamlessly on all providers. Also adds partial-text accumulation to the Anthropic streaming path (previously only the chat_completions path tracked deltas_were_sent). Inspired by PR #4871 (@trevorgordon981), which identified the bug.
fix(gateway): add message deduplication to Discord and Slack adapters (#4777)
2026-06-22 10:00:56 +08:00 · 2026-04-04 10:18:36 -07:00 · 2026-04-03 14:27:31 -07:00
3 changed files with 175 additions and 0 deletions
--- a/gateway/platforms/discord.py
+++ b/gateway/platforms/discord.py
@@ -449,6 +449,11 @@ class DiscordAdapter(BasePlatformAdapter):
        self._bot_task: Optional[asyncio.Task] = None
        # Cap to prevent unbounded growth (Discord threads get archived).
        self._MAX_TRACKED_THREADS = 500
+        # Dedup cache: message_id → timestamp.  Prevents duplicate bot
+        # responses when Discord RESUME replays events after reconnects.
+        self._seen_messages: Dict[str, float] = {}
+        self._SEEN_TTL = 300   # 5 minutes
+        self._SEEN_MAX = 2000  # prune threshold

    async def connect(self) -> bool:
        """Connect to Discord and start receiving events."""
@@ -539,6 +544,19 @@ class DiscordAdapter(BasePlatformAdapter):

            @self._client.event
            async def on_message(message: DiscordMessage):
+                # Dedup: Discord RESUME replays events after reconnects (#4777)
+                msg_id = str(message.id)
+                now = time.time()
+                if msg_id in adapter_self._seen_messages:
+                    return
+                adapter_self._seen_messages[msg_id] = now
+                if len(adapter_self._seen_messages) > adapter_self._SEEN_MAX:
+                    cutoff = now - adapter_self._SEEN_TTL
+                    adapter_self._seen_messages = {
+                        k: v for k, v in adapter_self._seen_messages.items()
+                        if v > cutoff
+                    }
+
                # Always ignore our own messages
                if message.author == self._client.user:
                    return
--- a/gateway/platforms/slack.py
+++ b/gateway/platforms/slack.py
@@ -13,6 +13,7 @@ import json
 import logging
 import os
 import re
+import time
 from typing import Dict, Optional, Any

 try:
@@ -78,6 +79,11 @@ class SlackAdapter(BasePlatformAdapter):
        self._team_clients: Dict[str, AsyncWebClient] = {}   # team_id → WebClient
        self._team_bot_user_ids: Dict[str, str] = {}          # team_id → bot_user_id
        self._channel_team: Dict[str, str] = {}                # channel_id → team_id
+        # Dedup cache: event_ts → timestamp.  Prevents duplicate bot
+        # responses when Socket Mode reconnects redeliver events.
+        self._seen_messages: Dict[str, float] = {}
+        self._SEEN_TTL = 300   # 5 minutes
+        self._SEEN_MAX = 2000  # prune threshold

    async def connect(self) -> bool:
        """Connect to Slack via Socket Mode."""
@@ -710,6 +716,20 @@ class SlackAdapter(BasePlatformAdapter):

    async def _handle_slack_message(self, event: dict) -> None:
        """Handle an incoming Slack message event."""
+        # Dedup: Slack Socket Mode can redeliver events after reconnects (#4777)
+        event_ts = event.get("ts", "")
+        if event_ts:
+            now = time.time()
+            if event_ts in self._seen_messages:
+                return
+            self._seen_messages[event_ts] = now
+            if len(self._seen_messages) > self._SEEN_MAX:
+                cutoff = now - self._SEEN_TTL
+                self._seen_messages = {
+                    k: v for k, v in self._seen_messages.items()
+                    if v > cutoff
+                }
+
        # Ignore bot messages (including our own)
        if event.get("bot_id") or event.get("subtype") == "bot_message":
            return
--- a/run_agent.py
+++ b/run_agent.py
@@ -4027,6 +4027,7 @@ class AIAgent:
        request_client_holder = {"client": None}
        first_delta_fired = {"done": False}
        deltas_were_sent = {"yes": False}  # Track if any deltas were fired (for fallback)
+        partial_streamed_text = {"content": ""}  # Accumulates text delivered to platform
        # Wall-clock timestamp of the last real streaming chunk.  The outer
        # poll loop uses this to detect stale connections that keep receiving
        # SSE keep-alive pings but no actual data.
@@ -4114,6 +4115,7 @@ class AIAgent:
                        _fire_first_delta()
                        self._fire_stream_delta(delta.content)
                        deltas_were_sent["yes"] = True
+                        partial_streamed_text["content"] += delta.content
                    else:
                        # Tool calls suppress regular content streaming (avoids
                        # displaying chatty "I'll use the tool..." text alongside
@@ -4264,6 +4266,8 @@ class AIAgent:
                                if text and not has_tool_use:
                                    _fire_first_delta()
                                    self._fire_stream_delta(text)
+                                    deltas_were_sent["yes"] = True
+                                    partial_streamed_text["content"] += text
                            elif delta_type == "thinking_delta":
                                thinking_text = getattr(delta, "thinking", "")
                                if thinking_text:
@@ -4473,6 +4477,14 @@ class AIAgent:
                    pass
                raise InterruptedError("Agent interrupted during streaming API call")
        if result["error"] is not None:
+            if deltas_were_sent["yes"] and partial_streamed_text["content"]:
+                # Streaming failed AFTER tokens were delivered to the platform.
+                # Attach the partial text so the outer retry loop can attempt
+                # continuation (Option A: trailing assistant, Option B: user
+                # "continue" prompt) instead of duplicating or losing content.
+                err = result["error"]
+                err._partial_content = partial_streamed_text["content"]
+                raise err
            raise result["error"]
        return result["response"]

@@ -7277,6 +7289,131 @@ class AIAgent:
                    break

                except Exception as api_error:
+                    # -----------------------------------------------------------
+                    # Partial stream recovery.  When streaming fails AFTER some
+                    # tokens were delivered to the user, we attempt to continue
+                    # the response rather than duplicate or lose content.
+                    #   Option A: append partial as assistant message, retry
+                    #   Option B: also inject user "continue" prompt (fallback)
+                    # -----------------------------------------------------------
+                    _partial = getattr(api_error, "_partial_content", None)
+                    if _partial and not getattr(self, "_partial_recovery_done", False):
+                        logger.warning(
+                            "%sStream interrupted after %d chars delivered. "
+                            "Attempting continuation (Option A)...",
+                            self.log_prefix, len(_partial),
+                        )
+                        self._emit_status(
+                            "⚡ Stream interrupted — attempting to continue..."
+                        )
+                        # Option A: trailing assistant message
+                        api_messages.append({"role": "assistant", "content": _partial})
+                        try:
+                            _cont_resp = self._interruptible_api_call(
+                                {**api_kwargs, "messages": api_messages}
+                            )
+                            # Merge: prepend partial to continuation
+                            _cont_text = ""
+                            if self.api_mode == "anthropic_messages":
+                                for _blk in getattr(_cont_resp, "content", []):
+                                    if getattr(_blk, "type", None) == "text":
+                                        _cont_text = getattr(_blk, "text", "")
+                                        break
+                            else:
+                                _cont_text = getattr(
+                                    _cont_resp.choices[0].message, "content", ""
+                                ) or ""
+                            if _cont_text:
+                                # Fire the continuation to the platform stream
+                                self._fire_stream_delta(_cont_text)
+                            self._partial_recovery_done = True
+                            # Build a merged response for the agent loop
+                            _merged = _partial + _cont_text
+                            _merged_msg = SimpleNamespace(
+                                role="assistant", content=_merged,
+                                tool_calls=None, reasoning_content=None,
+                            )
+                            response = SimpleNamespace(
+                                id="partial-recovery",
+                                model=getattr(_cont_resp, "model", self.model),
+                                choices=[SimpleNamespace(
+                                    index=0, message=_merged_msg,
+                                    finish_reason="stop",
+                                )],
+                                usage=getattr(_cont_resp, "usage", None),
+                            )
+                            break  # Success — exit retry loop
+                        except Exception as opt_a_err:
+                            logger.warning(
+                                "%sOption A failed (%s), trying Option B...",
+                                self.log_prefix, opt_a_err,
+                            )
+                            # Option B: add user "continue" instruction
+                            api_messages.append({
+                                "role": "user",
+                                "content": (
+                                    "Your response was cut off mid-sentence "
+                                    "due to a connection error. Continue from "
+                                    "exactly where you stopped — do not repeat "
+                                    "what you already said."
+                                ),
+                            })
+                            try:
+                                _cont_resp_b = self._interruptible_api_call(
+                                    {**api_kwargs, "messages": api_messages}
+                                )
+                                _cont_text_b = ""
+                                if self.api_mode == "anthropic_messages":
+                                    for _blk in getattr(_cont_resp_b, "content", []):
+                                        if getattr(_blk, "type", None) == "text":
+                                            _cont_text_b = getattr(_blk, "text", "")
+                                            break
+                                else:
+                                    _cont_text_b = getattr(
+                                        _cont_resp_b.choices[0].message,
+                                        "content", "",
+                                    ) or ""
+                                if _cont_text_b:
+                                    self._fire_stream_delta(_cont_text_b)
+                                self._partial_recovery_done = True
+                                _merged_b = _partial + _cont_text_b
+                                _merged_msg_b = SimpleNamespace(
+                                    role="assistant", content=_merged_b,
+                                    tool_calls=None, reasoning_content=None,
+                                )
+                                response = SimpleNamespace(
+                                    id="partial-recovery-b",
+                                    model=getattr(_cont_resp_b, "model", self.model),
+                                    choices=[SimpleNamespace(
+                                        index=0, message=_merged_msg_b,
+                                        finish_reason="stop",
+                                    )],
+                                    usage=getattr(_cont_resp_b, "usage", None),
+                                )
+                                break  # Success via Option B
+                            except Exception as opt_b_err:
+                                logger.warning(
+                                    "%sBoth recovery options failed. "
+                                    "Returning partial content as final response.",
+                                    self.log_prefix,
+                                )
+                                # Last resort: return what we have
+                                _partial_msg = SimpleNamespace(
+                                    role="assistant", content=_partial,
+                                    tool_calls=None, reasoning_content=None,
+                                )
+                                response = SimpleNamespace(
+                                    id="partial-only",
+                                    model=getattr(self, "model", "unknown"),
+                                    choices=[SimpleNamespace(
+                                        index=0, message=_partial_msg,
+                                        finish_reason="stop",
+                                    )],
+                                    usage=None,
+                                )
+                                self._partial_recovery_done = True
+                                break
+
                    # Stop spinner before printing error messages
                    if thinking_spinner:
                        thinking_spinner.stop("(╥_╥) error, retrying...")