From 25080986a03efcbae11d4ea60117d32fc9450c5f Mon Sep 17 00:00:00 2001 From: Marc Bickel Date: Tue, 7 Apr 2026 13:46:59 +0200 Subject: [PATCH] fix(gateway): discard empty placeholder when voice transcription succeeds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a Discord voice message arrives, the adapter sets event.text to "(The user sent a message with no text content)" since voice messages have no text content. The transcription enrichment in _enrich_message_with_transcription() then prepends the transcript but leaves the placeholder intact, causing the agent to receive both: [The user sent a voice message~ Here's what they said: "..."] (The user sent a message with no text content) The agent sees this as two separate user turns — one transcribed and one empty — creating confusing duplicate messages. Fix: when the transcription succeeds and user_text is only the empty placeholder, return just the transcript without the redundant placeholder. --- gateway/run.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gateway/run.py b/gateway/run.py index 68027f28bc4..99c71d9156c 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -6044,6 +6044,11 @@ class GatewayRunner: if enriched_parts: prefix = "\n\n".join(enriched_parts) + # Strip the empty-content placeholder from the Discord adapter + # when we successfully transcribed the audio — it's redundant. + _placeholder = "(The user sent a message with no text content)" + if user_text and user_text.strip() == _placeholder: + return prefix if user_text: return f"{prefix}\n\n{user_text}" return prefix