From 3438d274f6cefc4728bd2b3b00e7c6e75c11da68 Mon Sep 17 00:00:00 2001 From: Teknium Date: Fri, 17 Apr 2026 00:38:16 -0700 Subject: [PATCH] fix(dingtalk): repair _extract_text for dingtalk-stream >= 0.20 SDK shape MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The cherry-picked SDK compat fix (previous commit) wired process() to parse CallbackMessage.data into a ChatbotMessage, but _extract_text() was still written against the pre-0.20 payload shape: * message.text changed from dict {content: ...} → TextContent object. The old code's str(text) fallback produced 'TextContent(content=...)' as the agent's input, so every received message came in mangled. * rich_text moved from message.rich_text (list) to message.rich_text_content.rich_text_list. This preserves legacy fallbacks (dict-shaped text, bare rich_text list) while handling the current SDK layout via hasattr(text, 'content'). Adds regression tests covering: * webhook domain allowlist (api.*, oapi.*, and hostile lookalikes) * _IncomingHandler.process is a coroutine function * _extract_text against TextContent object, dict, rich_text_content, legacy rich_text, and empty-message cases Also adds kevinskysunny to scripts/release.py AUTHOR_MAP (release CI blocks unmapped emails). --- gateway/platforms/dingtalk.py | 37 +++++++--- scripts/release.py | 1 + tests/gateway/test_dingtalk.py | 130 +++++++++++++++++++++++++++++++++ 3 files changed, 158 insertions(+), 10 deletions(-) diff --git a/gateway/platforms/dingtalk.py b/gateway/platforms/dingtalk.py index 1abf3a3689..424c7af52b 100644 --- a/gateway/platforms/dingtalk.py +++ b/gateway/platforms/dingtalk.py @@ -238,18 +238,35 @@ class DingTalkAdapter(BasePlatformAdapter): @staticmethod def _extract_text(message: "ChatbotMessage") -> str: - """Extract plain text from a DingTalk chatbot message.""" - text = getattr(message, "text", None) or "" - if isinstance(text, dict): - content = text.get("content", "").strip() - else: - content = str(text).strip() + """Extract plain text from a DingTalk chatbot message. + + Handles both legacy and current dingtalk-stream SDK payload shapes: + * legacy: ``message.text`` was a dict ``{"content": "..."}`` + * >= 0.20: ``message.text`` is a ``TextContent`` dataclass whose + ``__str__`` returns ``"TextContent(content=...)"`` — never fall + back to ``str(text)`` without extracting ``.content`` first. + * rich text moved from ``message.rich_text`` (list) to + ``message.rich_text_content.rich_text_list`` (list of dicts). + """ + text = getattr(message, "text", None) + content = "" + if text is not None: + if isinstance(text, dict): + content = (text.get("content") or "").strip() + elif hasattr(text, "content"): + content = str(text.content or "").strip() + else: + content = str(text).strip() - # Fall back to rich text if present if not content: - rich_text = getattr(message, "rich_text", None) - if rich_text and isinstance(rich_text, list): - parts = [item["text"] for item in rich_text + rich_list = None + rtc = getattr(message, "rich_text_content", None) + if rtc is not None and hasattr(rtc, "rich_text_list"): + rich_list = rtc.rich_text_list + if rich_list is None: + rich_list = getattr(message, "rich_text", None) + if rich_list and isinstance(rich_list, list): + parts = [item["text"] for item in rich_list if isinstance(item, dict) and item.get("text")] content = " ".join(parts).strip() return content diff --git a/scripts/release.py b/scripts/release.py index 88f7331b28..42baf5b7cc 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -82,6 +82,7 @@ AUTHOR_MAP = { "xaydinoktay@gmail.com": "aydnOktay", "abdullahfarukozden@gmail.com": "Farukest", "lovre.pesut@gmail.com": "rovle", + "kevinskysunny@gmail.com": "kevinskysunny", "hakanerten02@hotmail.com": "teyrebaz33", "ruzzgarcn@gmail.com": "Ruzzgar", "alireza78.crypto@gmail.com": "alireza78a", diff --git a/tests/gateway/test_dingtalk.py b/tests/gateway/test_dingtalk.py index 5271136502..94d809850c 100644 --- a/tests/gateway/test_dingtalk.py +++ b/tests/gateway/test_dingtalk.py @@ -273,3 +273,133 @@ class TestPlatformEnum: def test_dingtalk_in_platform_enum(self): assert Platform.DINGTALK.value == "dingtalk" + + +# --------------------------------------------------------------------------- +# SDK compatibility regression tests (dingtalk-stream >= 0.20 / 0.24) +# --------------------------------------------------------------------------- + + +class TestWebhookDomainAllowlist: + """Guard the webhook origin allowlist against regression. + + The SDK started returning reply webhooks on ``oapi.dingtalk.com`` in + addition to ``api.dingtalk.com``. Both must be accepted, and hostile + lookalikes must still be rejected (SSRF defence-in-depth). + """ + + def test_api_domain_accepted(self): + from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE + assert _DINGTALK_WEBHOOK_RE.match( + "https://api.dingtalk.com/robot/send?access_token=x" + ) + + def test_oapi_domain_accepted(self): + from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE + assert _DINGTALK_WEBHOOK_RE.match( + "https://oapi.dingtalk.com/robot/send?access_token=x" + ) + + def test_http_rejected(self): + from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE + assert not _DINGTALK_WEBHOOK_RE.match("http://api.dingtalk.com/robot/send") + + def test_suffix_attack_rejected(self): + from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE + assert not _DINGTALK_WEBHOOK_RE.match( + "https://api.dingtalk.com.evil.example/" + ) + + def test_unsanctioned_subdomain_rejected(self): + from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE + # Only api.* and oapi.* are allowed — e.g. eapi.dingtalk.com must not slip through + assert not _DINGTALK_WEBHOOK_RE.match("https://eapi.dingtalk.com/robot/send") + + +class TestHandlerProcessIsAsync: + """dingtalk-stream >= 0.20 requires ``process`` to be a coroutine.""" + + def test_process_is_coroutine_function(self): + from gateway.platforms.dingtalk import _IncomingHandler + assert asyncio.iscoroutinefunction(_IncomingHandler.process) + + +class TestExtractText: + """_extract_text must handle both legacy and current SDK payload shapes. + + Before SDK 0.20 ``message.text`` was a ``dict`` with a ``content`` key. + From 0.20 onward it is a ``TextContent`` dataclass whose ``__str__`` + returns ``"TextContent(content=...)"`` — falling back to ``str(text)`` + leaks that repr into the agent's input. + """ + + def test_text_as_dict_legacy(self): + from gateway.platforms.dingtalk import DingTalkAdapter + msg = MagicMock() + msg.text = {"content": "hello world"} + msg.rich_text_content = None + msg.rich_text = None + assert DingTalkAdapter._extract_text(msg) == "hello world" + + def test_text_as_textcontent_object(self): + """SDK >= 0.20 shape: object with ``.content`` attribute.""" + from gateway.platforms.dingtalk import DingTalkAdapter + + class FakeTextContent: + content = "hello from new sdk" + + def __str__(self): # mimic real SDK repr + return f"TextContent(content={self.content})" + + msg = MagicMock() + msg.text = FakeTextContent() + msg.rich_text_content = None + msg.rich_text = None + result = DingTalkAdapter._extract_text(msg) + assert result == "hello from new sdk" + assert "TextContent(" not in result + + def test_text_content_attr_with_empty_string(self): + from gateway.platforms.dingtalk import DingTalkAdapter + + class FakeTextContent: + content = "" + + msg = MagicMock() + msg.text = FakeTextContent() + msg.rich_text_content = None + msg.rich_text = None + assert DingTalkAdapter._extract_text(msg) == "" + + def test_rich_text_content_new_shape(self): + """SDK >= 0.20 exposes rich text as ``message.rich_text_content.rich_text_list``.""" + from gateway.platforms.dingtalk import DingTalkAdapter + + class FakeRichText: + rich_text_list = [{"text": "hello "}, {"text": "world"}] + + msg = MagicMock() + msg.text = None + msg.rich_text_content = FakeRichText() + msg.rich_text = None + result = DingTalkAdapter._extract_text(msg) + assert "hello" in result and "world" in result + + def test_rich_text_legacy_shape(self): + """Legacy ``message.rich_text`` list remains supported.""" + from gateway.platforms.dingtalk import DingTalkAdapter + msg = MagicMock() + msg.text = None + msg.rich_text_content = None + msg.rich_text = [{"text": "legacy "}, {"text": "rich"}] + result = DingTalkAdapter._extract_text(msg) + assert "legacy" in result and "rich" in result + + def test_empty_message(self): + from gateway.platforms.dingtalk import DingTalkAdapter + msg = MagicMock() + msg.text = None + msg.rich_text_content = None + msg.rich_text = None + assert DingTalkAdapter._extract_text(msg) == "" +