mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
fix(dingtalk): repair _extract_text for dingtalk-stream >= 0.20 SDK shape
The cherry-picked SDK compat fix (previous commit) wired process() to
parse CallbackMessage.data into a ChatbotMessage, but _extract_text()
was still written against the pre-0.20 payload shape:
* message.text changed from dict {content: ...} → TextContent object.
The old code's str(text) fallback produced 'TextContent(content=...)'
as the agent's input, so every received message came in mangled.
* rich_text moved from message.rich_text (list) to
message.rich_text_content.rich_text_list.
This preserves legacy fallbacks (dict-shaped text, bare rich_text list)
while handling the current SDK layout via hasattr(text, 'content').
Adds regression tests covering:
* webhook domain allowlist (api.*, oapi.*, and hostile lookalikes)
* _IncomingHandler.process is a coroutine function
* _extract_text against TextContent object, dict, rich_text_content,
legacy rich_text, and empty-message cases
Also adds kevinskysunny to scripts/release.py AUTHOR_MAP (release CI
blocks unmapped emails).
This commit is contained in:
@@ -238,18 +238,35 @@ class DingTalkAdapter(BasePlatformAdapter):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _extract_text(message: "ChatbotMessage") -> str:
|
def _extract_text(message: "ChatbotMessage") -> str:
|
||||||
"""Extract plain text from a DingTalk chatbot message."""
|
"""Extract plain text from a DingTalk chatbot message.
|
||||||
text = getattr(message, "text", None) or ""
|
|
||||||
if isinstance(text, dict):
|
Handles both legacy and current dingtalk-stream SDK payload shapes:
|
||||||
content = text.get("content", "").strip()
|
* legacy: ``message.text`` was a dict ``{"content": "..."}``
|
||||||
else:
|
* >= 0.20: ``message.text`` is a ``TextContent`` dataclass whose
|
||||||
content = str(text).strip()
|
``__str__`` returns ``"TextContent(content=...)"`` — never fall
|
||||||
|
back to ``str(text)`` without extracting ``.content`` first.
|
||||||
|
* rich text moved from ``message.rich_text`` (list) to
|
||||||
|
``message.rich_text_content.rich_text_list`` (list of dicts).
|
||||||
|
"""
|
||||||
|
text = getattr(message, "text", None)
|
||||||
|
content = ""
|
||||||
|
if text is not None:
|
||||||
|
if isinstance(text, dict):
|
||||||
|
content = (text.get("content") or "").strip()
|
||||||
|
elif hasattr(text, "content"):
|
||||||
|
content = str(text.content or "").strip()
|
||||||
|
else:
|
||||||
|
content = str(text).strip()
|
||||||
|
|
||||||
# Fall back to rich text if present
|
|
||||||
if not content:
|
if not content:
|
||||||
rich_text = getattr(message, "rich_text", None)
|
rich_list = None
|
||||||
if rich_text and isinstance(rich_text, list):
|
rtc = getattr(message, "rich_text_content", None)
|
||||||
parts = [item["text"] for item in rich_text
|
if rtc is not None and hasattr(rtc, "rich_text_list"):
|
||||||
|
rich_list = rtc.rich_text_list
|
||||||
|
if rich_list is None:
|
||||||
|
rich_list = getattr(message, "rich_text", None)
|
||||||
|
if rich_list and isinstance(rich_list, list):
|
||||||
|
parts = [item["text"] for item in rich_list
|
||||||
if isinstance(item, dict) and item.get("text")]
|
if isinstance(item, dict) and item.get("text")]
|
||||||
content = " ".join(parts).strip()
|
content = " ".join(parts).strip()
|
||||||
return content
|
return content
|
||||||
|
|||||||
@@ -82,6 +82,7 @@ AUTHOR_MAP = {
|
|||||||
"xaydinoktay@gmail.com": "aydnOktay",
|
"xaydinoktay@gmail.com": "aydnOktay",
|
||||||
"abdullahfarukozden@gmail.com": "Farukest",
|
"abdullahfarukozden@gmail.com": "Farukest",
|
||||||
"lovre.pesut@gmail.com": "rovle",
|
"lovre.pesut@gmail.com": "rovle",
|
||||||
|
"kevinskysunny@gmail.com": "kevinskysunny",
|
||||||
"hakanerten02@hotmail.com": "teyrebaz33",
|
"hakanerten02@hotmail.com": "teyrebaz33",
|
||||||
"ruzzgarcn@gmail.com": "Ruzzgar",
|
"ruzzgarcn@gmail.com": "Ruzzgar",
|
||||||
"alireza78.crypto@gmail.com": "alireza78a",
|
"alireza78.crypto@gmail.com": "alireza78a",
|
||||||
|
|||||||
@@ -273,3 +273,133 @@ class TestPlatformEnum:
|
|||||||
|
|
||||||
def test_dingtalk_in_platform_enum(self):
|
def test_dingtalk_in_platform_enum(self):
|
||||||
assert Platform.DINGTALK.value == "dingtalk"
|
assert Platform.DINGTALK.value == "dingtalk"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# SDK compatibility regression tests (dingtalk-stream >= 0.20 / 0.24)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class TestWebhookDomainAllowlist:
|
||||||
|
"""Guard the webhook origin allowlist against regression.
|
||||||
|
|
||||||
|
The SDK started returning reply webhooks on ``oapi.dingtalk.com`` in
|
||||||
|
addition to ``api.dingtalk.com``. Both must be accepted, and hostile
|
||||||
|
lookalikes must still be rejected (SSRF defence-in-depth).
|
||||||
|
"""
|
||||||
|
|
||||||
|
def test_api_domain_accepted(self):
|
||||||
|
from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
|
||||||
|
assert _DINGTALK_WEBHOOK_RE.match(
|
||||||
|
"https://api.dingtalk.com/robot/send?access_token=x"
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_oapi_domain_accepted(self):
|
||||||
|
from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
|
||||||
|
assert _DINGTALK_WEBHOOK_RE.match(
|
||||||
|
"https://oapi.dingtalk.com/robot/send?access_token=x"
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_http_rejected(self):
|
||||||
|
from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
|
||||||
|
assert not _DINGTALK_WEBHOOK_RE.match("http://api.dingtalk.com/robot/send")
|
||||||
|
|
||||||
|
def test_suffix_attack_rejected(self):
|
||||||
|
from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
|
||||||
|
assert not _DINGTALK_WEBHOOK_RE.match(
|
||||||
|
"https://api.dingtalk.com.evil.example/"
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_unsanctioned_subdomain_rejected(self):
|
||||||
|
from gateway.platforms.dingtalk import _DINGTALK_WEBHOOK_RE
|
||||||
|
# Only api.* and oapi.* are allowed — e.g. eapi.dingtalk.com must not slip through
|
||||||
|
assert not _DINGTALK_WEBHOOK_RE.match("https://eapi.dingtalk.com/robot/send")
|
||||||
|
|
||||||
|
|
||||||
|
class TestHandlerProcessIsAsync:
|
||||||
|
"""dingtalk-stream >= 0.20 requires ``process`` to be a coroutine."""
|
||||||
|
|
||||||
|
def test_process_is_coroutine_function(self):
|
||||||
|
from gateway.platforms.dingtalk import _IncomingHandler
|
||||||
|
assert asyncio.iscoroutinefunction(_IncomingHandler.process)
|
||||||
|
|
||||||
|
|
||||||
|
class TestExtractText:
|
||||||
|
"""_extract_text must handle both legacy and current SDK payload shapes.
|
||||||
|
|
||||||
|
Before SDK 0.20 ``message.text`` was a ``dict`` with a ``content`` key.
|
||||||
|
From 0.20 onward it is a ``TextContent`` dataclass whose ``__str__``
|
||||||
|
returns ``"TextContent(content=...)"`` — falling back to ``str(text)``
|
||||||
|
leaks that repr into the agent's input.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def test_text_as_dict_legacy(self):
|
||||||
|
from gateway.platforms.dingtalk import DingTalkAdapter
|
||||||
|
msg = MagicMock()
|
||||||
|
msg.text = {"content": "hello world"}
|
||||||
|
msg.rich_text_content = None
|
||||||
|
msg.rich_text = None
|
||||||
|
assert DingTalkAdapter._extract_text(msg) == "hello world"
|
||||||
|
|
||||||
|
def test_text_as_textcontent_object(self):
|
||||||
|
"""SDK >= 0.20 shape: object with ``.content`` attribute."""
|
||||||
|
from gateway.platforms.dingtalk import DingTalkAdapter
|
||||||
|
|
||||||
|
class FakeTextContent:
|
||||||
|
content = "hello from new sdk"
|
||||||
|
|
||||||
|
def __str__(self): # mimic real SDK repr
|
||||||
|
return f"TextContent(content={self.content})"
|
||||||
|
|
||||||
|
msg = MagicMock()
|
||||||
|
msg.text = FakeTextContent()
|
||||||
|
msg.rich_text_content = None
|
||||||
|
msg.rich_text = None
|
||||||
|
result = DingTalkAdapter._extract_text(msg)
|
||||||
|
assert result == "hello from new sdk"
|
||||||
|
assert "TextContent(" not in result
|
||||||
|
|
||||||
|
def test_text_content_attr_with_empty_string(self):
|
||||||
|
from gateway.platforms.dingtalk import DingTalkAdapter
|
||||||
|
|
||||||
|
class FakeTextContent:
|
||||||
|
content = ""
|
||||||
|
|
||||||
|
msg = MagicMock()
|
||||||
|
msg.text = FakeTextContent()
|
||||||
|
msg.rich_text_content = None
|
||||||
|
msg.rich_text = None
|
||||||
|
assert DingTalkAdapter._extract_text(msg) == ""
|
||||||
|
|
||||||
|
def test_rich_text_content_new_shape(self):
|
||||||
|
"""SDK >= 0.20 exposes rich text as ``message.rich_text_content.rich_text_list``."""
|
||||||
|
from gateway.platforms.dingtalk import DingTalkAdapter
|
||||||
|
|
||||||
|
class FakeRichText:
|
||||||
|
rich_text_list = [{"text": "hello "}, {"text": "world"}]
|
||||||
|
|
||||||
|
msg = MagicMock()
|
||||||
|
msg.text = None
|
||||||
|
msg.rich_text_content = FakeRichText()
|
||||||
|
msg.rich_text = None
|
||||||
|
result = DingTalkAdapter._extract_text(msg)
|
||||||
|
assert "hello" in result and "world" in result
|
||||||
|
|
||||||
|
def test_rich_text_legacy_shape(self):
|
||||||
|
"""Legacy ``message.rich_text`` list remains supported."""
|
||||||
|
from gateway.platforms.dingtalk import DingTalkAdapter
|
||||||
|
msg = MagicMock()
|
||||||
|
msg.text = None
|
||||||
|
msg.rich_text_content = None
|
||||||
|
msg.rich_text = [{"text": "legacy "}, {"text": "rich"}]
|
||||||
|
result = DingTalkAdapter._extract_text(msg)
|
||||||
|
assert "legacy" in result and "rich" in result
|
||||||
|
|
||||||
|
def test_empty_message(self):
|
||||||
|
from gateway.platforms.dingtalk import DingTalkAdapter
|
||||||
|
msg = MagicMock()
|
||||||
|
msg.text = None
|
||||||
|
msg.rich_text_content = None
|
||||||
|
msg.rich_text = None
|
||||||
|
assert DingTalkAdapter._extract_text(msg) == ""
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user