fix(clarify): unwrap dict choices at the source so every surface gets clean text

The Discord fix (previous commit) handles dict-shaped clarify choices at the Discord adapter only. The same dict-repr leak originates upstream at tools/clarify_tool.py's str(c).strip() normalization — the single platform-agnostic point both the CLI and every gateway adapter flow through. When an LLM emits [{"description": "..."}] instead of bare strings, str(c) produced {'description': '...'} which leaked onto the CLI panel (cli.py:13048/13081), was returned verbatim as the user's answer (cli.py:11945), and hit Telegram's numbered list too. Add _flatten_choice (same label->description->text->title unwrap as the Discord adapter, name/value excluded, keyless dicts dropped) and apply it at the normalization line. Fixes CLI + Telegram + all platforms at the root; the Discord smart-truncation now operates on already-clean text. Adds johnjacobkenny to AUTHOR_MAP for the salvaged commit.
fix(discord): unwrap dict choices + soft-boundary truncate clarify buttons
2026-06-19 16:40:38 +08:00 · 2026-06-18 22:16:57 -07:00 · 2026-06-18 22:14:29 -07:00
5 changed files with 358 additions and 7 deletions
--- a/plugins/platforms/discord/adapter.py
+++ b/plugins/platforms/discord/adapter.py
@@ -4566,6 +4566,13 @@ class DiscordAdapter(BasePlatformAdapter):
        Open-ended mode (``choices`` empty/None): renders the question as
        plain embed text — no buttons. The gateway's text-intercept captures
        the next message in this session and resolves the clarify.
+
+        Choice normalisation: ``choices`` may contain bare strings OR dicts
+        (LLMs sometimes emit ``[{"description": "..."}]`` instead of bare
+        strings, which would otherwise render as raw Python repr on the
+        button label). Dict choices are unwrapped against the canonical
+        LLM tool-call keys ``label``, ``description``, ``text``, ``title``
+        in that order. Dicts with none of those keys are dropped.
        """
        if not self._client or not DISCORD_AVAILABLE:
            return SendResult(success=False, error="Not connected")
@@ -4591,8 +4598,37 @@ class DiscordAdapter(BasePlatformAdapter):
                color=discord.Color.orange(),
            )

+            # Normalise choices: LLMs sometimes emit `[{"description": "..."}]`
+            # instead of bare strings, which would render as raw Python repr on
+            # the button label. Unwrap the common shapes, then stringify.
+            def _flatten_choice(c):
+                if c is None:
+                    return ""
+                if isinstance(c, str):
+                    return c.strip()
+                if isinstance(c, dict):
+                    # Prefer the canonical LLM tool-call user-facing keys
+                    # in the order the LLM is most likely to emit them.
+                    # 'name' and 'value' are deliberately NOT here: they're
+                    # Discord-component-shaped fields that could appear in
+                    # dicts that aren't meant to be choices (e.g., a
+                    # developer-error wiring that passes a Button-shaped
+                    # object). Picking them would leak raw enum values
+                    # or 4-char model identifiers onto user-facing buttons.
+                    # If a dict has none of the canonical keys, drop it
+                    # rather than picking some random field — a garbage
+                    # button label is worse than no button at all.
+                    for key in ("label", "description", "text", "title"):
+                        v = c.get(key)
+                        if isinstance(v, str) and v.strip():
+                            return v.strip()
+                    return ""
+                if isinstance(c, (list, tuple)):
+                    return " ".join(_flatten_choice(x) for x in c).strip()
+                return str(c).strip()
+
            clean_choices = [
-                str(c).strip() for c in (choices or []) if c is not None and str(c).strip()
+                s for s in (_flatten_choice(c) for c in (choices or [])) if s
            ]
            # Discord allows up to 5 buttons per row, 5 rows per view = 25.
            # We reserve one slot for the "Other" button, so cap at 24 choices.
@@ -6129,10 +6165,47 @@ def _define_discord_view_classes() -> None:
            self.resolved = False

            for index, choice in enumerate(self.choices):
-                # Discord button labels are capped at 80 chars.
-                label_body = choice if len(choice) <= 75 else choice[:72] + "..."
+                # Discord button labels are capped at 80 chars. On mobile the
+                # visible width is much narrower (often <40 chars before it
+                # wraps to 2 lines and the second line gets cut off), so we
+                # cap aggressively and cut at a word boundary when possible
+                # to keep the trailing text readable.
+                #
+                # Cut strategy (most-preferred to least-preferred):
+                #   1. Last space in the trailing half of the budget
+                #      (cleanest word boundary)
+                #   2. Last soft boundary in the trailing half of the
+                #      budget (hyphen, comma, period, paren)
+                #   3. Hard cut at the budget limit (last resort)
+                prefix = f"{index + 1}. "
+                budget = 80 - len(prefix)
+                if len(choice) <= budget:
+                    label_body = choice
+                else:
+                    truncated = choice[: budget - 1].rstrip()
+                    cut_at = -1
+                    # 1. Last space in the trailing half of the budget.
+                    space = truncated.rfind(" ")
+                    if space >= budget // 2:
+                        cut_at = space
+                    # 2. Soft boundary — only if no word boundary found.
+                    # Find the latest soft boundary in the trailing half
+                    # of the budget; that maximizes preserved text length.
+                    # Cut AT the soft boundary (inclusive) so the label
+                    # ends on the soft char (e.g. "-" or ",") rather than
+                    # on the alpha char that followed it.
+                    if cut_at < 0:
+                        latest_soft = max(
+                            (truncated.rfind(s) for s in ("-", ",", ".", ")")),
+                            default=-1,
+                        )
+                        if latest_soft >= budget // 2:
+                            cut_at = latest_soft + 1
+                    if cut_at > 0:
+                        truncated = truncated[:cut_at]
+                    label_body = truncated.rstrip() + "…"
                button = discord.ui.Button(
-                    label=f"{index + 1}. {label_body}",
+                    label=f"{prefix}{label_body}",
                    style=discord.ButtonStyle.primary,
                    custom_id=f"clarify:{clarify_id}:{index}",
                )
--- a/scripts/release.py
+++ b/scripts/release.py
@@ -103,6 +103,7 @@ AUTHOR_MAP = {
    "290859878+synapsesx@users.noreply.github.com": "synapsesx",
    "157689911+itsflownium@users.noreply.github.com": "itsflownium",
    "dirtyren@users.noreply.github.com": "dirtyren",
+    "johnjacobkenny@users.noreply.github.com": "johnjacobkenny",
    "chanyoung.kim@nota.ai": "channkim",
    "stevenn.damatoo@gmail.com": "x1erra",
    "evansrory@gmail.com": "zimigit2020",
--- a/tests/gateway/test_discord_clarify_buttons.py
+++ b/tests/gateway/test_discord_clarify_buttons.py
@@ -122,13 +122,56 @@ class TestClarifyChoiceViewConstruction:
            clarify_id="cidZ",
            allowed_user_ids=set(),
        )
-        # 75 chars + 3 ellipsis chars in the body, plus "1. " prefix
+        # 78 chars + single-char ellipsis in the body, plus "1. " prefix.
+        # Uses U+2026 (…) instead of "..." to fit the 80-char Discord cap.
        first_label = view.children[0].label
        assert first_label.startswith("1. ")
-        assert first_label.endswith("...")
+        assert first_label.endswith("\u2026")
        # Final label total <= 80 (Discord cap on button labels)
        assert len(first_label) <= 80

+    def test_truncates_long_choice_label_breaks_on_word_boundary(self):
+        # Long choice with spaces — should cut at the last whole word so the
+        # trailing text stays readable on Discord mobile.
+        long_choice = (
+            "Tight, well-illustrated, covers all 3 audiences "
+            "(patients, families, curious general readers)"
+        )
+        view = ClarifyChoiceView(
+            choices=[long_choice],
+            clarify_id="cidW",
+            allowed_user_ids=set(),
+        )
+        first_label = view.children[0].label
+        assert first_label.startswith("1. ")
+        assert first_label.endswith("\u2026")
+        # No mid-word fragment before the ellipsis.
+        assert not first_label.rstrip("\u2026").endswith("(")
+
+    def test_truncates_long_no_space_choice_on_soft_boundary(self):
+        # A long choice with soft boundaries (commas, hyphens) but no spaces
+        # should still cut on a soft boundary, not mid-word. We use an input
+        # where position 76 is NOT a soft boundary — the test only passes
+        # if the renderer actively searches backward for a soft char
+        # rather than blindly cutting at the budget limit.
+        long_choice = "a" * 30 + "-" + "b" * 30 + "-" + "c" * 30 + "-" + "d" * 30
+        # 30a-30b-30c-30d = 30 + 1 + 30 + 1 + 30 + 1 + 30 = 123 chars
+        # Position 76 is 'b' (a mid-word alpha). The renderer must look back
+        # for a '-' to cut on.
+        view = ClarifyChoiceView(
+            choices=[long_choice],
+            clarify_id="cidSB",
+            allowed_user_ids=set(),
+        )
+        first_label = view.children[0].label
+        assert first_label.endswith("\u2026")
+        assert len(first_label) <= 80
+        body = first_label[len("1. "):].rstrip("\u2026")
+        last_char = body[-1]
+        assert last_char in {"-", ",", ".", ")", " "}, (
+            f"Label cuts mid-word at {last_char!r}: {first_label!r}"
+        )
+

 # ===========================================================================
 # Choice callback → resolve_gateway_clarify
@@ -404,3 +447,134 @@ class TestDiscordSendClarify:
        # Only 1 real choice + 1 Other = 2 children
        assert len(view.children) == 2
        assert "real-choice" in view.children[0].label
+
+    @pytest.mark.asyncio
+    async def test_unwraps_dict_choices_to_description(self):
+        # LLMs sometimes emit [{"description": "..."}] instead of bare strings
+        # — the renderer must unwrap common dict shapes, not str() the whole
+        # dict into a Python repr on the button label.
+        adapter = _make_adapter()
+        channel = MagicMock()
+        sent_msg = MagicMock()
+        sent_msg.id = 555
+        channel.send = AsyncMock(return_value=sent_msg)
+        adapter._client.get_channel = MagicMock(return_value=channel)
+
+        malformed = [
+            {"description": "Tight, well-illustrated"},
+            {"label": "Use label key"},
+            {"text": "Use text key"},
+            "normal-string",  # strings still pass through
+        ]
+        await adapter.send_clarify(
+            chat_id="9001",
+            question="?",
+            choices=malformed,
+            clarify_id="cidU",
+            session_key="sk-U",
+        )
+        kwargs = channel.send.call_args.kwargs
+        view = kwargs["view"]
+        labels = [b.label for b in view.children[:-1]]  # exclude Other
+        # No raw Python repr should leak onto any label.
+        for label in labels:
+            assert "{'" not in label
+            assert "':" not in label
+        # Each dict unwrapped to its inner string.
+        assert any("Tight, well-illustrated" in lbl for lbl in labels)
+        assert any("Use label key" in lbl for lbl in labels)
+        assert any("Use text key" in lbl for lbl in labels)
+        assert any("normal-string" in lbl for lbl in labels)
+
+    @pytest.mark.asyncio
+    async def test_unwrap_prefers_description_over_name_in_multi_key_dict(self):
+        # When the LLM emits both 'name' (often a short identifier in
+        # OpenAI-style tool calls) and 'description' (the user-facing text),
+        # the renderer must surface 'description'. The user should never see
+        # a 4-char model identifier on a button label.
+        adapter = _make_adapter()
+        channel = MagicMock()
+        sent_msg = MagicMock()
+        sent_msg.id = 666
+        channel.send = AsyncMock(return_value=sent_msg)
+        adapter._client.get_channel = MagicMock(return_value=channel)
+
+        await adapter.send_clarify(
+            chat_id="9001",
+            question="?",
+            choices=[{"name": "tight", "description": "Tight, well-illustrated"}],
+            clarify_id="cidN",
+            session_key="sk-N",
+        )
+        kwargs = channel.send.call_args.kwargs
+        view = kwargs["view"]
+        choice_label = view.children[0].label
+        assert "Tight, well-illustrated" in choice_label
+        # The 'name' value (a short identifier) must NOT have leaked.
+        body = choice_label.split("1. ", 1)[1].rstrip("\u2026")
+        assert "tight" not in body, f"'name' leaked onto button: {choice_label!r}"
+
+    @pytest.mark.asyncio
+    async def test_unwrap_prefers_label_over_description(self):
+        # When both 'label' and 'description' are present, 'label' wins.
+        # 'label' is the canonical short user-facing text in most LLM tool
+        # conventions; 'description' is the longer explanation.
+        adapter = _make_adapter()
+        channel = MagicMock()
+        sent_msg = MagicMock()
+        sent_msg.id = 777
+        channel.send = AsyncMock(return_value=sent_msg)
+        adapter._client.get_channel = MagicMock(return_value=channel)
+
+        await adapter.send_clarify(
+            chat_id="9001",
+            question="?",
+            choices=[{"label": "Short", "description": "Long verbose explanation"}],
+            clarify_id="cidL",
+            session_key="sk-L",
+        )
+        kwargs = channel.send.call_args.kwargs
+        view = kwargs["view"]
+        choice_label = view.children[0].label
+        assert "Short" in choice_label
+        # The longer description must NOT have leaked.
+        assert "Long verbose" not in choice_label, (
+            f"'description' leaked over 'label': {choice_label!r}"
+        )
+
+    @pytest.mark.asyncio
+    async def test_unwrap_does_not_pick_value_or_name_alone(self):
+        # 'name' and 'value' are Discord-component-shaped fields that could
+        # accidentally appear in dicts not intended as choices (e.g., a
+        # developer-error in the gateway wiring). The renderer should not
+        # surface them as button labels — only the well-known LLM tool-call
+        # keys (label, description, text, title) should win.
+        adapter = _make_adapter()
+        channel = MagicMock()
+        sent_msg = MagicMock()
+        sent_msg.id = 888
+        channel.send = AsyncMock(return_value=sent_msg)
+        adapter._client.get_channel = MagicMock(return_value=channel)
+
+        await adapter.send_clarify(
+            chat_id="9001",
+            question="?",
+            choices=[
+                {"name": "only_name_here"},   # should be filtered out
+                {"value": "only_value_here"},  # should be filtered out
+                {"description": "real choice"},
+            ],
+            clarify_id="cidNV",
+            session_key="sk-NV",
+        )
+        kwargs = channel.send.call_args.kwargs
+        view = kwargs["view"]
+        choice_labels = [b.label for b in view.children[:-1]]  # exclude Other
+        # Only the well-formed dict survives.
+        assert len(choice_labels) == 1, (
+            f"Expected 1 choice, got {len(choice_labels)}: {choice_labels!r}"
+        )
+        assert "real choice" in choice_labels[0]
+        for label in choice_labels:
+            assert "only_name_here" not in label, f"name leaked: {label!r}"
+            assert "only_value_here" not in label, f"value leaked: {label!r}"
--- a/tests/tools/test_clarify_tool.py
+++ b/tests/tools/test_clarify_tool.py
@@ -9,6 +9,7 @@ from tools.clarify_tool import (
    check_clarify_requirements,
    MAX_CHOICES,
    CLARIFY_SCHEMA,
+    _flatten_choice,
 )


@@ -164,6 +165,70 @@ class TestCheckClarifyRequirements:
        assert check_clarify_requirements() is True


+class TestClarifyDictChoices:
+    """Dict-shaped choices must be unwrapped to user-facing text at the source.
+
+    LLMs sometimes emit [{"description": "..."}] instead of bare strings. The
+    naive str(c) coercion leaked the Python dict repr onto every surface (CLI
+    panel, Discord buttons, Telegram list) AND returned it verbatim as the
+    user's answer. _flatten_choice normalises at the one platform-agnostic
+    entry point so the whole class is fixed in one place.
+    """
+
+    def test_flatten_unwraps_label_first(self):
+        assert _flatten_choice({"label": "Short", "description": "Long"}) == "Short"
+
+    def test_flatten_unwraps_description_when_no_label(self):
+        assert _flatten_choice({"description": "A loose layout"}) == "A loose layout"
+
+    def test_flatten_unwrap_order_label_over_description(self):
+        assert _flatten_choice({"description": "verbose", "label": "tight"}) == "tight"
+
+    def test_flatten_drops_name_value_only_dict(self):
+        # name/value are component-shaped fields, not user-facing labels —
+        # picking them would leak raw enum values / short model ids.
+        assert _flatten_choice({"name": "tight", "value": "x"}) == ""
+
+    def test_flatten_prefers_canonical_key_over_name(self):
+        assert _flatten_choice({"name": "tight", "description": "Tight desc"}) == "Tight desc"
+
+    def test_flatten_drops_keyless_dict(self):
+        assert _flatten_choice({"foo": "bar", "n": 1}) == ""
+
+    def test_flatten_passthrough_string_and_scalar(self):
+        assert _flatten_choice("plain") == "plain"
+        assert _flatten_choice(7) == "7"
+        assert _flatten_choice(None) == ""
+
+    def test_dict_choices_reach_callback_as_clean_text(self):
+        """The whole point: the UI callback never sees a dict repr."""
+        seen = []
+
+        def cb(question, choices):
+            seen.extend(choices or [])
+            return choices[0]
+
+        result = json.loads(clarify_tool(
+            "Pick a layout",
+            choices=[
+                {"choice": "Tight", "description": "Tight, covers all 3 points"},
+                {"description": "Loose layout"},
+                {"name": "modelid", "value": "abc"},  # dropped, not leaked
+                "A plain string choice",
+            ],
+            callback=cb,
+        ))  # type: ignore
+        assert seen == [
+            "Tight, covers all 3 points",
+            "Loose layout",
+            "A plain string choice",
+        ]
+        # and the resolved answer is clean text, not a dict repr
+        assert result["user_response"] == "Tight, covers all 3 points"
+        assert "{" not in result["user_response"]
+        assert all("{" not in c for c in result["choices_offered"])
+
+
 class TestClarifySchema:
    """Tests for the OpenAI function-calling schema."""

--- a/tools/clarify_tool.py
+++ b/tools/clarify_tool.py
@@ -20,6 +20,39 @@ from typing import List, Optional, Callable
 MAX_CHOICES = 4


+def _flatten_choice(c) -> str:
+    """Coerce a single choice into its user-facing display string.
+
+    The schema declares choices as bare strings, but LLMs sometimes emit
+    dict-shaped choices like ``[{"description": "..."}]``. A naive ``str(c)``
+    turns the whole dict into its Python repr — ``{'description': '...'}`` —
+    which then leaks onto every surface that renders the choice (CLI panel,
+    Discord buttons, Telegram numbered list) AND is returned verbatim as the
+    user's answer. Normalising here, at the one platform-agnostic entry point,
+    fixes the whole class in one place instead of per-adapter.
+
+    Dict unwrap order is the canonical LLM tool-call user-facing keys:
+    ``label`` → ``description`` → ``text`` → ``title``. ``name`` and ``value``
+    are deliberately excluded — they're component-shaped fields that could
+    carry raw enum values or short identifiers, not human-readable labels. A
+    dict with none of the canonical keys is dropped (returns ""), since a
+    garbage label is worse than no choice at all.
+    """
+    if c is None:
+        return ""
+    if isinstance(c, str):
+        return c.strip()
+    if isinstance(c, dict):
+        for key in ("label", "description", "text", "title"):
+            v = c.get(key)
+            if isinstance(v, str) and v.strip():
+                return v.strip()
+        return ""
+    if isinstance(c, (list, tuple)):
+        return " ".join(_flatten_choice(x) for x in c).strip()
+    return str(c).strip()
+
+
 def clarify_tool(
    question: str,
    choices: Optional[List[str]] = None,
@@ -48,7 +81,12 @@ def clarify_tool(
    if choices is not None:
        if not isinstance(choices, list):
            return tool_error("choices must be a list of strings.")
-        choices = [str(c).strip() for c in choices if str(c).strip()]
+        # LLMs sometimes emit dict-shaped choices (e.g. [{"description": "..."}])
+        # instead of bare strings. _flatten_choice unwraps them to their
+        # user-facing text here — the single platform-agnostic entry point —
+        # so the CLI panel, Discord buttons, and Telegram list all render clean
+        # text and the resolved answer is never a raw Python dict repr.
+        choices = [s for s in (_flatten_choice(c) for c in choices) if s]
        if len(choices) > MAX_CHOICES:
            choices = choices[:MAX_CHOICES]
        if not choices: