mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-19 16:40:38 +08:00
Compare commits
2 Commits
dependabot
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
384e41856a | ||
|
|
6ef7555f29 |
@@ -4566,6 +4566,13 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
Open-ended mode (``choices`` empty/None): renders the question as
|
||||
plain embed text — no buttons. The gateway's text-intercept captures
|
||||
the next message in this session and resolves the clarify.
|
||||
|
||||
Choice normalisation: ``choices`` may contain bare strings OR dicts
|
||||
(LLMs sometimes emit ``[{"description": "..."}]`` instead of bare
|
||||
strings, which would otherwise render as raw Python repr on the
|
||||
button label). Dict choices are unwrapped against the canonical
|
||||
LLM tool-call keys ``label``, ``description``, ``text``, ``title``
|
||||
in that order. Dicts with none of those keys are dropped.
|
||||
"""
|
||||
if not self._client or not DISCORD_AVAILABLE:
|
||||
return SendResult(success=False, error="Not connected")
|
||||
@@ -4591,8 +4598,37 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
color=discord.Color.orange(),
|
||||
)
|
||||
|
||||
# Normalise choices: LLMs sometimes emit `[{"description": "..."}]`
|
||||
# instead of bare strings, which would render as raw Python repr on
|
||||
# the button label. Unwrap the common shapes, then stringify.
|
||||
def _flatten_choice(c):
|
||||
if c is None:
|
||||
return ""
|
||||
if isinstance(c, str):
|
||||
return c.strip()
|
||||
if isinstance(c, dict):
|
||||
# Prefer the canonical LLM tool-call user-facing keys
|
||||
# in the order the LLM is most likely to emit them.
|
||||
# 'name' and 'value' are deliberately NOT here: they're
|
||||
# Discord-component-shaped fields that could appear in
|
||||
# dicts that aren't meant to be choices (e.g., a
|
||||
# developer-error wiring that passes a Button-shaped
|
||||
# object). Picking them would leak raw enum values
|
||||
# or 4-char model identifiers onto user-facing buttons.
|
||||
# If a dict has none of the canonical keys, drop it
|
||||
# rather than picking some random field — a garbage
|
||||
# button label is worse than no button at all.
|
||||
for key in ("label", "description", "text", "title"):
|
||||
v = c.get(key)
|
||||
if isinstance(v, str) and v.strip():
|
||||
return v.strip()
|
||||
return ""
|
||||
if isinstance(c, (list, tuple)):
|
||||
return " ".join(_flatten_choice(x) for x in c).strip()
|
||||
return str(c).strip()
|
||||
|
||||
clean_choices = [
|
||||
str(c).strip() for c in (choices or []) if c is not None and str(c).strip()
|
||||
s for s in (_flatten_choice(c) for c in (choices or [])) if s
|
||||
]
|
||||
# Discord allows up to 5 buttons per row, 5 rows per view = 25.
|
||||
# We reserve one slot for the "Other" button, so cap at 24 choices.
|
||||
@@ -6129,10 +6165,47 @@ def _define_discord_view_classes() -> None:
|
||||
self.resolved = False
|
||||
|
||||
for index, choice in enumerate(self.choices):
|
||||
# Discord button labels are capped at 80 chars.
|
||||
label_body = choice if len(choice) <= 75 else choice[:72] + "..."
|
||||
# Discord button labels are capped at 80 chars. On mobile the
|
||||
# visible width is much narrower (often <40 chars before it
|
||||
# wraps to 2 lines and the second line gets cut off), so we
|
||||
# cap aggressively and cut at a word boundary when possible
|
||||
# to keep the trailing text readable.
|
||||
#
|
||||
# Cut strategy (most-preferred to least-preferred):
|
||||
# 1. Last space in the trailing half of the budget
|
||||
# (cleanest word boundary)
|
||||
# 2. Last soft boundary in the trailing half of the
|
||||
# budget (hyphen, comma, period, paren)
|
||||
# 3. Hard cut at the budget limit (last resort)
|
||||
prefix = f"{index + 1}. "
|
||||
budget = 80 - len(prefix)
|
||||
if len(choice) <= budget:
|
||||
label_body = choice
|
||||
else:
|
||||
truncated = choice[: budget - 1].rstrip()
|
||||
cut_at = -1
|
||||
# 1. Last space in the trailing half of the budget.
|
||||
space = truncated.rfind(" ")
|
||||
if space >= budget // 2:
|
||||
cut_at = space
|
||||
# 2. Soft boundary — only if no word boundary found.
|
||||
# Find the latest soft boundary in the trailing half
|
||||
# of the budget; that maximizes preserved text length.
|
||||
# Cut AT the soft boundary (inclusive) so the label
|
||||
# ends on the soft char (e.g. "-" or ",") rather than
|
||||
# on the alpha char that followed it.
|
||||
if cut_at < 0:
|
||||
latest_soft = max(
|
||||
(truncated.rfind(s) for s in ("-", ",", ".", ")")),
|
||||
default=-1,
|
||||
)
|
||||
if latest_soft >= budget // 2:
|
||||
cut_at = latest_soft + 1
|
||||
if cut_at > 0:
|
||||
truncated = truncated[:cut_at]
|
||||
label_body = truncated.rstrip() + "…"
|
||||
button = discord.ui.Button(
|
||||
label=f"{index + 1}. {label_body}",
|
||||
label=f"{prefix}{label_body}",
|
||||
style=discord.ButtonStyle.primary,
|
||||
custom_id=f"clarify:{clarify_id}:{index}",
|
||||
)
|
||||
|
||||
@@ -103,6 +103,7 @@ AUTHOR_MAP = {
|
||||
"290859878+synapsesx@users.noreply.github.com": "synapsesx",
|
||||
"157689911+itsflownium@users.noreply.github.com": "itsflownium",
|
||||
"dirtyren@users.noreply.github.com": "dirtyren",
|
||||
"johnjacobkenny@users.noreply.github.com": "johnjacobkenny",
|
||||
"chanyoung.kim@nota.ai": "channkim",
|
||||
"stevenn.damatoo@gmail.com": "x1erra",
|
||||
"evansrory@gmail.com": "zimigit2020",
|
||||
|
||||
@@ -122,13 +122,56 @@ class TestClarifyChoiceViewConstruction:
|
||||
clarify_id="cidZ",
|
||||
allowed_user_ids=set(),
|
||||
)
|
||||
# 75 chars + 3 ellipsis chars in the body, plus "1. " prefix
|
||||
# 78 chars + single-char ellipsis in the body, plus "1. " prefix.
|
||||
# Uses U+2026 (…) instead of "..." to fit the 80-char Discord cap.
|
||||
first_label = view.children[0].label
|
||||
assert first_label.startswith("1. ")
|
||||
assert first_label.endswith("...")
|
||||
assert first_label.endswith("\u2026")
|
||||
# Final label total <= 80 (Discord cap on button labels)
|
||||
assert len(first_label) <= 80
|
||||
|
||||
def test_truncates_long_choice_label_breaks_on_word_boundary(self):
|
||||
# Long choice with spaces — should cut at the last whole word so the
|
||||
# trailing text stays readable on Discord mobile.
|
||||
long_choice = (
|
||||
"Tight, well-illustrated, covers all 3 audiences "
|
||||
"(patients, families, curious general readers)"
|
||||
)
|
||||
view = ClarifyChoiceView(
|
||||
choices=[long_choice],
|
||||
clarify_id="cidW",
|
||||
allowed_user_ids=set(),
|
||||
)
|
||||
first_label = view.children[0].label
|
||||
assert first_label.startswith("1. ")
|
||||
assert first_label.endswith("\u2026")
|
||||
# No mid-word fragment before the ellipsis.
|
||||
assert not first_label.rstrip("\u2026").endswith("(")
|
||||
|
||||
def test_truncates_long_no_space_choice_on_soft_boundary(self):
|
||||
# A long choice with soft boundaries (commas, hyphens) but no spaces
|
||||
# should still cut on a soft boundary, not mid-word. We use an input
|
||||
# where position 76 is NOT a soft boundary — the test only passes
|
||||
# if the renderer actively searches backward for a soft char
|
||||
# rather than blindly cutting at the budget limit.
|
||||
long_choice = "a" * 30 + "-" + "b" * 30 + "-" + "c" * 30 + "-" + "d" * 30
|
||||
# 30a-30b-30c-30d = 30 + 1 + 30 + 1 + 30 + 1 + 30 = 123 chars
|
||||
# Position 76 is 'b' (a mid-word alpha). The renderer must look back
|
||||
# for a '-' to cut on.
|
||||
view = ClarifyChoiceView(
|
||||
choices=[long_choice],
|
||||
clarify_id="cidSB",
|
||||
allowed_user_ids=set(),
|
||||
)
|
||||
first_label = view.children[0].label
|
||||
assert first_label.endswith("\u2026")
|
||||
assert len(first_label) <= 80
|
||||
body = first_label[len("1. "):].rstrip("\u2026")
|
||||
last_char = body[-1]
|
||||
assert last_char in {"-", ",", ".", ")", " "}, (
|
||||
f"Label cuts mid-word at {last_char!r}: {first_label!r}"
|
||||
)
|
||||
|
||||
|
||||
# ===========================================================================
|
||||
# Choice callback → resolve_gateway_clarify
|
||||
@@ -404,3 +447,134 @@ class TestDiscordSendClarify:
|
||||
# Only 1 real choice + 1 Other = 2 children
|
||||
assert len(view.children) == 2
|
||||
assert "real-choice" in view.children[0].label
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unwraps_dict_choices_to_description(self):
|
||||
# LLMs sometimes emit [{"description": "..."}] instead of bare strings
|
||||
# — the renderer must unwrap common dict shapes, not str() the whole
|
||||
# dict into a Python repr on the button label.
|
||||
adapter = _make_adapter()
|
||||
channel = MagicMock()
|
||||
sent_msg = MagicMock()
|
||||
sent_msg.id = 555
|
||||
channel.send = AsyncMock(return_value=sent_msg)
|
||||
adapter._client.get_channel = MagicMock(return_value=channel)
|
||||
|
||||
malformed = [
|
||||
{"description": "Tight, well-illustrated"},
|
||||
{"label": "Use label key"},
|
||||
{"text": "Use text key"},
|
||||
"normal-string", # strings still pass through
|
||||
]
|
||||
await adapter.send_clarify(
|
||||
chat_id="9001",
|
||||
question="?",
|
||||
choices=malformed,
|
||||
clarify_id="cidU",
|
||||
session_key="sk-U",
|
||||
)
|
||||
kwargs = channel.send.call_args.kwargs
|
||||
view = kwargs["view"]
|
||||
labels = [b.label for b in view.children[:-1]] # exclude Other
|
||||
# No raw Python repr should leak onto any label.
|
||||
for label in labels:
|
||||
assert "{'" not in label
|
||||
assert "':" not in label
|
||||
# Each dict unwrapped to its inner string.
|
||||
assert any("Tight, well-illustrated" in lbl for lbl in labels)
|
||||
assert any("Use label key" in lbl for lbl in labels)
|
||||
assert any("Use text key" in lbl for lbl in labels)
|
||||
assert any("normal-string" in lbl for lbl in labels)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unwrap_prefers_description_over_name_in_multi_key_dict(self):
|
||||
# When the LLM emits both 'name' (often a short identifier in
|
||||
# OpenAI-style tool calls) and 'description' (the user-facing text),
|
||||
# the renderer must surface 'description'. The user should never see
|
||||
# a 4-char model identifier on a button label.
|
||||
adapter = _make_adapter()
|
||||
channel = MagicMock()
|
||||
sent_msg = MagicMock()
|
||||
sent_msg.id = 666
|
||||
channel.send = AsyncMock(return_value=sent_msg)
|
||||
adapter._client.get_channel = MagicMock(return_value=channel)
|
||||
|
||||
await adapter.send_clarify(
|
||||
chat_id="9001",
|
||||
question="?",
|
||||
choices=[{"name": "tight", "description": "Tight, well-illustrated"}],
|
||||
clarify_id="cidN",
|
||||
session_key="sk-N",
|
||||
)
|
||||
kwargs = channel.send.call_args.kwargs
|
||||
view = kwargs["view"]
|
||||
choice_label = view.children[0].label
|
||||
assert "Tight, well-illustrated" in choice_label
|
||||
# The 'name' value (a short identifier) must NOT have leaked.
|
||||
body = choice_label.split("1. ", 1)[1].rstrip("\u2026")
|
||||
assert "tight" not in body, f"'name' leaked onto button: {choice_label!r}"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unwrap_prefers_label_over_description(self):
|
||||
# When both 'label' and 'description' are present, 'label' wins.
|
||||
# 'label' is the canonical short user-facing text in most LLM tool
|
||||
# conventions; 'description' is the longer explanation.
|
||||
adapter = _make_adapter()
|
||||
channel = MagicMock()
|
||||
sent_msg = MagicMock()
|
||||
sent_msg.id = 777
|
||||
channel.send = AsyncMock(return_value=sent_msg)
|
||||
adapter._client.get_channel = MagicMock(return_value=channel)
|
||||
|
||||
await adapter.send_clarify(
|
||||
chat_id="9001",
|
||||
question="?",
|
||||
choices=[{"label": "Short", "description": "Long verbose explanation"}],
|
||||
clarify_id="cidL",
|
||||
session_key="sk-L",
|
||||
)
|
||||
kwargs = channel.send.call_args.kwargs
|
||||
view = kwargs["view"]
|
||||
choice_label = view.children[0].label
|
||||
assert "Short" in choice_label
|
||||
# The longer description must NOT have leaked.
|
||||
assert "Long verbose" not in choice_label, (
|
||||
f"'description' leaked over 'label': {choice_label!r}"
|
||||
)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_unwrap_does_not_pick_value_or_name_alone(self):
|
||||
# 'name' and 'value' are Discord-component-shaped fields that could
|
||||
# accidentally appear in dicts not intended as choices (e.g., a
|
||||
# developer-error in the gateway wiring). The renderer should not
|
||||
# surface them as button labels — only the well-known LLM tool-call
|
||||
# keys (label, description, text, title) should win.
|
||||
adapter = _make_adapter()
|
||||
channel = MagicMock()
|
||||
sent_msg = MagicMock()
|
||||
sent_msg.id = 888
|
||||
channel.send = AsyncMock(return_value=sent_msg)
|
||||
adapter._client.get_channel = MagicMock(return_value=channel)
|
||||
|
||||
await adapter.send_clarify(
|
||||
chat_id="9001",
|
||||
question="?",
|
||||
choices=[
|
||||
{"name": "only_name_here"}, # should be filtered out
|
||||
{"value": "only_value_here"}, # should be filtered out
|
||||
{"description": "real choice"},
|
||||
],
|
||||
clarify_id="cidNV",
|
||||
session_key="sk-NV",
|
||||
)
|
||||
kwargs = channel.send.call_args.kwargs
|
||||
view = kwargs["view"]
|
||||
choice_labels = [b.label for b in view.children[:-1]] # exclude Other
|
||||
# Only the well-formed dict survives.
|
||||
assert len(choice_labels) == 1, (
|
||||
f"Expected 1 choice, got {len(choice_labels)}: {choice_labels!r}"
|
||||
)
|
||||
assert "real choice" in choice_labels[0]
|
||||
for label in choice_labels:
|
||||
assert "only_name_here" not in label, f"name leaked: {label!r}"
|
||||
assert "only_value_here" not in label, f"value leaked: {label!r}"
|
||||
|
||||
@@ -9,6 +9,7 @@ from tools.clarify_tool import (
|
||||
check_clarify_requirements,
|
||||
MAX_CHOICES,
|
||||
CLARIFY_SCHEMA,
|
||||
_flatten_choice,
|
||||
)
|
||||
|
||||
|
||||
@@ -164,6 +165,70 @@ class TestCheckClarifyRequirements:
|
||||
assert check_clarify_requirements() is True
|
||||
|
||||
|
||||
class TestClarifyDictChoices:
|
||||
"""Dict-shaped choices must be unwrapped to user-facing text at the source.
|
||||
|
||||
LLMs sometimes emit [{"description": "..."}] instead of bare strings. The
|
||||
naive str(c) coercion leaked the Python dict repr onto every surface (CLI
|
||||
panel, Discord buttons, Telegram list) AND returned it verbatim as the
|
||||
user's answer. _flatten_choice normalises at the one platform-agnostic
|
||||
entry point so the whole class is fixed in one place.
|
||||
"""
|
||||
|
||||
def test_flatten_unwraps_label_first(self):
|
||||
assert _flatten_choice({"label": "Short", "description": "Long"}) == "Short"
|
||||
|
||||
def test_flatten_unwraps_description_when_no_label(self):
|
||||
assert _flatten_choice({"description": "A loose layout"}) == "A loose layout"
|
||||
|
||||
def test_flatten_unwrap_order_label_over_description(self):
|
||||
assert _flatten_choice({"description": "verbose", "label": "tight"}) == "tight"
|
||||
|
||||
def test_flatten_drops_name_value_only_dict(self):
|
||||
# name/value are component-shaped fields, not user-facing labels —
|
||||
# picking them would leak raw enum values / short model ids.
|
||||
assert _flatten_choice({"name": "tight", "value": "x"}) == ""
|
||||
|
||||
def test_flatten_prefers_canonical_key_over_name(self):
|
||||
assert _flatten_choice({"name": "tight", "description": "Tight desc"}) == "Tight desc"
|
||||
|
||||
def test_flatten_drops_keyless_dict(self):
|
||||
assert _flatten_choice({"foo": "bar", "n": 1}) == ""
|
||||
|
||||
def test_flatten_passthrough_string_and_scalar(self):
|
||||
assert _flatten_choice("plain") == "plain"
|
||||
assert _flatten_choice(7) == "7"
|
||||
assert _flatten_choice(None) == ""
|
||||
|
||||
def test_dict_choices_reach_callback_as_clean_text(self):
|
||||
"""The whole point: the UI callback never sees a dict repr."""
|
||||
seen = []
|
||||
|
||||
def cb(question, choices):
|
||||
seen.extend(choices or [])
|
||||
return choices[0]
|
||||
|
||||
result = json.loads(clarify_tool(
|
||||
"Pick a layout",
|
||||
choices=[
|
||||
{"choice": "Tight", "description": "Tight, covers all 3 points"},
|
||||
{"description": "Loose layout"},
|
||||
{"name": "modelid", "value": "abc"}, # dropped, not leaked
|
||||
"A plain string choice",
|
||||
],
|
||||
callback=cb,
|
||||
)) # type: ignore
|
||||
assert seen == [
|
||||
"Tight, covers all 3 points",
|
||||
"Loose layout",
|
||||
"A plain string choice",
|
||||
]
|
||||
# and the resolved answer is clean text, not a dict repr
|
||||
assert result["user_response"] == "Tight, covers all 3 points"
|
||||
assert "{" not in result["user_response"]
|
||||
assert all("{" not in c for c in result["choices_offered"])
|
||||
|
||||
|
||||
class TestClarifySchema:
|
||||
"""Tests for the OpenAI function-calling schema."""
|
||||
|
||||
|
||||
@@ -20,6 +20,39 @@ from typing import List, Optional, Callable
|
||||
MAX_CHOICES = 4
|
||||
|
||||
|
||||
def _flatten_choice(c) -> str:
|
||||
"""Coerce a single choice into its user-facing display string.
|
||||
|
||||
The schema declares choices as bare strings, but LLMs sometimes emit
|
||||
dict-shaped choices like ``[{"description": "..."}]``. A naive ``str(c)``
|
||||
turns the whole dict into its Python repr — ``{'description': '...'}`` —
|
||||
which then leaks onto every surface that renders the choice (CLI panel,
|
||||
Discord buttons, Telegram numbered list) AND is returned verbatim as the
|
||||
user's answer. Normalising here, at the one platform-agnostic entry point,
|
||||
fixes the whole class in one place instead of per-adapter.
|
||||
|
||||
Dict unwrap order is the canonical LLM tool-call user-facing keys:
|
||||
``label`` → ``description`` → ``text`` → ``title``. ``name`` and ``value``
|
||||
are deliberately excluded — they're component-shaped fields that could
|
||||
carry raw enum values or short identifiers, not human-readable labels. A
|
||||
dict with none of the canonical keys is dropped (returns ""), since a
|
||||
garbage label is worse than no choice at all.
|
||||
"""
|
||||
if c is None:
|
||||
return ""
|
||||
if isinstance(c, str):
|
||||
return c.strip()
|
||||
if isinstance(c, dict):
|
||||
for key in ("label", "description", "text", "title"):
|
||||
v = c.get(key)
|
||||
if isinstance(v, str) and v.strip():
|
||||
return v.strip()
|
||||
return ""
|
||||
if isinstance(c, (list, tuple)):
|
||||
return " ".join(_flatten_choice(x) for x in c).strip()
|
||||
return str(c).strip()
|
||||
|
||||
|
||||
def clarify_tool(
|
||||
question: str,
|
||||
choices: Optional[List[str]] = None,
|
||||
@@ -48,7 +81,12 @@ def clarify_tool(
|
||||
if choices is not None:
|
||||
if not isinstance(choices, list):
|
||||
return tool_error("choices must be a list of strings.")
|
||||
choices = [str(c).strip() for c in choices if str(c).strip()]
|
||||
# LLMs sometimes emit dict-shaped choices (e.g. [{"description": "..."}])
|
||||
# instead of bare strings. _flatten_choice unwraps them to their
|
||||
# user-facing text here — the single platform-agnostic entry point —
|
||||
# so the CLI panel, Discord buttons, and Telegram list all render clean
|
||||
# text and the resolved answer is never a raw Python dict repr.
|
||||
choices = [s for s in (_flatten_choice(c) for c in choices) if s]
|
||||
if len(choices) > MAX_CHOICES:
|
||||
choices = choices[:MAX_CHOICES]
|
||||
if not choices:
|
||||
|
||||
Reference in New Issue
Block a user