Compare commits

...

1 Commits

Author SHA1 Message Date
Teknium
8e0a3c6083 fix: handle Mistral Magistral structured content blocks
Mistral Magistral reasoning models (and mistral-large-2512+) return
message content as a list of typed blocks instead of a plain string:

  [{"type": "thinking", "thinking": [{"type": "text", "text": "..."}]},
   {"type": "text", "text": "final answer"}]

This happens in both streaming deltas and non-streaming responses,
causing TypeError: sequence item 0: expected str instance, list found
when the code tries to join content parts.

Changes:
- Add _normalize_structured_content() helper that extracts text and
  thinking parts from Mistral structured blocks
- Fix streaming path: normalize delta.content before appending to
  content_parts, route thinking to reasoning_parts
- Fix non-streaming normalization: use the helper to also extract
  thinking blocks as reasoning_content (was silently dropping them)
- Fix _build_assistant_message: normalize list content before
  string operations
- Fix length truncation/continuation paths: normalize content
  before string concatenation
- Add 25 tests covering the helper, streaming, non-streaming,
  and _build_assistant_message paths

Fixes the reported CLI/Discord bot crash when using magistral-latest
or magistral-medium-latest via api.mistral.ai.
2026-04-03 01:49:14 -07:00
2 changed files with 509 additions and 39 deletions

View File

@@ -375,6 +375,58 @@ def _sanitize_messages_surrogates(messages: list) -> bool:
return found
def _normalize_structured_content(content) -> tuple:
"""Normalize Mistral-style structured content blocks to (text, reasoning).
Mistral's Magistral models (and mistral-large-2512+) return ``content`` as
a list of typed blocks instead of a plain string::
[{"type": "thinking", "thinking": [{"type": "text", "text": "..."}]},
{"type": "text", "text": "final answer"},
{"type": "reference", ...}]
This also appears in streaming deltas (``delta.content`` is a list).
Returns:
(text_content, thinking_content) — text is always a string (possibly
empty), thinking is a string or None.
"""
if content is None:
return ("", None)
if isinstance(content, str):
return (content, None)
if not isinstance(content, list):
return (str(content), None)
text_parts: list = []
thinking_parts: list = []
for block in content:
if isinstance(block, str):
text_parts.append(block)
continue
if not isinstance(block, dict):
continue
block_type = block.get("type", "")
if block_type == "text":
text_parts.append(block.get("text", ""))
elif block_type == "thinking":
# "thinking" is itself a list of text blocks
thinking = block.get("thinking", [])
if isinstance(thinking, list):
for t in thinking:
if isinstance(t, dict) and t.get("type") == "text":
thinking_parts.append(t.get("text", ""))
elif isinstance(t, str):
thinking_parts.append(t)
elif isinstance(thinking, str):
thinking_parts.append(thinking)
# Other types (reference, image, document, audio, file) are skipped.
text = "\n".join(p for p in text_parts if p)
thinking = "\n\n".join(p for p in thinking_parts if p) or None
return (text, thinking)
def _strip_budget_warnings_from_history(messages: list) -> None:
"""Remove budget pressure warnings from tool-result messages in-place.
@@ -4107,30 +4159,43 @@ class AIAgent:
_fire_first_delta()
self._fire_reasoning_delta(reasoning_text)
# Accumulate text content — fire callback only when no tool calls
# Accumulate text content — fire callback only when no tool calls.
# Mistral Magistral models return delta.content as a list of
# structured blocks instead of a plain string; normalize first.
if delta and delta.content:
content_parts.append(delta.content)
if not tool_calls_acc:
_fire_first_delta()
self._fire_stream_delta(delta.content)
deltas_were_sent["yes"] = True
_raw_delta_content = delta.content
if isinstance(_raw_delta_content, list):
_delta_text, _delta_thinking = _normalize_structured_content(_raw_delta_content)
if _delta_thinking:
reasoning_parts.append(_delta_thinking)
_fire_first_delta()
self._fire_reasoning_delta(_delta_thinking)
else:
# Tool calls suppress regular content streaming (avoids
# displaying chatty "I'll use the tool..." text alongside
# tool calls). But reasoning tags embedded in suppressed
# content should still reach the display — otherwise the
# reasoning box only appears as a post-response fallback,
# rendering it confusingly after the already-streamed
# response. Route suppressed content through the stream
# delta callback so its tag extraction can fire the
# reasoning display. Non-reasoning text is harmlessly
# suppressed by the CLI's _stream_delta when the stream
# box is already closed (tool boundary flush).
if self.stream_delta_callback:
try:
self.stream_delta_callback(delta.content)
except Exception:
pass
_delta_text = _raw_delta_content
if _delta_text:
content_parts.append(_delta_text)
if not tool_calls_acc:
_fire_first_delta()
self._fire_stream_delta(_delta_text)
deltas_were_sent["yes"] = True
else:
# Tool calls suppress regular content streaming (avoids
# displaying chatty "I'll use the tool..." text alongside
# tool calls). But reasoning tags embedded in suppressed
# content should still reach the display — otherwise the
# reasoning box only appears as a post-response fallback,
# rendering it confusingly after the already-streamed
# response. Route suppressed content through the stream
# delta callback so its tag extraction can fire the
# reasoning display. Non-reasoning text is harmlessly
# suppressed by the CLI's _stream_delta when the stream
# box is already closed (tool boundary flush).
if self.stream_delta_callback:
try:
self.stream_delta_callback(_delta_text)
except Exception:
pass
# Accumulate tool call deltas — notify display on first name
if delta and delta.tool_calls:
@@ -5170,18 +5235,32 @@ class AIAgent:
Handles reasoning extraction, reasoning_details, and optional tool_calls
so both the tool-call path and the final-response path share one builder.
"""
# Normalize content early — Mistral Magistral models return content
# as a list of structured blocks instead of a string.
_raw_content = assistant_message.content
_structured_thinking = None
if isinstance(_raw_content, list):
_raw_content, _structured_thinking = _normalize_structured_content(_raw_content)
reasoning_text = self._extract_reasoning(assistant_message)
_from_structured = bool(reasoning_text)
# If the structured content included thinking blocks and
# _extract_reasoning didn't find anything, use the structured thinking.
if not reasoning_text and _structured_thinking:
reasoning_text = _structured_thinking
_from_structured = True
# Fallback: extract inline <think> blocks from content when no structured
# reasoning fields are present (some models/providers embed thinking
# directly in the content rather than returning separate API fields).
if not reasoning_text:
content = assistant_message.content or ""
think_blocks = re.findall(r'<think>(.*?)</think>', content, flags=re.DOTALL)
if think_blocks:
combined = "\n\n".join(b.strip() for b in think_blocks if b.strip())
reasoning_text = combined or None
content = _raw_content or ""
if isinstance(content, str):
think_blocks = re.findall(r'<think>(.*?)</think>', content, flags=re.DOTALL)
if think_blocks:
combined = "\n\n".join(b.strip() for b in think_blocks if b.strip())
reasoning_text = combined or None
if reasoning_text and self.verbose_logging:
logging.debug(f"Captured reasoning ({len(reasoning_text)} chars): {reasoning_text}")
@@ -5203,7 +5282,7 @@ class AIAgent:
msg = {
"role": "assistant",
"content": assistant_message.content or "",
"content": _raw_content or "",
"reasoning": reasoning_text,
"finish_reason": finish_reason,
}
@@ -7022,6 +7101,9 @@ class AIAgent:
if self.api_mode == "chat_completions":
_trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None
_trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
# Mistral Magistral: content may be a list of blocks
if isinstance(_trunc_content, list):
_trunc_content, _ = _normalize_structured_content(_trunc_content)
elif self.api_mode == "anthropic_messages":
# Anthropic response.content is a list of blocks
_text_parts = []
@@ -7076,7 +7158,10 @@ class AIAgent:
interim_msg = self._build_assistant_message(assistant_message, finish_reason)
messages.append(interim_msg)
if assistant_message.content:
truncated_response_prefix += assistant_message.content
_cont = assistant_message.content
if isinstance(_cont, list):
_cont, _ = _normalize_structured_content(_cont)
truncated_response_prefix += _cont
if length_continue_retries < 3:
self._vprint(
@@ -7791,21 +7876,22 @@ class AIAgent:
# Normalize content to string — some OpenAI-compatible servers
# (llama-server, etc.) return content as a dict or list instead
# of a plain string, which crashes downstream .strip() calls.
# Mistral Magistral models return a list of structured blocks
# including {type: "thinking"} and {type: "text"}.
if assistant_message.content is not None and not isinstance(assistant_message.content, str):
raw = assistant_message.content
if isinstance(raw, dict):
assistant_message.content = raw.get("text", "") or raw.get("content", "") or json.dumps(raw)
elif isinstance(raw, list):
# Multimodal content list — extract text parts
parts = []
for part in raw:
if isinstance(part, str):
parts.append(part)
elif isinstance(part, dict) and part.get("type") == "text":
parts.append(part.get("text", ""))
elif isinstance(part, dict) and "text" in part:
parts.append(str(part["text"]))
assistant_message.content = "\n".join(parts)
_norm_text, _norm_thinking = _normalize_structured_content(raw)
assistant_message.content = _norm_text
# Preserve extracted thinking as reasoning_content so
# _extract_reasoning / _build_assistant_message picks it up.
if _norm_thinking and not getattr(assistant_message, "reasoning_content", None):
try:
assistant_message.reasoning_content = _norm_thinking
except (AttributeError, TypeError):
pass # frozen/read-only SDK object
else:
assistant_message.content = str(raw)

View File

@@ -0,0 +1,384 @@
"""Tests for Mistral Magistral structured content handling.
Mistral's Magistral reasoning models return ``content`` as a list of typed
blocks instead of a plain string (both in streaming deltas and non-streaming
responses). This test suite verifies that:
1. _normalize_structured_content() correctly extracts text and thinking parts.
2. The streaming path handles list-valued delta.content without crashing.
3. The non-streaming path normalizes list content and extracts thinking.
4. _build_assistant_message handles list content correctly.
"""
import os
import uuid
from types import SimpleNamespace
from unittest.mock import MagicMock, patch
import pytest
# ── Ensure HERMES_HOME is set before importing run_agent ──────────────
if not os.environ.get("HERMES_HOME"):
import tempfile
_tmp = tempfile.mkdtemp(prefix="hermes_test_")
os.environ["HERMES_HOME"] = _tmp
from run_agent import AIAgent, _normalize_structured_content
# ── Fixtures ──────────────────────────────────────────────────────────
def _make_tool_defs(*names):
"""Build minimal tool definitions matching get_tool_definitions output."""
return [
{"type": "function", "function": {"name": n, "description": n, "parameters": {}}}
for n in names
]
@pytest.fixture
def agent():
    """Minimal AIAgent for testing _build_assistant_message.

    Patches tool discovery and the OpenAI client constructor so no network
    access or real tool setup happens during AIAgent construction; callbacks
    are disabled so nothing attempts to stream to a display.
    """
    with (
        patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
        patch("run_agent.check_toolset_requirements", return_value={}),
        patch("run_agent.OpenAI"),
    ):
        ag = AIAgent(
            api_key="test-key-1234567890",
            model="mistral/magistral-medium-latest",
            quiet_mode=True,
            skip_context_files=True,
            skip_memory=True,
        )
    # NOTE(review): the diff rendering stripped indentation, so it is unclear
    # whether these assignments sit inside or after the `with` block — either
    # placement behaves the same here, since the patches only need to cover
    # AIAgent construction. Confirm against the original file.
    ag.client = MagicMock()
    ag.verbose_logging = False
    ag.reasoning_callback = None
    ag.stream_delta_callback = None
    return ag
# ── Sample data matching Mistral's API format ─────────────────────────

# Full non-streaming Magistral response: a thinking block (whose value is
# itself a list of text blocks) followed by the final text answer.
MAGISTRAL_CONTENT_BLOCKS = [
    {
        "type": "thinking",
        "thinking": [
            {"type": "text", "text": "Let me think about this step by step."},
            {"type": "text", "text": "The capital of France is Paris."},
        ],
    },
    {"type": "text", "text": "The capital of France is Paris."},
]

# Response with text only — no reasoning blocks present.
MAGISTRAL_TEXT_ONLY_BLOCKS = [
    {"type": "text", "text": "Hello, how can I help?"},
]

# Response including a "reference" block, which carries no renderable text
# and must be skipped by the normalizer.
MAGISTRAL_WITH_REFERENCE = [
    {"type": "thinking", "thinking": [{"type": "text", "text": "Checking references."}]},
    {"type": "text", "text": "Here is the answer."},
    {"type": "reference", "url": "https://example.com"},
]

# Streaming delta chunks: one carrying only thinking, one carrying only text.
STREAMING_THINKING_DELTA = [
    {"type": "thinking", "thinking": [{"type": "text", "text": "Okay"}]},
]
STREAMING_TEXT_DELTA = [
    {"type": "text", "text": "Hello"},
]
# ── Tests: _normalize_structured_content ──────────────────────────────
class TestNormalizeStructuredContent:
    """Behavioral tests for the _normalize_structured_content helper."""

    def test_string_passthrough(self):
        assert _normalize_structured_content("Hello world") == ("Hello world", None)

    def test_none_returns_empty_string(self):
        assert _normalize_structured_content(None) == ("", None)

    def test_non_list_non_string_coerced(self):
        assert _normalize_structured_content(42) == ("42", None)

    def test_magistral_full_response(self):
        text, thinking = _normalize_structured_content(MAGISTRAL_CONTENT_BLOCKS)
        assert text == "The capital of France is Paris."
        for fragment in ("step by step", "capital of France is Paris"):
            assert fragment in thinking

    def test_text_only_blocks(self):
        assert _normalize_structured_content(MAGISTRAL_TEXT_ONLY_BLOCKS) == (
            "Hello, how can I help?",
            None,
        )

    def test_with_reference_blocks(self):
        """Reference blocks should be skipped, not cause errors."""
        assert _normalize_structured_content(MAGISTRAL_WITH_REFERENCE) == (
            "Here is the answer.",
            "Checking references.",
        )

    def test_streaming_thinking_delta(self):
        assert _normalize_structured_content(STREAMING_THINKING_DELTA) == ("", "Okay")

    def test_streaming_text_delta(self):
        assert _normalize_structured_content(STREAMING_TEXT_DELTA) == ("Hello", None)

    def test_empty_list(self):
        assert _normalize_structured_content([]) == ("", None)

    def test_mixed_string_and_dict_blocks(self):
        """Some providers might mix raw strings with typed blocks."""
        mixed = ["raw text", {"type": "text", "text": "typed text"}]
        text, _ = _normalize_structured_content(mixed)
        assert "raw text" in text
        assert "typed text" in text

    def test_thinking_as_plain_string(self):
        """Handle edge case where thinking value is a string not a list."""
        blocks = [{"type": "thinking", "thinking": "I'm thinking..."}]
        assert _normalize_structured_content(blocks) == ("", "I'm thinking...")

    def test_multiple_text_blocks_joined(self):
        blocks = [
            {"type": "text", "text": "First paragraph."},
            {"type": "text", "text": "Second paragraph."},
        ]
        text, _ = _normalize_structured_content(blocks)
        assert "First paragraph." in text
        assert "Second paragraph." in text
        assert "\n" in text  # parts are joined with a newline

    def test_empty_thinking_block(self):
        """Thinking block with no text should result in thinking=None."""
        blocks = [
            {"type": "thinking", "thinking": []},
            {"type": "text", "text": "Answer"},
        ]
        assert _normalize_structured_content(blocks) == ("Answer", None)
# ── Tests: _build_assistant_message with structured content ────────────
class TestBuildAssistantMessageStructuredContent:
    """Tests that _build_assistant_message correctly handles Mistral list content."""

    @staticmethod
    def _msg(content, tool_calls=None, **extra):
        # Minimal stand-in for an SDK assistant-message object.
        return SimpleNamespace(content=content, tool_calls=tool_calls, **extra)

    def test_list_content_normalized_to_string(self, agent):
        built = agent._build_assistant_message(self._msg(MAGISTRAL_CONTENT_BLOCKS), "stop")
        assert isinstance(built["content"], str)
        assert "The capital of France is Paris." in built["content"]

    def test_list_content_thinking_extracted(self, agent):
        built = agent._build_assistant_message(self._msg(MAGISTRAL_CONTENT_BLOCKS), "stop")
        assert built["reasoning"] is not None
        assert "step by step" in built["reasoning"]

    def test_string_content_unchanged(self, agent):
        built = agent._build_assistant_message(self._msg("Normal string response"), "stop")
        assert built["content"] == "Normal string response"

    def test_list_content_with_tool_calls(self, agent):
        call = SimpleNamespace(
            id="call_123",
            type="function",
            function=SimpleNamespace(name="web_search", arguments='{"query": "test"}'),
        )
        built = agent._build_assistant_message(
            self._msg(MAGISTRAL_CONTENT_BLOCKS, tool_calls=[call]), "tool_calls"
        )
        assert isinstance(built["content"], str)
        assert "tool_calls" in built

    def test_text_only_blocks_no_reasoning(self, agent):
        built = agent._build_assistant_message(self._msg(MAGISTRAL_TEXT_ONLY_BLOCKS), "stop")
        assert built["content"] == "Hello, how can I help?"
        assert built["reasoning"] is None

    def test_structured_thinking_not_duplicated_with_reasoning_content(self, agent):
        """When reasoning_content is set AND content has thinking blocks,
        don't duplicate the reasoning."""
        built = agent._build_assistant_message(
            self._msg(
                MAGISTRAL_CONTENT_BLOCKS,
                reasoning_content="Already extracted reasoning",
            ),
            "stop",
        )
        # The already-set reasoning_content wins; structured thinking is not
        # appended on top of it.
        assert built["reasoning"] == "Already extracted reasoning"
# ── Tests: Non-streaming content normalization ─────────────────────────
class TestNonStreamingContentNormalization:
    """Tests for the non-streaming content normalization block in the agent loop."""

    def test_list_content_normalized(self, agent):
        """Simulate the normalization block that runs after getting the
        assistant_message from response.choices[0].message."""
        message = SimpleNamespace(content=MAGISTRAL_CONTENT_BLOCKS, tool_calls=None)
        # Mirror the normalization logic from run_agent.py
        payload = message.content
        if payload is not None and not isinstance(payload, str):
            if isinstance(payload, list):
                plain, extracted = _normalize_structured_content(payload)
                message.content = plain
                if extracted and not getattr(message, "reasoning_content", None):
                    message.reasoning_content = extracted
        assert isinstance(message.content, str)
        assert "The capital of France is Paris." in message.content
        assert hasattr(message, "reasoning_content")
        assert "step by step" in message.reasoning_content

    def test_dict_content_handled(self, agent):
        """Dict content (from llama-server etc.) should still work."""
        message = SimpleNamespace(content={"text": "Hello from dict"}, tool_calls=None)
        payload = message.content
        if payload is not None and not isinstance(payload, str):
            if isinstance(payload, dict):
                message.content = (
                    payload.get("text", "")
                    or payload.get("content", "")
                    or str(payload)
                )
        assert message.content == "Hello from dict"
# ── Tests: Streaming delta normalization ───────────────────────────────
class TestStreamingDeltaNormalization:
    """Tests for the streaming delta content normalization."""

    def test_list_delta_content_split(self):
        """When delta.content is a list, text goes to content_parts
        and thinking goes to reasoning_parts."""
        content_parts = []
        reasoning_parts = []
        # Simulate the streaming normalization block
        delta_content = MAGISTRAL_CONTENT_BLOCKS
        if isinstance(delta_content, list):
            text, thinking = _normalize_structured_content(delta_content)
            if thinking:
                reasoning_parts.append(thinking)
        else:
            text = delta_content
        if text:
            content_parts.append(text)
        # Verify text and thinking are separated
        assert len(content_parts) == 1
        assert "The capital of France is Paris." in content_parts[0]
        assert len(reasoning_parts) == 1
        assert "step by step" in reasoning_parts[0]
        # Verify join succeeds (this was the original crash: joining a list
        # of parts that contained a nested list raised TypeError)
        full_content = "".join(content_parts)
        assert isinstance(full_content, str)

    def test_string_delta_passthrough(self):
        """Normal string deltas should work unchanged."""
        content_parts = []
        delta_content = "Hello"
        if isinstance(delta_content, list):
            text, _ = _normalize_structured_content(delta_content)
        else:
            text = delta_content
        if text:
            content_parts.append(text)
        full_content = "".join(content_parts)
        assert full_content == "Hello"

    def test_thinking_only_delta(self):
        """Streaming delta with only thinking and no text."""
        content_parts = []
        reasoning_parts = []
        delta_content = STREAMING_THINKING_DELTA
        if isinstance(delta_content, list):
            text, thinking = _normalize_structured_content(delta_content)
            if thinking:
                reasoning_parts.append(thinking)
        else:
            text = delta_content
        if text:
            content_parts.append(text)
        # No text content, only reasoning
        assert len(content_parts) == 0
        assert len(reasoning_parts) == 1
        assert reasoning_parts[0] == "Okay"
        # Join should succeed (empty list)
        full_content = "".join(content_parts) or None
        assert full_content is None

    def test_multiple_streaming_chunks_joined(self):
        """Multiple streaming chunks with mixed list and string content."""
        content_parts = []
        reasoning_parts = []
        chunks = [
            STREAMING_THINKING_DELTA,  # list: thinking only
            STREAMING_TEXT_DELTA,      # list: text only
            "more text",               # string
        ]
        for delta_content in chunks:
            if isinstance(delta_content, list):
                text, thinking = _normalize_structured_content(delta_content)
                if thinking:
                    reasoning_parts.append(thinking)
            else:
                text = delta_content
            if text:
                content_parts.append(text)
        full_content = "".join(content_parts)
        full_reasoning = "".join(reasoning_parts) or None
        assert full_content == "Hellomore text"
        assert full_reasoning == "Okay"