mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 07:21:37 +08:00
Compare commits
1 Commits
fix/plugin
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
589d14c11e |
64
run_agent.py
64
run_agent.py
@@ -5823,6 +5823,7 @@ class AIAgent:
|
||||
api_msg.pop("reasoning", None)
|
||||
api_msg.pop("finish_reason", None)
|
||||
api_msg.pop("_flush_sentinel", None)
|
||||
api_msg.pop("_thinking_prefill", None)
|
||||
if _needs_sanitize:
|
||||
self._sanitize_tool_calls_for_strict_api(api_msg)
|
||||
api_messages.append(api_msg)
|
||||
@@ -6746,7 +6747,7 @@ class AIAgent:
|
||||
api_messages = []
|
||||
for msg in messages:
|
||||
api_msg = msg.copy()
|
||||
for internal_field in ("reasoning", "finish_reason"):
|
||||
for internal_field in ("reasoning", "finish_reason", "_thinking_prefill"):
|
||||
api_msg.pop(internal_field, None)
|
||||
if _needs_sanitize:
|
||||
self._sanitize_tool_calls_for_strict_api(api_msg)
|
||||
@@ -6938,6 +6939,7 @@ class AIAgent:
|
||||
self._empty_content_retries = 0
|
||||
self._incomplete_scratchpad_retries = 0
|
||||
self._codex_incomplete_retries = 0
|
||||
self._thinking_prefill_retries = 0
|
||||
self._last_content_with_tools = None
|
||||
self._mute_post_response = False
|
||||
self._surrogate_sanitized = False
|
||||
@@ -7283,6 +7285,8 @@ class AIAgent:
|
||||
# Remove finish_reason - not accepted by strict APIs (e.g. Mistral)
|
||||
if "finish_reason" in api_msg:
|
||||
api_msg.pop("finish_reason")
|
||||
# Strip internal thinking-prefill marker
|
||||
api_msg.pop("_thinking_prefill", None)
|
||||
# Strip Codex Responses API fields (call_id, response_item_id) for
|
||||
# strict providers like Mistral, Fireworks, etc. that reject unknown fields.
|
||||
# Uses new dicts so the internal messages list retains the fields
|
||||
@@ -8817,6 +8821,15 @@ class AIAgent:
|
||||
if clean:
|
||||
self._vprint(f" ┊ 💬 {clean}")
|
||||
|
||||
# Pop thinking-only prefill message(s) before appending
|
||||
# (tool-call path — same rationale as the final-response path).
|
||||
while (
|
||||
messages
|
||||
and isinstance(messages[-1], dict)
|
||||
and messages[-1].get("_thinking_prefill")
|
||||
):
|
||||
messages.pop()
|
||||
|
||||
messages.append(assistant_msg)
|
||||
|
||||
# Close any open streaming display (response box, reasoning
|
||||
@@ -8930,11 +8943,36 @@ class AIAgent:
|
||||
self._response_was_previewed = True
|
||||
break
|
||||
|
||||
# Reasoning-only response: the model produced thinking
|
||||
# but no visible content. This is a valid response —
|
||||
# keep reasoning in its own field and set content to
|
||||
# "(empty)" so every provider accepts the message.
|
||||
# No retries needed.
|
||||
# ── Thinking-only prefill continuation ──────────
|
||||
# The model produced structured reasoning (via API
|
||||
# fields) but no visible text content. Rather than
|
||||
# giving up, append the assistant message as-is and
|
||||
# continue — the model will see its own reasoning
|
||||
# on the next turn and produce the text portion.
|
||||
# Inspired by clawdbot's "incomplete-text" recovery.
|
||||
_has_structured = bool(
|
||||
getattr(assistant_message, "reasoning", None)
|
||||
or getattr(assistant_message, "reasoning_content", None)
|
||||
or getattr(assistant_message, "reasoning_details", None)
|
||||
)
|
||||
if _has_structured and self._thinking_prefill_retries < 2:
|
||||
self._thinking_prefill_retries += 1
|
||||
self._vprint(
|
||||
f"{self.log_prefix}↻ Thinking-only response — "
|
||||
f"prefilling to continue "
|
||||
f"({self._thinking_prefill_retries}/2)"
|
||||
)
|
||||
interim_msg = self._build_assistant_message(
|
||||
assistant_message, "incomplete"
|
||||
)
|
||||
interim_msg["_thinking_prefill"] = True
|
||||
messages.append(interim_msg)
|
||||
self._session_messages = messages
|
||||
self._save_session_log(messages)
|
||||
continue
|
||||
|
||||
# Exhausted prefill attempts or no structured
|
||||
# reasoning — fall through to "(empty)" terminal.
|
||||
reasoning_text = self._extract_reasoning(assistant_message)
|
||||
assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
|
||||
assistant_msg["content"] = "(empty)"
|
||||
@@ -8953,6 +8991,7 @@ class AIAgent:
|
||||
if hasattr(self, '_empty_content_retries'):
|
||||
self._empty_content_retries = 0
|
||||
self._last_empty_content_signature = None
|
||||
self._thinking_prefill_retries = 0
|
||||
|
||||
if (
|
||||
self.api_mode == "codex_responses"
|
||||
@@ -8991,7 +9030,18 @@ class AIAgent:
|
||||
final_response = self._strip_think_blocks(final_response).strip()
|
||||
|
||||
final_msg = self._build_assistant_message(assistant_message, finish_reason)
|
||||
|
||||
|
||||
# Pop thinking-only prefill message(s) before appending
|
||||
# the final response. This avoids consecutive assistant
|
||||
# messages which break strict-alternation providers
|
||||
# (Anthropic Messages API) and keeps history clean.
|
||||
while (
|
||||
messages
|
||||
and isinstance(messages[-1], dict)
|
||||
and messages[-1].get("_thinking_prefill")
|
||||
):
|
||||
messages.pop()
|
||||
|
||||
messages.append(final_msg)
|
||||
|
||||
if not self.quiet_mode:
|
||||
|
||||
@@ -1547,7 +1547,7 @@ class TestRunConversation:
|
||||
assert any(m.get("reasoning") for m in assistant_msgs)
|
||||
|
||||
def test_reasoning_only_local_resumed_no_compression_triggered(self, agent):
|
||||
"""Reasoning-only responses no longer trigger compression — accepted immediately."""
|
||||
"""Reasoning-only responses no longer trigger compression — prefill then accepted."""
|
||||
self._setup_agent(agent)
|
||||
agent.base_url = "http://127.0.0.1:1234/v1"
|
||||
agent.compression_enabled = True
|
||||
@@ -1561,8 +1561,9 @@ class TestRunConversation:
|
||||
{"role": "assistant", "content": "old answer"},
|
||||
]
|
||||
|
||||
# 3 responses: original + 2 prefill continuations (structured reasoning triggers prefill)
|
||||
with (
|
||||
patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp]),
|
||||
patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp, empty_resp, empty_resp]),
|
||||
patch.object(agent, "_compress_context") as mock_compress,
|
||||
patch.object(agent, "_persist_session"),
|
||||
patch.object(agent, "_save_trajectory"),
|
||||
@@ -1573,17 +1574,18 @@ class TestRunConversation:
|
||||
mock_compress.assert_not_called() # no compression triggered
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "(empty)"
|
||||
assert result["api_calls"] == 1
|
||||
assert result["api_calls"] == 3 # 1 original + 2 prefill continuations
|
||||
|
||||
def test_reasoning_only_response_accepted_without_retry(self, agent):
|
||||
"""Reasoning-only response should be accepted with (empty) content, no retries."""
|
||||
def test_reasoning_only_response_prefill_then_empty(self, agent):
|
||||
"""Structured reasoning-only triggers prefill continuation (up to 2), then falls through to (empty)."""
|
||||
self._setup_agent(agent)
|
||||
empty_resp = _mock_response(
|
||||
content=None,
|
||||
finish_reason="stop",
|
||||
reasoning_content="structured reasoning answer",
|
||||
)
|
||||
agent.client.chat.completions.create.side_effect = [empty_resp]
|
||||
# 3 responses: original + 2 prefill continuations, all reasoning-only
|
||||
agent.client.chat.completions.create.side_effect = [empty_resp, empty_resp, empty_resp]
|
||||
with (
|
||||
patch.object(agent, "_persist_session"),
|
||||
patch.object(agent, "_save_trajectory"),
|
||||
@@ -1592,7 +1594,35 @@ class TestRunConversation:
|
||||
result = agent.run_conversation("answer me")
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "(empty)"
|
||||
assert result["api_calls"] == 1 # no retries
|
||||
assert result["api_calls"] == 3 # 1 original + 2 prefill continuations
|
||||
|
||||
def test_reasoning_only_prefill_succeeds_on_continuation(self, agent):
|
||||
"""When prefill continuation produces content, it becomes the final response."""
|
||||
self._setup_agent(agent)
|
||||
empty_resp = _mock_response(
|
||||
content=None,
|
||||
finish_reason="stop",
|
||||
reasoning_content="structured reasoning answer",
|
||||
)
|
||||
content_resp = _mock_response(
|
||||
content="Here is the actual answer.",
|
||||
finish_reason="stop",
|
||||
)
|
||||
agent.client.chat.completions.create.side_effect = [empty_resp, content_resp]
|
||||
with (
|
||||
patch.object(agent, "_persist_session"),
|
||||
patch.object(agent, "_save_trajectory"),
|
||||
patch.object(agent, "_cleanup_task_resources"),
|
||||
):
|
||||
result = agent.run_conversation("answer me")
|
||||
assert result["completed"] is True
|
||||
assert result["final_response"] == "Here is the actual answer."
|
||||
assert result["api_calls"] == 2 # 1 original + 1 prefill continuation
|
||||
# Prefill message should be cleaned up — no consecutive assistant messages
|
||||
roles = [m.get("role") for m in result["messages"]]
|
||||
for i in range(len(roles) - 1):
|
||||
if roles[i] == "assistant" and roles[i + 1] == "assistant":
|
||||
raise AssertionError("Consecutive assistant messages found in history")
|
||||
|
||||
def test_truly_empty_response_accepted_without_retry(self, agent):
|
||||
"""Truly empty response (no content, no reasoning) should still complete with (empty)."""
|
||||
|
||||
Reference in New Issue
Block a user