mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-30 16:01:49 +08:00
Compare commits
1 Commits
fix/plugin
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
589d14c11e |
62
run_agent.py
62
run_agent.py
@@ -5823,6 +5823,7 @@ class AIAgent:
|
|||||||
api_msg.pop("reasoning", None)
|
api_msg.pop("reasoning", None)
|
||||||
api_msg.pop("finish_reason", None)
|
api_msg.pop("finish_reason", None)
|
||||||
api_msg.pop("_flush_sentinel", None)
|
api_msg.pop("_flush_sentinel", None)
|
||||||
|
api_msg.pop("_thinking_prefill", None)
|
||||||
if _needs_sanitize:
|
if _needs_sanitize:
|
||||||
self._sanitize_tool_calls_for_strict_api(api_msg)
|
self._sanitize_tool_calls_for_strict_api(api_msg)
|
||||||
api_messages.append(api_msg)
|
api_messages.append(api_msg)
|
||||||
@@ -6746,7 +6747,7 @@ class AIAgent:
|
|||||||
api_messages = []
|
api_messages = []
|
||||||
for msg in messages:
|
for msg in messages:
|
||||||
api_msg = msg.copy()
|
api_msg = msg.copy()
|
||||||
for internal_field in ("reasoning", "finish_reason"):
|
for internal_field in ("reasoning", "finish_reason", "_thinking_prefill"):
|
||||||
api_msg.pop(internal_field, None)
|
api_msg.pop(internal_field, None)
|
||||||
if _needs_sanitize:
|
if _needs_sanitize:
|
||||||
self._sanitize_tool_calls_for_strict_api(api_msg)
|
self._sanitize_tool_calls_for_strict_api(api_msg)
|
||||||
@@ -6938,6 +6939,7 @@ class AIAgent:
|
|||||||
self._empty_content_retries = 0
|
self._empty_content_retries = 0
|
||||||
self._incomplete_scratchpad_retries = 0
|
self._incomplete_scratchpad_retries = 0
|
||||||
self._codex_incomplete_retries = 0
|
self._codex_incomplete_retries = 0
|
||||||
|
self._thinking_prefill_retries = 0
|
||||||
self._last_content_with_tools = None
|
self._last_content_with_tools = None
|
||||||
self._mute_post_response = False
|
self._mute_post_response = False
|
||||||
self._surrogate_sanitized = False
|
self._surrogate_sanitized = False
|
||||||
@@ -7283,6 +7285,8 @@ class AIAgent:
|
|||||||
# Remove finish_reason - not accepted by strict APIs (e.g. Mistral)
|
# Remove finish_reason - not accepted by strict APIs (e.g. Mistral)
|
||||||
if "finish_reason" in api_msg:
|
if "finish_reason" in api_msg:
|
||||||
api_msg.pop("finish_reason")
|
api_msg.pop("finish_reason")
|
||||||
|
# Strip internal thinking-prefill marker
|
||||||
|
api_msg.pop("_thinking_prefill", None)
|
||||||
# Strip Codex Responses API fields (call_id, response_item_id) for
|
# Strip Codex Responses API fields (call_id, response_item_id) for
|
||||||
# strict providers like Mistral, Fireworks, etc. that reject unknown fields.
|
# strict providers like Mistral, Fireworks, etc. that reject unknown fields.
|
||||||
# Uses new dicts so the internal messages list retains the fields
|
# Uses new dicts so the internal messages list retains the fields
|
||||||
@@ -8817,6 +8821,15 @@ class AIAgent:
|
|||||||
if clean:
|
if clean:
|
||||||
self._vprint(f" ┊ 💬 {clean}")
|
self._vprint(f" ┊ 💬 {clean}")
|
||||||
|
|
||||||
|
# Pop thinking-only prefill message(s) before appending
|
||||||
|
# (tool-call path — same rationale as the final-response path).
|
||||||
|
while (
|
||||||
|
messages
|
||||||
|
and isinstance(messages[-1], dict)
|
||||||
|
and messages[-1].get("_thinking_prefill")
|
||||||
|
):
|
||||||
|
messages.pop()
|
||||||
|
|
||||||
messages.append(assistant_msg)
|
messages.append(assistant_msg)
|
||||||
|
|
||||||
# Close any open streaming display (response box, reasoning
|
# Close any open streaming display (response box, reasoning
|
||||||
@@ -8930,11 +8943,36 @@ class AIAgent:
|
|||||||
self._response_was_previewed = True
|
self._response_was_previewed = True
|
||||||
break
|
break
|
||||||
|
|
||||||
# Reasoning-only response: the model produced thinking
|
# ── Thinking-only prefill continuation ──────────
|
||||||
# but no visible content. This is a valid response —
|
# The model produced structured reasoning (via API
|
||||||
# keep reasoning in its own field and set content to
|
# fields) but no visible text content. Rather than
|
||||||
# "(empty)" so every provider accepts the message.
|
# giving up, append the assistant message as-is and
|
||||||
# No retries needed.
|
# continue — the model will see its own reasoning
|
||||||
|
# on the next turn and produce the text portion.
|
||||||
|
# Inspired by clawdbot's "incomplete-text" recovery.
|
||||||
|
_has_structured = bool(
|
||||||
|
getattr(assistant_message, "reasoning", None)
|
||||||
|
or getattr(assistant_message, "reasoning_content", None)
|
||||||
|
or getattr(assistant_message, "reasoning_details", None)
|
||||||
|
)
|
||||||
|
if _has_structured and self._thinking_prefill_retries < 2:
|
||||||
|
self._thinking_prefill_retries += 1
|
||||||
|
self._vprint(
|
||||||
|
f"{self.log_prefix}↻ Thinking-only response — "
|
||||||
|
f"prefilling to continue "
|
||||||
|
f"({self._thinking_prefill_retries}/2)"
|
||||||
|
)
|
||||||
|
interim_msg = self._build_assistant_message(
|
||||||
|
assistant_message, "incomplete"
|
||||||
|
)
|
||||||
|
interim_msg["_thinking_prefill"] = True
|
||||||
|
messages.append(interim_msg)
|
||||||
|
self._session_messages = messages
|
||||||
|
self._save_session_log(messages)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Exhausted prefill attempts or no structured
|
||||||
|
# reasoning — fall through to "(empty)" terminal.
|
||||||
reasoning_text = self._extract_reasoning(assistant_message)
|
reasoning_text = self._extract_reasoning(assistant_message)
|
||||||
assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
|
assistant_msg = self._build_assistant_message(assistant_message, finish_reason)
|
||||||
assistant_msg["content"] = "(empty)"
|
assistant_msg["content"] = "(empty)"
|
||||||
@@ -8953,6 +8991,7 @@ class AIAgent:
|
|||||||
if hasattr(self, '_empty_content_retries'):
|
if hasattr(self, '_empty_content_retries'):
|
||||||
self._empty_content_retries = 0
|
self._empty_content_retries = 0
|
||||||
self._last_empty_content_signature = None
|
self._last_empty_content_signature = None
|
||||||
|
self._thinking_prefill_retries = 0
|
||||||
|
|
||||||
if (
|
if (
|
||||||
self.api_mode == "codex_responses"
|
self.api_mode == "codex_responses"
|
||||||
@@ -8992,6 +9031,17 @@ class AIAgent:
|
|||||||
|
|
||||||
final_msg = self._build_assistant_message(assistant_message, finish_reason)
|
final_msg = self._build_assistant_message(assistant_message, finish_reason)
|
||||||
|
|
||||||
|
# Pop thinking-only prefill message(s) before appending
|
||||||
|
# the final response. This avoids consecutive assistant
|
||||||
|
# messages which break strict-alternation providers
|
||||||
|
# (Anthropic Messages API) and keeps history clean.
|
||||||
|
while (
|
||||||
|
messages
|
||||||
|
and isinstance(messages[-1], dict)
|
||||||
|
and messages[-1].get("_thinking_prefill")
|
||||||
|
):
|
||||||
|
messages.pop()
|
||||||
|
|
||||||
messages.append(final_msg)
|
messages.append(final_msg)
|
||||||
|
|
||||||
if not self.quiet_mode:
|
if not self.quiet_mode:
|
||||||
|
|||||||
@@ -1547,7 +1547,7 @@ class TestRunConversation:
|
|||||||
assert any(m.get("reasoning") for m in assistant_msgs)
|
assert any(m.get("reasoning") for m in assistant_msgs)
|
||||||
|
|
||||||
def test_reasoning_only_local_resumed_no_compression_triggered(self, agent):
|
def test_reasoning_only_local_resumed_no_compression_triggered(self, agent):
|
||||||
"""Reasoning-only responses no longer trigger compression — accepted immediately."""
|
"""Reasoning-only responses no longer trigger compression — prefill then accepted."""
|
||||||
self._setup_agent(agent)
|
self._setup_agent(agent)
|
||||||
agent.base_url = "http://127.0.0.1:1234/v1"
|
agent.base_url = "http://127.0.0.1:1234/v1"
|
||||||
agent.compression_enabled = True
|
agent.compression_enabled = True
|
||||||
@@ -1561,8 +1561,9 @@ class TestRunConversation:
|
|||||||
{"role": "assistant", "content": "old answer"},
|
{"role": "assistant", "content": "old answer"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# 3 responses: original + 2 prefill continuations (structured reasoning triggers prefill)
|
||||||
with (
|
with (
|
||||||
patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp]),
|
patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp, empty_resp, empty_resp]),
|
||||||
patch.object(agent, "_compress_context") as mock_compress,
|
patch.object(agent, "_compress_context") as mock_compress,
|
||||||
patch.object(agent, "_persist_session"),
|
patch.object(agent, "_persist_session"),
|
||||||
patch.object(agent, "_save_trajectory"),
|
patch.object(agent, "_save_trajectory"),
|
||||||
@@ -1573,17 +1574,18 @@ class TestRunConversation:
|
|||||||
mock_compress.assert_not_called() # no compression triggered
|
mock_compress.assert_not_called() # no compression triggered
|
||||||
assert result["completed"] is True
|
assert result["completed"] is True
|
||||||
assert result["final_response"] == "(empty)"
|
assert result["final_response"] == "(empty)"
|
||||||
assert result["api_calls"] == 1
|
assert result["api_calls"] == 3 # 1 original + 2 prefill continuations
|
||||||
|
|
||||||
def test_reasoning_only_response_accepted_without_retry(self, agent):
|
def test_reasoning_only_response_prefill_then_empty(self, agent):
|
||||||
"""Reasoning-only response should be accepted with (empty) content, no retries."""
|
"""Structured reasoning-only triggers prefill continuation (up to 2), then falls through to (empty)."""
|
||||||
self._setup_agent(agent)
|
self._setup_agent(agent)
|
||||||
empty_resp = _mock_response(
|
empty_resp = _mock_response(
|
||||||
content=None,
|
content=None,
|
||||||
finish_reason="stop",
|
finish_reason="stop",
|
||||||
reasoning_content="structured reasoning answer",
|
reasoning_content="structured reasoning answer",
|
||||||
)
|
)
|
||||||
agent.client.chat.completions.create.side_effect = [empty_resp]
|
# 3 responses: original + 2 prefill continuations, all reasoning-only
|
||||||
|
agent.client.chat.completions.create.side_effect = [empty_resp, empty_resp, empty_resp]
|
||||||
with (
|
with (
|
||||||
patch.object(agent, "_persist_session"),
|
patch.object(agent, "_persist_session"),
|
||||||
patch.object(agent, "_save_trajectory"),
|
patch.object(agent, "_save_trajectory"),
|
||||||
@@ -1592,7 +1594,35 @@ class TestRunConversation:
|
|||||||
result = agent.run_conversation("answer me")
|
result = agent.run_conversation("answer me")
|
||||||
assert result["completed"] is True
|
assert result["completed"] is True
|
||||||
assert result["final_response"] == "(empty)"
|
assert result["final_response"] == "(empty)"
|
||||||
assert result["api_calls"] == 1 # no retries
|
assert result["api_calls"] == 3 # 1 original + 2 prefill continuations
|
||||||
|
|
||||||
|
def test_reasoning_only_prefill_succeeds_on_continuation(self, agent):
|
||||||
|
"""When prefill continuation produces content, it becomes the final response."""
|
||||||
|
self._setup_agent(agent)
|
||||||
|
empty_resp = _mock_response(
|
||||||
|
content=None,
|
||||||
|
finish_reason="stop",
|
||||||
|
reasoning_content="structured reasoning answer",
|
||||||
|
)
|
||||||
|
content_resp = _mock_response(
|
||||||
|
content="Here is the actual answer.",
|
||||||
|
finish_reason="stop",
|
||||||
|
)
|
||||||
|
agent.client.chat.completions.create.side_effect = [empty_resp, content_resp]
|
||||||
|
with (
|
||||||
|
patch.object(agent, "_persist_session"),
|
||||||
|
patch.object(agent, "_save_trajectory"),
|
||||||
|
patch.object(agent, "_cleanup_task_resources"),
|
||||||
|
):
|
||||||
|
result = agent.run_conversation("answer me")
|
||||||
|
assert result["completed"] is True
|
||||||
|
assert result["final_response"] == "Here is the actual answer."
|
||||||
|
assert result["api_calls"] == 2 # 1 original + 1 prefill continuation
|
||||||
|
# Prefill message should be cleaned up — no consecutive assistant messages
|
||||||
|
roles = [m.get("role") for m in result["messages"]]
|
||||||
|
for i in range(len(roles) - 1):
|
||||||
|
if roles[i] == "assistant" and roles[i + 1] == "assistant":
|
||||||
|
raise AssertionError("Consecutive assistant messages found in history")
|
||||||
|
|
||||||
def test_truly_empty_response_accepted_without_retry(self, agent):
|
def test_truly_empty_response_accepted_without_retry(self, agent):
|
||||||
"""Truly empty response (no content, no reasoning) should still complete with (empty)."""
|
"""Truly empty response (no content, no reasoning) should still complete with (empty)."""
|
||||||
|
|||||||
Reference in New Issue
Block a user