mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-05 10:17:17 +08:00
Compare commits
1 Commits
skill/gith
...
hermes/her
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0a668f2cb8 |
@@ -74,8 +74,11 @@ def _get_anthropic_max_output(model: str) -> int:
|
|||||||
model IDs (claude-sonnet-4-5-20250929) and variant suffixes (:1m, :fast)
|
model IDs (claude-sonnet-4-5-20250929) and variant suffixes (:1m, :fast)
|
||||||
resolve correctly. Longest-prefix match wins to avoid e.g. "claude-3-5"
|
resolve correctly. Longest-prefix match wins to avoid e.g. "claude-3-5"
|
||||||
matching before "claude-3-5-sonnet".
|
matching before "claude-3-5-sonnet".
|
||||||
|
|
||||||
|
Normalizes dots to hyphens so that model names like
|
||||||
|
``anthropic/claude-opus-4.6`` match the ``claude-opus-4-6`` table key.
|
||||||
"""
|
"""
|
||||||
m = model.lower()
|
m = model.lower().replace(".", "-")
|
||||||
best_key = ""
|
best_key = ""
|
||||||
best_val = _ANTHROPIC_DEFAULT_OUTPUT_LIMIT
|
best_val = _ANTHROPIC_DEFAULT_OUTPUT_LIMIT
|
||||||
for key, val in _ANTHROPIC_OUTPUT_LIMITS.items():
|
for key, val in _ANTHROPIC_OUTPUT_LIMITS.items():
|
||||||
|
|||||||
70
cli.py
70
cli.py
@@ -2308,17 +2308,59 @@ class HermesCLI:
|
|||||||
# Append to a pre-filter buffer first
|
# Append to a pre-filter buffer first
|
||||||
self._stream_prefilt = getattr(self, "_stream_prefilt", "") + text
|
self._stream_prefilt = getattr(self, "_stream_prefilt", "") + text
|
||||||
|
|
||||||
# Check if we're entering a reasoning block
|
# Check if we're entering a reasoning block.
|
||||||
|
# Only match tags that appear at a "block boundary": start of the
|
||||||
|
# stream, after a newline (with optional whitespace), or when nothing
|
||||||
|
# but whitespace has been emitted on the current line.
|
||||||
|
# This prevents false positives when models *mention* tags in prose
|
||||||
|
# like "(/think not producing <think> tags)".
|
||||||
|
#
|
||||||
|
# _stream_last_was_newline tracks whether the last character emitted
|
||||||
|
# (or the start of the stream) is a line boundary. It's True at
|
||||||
|
# stream start and set True whenever emitted text ends with '\n'.
|
||||||
|
if not hasattr(self, "_stream_last_was_newline"):
|
||||||
|
self._stream_last_was_newline = True # start of stream = boundary
|
||||||
|
|
||||||
if not getattr(self, "_in_reasoning_block", False):
|
if not getattr(self, "_in_reasoning_block", False):
|
||||||
for tag in _OPEN_TAGS:
|
for tag in _OPEN_TAGS:
|
||||||
idx = self._stream_prefilt.find(tag)
|
search_start = 0
|
||||||
if idx != -1:
|
while True:
|
||||||
# Emit everything before the tag
|
idx = self._stream_prefilt.find(tag, search_start)
|
||||||
before = self._stream_prefilt[:idx]
|
if idx == -1:
|
||||||
if before:
|
break
|
||||||
self._emit_stream_text(before)
|
# Check if this is a block boundary position
|
||||||
self._in_reasoning_block = True
|
preceding = self._stream_prefilt[:idx]
|
||||||
self._stream_prefilt = self._stream_prefilt[idx + len(tag):]
|
if idx == 0:
|
||||||
|
# At buffer start — only a boundary if we're at
|
||||||
|
# a line start (stream start or last emit ended
|
||||||
|
# with newline)
|
||||||
|
is_block_boundary = getattr(self, "_stream_last_was_newline", True)
|
||||||
|
else:
|
||||||
|
# Find last newline in the buffer before the tag
|
||||||
|
last_nl = preceding.rfind("\n")
|
||||||
|
if last_nl == -1:
|
||||||
|
# No newline in buffer — boundary only if
|
||||||
|
# last emit was a newline AND only whitespace
|
||||||
|
# has accumulated before the tag
|
||||||
|
is_block_boundary = (
|
||||||
|
getattr(self, "_stream_last_was_newline", True)
|
||||||
|
and preceding.strip() == ""
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
# Text between last newline and tag must be
|
||||||
|
# whitespace-only
|
||||||
|
is_block_boundary = preceding[last_nl + 1:].strip() == ""
|
||||||
|
if is_block_boundary:
|
||||||
|
# Emit everything before the tag
|
||||||
|
if preceding:
|
||||||
|
self._emit_stream_text(preceding)
|
||||||
|
self._stream_last_was_newline = preceding.endswith("\n")
|
||||||
|
self._in_reasoning_block = True
|
||||||
|
self._stream_prefilt = self._stream_prefilt[idx + len(tag):]
|
||||||
|
break
|
||||||
|
# Not a block boundary — keep searching after this occurrence
|
||||||
|
search_start = idx + 1
|
||||||
|
if getattr(self, "_in_reasoning_block", False):
|
||||||
break
|
break
|
||||||
|
|
||||||
# Could also be a partial open tag at the end — hold it back
|
# Could also be a partial open tag at the end — hold it back
|
||||||
@@ -2332,6 +2374,7 @@ class HermesCLI:
|
|||||||
break
|
break
|
||||||
if safe:
|
if safe:
|
||||||
self._emit_stream_text(safe)
|
self._emit_stream_text(safe)
|
||||||
|
self._stream_last_was_newline = safe.endswith("\n")
|
||||||
self._stream_prefilt = self._stream_prefilt[len(safe):]
|
self._stream_prefilt = self._stream_prefilt[len(safe):]
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -2421,6 +2464,14 @@ class HermesCLI:
|
|||||||
|
|
||||||
def _flush_stream(self) -> None:
|
def _flush_stream(self) -> None:
|
||||||
"""Emit any remaining partial line from the stream buffer and close the box."""
|
"""Emit any remaining partial line from the stream buffer and close the box."""
|
||||||
|
# If we're still inside a "reasoning block" at end-of-stream, it was
|
||||||
|
# a false positive — the model mentioned a tag like <think> in prose
|
||||||
|
# but never closed it. Recover the buffered content as regular text.
|
||||||
|
if getattr(self, "_in_reasoning_block", False) and getattr(self, "_stream_prefilt", ""):
|
||||||
|
self._in_reasoning_block = False
|
||||||
|
self._emit_stream_text(self._stream_prefilt)
|
||||||
|
self._stream_prefilt = ""
|
||||||
|
|
||||||
# Close reasoning box if still open (in case no content tokens arrived)
|
# Close reasoning box if still open (in case no content tokens arrived)
|
||||||
self._close_reasoning_box()
|
self._close_reasoning_box()
|
||||||
|
|
||||||
@@ -2443,6 +2494,7 @@ class HermesCLI:
|
|||||||
self._stream_text_ansi = ""
|
self._stream_text_ansi = ""
|
||||||
self._stream_prefilt = ""
|
self._stream_prefilt = ""
|
||||||
self._in_reasoning_block = False
|
self._in_reasoning_block = False
|
||||||
|
self._stream_last_was_newline = True
|
||||||
self._reasoning_box_opened = False
|
self._reasoning_box_opened = False
|
||||||
self._reasoning_buf = ""
|
self._reasoning_buf = ""
|
||||||
self._reasoning_preview_buf = ""
|
self._reasoning_preview_buf = ""
|
||||||
|
|||||||
23
run_agent.py
23
run_agent.py
@@ -5610,20 +5610,20 @@ class AIAgent:
|
|||||||
if self.max_tokens is not None:
|
if self.max_tokens is not None:
|
||||||
if not self._is_qwen_portal():
|
if not self._is_qwen_portal():
|
||||||
api_kwargs.update(self._max_tokens_param(self.max_tokens))
|
api_kwargs.update(self._max_tokens_param(self.max_tokens))
|
||||||
elif self._is_openrouter_url() and "claude" in (self.model or "").lower():
|
elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower():
|
||||||
# OpenRouter translates requests to Anthropic's Messages API,
|
# OpenRouter and Nous Portal translate requests to Anthropic's
|
||||||
# which requires max_tokens as a mandatory field. When we omit
|
# Messages API, which requires max_tokens as a mandatory field.
|
||||||
# it, OpenRouter picks a default that can be too low — the model
|
# When we omit it, the proxy picks a default that can be too
|
||||||
# spends its output budget on thinking and has almost nothing
|
# low — the model spends its output budget on thinking and has
|
||||||
# left for the actual response (especially large tool calls like
|
# almost nothing left for the actual response (especially large
|
||||||
# write_file). Sending the model's real output limit ensures
|
# tool calls like write_file). Sending the model's real output
|
||||||
# full capacity. Other providers handle the default fine.
|
# limit ensures full capacity.
|
||||||
try:
|
try:
|
||||||
from agent.anthropic_adapter import _get_anthropic_max_output
|
from agent.anthropic_adapter import _get_anthropic_max_output
|
||||||
_model_output_limit = _get_anthropic_max_output(self.model)
|
_model_output_limit = _get_anthropic_max_output(self.model)
|
||||||
api_kwargs["max_tokens"] = _model_output_limit
|
api_kwargs["max_tokens"] = _model_output_limit
|
||||||
except Exception:
|
except Exception:
|
||||||
pass # fail open — let OpenRouter pick its default
|
pass # fail open — let the proxy pick its default
|
||||||
|
|
||||||
extra_body = {}
|
extra_body = {}
|
||||||
|
|
||||||
@@ -9116,6 +9116,11 @@ class AIAgent:
|
|||||||
|
|
||||||
self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)
|
self._execute_tool_calls(assistant_message, messages, effective_task_id, api_call_count)
|
||||||
|
|
||||||
|
# Reset per-turn retry counters after successful tool
|
||||||
|
# execution so a single truncation doesn't poison the
|
||||||
|
# entire conversation.
|
||||||
|
truncated_tool_call_retries = 0
|
||||||
|
|
||||||
# Signal that a paragraph break is needed before the next
|
# Signal that a paragraph break is needed before the next
|
||||||
# streamed text. We don't emit it immediately because
|
# streamed text. We don't emit it immediately because
|
||||||
# multiple consecutive tool iterations would stack up
|
# multiple consecutive tool iterations would stack up
|
||||||
|
|||||||
138
tests/cli/test_stream_delta_think_tag.py
Normal file
138
tests/cli/test_stream_delta_think_tag.py
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
"""Tests for _stream_delta's handling of <think> tags in prose vs real reasoning blocks."""
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", ".."))
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def _make_cli_stub():
|
||||||
|
"""Create a minimal HermesCLI-like object with stream state."""
|
||||||
|
from cli import HermesCLI
|
||||||
|
|
||||||
|
cli = HermesCLI.__new__(HermesCLI)
|
||||||
|
cli.show_reasoning = False
|
||||||
|
cli._stream_buf = ""
|
||||||
|
cli._stream_started = False
|
||||||
|
cli._stream_box_opened = False
|
||||||
|
cli._stream_prefilt = ""
|
||||||
|
cli._in_reasoning_block = False
|
||||||
|
cli._reasoning_stream_started = False
|
||||||
|
cli._reasoning_box_opened = False
|
||||||
|
cli._reasoning_buf = ""
|
||||||
|
cli._reasoning_preview_buf = ""
|
||||||
|
cli._deferred_content = ""
|
||||||
|
cli._stream_text_ansi = ""
|
||||||
|
cli._stream_needs_break = False
|
||||||
|
cli._emitted = []
|
||||||
|
|
||||||
|
# Mock _emit_stream_text to capture output
|
||||||
|
def mock_emit(text):
|
||||||
|
cli._emitted.append(text)
|
||||||
|
cli._emit_stream_text = mock_emit
|
||||||
|
|
||||||
|
# Mock _stream_reasoning_delta
|
||||||
|
cli._reasoning_emitted = []
|
||||||
|
def mock_reasoning(text):
|
||||||
|
cli._reasoning_emitted.append(text)
|
||||||
|
cli._stream_reasoning_delta = mock_reasoning
|
||||||
|
|
||||||
|
return cli
|
||||||
|
|
||||||
|
|
||||||
|
class TestThinkTagInProse:
|
||||||
|
"""<think> mentioned in prose should NOT trigger reasoning suppression."""
|
||||||
|
|
||||||
|
def test_think_tag_mid_sentence(self):
|
||||||
|
"""'(/think not producing <think> tags)' should pass through."""
|
||||||
|
cli = _make_cli_stub()
|
||||||
|
tokens = [
|
||||||
|
" 1. Fix reasoning mode in eval ",
|
||||||
|
"(/think not producing ",
|
||||||
|
"<think>",
|
||||||
|
" tags — ~2% gap)",
|
||||||
|
"\n 2. Launch production",
|
||||||
|
]
|
||||||
|
for t in tokens:
|
||||||
|
cli._stream_delta(t)
|
||||||
|
assert not cli._in_reasoning_block, "<think> in prose should not enter reasoning block"
|
||||||
|
full = "".join(cli._emitted)
|
||||||
|
assert "<think>" in full, "The literal <think> tag should be in the emitted text"
|
||||||
|
assert "Launch production" in full
|
||||||
|
|
||||||
|
def test_think_tag_after_text_on_same_line(self):
|
||||||
|
"""'some text <think>' should NOT trigger reasoning."""
|
||||||
|
cli = _make_cli_stub()
|
||||||
|
cli._stream_delta("Here is the <think> tag explanation")
|
||||||
|
assert not cli._in_reasoning_block
|
||||||
|
full = "".join(cli._emitted)
|
||||||
|
assert "<think>" in full
|
||||||
|
|
||||||
|
def test_think_tag_in_backticks(self):
|
||||||
|
"""'`<think>`' should NOT trigger reasoning."""
|
||||||
|
cli = _make_cli_stub()
|
||||||
|
cli._stream_delta("Use the `<think>` tag for reasoning")
|
||||||
|
assert not cli._in_reasoning_block
|
||||||
|
|
||||||
|
|
||||||
|
class TestRealReasoningBlock:
|
||||||
|
"""Real <think> tags at block boundaries should still be caught."""
|
||||||
|
|
||||||
|
def test_think_at_start_of_stream(self):
|
||||||
|
"""'<think>reasoning</think>answer' should suppress reasoning."""
|
||||||
|
cli = _make_cli_stub()
|
||||||
|
cli._stream_delta("<think>")
|
||||||
|
assert cli._in_reasoning_block
|
||||||
|
cli._stream_delta("I need to analyze this")
|
||||||
|
cli._stream_delta("</think>")
|
||||||
|
assert not cli._in_reasoning_block
|
||||||
|
cli._stream_delta("Here is my answer")
|
||||||
|
full = "".join(cli._emitted)
|
||||||
|
assert "Here is my answer" in full
|
||||||
|
assert "I need to analyze" not in full # reasoning was suppressed
|
||||||
|
|
||||||
|
def test_think_after_newline(self):
|
||||||
|
"""'text\\n<think>' should trigger reasoning block."""
|
||||||
|
cli = _make_cli_stub()
|
||||||
|
cli._stream_delta("Some preamble\n<think>")
|
||||||
|
assert cli._in_reasoning_block
|
||||||
|
full = "".join(cli._emitted)
|
||||||
|
assert "Some preamble" in full
|
||||||
|
|
||||||
|
def test_think_after_newline_with_whitespace(self):
|
||||||
|
"""'text\\n <think>' should trigger reasoning block."""
|
||||||
|
cli = _make_cli_stub()
|
||||||
|
cli._stream_delta("Some preamble\n <think>")
|
||||||
|
assert cli._in_reasoning_block
|
||||||
|
|
||||||
|
def test_think_with_only_whitespace_before(self):
|
||||||
|
"""' <think>' (whitespace only prefix) should trigger."""
|
||||||
|
cli = _make_cli_stub()
|
||||||
|
cli._stream_delta(" <think>")
|
||||||
|
assert cli._in_reasoning_block
|
||||||
|
|
||||||
|
|
||||||
|
class TestFlushRecovery:
|
||||||
|
"""_flush_stream should recover content from false-positive reasoning blocks."""
|
||||||
|
|
||||||
|
def test_flush_recovers_buffered_content(self):
|
||||||
|
"""If somehow in reasoning block at flush, content is recovered."""
|
||||||
|
cli = _make_cli_stub()
|
||||||
|
# Manually set up a false-positive state
|
||||||
|
cli._in_reasoning_block = True
|
||||||
|
cli._stream_prefilt = " tags — ~2% gap)\n 2. Launch production"
|
||||||
|
cli._stream_box_opened = True
|
||||||
|
|
||||||
|
# Mock _close_reasoning_box and box closing
|
||||||
|
cli._close_reasoning_box = lambda: None
|
||||||
|
|
||||||
|
# Call flush
|
||||||
|
from unittest.mock import patch
|
||||||
|
import shutil
|
||||||
|
with patch.object(shutil, "get_terminal_size", return_value=os.terminal_size((80, 24))):
|
||||||
|
with patch("cli._cprint"):
|
||||||
|
cli._flush_stream()
|
||||||
|
|
||||||
|
assert not cli._in_reasoning_block
|
||||||
|
full = "".join(cli._emitted)
|
||||||
|
assert "Launch production" in full
|
||||||
Reference in New Issue
Block a user