mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
fix: count actual tool calls instead of tool-related messages
tool_call_count was inaccurate in two ways: 1. Under-counting: an assistant message with N parallel tool calls (e.g. "kill the light and shut off the fan" = 2 ha_call_service) only incremented tool_call_count by 1 instead of N. 2. Over-counting: tool response messages (role=tool) also incremented tool_call_count, double-counting every tool interaction. Combined: 2 parallel tool calls produced tool_call_count=3 (1 from assistant + 2 from tool responses) instead of the correct value of 2. Fix: only count from assistant messages with tool_calls, incrementing by len(tool_calls) to handle parallel calls correctly. Tool response messages no longer affect tool_call_count. This impacts /insights and /usage accuracy for sessions with tool use.
This commit is contained in:
@@ -94,13 +94,50 @@ class TestMessageStorage:
|
||||
session = db.get_session("s1")
|
||||
assert session["message_count"] == 2
|
||||
|
||||
def test_tool_message_increments_tool_count(self, db):
|
||||
def test_tool_response_does_not_increment_tool_count(self, db):
|
||||
"""Tool responses (role=tool) should not increment tool_call_count.
|
||||
|
||||
Only assistant messages with tool_calls should count.
|
||||
"""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
db.append_message("s1", role="tool", content="result", tool_name="web_search")
|
||||
|
||||
session = db.get_session("s1")
|
||||
assert session["tool_call_count"] == 0
|
||||
|
||||
def test_assistant_tool_calls_increment_by_count(self, db):
|
||||
"""An assistant message with N tool_calls should increment by N."""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
tool_calls = [
|
||||
{"id": "call_1", "function": {"name": "web_search", "arguments": "{}"}},
|
||||
]
|
||||
db.append_message("s1", role="assistant", content="", tool_calls=tool_calls)
|
||||
|
||||
session = db.get_session("s1")
|
||||
assert session["tool_call_count"] == 1
|
||||
|
||||
def test_tool_call_count_matches_actual_calls(self, db):
|
||||
"""tool_call_count should equal the number of tool calls made, not messages."""
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
|
||||
# Assistant makes 2 parallel tool calls in one message
|
||||
tool_calls = [
|
||||
{"id": "call_1", "function": {"name": "ha_call_service", "arguments": "{}"}},
|
||||
{"id": "call_2", "function": {"name": "ha_call_service", "arguments": "{}"}},
|
||||
]
|
||||
db.append_message("s1", role="assistant", content="", tool_calls=tool_calls)
|
||||
|
||||
# Two tool responses come back
|
||||
db.append_message("s1", role="tool", content="ok", tool_name="ha_call_service")
|
||||
db.append_message("s1", role="tool", content="ok", tool_name="ha_call_service")
|
||||
|
||||
session = db.get_session("s1")
|
||||
# Should be 2 (the actual number of tool calls), not 3
|
||||
assert session["tool_call_count"] == 2, (
|
||||
f"Expected 2 tool calls but got {session['tool_call_count']}. "
|
||||
"tool responses are double-counted and multi-call messages are under-counted"
|
||||
)
|
||||
|
||||
def test_tool_calls_serialization(self, db):
|
||||
db.create_session(session_id="s1", source="cli")
|
||||
tool_calls = [{"id": "call_1", "function": {"name": "web_search", "arguments": "{}"}}]
|
||||
|
||||
Reference in New Issue
Block a user