mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
feat(hooks): add duration_ms to post_tool_call + transform_tool_result (#15429)
Plugin hooks fired after a tool dispatch now receive an integer
duration_ms kwarg measuring how long the tool's registry.dispatch()
call took (time.monotonic() before/after). Inspired by Claude Code
2.1.119 which added the same field to PostToolUse hook inputs.
Wire points:
- model_tools.py: measure dispatch latency, pass duration_ms to
invoke_hook("post_tool_call", ...) and invoke_hook("transform_tool_result", ...)
- hermes_cli/hooks.py: include duration_ms in the synthetic payload
used by 'hermes hooks test' and 'hermes hooks doctor' so shell-hook
authors see the same shape at development time as runtime
- shell hooks (agent/shell_hooks.py): no code change needed;
_serialize_payload already surfaces non-top-level kwargs under
payload['extra'], so duration_ms lands at extra.duration_ms for
shell-hook scripts
Plugin authors can now build latency dashboards, per-tool SLO alerts,
and regression canaries without having to wrap every tool manually.
Test: tests/test_model_tools.py::test_post_tool_call_receives_non_negative_integer_duration_ms
E2E: with a real PluginManager and registry.dispatch monkey-patched to
sleep for 50 ms, the hook callback observes duration_ms=50 (an int).
Refs: https://code.claude.com/docs/en/changelog (2.1.119, Apr 23 2026)
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
"""Tests for model_tools.py — function call dispatch, agent-loop interception, legacy toolsets."""
|
||||
|
||||
import json
|
||||
from unittest.mock import call, patch
|
||||
from unittest.mock import ANY, call, patch
|
||||
|
||||
import pytest
|
||||
|
||||
@@ -71,6 +71,7 @@ class TestHandleFunctionCall:
|
||||
task_id="task-1",
|
||||
session_id="session-1",
|
||||
tool_call_id="call-1",
|
||||
duration_ms=ANY,
|
||||
),
|
||||
call(
|
||||
"transform_tool_result",
|
||||
@@ -80,9 +81,37 @@ class TestHandleFunctionCall:
|
||||
task_id="task-1",
|
||||
session_id="session-1",
|
||||
tool_call_id="call-1",
|
||||
duration_ms=ANY,
|
||||
),
|
||||
]
|
||||
|
||||
def test_post_tool_call_receives_non_negative_integer_duration_ms(self):
    """Regression: post_tool_call and transform_tool_result hooks must
    receive a non-negative integer ``duration_ms`` kwarg measuring
    dispatch latency. Inspired by Claude Code 2.1.119, which added
    ``duration_ms`` to its PostToolUse hook inputs.

    ``registry.dispatch`` is patched to return a fixed JSON string and
    ``invoke_hook`` is replaced by a mock, so the test inspects only the
    keyword arguments each hook invocation was given.
    """
    with (
        patch("model_tools.registry.dispatch", return_value='{"ok":true}'),
        patch("hermes_cli.plugins.invoke_hook") as mock_invoke_hook,
    ):
        handle_function_call("web_search", {"q": "test"}, task_id="t1")

    # Index the recorded invocations by hook name (first positional arg).
    kwargs_by_hook = {
        c.args[0]: c.kwargs for c in mock_invoke_hook.call_args_list
    }
    assert "duration_ms" in kwargs_by_hook["post_tool_call"]
    assert "duration_ms" in kwargs_by_hook["transform_tool_result"]

    post_duration = kwargs_by_hook["post_tool_call"]["duration_ms"]
    transform_duration = kwargs_by_hook["transform_tool_result"]["duration_ms"]
    # Check type and range on BOTH values: the equality assertion below
    # would not catch a float (5 == 5.0) handed to only one of the hooks.
    for duration in (post_duration, transform_duration):
        assert isinstance(duration, int)
        assert duration >= 0
    # Both hooks should observe the same measured duration.
    assert post_duration == transform_duration
    # pre_tool_call does NOT get duration_ms (nothing has run yet).
    assert "duration_ms" not in kwargs_by_hook["pre_tool_call"]
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Agent loop tools
|
||||
|
||||
Reference in New Issue
Block a user