feat(hooks): add duration_ms to post_tool_call + transform_tool_result (#15429)

Plugin hooks fired after a tool dispatch now receive an integer duration_ms kwarg measuring how long the tool's registry.dispatch() call took (time.monotonic() before/after). Inspired by Claude Code 2.1.119, which added the same field to PostToolUse hook inputs.

Wire points:
- model_tools.py: measure dispatch latency, pass duration_ms to invoke_hook("post_tool_call", ...) and invoke_hook("transform_tool_result", ...)
- hermes_cli/hooks.py: include duration_ms in the synthetic payload used by 'hermes hooks test' and 'hermes hooks doctor' so shell-hook authors see the same shape at development time as at runtime
- shell hooks (agent/shell_hooks.py): no code change needed; _serialize_payload already surfaces non-top-level kwargs under payload['extra'], so duration_ms lands at extra.duration_ms for shell-hook scripts

Plugin authors can now build latency dashboards, per-tool SLO alerts, and regression canaries without having to wrap every tool manually.

Test: tests/test_model_tools.py::test_post_tool_call_receives_non_negative_integer_duration_ms
E2E: real PluginManager + dispatch monkey-patched with a 50ms sleep, hook callback observes duration_ms=50 (int).
Refs: https://code.claude.com/docs/en/changelog (2.1.119, Apr 23 2026)
2026-04-28 06:51:16 +08:00 · 2026-04-25 22:13:12 -07:00
parent eb28145f36
commit 59b56d445c
5 changed files with 52 additions and 6 deletions
--- a/tests/test_model_tools.py
+++ b/tests/test_model_tools.py
@@ -1,7 +1,7 @@
 """Tests for model_tools.py — function call dispatch, agent-loop interception, legacy toolsets."""

 import json
-from unittest.mock import call, patch
+from unittest.mock import ANY, call, patch

 import pytest

@@ -71,6 +71,7 @@ class TestHandleFunctionCall:
                task_id="task-1",
                session_id="session-1",
                tool_call_id="call-1",
+                duration_ms=ANY,
            ),
            call(
                "transform_tool_result",
@@ -80,9 +81,37 @@ class TestHandleFunctionCall:
                task_id="task-1",
                session_id="session-1",
                tool_call_id="call-1",
+                duration_ms=ANY,
            ),
        ]

+    def test_post_tool_call_receives_non_negative_integer_duration_ms(self):
+        """Regression: post_tool_call and transform_tool_result hooks must
+        receive a non-negative integer ``duration_ms`` kwarg measuring
+        dispatch latency.  Inspired by Claude Code 2.1.119, which added
+        ``duration_ms`` to its PostToolUse hook inputs.
+        """
+        with (
+            patch("model_tools.registry.dispatch", return_value='{"ok":true}'),  # stub dispatch: returns a canned JSON string
+            patch("hermes_cli.plugins.invoke_hook") as mock_invoke_hook,  # record every hook invocation on a mock
+        ):
+            handle_function_call("web_search", {"q": "test"}, task_id="t1")
+
+        kwargs_by_hook = {  # hook name (first positional arg) -> kwargs; a repeated hook name keeps only its last call
+            c.args[0]: c.kwargs for c in mock_invoke_hook.call_args_list
+        }
+        assert "duration_ms" in kwargs_by_hook["post_tool_call"]
+        assert "duration_ms" in kwargs_by_hook["transform_tool_result"]
+
+        post_duration = kwargs_by_hook["post_tool_call"]["duration_ms"]
+        transform_duration = kwargs_by_hook["transform_tool_result"]["duration_ms"]
+        assert isinstance(post_duration, int)
+        assert post_duration >= 0
+        # Both hooks should observe the same measured duration.
+        assert post_duration == transform_duration
+        # pre_tool_call does NOT get duration_ms (nothing has run yet).
+        assert "duration_ms" not in kwargs_by_hook["pre_tool_call"]  # the lookup raises KeyError if pre_tool_call was never recorded
+
+

 # =========================================================================
 # Agent loop tools