feat(plugins): add transform_tool_result hook for generic tool-result rewriting (#12972)

Closes #8933 more fully, extending the per-tool transform_terminal_output hook from #12929 to a generic seam that fires after every tool dispatch. Plugins can rewrite any tool's result string (normalize formats, redact fields, summarize verbose output) without wrapping individual tools. Changes - hermes_cli/plugins.py: add "transform_tool_result" to VALID_HOOKS - model_tools.py: invoke the hook in handle_function_call after post_tool_call (which remains observational); first valid str return replaces the result; fail-open - tests/test_transform_tool_result_hook.py: 9 new tests covering no-op, None return, non-string return, first-match wins, kwargs, hook exception fallback, post_tool_call observation invariant, ordering vs post_tool_call, and an end-to-end real-plugin integration - tests/hermes_cli/test_plugins.py: assert new hook in VALID_HOOKS - tests/test_model_tools.py: extend the hook-call-sequence assertion to include the new hook Design - transform_tool_result runs AFTER post_tool_call so observers always see the original (untransformed) result. This keeps post_tool_call's observational contract. - transform_terminal_output (from #12929) still runs earlier, inside terminal_tool, so plugins can canonicalize BEFORE the 50k truncation drops middle content. Both hooks coexist; they target different layers.
2026-04-28 06:51:16 +08:00 · 2026-04-20 03:48:08 -07:00
parent 9f22977fc0
commit 04068c5891
6 changed files with 222 additions and 0 deletions
--- a/hermes_cli/plugins.py
+++ b/hermes_cli/plugins.py
@@ -55,6 +55,7 @@ VALID_HOOKS: Set[str] = {
    "pre_tool_call",
    "post_tool_call",
    "transform_terminal_output",
+    "transform_tool_result",
    "pre_llm_call",
    "post_llm_call",
    "pre_api_request",
--- a/model_tools.py
+++ b/model_tools.py
@@ -550,6 +550,30 @@ def handle_function_call(
        except Exception:
            pass

+        # Generic tool-result canonicalization seam: plugins receive the
+        # final result string (JSON, usually) and may replace it by
+        # returning a string from transform_tool_result. Runs after
+        # post_tool_call (which stays observational) and before the result
+        # is appended back into conversation context. Fail-open; the first
+        # valid string return wins; non-string returns are ignored.
+        try:
+            from hermes_cli.plugins import invoke_hook
+            hook_results = invoke_hook(
+                "transform_tool_result",
+                tool_name=function_name,
+                args=function_args,
+                result=result,
+                task_id=task_id or "",
+                session_id=session_id or "",
+                tool_call_id=tool_call_id or "",
+            )
+            for hook_result in hook_results:
+                if isinstance(hook_result, str):
+                    result = hook_result
+                    break
+        except Exception:
+            pass
+
        return result

    except Exception as e:
--- a/tests/hermes_cli/test_plugins.py
+++ b/tests/hermes_cli/test_plugins.py
@@ -202,6 +202,7 @@ class TestPluginHooks:
        assert "pre_api_request" in VALID_HOOKS
        assert "post_api_request" in VALID_HOOKS
        assert "transform_terminal_output" in VALID_HOOKS
+        assert "transform_tool_result" in VALID_HOOKS

    def test_register_and_invoke_hook(self, tmp_path, monkeypatch):
        """Registered hooks are called on invoke_hook()."""
--- a/tests/test_model_tools.py
+++ b/tests/test_model_tools.py
@@ -72,6 +72,15 @@ class TestHandleFunctionCall:
                session_id="session-1",
                tool_call_id="call-1",
            ),
+            call(
+                "transform_tool_result",
+                tool_name="web_search",
+                args={"q": "test"},
+                result='{"ok":true}',
+                task_id="task-1",
+                session_id="session-1",
+                tool_call_id="call-1",
+            ),
        ]


--- a/tests/test_transform_tool_result_hook.py
+++ b/tests/test_transform_tool_result_hook.py
@@ -0,0 +1,183 @@
+"""Tests for the ``transform_tool_result`` plugin hook wired into
+``model_tools.handle_function_call``.
+
+Mirrors the ``transform_terminal_output`` hook tests from Phase 1 but
+targets the generic tool-result seam that runs for every tool dispatch.
+"""
+
+import json
+import os
+from pathlib import Path
+from unittest.mock import MagicMock
+
+import hermes_cli.plugins as plugins_mod
+import model_tools
+
+
+_UNSET = object()
+
+
+def _run_handle_function_call(
+    monkeypatch,
+    *,
+    tool_name="dummy_tool",
+    tool_args=None,
+    dispatch_result='{"output": "original"}',
+    invoke_hook=_UNSET,
+):
+    """Drive ``handle_function_call`` with a mocked registry dispatch."""
+    from tools.registry import registry
+
+    monkeypatch.setattr(
+        registry, "dispatch",
+        lambda name, args, **kw: dispatch_result,
+    )
+    # Skip unrelated side effects (read-loop tracker).
+    monkeypatch.setattr(model_tools, "_READ_SEARCH_TOOLS", frozenset())
+
+    if invoke_hook is not _UNSET:
+        # Patch the symbol actually imported inside handle_function_call.
+        monkeypatch.setattr("hermes_cli.plugins.invoke_hook", invoke_hook)
+
+    return model_tools.handle_function_call(
+        tool_name,
+        tool_args or {},
+        task_id="t1",
+        session_id="s1",
+        tool_call_id="tc1",
+        skip_pre_tool_call_hook=True,
+    )
+
+
+def test_result_unchanged_when_no_hook_registered(monkeypatch):
+    # Real invoke_hook with no plugins loaded returns [].
+    monkeypatch.setenv("HERMES_HOME", "/tmp/hermes_no_plugins")
+    # Force a fresh plugin manager so no stale plugins pollute state.
+    plugins_mod._plugin_manager = plugins_mod.PluginManager()
+
+    out = _run_handle_function_call(monkeypatch)
+    assert out == '{"output": "original"}'
+
+
+def test_result_unchanged_for_none_hook_return(monkeypatch):
+    out = _run_handle_function_call(
+        monkeypatch,
+        invoke_hook=lambda hook_name, **kw: [None],
+    )
+    assert out == '{"output": "original"}'
+
+
+def test_result_ignores_non_string_hook_returns(monkeypatch):
+    out = _run_handle_function_call(
+        monkeypatch,
+        invoke_hook=lambda hook_name, **kw: [{"bad": True}, 123, ["nope"]],
+    )
+    assert out == '{"output": "original"}'
+
+
+def test_first_valid_string_return_replaces_result(monkeypatch):
+    out = _run_handle_function_call(
+        monkeypatch,
+        invoke_hook=lambda hook_name, **kw: [None, {"x": 1}, "first", "second"],
+    )
+    assert out == "first"
+
+
+def test_hook_receives_expected_kwargs(monkeypatch):
+    captured = {}
+
+    def _hook(hook_name, **kwargs):
+        if hook_name == "transform_tool_result":
+            captured.update(kwargs)
+        return []
+
+    out = _run_handle_function_call(
+        monkeypatch,
+        tool_name="my_tool",
+        tool_args={"a": 1, "b": "x"},
+        dispatch_result='{"ok": true}',
+        invoke_hook=_hook,
+    )
+    assert out == '{"ok": true}'
+    assert captured["tool_name"] == "my_tool"
+    assert captured["args"] == {"a": 1, "b": "x"}
+    assert captured["result"] == '{"ok": true}'
+    assert captured["task_id"] == "t1"
+    assert captured["session_id"] == "s1"
+    assert captured["tool_call_id"] == "tc1"
+
+
+def test_hook_exception_falls_back_to_original(monkeypatch):
+    def _raise(*_a, **_kw):
+        raise RuntimeError("boom")
+
+    out = _run_handle_function_call(
+        monkeypatch,
+        invoke_hook=_raise,
+    )
+    assert out == '{"output": "original"}'
+
+
+def test_post_tool_call_remains_observational(monkeypatch):
+    """post_tool_call return values must NOT replace the result."""
+    def _hook(hook_name, **kw):
+        if hook_name == "post_tool_call":
+            # Observers returning a string must be ignored.
+            return ["observer return should be ignored"]
+        return []
+
+    out = _run_handle_function_call(
+        monkeypatch,
+        invoke_hook=_hook,
+    )
+    assert out == '{"output": "original"}'
+
+
+def test_transform_tool_result_runs_after_post_tool_call(monkeypatch):
+    """post_tool_call sees ORIGINAL result; transform_tool_result sees same and may replace."""
+    observed = []
+
+    def _hook(hook_name, **kw):
+        if hook_name == "post_tool_call":
+            observed.append(("post_tool_call", kw["result"]))
+            return []
+        if hook_name == "transform_tool_result":
+            observed.append(("transform_tool_result", kw["result"]))
+            return ["rewritten"]
+        return []
+
+    out = _run_handle_function_call(
+        monkeypatch,
+        dispatch_result='{"raw": "value"}',
+        invoke_hook=_hook,
+    )
+    assert out == "rewritten"
+    # Both hooks saw the ORIGINAL (untransformed) result.
+    assert observed == [
+        ("post_tool_call", '{"raw": "value"}'),
+        ("transform_tool_result", '{"raw": "value"}'),
+    ]
+
+
+def test_transform_tool_result_integration_with_real_plugin(monkeypatch, tmp_path):
+    """End-to-end: load a real plugin from HERMES_HOME and verify it rewrites results."""
+    hermes_home = Path(os.environ["HERMES_HOME"])
+    plugins_dir = hermes_home / "plugins"
+    plugin_dir = plugins_dir / "transform_result_canon"
+    plugin_dir.mkdir(parents=True)
+    (plugin_dir / "plugin.yaml").write_text("name: transform_result_canon\n", encoding="utf-8")
+    (plugin_dir / "__init__.py").write_text(
+        "def register(ctx):\n"
+        '    ctx.register_hook("transform_tool_result", '
+        'lambda **kw: f\'CANON[{kw["tool_name"]}]\' + kw["result"])\n',
+        encoding="utf-8",
+    )
+
+    plugins_mod.discover_plugins()
+
+    out = _run_handle_function_call(
+        monkeypatch,
+        tool_name="some_tool",
+        dispatch_result='{"payload": 42}',
+    )
+    assert out == 'CANON[some_tool]{"payload": 42}'
--- a/tests/tools/test_terminal_output_transform_hook.py
+++ b/tests/tools/test_terminal_output_transform_hook.py
@@ -115,6 +115,10 @@ def test_terminal_output_transform_still_truncates_long_replacement(monkeypatch,


 def test_terminal_output_transform_still_runs_strip_and_redact(monkeypatch, tmp_path):
+    # Ensure redaction is active regardless of host HERMES_REDACT_SECRETS state
+    # or collection-time import order (the module snapshots env at import).
+    monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)
+
    secret = "sk-proj-abc123def456ghi789jkl012mno345"
    result, _mock_env = _run_terminal(
        monkeypatch,