diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py index 06002efe18..7796be4ded 100644 --- a/hermes_cli/plugins.py +++ b/hermes_cli/plugins.py @@ -55,6 +55,7 @@ VALID_HOOKS: Set[str] = { "pre_tool_call", "post_tool_call", "transform_terminal_output", + "transform_tool_result", "pre_llm_call", "post_llm_call", "pre_api_request", diff --git a/model_tools.py b/model_tools.py index 0e8bc877e2..db4b46326b 100644 --- a/model_tools.py +++ b/model_tools.py @@ -550,6 +550,30 @@ def handle_function_call( except Exception: pass + # Generic tool-result canonicalization seam: plugins receive the + # final result string (JSON, usually) and may replace it by + # returning a string from transform_tool_result. Runs after + # post_tool_call (which stays observational) and before the result + # is appended back into conversation context. Fail-open; the first + # valid string return wins; non-string returns are ignored. + try: + from hermes_cli.plugins import invoke_hook + hook_results = invoke_hook( + "transform_tool_result", + tool_name=function_name, + args=function_args, + result=result, + task_id=task_id or "", + session_id=session_id or "", + tool_call_id=tool_call_id or "", + ) + for hook_result in hook_results: + if isinstance(hook_result, str): + result = hook_result + break + except Exception: + pass + return result except Exception as e: diff --git a/tests/hermes_cli/test_plugins.py b/tests/hermes_cli/test_plugins.py index a8f8c1b262..1286484d05 100644 --- a/tests/hermes_cli/test_plugins.py +++ b/tests/hermes_cli/test_plugins.py @@ -202,6 +202,7 @@ class TestPluginHooks: assert "pre_api_request" in VALID_HOOKS assert "post_api_request" in VALID_HOOKS assert "transform_terminal_output" in VALID_HOOKS + assert "transform_tool_result" in VALID_HOOKS def test_register_and_invoke_hook(self, tmp_path, monkeypatch): """Registered hooks are called on invoke_hook().""" diff --git a/tests/test_model_tools.py b/tests/test_model_tools.py index bb8a79ab0b..12654e350f 100644 --- a/tests/test_model_tools.py +++ b/tests/test_model_tools.py @@ -72,6 +72,15 @@ class TestHandleFunctionCall: session_id="session-1", tool_call_id="call-1", ), + call( + "transform_tool_result", + tool_name="web_search", + args={"q": "test"}, + result='{"ok":true}', + task_id="task-1", + session_id="session-1", + tool_call_id="call-1", + ), ] diff --git a/tests/test_transform_tool_result_hook.py b/tests/test_transform_tool_result_hook.py new file mode 100644 index 0000000000..159446fd57 --- /dev/null +++ b/tests/test_transform_tool_result_hook.py @@ -0,0 +1,183 @@ +"""Tests for the ``transform_tool_result`` plugin hook wired into +``model_tools.handle_function_call``. + +Mirrors the ``transform_terminal_output`` hook tests from Phase 1 but +targets the generic tool-result seam that runs for every tool dispatch. +""" + +import json +import os +from pathlib import Path +from unittest.mock import MagicMock + +import hermes_cli.plugins as plugins_mod +import model_tools + + +_UNSET = object() + + +def _run_handle_function_call( + monkeypatch, + *, + tool_name="dummy_tool", + tool_args=None, + dispatch_result='{"output": "original"}', + invoke_hook=_UNSET, +): + """Drive ``handle_function_call`` with a mocked registry dispatch.""" + from tools.registry import registry + + monkeypatch.setattr( + registry, "dispatch", + lambda name, args, **kw: dispatch_result, + ) + # Skip unrelated side effects (read-loop tracker). + monkeypatch.setattr(model_tools, "_READ_SEARCH_TOOLS", frozenset()) + + if invoke_hook is not _UNSET: + # Patch the symbol actually imported inside handle_function_call. + monkeypatch.setattr("hermes_cli.plugins.invoke_hook", invoke_hook) + + return model_tools.handle_function_call( + tool_name, + tool_args or {}, + task_id="t1", + session_id="s1", + tool_call_id="tc1", + skip_pre_tool_call_hook=True, + ) + + +def test_result_unchanged_when_no_hook_registered(monkeypatch): + # Real invoke_hook with no plugins loaded returns []. + monkeypatch.setenv("HERMES_HOME", "/tmp/hermes_no_plugins") + # Force a fresh plugin manager so no stale plugins pollute state. + plugins_mod._plugin_manager = plugins_mod.PluginManager() + + out = _run_handle_function_call(monkeypatch) + assert out == '{"output": "original"}' + + +def test_result_unchanged_for_none_hook_return(monkeypatch): + out = _run_handle_function_call( + monkeypatch, + invoke_hook=lambda hook_name, **kw: [None], + ) + assert out == '{"output": "original"}' + + +def test_result_ignores_non_string_hook_returns(monkeypatch): + out = _run_handle_function_call( + monkeypatch, + invoke_hook=lambda hook_name, **kw: [{"bad": True}, 123, ["nope"]], + ) + assert out == '{"output": "original"}' + + +def test_first_valid_string_return_replaces_result(monkeypatch): + out = _run_handle_function_call( + monkeypatch, + invoke_hook=lambda hook_name, **kw: [None, {"x": 1}, "first", "second"], + ) + assert out == "first" + + +def test_hook_receives_expected_kwargs(monkeypatch): + captured = {} + + def _hook(hook_name, **kwargs): + if hook_name == "transform_tool_result": + captured.update(kwargs) + return [] + + out = _run_handle_function_call( + monkeypatch, + tool_name="my_tool", + tool_args={"a": 1, "b": "x"}, + dispatch_result='{"ok": true}', + invoke_hook=_hook, + ) + assert out == '{"ok": true}' + assert captured["tool_name"] == "my_tool" + assert captured["args"] == {"a": 1, "b": "x"} + assert captured["result"] == '{"ok": true}' + assert captured["task_id"] == "t1" + assert captured["session_id"] == "s1" + assert captured["tool_call_id"] == "tc1" + + +def test_hook_exception_falls_back_to_original(monkeypatch): + def _raise(*_a, **_kw): + raise RuntimeError("boom") + + out = _run_handle_function_call( + monkeypatch, + invoke_hook=_raise, + ) + assert out == '{"output": "original"}' + + +def test_post_tool_call_remains_observational(monkeypatch): + """post_tool_call return values must NOT replace the result.""" + def _hook(hook_name, **kw): + if hook_name == "post_tool_call": + # Observers returning a string must be ignored. + return ["observer return should be ignored"] + return [] + + out = _run_handle_function_call( + monkeypatch, + invoke_hook=_hook, + ) + assert out == '{"output": "original"}' + + +def test_transform_tool_result_runs_after_post_tool_call(monkeypatch): + """post_tool_call sees ORIGINAL result; transform_tool_result sees same and may replace.""" + observed = [] + + def _hook(hook_name, **kw): + if hook_name == "post_tool_call": + observed.append(("post_tool_call", kw["result"])) + return [] + if hook_name == "transform_tool_result": + observed.append(("transform_tool_result", kw["result"])) + return ["rewritten"] + return [] + + out = _run_handle_function_call( + monkeypatch, + dispatch_result='{"raw": "value"}', + invoke_hook=_hook, + ) + assert out == "rewritten" + # Both hooks saw the ORIGINAL (untransformed) result. + assert observed == [ + ("post_tool_call", '{"raw": "value"}'), + ("transform_tool_result", '{"raw": "value"}'), + ] + + +def test_transform_tool_result_integration_with_real_plugin(monkeypatch, tmp_path): + """End-to-end: load a real plugin from HERMES_HOME and verify it rewrites results.""" + hermes_home = Path(os.environ["HERMES_HOME"]) + plugins_dir = hermes_home / "plugins" + plugin_dir = plugins_dir / "transform_result_canon" + plugin_dir.mkdir(parents=True) + (plugin_dir / "plugin.yaml").write_text("name: transform_result_canon\n", encoding="utf-8") + (plugin_dir / "__init__.py").write_text( + "def register(ctx):\n" + ' ctx.register_hook("transform_tool_result", ' + 'lambda **kw: f\'CANON[{kw["tool_name"]}]\' + kw["result"])\n', + encoding="utf-8", + ) + + plugins_mod.discover_plugins() + + out = _run_handle_function_call( + monkeypatch, + tool_name="some_tool", + dispatch_result='{"payload": 42}', + ) + assert out == 'CANON[some_tool]{"payload": 42}' diff --git a/tests/tools/test_terminal_output_transform_hook.py b/tests/tools/test_terminal_output_transform_hook.py index 6eca4135ee..bdbdcc0f5d 100644 --- a/tests/tools/test_terminal_output_transform_hook.py +++ b/tests/tools/test_terminal_output_transform_hook.py @@ -115,6 +115,10 @@ def test_terminal_output_transform_still_truncates_long_replacement(monkeypatch, def test_terminal_output_transform_still_runs_strip_and_redact(monkeypatch, tmp_path): + # Ensure redaction is active regardless of host HERMES_REDACT_SECRETS state + # or collection-time import order (the module snapshots env at import). + monkeypatch.setattr("agent.redact._REDACT_ENABLED", True) + secret = "sk-proj-abc123def456ghi789jkl012mno345" result, _mock_env = _run_terminal( monkeypatch,