mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
feat: shell hooks — wire shell scripts as Hermes hook callbacks
Users can declare shell scripts in config.yaml under a hooks: block that fire on plugin-hook events (pre_tool_call, post_tool_call, pre_llm_call, subagent_stop, etc). Scripts receive JSON on stdin, can return JSON on stdout to block tool calls or inject context pre-LLM. Key design: - Registers closures on existing PluginManager._hooks dict — zero changes to invoke_hook() call sites - subprocess.run(shell=False) via shlex.split — no shell injection - First-use consent per (event, command) pair, persisted to allowlist JSON - Bypass via --accept-hooks, HERMES_ACCEPT_HOOKS=1, or hooks_auto_accept - hermes hooks list/test/revoke/doctor CLI subcommands - Adds subagent_stop hook event fired after delegate_task children exit - Claude Code compatible response shapes accepted Cherry-picked from PR #13143 by @pefontana.
This commit is contained in:
716
tests/agent/test_shell_hooks.py
Normal file
716
tests/agent/test_shell_hooks.py
Normal file
@@ -0,0 +1,716 @@
|
||||
"""Tests for the shell-hooks subprocess bridge (agent.shell_hooks).
|
||||
|
||||
These tests focus on the pure translation layer — JSON serialisation,
|
||||
JSON parsing, matcher behaviour, block-schema correctness, and the
|
||||
subprocess runner's graceful error handling. Consent prompts are
|
||||
covered in ``test_shell_hooks_consent.py``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import stat
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
import pytest
|
||||
|
||||
from agent import shell_hooks
|
||||
|
||||
|
||||
# ── helpers ───────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _write_script(tmp_path: Path, name: str, body: str) -> Path:
|
||||
path = tmp_path / name
|
||||
path.write_text(body)
|
||||
path.chmod(0o755)
|
||||
return path
|
||||
|
||||
|
||||
def _allowlist_pair(monkeypatch, tmp_path, event: str, command: str) -> None:
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_home"))
|
||||
shell_hooks._record_approval(event, command)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _reset_registration_state():
|
||||
shell_hooks.reset_for_tests()
|
||||
yield
|
||||
shell_hooks.reset_for_tests()
|
||||
|
||||
|
||||
# ── _parse_response ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestParseResponse:
|
||||
def test_block_claude_code_style(self):
|
||||
r = shell_hooks._parse_response(
|
||||
"pre_tool_call",
|
||||
'{"decision": "block", "reason": "nope"}',
|
||||
)
|
||||
assert r == {"action": "block", "message": "nope"}
|
||||
|
||||
def test_block_canonical_style(self):
|
||||
r = shell_hooks._parse_response(
|
||||
"pre_tool_call",
|
||||
'{"action": "block", "message": "nope"}',
|
||||
)
|
||||
assert r == {"action": "block", "message": "nope"}
|
||||
|
||||
def test_block_canonical_wins_over_claude_style(self):
|
||||
r = shell_hooks._parse_response(
|
||||
"pre_tool_call",
|
||||
'{"action": "block", "message": "canonical", '
|
||||
'"decision": "block", "reason": "claude"}',
|
||||
)
|
||||
assert r == {"action": "block", "message": "canonical"}
|
||||
|
||||
def test_empty_stdout_returns_none(self):
|
||||
assert shell_hooks._parse_response("pre_tool_call", "") is None
|
||||
assert shell_hooks._parse_response("pre_tool_call", " ") is None
|
||||
|
||||
def test_invalid_json_returns_none(self):
|
||||
assert shell_hooks._parse_response("pre_tool_call", "not json") is None
|
||||
|
||||
def test_non_dict_json_returns_none(self):
|
||||
assert shell_hooks._parse_response("pre_tool_call", "[1, 2]") is None
|
||||
|
||||
def test_non_block_pre_tool_call_returns_none(self):
|
||||
r = shell_hooks._parse_response("pre_tool_call", '{"decision": "allow"}')
|
||||
assert r is None
|
||||
|
||||
def test_pre_llm_call_context_passthrough(self):
|
||||
r = shell_hooks._parse_response(
|
||||
"pre_llm_call", '{"context": "today is Friday"}',
|
||||
)
|
||||
assert r == {"context": "today is Friday"}
|
||||
|
||||
def test_subagent_stop_context_passthrough(self):
|
||||
r = shell_hooks._parse_response(
|
||||
"subagent_stop", '{"context": "child role=leaf"}',
|
||||
)
|
||||
assert r == {"context": "child role=leaf"}
|
||||
|
||||
def test_pre_llm_call_block_ignored(self):
|
||||
"""Only pre_tool_call honors block directives."""
|
||||
r = shell_hooks._parse_response(
|
||||
"pre_llm_call", '{"decision": "block", "reason": "no"}',
|
||||
)
|
||||
assert r is None
|
||||
|
||||
|
||||
# ── _serialize_payload ────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSerializePayload:
|
||||
def test_basic_pre_tool_call_schema(self):
|
||||
raw = shell_hooks._serialize_payload(
|
||||
"pre_tool_call",
|
||||
{
|
||||
"tool_name": "terminal",
|
||||
"args": {"command": "ls"},
|
||||
"session_id": "sess-1",
|
||||
"task_id": "t-1",
|
||||
"tool_call_id": "c-1",
|
||||
},
|
||||
)
|
||||
payload = json.loads(raw)
|
||||
assert payload["hook_event_name"] == "pre_tool_call"
|
||||
assert payload["tool_name"] == "terminal"
|
||||
assert payload["tool_input"] == {"command": "ls"}
|
||||
assert payload["session_id"] == "sess-1"
|
||||
assert "cwd" in payload
|
||||
# task_id / tool_call_id end up under extra
|
||||
assert payload["extra"]["task_id"] == "t-1"
|
||||
assert payload["extra"]["tool_call_id"] == "c-1"
|
||||
|
||||
def test_args_not_dict_becomes_null(self):
|
||||
raw = shell_hooks._serialize_payload(
|
||||
"pre_tool_call", {"args": ["not", "a", "dict"]},
|
||||
)
|
||||
payload = json.loads(raw)
|
||||
assert payload["tool_input"] is None
|
||||
|
||||
def test_parent_session_id_used_when_no_session_id(self):
|
||||
raw = shell_hooks._serialize_payload(
|
||||
"subagent_stop", {"parent_session_id": "p-1"},
|
||||
)
|
||||
payload = json.loads(raw)
|
||||
assert payload["session_id"] == "p-1"
|
||||
|
||||
def test_unserialisable_extras_stringified(self):
|
||||
class Weird:
|
||||
def __repr__(self) -> str:
|
||||
return "<weird>"
|
||||
|
||||
raw = shell_hooks._serialize_payload(
|
||||
"on_session_start", {"obj": Weird()},
|
||||
)
|
||||
payload = json.loads(raw)
|
||||
assert payload["extra"]["obj"] == "<weird>"
|
||||
|
||||
|
||||
# ── Matcher behaviour ─────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestMatcher:
|
||||
def test_no_matcher_fires_for_any_tool(self):
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="pre_tool_call", command="echo", matcher=None,
|
||||
)
|
||||
assert spec.matches_tool("terminal")
|
||||
assert spec.matches_tool("write_file")
|
||||
|
||||
def test_single_name_matcher(self):
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="pre_tool_call", command="echo", matcher="terminal",
|
||||
)
|
||||
assert spec.matches_tool("terminal")
|
||||
assert not spec.matches_tool("web_search")
|
||||
|
||||
def test_alternation_matcher(self):
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="pre_tool_call", command="echo", matcher="terminal|file",
|
||||
)
|
||||
assert spec.matches_tool("terminal")
|
||||
assert spec.matches_tool("file")
|
||||
assert not spec.matches_tool("web")
|
||||
|
||||
def test_invalid_regex_falls_back_to_literal(self):
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="pre_tool_call", command="echo", matcher="foo[bar",
|
||||
)
|
||||
assert spec.matches_tool("foo[bar")
|
||||
assert not spec.matches_tool("foo")
|
||||
|
||||
def test_matcher_ignored_when_no_tool_name(self):
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="pre_tool_call", command="echo", matcher="terminal",
|
||||
)
|
||||
assert not spec.matches_tool(None)
|
||||
|
||||
def test_matcher_leading_whitespace_stripped(self):
|
||||
"""YAML quirks can introduce leading/trailing whitespace — must
|
||||
not silently break the matcher."""
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="pre_tool_call", command="echo", matcher=" terminal ",
|
||||
)
|
||||
assert spec.matcher == "terminal"
|
||||
assert spec.matches_tool("terminal")
|
||||
|
||||
def test_matcher_trailing_newline_stripped(self):
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="pre_tool_call", command="echo", matcher="terminal\n",
|
||||
)
|
||||
assert spec.matches_tool("terminal")
|
||||
|
||||
def test_whitespace_only_matcher_becomes_none(self):
|
||||
"""A matcher that's pure whitespace is treated as 'no matcher'."""
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="pre_tool_call", command="echo", matcher=" ",
|
||||
)
|
||||
assert spec.matcher is None
|
||||
assert spec.matches_tool("anything")
|
||||
|
||||
|
||||
# ── End-to-end subprocess behaviour ───────────────────────────────────────
|
||||
|
||||
|
||||
class TestCallbackSubprocess:
|
||||
def test_timeout_returns_none(self, tmp_path):
|
||||
# Script that sleeps forever; we set a 1s timeout.
|
||||
script = _write_script(
|
||||
tmp_path, "slow.sh",
|
||||
"#!/usr/bin/env bash\nsleep 60\n",
|
||||
)
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="post_tool_call", command=str(script), timeout=1,
|
||||
)
|
||||
cb = shell_hooks._make_callback(spec)
|
||||
assert cb(tool_name="terminal") is None
|
||||
|
||||
def test_malformed_json_stdout_returns_none(self, tmp_path):
|
||||
script = _write_script(
|
||||
tmp_path, "bad_json.sh",
|
||||
"#!/usr/bin/env bash\necho 'not json at all'\n",
|
||||
)
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="pre_tool_call", command=str(script),
|
||||
)
|
||||
cb = shell_hooks._make_callback(spec)
|
||||
# Matcher is None so the callback fires for any tool.
|
||||
assert cb(tool_name="terminal") is None
|
||||
|
||||
def test_non_zero_exit_with_block_stdout_still_blocks(self, tmp_path):
|
||||
"""A script that signals failure via exit code AND prints a block
|
||||
directive must still block — scripts should be free to mix exit
|
||||
codes with parseable output."""
|
||||
script = _write_script(
|
||||
tmp_path, "exit1_block.sh",
|
||||
"#!/usr/bin/env bash\n"
|
||||
'printf \'{"decision": "block", "reason": "via exit 1"}\\n\'\n'
|
||||
"exit 1\n",
|
||||
)
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="pre_tool_call", command=str(script),
|
||||
)
|
||||
cb = shell_hooks._make_callback(spec)
|
||||
assert cb(tool_name="terminal") == {"action": "block", "message": "via exit 1"}
|
||||
|
||||
def test_block_translation_end_to_end(self, tmp_path):
|
||||
"""v1 schema-bug regression gate.
|
||||
|
||||
Shell hook returns the Claude-Code-style payload and the bridge
|
||||
must translate it to the canonical Hermes block shape so that
|
||||
get_pre_tool_call_block_message() surfaces the block.
|
||||
"""
|
||||
script = _write_script(
|
||||
tmp_path, "blocker.sh",
|
||||
"#!/usr/bin/env bash\n"
|
||||
'printf \'{"decision": "block", "reason": "no terminal"}\\n\'\n',
|
||||
)
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="pre_tool_call",
|
||||
command=str(script),
|
||||
matcher="terminal",
|
||||
)
|
||||
cb = shell_hooks._make_callback(spec)
|
||||
result = cb(tool_name="terminal", args={"command": "rm -rf /"})
|
||||
assert result == {"action": "block", "message": "no terminal"}
|
||||
|
||||
def test_block_aggregation_through_plugin_manager(self, tmp_path, monkeypatch):
|
||||
"""Registering via register_from_config makes
|
||||
get_pre_tool_call_block_message surface the block — the real
|
||||
end-to-end control flow used by run_agent._invoke_tool."""
|
||||
from hermes_cli import plugins
|
||||
|
||||
script = _write_script(
|
||||
tmp_path, "block.sh",
|
||||
"#!/usr/bin/env bash\n"
|
||||
'printf \'{"decision": "block", "reason": "blocked-by-shell"}\\n\'\n',
|
||||
)
|
||||
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "home"))
|
||||
monkeypatch.setenv("HERMES_ACCEPT_HOOKS", "1")
|
||||
|
||||
# Fresh manager
|
||||
plugins._plugin_manager = plugins.PluginManager()
|
||||
|
||||
cfg = {
|
||||
"hooks": {
|
||||
"pre_tool_call": [
|
||||
{"matcher": "terminal", "command": str(script)},
|
||||
],
|
||||
},
|
||||
}
|
||||
registered = shell_hooks.register_from_config(cfg, accept_hooks=True)
|
||||
assert len(registered) == 1
|
||||
|
||||
msg = plugins.get_pre_tool_call_block_message(
|
||||
tool_name="terminal",
|
||||
args={"command": "rm"},
|
||||
)
|
||||
assert msg == "blocked-by-shell"
|
||||
|
||||
def test_matcher_regex_filters_callback(self, tmp_path, monkeypatch):
|
||||
"""A matcher set to 'terminal' must not fire for 'web_search'."""
|
||||
calls = tmp_path / "calls.log"
|
||||
script = _write_script(
|
||||
tmp_path, "log.sh",
|
||||
f"#!/usr/bin/env bash\n"
|
||||
f"echo \"$(cat -)\" >> {calls}\n"
|
||||
f"printf '{{}}\\n'\n",
|
||||
)
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="pre_tool_call",
|
||||
command=str(script),
|
||||
matcher="terminal",
|
||||
)
|
||||
cb = shell_hooks._make_callback(spec)
|
||||
cb(tool_name="terminal", args={"command": "ls"})
|
||||
cb(tool_name="web_search", args={"q": "x"})
|
||||
cb(tool_name="file_read", args={"path": "x"})
|
||||
assert calls.exists()
|
||||
# Only the terminal call wrote to the log
|
||||
assert calls.read_text().count("pre_tool_call") == 1
|
||||
|
||||
def test_payload_schema_delivered(self, tmp_path):
|
||||
capture = tmp_path / "payload.json"
|
||||
script = _write_script(
|
||||
tmp_path, "capture.sh",
|
||||
f"#!/usr/bin/env bash\ncat - > {capture}\nprintf '{{}}\\n'\n",
|
||||
)
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="pre_tool_call", command=str(script),
|
||||
)
|
||||
cb = shell_hooks._make_callback(spec)
|
||||
cb(
|
||||
tool_name="terminal",
|
||||
args={"command": "echo hi"},
|
||||
session_id="sess-77",
|
||||
task_id="task-77",
|
||||
)
|
||||
payload = json.loads(capture.read_text())
|
||||
assert payload["hook_event_name"] == "pre_tool_call"
|
||||
assert payload["tool_name"] == "terminal"
|
||||
assert payload["tool_input"] == {"command": "echo hi"}
|
||||
assert payload["session_id"] == "sess-77"
|
||||
assert "cwd" in payload
|
||||
assert payload["extra"]["task_id"] == "task-77"
|
||||
|
||||
def test_pre_llm_call_context_flows_through(self, tmp_path):
|
||||
script = _write_script(
|
||||
tmp_path, "ctx.sh",
|
||||
"#!/usr/bin/env bash\n"
|
||||
'printf \'{"context": "env-note"}\\n\'\n',
|
||||
)
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="pre_llm_call", command=str(script),
|
||||
)
|
||||
cb = shell_hooks._make_callback(spec)
|
||||
result = cb(
|
||||
session_id="s1", user_message="hello",
|
||||
conversation_history=[], is_first_turn=True,
|
||||
model="gpt-4", platform="cli",
|
||||
)
|
||||
assert result == {"context": "env-note"}
|
||||
|
||||
def test_shlex_handles_paths_with_spaces(self, tmp_path):
|
||||
dir_with_space = tmp_path / "path with space"
|
||||
dir_with_space.mkdir()
|
||||
script = _write_script(
|
||||
dir_with_space, "ok.sh",
|
||||
"#!/usr/bin/env bash\nprintf '{}\\n'\n",
|
||||
)
|
||||
# Quote the path so shlex keeps it as a single token.
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="post_tool_call",
|
||||
command=f'"{script}"',
|
||||
)
|
||||
cb = shell_hooks._make_callback(spec)
|
||||
# No crash = shlex parsed it correctly.
|
||||
assert cb(tool_name="terminal") is None # empty object parses to None
|
||||
|
||||
def test_missing_binary_logged_not_raised(self, tmp_path):
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="on_session_start",
|
||||
command=str(tmp_path / "does-not-exist"),
|
||||
)
|
||||
cb = shell_hooks._make_callback(spec)
|
||||
# Must not raise — agent loop should continue.
|
||||
assert cb(session_id="s") is None
|
||||
|
||||
def test_non_executable_binary_logged_not_raised(self, tmp_path):
|
||||
path = tmp_path / "no-exec"
|
||||
path.write_text("#!/usr/bin/env bash\necho hi\n")
|
||||
# Intentionally do NOT chmod +x.
|
||||
spec = shell_hooks.ShellHookSpec(
|
||||
event="on_session_start", command=str(path),
|
||||
)
|
||||
cb = shell_hooks._make_callback(spec)
|
||||
assert cb(session_id="s") is None
|
||||
|
||||
|
||||
# ── config parsing ────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestParseHooksBlock:
|
||||
def test_valid_entry(self):
|
||||
specs = shell_hooks._parse_hooks_block({
|
||||
"pre_tool_call": [
|
||||
{"matcher": "terminal", "command": "/tmp/hook.sh", "timeout": 30},
|
||||
],
|
||||
})
|
||||
assert len(specs) == 1
|
||||
assert specs[0].event == "pre_tool_call"
|
||||
assert specs[0].matcher == "terminal"
|
||||
assert specs[0].command == "/tmp/hook.sh"
|
||||
assert specs[0].timeout == 30
|
||||
|
||||
def test_unknown_event_skipped(self, caplog):
|
||||
specs = shell_hooks._parse_hooks_block({
|
||||
"pre_tools_call": [ # typo
|
||||
{"command": "/tmp/hook.sh"},
|
||||
],
|
||||
})
|
||||
assert specs == []
|
||||
|
||||
def test_missing_command_skipped(self):
|
||||
specs = shell_hooks._parse_hooks_block({
|
||||
"pre_tool_call": [{"matcher": "terminal"}],
|
||||
})
|
||||
assert specs == []
|
||||
|
||||
def test_timeout_clamped_to_max(self):
|
||||
specs = shell_hooks._parse_hooks_block({
|
||||
"post_tool_call": [
|
||||
{"command": "/tmp/slow.sh", "timeout": 9999},
|
||||
],
|
||||
})
|
||||
assert specs[0].timeout == shell_hooks.MAX_TIMEOUT_SECONDS
|
||||
|
||||
def test_non_int_timeout_defaulted(self):
|
||||
specs = shell_hooks._parse_hooks_block({
|
||||
"post_tool_call": [
|
||||
{"command": "/tmp/x.sh", "timeout": "thirty"},
|
||||
],
|
||||
})
|
||||
assert specs[0].timeout == shell_hooks.DEFAULT_TIMEOUT_SECONDS
|
||||
|
||||
def test_non_list_event_skipped(self):
|
||||
specs = shell_hooks._parse_hooks_block({
|
||||
"pre_tool_call": "not a list",
|
||||
})
|
||||
assert specs == []
|
||||
|
||||
def test_none_hooks_block(self):
|
||||
assert shell_hooks._parse_hooks_block(None) == []
|
||||
assert shell_hooks._parse_hooks_block("string") == []
|
||||
assert shell_hooks._parse_hooks_block([]) == []
|
||||
|
||||
def test_non_tool_event_matcher_warns_and_drops(self, caplog):
|
||||
"""matcher: is only honored for pre/post_tool_call; must warn
|
||||
and drop on other events so the spec reflects runtime."""
|
||||
import logging
|
||||
cfg = {"pre_llm_call": [{"matcher": "terminal", "command": "/bin/echo"}]}
|
||||
with caplog.at_level(logging.WARNING, logger=shell_hooks.logger.name):
|
||||
specs = shell_hooks._parse_hooks_block(cfg)
|
||||
assert len(specs) == 1 and specs[0].matcher is None
|
||||
assert any(
|
||||
"only honored for pre_tool_call" in r.getMessage()
|
||||
and "pre_llm_call" in r.getMessage()
|
||||
for r in caplog.records
|
||||
)
|
||||
|
||||
|
||||
# ── Idempotent registration ───────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestIdempotentRegistration:
|
||||
def test_double_call_registers_once(self, tmp_path, monkeypatch):
|
||||
from hermes_cli import plugins
|
||||
|
||||
script = _write_script(tmp_path, "h.sh",
|
||||
"#!/usr/bin/env bash\nprintf '{}\\n'\n")
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "home"))
|
||||
monkeypatch.setenv("HERMES_ACCEPT_HOOKS", "1")
|
||||
|
||||
plugins._plugin_manager = plugins.PluginManager()
|
||||
|
||||
cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
|
||||
|
||||
first = shell_hooks.register_from_config(cfg, accept_hooks=True)
|
||||
second = shell_hooks.register_from_config(cfg, accept_hooks=True)
|
||||
assert len(first) == 1
|
||||
assert second == []
|
||||
# Only one callback on the manager
|
||||
mgr = plugins.get_plugin_manager()
|
||||
assert len(mgr._hooks.get("on_session_start", [])) == 1
|
||||
|
||||
def test_same_command_different_matcher_registers_both(
|
||||
self, tmp_path, monkeypatch,
|
||||
):
|
||||
"""Same script used for different matchers under one event must
|
||||
register both callbacks — dedupe keys on (event, matcher, command)."""
|
||||
from hermes_cli import plugins
|
||||
|
||||
script = _write_script(tmp_path, "h.sh",
|
||||
"#!/usr/bin/env bash\nprintf '{}\\n'\n")
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "home"))
|
||||
monkeypatch.setenv("HERMES_ACCEPT_HOOKS", "1")
|
||||
|
||||
plugins._plugin_manager = plugins.PluginManager()
|
||||
|
||||
cfg = {
|
||||
"hooks": {
|
||||
"pre_tool_call": [
|
||||
{"matcher": "terminal", "command": str(script)},
|
||||
{"matcher": "web_search", "command": str(script)},
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
registered = shell_hooks.register_from_config(cfg, accept_hooks=True)
|
||||
assert len(registered) == 2
|
||||
mgr = plugins.get_plugin_manager()
|
||||
assert len(mgr._hooks.get("pre_tool_call", [])) == 2
|
||||
|
||||
|
||||
# ── Allowlist concurrency ─────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestAllowlistConcurrency:
|
||||
"""Regression tests for the Codex#1 finding: simultaneous
|
||||
_record_approval() calls used to collide on a fixed tmp path and
|
||||
silently lose entries under read-modify-write races."""
|
||||
|
||||
def test_parallel_record_approval_does_not_lose_entries(
|
||||
self, tmp_path, monkeypatch,
|
||||
):
|
||||
import threading
|
||||
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "home"))
|
||||
|
||||
N = 32
|
||||
barrier = threading.Barrier(N)
|
||||
errors: list = []
|
||||
|
||||
def worker(i: int) -> None:
|
||||
try:
|
||||
barrier.wait(timeout=5)
|
||||
shell_hooks._record_approval(
|
||||
"on_session_start", f"/bin/hook-{i}.sh",
|
||||
)
|
||||
except Exception as exc: # pragma: no cover
|
||||
errors.append(exc)
|
||||
|
||||
threads = [threading.Thread(target=worker, args=(i,)) for i in range(N)]
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
assert not errors, f"worker errors: {errors}"
|
||||
|
||||
data = shell_hooks.load_allowlist()
|
||||
commands = {e["command"] for e in data["approvals"]}
|
||||
assert commands == {f"/bin/hook-{i}.sh" for i in range(N)}, (
|
||||
f"expected all {N} entries, got {len(commands)}"
|
||||
)
|
||||
|
||||
def test_non_posix_fallback_does_not_self_deadlock(
|
||||
self, tmp_path, monkeypatch,
|
||||
):
|
||||
"""Regression: on platforms without fcntl, the fallback lock must
|
||||
be separate from _registered_lock. register_from_config holds
|
||||
_registered_lock while calling _record_approval (via the consent
|
||||
prompt path), so a shared non-reentrant lock would self-deadlock."""
|
||||
import threading
|
||||
|
||||
monkeypatch.setattr(shell_hooks, "fcntl", None)
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "home"))
|
||||
|
||||
completed = threading.Event()
|
||||
errors: list = []
|
||||
|
||||
def target() -> None:
|
||||
try:
|
||||
with shell_hooks._registered_lock:
|
||||
shell_hooks._record_approval(
|
||||
"on_session_start", "/bin/x.sh",
|
||||
)
|
||||
completed.set()
|
||||
except Exception as exc: # pragma: no cover
|
||||
errors.append(exc)
|
||||
completed.set()
|
||||
|
||||
t = threading.Thread(target=target, daemon=True)
|
||||
t.start()
|
||||
if not completed.wait(timeout=3.0):
|
||||
pytest.fail(
|
||||
"non-POSIX fallback self-deadlocked — "
|
||||
"_locked_update_approvals must not reuse _registered_lock",
|
||||
)
|
||||
t.join(timeout=1.0)
|
||||
assert not errors, f"errors: {errors}"
|
||||
assert shell_hooks._is_allowlisted(
|
||||
"on_session_start", "/bin/x.sh",
|
||||
)
|
||||
|
||||
def test_save_allowlist_failure_logs_actionable_warning(
|
||||
self, tmp_path, monkeypatch, caplog,
|
||||
):
|
||||
"""Persistence failures must log the path, errno, and
|
||||
re-prompt consequence so "hermes keeps asking" is debuggable."""
|
||||
import logging
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "home"))
|
||||
monkeypatch.setattr(
|
||||
shell_hooks.tempfile, "mkstemp",
|
||||
lambda *a, **kw: (_ for _ in ()).throw(OSError(28, "No space")),
|
||||
)
|
||||
with caplog.at_level(logging.WARNING, logger=shell_hooks.logger.name):
|
||||
shell_hooks.save_allowlist({"approvals": []})
|
||||
msg = next(
|
||||
(r.getMessage() for r in caplog.records
|
||||
if "Failed to persist" in r.getMessage()), "",
|
||||
)
|
||||
assert "shell-hooks-allowlist.json" in msg
|
||||
assert "No space" in msg
|
||||
assert "re-prompt" in msg
|
||||
|
||||
def test_script_is_executable_handles_interpreter_prefix(self, tmp_path):
|
||||
"""For ``python3 hook.py`` and similar the interpreter reads
|
||||
the script, so X_OK on the script itself is not required —
|
||||
only R_OK. Bare invocations still require X_OK."""
|
||||
script = tmp_path / "hook.py"
|
||||
script.write_text("print()\n") # readable, NOT executable
|
||||
|
||||
# Interpreter prefix: R_OK is enough.
|
||||
assert shell_hooks.script_is_executable(f"python3 {script}")
|
||||
assert shell_hooks.script_is_executable(f"/usr/bin/env python3 {script}")
|
||||
|
||||
# Bare invocation on the same non-X_OK file: not runnable.
|
||||
assert not shell_hooks.script_is_executable(str(script))
|
||||
|
||||
# Flip +x; bare invocation is now runnable too.
|
||||
script.chmod(0o755)
|
||||
assert shell_hooks.script_is_executable(str(script))
|
||||
|
||||
def test_command_script_path_resolution(self):
|
||||
"""Regression: ``_command_script_path`` used to return the first
|
||||
shlex token, which picked the interpreter (``python3``, ``bash``,
|
||||
``/usr/bin/env``) instead of the actual script for any
|
||||
interpreter-prefixed command. That broke
|
||||
``hermes hooks doctor``'s executability check and silently
|
||||
disabled mtime drift detection for such hooks."""
|
||||
cases = [
|
||||
# bare path
|
||||
("/path/hook.sh", "/path/hook.sh"),
|
||||
("/bin/echo hi", "/bin/echo"),
|
||||
("~/hook.sh", "~/hook.sh"),
|
||||
("hook.sh", "hook.sh"),
|
||||
# interpreter prefix
|
||||
("python3 /path/hook.py", "/path/hook.py"),
|
||||
("bash /path/hook.sh", "/path/hook.sh"),
|
||||
("bash ~/hook.sh", "~/hook.sh"),
|
||||
("python3 -u /path/hook.py", "/path/hook.py"),
|
||||
("nice -n 10 /path/hook.sh", "/path/hook.sh"),
|
||||
# /usr/bin/env shebang form — must find the *script*, not env
|
||||
("/usr/bin/env python3 /path/hook.py", "/path/hook.py"),
|
||||
("/usr/bin/env bash /path/hook.sh", "/path/hook.sh"),
|
||||
# no path-like tokens → fallback to first token
|
||||
("my-binary --verbose", "my-binary"),
|
||||
("python3 -c 'print(1)'", "python3"),
|
||||
# unparseable (unbalanced quotes) → return command as-is
|
||||
("python3 'unterminated", "python3 'unterminated"),
|
||||
# empty
|
||||
("", ""),
|
||||
]
|
||||
for command, expected in cases:
|
||||
got = shell_hooks._command_script_path(command)
|
||||
assert got == expected, f"{command!r} -> {got!r}, expected {expected!r}"
|
||||
|
||||
def test_save_allowlist_uses_unique_tmp_paths(self, tmp_path, monkeypatch):
|
||||
"""Two save_allowlist calls in flight must use distinct tmp files
|
||||
so the loser's os.replace does not ENOENT on the winner's sweep."""
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "home"))
|
||||
p = shell_hooks.allowlist_path()
|
||||
p.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
tmp_paths_seen: list = []
|
||||
real_mkstemp = shell_hooks.tempfile.mkstemp
|
||||
|
||||
def spying_mkstemp(*args, **kwargs):
|
||||
fd, path = real_mkstemp(*args, **kwargs)
|
||||
tmp_paths_seen.append(path)
|
||||
return fd, path
|
||||
|
||||
monkeypatch.setattr(shell_hooks.tempfile, "mkstemp", spying_mkstemp)
|
||||
|
||||
shell_hooks.save_allowlist({"approvals": [{"event": "a", "command": "x"}]})
|
||||
shell_hooks.save_allowlist({"approvals": [{"event": "b", "command": "y"}]})
|
||||
|
||||
assert len(tmp_paths_seen) == 2
|
||||
assert tmp_paths_seen[0] != tmp_paths_seen[1]
|
||||
242
tests/agent/test_shell_hooks_consent.py
Normal file
242
tests/agent/test_shell_hooks_consent.py
Normal file
@@ -0,0 +1,242 @@
|
||||
"""Consent-flow tests for the shell-hook allowlist.
|
||||
|
||||
Covers the prompt/non-prompt decision tree: TTY vs non-TTY, and the
|
||||
three accept-hooks channels (--accept-hooks, HERMES_ACCEPT_HOOKS env,
|
||||
hooks_auto_accept: config key).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from agent import shell_hooks
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _isolated_home(tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_home"))
|
||||
monkeypatch.delenv("HERMES_ACCEPT_HOOKS", raising=False)
|
||||
shell_hooks.reset_for_tests()
|
||||
yield
|
||||
shell_hooks.reset_for_tests()
|
||||
|
||||
|
||||
def _write_hook_script(tmp_path: Path) -> Path:
|
||||
script = tmp_path / "hook.sh"
|
||||
script.write_text("#!/usr/bin/env bash\nprintf '{}\\n'\n")
|
||||
script.chmod(0o755)
|
||||
return script
|
||||
|
||||
|
||||
# ── TTY prompt flow ───────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestTTYPromptFlow:
|
||||
def test_first_use_prompts_and_approves(self, tmp_path):
|
||||
from hermes_cli import plugins
|
||||
|
||||
script = _write_hook_script(tmp_path)
|
||||
plugins._plugin_manager = plugins.PluginManager()
|
||||
|
||||
with patch("sys.stdin") as mock_stdin, patch("builtins.input", return_value="y"):
|
||||
mock_stdin.isatty.return_value = True
|
||||
registered = shell_hooks.register_from_config(
|
||||
{"hooks": {"on_session_start": [{"command": str(script)}]}},
|
||||
accept_hooks=False,
|
||||
)
|
||||
assert len(registered) == 1
|
||||
|
||||
entry = shell_hooks.allowlist_entry_for("on_session_start", str(script))
|
||||
assert entry is not None
|
||||
assert entry["event"] == "on_session_start"
|
||||
assert entry["command"] == str(script)
|
||||
|
||||
def test_first_use_prompts_and_rejects(self, tmp_path):
|
||||
from hermes_cli import plugins
|
||||
|
||||
script = _write_hook_script(tmp_path)
|
||||
plugins._plugin_manager = plugins.PluginManager()
|
||||
|
||||
with patch("sys.stdin") as mock_stdin, patch("builtins.input", return_value="n"):
|
||||
mock_stdin.isatty.return_value = True
|
||||
registered = shell_hooks.register_from_config(
|
||||
{"hooks": {"on_session_start": [{"command": str(script)}]}},
|
||||
accept_hooks=False,
|
||||
)
|
||||
assert registered == []
|
||||
assert shell_hooks.allowlist_entry_for(
|
||||
"on_session_start", str(script),
|
||||
) is None
|
||||
|
||||
def test_subsequent_use_does_not_prompt(self, tmp_path):
|
||||
"""After the first approval, re-registration must be silent."""
|
||||
from hermes_cli import plugins
|
||||
|
||||
script = _write_hook_script(tmp_path)
|
||||
plugins._plugin_manager = plugins.PluginManager()
|
||||
|
||||
# First call: TTY, approved.
|
||||
with patch("sys.stdin") as mock_stdin, patch("builtins.input", return_value="y"):
|
||||
mock_stdin.isatty.return_value = True
|
||||
shell_hooks.register_from_config(
|
||||
{"hooks": {"on_session_start": [{"command": str(script)}]}},
|
||||
accept_hooks=False,
|
||||
)
|
||||
|
||||
# Reset registration set but keep the allowlist on disk.
|
||||
shell_hooks.reset_for_tests()
|
||||
|
||||
# Second call: TTY, input() must NOT be called.
|
||||
with patch("sys.stdin") as mock_stdin, patch(
|
||||
"builtins.input", side_effect=AssertionError("should not prompt"),
|
||||
):
|
||||
mock_stdin.isatty.return_value = True
|
||||
registered = shell_hooks.register_from_config(
|
||||
{"hooks": {"on_session_start": [{"command": str(script)}]}},
|
||||
accept_hooks=False,
|
||||
)
|
||||
assert len(registered) == 1
|
||||
|
||||
|
||||
# ── non-TTY flow ──────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestNonTTYFlow:
|
||||
def test_no_tty_no_flag_skips_registration(self, tmp_path):
|
||||
from hermes_cli import plugins
|
||||
|
||||
script = _write_hook_script(tmp_path)
|
||||
plugins._plugin_manager = plugins.PluginManager()
|
||||
|
||||
with patch("sys.stdin") as mock_stdin:
|
||||
mock_stdin.isatty.return_value = False
|
||||
registered = shell_hooks.register_from_config(
|
||||
{"hooks": {"on_session_start": [{"command": str(script)}]}},
|
||||
accept_hooks=False,
|
||||
)
|
||||
assert registered == []
|
||||
|
||||
def test_no_tty_with_argument_flag_accepts(self, tmp_path):
|
||||
from hermes_cli import plugins
|
||||
|
||||
script = _write_hook_script(tmp_path)
|
||||
plugins._plugin_manager = plugins.PluginManager()
|
||||
|
||||
with patch("sys.stdin") as mock_stdin:
|
||||
mock_stdin.isatty.return_value = False
|
||||
registered = shell_hooks.register_from_config(
|
||||
{"hooks": {"on_session_start": [{"command": str(script)}]}},
|
||||
accept_hooks=True,
|
||||
)
|
||||
assert len(registered) == 1
|
||||
|
||||
def test_no_tty_with_env_accepts(self, tmp_path, monkeypatch):
|
||||
from hermes_cli import plugins
|
||||
|
||||
script = _write_hook_script(tmp_path)
|
||||
plugins._plugin_manager = plugins.PluginManager()
|
||||
monkeypatch.setenv("HERMES_ACCEPT_HOOKS", "1")
|
||||
|
||||
with patch("sys.stdin") as mock_stdin:
|
||||
mock_stdin.isatty.return_value = False
|
||||
registered = shell_hooks.register_from_config(
|
||||
{"hooks": {"on_session_start": [{"command": str(script)}]}},
|
||||
accept_hooks=False,
|
||||
)
|
||||
assert len(registered) == 1
|
||||
|
||||
def test_no_tty_with_config_accepts(self, tmp_path):
|
||||
from hermes_cli import plugins
|
||||
|
||||
script = _write_hook_script(tmp_path)
|
||||
plugins._plugin_manager = plugins.PluginManager()
|
||||
|
||||
with patch("sys.stdin") as mock_stdin:
|
||||
mock_stdin.isatty.return_value = False
|
||||
registered = shell_hooks.register_from_config(
|
||||
{
|
||||
"hooks_auto_accept": True,
|
||||
"hooks": {"on_session_start": [{"command": str(script)}]},
|
||||
},
|
||||
accept_hooks=False,
|
||||
)
|
||||
assert len(registered) == 1
|
||||
|
||||
|
||||
# ── Allowlist + revoke + mtime ────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestAllowlistOps:
|
||||
def test_mtime_recorded_on_approval(self, tmp_path):
|
||||
script = _write_hook_script(tmp_path)
|
||||
shell_hooks._record_approval("on_session_start", str(script))
|
||||
|
||||
entry = shell_hooks.allowlist_entry_for(
|
||||
"on_session_start", str(script),
|
||||
)
|
||||
assert entry is not None
|
||||
assert entry["script_mtime_at_approval"] is not None
|
||||
# ISO-8601 Z-suffix
|
||||
assert entry["script_mtime_at_approval"].endswith("Z")
|
||||
|
||||
def test_revoke_removes_entry(self, tmp_path):
|
||||
script = _write_hook_script(tmp_path)
|
||||
shell_hooks._record_approval("on_session_start", str(script))
|
||||
assert shell_hooks.allowlist_entry_for(
|
||||
"on_session_start", str(script),
|
||||
) is not None
|
||||
|
||||
removed = shell_hooks.revoke(str(script))
|
||||
assert removed == 1
|
||||
assert shell_hooks.allowlist_entry_for(
|
||||
"on_session_start", str(script),
|
||||
) is None
|
||||
|
||||
def test_revoke_unknown_returns_zero(self, tmp_path):
|
||||
assert shell_hooks.revoke(str(tmp_path / "never-approved.sh")) == 0
|
||||
|
||||
def test_tilde_path_approval_records_resolvable_mtime(self, tmp_path, monkeypatch):
|
||||
"""If the command uses ~ the approval must still find the file."""
|
||||
monkeypatch.setenv("HOME", str(tmp_path))
|
||||
target = tmp_path / "hook.sh"
|
||||
target.write_text("#!/usr/bin/env bash\n")
|
||||
target.chmod(0o755)
|
||||
|
||||
shell_hooks._record_approval("on_session_start", "~/hook.sh")
|
||||
entry = shell_hooks.allowlist_entry_for(
|
||||
"on_session_start", "~/hook.sh",
|
||||
)
|
||||
assert entry is not None
|
||||
# Must not be None — the tilde was expanded before stat().
|
||||
assert entry["script_mtime_at_approval"] is not None
|
||||
|
||||
def test_duplicate_approval_replaces_mtime(self, tmp_path):
|
||||
"""Re-approving the same pair refreshes the approval timestamp."""
|
||||
script = _write_hook_script(tmp_path)
|
||||
shell_hooks._record_approval("on_session_start", str(script))
|
||||
original_entry = shell_hooks.allowlist_entry_for(
|
||||
"on_session_start", str(script),
|
||||
)
|
||||
assert original_entry is not None
|
||||
|
||||
# Touch the script to bump its mtime then re-approve.
|
||||
import os
|
||||
import time
|
||||
new_mtime = original_entry.get("script_mtime_at_approval")
|
||||
time.sleep(0.01)
|
||||
os.utime(script, None) # current time
|
||||
|
||||
shell_hooks._record_approval("on_session_start", str(script))
|
||||
|
||||
# Exactly one entry per (event, command).
|
||||
approvals = shell_hooks.load_allowlist().get("approvals", [])
|
||||
matching = [
|
||||
e for e in approvals
|
||||
if e.get("event") == "on_session_start"
|
||||
and e.get("command") == str(script)
|
||||
]
|
||||
assert len(matching) == 1
|
||||
224
tests/agent/test_subagent_stop_hook.py
Normal file
224
tests/agent/test_subagent_stop_hook.py
Normal file
@@ -0,0 +1,224 @@
|
||||
"""Tests for the subagent_stop hook event.
|
||||
|
||||
Covers wire-up from tools.delegate_tool.delegate_task:
|
||||
* fires once per child in both single-task and batch modes
|
||||
* runs on the parent thread (no re-entrancy for hook authors)
|
||||
* carries child_role when the agent exposes _delegate_role
|
||||
* carries child_role=None when _delegate_role is not set (pre-M3)
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import threading
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from tools.delegate_tool import delegate_task
|
||||
from hermes_cli import plugins
|
||||
|
||||
|
||||
def _make_parent(depth: int = 0, session_id: str = "parent-1"):
|
||||
parent = MagicMock()
|
||||
parent.base_url = "https://openrouter.ai/api/v1"
|
||||
parent.api_key = "***"
|
||||
parent.provider = "openrouter"
|
||||
parent.api_mode = "chat_completions"
|
||||
parent.model = "anthropic/claude-sonnet-4"
|
||||
parent.platform = "cli"
|
||||
parent.providers_allowed = None
|
||||
parent.providers_ignored = None
|
||||
parent.providers_order = None
|
||||
parent.provider_sort = None
|
||||
parent._session_db = None
|
||||
parent._delegate_depth = depth
|
||||
parent._active_children = []
|
||||
parent._active_children_lock = threading.Lock()
|
||||
parent._print_fn = None
|
||||
parent.tool_progress_callback = None
|
||||
parent.thinking_callback = None
|
||||
parent._memory_manager = None
|
||||
parent.session_id = session_id
|
||||
return parent
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _fresh_plugin_manager():
|
||||
"""Each test gets a fresh PluginManager so hook callbacks don't
|
||||
leak between tests."""
|
||||
original = plugins._plugin_manager
|
||||
plugins._plugin_manager = plugins.PluginManager()
|
||||
yield
|
||||
plugins._plugin_manager = original
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _stub_child_builder(monkeypatch):
|
||||
"""Replace _build_child_agent with a MagicMock factory so delegate_task
|
||||
never transitively imports run_agent / openai. Keeps the test runnable
|
||||
in environments without heavyweight runtime deps installed."""
|
||||
def _fake_build_child(task_index, **kwargs):
|
||||
child = MagicMock()
|
||||
child._delegate_saved_tool_names = []
|
||||
child._credential_pool = None
|
||||
return child
|
||||
|
||||
monkeypatch.setattr(
|
||||
"tools.delegate_tool._build_child_agent", _fake_build_child,
|
||||
)
|
||||
|
||||
|
||||
def _register_capturing_hook():
|
||||
captured = []
|
||||
|
||||
def _cb(**kwargs):
|
||||
kwargs["_thread"] = threading.current_thread()
|
||||
captured.append(kwargs)
|
||||
|
||||
mgr = plugins.get_plugin_manager()
|
||||
mgr._hooks.setdefault("subagent_stop", []).append(_cb)
|
||||
return captured
|
||||
|
||||
|
||||
# ── single-task mode ──────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestSingleTask:
|
||||
def test_fires_once(self):
|
||||
captured = _register_capturing_hook()
|
||||
|
||||
with patch("tools.delegate_tool._run_single_child") as mock_run:
|
||||
mock_run.return_value = {
|
||||
"task_index": 0,
|
||||
"status": "completed",
|
||||
"summary": "Done!",
|
||||
"api_calls": 3,
|
||||
"duration_seconds": 5.0,
|
||||
"_child_role": "analyst",
|
||||
}
|
||||
delegate_task(goal="do X", parent_agent=_make_parent())
|
||||
|
||||
assert len(captured) == 1
|
||||
payload = captured[0]
|
||||
assert payload["child_role"] == "analyst"
|
||||
assert payload["child_status"] == "completed"
|
||||
assert payload["child_summary"] == "Done!"
|
||||
assert payload["duration_ms"] == 5000
|
||||
|
||||
def test_fires_on_parent_thread(self):
|
||||
captured = _register_capturing_hook()
|
||||
main_thread = threading.current_thread()
|
||||
|
||||
with patch("tools.delegate_tool._run_single_child") as mock_run:
|
||||
mock_run.return_value = {
|
||||
"task_index": 0, "status": "completed",
|
||||
"summary": "x", "api_calls": 1, "duration_seconds": 0.1,
|
||||
"_child_role": None,
|
||||
}
|
||||
delegate_task(goal="go", parent_agent=_make_parent())
|
||||
|
||||
assert captured[0]["_thread"] is main_thread
|
||||
|
||||
def test_payload_includes_parent_session_id(self):
|
||||
captured = _register_capturing_hook()
|
||||
|
||||
with patch("tools.delegate_tool._run_single_child") as mock_run:
|
||||
mock_run.return_value = {
|
||||
"task_index": 0, "status": "completed",
|
||||
"summary": "x", "api_calls": 1, "duration_seconds": 0.1,
|
||||
"_child_role": None,
|
||||
}
|
||||
delegate_task(
|
||||
goal="go",
|
||||
parent_agent=_make_parent(session_id="sess-xyz"),
|
||||
)
|
||||
|
||||
assert captured[0]["parent_session_id"] == "sess-xyz"
|
||||
|
||||
|
||||
# ── batch mode ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestBatchMode:
|
||||
def test_fires_per_child(self):
|
||||
captured = _register_capturing_hook()
|
||||
|
||||
with patch("tools.delegate_tool._run_single_child") as mock_run:
|
||||
mock_run.side_effect = [
|
||||
{"task_index": 0, "status": "completed",
|
||||
"summary": "A", "api_calls": 1, "duration_seconds": 1.0,
|
||||
"_child_role": "role-a"},
|
||||
{"task_index": 1, "status": "completed",
|
||||
"summary": "B", "api_calls": 2, "duration_seconds": 2.0,
|
||||
"_child_role": "role-b"},
|
||||
{"task_index": 2, "status": "completed",
|
||||
"summary": "C", "api_calls": 3, "duration_seconds": 3.0,
|
||||
"_child_role": "role-c"},
|
||||
]
|
||||
delegate_task(
|
||||
tasks=[
|
||||
{"goal": "A"}, {"goal": "B"}, {"goal": "C"},
|
||||
],
|
||||
parent_agent=_make_parent(),
|
||||
)
|
||||
|
||||
assert len(captured) == 3
|
||||
roles = sorted(c["child_role"] for c in captured)
|
||||
assert roles == ["role-a", "role-b", "role-c"]
|
||||
|
||||
def test_all_fires_on_parent_thread(self):
|
||||
captured = _register_capturing_hook()
|
||||
main_thread = threading.current_thread()
|
||||
|
||||
with patch("tools.delegate_tool._run_single_child") as mock_run:
|
||||
mock_run.side_effect = [
|
||||
{"task_index": 0, "status": "completed",
|
||||
"summary": "A", "api_calls": 1, "duration_seconds": 1.0,
|
||||
"_child_role": None},
|
||||
{"task_index": 1, "status": "completed",
|
||||
"summary": "B", "api_calls": 2, "duration_seconds": 2.0,
|
||||
"_child_role": None},
|
||||
]
|
||||
delegate_task(
|
||||
tasks=[{"goal": "A"}, {"goal": "B"}],
|
||||
parent_agent=_make_parent(),
|
||||
)
|
||||
|
||||
for payload in captured:
|
||||
assert payload["_thread"] is main_thread
|
||||
|
||||
|
||||
# ── payload shape ─────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestPayloadShape:
|
||||
def test_role_absent_becomes_none(self):
|
||||
captured = _register_capturing_hook()
|
||||
|
||||
with patch("tools.delegate_tool._run_single_child") as mock_run:
|
||||
mock_run.return_value = {
|
||||
"task_index": 0, "status": "completed",
|
||||
"summary": "x", "api_calls": 1, "duration_seconds": 0.1,
|
||||
# Deliberately omit _child_role — pre-M3 shape.
|
||||
}
|
||||
delegate_task(goal="do X", parent_agent=_make_parent())
|
||||
|
||||
assert captured[0]["child_role"] is None
|
||||
|
||||
def test_result_does_not_leak_child_role_field(self):
|
||||
"""The internal _child_role key must be stripped before the
|
||||
result dict is serialised to JSON."""
|
||||
_register_capturing_hook()
|
||||
|
||||
with patch("tools.delegate_tool._run_single_child") as mock_run:
|
||||
mock_run.return_value = {
|
||||
"task_index": 0, "status": "completed",
|
||||
"summary": "x", "api_calls": 1, "duration_seconds": 0.1,
|
||||
"_child_role": "leaf",
|
||||
}
|
||||
raw = delegate_task(goal="do X", parent_agent=_make_parent())
|
||||
|
||||
parsed = json.loads(raw)
|
||||
assert "results" in parsed
|
||||
assert "_child_role" not in parsed["results"][0]
|
||||
@@ -91,3 +91,42 @@ class TestYoloEnvVar:
|
||||
args = parser.parse_args(["chat"])
|
||||
self._simulate_cmd_chat_yolo_check(args)
|
||||
assert os.environ.get("HERMES_YOLO_MODE") is None
|
||||
|
||||
|
||||
class TestAcceptHooksOnAgentSubparsers:
|
||||
"""Verify --accept-hooks is accepted at every agent-subcommand
|
||||
position (before the subcommand, between group/subcommand, and
|
||||
after the leaf subcommand) for gateway/cron/mcp/acp. Regression
|
||||
against prior behaviour where the flag only worked on the root
|
||||
parser and `chat`, so `hermes gateway run --accept-hooks` failed
|
||||
with `unrecognized arguments`."""
|
||||
|
||||
@pytest.mark.parametrize("argv", [
|
||||
["--accept-hooks", "gateway", "run", "--help"],
|
||||
["gateway", "--accept-hooks", "run", "--help"],
|
||||
["gateway", "run", "--accept-hooks", "--help"],
|
||||
["--accept-hooks", "cron", "tick", "--help"],
|
||||
["cron", "--accept-hooks", "tick", "--help"],
|
||||
["cron", "tick", "--accept-hooks", "--help"],
|
||||
["cron", "run", "--accept-hooks", "dummy-id", "--help"],
|
||||
["--accept-hooks", "mcp", "serve", "--help"],
|
||||
["mcp", "--accept-hooks", "serve", "--help"],
|
||||
["mcp", "serve", "--accept-hooks", "--help"],
|
||||
["acp", "--accept-hooks", "--help"],
|
||||
])
|
||||
def test_accepted_at_every_position(self, argv):
|
||||
"""Invoking `hermes <argv>` must exit 0 (help) rather than
|
||||
failing with `unrecognized arguments`."""
|
||||
import subprocess
|
||||
result = subprocess.run(
|
||||
[sys.executable, "-m", "hermes_cli.main", *argv],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=15,
|
||||
)
|
||||
assert result.returncode == 0, (
|
||||
f"argv={argv!r} returned {result.returncode}\n"
|
||||
f"stdout: {result.stdout[:300]}\n"
|
||||
f"stderr: {result.stderr[:300]}"
|
||||
)
|
||||
assert "unrecognized arguments" not in result.stderr
|
||||
|
||||
268
tests/hermes_cli/test_hooks_cli.py
Normal file
268
tests/hermes_cli/test_hooks_cli.py
Normal file
@@ -0,0 +1,268 @@
|
||||
"""Tests for the ``hermes hooks`` CLI subcommand."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import json
|
||||
import sys
|
||||
from contextlib import redirect_stdout
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from agent import shell_hooks
|
||||
from hermes_cli import hooks as hooks_cli
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _isolated_home(tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path / "home"))
|
||||
monkeypatch.delenv("HERMES_ACCEPT_HOOKS", raising=False)
|
||||
shell_hooks.reset_for_tests()
|
||||
yield
|
||||
shell_hooks.reset_for_tests()
|
||||
|
||||
|
||||
def _hook_script(tmp_path: Path, body: str, name: str = "hook.sh") -> Path:
|
||||
p = tmp_path / name
|
||||
p.write_text(body)
|
||||
p.chmod(0o755)
|
||||
return p
|
||||
|
||||
|
||||
def _run(sub_args: SimpleNamespace) -> str:
|
||||
"""Capture stdout for a hooks_command invocation."""
|
||||
buf = io.StringIO()
|
||||
with redirect_stdout(buf):
|
||||
hooks_cli.hooks_command(sub_args)
|
||||
return buf.getvalue()
|
||||
|
||||
|
||||
# ── list ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestHooksList:
|
||||
def test_empty_config(self, tmp_path):
|
||||
with patch("hermes_cli.config.load_config", return_value={}):
|
||||
out = _run(SimpleNamespace(hooks_action="list"))
|
||||
assert "No shell hooks configured" in out
|
||||
|
||||
def test_shows_configured_and_consent_status(self, tmp_path):
|
||||
script = _hook_script(
|
||||
tmp_path, "#!/usr/bin/env bash\nprintf '{}\\n'\n",
|
||||
)
|
||||
cfg = {
|
||||
"hooks": {
|
||||
"pre_tool_call": [
|
||||
{"matcher": "terminal", "command": str(script), "timeout": 30},
|
||||
],
|
||||
"on_session_start": [
|
||||
{"command": str(script)},
|
||||
],
|
||||
}
|
||||
}
|
||||
|
||||
# Approve one of the two so we can see both states in the output
|
||||
shell_hooks._record_approval("pre_tool_call", str(script))
|
||||
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
out = _run(SimpleNamespace(hooks_action="list"))
|
||||
|
||||
assert "[pre_tool_call]" in out
|
||||
assert "[on_session_start]" in out
|
||||
assert "✓ allowed" in out
|
||||
assert "✗ not allowlisted" in out
|
||||
assert str(script) in out
|
||||
|
||||
|
||||
# ── test ──────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestHooksTest:
|
||||
def test_synthetic_payload_matches_production_shape(self, tmp_path):
|
||||
"""`hermes hooks test` must feed the script stdin in the same
|
||||
shape invoke_hook() would at runtime. Prior to this fix,
|
||||
run_once bypassed _serialize_payload and the two paths diverged —
|
||||
scripts tested with `hermes hooks test` saw different top-level
|
||||
keys than at runtime, silently breaking in production."""
|
||||
capture = tmp_path / "captured.json"
|
||||
script = _hook_script(
|
||||
tmp_path,
|
||||
f"#!/usr/bin/env bash\ncat - > {capture}\nprintf '{{}}\\n'\n",
|
||||
)
|
||||
cfg = {"hooks": {"subagent_stop": [{"command": str(script)}]}}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
_run(SimpleNamespace(
|
||||
hooks_action="test", event="subagent_stop",
|
||||
for_tool=None, payload_file=None,
|
||||
))
|
||||
|
||||
seen = json.loads(capture.read_text())
|
||||
# Same top-level keys _serialize_payload produces at runtime
|
||||
assert set(seen.keys()) == {
|
||||
"hook_event_name", "tool_name", "tool_input",
|
||||
"session_id", "cwd", "extra",
|
||||
}
|
||||
# parent_session_id was routed to top-level session_id (matches runtime)
|
||||
assert seen["session_id"] == "parent-sess"
|
||||
assert "parent_session_id" not in seen["extra"]
|
||||
# subagent_stop has no tool, so tool_name / tool_input are null
|
||||
assert seen["tool_name"] is None
|
||||
assert seen["tool_input"] is None
|
||||
|
||||
def test_fires_real_subprocess_and_parses_block(self, tmp_path):
|
||||
block_script = _hook_script(
|
||||
tmp_path,
|
||||
"#!/usr/bin/env bash\n"
|
||||
'printf \'{"decision": "block", "reason": "nope"}\\n\'\n',
|
||||
name="block.sh",
|
||||
)
|
||||
cfg = {
|
||||
"hooks": {
|
||||
"pre_tool_call": [
|
||||
{"matcher": "terminal", "command": str(block_script)},
|
||||
],
|
||||
},
|
||||
}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
out = _run(SimpleNamespace(
|
||||
hooks_action="test", event="pre_tool_call",
|
||||
for_tool="terminal", payload_file=None,
|
||||
))
|
||||
|
||||
# Parsed block appears in output
|
||||
assert '"action": "block"' in out
|
||||
assert '"message": "nope"' in out
|
||||
|
||||
def test_for_tool_matcher_filters(self, tmp_path):
|
||||
script = _hook_script(tmp_path, "#!/usr/bin/env bash\nprintf '{}\\n'\n")
|
||||
cfg = {
|
||||
"hooks": {
|
||||
"pre_tool_call": [
|
||||
{"matcher": "terminal", "command": str(script)},
|
||||
],
|
||||
}
|
||||
}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
out = _run(SimpleNamespace(
|
||||
hooks_action="test", event="pre_tool_call",
|
||||
for_tool="web_search", payload_file=None,
|
||||
))
|
||||
assert "No shell hooks" in out
|
||||
|
||||
def test_unknown_event(self):
|
||||
with patch("hermes_cli.config.load_config", return_value={}):
|
||||
out = _run(SimpleNamespace(
|
||||
hooks_action="test", event="bogus_event",
|
||||
for_tool=None, payload_file=None,
|
||||
))
|
||||
assert "Unknown event" in out
|
||||
|
||||
|
||||
# ── revoke ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestHooksRevoke:
|
||||
def test_revoke_removes_entry(self, tmp_path):
|
||||
script = _hook_script(tmp_path, "#!/usr/bin/env bash\n")
|
||||
shell_hooks._record_approval("on_session_start", str(script))
|
||||
|
||||
out = _run(SimpleNamespace(hooks_action="revoke", command=str(script)))
|
||||
assert "Removed 1" in out
|
||||
assert shell_hooks.allowlist_entry_for(
|
||||
"on_session_start", str(script),
|
||||
) is None
|
||||
|
||||
def test_revoke_unknown(self, tmp_path):
|
||||
out = _run(SimpleNamespace(
|
||||
hooks_action="revoke", command=str(tmp_path / "never.sh"),
|
||||
))
|
||||
assert "No allowlist entry" in out
|
||||
|
||||
|
||||
# ── doctor ────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class TestHooksDoctor:
|
||||
def test_flags_missing_exec_bit(self, tmp_path):
|
||||
script = tmp_path / "hook.sh"
|
||||
script.write_text("#!/usr/bin/env bash\nprintf '{}\\n'\n")
|
||||
# No chmod — intentionally not executable
|
||||
cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
out = _run(SimpleNamespace(hooks_action="doctor"))
|
||||
assert "not executable" in out.lower()
|
||||
|
||||
def test_flags_unallowlisted(self, tmp_path):
|
||||
script = _hook_script(tmp_path, "#!/usr/bin/env bash\nprintf '{}\\n'\n")
|
||||
cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
out = _run(SimpleNamespace(hooks_action="doctor"))
|
||||
assert "not allowlisted" in out.lower()
|
||||
|
||||
def test_flags_invalid_json(self, tmp_path):
|
||||
script = _hook_script(
|
||||
tmp_path,
|
||||
"#!/usr/bin/env bash\necho 'not json!'\n",
|
||||
)
|
||||
shell_hooks._record_approval("on_session_start", str(script))
|
||||
cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
out = _run(SimpleNamespace(hooks_action="doctor"))
|
||||
assert "not valid JSON" in out
|
||||
|
||||
def test_flags_mtime_drift(self, tmp_path, monkeypatch):
|
||||
"""Allowlist with older mtime than current -> drift warning."""
|
||||
script = _hook_script(tmp_path, "#!/usr/bin/env bash\nprintf '{}\\n'\n")
|
||||
|
||||
# Manually stash an allowlist entry with an old mtime
|
||||
from agent.shell_hooks import allowlist_path
|
||||
allowlist_path().parent.mkdir(parents=True, exist_ok=True)
|
||||
allowlist_path().write_text(json.dumps({
|
||||
"approvals": [
|
||||
{
|
||||
"event": "on_session_start",
|
||||
"command": str(script),
|
||||
"approved_at": "2000-01-01T00:00:00Z",
|
||||
"script_mtime_at_approval": "2000-01-01T00:00:00Z",
|
||||
}
|
||||
]
|
||||
}))
|
||||
|
||||
cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
out = _run(SimpleNamespace(hooks_action="doctor"))
|
||||
assert "modified since approval" in out
|
||||
|
||||
def test_clean_script_runs(self, tmp_path):
|
||||
script = _hook_script(tmp_path, "#!/usr/bin/env bash\nprintf '{}\\n'\n")
|
||||
shell_hooks._record_approval("on_session_start", str(script))
|
||||
cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
out = _run(SimpleNamespace(hooks_action="doctor"))
|
||||
assert "All shell hooks look healthy" in out
|
||||
|
||||
def test_unallowlisted_script_is_not_executed(self, tmp_path):
|
||||
"""Regression for M4: `hermes hooks doctor` used to run every
|
||||
listed script against a synthetic payload as part of its JSON
|
||||
smoke test, which contradicted the documented workflow of
|
||||
"spot newly-added hooks *before they register*". An un-allowlisted
|
||||
script must not be executed during `doctor`."""
|
||||
sentinel = tmp_path / "executed"
|
||||
# Script would touch the sentinel if executed; we assert it wasn't.
|
||||
script = _hook_script(
|
||||
tmp_path,
|
||||
f"#!/usr/bin/env bash\ntouch {sentinel}\nprintf '{{}}\\n'\n",
|
||||
)
|
||||
cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
|
||||
with patch("hermes_cli.config.load_config", return_value=cfg):
|
||||
out = _run(SimpleNamespace(hooks_action="doctor"))
|
||||
|
||||
assert not sentinel.exists(), (
|
||||
"doctor executed an un-allowlisted script — "
|
||||
"M4 gate regressed"
|
||||
)
|
||||
assert "not allowlisted" in out.lower()
|
||||
assert "skipped JSON smoke test" in out
|
||||
Reference in New Issue
Block a user