feat: shell hooks — wire shell scripts as Hermes hook callbacks

Users can declare shell scripts in config.yaml under a hooks: block that fire on plugin-hook events (pre_tool_call, post_tool_call, pre_llm_call, subagent_stop, etc). Scripts receive JSON on stdin, can return JSON on stdout to block tool calls or inject context pre-LLM. Key design: - Registers closures on existing PluginManager._hooks dict — zero changes to invoke_hook() call sites - subprocess.run(shell=False) via shlex.split — no shell injection - First-use consent per (event, command) pair, persisted to allowlist JSON - Bypass via --accept-hooks, HERMES_ACCEPT_HOOKS=1, or hooks_auto_accept - hermes hooks list/test/revoke/doctor CLI subcommands - Adds subagent_stop hook event fired after delegate_task children exit - Claude Code compatible response shapes accepted Cherry-picked from PR #13143 by @pefontana.
2026-04-28 06:51:16 +08:00 · 2026-04-20 20:53:20 -07:00
parent 34c5c2538e
commit 3988c3c245
14 changed files with 3241 additions and 9 deletions
--- a/tests/agent/test_shell_hooks.py
+++ b/tests/agent/test_shell_hooks.py
@@ -0,0 +1,716 @@
+"""Tests for the shell-hooks subprocess bridge (agent.shell_hooks).
+
+These tests focus on the pure translation layer — JSON serialisation,
+JSON parsing, matcher behaviour, block-schema correctness, and the
+subprocess runner's graceful error handling.  Consent prompts are
+covered in ``test_shell_hooks_consent.py``.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import stat
+from pathlib import Path
+from typing import Any, Dict
+
+import pytest
+
+from agent import shell_hooks
+
+
+# ── helpers ───────────────────────────────────────────────────────────────
+
+
+def _write_script(tmp_path: Path, name: str, body: str) -> Path:
+    path = tmp_path / name
+    path.write_text(body)
+    path.chmod(0o755)
+    return path
+
+
+def _allowlist_pair(monkeypatch, tmp_path, event: str, command: str) -> None:
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_home"))
+    shell_hooks._record_approval(event, command)
+
+
+@pytest.fixture(autouse=True)
+def _reset_registration_state():
+    shell_hooks.reset_for_tests()
+    yield
+    shell_hooks.reset_for_tests()
+
+
+# ── _parse_response ───────────────────────────────────────────────────────
+
+
+class TestParseResponse:
+    def test_block_claude_code_style(self):
+        r = shell_hooks._parse_response(
+            "pre_tool_call",
+            '{"decision": "block", "reason": "nope"}',
+        )
+        assert r == {"action": "block", "message": "nope"}
+
+    def test_block_canonical_style(self):
+        r = shell_hooks._parse_response(
+            "pre_tool_call",
+            '{"action": "block", "message": "nope"}',
+        )
+        assert r == {"action": "block", "message": "nope"}
+
+    def test_block_canonical_wins_over_claude_style(self):
+        r = shell_hooks._parse_response(
+            "pre_tool_call",
+            '{"action": "block", "message": "canonical", '
+            '"decision": "block", "reason": "claude"}',
+        )
+        assert r == {"action": "block", "message": "canonical"}
+
+    def test_empty_stdout_returns_none(self):
+        assert shell_hooks._parse_response("pre_tool_call", "") is None
+        assert shell_hooks._parse_response("pre_tool_call", "   ") is None
+
+    def test_invalid_json_returns_none(self):
+        assert shell_hooks._parse_response("pre_tool_call", "not json") is None
+
+    def test_non_dict_json_returns_none(self):
+        assert shell_hooks._parse_response("pre_tool_call", "[1, 2]") is None
+
+    def test_non_block_pre_tool_call_returns_none(self):
+        r = shell_hooks._parse_response("pre_tool_call", '{"decision": "allow"}')
+        assert r is None
+
+    def test_pre_llm_call_context_passthrough(self):
+        r = shell_hooks._parse_response(
+            "pre_llm_call", '{"context": "today is Friday"}',
+        )
+        assert r == {"context": "today is Friday"}
+
+    def test_subagent_stop_context_passthrough(self):
+        r = shell_hooks._parse_response(
+            "subagent_stop", '{"context": "child role=leaf"}',
+        )
+        assert r == {"context": "child role=leaf"}
+
+    def test_pre_llm_call_block_ignored(self):
+        """Only pre_tool_call honors block directives."""
+        r = shell_hooks._parse_response(
+            "pre_llm_call", '{"decision": "block", "reason": "no"}',
+        )
+        assert r is None
+
+
+# ── _serialize_payload ────────────────────────────────────────────────────
+
+
+class TestSerializePayload:
+    def test_basic_pre_tool_call_schema(self):
+        raw = shell_hooks._serialize_payload(
+            "pre_tool_call",
+            {
+                "tool_name": "terminal",
+                "args": {"command": "ls"},
+                "session_id": "sess-1",
+                "task_id": "t-1",
+                "tool_call_id": "c-1",
+            },
+        )
+        payload = json.loads(raw)
+        assert payload["hook_event_name"] == "pre_tool_call"
+        assert payload["tool_name"] == "terminal"
+        assert payload["tool_input"] == {"command": "ls"}
+        assert payload["session_id"] == "sess-1"
+        assert "cwd" in payload
+        # task_id / tool_call_id end up under extra
+        assert payload["extra"]["task_id"] == "t-1"
+        assert payload["extra"]["tool_call_id"] == "c-1"
+
+    def test_args_not_dict_becomes_null(self):
+        raw = shell_hooks._serialize_payload(
+            "pre_tool_call", {"args": ["not", "a", "dict"]},
+        )
+        payload = json.loads(raw)
+        assert payload["tool_input"] is None
+
+    def test_parent_session_id_used_when_no_session_id(self):
+        raw = shell_hooks._serialize_payload(
+            "subagent_stop", {"parent_session_id": "p-1"},
+        )
+        payload = json.loads(raw)
+        assert payload["session_id"] == "p-1"
+
+    def test_unserialisable_extras_stringified(self):
+        class Weird:
+            def __repr__(self) -> str:
+                return "<weird>"
+
+        raw = shell_hooks._serialize_payload(
+            "on_session_start", {"obj": Weird()},
+        )
+        payload = json.loads(raw)
+        assert payload["extra"]["obj"] == "<weird>"
+
+
+# ── Matcher behaviour ─────────────────────────────────────────────────────
+
+
+class TestMatcher:
+    def test_no_matcher_fires_for_any_tool(self):
+        spec = shell_hooks.ShellHookSpec(
+            event="pre_tool_call", command="echo", matcher=None,
+        )
+        assert spec.matches_tool("terminal")
+        assert spec.matches_tool("write_file")
+
+    def test_single_name_matcher(self):
+        spec = shell_hooks.ShellHookSpec(
+            event="pre_tool_call", command="echo", matcher="terminal",
+        )
+        assert spec.matches_tool("terminal")
+        assert not spec.matches_tool("web_search")
+
+    def test_alternation_matcher(self):
+        spec = shell_hooks.ShellHookSpec(
+            event="pre_tool_call", command="echo", matcher="terminal|file",
+        )
+        assert spec.matches_tool("terminal")
+        assert spec.matches_tool("file")
+        assert not spec.matches_tool("web")
+
+    def test_invalid_regex_falls_back_to_literal(self):
+        spec = shell_hooks.ShellHookSpec(
+            event="pre_tool_call", command="echo", matcher="foo[bar",
+        )
+        assert spec.matches_tool("foo[bar")
+        assert not spec.matches_tool("foo")
+
+    def test_matcher_ignored_when_no_tool_name(self):
+        spec = shell_hooks.ShellHookSpec(
+            event="pre_tool_call", command="echo", matcher="terminal",
+        )
+        assert not spec.matches_tool(None)
+
+    def test_matcher_leading_whitespace_stripped(self):
+        """YAML quirks can introduce leading/trailing whitespace — must
+        not silently break the matcher."""
+        spec = shell_hooks.ShellHookSpec(
+            event="pre_tool_call", command="echo", matcher=" terminal ",
+        )
+        assert spec.matcher == "terminal"
+        assert spec.matches_tool("terminal")
+
+    def test_matcher_trailing_newline_stripped(self):
+        spec = shell_hooks.ShellHookSpec(
+            event="pre_tool_call", command="echo", matcher="terminal\n",
+        )
+        assert spec.matches_tool("terminal")
+
+    def test_whitespace_only_matcher_becomes_none(self):
+        """A matcher that's pure whitespace is treated as 'no matcher'."""
+        spec = shell_hooks.ShellHookSpec(
+            event="pre_tool_call", command="echo", matcher="   ",
+        )
+        assert spec.matcher is None
+        assert spec.matches_tool("anything")
+
+
+# ── End-to-end subprocess behaviour ───────────────────────────────────────
+
+
+class TestCallbackSubprocess:
+    def test_timeout_returns_none(self, tmp_path):
+        # Script that sleeps forever; we set a 1s timeout.
+        script = _write_script(
+            tmp_path, "slow.sh",
+            "#!/usr/bin/env bash\nsleep 60\n",
+        )
+        spec = shell_hooks.ShellHookSpec(
+            event="post_tool_call", command=str(script), timeout=1,
+        )
+        cb = shell_hooks._make_callback(spec)
+        assert cb(tool_name="terminal") is None
+
+    def test_malformed_json_stdout_returns_none(self, tmp_path):
+        script = _write_script(
+            tmp_path, "bad_json.sh",
+            "#!/usr/bin/env bash\necho 'not json at all'\n",
+        )
+        spec = shell_hooks.ShellHookSpec(
+            event="pre_tool_call", command=str(script),
+        )
+        cb = shell_hooks._make_callback(spec)
+        # Matcher is None so the callback fires for any tool.
+        assert cb(tool_name="terminal") is None
+
+    def test_non_zero_exit_with_block_stdout_still_blocks(self, tmp_path):
+        """A script that signals failure via exit code AND prints a block
+        directive must still block — scripts should be free to mix exit
+        codes with parseable output."""
+        script = _write_script(
+            tmp_path, "exit1_block.sh",
+            "#!/usr/bin/env bash\n"
+            'printf \'{"decision": "block", "reason": "via exit 1"}\\n\'\n'
+            "exit 1\n",
+        )
+        spec = shell_hooks.ShellHookSpec(
+            event="pre_tool_call", command=str(script),
+        )
+        cb = shell_hooks._make_callback(spec)
+        assert cb(tool_name="terminal") == {"action": "block", "message": "via exit 1"}
+
+    def test_block_translation_end_to_end(self, tmp_path):
+        """v1 schema-bug regression gate.
+
+        Shell hook returns the Claude-Code-style payload and the bridge
+        must translate it to the canonical Hermes block shape so that
+        get_pre_tool_call_block_message() surfaces the block.
+        """
+        script = _write_script(
+            tmp_path, "blocker.sh",
+            "#!/usr/bin/env bash\n"
+            'printf \'{"decision": "block", "reason": "no terminal"}\\n\'\n',
+        )
+        spec = shell_hooks.ShellHookSpec(
+            event="pre_tool_call",
+            command=str(script),
+            matcher="terminal",
+        )
+        cb = shell_hooks._make_callback(spec)
+        result = cb(tool_name="terminal", args={"command": "rm -rf /"})
+        assert result == {"action": "block", "message": "no terminal"}
+
+    def test_block_aggregation_through_plugin_manager(self, tmp_path, monkeypatch):
+        """Registering via register_from_config makes
+        get_pre_tool_call_block_message surface the block — the real
+        end-to-end control flow used by run_agent._invoke_tool."""
+        from hermes_cli import plugins
+
+        script = _write_script(
+            tmp_path, "block.sh",
+            "#!/usr/bin/env bash\n"
+            'printf \'{"decision": "block", "reason": "blocked-by-shell"}\\n\'\n',
+        )
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path / "home"))
+        monkeypatch.setenv("HERMES_ACCEPT_HOOKS", "1")
+
+        # Fresh manager
+        plugins._plugin_manager = plugins.PluginManager()
+
+        cfg = {
+            "hooks": {
+                "pre_tool_call": [
+                    {"matcher": "terminal", "command": str(script)},
+                ],
+            },
+        }
+        registered = shell_hooks.register_from_config(cfg, accept_hooks=True)
+        assert len(registered) == 1
+
+        msg = plugins.get_pre_tool_call_block_message(
+            tool_name="terminal",
+            args={"command": "rm"},
+        )
+        assert msg == "blocked-by-shell"
+
+    def test_matcher_regex_filters_callback(self, tmp_path, monkeypatch):
+        """A matcher set to 'terminal' must not fire for 'web_search'."""
+        calls = tmp_path / "calls.log"
+        script = _write_script(
+            tmp_path, "log.sh",
+            f"#!/usr/bin/env bash\n"
+            f"echo \"$(cat -)\" >> {calls}\n"
+            f"printf '{{}}\\n'\n",
+        )
+        spec = shell_hooks.ShellHookSpec(
+            event="pre_tool_call",
+            command=str(script),
+            matcher="terminal",
+        )
+        cb = shell_hooks._make_callback(spec)
+        cb(tool_name="terminal", args={"command": "ls"})
+        cb(tool_name="web_search", args={"q": "x"})
+        cb(tool_name="file_read", args={"path": "x"})
+        assert calls.exists()
+        # Only the terminal call wrote to the log
+        assert calls.read_text().count("pre_tool_call") == 1
+
+    def test_payload_schema_delivered(self, tmp_path):
+        capture = tmp_path / "payload.json"
+        script = _write_script(
+            tmp_path, "capture.sh",
+            f"#!/usr/bin/env bash\ncat - > {capture}\nprintf '{{}}\\n'\n",
+        )
+        spec = shell_hooks.ShellHookSpec(
+            event="pre_tool_call", command=str(script),
+        )
+        cb = shell_hooks._make_callback(spec)
+        cb(
+            tool_name="terminal",
+            args={"command": "echo hi"},
+            session_id="sess-77",
+            task_id="task-77",
+        )
+        payload = json.loads(capture.read_text())
+        assert payload["hook_event_name"] == "pre_tool_call"
+        assert payload["tool_name"] == "terminal"
+        assert payload["tool_input"] == {"command": "echo hi"}
+        assert payload["session_id"] == "sess-77"
+        assert "cwd" in payload
+        assert payload["extra"]["task_id"] == "task-77"
+
+    def test_pre_llm_call_context_flows_through(self, tmp_path):
+        script = _write_script(
+            tmp_path, "ctx.sh",
+            "#!/usr/bin/env bash\n"
+            'printf \'{"context": "env-note"}\\n\'\n',
+        )
+        spec = shell_hooks.ShellHookSpec(
+            event="pre_llm_call", command=str(script),
+        )
+        cb = shell_hooks._make_callback(spec)
+        result = cb(
+            session_id="s1", user_message="hello",
+            conversation_history=[], is_first_turn=True,
+            model="gpt-4", platform="cli",
+        )
+        assert result == {"context": "env-note"}
+
+    def test_shlex_handles_paths_with_spaces(self, tmp_path):
+        dir_with_space = tmp_path / "path with space"
+        dir_with_space.mkdir()
+        script = _write_script(
+            dir_with_space, "ok.sh",
+            "#!/usr/bin/env bash\nprintf '{}\\n'\n",
+        )
+        # Quote the path so shlex keeps it as a single token.
+        spec = shell_hooks.ShellHookSpec(
+            event="post_tool_call",
+            command=f'"{script}"',
+        )
+        cb = shell_hooks._make_callback(spec)
+        # No crash = shlex parsed it correctly.
+        assert cb(tool_name="terminal") is None  # empty object parses to None
+
+    def test_missing_binary_logged_not_raised(self, tmp_path):
+        spec = shell_hooks.ShellHookSpec(
+            event="on_session_start",
+            command=str(tmp_path / "does-not-exist"),
+        )
+        cb = shell_hooks._make_callback(spec)
+        # Must not raise — agent loop should continue.
+        assert cb(session_id="s") is None
+
+    def test_non_executable_binary_logged_not_raised(self, tmp_path):
+        path = tmp_path / "no-exec"
+        path.write_text("#!/usr/bin/env bash\necho hi\n")
+        # Intentionally do NOT chmod +x.
+        spec = shell_hooks.ShellHookSpec(
+            event="on_session_start", command=str(path),
+        )
+        cb = shell_hooks._make_callback(spec)
+        assert cb(session_id="s") is None
+
+
+# ── config parsing ────────────────────────────────────────────────────────
+
+
+class TestParseHooksBlock:
+    def test_valid_entry(self):
+        specs = shell_hooks._parse_hooks_block({
+            "pre_tool_call": [
+                {"matcher": "terminal", "command": "/tmp/hook.sh", "timeout": 30},
+            ],
+        })
+        assert len(specs) == 1
+        assert specs[0].event == "pre_tool_call"
+        assert specs[0].matcher == "terminal"
+        assert specs[0].command == "/tmp/hook.sh"
+        assert specs[0].timeout == 30
+
+    def test_unknown_event_skipped(self, caplog):
+        specs = shell_hooks._parse_hooks_block({
+            "pre_tools_call": [  # typo
+                {"command": "/tmp/hook.sh"},
+            ],
+        })
+        assert specs == []
+
+    def test_missing_command_skipped(self):
+        specs = shell_hooks._parse_hooks_block({
+            "pre_tool_call": [{"matcher": "terminal"}],
+        })
+        assert specs == []
+
+    def test_timeout_clamped_to_max(self):
+        specs = shell_hooks._parse_hooks_block({
+            "post_tool_call": [
+                {"command": "/tmp/slow.sh", "timeout": 9999},
+            ],
+        })
+        assert specs[0].timeout == shell_hooks.MAX_TIMEOUT_SECONDS
+
+    def test_non_int_timeout_defaulted(self):
+        specs = shell_hooks._parse_hooks_block({
+            "post_tool_call": [
+                {"command": "/tmp/x.sh", "timeout": "thirty"},
+            ],
+        })
+        assert specs[0].timeout == shell_hooks.DEFAULT_TIMEOUT_SECONDS
+
+    def test_non_list_event_skipped(self):
+        specs = shell_hooks._parse_hooks_block({
+            "pre_tool_call": "not a list",
+        })
+        assert specs == []
+
+    def test_none_hooks_block(self):
+        assert shell_hooks._parse_hooks_block(None) == []
+        assert shell_hooks._parse_hooks_block("string") == []
+        assert shell_hooks._parse_hooks_block([]) == []
+
+    def test_non_tool_event_matcher_warns_and_drops(self, caplog):
+        """matcher: is only honored for pre/post_tool_call; must warn
+        and drop on other events so the spec reflects runtime."""
+        import logging
+        cfg = {"pre_llm_call": [{"matcher": "terminal", "command": "/bin/echo"}]}
+        with caplog.at_level(logging.WARNING, logger=shell_hooks.logger.name):
+            specs = shell_hooks._parse_hooks_block(cfg)
+        assert len(specs) == 1 and specs[0].matcher is None
+        assert any(
+            "only honored for pre_tool_call" in r.getMessage()
+            and "pre_llm_call" in r.getMessage()
+            for r in caplog.records
+        )
+
+
+# ── Idempotent registration ───────────────────────────────────────────────
+
+
+class TestIdempotentRegistration:
+    def test_double_call_registers_once(self, tmp_path, monkeypatch):
+        from hermes_cli import plugins
+
+        script = _write_script(tmp_path, "h.sh",
+                               "#!/usr/bin/env bash\nprintf '{}\\n'\n")
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path / "home"))
+        monkeypatch.setenv("HERMES_ACCEPT_HOOKS", "1")
+
+        plugins._plugin_manager = plugins.PluginManager()
+
+        cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
+
+        first = shell_hooks.register_from_config(cfg, accept_hooks=True)
+        second = shell_hooks.register_from_config(cfg, accept_hooks=True)
+        assert len(first) == 1
+        assert second == []
+        # Only one callback on the manager
+        mgr = plugins.get_plugin_manager()
+        assert len(mgr._hooks.get("on_session_start", [])) == 1
+
+    def test_same_command_different_matcher_registers_both(
+        self, tmp_path, monkeypatch,
+    ):
+        """Same script used for different matchers under one event must
+        register both callbacks — dedupe keys on (event, matcher, command)."""
+        from hermes_cli import plugins
+
+        script = _write_script(tmp_path, "h.sh",
+                               "#!/usr/bin/env bash\nprintf '{}\\n'\n")
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path / "home"))
+        monkeypatch.setenv("HERMES_ACCEPT_HOOKS", "1")
+
+        plugins._plugin_manager = plugins.PluginManager()
+
+        cfg = {
+            "hooks": {
+                "pre_tool_call": [
+                    {"matcher": "terminal", "command": str(script)},
+                    {"matcher": "web_search", "command": str(script)},
+                ],
+            },
+        }
+
+        registered = shell_hooks.register_from_config(cfg, accept_hooks=True)
+        assert len(registered) == 2
+        mgr = plugins.get_plugin_manager()
+        assert len(mgr._hooks.get("pre_tool_call", [])) == 2
+
+
+# ── Allowlist concurrency ─────────────────────────────────────────────────
+
+
+class TestAllowlistConcurrency:
+    """Regression tests for the Codex#1 finding: simultaneous
+    _record_approval() calls used to collide on a fixed tmp path and
+    silently lose entries under read-modify-write races."""
+
+    def test_parallel_record_approval_does_not_lose_entries(
+        self, tmp_path, monkeypatch,
+    ):
+        import threading
+
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path / "home"))
+
+        N = 32
+        barrier = threading.Barrier(N)
+        errors: list = []
+
+        def worker(i: int) -> None:
+            try:
+                barrier.wait(timeout=5)
+                shell_hooks._record_approval(
+                    "on_session_start", f"/bin/hook-{i}.sh",
+                )
+            except Exception as exc:  # pragma: no cover
+                errors.append(exc)
+
+        threads = [threading.Thread(target=worker, args=(i,)) for i in range(N)]
+        for t in threads:
+            t.start()
+        for t in threads:
+            t.join()
+
+        assert not errors, f"worker errors: {errors}"
+
+        data = shell_hooks.load_allowlist()
+        commands = {e["command"] for e in data["approvals"]}
+        assert commands == {f"/bin/hook-{i}.sh" for i in range(N)}, (
+            f"expected all {N} entries, got {len(commands)}"
+        )
+
+    def test_non_posix_fallback_does_not_self_deadlock(
+        self, tmp_path, monkeypatch,
+    ):
+        """Regression: on platforms without fcntl, the fallback lock must
+        be separate from _registered_lock.  register_from_config holds
+        _registered_lock while calling _record_approval (via the consent
+        prompt path), so a shared non-reentrant lock would self-deadlock."""
+        import threading
+
+        monkeypatch.setattr(shell_hooks, "fcntl", None)
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path / "home"))
+
+        completed = threading.Event()
+        errors: list = []
+
+        def target() -> None:
+            try:
+                with shell_hooks._registered_lock:
+                    shell_hooks._record_approval(
+                        "on_session_start", "/bin/x.sh",
+                    )
+                completed.set()
+            except Exception as exc:  # pragma: no cover
+                errors.append(exc)
+                completed.set()
+
+        t = threading.Thread(target=target, daemon=True)
+        t.start()
+        if not completed.wait(timeout=3.0):
+            pytest.fail(
+                "non-POSIX fallback self-deadlocked — "
+                "_locked_update_approvals must not reuse _registered_lock",
+            )
+        t.join(timeout=1.0)
+        assert not errors, f"errors: {errors}"
+        assert shell_hooks._is_allowlisted(
+            "on_session_start", "/bin/x.sh",
+        )
+
+    def test_save_allowlist_failure_logs_actionable_warning(
+        self, tmp_path, monkeypatch, caplog,
+    ):
+        """Persistence failures must log the path, errno, and
+        re-prompt consequence so "hermes keeps asking" is debuggable."""
+        import logging
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path / "home"))
+        monkeypatch.setattr(
+            shell_hooks.tempfile, "mkstemp",
+            lambda *a, **kw: (_ for _ in ()).throw(OSError(28, "No space")),
+        )
+        with caplog.at_level(logging.WARNING, logger=shell_hooks.logger.name):
+            shell_hooks.save_allowlist({"approvals": []})
+        msg = next(
+            (r.getMessage() for r in caplog.records
+             if "Failed to persist" in r.getMessage()), "",
+        )
+        assert "shell-hooks-allowlist.json" in msg
+        assert "No space" in msg
+        assert "re-prompt" in msg
+
+    def test_script_is_executable_handles_interpreter_prefix(self, tmp_path):
+        """For ``python3 hook.py`` and similar the interpreter reads
+        the script, so X_OK on the script itself is not required —
+        only R_OK.  Bare invocations still require X_OK."""
+        script = tmp_path / "hook.py"
+        script.write_text("print()\n")  # readable, NOT executable
+
+        # Interpreter prefix: R_OK is enough.
+        assert shell_hooks.script_is_executable(f"python3 {script}")
+        assert shell_hooks.script_is_executable(f"/usr/bin/env python3 {script}")
+
+        # Bare invocation on the same non-X_OK file: not runnable.
+        assert not shell_hooks.script_is_executable(str(script))
+
+        # Flip +x; bare invocation is now runnable too.
+        script.chmod(0o755)
+        assert shell_hooks.script_is_executable(str(script))
+
+    def test_command_script_path_resolution(self):
+        """Regression: ``_command_script_path`` used to return the first
+        shlex token, which picked the interpreter (``python3``, ``bash``,
+        ``/usr/bin/env``) instead of the actual script for any
+        interpreter-prefixed command.  That broke
+        ``hermes hooks doctor``'s executability check and silently
+        disabled mtime drift detection for such hooks."""
+        cases = [
+            # bare path
+            ("/path/hook.sh", "/path/hook.sh"),
+            ("/bin/echo hi", "/bin/echo"),
+            ("~/hook.sh", "~/hook.sh"),
+            ("hook.sh", "hook.sh"),
+            # interpreter prefix
+            ("python3 /path/hook.py", "/path/hook.py"),
+            ("bash /path/hook.sh", "/path/hook.sh"),
+            ("bash ~/hook.sh", "~/hook.sh"),
+            ("python3 -u /path/hook.py", "/path/hook.py"),
+            ("nice -n 10 /path/hook.sh", "/path/hook.sh"),
+            # /usr/bin/env shebang form — must find the *script*, not env
+            ("/usr/bin/env python3 /path/hook.py", "/path/hook.py"),
+            ("/usr/bin/env bash /path/hook.sh", "/path/hook.sh"),
+            # no path-like tokens → fallback to first token
+            ("my-binary --verbose", "my-binary"),
+            ("python3 -c 'print(1)'", "python3"),
+            # unparseable (unbalanced quotes) → return command as-is
+            ("python3 'unterminated", "python3 'unterminated"),
+            # empty
+            ("", ""),
+        ]
+        for command, expected in cases:
+            got = shell_hooks._command_script_path(command)
+            assert got == expected, f"{command!r} -> {got!r}, expected {expected!r}"
+
+    def test_save_allowlist_uses_unique_tmp_paths(self, tmp_path, monkeypatch):
+        """Two save_allowlist calls in flight must use distinct tmp files
+        so the loser's os.replace does not ENOENT on the winner's sweep."""
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path / "home"))
+        p = shell_hooks.allowlist_path()
+        p.parent.mkdir(parents=True, exist_ok=True)
+
+        tmp_paths_seen: list = []
+        real_mkstemp = shell_hooks.tempfile.mkstemp
+
+        def spying_mkstemp(*args, **kwargs):
+            fd, path = real_mkstemp(*args, **kwargs)
+            tmp_paths_seen.append(path)
+            return fd, path
+
+        monkeypatch.setattr(shell_hooks.tempfile, "mkstemp", spying_mkstemp)
+
+        shell_hooks.save_allowlist({"approvals": [{"event": "a", "command": "x"}]})
+        shell_hooks.save_allowlist({"approvals": [{"event": "b", "command": "y"}]})
+
+        assert len(tmp_paths_seen) == 2
+        assert tmp_paths_seen[0] != tmp_paths_seen[1]
--- a/tests/agent/test_shell_hooks_consent.py
+++ b/tests/agent/test_shell_hooks_consent.py
@@ -0,0 +1,242 @@
+"""Consent-flow tests for the shell-hook allowlist.
+
+Covers the prompt/non-prompt decision tree: TTY vs non-TTY, and the
+three accept-hooks channels (--accept-hooks, HERMES_ACCEPT_HOOKS env,
+hooks_auto_accept: config key).
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from unittest.mock import patch
+
+import pytest
+
+from agent import shell_hooks
+
+
+@pytest.fixture(autouse=True)
+def _isolated_home(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "hermes_home"))
+    monkeypatch.delenv("HERMES_ACCEPT_HOOKS", raising=False)
+    shell_hooks.reset_for_tests()
+    yield
+    shell_hooks.reset_for_tests()
+
+
+def _write_hook_script(tmp_path: Path) -> Path:
+    script = tmp_path / "hook.sh"
+    script.write_text("#!/usr/bin/env bash\nprintf '{}\\n'\n")
+    script.chmod(0o755)
+    return script
+
+
+# ── TTY prompt flow ───────────────────────────────────────────────────────
+
+
+class TestTTYPromptFlow:
+    def test_first_use_prompts_and_approves(self, tmp_path):
+        from hermes_cli import plugins
+
+        script = _write_hook_script(tmp_path)
+        plugins._plugin_manager = plugins.PluginManager()
+
+        with patch("sys.stdin") as mock_stdin, patch("builtins.input", return_value="y"):
+            mock_stdin.isatty.return_value = True
+            registered = shell_hooks.register_from_config(
+                {"hooks": {"on_session_start": [{"command": str(script)}]}},
+                accept_hooks=False,
+            )
+        assert len(registered) == 1
+
+        entry = shell_hooks.allowlist_entry_for("on_session_start", str(script))
+        assert entry is not None
+        assert entry["event"] == "on_session_start"
+        assert entry["command"] == str(script)
+
+    def test_first_use_prompts_and_rejects(self, tmp_path):
+        from hermes_cli import plugins
+
+        script = _write_hook_script(tmp_path)
+        plugins._plugin_manager = plugins.PluginManager()
+
+        with patch("sys.stdin") as mock_stdin, patch("builtins.input", return_value="n"):
+            mock_stdin.isatty.return_value = True
+            registered = shell_hooks.register_from_config(
+                {"hooks": {"on_session_start": [{"command": str(script)}]}},
+                accept_hooks=False,
+            )
+        assert registered == []
+        assert shell_hooks.allowlist_entry_for(
+            "on_session_start", str(script),
+        ) is None
+
+    def test_subsequent_use_does_not_prompt(self, tmp_path):
+        """After the first approval, re-registration must be silent."""
+        from hermes_cli import plugins
+
+        script = _write_hook_script(tmp_path)
+        plugins._plugin_manager = plugins.PluginManager()
+
+        # First call: TTY, approved.
+        with patch("sys.stdin") as mock_stdin, patch("builtins.input", return_value="y"):
+            mock_stdin.isatty.return_value = True
+            shell_hooks.register_from_config(
+                {"hooks": {"on_session_start": [{"command": str(script)}]}},
+                accept_hooks=False,
+            )
+
+        # Reset registration set but keep the allowlist on disk.
+        shell_hooks.reset_for_tests()
+
+        # Second call: TTY, input() must NOT be called.
+        with patch("sys.stdin") as mock_stdin, patch(
+            "builtins.input", side_effect=AssertionError("should not prompt"),
+        ):
+            mock_stdin.isatty.return_value = True
+            registered = shell_hooks.register_from_config(
+                {"hooks": {"on_session_start": [{"command": str(script)}]}},
+                accept_hooks=False,
+            )
+        assert len(registered) == 1
+
+
+# ── non-TTY flow ──────────────────────────────────────────────────────────
+
+
+class TestNonTTYFlow:
+    def test_no_tty_no_flag_skips_registration(self, tmp_path):
+        from hermes_cli import plugins
+
+        script = _write_hook_script(tmp_path)
+        plugins._plugin_manager = plugins.PluginManager()
+
+        with patch("sys.stdin") as mock_stdin:
+            mock_stdin.isatty.return_value = False
+            registered = shell_hooks.register_from_config(
+                {"hooks": {"on_session_start": [{"command": str(script)}]}},
+                accept_hooks=False,
+            )
+        assert registered == []
+
+    def test_no_tty_with_argument_flag_accepts(self, tmp_path):
+        from hermes_cli import plugins
+
+        script = _write_hook_script(tmp_path)
+        plugins._plugin_manager = plugins.PluginManager()
+
+        with patch("sys.stdin") as mock_stdin:
+            mock_stdin.isatty.return_value = False
+            registered = shell_hooks.register_from_config(
+                {"hooks": {"on_session_start": [{"command": str(script)}]}},
+                accept_hooks=True,
+            )
+        assert len(registered) == 1
+
+    def test_no_tty_with_env_accepts(self, tmp_path, monkeypatch):
+        from hermes_cli import plugins
+
+        script = _write_hook_script(tmp_path)
+        plugins._plugin_manager = plugins.PluginManager()
+        monkeypatch.setenv("HERMES_ACCEPT_HOOKS", "1")
+
+        with patch("sys.stdin") as mock_stdin:
+            mock_stdin.isatty.return_value = False
+            registered = shell_hooks.register_from_config(
+                {"hooks": {"on_session_start": [{"command": str(script)}]}},
+                accept_hooks=False,
+            )
+        assert len(registered) == 1
+
+    def test_no_tty_with_config_accepts(self, tmp_path):
+        from hermes_cli import plugins
+
+        script = _write_hook_script(tmp_path)
+        plugins._plugin_manager = plugins.PluginManager()
+
+        with patch("sys.stdin") as mock_stdin:
+            mock_stdin.isatty.return_value = False
+            registered = shell_hooks.register_from_config(
+                {
+                    "hooks_auto_accept": True,
+                    "hooks": {"on_session_start": [{"command": str(script)}]},
+                },
+                accept_hooks=False,
+            )
+        assert len(registered) == 1
+
+
+# ── Allowlist + revoke + mtime ────────────────────────────────────────────
+
+
+class TestAllowlistOps:
+    def test_mtime_recorded_on_approval(self, tmp_path):
+        script = _write_hook_script(tmp_path)
+        shell_hooks._record_approval("on_session_start", str(script))
+
+        entry = shell_hooks.allowlist_entry_for(
+            "on_session_start", str(script),
+        )
+        assert entry is not None
+        assert entry["script_mtime_at_approval"] is not None
+        # ISO-8601 Z-suffix
+        assert entry["script_mtime_at_approval"].endswith("Z")
+
+    def test_revoke_removes_entry(self, tmp_path):
+        script = _write_hook_script(tmp_path)
+        shell_hooks._record_approval("on_session_start", str(script))
+        assert shell_hooks.allowlist_entry_for(
+            "on_session_start", str(script),
+        ) is not None
+
+        removed = shell_hooks.revoke(str(script))
+        assert removed == 1
+        assert shell_hooks.allowlist_entry_for(
+            "on_session_start", str(script),
+        ) is None
+
+    def test_revoke_unknown_returns_zero(self, tmp_path):
+        assert shell_hooks.revoke(str(tmp_path / "never-approved.sh")) == 0
+
+    def test_tilde_path_approval_records_resolvable_mtime(self, tmp_path, monkeypatch):
+        """If the command uses ~ the approval must still find the file."""
+        monkeypatch.setenv("HOME", str(tmp_path))
+        target = tmp_path / "hook.sh"
+        target.write_text("#!/usr/bin/env bash\n")
+        target.chmod(0o755)
+
+        shell_hooks._record_approval("on_session_start", "~/hook.sh")
+        entry = shell_hooks.allowlist_entry_for(
+            "on_session_start", "~/hook.sh",
+        )
+        assert entry is not None
+        # Must not be None — the tilde was expanded before stat().
+        assert entry["script_mtime_at_approval"] is not None
+
+    def test_duplicate_approval_replaces_mtime(self, tmp_path):
+        """Re-approving the same pair refreshes the approval timestamp."""
+        script = _write_hook_script(tmp_path)
+        shell_hooks._record_approval("on_session_start", str(script))
+        original_entry = shell_hooks.allowlist_entry_for(
+            "on_session_start", str(script),
+        )
+        assert original_entry is not None
+
+        # Touch the script to bump its mtime then re-approve.
+        import os
+        import time
+        new_mtime = original_entry.get("script_mtime_at_approval")
+        time.sleep(0.01)
+        os.utime(script, None)  # current time
+
+        shell_hooks._record_approval("on_session_start", str(script))
+
+        # Exactly one entry per (event, command).
+        approvals = shell_hooks.load_allowlist().get("approvals", [])
+        matching = [
+            e for e in approvals
+            if e.get("event") == "on_session_start"
+            and e.get("command") == str(script)
+        ]
+        assert len(matching) == 1
--- a/tests/agent/test_subagent_stop_hook.py
+++ b/tests/agent/test_subagent_stop_hook.py
@@ -0,0 +1,224 @@
+"""Tests for the subagent_stop hook event.
+
+Covers wire-up from tools.delegate_tool.delegate_task:
+  * fires once per child in both single-task and batch modes
+  * runs on the parent thread (no re-entrancy for hook authors)
+  * carries child_role when the agent exposes _delegate_role
+  * carries child_role=None when _delegate_role is not set (pre-M3)
+"""
+
+from __future__ import annotations
+
+import json
+import threading
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from tools.delegate_tool import delegate_task
+from hermes_cli import plugins
+
+
+def _make_parent(depth: int = 0, session_id: str = "parent-1"):
+    parent = MagicMock()
+    parent.base_url = "https://openrouter.ai/api/v1"
+    parent.api_key = "***"
+    parent.provider = "openrouter"
+    parent.api_mode = "chat_completions"
+    parent.model = "anthropic/claude-sonnet-4"
+    parent.platform = "cli"
+    parent.providers_allowed = None
+    parent.providers_ignored = None
+    parent.providers_order = None
+    parent.provider_sort = None
+    parent._session_db = None
+    parent._delegate_depth = depth
+    parent._active_children = []
+    parent._active_children_lock = threading.Lock()
+    parent._print_fn = None
+    parent.tool_progress_callback = None
+    parent.thinking_callback = None
+    parent._memory_manager = None
+    parent.session_id = session_id
+    return parent
+
+
+@pytest.fixture(autouse=True)
+def _fresh_plugin_manager():
+    """Each test gets a fresh PluginManager so hook callbacks don't
+    leak between tests."""
+    original = plugins._plugin_manager
+    plugins._plugin_manager = plugins.PluginManager()
+    yield
+    plugins._plugin_manager = original
+
+
+@pytest.fixture(autouse=True)
+def _stub_child_builder(monkeypatch):
+    """Replace _build_child_agent with a MagicMock factory so delegate_task
+    never transitively imports run_agent / openai.  Keeps the test runnable
+    in environments without heavyweight runtime deps installed."""
+    def _fake_build_child(task_index, **kwargs):
+        child = MagicMock()
+        child._delegate_saved_tool_names = []
+        child._credential_pool = None
+        return child
+
+    monkeypatch.setattr(
+        "tools.delegate_tool._build_child_agent", _fake_build_child,
+    )
+
+
+def _register_capturing_hook():
+    captured = []
+
+    def _cb(**kwargs):
+        kwargs["_thread"] = threading.current_thread()
+        captured.append(kwargs)
+
+    mgr = plugins.get_plugin_manager()
+    mgr._hooks.setdefault("subagent_stop", []).append(_cb)
+    return captured
+
+
+# ── single-task mode ──────────────────────────────────────────────────────
+
+
+class TestSingleTask:
+    def test_fires_once(self):
+        captured = _register_capturing_hook()
+
+        with patch("tools.delegate_tool._run_single_child") as mock_run:
+            mock_run.return_value = {
+                "task_index": 0,
+                "status": "completed",
+                "summary": "Done!",
+                "api_calls": 3,
+                "duration_seconds": 5.0,
+                "_child_role": "analyst",
+            }
+            delegate_task(goal="do X", parent_agent=_make_parent())
+
+        assert len(captured) == 1
+        payload = captured[0]
+        assert payload["child_role"] == "analyst"
+        assert payload["child_status"] == "completed"
+        assert payload["child_summary"] == "Done!"
+        assert payload["duration_ms"] == 5000
+
+    def test_fires_on_parent_thread(self):
+        captured = _register_capturing_hook()
+        main_thread = threading.current_thread()
+
+        with patch("tools.delegate_tool._run_single_child") as mock_run:
+            mock_run.return_value = {
+                "task_index": 0, "status": "completed",
+                "summary": "x", "api_calls": 1, "duration_seconds": 0.1,
+                "_child_role": None,
+            }
+            delegate_task(goal="go", parent_agent=_make_parent())
+
+        assert captured[0]["_thread"] is main_thread
+
+    def test_payload_includes_parent_session_id(self):
+        captured = _register_capturing_hook()
+
+        with patch("tools.delegate_tool._run_single_child") as mock_run:
+            mock_run.return_value = {
+                "task_index": 0, "status": "completed",
+                "summary": "x", "api_calls": 1, "duration_seconds": 0.1,
+                "_child_role": None,
+            }
+            delegate_task(
+                goal="go",
+                parent_agent=_make_parent(session_id="sess-xyz"),
+            )
+
+        assert captured[0]["parent_session_id"] == "sess-xyz"
+
+
+# ── batch mode ────────────────────────────────────────────────────────────
+
+
+class TestBatchMode:
+    def test_fires_per_child(self):
+        captured = _register_capturing_hook()
+
+        with patch("tools.delegate_tool._run_single_child") as mock_run:
+            mock_run.side_effect = [
+                {"task_index": 0, "status": "completed",
+                 "summary": "A", "api_calls": 1, "duration_seconds": 1.0,
+                 "_child_role": "role-a"},
+                {"task_index": 1, "status": "completed",
+                 "summary": "B", "api_calls": 2, "duration_seconds": 2.0,
+                 "_child_role": "role-b"},
+                {"task_index": 2, "status": "completed",
+                 "summary": "C", "api_calls": 3, "duration_seconds": 3.0,
+                 "_child_role": "role-c"},
+            ]
+            delegate_task(
+                tasks=[
+                    {"goal": "A"}, {"goal": "B"}, {"goal": "C"},
+                ],
+                parent_agent=_make_parent(),
+            )
+
+        assert len(captured) == 3
+        roles = sorted(c["child_role"] for c in captured)
+        assert roles == ["role-a", "role-b", "role-c"]
+
+    def test_all_fires_on_parent_thread(self):
+        captured = _register_capturing_hook()
+        main_thread = threading.current_thread()
+
+        with patch("tools.delegate_tool._run_single_child") as mock_run:
+            mock_run.side_effect = [
+                {"task_index": 0, "status": "completed",
+                 "summary": "A", "api_calls": 1, "duration_seconds": 1.0,
+                 "_child_role": None},
+                {"task_index": 1, "status": "completed",
+                 "summary": "B", "api_calls": 2, "duration_seconds": 2.0,
+                 "_child_role": None},
+            ]
+            delegate_task(
+                tasks=[{"goal": "A"}, {"goal": "B"}],
+                parent_agent=_make_parent(),
+            )
+
+        for payload in captured:
+            assert payload["_thread"] is main_thread
+
+
+# ── payload shape ─────────────────────────────────────────────────────────
+
+
+class TestPayloadShape:
+    def test_role_absent_becomes_none(self):
+        captured = _register_capturing_hook()
+
+        with patch("tools.delegate_tool._run_single_child") as mock_run:
+            mock_run.return_value = {
+                "task_index": 0, "status": "completed",
+                "summary": "x", "api_calls": 1, "duration_seconds": 0.1,
+                # Deliberately omit _child_role — pre-M3 shape.
+            }
+            delegate_task(goal="do X", parent_agent=_make_parent())
+
+        assert captured[0]["child_role"] is None
+
+    def test_result_does_not_leak_child_role_field(self):
+        """The internal _child_role key must be stripped before the
+        result dict is serialised to JSON."""
+        _register_capturing_hook()
+
+        with patch("tools.delegate_tool._run_single_child") as mock_run:
+            mock_run.return_value = {
+                "task_index": 0, "status": "completed",
+                "summary": "x", "api_calls": 1, "duration_seconds": 0.1,
+                "_child_role": "leaf",
+            }
+            raw = delegate_task(goal="do X", parent_agent=_make_parent())
+
+        parsed = json.loads(raw)
+        assert "results" in parsed
+        assert "_child_role" not in parsed["results"][0]
--- a/tests/hermes_cli/test_argparse_flag_propagation.py
+++ b/tests/hermes_cli/test_argparse_flag_propagation.py
@@ -91,3 +91,42 @@ class TestYoloEnvVar:
        args = parser.parse_args(["chat"])
        self._simulate_cmd_chat_yolo_check(args)
        assert os.environ.get("HERMES_YOLO_MODE") is None
+
+
+class TestAcceptHooksOnAgentSubparsers:
+    """Verify --accept-hooks is accepted at every agent-subcommand
+    position (before the subcommand, between group/subcommand, and
+    after the leaf subcommand) for gateway/cron/mcp/acp.  Regression
+    against prior behaviour where the flag only worked on the root
+    parser and `chat`, so `hermes gateway run --accept-hooks` failed
+    with `unrecognized arguments`."""
+
+    @pytest.mark.parametrize("argv", [
+        ["--accept-hooks", "gateway", "run", "--help"],
+        ["gateway", "--accept-hooks", "run", "--help"],
+        ["gateway", "run", "--accept-hooks", "--help"],
+        ["--accept-hooks", "cron", "tick", "--help"],
+        ["cron", "--accept-hooks", "tick", "--help"],
+        ["cron", "tick", "--accept-hooks", "--help"],
+        ["cron", "run", "--accept-hooks", "dummy-id", "--help"],
+        ["--accept-hooks", "mcp", "serve", "--help"],
+        ["mcp", "--accept-hooks", "serve", "--help"],
+        ["mcp", "serve", "--accept-hooks", "--help"],
+        ["acp", "--accept-hooks", "--help"],
+    ])
+    def test_accepted_at_every_position(self, argv):
+        """Invoking `hermes <argv>` must exit 0 (help) rather than
+        failing with `unrecognized arguments`."""
+        import subprocess
+        result = subprocess.run(
+            [sys.executable, "-m", "hermes_cli.main", *argv],
+            capture_output=True,
+            text=True,
+            timeout=15,
+        )
+        assert result.returncode == 0, (
+            f"argv={argv!r} returned {result.returncode}\n"
+            f"stdout: {result.stdout[:300]}\n"
+            f"stderr: {result.stderr[:300]}"
+        )
+        assert "unrecognized arguments" not in result.stderr
--- a/tests/hermes_cli/test_hooks_cli.py
+++ b/tests/hermes_cli/test_hooks_cli.py
@@ -0,0 +1,268 @@
+"""Tests for the ``hermes hooks`` CLI subcommand."""
+
+from __future__ import annotations
+
+import io
+import json
+import sys
+from contextlib import redirect_stdout
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import patch
+
+import pytest
+
+from agent import shell_hooks
+from hermes_cli import hooks as hooks_cli
+
+
+@pytest.fixture(autouse=True)
+def _isolated_home(tmp_path, monkeypatch):
+    monkeypatch.setenv("HERMES_HOME", str(tmp_path / "home"))
+    monkeypatch.delenv("HERMES_ACCEPT_HOOKS", raising=False)
+    shell_hooks.reset_for_tests()
+    yield
+    shell_hooks.reset_for_tests()
+
+
+def _hook_script(tmp_path: Path, body: str, name: str = "hook.sh") -> Path:
+    p = tmp_path / name
+    p.write_text(body)
+    p.chmod(0o755)
+    return p
+
+
+def _run(sub_args: SimpleNamespace) -> str:
+    """Capture stdout for a hooks_command invocation."""
+    buf = io.StringIO()
+    with redirect_stdout(buf):
+        hooks_cli.hooks_command(sub_args)
+    return buf.getvalue()
+
+
+# ── list ──────────────────────────────────────────────────────────────────
+
+
+class TestHooksList:
+    def test_empty_config(self, tmp_path):
+        with patch("hermes_cli.config.load_config", return_value={}):
+            out = _run(SimpleNamespace(hooks_action="list"))
+        assert "No shell hooks configured" in out
+
+    def test_shows_configured_and_consent_status(self, tmp_path):
+        script = _hook_script(
+            tmp_path, "#!/usr/bin/env bash\nprintf '{}\\n'\n",
+        )
+        cfg = {
+            "hooks": {
+                "pre_tool_call": [
+                    {"matcher": "terminal", "command": str(script), "timeout": 30},
+                ],
+                "on_session_start": [
+                    {"command": str(script)},
+                ],
+            }
+        }
+
+        # Approve one of the two so we can see both states in the output
+        shell_hooks._record_approval("pre_tool_call", str(script))
+
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(hooks_action="list"))
+
+        assert "[pre_tool_call]" in out
+        assert "[on_session_start]" in out
+        assert "✓ allowed" in out
+        assert "✗ not allowlisted" in out
+        assert str(script) in out
+
+
+# ── test ──────────────────────────────────────────────────────────────────
+
+
+class TestHooksTest:
+    def test_synthetic_payload_matches_production_shape(self, tmp_path):
+        """`hermes hooks test` must feed the script stdin in the same
+        shape invoke_hook() would at runtime.  Prior to this fix,
+        run_once bypassed _serialize_payload and the two paths diverged —
+        scripts tested with `hermes hooks test` saw different top-level
+        keys than at runtime, silently breaking in production."""
+        capture = tmp_path / "captured.json"
+        script = _hook_script(
+            tmp_path,
+            f"#!/usr/bin/env bash\ncat - > {capture}\nprintf '{{}}\\n'\n",
+        )
+        cfg = {"hooks": {"subagent_stop": [{"command": str(script)}]}}
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            _run(SimpleNamespace(
+                hooks_action="test", event="subagent_stop",
+                for_tool=None, payload_file=None,
+            ))
+
+        seen = json.loads(capture.read_text())
+        # Same top-level keys _serialize_payload produces at runtime
+        assert set(seen.keys()) == {
+            "hook_event_name", "tool_name", "tool_input",
+            "session_id", "cwd", "extra",
+        }
+        # parent_session_id was routed to top-level session_id (matches runtime)
+        assert seen["session_id"] == "parent-sess"
+        assert "parent_session_id" not in seen["extra"]
+        # subagent_stop has no tool, so tool_name / tool_input are null
+        assert seen["tool_name"] is None
+        assert seen["tool_input"] is None
+
+    def test_fires_real_subprocess_and_parses_block(self, tmp_path):
+        block_script = _hook_script(
+            tmp_path,
+            "#!/usr/bin/env bash\n"
+            'printf \'{"decision": "block", "reason": "nope"}\\n\'\n',
+            name="block.sh",
+        )
+        cfg = {
+            "hooks": {
+                "pre_tool_call": [
+                    {"matcher": "terminal", "command": str(block_script)},
+                ],
+            },
+        }
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(
+                hooks_action="test", event="pre_tool_call",
+                for_tool="terminal", payload_file=None,
+            ))
+
+        # Parsed block appears in output
+        assert '"action": "block"' in out
+        assert '"message": "nope"' in out
+
+    def test_for_tool_matcher_filters(self, tmp_path):
+        script = _hook_script(tmp_path, "#!/usr/bin/env bash\nprintf '{}\\n'\n")
+        cfg = {
+            "hooks": {
+                "pre_tool_call": [
+                    {"matcher": "terminal", "command": str(script)},
+                ],
+            }
+        }
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(
+                hooks_action="test", event="pre_tool_call",
+                for_tool="web_search", payload_file=None,
+            ))
+        assert "No shell hooks" in out
+
+    def test_unknown_event(self):
+        with patch("hermes_cli.config.load_config", return_value={}):
+            out = _run(SimpleNamespace(
+                hooks_action="test", event="bogus_event",
+                for_tool=None, payload_file=None,
+            ))
+        assert "Unknown event" in out
+
+
+# ── revoke ────────────────────────────────────────────────────────────────
+
+
+class TestHooksRevoke:
+    def test_revoke_removes_entry(self, tmp_path):
+        script = _hook_script(tmp_path, "#!/usr/bin/env bash\n")
+        shell_hooks._record_approval("on_session_start", str(script))
+
+        out = _run(SimpleNamespace(hooks_action="revoke", command=str(script)))
+        assert "Removed 1" in out
+        assert shell_hooks.allowlist_entry_for(
+            "on_session_start", str(script),
+        ) is None
+
+    def test_revoke_unknown(self, tmp_path):
+        out = _run(SimpleNamespace(
+            hooks_action="revoke", command=str(tmp_path / "never.sh"),
+        ))
+        assert "No allowlist entry" in out
+
+
+# ── doctor ────────────────────────────────────────────────────────────────
+
+
+class TestHooksDoctor:
+    def test_flags_missing_exec_bit(self, tmp_path):
+        script = tmp_path / "hook.sh"
+        script.write_text("#!/usr/bin/env bash\nprintf '{}\\n'\n")
+        # No chmod — intentionally not executable
+        cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(hooks_action="doctor"))
+        assert "not executable" in out.lower()
+
+    def test_flags_unallowlisted(self, tmp_path):
+        script = _hook_script(tmp_path, "#!/usr/bin/env bash\nprintf '{}\\n'\n")
+        cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(hooks_action="doctor"))
+        assert "not allowlisted" in out.lower()
+
+    def test_flags_invalid_json(self, tmp_path):
+        script = _hook_script(
+            tmp_path,
+            "#!/usr/bin/env bash\necho 'not json!'\n",
+        )
+        shell_hooks._record_approval("on_session_start", str(script))
+        cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(hooks_action="doctor"))
+        assert "not valid JSON" in out
+
+    def test_flags_mtime_drift(self, tmp_path, monkeypatch):
+        """Allowlist with older mtime than current -> drift warning."""
+        script = _hook_script(tmp_path, "#!/usr/bin/env bash\nprintf '{}\\n'\n")
+
+        # Manually stash an allowlist entry with an old mtime
+        from agent.shell_hooks import allowlist_path
+        allowlist_path().parent.mkdir(parents=True, exist_ok=True)
+        allowlist_path().write_text(json.dumps({
+            "approvals": [
+                {
+                    "event": "on_session_start",
+                    "command": str(script),
+                    "approved_at": "2000-01-01T00:00:00Z",
+                    "script_mtime_at_approval": "2000-01-01T00:00:00Z",
+                }
+            ]
+        }))
+
+        cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(hooks_action="doctor"))
+        assert "modified since approval" in out
+
+    def test_clean_script_runs(self, tmp_path):
+        script = _hook_script(tmp_path, "#!/usr/bin/env bash\nprintf '{}\\n'\n")
+        shell_hooks._record_approval("on_session_start", str(script))
+        cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(hooks_action="doctor"))
+        assert "All shell hooks look healthy" in out
+
+    def test_unallowlisted_script_is_not_executed(self, tmp_path):
+        """Regression for M4: `hermes hooks doctor` used to run every
+        listed script against a synthetic payload as part of its JSON
+        smoke test, which contradicted the documented workflow of
+        "spot newly-added hooks *before they register*".  An un-allowlisted
+        script must not be executed during `doctor`."""
+        sentinel = tmp_path / "executed"
+        # Script would touch the sentinel if executed; we assert it wasn't.
+        script = _hook_script(
+            tmp_path,
+            f"#!/usr/bin/env bash\ntouch {sentinel}\nprintf '{{}}\\n'\n",
+        )
+        cfg = {"hooks": {"on_session_start": [{"command": str(script)}]}}
+        with patch("hermes_cli.config.load_config", return_value=cfg):
+            out = _run(SimpleNamespace(hooks_action="doctor"))
+
+        assert not sentinel.exists(), (
+            "doctor executed an un-allowlisted script — "
+            "M4 gate regressed"
+        )
+        assert "not allowlisted" in out.lower()
+        assert "skipped JSON smoke test" in out