feat(hooks): spill oversized hook-injected context to disk

Port from openai/codex#21069 ("Spill large hook outputs from context"). Both shell hooks and Python plugins can return {"context": "..."} from pre_llm_call, which gets appended to the current turn's user message on every subsequent API call. A plugin that accidentally (or intentionally) emits a large blob inflates every turn and blows out the prompt cache prefix. This adds a per-hook context cap with disk spill: - tools/hook_output_spill.py: shared helper that writes oversized context to $HERMES_HOME/hook_outputs/<session_id>/<uuid>.txt and returns a head/tail preview plus the saved path. - run_agent.py: apply the cap at the pre_llm_call aggregation site, covering both Python plugins and shell hooks (which also flow through invoke_hook). - agent/shell_hooks.py: reserve output_spill as a sub-key under hooks: so the config is schema-friendly and doesn't emit "unknown hook event" warnings. - Docs: document the cap and config in build-a-hermes-plugin.md. Config (all keys optional; the defaults shown apply when absent, so spilling is on by default): hooks: output_spill: enabled: true # default: true max_chars: 10000 # default preview_head: 500 # default preview_tail: 500 # default directory: null # default: $HERMES_HOME/hook_outputs Never raises — spill write failures fall back to a preview-only string so the model still gets bounded context even if the disk is full. Tests: 14 new unit tests in tests/tools/test_hook_output_spill.py; existing tests/agent/test_shell_hooks.py (49 tests) and tests/hermes_cli/test_plugins.py (62 tests) still pass. E2E validated with an isolated HERMES_HOME. Source: https://github.com/openai/codex/pull/21069
2026-05-06 10:47:12 +08:00 · 2026-05-05 17:06:35 -07:00
5 changed files with 488 additions and 2 deletions
--- a/agent/shell_hooks.py
+++ b/agent/shell_hooks.py
@@ -252,6 +252,11 @@ def _parse_hooks_block(hooks_cfg: Any) -> List[ShellHookSpec]:
    specs: List[ShellHookSpec] = []

    for event_name, entries in hooks_cfg.items():
+        # Reserved sub-keys that aren't event names — skip silently. These
+        # are config sub-sections nested under `hooks:` for related
+        # functionality (e.g. output-spill budgets).
+        if event_name in ("output_spill",):
+            continue
        if event_name not in VALID_HOOKS:
            suggestion = difflib.get_close_matches(
                str(event_name), VALID_HOOKS, n=1, cutoff=0.6,
--- a/run_agent.py
+++ b/run_agent.py
@@ -10883,11 +10883,37 @@ class AIAgent:
                sender_id=getattr(self, "_user_id", None) or "",
            )
            _ctx_parts: list[str] = []
+            # Spill oversized per-hook context to disk so a runaway plugin
+            # can't inflate every subsequent turn's prompt. Ported from
+            # openai/codex PR #21069 ("Spill large hook outputs from context").
+            try:
+                from tools.hook_output_spill import (
+                    get_spill_config as _spill_cfg,
+                    spill_if_oversized as _spill_if_oversized,
+                )
+                _spill_config_cached = _spill_cfg()
+            except Exception:
+                _spill_if_oversized = None  # type: ignore[assignment]
+                _spill_config_cached = None
            for r in _pre_results:
+                _piece: str = ""
                if isinstance(r, dict) and r.get("context"):
-                    _ctx_parts.append(str(r["context"]))
+                    _piece = str(r["context"])
                elif isinstance(r, str) and r.strip():
-                    _ctx_parts.append(r)
+                    _piece = r
+                else:
+                    continue
+                if _spill_if_oversized is not None:
+                    try:
+                        _piece = _spill_if_oversized(
+                            _piece,
+                            session_id=self.session_id,
+                            source="plugin hook",
+                            config=_spill_config_cached,
+                        )
+                    except Exception as _spill_exc:
+                        logger.warning("hook context spill failed: %s", _spill_exc)
+                _ctx_parts.append(_piece)
            if _ctx_parts:
                _plugin_user_context = "\n\n".join(_ctx_parts)
        except Exception as exc:
--- a/tests/tools/test_hook_output_spill.py
+++ b/tests/tools/test_hook_output_spill.py
@@ -0,0 +1,205 @@
+"""Tests for tools.hook_output_spill."""
+
+from __future__ import annotations
+
+import os
+import tempfile
+import unittest
+from pathlib import Path
+from unittest.mock import patch
+
+from tools import hook_output_spill as hos
+
+
+class GetSpillConfigTests(unittest.TestCase):
+    """``get_spill_config`` resolves ``hooks.output_spill`` with safe defaults."""
+
+    def test_defaults_when_no_config(self):
+        # get_spill_config imports load_config at call time, so the patch
+        # must target hermes_cli.config rather than the hos module.
+        with patch("hermes_cli.config.load_config", return_value={}):
+            cfg = hos.get_spill_config()
+        self.assertTrue(cfg["enabled"])
+        self.assertEqual(cfg["max_chars"], hos.DEFAULT_MAX_CHARS)
+        self.assertEqual(cfg["preview_head"], hos.DEFAULT_PREVIEW_HEAD)
+        self.assertEqual(cfg["preview_tail"], hos.DEFAULT_PREVIEW_TAIL)
+        self.assertIsNone(cfg["directory"])
+
+    def test_user_overrides_are_respected(self):
+        user_cfg = {
+            "hooks": {
+                "output_spill": {
+                    "enabled": False,
+                    "max_chars": 500,
+                    "preview_head": 25,
+                    "preview_tail": 10,
+                    "directory": "/tmp/spill-test",
+                }
+            }
+        }
+        with patch("hermes_cli.config.load_config", return_value=user_cfg):
+            cfg = hos.get_spill_config()
+        self.assertFalse(cfg["enabled"])
+        self.assertEqual(cfg["max_chars"], 500)
+        self.assertEqual(cfg["preview_head"], 25)
+        self.assertEqual(cfg["preview_tail"], 10)
+        self.assertEqual(cfg["directory"], "/tmp/spill-test")
+
+    def test_bad_values_fall_back_to_defaults(self):
+        user_cfg = {
+            "hooks": {
+                "output_spill": {
+                    "max_chars": "not-a-number",
+                    "preview_head": -100,
+                    "preview_tail": None,
+                    "directory": 123,  # not a string
+                }
+            }
+        }
+        with patch("hermes_cli.config.load_config", return_value=user_cfg):
+            cfg = hos.get_spill_config()
+        self.assertEqual(cfg["max_chars"], hos.DEFAULT_MAX_CHARS)
+        self.assertEqual(cfg["preview_head"], hos.DEFAULT_PREVIEW_HEAD)
+        self.assertEqual(cfg["preview_tail"], hos.DEFAULT_PREVIEW_TAIL)
+        self.assertIsNone(cfg["directory"])
+
+    def test_load_config_exception_is_swallowed(self):
+        with patch("hermes_cli.config.load_config", side_effect=RuntimeError("bad")):
+            cfg = hos.get_spill_config()
+        self.assertEqual(cfg["max_chars"], hos.DEFAULT_MAX_CHARS)
+        self.assertTrue(cfg["enabled"])
+
+
+class SpillIfOversizedTests(unittest.TestCase):
+    def setUp(self):
+        self.tmpdir = tempfile.mkdtemp(prefix="hermes-spill-test-")
+
+    def tearDown(self):
+        import shutil
+        shutil.rmtree(self.tmpdir, ignore_errors=True)
+
+    def _cfg(self, **overrides):
+        base = {
+            "enabled": True,
+            "max_chars": 100,
+            "preview_head": 20,
+            "preview_tail": 20,
+            "directory": self.tmpdir,
+        }
+        base.update(overrides)
+        return base
+
+    def test_empty_and_none_are_noops(self):
+        self.assertEqual(hos.spill_if_oversized("", config=self._cfg()), "")
+        self.assertEqual(hos.spill_if_oversized(None, config=self._cfg()), "")
+
+    def test_text_under_cap_is_unchanged(self):
+        small = "x" * 50
+        self.assertEqual(hos.spill_if_oversized(small, config=self._cfg()), small)
+
+    def test_disabled_bypasses_spill_even_if_oversized(self):
+        big = "y" * 10_000
+        cfg = self._cfg(enabled=False)
+        self.assertEqual(hos.spill_if_oversized(big, config=cfg), big)
+        # No spill files written.
+        self.assertEqual(list(Path(self.tmpdir).rglob("*")), [])
+
+    def test_oversized_writes_spill_and_returns_preview(self):
+        big = "A" * 60 + "B" * 60 + "C" * 60  # 180 chars > cap 100
+        result = hos.spill_if_oversized(
+            big,
+            session_id="sess-123",
+            source="plugin hook",
+            config=self._cfg(),
+        )
+        # Preview contains the header, head, and tail markers.
+        self.assertIn("plugin hook output truncated — 180 chars", result)
+        self.assertIn("--- head ---", result)
+        self.assertIn("--- tail ---", result)
+        # Head is the first 20 chars, tail is the last 20.
+        self.assertIn("A" * 20, result)
+        self.assertIn("C" * 20, result)
+        # Spill file exists under the session subdir and has full content.
+        session_dir = Path(self.tmpdir) / "sess-123"
+        self.assertTrue(session_dir.is_dir())
+        files = list(session_dir.iterdir())
+        self.assertEqual(len(files), 1)
+        self.assertEqual(files[0].read_text().rstrip("\n"), big)
+        # Preview references the spill path.
+        self.assertIn(str(files[0]), result)
+
+    def test_missing_session_id_uses_no_session_segment(self):
+        big = "z" * 500
+        cfg = self._cfg(max_chars=10)
+        hos.spill_if_oversized(big, session_id=None, config=cfg)
+        self.assertTrue((Path(self.tmpdir) / "no-session").is_dir())
+
+    def test_session_id_with_path_separators_is_sanitised(self):
+        big = "q" * 500
+        cfg = self._cfg(max_chars=10)
+        # An attacker-style session id with .. and / must not escape the
+        # base directory.
+        hos.spill_if_oversized(big, session_id="../../etc/passwd", config=cfg)
+        # The spill must land inside tmpdir: a write that escaped the base
+        # directory would leave nothing for rglob to find here.
+        entries = list(Path(self.tmpdir).rglob("*.txt"))
+        self.assertEqual(len(entries), 1)
+        # The session id must collapse to a single, dot-dot-free directory.
+        self.assertEqual(entries[0].parent.parent, Path(self.tmpdir))
+        self.assertNotIn("..", entries[0].parent.name)
+
+    def test_spill_write_failure_falls_back_to_preview_only(self):
+        big = "w" * 500
+        # Point at a path that cannot be created (a file, not a dir).
+        existing_file = os.path.join(self.tmpdir, "not-a-dir")
+        with open(existing_file, "w") as f:
+            f.write("blocker")
+        cfg = self._cfg(max_chars=10, directory=existing_file)
+        result = hos.spill_if_oversized(big, session_id="x", config=cfg)
+        # Preview still returned, but with failure notice.
+        self.assertIn("spill write failed", result)
+        self.assertIn("--- head ---", result)
+        # Content still bounded (not the full 500 chars).
+        self.assertLess(len(result), 500)
+
+    def test_preview_head_only_no_tail(self):
+        big = "a" * 1000
+        cfg = self._cfg(max_chars=10, preview_head=30, preview_tail=0)
+        result = hos.spill_if_oversized(big, session_id="s", config=cfg)
+        self.assertIn("--- head ---", result)
+        self.assertNotIn("--- tail ---", result)
+
+    def test_non_string_input_coerced(self):
+        cfg = self._cfg(max_chars=5)
+
+        class StrFriendly:
+            def __str__(self):
+                return "stringified-" + "x" * 200
+
+        result = hos.spill_if_oversized(StrFriendly(), session_id="s", config=cfg)
+        self.assertIn("truncated", result)
+
+    def test_default_directory_uses_hermes_home(self):
+        """When no directory override, spill under HERMES_HOME/hook_outputs."""
+        test_home = tempfile.mkdtemp(prefix="hermes-home-")
+        try:
+            # Pin both resolution paths (get_hermes_home and the HERMES_HOME
+            # env-var fallback) to the temp home, so the test never writes
+            # into the real ~/.hermes and the assertion can be exact.
+            with patch.dict(os.environ, {"HERMES_HOME": test_home}), patch(
+                "hermes_constants.get_hermes_home", return_value=test_home
+            ):
+                cfg = self._cfg(directory=None, max_chars=5)
+                hos.spill_if_oversized("x" * 200, session_id="sess", config=cfg)
+            # Exactly one spill file, in the session dir under the temp home.
+            spill_dir = Path(test_home) / "hook_outputs" / "sess"
+            self.assertTrue(spill_dir.is_dir(), f"No spill dir at {spill_dir}")
+            files = list(spill_dir.iterdir())
+            self.assertEqual(len(files), 1)
+        finally:
+            import shutil
+            shutil.rmtree(test_home, ignore_errors=True)
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/tools/hook_output_spill.py
+++ b/tools/hook_output_spill.py
@@ -0,0 +1,236 @@
+"""Spill oversized hook-injected context to disk with a preview placeholder.
+
+Ported from openai/codex PR #21069 (``Spill large hook outputs from context``).
+
+Background
+----------
+Both shell hooks (``agent/shell_hooks.py``) and Python plugins
+(``pre_llm_call`` hook in ``run_agent.py``) can return ``{"context": "..."}``
+which gets concatenated into the current turn's user message on EVERY
+subsequent API call. If a hook emits a large blob (e.g. a debug dump, a
+full file, or a runaway prompt-engineering script), that blob inflates
+every turn of the session and blows out the prompt cache prefix the
+moment it's appended.
+
+This mirrors what Codex does for its ``PreToolUse``/``Stop``/feedback
+hooks: once the injected text exceeds a configured budget, write the
+full content to a per-session directory on disk and replace the in-prompt
+payload with a head/tail preview plus the saved path. The model can still
+inspect the full content via ``read_file`` or ``terminal`` if it needs to.
+
+Config (``config.yaml``)::
+
+    hooks:
+      output_spill:
+        enabled: true          # default: true; set false to disable spilling
+        max_chars: 10000       # default; context above this is spilled
+        preview_head: 500      # chars shown at the start of the preview
+        preview_tail: 500      # chars shown at the end of the preview
+        directory: null        # default: <HERMES_HOME>/hook_outputs
+
+Design invariants
+-----------------
+* Behaviour-preserving when ``enabled: false`` or when content is under
+  the cap — return the input string unchanged.
+* Never raises. Any I/O error (disk full, permission denied, missing
+  HERMES_HOME, etc.) falls back to the same head/tail preview with an
+  in-prompt "spill write failed" notice — the hook context still reaches
+  the model, just bounded in size.
+* Spill files are grouped by session so a ``/new`` session doesn't grow
+  them forever in one directory.
+"""
+
+from __future__ import annotations
+
+import logging
+import os
+import uuid
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+logger = logging.getLogger(__name__)
+
+
+DEFAULT_MAX_CHARS = 10_000  # context longer than this many chars is spilled
+DEFAULT_PREVIEW_HEAD = 500  # chars shown at the start of the preview
+DEFAULT_PREVIEW_TAIL = 500  # chars shown at the end of the preview
+DEFAULT_ENABLED = True  # spilling is on unless the config turns it off
+
+
+def _coerce_positive_int(value: Any, default: int) -> int:
+    """Return ``int(value)`` if it is > 0, else ``default``. Never raises."""
+    try:
+        iv = int(value)
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError: int(float("inf")), e.g. YAML ``max_chars: .inf``.
+        return default
+    return iv if iv > 0 else default
+
+
+def _coerce_non_negative_int(value: Any, default: int) -> int:
+    """Return ``int(value)`` if it is >= 0 (zero allowed, e.g. empty tail),
+    else ``default``. Never raises."""
+    try:
+        iv = int(value)
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError: int(float("inf")), e.g. YAML ``preview_head: .inf``.
+        return default
+    return iv if iv >= 0 else default
+
+
+def get_spill_config() -> Dict[str, Any]:
+    """Resolve the ``hooks.output_spill`` settings from ``config.yaml``.
+
+    Missing, malformed, or unloadable config falls back to the module
+    defaults field by field. Never raises.
+    """
+    raw: Dict[str, Any] = {}
+    try:
+        from hermes_cli.config import load_config
+
+        loaded = load_config() or {}
+        hooks_block = loaded.get("hooks") if isinstance(loaded, dict) else None
+        candidate = hooks_block.get("output_spill") if isinstance(hooks_block, dict) else None
+        if isinstance(candidate, dict):
+            raw = candidate
+    except Exception:
+        # Config problems must never break the turn; use defaults instead.
+        raw = {}
+
+    # An explicit ``enabled: null`` is treated the same as an absent key.
+    flag = raw.get("enabled", DEFAULT_ENABLED)
+    spill_dir = raw.get("directory")
+
+    resolved: Dict[str, Any] = {
+        "enabled": DEFAULT_ENABLED if flag is None else bool(flag),
+        "max_chars": _coerce_positive_int(raw.get("max_chars"), DEFAULT_MAX_CHARS),
+        "preview_head": _coerce_non_negative_int(raw.get("preview_head"), DEFAULT_PREVIEW_HEAD),
+        "preview_tail": _coerce_non_negative_int(raw.get("preview_tail"), DEFAULT_PREVIEW_TAIL),
+        # Only a string path is accepted as a directory override.
+        "directory": spill_dir if isinstance(spill_dir, str) else None,
+    }
+    return resolved
+
+
+def _resolve_spill_dir(directory_override: Optional[str], session_id: Optional[str]) -> Path:
+    """Return ``<base>/<session segment>``, the spill directory for a session.
+
+    ``<base>`` is the expanded ``directory_override`` when given, otherwise
+    ``hook_outputs`` under the Hermes home directory.
+    """
+    if not directory_override:
+        try:
+            from hermes_constants import get_hermes_home
+            root = Path(get_hermes_home()) / "hook_outputs"
+        except Exception:
+            # Fall back to the HERMES_HOME env var, then ~/.hermes.
+            fallback = os.environ.get("HERMES_HOME") or os.path.expanduser("~/.hermes")
+            root = Path(fallback) / "hook_outputs"
+    else:
+        root = Path(os.path.expanduser(directory_override))
+    # Replace path separators and ".." so a session id can't traverse dirs.
+    segment = (session_id or "no-session").replace("/", "_").replace("\\", "_").replace("..", "_")
+    return root / segment
+
+
+def _build_preview(
+    text: str,
+    head: int,
+    tail: int,
+    saved_path: Optional[str],
+    *,
+    source: str,
+) -> str:
+    """Assemble the in-prompt preview: header, head chunk, tail chunk.
+
+    The header reports ``len(text)`` and ``saved_path`` (or a write-failure
+    notice when it is ``None``). The tail is clamped to the characters left
+    after the head so the two chunks never repeat the same text.
+    """
+    total = len(text)
+    head_chunk = text[:head] if head > 0 else ""
+    tail_len = min(tail, total - len(head_chunk))
+    tail_chunk = text[-tail_len:] if tail_len > 0 else ""
+    where = f"saved to {saved_path}]" if saved_path else "unavailable — spill write failed]"
+    parts = [f"[{source} output truncated — {total:,} chars; full content {where}"]
+    for label, chunk in (("head", head_chunk), ("tail", tail_chunk)):
+        if chunk:
+            parts += [f"--- {label} ---", chunk]
+    return "\n".join(parts)
+
+
+def spill_if_oversized(
+    text: str,
+    *,
+    session_id: Optional[str] = None,
+    source: str = "hook",
+    config: Optional[Dict[str, Any]] = None,
+) -> str:
+    """Spill ``text`` to disk if it exceeds the configured cap.
+
+    Returns ``text`` unchanged (under the cap, disabled, or empty), else a
+    head/tail preview (at most ``max_chars`` of content) naming the spill
+    file, or carrying a failure notice if the file could not be written.
+
+    Parameters
+    ----------
+    text:
+        The raw injected-context string from a hook. ``None`` yields ``""``;
+        other non-string inputs are coerced with ``str()``.
+    session_id:
+        Used to group spill files by conversation. Falls back to
+        ``"no-session"`` if missing.
+    source:
+        Human-readable label used in the preview header (``"hook"``,
+        ``"plugin hook"``, ``"shell hook"``, etc.). Free-form.
+    config:
+        Optional override for tests; normally resolved from ``config.yaml``.
+        Unusable numeric values fall back to the module defaults.
+    """
+    if text is None:
+        return ""
+    if not isinstance(text, str):
+        try:
+            text = str(text)
+        except Exception:
+            return ""
+
+    cfg = config if config is not None else get_spill_config()
+    if not cfg.get("enabled", True):
+        return text
+
+    # Hand-built configs skip get_spill_config() validation; never raise here.
+    try:
+        max_chars = int(cfg.get("max_chars") or DEFAULT_MAX_CHARS)
+        head = max(int(cfg.get("preview_head") or 0), 0)
+        tail = max(int(cfg.get("preview_tail") or 0), 0)
+    except (TypeError, ValueError, OverflowError):
+        max_chars, head, tail = DEFAULT_MAX_CHARS, DEFAULT_PREVIEW_HEAD, DEFAULT_PREVIEW_TAIL
+    max_chars = max_chars if max_chars > 0 else DEFAULT_MAX_CHARS
+    if len(text) <= max_chars:
+        return text
+    # Keep the preview within the cap it is enforcing (head takes priority).
+    head = min(head, max_chars)
+    tail = min(tail, max_chars - head)
+    # A failed write must not break the turn: fall back to a path-less preview.
+    saved_path: Optional[str] = None
+    try:
+        spill_path = _resolve_spill_dir(cfg.get("directory"), session_id) / f"{uuid.uuid4().hex}.txt"
+        spill_path.parent.mkdir(parents=True, exist_ok=True)
+        # Trailing newline so tail readers don't report "missing newline".
+        spill_path.write_text(text if text.endswith("\n") else text + "\n", encoding="utf-8")
+        saved_path = str(spill_path)
+    except Exception as exc:
+        logger.warning("hook output spill failed: %s", exc)
+
+    return _build_preview(text, head, tail, saved_path, source=source)
+
+
+__all__ = [
+    "DEFAULT_MAX_CHARS",
+    "DEFAULT_PREVIEW_HEAD",
+    "DEFAULT_PREVIEW_TAIL",
+    "DEFAULT_ENABLED",
+    "get_spill_config",
+    "spill_if_oversized",
+]
--- a/website/docs/guides/build-a-hermes-plugin.md
+++ b/website/docs/guides/build-a-hermes-plugin.md
@@ -461,6 +461,20 @@ return None

 Any non-None, non-empty return with a `"context"` key (or a plain non-empty string) is collected and appended to the user message for the current turn.

+#### Oversized-context spill
+
+Per-hook context is capped at `10,000` characters by default. Anything above the cap is written to `$HERMES_HOME/hook_outputs/<session_id>/<uuid>.txt` and replaced with a head/tail preview plus the saved path. The model can read the full content via `read_file` or `terminal` if it genuinely needs it. This keeps a runaway plugin from inflating every subsequent turn's prompt and blowing out the prompt cache prefix. Tune in `config.yaml`:
+
+```yaml
+hooks:
+  output_spill:
+    enabled: true          # default: true
+    max_chars: 10000       # default; raise to spill less often (enabled: false opts out)
+    preview_head: 500      # chars shown at the top of the preview
+    preview_tail: 500      # chars shown at the bottom of the preview
+    # directory: null      # default: $HERMES_HOME/hook_outputs
+```
+
 #### How injection works

 Injected context is appended to the **user message**, not the system prompt. This is a deliberate design choice: