Compare commits

...

1 Commits

Author SHA1 Message Date
hermes-agent bot
b6c53ef0be feat(hooks): spill oversized hook-injected context to disk
Port from openai/codex#21069 ("Spill large hook outputs from context").

Both shell hooks and Python plugins can return {"context": "..."} from
pre_llm_call, which gets appended to the current turn's user message on
every subsequent API call. A plugin that accidentally (or intentionally)
emits a large blob inflates every turn and blows out the prompt cache
prefix.

This adds a per-hook context cap with disk spill:

- tools/hook_output_spill.py: shared helper that writes oversized
  context to $HERMES_HOME/hook_outputs/<session_id>/<uuid>.txt and
  returns a head/tail preview plus the saved path.
- run_agent.py: apply the cap at the pre_llm_call aggregation site,
  covering both Python plugins and shell hooks (which also flow through
  invoke_hook).
- agent/shell_hooks.py: reserve output_spill as a sub-key under hooks:
  so the config is schema-friendly and doesn't emit
  "unknown hook event" warnings.
- Docs: document the cap and config in build-a-hermes-plugin.md.

Config (all optional, behaviour-preserving when absent):

    hooks:
      output_spill:
        enabled: true          # default: true
        max_chars: 10000       # default
        preview_head: 500      # default
        preview_tail: 500      # default
        directory: null        # default: $HERMES_HOME/hook_outputs

Never raises — spill write failures fall back to a preview-only string
so the model still gets bounded context even if the disk is full.

Tests: 14 new unit tests in tests/tools/test_hook_output_spill.py;
existing tests/agent/test_shell_hooks.py (49 tests) and
tests/hermes_cli/test_plugins.py (62 tests) still pass. E2E validated
with an isolated HERMES_HOME.

Source: https://github.com/openai/codex/pull/21069
2026-05-05 17:06:35 -07:00
5 changed files with 488 additions and 2 deletions

View File

@@ -252,6 +252,11 @@ def _parse_hooks_block(hooks_cfg: Any) -> List[ShellHookSpec]:
specs: List[ShellHookSpec] = []
for event_name, entries in hooks_cfg.items():
# Reserved sub-keys that aren't event names — skip silently. These
# are config sub-sections nested under `hooks:` for related
# functionality (e.g. output-spill budgets).
if event_name in ("output_spill",):
continue
if event_name not in VALID_HOOKS:
suggestion = difflib.get_close_matches(
str(event_name), VALID_HOOKS, n=1, cutoff=0.6,

View File

@@ -10883,11 +10883,37 @@ class AIAgent:
sender_id=getattr(self, "_user_id", None) or "",
)
_ctx_parts: list[str] = []
# Spill oversized per-hook context to disk so a runaway plugin
# can't inflate every subsequent turn's prompt. Ported from
# openai/codex PR #21069 ("Spill large hook outputs from context").
try:
from tools.hook_output_spill import (
get_spill_config as _spill_cfg,
spill_if_oversized as _spill_if_oversized,
)
_spill_config_cached = _spill_cfg()
except Exception:
_spill_if_oversized = None # type: ignore[assignment]
_spill_config_cached = None
for r in _pre_results:
_piece: str = ""
if isinstance(r, dict) and r.get("context"):
_ctx_parts.append(str(r["context"]))
_piece = str(r["context"])
elif isinstance(r, str) and r.strip():
_ctx_parts.append(r)
_piece = r
else:
continue
if _spill_if_oversized is not None:
try:
_piece = _spill_if_oversized(
_piece,
session_id=self.session_id,
source="plugin hook",
config=_spill_config_cached,
)
except Exception as _spill_exc:
logger.warning("hook context spill failed: %s", _spill_exc)
_ctx_parts.append(_piece)
if _ctx_parts:
_plugin_user_context = "\n\n".join(_ctx_parts)
except Exception as exc:

View File

@@ -0,0 +1,205 @@
"""Tests for tools.hook_output_spill."""
from __future__ import annotations
import os
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from tools import hook_output_spill as hos
class GetSpillConfigTests(unittest.TestCase):
    """Tests for ``hos.get_spill_config`` defaults, overrides and fallbacks."""

    def test_defaults_when_no_config(self):
        # ``get_spill_config`` imports ``load_config`` locally at call time,
        # so it has to be patched where it is defined
        # (``hermes_cli.config``), not as an attribute of ``hos``.
        with patch("hermes_cli.config.load_config", return_value={}):
            cfg = hos.get_spill_config()
        self.assertTrue(cfg["enabled"])
        self.assertEqual(cfg["max_chars"], hos.DEFAULT_MAX_CHARS)
        self.assertEqual(cfg["preview_head"], hos.DEFAULT_PREVIEW_HEAD)
        self.assertEqual(cfg["preview_tail"], hos.DEFAULT_PREVIEW_TAIL)
        self.assertIsNone(cfg["directory"])

    def test_user_overrides_are_respected(self):
        user_cfg = {
            "hooks": {
                "output_spill": {
                    "enabled": False,
                    "max_chars": 500,
                    "preview_head": 25,
                    "preview_tail": 10,
                    "directory": "/tmp/spill-test",
                }
            }
        }
        with patch("hermes_cli.config.load_config", return_value=user_cfg):
            cfg = hos.get_spill_config()
        self.assertFalse(cfg["enabled"])
        self.assertEqual(cfg["max_chars"], 500)
        self.assertEqual(cfg["preview_head"], 25)
        self.assertEqual(cfg["preview_tail"], 10)
        self.assertEqual(cfg["directory"], "/tmp/spill-test")

    def test_bad_values_fall_back_to_defaults(self):
        user_cfg = {
            "hooks": {
                "output_spill": {
                    "max_chars": "not-a-number",
                    "preview_head": -100,
                    "preview_tail": None,
                    "directory": 123,  # not a string
                }
            }
        }
        with patch("hermes_cli.config.load_config", return_value=user_cfg):
            cfg = hos.get_spill_config()
        self.assertEqual(cfg["max_chars"], hos.DEFAULT_MAX_CHARS)
        self.assertEqual(cfg["preview_head"], hos.DEFAULT_PREVIEW_HEAD)
        self.assertEqual(cfg["preview_tail"], hos.DEFAULT_PREVIEW_TAIL)
        self.assertIsNone(cfg["directory"])

    def test_load_config_exception_is_swallowed(self):
        with patch("hermes_cli.config.load_config", side_effect=RuntimeError("bad")):
            cfg = hos.get_spill_config()
        self.assertEqual(cfg["max_chars"], hos.DEFAULT_MAX_CHARS)
        self.assertTrue(cfg["enabled"])
class SpillIfOversizedTests(unittest.TestCase):
    """Tests for ``hos.spill_if_oversized`` using a throwaway spill directory."""

    def setUp(self):
        self.tmpdir = tempfile.mkdtemp(prefix="hermes-spill-test-")

    def tearDown(self):
        import shutil

        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def _cfg(self, **overrides):
        """Return a small-cap spill config rooted at ``self.tmpdir``."""
        base = {
            "enabled": True,
            "max_chars": 100,
            "preview_head": 20,
            "preview_tail": 20,
            "directory": self.tmpdir,
        }
        base.update(overrides)
        return base

    def test_empty_and_none_are_noops(self):
        self.assertEqual(hos.spill_if_oversized("", config=self._cfg()), "")
        self.assertEqual(hos.spill_if_oversized(None, config=self._cfg()), "")

    def test_text_under_cap_is_unchanged(self):
        small = "x" * 50
        self.assertEqual(hos.spill_if_oversized(small, config=self._cfg()), small)

    def test_disabled_bypasses_spill_even_if_oversized(self):
        big = "y" * 10_000
        cfg = self._cfg(enabled=False)
        self.assertEqual(hos.spill_if_oversized(big, config=cfg), big)
        # No spill files written.
        self.assertEqual(list(Path(self.tmpdir).rglob("*")), [])

    def test_oversized_writes_spill_and_returns_preview(self):
        big = "A" * 60 + "B" * 60 + "C" * 60  # 180 chars > cap 100
        result = hos.spill_if_oversized(
            big,
            session_id="sess-123",
            source="plugin hook",
            config=self._cfg(),
        )
        # Preview contains the header, head, and tail markers.
        self.assertIn("plugin hook output truncated — 180 chars", result)
        self.assertIn("--- head ---", result)
        self.assertIn("--- tail ---", result)
        # Head is the first 20 chars, tail is the last 20.
        self.assertIn("A" * 20, result)
        self.assertIn("C" * 20, result)
        # Spill file exists under the session subdir and has full content.
        session_dir = Path(self.tmpdir) / "sess-123"
        self.assertTrue(session_dir.is_dir())
        files = list(session_dir.iterdir())
        self.assertEqual(len(files), 1)
        # The spill file is written as UTF-8, so read it back the same way
        # instead of relying on the locale's default encoding.
        self.assertEqual(files[0].read_text(encoding="utf-8").rstrip("\n"), big)
        # Preview references the spill path.
        self.assertIn(str(files[0]), result)

    def test_missing_session_id_uses_no_session_segment(self):
        big = "z" * 500
        cfg = self._cfg(max_chars=10)
        hos.spill_if_oversized(big, session_id=None, config=cfg)
        self.assertTrue((Path(self.tmpdir) / "no-session").is_dir())

    def test_session_id_with_path_separators_is_sanitised(self):
        big = "q" * 500
        cfg = self._cfg(max_chars=10)
        # An attacker-style session id with .. and / must not escape the
        # base directory.
        hos.spill_if_oversized(big, session_id="../../etc/passwd", config=cfg)
        # Nothing leaks outside self.tmpdir.
        self.assertFalse(Path("/etc/passwd-hermes-test").exists())
        # A sanitised path should exist under tmpdir.
        entries = list(Path(self.tmpdir).rglob("*.txt"))
        self.assertEqual(len(entries), 1)
        # ``rglob`` only yields paths below tmpdir, so a prefix check would
        # always pass. Check the structure instead: exactly one directory
        # (the sanitised session segment) sits between the base directory
        # and the spill file, and no ".." component survived.
        self.assertEqual(entries[0].parent.parent, Path(self.tmpdir))
        self.assertNotIn("..", entries[0].relative_to(self.tmpdir).parts)

    def test_spill_write_failure_falls_back_to_preview_only(self):
        big = "w" * 500
        # Point at a path that cannot be created (a file, not a dir).
        existing_file = os.path.join(self.tmpdir, "not-a-dir")
        with open(existing_file, "w") as f:
            f.write("blocker")
        cfg = self._cfg(max_chars=10, directory=existing_file)
        result = hos.spill_if_oversized(big, session_id="x", config=cfg)
        # Preview still returned, but with failure notice.
        self.assertIn("spill write failed", result)
        self.assertIn("--- head ---", result)
        # Content still bounded (not the full 500 chars).
        self.assertLess(len(result), 500)

    def test_preview_head_only_no_tail(self):
        big = "a" * 1000
        cfg = self._cfg(max_chars=10, preview_head=30, preview_tail=0)
        result = hos.spill_if_oversized(big, session_id="s", config=cfg)
        self.assertIn("--- head ---", result)
        self.assertNotIn("--- tail ---", result)

    def test_non_string_input_coerced(self):
        cfg = self._cfg(max_chars=5)

        class StrFriendly:
            def __str__(self):
                return "stringified-" + "x" * 200

        result = hos.spill_if_oversized(StrFriendly(), session_id="s", config=cfg)
        self.assertIn("truncated", result)

    def test_default_directory_uses_hermes_home(self):
        """When no directory override, spill under HERMES_HOME/hook_outputs."""
        test_home = tempfile.mkdtemp(prefix="hermes-home-")
        try:
            with patch.dict(os.environ, {"HERMES_HOME": test_home}):
                # NOTE(review): only the env var is patched here. If
                # get_hermes_home does not honour it, the spill lands in the
                # real ~/.hermes/hook_outputs/sess and is not cleaned up.
                cfg = self._cfg(directory=None, max_chars=5)
                hos.spill_if_oversized("x" * 200, session_id="sess", config=cfg)
            # Spill directory exists somewhere under test_home OR default
            # ~/.hermes/hook_outputs depending on get_hermes_home behaviour.
            candidates = [
                Path(test_home) / "hook_outputs" / "sess",
                Path(os.path.expanduser("~/.hermes/hook_outputs/sess")),
            ]
            # At least one of the candidate dirs now exists and has a file.
            existing = [c for c in candidates if c.is_dir() and list(c.iterdir())]
            self.assertTrue(existing, f"No spill dir found in {candidates}")
        finally:
            import shutil

            shutil.rmtree(test_home, ignore_errors=True)
if __name__ == "__main__":
unittest.main()

236
tools/hook_output_spill.py Normal file
View File

@@ -0,0 +1,236 @@
"""Spill oversized hook-injected context to disk with a preview placeholder.
Ported from openai/codex PR #21069 (``Spill large hook outputs from context``).
Background
----------
Both shell hooks (``agent/shell_hooks.py``) and Python plugins
(``pre_llm_call`` hook in ``run_agent.py``) can return ``{"context": "..."}``
which gets concatenated into the current turn's user message on EVERY
subsequent API call. If a hook emits a large blob (e.g. a debug dump, a
full file, or a runaway prompt-engineering script), that blob inflates
every turn of the session and blows out the prompt cache prefix the
moment it's appended.
This mirrors what Codex does for its ``PreToolUse``/``Stop``/feedback
hooks: once the injected text exceeds a configured budget, write the
full content to a per-session directory on disk and replace the in-prompt
payload with a head/tail preview plus the saved path. The model can still
inspect the full content via ``read_file`` or ``terminal`` if it needs to.
Config (``config.yaml``)::
hooks:
output_spill:
enabled: true # default: true; set false to disable spilling
max_chars: 10000 # default; context above this is spilled
preview_head: 500 # chars shown at the start of the preview
preview_tail: 500 # chars shown at the end of the preview
directory: null # default: <HERMES_HOME>/hook_outputs
Design invariants
-----------------
* Behaviour-preserving when ``enabled: false`` or when content is under
the cap — return the input string unchanged.
* Never raises. Any I/O error (disk full, permission denied, missing
HERMES_HOME, etc.) falls back to a head/tail preview (measured in
characters, not bytes) with an in-prompt notice that the spill write
failed — the hook context still reaches the model, just bounded in size.
* Spill files are grouped by session so a ``/new`` session doesn't grow
them forever in one directory.
"""
from __future__ import annotations
import logging
import os
import uuid
from pathlib import Path
from typing import Any, Dict, Optional
# Module-level logger; spill write failures are reported through it.
logger = logging.getLogger(__name__)
# Fallback settings used when ``hooks.output_spill`` omits a key or supplies
# an unusable value.
DEFAULT_MAX_CHARS = 10_000  # context longer than this many characters is spilled
DEFAULT_PREVIEW_HEAD = 500  # characters of the start of the text kept in the preview
DEFAULT_PREVIEW_TAIL = 500  # characters of the end of the text kept in the preview
DEFAULT_ENABLED = True  # spilling is on unless explicitly disabled
def _coerce_positive_int(value: Any, default: int) -> int:
    """Return ``value`` as a strictly positive ``int``, else ``default``.

    ``default`` is returned when ``value`` cannot be converted with ``int()``
    (wrong type, unparsable string, NaN, or an infinite float such as YAML's
    ``.inf``) or when the converted value is zero or negative. Never raises
    for these inputs.
    """
    try:
        iv = int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError is what int(float("inf")) raises; it must not escape
        # because config resolution is documented as never raising.
        return default
    return iv if iv > 0 else default
def _coerce_non_negative_int(value: Any, default: int) -> int:
    """Return ``value`` as an ``int`` that is zero or greater, else ``default``.

    Like ``_coerce_positive_int`` but zero is accepted (e.g. an empty tail).
    ``default`` is returned when ``value`` cannot be converted with ``int()``
    (wrong type, unparsable string, NaN, or an infinite float) or when the
    converted value is negative. Never raises for these inputs.
    """
    try:
        iv = int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError is what int(float("inf")) raises; treat it like any
        # other unusable value instead of letting it propagate.
        return default
    return iv if iv >= 0 else default
def get_spill_config() -> Dict[str, Any]:
    """Return the resolved hook output-spill config. Never raises.

    Reads the ``hooks.output_spill`` mapping from ``load_config()`` and
    returns a dict with the keys ``enabled``, ``max_chars``,
    ``preview_head``, ``preview_tail`` and ``directory``. Missing or
    unusable values are replaced by the module defaults; ``directory`` is
    ``None`` unless a string was configured. If the config cannot be loaded
    at all, every key takes its default.
    """
    section: Dict[str, Any] = {}
    try:
        # Imported at call time (not module import time); tests rely on
        # patching ``hermes_cli.config.load_config``.
        from hermes_cli.config import load_config

        cfg = load_config() or {}
        hooks = cfg.get("hooks") if isinstance(cfg, dict) else None
        sub = hooks.get("output_spill") if isinstance(hooks, dict) else None
        if isinstance(sub, dict):
            section = sub
    except Exception as exc:
        # Best-effort: fall back to defaults, but leave a trace for debugging.
        logger.debug("hook output spill config unavailable, using defaults: %s", exc)
        section = {}

    enabled_raw = section.get("enabled", DEFAULT_ENABLED)
    if enabled_raw is None:
        enabled = DEFAULT_ENABLED
    elif isinstance(enabled_raw, str):
        # bool("false") is True, so a quoted YAML value would otherwise keep
        # spilling switched on; interpret the common "off" spellings.
        enabled = enabled_raw.strip().lower() not in {"", "0", "false", "no", "off"}
    else:
        enabled = bool(enabled_raw)

    directory = section.get("directory")
    if not isinstance(directory, str):
        directory = None

    return {
        "enabled": enabled,
        "max_chars": _coerce_positive_int(section.get("max_chars"), DEFAULT_MAX_CHARS),
        "preview_head": _coerce_non_negative_int(
            section.get("preview_head"), DEFAULT_PREVIEW_HEAD
        ),
        "preview_tail": _coerce_non_negative_int(
            section.get("preview_tail"), DEFAULT_PREVIEW_TAIL
        ),
        "directory": directory,
    }
def _resolve_spill_dir(directory_override: Optional[str], session_id: Optional[str]) -> Path:
    """Return the directory where spill files for this session live.

    The base directory is ``directory_override`` (with ``~`` expanded) when
    given, otherwise ``<hermes home>/hook_outputs``. The session id becomes a
    single sub-directory name under that base; a missing id maps to
    ``"no-session"``. The directory is not created here.
    """
    if directory_override:
        base = Path(os.path.expanduser(directory_override))
    else:
        try:
            from hermes_constants import get_hermes_home

            base = Path(get_hermes_home()) / "hook_outputs"
        except Exception:
            # Last-resort fallback: HERMES_HOME env var, then ~/.hermes
            home = os.environ.get("HERMES_HOME") or os.path.expanduser("~/.hermes")
            base = Path(home) / "hook_outputs"

    # Group by session so spills are contained per conversation. ``str()``
    # tolerates non-string ids (e.g. an int) instead of failing on .replace.
    segment = str(session_id or "no-session")
    # Defensive: neutralise anything that could make the segment more than
    # one plain path component — separators, parent references, a Windows
    # drive prefix ("C:") and NUL bytes.
    for unsafe in ("/", "\\", "..", ":", "\x00"):
        segment = segment.replace(unsafe, "_")
    # "." would collapse into the base directory itself.
    if segment in ("", "."):
        segment = "no-session"
    return base / segment
def _build_preview(
    text: str,
    head: int,
    tail: int,
    saved_path: Optional[str],
    *,
    source: str,
) -> str:
    """Assemble the in-prompt preview with head/tail and saved-path footer.

    Parameters
    ----------
    text:
        The full oversized text being replaced.
    head, tail:
        Number of characters to show from the start and the end of ``text``.
        Values of zero or less omit that section. The tail is shortened so
        it never repeats characters already shown in the head.
    saved_path:
        Path of the spill file, or ``None`` when the write failed; the
        header line says which.
    source:
        Label placed at the start of the header line.

    Returns the header line followed by the ``--- head ---`` and
    ``--- tail ---`` sections (each only when non-empty), joined by newlines.
    """
    total = len(text)
    head_len = min(max(head, 0), total)
    # Clamp the tail to what the head has not already covered; otherwise a
    # text shorter than head + tail would show its middle part twice.
    tail_len = min(max(tail, 0), total - head_len)
    head_chunk = text[:head_len]
    tail_chunk = text[total - tail_len:] if tail_len else ""

    if saved_path:
        location = f"saved to {saved_path}]"
    else:
        location = "unavailable — spill write failed]"
    parts = [f"[{source} output truncated — {total:,} chars; full content " + location]
    if head_chunk:
        parts.extend(("--- head ---", head_chunk))
    if tail_chunk:
        parts.extend(("--- tail ---", tail_chunk))
    return "\n".join(parts)
def spill_if_oversized(
    text: str,
    *,
    session_id: Optional[str] = None,
    source: str = "hook",
    config: Optional[Dict[str, Any]] = None,
) -> str:
    """Spill ``text`` to disk if it exceeds the configured cap.

    Returns either ``text`` unchanged (when under the cap, disabled, or
    empty) or a preview string with a filesystem path pointing at the
    full content. If the spill file cannot be written, the preview is still
    returned, with a notice in place of the path. ``None`` input (or input
    whose ``str()`` fails) yields ``""``.

    Parameters
    ----------
    text:
        The raw injected-context string from a hook. Non-string inputs
        are coerced with ``str()``.
    session_id:
        Used to group spill files by conversation. Falls back to
        ``"no-session"`` if missing.
    source:
        Human-readable label used in the preview header (``"hook"``,
        ``"plugin hook"``, ``"shell hook"``, etc.). Free-form.
    config:
        Optional override for tests; normally resolved from
        ``config.yaml``. Unusable numeric values fall back to the module
        defaults instead of raising.
    """
    if text is None:
        return ""
    if not isinstance(text, str):
        try:
            text = str(text)
        except Exception:
            return ""

    cfg = config if config is not None else get_spill_config()
    if not cfg.get("enabled", True):
        return text

    # A caller-supplied config has not been through get_spill_config(), so
    # its values may be strings, infinities or negatives. Coerce defensively
    # so a bad value cannot raise or force every hook output to spill.
    try:
        max_chars = int(cfg.get("max_chars") or DEFAULT_MAX_CHARS)
    except (TypeError, ValueError, OverflowError):
        max_chars = DEFAULT_MAX_CHARS
    if max_chars <= 0:
        max_chars = DEFAULT_MAX_CHARS
    if len(text) <= max_chars:
        return text

    try:
        head = max(int(cfg.get("preview_head") or 0), 0)
    except (TypeError, ValueError, OverflowError):
        head = DEFAULT_PREVIEW_HEAD
    try:
        tail = max(int(cfg.get("preview_tail") or 0), 0)
    except (TypeError, ValueError, OverflowError):
        tail = DEFAULT_PREVIEW_TAIL
    directory_override = cfg.get("directory")

    # Try to write the spill file. If that fails we still need to return
    # something bounded — never let a disk failure blow up the turn.
    saved_path: Optional[str] = None
    try:
        spill_dir = _resolve_spill_dir(directory_override, session_id)
        spill_dir.mkdir(parents=True, exist_ok=True)
        spill_path = spill_dir / f"{uuid.uuid4().hex}.txt"
        # Write the raw text plus a trailing newline so tail readers
        # (``tail -f``, editors) don't report "missing newline".
        payload = text if text.endswith("\n") else text + "\n"
        spill_path.write_text(payload, encoding="utf-8")
        saved_path = str(spill_path)
    except Exception as exc:
        logger.warning("hook output spill failed: %s", exc)
        saved_path = None

    return _build_preview(text, head, tail, saved_path, source=source)
# Names exported by ``from tools.hook_output_spill import *``: the default
# constants and the two entry points. Underscore-prefixed helpers are
# deliberately left out.
__all__ = [
"DEFAULT_MAX_CHARS",
"DEFAULT_PREVIEW_HEAD",
"DEFAULT_PREVIEW_TAIL",
"DEFAULT_ENABLED",
"get_spill_config",
"spill_if_oversized",
]

View File

@@ -461,6 +461,20 @@ return None
Any non-None, non-empty return with a `"context"` key (or a plain non-empty string) is collected and appended to the user message for the current turn.
#### Oversized-context spill
Per-hook context is capped at `10,000` characters by default. Anything above the cap is written to `$HERMES_HOME/hook_outputs/<session_id>/<uuid>.txt` and replaced with a head/tail preview plus the saved path. The model can read the full content via `read_file` or `terminal` if it genuinely needs it. This keeps a runaway plugin from inflating every subsequent turn's prompt and blowing out the prompt cache prefix. Tune in `config.yaml`:
```yaml
hooks:
output_spill:
enabled: true # default: true
max_chars: 10000 # default; raise to spill less often (use `enabled: false` to opt out)
preview_head: 500 # chars shown at the top of the preview
preview_tail: 500 # chars shown at the bottom of the preview
# directory: null # default: $HERMES_HOME/hook_outputs
```
#### How injection works
Injected context is appended to the **user message**, not the system prompt. This is a deliberate design choice: