Compare commits

...

2 Commits

Author SHA1 Message Date
alt-glitch
a7cbf3e232 chore: trim verbose comments/docstrings, add AUTHOR_MAP entry
- Replace 18-line comment block with 3-line invariant statement
- Trim test docstrings from multi-paragraph to single-line summaries
- Trim assertion messages from 4-line to 2-line mismatch reports
- Replace 5-line WHAT comments in stubs with 1-line WHY comments
- Add ziliangdotme@gmail.com -> ziliangpeng to AUTHOR_MAP
2026-05-21 07:17:06 +00:00
Ziliang Peng
08a3207fe1 fix(background_review): propagate parent toolset config to keep tools[] cache-stable
## Summary

The background skill/memory-review fork constructed a child `AIAgent`
without propagating `enabled_toolsets` / `disabled_toolsets` from the
parent. When the parent narrowed its toolset (via `hermes tools
disable` or `config.yaml`), the fork's default `enabled_toolsets=None`
expanded to "all registered tools" — and the fork's outbound request
body sent a wider `tools[]` array than the parent's main-turn request.

Anthropic's prompt-cache key includes the `tools[]` array byte-for-byte,
so this divergence forked the cache lineage on every nudge and forced a
full prefix rewrite. On a captured ~4 hour Claude-via-Hermes session
this cost roughly 4.3 M cache-write tokens — about half of those
attributable to the per-nudge alternation between the main turn's
narrowed `tools[]` and the review fork's wider `tools[]`.

## Goal

Extend the byte-stability invariant established by PR #17276 (which
fixed `system`) to the `tools[]` slot of the request body, so the
review fork's outbound request hits the parent's warmed Anthropic
prefix cache regardless of how the parent's toolset is configured.

## Implementation

Two-line change in `agent/background_review.py`: pass
`enabled_toolsets=getattr(agent, "enabled_toolsets", None)` and the
matching `disabled_toolsets` kwarg into the `AIAgent(...)` call inside
`_spawn_background_review`. Adds an explanatory block comment that
calls out the cache-key dependency and the relationship to PR #17276.

The post-construction runtime whitelist
(`set_thread_tool_whitelist({memory, skills})`) is untouched — it
still gates which tools the model is allowed to *dispatch*. This
change aligns only what the request body *transmits*, not what the
review is allowed to do, so the safety contract from issue #15204
remains intact.

## Testing

- `tests/run_agent/test_background_review_cache_parity.py`: new
  `test_review_fork_inherits_parent_toolset_config` asserts the
  parent's `enabled_toolsets` and `disabled_toolsets` reach the
  review-fork constructor as kwargs.
- `tests/run_agent/test_background_review_toolset_restriction.py`:
  the existing `test_background_review_does_not_narrow_toolset_schema`
  was inverted (its old "must NOT pass enabled_toolsets" rule was
  built on the assumption that the parent always ran with the
  registry default — wrong in practice when the parent is narrowed).
  Renamed to `test_background_review_matches_parent_toolset_config`
  and updated to assert the parent's value propagates verbatim.
- Verified the new positive test fails without the fix and passes
  with it.
- Full suite for `test_background_review*`:

  ```
  $ python -m pytest tests/run_agent/test_background_review.py \
                     tests/run_agent/test_background_review_summary.py \
                     tests/run_agent/test_background_review_toolset_restriction.py \
                     tests/run_agent/test_background_review_cache_parity.py -q
  18 passed in 1.85s
  ```

## Scope

- `agent/background_review.py`: 2 added kwargs + explanatory comment.
- Two test files: one new positive test, one inverted existing test.
- No production code paths outside the review fork; no schema changes;
  no public-API changes.

Refs: ziliangpeng/hermes-agent#1 (root-cause analysis with wire-level
cache-write measurements). Extends PR #17276's `system`-bytes
invariant to the `tools[]` slot.
2026-05-20 18:10:27 -07:00
4 changed files with 73 additions and 12 deletions

View File

@@ -390,6 +390,9 @@ def _run_review_in_thread(
# parent below so memory(action="add") writes from
# the review still land on disk; the review just
# has zero side effects on external providers.
# Match parent's toolset config so ``tools[]`` is byte-identical
# in the request body — Anthropic's cache key includes it.
# (The runtime whitelist below still restricts dispatch.)
review_agent = AIAgent(
model=agent.model,
max_iterations=16,
@@ -401,6 +404,8 @@ def _run_review_in_thread(
api_key=_parent_runtime.get("api_key") or None,
credential_pool=getattr(agent, "_credential_pool", None),
parent_session_id=agent.session_id,
enabled_toolsets=getattr(agent, "enabled_toolsets", None),
disabled_toolsets=getattr(agent, "disabled_toolsets", None),
skip_memory=True,
)
review_agent._memory_write_origin = "background_review"

View File

@@ -713,6 +713,7 @@ AUTHOR_MAP = {
"9219265+cresslank@users.noreply.github.com": "cresslank",
"trevmanthony@gmail.com": "trevthefoolish",
"ziliangpeng@users.noreply.github.com": "ziliangpeng",
"ziliangdotme@gmail.com": "ziliangpeng",
"centripetal-star@users.noreply.github.com": "centripetal-star",
"LeonSGP43@users.noreply.github.com": "LeonSGP43",
"154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43",

View File

@@ -38,6 +38,9 @@ def _make_agent_stub(agent_cls):
agent._MEMORY_REVIEW_PROMPT = "review memory"
agent._SKILL_REVIEW_PROMPT = "review skills"
agent._COMBINED_REVIEW_PROMPT = "review both"
# Non-None so the test catches a missing-kwarg regression.
agent.enabled_toolsets = ["memory", "skills", "terminal"]
agent.disabled_toolsets = ["spotify", "feishu_doc"]
return agent
@@ -183,3 +186,54 @@ def test_review_fork_pins_session_start_and_session_id():
"Review fork did not inherit parent's session_id — "
"system-prompt rebuild paths would diverge."
)
def test_review_fork_inherits_parent_toolset_config():
"""``tools[]`` byte-stability: fork must inherit parent's toolset config."""
import run_agent
agent = _make_agent_stub(run_agent.AIAgent)
captured = {}
class _Recorder:
def __init__(self, *args, **kwargs):
captured["enabled_toolsets"] = kwargs.get("enabled_toolsets")
captured["disabled_toolsets"] = kwargs.get("disabled_toolsets")
self._cached_system_prompt = None
self._memory_write_origin = None
self._memory_write_context = None
self._memory_store = None
self._memory_enabled = None
self._user_profile_enabled = None
self._memory_nudge_interval = None
self._skill_nudge_interval = None
self.suppress_status_output = None
self.session_start = None
self.session_id = None
def run_conversation(self, *args, **kwargs):
raise RuntimeError("stop after recording — don't actually call the API")
def shutdown_memory_provider(self):
pass
def close(self):
pass
with patch.object(run_agent, "AIAgent", _Recorder), \
patch("threading.Thread", _SyncThread):
agent._spawn_background_review(
messages_snapshot=[],
review_memory=True,
review_skills=False,
)
assert captured.get("enabled_toolsets") == agent.enabled_toolsets, (
f"enabled_toolsets mismatch: {captured.get('enabled_toolsets')!r} "
f"vs expected {agent.enabled_toolsets!r}"
)
assert captured.get("disabled_toolsets") == agent.disabled_toolsets, (
f"disabled_toolsets mismatch: {captured.get('disabled_toolsets')!r} "
f"vs expected {agent.disabled_toolsets!r}"
)

View File

@@ -38,6 +38,9 @@ def _make_agent_stub(agent_cls):
agent._MEMORY_REVIEW_PROMPT = "review memory"
agent._SKILL_REVIEW_PROMPT = "review skills"
agent._COMBINED_REVIEW_PROMPT = "review both"
# Non-None so the test catches a missing-kwarg regression.
agent.enabled_toolsets = ["memory", "skills", "terminal"]
agent.disabled_toolsets = ["spotify", "feishu_doc"]
return agent
@@ -52,13 +55,8 @@ class _SyncThread:
self._target()
def test_background_review_does_not_narrow_toolset_schema():
"""The review fork must NOT pass enabled_toolsets to AIAgent.
Narrowing the schema diverges the ``tools`` cache key from the parent's,
which sits above ``system`` in Anthropic's cache hierarchy and forces a
full prefix-cache miss on every review (see #25322, PR #17276).
"""
def test_background_review_matches_parent_toolset_config():
"""Fork must receive parent's toolset config so ``tools[]`` cache key matches."""
import run_agent
agent = _make_agent_stub(run_agent.AIAgent)
@@ -66,6 +64,7 @@ def test_background_review_does_not_narrow_toolset_schema():
def _capture_init(self, *args, **kwargs):
captured["enabled_toolsets"] = kwargs.get("enabled_toolsets", "UNSET")
captured["disabled_toolsets"] = kwargs.get("disabled_toolsets", "UNSET")
raise RuntimeError("stop after capturing init args")
with patch.object(run_agent.AIAgent, "__init__", _capture_init), \
@@ -77,11 +76,13 @@ def test_background_review_does_not_narrow_toolset_schema():
)
assert "enabled_toolsets" in captured, "AIAgent.__init__ was not called"
# The kwarg must be absent — letting AIAgent inherit the default full
# toolset so the schema bytes match the parent's.
assert captured["enabled_toolsets"] == "UNSET", (
f"Review fork narrowed the toolset schema (got {captured['enabled_toolsets']!r}), "
"which breaks prefix-cache parity with the parent."
assert captured["enabled_toolsets"] == agent.enabled_toolsets, (
f"enabled_toolsets mismatch: {captured['enabled_toolsets']!r} "
f"vs expected {agent.enabled_toolsets!r}"
)
assert captured["disabled_toolsets"] == agent.disabled_toolsets, (
f"disabled_toolsets mismatch: {captured['disabled_toolsets']!r} "
f"vs expected {agent.disabled_toolsets!r}"
)