feat(curator): split archived into consolidated vs pruned with model + heuristic classification (#17941)
* fix(curator): split 'archived' into consolidated vs pruned in run reports
Users who watched a curator run saw skills like 'anthropic-api' listed
under 'Skills archived' and interpreted that as pruning — but the curator
had actually absorbed those skills into a new umbrella (e.g. 'llm-providers')
during the same run. The directory gets archived for safety (all removals
are recoverable), but the content still lives under a different name.
Users then 'restored' what they thought were deleted skills and ended up
with confusingly duplicated skillsets (old-name + absorbed-inside-umbrella).
Classify removed skills using this run's skill_manage tool calls:
- consolidated: content absorbed into a surviving/newly-created skill
(evidenced by a skill_manage write_file/patch/create/edit whose target
is a different skill AND whose file_path/content references the
removed skill's name)
- pruned: archived without consolidation evidence (truly stale)
REPORT.md now shows two distinct sections:
- 'Consolidated into umbrella skills' — with `removed → merged into umbrella`
- 'Pruned — archived for staleness' — pure staleness archives
run.json schema additions (backward compatible):
- counts.consolidated_this_run, counts.pruned_this_run
- consolidated: [{name, into, evidence}, ...]
- pruned: [names]
- archived: retained as the union for backward compat
Also: relabel the auto-transitions 'archived' counter to 'archived (no
LLM, pure time-based staleness)' so it's clearly distinct from LLM-pass
archives.
Tests: 9 new tests in test_curator_classification.py covering consolidation
evidence parsing (write_file/patch/create), hyphen/underscore name variants,
self-reference rejection, destination-must-exist, mixed runs, and
malformed-JSON fallback safety. Existing test_report_md_is_human_readable
updated to cover the new section names.
E2E: isolated HERMES_HOME, realistic 3-skill run, REPORT.md verified
end-to-end.
* feat(curator): hybrid model-declared + heuristic classification
Extend the consolidated-vs-pruned split with LLM-authored intent:
1. Curator prompt now requires a structured YAML block at the end of the
final response (consolidations / prunings with short rationale).
2. _parse_structured_summary() extracts it tolerantly — missing block,
malformed YAML, partial lists all fall back to heuristic cleanly.
3. _reconcile_classification() merges model intent with the tool-call
heuristic:
- Model wins on rationale when its umbrella exists post-run
- Model hallucination (umbrella doesn't exist) is downgraded to the
heuristic's finding, or pruned if there's no evidence either
- Heuristic catches model omission — consolidations the model
enumerated tools for but forgot to list get surfaced with a
'(detected via tool-call audit)' tag
4. REPORT.md now shows per-row rationale alongside 'removed → umbrella'
and flags audit-only rows so the user knows why no reason is shown.
Backward compat: run.json's 'archived' field (union) is preserved.
'pruned' is now a list of dicts with {name, source, reason};
'pruned_names' is the flat-name list for legacy consumers.
Tests: 15 new covering YAML parse edge cases (malformed, empty lists,
bare-string entries, missing fields), reconciler rules (model wins,
hallucination fallback, heuristic catches omission, prune with reason),
and an end-to-end report-render test with all four paths exercised.
2026-04-30 10:31:23 -07:00
|
|
|
"""Tests for the curator consolidated-vs-pruned classifier.
|
|
|
|
|
|
|
|
|
|
The classifier splits skills that disappeared between the before/after
|
|
|
|
|
snapshots into two buckets:
|
|
|
|
|
|
|
|
|
|
- "consolidated" — absorbed into an umbrella; content still lives
|
|
|
|
|
under another skill's files
|
|
|
|
|
- "pruned" — archived for staleness; content not preserved elsewhere
|
|
|
|
|
|
|
|
|
|
Without the split the report lumped everything under "Skills archived",
|
|
|
|
|
which misled users into thinking consolidated skills had been pruned.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
import json
|
|
|
|
|
from datetime import datetime, timezone
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def curator_env(tmp_path, monkeypatch):
    """Yield a freshly reloaded curator module bound to an isolated HERMES_HOME.

    Builds a throwaway ~/.hermes tree under pytest's tmp_path, points both the
    HERMES_HOME env var and Path.home() at it, then reloads hermes_constants
    and agent.curator so module-level paths pick up the sandbox.
    """
    home = tmp_path / ".hermes"
    home.mkdir()
    for subdir in ("skills", "logs"):
        (home / subdir).mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home))
    # Some code paths resolve via Path.home() rather than the env var.
    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    import importlib

    import hermes_constants
    importlib.reload(hermes_constants)
    from agent import curator
    importlib.reload(curator)
    yield curator
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_classify_consolidated_via_write_file_evidence(curator_env):
    """A skill_manage write_file on the umbrella that references the removed
    skill's name (references/<removed>.md) is consolidation evidence."""
    write_args = json.dumps({
        "action": "write_file",
        "name": "training-platforms",
        "file_path": "references/axolotl-training.md",
        "file_content": "# Axolotl\n...",
    })
    result = curator_env._classify_removed_skills(
        removed=["axolotl-training"],
        added=[],
        after_names={"training-platforms", "keeper"},
        tool_calls=[{"name": "skill_manage", "arguments": write_args}],
    )
    assert len(result["consolidated"]) == 1
    entry = result["consolidated"][0]
    assert entry["name"] == "axolotl-training"
    assert entry["into"] == "training-platforms"
    assert result["pruned"] == []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_classify_pruned_when_no_destination_reference(curator_env):
    """A removed skill with no referencing tool call lands in the pruned bucket."""
    patch_args = json.dumps({
        "action": "patch",
        "name": "keeper",
        "old_string": "foo",
        "new_string": "bar",
    })
    result = curator_env._classify_removed_skills(
        removed=["old-stale-thing"],
        added=[],
        after_names={"keeper"},
        tool_calls=[
            {"name": "skills_list", "arguments": "{}"},
            {"name": "skill_manage", "arguments": patch_args},
        ],
    )
    assert result["consolidated"] == []
    assert len(result["pruned"]) == 1
    assert result["pruned"][0]["name"] == "old-stale-thing"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_classify_consolidated_into_newly_created_umbrella(curator_env):
    """A removed skill absorbed into an umbrella created during this run."""
    create_args = json.dumps({
        "action": "create",
        "name": "llm-providers",
        "content": "# LLM Providers\n\n## anthropic-api\nMerged from the old anthropic-api skill.\n",
    })
    result = curator_env._classify_removed_skills(
        removed=["anthropic-api"],
        added=["llm-providers"],  # umbrella born this run
        after_names={"llm-providers"},
        tool_calls=[{"name": "skill_manage", "arguments": create_args}],
    )
    assert len(result["consolidated"]) == 1
    entry = result["consolidated"][0]
    assert entry["name"] == "anthropic-api"
    assert entry["into"] == "llm-providers"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_classify_handles_underscore_hyphen_variants(curator_env):
    """Hyphenated names must match their underscore forms in paths/content."""
    write_args = json.dumps({
        "action": "write_file",
        "name": "webui",
        "file_path": "references/open_webui_setup.md",  # underscores, not hyphens
        "file_content": "...",
    })
    result = curator_env._classify_removed_skills(
        removed=["open-webui-setup"],
        added=[],
        after_names={"webui"},
        tool_calls=[{"name": "skill_manage", "arguments": write_args}],
    )
    assert len(result["consolidated"]) == 1
    assert result["consolidated"][0]["into"] == "webui"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_classify_self_reference_does_not_count(curator_env):
    """A tool call targeting the removed skill itself is not consolidation.

    Models the case where the curator patched a skill earlier in the run and
    later archived it anyway.
    """
    self_patch = json.dumps({
        "action": "patch",
        "name": "doomed",  # identical to the removed skill
        "old_string": "x",
        "new_string": "y",
    })
    result = curator_env._classify_removed_skills(
        removed=["doomed"],
        added=[],
        after_names={"keeper"},
        tool_calls=[{"name": "skill_manage", "arguments": self_patch}],
    )
    assert result["consolidated"] == []
    assert result["pruned"][0]["name"] == "doomed"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_classify_destination_must_exist_after_run(curator_env):
    """A referenced destination missing from after_names cannot be the umbrella."""
    write_args = json.dumps({
        "action": "write_file",
        "name": "ghost",  # absent from after_names below
        "file_path": "references/thing.md",
        "file_content": "...",
    })
    result = curator_env._classify_removed_skills(
        removed=["thing"],
        added=[],
        after_names={"keeper"},  # no "ghost" here
        tool_calls=[{"name": "skill_manage", "arguments": write_args}],
    )
    assert result["consolidated"] == []
    assert result["pruned"][0]["name"] == "thing"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_classify_mixed_run_produces_both_buckets(curator_env):
    """Realistic run: one skill is consolidated while another is pruned."""
    write_args = json.dumps({
        "action": "write_file",
        "name": "umbrella",
        "file_path": "references/absorbed-skill.md",
        "file_content": "...",
    })
    result = curator_env._classify_removed_skills(
        removed=["absorbed-skill", "dead-skill"],
        added=["umbrella"],
        after_names={"umbrella", "keeper"},
        tool_calls=[{"name": "skill_manage", "arguments": write_args}],
    )
    assert len(result["consolidated"]) == 1
    consolidated = result["consolidated"][0]
    assert consolidated["name"] == "absorbed-skill"
    assert consolidated["into"] == "umbrella"
    assert len(result["pruned"]) == 1
    assert result["pruned"][0]["name"] == "dead-skill"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_classify_handles_malformed_arguments_string(curator_env):
    """Truncated/malformed argument JSON must fail safe, never a false positive."""
    # Arguments truncated (e.g. to 400 chars) may not parse as JSON at all.
    truncated_raw = (
        '{"action":"write_file","name":"umbrella","file_path":"references/'
        'absorbed-skill.md","file_content":"long content that was cut off mid'
    )
    result = curator_env._classify_removed_skills(
        removed=["absorbed-skill"],
        added=[],
        after_names={"umbrella"},
        tool_calls=[{"name": "skill_manage", "arguments": truncated_raw}],
    )
    # The raw string does contain "absorbed-skill", but with json.loads failing
    # there is no parsed dict, so the classifier cannot identify a target skill
    # from args.get("name") and must not promote to "consolidated". This is the
    # correctness floor: better to prune-label than hallucinate an umbrella
    # that was never verified.
    assert result["consolidated"] == []
    assert len(result["pruned"]) == 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_report_md_splits_consolidated_and_pruned_sections(curator_env):
    """End-to-end: run.json and REPORT.md present the two buckets distinctly."""
    curator = curator_env
    start = datetime.now(timezone.utc)

    before = [
        {"name": "absorbed-skill", "state": "active", "pinned": False},
        {"name": "dead-skill", "state": "stale", "pinned": False},
        {"name": "keeper", "state": "active", "pinned": False},
    ]
    after = [
        {"name": "keeper", "state": "active", "pinned": False},
        {"name": "umbrella", "state": "active", "pinned": False},
    ]
    create_call = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "create",
            "name": "umbrella",
            "content": "# umbrella\n\nAbsorbed absorbed-skill.",
        }),
    }

    run_dir = curator._write_run_report(
        started_at=start,
        elapsed_seconds=60.0,
        auto_counts={"checked": 3, "marked_stale": 0, "archived": 0, "reactivated": 0},
        auto_summary="no auto changes",
        before_report=before,
        before_names={row["name"] for row in before},
        after_report=after,
        llm_meta={
            "final": "Consolidated absorbed-skill into umbrella. Pruned dead-skill.",
            "summary": "1 consolidated, 1 pruned",
            "model": "m",
            "provider": "p",
            "error": None,
            "tool_calls": [create_call],
        },
    )

    payload = json.loads((run_dir / "run.json").read_text())
    # Both lists exist and are disjoint.
    consolidated_names = {entry["name"] for entry in payload["consolidated"]}
    assert consolidated_names == {"absorbed-skill"}
    # `pruned` holds full dicts {name, source, reason}; `pruned_names` is the
    # flat list for quick scans / legacy consumers.
    pruned_names = payload["pruned_names"]
    assert pruned_names == ["dead-skill"]
    assert all(isinstance(entry, dict) and "name" in entry for entry in payload["pruned"])
    # The union still matches the legacy "archived" field for backward compat.
    assert set(payload["archived"]) == consolidated_names | set(pruned_names)
    # Counts exposed.
    assert payload["counts"]["consolidated_this_run"] == 1
    assert payload["counts"]["pruned_this_run"] == 1

    md = (run_dir / "REPORT.md").read_text()
    # Two separate sections, not a single "Skills archived" lump.
    assert "Consolidated into umbrella skills" in md
    assert "Pruned — archived for staleness" in md
    assert "`absorbed-skill` → merged into `umbrella`" in md
    assert "`dead-skill`" in md
    assert "### Skills archived" not in md
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# _parse_structured_summary — extracting the model's required YAML block
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_parse_structured_summary_happy_path(curator_env):
    """A well-formed required YAML block parses into both declaration lists."""
    final_text = (
        "Long human summary here. I processed clusters X, Y, Z.\n\n"
        "## Structured summary (required)\n"
        "```yaml\n"
        "consolidations:\n"
        "  - from: anthropic-api\n"
        "    into: llm-providers\n"
        "    reason: duplicate of the generic llm-providers skill\n"
        "  - from: openai-api\n"
        "    into: llm-providers\n"
        "    reason: same — merged with sibling\n"
        "prunings:\n"
        "  - name: random-old-notes\n"
        "    reason: pre-curator garbage, no overlap\n"
        "```\n"
    )
    parsed = curator_env._parse_structured_summary(final_text)
    assert len(parsed["consolidations"]) == 2
    assert parsed["consolidations"][0] == {
        "from": "anthropic-api",
        "into": "llm-providers",
        "reason": "duplicate of the generic llm-providers skill",
    }
    assert len(parsed["prunings"]) == 1
    assert parsed["prunings"][0]["reason"] == "pre-curator garbage, no overlap"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_parse_structured_summary_missing_block(curator_env):
    """Text without any YAML block parses to empty declaration lists."""
    parsed = curator_env._parse_structured_summary("No block in this text.")
    assert parsed == {"consolidations": [], "prunings": []}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_parse_structured_summary_malformed_yaml(curator_env):
    """Unparseable YAML inside the block falls back to empty lists."""
    broken = "```yaml\nthis: is\n not: [valid yaml\n```"
    parsed = curator_env._parse_structured_summary(broken)
    assert parsed == {"consolidations": [], "prunings": []}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_parse_structured_summary_empty_lists(curator_env):
    """Explicitly empty YAML lists round-trip to empty declaration lists."""
    block = "```yaml\nconsolidations: []\nprunings: []\n```"
    parsed = curator_env._parse_structured_summary(block)
    assert parsed == {"consolidations": [], "prunings": []}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_parse_structured_summary_ignores_bare_strings(curator_env):
    """Non-dict entries (a model writing bare names) are silently skipped."""
    block = (
        "```yaml\n"
        "consolidations:\n"
        "  - just-a-bare-string\n"
        "  - from: real-entry\n"
        "    into: umbrella\n"
        "    reason: valid\n"
        "prunings: []\n"
        "```"
    )
    parsed = curator_env._parse_structured_summary(block)
    assert len(parsed["consolidations"]) == 1
    assert parsed["consolidations"][0]["from"] == "real-entry"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_parse_structured_summary_missing_required_fields(curator_env):
    """Consolidation entries lacking either 'from' or 'into' are dropped."""
    block = (
        "```yaml\n"
        "consolidations:\n"
        "  - from: only-from\n"
        "    reason: no into\n"
        "  - into: only-into\n"
        "  - from: good\n"
        "    into: umbrella\n"
        "prunings: []\n"
        "```"
    )
    parsed = curator_env._parse_structured_summary(block)
    assert len(parsed["consolidations"]) == 1
    assert parsed["consolidations"][0]["from"] == "good"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# _reconcile_classification — merging model block with heuristic
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_reconcile_model_wins_when_umbrella_exists(curator_env):
    """Model claim whose umbrella survives the run wins, carrying its reason."""
    model_block = {
        "consolidations": [
            {"from": "anthropic-api", "into": "llm-providers", "reason": "duplicate"},
        ],
        "prunings": [],
    }
    result = curator_env._reconcile_classification(
        removed=["anthropic-api"],
        heuristic={"consolidated": [], "pruned": [{"name": "anthropic-api"}]},
        model_block=model_block,
        destinations={"llm-providers"},
    )
    assert len(result["consolidated"]) == 1
    entry = result["consolidated"][0]
    assert entry["name"] == "anthropic-api"
    assert entry["into"] == "llm-providers"
    assert entry["reason"] == "duplicate"
    assert entry["source"] == "model"
    assert result["pruned"] == []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_reconcile_model_hallucinates_umbrella(curator_env):
    """A model-claimed umbrella that doesn't exist is downgraded to the heuristic."""
    model_block = {
        "consolidations": [
            {"from": "thing", "into": "nonexistent-umbrella", "reason": "confused"},
        ],
        "prunings": [],
    }
    heuristic = {
        "consolidated": [{"name": "thing", "into": "real-umbrella", "evidence": "..."}],
        "pruned": [],
    }
    result = curator_env._reconcile_classification(
        removed=["thing"],
        heuristic=heuristic,
        model_block=model_block,
        destinations={"real-umbrella"},
    )
    assert len(result["consolidated"]) == 1
    entry = result["consolidated"][0]
    assert entry["into"] == "real-umbrella"
    assert "tool-call audit" in entry["source"]
    assert entry["model_claimed_into"] == "nonexistent-umbrella"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_reconcile_model_hallucinates_with_no_heuristic_evidence(curator_env):
    """Hallucinated umbrella plus zero tool-call evidence means pruned."""
    model_block = {
        "consolidations": [
            {"from": "ghost", "into": "nonexistent", "reason": "wrong"},
        ],
        "prunings": [],
    }
    result = curator_env._reconcile_classification(
        removed=["ghost"],
        heuristic={"consolidated": [], "pruned": [{"name": "ghost"}]},
        model_block=model_block,
        destinations={"real-umbrella"},
    )
    assert result["consolidated"] == []
    assert len(result["pruned"]) == 1
    assert "fallback" in result["pruned"][0]["source"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_reconcile_heuristic_catches_model_omission(curator_env):
    """Consolidations the model forgot to list are surfaced via the heuristic."""
    heuristic = {
        "consolidated": [{
            "name": "forgotten",
            "into": "umbrella",
            "evidence": "write_file on umbrella referenced forgotten.md",
        }],
        "pruned": [],
    }
    result = curator_env._reconcile_classification(
        removed=["forgotten"],
        heuristic=heuristic,
        model_block={"consolidations": [], "prunings": []},
        destinations={"umbrella"},
    )
    assert len(result["consolidated"]) == 1
    entry = result["consolidated"][0]
    assert entry["into"] == "umbrella"
    assert "model omitted" in entry["source"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_reconcile_model_prunes_with_reason(curator_env):
    """When model and heuristic agree on a prune, the model's reason is kept."""
    model_block = {
        "consolidations": [],
        "prunings": [{"name": "stale-skill", "reason": "superseded by bundled skill"}],
    }
    result = curator_env._reconcile_classification(
        removed=["stale-skill"],
        heuristic={"consolidated": [], "pruned": [{"name": "stale-skill"}]},
        model_block=model_block,
        destinations=set(),
    )
    assert len(result["pruned"]) == 1
    entry = result["pruned"][0]
    assert entry["reason"] == "superseded by bundled skill"
    assert entry["source"] == "model"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_reconcile_model_block_visible_in_full_report(curator_env):
    """End-to-end: a YAML block in the LLM final response puts reasons in run.json
    and REPORT.md. Uses the module-level json/datetime imports directly."""
    start = datetime.now(timezone.utc)
    before = [
        {"name": "anthropic-api", "state": "active", "pinned": False},
        {"name": "stale-thing", "state": "stale", "pinned": False},
    ]
    after = [{"name": "llm-providers", "state": "active", "pinned": False}]

    llm_final_text = (
        "Processed 3 clusters. Absorbed anthropic-api into llm-providers.\n\n"
        "## Structured summary (required)\n"
        "```yaml\n"
        "consolidations:\n"
        "  - from: anthropic-api\n"
        "    into: llm-providers\n"
        "    reason: duplicate content, now a subsection\n"
        "prunings:\n"
        "  - name: stale-thing\n"
        "    reason: pre-curator junk, no overlap with anything\n"
        "```\n"
    )
    create_call = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "create",
            "name": "llm-providers",
            "content": "# llm-providers\nIncludes anthropic-api",
        }),
    }

    run_dir = curator_env._write_run_report(
        started_at=start,
        elapsed_seconds=30.0,
        auto_counts={"checked": 2, "marked_stale": 0, "archived": 0, "reactivated": 0},
        auto_summary="none",
        before_report=before,
        before_names={row["name"] for row in before},
        after_report=after,
        llm_meta={
            "final": llm_final_text,
            "summary": "1 consolidated, 1 pruned",
            "model": "m",
            "provider": "p",
            "error": None,
            "tool_calls": [create_call],
        },
    )

    payload = json.loads((run_dir / "run.json").read_text())
    cons = payload["consolidated"][0]
    assert cons["name"] == "anthropic-api"
    assert cons["into"] == "llm-providers"
    assert cons["reason"] == "duplicate content, now a subsection"
    # Both the model's YAML and the tool-call audit carried this entry.
    assert cons["source"] == "model+audit"

    pruned = payload["pruned"][0]
    assert pruned["name"] == "stale-thing"
    assert pruned["reason"] == "pre-curator junk, no overlap with anything"

    md = (run_dir / "REPORT.md").read_text()
    assert "duplicate content, now a subsection" in md
    assert "pre-curator junk" in md
|
fix(curator): authoritative absorbed_into on delete + restore cron skill links on rollback (#18671) (#18731)
* fix(curator): authoritative absorbed_into declarations on skill delete
Closes #18671. The classification pipeline that feeds cron-ref rewriting
used to infer consolidation vs pruning from two brittle signals: the
curator model's post-hoc YAML summary block, and a substring heuristic
scanning other tool calls for the removed skill's name. Both miss in
real consolidations — the model forgets the YAML under reasoning
pressure, and the heuristic misses when the umbrella's patch content
describes the absorbed behavior abstractly instead of naming the old
slug. When both miss, the skill falls through to 'no-evidence fallback'
pruned, and #18253's cron rewriter drops the cron ref entirely instead
of mapping it to the umbrella. Same observable symptom as pre-#18253:
'Skill(s) not found and skipped' at the next cron run.
The fix makes the model declare intent at the moment of deletion.
skill_manage(action='delete') now accepts absorbed_into:
- absorbed_into='<umbrella>' -> consolidated, target must exist on disk
- absorbed_into='' -> explicit prune, no forwarding target
- missing -> legacy path, falls through to heuristic/YAML
The curator reconciler reads these declarations off llm_meta.tool_calls
BEFORE either the YAML block or the substring heuristic. Declaration
wins. Fallback logic stays intact for backward compat with any caller
(human or older curator conversation) that doesn't populate the arg.
Changes
- tools/skill_manager_tool.py: add absorbed_into param to skill_manage
+ _delete_skill. Validate target exists when non-empty. Reject
absorbed_into=<self>. Wire through dispatcher + registry + schema.
- agent/curator.py: new _extract_absorbed_into_declarations() walks
tool calls for skill_manage(delete) with the arg. _reconcile_classification
accepts absorbed_declarations= and treats them as authoritative. Curator
prompt updated to require the arg on every delete.
- Tests: 7 new skill_manager tests covering the tool contract (valid
target, empty string, nonexistent target, self-reference, whitespace,
backward compat, dispatcher plumbing). 11 new curator tests covering
the extractor + authoritative reconciler path + mixed-legacy-and-
declared runs.
Validation
- 307/307 targeted tests pass (curator + cron + skill_manager suites).
- E2E #18671 repro: 3 narrow skills, 1 umbrella, cron job referencing
all 3. Model emits NO YAML block. Heuristic misses (patch prose
doesn't name old slugs). Delete calls carry absorbed_into. Result:
both PR skills correctly classified 'consolidated' + cron rewritten
['pr-review-format', 'pr-review-checklist', 'stale-junk'] ->
['hermes-agent-dev']; stale-junk pruned via absorbed_into=''.
- E2E backward-compat: delete without absorbed_into, model emits YAML
-> routed via existing 'model' source, cron still rewritten correctly.
* feat(curator): capture + restore cron skill links across snapshot/rollback
Before this, rolling back a curator run restored the skills tree but cron
jobs still pointed at the umbrella skills the curator had rewritten them
to. The user would see their old narrow skills back on disk but their
cron jobs still configured with the merged umbrella — not actually 'back
to how it was'.
Snapshot side: snapshot_skills() now captures ~/.hermes/cron/jobs.json
alongside the skills tarball, as cron-jobs.json. The manifest gets a new
'cron_jobs' block with {backed_up, jobs_count} so rollback (and the CLI
confirm dialog) can surface what's in the snapshot. If jobs.json is
missing/unreadable/malformed, snapshot proceeds without cron data — the
skills backup is the core guarantee; cron is additive.
Rollback side: after the skills extract succeeds, the new
_restore_cron_skill_links() reconciles the backed-up jobs into the live
jobs.json SURGICALLY. Only 'skills' and 'skill' fields are restored, and
only on jobs matched by id. Everything else about a cron job — schedule,
last_run_at, next_run_at, enabled, prompt, workdir, hooks — is live
state the user or scheduler has modified since the snapshot; overwriting
it would regress unrelated activity.
Reconciliation rules:
- Job in backup AND live, skills differ → skills restored.
- Job in backup AND live, skills match → no-op.
- Job in backup, NOT in live → skipped (user deleted it
after snapshot; their choice
is later than the snapshot).
- Job in live, NOT in backup → untouched (user created it
after snapshot).
- Snapshot missing cron-jobs.json at all → rollback still succeeds,
reports 'not captured'
(older pre-feature snapshots
keep working).
Writes go through cron.jobs.save_jobs under the same _jobs_file_lock the
scheduler uses, so rollback doesn't race tick().
Also:
- hermes_cli/curator.py: rollback confirm dialog now shows
'cron jobs: N (will be restored for skill-link fields only)' when the
snapshot has cron data, or 'not in snapshot (<reason>)' otherwise.
- rollback()'s message string includes a 'cron links: ...' clause
summarizing the reconciliation outcome.
Tests
- 9 new cases: snapshot-with-cron, snapshot-without-cron, malformed-json
captured-as-raw, full rollback-restores-skills-and-cron, rollback
touches only skill fields, rollback skips user-deleted jobs, rollback
leaves user-created jobs untouched, rollback still works with
pre-feature snapshot that has no cron-jobs.json, standalone unit test
on _restore_cron_skill_links exercising the full report shape.
Validation
- 484/484 targeted tests pass (curator + cron + skill_manager suites).
- E2E: real snapshot_skills, real cron rewrite, real rollback. Before:
['pr-review-format', 'pr-review-checklist', 'pr-triage-salvage'].
After curator: ['hermes-agent-dev']. After rollback: ['pr-review-format',
'pr-review-checklist', 'pr-triage-salvage']. Non-skill fields (id,
name, prompt) preserved across the round trip.
2026-05-02 01:29:57 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# _extract_absorbed_into_declarations — authoritative signal from delete calls
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_extract_absorbed_into_picks_up_consolidation(curator_env):
    """A delete carrying absorbed_into=<umbrella> yields a declaration entry."""
    delete_call = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "delete",
            "name": "narrow-skill",
            "absorbed_into": "umbrella",
        }),
    }
    declarations = curator_env._extract_absorbed_into_declarations([delete_call])
    assert declarations == {
        "narrow-skill": {"into": "umbrella", "declared": True},
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_extract_absorbed_into_empty_string_is_explicit_prune(curator_env):
    """absorbed_into='' is recorded as an explicit prune declaration."""
    delete_call = {
        "name": "skill_manage",
        "arguments": json.dumps(
            {"action": "delete", "name": "stale", "absorbed_into": ""}
        ),
    }
    declarations = curator_env._extract_absorbed_into_declarations([delete_call])
    assert declarations == {"stale": {"into": "", "declared": True}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_extract_absorbed_into_missing_arg_ignored(curator_env):
    """A delete without absorbed_into is skipped so the heuristic can decide."""
    bare_delete = {
        "name": "skill_manage",
        "arguments": json.dumps({"action": "delete", "name": "legacy-skill"}),
    }
    assert curator_env._extract_absorbed_into_declarations([bare_delete]) == {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_extract_absorbed_into_ignores_non_delete_actions(curator_env):
    """Non-delete actions (patch, create, write_file, ...) yield no declarations."""
    patch_call = {
        "name": "skill_manage",
        "arguments": json.dumps({
            "action": "patch",
            "name": "umbrella",
            "old_string": "...",
            "new_string": "...",
            # absorbed_into is meaningless on a non-delete and must be ignored
            "absorbed_into": "something",
        }),
    }
    assert curator_env._extract_absorbed_into_declarations([patch_call]) == {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_extract_absorbed_into_accepts_dict_arguments(curator_env):
    """The extractor tolerates arguments arriving pre-parsed as a dict."""
    dict_call = {
        "name": "skill_manage",
        "arguments": {
            "action": "delete",
            "name": "narrow",
            "absorbed_into": "umbrella",
        },
    }
    declarations = curator_env._extract_absorbed_into_declarations([dict_call])
    assert declarations == {"narrow": {"into": "umbrella", "declared": True}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_extract_absorbed_into_strips_whitespace(curator_env):
    """Surrounding whitespace on the skill name and target is trimmed."""
    padded_args = json.dumps({
        "action": "delete",
        "name": " narrow ",
        "absorbed_into": " umbrella ",
    })
    declarations = curator_env._extract_absorbed_into_declarations(
        [{"name": "skill_manage", "arguments": padded_args}]
    )
    assert declarations == {"narrow": {"into": "umbrella", "declared": True}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_extract_absorbed_into_ignores_non_skill_manage_calls(curator_env):
    """Tool calls other than skill_manage contribute no declarations."""
    unrelated_calls = [
        {"name": "terminal", "arguments": json.dumps({"command": "ls"})},
        {"name": "read_file", "arguments": json.dumps({"path": "/tmp/x"})},
    ]
    assert curator_env._extract_absorbed_into_declarations(unrelated_calls) == {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_extract_absorbed_into_handles_malformed_arguments(curator_env):
    """Garbage JSON, None, or a missing arguments key must not crash the extractor."""
    malformed_calls = [
        {"name": "skill_manage", "arguments": "{not json"},
        {"name": "skill_manage", "arguments": None},
        {"name": "skill_manage"},  # arguments key absent entirely
    ]
    assert curator_env._extract_absorbed_into_declarations(malformed_calls) == {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# _reconcile_classification with absorbed_into declarations (authoritative)
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_reconcile_absorbed_into_beats_everything_else(curator_env):
    """A delete-time absorbed_into declaration overrides YAML and heuristic.

    This is the exact #18671 regression: the model forgets to emit the YAML
    summary block, and the heuristic's substring match misses because the
    umbrella's patch content doesn't literally contain the old skill's slug.
    Previously this fell through to the 'no-evidence fallback' prune, which
    dropped the cron ref instead of rewriting it. With absorbed_into declared,
    the model tells us directly.
    """
    out = curator_env._reconcile_classification(
        removed=["pr-review-format"],
        heuristic={"consolidated": [], "pruned": [{"name": "pr-review-format"}]},
        model_block={"consolidations": [], "prunings": []},  # no YAML block emitted
        destinations={"hermes-agent-dev"},
        absorbed_declarations={
            "pr-review-format": {"into": "hermes-agent-dev", "declared": True},
        },
    )
    assert out["pruned"] == []
    assert len(out["consolidated"]) == 1
    entry = out["consolidated"][0]
    assert entry["name"] == "pr-review-format"
    assert entry["into"] == "hermes-agent-dev"
    assert "absorbed_into" in entry["source"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_reconcile_absorbed_into_empty_is_explicit_prune(curator_env):
    """absorbed_into='' routes straight to pruned rather than the fallback path."""
    out = curator_env._reconcile_classification(
        removed=["stale"],
        heuristic={"consolidated": [], "pruned": [{"name": "stale"}]},
        model_block={"consolidations": [], "prunings": []},
        destinations=set(),
        absorbed_declarations={"stale": {"into": "", "declared": True}},
    )
    assert out["consolidated"] == []
    assert len(out["pruned"]) == 1
    assert "model-declared prune" in out["pruned"][0]["source"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_reconcile_absorbed_into_nonexistent_target_falls_through(curator_env):
    """A declaration naming an unknown umbrella falls through to heuristic/YAML.

    Shouldn't happen in practice (the tool validates at delete time), but the
    reconciler stays defensive.
    """
    heuristic_evidence = {
        "consolidated": [
            {"name": "thing", "into": "real-umbrella", "evidence": "..."},
        ],
        "pruned": [],
    }
    out = curator_env._reconcile_classification(
        removed=["thing"],
        heuristic=heuristic_evidence,
        model_block={"consolidations": [], "prunings": []},
        destinations={"real-umbrella"},
        absorbed_declarations={
            "thing": {"into": "ghost-umbrella", "declared": True},
        },
    )
    assert len(out["consolidated"]) == 1
    entry = out["consolidated"][0]
    assert entry["into"] == "real-umbrella"
    assert "tool-call audit" in entry["source"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_reconcile_declaration_preserves_yaml_reason(curator_env):
    """A YAML reason survives alongside an absorbed_into declaration.

    When the model both declared absorbed_into AND emitted a YAML entry with a
    reason, the reason carries through so REPORT.md can still show it.
    """
    yaml_block = {
        "consolidations": [{
            "from": "narrow",
            "into": "umbrella",
            "reason": "duplicate of umbrella's main content",
        }],
        "prunings": [],
    }
    out = curator_env._reconcile_classification(
        removed=["narrow"],
        heuristic={"consolidated": [], "pruned": []},
        model_block=yaml_block,
        destinations={"umbrella"},
        absorbed_declarations={
            "narrow": {"into": "umbrella", "declared": True},
        },
    )
    assert len(out["consolidated"]) == 1
    entry = out["consolidated"][0]
    assert entry["into"] == "umbrella"
    assert "absorbed_into" in entry["source"]
    assert entry["reason"] == "duplicate of umbrella's main content"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_reconcile_without_declarations_preserves_legacy_behavior(curator_env):
    """Omitting absorbed_declarations (defaults to None) keeps pre-change logic."""
    out = curator_env._reconcile_classification(
        removed=["thing"],
        heuristic={
            "consolidated": [
                {"name": "thing", "into": "umbrella", "evidence": "..."},
            ],
            "pruned": [],
        },
        model_block={"consolidations": [], "prunings": []},
        destinations={"umbrella"},
        # absorbed_declarations deliberately omitted — legacy call shape
    )
    assert len(out["consolidated"]) == 1
    assert out["consolidated"][0]["into"] == "umbrella"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_reconcile_mixed_declarations_and_legacy_calls(curator_env):
    """Real-world mix: some deletes declared absorbed_into, some didn't.

    Declared removals take the authoritative path; the rest fall through to
    YAML/heuristic handling.
    """
    out = curator_env._reconcile_classification(
        removed=["declared-cons", "declared-prune", "legacy-cons", "legacy-prune"],
        heuristic={
            "consolidated": [
                {"name": "legacy-cons", "into": "umbrella-a", "evidence": "..."},
            ],
            "pruned": [{"name": "legacy-prune"}],
        },
        model_block={"consolidations": [], "prunings": []},
        destinations={"umbrella-a", "umbrella-b"},
        absorbed_declarations={
            "declared-cons": {"into": "umbrella-b", "declared": True},
            "declared-prune": {"into": "", "declared": True},
        },
    )
    consolidated = {entry["name"]: entry for entry in out["consolidated"]}
    pruned = {entry["name"]: entry for entry in out["pruned"]}

    # Declared consolidation rides the authoritative declaration path.
    assert "declared-cons" in consolidated
    assert consolidated["declared-cons"]["into"] == "umbrella-b"
    assert "absorbed_into" in consolidated["declared-cons"]["source"]

    # Undeclared consolidation still resolves via heuristic evidence.
    assert "legacy-cons" in consolidated
    assert consolidated["legacy-cons"]["into"] == "umbrella-a"
    assert "tool-call audit" in consolidated["legacy-cons"]["source"]

    # Declared prune vs. no-evidence prune are labeled distinctly.
    assert "declared-prune" in pruned
    assert "model-declared prune" in pruned["declared-prune"]["source"]
    assert "legacy-prune" in pruned
    assert "no-evidence fallback" in pruned["legacy-prune"]["source"]
|