feat(curator): background skill maintenance (issue #7816)
Adds the Curator — an auxiliary-model background task that periodically
reviews AGENT-CREATED skills and keeps the collection tidy: tracks usage,
transitions unused skills through active → stale → archived, and spawns
a forked AIAgent to consolidate overlaps and patch drift.
Default: enabled, inactivity-triggered (no cron daemon). Runs on CLI
startup and gateway boot when the last run is older than interval_hours
(default 24) AND the agent has been idle for min_idle_hours (default 2).
Invariants (all load-bearing):
- Never touches bundled or hub-installed skills (.bundled_manifest +
.hub/lock.json double-filter)
- Never auto-deletes — archive only. Archives are recoverable
via `hermes curator restore <skill>`
- Pinned skills bypass all auto-transitions
- Uses the aux client; never touches the main session's prompt cache
New files:
- tools/skill_usage.py — sidecar .usage.json telemetry, atomic writes,
provenance filter
- agent/curator.py — orchestrator: config, idle gating, state-machine
transitions (pure, no LLM), forked-agent review prompt
- hermes_cli/curator.py — `hermes curator {status,run,pause,resume,
pin,unpin,restore}` subcommand
- tests/tools/test_skill_usage.py — 29 tests
- tests/agent/test_curator.py — 25 tests
Modified files (surgical patches):
- tools/skills_tool.py — bump view_count on successful skill_view
- tools/skill_manager_tool.py — bump patch_count on skill_manage
patch/edit/write_file/remove_file; forget record on delete
- hermes_cli/config.py — add curator: section to DEFAULT_CONFIG
- hermes_cli/commands.py — add /curator CommandDef with subcommands
- hermes_cli/main.py — register `hermes curator` subparser via
register_cli() from hermes_cli.curator
- cli.py — /curator slash-command dispatch + startup hook
- gateway/run.py — gateway-boot hook (mirrors CLI)
Validation:
- 54 new tests across skill_usage + curator, all passing in 3s
- 346 tests across all touched files' neighbors green
- 2783 tests across hermes_cli/ + gateway/test_run_progress_topics.py green
- CLI smoke: `hermes curator status/pause/resume` work end-to-end
Companion to PR #16026 (class-first skill review prompt) — together
they form a loop: the review prompt stops near-duplicate skill creation
at the source, and the curator prunes/consolidates what still accumulates.
Refs #7816.
2026-04-26 06:08:39 -07:00
|
|
|
"""Tests for agent/curator.py — orchestrator, idle gating, state transitions.
|
|
|
|
|
|
|
|
|
|
LLM spawning is never exercised here — `_run_llm_review` is monkeypatched so
|
|
|
|
|
tests run fully offline and the curator module doesn't need real credentials.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
import importlib
|
|
|
|
|
import json
|
|
|
|
|
from datetime import datetime, timedelta, timezone
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def curator_env(tmp_path, monkeypatch):
    """Isolated HERMES_HOME + freshly reloaded curator + skill_usage modules.

    Yields a dict with:
      home    — the isolated ~/.hermes directory (skills/ pre-created)
      curator — freshly reloaded agent.curator module
      usage   — freshly reloaded tools.skill_usage module
    """
    home = tmp_path / ".hermes"
    (home / "skills").mkdir(parents=True)
    # Redirect both discovery mechanisms at the isolated tmp dir.
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(home))

    # Reload so module-level path constants pick up the patched home.
    # Order matters: usage first, then curator (which builds on it).
    import tools.skill_usage as usage
    importlib.reload(usage)
    import agent.curator as curator
    importlib.reload(curator)

    # Neutralize the real LLM pass by default — tests opt in per-case.
    monkeypatch.setattr(curator, "_run_llm_review", lambda prompt: "llm-stub")

    # Default: no config file → curator defaults. Tests can override.
    monkeypatch.setattr(curator, "_load_config", lambda: {})

    return {"home": home, "curator": curator, "usage": usage}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _write_skill(skills_dir: Path, name: str):
|
|
|
|
|
d = skills_dir / name
|
|
|
|
|
d.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
(d / "SKILL.md").write_text(
|
|
|
|
|
f"---\nname: {name}\ndescription: x\n---\n", encoding="utf-8",
|
|
|
|
|
)
|
|
|
|
|
return d
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Config gates
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
def test_curator_enabled_default_true(curator_env):
    """With no config at all, the curator is enabled by default."""
    assert curator_env["curator"].is_enabled() is True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_curator_disabled_via_config(curator_env, monkeypatch):
    """`enabled: false` in config disables both the flag and the run gate."""
    c = curator_env["curator"]
    monkeypatch.setattr(c, "_load_config", lambda: {"enabled": False})
    assert c.is_enabled() is False
    assert c.should_run_now() is False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_curator_defaults(curator_env):
    """Built-in defaults apply when the config has no curator section."""
    c = curator_env["curator"]
    assert c.get_interval_hours() == 24 * 7  # 7 days
    assert c.get_min_idle_hours() == 2
    assert c.get_stale_after_days() == 30
    assert c.get_archive_after_days() == 90
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_curator_config_overrides(curator_env, monkeypatch):
    """Every tunable is overridable from the config mapping."""
    c = curator_env["curator"]
    monkeypatch.setattr(c, "_load_config", lambda: {
        "interval_hours": 12,
        "min_idle_hours": 0.5,
        "stale_after_days": 7,
        "archive_after_days": 60,
    })
    assert c.get_interval_hours() == 12
    assert c.get_min_idle_hours() == 0.5
    assert c.get_stale_after_days() == 7
    assert c.get_archive_after_days() == 60
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# should_run_now
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
def test_first_run_always_eligible(curator_env):
    """No saved state (never ran before) → eligible immediately."""
    c = curator_env["curator"]
    assert c.should_run_now() is True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_recent_run_blocks(curator_env):
    """A run stamped just now is inside the interval, so no new run."""
    c = curator_env["curator"]
    c.save_state({
        "last_run_at": datetime.now(timezone.utc).isoformat(),
        "paused": False,
    })
    assert c.should_run_now() is False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_old_run_eligible(curator_env):
    """A run older than the configured interval should re-trigger. Use a
    2x-interval cushion so the test doesn't become coupled to the exact
    default — bumping DEFAULT_INTERVAL_HOURS shouldn't break it."""
    c = curator_env["curator"]
    long_ago = datetime.now(timezone.utc) - timedelta(
        hours=c.get_interval_hours() * 2
    )
    c.save_state({"last_run_at": long_ago.isoformat(), "paused": False})
    assert c.should_run_now() is True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_paused_blocks_even_if_stale(curator_env):
    """The paused flag wins even when the last run is well past the interval."""
    c = curator_env["curator"]
    long_ago = datetime.now(timezone.utc) - timedelta(days=30)
    c.save_state({"last_run_at": long_ago.isoformat(), "paused": True})
    assert c.should_run_now() is False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_set_paused_roundtrip(curator_env):
    """set_paused persists and is_paused reads it back, both directions."""
    c = curator_env["curator"]
    c.set_paused(True)
    assert c.is_paused() is True
    c.set_paused(False)
    assert c.is_paused() is False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Automatic state transitions
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
def test_unused_skill_transitions_to_stale(curator_env):
    """A skill unused past stale_after_days moves active → stale."""
    c = curator_env["curator"]
    u = curator_env["usage"]
    skills_dir = curator_env["home"] / "skills"
    _write_skill(skills_dir, "old-skill")

    # Record last-use well past stale_after_days (30 default)
    long_ago = (datetime.now(timezone.utc) - timedelta(days=45)).isoformat()
    data = u.load_usage()
    data["old-skill"] = u._empty_record()
    data["old-skill"]["last_used_at"] = long_ago
    data["old-skill"]["created_at"] = long_ago
    u.save_usage(data)

    counts = c.apply_automatic_transitions()
    assert counts["marked_stale"] == 1
    assert u.get_record("old-skill")["state"] == "stale"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_very_old_skill_gets_archived(curator_env):
    """A skill unused past archive_after_days is moved into skills/.archive
    (never deleted) and its record flips to 'archived'."""
    c = curator_env["curator"]
    u = curator_env["usage"]
    skills_dir = curator_env["home"] / "skills"
    skill_dir = _write_skill(skills_dir, "ancient")

    # 120 days is past the 90-day default archive threshold.
    super_old = (datetime.now(timezone.utc) - timedelta(days=120)).isoformat()
    data = u.load_usage()
    data["ancient"] = u._empty_record()
    data["ancient"]["last_used_at"] = super_old
    data["ancient"]["created_at"] = super_old
    u.save_usage(data)

    counts = c.apply_automatic_transitions()
    assert counts["archived"] == 1
    assert not skill_dir.exists()
    assert (skills_dir / ".archive" / "ancient" / "SKILL.md").exists()
    assert u.get_record("ancient")["state"] == "archived"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_pinned_skill_is_never_touched(curator_env):
    """Pinned skills bypass all auto-transitions, no matter how stale."""
    c = curator_env["curator"]
    u = curator_env["usage"]
    skills_dir = curator_env["home"] / "skills"
    _write_skill(skills_dir, "precious")

    # A full year idle — would archive if it weren't pinned.
    super_old = (datetime.now(timezone.utc) - timedelta(days=365)).isoformat()
    data = u.load_usage()
    data["precious"] = u._empty_record()
    data["precious"]["last_used_at"] = super_old
    data["precious"]["created_at"] = super_old
    data["precious"]["pinned"] = True
    u.save_usage(data)

    counts = c.apply_automatic_transitions()
    assert counts["archived"] == 0
    assert counts["marked_stale"] == 0
    rec = u.get_record("precious")
    assert rec["state"] == "active"  # untouched
    assert rec["pinned"] is True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_stale_skill_reactivates_on_recent_use(curator_env):
    """A stale-marked skill with a fresh last_used_at flips back to active."""
    c = curator_env["curator"]
    u = curator_env["usage"]
    skills_dir = curator_env["home"] / "skills"
    _write_skill(skills_dir, "revived")

    recent = datetime.now(timezone.utc).isoformat()
    data = u.load_usage()
    data["revived"] = u._empty_record()
    data["revived"]["state"] = "stale"
    data["revived"]["last_used_at"] = recent
    data["revived"]["created_at"] = recent
    u.save_usage(data)

    counts = c.apply_automatic_transitions()
    assert counts["reactivated"] == 1
    assert u.get_record("revived")["state"] == "active"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_new_skill_without_last_used_not_immediately_archived(curator_env):
    """A freshly-created skill with no use history should not get archived
    just because last_used_at is None."""
    c = curator_env["curator"]
    u = curator_env["usage"]
    skills_dir = curator_env["home"] / "skills"
    _write_skill(skills_dir, "fresh")

    # Bump nothing — record doesn't exist yet. Curator should create it
    # and fall back to created_at which is ~now.
    counts = c.apply_automatic_transitions()
    assert counts["archived"] == 0
    assert counts["marked_stale"] == 0
    assert (skills_dir / "fresh").exists()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_bundled_skill_not_touched_by_transitions(curator_env):
    """Skills listed in .bundled_manifest are excluded from curation entirely."""
    c = curator_env["curator"]
    u = curator_env["usage"]
    skills_dir = curator_env["home"] / "skills"
    _write_skill(skills_dir, "bundled")
    (skills_dir / ".bundled_manifest").write_text(
        "bundled:abc\n", encoding="utf-8",
    )

    # Ancient usage record that would otherwise trigger archival.
    super_old = (datetime.now(timezone.utc) - timedelta(days=500)).isoformat()
    data = u.load_usage()
    data["bundled"] = u._empty_record()
    data["bundled"]["last_used_at"] = super_old
    u.save_usage(data)

    counts = c.apply_automatic_transitions()
    # bundled skills are excluded from the agent-created list entirely
    assert counts["checked"] == 0
    assert (skills_dir / "bundled").exists()  # never moved
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# run_curator_review orchestration
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
def test_run_review_records_state(curator_env):
    """A synchronous review stamps last_run_at / run_count / summary into state."""
    c = curator_env["curator"]
    skills_dir = curator_env["home"] / "skills"
    _write_skill(skills_dir, "a")

    result = c.run_curator_review(synchronous=True)
    assert "started_at" in result
    state = c.load_state()
    assert state["last_run_at"] is not None
    assert state["run_count"] >= 1
    assert state["last_run_summary"] is not None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_run_review_synchronous_invokes_llm_stub(curator_env, monkeypatch):
    """With one candidate skill, the review invokes the LLM exactly once and
    forwards its summary to the on_summary callback."""
    c = curator_env["curator"]
    skills_dir = curator_env["home"] / "skills"
    _write_skill(skills_dir, "a")

    calls = []

    # _run_llm_review returns a dict (final/summary/model/provider/tool_calls/
    # error) — mirror that shape so the reporter path downstream is satisfied.
    def _stub(prompt):
        calls.append(prompt)
        return {
            "final": "stubbed-summary",
            "summary": "stubbed-summary",
            "model": "stub-model",
            "provider": "stub-provider",
            "tool_calls": [],
            "error": None,
        }

    monkeypatch.setattr(c, "_run_llm_review", _stub)

    captured = []
    c.run_curator_review(on_summary=lambda s: captured.append(s), synchronous=True)

    assert len(calls) == 1
    # "CURATOR" subsumes the old "skill CURATOR" check — one condition suffices.
    assert "CURATOR" in calls[0]
    assert captured  # on_summary was called
    assert any("stubbed-summary" in s for s in captured)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_run_review_skips_llm_when_no_candidates(curator_env, monkeypatch):
    """An empty skills dir short-circuits: summary says skipped, LLM untouched."""
    c = curator_env["curator"]
    # No skills in the dir → no candidates
    calls = []
    monkeypatch.setattr(
        c, "_run_llm_review",
        lambda prompt: (calls.append(prompt), "never-called")[1],
    )

    captured = []
    c.run_curator_review(on_summary=lambda s: captured.append(s), synchronous=True)

    assert calls == []  # LLM not invoked
    assert any("skipped" in s for s in captured)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_maybe_run_curator_respects_disabled(curator_env, monkeypatch):
    """maybe_run_curator returns None (no run) when disabled via config."""
    c = curator_env["curator"]
    monkeypatch.setattr(c, "_load_config", lambda: {"enabled": False})
    result = c.maybe_run_curator()
    assert result is None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_maybe_run_curator_enforces_idle_gate(curator_env, monkeypatch):
    """An agent idle for less than min_idle_hours must not trigger a run."""
    c = curator_env["curator"]
    monkeypatch.setattr(c, "_load_config", lambda: {"min_idle_hours": 2})
    # idle less than the threshold (60s << 2h)
    result = c.maybe_run_curator(idle_for_seconds=60.0)
    assert result is None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_maybe_run_curator_runs_when_eligible(curator_env, monkeypatch):
    """All gates open (enabled, first run, idle long enough) → a run happens."""
    c = curator_env["curator"]
    skills_dir = curator_env["home"] / "skills"
    _write_skill(skills_dir, "a")
    # Force idle over threshold
    result = c.maybe_run_curator(idle_for_seconds=99999.0)
    assert result is not None
    assert "started_at" in result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_maybe_run_curator_swallows_exceptions(curator_env, monkeypatch):
    """maybe_run_curator is best-effort: internal failures return None, never raise."""
    c = curator_env["curator"]

    def explode():
        raise RuntimeError("boom")

    monkeypatch.setattr(c, "should_run_now", explode)
    # Must not raise
    assert c.maybe_run_curator() is None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# Persistence
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
def test_state_file_survives_corrupt_read(curator_env):
    """A corrupt (non-JSON) state file falls back to defaults instead of raising."""
    c = curator_env["curator"]
    c._state_file().write_text("not json", encoding="utf-8")
    # Must fall back to default, not raise
    assert c.load_state() == c._default_state()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_state_atomic_write_no_tmp_leftovers(curator_env):
    """Atomic save_state leaves no temporary .curator_state_* files behind."""
    c = curator_env["curator"]
    c.save_state({"paused": True})
    parent = c._state_file().parent
    for p in parent.iterdir():
        assert not p.name.startswith(".curator_state_"), f"tmp leftover: {p.name}"
|
|
|
|
|
|
|
|
|
|
|
2026-04-30 21:36:40 +03:00
|
|
|
def test_state_preserves_last_report_path(curator_env):
    """last_report_path round-trips through save_state/load_state intact."""
    c = curator_env["curator"]
    c.save_state({
        "last_run_at": "2026-04-30T12:00:00+00:00",
        "last_run_summary": "ok",
        "last_report_path": "/tmp/curator-report",
        "paused": False,
        "run_count": 1,
    })
    state = c.load_state()
    assert state["last_report_path"] == "/tmp/curator-report"
|
|
|
|
|
|
|
|
|
|
|
feat(curator): background skill maintenance (issue #7816)
Adds the Curator — an auxiliary-model background task that periodically
reviews AGENT-CREATED skills and keeps the collection tidy: tracks usage,
transitions unused skills through active → stale → archived, and spawns
a forked AIAgent to consolidate overlaps and patch drift.
Default: enabled, inactivity-triggered (no cron daemon). Runs on CLI
startup and gateway boot when the last run is older than interval_hours
(default 24) AND the agent has been idle for min_idle_hours (default 2).
Invariants (all load-bearing):
- Never touches bundled or hub-installed skills (.bundled_manifest +
.hub/lock.json double-filter)
- Never auto-deletes — archive only. Archives are recoverable
via `hermes curator restore <skill>`
- Pinned skills bypass all auto-transitions
- Uses the aux client; never touches the main session's prompt cache
New files:
- tools/skill_usage.py — sidecar .usage.json telemetry, atomic writes,
provenance filter
- agent/curator.py — orchestrator: config, idle gating, state-machine
transitions (pure, no LLM), forked-agent review prompt
- hermes_cli/curator.py — `hermes curator {status,run,pause,resume,
pin,unpin,restore}` subcommand
- tests/tools/test_skill_usage.py — 29 tests
- tests/agent/test_curator.py — 25 tests
Modified files (surgical patches):
- tools/skills_tool.py — bump view_count on successful skill_view
- tools/skill_manager_tool.py — bump patch_count on skill_manage
patch/edit/write_file/remove_file; forget record on delete
- hermes_cli/config.py — add curator: section to DEFAULT_CONFIG
- hermes_cli/commands.py — add /curator CommandDef with subcommands
- hermes_cli/main.py — register `hermes curator` subparser via
register_cli() from hermes_cli.curator
- cli.py — /curator slash-command dispatch + startup hook
- gateway/run.py — gateway-boot hook (mirrors CLI)
Validation:
- 54 new tests across skill_usage + curator, all passing in 3s
- 346 tests across all touched files' neighbors green
- 2783 tests across hermes_cli/ + gateway/test_run_progress_topics.py green
- CLI smoke: `hermes curator status/pause/resume` work end-to-end
Companion to PR #16026 (class-first skill review prompt) — together
they form a loop: the review prompt stops near-duplicate skill creation
at the source, and the curator prunes/consolidates what still accumulates.
Refs #7816.
2026-04-26 06:08:39 -07:00
|
|
|
def test_curator_review_prompt_has_invariants():
|
|
|
|
|
"""Core invariants must be in the review prompt text."""
|
|
|
|
|
from agent.curator import CURATOR_REVIEW_PROMPT
|
feat(curator): umbrella-first prompt, inherit parent config, unbounded iterations
Based on three live test runs against 346 agent-created skills on the
author's own setup (~6.5 min, opus-4.7, 86 API calls), the curator
prompt needed three sharpenings before it consistently produced real
umbrella consolidation instead of passive audit output:
**Umbrella-first framing.** The original 'decide keep/patch/archive/
consolidate' framing lets opus default to 'keep' whenever two skills
aren't byte-identical. The new prompt explicitly tells the reviewer
that pairwise distinctness is the wrong bar — the right question is
'would a human maintainer write this as N separate skills, or one
skill with N labeled subsections?' Expect 10-25 prefix clusters; merge
each into an umbrella via one of three methods.
**Three concrete consolidation methods.** (a) Merge into an existing
umbrella (patch the broadest skill, archive siblings); (b) Create a
new umbrella SKILL.md (skill_manage action=create); (c) Demote
session-specific detail into references/, templates/, or scripts/
under the umbrella via skill_manage action=write_file, then archive
the narrow sibling. This matches the support-file vocabulary the
review-prompt side already uses (PR #17213).
**Two observed bailouts pre-empted:** 'usage counters are zero so I
can't judge' (rule 4: judge on content, not use_count) and 'each has
a distinct trigger' (rule 5: pairwise distinctness is the wrong bar).
**Config-aware parent inheritance.** _run_llm_review() was building
AIAgent() without explicit provider/model, hitting an auto-resolve
path that returned empty credentials → HTTP 400 'No models provided'
against OpenRouter. Fork now inherits the user's main provider and
model (via load_config + resolve_runtime_provider) before spawning —
runs on whatever the user is currently on, OAuth-backed or
pool-backed included.
**Unbounded iteration ceiling.** max_iterations=8 was way too low for
an umbrella-build pass over hundreds of skills. A live pass takes
50-100 API calls (scanning, clustering, skill_view'ing candidates,
patching umbrellas, mv'ing siblings). Raised to 9999 — the natural
stopping criterion is 'no more clusters worth processing', not an
arbitrary tool-call budget.
**Tests updated:** test_curator_review_prompt_has_invariants accepts
DO NOT / MUST NOT and drops 'keep' from the required-verb set (the
umbrella-first prompt correctly deemphasizes 'keep' as a first-class
decision label since passive keep-everything is the failure mode
being prevented). Added test_curator_review_prompt_is_umbrella_first
asserting the umbrella framing, class-level thinking, references/
+ templates/ + scripts/ support-file mentions, and the 'use_count
is not evidence of value' pre-emption. Added
test_curator_review_prompt_offers_support_file_actions asserting
skill_manage action=create and action=write_file are both named.
**Live validation on author's setup:**
- Run 1 (old prompt): 3 archives, stopped after surveying — typical passive outcome
- Run 2 (consolidation prompt): 44 archives, 3 patches, surfaced the 50-skill mlops reorg duplicate bug but didn't umbrella
- Run 3 (this prompt): 249 archives + 18 new class-level umbrellas created, reducing agent-created skills from 346 → 118 with every archived skill's content preserved as references/ under its umbrella. Pinned skill untouched. Full report in PR description.
2026-04-28 22:07:02 -07:00
|
|
|
assert "MUST NOT" in CURATOR_REVIEW_PROMPT or "DO NOT" in CURATOR_REVIEW_PROMPT
|
feat(curator): background skill maintenance (issue #7816)
Adds the Curator — an auxiliary-model background task that periodically
reviews AGENT-CREATED skills and keeps the collection tidy: tracks usage,
transitions unused skills through active → stale → archived, and spawns
a forked AIAgent to consolidate overlaps and patch drift.
Default: enabled, inactivity-triggered (no cron daemon). Runs on CLI
startup and gateway boot when the last run is older than interval_hours
(default 24) AND the agent has been idle for min_idle_hours (default 2).
Invariants (all load-bearing):
- Never touches bundled or hub-installed skills (.bundled_manifest +
.hub/lock.json double-filter)
- Never auto-deletes — archive only. Archives are recoverable
via `hermes curator restore <skill>`
- Pinned skills bypass all auto-transitions
- Uses the aux client; never touches the main session's prompt cache
New files:
- tools/skill_usage.py — sidecar .usage.json telemetry, atomic writes,
provenance filter
- agent/curator.py — orchestrator: config, idle gating, state-machine
transitions (pure, no LLM), forked-agent review prompt
- hermes_cli/curator.py — `hermes curator {status,run,pause,resume,
pin,unpin,restore}` subcommand
- tests/tools/test_skill_usage.py — 29 tests
- tests/agent/test_curator.py — 25 tests
Modified files (surgical patches):
- tools/skills_tool.py — bump view_count on successful skill_view
- tools/skill_manager_tool.py — bump patch_count on skill_manage
patch/edit/write_file/remove_file; forget record on delete
- hermes_cli/config.py — add curator: section to DEFAULT_CONFIG
- hermes_cli/commands.py — add /curator CommandDef with subcommands
- hermes_cli/main.py — register `hermes curator` subparser via
register_cli() from hermes_cli.curator
- cli.py — /curator slash-command dispatch + startup hook
- gateway/run.py — gateway-boot hook (mirrors CLI)
Validation:
- 54 new tests across skill_usage + curator, all passing in 3s
- 346 tests across all touched files' neighbors green
- 2783 tests across hermes_cli/ + gateway/test_run_progress_topics.py green
- CLI smoke: `hermes curator status/pause/resume` work end-to-end
Companion to PR #16026 (class-first skill review prompt) — together
they form a loop: the review prompt stops near-duplicate skill creation
at the source, and the curator prunes/consolidates what still accumulates.
Refs #7816.
2026-04-26 06:08:39 -07:00
|
|
|
assert "bundled" in CURATOR_REVIEW_PROMPT.lower()
|
|
|
|
|
assert "delete" in CURATOR_REVIEW_PROMPT.lower()
|
|
|
|
|
assert "pinned" in CURATOR_REVIEW_PROMPT.lower()
|
feat(curator): umbrella-first prompt, inherit parent config, unbounded iterations
Based on three live test runs against 346 agent-created skills on the
author's own setup (~6.5 min, opus-4.7, 86 API calls), the curator
prompt needed three sharpenings before it consistently produced real
umbrella consolidation instead of passive audit output:
**Umbrella-first framing.** The original 'decide keep/patch/archive/
consolidate' framing lets opus default to 'keep' whenever two skills
aren't byte-identical. The new prompt explicitly tells the reviewer
that pairwise distinctness is the wrong bar — the right question is
'would a human maintainer write this as N separate skills, or one
skill with N labeled subsections?' Expect 10-25 prefix clusters; merge
each into an umbrella via one of three methods.
**Three concrete consolidation methods.** (a) Merge into an existing
umbrella (patch the broadest skill, archive siblings); (b) Create a
new umbrella SKILL.md (skill_manage action=create); (c) Demote
session-specific detail into references/, templates/, or scripts/
under the umbrella via skill_manage action=write_file, then archive
the narrow sibling. This matches the support-file vocabulary the
review-prompt side already uses (PR #17213).
**Two observed bailouts pre-empted:** 'usage counters are zero so I
can't judge' (rule 4: judge on content, not use_count) and 'each has
a distinct trigger' (rule 5: pairwise distinctness is the wrong bar).
**Config-aware parent inheritance.** _run_llm_review() was building
AIAgent() without explicit provider/model, hitting an auto-resolve
path that returned empty credentials → HTTP 400 'No models provided'
against OpenRouter. Fork now inherits the user's main provider and
model (via load_config + resolve_runtime_provider) before spawning —
runs on whatever the user is currently on, OAuth-backed or
pool-backed included.
**Unbounded iteration ceiling.** max_iterations=8 was way too low for
an umbrella-build pass over hundreds of skills. A live pass takes
50-100 API calls (scanning, clustering, skill_view'ing candidates,
patching umbrellas, mv'ing siblings). Raised to 9999 — the natural
stopping criterion is 'no more clusters worth processing', not an
arbitrary tool-call budget.
**Tests updated:** test_curator_review_prompt_has_invariants accepts
DO NOT / MUST NOT and drops 'keep' from the required-verb set (the
umbrella-first prompt correctly deemphasizes 'keep' as a first-class
decision label since passive keep-everything is the failure mode
being prevented). Added test_curator_review_prompt_is_umbrella_first
asserting the umbrella framing, class-level thinking, references/
+ templates/ + scripts/ support-file mentions, and the 'use_count
is not evidence of value' pre-emption. Added
test_curator_review_prompt_offers_support_file_actions asserting
skill_manage action=create and action=write_file are both named.
**Live validation on author's setup:**
- Run 1 (old prompt): 3 archives, stopped after surveying — typical passive outcome
- Run 2 (consolidation prompt): 44 archives, 3 patches, surfaced the 50-skill mlops reorg duplicate bug but didn't umbrella
- Run 3 (this prompt): 249 archives + 18 new class-level umbrellas created, reducing agent-created skills from 346 → 118 with every archived skill's content preserved as references/ under its umbrella. Pinned skill untouched. Full report in PR description.
2026-04-28 22:07:02 -07:00
|
|
|
# Must describe the actions the reviewer can take. The exact vocabulary
|
|
|
|
|
# has tightened over time (the umbrella-first prompt drops 'keep' as a
|
|
|
|
|
# first-class decision verb, since passive keep-everything is the
|
|
|
|
|
# failure mode the prompt is trying to avoid), but the core merge /
|
|
|
|
|
# archive / patch trio must remain callable.
|
|
|
|
|
for verb in ("patch", "archive"):
|
feat(curator): background skill maintenance (issue #7816)
Adds the Curator — an auxiliary-model background task that periodically
reviews AGENT-CREATED skills and keeps the collection tidy: tracks usage,
transitions unused skills through active → stale → archived, and spawns
a forked AIAgent to consolidate overlaps and patch drift.
Default: enabled, inactivity-triggered (no cron daemon). Runs on CLI
startup and gateway boot when the last run is older than interval_hours
(default 24) AND the agent has been idle for min_idle_hours (default 2).
Invariants (all load-bearing):
- Never touches bundled or hub-installed skills (.bundled_manifest +
.hub/lock.json double-filter)
- Never auto-deletes — archive only. Archives are recoverable
via `hermes curator restore <skill>`
- Pinned skills bypass all auto-transitions
- Uses the aux client; never touches the main session's prompt cache
New files:
- tools/skill_usage.py — sidecar .usage.json telemetry, atomic writes,
provenance filter
- agent/curator.py — orchestrator: config, idle gating, state-machine
transitions (pure, no LLM), forked-agent review prompt
- hermes_cli/curator.py — `hermes curator {status,run,pause,resume,
pin,unpin,restore}` subcommand
- tests/tools/test_skill_usage.py — 29 tests
- tests/agent/test_curator.py — 25 tests
Modified files (surgical patches):
- tools/skills_tool.py — bump view_count on successful skill_view
- tools/skill_manager_tool.py — bump patch_count on skill_manage
patch/edit/write_file/remove_file; forget record on delete
- hermes_cli/config.py — add curator: section to DEFAULT_CONFIG
- hermes_cli/commands.py — add /curator CommandDef with subcommands
- hermes_cli/main.py — register `hermes curator` subparser via
register_cli() from hermes_cli.curator
- cli.py — /curator slash-command dispatch + startup hook
- gateway/run.py — gateway-boot hook (mirrors CLI)
Validation:
- 54 new tests across skill_usage + curator, all passing in 3s
- 346 tests across all touched files' neighbors green
- 2783 tests across hermes_cli/ + gateway/test_run_progress_topics.py green
- CLI smoke: `hermes curator status/pause/resume` work end-to-end
Companion to PR #16026 (class-first skill review prompt) — together
they form a loop: the review prompt stops near-duplicate skill creation
at the source, and the curator prunes/consolidates what still accumulates.
Refs #7816.
2026-04-26 06:08:39 -07:00
|
|
|
assert verb in CURATOR_REVIEW_PROMPT.lower()
|
feat(curator): umbrella-first prompt, inherit parent config, unbounded iterations
Based on three live test runs against 346 agent-created skills on the
author's own setup (~6.5 min, opus-4.7, 86 API calls), the curator
prompt needed three sharpenings before it consistently produced real
umbrella consolidation instead of passive audit output:
**Umbrella-first framing.** The original 'decide keep/patch/archive/
consolidate' framing lets opus default to 'keep' whenever two skills
aren't byte-identical. The new prompt explicitly tells the reviewer
that pairwise distinctness is the wrong bar — the right question is
'would a human maintainer write this as N separate skills, or one
skill with N labeled subsections?' Expect 10-25 prefix clusters; merge
each into an umbrella via one of three methods.
**Three concrete consolidation methods.** (a) Merge into an existing
umbrella (patch the broadest skill, archive siblings); (b) Create a
new umbrella SKILL.md (skill_manage action=create); (c) Demote
session-specific detail into references/, templates/, or scripts/
under the umbrella via skill_manage action=write_file, then archive
the narrow sibling. This matches the support-file vocabulary the
review-prompt side already uses (PR #17213).
**Two observed bailouts pre-empted:** 'usage counters are zero so I
can't judge' (rule 4: judge on content, not use_count) and 'each has
a distinct trigger' (rule 5: pairwise distinctness is the wrong bar).
**Config-aware parent inheritance.** _run_llm_review() was building
AIAgent() without explicit provider/model, hitting an auto-resolve
path that returned empty credentials → HTTP 400 'No models provided'
against OpenRouter. Fork now inherits the user's main provider and
model (via load_config + resolve_runtime_provider) before spawning —
runs on whatever the user is currently on, OAuth-backed or
pool-backed included.
**Unbounded iteration ceiling.** max_iterations=8 was way too low for
an umbrella-build pass over hundreds of skills. A live pass takes
50-100 API calls (scanning, clustering, skill_view'ing candidates,
patching umbrellas, mv'ing siblings). Raised to 9999 — the natural
stopping criterion is 'no more clusters worth processing', not an
arbitrary tool-call budget.
**Tests updated:** test_curator_review_prompt_has_invariants accepts
DO NOT / MUST NOT and drops 'keep' from the required-verb set (the
umbrella-first prompt correctly deemphasizes 'keep' as a first-class
decision label since passive keep-everything is the failure mode
being prevented). Added test_curator_review_prompt_is_umbrella_first
asserting the umbrella framing, class-level thinking, references/
+ templates/ + scripts/ support-file mentions, and the 'use_count
is not evidence of value' pre-emption. Added
test_curator_review_prompt_offers_support_file_actions asserting
skill_manage action=create and action=write_file are both named.
**Live validation on author's setup:**
- Run 1 (old prompt): 3 archives, stopped after surveying — typical passive outcome
- Run 2 (consolidation prompt): 44 archives, 3 patches, surfaced the 50-skill mlops reorg duplicate bug but didn't umbrella
- Run 3 (this prompt): 249 archives + 18 new class-level umbrellas created, reducing agent-created skills from 346 → 118 with every archived skill's content preserved as references/ under its umbrella. Pinned skill untouched. Full report in PR description.
2026-04-28 22:07:02 -07:00
|
|
|
# Must mention consolidation (possibly via "merge" or "consolidat")
|
|
|
|
|
assert "consolidat" in CURATOR_REVIEW_PROMPT.lower() or "merge" in CURATOR_REVIEW_PROMPT.lower()
|
2026-04-26 06:13:09 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_curator_review_prompt_points_at_existing_tools_only():
|
|
|
|
|
"""The review prompt must rely on existing tools (skill_manage + terminal)
|
|
|
|
|
and must NOT reference bespoke curator tools that are not registered
|
|
|
|
|
model tools."""
|
|
|
|
|
from agent.curator import CURATOR_REVIEW_PROMPT
|
|
|
|
|
assert "skill_manage" in CURATOR_REVIEW_PROMPT
|
|
|
|
|
assert "skills_list" in CURATOR_REVIEW_PROMPT
|
|
|
|
|
assert "skill_view" in CURATOR_REVIEW_PROMPT
|
|
|
|
|
assert "terminal" in CURATOR_REVIEW_PROMPT.lower()
|
|
|
|
|
# These would be nice but aren't actually registered as tools — the
|
|
|
|
|
# curator uses skill_manage + terminal mv instead.
|
|
|
|
|
assert "archive_skill" not in CURATOR_REVIEW_PROMPT
|
|
|
|
|
assert "pin_skill" not in CURATOR_REVIEW_PROMPT
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_curator_does_not_instruct_model_to_pin():
|
|
|
|
|
"""Pinning is a user opt-out, not a model decision. The prompt should
|
|
|
|
|
not tell the reviewer to pin skills autonomously."""
|
|
|
|
|
from agent.curator import CURATOR_REVIEW_PROMPT
|
|
|
|
|
# "pinned" appears in the invariant ("skip pinned skills"), but "pin"
|
|
|
|
|
# as a decision verb should not.
|
|
|
|
|
lines = CURATOR_REVIEW_PROMPT.split("\n")
|
|
|
|
|
decision_block = "\n".join(
|
|
|
|
|
l for l in lines
|
|
|
|
|
if l.strip().startswith(("keep", "patch", "archive", "consolidate", "pin "))
|
|
|
|
|
)
|
|
|
|
|
# No standalone "pin" action line
|
|
|
|
|
assert not any(l.strip().startswith("pin ") for l in lines), (
|
|
|
|
|
f"Found a pin action line in:\n{decision_block}"
|
|
|
|
|
)
|
2026-04-26 06:17:01 -07:00
|
|
|
|
|
|
|
|
|
feat(curator): umbrella-first prompt, inherit parent config, unbounded iterations
Based on three live test runs against 346 agent-created skills on the
author's own setup (~6.5 min, opus-4.7, 86 API calls), the curator
prompt needed three sharpenings before it consistently produced real
umbrella consolidation instead of passive audit output:
**Umbrella-first framing.** The original 'decide keep/patch/archive/
consolidate' framing lets opus default to 'keep' whenever two skills
aren't byte-identical. The new prompt explicitly tells the reviewer
that pairwise distinctness is the wrong bar — the right question is
'would a human maintainer write this as N separate skills, or one
skill with N labeled subsections?' Expect 10-25 prefix clusters; merge
each into an umbrella via one of three methods.
**Three concrete consolidation methods.** (a) Merge into an existing
umbrella (patch the broadest skill, archive siblings); (b) Create a
new umbrella SKILL.md (skill_manage action=create); (c) Demote
session-specific detail into references/, templates/, or scripts/
under the umbrella via skill_manage action=write_file, then archive
the narrow sibling. This matches the support-file vocabulary the
review-prompt side already uses (PR #17213).
**Two observed bailouts pre-empted:** 'usage counters are zero so I
can't judge' (rule 4: judge on content, not use_count) and 'each has
a distinct trigger' (rule 5: pairwise distinctness is the wrong bar).
**Config-aware parent inheritance.** _run_llm_review() was building
AIAgent() without explicit provider/model, hitting an auto-resolve
path that returned empty credentials → HTTP 400 'No models provided'
against OpenRouter. Fork now inherits the user's main provider and
model (via load_config + resolve_runtime_provider) before spawning —
runs on whatever the user is currently on, OAuth-backed or
pool-backed included.
**Unbounded iteration ceiling.** max_iterations=8 was way too low for
an umbrella-build pass over hundreds of skills. A live pass takes
50-100 API calls (scanning, clustering, skill_view'ing candidates,
patching umbrellas, mv'ing siblings). Raised to 9999 — the natural
stopping criterion is 'no more clusters worth processing', not an
arbitrary tool-call budget.
**Tests updated:** test_curator_review_prompt_has_invariants accepts
DO NOT / MUST NOT and drops 'keep' from the required-verb set (the
umbrella-first prompt correctly deemphasizes 'keep' as a first-class
decision label since passive keep-everything is the failure mode
being prevented). Added test_curator_review_prompt_is_umbrella_first
asserting the umbrella framing, class-level thinking, references/
+ templates/ + scripts/ support-file mentions, and the 'use_count
is not evidence of value' pre-emption. Added
test_curator_review_prompt_offers_support_file_actions asserting
skill_manage action=create and action=write_file are both named.
**Live validation on author's setup:**
- Run 1 (old prompt): 3 archives, stopped after surveying — typical passive outcome
- Run 2 (consolidation prompt): 44 archives, 3 patches, surfaced the 50-skill mlops reorg duplicate bug but didn't umbrella
- Run 3 (this prompt): 249 archives + 18 new class-level umbrellas created, reducing agent-created skills from 346 → 118 with every archived skill's content preserved as references/ under its umbrella. Pinned skill untouched. Full report in PR description.
2026-04-28 22:07:02 -07:00
|
|
|
def test_curator_review_prompt_is_umbrella_first():
|
|
|
|
|
"""The curator prompt must push umbrella-building / class-level thinking,
|
|
|
|
|
not pair-level 'are these two the same?' analysis."""
|
|
|
|
|
from agent.curator import CURATOR_REVIEW_PROMPT
|
|
|
|
|
lower = CURATOR_REVIEW_PROMPT.lower()
|
|
|
|
|
# Must frame the task as active umbrella-building, not a passive audit.
|
|
|
|
|
assert "umbrella" in lower, (
|
|
|
|
|
"must use UMBRELLA framing — the class-first abstraction the curator "
|
|
|
|
|
"is designed to produce"
|
|
|
|
|
)
|
|
|
|
|
# Must tell the reviewer not to stop at pair-level distinctness.
|
|
|
|
|
assert "class" in lower, "must reference class-level thinking"
|
|
|
|
|
# Must cover the three consolidation methods explicitly
|
|
|
|
|
assert "references/" in CURATOR_REVIEW_PROMPT, (
|
|
|
|
|
"must name references/ as a demotion target for session-specific content"
|
|
|
|
|
)
|
|
|
|
|
# templates/ and scripts/ make the umbrella a real class-level skill
|
|
|
|
|
assert "templates/" in CURATOR_REVIEW_PROMPT
|
|
|
|
|
assert "scripts/" in CURATOR_REVIEW_PROMPT
|
|
|
|
|
# Must say the counter argument: usage=0 is not a reason to skip
|
|
|
|
|
assert "use_count" in CURATOR_REVIEW_PROMPT or "counter" in lower, (
|
|
|
|
|
"must pre-empt the 'usage counters are zero, I can't judge' bailout"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_curator_review_prompt_offers_support_file_actions():
|
|
|
|
|
"""Support-file demotion (references/templates/scripts) must be one of
|
|
|
|
|
the three consolidation methods, alongside merge-into-existing and
|
|
|
|
|
create-new-umbrella."""
|
|
|
|
|
from agent.curator import CURATOR_REVIEW_PROMPT
|
|
|
|
|
# skill_manage action=write_file is how references/ are added to an
|
|
|
|
|
# existing skill — this is the create-adjacent action the curator needs
|
|
|
|
|
# to demote narrow siblings without touching their SKILL.md.
|
|
|
|
|
assert "write_file" in CURATOR_REVIEW_PROMPT
|
|
|
|
|
# Must offer creating a brand-new umbrella when no existing one fits
|
|
|
|
|
assert "action=create" in CURATOR_REVIEW_PROMPT or "create a new umbrella" in CURATOR_REVIEW_PROMPT.lower()
|
|
|
|
|
|
|
|
|
|
|
2026-04-26 06:17:01 -07:00
|
|
|
|
|
|
|
|
def test_cli_unpin_refuses_bundled_skill(curator_env, capsys):
|
|
|
|
|
"""hermes curator unpin must refuse bundled/hub skills too (matches pin)."""
|
|
|
|
|
from hermes_cli import curator as cli
|
|
|
|
|
skills_dir = curator_env["home"] / "skills"
|
|
|
|
|
_write_skill(skills_dir, "ship-skill")
|
|
|
|
|
(skills_dir / ".bundled_manifest").write_text(
|
|
|
|
|
"ship-skill:abc\n", encoding="utf-8",
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
class _A:
|
|
|
|
|
skill = "ship-skill"
|
|
|
|
|
|
|
|
|
|
rc = cli._cmd_unpin(_A())
|
|
|
|
|
captured = capsys.readouterr()
|
|
|
|
|
assert rc == 1
|
|
|
|
|
assert "bundled" in captured.out.lower() or "hub" in captured.out.lower()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_cli_pin_refuses_bundled_skill(curator_env, capsys):
|
|
|
|
|
from hermes_cli import curator as cli
|
|
|
|
|
skills_dir = curator_env["home"] / "skills"
|
|
|
|
|
_write_skill(skills_dir, "ship-skill")
|
|
|
|
|
(skills_dir / ".bundled_manifest").write_text(
|
|
|
|
|
"ship-skill:abc\n", encoding="utf-8",
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
class _A:
|
|
|
|
|
skill = "ship-skill"
|
|
|
|
|
|
|
|
|
|
rc = cli._cmd_pin(_A())
|
|
|
|
|
captured = capsys.readouterr()
|
|
|
|
|
assert rc == 1
|
|
|
|
|
assert "bundled" in captured.out.lower() or "hub" in captured.out.lower()
|
fix(curator): unify under auxiliary.curator (hermes model, dashboard) (#17868)
Voscko reported curator.auxiliary.provider/model was advertised in the
docs but ignored — the review fork read only model.provider/default. The
narrow fix would wire the one-off key through, but that leaves curator
as a parallel system: not in `hermes model` → auxiliary picker, not in
the dashboard Models tab, missing per-task base_url/api_key/timeout/
extra_body.
Unify curator with the rest of the aux task system so `hermes model`
and the dashboard configure it like every other aux task.
Four sources of truth updated:
- hermes_cli/config.py — add 'curator' slot to DEFAULT_CONFIG.auxiliary
(timeout=600 since reviews run long), drop the one-off curator.auxiliary
block from DEFAULT_CONFIG.curator.
- hermes_cli/main.py — add ('curator', 'Curator', 'skill-usage review pass')
to _AUX_TASKS so the CLI picker offers it.
- hermes_cli/web_server.py — add 'curator' to _AUX_TASK_SLOTS so the
dashboard REST endpoint accepts it.
- web/src/pages/ModelsPage.tsx — add Curator entry so the dashboard
Models tab renders the task.
agent/curator.py _resolve_review_model() now reads auxiliary.curator
first (canonical), falls back to legacy curator.auxiliary (with an info
log asking users to migrate), then falls back to the main chat model.
Pre-unification users keep working.
Docs updated: docs/user-guide/features/curator.md now points at
`hermes model` → auxiliary → Curator and the dashboard Models tab.
Tests: 6 unit tests on _resolve_review_model (auto default, canonical
slot honored, partial override fallback, legacy fallback with
deprecation log assertion, new-wins-over-legacy, empty-config safety)
plus a cross-registry test that curator is wired into all four sources
of truth. test_aux_tasks_keys_all_exist_in_default_config already
covers the DEFAULT_CONFIG ↔ _AUX_TASKS invariant.
Reported by Voscko on Discord.
2026-04-30 02:46:01 -07:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
# curator review-model resolution (canonical auxiliary.curator slot)
|
|
|
|
|
#
|
|
|
|
|
# Curator was unified with the rest of the aux task system in Apr 2026 so
|
|
|
|
|
# `hermes model` → auxiliary picker, the dashboard Models tab, and the full
|
|
|
|
|
# per-task config (timeout, base_url, api_key, extra_body) all work for it.
|
|
|
|
|
# Voscko report: curator.auxiliary.{provider,model} was advertised but never
|
|
|
|
|
# read. Fix wires curator through auxiliary.curator with a legacy fallback.
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_review_model_defaults_to_main_when_slot_is_auto(curator_env):
|
|
|
|
|
"""auxiliary.curator absent (or auto/empty) → use main model.provider/model."""
|
|
|
|
|
curator = curator_env["curator"]
|
|
|
|
|
cfg = {
|
|
|
|
|
"model": {"provider": "openrouter", "default": "openai/gpt-5.5"},
|
|
|
|
|
}
|
|
|
|
|
assert curator._resolve_review_model(cfg) == ("openrouter", "openai/gpt-5.5")
|
|
|
|
|
|
|
|
|
|
# Explicit auto/empty slot — still main model.
|
|
|
|
|
cfg["auxiliary"] = {"curator": {"provider": "auto", "model": ""}}
|
|
|
|
|
assert curator._resolve_review_model(cfg) == ("openrouter", "openai/gpt-5.5")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_review_model_honors_auxiliary_curator_slot(curator_env):
|
|
|
|
|
"""auxiliary.curator.{provider,model} fully set → that pair wins."""
|
|
|
|
|
curator = curator_env["curator"]
|
|
|
|
|
cfg = {
|
|
|
|
|
"model": {"provider": "openrouter", "default": "openai/gpt-5.5"},
|
|
|
|
|
"auxiliary": {
|
|
|
|
|
"curator": {
|
|
|
|
|
"provider": "openrouter",
|
|
|
|
|
"model": "openai/gpt-5.4-mini",
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
assert curator._resolve_review_model(cfg) == (
|
|
|
|
|
"openrouter", "openai/gpt-5.4-mini",
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_review_model_auxiliary_curator_partial_override_falls_back(curator_env):
|
|
|
|
|
"""Only one of slot provider/model set → fall back to the main pair.
|
|
|
|
|
|
|
|
|
|
Prevents half-configured overrides from sending an empty side to
|
|
|
|
|
resolve_runtime_provider.
|
|
|
|
|
"""
|
|
|
|
|
curator = curator_env["curator"]
|
|
|
|
|
base_main = {"provider": "openrouter", "default": "openai/gpt-5.5"}
|
|
|
|
|
|
|
|
|
|
cfg_provider_only = {
|
|
|
|
|
"model": dict(base_main),
|
|
|
|
|
"auxiliary": {"curator": {"provider": "openrouter", "model": ""}},
|
|
|
|
|
}
|
|
|
|
|
assert curator._resolve_review_model(cfg_provider_only) == (
|
|
|
|
|
"openrouter", "openai/gpt-5.5",
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
cfg_model_only = {
|
|
|
|
|
"model": dict(base_main),
|
|
|
|
|
"auxiliary": {"curator": {"provider": "auto", "model": "gpt-5.4-mini"}},
|
|
|
|
|
}
|
|
|
|
|
assert curator._resolve_review_model(cfg_model_only) == (
|
|
|
|
|
"openrouter", "openai/gpt-5.5",
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_review_model_legacy_curator_auxiliary_still_works(curator_env, caplog):
|
|
|
|
|
"""Pre-unification users set curator.auxiliary.{provider,model} — honor it.
|
|
|
|
|
|
|
|
|
|
Emits a deprecation log line but keeps their config working.
|
|
|
|
|
"""
|
|
|
|
|
curator = curator_env["curator"]
|
|
|
|
|
cfg = {
|
|
|
|
|
"model": {"provider": "openrouter", "default": "openai/gpt-5.5"},
|
|
|
|
|
"curator": {
|
|
|
|
|
"auxiliary": {
|
|
|
|
|
"provider": "openrouter",
|
|
|
|
|
"model": "openai/gpt-5.4-mini",
|
|
|
|
|
},
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
import logging
|
|
|
|
|
with caplog.at_level(logging.INFO, logger="agent.curator"):
|
|
|
|
|
result = curator._resolve_review_model(cfg)
|
|
|
|
|
assert result == ("openrouter", "openai/gpt-5.4-mini")
|
|
|
|
|
assert any(
|
|
|
|
|
"deprecated curator.auxiliary" in rec.message for rec in caplog.records
|
|
|
|
|
), "expected deprecation warning when legacy curator.auxiliary is used"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_review_model_new_slot_wins_over_legacy(curator_env):
|
|
|
|
|
"""When BOTH new and legacy are set, the canonical slot wins."""
|
|
|
|
|
curator = curator_env["curator"]
|
|
|
|
|
cfg = {
|
|
|
|
|
"model": {"provider": "openrouter", "default": "openai/gpt-5.5"},
|
|
|
|
|
"auxiliary": {
|
|
|
|
|
"curator": {"provider": "nous", "model": "new-winner"},
|
|
|
|
|
},
|
|
|
|
|
"curator": {
|
|
|
|
|
"auxiliary": {"provider": "openrouter", "model": "legacy-loser"},
|
|
|
|
|
},
|
|
|
|
|
}
|
|
|
|
|
assert curator._resolve_review_model(cfg) == ("nous", "new-winner")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_review_model_handles_missing_sections(curator_env):
|
|
|
|
|
"""Missing auxiliary/curator sections never raise — fall back cleanly."""
|
|
|
|
|
curator = curator_env["curator"]
|
|
|
|
|
cfg = {"model": {"provider": "anthropic", "model": "claude-sonnet-4-6"}}
|
|
|
|
|
assert curator._resolve_review_model(cfg) == (
|
|
|
|
|
"anthropic", "claude-sonnet-4-6",
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Completely empty config → ("auto", "") — resolve_runtime_provider
|
|
|
|
|
# handles the auto-detection chain from there.
|
|
|
|
|
assert curator._resolve_review_model({}) == ("auto", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def test_curator_slot_is_canonical_aux_task():
|
|
|
|
|
"""Curator must be a first-class slot in every aux-task registry.
|
|
|
|
|
|
|
|
|
|
Four sources of truth, all checked by the shared registry test
|
|
|
|
|
(test_aux_config.py) for the main tasks — this test pins `curator`
|
|
|
|
|
specifically so the unification doesn't silently regress.
|
|
|
|
|
"""
|
|
|
|
|
from hermes_cli.config import DEFAULT_CONFIG
|
|
|
|
|
from hermes_cli.main import _AUX_TASKS
|
|
|
|
|
from hermes_cli.web_server import _AUX_TASK_SLOTS
|
|
|
|
|
|
|
|
|
|
# 1. DEFAULT_CONFIG.auxiliary — schema source
|
|
|
|
|
assert "curator" in DEFAULT_CONFIG["auxiliary"], \
|
|
|
|
|
"curator missing from DEFAULT_CONFIG['auxiliary']"
|
|
|
|
|
slot = DEFAULT_CONFIG["auxiliary"]["curator"]
|
|
|
|
|
assert slot["provider"] == "auto"
|
|
|
|
|
assert slot["model"] == ""
|
|
|
|
|
assert slot["timeout"] > 0, "curator timeout should be set (reviews run long)"
|
|
|
|
|
|
|
|
|
|
# 2. hermes_cli/main.py _AUX_TASKS — CLI picker
|
|
|
|
|
aux_keys = {k for k, _name, _desc in _AUX_TASKS}
|
|
|
|
|
assert "curator" in aux_keys, "curator missing from _AUX_TASKS (CLI picker)"
|
|
|
|
|
|
|
|
|
|
# 3. hermes_cli/web_server.py _AUX_TASK_SLOTS — REST API allowlist
|
|
|
|
|
assert "curator" in _AUX_TASK_SLOTS, \
|
|
|
|
|
"curator missing from _AUX_TASK_SLOTS (dashboard REST API)"
|
|
|
|
|
|
|
|
|
|
# 4. web/src/pages/ModelsPage.tsx is checked at build time; the tsx
|
|
|
|
|
# array and this tuple share a ``Must match _AUX_TASK_SLOTS`` comment.
|