diff --git a/run_agent.py b/run_agent.py
index c0dd76596d..7b23b5b41c 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -3109,13 +3109,28 @@ class AIAgent:
     )
 
     _SKILL_REVIEW_PROMPT = (
-        "Review the conversation above and consider saving or updating a skill if appropriate.\n\n"
-        "Focus on: was a non-trivial approach used to complete a task that required trial "
-        "and error, or changing course due to experiential findings along the way, or did "
-        "the user expect or desire a different method or outcome?\n\n"
-        "If a relevant skill already exists, update it with what you learned. "
-        "Otherwise, create a new skill if the approach is reusable.\n"
-        "If nothing is worth saving, just say 'Nothing to save.' and stop."
+        "Review the conversation above and consider whether a skill should be saved or updated.\n\n"
+        "Work in this order — do not skip steps:\n\n"
+        "1. SURVEY the existing skill landscape first. Call skills_list to see what you "
+        "have. If anything looks potentially relevant, skill_view it before deciding. "
+        "You are looking for the CLASS of task that just happened, not the exact task. "
+        "Example: a successful Tauri build is in the class \"desktop app build "
+        "troubleshooting\", not \"fix my specific Tauri error today\".\n\n"
+        "2. THINK CLASS-FIRST. What general pattern of task did the user just complete? "
+        "What conditions will trigger this pattern again? Describe the class in one "
+        "sentence before looking at what to save.\n\n"
+        "3. PREFER GENERALIZING AN EXISTING SKILL over creating a new one. If a skill "
+        "already covers the class — even partially — update it (skill_manage patch) "
+        "with the new insight. Broaden its \"when to use\" trigger if needed.\n\n"
+        "4. ONLY CREATE A NEW SKILL when no existing skill reasonably covers the class. "
+        "When you create one, name and scope it at the class level "
+        "(\"react-i18n-setup\", not \"add-i18n-to-my-dashboard-app\"). The trigger "
+        "section must describe the class of situations, not this one session.\n\n"
+        "5. If you notice two existing skills that overlap, note it in your response "
+        "so a future review can consolidate them. Do not consolidate now unless the "
+        "overlap is obvious and low-risk.\n\n"
+        "Only act when something is genuinely worth saving. "
+        "If nothing stands out, just say 'Nothing to save.' and stop."
     )
 
     _COMBINED_REVIEW_PROMPT = (
@@ -3125,9 +3140,16 @@ class AIAgent:
         "about how you should behave, their work style, or ways they want you to operate? "
         "If so, save using the memory tool.\n\n"
         "**Skills**: Was a non-trivial approach used to complete a task that required trial "
-        "and error, or changing course due to experiential findings along the way, or did "
-        "the user expect or desire a different method or outcome? If a relevant skill "
-        "already exists, update it. Otherwise, create a new one if the approach is reusable.\n\n"
+        "and error, changing course due to experiential findings, or a different method "
+        "or outcome than the user expected? If so, work in this order:\n"
+        " a. SURVEY existing skills first (skills_list, then skill_view on candidates).\n"
+        " b. Identify the CLASS of task, not the specific task "
+        "(\"desktop app build troubleshooting\", not \"fix my Tauri error\").\n"
+        " c. PREFER UPDATING/GENERALIZING an existing skill that covers the class.\n"
+        " d. ONLY CREATE A NEW SKILL if no existing one covers the class. Scope at "
+        "the class level, not this one session.\n"
+        " e. If you notice overlapping skills during the survey, note it so a future "
+        "review can consolidate them.\n\n"
         "Only act if there's something genuinely worth saving. "
         "If nothing stands out, just say 'Nothing to save.' and stop."
     )
diff --git a/tests/run_agent/test_review_prompt_class_first.py b/tests/run_agent/test_review_prompt_class_first.py
new file mode 100644
index 0000000000..4a7fed1d74
--- /dev/null
+++ b/tests/run_agent/test_review_prompt_class_first.py
@@ -0,0 +1,83 @@
+"""Behavior tests for the class-first skill review prompts.
+
+The skill review / combined review prompts steer the background review agent
+toward generalizing existing skills rather than accumulating near-duplicates.
+These tests assert the behavioral *instructions* are present — they do NOT
+snapshot the full prompt text (change-detector).
+"""
+
+from run_agent import AIAgent
+
+
+def test_skill_review_prompt_instructs_survey_first():
+    """Prompt must tell the reviewer to list existing skills before deciding."""
+    prompt = AIAgent._SKILL_REVIEW_PROMPT
+    assert "skills_list" in prompt, "must instruct the reviewer to call skills_list"
+    assert "skill_view" in prompt, "must instruct the reviewer to skill_view candidates"
+    assert "SURVEY" in prompt, "must name the survey step explicitly"
+
+
+def test_skill_review_prompt_is_class_first():
+    """Prompt must steer toward the CLASS of task, not the specific task."""
+    prompt = AIAgent._SKILL_REVIEW_PROMPT
+    assert "CLASS" in prompt, "must tell the reviewer to think about the task class"
+    assert "class level" in prompt, "must anchor naming at the class level"
+
+
+def test_skill_review_prompt_prefers_updating_existing():
+    """Prompt must prefer generalizing an existing skill over creating a new one."""
+    prompt = AIAgent._SKILL_REVIEW_PROMPT
+    assert "PREFER GENERALIZING" in prompt or "PREFER UPDATING" in prompt, (
+        "must state the update-over-create preference"
+    )
+    assert "ONLY CREATE A NEW SKILL" in prompt, (
+        "must gate new-skill creation behind a last-resort clause"
+    )
+
+
+def test_skill_review_prompt_flags_overlap_for_followup():
+    """Prompt must ask the reviewer to note overlapping skills for future review."""
+    prompt = AIAgent._SKILL_REVIEW_PROMPT
+    assert "overlap" in prompt.lower(), "must mention the overlap-flagging protocol"
+
+
+def test_skill_review_prompt_preserves_opt_out_clause():
+    """The 'Nothing to save.' escape clause must remain."""
+    prompt = AIAgent._SKILL_REVIEW_PROMPT
+    assert "Nothing to save." in prompt
+
+
+def test_combined_review_prompt_keeps_memory_section():
+    """Combined prompt must still cover memory review."""
+    prompt = AIAgent._COMBINED_REVIEW_PROMPT
+    assert "**Memory**" in prompt
+    assert "memory tool" in prompt
+
+
+def test_combined_review_prompt_skills_section_is_class_first():
+    """The **Skills** half of the combined prompt must follow the same protocol."""
+    prompt = AIAgent._COMBINED_REVIEW_PROMPT
+    assert "**Skills**" in prompt
+    # Check only the text after the **Skills** marker, so a match elsewhere in
+    # the prompt (e.g. the memory half) cannot satisfy these assertions.
+    skills_section = prompt.split("**Skills**", 1)[1]
+    assert "SURVEY" in skills_section
+    assert "CLASS" in skills_section
+    assert "skills_list" in skills_section
+    assert "skill_view" in skills_section
+    assert "ONLY CREATE A NEW SKILL" in skills_section
+
+
+def test_combined_review_prompt_preserves_opt_out_clause():
+    """The 'Nothing to save.' escape clause must remain in the combined prompt too."""
+    prompt = AIAgent._COMBINED_REVIEW_PROMPT
+    assert "Nothing to save." in prompt
+
+
+def test_memory_review_prompt_unchanged_in_structure():
+    """Memory-only review prompt stays focused on user facts — not touched by this change."""
+    prompt = AIAgent._MEMORY_REVIEW_PROMPT
+    # Guardrails: the memory-only prompt must NOT mention skills/surveys.
+    assert "skills_list" not in prompt
+    assert "SURVEY" not in prompt
+    assert "memory tool" in prompt