mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-04 09:47:54 +08:00
Compare commits
20 Commits
feat/head-
...
optional-b
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a477118337 | ||
|
|
c6b75baad0 | ||
|
|
a7ad6f6d28 | ||
|
|
1a2141d04d | ||
|
|
ff3f3169b2 | ||
|
|
f4580b6010 | ||
|
|
7b63a787b3 | ||
|
|
069570d103 | ||
|
|
0dafdcab86 | ||
|
|
654e16187e | ||
|
|
732c66b0f3 | ||
|
|
1f0944de21 | ||
|
|
f1a1b58319 | ||
|
|
c21d77ca08 | ||
|
|
d6c710706f | ||
|
|
a6d3becd6a | ||
|
|
3b67606c42 | ||
|
|
763c6d104d | ||
|
|
37752ff1ac | ||
|
|
99d9ea1464 |
@@ -195,6 +195,8 @@ def build_skills_system_prompt() -> str:
|
|||||||
|
|
||||||
# Collect skills with descriptions, grouped by category
|
# Collect skills with descriptions, grouped by category
|
||||||
# Each entry: (skill_name, description)
|
# Each entry: (skill_name, description)
|
||||||
|
# Supports sub-categories: skills/mlops/training/axolotl/SKILL.md
|
||||||
|
# → category "mlops/training", skill "axolotl"
|
||||||
skills_by_category: dict[str, list[tuple[str, str]]] = {}
|
skills_by_category: dict[str, list[tuple[str, str]]] = {}
|
||||||
for skill_file in skills_dir.rglob("SKILL.md"):
|
for skill_file in skills_dir.rglob("SKILL.md"):
|
||||||
# Skip skills incompatible with the current OS platform
|
# Skip skills incompatible with the current OS platform
|
||||||
@@ -203,8 +205,13 @@ def build_skills_system_prompt() -> str:
|
|||||||
rel_path = skill_file.relative_to(skills_dir)
|
rel_path = skill_file.relative_to(skills_dir)
|
||||||
parts = rel_path.parts
|
parts = rel_path.parts
|
||||||
if len(parts) >= 2:
|
if len(parts) >= 2:
|
||||||
category = parts[0]
|
# Category is everything between skills_dir and the skill folder
|
||||||
|
# e.g. parts = ("mlops", "training", "axolotl", "SKILL.md")
|
||||||
|
# → category = "mlops/training", skill_name = "axolotl"
|
||||||
|
# e.g. parts = ("github", "github-auth", "SKILL.md")
|
||||||
|
# → category = "github", skill_name = "github-auth"
|
||||||
skill_name = parts[-2]
|
skill_name = parts[-2]
|
||||||
|
category = "/".join(parts[:-2]) if len(parts) > 2 else parts[0]
|
||||||
else:
|
else:
|
||||||
category = "general"
|
category = "general"
|
||||||
skill_name = skill_file.parent.name
|
skill_name = skill_file.parent.name
|
||||||
@@ -215,9 +222,11 @@ def build_skills_system_prompt() -> str:
|
|||||||
return ""
|
return ""
|
||||||
|
|
||||||
# Read category-level descriptions from DESCRIPTION.md
|
# Read category-level descriptions from DESCRIPTION.md
|
||||||
|
# Checks both the exact category path and parent directories
|
||||||
category_descriptions = {}
|
category_descriptions = {}
|
||||||
for category in skills_by_category:
|
for category in skills_by_category:
|
||||||
desc_file = skills_dir / category / "DESCRIPTION.md"
|
cat_path = Path(category)
|
||||||
|
desc_file = skills_dir / cat_path / "DESCRIPTION.md"
|
||||||
if desc_file.exists():
|
if desc_file.exists():
|
||||||
try:
|
try:
|
||||||
content = desc_file.read_text(encoding="utf-8")
|
content = desc_file.read_text(encoding="utf-8")
|
||||||
|
|||||||
@@ -555,6 +555,21 @@ toolsets:
|
|||||||
# args: ["-y", "@modelcontextprotocol/server-github"]
|
# args: ["-y", "@modelcontextprotocol/server-github"]
|
||||||
# env:
|
# env:
|
||||||
# GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_..."
|
# GITHUB_PERSONAL_ACCESS_TOKEN: "ghp_..."
|
||||||
|
#
|
||||||
|
# Sampling (server-initiated LLM requests) — enabled by default.
|
||||||
|
# Per-server config under the 'sampling' key:
|
||||||
|
# analysis:
|
||||||
|
# command: npx
|
||||||
|
# args: ["-y", "analysis-server"]
|
||||||
|
# sampling:
|
||||||
|
# enabled: true # default: true
|
||||||
|
# model: "gemini-3-flash" # override model (optional)
|
||||||
|
# max_tokens_cap: 4096 # max tokens per request
|
||||||
|
# timeout: 30 # LLM call timeout (seconds)
|
||||||
|
# max_rpm: 10 # max requests per minute
|
||||||
|
# allowed_models: [] # model whitelist (empty = all)
|
||||||
|
# max_tool_rounds: 5 # tool loop limit (0 = disable)
|
||||||
|
# log_level: "info" # audit verbosity
|
||||||
|
|
||||||
# =============================================================================
|
# =============================================================================
|
||||||
# Voice Transcription (Speech-to-Text)
|
# Voice Transcription (Speech-to-Text)
|
||||||
|
|||||||
@@ -252,6 +252,7 @@ def cleanup_document_cache(max_age_hours: int = 24) -> int:
|
|||||||
class MessageType(Enum):
|
class MessageType(Enum):
|
||||||
"""Types of incoming messages."""
|
"""Types of incoming messages."""
|
||||||
TEXT = "text"
|
TEXT = "text"
|
||||||
|
LOCATION = "location"
|
||||||
PHOTO = "photo"
|
PHOTO = "photo"
|
||||||
VIDEO = "video"
|
VIDEO = "video"
|
||||||
AUDIO = "audio"
|
AUDIO = "audio"
|
||||||
|
|||||||
@@ -132,6 +132,10 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||||||
filters.COMMAND,
|
filters.COMMAND,
|
||||||
self._handle_command
|
self._handle_command
|
||||||
))
|
))
|
||||||
|
self._app.add_handler(TelegramMessageHandler(
|
||||||
|
filters.LOCATION | getattr(filters, "VENUE", filters.LOCATION),
|
||||||
|
self._handle_location_message
|
||||||
|
))
|
||||||
self._app.add_handler(TelegramMessageHandler(
|
self._app.add_handler(TelegramMessageHandler(
|
||||||
filters.PHOTO | filters.VIDEO | filters.AUDIO | filters.VOICE | filters.Document.ALL | filters.Sticker.ALL,
|
filters.PHOTO | filters.VIDEO | filters.AUDIO | filters.VOICE | filters.Document.ALL | filters.Sticker.ALL,
|
||||||
self._handle_media_message
|
self._handle_media_message
|
||||||
@@ -546,6 +550,41 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||||||
event = self._build_message_event(update.message, MessageType.COMMAND)
|
event = self._build_message_event(update.message, MessageType.COMMAND)
|
||||||
await self.handle_message(event)
|
await self.handle_message(event)
|
||||||
|
|
||||||
|
async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||||
|
"""Handle incoming location/venue pin messages."""
|
||||||
|
if not update.message:
|
||||||
|
return
|
||||||
|
|
||||||
|
msg = update.message
|
||||||
|
venue = getattr(msg, "venue", None)
|
||||||
|
location = getattr(venue, "location", None) if venue else getattr(msg, "location", None)
|
||||||
|
|
||||||
|
if not location:
|
||||||
|
return
|
||||||
|
|
||||||
|
lat = getattr(location, "latitude", None)
|
||||||
|
lon = getattr(location, "longitude", None)
|
||||||
|
if lat is None or lon is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Build a text message with coordinates and context
|
||||||
|
parts = ["[The user shared a location pin.]"]
|
||||||
|
if venue:
|
||||||
|
title = getattr(venue, "title", None)
|
||||||
|
address = getattr(venue, "address", None)
|
||||||
|
if title:
|
||||||
|
parts.append(f"Venue: {title}")
|
||||||
|
if address:
|
||||||
|
parts.append(f"Address: {address}")
|
||||||
|
parts.append(f"latitude: {lat}")
|
||||||
|
parts.append(f"longitude: {lon}")
|
||||||
|
parts.append(f"Map: https://www.google.com/maps/search/?api=1&query={lat},{lon}")
|
||||||
|
parts.append("Ask what they'd like to find nearby (restaurants, cafes, etc.) and any preferences.")
|
||||||
|
|
||||||
|
event = self._build_message_event(msg, MessageType.LOCATION)
|
||||||
|
event.text = "\n".join(parts)
|
||||||
|
await self.handle_message(event)
|
||||||
|
|
||||||
async def _handle_media_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
async def _handle_media_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
||||||
"""Handle incoming media messages, downloading images to local cache."""
|
"""Handle incoming media messages, downloading images to local cache."""
|
||||||
if not update.message:
|
if not update.message:
|
||||||
|
|||||||
@@ -1449,6 +1449,11 @@ class GatewayRunner:
|
|||||||
except Exception:
|
except Exception:
|
||||||
current_provider = "openrouter"
|
current_provider = "openrouter"
|
||||||
|
|
||||||
|
# Detect custom endpoint: provider resolved to openrouter but a custom
|
||||||
|
# base URL is configured — the user set up a custom endpoint.
|
||||||
|
if current_provider == "openrouter" and os.getenv("OPENAI_BASE_URL", "").strip():
|
||||||
|
current_provider = "custom"
|
||||||
|
|
||||||
if not args:
|
if not args:
|
||||||
provider_label = _PROVIDER_LABELS.get(current_provider, current_provider)
|
provider_label = _PROVIDER_LABELS.get(current_provider, current_provider)
|
||||||
lines = [
|
lines = [
|
||||||
@@ -1575,6 +1580,10 @@ class GatewayRunner:
|
|||||||
except Exception:
|
except Exception:
|
||||||
current_provider = "openrouter"
|
current_provider = "openrouter"
|
||||||
|
|
||||||
|
# Detect custom endpoint
|
||||||
|
if current_provider == "openrouter" and os.getenv("OPENAI_BASE_URL", "").strip():
|
||||||
|
current_provider = "custom"
|
||||||
|
|
||||||
current_label = _PROVIDER_LABELS.get(current_provider, current_provider)
|
current_label = _PROVIDER_LABELS.get(current_provider, current_provider)
|
||||||
|
|
||||||
lines = [
|
lines = [
|
||||||
|
|||||||
@@ -761,9 +761,39 @@ def cmd_model(args):
|
|||||||
("kimi-coding", "Kimi / Moonshot (Moonshot AI direct API)"),
|
("kimi-coding", "Kimi / Moonshot (Moonshot AI direct API)"),
|
||||||
("minimax", "MiniMax (global direct API)"),
|
("minimax", "MiniMax (global direct API)"),
|
||||||
("minimax-cn", "MiniMax China (domestic direct API)"),
|
("minimax-cn", "MiniMax China (domestic direct API)"),
|
||||||
("custom", "Custom endpoint (self-hosted / VLLM / etc.)"),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Add user-defined custom providers from config.yaml
|
||||||
|
custom_providers_cfg = config.get("custom_providers") or []
|
||||||
|
_custom_provider_map = {} # key → {name, base_url, api_key}
|
||||||
|
if isinstance(custom_providers_cfg, list):
|
||||||
|
for entry in custom_providers_cfg:
|
||||||
|
if not isinstance(entry, dict):
|
||||||
|
continue
|
||||||
|
name = entry.get("name", "").strip()
|
||||||
|
base_url = entry.get("base_url", "").strip()
|
||||||
|
if not name or not base_url:
|
||||||
|
continue
|
||||||
|
# Generate a stable key from the name
|
||||||
|
key = "custom:" + name.lower().replace(" ", "-")
|
||||||
|
short_url = base_url.replace("https://", "").replace("http://", "").rstrip("/")
|
||||||
|
saved_model = entry.get("model", "")
|
||||||
|
model_hint = f" — {saved_model}" if saved_model else ""
|
||||||
|
providers.append((key, f"{name} ({short_url}){model_hint}"))
|
||||||
|
_custom_provider_map[key] = {
|
||||||
|
"name": name,
|
||||||
|
"base_url": base_url,
|
||||||
|
"api_key": entry.get("api_key", ""),
|
||||||
|
"model": saved_model,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Always add the manual custom endpoint option last
|
||||||
|
providers.append(("custom", "Custom endpoint (enter URL manually)"))
|
||||||
|
|
||||||
|
# Add removal option if there are saved custom providers
|
||||||
|
if _custom_provider_map:
|
||||||
|
providers.append(("remove-custom", "Remove a saved custom provider"))
|
||||||
|
|
||||||
# Reorder so the active provider is at the top
|
# Reorder so the active provider is at the top
|
||||||
known_keys = {k for k, _ in providers}
|
known_keys = {k for k, _ in providers}
|
||||||
active_key = active if active in known_keys else "custom"
|
active_key = active if active in known_keys else "custom"
|
||||||
@@ -791,6 +821,10 @@ def cmd_model(args):
|
|||||||
_model_flow_openai_codex(config, current_model)
|
_model_flow_openai_codex(config, current_model)
|
||||||
elif selected_provider == "custom":
|
elif selected_provider == "custom":
|
||||||
_model_flow_custom(config)
|
_model_flow_custom(config)
|
||||||
|
elif selected_provider.startswith("custom:") and selected_provider in _custom_provider_map:
|
||||||
|
_model_flow_named_custom(config, _custom_provider_map[selected_provider])
|
||||||
|
elif selected_provider == "remove-custom":
|
||||||
|
_remove_custom_provider(config)
|
||||||
elif selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn"):
|
elif selected_provider in ("zai", "kimi-coding", "minimax", "minimax-cn"):
|
||||||
_model_flow_api_key_provider(config, selected_provider, current_model)
|
_model_flow_api_key_provider(config, selected_provider, current_model)
|
||||||
|
|
||||||
@@ -1006,7 +1040,11 @@ def _model_flow_openai_codex(config, current_model=""):
|
|||||||
|
|
||||||
|
|
||||||
def _model_flow_custom(config):
|
def _model_flow_custom(config):
|
||||||
"""Custom endpoint: collect URL, API key, and model name."""
|
"""Custom endpoint: collect URL, API key, and model name.
|
||||||
|
|
||||||
|
Automatically saves the endpoint to ``custom_providers`` in config.yaml
|
||||||
|
so it appears in the provider menu on subsequent runs.
|
||||||
|
"""
|
||||||
from hermes_cli.auth import _save_model_choice, deactivate_provider
|
from hermes_cli.auth import _save_model_choice, deactivate_provider
|
||||||
from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
|
from hermes_cli.config import get_env_value, save_env_value, load_config, save_config
|
||||||
|
|
||||||
@@ -1038,6 +1076,8 @@ def _model_flow_custom(config):
|
|||||||
print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
|
print(f"Invalid URL: {effective_url} (must start with http:// or https://)")
|
||||||
return
|
return
|
||||||
|
|
||||||
|
effective_key = api_key or current_key
|
||||||
|
|
||||||
if base_url:
|
if base_url:
|
||||||
save_env_value("OPENAI_BASE_URL", base_url)
|
save_env_value("OPENAI_BASE_URL", base_url)
|
||||||
if api_key:
|
if api_key:
|
||||||
@@ -1050,7 +1090,7 @@ def _model_flow_custom(config):
|
|||||||
cfg = load_config()
|
cfg = load_config()
|
||||||
model = cfg.get("model")
|
model = cfg.get("model")
|
||||||
if isinstance(model, dict):
|
if isinstance(model, dict):
|
||||||
model["provider"] = "auto"
|
model["provider"] = "custom"
|
||||||
model["base_url"] = effective_url
|
model["base_url"] = effective_url
|
||||||
save_config(cfg)
|
save_config(cfg)
|
||||||
deactivate_provider()
|
deactivate_provider()
|
||||||
@@ -1061,6 +1101,223 @@ def _model_flow_custom(config):
|
|||||||
deactivate_provider()
|
deactivate_provider()
|
||||||
print("Endpoint saved. Use `/model` in chat or `hermes model` to set a model.")
|
print("Endpoint saved. Use `/model` in chat or `hermes model` to set a model.")
|
||||||
|
|
||||||
|
# Auto-save to custom_providers so it appears in the menu next time
|
||||||
|
_save_custom_provider(effective_url, effective_key, model_name or "")
|
||||||
|
|
||||||
|
|
||||||
|
def _save_custom_provider(base_url, api_key="", model=""):
|
||||||
|
"""Save a custom endpoint to custom_providers in config.yaml.
|
||||||
|
|
||||||
|
Deduplicates by base_url — if the URL already exists, updates the
|
||||||
|
model name but doesn't add a duplicate entry.
|
||||||
|
Auto-generates a display name from the URL hostname.
|
||||||
|
"""
|
||||||
|
from hermes_cli.config import load_config, save_config
|
||||||
|
|
||||||
|
cfg = load_config()
|
||||||
|
providers = cfg.get("custom_providers") or []
|
||||||
|
if not isinstance(providers, list):
|
||||||
|
providers = []
|
||||||
|
|
||||||
|
# Check if this URL is already saved — update model if so
|
||||||
|
for entry in providers:
|
||||||
|
if isinstance(entry, dict) and entry.get("base_url", "").rstrip("/") == base_url.rstrip("/"):
|
||||||
|
if model and entry.get("model") != model:
|
||||||
|
entry["model"] = model
|
||||||
|
cfg["custom_providers"] = providers
|
||||||
|
save_config(cfg)
|
||||||
|
return # already saved, updated model if needed
|
||||||
|
|
||||||
|
# Auto-generate a name from the URL
|
||||||
|
import re
|
||||||
|
clean = base_url.replace("https://", "").replace("http://", "").rstrip("/")
|
||||||
|
# Remove /v1 suffix for cleaner names
|
||||||
|
clean = re.sub(r"/v1/?$", "", clean)
|
||||||
|
# Use hostname:port as the name
|
||||||
|
name = clean.split("/")[0]
|
||||||
|
# Capitalize for readability
|
||||||
|
if "localhost" in name or "127.0.0.1" in name:
|
||||||
|
name = f"Local ({name})"
|
||||||
|
elif "runpod" in name.lower():
|
||||||
|
name = f"RunPod ({name})"
|
||||||
|
else:
|
||||||
|
name = name.capitalize()
|
||||||
|
|
||||||
|
entry = {"name": name, "base_url": base_url}
|
||||||
|
if api_key:
|
||||||
|
entry["api_key"] = api_key
|
||||||
|
if model:
|
||||||
|
entry["model"] = model
|
||||||
|
|
||||||
|
providers.append(entry)
|
||||||
|
cfg["custom_providers"] = providers
|
||||||
|
save_config(cfg)
|
||||||
|
print(f" 💾 Saved to custom providers as \"{name}\" (edit in config.yaml)")
|
||||||
|
|
||||||
|
|
||||||
|
def _remove_custom_provider(config):
|
||||||
|
"""Let the user remove a saved custom provider from config.yaml."""
|
||||||
|
from hermes_cli.config import load_config, save_config
|
||||||
|
|
||||||
|
cfg = load_config()
|
||||||
|
providers = cfg.get("custom_providers") or []
|
||||||
|
if not isinstance(providers, list) or not providers:
|
||||||
|
print("No custom providers configured.")
|
||||||
|
return
|
||||||
|
|
||||||
|
print("Remove a custom provider:\n")
|
||||||
|
|
||||||
|
choices = []
|
||||||
|
for entry in providers:
|
||||||
|
if isinstance(entry, dict):
|
||||||
|
name = entry.get("name", "unnamed")
|
||||||
|
url = entry.get("base_url", "")
|
||||||
|
short_url = url.replace("https://", "").replace("http://", "").rstrip("/")
|
||||||
|
choices.append(f"{name} ({short_url})")
|
||||||
|
else:
|
||||||
|
choices.append(str(entry))
|
||||||
|
choices.append("Cancel")
|
||||||
|
|
||||||
|
try:
|
||||||
|
from simple_term_menu import TerminalMenu
|
||||||
|
menu = TerminalMenu(
|
||||||
|
[f" {c}" for c in choices], cursor_index=0,
|
||||||
|
menu_cursor="-> ", menu_cursor_style=("fg_red", "bold"),
|
||||||
|
menu_highlight_style=("fg_red",),
|
||||||
|
cycle_cursor=True, clear_screen=False,
|
||||||
|
title="Select provider to remove:",
|
||||||
|
)
|
||||||
|
idx = menu.show()
|
||||||
|
print()
|
||||||
|
except (ImportError, NotImplementedError):
|
||||||
|
for i, c in enumerate(choices, 1):
|
||||||
|
print(f" {i}. {c}")
|
||||||
|
print()
|
||||||
|
try:
|
||||||
|
val = input(f"Choice [1-{len(choices)}]: ").strip()
|
||||||
|
idx = int(val) - 1 if val else None
|
||||||
|
except (ValueError, KeyboardInterrupt, EOFError):
|
||||||
|
idx = None
|
||||||
|
|
||||||
|
if idx is None or idx >= len(providers):
|
||||||
|
print("No change.")
|
||||||
|
return
|
||||||
|
|
||||||
|
removed = providers.pop(idx)
|
||||||
|
cfg["custom_providers"] = providers
|
||||||
|
save_config(cfg)
|
||||||
|
removed_name = removed.get("name", "unnamed") if isinstance(removed, dict) else str(removed)
|
||||||
|
print(f"✅ Removed \"{removed_name}\" from custom providers.")
|
||||||
|
|
||||||
|
|
||||||
|
def _model_flow_named_custom(config, provider_info):
|
||||||
|
"""Handle a named custom provider from config.yaml custom_providers list.
|
||||||
|
|
||||||
|
If the entry has a saved model name, activates it immediately.
|
||||||
|
Otherwise probes the endpoint's /models API to let the user pick one.
|
||||||
|
"""
|
||||||
|
from hermes_cli.auth import _save_model_choice, deactivate_provider
|
||||||
|
from hermes_cli.config import save_env_value, load_config, save_config
|
||||||
|
from hermes_cli.models import fetch_api_models
|
||||||
|
|
||||||
|
name = provider_info["name"]
|
||||||
|
base_url = provider_info["base_url"]
|
||||||
|
api_key = provider_info.get("api_key", "")
|
||||||
|
saved_model = provider_info.get("model", "")
|
||||||
|
|
||||||
|
# If a model is saved, just activate immediately — no probing needed
|
||||||
|
if saved_model:
|
||||||
|
save_env_value("OPENAI_BASE_URL", base_url)
|
||||||
|
if api_key:
|
||||||
|
save_env_value("OPENAI_API_KEY", api_key)
|
||||||
|
_save_model_choice(saved_model)
|
||||||
|
|
||||||
|
cfg = load_config()
|
||||||
|
model = cfg.get("model")
|
||||||
|
if isinstance(model, dict):
|
||||||
|
model["provider"] = "custom"
|
||||||
|
model["base_url"] = base_url
|
||||||
|
save_config(cfg)
|
||||||
|
deactivate_provider()
|
||||||
|
|
||||||
|
print(f"✅ Switched to: {saved_model}")
|
||||||
|
print(f" Provider: {name} ({base_url})")
|
||||||
|
return
|
||||||
|
|
||||||
|
# No saved model — probe endpoint and let user pick
|
||||||
|
print(f" Provider: {name}")
|
||||||
|
print(f" URL: {base_url}")
|
||||||
|
print()
|
||||||
|
print("No model saved for this provider. Fetching available models...")
|
||||||
|
models = fetch_api_models(api_key, base_url, timeout=8.0)
|
||||||
|
|
||||||
|
if models:
|
||||||
|
print(f"Found {len(models)} model(s):\n")
|
||||||
|
try:
|
||||||
|
from simple_term_menu import TerminalMenu
|
||||||
|
menu_items = [f" {m}" for m in models] + [" Cancel"]
|
||||||
|
menu = TerminalMenu(
|
||||||
|
menu_items, cursor_index=0,
|
||||||
|
menu_cursor="-> ", menu_cursor_style=("fg_green", "bold"),
|
||||||
|
menu_highlight_style=("fg_green",),
|
||||||
|
cycle_cursor=True, clear_screen=False,
|
||||||
|
title=f"Select model from {name}:",
|
||||||
|
)
|
||||||
|
idx = menu.show()
|
||||||
|
print()
|
||||||
|
if idx is None or idx >= len(models):
|
||||||
|
print("Cancelled.")
|
||||||
|
return
|
||||||
|
model_name = models[idx]
|
||||||
|
except (ImportError, NotImplementedError):
|
||||||
|
for i, m in enumerate(models, 1):
|
||||||
|
print(f" {i}. {m}")
|
||||||
|
print(f" {len(models) + 1}. Cancel")
|
||||||
|
print()
|
||||||
|
try:
|
||||||
|
val = input(f"Choice [1-{len(models) + 1}]: ").strip()
|
||||||
|
if not val:
|
||||||
|
print("Cancelled.")
|
||||||
|
return
|
||||||
|
idx = int(val) - 1
|
||||||
|
if idx < 0 or idx >= len(models):
|
||||||
|
print("Cancelled.")
|
||||||
|
return
|
||||||
|
model_name = models[idx]
|
||||||
|
except (ValueError, KeyboardInterrupt, EOFError):
|
||||||
|
print("\nCancelled.")
|
||||||
|
return
|
||||||
|
else:
|
||||||
|
print("Could not fetch models from endpoint. Enter model name manually.")
|
||||||
|
try:
|
||||||
|
model_name = input("Model name: ").strip()
|
||||||
|
except (KeyboardInterrupt, EOFError):
|
||||||
|
print("\nCancelled.")
|
||||||
|
return
|
||||||
|
if not model_name:
|
||||||
|
print("No model specified. Cancelled.")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Activate and save the model to the custom_providers entry
|
||||||
|
save_env_value("OPENAI_BASE_URL", base_url)
|
||||||
|
if api_key:
|
||||||
|
save_env_value("OPENAI_API_KEY", api_key)
|
||||||
|
_save_model_choice(model_name)
|
||||||
|
|
||||||
|
cfg = load_config()
|
||||||
|
model = cfg.get("model")
|
||||||
|
if isinstance(model, dict):
|
||||||
|
model["provider"] = "custom"
|
||||||
|
model["base_url"] = base_url
|
||||||
|
save_config(cfg)
|
||||||
|
deactivate_provider()
|
||||||
|
|
||||||
|
# Save model name to the custom_providers entry for next time
|
||||||
|
_save_custom_provider(base_url, api_key, model_name)
|
||||||
|
|
||||||
|
print(f"\n✅ Model set to: {model_name}")
|
||||||
|
print(f" Provider: {name} ({base_url})")
|
||||||
|
|
||||||
|
|
||||||
# Curated model lists for direct API-key providers
|
# Curated model lists for direct API-key providers
|
||||||
_PROVIDER_MODELS = {
|
_PROVIDER_MODELS = {
|
||||||
|
|||||||
@@ -63,7 +63,7 @@ _PROVIDER_LABELS = {
|
|||||||
"kimi-coding": "Kimi / Moonshot",
|
"kimi-coding": "Kimi / Moonshot",
|
||||||
"minimax": "MiniMax",
|
"minimax": "MiniMax",
|
||||||
"minimax-cn": "MiniMax (China)",
|
"minimax-cn": "MiniMax (China)",
|
||||||
"custom": "custom endpoint",
|
"custom": "Custom endpoint",
|
||||||
}
|
}
|
||||||
|
|
||||||
_PROVIDER_ALIASES = {
|
_PROVIDER_ALIASES = {
|
||||||
|
|||||||
@@ -632,6 +632,29 @@ def setup_model_provider(config: dict):
|
|||||||
save_env_value("OPENAI_BASE_URL", "")
|
save_env_value("OPENAI_BASE_URL", "")
|
||||||
save_env_value("OPENAI_API_KEY", "")
|
save_env_value("OPENAI_API_KEY", "")
|
||||||
|
|
||||||
|
# Update config.yaml and deactivate any OAuth provider so the
|
||||||
|
# resolver doesn't keep returning the old provider (e.g. Codex).
|
||||||
|
try:
|
||||||
|
from hermes_cli.auth import deactivate_provider
|
||||||
|
deactivate_provider()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
import yaml
|
||||||
|
config_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "config.yaml"
|
||||||
|
try:
|
||||||
|
disk_cfg = {}
|
||||||
|
if config_path.exists():
|
||||||
|
disk_cfg = yaml.safe_load(config_path.read_text()) or {}
|
||||||
|
model_section = disk_cfg.get("model", {})
|
||||||
|
if isinstance(model_section, str):
|
||||||
|
model_section = {"default": model_section}
|
||||||
|
model_section["provider"] = "openrouter"
|
||||||
|
model_section.pop("base_url", None) # OpenRouter uses default URL
|
||||||
|
disk_cfg["model"] = model_section
|
||||||
|
config_path.write_text(yaml.safe_dump(disk_cfg, sort_keys=False))
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("Could not save provider to config.yaml: %s", e)
|
||||||
|
|
||||||
elif provider_idx == 3: # Custom endpoint
|
elif provider_idx == 3: # Custom endpoint
|
||||||
selected_provider = "custom"
|
selected_provider = "custom"
|
||||||
print()
|
print()
|
||||||
@@ -659,6 +682,28 @@ def setup_model_provider(config: dict):
|
|||||||
if model_name:
|
if model_name:
|
||||||
config['model'] = model_name
|
config['model'] = model_name
|
||||||
save_env_value("LLM_MODEL", model_name)
|
save_env_value("LLM_MODEL", model_name)
|
||||||
|
|
||||||
|
# Save provider and base_url to config.yaml so the gateway and CLI
|
||||||
|
# both resolve the correct provider without relying on env-var heuristics.
|
||||||
|
if base_url:
|
||||||
|
import yaml
|
||||||
|
config_path = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "config.yaml"
|
||||||
|
try:
|
||||||
|
disk_cfg = {}
|
||||||
|
if config_path.exists():
|
||||||
|
disk_cfg = yaml.safe_load(config_path.read_text()) or {}
|
||||||
|
model_section = disk_cfg.get("model", {})
|
||||||
|
if isinstance(model_section, str):
|
||||||
|
model_section = {"default": model_section}
|
||||||
|
model_section["provider"] = "custom"
|
||||||
|
model_section["base_url"] = base_url.rstrip("/")
|
||||||
|
if model_name:
|
||||||
|
model_section["default"] = model_name
|
||||||
|
disk_cfg["model"] = model_section
|
||||||
|
config_path.write_text(yaml.safe_dump(disk_cfg, sort_keys=False))
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug("Could not save provider to config.yaml: %s", e)
|
||||||
|
|
||||||
print_success("Custom endpoint configured")
|
print_success("Custom endpoint configured")
|
||||||
|
|
||||||
elif provider_idx == 4: # Z.AI / GLM
|
elif provider_idx == 4: # Z.AI / GLM
|
||||||
|
|||||||
21
run_agent.py
21
run_agent.py
@@ -3834,6 +3834,27 @@ class AIAgent:
|
|||||||
else:
|
else:
|
||||||
assistant_message = response.choices[0].message
|
assistant_message = response.choices[0].message
|
||||||
|
|
||||||
|
# Normalize content to string — some OpenAI-compatible servers
|
||||||
|
# (llama-server, etc.) return content as a dict or list instead
|
||||||
|
# of a plain string, which crashes downstream .strip() calls.
|
||||||
|
if assistant_message.content is not None and not isinstance(assistant_message.content, str):
|
||||||
|
raw = assistant_message.content
|
||||||
|
if isinstance(raw, dict):
|
||||||
|
assistant_message.content = raw.get("text", "") or raw.get("content", "") or json.dumps(raw)
|
||||||
|
elif isinstance(raw, list):
|
||||||
|
# Multimodal content list — extract text parts
|
||||||
|
parts = []
|
||||||
|
for part in raw:
|
||||||
|
if isinstance(part, str):
|
||||||
|
parts.append(part)
|
||||||
|
elif isinstance(part, dict) and part.get("type") == "text":
|
||||||
|
parts.append(part.get("text", ""))
|
||||||
|
elif isinstance(part, dict) and "text" in part:
|
||||||
|
parts.append(str(part["text"]))
|
||||||
|
assistant_message.content = "\n".join(parts)
|
||||||
|
else:
|
||||||
|
assistant_message.content = str(raw)
|
||||||
|
|
||||||
# Handle assistant response
|
# Handle assistant response
|
||||||
if assistant_message.content and not self.quiet_mode:
|
if assistant_message.content and not self.quiet_mode:
|
||||||
print(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")
|
print(f"{self.log_prefix}🤖 Assistant: {assistant_message.content[:100]}{'...' if len(assistant_message.content) > 100 else ''}")
|
||||||
|
|||||||
3
skills/creative/DESCRIPTION.md
Normal file
3
skills/creative/DESCRIPTION.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
---
|
||||||
|
description: Creative content generation — ASCII art, hand-drawn style diagrams, and visual design tools.
|
||||||
|
---
|
||||||
161
skills/gaming/pokemon-player/SKILL.md
Normal file
161
skills/gaming/pokemon-player/SKILL.md
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
---
|
||||||
|
name: pokemon-player
|
||||||
|
description: Play Pokémon games autonomously via headless emulation. Starts a game server, reads structured game state from RAM, makes strategic decisions, and sends button inputs — all from the terminal.
|
||||||
|
tags: [gaming, pokemon, emulator, pyboy, gameplay, gameboy]
|
||||||
|
---
|
||||||
|
# Pokémon Player
|
||||||
|
|
||||||
|
Play Pokémon games via headless emulation using the `pokemon-agent` package.
|
||||||
|
|
||||||
|
## When to Use
|
||||||
|
- User says "play pokemon", "start pokemon", "pokemon game"
|
||||||
|
- User asks about Pokemon Red, Blue, Yellow, FireRed, etc.
|
||||||
|
- User wants to watch an AI play Pokemon
|
||||||
|
- User references a ROM file (.gb, .gbc, .gba)
|
||||||
|
|
||||||
|
## First-Time Setup
|
||||||
|
|
||||||
|
### 1. Install the package
|
||||||
|
```bash
|
||||||
|
pip install pokemon-agent[dashboard] pyboy
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Get the ROM
|
||||||
|
Ask the user for their ROM file path. Do NOT attempt to download ROMs.
|
||||||
|
|
||||||
|
### 3. Start the game server
|
||||||
|
```bash
|
||||||
|
pokemon-agent serve --rom <ROM_PATH> --port 8765 &
|
||||||
|
```
|
||||||
|
Wait 3 seconds, then verify:
|
||||||
|
```bash
|
||||||
|
curl -s http://localhost:8765/health
|
||||||
|
```
|
||||||
|
|
||||||
|
## The Gameplay Loop
|
||||||
|
|
||||||
|
### Step 1: OBSERVE
|
||||||
|
```bash
|
||||||
|
curl -s http://localhost:8765/state
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 2: ORIENT
|
||||||
|
- Dialog active → advance text
|
||||||
|
- In battle → fight
|
||||||
|
- Party hurt → heal
|
||||||
|
- Near objective → navigate
|
||||||
|
|
||||||
|
### Step 3: DECIDE
|
||||||
|
Priority order:
|
||||||
|
1. If dialog active → a_until_dialog_end
|
||||||
|
2. If in battle → choose best move
|
||||||
|
3. If any Pokemon <20% HP → Pokémon Center
|
||||||
|
4. If near story objective → navigate to it
|
||||||
|
5. If underleveled → train in grass
|
||||||
|
6. Otherwise → explore
|
||||||
|
|
||||||
|
### Step 4: ACT
|
||||||
|
```bash
|
||||||
|
curl -s -X POST http://localhost:8765/action \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"actions": ["walk_up", "walk_up", "press_a"]}'
|
||||||
|
```
|
||||||
|
|
||||||
|
Action reference:
|
||||||
|
- press_a — confirm, talk, select
|
||||||
|
- press_b — cancel, close menu
|
||||||
|
- press_start — open game menu
|
||||||
|
- walk_up/down/left/right — move one tile
|
||||||
|
- a_until_dialog_end — advance all dialog
|
||||||
|
- wait_60 — wait ~1 second
|
||||||
|
|
||||||
|
### Step 5: VERIFY
|
||||||
|
Check state_after in the response. If stuck 3+ turns:
|
||||||
|
1. Press B several times
|
||||||
|
2. Try different directions
|
||||||
|
3. Take screenshot and use vision_analyze
|
||||||
|
4. Load last save if truly stuck
|
||||||
|
|
||||||
|
### Step 6: RECORD
|
||||||
|
```
|
||||||
|
memory add: PKM:OBJECTIVE: Heading to Pewter City to challenge Brock
|
||||||
|
memory add: PKM:PROGRESS: Got Squirtle, Got Pokedex, → Pewter City
|
||||||
|
```
|
||||||
|
|
||||||
|
### Step 7: SAVE
|
||||||
|
Save every 20-30 turns and ALWAYS before gym battles:
|
||||||
|
```bash
|
||||||
|
curl -s -X POST http://localhost:8765/save \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{"name": "before_brock"}'
|
||||||
|
```
|
||||||
|
|
||||||
|
## Battle Strategy
|
||||||
|
|
||||||
|
### Decision Tree
|
||||||
|
1. Want to catch? → Weaken then throw Poké Ball
|
||||||
|
2. Wild you don't need? → RUN
|
||||||
|
3. Type advantage? → Use super-effective move
|
||||||
|
4. No advantage? → Use strongest STAB move
|
||||||
|
5. Low HP? → Switch or use Potion
|
||||||
|
|
||||||
|
### Type Chart
|
||||||
|
- Water beats Fire, Ground, Rock
|
||||||
|
- Fire beats Grass, Bug, Ice
|
||||||
|
- Grass beats Water, Ground, Rock
|
||||||
|
- Electric beats Water, Flying
|
||||||
|
- Ground beats Fire, Electric, Rock, Poison
|
||||||
|
- Psychic beats Fighting, Poison (dominant in Gen 1!)
|
||||||
|
|
||||||
|
### Gen 1 Quirks
|
||||||
|
- Special stat is both offense AND defense for special moves
|
||||||
|
- Psychic is overpowered (Ghost moves bugged)
|
||||||
|
- Critical hits based on Speed stat
|
||||||
|
- Wrap/Bind prevent opponent from acting
|
||||||
|
|
||||||
|
## Memory Conventions
|
||||||
|
| Prefix | Purpose | Example |
|
||||||
|
|--------|---------|---------|
|
||||||
|
| PKM:OBJECTIVE | Current goal | Defeat Brock in Pewter City |
|
||||||
|
| PKM:MAP | Navigation knowledge | Viridian Forest: go north |
|
||||||
|
| PKM:STRATEGY | Battle/team plans | Need Grass type before Misty |
|
||||||
|
| PKM:PROGRESS | Milestone tracker | ✓ Boulder Badge → Cascade Badge |
|
||||||
|
| PKM:STUCK | Stuck situations | Got stuck in Cerulean Cave |
|
||||||
|
| PKM:TEAM | Team notes | Squirtle is Water/Ice coverage |
|
||||||
|
|
||||||
|
## Progression Milestones
|
||||||
|
- ☐ Choose starter
|
||||||
|
- ☐ Deliver Oak's Parcel → receive Pokédex
|
||||||
|
- ☐ Boulder Badge — Brock (Rock) → use Water/Grass
|
||||||
|
- ☐ Cascade Badge — Misty (Water) → use Grass/Electric
|
||||||
|
- ☐ Thunder Badge — Lt. Surge (Electric) → use Ground
|
||||||
|
- ☐ Rainbow Badge — Erika (Grass) → use Fire/Ice/Flying
|
||||||
|
- ☐ Soul Badge — Koga (Poison) → use Ground/Psychic
|
||||||
|
- ☐ Marsh Badge — Sabrina (Psychic)
|
||||||
|
- ☐ Volcano Badge — Blaine (Fire) → use Water/Ground
|
||||||
|
- ☐ Earth Badge — Giovanni (Ground) → use Water/Grass/Ice
|
||||||
|
- ☐ Elite Four → Champion!
|
||||||
|
|
||||||
|
## Stopping Play
|
||||||
|
1. Save the game:
|
||||||
|
```bash
|
||||||
|
curl -s -X POST http://localhost:8765/save \
|
||||||
|
-d '{"name": "session_end"}'
|
||||||
|
```
|
||||||
|
2. Update memory with progress
|
||||||
|
3. Tell user: "Game saved! Say 'play pokemon' to resume."
|
||||||
|
4. Kill the background server process
|
||||||
|
|
||||||
|
## Dashboard
|
||||||
|
If `pokemon-agent[dashboard]` is installed, open:
|
||||||
|
http://localhost:8765/dashboard
|
||||||
|
|
||||||
|
Live features: game screen, AI reasoning stream, team status, action log.
|
||||||
|
|
||||||
|
## Pitfalls
|
||||||
|
- NEVER download or provide ROM files — always ask the user
|
||||||
|
- Don't send more than 15 actions per /action call
|
||||||
|
- Always wait for dialog to clear before moving
|
||||||
|
- Save BEFORE gym battles
|
||||||
|
- Take screenshots sparingly — they cost vision tokens
|
||||||
|
- Verify server is running with /health before any commands
|
||||||
69
skills/leisure/find-nearby/SKILL.md
Normal file
69
skills/leisure/find-nearby/SKILL.md
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
---
|
||||||
|
name: find-nearby
|
||||||
|
description: Find nearby places (restaurants, cafes, bars, pharmacies, etc.) using OpenStreetMap. Works with coordinates, addresses, cities, zip codes, or Telegram location pins. No API keys needed.
|
||||||
|
version: 1.0.0
|
||||||
|
metadata:
|
||||||
|
hermes:
|
||||||
|
tags: [location, maps, nearby, places, restaurants, local]
|
||||||
|
related_skills: []
|
||||||
|
---
|
||||||
|
|
||||||
|
# Find Nearby — Local Place Discovery
|
||||||
|
|
||||||
|
Find restaurants, cafes, bars, pharmacies, and other places near any location. Uses OpenStreetMap (free, no API keys). Works with:
|
||||||
|
|
||||||
|
- **Coordinates** from Telegram location pins (latitude/longitude in conversation)
|
||||||
|
- **Addresses** ("near 123 Main St, Springfield")
|
||||||
|
- **Cities** ("restaurants in downtown Austin")
|
||||||
|
- **Zip codes** ("pharmacies near 90210")
|
||||||
|
- **Landmarks** ("cafes near Times Square")
|
||||||
|
|
||||||
|
## Quick Reference
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# By coordinates (from Telegram location pin or user-provided)
|
||||||
|
python3 SKILL_DIR/scripts/find_nearby.py --lat <LAT> --lon <LON> --type restaurant --radius 1500
|
||||||
|
|
||||||
|
# By address, city, or landmark (auto-geocoded)
|
||||||
|
python3 SKILL_DIR/scripts/find_nearby.py --near "Times Square, New York" --type cafe
|
||||||
|
|
||||||
|
# Multiple place types
|
||||||
|
python3 SKILL_DIR/scripts/find_nearby.py --near "downtown austin" --type restaurant --type bar --limit 10
|
||||||
|
|
||||||
|
# JSON output
|
||||||
|
python3 SKILL_DIR/scripts/find_nearby.py --near "90210" --type pharmacy --json
|
||||||
|
```
|
||||||
|
|
||||||
|
### Parameters
|
||||||
|
|
||||||
|
| Flag | Description | Default |
|
||||||
|
|------|-------------|---------|
|
||||||
|
| `--lat`, `--lon` | Exact coordinates | — |
|
||||||
|
| `--near` | Address, city, zip, or landmark (geocoded) | — |
|
||||||
|
| `--type` | Place type (repeatable for multiple) | restaurant |
|
||||||
|
| `--radius` | Search radius in meters | 1500 |
|
||||||
|
| `--limit` | Max results | 15 |
|
||||||
|
| `--json` | Machine-readable JSON output | off |
|
||||||
|
|
||||||
|
### Common Place Types
|
||||||
|
|
||||||
|
`restaurant`, `cafe`, `bar`, `pub`, `fast_food`, `pharmacy`, `hospital`, `bank`, `atm`, `fuel`, `parking`, `supermarket`, `convenience`, `hotel`
|
||||||
|
|
||||||
|
## Workflow
|
||||||
|
|
||||||
|
1. **Get the location.** Look for coordinates (`latitude: ... / longitude: ...`) from a Telegram pin, or ask the user for an address/city/zip.
|
||||||
|
|
||||||
|
2. **Ask for preferences** (only if not already stated): place type, how far they're willing to go, any specifics (cuisine, "open now", etc.).
|
||||||
|
|
||||||
|
3. **Run the script** with appropriate flags. Use `--json` if you need to process results programmatically.
|
||||||
|
|
||||||
|
4. **Present results** with names, distances, and Google Maps links. If the user asked about hours or "open now," check the `hours` field in results — if missing or unclear, verify with `web_search`.
|
||||||
|
|
||||||
|
5. **For directions**, use the `directions_url` from results, or construct: `https://www.google.com/maps/dir/?api=1&origin=<LAT>,<LON>&destination=<LAT>,<LON>`
|
||||||
|
|
||||||
|
## Tips
|
||||||
|
|
||||||
|
- If results are sparse, widen the radius (1500 → 3000m)
|
||||||
|
- For "open now" requests: check the `hours` field in results, cross-reference with `web_search` for accuracy since OSM hours aren't always complete
|
||||||
|
- Zip codes alone can be ambiguous globally — prompt the user for country/state if results look wrong
|
||||||
|
- The script uses OpenStreetMap data which is community-maintained; coverage varies by region
|
||||||
184
skills/leisure/find-nearby/scripts/find_nearby.py
Normal file
184
skills/leisure/find-nearby/scripts/find_nearby.py
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Find nearby places using OpenStreetMap (Overpass + Nominatim). No API keys needed.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
# By coordinates
|
||||||
|
python find_nearby.py --lat 36.17 --lon -115.14 --type restaurant --radius 1500
|
||||||
|
|
||||||
|
# By address/city/zip (auto-geocoded)
|
||||||
|
python find_nearby.py --near "Times Square, New York" --type cafe --radius 1000
|
||||||
|
python find_nearby.py --near "90210" --type pharmacy
|
||||||
|
|
||||||
|
# Multiple types
|
||||||
|
python find_nearby.py --lat 36.17 --lon -115.14 --type restaurant --type bar
|
||||||
|
|
||||||
|
# JSON output for programmatic use
|
||||||
|
python find_nearby.py --near "downtown las vegas" --type restaurant --json
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import math
|
||||||
|
import sys
|
||||||
|
import urllib.parse
|
||||||
|
import urllib.request
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
# Overpass API mirrors, tried in order until one responds.
OVERPASS_URLS = [
    "https://overpass-api.de/api/interpreter",
    "https://overpass.kumi.systems/api/interpreter",
]
# Nominatim geocoding endpoint (address/city/zip -> coordinates).
NOMINATIM_URL = "https://nominatim.openstreetmap.org/search"
# Identifying User-Agent sent with every request (Nominatim's usage policy requires one).
USER_AGENT = "HermesAgent/1.0 (find-nearby skill)"
# Timeout in seconds, used both for HTTP requests and as the Overpass query timeout.
TIMEOUT = 15
|
||||||
|
|
||||||
|
|
||||||
|
def _http_get(url: str) -> Any:
    """Fetch *url* with our User-Agent and decode the JSON response body."""
    request = urllib.request.Request(url, headers={"User-Agent": USER_AGENT})
    with urllib.request.urlopen(request, timeout=TIMEOUT) as response:
        return json.loads(response.read())
|
||||||
|
|
||||||
|
|
||||||
|
def _http_post(url: str, data: str) -> Any:
    """POST *data* (an already-encoded form string) to *url*; decode the JSON reply."""
    payload = data.encode()
    request = urllib.request.Request(url, data=payload, headers={"User-Agent": USER_AGENT})
    with urllib.request.urlopen(request, timeout=TIMEOUT) as response:
        return json.loads(response.read())
|
||||||
|
|
||||||
|
|
||||||
|
def haversine(lat1: float, lon1: float, lat2: float, lon2: float) -> float:
    """Distance in meters between two coordinates."""
    earth_radius_m = 6_371_000
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    dphi = math.radians(lat2 - lat1)
    dlam = math.radians(lon2 - lon1)
    half_chord = (
        math.sin(dphi / 2) ** 2
        + math.cos(phi1) * math.cos(phi2) * math.sin(dlam / 2) ** 2
    )
    return 2 * earth_radius_m * math.atan2(math.sqrt(half_chord), math.sqrt(1 - half_chord))
|
||||||
|
|
||||||
|
|
||||||
|
def geocode(query: str) -> tuple[float, float]:
    """Convert address/city/zip to coordinates via Nominatim.

    Exits the process with an error message when Nominatim returns no match.
    """
    params = urllib.parse.urlencode({"q": query, "format": "json", "limit": 1})
    matches = _http_get(f"{NOMINATIM_URL}?{params}")
    if matches:
        top = matches[0]
        return float(top["lat"]), float(top["lon"])
    print(f"Error: Could not geocode '{query}'. Try a more specific address.", file=sys.stderr)
    sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
def find_nearby(lat: float, lon: float, types: list[str], radius: int = 1500, limit: int = 15) -> list[dict]:
    """Query Overpass for nearby amenities.

    Args:
        lat, lon: Search origin coordinates.
        types: OSM amenity values to match (e.g. ["restaurant", "bar"]).
        radius: Search radius in meters.
        limit: Maximum number of results returned.

    Returns:
        Named places sorted by distance (nearest first), at most *limit* entries.
        Empty list when every Overpass mirror fails or nothing matches.
    """
    # Build Overpass QL query: one around-filter per requested amenity type.
    type_filters = "".join(
        f'nwr["amenity"="{t}"](around:{radius},{lat},{lon});' for t in types
    )
    query = f"[out:json][timeout:{TIMEOUT}];({type_filters});out center tags;"

    # Try each Overpass server until one answers.
    data = None
    for url in OVERPASS_URLS:
        try:
            data = _http_post(url, f"data={urllib.parse.quote(query)}")
            break
        except Exception:
            continue

    if not data:
        return []

    # Parse results
    places = []
    for el in data.get("elements", []):
        tags = el.get("tags", {})
        name = tags.get("name")
        if not name:
            continue  # unnamed POIs aren't useful to present

        # Get coordinates (nodes have lat/lon directly, ways/relations use center).
        # BUG FIX: use explicit None checks instead of truthiness — 0.0 is a valid
        # coordinate (equator / prime meridian) and would be dropped by `or` / `not`.
        plat = el.get("lat")
        if plat is None:
            plat = (el.get("center") or {}).get("lat")
        plon = el.get("lon")
        if plon is None:
            plon = (el.get("center") or {}).get("lon")
        if plat is None or plon is None:
            continue

        dist = haversine(lat, lon, plat, plon)

        place = {
            "name": name,
            "type": tags.get("amenity", ""),
            "distance_m": round(dist),
            "lat": plat,
            "lon": plon,
            "maps_url": f"https://www.google.com/maps/search/?api=1&query={plat},{plon}",
            "directions_url": f"https://www.google.com/maps/dir/?api=1&origin={lat},{lon}&destination={plat},{plon}",
        }

        # Add useful optional fields
        if tags.get("cuisine"):
            place["cuisine"] = tags["cuisine"]
        if tags.get("opening_hours"):
            place["hours"] = tags["opening_hours"]
        if tags.get("phone"):
            place["phone"] = tags["phone"]
        if tags.get("website"):
            place["website"] = tags["website"]
        if tags.get("addr:street"):
            addr_parts = [tags.get("addr:housenumber", ""), tags.get("addr:street", "")]
            if tags.get("addr:city"):
                addr_parts.append(tags["addr:city"])
            place["address"] = " ".join(p for p in addr_parts if p)

        places.append(place)

    # Sort by distance, limit results
    places.sort(key=lambda p: p["distance_m"])
    return places[:limit]
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: parse args, resolve the origin, search, and print results."""
    parser = argparse.ArgumentParser(description="Find nearby places via OpenStreetMap")
    parser.add_argument("--lat", type=float, help="Latitude")
    parser.add_argument("--lon", type=float, help="Longitude")
    parser.add_argument("--near", type=str, help="Address, city, or zip code (geocoded automatically)")
    parser.add_argument("--type", action="append", dest="types", default=[], help="Place type (restaurant, cafe, bar, pharmacy, etc.)")
    parser.add_argument("--radius", type=int, default=1500, help="Search radius in meters (default: 1500)")
    parser.add_argument("--limit", type=int, default=15, help="Max results (default: 15)")
    parser.add_argument("--json", action="store_true", dest="json_output", help="Output as JSON")
    args = parser.parse_args()

    # Resolve origin coordinates: --near (geocoded) wins, else explicit --lat/--lon.
    if args.near:
        lat, lon = geocode(args.near)
    elif args.lat is not None and args.lon is not None:
        lat, lon = args.lat, args.lon
    else:
        print("Error: Provide --lat/--lon or --near", file=sys.stderr)
        sys.exit(1)

    if not args.types:
        args.types = ["restaurant"]

    places = find_nearby(lat, lon, args.types, args.radius, args.limit)

    if args.json_output:
        print(json.dumps({"origin": {"lat": lat, "lon": lon}, "results": places, "count": len(places)}, indent=2))
        return

    if not places:
        print(f"No {'/'.join(args.types)} found within {args.radius}m")
        return

    print(f"Found {len(places)} places within {args.radius}m:\n")
    for rank, place in enumerate(places, 1):
        meters = place["distance_m"]
        dist_str = f"{meters}m" if meters < 1000 else f"{meters/1000:.1f}km"
        print(f" {rank}. {place['name']} ({place['type']}) — {dist_str}")
        if place.get("cuisine"):
            print(f" Cuisine: {place['cuisine']}")
        if place.get("hours"):
            print(f" Hours: {place['hours']}")
        if place.get("address"):
            print(f" Address: {place['address']}")
        print(f" Map: {place['maps_url']}")
        print()


if __name__ == "__main__":
    main()
|
||||||
@@ -321,6 +321,32 @@ mcp_servers:
|
|||||||
|
|
||||||
All tools from all servers are registered and available simultaneously. Each server's tools are prefixed with its name to avoid collisions.
|
All tools from all servers are registered and available simultaneously. Each server's tools are prefixed with its name to avoid collisions.
|
||||||
|
|
||||||
|
## Sampling (Server-Initiated LLM Requests)
|
||||||
|
|
||||||
|
Hermes supports MCP's `sampling/createMessage` capability — MCP servers can request LLM completions through the agent during tool execution. This enables agent-in-the-loop workflows (data analysis, content generation, decision-making).
|
||||||
|
|
||||||
|
Sampling is **enabled by default**. Configure per server:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
mcp_servers:
|
||||||
|
my_server:
|
||||||
|
command: "npx"
|
||||||
|
args: ["-y", "my-mcp-server"]
|
||||||
|
sampling:
|
||||||
|
enabled: true # default: true
|
||||||
|
model: "gemini-3-flash" # model override (optional)
|
||||||
|
max_tokens_cap: 4096 # max tokens per request
|
||||||
|
timeout: 30 # LLM call timeout (seconds)
|
||||||
|
max_rpm: 10 # max requests per minute
|
||||||
|
allowed_models: [] # model whitelist (empty = all)
|
||||||
|
max_tool_rounds: 5 # tool loop limit (0 = disable)
|
||||||
|
log_level: "info" # audit verbosity
|
||||||
|
```
|
||||||
|
|
||||||
|
Servers can also include `tools` in sampling requests for multi-turn tool-augmented workflows. The `max_tool_rounds` config prevents infinite tool loops. Per-server audit metrics (requests, errors, tokens, tool use count) are tracked via `get_mcp_status()`.
|
||||||
|
|
||||||
|
Disable sampling for untrusted servers with `sampling: { enabled: false }`.
|
||||||
|
|
||||||
## Notes
|
## Notes
|
||||||
|
|
||||||
- MCP tools are called synchronously from the agent's perspective but run asynchronously on a dedicated background event loop
|
- MCP tools are called synchronously from the agent's perspective but run asynchronously on a dedicated background event loop
|
||||||
|
|||||||
@@ -1 +1,3 @@
|
|||||||
Media content extraction and transformation tools — YouTube transcripts, audio, video processing.
|
---
|
||||||
|
description: Skills for working with media content — YouTube transcripts, GIF search, music generation, and audio visualization.
|
||||||
|
---
|
||||||
|
|||||||
3
skills/mlops/cloud/DESCRIPTION.md
Normal file
3
skills/mlops/cloud/DESCRIPTION.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
---
|
||||||
|
description: GPU cloud providers and serverless compute platforms for ML workloads.
|
||||||
|
---
|
||||||
3
skills/mlops/evaluation/DESCRIPTION.md
Normal file
3
skills/mlops/evaluation/DESCRIPTION.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
---
|
||||||
|
description: Model evaluation benchmarks, experiment tracking, data curation, tokenizers, and interpretability tools.
|
||||||
|
---
|
||||||
3
skills/mlops/inference/DESCRIPTION.md
Normal file
3
skills/mlops/inference/DESCRIPTION.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
---
|
||||||
|
description: Model serving, quantization (GGUF/GPTQ), structured output, inference optimization, and model surgery tools for deploying and running LLMs.
|
||||||
|
---
|
||||||
330
skills/mlops/inference/obliteratus/SKILL.md
Normal file
330
skills/mlops/inference/obliteratus/SKILL.md
Normal file
@@ -0,0 +1,330 @@
|
|||||||
|
---
|
||||||
|
name: obliteratus
|
||||||
|
description: Remove refusal behaviors from open-weight LLMs using OBLITERATUS — mechanistic interpretability techniques (diff-in-means, SVD, whitened SVD, LEACE, SAE decomposition, etc.) to excise guardrails while preserving reasoning. 9 CLI methods, 28 analysis modules, 116 model presets across 5 compute tiers, tournament evaluation, and telemetry-driven recommendations. Use when a user wants to uncensor, abliterate, or remove refusal from an LLM.
|
||||||
|
version: 2.0.0
|
||||||
|
author: Hermes Agent
|
||||||
|
license: MIT
|
||||||
|
dependencies: [obliteratus, torch, transformers, bitsandbytes, accelerate, safetensors]
|
||||||
|
metadata:
|
||||||
|
hermes:
|
||||||
|
tags: [Abliteration, Uncensoring, Refusal-Removal, LLM, Weight-Projection, SVD, Mechanistic-Interpretability, HuggingFace, Model-Surgery]
|
||||||
|
related_skills: [vllm, gguf, huggingface-tokenizers]
|
||||||
|
---
|
||||||
|
|
||||||
|
# OBLITERATUS Skill
|
||||||
|
|
||||||
|
Remove refusal behaviors (guardrails) from open-weight LLMs without retraining or fine-tuning. Uses mechanistic interpretability techniques — including diff-in-means, SVD, whitened SVD, LEACE concept erasure, SAE decomposition, Bayesian kernel projection, and more — to identify and surgically excise refusal directions from model weights while preserving reasoning capabilities.
|
||||||
|
|
||||||
|
**License warning:** OBLITERATUS is AGPL-3.0. NEVER import it as a Python library. Always invoke via CLI (`obliteratus` command) or subprocess. This keeps Hermes Agent's MIT license clean.
|
||||||
|
|
||||||
|
## When to Use This Skill
|
||||||
|
|
||||||
|
Trigger when the user:
|
||||||
|
- Wants to "uncensor" or "abliterate" an LLM
|
||||||
|
- Asks about removing refusal/guardrails from a model
|
||||||
|
- Wants to create an uncensored version of Llama, Qwen, Mistral, etc.
|
||||||
|
- Mentions "refusal removal", "abliteration", "weight projection"
|
||||||
|
- Wants to analyze how a model's refusal mechanism works
|
||||||
|
- References OBLITERATUS, abliterator, or refusal directions
|
||||||
|
|
||||||
|
## Step 1: Installation
|
||||||
|
|
||||||
|
Check if already installed:
|
||||||
|
```bash
|
||||||
|
obliteratus --version 2>/dev/null && echo "INSTALLED" || echo "NOT INSTALLED"
|
||||||
|
```
|
||||||
|
|
||||||
|
If not installed, clone and install from GitHub:
|
||||||
|
```bash
|
||||||
|
git clone https://github.com/elder-plinius/OBLITERATUS.git
|
||||||
|
cd OBLITERATUS
|
||||||
|
pip install -e .
|
||||||
|
# For Gradio web UI support:
|
||||||
|
# pip install -e ".[spaces]"
|
||||||
|
```
|
||||||
|
|
||||||
|
**IMPORTANT:** Confirm with user before installing. This pulls in ~5-10GB of dependencies (PyTorch, Transformers, bitsandbytes, etc.).
|
||||||
|
|
||||||
|
## Step 2: Check Hardware
|
||||||
|
|
||||||
|
Before anything, check what GPU is available:
|
||||||
|
```bash
|
||||||
|
python3 -c "
|
||||||
|
import torch
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
gpu = torch.cuda.get_device_name(0)
|
||||||
|
vram = torch.cuda.get_device_properties(0).total_memory / 1024**3
|
||||||
|
print(f'GPU: {gpu}')
|
||||||
|
print(f'VRAM: {vram:.1f} GB')
|
||||||
|
if vram < 4: print('TIER: tiny (models under 1B)')
|
||||||
|
elif vram < 8: print('TIER: small (models 1-4B)')
|
||||||
|
elif vram < 16: print('TIER: medium (models 4-9B with 4bit quant)')
|
||||||
|
elif vram < 32: print('TIER: large (models 8-32B with 4bit quant)')
|
||||||
|
else: print('TIER: frontier (models 32B+)')
|
||||||
|
else:
|
||||||
|
print('NO GPU - only tiny models (under 1B) on CPU')
|
||||||
|
"
|
||||||
|
```
|
||||||
|
|
||||||
|
### VRAM Requirements (with 4-bit quantization)
|
||||||
|
|
||||||
|
| VRAM | Max Model Size | Example Models |
|
||||||
|
|:---------|:----------------|:--------------------------------------------|
|
||||||
|
| CPU only | ~1B params | GPT-2, TinyLlama, SmolLM |
|
||||||
|
| 4-8 GB | ~4B params | Qwen2.5-1.5B, Phi-3.5 mini, Llama 3.2 3B |
|
||||||
|
| 8-16 GB | ~9B params | Llama 3.1 8B, Mistral 7B, Gemma 2 9B |
|
||||||
|
| 24 GB | ~32B params | Qwen3-32B, Llama 3.1 70B (tight), Command-R |
|
||||||
|
| 48 GB+ | ~72B+ params | Qwen2.5-72B, DeepSeek-R1 |
|
||||||
|
| Multi-GPU | 200B+ params | Llama 3.1 405B, DeepSeek-V3 (685B MoE) |
|
||||||
|
|
||||||
|
## Step 3: Browse Available Models & Get Recommendations
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Browse models by compute tier
|
||||||
|
obliteratus models --tier medium
|
||||||
|
|
||||||
|
# Get architecture info for a specific model
|
||||||
|
obliteratus info <model_name>
|
||||||
|
|
||||||
|
# Get telemetry-driven recommendation for best method & params
|
||||||
|
obliteratus recommend <model_name>
|
||||||
|
obliteratus recommend <model_name> --insights # global cross-architecture rankings
|
||||||
|
```
|
||||||
|
|
||||||
|
## Step 4: Choose a Method
|
||||||
|
|
||||||
|
### Method Selection Guide
|
||||||
|
**Default / recommended for most cases: `advanced`.** It uses multi-direction SVD with norm-preserving projection and is well-tested.
|
||||||
|
|
||||||
|
| Situation | Recommended Method | Why |
|
||||||
|
|:----------------------------------|:-------------------|:-----------------------------------------|
|
||||||
|
| Default / most models | `advanced` | Multi-direction SVD, norm-preserving, reliable |
|
||||||
|
| Quick test / prototyping | `basic` | Fast, simple, good enough to evaluate |
|
||||||
|
| Dense model (Llama, Mistral) | `advanced` | Multi-direction, norm-preserving |
|
||||||
|
| MoE model (DeepSeek, Mixtral) | `nuclear` | Expert-granular, handles MoE complexity |
|
||||||
|
| Reasoning model (R1 distills) | `surgical` | CoT-aware, preserves chain-of-thought |
|
||||||
|
| Stubborn refusals persist | `aggressive` | Whitened SVD + head surgery + jailbreak |
|
||||||
|
| Want reversible changes | steering vectors | Reversible inference-time alternative (see Analysis section) |
|
||||||
|
| Maximum quality, time no object | `optimized` | Bayesian search for best parameters |
|
||||||
|
| Experimental auto-detection | `informed` | Auto-detects alignment type — experimental, may not always outperform advanced |
|
||||||
|
|
||||||
|
### 9 CLI Methods
|
||||||
|
- **basic** — Single refusal direction via diff-in-means. Fast (~5-10 min for 8B).
|
||||||
|
- **advanced** (DEFAULT, RECOMMENDED) — Multiple SVD directions, norm-preserving projection, 2 refinement passes. Medium speed (~10-20 min).
|
||||||
|
- **aggressive** — Whitened SVD + jailbreak-contrastive + attention head surgery. Higher risk of coherence damage.
|
||||||
|
- **spectral_cascade** — DCT frequency-domain decomposition. Research/novel approach.
|
||||||
|
- **informed** — Runs analysis DURING abliteration to auto-configure. Experimental — slower and less predictable than advanced.
|
||||||
|
- **surgical** — SAE features + neuron masking + head surgery + per-expert. Very slow (~1-2 hrs). Best for reasoning models.
|
||||||
|
- **optimized** — Bayesian hyperparameter search (Optuna TPE). Longest runtime but finds optimal parameters.
|
||||||
|
- **inverted** — Flips the refusal direction. Model becomes actively willing.
|
||||||
|
- **nuclear** — Maximum force combo for stubborn MoE models. Expert-granular.
|
||||||
|
|
||||||
|
### Direction Extraction Methods (--direction-method flag)
|
||||||
|
- **diff_means** (default) — Simple difference-in-means between refused/complied activations. Robust.
|
||||||
|
- **svd** — Multi-direction SVD extraction. Better for complex alignment.
|
||||||
|
- **leace** — LEACE (LEAst-squares Concept Erasure). Optimal linear erasure in closed form.
|
||||||
|
|
||||||
|
### 4 Python-API-Only Methods
|
||||||
|
(NOT available via CLI — require Python import, which violates AGPL boundary. Mention to user only if they explicitly want to use OBLITERATUS as a library in their own AGPL project.)
|
||||||
|
- failspy, gabliteration, heretic, rdo
|
||||||
|
|
||||||
|
## Step 5: Run Abliteration
|
||||||
|
|
||||||
|
### Standard usage
|
||||||
|
```bash
|
||||||
|
# Default method (advanced) — recommended for most models
|
||||||
|
obliteratus obliterate <model_name> --method advanced --output-dir ./abliterated-models
|
||||||
|
|
||||||
|
# With 4-bit quantization (saves VRAM)
|
||||||
|
obliteratus obliterate <model_name> --method advanced --quantization 4bit --output-dir ./abliterated-models
|
||||||
|
|
||||||
|
# Large models (70B+) — conservative defaults
|
||||||
|
obliteratus obliterate <model_name> --method advanced --quantization 4bit --large-model --output-dir ./abliterated-models
|
||||||
|
```
|
||||||
|
|
||||||
|
### Fine-tuning parameters
|
||||||
|
```bash
|
||||||
|
obliteratus obliterate <model_name> \
|
||||||
|
--method advanced \
|
||||||
|
--direction-method diff_means \
|
||||||
|
--n-directions 4 \
|
||||||
|
--refinement-passes 2 \
|
||||||
|
--regularization 0.1 \
|
||||||
|
--quantization 4bit \
|
||||||
|
--output-dir ./abliterated-models \
|
||||||
|
--contribute # opt-in telemetry for community research
|
||||||
|
```
|
||||||
|
|
||||||
|
### Key flags
|
||||||
|
| Flag | Description | Default |
|
||||||
|
|:-----|:------------|:--------|
|
||||||
|
| `--method` | Abliteration method | advanced |
|
||||||
|
| `--direction-method` | Direction extraction | diff_means |
|
||||||
|
| `--n-directions` | Number of refusal directions (1-32) | method-dependent |
|
||||||
|
| `--refinement-passes` | Iterative passes (1-5) | 2 |
|
||||||
|
| `--regularization` | Regularization strength (0.0-1.0) | 0.1 |
|
||||||
|
| `--quantization` | Load in 4bit or 8bit | none (full precision) |
|
||||||
|
| `--large-model` | Conservative defaults for 120B+ | false |
|
||||||
|
| `--output-dir` | Where to save the abliterated model | ./obliterated_model |
|
||||||
|
| `--contribute` | Share anonymized results for research | false |
|
||||||
|
| `--verify-sample-size` | Number of test prompts for refusal check | 20 |
|
||||||
|
| `--dtype` | Model dtype (float16, bfloat16) | auto |
|
||||||
|
|
||||||
|
### Other execution modes
|
||||||
|
```bash
|
||||||
|
# Interactive guided mode (hardware → model → preset)
|
||||||
|
obliteratus interactive
|
||||||
|
|
||||||
|
# Web UI (Gradio)
|
||||||
|
obliteratus ui --port 7860
|
||||||
|
|
||||||
|
# Run a full ablation study from YAML config
|
||||||
|
obliteratus run config.yaml --preset quick
|
||||||
|
|
||||||
|
# Tournament: pit all methods against each other
|
||||||
|
obliteratus tourney <model_name>
|
||||||
|
```
|
||||||
|
|
||||||
|
## Step 6: Verify Results
|
||||||
|
|
||||||
|
After abliteration, check the output metrics:
|
||||||
|
|
||||||
|
| Metric | Good Value | Warning |
|
||||||
|
|:-------|:-----------|:--------|
|
||||||
|
| Refusal rate | < 5% (ideally ~0%) | > 10% means refusals persist |
|
||||||
|
| Perplexity change | < 10% increase | > 15% means coherence damage |
|
||||||
|
| KL divergence | < 0.1 | > 0.5 means significant distribution shift |
|
||||||
|
| Coherence | High / passes qualitative check | Degraded responses, repetition |
|
||||||
|
|
||||||
|
### If refusals persist (> 10%)
|
||||||
|
1. Try `aggressive` method
|
||||||
|
2. Increase `--n-directions` (e.g., 8 or 16)
|
||||||
|
3. Add `--refinement-passes 3`
|
||||||
|
4. Try `--direction-method svd` instead of diff_means
|
||||||
|
|
||||||
|
### If coherence is damaged (perplexity > 15% increase)
|
||||||
|
1. Reduce `--n-directions` (try 2)
|
||||||
|
2. Increase `--regularization` (try 0.3)
|
||||||
|
3. Reduce `--refinement-passes` to 1
|
||||||
|
4. Try `basic` method (gentler)
|
||||||
|
|
||||||
|
## Step 7: Use the Abliterated Model
|
||||||
|
|
||||||
|
The output is a standard HuggingFace model directory.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Test locally with transformers
|
||||||
|
python3 -c "
|
||||||
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||||
|
model = AutoModelForCausalLM.from_pretrained('./abliterated-models/<model>')
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained('./abliterated-models/<model>')
|
||||||
|
inputs = tokenizer('How do I pick a lock?', return_tensors='pt')
|
||||||
|
outputs = model.generate(**inputs, max_new_tokens=200)
|
||||||
|
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
||||||
|
"
|
||||||
|
|
||||||
|
# Upload to HuggingFace Hub
|
||||||
|
huggingface-cli upload <username>/<model-name>-abliterated ./abliterated-models/<model>
|
||||||
|
|
||||||
|
# Serve with vLLM
|
||||||
|
vllm serve ./abliterated-models/<model>
|
||||||
|
```
|
||||||
|
|
||||||
|
## CLI Command Reference
|
||||||
|
|
||||||
|
| Command | Description |
|
||||||
|
|:--------|:------------|
|
||||||
|
| `obliteratus obliterate` | Main abliteration command |
|
||||||
|
| `obliteratus info <model>` | Print model architecture details |
|
||||||
|
| `obliteratus models --tier <tier>` | Browse curated models by compute tier |
|
||||||
|
| `obliteratus recommend <model>` | Telemetry-driven method/param suggestion |
|
||||||
|
| `obliteratus interactive` | Guided setup wizard |
|
||||||
|
| `obliteratus tourney <model>` | Tournament: all methods head-to-head |
|
||||||
|
| `obliteratus run <config.yaml>` | Execute ablation study from YAML |
|
||||||
|
| `obliteratus strategies` | List all registered ablation strategies |
|
||||||
|
| `obliteratus report <results.json>` | Regenerate visual reports |
|
||||||
|
| `obliteratus ui` | Launch Gradio web interface |
|
||||||
|
| `obliteratus aggregate` | Summarize community telemetry data |
|
||||||
|
|
||||||
|
## Analysis Modules
|
||||||
|
|
||||||
|
OBLITERATUS includes 28 analysis modules for mechanistic interpretability.
|
||||||
|
See `skill_view(name="obliteratus", file_path="references/analysis-modules.md")` for the full reference.
|
||||||
|
|
||||||
|
### Quick analysis commands
|
||||||
|
```bash
|
||||||
|
# Run specific analysis modules
|
||||||
|
obliteratus run analysis-config.yaml --preset quick
|
||||||
|
|
||||||
|
# Key modules to run first:
|
||||||
|
# - alignment_imprint: Fingerprint DPO/RLHF/CAI/SFT alignment method
|
||||||
|
# - concept_geometry: Single direction vs polyhedral cone
|
||||||
|
# - logit_lens: Which layer decides to refuse
|
||||||
|
# - anti_ouroboros: Self-repair risk score
|
||||||
|
# - causal_tracing: Causally necessary components
|
||||||
|
```
|
||||||
|
|
||||||
|
### Steering Vectors (Reversible Alternative)
|
||||||
|
Instead of permanent weight modification, use inference-time steering:
|
||||||
|
```python
|
||||||
|
# Python API only — for user's own projects
|
||||||
|
from obliteratus.analysis.steering_vectors import SteeringVectorFactory, SteeringHookManager
|
||||||
|
```
|
||||||
|
|
||||||
|
## Ablation Strategies
|
||||||
|
|
||||||
|
Beyond direction-based abliteration, OBLITERATUS includes structural ablation strategies:
|
||||||
|
- **Embedding Ablation** — Target embedding layer components
|
||||||
|
- **FFN Ablation** — Feed-forward network block removal
|
||||||
|
- **Head Pruning** — Attention head pruning
|
||||||
|
- **Layer Removal** — Full layer removal
|
||||||
|
|
||||||
|
List all available: `obliteratus strategies`
|
||||||
|
|
||||||
|
## Evaluation
|
||||||
|
|
||||||
|
OBLITERATUS includes built-in evaluation tools:
|
||||||
|
- Refusal rate benchmarking
|
||||||
|
- Perplexity comparison (before/after)
|
||||||
|
- LM Eval Harness integration for academic benchmarks
|
||||||
|
- Head-to-head competitor comparison
|
||||||
|
- Baseline performance tracking
|
||||||
|
|
||||||
|
## Platform Support
|
||||||
|
|
||||||
|
- **CUDA** — Full support (NVIDIA GPUs)
|
||||||
|
- **Apple Silicon (MLX)** — Supported via MLX backend
|
||||||
|
- **CPU** — Supported for tiny models (< 1B params)
|
||||||
|
|
||||||
|
## YAML Config Templates
|
||||||
|
|
||||||
|
Load templates for reproducible runs via `skill_view`:
|
||||||
|
- `templates/abliteration-config.yaml` — Standard single-model config
|
||||||
|
- `templates/analysis-study.yaml` — Pre-abliteration analysis study
|
||||||
|
- `templates/batch-abliteration.yaml` — Multi-model batch processing
|
||||||
|
|
||||||
|
## Telemetry
|
||||||
|
|
||||||
|
OBLITERATUS can optionally contribute anonymized run data to a global research dataset.
|
||||||
|
Enable with `--contribute` flag. No personal data is collected — only model name, method, metrics.
|
||||||
|
|
||||||
|
## Common Pitfalls
|
||||||
|
|
||||||
|
1. **Don't use `informed` as default** — it's experimental and slower. Use `advanced` for reliable results.
|
||||||
|
2. **Models under ~1B respond poorly to abliteration** — their refusal behaviors are shallow and fragmented, making clean direction extraction difficult. Expect partial results (20-40% remaining refusal). Models 3B+ have cleaner refusal directions and respond much better (often 0% refusal with `advanced`).
|
||||||
|
3. **`aggressive` can make things worse** — on small models it can damage coherence and actually increase refusal rate. Only use it if `advanced` leaves > 10% refusals on a 3B+ model.
|
||||||
|
4. **Always check perplexity** — if it spikes > 15%, the model is damaged. Reduce aggressiveness.
|
||||||
|
5. **MoE models need special handling** — use `nuclear` method for Mixtral, DeepSeek-MoE, etc.
|
||||||
|
6. **Quantized models can't be re-quantized** — abliterate the full-precision model, then quantize the output.
|
||||||
|
7. **VRAM estimation is approximate** — 4-bit quant helps but peak usage can spike during extraction.
|
||||||
|
8. **Reasoning models are sensitive** — use `surgical` for R1 distills to preserve chain-of-thought.
|
||||||
|
9. **Check `obliteratus recommend`** — telemetry data may have better parameters than defaults.
|
||||||
|
10. **AGPL license** — never `import obliteratus` in MIT/Apache projects. CLI invocation only.
|
||||||
|
11. **Large models (70B+)** — always use `--large-model` flag for conservative defaults.
|
||||||
|
12. **Spectral certification RED is common** — the spectral check often flags "incomplete" even when practical refusal rate is 0%. Check actual refusal rate rather than relying on spectral certification alone.
|
||||||
|
|
||||||
|
## Complementary Skills
|
||||||
|
|
||||||
|
- **vllm** — Serve abliterated models with high throughput
|
||||||
|
- **gguf** — Convert abliterated models to GGUF for llama.cpp
|
||||||
|
- **huggingface-tokenizers** — Work with model tokenizers
|
||||||
@@ -0,0 +1,166 @@
|
|||||||
|
# OBLITERATUS Analysis Modules — Reference
|
||||||
|
|
||||||
|
OBLITERATUS includes 28 analysis modules for mechanistic interpretability of refusal in LLMs.
|
||||||
|
These modules help understand how and where refusal behaviors are encoded before performing abliteration.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Core Analysis (Run These First)
|
||||||
|
|
||||||
|
### 1. Alignment Imprint Detection (`alignment_imprint.py`)
|
||||||
|
Fingerprints whether a model was trained via DPO, RLHF, CAI, or SFT.
|
||||||
|
This determines which extraction strategy will work best.
|
||||||
|
|
||||||
|
### 2. Concept Cone Geometry (`concept_geometry.py`)
|
||||||
|
Determines if refusal is a single linear direction or a polyhedral cone
|
||||||
|
(set of multiple mechanisms). Single-direction models respond well to `basic`;
|
||||||
|
polyhedral models need `advanced` or `surgical`.
|
||||||
|
|
||||||
|
### 3. Refusal Logit Lens (`logit_lens.py`)
|
||||||
|
Identifies the specific layer where a model "decides" to refuse by decoding
|
||||||
|
intermediate layer representations into token space.
|
||||||
|
|
||||||
|
### 4. Ouroboros Detection (`anti_ouroboros.py`)
|
||||||
|
Identifies if a model attempts to "self-repair" refusal behaviors after
|
||||||
|
excision. Reports a risk score (0-1). High scores mean additional refinement
|
||||||
|
passes are needed.
|
||||||
|
|
||||||
|
### 5. Causal Tracing (`causal_tracing.py`)
|
||||||
|
Identifies which components (layers, heads, MLPs) are causally necessary
|
||||||
|
for refusal behavior using activation patching.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Geometric Analysis
|
||||||
|
|
||||||
|
### 6. Cross-Layer Alignment (`cross_layer.py`)
|
||||||
|
Measures how refusal directions align across different layers. High alignment
|
||||||
|
means the refusal signal is consistent; low alignment suggests layer-specific
|
||||||
|
mechanisms.
|
||||||
|
|
||||||
|
### 7. Residual Stream Decomposition (`residual_stream.py`)
|
||||||
|
Decomposes the residual stream into attention and MLP contributions to
|
||||||
|
understand which component type contributes more to refusal.
|
||||||
|
|
||||||
|
### 8. Riemannian Manifold Geometry (`riemannian_manifold.py`)
|
||||||
|
Analyzes the curvature and geometry of the weight manifold near refusal
|
||||||
|
directions. Informs how aggressively projections can be applied without
|
||||||
|
damaging the manifold structure.
|
||||||
|
|
||||||
|
### 9. Whitened SVD (`whitened_svd.py`)
|
||||||
|
Covariance-normalized SVD extraction that separates guardrail signals from
|
||||||
|
natural activation variance. More precise than standard SVD for models with
|
||||||
|
high activation variance.
|
||||||
|
|
||||||
|
### 10. Concept Cone Geometry (extended)
|
||||||
|
Maps the full polyhedral structure of refusal, including cone angles,
|
||||||
|
face counts, and intersection patterns.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Probing & Classification
|
||||||
|
|
||||||
|
### 11. Activation Probing (`activation_probing.py`)
|
||||||
|
Post-excision verification — probes for residual refusal concepts after
|
||||||
|
abliteration to ensure complete removal.
|
||||||
|
|
||||||
|
### 12. Probing Classifiers (`probing_classifiers.py`)
|
||||||
|
Trains linear classifiers to detect refusal in activations. Used both
|
||||||
|
before (to verify refusal exists) and after (to verify it's gone).
|
||||||
|
|
||||||
|
### 13. Activation Patching (`activation_patching.py`)
|
||||||
|
Interchange interventions — swaps activations between refused and complied
|
||||||
|
runs to identify causal components.
|
||||||
|
|
||||||
|
### 14. Tuned Lens (`tuned_lens.py`)
|
||||||
|
Trained version of logit lens that provides more accurate per-layer
|
||||||
|
decoding by learning affine transformations for each layer.
|
||||||
|
|
||||||
|
### 15. Multi-Token Position Analysis (`multi_token_position.py`)
|
||||||
|
Analyzes refusal signals across multiple token positions, not just the
|
||||||
|
last token. Important for models that distribute refusal across the sequence.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Abliteration & Manipulation
|
||||||
|
|
||||||
|
### 16. SAE-Based Abliteration (`sae_abliteration.py`)
|
||||||
|
Uses Sparse Autoencoder features to identify and remove specific refusal
|
||||||
|
features. More surgical than direction-based methods.
|
||||||
|
|
||||||
|
### 17. Steering Vectors (`steering_vectors.py`)
|
||||||
|
Creates and applies inference-time steering vectors for reversible refusal
|
||||||
|
modification. Includes `SteeringVectorFactory` and `SteeringHookManager`.
|
||||||
|
|
||||||
|
### 18. LEACE Concept Erasure (`leace.py`)
|
||||||
|
LEAst-squares Concept Erasure — closed-form, mathematically optimal linear
|
||||||
|
concept removal. Available as both analysis module and direction extraction method.
|
||||||
|
|
||||||
|
### 19. Sparse Surgery (`sparse_surgery.py`)
|
||||||
|
High-precision weight modification targeting individual neurons and
|
||||||
|
weight matrix entries rather than full directions.
|
||||||
|
|
||||||
|
### 20. Conditional Abliteration (`conditional_abliteration.py`)
|
||||||
|
Targeted removal that only affects specific refusal categories while
|
||||||
|
preserving others (e.g., remove weapons refusal but keep CSAM refusal).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Transfer & Robustness
|
||||||
|
|
||||||
|
### 21. Cross-Model Transfer (`cross_model_transfer.py`)
|
||||||
|
Tests whether refusal directions extracted from one model transfer to
|
||||||
|
another architecture. Measures universality of guardrail directions.
|
||||||
|
|
||||||
|
### 22. Defense Robustness (`defense_robustness.py`)
|
||||||
|
Evaluates how robust the abliteration is against various defense mechanisms
|
||||||
|
and re-alignment attempts.
|
||||||
|
|
||||||
|
### 23. Spectral Certification (`spectral_certification.py`)
|
||||||
|
Provides mathematical bounds on the completeness of refusal removal
|
||||||
|
using spectral analysis of the projection.
|
||||||
|
|
||||||
|
### 24. Wasserstein Optimal Extraction (`wasserstein_optimal.py`)
|
||||||
|
Uses optimal transport theory for more precise direction extraction
|
||||||
|
that minimizes distribution shift.
|
||||||
|
|
||||||
|
### 25. Wasserstein Transfer (`wasserstein_transfer.py`)
|
||||||
|
Distribution transfer between models using Wasserstein distance
|
||||||
|
for cross-architecture refusal direction mapping.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Advanced / Research
|
||||||
|
|
||||||
|
### 26. Bayesian Kernel Projection (`bayesian_kernel_projection.py`)
|
||||||
|
Probabilistic feature mapping that estimates uncertainty in refusal
|
||||||
|
direction identification.
|
||||||
|
|
||||||
|
### 27. Cross-Model Universality Index
|
||||||
|
Measures if guardrail directions generalize across different model
|
||||||
|
architectures and training regimes.
|
||||||
|
|
||||||
|
### 28. Visualization (`visualization.py`)
|
||||||
|
Plotting and graphing utilities for all analysis modules. Generates
|
||||||
|
heatmaps, direction plots, and layer-wise analysis charts.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Running Analysis
|
||||||
|
|
||||||
|
### Via CLI
|
||||||
|
```bash
|
||||||
|
# Run analysis from a YAML config
|
||||||
|
obliteratus run analysis-study.yaml --preset quick
|
||||||
|
|
||||||
|
# Available study presets:
|
||||||
|
# quick — Fast sanity check (2-3 modules)
|
||||||
|
# full — All core + geometric analysis
|
||||||
|
# jailbreak — Refusal circuit localization
|
||||||
|
# knowledge — Knowledge preservation analysis
|
||||||
|
# robustness — Stress testing / defense evaluation
|
||||||
|
```
|
||||||
|
|
||||||
|
### Via YAML Config
|
||||||
|
See the `templates/analysis-study.yaml` template for a complete example.
|
||||||
|
Load with: `skill_view(name="obliteratus", file_path="templates/analysis-study.yaml")`
|
||||||
141
skills/mlops/inference/obliteratus/references/methods-guide.md
Normal file
141
skills/mlops/inference/obliteratus/references/methods-guide.md
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
# OBLITERATUS Methods — Detailed Guide
|
||||||
|
|
||||||
|
> The CLI accepts 9 methods via `--method`: basic, advanced, aggressive, spectral_cascade,
|
||||||
|
> informed, surgical, optimized, inverted, nuclear.
|
||||||
|
> Four additional methods (failspy, gabliteration, heretic, rdo) are available only via the Python API.
|
||||||
|
|
||||||
|
## How Abliteration Works (Theory)
|
||||||
|
|
||||||
|
Abliteration identifies a "refusal direction" — a vector in the model's activation space that
|
||||||
|
corresponds to refusal behavior — and projects it out of the weight matrices.
|
||||||
|
|
||||||
|
Mathematically: `W_new = W_old - (W_old @ d @ d.T)` where `d` is the refusal direction.
|
||||||
|
|
||||||
|
The key challenge is finding accurate refusal directions without damaging other capabilities.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Direction Extraction Methods
|
||||||
|
|
||||||
|
Before projecting, OBLITERATUS extracts refusal directions using one of three methods:
|
||||||
|
|
||||||
|
| Method | Flag | Description | Best For |
|
||||||
|
|:-------|:-----|:------------|:---------|
|
||||||
|
| Diff-in-Means | `--direction-method diff_means` | Difference between mean activations on refused vs. complied prompts | Default, fast, robust |
|
||||||
|
| SVD | `--direction-method svd` | Multi-direction extraction via Singular Value Decomposition | Complex alignment, multiple refusal mechanisms |
|
||||||
|
| LEACE | `--direction-method leace` | LEAst-squares Concept Erasure — closed-form, mathematically optimal | Maximum precision, research |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Method Details
|
||||||
|
|
||||||
|
### basic
|
||||||
|
- **Directions:** 1 (single diff-in-means vector)
|
||||||
|
- **Speed:** Fast (~5-10 min for 8B model)
|
||||||
|
- **Risk:** Low
|
||||||
|
- **Use case:** Quick tests, prototyping, evaluating if abliteration works for a model
|
||||||
|
- **How it works:** Extracts one refusal direction and projects it out uniformly across all layers.
|
||||||
|
|
||||||
|
### advanced (DEFAULT — RECOMMENDED)
|
||||||
|
- **Directions:** 4 (multi-direction SVD)
|
||||||
|
- **Speed:** Medium (~10-20 min for 8B model)
|
||||||
|
- **Risk:** Low-Medium
|
||||||
|
- **Refinement passes:** 2
|
||||||
|
- **Use case:** Default for most models. Well-tested and reliable.
|
||||||
|
- **How it works:** Extracts multiple refusal directions via SVD, applies norm-preserving bi-projection to maintain weight matrix norms. Two refinement passes catch residual refusal.
|
||||||
|
|
||||||
|
### aggressive
|
||||||
|
- **Directions:** 8+ (whitened SVD + jailbreak-contrastive)
|
||||||
|
- **Speed:** Medium-Slow
|
||||||
|
- **Risk:** Medium-High (may damage coherence)
|
||||||
|
- **Use case:** When `advanced` leaves > 10% refusals. Stubborn models.
|
||||||
|
- **How it works:** Uses whitened SVD for covariance-normalized extraction, adds jailbreak-contrastive directions, performs attention head surgery on the most refusal-active heads.
|
||||||
|
|
||||||
|
### spectral_cascade
|
||||||
|
- **Speed:** Medium
|
||||||
|
- **Risk:** Medium
|
||||||
|
- **Use case:** Research, novel approaches
|
||||||
|
- **How it works:** DCT (Discrete Cosine Transform) frequency-domain decomposition of refusal signals. Separates high-frequency (surface-level) from low-frequency (deep) refusal patterns.
|
||||||
|
|
||||||
|
### informed (EXPERIMENTAL)
|
||||||
|
- **Speed:** Slow (~20-40 min for 8B model)
|
||||||
|
- **Risk:** Variable — results depend on analysis quality
|
||||||
|
- **Use case:** When you want auto-configuration, but be aware this is experimental and may not outperform `advanced`.
|
||||||
|
- **How it works:** Runs 4 analysis modules first (alignment imprint, concept geometry, logit lens, ouroboros detection), then auto-configures extraction strategy. Includes an "Ouroboros loop" that detects and counteracts self-repair.
|
||||||
|
- **Note:** The auto-detection can sometimes misconfigure. If results are poor, fall back to `advanced`.
|
||||||
|
|
||||||
|
### surgical
|
||||||
|
- **Speed:** Very slow (~1-2 hrs for 8B model)
|
||||||
|
- **Risk:** Low (very precise)
|
||||||
|
- **Use case:** Reasoning models (R1 distills, QwQ, etc.) where chain-of-thought must be preserved.
|
||||||
|
- **How it works:** Uses SAE (Sparse Autoencoder) features + individual neuron masking + attention head surgery + per-expert decomposition (for MoE). CoT-aware — identifies and protects reasoning-critical directions before projecting.
|
||||||
|
|
||||||
|
### optimized
|
||||||
|
- **Speed:** Very slow (hours — runs many trials)
|
||||||
|
- **Risk:** Low (finds optimal parameters)
|
||||||
|
- **Use case:** When quality matters more than speed. Production models.
|
||||||
|
- **How it works:** Bayesian hyperparameter search via Optuna TPE sampler. Optimizes n_directions, regularization, refinement passes, and layer selection jointly. Evaluates each configuration on refusal rate + perplexity.
|
||||||
|
|
||||||
|
### inverted
|
||||||
|
- **Speed:** Fast
|
||||||
|
- **Risk:** High (model behavior changes dramatically)
|
||||||
|
- **Use case:** Research, studying refusal mechanisms
|
||||||
|
- **How it works:** Instead of projecting out the refusal direction, reflects it. The model actively complies rather than passively not-refusing. Useful for understanding the geometry of alignment.
|
||||||
|
|
||||||
|
### nuclear
|
||||||
|
- **Speed:** Slow
|
||||||
|
- **Risk:** Medium-High
|
||||||
|
- **Use case:** Stubborn MoE models (DeepSeek-MoE, Mixtral, etc.)
|
||||||
|
- **How it works:** Combines expert-granular abliteration (EGA), steering vector injection, attention head pruning, and multi-pass refinement. Decomposes refusal signals into per-expert components for MoE architectures.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Method Selection Flowchart
|
||||||
|
|
||||||
|
```
|
||||||
|
Is this a quick test?
|
||||||
|
→ YES: basic
|
||||||
|
→ NO: continue
|
||||||
|
|
||||||
|
Is it an MoE model (Mixtral, DeepSeek-MoE)?
|
||||||
|
→ YES: nuclear
|
||||||
|
→ NO: continue
|
||||||
|
|
||||||
|
Is it a reasoning model (R1, QwQ, CoT-focused)?
|
||||||
|
→ YES: surgical
|
||||||
|
→ NO: continue
|
||||||
|
|
||||||
|
Do you need the absolute best quality and have time?
|
||||||
|
→ YES: optimized
|
||||||
|
→ NO: advanced (recommended default)
|
||||||
|
|
||||||
|
Did advanced leave > 10% refusals?
|
||||||
|
→ YES: aggressive
|
||||||
|
→ Still refusing: nuclear
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Key Parameters
|
||||||
|
|
||||||
|
| Parameter | Range | Default | Effect |
|
||||||
|
|:----------|:------|:--------|:-------|
|
||||||
|
| `--n-directions` | 1-32 | method-dependent | More directions = more complete removal, but higher damage risk |
|
||||||
|
| `--regularization` | 0.0-1.0 | 0.1 | Higher = more conservative (less removal, less damage) |
|
||||||
|
| `--refinement-passes` | 1-5 | 2 | More passes catch residual refusal, but diminishing returns |
|
||||||
|
| `--quantization` | 4bit, 8bit | none | Reduces VRAM usage; quality impact minimal for extraction |
|
||||||
|
| `--verify-sample-size` | 10-200 | 20 | More samples = more accurate refusal rate estimate |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
| Problem | Likely Cause | Fix |
|
||||||
|
|:--------|:-------------|:----|
|
||||||
|
| Refusal rate > 20% | Too few directions | Increase `--n-directions`, try `aggressive` |
|
||||||
|
| Refusal rate 5-20% | Residual refusal | Add `--refinement-passes 3`, try `--direction-method svd` |
|
||||||
|
| Perplexity spike > 20% | Over-aggressive removal | Reduce `--n-directions`, increase `--regularization` |
|
||||||
|
| Repetitive output | Weight matrix damage | Use `basic` with fewer directions, check norm preservation |
|
||||||
|
| MoE model still refuses | Non-expert-aware method | Switch to `nuclear` |
|
||||||
|
| Reasoning degraded | CoT directions damaged | Use `surgical` method |
|
||||||
|
| OOM during extraction | Insufficient VRAM | Add `--quantization 4bit` and/or `--large-model` |
|
||||||
3
skills/mlops/models/DESCRIPTION.md
Normal file
3
skills/mlops/models/DESCRIPTION.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
---
|
||||||
|
description: Specific model architectures and tools — computer vision (CLIP, SAM, Stable Diffusion), speech (Whisper), audio generation (AudioCraft), and multimodal models (LLaVA).
|
||||||
|
---
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user