fix(skills): resolve skills.sh alias installs

Harden the skills.sh hub adapter by parsing skill detail pages when
search slugs do not map cleanly onto GitHub skill folder names.

This adds detail-page resolution for alias-style skills, improves
inspect metadata from the page itself, and covers the behavior with
regression tests plus live smoke validation for json-render-react.
This commit is contained in:
teknium1
2026-03-14 06:50:25 -07:00
parent 483a0b5233
commit 02c307b004
3 changed files with 243 additions and 33 deletions

View File

@@ -920,7 +920,7 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:
elif action == "search":
if not args:
c.print("[bold red]Usage:[/] /skills search <query> [--source github] [--limit N]\n")
c.print("[bold red]Usage:[/] /skills search <query> [--source skills-sh|github|official] [--limit N]\n")
return
source = "all"
limit = 10

View File

@@ -211,6 +211,79 @@ class TestSkillsShSource:
assert meta.identifier == "skills-sh/vercel-labs/agent-skills/vercel-react-best-practices"
assert mock_list_skills.called
@patch("tools.skills_hub._write_index_cache")
@patch("tools.skills_hub._read_index_cache", return_value=None)
@patch("tools.skills_hub.httpx.get")
@patch.object(GitHubSource, "_list_skills_in_repo")
@patch.object(GitHubSource, "inspect")
def test_inspect_uses_detail_page_to_resolve_alias_skill(self, mock_inspect, mock_list_skills, mock_get, _mock_read_cache, _mock_write_cache):
resolved = SkillMeta(
name="react",
description="React renderer",
source="github",
identifier="vercel-labs/json-render/skills/react",
trust_level="community",
repo="vercel-labs/json-render",
path="skills/react",
)
mock_inspect.side_effect = lambda identifier: resolved if identifier == resolved.identifier else None
mock_list_skills.return_value = [resolved]
mock_get.return_value = MagicMock(
status_code=200,
text='''
<h1>json-render-react</h1>
<code>$ npx skills add https://github.com/vercel-labs/json-render --skill json-render-react</code>
<div class="prose"><h1>@json-render/react</h1><p>React renderer.</p></div>
''',
)
meta = self._source().inspect("skills-sh/vercel-labs/json-render/json-render-react")
assert meta is not None
assert meta.identifier == "skills-sh/vercel-labs/json-render/json-render-react"
assert meta.path == "skills/react"
assert mock_get.called
@patch("tools.skills_hub._write_index_cache")
@patch("tools.skills_hub._read_index_cache", return_value=None)
@patch("tools.skills_hub.httpx.get")
@patch.object(GitHubSource, "_list_skills_in_repo")
@patch.object(GitHubSource, "fetch")
def test_fetch_uses_detail_page_to_resolve_alias_skill(self, mock_fetch, mock_list_skills, mock_get, _mock_read_cache, _mock_write_cache):
resolved_meta = SkillMeta(
name="react",
description="React renderer",
source="github",
identifier="vercel-labs/json-render/skills/react",
trust_level="community",
repo="vercel-labs/json-render",
path="skills/react",
)
resolved_bundle = SkillBundle(
name="react",
files={"SKILL.md": "# react"},
source="github",
identifier="vercel-labs/json-render/skills/react",
trust_level="community",
)
mock_fetch.side_effect = lambda identifier: resolved_bundle if identifier == resolved_bundle.identifier else None
mock_list_skills.return_value = [resolved_meta]
mock_get.return_value = MagicMock(
status_code=200,
text='''
<h1>json-render-react</h1>
<code>$ npx skills add https://github.com/vercel-labs/json-render --skill json-render-react</code>
<div class="prose"><h1>@json-render/react</h1><p>React renderer.</p></div>
''',
)
bundle = self._source().fetch("skills-sh/vercel-labs/json-render/json-render-react")
assert bundle is not None
assert bundle.identifier == "skills-sh/vercel-labs/json-render/json-render-react"
assert bundle.files["SKILL.md"] == "# react"
assert mock_get.called
class TestCreateSourceRouter:
def test_includes_skills_sh_source(self):

View File

@@ -507,6 +507,21 @@ class SkillsShSource(SkillSource):
BASE_URL = "https://skills.sh"
SEARCH_URL = f"{BASE_URL}/api/search"
_SKILL_LINK_RE = re.compile(r'href=["\']/(?P<id>(?!agents/|_next/|api/)[^"\'/]+/[^"\'/]+/[^"\'/]+)["\']')
_INSTALL_CMD_RE = re.compile(
r'npx\s+skills\s+add\s+(?P<repo>https?://github\.com/[^\s<]+|[^\s<]+)'
r'(?:\s+--skill\s+(?P<skill>[^\s<]+))?',
re.IGNORECASE,
)
_PAGE_H1_RE = re.compile(r'<h1[^>]*>(?P<title>.*?)</h1>', re.IGNORECASE | re.DOTALL)
_PROSE_H1_RE = re.compile(
r'<div[^>]*class=["\'][^"\']*prose[^"\']*["\'][^>]*>.*?<h1[^>]*>(?P<title>.*?)</h1>',
re.IGNORECASE | re.DOTALL,
)
_PROSE_P_RE = re.compile(
r'<div[^>]*class=["\'][^"\']*prose[^"\']*["\'][^>]*>.*?<p[^>]*>(?P<body>.*?)</p>',
re.IGNORECASE | re.DOTALL,
)
_WEEKLY_INSTALLS_RE = re.compile(r'Weekly Installs.*?children\\":\\"(?P<count>[0-9.,Kk]+)\\"', re.DOTALL)
def __init__(self, auth: GitHubAuth):
self.auth = auth
@@ -561,7 +576,8 @@ class SkillsShSource(SkillSource):
bundle.identifier = self._wrap_identifier(canonical)
return bundle
resolved = self._discover_identifier(canonical)
detail = self._fetch_detail_page(canonical)
resolved = self._discover_identifier(canonical, detail=detail)
if resolved:
bundle = self.github.fetch(resolved)
if bundle:
@@ -572,22 +588,19 @@ class SkillsShSource(SkillSource):
def inspect(self, identifier: str) -> Optional[SkillMeta]:
    """Inspect a skills.sh skill, resolving alias slugs via the detail page.

    First tries the direct candidate identifiers against GitHub. If none
    match (alias-style slugs such as ``json-render-react`` that do not map
    onto a GitHub skill folder name), fetches the skills.sh detail page and
    uses it to discover the real GitHub identifier.

    Returns the finalized SkillMeta, or None when nothing resolves.
    """
    # NOTE: the rendered diff interleaved the removed lines (inline
    # meta.source/identifier/trust_level assignments and the old
    # _discover_identifier call without `detail`) with the added ones;
    # this is the post-change implementation.
    canonical = self._normalize_identifier(identifier)
    detail: Optional[dict] = None
    for candidate in self._candidate_identifiers(canonical):
        meta = self.github.inspect(candidate)
        if meta:
            detail = self._fetch_detail_page(canonical)
            return self._finalize_inspect_meta(meta, canonical, detail)
    # Direct candidates failed: let the detail page supply repo/skill tokens.
    detail = self._fetch_detail_page(canonical)
    resolved = self._discover_identifier(canonical, detail=detail)
    if resolved:
        meta = self.github.inspect(resolved)
        if meta:
            return self._finalize_inspect_meta(meta, canonical, detail)
    return None
def _featured_skills(self, limit: int) -> List[SkillMeta]:
@@ -661,45 +674,169 @@ class SkillsShSource(SkillSource):
path=skill_path,
)
def _fetch_detail_page(self, identifier: str) -> Optional[dict]:
    """Fetch and parse the skills.sh detail page for *identifier*.

    Results are cached under an md5-keyed index-cache entry so repeated
    inspect/fetch calls do not re-hit the network. Returns the parsed
    detail dict, or None on HTTP error, non-200 status, or parse failure.
    """
    # (Removed a dangling `def _discover_identifier(...)` signature line
    # that was residue of the old implementation in the rendered diff.)
    cache_key = f"skills_sh_detail_{hashlib.md5(identifier.encode()).hexdigest()}"
    cached = _read_index_cache(cache_key)
    if isinstance(cached, dict):
        return cached
    try:
        resp = httpx.get(f"{self.BASE_URL}/{identifier}", timeout=20)
        if resp.status_code != 200:
            return None
    except httpx.HTTPError:
        # Best-effort enrichment: network trouble just means "no detail".
        return None
    detail = self._parse_detail_page(identifier, resp.text)
    if detail:
        _write_index_cache(cache_key, detail)
    return detail
def _parse_detail_page(self, identifier: str, html: str) -> Optional[dict]:
    """Extract structured metadata from a skills.sh detail page.

    Pulls the backing repo slug and ``--skill`` token from the install
    command, plus page/prose titles, the prose summary, and the weekly
    install count. Returns None when *identifier* lacks the expected
    ``owner/repo/skill`` shape.
    """
    parts = identifier.split("/", 2)
    if len(parts) < 3:
        return None
    # (Removed a stale duplicate `repo = f"{parts[0]}/{parts[1]}"` line
    # left over from the old version in the rendered diff.)
    default_repo = f"{parts[0]}/{parts[1]}"
    skill_token = parts[2]
    repo = default_repo
    install_skill = skill_token
    install_match = self._INSTALL_CMD_RE.search(html)
    if install_match:
        # The install command is authoritative: it may point at a different
        # repo and names the actual skill folder for alias slugs.
        repo_value = (install_match.group("repo") or "").strip()
        install_skill = (install_match.group("skill") or install_skill).strip()
        repo = self._extract_repo_slug(repo_value) or repo
    page_title = self._extract_first_match(self._PAGE_H1_RE, html)
    body_title = self._extract_first_match(self._PROSE_H1_RE, html)
    body_summary = self._extract_first_match(self._PROSE_P_RE, html)
    weekly_installs = self._extract_weekly_installs(html)
    return {
        "repo": repo,
        "install_skill": install_skill,
        "page_title": page_title,
        "body_title": body_title,
        "body_summary": body_summary,
        "weekly_installs": weekly_installs,
    }
def _discover_identifier(self, identifier: str, detail: Optional[dict] = None) -> Optional[str]:
    """Locate the real GitHub skill identifier behind a skills.sh slug.

    Scans the known skill base paths in the repo (preferring the repo the
    detail page reports over the one implied by the slug) and matches each
    listed skill against the slug plus any detail-page tokens.

    Returns the matching GitHub identifier, or None.
    """
    # (Removed the stale `_matches_skill_token(meta, skill_token)` call that
    # the rendered diff left interleaved with the new `_matches_skill_tokens`.)
    parts = identifier.split("/", 2)
    if len(parts) < 3:
        return None
    default_repo = f"{parts[0]}/{parts[1]}"
    # The detail page may redirect to a different backing repo.
    repo = detail.get("repo", default_repo) if isinstance(detail, dict) else default_repo
    skill_token = parts[2]
    tokens = [skill_token]
    if isinstance(detail, dict):
        tokens.extend([
            detail.get("install_skill", ""),
            detail.get("page_title", ""),
            detail.get("body_title", ""),
        ])
    for base_path in ("skills/", ".agents/skills/", ".claude/skills/"):
        try:
            skills = self.github._list_skills_in_repo(repo, base_path)
        except Exception:
            # Best-effort: a missing base path must not abort the others.
            continue
        for meta in skills:
            if self._matches_skill_tokens(meta, tokens):
                return meta.identifier
    return None
def _finalize_inspect_meta(self, meta: SkillMeta, canonical: str, detail: Optional[dict]) -> SkillMeta:
    """Rebrand a GitHub-sourced SkillMeta as a skills.sh result and enrich it.

    Rewrites source/identifier/trust_level to the skills.sh wrapping, then
    prefers the detail page's prose summary as the description; failing
    that, appends the weekly-install count when both it and an existing
    description are available. Returns the mutated *meta*.
    """
    # (The rendered diff interleaved the removed `_matches_skill_token`
    # helper and its nested `variants` closure here; that logic now lives
    # in `_matches_skill_tokens` / `_token_variants`.)
    meta.source = "skills.sh"
    meta.identifier = self._wrap_identifier(canonical)
    meta.trust_level = self.trust_level_for(canonical)
    if isinstance(detail, dict):
        body_summary = detail.get("body_summary")
        weekly_installs = detail.get("weekly_installs")
        if body_summary:
            meta.description = body_summary
        elif meta.description and weekly_installs:
            meta.description = f"{meta.description} · {weekly_installs} weekly installs on skills.sh"
    return meta
@classmethod
def _matches_skill_tokens(cls, meta: SkillMeta, skill_tokens: List[str]) -> bool:
    """Return True if any token variant overlaps the skill's own variants.

    Candidate variants are drawn from the skill's name, path, and the
    skill-relative tail of its identifier.
    """
    # (Removed the old `variants(...)`-based lines and the stale
    # `return target in candidates ...` that the rendered diff interleaved.)
    candidates = set()
    candidates.update(cls._token_variants(meta.name))
    candidates.update(cls._token_variants(meta.path))
    candidates.update(cls._token_variants(meta.identifier.split("/", 2)[-1] if meta.identifier else None))
    for token in skill_tokens:
        if cls._token_variants(token) & candidates:
            return True
    return False
@staticmethod
def _token_variants(value: Optional[str]) -> set[str]:
    """Return normalized lookup variants for a skill token.

    Variants cover slash/underscore/hyphen and scoped-package (``@scope/x``)
    forms so alias slugs like ``json-render-react`` can match folder names
    like ``react``. Empty strings are filtered out.
    """
    if not value:
        return set()
    plain = SkillsShSource._strip_html(str(value)).strip().strip("/").lower()
    if not plain:
        return set()
    base = plain.split("/")[-1]
    sanitized = re.sub(r'[^a-z0-9/_-]+', '-', plain).strip('-')
    sanitized_base = sanitized.split("/")[-1] if sanitized else ""
    # `base` already contains no '/', so the old extra `.split('/')[-1]`
    # and the `base.replace("/", "-")` variant were no-ops — dropped.
    unscoped = base.lstrip('@')
    variants = {
        plain,
        plain.replace("_", "-"),
        plain.replace("/", "-"),
        base,
        base.replace("_", "-"),
        sanitized,
        sanitized.replace("/", "-") if sanitized else "",
        sanitized_base,
        unscoped,
        unscoped.replace("_", "-"),
    }
    return {v for v in variants if v}
@staticmethod
def _extract_repo_slug(repo_value: str) -> Optional[str]:
repo_value = repo_value.strip()
if repo_value.startswith("https://github.com/"):
repo_value = repo_value[len("https://github.com/"):]
repo_value = repo_value.strip("/")
parts = repo_value.split("/")
if len(parts) >= 2:
return f"{parts[0]}/{parts[1]}"
return None
@staticmethod
def _extract_first_match(pattern: re.Pattern, text: str) -> Optional[str]:
    """Search *text* with *pattern*; return the first non-empty captured
    group with HTML tags stripped, or None when nothing useful matched."""
    found = pattern.search(text)
    if found is None:
        return None
    raw = next((group for group in found.groups() if group), None)
    if raw is None:
        return None
    cleaned = SkillsShSource._strip_html(raw).strip()
    return cleaned or None
@staticmethod
def _extract_weekly_installs(html: str) -> Optional[str]:
    """Pull the weekly-installs counter out of the page payload, if present."""
    found = SkillsShSource._WEEKLY_INSTALLS_RE.search(html)
    return found.group("count") if found else None
@staticmethod
def _strip_html(value: str) -> str:
    # Remove all HTML/XML tags, leaving only text content. Naive regex
    # stripping is adequate here: inputs are short extracted fragments.
    return re.sub(r'<[^>]+>', '', value)
@staticmethod
def _normalize_identifier(identifier: str) -> str: