Compare commits

...

2 Commits

Author SHA1 Message Date
Teknium
ec9645234d test(skills): add bytes-vs-str equivalence and on-disk hash parity tests
Follow-up on #9925 cherry-pick adding two additional tests:
- bytes content hashes identically to its str-decoded form
- mixed bytes+str bundle hash equals the on-disk content_hash from
  skills_guard (the production invariant used to detect drift)

Also map dodofun@126.com and 1615063567@qq.com in AUTHOR_MAP so the
CI contributor check passes for the cherry-picked commit.

Co-authored-by: LeonSGP43 <cine.dreamer.one@gmail.com>
Co-authored-by: zhao0112 <1615063567@qq.com>
2026-05-03 12:10:20 -07:00
dh
5f58a9957a skills-hub: hash binary skill bundle files correctly 2026-05-03 12:09:43 -07:00
3 changed files with 70 additions and 1 deletions

View File

@@ -621,6 +621,8 @@ AUTHOR_MAP = {
"2114364329@qq.com": "cuyua9",
"2557058999@qq.com": "Disaster-Terminator",
"cine.dreamer.one@gmail.com": "LeonSGP43",
"dodofun@126.com": "colorcross",
"1615063567@qq.com": "zhao0112",
"leozeli@qq.com": "leozeli",
"linlehao@cuhk.edu.cn": "LehaoLin",
"liutong@isacas.ac.cn": "I3eg1nner",

View File

@@ -901,6 +901,69 @@ class TestCheckForSkillUpdates:
assert bundle_content_hash(bundle) == content_hash(skill_dir)
def test_bundle_content_hash_accepts_binary_files(self):
    """A bundle holding a raw ``bytes`` asset still produces a sha256 digest."""
    # The PNG signature begins with 0x89, which cannot start a UTF-8
    # sequence, so this payload has no ``str`` form to fall back on.
    bundle_files = {
        "SKILL.md": "# Demo\n",
        "assets/logo.png": b"\x89PNG\r\n\x1a\nbinary",
    }
    binary_bundle = SkillBundle(
        name="demo-binary-skill",
        files=bundle_files,
        source="github",
        identifier="owner/repo/demo-binary-skill",
        trust_level="community",
    )

    assert bundle_content_hash(binary_bundle).startswith("sha256:")
def test_bundle_content_hash_bytes_matches_str_equivalent(self):
    """The digest is the same whether file content is given as str or as bytes."""
    # Everything except ``files`` is identical between the two bundles.
    shared_fields = {
        "name": "demo-skill",
        "source": "github",
        "identifier": "owner/repo/demo-skill",
        "trust_level": "community",
    }
    str_files = {
        "SKILL.md": "same content",
        "references/checklist.md": "- [ ] security\n",
    }
    # Same payloads as ``str_files``, supplied as bytes literals.
    raw_files = {
        "SKILL.md": b"same content",
        "references/checklist.md": b"- [ ] security\n",
    }

    str_digest = bundle_content_hash(SkillBundle(files=str_files, **shared_fields))
    raw_digest = bundle_content_hash(SkillBundle(files=raw_files, **shared_fields))

    assert raw_digest == str_digest
def test_bundle_content_hash_mixed_matches_on_disk(self, tmp_path):
    """In-memory bundle hash must equal on-disk content_hash for mixed bytes+str.

    The bundle mixes one ``bytes`` file and one ``str`` file; the same
    payloads are materialised under ``tmp_path`` and hashed with
    ``tools.skills_guard.content_hash``. Both digests must agree.
    """
    from tools.skills_guard import content_hash

    skill_md = b"# Demo Skill\n"
    checklist_md = "- [ ] security\n"

    bundle = SkillBundle(
        name="demo-skill",
        files={
            "SKILL.md": skill_md,
            "references/checklist.md": checklist_md,
        },
        source="github",
        identifier="owner/repo/demo-skill",
        trust_level="community",
    )

    skill_dir = tmp_path / "demo-skill"
    skill_dir.mkdir()
    (skill_dir / "SKILL.md").write_bytes(skill_md)
    (skill_dir / "references").mkdir()
    # Write the exact UTF-8 bytes instead of using write_text(): text mode
    # uses the locale encoding and translates "\n" to the platform line
    # separator (CRLF on Windows), so the file on disk could differ from the
    # UTF-8 bytes the in-memory hash is computed from.
    # NOTE(review): presumes content_hash hashes raw file bytes - confirm
    # against tools.skills_guard.
    (skill_dir / "references" / "checklist.md").write_bytes(
        checklist_md.encode("utf-8")
    )

    assert bundle_content_hash(bundle) == content_hash(skill_dir)
def test_reports_update_when_remote_hash_differs(self):
lock = MagicMock()
lock.list_installed.return_value = [{

View File

@@ -2801,7 +2801,11 @@ def bundle_content_hash(bundle: SkillBundle) -> str:
def bundle_content_hash(bundle: SkillBundle) -> str:
    """Compute a deterministic hash for an in-memory skill bundle.

    File contents are fed into a single SHA-256 in sorted relative-path
    order, so the result does not depend on the insertion order of
    ``bundle.files``. Paths only determine the ordering; they are not
    hashed themselves.

    Args:
        bundle: Skill bundle whose ``files`` mapping holds each file's
            content as ``str`` (hashed as its UTF-8 encoding) or ``bytes``
            (hashed as-is, so non-UTF-8 assets such as images work).

    Returns:
        ``"sha256:"`` followed by the first 16 hex characters of the digest.
    """
    h = hashlib.sha256()
    for rel_path in sorted(bundle.files):
        content = bundle.files[rel_path]
        # Feed each file exactly once. bytes has no .encode(), so binary
        # content must bypass the UTF-8 encoding step used for text.
        h.update(content if isinstance(content, bytes) else content.encode("utf-8"))
    return f"sha256:{h.hexdigest()[:16]}"