mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 06:51:16 +08:00
fix(skills-guard): allow agent-created dangerous verdicts without confirmation
The security scanner is meant to protect against hostile external skills pulled from GitHub via `hermes skills install` — trusted/community policies block or ask on dangerous verdicts accordingly. But agent-created skills (from `skill_manage`) run in the same process as the agent that wrote them. The agent can already execute the same code paths via `terminal()` with no gate, so the ask-on-dangerous policy adds friction without meaningful security.

Concrete trigger: an agent writing a PR-review skill that describes cache-busting or persistence semantics in prose gets blocked because those words appear in the patterns list. The skill isn't actually doing anything dangerous — it's just documenting what reviewers should watch for in other PRs.

Change: agent-created dangerous verdict maps to 'allow' instead of 'ask'. External hub installs (trusted/community) keep their stricter policies intact.

Tests updated: renamed `test_dangerous_agent_created_asks` → `test_dangerous_agent_created_allowed`; renamed the force-override test (`test_force_overrides_dangerous_for_agent_created` → `test_force_noop_for_agent_created_dangerous`) and updated its assertion, since force is now a no-op for agent-created (the allow branch returns first).
This commit is contained in:
@@ -174,20 +174,27 @@ class TestShouldAllowInstall:
|
||||
assert allowed is True
|
||||
assert "agent-created" in reason
|
||||
|
||||
def test_dangerous_agent_created_asks(self):
|
||||
"""Agent-created skills with dangerous verdict return None (ask for confirmation)."""
|
||||
def test_dangerous_agent_created_allowed(self):
|
||||
"""Agent-created skills bypass verdict gating — agent can already
|
||||
execute the same code via terminal(), so skill_manage allows all
|
||||
verdicts. This prevents friction when the agent writes skills that
|
||||
mention risky keywords in prose (e.g. describing cache-busting or
|
||||
persistence semantics in a PR-review skill)."""
|
||||
f = [Finding("env_exfil_curl", "critical", "exfiltration", "SKILL.md", 1, "curl $TOKEN", "exfiltration")]
|
||||
allowed, reason = should_allow_install(self._result("agent-created", "dangerous", f))
|
||||
assert allowed is None
|
||||
assert "Requires confirmation" in reason
|
||||
assert allowed is True
|
||||
assert "agent-created" in reason
|
||||
|
||||
def test_force_overrides_dangerous_for_agent_created(self):
|
||||
def test_force_noop_for_agent_created_dangerous(self):
|
||||
"""With agent-created dangerous mapped to 'allow', force becomes a
|
||||
no-op — the allow branch returns first. Force still works for any
|
||||
trust level that maps to block (community/trusted)."""
|
||||
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
|
||||
allowed, reason = should_allow_install(
|
||||
self._result("agent-created", "dangerous", f), force=True
|
||||
)
|
||||
assert allowed is True
|
||||
assert "Force-installed" in reason
|
||||
assert "agent-created" in reason
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@@ -43,7 +43,11 @@ INSTALL_POLICY = {
|
||||
"builtin": ("allow", "allow", "allow"),
|
||||
"trusted": ("allow", "allow", "block"),
|
||||
"community": ("allow", "block", "block"),
|
||||
"agent-created": ("allow", "allow", "ask"),
|
||||
# Agent-created skills run in the same process as the agent that
|
||||
# wrote them — the agent could already execute the same code via
|
||||
# terminal(), so a dangerous-pattern gate on skill_manage adds
|
||||
# friction without meaningful security. Allow all verdicts.
|
||||
"agent-created": ("allow", "allow", "allow"),
|
||||
}
|
||||
|
||||
VERDICT_INDEX = {"safe": 0, "caution": 1, "dangerous": 2}
|
||||
|
||||
Reference in New Issue
Block a user