fix(skills_guard): agent-created dangerous skills ask instead of block (#2446)

fix(skills_guard): agent-created dangerous skills ask instead of block
This commit is contained in:
Teknium
2026-03-22 03:56:30 -07:00
committed by GitHub
3 changed files with 26 additions and 8 deletions

View File

@@ -167,12 +167,12 @@ class TestShouldAllowInstall:
assert allowed is True
assert "agent-created" in reason
def test_dangerous_agent_created_blocked(self):
"""Agent-created skills with dangerous verdict (critical findings) stay blocked."""
def test_dangerous_agent_created_asks(self):
"""Agent-created skills with dangerous verdict return None (ask for confirmation)."""
f = [Finding("env_exfil_curl", "critical", "exfiltration", "SKILL.md", 1, "curl $TOKEN", "exfiltration")]
allowed, reason = should_allow_install(self._result("agent-created", "dangerous", f))
assert allowed is False
assert "Blocked" in reason
assert allowed is None
assert "Requires confirmation" in reason
def test_force_overrides_dangerous_for_agent_created(self):
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]

View File

@@ -59,9 +59,15 @@ def _security_scan_skill(skill_dir: Path) -> Optional[str]:
try:
result = scan_skill(skill_dir, source="agent-created")
allowed, reason = should_allow_install(result)
if not allowed:
if allowed is False:
report = format_scan_report(result)
return f"Security scan blocked this skill ({reason}):\n{report}"
if allowed is None:
# "ask" — do not block: the skill is allowed and only the reason is logged; the formatted report is not returned to the caller
report = format_scan_report(result)
logger.warning("Agent-created skill has security findings: %s", reason)
# Don't block — return None to allow, but log the warning
return None
except Exception as e:
logger.warning("Security scan failed for %s: %s", skill_dir, e, exc_info=True)
return None

View File

@@ -43,7 +43,7 @@ INSTALL_POLICY = {
"builtin": ("allow", "allow", "allow"),
"trusted": ("allow", "allow", "block"),
"community": ("allow", "block", "block"),
"agent-created": ("allow", "allow", "block"),
"agent-created": ("allow", "allow", "ask"),
}
VERDICT_INDEX = {"safe": 0, "caution": 1, "dangerous": 2}
@@ -659,10 +659,17 @@ def should_allow_install(result: ScanResult, force: bool = False) -> Tuple[bool,
if force:
return True, (
f"Force-installed despite blocked {result.verdict} verdict "
f"Force-installed despite {result.verdict} verdict "
f"({len(result.findings)} findings)"
)
if decision == "ask":
# Return None to signal "needs user confirmation"
return None, (
f"Requires confirmation ({result.trust_level} source + {result.verdict} verdict, "
f"{len(result.findings)} findings)"
)
return False, (
f"Blocked ({result.trust_level} source + {result.verdict} verdict, "
f"{len(result.findings)} findings). Use --force to override."
@@ -694,7 +701,12 @@ def format_scan_report(result: ScanResult) -> str:
lines.append("")
allowed, reason = should_allow_install(result)
status = "ALLOWED" if allowed else "BLOCKED"
if allowed is True:
status = "ALLOWED"
elif allowed is None:
status = "NEEDS CONFIRMATION"
else:
status = "BLOCKED"
lines.append(f"Decision: {status}{reason}")
return "\n".join(lines)