fix(skills_guard): agent-created dangerous skills ask instead of block (#2446)

fix(skills_guard): agent-created dangerous skills ask instead of block
2026-03-22 03:56:30 -07:00
parent 887e8a8d84 0b370f2dd9
commit 5c8d7d5d6f
3 changed files with 26 additions and 8 deletions
--- a/tests/tools/test_skills_guard.py
+++ b/tests/tools/test_skills_guard.py
@@ -167,12 +167,12 @@ class TestShouldAllowInstall:
        assert allowed is True
        assert "agent-created" in reason

-    def test_dangerous_agent_created_blocked(self):
-        """Agent-created skills with dangerous verdict (critical findings) stay blocked."""
+    def test_dangerous_agent_created_asks(self):
+        """Agent-created skills with dangerous verdict return None (ask for confirmation)."""
        f = [Finding("env_exfil_curl", "critical", "exfiltration", "SKILL.md", 1, "curl $TOKEN", "exfiltration")]
        allowed, reason = should_allow_install(self._result("agent-created", "dangerous", f))
-        assert allowed is False
-        assert "Blocked" in reason
+        assert allowed is None
+        assert "Requires confirmation" in reason

    def test_force_overrides_dangerous_for_agent_created(self):
        f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
--- a/tools/skill_manager_tool.py
+++ b/tools/skill_manager_tool.py
@@ -59,9 +59,15 @@ def _security_scan_skill(skill_dir: Path) -> Optional[str]:
    try:
        result = scan_skill(skill_dir, source="agent-created")
        allowed, reason = should_allow_install(result)
-        if not allowed:
+        if allowed is False:
            report = format_scan_report(result)
            return f"Security scan blocked this skill ({reason}):\n{report}"
+        if allowed is None:
+            # "ask" — do not block; the findings are only logged here, not returned to the caller
+            report = format_scan_report(result)
+            logger.warning("Agent-created skill has security findings: %s", reason)
+            # Don't block — return None to allow, but log the warning
+            return None
    except Exception as e:
        logger.warning("Security scan failed for %s: %s", skill_dir, e, exc_info=True)
    return None
--- a/tools/skills_guard.py
+++ b/tools/skills_guard.py
@@ -43,7 +43,7 @@ INSTALL_POLICY = {
    "builtin":       ("allow",  "allow",   "allow"),
    "trusted":       ("allow",  "allow",   "block"),
    "community":     ("allow",  "block",   "block"),
-    "agent-created": ("allow",  "allow",   "block"),
+    "agent-created": ("allow",  "allow",   "ask"),
 }

 VERDICT_INDEX = {"safe": 0, "caution": 1, "dangerous": 2}
@@ -659,10 +659,17 @@ def should_allow_install(result: ScanResult, force: bool = False) -> Tuple[bool,

    if force:
        return True, (
-            f"Force-installed despite blocked {result.verdict} verdict "
+            f"Force-installed despite {result.verdict} verdict "
            f"({len(result.findings)} findings)"
        )

+    if decision == "ask":
+        # Return None to signal "needs user confirmation"
+        return None, (
+            f"Requires confirmation ({result.trust_level} source + {result.verdict} verdict, "
+            f"{len(result.findings)} findings)"
+        )
+
    return False, (
        f"Blocked ({result.trust_level} source + {result.verdict} verdict, "
        f"{len(result.findings)} findings). Use --force to override."
@@ -694,7 +701,12 @@ def format_scan_report(result: ScanResult) -> str:
        lines.append("")

    allowed, reason = should_allow_install(result)
-    status = "ALLOWED" if allowed else "BLOCKED"
+    if allowed is True:
+        status = "ALLOWED"
+    elif allowed is None:
+        status = "NEEDS CONFIRMATION"
+    else:
+        status = "BLOCKED"
    lines.append(f"Decision: {status} — {reason}")

    return "\n".join(lines)