fix(skills): honor policy table for dangerous verdicts

Salvaged from PR #1007 by stablegenius49. - let INSTALL_POLICY decide dangerous verdict handling for builtin skills - allow --force to override blocked dangerous decisions for trusted and community sources - accept --yes / -y as aliases for --force in /skills install - update regression tests to match the intended policy precedence
2026-03-14 11:27:02 -07:00
parent 6d8286f396
commit 3325e51e53
5 changed files with 40 additions and 49 deletions
--- a/hermes_cli/skills_hub.py
+++ b/hermes_cli/skills_hub.py
@@ -1050,11 +1050,11 @@ def handle_skills_slash(cmd: str, console: Optional[Console] = None) -> None:

    elif action == "install":
        if not args:
-            c.print("[bold red]Usage:[/] /skills install <identifier> [--category <cat>] [--force]\n")
+            c.print("[bold red]Usage:[/] /skills install <identifier> [--category <cat>] [--force|--yes]\n")
            return
        identifier = args[0]
        category = ""
-        force = "--force" in args
+        force = any(flag in args for flag in ("--force", "--yes", "-y"))
        for i, a in enumerate(args):
            if a == "--category" and i + 1 < len(args):
                category = args[i + 1]
--- a/tests/hermes_cli/test_skills_hub.py
+++ b/tests/hermes_cli/test_skills_hub.py
@@ -3,7 +3,7 @@ from io import StringIO
 import pytest
 from rich.console import Console

-from hermes_cli.skills_hub import do_check, do_list, do_update
+from hermes_cli.skills_hub import do_check, do_list, do_update, handle_skills_slash


 class _DummyLockFile:
--- a/tests/tools/test_force_dangerous_override.py
+++ b/tests/tools/test_force_dangerous_override.py
@@ -1,11 +1,8 @@
-"""Tests for the --force flag dangerous verdict bypass fix in skills_guard.py.
+"""Regression tests for skills guard policy precedence.

-Regression test: the old code had `if result.verdict == "dangerous" and not force:`
-which meant force=True would skip the early return, fall through the policy
-lookup, and hit `if force: return True` - allowing installation of skills
-flagged as dangerous (reverse shells, data exfiltration, etc).
-
-The docstring explicitly states: "never overrides dangerous".
+Official/builtin skills should follow the INSTALL_POLICY table even when their
+scan verdict is dangerous, and --force should override blocked verdicts for
+non-builtin sources.
 """


@@ -44,10 +41,6 @@ def _new_should_allow(verdict, trust_level, force):
    }
    VERDICT_INDEX = {"safe": 0, "caution": 1, "dangerous": 2}

-    # Fixed: no `and not force` - dangerous is always blocked
-    if verdict == "dangerous":
-        return False
-
    policy = INSTALL_POLICY.get(trust_level, INSTALL_POLICY["community"])
    vi = VERDICT_INDEX.get(verdict, 2)
    decision = policy[vi]
@@ -61,35 +54,28 @@ def _new_should_allow(verdict, trust_level, force):
    return False


-class TestForceNeverOverridesDangerous:
-    """The core bug: --force bypassed the dangerous verdict block."""
+class TestPolicyPrecedenceForDangerousVerdicts:
+    def test_builtin_dangerous_is_allowed_by_policy(self):
+        assert _new_should_allow("dangerous", "builtin", force=False) is True

-    def test_old_code_allows_dangerous_with_force(self):
-        """Old code: force=True lets dangerous skills through."""
-        assert _old_should_allow("dangerous", "community", force=True) is True
+    def test_trusted_dangerous_is_blocked_without_force(self):
+        assert _new_should_allow("dangerous", "trusted", force=False) is False

-    def test_new_code_blocks_dangerous_with_force(self):
-        """Fixed code: force=True still blocks dangerous skills."""
-        assert _new_should_allow("dangerous", "community", force=True) is False
+    def test_force_overrides_dangerous_for_community(self):
+        assert _new_should_allow("dangerous", "community", force=True) is True

-    def test_new_code_blocks_dangerous_trusted_with_force(self):
-        """Fixed code: even trusted + force cannot install dangerous."""
-        assert _new_should_allow("dangerous", "trusted", force=True) is False
+    def test_force_overrides_dangerous_for_trusted(self):
+        assert _new_should_allow("dangerous", "trusted", force=True) is True

    def test_force_still_overrides_caution(self):
-        """force=True should still work for caution verdicts."""
        assert _new_should_allow("caution", "community", force=True) is True

    def test_caution_community_blocked_without_force(self):
-        """Caution + community is blocked without force (unchanged)."""
        assert _new_should_allow("caution", "community", force=False) is False

    def test_safe_always_allowed(self):
-        """Safe verdict is always allowed regardless of force."""
        assert _new_should_allow("safe", "community", force=False) is True
        assert _new_should_allow("safe", "community", force=True) is True

-    def test_dangerous_blocked_without_force(self):
-        """Dangerous is blocked without force (both old and new agree)."""
-        assert _old_should_allow("dangerous", "community", force=False) is False
-        assert _new_should_allow("dangerous", "community", force=False) is False
+    def test_old_code_happened_to_allow_forced_dangerous_community(self):
+        assert _old_should_allow("dangerous", "community", force=True) is True
--- a/tests/tools/test_skills_guard.py
+++ b/tests/tools/test_skills_guard.py
@@ -46,9 +46,9 @@ from tools.skills_guard import (


 class TestResolveTrustLevel:
-    def test_builtin_not_exposed(self):
-        # builtin is only used internally, not resolved from source string
-        assert _resolve_trust_level("openai/skills") == "trusted"
+    def test_official_sources_resolve_to_builtin(self):
+        assert _resolve_trust_level("official") == "builtin"
+        assert _resolve_trust_level("official/email/agentmail") == "builtin"

    def test_trusted_repos(self):
        assert _resolve_trust_level("openai/skills") == "trusted"
@@ -116,11 +116,17 @@ class TestShouldAllowInstall:
        allowed, _ = should_allow_install(self._result("trusted", "caution", f))
        assert allowed is True

-    def test_dangerous_blocked_even_trusted(self):
+    def test_trusted_dangerous_blocked_without_force(self):
        f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
        allowed, _ = should_allow_install(self._result("trusted", "dangerous", f))
        assert allowed is False

+    def test_builtin_dangerous_allowed_without_force(self):
+        f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
+        allowed, reason = should_allow_install(self._result("builtin", "dangerous", f))
+        assert allowed is True
+        assert "builtin source" in reason
+
    def test_force_overrides_caution(self):
        f = [Finding("x", "high", "c", "f", 1, "m", "d")]
        allowed, reason = should_allow_install(self._result("community", "caution", f), force=True)
@@ -132,22 +138,21 @@ class TestShouldAllowInstall:
        allowed, _ = should_allow_install(self._result("community", "dangerous", f), force=False)
        assert allowed is False

-    def test_force_never_overrides_dangerous(self):
-        """--force must not bypass dangerous verdict (regression test)."""
+    def test_force_overrides_dangerous_for_community(self):
        f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
        allowed, reason = should_allow_install(
            self._result("community", "dangerous", f), force=True
        )
-        assert allowed is False
-        assert "DANGEROUS" in reason
+        assert allowed is True
+        assert "Force-installed" in reason

-    def test_force_never_overrides_dangerous_trusted(self):
-        """--force must not bypass dangerous even for trusted sources."""
+    def test_force_overrides_dangerous_for_trusted(self):
        f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
-        allowed, _ = should_allow_install(
+        allowed, reason = should_allow_install(
            self._result("trusted", "dangerous", f), force=True
        )
-        assert allowed is False
+        assert allowed is True
+        assert "Force-installed" in reason


 # ---------------------------------------------------------------------------
--- a/tools/skills_guard.py
+++ b/tools/skills_guard.py
@@ -645,14 +645,11 @@ def should_allow_install(result: ScanResult, force: bool = False) -> Tuple[bool,

    Args:
        result: Scan result from scan_skill()
-        force: If True, override blocks for caution verdicts (never overrides dangerous)
+        force: If True, override blocked policy decisions for this scan result

    Returns:
        (allowed, reason) tuple
    """
-    if result.verdict == "dangerous":
-        return False, f"Scan verdict is DANGEROUS ({len(result.findings)} findings). Blocked."
-
    policy = INSTALL_POLICY.get(result.trust_level, INSTALL_POLICY["community"])
    vi = VERDICT_INDEX.get(result.verdict, 2)
    decision = policy[vi]
@@ -661,7 +658,10 @@ def should_allow_install(result: ScanResult, force: bool = False) -> Tuple[bool,
        return True, f"Allowed ({result.trust_level} source, {result.verdict} verdict)"

    if force:
-        return True, f"Force-installed despite {result.verdict} verdict ({len(result.findings)} findings)"
+        return True, (
+            f"Force-installed despite blocked {result.verdict} verdict "
+            f"({len(result.findings)} findings)"
+        )

    return False, (
        f"Blocked ({result.trust_level} source + {result.verdict} verdict, "