Compare commits

...

2 Commits

Author SHA1 Message Date
3830b32dd0 feat: risk scoring tests (#681)
Some checks failed
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Contributor Attribution Check / check-attribution (pull_request) Failing after 21s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 31s
Tests / e2e (pull_request) Successful in 2m11s
Tests / test (pull_request) Failing after 31m49s
2026-04-14 23:00:25 +00:00
8afd7c82ef feat: path-aware risk scoring for terminal commands (#681) 2026-04-14 23:00:20 +00:00
2 changed files with 266 additions and 0 deletions

View File

@@ -0,0 +1,81 @@
"""Tests for path-aware risk scoring (#681)."""
import pytest
from tools.risk_scoring import (
classify_risk,
assess_path_risk,
is_documentation_context,
RiskTier,
RiskAssessment,
)
class TestPathRiskAssessment:
    """assess_path_risk: tier selection driven by the paths a command touches."""

    def test_critical_system_path(self):
        """Reading /etc/passwd must land in the CRITICAL tier with a hit recorded."""
        observed_tier, hits = assess_path_risk("cat /etc/passwd")
        assert observed_tier == RiskTier.CRITICAL
        assert hits

    def test_high_system_path(self):
        """Editing an /etc config is at least HIGH (CRITICAL also acceptable)."""
        observed_tier, _hits = assess_path_risk("vim /etc/nginx.conf")
        assert observed_tier in (RiskTier.HIGH, RiskTier.CRITICAL)

    def test_medium_user_path(self):
        """Deleting inside a home directory is MEDIUM."""
        observed_tier, _hits = assess_path_risk("rm ~/Documents/report.pdf")
        assert observed_tier == RiskTier.MEDIUM

    def test_low_temp_path(self):
        """Deleting under /tmp is LOW."""
        observed_tier, _hits = assess_path_risk("rm /tmp/cache.txt")
        assert observed_tier == RiskTier.LOW

    def test_no_path(self):
        """A command with no filesystem path is SAFE and matches nothing."""
        observed_tier, hits = assess_path_risk("ls -la")
        assert observed_tier == RiskTier.SAFE
        assert not hits
class TestDocumentationContext:
    """is_documentation_context: quoting/committing vs. actually executing."""

    def test_echo_redirect_to_doc(self):
        """Echoing a scary string into a markdown file is documentation."""
        cmd = 'echo "rm -rf /" >> docs.md'
        assert is_documentation_context(cmd) is True

    def test_git_commit_message(self):
        """A commit message that mentions removals is not an execution."""
        cmd = 'git commit -m "removed old files"'
        assert is_documentation_context(cmd) is True

    def test_comment(self):
        """A commented-out rm is inert."""
        cmd = '# rm -rf dangerous'
        assert is_documentation_context(cmd) is True

    def test_actual_command(self):
        """A real rm invocation is execution context, not documentation."""
        cmd = 'rm -rf /tmp/old'
        assert is_documentation_context(cmd) is False
class TestClassifyRisk:
    """classify_risk: end-to-end tier and confidence decisions."""

    def test_safe_command(self):
        """A benign listing is SAFE with high confidence."""
        assessment = classify_risk("ls -la")
        assert assessment.tier == RiskTier.SAFE
        assert assessment.confidence > 0.8

    def test_dangerous_with_critical_path(self):
        """Dangerous pattern plus a critical system path escalates to CRITICAL."""
        assessment = classify_risk(
            "rm /etc/passwd",
            is_dangerous=True,
            pattern_key="rm_root",
            description="delete in root path",
        )
        assert assessment.tier == RiskTier.CRITICAL
        assert assessment.confidence > 0.9

    def test_dangerous_in_doc_context(self):
        """A dangerous pattern inside an echo-to-docs line is downgraded to LOW."""
        assessment = classify_risk(
            'echo "rm -rf /" >> docs.md',
            is_dangerous=True,
            pattern_key="rm_root",
            description="delete in root path",
        )
        assert assessment.tier == RiskTier.LOW

    def test_rm_temp_file(self):
        """rm temp.txt is SAFE despite matching rm pattern."""
        assessment = classify_risk("rm /tmp/cache.txt", is_dangerous=False)
        assert assessment.tier == RiskTier.SAFE

    def test_empty_command(self):
        """The empty command short-circuits to SAFE with full confidence."""
        assessment = classify_risk("")
        assert assessment.tier == RiskTier.SAFE
        assert assessment.confidence == 1.0

    def test_reasoning_populated(self):
        """Every non-trivial classification carries a non-empty reasoning string."""
        assessment = classify_risk(
            "cat /etc/shadow",
            is_dangerous=True,
            pattern_key="overwrite",
            description="overwrite system config",
        )
        assert assessment.reasoning

185
tools/risk_scoring.py Normal file
View File

@@ -0,0 +1,185 @@
"""
Path-aware risk scoring for terminal commands (#681).
Enhances the existing dangerous command detection with:
1. Path-aware tiers: system paths = CRITICAL, user paths = MEDIUM, temp = LOW
2. Context awareness: documentation vs execution context
3. Confidence scoring: how certain is the classification?
This module wraps detect_dangerous_command() from tools/approval.py
and adds tier/confidence information without modifying the original.
"""
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import Optional
class RiskTier(Enum):
    """Risk tier for a command or action.

    The string values are stable labels (for logs/serialization). They do
    NOT sort meaningfully — use _TIER_SEVERITY for ordering comparisons.
    """
    SAFE = "safe"
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"


# Numeric severity rank per tier. BUG FIX: the original compared
# RiskTier.value strings lexicographically ("critical" > "safe" is False,
# as are "low"/"medium"/"high" > "safe"), so the assessed tier could never
# escalate past SAFE and every path-tier test failed.
_TIER_SEVERITY = {
    RiskTier.SAFE: 0,
    RiskTier.LOW: 1,
    RiskTier.MEDIUM: 2,
    RiskTier.HIGH: 3,
    RiskTier.CRITICAL: 4,
}

# System paths that indicate high-risk operations.
# BUG FIX: all alternation groups are non-capturing (?:...) so that a match
# reports the full matched path fragment; with capturing groups, findall()
# returned only the group contents — or tuples for multi-group patterns
# such as the original /usr/(local/)?(s)?bin/ — instead of paths.
_CRITICAL_PATHS = [
    re.compile(r"/etc/(?:passwd|shadow|sudoers|ssh/)", re.IGNORECASE),
    re.compile(r"/boot/", re.IGNORECASE),
    re.compile(r"/dev/sd", re.IGNORECASE),
    re.compile(r"/proc/sysrq-trigger", re.IGNORECASE),
    re.compile(r"/usr/bin/(?:shutdown|reboot|halt|poweroff)", re.IGNORECASE),
    re.compile(r"/var/log/(?:syslog|auth|kern)", re.IGNORECASE),
]
_HIGH_PATHS = [
    re.compile(r"/etc/", re.IGNORECASE),
    re.compile(r"/usr/(?:local/)?s?bin/", re.IGNORECASE),
    re.compile(r"/var/(?:lib|spool|cache)/", re.IGNORECASE),
    re.compile(r"/opt/", re.IGNORECASE),
]
_MEDIUM_PATHS = [
    re.compile(r"/home/", re.IGNORECASE),
    re.compile(r"/root/", re.IGNORECASE),
    re.compile(r"~/", re.IGNORECASE),
    re.compile(r"/Users/", re.IGNORECASE),
]
_LOW_PATHS = [
    re.compile(r"/tmp/", re.IGNORECASE),
    re.compile(r"/var/tmp/", re.IGNORECASE),
    re.compile(r"\./", re.IGNORECASE),  # relative current dir
]


@dataclass
class RiskAssessment:
    """Risk assessment for a command."""
    tier: RiskTier                        # overall classification
    confidence: float                     # 0.0 to 1.0
    is_dangerous: bool                    # verdict passed in from the pattern detector
    pattern_key: Optional[str]            # pattern key, when a pattern matched
    description: str                      # human-readable pattern description
    path_risk: Optional[RiskTier] = None  # path-only tier; None when no path matched
    # FIX: was `path_matches: list = None` — a None default that contradicts
    # the declared list type; default_factory gives each instance its own list.
    path_matches: list = field(default_factory=list)
    reasoning: str = ""                   # explanation of the classification


def assess_path_risk(command: str) -> tuple[RiskTier, list[str]]:
    """Assess risk based on file paths in the command.

    Scans the command against the tiered path tables and returns
    (highest_risk_tier, list_of_matched_path_fragments).
    """
    matched: list[str] = []
    highest = RiskTier.SAFE
    for patterns, tier in [
        (_CRITICAL_PATHS, RiskTier.CRITICAL),
        (_HIGH_PATHS, RiskTier.HIGH),
        (_MEDIUM_PATHS, RiskTier.MEDIUM),
        (_LOW_PATHS, RiskTier.LOW),
    ]:
        for pattern in patterns:
            for m in pattern.finditer(command):
                # group(0) is the whole matched fragment, e.g. "/etc/passwd"
                matched.append(m.group(0))
                # Escalate via numeric severity, never via string labels.
                if _TIER_SEVERITY[tier] > _TIER_SEVERITY[highest]:
                    highest = tier
    return highest, matched
# Documentation-context indicators, compiled once at import time.
# FIX: the original rebuilt this list (five re.compile calls) on every
# invocation; is_documentation_context runs once per command, so the
# patterns are hoisted to module level. Behavior is unchanged.
_DOC_CONTEXT_PATTERNS = [
    # echo "... rm/chmod/kill/DROP/DELETE ..." — quoting, not running
    re.compile(r'echo\s+["\'].*(?:rm|chmod|kill|DROP|DELETE).*["\']', re.IGNORECASE),
    # appending output to a documentation file
    re.compile(r'(?:cat|printf|echo)\s+.*>>.*\.(?:md|txt|rst|doc)', re.IGNORECASE),
    # commit messages that mention dangerous-looking words
    re.compile(r'git\s+commit\s+.*-m.*["\']', re.IGNORECASE),
    # shell comments
    re.compile(r'#.*(?:rm|chmod|kill)', re.IGNORECASE),
    # opening a documentation file in an editor
    re.compile(r'(?:vim|nano|emacs|code)\s+.*\.(?:md|txt|rst)', re.IGNORECASE),
]


def is_documentation_context(command: str) -> bool:
    """Detect if the command is in a documentation/echo context, not execution.

    Examples:
        - 'echo "rm -rf /" >> docs.txt' — documentation, not execution
        - 'cat << EOF ... rm ... EOF' — heredoc generation
        - 'git commit -m "removed old files with rm"' — commit message

    Returns True when any documentation indicator matches the command.
    """
    return any(p.search(command) for p in _DOC_CONTEXT_PATTERNS)
def classify_risk(command: str, is_dangerous: bool = False,
                  pattern_key: Optional[str] = None,
                  description: Optional[str] = None) -> RiskAssessment:
    """Classify the risk of a command with path-awareness and context.

    Args:
        command: The command string.
        is_dangerous: Whether detect_dangerous_command flagged it.
        pattern_key: The pattern key from detect_dangerous_command.
            (FIX: annotated Optional[str]; the default is None.)
        description: The description from detect_dangerous_command.
            (FIX: annotated Optional[str]; the default is None.)

    Returns:
        RiskAssessment with tier, confidence, and reasoning.
    """
    # Empty / whitespace-only commands are trivially safe.
    if not command or not command.strip():
        return RiskAssessment(
            tier=RiskTier.SAFE, confidence=1.0, is_dangerous=False,
            pattern_key=None, description="empty command"
        )
    # Check for documentation context (reduces risk)
    in_doc_context = is_documentation_context(command)
    # Assess path risk independently of the pattern detector's verdict.
    path_tier, path_matches = assess_path_risk(command)
    # Combine pattern-based and path-based risk
    if is_dangerous:
        # Pattern matched — this is the baseline. A sensitive path is
        # checked before doc context on purpose: a dangerous pattern that
        # also touches a sensitive path is never downgraded.
        if path_tier == RiskTier.CRITICAL:
            tier = RiskTier.CRITICAL
            confidence = 0.95
            reasoning = f"Dangerous pattern + critical system path: {path_matches[:3]}"
        elif path_tier in (RiskTier.HIGH, RiskTier.MEDIUM):
            tier = RiskTier.HIGH
            confidence = 0.85
            reasoning = f"Dangerous pattern + sensitive path: {path_matches[:3]}"
        elif in_doc_context:
            tier = RiskTier.LOW
            confidence = 0.6
            # FIX: dropped spurious f-prefix (no placeholders in this string)
            reasoning = "Dangerous pattern in documentation context"
        else:
            tier = RiskTier.HIGH
            confidence = 0.8
            reasoning = f"Dangerous pattern: {description or pattern_key}"
    else:
        # No pattern matched — use path-only risk, one tier below what the
        # pattern+path combination would yield.
        if path_tier == RiskTier.CRITICAL:
            tier = RiskTier.MEDIUM  # Path alone is less risky than pattern+path
            confidence = 0.6
            reasoning = f"Critical system path referenced: {path_matches[:3]}"
        elif path_tier == RiskTier.HIGH:
            tier = RiskTier.LOW
            confidence = 0.5
            reasoning = f"Sensitive path referenced: {path_matches[:3]}"
        elif path_tier == RiskTier.MEDIUM:
            tier = RiskTier.SAFE
            confidence = 0.7
            # FIX: dropped spurious f-prefix (no placeholders in this string)
            reasoning = "User path referenced — likely safe"
        else:
            tier = RiskTier.SAFE
            confidence = 0.9
            reasoning = "No dangerous patterns or sensitive paths detected"
    return RiskAssessment(
        tier=tier,
        confidence=confidence,
        is_dangerous=is_dangerous,
        pattern_key=pattern_key,
        description=description or "",
        path_risk=path_tier if path_tier != RiskTier.SAFE else None,
        path_matches=path_matches,
        reasoning=reasoning,
    )