Compare commits

...

2 Commits

Author SHA1 Message Date
3830b32dd0 feat: risk scoring tests (#681)
Some checks failed
Docker Build and Publish / build-and-push (pull_request) Has been skipped
Contributor Attribution Check / check-attribution (pull_request) Failing after 21s
Supply Chain Audit / Scan PR for supply chain risks (pull_request) Successful in 31s
Tests / e2e (pull_request) Successful in 2m11s
Tests / test (pull_request) Failing after 31m49s
2026-04-14 23:00:25 +00:00
8afd7c82ef feat: path-aware risk scoring for terminal commands (#681) 2026-04-14 23:00:20 +00:00
2 changed files with 266 additions and 0 deletions

View File

@@ -0,0 +1,81 @@
"""Tests for path-aware risk scoring (#681)."""
import pytest
from tools.risk_scoring import (
classify_risk,
assess_path_risk,
is_documentation_context,
RiskTier,
RiskAssessment,
)
class TestPathRiskAssessment:
    """assess_path_risk: tier selection driven by the paths a command touches."""

    def test_critical_system_path(self):
        """Reading /etc/passwd must land in the CRITICAL tier with a hit recorded."""
        observed_tier, hits = assess_path_risk("cat /etc/passwd")
        assert observed_tier == RiskTier.CRITICAL
        assert hits

    def test_high_system_path(self):
        """Editing an /etc config is at least HIGH (CRITICAL also acceptable)."""
        observed_tier, _hits = assess_path_risk("vim /etc/nginx.conf")
        assert observed_tier in (RiskTier.HIGH, RiskTier.CRITICAL)

    def test_medium_user_path(self):
        """Deleting inside a home directory is MEDIUM."""
        observed_tier, _hits = assess_path_risk("rm ~/Documents/report.pdf")
        assert observed_tier == RiskTier.MEDIUM

    def test_low_temp_path(self):
        """Deleting under /tmp is LOW."""
        observed_tier, _hits = assess_path_risk("rm /tmp/cache.txt")
        assert observed_tier == RiskTier.LOW

    def test_no_path(self):
        """A command with no filesystem path is SAFE and matches nothing."""
        observed_tier, hits = assess_path_risk("ls -la")
        assert observed_tier == RiskTier.SAFE
        assert not hits
class TestDocumentationContext:
    """is_documentation_context: quoting/committing vs. actually executing."""

    def test_echo_redirect_to_doc(self):
        """Echoing a scary string into a markdown file is documentation."""
        cmd = 'echo "rm -rf /" >> docs.md'
        assert is_documentation_context(cmd) is True

    def test_git_commit_message(self):
        """A commit message that mentions removals is not an execution."""
        cmd = 'git commit -m "removed old files"'
        assert is_documentation_context(cmd) is True

    def test_comment(self):
        """A commented-out rm is inert."""
        cmd = '# rm -rf dangerous'
        assert is_documentation_context(cmd) is True

    def test_actual_command(self):
        """A real rm invocation is execution context, not documentation."""
        cmd = 'rm -rf /tmp/old'
        assert is_documentation_context(cmd) is False
class TestClassifyRisk:
    """classify_risk: end-to-end tier and confidence decisions."""

    def test_safe_command(self):
        """A benign listing is SAFE with high confidence."""
        assessment = classify_risk("ls -la")
        assert assessment.tier == RiskTier.SAFE
        assert assessment.confidence > 0.8

    def test_dangerous_with_critical_path(self):
        """Dangerous pattern plus a critical system path escalates to CRITICAL."""
        assessment = classify_risk(
            "rm /etc/passwd",
            is_dangerous=True,
            pattern_key="rm_root",
            description="delete in root path",
        )
        assert assessment.tier == RiskTier.CRITICAL
        assert assessment.confidence > 0.9

    def test_dangerous_in_doc_context(self):
        """A dangerous pattern inside an echo-to-docs line is downgraded to LOW."""
        assessment = classify_risk(
            'echo "rm -rf /" >> docs.md',
            is_dangerous=True,
            pattern_key="rm_root",
            description="delete in root path",
        )
        assert assessment.tier == RiskTier.LOW

    def test_rm_temp_file(self):
        """rm temp.txt is SAFE despite matching rm pattern."""
        assessment = classify_risk("rm /tmp/cache.txt", is_dangerous=False)
        assert assessment.tier == RiskTier.SAFE

    def test_empty_command(self):
        """The empty command short-circuits to SAFE with full confidence."""
        assessment = classify_risk("")
        assert assessment.tier == RiskTier.SAFE
        assert assessment.confidence == 1.0

    def test_reasoning_populated(self):
        """Every non-trivial classification carries a non-empty reasoning string."""
        assessment = classify_risk(
            "cat /etc/shadow",
            is_dangerous=True,
            pattern_key="overwrite",
            description="overwrite system config",
        )
        assert assessment.reasoning

185
tools/risk_scoring.py Normal file
View File

@@ -0,0 +1,185 @@
"""
Path-aware risk scoring for terminal commands (#681).
Enhances the existing dangerous command detection with:
1. Path-aware tiers: system paths = CRITICAL, user paths = MEDIUM, temp = LOW
2. Context awareness: documentation vs execution context
3. Confidence scoring: how certain is the classification?
This module wraps detect_dangerous_command() from tools/approval.py
and adds tier/confidence information without modifying the original.
"""
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import Optional
class RiskTier(Enum):
    """Risk tier for a command or action.

    The string values are stable labels (for logs/serialization). They do
    NOT sort meaningfully — use _TIER_SEVERITY for ordering comparisons.
    """
    SAFE = "safe"
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"


# Numeric severity rank per tier. BUG FIX: the original compared
# RiskTier.value strings lexicographically ("critical" > "safe" is False,
# as are "low"/"medium"/"high" > "safe"), so the assessed tier could never
# escalate past SAFE and every path-tier test failed.
_TIER_SEVERITY = {
    RiskTier.SAFE: 0,
    RiskTier.LOW: 1,
    RiskTier.MEDIUM: 2,
    RiskTier.HIGH: 3,
    RiskTier.CRITICAL: 4,
}

# System paths that indicate high-risk operations.
# BUG FIX: all alternation groups are non-capturing (?:...) so that a match
# reports the full matched path fragment; with capturing groups, findall()
# returned only the group contents — or tuples for multi-group patterns
# such as the original /usr/(local/)?(s)?bin/ — instead of paths.
_CRITICAL_PATHS = [
    re.compile(r"/etc/(?:passwd|shadow|sudoers|ssh/)", re.IGNORECASE),
    re.compile(r"/boot/", re.IGNORECASE),
    re.compile(r"/dev/sd", re.IGNORECASE),
    re.compile(r"/proc/sysrq-trigger", re.IGNORECASE),
    re.compile(r"/usr/bin/(?:shutdown|reboot|halt|poweroff)", re.IGNORECASE),
    re.compile(r"/var/log/(?:syslog|auth|kern)", re.IGNORECASE),
]
_HIGH_PATHS = [
    re.compile(r"/etc/", re.IGNORECASE),
    re.compile(r"/usr/(?:local/)?s?bin/", re.IGNORECASE),
    re.compile(r"/var/(?:lib|spool|cache)/", re.IGNORECASE),
    re.compile(r"/opt/", re.IGNORECASE),
]
_MEDIUM_PATHS = [
    re.compile(r"/home/", re.IGNORECASE),
    re.compile(r"/root/", re.IGNORECASE),
    re.compile(r"~/", re.IGNORECASE),
    re.compile(r"/Users/", re.IGNORECASE),
]
_LOW_PATHS = [
    re.compile(r"/tmp/", re.IGNORECASE),
    re.compile(r"/var/tmp/", re.IGNORECASE),
    re.compile(r"\./", re.IGNORECASE),  # relative current dir
]


@dataclass
class RiskAssessment:
    """Risk assessment for a command."""
    tier: RiskTier                        # overall classification
    confidence: float                     # 0.0 to 1.0
    is_dangerous: bool                    # verdict passed in from the pattern detector
    pattern_key: Optional[str]            # pattern key, when a pattern matched
    description: str                      # human-readable pattern description
    path_risk: Optional[RiskTier] = None  # path-only tier; None when no path matched
    # FIX: was `path_matches: list = None` — a None default that contradicts
    # the declared list type; default_factory gives each instance its own list.
    path_matches: list = field(default_factory=list)
    reasoning: str = ""                   # explanation of the classification


def assess_path_risk(command: str) -> tuple[RiskTier, list[str]]:
    """Assess risk based on file paths in the command.

    Scans the command against the tiered path tables and returns
    (highest_risk_tier, list_of_matched_path_fragments).
    """
    matched: list[str] = []
    highest = RiskTier.SAFE
    for patterns, tier in [
        (_CRITICAL_PATHS, RiskTier.CRITICAL),
        (_HIGH_PATHS, RiskTier.HIGH),
        (_MEDIUM_PATHS, RiskTier.MEDIUM),
        (_LOW_PATHS, RiskTier.LOW),
    ]:
        for pattern in patterns:
            for m in pattern.finditer(command):
                # group(0) is the whole matched fragment, e.g. "/etc/passwd"
                matched.append(m.group(0))
                # Escalate via numeric severity, never via string labels.
                if _TIER_SEVERITY[tier] > _TIER_SEVERITY[highest]:
                    highest = tier
    return highest, matched
# Documentation-context indicators, compiled once at import time.
# FIX: the original rebuilt this list (five re.compile calls) on every
# invocation; is_documentation_context runs once per command, so the
# patterns are hoisted to module level. Behavior is unchanged.
_DOC_CONTEXT_PATTERNS = [
    # echo "... rm/chmod/kill/DROP/DELETE ..." — quoting, not running
    re.compile(r'echo\s+["\'].*(?:rm|chmod|kill|DROP|DELETE).*["\']', re.IGNORECASE),
    # appending output to a documentation file
    re.compile(r'(?:cat|printf|echo)\s+.*>>.*\.(?:md|txt|rst|doc)', re.IGNORECASE),
    # commit messages that mention dangerous-looking words
    re.compile(r'git\s+commit\s+.*-m.*["\']', re.IGNORECASE),
    # shell comments
    re.compile(r'#.*(?:rm|chmod|kill)', re.IGNORECASE),
    # opening a documentation file in an editor
    re.compile(r'(?:vim|nano|emacs|code)\s+.*\.(?:md|txt|rst)', re.IGNORECASE),
]


def is_documentation_context(command: str) -> bool:
    """Detect if the command is in a documentation/echo context, not execution.

    Examples:
        - 'echo "rm -rf /" >> docs.txt' — documentation, not execution
        - 'cat << EOF ... rm ... EOF' — heredoc generation
        - 'git commit -m "removed old files with rm"' — commit message

    Returns True when any documentation indicator matches the command.
    """
    return any(p.search(command) for p in _DOC_CONTEXT_PATTERNS)
def classify_risk(command: str, is_dangerous: bool = False,
                  pattern_key: Optional[str] = None,
                  description: Optional[str] = None) -> RiskAssessment:
    """Classify the risk of a command with path-awareness and context.

    Args:
        command: The command string.
        is_dangerous: Whether detect_dangerous_command flagged it.
        pattern_key: The pattern key from detect_dangerous_command.
            (FIX: annotated Optional[str]; the default is None.)
        description: The description from detect_dangerous_command.
            (FIX: annotated Optional[str]; the default is None.)

    Returns:
        RiskAssessment with tier, confidence, and reasoning.
    """
    # Empty / whitespace-only commands are trivially safe.
    if not command or not command.strip():
        return RiskAssessment(
            tier=RiskTier.SAFE, confidence=1.0, is_dangerous=False,
            pattern_key=None, description="empty command"
        )
    # Check for documentation context (reduces risk)
    in_doc_context = is_documentation_context(command)
    # Assess path risk independently of the pattern detector's verdict.
    path_tier, path_matches = assess_path_risk(command)
    # Combine pattern-based and path-based risk
    if is_dangerous:
        # Pattern matched — this is the baseline. A sensitive path is
        # checked before doc context on purpose: a dangerous pattern that
        # also touches a sensitive path is never downgraded.
        if path_tier == RiskTier.CRITICAL:
            tier = RiskTier.CRITICAL
            confidence = 0.95
            reasoning = f"Dangerous pattern + critical system path: {path_matches[:3]}"
        elif path_tier in (RiskTier.HIGH, RiskTier.MEDIUM):
            tier = RiskTier.HIGH
            confidence = 0.85
            reasoning = f"Dangerous pattern + sensitive path: {path_matches[:3]}"
        elif in_doc_context:
            tier = RiskTier.LOW
            confidence = 0.6
            # FIX: dropped spurious f-prefix (no placeholders in this string)
            reasoning = "Dangerous pattern in documentation context"
        else:
            tier = RiskTier.HIGH
            confidence = 0.8
            reasoning = f"Dangerous pattern: {description or pattern_key}"
    else:
        # No pattern matched — use path-only risk, one tier below what the
        # pattern+path combination would yield.
        if path_tier == RiskTier.CRITICAL:
            tier = RiskTier.MEDIUM  # Path alone is less risky than pattern+path
            confidence = 0.6
            reasoning = f"Critical system path referenced: {path_matches[:3]}"
        elif path_tier == RiskTier.HIGH:
            tier = RiskTier.LOW
            confidence = 0.5
            reasoning = f"Sensitive path referenced: {path_matches[:3]}"
        elif path_tier == RiskTier.MEDIUM:
            tier = RiskTier.SAFE
            confidence = 0.7
            # FIX: dropped spurious f-prefix (no placeholders in this string)
            reasoning = "User path referenced — likely safe"
        else:
            tier = RiskTier.SAFE
            confidence = 0.9
            reasoning = "No dangerous patterns or sensitive paths detected"
    return RiskAssessment(
        tier=tier,
        confidence=confidence,
        is_dangerous=is_dangerous,
        pattern_key=pattern_key,
        description=description or "",
        path_risk=path_tier if path_tier != RiskTier.SAFE else None,
        path_matches=path_matches,
        reasoning=reasoning,
    )