security: fix V-011 Skills Guard Bypass with AST analysis and normalization
This commit is contained in:
@@ -3,10 +3,11 @@
|
|||||||
Skills Guard — Security scanner for externally-sourced skills.
|
Skills Guard — Security scanner for externally-sourced skills.
|
||||||
|
|
||||||
Every skill downloaded from a registry passes through this scanner before
|
Every skill downloaded from a registry passes through this scanner before
|
||||||
installation. It uses regex-based static analysis to detect known-bad patterns
|
installation. It uses regex-based static analysis and AST analysis to detect
|
||||||
(data exfiltration, prompt injection, destructive commands, persistence, etc.)
|
known-bad patterns (data exfiltration, prompt injection, destructive commands,
|
||||||
and a trust-aware install policy that determines whether a skill is allowed
|
persistence, obfuscation, etc.) and a trust-aware install policy that determines
|
||||||
based on both the scan verdict and the source's trust level.
|
whether a skill is allowed based on both the scan verdict and the source's
|
||||||
|
trust level.
|
||||||
|
|
||||||
Trust levels:
|
Trust levels:
|
||||||
- builtin: Ships with Hermes. Never scanned, always trusted.
|
- builtin: Ships with Hermes. Never scanned, always trusted.
|
||||||
@@ -22,12 +23,14 @@ Usage:
|
|||||||
print(format_scan_report(result))
|
print(format_scan_report(result))
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import re
|
import ast
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import re
|
||||||
|
import unicodedata
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import List, Tuple
|
from typing import List, Set, Tuple
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -501,7 +504,25 @@ SUSPICIOUS_BINARY_EXTENSIONS = {
|
|||||||
'.msi', '.dmg', '.app', '.deb', '.rpm',
|
'.msi', '.dmg', '.app', '.deb', '.rpm',
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Input normalization for bypass detection
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
# Zero-width and invisible unicode characters used for injection
|
# Zero-width and invisible unicode characters used for injection
|
||||||
|
# These are removed during normalization
|
||||||
|
# Zero-width / invisible code points that attackers splice into identifiers
# (e.g. "ev<U+200B>al") to dodge literal pattern matching.  normalize_input()
# deletes every one of these before any other processing step.
ZERO_WIDTH_CHARS = frozenset(
    '\u200b'  # zero-width space
    '\u200c'  # zero-width non-joiner
    '\u200d'  # zero-width joiner
    '\u2060'  # word joiner
    '\u2062'  # invisible times
    '\u2063'  # invisible separator
    '\u2064'  # invisible plus
    '\ufeff'  # zero-width no-break space (BOM)
)
|
||||||
|
|
||||||
|
# Extended invisible characters for detection (reporting only)
|
||||||
INVISIBLE_CHARS = {
|
INVISIBLE_CHARS = {
|
||||||
'\u200b', # zero-width space
|
'\u200b', # zero-width space
|
||||||
'\u200c', # zero-width non-joiner
|
'\u200c', # zero-width non-joiner
|
||||||
@@ -522,6 +543,311 @@ INVISIBLE_CHARS = {
|
|||||||
'\u2069', # pop directional isolate
|
'\u2069', # pop directional isolate
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Unicode homoglyph mapping for common confusable characters
|
||||||
|
# Maps lookalike characters to their ASCII equivalents
|
||||||
|
# Confusable-character substitution table: each lookalike code point maps to
# the ASCII letter it imitates, so e.g. Cyrillic "еval" collapses to "eval"
# after str.translate().  Grouped here by target letter; the groups cover
# fullwidth Latin (upper/lower), Cyrillic, and Greek lookalikes.
_HOMOGLYPH_GROUPS = [
    # (lookalike characters, ASCII equivalent)
    ('\uff45\uff25\u0435\u0415\u03b5', 'e'),  # fullwidth e/E, Cyrillic е/Е, Greek ε
    ('\uff56\uff36', 'v'),                    # fullwidth v/V
    ('\uff41\uff21\u0430\u0410\u03b1', 'a'),  # fullwidth a/A, Cyrillic а/А, Greek α
    ('\uff4c\uff2c', 'l'),                    # fullwidth l/L
    ('\uff4f\uff2f\u043e\u041e\u03bf', 'o'),  # fullwidth o/O, Cyrillic о/О, Greek ο
    ('\uff53\uff33', 's'),                    # fullwidth s/S
    ('\uff58\uff38\u0445\u0425', 'x'),        # fullwidth x/X, Cyrillic х/Х
    ('\uff43\uff23\u0441\u0421', 'c'),        # fullwidth c/C, Cyrillic с/С
    ('\u0440\u0420\u03c1', 'p'),              # Cyrillic р/Р, Greek ρ
    ('\u0456\u0406', 'i'),                    # Cyrillic і/І (U+0456/U+0406)
]
HOMOGLYPH_MAP = str.maketrans({
    lookalike: ascii_char
    for lookalikes, ascii_char in _HOMOGLYPH_GROUPS
    for lookalike in lookalikes
})
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_input(text: str) -> str:
    """
    Normalize input text to defeat obfuscation attempts.

    Pipeline (order matters):
      1. Removal of zero-width characters (U+200B, U+200C, U+200D, U+FEFF,
         etc.) so that e.g. "ev" + U+200B + "al" collapses to "eval".
      2. NFKC Unicode normalization (decomposes + canonicalizes). This
         already folds fullwidth Latin forms (U+FF41..) down to ASCII.
      3. Homoglyph substitution for lookalikes NFKC does not touch
         (Cyrillic and Greek confusables; fullwidth entries in the map are
         a harmless belt-and-braces overlap with step 2).
      4. Case folding, so "EvAl" matches "eval".

    Args:
        text: The input text to normalize (untrusted skill content)

    Returns:
        Normalized text with obfuscation removed, suitable for regex
        threat-pattern matching
    """
    # Step 1: Delete every zero-width character in one C-level pass
    # (mapping a code point to None in str.translate removes it).
    text = text.translate({ord(ch): None for ch in ZERO_WIDTH_CHARS})

    # Step 2: NFKC normalization (decomposes characters, canonicalizes)
    text = unicodedata.normalize('NFKC', text)

    # Step 3: Homoglyph substitution (Cyrillic/Greek confusables)
    text = text.translate(HOMOGLYPH_MAP)

    # Step 4: Case folding — stronger than lower() (handles ß -> ss etc.)
    return text.casefold()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# AST-based Python security analysis
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class PythonSecurityAnalyzer(ast.NodeVisitor):
    """
    AST visitor that detects obfuscated Python code execution patterns.

    Detects:
    - Direct dangerous calls: eval(), exec(), compile(), __import__()
    - Dynamic access: getattr(__builtins__, ...), globals()['eval']
    - String concatenation obfuscation: 'e'+'v'+'a'+'l'
    - Encoded attribute access via subscripts
    """

    # Builtins capable of executing (or loading) arbitrary code.
    DANGEROUS_BUILTINS: Set[str] = {
        'eval', 'exec', 'compile', '__import__',
        'open', 'execfile',  # Python 2 compatibility concerns
    }

    def __init__(self, source_lines: List[str], file_path: str):
        self.findings: List[Finding] = []  # accumulated scan results
        self.source_lines = source_lines   # original text, one entry per line
        self.file_path = file_path         # reported in every Finding
        self.line_offsets = self._build_line_offsets()

    def _build_line_offsets(self) -> List[int]:
        """Build offset map for converting absolute position to line number."""
        offsets = [0]
        for line in self.source_lines:
            offsets.append(offsets[-1] + len(line) + 1)  # +1 for the newline
        return offsets

    def _get_line_from_offset(self, offset: int) -> int:
        """Convert absolute character offset to 1-based line number."""
        for index, start in enumerate(self.line_offsets):
            if offset < start:
                return max(1, index)
        return len(self.line_offsets)

    def _get_line_content(self, lineno: int) -> str:
        """Get the content of a specific line (1-based); '' when out of range."""
        if 1 <= lineno <= len(self.source_lines):
            return self.source_lines[lineno - 1]
        return ""

    def _add_finding(self, pattern_id: str, severity: str, category: str,
                     node: ast.AST, description: str) -> None:
        """Record one finding, capturing a truncated snippet of the line."""
        lineno = getattr(node, 'lineno', 1)
        snippet = self._get_line_content(lineno).strip()
        if len(snippet) > 120:
            snippet = snippet[:117] + "..."

        self.findings.append(Finding(
            pattern_id=pattern_id,
            severity=severity,
            category=category,
            file=self.file_path,
            line=lineno,
            match=snippet,
            description=description,
        ))

    def _is_string_concat(self, node: ast.AST) -> bool:
        """True if *node* is (part of) a string concatenation expression."""
        if isinstance(node, ast.BinOp) and isinstance(node.op, ast.Add):
            return self._is_string_concat(node.left) or self._is_string_concat(node.right)
        if isinstance(node, ast.Constant) and isinstance(node.value, str):
            return True
        return isinstance(node, ast.JoinedStr)

    def _concat_to_string(self, node: ast.AST) -> str:
        """Fold a chain of '+'-joined string constants into one string."""
        if isinstance(node, ast.Constant) and isinstance(node.value, str):
            return node.value
        if isinstance(node, ast.BinOp) and isinstance(node.op, ast.Add):
            return self._concat_to_string(node.left) + self._concat_to_string(node.right)
        return ""  # non-constant pieces contribute nothing

    def visit_Call(self, node: ast.Call) -> None:
        """Detect dangerous function calls including obfuscated variants."""
        callee = node.func

        if isinstance(callee, ast.Name):
            # Direct call: eval(...), exec(...), etc.
            if callee.id in self.DANGEROUS_BUILTINS:
                self._add_finding(
                    f"ast_dangerous_call_{callee.id}",
                    "high", "obfuscation", node,
                    f"Dangerous builtin call: {callee.id}()"
                )
            # getattr(__builtins__, ...) pattern
            if callee.id == 'getattr' and len(node.args) >= 2:
                target, attr_name = node.args[0], node.args[1]

                if (isinstance(target, ast.Name) and
                        target.id in ('__builtins__', 'builtins')):
                    self._add_finding(
                        "ast_getattr_builtins", "critical", "obfuscation", node,
                        "Dynamic access to builtins via getattr() (evasion technique)"
                    )

                # getattr(..., 'eval') / getattr(..., 'exec')
                if isinstance(attr_name, ast.Constant) and isinstance(attr_name.value, str):
                    if attr_name.value in self.DANGEROUS_BUILTINS:
                        self._add_finding(
                            f"ast_getattr_{attr_name.value}", "critical", "obfuscation", node,
                            f"Dynamic retrieval of {attr_name.value} via getattr()"
                        )

        # Called subscript: globals()['eval'](...) / locals()[...](...)
        # AST shape: Call(func=Subscript(value=Call(func=Name('globals')), slice=...))
        if isinstance(callee, ast.Subscript):
            container = callee.value
            if (isinstance(container, ast.Call) and
                    isinstance(container.func, ast.Name) and
                    container.func.id in ('globals', 'locals')):
                self._add_finding(
                    "ast_dynamic_global_access", "critical", "obfuscation", node,
                    f"Dynamic function call via {container.func.id}()[...] (evasion technique)"
                )
            # Bare globals[...] without the call (less common but possible)
            elif isinstance(container, ast.Name) and container.id in ('globals', 'locals'):
                self._add_finding(
                    "ast_dynamic_global_access", "critical", "obfuscation", node,
                    f"Dynamic function call via {container.id}[...] (evasion technique)"
                )

        # String-concatenation obfuscation in arguments, e.g. 'e'+'v'+'a'+'l'
        for arg in node.args:
            if self._is_string_concat(arg):
                normalized = normalize_input(self._concat_to_string(arg))
                if normalized in self.DANGEROUS_BUILTINS:
                    self._add_finding(
                        f"ast_concat_{normalized}", "critical", "obfuscation", node,
                        f"String concatenation obfuscation building '{normalized}'"
                    )

        self.generic_visit(node)

    def visit_Subscript(self, node: ast.Subscript) -> None:
        """Detect globals()['eval'] / locals()['exec'] patterns."""
        # AST shape for `globals()['eval']`:
        # Subscript(value=Call(func=Name('globals')), slice=Constant('eval'))
        container = node.value
        dyn_scope = None

        if isinstance(container, ast.Call) and isinstance(container.func, ast.Name):
            if container.func.id in ('globals', 'locals'):
                dyn_scope = container.func.id
        # Direct globals[...] without the call (less common)
        elif isinstance(container, ast.Name) and container.id in ('globals', 'locals'):
            dyn_scope = container.id

        if dyn_scope:
            # Literal key: globals()['eval']
            if isinstance(node.slice, ast.Constant) and isinstance(node.slice.value, str):
                key = node.slice.value
                if key in self.DANGEROUS_BUILTINS:
                    self._add_finding(
                        f"ast_{dyn_scope}_subscript_{key}",
                        "critical", "obfuscation", node,
                        f"Dynamic access to {key} via {dyn_scope}()['{key}']"
                    )
            # Concatenated key: globals()['e'+'v'+'a'+'l']
            elif isinstance(node.slice, ast.BinOp):
                normalized = normalize_input(self._concat_to_string(node.slice))
                if normalized in self.DANGEROUS_BUILTINS:
                    self._add_finding(
                        f"ast_{dyn_scope}_concat_{normalized}",
                        "critical", "obfuscation", node,
                        f"String concatenation obfuscation via {dyn_scope}()['...']"
                    )

        # __builtins__[...]
        if isinstance(node.value, ast.Name) and node.value.id == '__builtins__':
            self._add_finding(
                "ast_builtins_subscript", "high", "obfuscation", node,
                "Direct subscript access to __builtins__"
            )

        self.generic_visit(node)

    def visit_BinOp(self, node: ast.BinOp) -> None:
        """Detect string concatenation building dangerous function names."""
        if isinstance(node.op, ast.Add):
            normalized = normalize_input(self._concat_to_string(node))
            if normalized in self.DANGEROUS_BUILTINS:
                self._add_finding(
                    f"ast_string_concat_{normalized}", "high", "obfuscation", node,
                    f"String concatenation building '{normalized}' (possible obfuscation)"
                )

        self.generic_visit(node)

    def visit_Attribute(self, node: ast.Attribute) -> None:
        """Detect obj.eval, obj.exec patterns."""
        if node.attr in self.DANGEROUS_BUILTINS:
            self._add_finding(
                f"ast_attr_{node.attr}", "medium", "obfuscation", node,
                f"Access to .{node.attr} attribute (context-dependent risk)"
            )
        self.generic_visit(node)
|
||||||
|
|
||||||
|
|
||||||
|
def analyze_python_ast(content: str, file_path: str) -> List[Finding]:
    """
    Parse Python code and analyze its AST for security issues.

    Args:
        content: The Python source code to analyze
        file_path: Path to the file (for reporting)

    Returns:
        List of findings from AST analysis. Empty when the content cannot
        be parsed — unparseable code still goes through the regex layer.
    """
    lines = content.split('\n')

    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError, RecursionError):
        # SyntaxError: not valid Python.
        # ValueError: e.g. null bytes in the source string.
        # RecursionError: adversarially deep nesting.
        # In every case fall back to regex scanning instead of crashing
        # the scanner on attacker-controlled input.
        return []

    analyzer = PythonSecurityAnalyzer(lines, file_path)
    try:
        analyzer.visit(tree)
    except RecursionError:
        # Extremely deep tree: keep whatever was found before the limit.
        pass
    return analyzer.findings
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Scanning functions
|
# Scanning functions
|
||||||
@@ -529,7 +855,12 @@ INVISIBLE_CHARS = {
|
|||||||
|
|
||||||
def scan_file(file_path: Path, rel_path: str = "") -> List[Finding]:
|
def scan_file(file_path: Path, rel_path: str = "") -> List[Finding]:
|
||||||
"""
|
"""
|
||||||
Scan a single file for threat patterns and invisible unicode characters.
|
Scan a single file for threat patterns, obfuscation, and invisible unicode.
|
||||||
|
|
||||||
|
Performs:
|
||||||
|
1. Invisible unicode character detection (on original content)
|
||||||
|
2. AST analysis for Python files (detects obfuscated execution patterns)
|
||||||
|
3. Regex pattern matching on normalized content (catches obfuscated variants)
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file_path: Absolute path to the file
|
file_path: Absolute path to the file
|
||||||
@@ -553,27 +884,7 @@ def scan_file(file_path: Path, rel_path: str = "") -> List[Finding]:
|
|||||||
lines = content.split('\n')
|
lines = content.split('\n')
|
||||||
seen = set() # (pattern_id, line_number) for deduplication
|
seen = set() # (pattern_id, line_number) for deduplication
|
||||||
|
|
||||||
# Regex pattern matching
|
# Step 1: Invisible unicode character detection (on original)
|
||||||
for pattern, pid, severity, category, description in THREAT_PATTERNS:
|
|
||||||
for i, line in enumerate(lines, start=1):
|
|
||||||
if (pid, i) in seen:
|
|
||||||
continue
|
|
||||||
if re.search(pattern, line, re.IGNORECASE):
|
|
||||||
seen.add((pid, i))
|
|
||||||
matched_text = line.strip()
|
|
||||||
if len(matched_text) > 120:
|
|
||||||
matched_text = matched_text[:117] + "..."
|
|
||||||
findings.append(Finding(
|
|
||||||
pattern_id=pid,
|
|
||||||
severity=severity,
|
|
||||||
category=category,
|
|
||||||
file=rel_path,
|
|
||||||
line=i,
|
|
||||||
match=matched_text,
|
|
||||||
description=description,
|
|
||||||
))
|
|
||||||
|
|
||||||
# Invisible unicode character detection
|
|
||||||
for i, line in enumerate(lines, start=1):
|
for i, line in enumerate(lines, start=1):
|
||||||
for char in INVISIBLE_CHARS:
|
for char in INVISIBLE_CHARS:
|
||||||
if char in line:
|
if char in line:
|
||||||
@@ -589,6 +900,38 @@ def scan_file(file_path: Path, rel_path: str = "") -> List[Finding]:
|
|||||||
))
|
))
|
||||||
break # one finding per line for invisible chars
|
break # one finding per line for invisible chars
|
||||||
|
|
||||||
|
# Step 2: AST analysis for Python files
|
||||||
|
if file_path.suffix.lower() == '.py':
|
||||||
|
ast_findings = analyze_python_ast(content, rel_path)
|
||||||
|
findings.extend(ast_findings)
|
||||||
|
|
||||||
|
# Step 3: Normalize content and run regex patterns
|
||||||
|
# This catches obfuscated variants like Cyrillic homoglyphs, fullwidth, etc.
|
||||||
|
normalized_content = normalize_input(content)
|
||||||
|
normalized_lines = normalized_content.split('\n')
|
||||||
|
|
||||||
|
# Map normalized line numbers to original line numbers (they should match)
|
||||||
|
for pattern, pid, severity, category, description in THREAT_PATTERNS:
|
||||||
|
for i, norm_line in enumerate(normalized_lines, start=1):
|
||||||
|
if (pid, i) in seen:
|
||||||
|
continue
|
||||||
|
if re.search(pattern, norm_line, re.IGNORECASE):
|
||||||
|
seen.add((pid, i))
|
||||||
|
# Show original line content for context
|
||||||
|
original_line = lines[i - 1] if i <= len(lines) else norm_line
|
||||||
|
matched_text = original_line.strip()
|
||||||
|
if len(matched_text) > 120:
|
||||||
|
matched_text = matched_text[:117] + "..."
|
||||||
|
findings.append(Finding(
|
||||||
|
pattern_id=pid,
|
||||||
|
severity=severity,
|
||||||
|
category=category,
|
||||||
|
file=rel_path,
|
||||||
|
line=i,
|
||||||
|
match=matched_text,
|
||||||
|
description=description,
|
||||||
|
))
|
||||||
|
|
||||||
return findings
|
return findings
|
||||||
|
|
||||||
|
|
||||||
@@ -598,8 +941,17 @@ def scan_skill(skill_path: Path, source: str = "community") -> ScanResult:
|
|||||||
|
|
||||||
Performs:
|
Performs:
|
||||||
1. Structural checks (file count, total size, binary files, symlinks)
|
1. Structural checks (file count, total size, binary files, symlinks)
|
||||||
2. Regex pattern matching on all text files
|
2. Unicode normalization to defeat obfuscation (NFKC, homoglyphs, zero-width)
|
||||||
3. Invisible unicode character detection
|
3. AST analysis for Python files (detects dynamic execution patterns)
|
||||||
|
4. Regex pattern matching on normalized content
|
||||||
|
5. Invisible unicode character detection
|
||||||
|
|
||||||
|
V-011 Bypass Protection:
|
||||||
|
- Unicode homoglyphs (Cyrillic, fullwidth, Greek lookalikes)
|
||||||
|
- Zero-width character injection (U+200B, U+200C, U+200D, U+FEFF)
|
||||||
|
- Case manipulation (EvAl, ExEc)
|
||||||
|
- String concatenation obfuscation ('e'+'v'+'a'+'l')
|
||||||
|
- Dynamic execution patterns (globals()['eval'], getattr(__builtins__, 'exec'))
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
skill_path: Path to the skill directory (must contain SKILL.md)
|
skill_path: Path to the skill directory (must contain SKILL.md)
|
||||||
|
|||||||
410
tools/test_skills_guard_v011.py
Normal file
410
tools/test_skills_guard_v011.py
Normal file
@@ -0,0 +1,410 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Tests for V-011 Skills Guard Bypass fix.
|
||||||
|
|
||||||
|
Tests all bypass techniques:
|
||||||
|
1. Unicode encoding tricks (fullwidth characters, Cyrillic homoglyphs)
|
||||||
|
2. Case manipulation (EvAl, ExEc)
|
||||||
|
3. Zero-width characters (U+200B, U+200C, U+200D, U+FEFF)
|
||||||
|
4. Dynamic execution obfuscation: globals()['ev'+'al'], getattr(__builtins__, 'exec')
|
||||||
|
5. String concatenation: 'e'+'v'+'a'+'l'
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add parent directory to path
|
||||||
|
sys.path.insert(0, str(Path(__file__).parent))
|
||||||
|
|
||||||
|
from skills_guard import (
|
||||||
|
normalize_input,
|
||||||
|
analyze_python_ast,
|
||||||
|
scan_file,
|
||||||
|
ZERO_WIDTH_CHARS,
|
||||||
|
HOMOGLYPH_MAP,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestNormalizeInput:
    """Test input normalization for obfuscation removal."""

    def test_zero_width_removal(self):
        """Test removal of zero-width characters."""
        cases = [
            ("ev\u200bal", "eval"),              # U+200B zero-width space
            ("e\u200bx\u200ce\u200dc", "exec"),  # several zero-width chars
            ("\ufeffeval", "eval"),              # U+FEFF BOM
        ]
        for obfuscated, expected in cases:
            normalized = normalize_input(obfuscated)
            assert normalized == expected, f"Expected '{expected}', got '{normalized}'"

        print("✓ Zero-width character removal tests passed")

    def test_case_folding(self):
        """Test case folding (lowercase conversion)."""
        for input_str, expected in (
            ("EvAl", "eval"),
            ("EXEC", "exec"),
            ("CoMpIlE", "compile"),
            ("GetAttr", "getattr"),
        ):
            normalized = normalize_input(input_str)
            assert normalized == expected, f"Expected '{expected}', got '{normalized}'"

        print("✓ Case folding tests passed")

    def test_fullwidth_normalization(self):
        """Test fullwidth character normalization."""
        # Fullwidth Latin forms should collapse to ASCII via NFKC
        for input_str, expected in (
            ("\uff45\uff56\uff41\uff4c", "eval"),  # fullwidth eval
            ("\uff25\uff36\uff21\uff2c", "eval"),  # fullwidth EVAL (uppercase)
            ("\uff45\uff58\uff45\uff43", "exec"),  # fullwidth exec
            ("\uff4f\uff53", "os"),                # fullwidth os
        ):
            normalized = normalize_input(input_str)
            assert normalized == expected, f"Expected '{expected}', got '{normalized}'"

        print("✓ Fullwidth normalization tests passed")

    def test_cyrillic_homoglyphs(self):
        """Test Cyrillic lookalike character normalization."""
        # e.g. Cyrillic е (U+0435) looks like Latin e (U+0065)
        for input_str, expected in (
            ("\u0435val", "eval"),                 # еval (Cyrillic е)
            ("\u0435x\u0435c", "exec"),            # еxеc (Cyrillic е's)
            ("\u0430\u0435\u0456\u043e", "aeio"),  # аеіо (all Cyrillic)
            ("g\u0435tattr", "getattr"),           # gеtattr (Cyrillic е)
        ):
            normalized = normalize_input(input_str)
            assert normalized == expected, f"Expected '{expected}', got '{normalized}'"

        print("✓ Cyrillic homoglyph tests passed")

    def test_combined_obfuscation(self):
        """Test combined obfuscation techniques."""
        # Case manipulation + zero-width + Cyrillic homoglyph at once
        obfuscated = "E\u200bV\u0430L"  # E + ZWS + V + Cyrillic а + L
        normalized = normalize_input(obfuscated)
        assert normalized == "eval", f"Expected 'eval', got '{normalized}'"

        print("✓ Combined obfuscation tests passed")
|
||||||
|
|
||||||
|
|
||||||
|
class TestASTAnalysis:
    """Test AST-based security analysis."""

    def test_direct_dangerous_calls(self):
        """Test detection of direct eval/exec/compile calls."""
        for snippet, name in (
            ("eval('1+1')", "eval"),
            ("exec('print(1)')", "exec"),
            ("compile('x', '<string>', 'exec')", "compile"),
        ):
            findings = analyze_python_ast(snippet, "test.py")
            assert any(name in f.pattern_id for f in findings), \
                f"Should detect {name}() call"

        print("✓ Direct dangerous call detection tests passed")

    def test_getattr_builtins_pattern(self):
        """Test detection of getattr(__builtins__, ...) pattern."""
        findings = analyze_python_ast("getattr(__builtins__, 'eval')", "test.py")
        assert any("getattr_builtins" in f.pattern_id for f in findings), \
            "Should detect getattr(__builtins__, ...) pattern"

        findings = analyze_python_ast("getattr(__builtins__, 'exec')", "test.py")
        assert any("getattr_exec" in f.pattern_id for f in findings), \
            "Should detect getattr(..., 'exec')"

        print("✓ getattr(__builtins__, ...) detection tests passed")

    def test_globals_subscript_pattern(self):
        """Test detection of globals()['eval'] pattern."""
        findings = analyze_python_ast("globals()['eval']('1+1')", "test.py")
        assert any("globals" in f.pattern_id for f in findings), \
            "Should detect globals()['eval'] pattern"

        findings = analyze_python_ast("locals()['exec']('print(1)')", "test.py")
        assert any("locals" in f.pattern_id for f in findings), \
            "Should detect locals()['exec'] pattern"

        print("✓ globals()/locals() subscript detection tests passed")

    def test_string_concatenation_obfuscation(self):
        """Test detection of string concatenation obfuscation."""
        # Bare concatenation called directly
        findings = analyze_python_ast("('e'+'v'+'a'+'l')('1+1')", "test.py")
        assert any("concat" in f.pattern_id for f in findings), \
            "Should detect string concatenation obfuscation"

        # Concatenation inside a globals() subscript
        findings = analyze_python_ast("globals()['e'+'v'+'a'+'l']('1+1')", "test.py")
        assert any("concat" in f.pattern_id for f in findings), \
            "Should detect concat in globals subscript"

        print("✓ String concatenation obfuscation detection tests passed")

    def test_dynamic_global_call(self):
        """Test detection of dynamic calls via globals()."""
        findings = analyze_python_ast("globals()['eval']('1+1')", "test.py")
        assert any("dynamic_global" in f.pattern_id for f in findings), \
            "Should detect dynamic global access"

        print("✓ Dynamic global call detection tests passed")

    def test_legitimate_code_not_flagged(self):
        """Test that legitimate code is not flagged."""
        # Ordinary functions, classes, and imports must produce zero
        # obfuscation findings.
        code = """
def calculate(x, y):
    result = x + y
    return result

class MyClass:
    def method(self):
        return "hello"

import os
print(os.path.join("a", "b"))
"""
        findings = analyze_python_ast(code, "test.py")
        flagged = [f for f in findings if f.category == "obfuscation"]
        assert len(flagged) == 0, \
            f"Legitimate code should not be flagged, got: {[f.description for f in flagged]}"

        print("✓ Legitimate code not flagged tests passed")
|
||||||
|
|
||||||
|
|
||||||
|
class TestScanFileIntegration:
    """Integration tests for scan_file with the V-011 detection layers.

    Each test writes a small skill file to disk, runs the full scan_file
    pipeline (normalization + regex + AST analysis), and inspects the
    resulting findings.
    """

    def _create_temp_file(self, content: str, suffix: str = ".py") -> Path:
        """Write *content* to a temporary file and return its path.

        The file is written as UTF-8 explicitly: these tests embed
        fullwidth, Cyrillic, and zero-width characters, and the platform
        default encoding (e.g. cp1252 on Windows) cannot represent them,
        which would raise UnicodeEncodeError before the scan even runs.
        The caller is responsible for unlinking the file.
        """
        with tempfile.NamedTemporaryFile(
            mode='w', suffix=suffix, delete=False, encoding='utf-8'
        ) as f:
            f.write(content)
        return Path(f.name)

    def test_unicode_obfuscation_detection(self):
        """Test that obfuscated eval is detected via normalization."""
        # Fullwidth "eval": U+FF45 U+FF56 U+FF41 U+FF4C
        code = "\uff45\uff56\uff41\uff4c('1+1')"
        path = self._create_temp_file(code)
        try:
            findings = scan_file(path, "test.py")
            # Regex patterns run against the normalized content, so the
            # plain "eval" patterns should match the fullwidth spelling.
            assert any("eval" in f.pattern_id.lower() or "eval" in f.description.lower()
                       for f in findings), \
                f"Should detect fullwidth eval, got: {[f.pattern_id for f in findings]}"
        finally:
            path.unlink()

        print("✓ Unicode obfuscation detection tests passed")

    def test_zero_width_character_detection(self):
        """Test detection of zero-width characters spliced into identifiers."""
        code = "ev\u200bal('1+1')"  # "eval" with a zero-width space inside
        path = self._create_temp_file(code)
        try:
            findings = scan_file(path, "test.py")
            assert any("invisible_unicode" in f.pattern_id for f in findings), \
                f"Should detect invisible unicode, got: {[f.pattern_id for f in findings]}"
        finally:
            path.unlink()

        print("✓ Zero-width character detection tests passed")

    def test_ast_and_regex_combined(self):
        """Test that AST-based findings surface through the scan_file pipeline."""
        code = """
# Obfuscated eval via string concat
func = ('e'+'v'+'a'+'l')
result = func('1+1')

# Also fullwidth in comment: eval
"""
        path = self._create_temp_file(code)
        try:
            findings = scan_file(path, "test.py")
            ast_findings = [f for f in findings if f.pattern_id.startswith("ast_")]
            assert len(ast_findings) > 0, "Should have AST-based findings"
        finally:
            path.unlink()

        print("✓ AST and regex combined detection tests passed")

    def test_cyrillic_in_code_detection(self):
        """Test detection of Cyrillic homoglyphs in code."""
        # Cyrillic е (U+0435) substituted for Latin e (U+0065).
        code = "\u0435val('1+1')"
        path = self._create_temp_file(code)
        try:
            findings = scan_file(path, "test.py")
            # The scanner's normalization maps the homoglyph back to Latin
            # (see the normalize_input tests), so regex should catch this.
            assert any("eval" in f.pattern_id.lower() or "eval" in f.description.lower()
                       for f in findings), \
                f"Should detect Cyrillic obfuscated eval, got: {[f.pattern_id for f in findings]}"
        finally:
            path.unlink()

        print("✓ Cyrillic homoglyph detection tests passed")
||||||
|
class TestBypassTechniques:
    """Test specific bypass techniques mentioned in the vulnerability report.

    Each test corresponds to a numbered bypass from the V-011 report and
    asserts that the normalization or AST layer neutralizes it.
    """

    def test_bypass_1_unicode_encoding(self):
        """Bypass 1: Unicode encoding tricks (fullwidth characters)."""
        # Fullwidth "eval": U+FF45 U+FF56 U+FF41 U+FF4C
        fullwidth_eval = "\uff45\uff56\uff41\uff4c"
        normalized = normalize_input(fullwidth_eval)
        assert normalized == "eval", "Fullwidth should normalize to ASCII"

        # Fullwidth "exec": U+FF45 U+FF58 U+FF45 U+FF43
        fullwidth_exec = "\uff45\uff58\uff45\uff43"
        normalized = normalize_input(fullwidth_exec)
        assert normalized == "exec", "Fullwidth exec should normalize"

        print("✓ Bypass 1: Unicode encoding tricks blocked")

    def test_bypass_2_case_manipulation(self):
        """Bypass 2: Case manipulation (EvAl, ExEc)."""
        # "EVAL" fixes a typo in the original list ("EVA"); the expected
        # value is derived via .lower(), so each entry checks case folding.
        test_cases = ["EvAl", "ExEc", "CoMpIlE", "EVAL", "exec"]
        for case in test_cases:
            normalized = normalize_input(case)
            expected = case.lower()
            assert normalized == expected, f"Case folding failed for {case}"

        print("✓ Bypass 2: Case manipulation blocked")

    def test_bypass_3_zero_width(self):
        """Bypass 3: Zero-width characters (U+200B, U+200C, U+200D, U+FEFF)."""
        # Every configured zero-width character must be stripped entirely.
        for char in ZERO_WIDTH_CHARS:
            obfuscated = f"ev{char}al"
            normalized = normalize_input(obfuscated)
            assert normalized == "eval", f"Zero-width char U+{ord(char):04X} not removed"

        print("✓ Bypass 3: Zero-width character injection blocked")

    def test_bypass_4_dynamic_execution(self):
        """Bypass 4: Dynamic execution obfuscation."""
        # Lookup of a dangerous builtin through globals().
        code1 = "globals()['eval']('1+1')"
        findings1 = analyze_python_ast(code1, "test.py")
        assert len([f for f in findings1 if "globals" in f.pattern_id]) > 0, \
            "globals()['eval'] should be detected"

        # Indirect access through getattr on __builtins__.
        code2 = "getattr(__builtins__, 'exec')"
        findings2 = analyze_python_ast(code2, "test.py")
        assert any("getattr_builtins" in f.pattern_id for f in findings2), \
            "getattr(__builtins__, ...) should be detected"

        print("✓ Bypass 4: Dynamic execution obfuscation blocked")

    def test_bypass_5_string_concatenation(self):
        """Bypass 5: String concatenation ('e'+'v'+'a'+'l')."""
        # Direct call on a concatenated name — AST should detect this.
        code = "('e'+'v'+'a'+'l')('1+1')"
        findings = analyze_python_ast(code, "test.py")
        assert any("concat" in f.pattern_id for f in findings), \
            "String concatenation obfuscation should be detected"

        # Same trick hidden inside a globals() subscript.
        code2 = "globals()['e'+'v'+'a'+'l']('1+1')"
        findings2 = analyze_python_ast(code2, "test.py")
        assert any("concat" in f.pattern_id for f in findings2), \
            "Concat in globals subscript should be detected"

        print("✓ Bypass 5: String concatenation obfuscation blocked")

    def test_cyrillic_homoglyph_bypass(self):
        """Test Cyrillic homoglyph bypass (е vs e)."""
        # Cyrillic е (U+0435) vs Latin e (U+0065) — distinct code points.
        cyrillic_e = "\u0435"
        latin_e = "e"

        assert cyrillic_e != latin_e, "Cyrillic and Latin e should be different"

        # After normalization, both must collapse to the same Latin letter.
        normalized_cyrillic = normalize_input(cyrillic_e)
        normalized_latin = normalize_input(latin_e)
        assert normalized_cyrillic == normalized_latin == "e", \
            "Cyrillic е should normalize to Latin e"

        # Full word: "еval" with a leading Cyrillic е.
        cyrillic_eval = "\u0435val"
        normalized = normalize_input(cyrillic_eval)
        assert normalized == "eval", "Cyrillic eval should normalize"

        print("✓ Cyrillic homoglyph bypass blocked")
||||||
|
def run_all_tests():
    """Discover and execute every test_* method on the V-011 test classes.

    Prints a per-class progress header, counts passes and failures, and
    exits the process with status 1 on any failure, 0 otherwise.
    """
    banner = "=" * 60
    print(banner)
    print("V-011 Skills Guard Bypass Fix Tests")
    print(banner)

    suites = [
        TestNormalizeInput,
        TestASTAnalysis,
        TestScanFileIntegration,
        TestBypassTechniques,
    ]

    passed = failed = 0

    for suite in suites:
        print(f"\n--- {suite.__name__} ---")
        runner = suite()
        for method_name in [n for n in dir(runner) if n.startswith("test_")]:
            try:
                getattr(runner, method_name)()
            except AssertionError as e:
                # A failed assertion inside a test body.
                print(f" ✗ FAILED: {method_name}: {e}")
                failed += 1
            except Exception as e:
                # Any other crash while running the test.
                print(f" ✗ ERROR: {method_name}: {e}")
                failed += 1
            else:
                passed += 1

    print("\n" + banner)
    print(f"Results: {passed} passed, {failed} failed")
    print(banner)

    if failed > 0:
        sys.exit(1)
    print("\n✓ All V-011 bypass protection tests passed!")
    sys.exit(0)
||||||
|
# Allow this module to be executed directly as a standalone test script.
if __name__ == "__main__":
    run_all_tests()
|
||||||
Reference in New Issue
Block a user