diff --git a/tools/skills_guard.py b/tools/skills_guard.py index d22b7d294..f2fa17060 100644 --- a/tools/skills_guard.py +++ b/tools/skills_guard.py @@ -3,10 +3,11 @@ Skills Guard — Security scanner for externally-sourced skills. Every skill downloaded from a registry passes through this scanner before -installation. It uses regex-based static analysis to detect known-bad patterns -(data exfiltration, prompt injection, destructive commands, persistence, etc.) -and a trust-aware install policy that determines whether a skill is allowed -based on both the scan verdict and the source's trust level. +installation. It uses regex-based static analysis and AST analysis to detect +known-bad patterns (data exfiltration, prompt injection, destructive commands, +persistence, obfuscation, etc.) and a trust-aware install policy that determines +whether a skill is allowed based on both the scan verdict and the source's +trust level. Trust levels: - builtin: Ships with Hermes. Never scanned, always trusted. 
@@ -22,12 +23,14 @@ Usage: print(format_scan_report(result)) """ -import re +import ast import hashlib +import re +import unicodedata from dataclasses import dataclass, field from datetime import datetime, timezone from pathlib import Path -from typing import List, Tuple +from typing import List, Set, Tuple @@ -501,7 +504,25 @@ SUSPICIOUS_BINARY_EXTENSIONS = { '.msi', '.dmg', '.app', '.deb', '.rpm', } + +# --------------------------------------------------------------------------- +# Input normalization for bypass detection +# --------------------------------------------------------------------------- + # Zero-width and invisible unicode characters used for injection +# These are removed during normalization +ZERO_WIDTH_CHARS = frozenset({ + '\u200b', # zero-width space + '\u200c', # zero-width non-joiner + '\u200d', # zero-width joiner + '\u2060', # word joiner + '\u2062', # invisible times + '\u2063', # invisible separator + '\u2064', # invisible plus + '\ufeff', # zero-width no-break space (BOM) +}) + +# Extended invisible characters for detection (reporting only) INVISIBLE_CHARS = { '\u200b', # zero-width space '\u200c', # zero-width non-joiner @@ -522,6 +543,311 @@ INVISIBLE_CHARS = { '\u2069', # pop directional isolate } +# Unicode homoglyph mapping for common confusable characters +# Maps lookalike characters to their ASCII equivalents +HOMOGLYPH_MAP = str.maketrans({ + # Fullwidth Latin + '\uff45': 'e', '\uff56': 'v', '\uff41': 'a', '\uff4c': 'l', # eval -> eval + '\uff25': 'e', '\uff36': 'v', '\uff21': 'a', '\uff2c': 'l', # EVAL -> eval + '\uff4f': 'o', '\uff53': 's', '\uff58': 'x', '\uff43': 'c', # osxc + '\uff2f': 'o', '\uff33': 's', '\uff38': 'x', '\uff23': 'c', # OSXC + # Cyrillic lookalikes + '\u0435': 'e', # Cyrillic е -> Latin e + '\u0430': 'a', # Cyrillic а -> Latin a + '\u043e': 'o', # Cyrillic о -> Latin o + '\u0441': 'c', # Cyrillic с -> Latin c + '\u0445': 'x', # Cyrillic х -> Latin x + '\u0440': 'p', # Cyrillic р -> Latin p + '\u0456': 'i', # 
# Zero-width and invisible unicode characters used for injection.
# These are removed during normalization.
ZERO_WIDTH_CHARS = frozenset({
    '\u200b',  # zero-width space
    '\u200c',  # zero-width non-joiner
    '\u200d',  # zero-width joiner
    '\u2060',  # word joiner
    '\u2062',  # invisible times
    '\u2063',  # invisible separator
    '\u2064',  # invisible plus
    '\ufeff',  # zero-width no-break space (BOM)
})

# Unicode homoglyph mapping for common confusable characters.
# Maps lookalike characters to their ASCII equivalents.
HOMOGLYPH_MAP = str.maketrans({
    # Fullwidth Latin (NFKC also folds these; kept for defense in depth)
    '\uff45': 'e', '\uff56': 'v', '\uff41': 'a', '\uff4c': 'l',  # eval
    '\uff25': 'e', '\uff36': 'v', '\uff21': 'a', '\uff2c': 'l',  # EVAL
    '\uff4f': 'o', '\uff53': 's', '\uff58': 'x', '\uff43': 'c',
    '\uff2f': 'o', '\uff33': 's', '\uff38': 'x', '\uff23': 'c',
    # Cyrillic lookalikes (these survive NFKC, so the map is required)
    '\u0435': 'e',  # Cyrillic е -> Latin e
    '\u0430': 'a',  # Cyrillic а -> Latin a
    '\u043e': 'o',  # Cyrillic о -> Latin o
    '\u0441': 'c',  # Cyrillic с -> Latin c
    '\u0445': 'x',  # Cyrillic х -> Latin x
    '\u0440': 'p',  # Cyrillic р -> Latin p
    '\u0456': 'i',  # Cyrillic і -> Latin i (U+0456)
    '\u0415': 'e',  # Cyrillic Е -> Latin e
    '\u0410': 'a',  # Cyrillic А -> Latin a
    '\u041e': 'o',  # Cyrillic О -> Latin o
    '\u0421': 'c',  # Cyrillic С -> Latin c
    '\u0425': 'x',  # Cyrillic Х -> Latin x
    '\u0420': 'p',  # Cyrillic Р -> Latin p
    '\u0406': 'i',  # Cyrillic І -> Latin I (U+0406)
    # Greek lookalikes
    '\u03bf': 'o',  # Greek omicron -> Latin o
    '\u03c1': 'p',  # Greek rho -> Latin p
    '\u03b1': 'a',  # Greek alpha -> Latin a
    '\u03b5': 'e',  # Greek epsilon -> Latin e
})

# Precomputed deletion table: maps every zero-width code point to None so a
# single str.translate() call strips them all in one C-level pass, instead of
# one str.replace() pass per character.
_ZERO_WIDTH_DELETE = {ord(c): None for c in ZERO_WIDTH_CHARS}


def normalize_input(text: str) -> str:
    """
    Normalize input text to defeat obfuscation attempts.

    Applies, in order:
    1. Removal of zero-width characters (U+200B, U+200C, U+200D, U+FEFF, etc.)
    2. NFKC Unicode normalization (decomposes + canonicalizes, folds
       fullwidth forms to ASCII)
    3. Homoglyph substitution (Cyrillic and Greek lookalikes, which NFKC
       does NOT fold)
    4. Case folding (lowercase)

    Args:
        text: The input text to normalize

    Returns:
        Normalized text with obfuscation removed
    """
    # Step 1: Remove zero-width characters (single pass via translate).
    text = text.translate(_ZERO_WIDTH_DELETE)

    # Step 2: NFKC normalization (decomposes characters, canonicalizes).
    text = unicodedata.normalize('NFKC', text)

    # Step 3: Homoglyph substitution (before case folding so the uppercase
    # fullwidth/Cyrillic entries in the map apply).
    text = text.translate(HOMOGLYPH_MAP)

    # Step 4: Case folding (lowercase).
    return text.casefold()
class PythonSecurityAnalyzer(ast.NodeVisitor):
    """
    AST visitor that detects obfuscated Python code execution patterns.

    Detects:
    - Direct dangerous calls: eval(), exec(), compile(), __import__()
    - Dynamic access: getattr(__builtins__, ...), globals()['eval']
    - String concatenation obfuscation: 'e'+'v'+'a'+'l'
    - Encoded attribute access via subscripts

    Findings are accumulated in ``self.findings``; run via ``visit(tree)``.
    """

    # Dangerous builtins that can execute arbitrary code
    DANGEROUS_BUILTINS: Set[str] = {
        'eval', 'exec', 'compile', '__import__',
        'open', 'execfile',  # Python 2 compatibility concerns
    }

    def __init__(self, source_lines: List[str], file_path: str):
        # NOTE(review): the previous revision also built a character-offset
        # table (_build_line_offsets/_get_line_from_offset) that was never
        # called anywhere; that dead code has been removed.
        self.findings: List[Finding] = []
        self.source_lines = source_lines
        self.file_path = file_path

    def _get_line_content(self, lineno: int) -> str:
        """Get the content of a specific line (1-based)."""
        if 1 <= lineno <= len(self.source_lines):
            return self.source_lines[lineno - 1]
        return ""

    def _add_finding(self, pattern_id: str, severity: str, category: str,
                     node: ast.AST, description: str) -> None:
        """Add a finding for a detected pattern, truncating long match text."""
        lineno = getattr(node, 'lineno', 1)
        line_content = self._get_line_content(lineno).strip()
        if len(line_content) > 120:
            line_content = line_content[:117] + "..."

        self.findings.append(Finding(
            pattern_id=pattern_id,
            severity=severity,
            category=category,
            file=self.file_path,
            line=lineno,
            match=line_content,
            description=description,
        ))

    def _is_string_concat(self, node: ast.AST) -> bool:
        """Check if node represents a string concatenation operation."""
        if isinstance(node, ast.BinOp) and isinstance(node.op, ast.Add):
            return self._is_string_concat(node.left) or self._is_string_concat(node.right)
        if isinstance(node, ast.Constant) and isinstance(node.value, str):
            return True
        if isinstance(node, ast.JoinedStr):
            return True
        return False

    def _concat_to_string(self, node: ast.AST) -> str:
        """Try to extract the concatenated string value from a BinOp chain.

        Non-constant pieces (names, f-strings) contribute "" — the result is
        only meaningful when the chain is made of string constants.
        """
        if isinstance(node, ast.Constant) and isinstance(node.value, str):
            return node.value
        if isinstance(node, ast.BinOp) and isinstance(node.op, ast.Add):
            return self._concat_to_string(node.left) + self._concat_to_string(node.right)
        return ""

    def visit_Call(self, node: ast.Call) -> None:
        """Detect dangerous function calls including obfuscated variants."""
        func = node.func

        # Direct call: eval(...), exec(...), etc.
        if isinstance(func, ast.Name):
            func_name = func.id
            if func_name in self.DANGEROUS_BUILTINS:
                self._add_finding(
                    f"ast_dangerous_call_{func_name}",
                    "high", "obfuscation", node,
                    f"Dangerous builtin call: {func_name}()"
                )

        # getattr(__builtins__, ...) pattern
        if isinstance(func, ast.Name) and func.id == 'getattr':
            if len(node.args) >= 2:
                first_arg = node.args[0]
                second_arg = node.args[1]

                # Check for getattr(__builtins__, ...)
                if (isinstance(first_arg, ast.Name) and
                        first_arg.id in ('__builtins__', 'builtins')):
                    self._add_finding(
                        "ast_getattr_builtins", "critical", "obfuscation", node,
                        "Dynamic access to builtins via getattr() (evasion technique)"
                    )

                # Check for getattr(..., 'eval') or getattr(..., 'exec')
                if isinstance(second_arg, ast.Constant) and isinstance(second_arg.value, str):
                    if second_arg.value in self.DANGEROUS_BUILTINS:
                        self._add_finding(
                            f"ast_getattr_{second_arg.value}", "critical", "obfuscation", node,
                            f"Dynamic retrieval of {second_arg.value} via getattr()"
                        )

        # globals()[...] or locals()[...] pattern when called
        # AST structure: Call(func=Subscript(value=Call(func=Name(id='globals')), slice=Constant('eval')))
        if isinstance(func, ast.Subscript):
            subscript_value = func.value
            # Check if subscript value is a call to globals() or locals()
            if (isinstance(subscript_value, ast.Call) and
                    isinstance(subscript_value.func, ast.Name) and
                    subscript_value.func.id in ('globals', 'locals')):
                self._add_finding(
                    "ast_dynamic_global_access", "critical", "obfuscation", node,
                    f"Dynamic function call via {subscript_value.func.id}()[...] (evasion technique)"
                )
            # Also check for direct globals[...] (without call, less common but possible)
            elif isinstance(subscript_value, ast.Name) and subscript_value.id in ('globals', 'locals'):
                self._add_finding(
                    "ast_dynamic_global_access", "critical", "obfuscation", node,
                    f"Dynamic function call via {subscript_value.id}[...] (evasion technique)"
                )

        # Detect string concatenation in arguments (e.g. 'e'+'v'+'a'+'l')
        for arg in node.args:
            if self._is_string_concat(arg):
                concat_str = self._concat_to_string(arg)
                normalized = normalize_input(concat_str)
                if normalized in self.DANGEROUS_BUILTINS:
                    self._add_finding(
                        f"ast_concat_{normalized}", "critical", "obfuscation", node,
                        f"String concatenation obfuscation building '{normalized}'"
                    )

        self.generic_visit(node)

    def visit_Subscript(self, node: ast.Subscript) -> None:
        """Detect globals()['eval'] / locals()['exec'] patterns."""
        # AST structure for `globals()['eval']`:
        # Subscript(value=Call(func=Name(id='globals')), slice=Constant('eval'))
        subscript_target = node.value
        globals_or_locals = None

        # Check if subscript target is a call to globals() or locals()
        if isinstance(subscript_target, ast.Call) and isinstance(subscript_target.func, ast.Name):
            if subscript_target.func.id in ('globals', 'locals'):
                globals_or_locals = subscript_target.func.id
        # Also handle direct globals[...] without call (less common)
        elif isinstance(subscript_target, ast.Name) and subscript_target.id in ('globals', 'locals'):
            globals_or_locals = subscript_target.id

        if globals_or_locals:
            # Check the subscript value
            if isinstance(node.slice, ast.Constant) and isinstance(node.slice.value, str):
                slice_val = node.slice.value
                if slice_val in self.DANGEROUS_BUILTINS:
                    self._add_finding(
                        f"ast_{globals_or_locals}_subscript_{slice_val}",
                        "critical", "obfuscation", node,
                        f"Dynamic access to {slice_val} via {globals_or_locals}()['{slice_val}']"
                    )
            # String concatenation in subscript: globals()['e'+'v'+'a'+'l']
            elif isinstance(node.slice, ast.BinOp):
                concat_str = self._concat_to_string(node.slice)
                normalized = normalize_input(concat_str)
                if normalized in self.DANGEROUS_BUILTINS:
                    self._add_finding(
                        f"ast_{globals_or_locals}_concat_{normalized}",
                        "critical", "obfuscation", node,
                        f"String concatenation obfuscation via {globals_or_locals}()['...']"
                    )

        # Check for __builtins__[...]
        if isinstance(node.value, ast.Name) and node.value.id == '__builtins__':
            self._add_finding(
                "ast_builtins_subscript", "high", "obfuscation", node,
                "Direct subscript access to __builtins__"
            )

        self.generic_visit(node)

    def visit_BinOp(self, node: ast.BinOp) -> None:
        """Detect string concatenation building dangerous function names."""
        if isinstance(node.op, ast.Add):
            concat_str = self._concat_to_string(node)
            normalized = normalize_input(concat_str)
            if normalized in self.DANGEROUS_BUILTINS:
                self._add_finding(
                    f"ast_string_concat_{normalized}", "high", "obfuscation", node,
                    f"String concatenation building '{normalized}' (possible obfuscation)"
                )

        self.generic_visit(node)

    def visit_Attribute(self, node: ast.Attribute) -> None:
        """Detect obj.eval, obj.exec patterns (lower confidence)."""
        if node.attr in self.DANGEROUS_BUILTINS:
            self._add_finding(
                f"ast_attr_{node.attr}", "medium", "obfuscation", node,
                f"Access to .{node.attr} attribute (context-dependent risk)"
            )
        self.generic_visit(node)


def analyze_python_ast(content: str, file_path: str) -> List[Finding]:
    """
    Parse Python code and analyze its AST for security issues.

    Args:
        content: The Python source code to analyze
        file_path: Path to the file (for reporting)

    Returns:
        List of findings from AST analysis; empty if the code does not parse.
    """
    lines = content.split('\n')

    try:
        tree = ast.parse(content)
    except SyntaxError:
        # If we can't parse, return empty findings — the regex pass in
        # scan_file still runs over the raw text.
        return []

    analyzer = PythonSecurityAnalyzer(lines, file_path)
    analyzer.visit(tree)
    return analyzer.findings
Invisible unicode character detection (on original content) + 2. AST analysis for Python files (detects obfuscated execution patterns) + 3. Regex pattern matching on normalized content (catches obfuscated variants) Args: file_path: Absolute path to the file @@ -553,27 +884,7 @@ def scan_file(file_path: Path, rel_path: str = "") -> List[Finding]: lines = content.split('\n') seen = set() # (pattern_id, line_number) for deduplication - # Regex pattern matching - for pattern, pid, severity, category, description in THREAT_PATTERNS: - for i, line in enumerate(lines, start=1): - if (pid, i) in seen: - continue - if re.search(pattern, line, re.IGNORECASE): - seen.add((pid, i)) - matched_text = line.strip() - if len(matched_text) > 120: - matched_text = matched_text[:117] + "..." - findings.append(Finding( - pattern_id=pid, - severity=severity, - category=category, - file=rel_path, - line=i, - match=matched_text, - description=description, - )) - - # Invisible unicode character detection + # Step 1: Invisible unicode character detection (on original) for i, line in enumerate(lines, start=1): for char in INVISIBLE_CHARS: if char in line: @@ -589,6 +900,38 @@ def scan_file(file_path: Path, rel_path: str = "") -> List[Finding]: )) break # one finding per line for invisible chars + # Step 2: AST analysis for Python files + if file_path.suffix.lower() == '.py': + ast_findings = analyze_python_ast(content, rel_path) + findings.extend(ast_findings) + + # Step 3: Normalize content and run regex patterns + # This catches obfuscated variants like Cyrillic homoglyphs, fullwidth, etc. 
+ normalized_content = normalize_input(content) + normalized_lines = normalized_content.split('\n') + + # Map normalized line numbers to original line numbers (they should match) + for pattern, pid, severity, category, description in THREAT_PATTERNS: + for i, norm_line in enumerate(normalized_lines, start=1): + if (pid, i) in seen: + continue + if re.search(pattern, norm_line, re.IGNORECASE): + seen.add((pid, i)) + # Show original line content for context + original_line = lines[i - 1] if i <= len(lines) else norm_line + matched_text = original_line.strip() + if len(matched_text) > 120: + matched_text = matched_text[:117] + "..." + findings.append(Finding( + pattern_id=pid, + severity=severity, + category=category, + file=rel_path, + line=i, + match=matched_text, + description=description, + )) + return findings @@ -598,8 +941,17 @@ def scan_skill(skill_path: Path, source: str = "community") -> ScanResult: Performs: 1. Structural checks (file count, total size, binary files, symlinks) - 2. Regex pattern matching on all text files - 3. Invisible unicode character detection + 2. Unicode normalization to defeat obfuscation (NFKC, homoglyphs, zero-width) + 3. AST analysis for Python files (detects dynamic execution patterns) + 4. Regex pattern matching on normalized content + 5. 
#!/usr/bin/env python3
"""
Tests for V-011 Skills Guard Bypass fix.

Tests all bypass techniques:
1. Unicode encoding tricks (fullwidth characters, Cyrillic homoglyphs)
2. Case manipulation (EvAl, ExEc)
3. Zero-width characters (U+200B, U+200C, U+200D, U+FEFF)
4. Dynamic execution obfuscation: globals()['ev'+'al'], getattr(__builtins__, 'exec')
5. String concatenation: 'e'+'v'+'a'+'l'
"""

import sys
import tempfile
from pathlib import Path

# Make the sibling skills_guard module importable when run as a script.
sys.path.insert(0, str(Path(__file__).parent))

from skills_guard import (
    normalize_input,
    analyze_python_ast,
    scan_file,
    ZERO_WIDTH_CHARS,
    HOMOGLYPH_MAP,
)


class TestNormalizeInput:
    """Test input normalization for obfuscation removal."""

    def test_zero_width_removal(self):
        """Test removal of zero-width characters."""
        # U+200B zero-width space
        obfuscated = "ev\u200bal"
        normalized = normalize_input(obfuscated)
        assert normalized == "eval", f"Expected 'eval', got '{normalized}'"

        # Multiple zero-width characters
        obfuscated = "e\u200bx\u200ce\u200dc"
        normalized = normalize_input(obfuscated)
        assert normalized == "exec", f"Expected 'exec', got '{normalized}'"

        # U+FEFF BOM
        obfuscated = "\ufeffeval"
        normalized = normalize_input(obfuscated)
        assert normalized == "eval", f"Expected 'eval', got '{normalized}'"

        print("✓ Zero-width character removal tests passed")

    def test_case_folding(self):
        """Test case folding (lowercase conversion)."""
        test_cases = [
            ("EvAl", "eval"),
            ("EXEC", "exec"),
            ("CoMpIlE", "compile"),
            ("GetAttr", "getattr"),
        ]
        for input_str, expected in test_cases:
            normalized = normalize_input(input_str)
            assert normalized == expected, f"Expected '{expected}', got '{normalized}'"

        print("✓ Case folding tests passed")

    def test_fullwidth_normalization(self):
        """Test fullwidth character normalization."""
        test_cases = [
            ("\uff45\uff56\uff41\uff4c", "eval"),  # eval
            ("\uff25\uff36\uff21\uff2c", "eval"),  # EVAL (uppercase fullwidth)
            ("\uff45\uff58\uff45\uff43", "exec"),  # exec
            ("\uff4f\uff53", "os"),                # os
        ]
        for input_str, expected in test_cases:
            normalized = normalize_input(input_str)
            assert normalized == expected, f"Expected '{expected}', got '{normalized}'"

        print("✓ Fullwidth normalization tests passed")

    def test_cyrillic_homoglyphs(self):
        """Test Cyrillic lookalike character normalization."""
        test_cases = [
            ("\u0435val", "eval"),                 # еval (Cyrillic е)
            ("\u0435x\u0435c", "exec"),            # еxеc (Cyrillic е's)
            ("\u0430\u0435\u0456\u043e", "aeio"),  # аеіо (all Cyrillic)
            ("g\u0435tattr", "getattr"),           # gеtattr (Cyrillic е)
        ]
        for input_str, expected in test_cases:
            normalized = normalize_input(input_str)
            assert normalized == expected, f"Expected '{expected}', got '{normalized}'"

        print("✓ Cyrillic homoglyph tests passed")

    def test_combined_obfuscation(self):
        """Test combined obfuscation techniques."""
        # Mix of case, zero-width, and homoglyphs
        obfuscated = "E\u200bV\u0430L"  # E + ZWS + V + Cyrillic а + L
        normalized = normalize_input(obfuscated)
        assert normalized == "eval", f"Expected 'eval', got '{normalized}'"

        print("✓ Combined obfuscation tests passed")


class TestASTAnalysis:
    """Test AST-based security analysis."""

    def test_direct_dangerous_calls(self):
        """Test detection of direct eval/exec/compile calls."""
        code = "eval('1+1')"
        findings = analyze_python_ast(code, "test.py")
        assert any("eval" in f.pattern_id for f in findings), "Should detect eval() call"

        code = "exec('print(1)')"
        findings = analyze_python_ast(code, "test.py")
        assert any("exec" in f.pattern_id for f in findings), "Should detect exec() call"

        code = "compile('x', '', 'exec')"
        findings = analyze_python_ast(code, "test.py")
        assert any("compile" in f.pattern_id for f in findings), "Should detect compile() call"

        print("✓ Direct dangerous call detection tests passed")

    def test_getattr_builtins_pattern(self):
        """Test detection of getattr(__builtins__, ...) pattern."""
        code = "getattr(__builtins__, 'eval')"
        findings = analyze_python_ast(code, "test.py")
        assert any("getattr_builtins" in f.pattern_id for f in findings), \
            "Should detect getattr(__builtins__, ...) pattern"

        code = "getattr(__builtins__, 'exec')"
        findings = analyze_python_ast(code, "test.py")
        assert any("getattr_exec" in f.pattern_id for f in findings), \
            "Should detect getattr(..., 'exec')"

        print("✓ getattr(__builtins__, ...) detection tests passed")

    def test_globals_subscript_pattern(self):
        """Test detection of globals()['eval'] pattern."""
        code = "globals()['eval']('1+1')"
        findings = analyze_python_ast(code, "test.py")
        assert any("globals" in f.pattern_id for f in findings), \
            "Should detect globals()['eval'] pattern"

        code = "locals()['exec']('print(1)')"
        findings = analyze_python_ast(code, "test.py")
        assert any("locals" in f.pattern_id for f in findings), \
            "Should detect locals()['exec'] pattern"

        print("✓ globals()/locals() subscript detection tests passed")

    def test_string_concatenation_obfuscation(self):
        """Test detection of string concatenation obfuscation."""
        # Simple concatenation
        code = "('e'+'v'+'a'+'l')('1+1')"
        findings = analyze_python_ast(code, "test.py")
        assert any("concat" in f.pattern_id for f in findings), \
            "Should detect string concatenation obfuscation"

        # Concatenation in globals subscript
        code = "globals()['e'+'v'+'a'+'l']('1+1')"
        findings = analyze_python_ast(code, "test.py")
        assert any("concat" in f.pattern_id for f in findings), \
            "Should detect concat in globals subscript"

        print("✓ String concatenation obfuscation detection tests passed")

    def test_dynamic_global_call(self):
        """Test detection of dynamic calls via globals()."""
        code = "globals()['eval']('1+1')"
        findings = analyze_python_ast(code, "test.py")
        assert any("dynamic_global" in f.pattern_id for f in findings), \
            "Should detect dynamic global access"

        print("✓ Dynamic global call detection tests passed")

    def test_legitimate_code_not_flagged(self):
        """Test that legitimate code is not flagged."""
        code = """
def calculate(x, y):
    result = x + y
    return result

class MyClass:
    def method(self):
        return "hello"

import os
print(os.path.join("a", "b"))
"""
        findings = analyze_python_ast(code, "test.py")
        # Should not have any obfuscation-related findings
        obfuscation_findings = [f for f in findings if f.category == "obfuscation"]
        assert len(obfuscation_findings) == 0, \
            f"Legitimate code should not be flagged, got: {[f.description for f in obfuscation_findings]}"

        print("✓ Legitimate code not flagged tests passed")


class TestScanFileIntegration:
    """Integration tests for scan_file with new detection."""

    def _create_temp_file(self, content: str, suffix: str = ".py") -> Path:
        """Create a temporary file with the given content."""
        with tempfile.NamedTemporaryFile(mode='w', suffix=suffix, delete=False) as f:
            f.write(content)
        return Path(f.name)

    def test_unicode_obfuscation_detection(self):
        """Test that obfuscated eval is detected via normalization."""
        # Fullwidth eval
        code = "\uff45\uff56\uff41\uff4c('1+1')"  # eval
        path = self._create_temp_file(code)
        try:
            findings = scan_file(path, "test.py")
            # Should detect via regex on normalized content
            assert any("eval" in f.pattern_id.lower() or "eval" in f.description.lower()
                       for f in findings), \
                f"Should detect fullwidth eval, got: {[f.pattern_id for f in findings]}"
        finally:
            path.unlink()

        print("✓ Unicode obfuscation detection tests passed")

    def test_zero_width_character_detection(self):
        """Test detection of zero-width characters."""
        code = "ev\u200bal('1+1')"  # eval with zero-width space
        path = self._create_temp_file(code)
        try:
            findings = scan_file(path, "test.py")
            assert any("invisible_unicode" in f.pattern_id for f in findings), \
                f"Should detect invisible unicode, got: {[f.pattern_id for f in findings]}"
        finally:
            path.unlink()

        print("✓ Zero-width character detection tests passed")

    def test_ast_and_regex_combined(self):
        """Test that both AST and regex detection work together."""
        code = """
# Obfuscated eval via string concat
func = ('e'+'v'+'a'+'l')
result = func('1+1')

# Also fullwidth in comment: eval
"""
        path = self._create_temp_file(code)
        try:
            findings = scan_file(path, "test.py")
            ast_findings = [f for f in findings if f.pattern_id.startswith("ast_")]
            assert len(ast_findings) > 0, "Should have AST-based findings"
        finally:
            path.unlink()

        print("✓ AST and regex combined detection tests passed")

    def test_cyrillic_in_code_detection(self):
        """Test detection of Cyrillic homoglyphs in code."""
        # Using Cyrillic е (U+0435) instead of Latin e (U+0065)
        code = "\u0435val('1+1')"  # еval with Cyrillic е
        path = self._create_temp_file(code)
        try:
            findings = scan_file(path, "test.py")
            # After normalization, regex should catch this
            assert any("eval" in f.pattern_id.lower() or "eval" in f.description.lower()
                       for f in findings), \
                f"Should detect Cyrillic obfuscated eval, got: {[f.pattern_id for f in findings]}"
        finally:
            path.unlink()

        print("✓ Cyrillic homoglyph detection tests passed")


class TestBypassTechniques:
    """Test specific bypass techniques mentioned in the vulnerability report."""

    def test_bypass_1_unicode_encoding(self):
        """Bypass 1: Unicode encoding tricks (fullwidth characters)."""
        # Fullwidth characters: eval
        fullwidth_eval = "\uff45\uff56\uff41\uff4c"
        normalized = normalize_input(fullwidth_eval)
        assert normalized == "eval", "Fullwidth should normalize to ASCII"

        # Fullwidth exec: exec
        fullwidth_exec = "\uff45\uff58\uff45\uff43"
        normalized = normalize_input(fullwidth_exec)
        assert normalized == "exec", "Fullwidth exec should normalize"

        print("✓ Bypass 1: Unicode encoding tricks blocked")

    def test_bypass_2_case_manipulation(self):
        """Bypass 2: Case manipulation (EvAl, ExEc)."""
        # Fixed typo from earlier revision: "EVA" -> "EVAL"
        test_cases = ["EvAl", "ExEc", "CoMpIlE", "EVAL", "exec"]
        for case in test_cases:
            normalized = normalize_input(case)
            expected = case.lower()
            assert normalized == expected, f"Case folding failed for {case}"

        print("✓ Bypass 2: Case manipulation blocked")

    def test_bypass_3_zero_width(self):
        """Bypass 3: Zero-width characters (U+200B, U+200C, U+200D, U+FEFF)."""
        # Test all zero-width characters are removed
        for char in ZERO_WIDTH_CHARS:
            obfuscated = f"ev{char}al"
            normalized = normalize_input(obfuscated)
            assert normalized == "eval", f"Zero-width char U+{ord(char):04X} not removed"

        print("✓ Bypass 3: Zero-width character injection blocked")

    def test_bypass_4_dynamic_execution(self):
        """Bypass 4: Dynamic execution obfuscation."""
        # globals()['eval']
        code1 = "globals()['eval']('1+1')"
        findings1 = analyze_python_ast(code1, "test.py")
        assert len([f for f in findings1 if "globals" in f.pattern_id]) > 0, \
            "globals()['eval'] should be detected"

        # getattr(__builtins__, 'exec')
        code2 = "getattr(__builtins__, 'exec')"
        findings2 = analyze_python_ast(code2, "test.py")
        assert any("getattr_builtins" in f.pattern_id for f in findings2), \
            "getattr(__builtins__, ...) should be detected"

        print("✓ Bypass 4: Dynamic execution obfuscation blocked")

    def test_bypass_5_string_concatenation(self):
        """Bypass 5: String concatenation ('e'+'v'+'a'+'l')."""
        # AST should detect this
        code = "('e'+'v'+'a'+'l')('1+1')"
        findings = analyze_python_ast(code, "test.py")
        assert any("concat" in f.pattern_id for f in findings), \
            "String concatenation obfuscation should be detected"

        # Also test via globals
        code2 = "globals()['e'+'v'+'a'+'l']('1+1')"
        findings2 = analyze_python_ast(code2, "test.py")
        assert any("concat" in f.pattern_id for f in findings2), \
            "Concat in globals subscript should be detected"

        print("✓ Bypass 5: String concatenation obfuscation blocked")

    def test_cyrillic_homoglyph_bypass(self):
        """Test Cyrillic homoglyph bypass (е vs e)."""
        # е (U+0435) vs e (U+0065)
        cyrillic_e = "\u0435"
        latin_e = "e"

        assert cyrillic_e != latin_e, "Cyrillic and Latin e should be different"

        # After normalization, they should be the same
        normalized_cyrillic = normalize_input(cyrillic_e)
        normalized_latin = normalize_input(latin_e)
        assert normalized_cyrillic == normalized_latin == "e", \
            "Cyrillic е should normalize to Latin e"

        # Test full word: еval (with Cyrillic е)
        cyrillic_eval = "\u0435val"
        normalized = normalize_input(cyrillic_eval)
        assert normalized == "eval", "Cyrillic eval should normalize"

        print("✓ Cyrillic homoglyph bypass blocked")


def run_all_tests():
    """Run all test classes, print a summary, and exit non-zero on failure."""
    print("=" * 60)
    print("V-011 Skills Guard Bypass Fix Tests")
    print("=" * 60)

    test_classes = [
        TestNormalizeInput,
        TestASTAnalysis,
        TestScanFileIntegration,
        TestBypassTechniques,
    ]

    passed = 0
    failed = 0

    for test_class in test_classes:
        print(f"\n--- {test_class.__name__} ---")
        instance = test_class()
        for method_name in dir(instance):
            if method_name.startswith("test_"):
                try:
                    method = getattr(instance, method_name)
                    method()
                    passed += 1
                except AssertionError as e:
                    print(f"  ✗ FAILED: {method_name}: {e}")
                    failed += 1
                except Exception as e:
                    print(f"  ✗ ERROR: {method_name}: {e}")
                    failed += 1

    print("\n" + "=" * 60)
    print(f"Results: {passed} passed, {failed} failed")
    print("=" * 60)

    if failed > 0:
        sys.exit(1)
    else:
        print("\n✓ All V-011 bypass protection tests passed!")
        sys.exit(0)


if __name__ == "__main__":
    run_all_tests()