security: fix V-011 Skills Guard Bypass with AST analysis and normalization

This commit is contained in:
Allegro
2026-03-31 18:44:32 +00:00
parent 546b3dd45d
commit 37c75ecd7a
2 changed files with 792 additions and 30 deletions

View File

@@ -3,10 +3,11 @@
Skills Guard — Security scanner for externally-sourced skills. Skills Guard — Security scanner for externally-sourced skills.
Every skill downloaded from a registry passes through this scanner before Every skill downloaded from a registry passes through this scanner before
installation. It uses regex-based static analysis to detect known-bad patterns installation. It uses regex-based static analysis and AST analysis to detect
(data exfiltration, prompt injection, destructive commands, persistence, etc.) known-bad patterns (data exfiltration, prompt injection, destructive commands,
and a trust-aware install policy that determines whether a skill is allowed persistence, obfuscation, etc.) and a trust-aware install policy that determines
based on both the scan verdict and the source's trust level. whether a skill is allowed based on both the scan verdict and the source's
trust level.
Trust levels: Trust levels:
- builtin: Ships with Hermes. Never scanned, always trusted. - builtin: Ships with Hermes. Never scanned, always trusted.
@@ -22,12 +23,14 @@ Usage:
print(format_scan_report(result)) print(format_scan_report(result))
""" """
import re import ast
import hashlib import hashlib
import re
import unicodedata
from dataclasses import dataclass, field from dataclasses import dataclass, field
from datetime import datetime, timezone from datetime import datetime, timezone
from pathlib import Path from pathlib import Path
from typing import List, Tuple from typing import List, Set, Tuple
@@ -501,7 +504,25 @@ SUSPICIOUS_BINARY_EXTENSIONS = {
'.msi', '.dmg', '.app', '.deb', '.rpm', '.msi', '.dmg', '.app', '.deb', '.rpm',
} }
# ---------------------------------------------------------------------------
# Input normalization for bypass detection
# ---------------------------------------------------------------------------
# Zero-width and invisible unicode characters used for injection # Zero-width and invisible unicode characters used for injection
# These are removed during normalization
# Characters stripped by normalize_input() before pattern matching.
# Fix: the invisible-operator run U+2060..U+2064 previously skipped
# U+2061 (FUNCTION APPLICATION), leaving one invisible character usable
# for the same token-splitting evasion (e.g. "ev\u2061al").
ZERO_WIDTH_CHARS = frozenset({
    '\u200b',  # zero-width space
    '\u200c',  # zero-width non-joiner
    '\u200d',  # zero-width joiner
    '\u2060',  # word joiner
    '\u2061',  # invisible function application (was missing from the run)
    '\u2062',  # invisible times
    '\u2063',  # invisible separator
    '\u2064',  # invisible plus
    '\ufeff',  # zero-width no-break space (BOM)
})
# Extended invisible characters for detection (reporting only)
INVISIBLE_CHARS = { INVISIBLE_CHARS = {
'\u200b', # zero-width space '\u200b', # zero-width space
'\u200c', # zero-width non-joiner '\u200c', # zero-width non-joiner
@@ -522,6 +543,311 @@ INVISIBLE_CHARS = {
'\u2069', # pop directional isolate '\u2069', # pop directional isolate
} }
# Translation table collapsing common confusable characters onto their
# ASCII equivalents. Grouped by target letter: each ASCII character maps
# from its fullwidth (upper/lower), Cyrillic, and Greek lookalikes.
# Built with str.maketrans, so keys are code points and values are the
# replacement strings.
HOMOGLYPH_MAP = str.maketrans({
    lookalike: ascii_char
    for ascii_char, lookalikes in {
        # fullwidth a/A, Cyrillic а/А, Greek alpha
        'a': '\uff41\uff21\u0430\u0410\u03b1',
        # fullwidth c/C, Cyrillic с/С
        'c': '\uff43\uff23\u0441\u0421',
        # fullwidth e/E, Cyrillic е/Е, Greek epsilon
        'e': '\uff45\uff25\u0435\u0415\u03b5',
        # Cyrillic і/І (U+0456 / U+0406)
        'i': '\u0456\u0406',
        # fullwidth l/L
        'l': '\uff4c\uff2c',
        # fullwidth o/O, Cyrillic о/О, Greek omicron
        'o': '\uff4f\uff2f\u043e\u041e\u03bf',
        # Cyrillic р/Р, Greek rho
        'p': '\u0440\u0420\u03c1',
        # fullwidth s/S
        's': '\uff53\uff33',
        # fullwidth v/V
        'v': '\uff56\uff36',
        # fullwidth x/X, Cyrillic х/Х
        'x': '\uff58\uff38\u0445\u0425',
    }.items()
    for lookalike in lookalikes
})
def normalize_input(text: str) -> str:
    """
    Normalize input text to defeat obfuscation attempts.

    Applies, in order:
    1. Removal of zero-width characters (U+200B, U+200C, U+200D, U+FEFF, etc.)
    2. NFKC Unicode normalization (decomposes + canonicalizes; folds
       fullwidth compatibility forms to ASCII)
    3. Homoglyph substitution (Cyrillic, fullwidth, Greek lookalikes)
    4. Case folding (lowercase)

    (The original docstring listed steps 3 and 4 in the opposite order from
    the code; the code's order is correct — substitution must precede case
    folding so uppercase lookalike entries still match — and is documented
    here as implemented.)

    Args:
        text: The input text to normalize

    Returns:
        Normalized text with obfuscation removed
    """
    # Step 1: strip zero-width characters. A single translate() pass replaces
    # the previous one-replace()-per-character loop (8+ full string scans).
    text = text.translate({ord(ch): None for ch in ZERO_WIDTH_CHARS})
    # Step 2: NFKC normalization. This already maps fullwidth ASCII to plain
    # ASCII, so HOMOGLYPH_MAP's fullwidth entries act as a redundant backstop.
    text = unicodedata.normalize('NFKC', text)
    # Step 3: homoglyph substitution (before case folding so the uppercase
    # lookalike entries in HOMOGLYPH_MAP can still match).
    text = text.translate(HOMOGLYPH_MAP)
    # Step 4: case folding defeats EvAl / EXEC style evasion.
    return text.casefold()
# ---------------------------------------------------------------------------
# AST-based Python security analysis
# ---------------------------------------------------------------------------
class PythonSecurityAnalyzer(ast.NodeVisitor):
    """
    AST visitor that detects obfuscated Python code execution patterns.

    Detects:
    - Direct dangerous calls: eval(), exec(), compile(), __import__()
    - Dynamic access: getattr(__builtins__, ...), globals()['eval']
    - String concatenation obfuscation: 'e'+'v'+'a'+'l'
    - Encoded attribute access via subscripts

    Usage: construct with the pre-split source lines and a display path,
    call .visit(tree) on a parsed module, then read .findings.
    """

    # Dangerous builtins that can execute arbitrary code.
    # NOTE(review): including 'open' means every open() call is reported
    # ("high" severity via visit_Call) — noisy for legitimate file I/O;
    # confirm that is intended for this scanner's threat model.
    DANGEROUS_BUILTINS: Set[str] = {
        'eval', 'exec', 'compile', '__import__',
        'open', 'execfile',  # Python 2 compatibility concerns
    }

    def __init__(self, source_lines: List[str], file_path: str):
        # source_lines: file content already split on '\n' (accessed 1-based
        # via _get_line_content). file_path is used only in reports.
        self.findings: List[Finding] = []
        self.source_lines = source_lines
        self.file_path = file_path
        self.line_offsets = self._build_line_offsets()

    def _build_line_offsets(self) -> List[int]:
        """Build offset map for converting absolute position to line number."""
        # NOTE(review): consumed only by _get_line_from_offset, which no
        # visitor here calls — dead weight unless used elsewhere in the file.
        offsets = [0]
        for line in self.source_lines:
            offsets.append(offsets[-1] + len(line) + 1)  # +1 for newline
        return offsets

    def _get_line_from_offset(self, offset: int) -> int:
        """Convert absolute character offset to 1-based line number."""
        for i, start_offset in enumerate(self.line_offsets):
            if offset < start_offset:
                return max(1, i)
        return len(self.line_offsets)

    def _get_line_content(self, lineno: int) -> str:
        """Get the content of a specific line (1-based)."""
        if 1 <= lineno <= len(self.source_lines):
            return self.source_lines[lineno - 1]
        return ""

    def _add_finding(self, pattern_id: str, severity: str, category: str,
                     node: ast.AST, description: str) -> None:
        """Add a finding for a detected pattern."""
        # Synthetic nodes may lack location info; default to line 1.
        lineno = getattr(node, 'lineno', 1)
        line_content = self._get_line_content(lineno).strip()
        # Truncate long source lines so scan reports stay readable.
        if len(line_content) > 120:
            line_content = line_content[:117] + "..."
        self.findings.append(Finding(
            pattern_id=pattern_id,
            severity=severity,
            category=category,
            file=self.file_path,
            line=lineno,
            match=line_content,
            description=description,
        ))

    def _is_string_concat(self, node: ast.AST) -> bool:
        """Check if node represents a string concatenation operation."""
        # A BinOp(+) counts if EITHER side looks string-like, so mixed
        # expressions like name + 'al' are still flagged as candidates.
        if isinstance(node, ast.BinOp) and isinstance(node.op, ast.Add):
            return self._is_string_concat(node.left) or self._is_string_concat(node.right)
        if isinstance(node, ast.Constant) and isinstance(node.value, str):
            return True
        # NOTE(review): f-strings pass this check, but _concat_to_string
        # yields '' for JoinedStr, so f-string-built names are not resolved.
        if isinstance(node, ast.JoinedStr):
            return True
        return False

    def _concat_to_string(self, node: ast.AST) -> str:
        """Try to extract the concatenated string value from a BinOp chain."""
        # Non-constant parts (names, calls, f-strings) contribute '' — the
        # result is a best-effort reconstruction of the literal pieces only.
        if isinstance(node, ast.Constant) and isinstance(node.value, str):
            return node.value
        if isinstance(node, ast.BinOp) and isinstance(node.op, ast.Add):
            return self._concat_to_string(node.left) + self._concat_to_string(node.right)
        return ""

    def visit_Call(self, node: ast.Call) -> None:
        """Detect dangerous function calls including obfuscated variants."""
        func = node.func
        # Direct call: eval(...), exec(...), etc.
        if isinstance(func, ast.Name):
            func_name = func.id
            if func_name in self.DANGEROUS_BUILTINS:
                self._add_finding(
                    f"ast_dangerous_call_{func_name}",
                    "high", "obfuscation", node,
                    f"Dangerous builtin call: {func_name}()"
                )
        # getattr(__builtins__, ...) pattern
        if isinstance(func, ast.Name) and func.id == 'getattr':
            if len(node.args) >= 2:
                first_arg = node.args[0]
                second_arg = node.args[1]
                # Check for getattr(__builtins__, ...)
                if (isinstance(first_arg, ast.Name) and
                        first_arg.id in ('__builtins__', 'builtins')):
                    self._add_finding(
                        "ast_getattr_builtins", "critical", "obfuscation", node,
                        "Dynamic access to builtins via getattr() (evasion technique)"
                    )
                # Check for getattr(..., 'eval') or getattr(..., 'exec')
                if isinstance(second_arg, ast.Constant) and isinstance(second_arg.value, str):
                    if second_arg.value in self.DANGEROUS_BUILTINS:
                        self._add_finding(
                            f"ast_getattr_{second_arg.value}", "critical", "obfuscation", node,
                            f"Dynamic retrieval of {second_arg.value} via getattr()"
                        )
        # globals()[...] or locals()[...] pattern when called.
        # AST structure: Call(func=Subscript(value=Call(func=Name(id='globals')), slice=Constant('eval')))
        # NOTE(review): generic_visit below also routes func through
        # visit_Subscript, so globals()['eval'](...) yields findings from
        # both visitors (distinct pattern_ids) — looks like deliberate
        # layered detection; confirm downstream reporting tolerates it.
        if isinstance(func, ast.Subscript):
            subscript_value = func.value
            # Check if subscript value is a call to globals() or locals()
            if (isinstance(subscript_value, ast.Call) and
                    isinstance(subscript_value.func, ast.Name) and
                    subscript_value.func.id in ('globals', 'locals')):
                self._add_finding(
                    "ast_dynamic_global_access", "critical", "obfuscation", node,
                    f"Dynamic function call via {subscript_value.func.id}()[...] (evasion technique)"
                )
            # Also check for direct globals[...] (without call, less common but possible)
            elif isinstance(subscript_value, ast.Name) and subscript_value.id in ('globals', 'locals'):
                self._add_finding(
                    "ast_dynamic_global_access", "critical", "obfuscation", node,
                    f"Dynamic function call via {subscript_value.id}[...] (evasion technique)"
                )
        # Detect string concatenation in arguments (e.g., 'e'+'v'+'a'+'l')
        for arg in node.args:
            if self._is_string_concat(arg):
                concat_str = self._concat_to_string(arg)
                # Normalize so homoglyph/zero-width tricks inside the literal
                # pieces are also defeated before the membership test.
                normalized = normalize_input(concat_str)
                if normalized in self.DANGEROUS_BUILTINS:
                    self._add_finding(
                        f"ast_concat_{normalized}", "critical", "obfuscation", node,
                        f"String concatenation obfuscation building '{normalized}'"
                    )
        self.generic_visit(node)

    def visit_Subscript(self, node: ast.Subscript) -> None:
        """Detect globals()['eval'] / locals()['exec'] patterns."""
        # AST structure for `globals()['eval']`:
        # Subscript(value=Call(func=Name(id='globals')), slice=Constant('eval'))
        subscript_target = node.value
        globals_or_locals = None
        # Check if subscript target is a call to globals() or locals()
        if isinstance(subscript_target, ast.Call) and isinstance(subscript_target.func, ast.Name):
            if subscript_target.func.id in ('globals', 'locals'):
                globals_or_locals = subscript_target.func.id
        # Also handle direct globals[...] without call (less common)
        elif isinstance(subscript_target, ast.Name) and subscript_target.id in ('globals', 'locals'):
            globals_or_locals = subscript_target.id
        if globals_or_locals:
            # Literal key: globals()['eval']
            if isinstance(node.slice, ast.Constant) and isinstance(node.slice.value, str):
                slice_val = node.slice.value
                if slice_val in self.DANGEROUS_BUILTINS:
                    self._add_finding(
                        f"ast_{globals_or_locals}_subscript_{slice_val}",
                        "critical", "obfuscation", node,
                        f"Dynamic access to {slice_val} via {globals_or_locals}()['{slice_val}']"
                    )
            # String concatenation in subscript: globals()['e'+'v'+'a'+'l']
            elif isinstance(node.slice, ast.BinOp):
                concat_str = self._concat_to_string(node.slice)
                normalized = normalize_input(concat_str)
                if normalized in self.DANGEROUS_BUILTINS:
                    self._add_finding(
                        f"ast_{globals_or_locals}_concat_{normalized}",
                        "critical", "obfuscation", node,
                        f"String concatenation obfuscation via {globals_or_locals}()['...']"
                    )
        # Check for __builtins__[...] — flagged regardless of the key, since
        # any subscript into __builtins__ is an evasion indicator.
        if isinstance(node.value, ast.Name) and node.value.id == '__builtins__':
            self._add_finding(
                "ast_builtins_subscript", "high", "obfuscation", node,
                "Direct subscript access to __builtins__"
            )
        self.generic_visit(node)

    def visit_BinOp(self, node: ast.BinOp) -> None:
        """Detect string concatenation building dangerous function names."""
        # Flags 'e'+'v'+'a'+'l' wherever it appears, even outside a call.
        if isinstance(node.op, ast.Add):
            concat_str = self._concat_to_string(node)
            normalized = normalize_input(concat_str)
            if normalized in self.DANGEROUS_BUILTINS:
                self._add_finding(
                    f"ast_string_concat_{normalized}", "high", "obfuscation", node,
                    f"String concatenation building '{normalized}' (possible obfuscation)"
                )
        self.generic_visit(node)

    def visit_Attribute(self, node: ast.Attribute) -> None:
        """Detect obj.eval, obj.exec patterns."""
        # Medium severity only: attribute names matching dangerous builtins
        # are suspicious but legitimate in some APIs (context-dependent).
        if node.attr in self.DANGEROUS_BUILTINS:
            self._add_finding(
                f"ast_attr_{node.attr}", "medium", "obfuscation", node,
                f"Access to .{node.attr} attribute (context-dependent risk)"
            )
        self.generic_visit(node)
def analyze_python_ast(content: str, file_path: str) -> List[Finding]:
    """
    Parse Python code and analyze its AST for security issues.

    Args:
        content: The Python source code to analyze
        file_path: Path to the file (for reporting)

    Returns:
        List of findings from AST analysis; empty if the code does not parse.
    """
    lines = content.split('\n')
    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ast.parse raises ValueError (not SyntaxError) for e.g. source
        # containing null bytes — previously that escaped and crashed the
        # whole scan, letting a malformed file act as a scanner DoS.
        # NOTE(review): unparseable Python fails OPEN (no findings); the
        # regex pass on normalized content is the remaining safety net.
        return []
    analyzer = PythonSecurityAnalyzer(lines, file_path)
    analyzer.visit(tree)
    return analyzer.findings
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Scanning functions # Scanning functions
@@ -529,7 +855,12 @@ INVISIBLE_CHARS = {
def scan_file(file_path: Path, rel_path: str = "") -> List[Finding]: def scan_file(file_path: Path, rel_path: str = "") -> List[Finding]:
""" """
Scan a single file for threat patterns and invisible unicode characters. Scan a single file for threat patterns, obfuscation, and invisible unicode.
Performs:
1. Invisible unicode character detection (on original content)
2. AST analysis for Python files (detects obfuscated execution patterns)
3. Regex pattern matching on normalized content (catches obfuscated variants)
Args: Args:
file_path: Absolute path to the file file_path: Absolute path to the file
@@ -553,27 +884,7 @@ def scan_file(file_path: Path, rel_path: str = "") -> List[Finding]:
lines = content.split('\n') lines = content.split('\n')
seen = set() # (pattern_id, line_number) for deduplication seen = set() # (pattern_id, line_number) for deduplication
# Regex pattern matching # Step 1: Invisible unicode character detection (on original)
for pattern, pid, severity, category, description in THREAT_PATTERNS:
for i, line in enumerate(lines, start=1):
if (pid, i) in seen:
continue
if re.search(pattern, line, re.IGNORECASE):
seen.add((pid, i))
matched_text = line.strip()
if len(matched_text) > 120:
matched_text = matched_text[:117] + "..."
findings.append(Finding(
pattern_id=pid,
severity=severity,
category=category,
file=rel_path,
line=i,
match=matched_text,
description=description,
))
# Invisible unicode character detection
for i, line in enumerate(lines, start=1): for i, line in enumerate(lines, start=1):
for char in INVISIBLE_CHARS: for char in INVISIBLE_CHARS:
if char in line: if char in line:
@@ -589,6 +900,38 @@ def scan_file(file_path: Path, rel_path: str = "") -> List[Finding]:
)) ))
break # one finding per line for invisible chars break # one finding per line for invisible chars
# Step 2: AST analysis for Python files
if file_path.suffix.lower() == '.py':
ast_findings = analyze_python_ast(content, rel_path)
findings.extend(ast_findings)
# Step 3: Normalize content and run regex patterns
# This catches obfuscated variants like Cyrillic homoglyphs, fullwidth, etc.
normalized_content = normalize_input(content)
normalized_lines = normalized_content.split('\n')
# Map normalized line numbers to original line numbers (they should match)
for pattern, pid, severity, category, description in THREAT_PATTERNS:
for i, norm_line in enumerate(normalized_lines, start=1):
if (pid, i) in seen:
continue
if re.search(pattern, norm_line, re.IGNORECASE):
seen.add((pid, i))
# Show original line content for context
original_line = lines[i - 1] if i <= len(lines) else norm_line
matched_text = original_line.strip()
if len(matched_text) > 120:
matched_text = matched_text[:117] + "..."
findings.append(Finding(
pattern_id=pid,
severity=severity,
category=category,
file=rel_path,
line=i,
match=matched_text,
description=description,
))
return findings return findings
@@ -598,8 +941,17 @@ def scan_skill(skill_path: Path, source: str = "community") -> ScanResult:
Performs: Performs:
1. Structural checks (file count, total size, binary files, symlinks) 1. Structural checks (file count, total size, binary files, symlinks)
2. Regex pattern matching on all text files 2. Unicode normalization to defeat obfuscation (NFKC, homoglyphs, zero-width)
3. Invisible unicode character detection 3. AST analysis for Python files (detects dynamic execution patterns)
4. Regex pattern matching on normalized content
5. Invisible unicode character detection
V-011 Bypass Protection:
- Unicode homoglyphs (Cyrillic, fullwidth, Greek lookalikes)
- Zero-width character injection (U+200B, U+200C, U+200D, U+FEFF)
- Case manipulation (EvAl, ExEc)
- String concatenation obfuscation ('e'+'v'+'a'+'l')
- Dynamic execution patterns (globals()['eval'], getattr(__builtins__, 'exec'))
Args: Args:
skill_path: Path to the skill directory (must contain SKILL.md) skill_path: Path to the skill directory (must contain SKILL.md)

View File

@@ -0,0 +1,410 @@
#!/usr/bin/env python3
"""
Tests for V-011 Skills Guard Bypass fix.
Tests all bypass techniques:
1. Unicode encoding tricks (fullwidth characters, Cyrillic homoglyphs)
2. Case manipulation (EvAl, ExEc)
3. Zero-width characters (U+200B, U+200C, U+200D, U+FEFF)
4. Dynamic execution obfuscation: globals()['ev'+'al'], getattr(__builtins__, 'exec')
5. String concatenation: 'e'+'v'+'a'+'l'
"""
import sys
import tempfile
from pathlib import Path
# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent))
from skills_guard import (
normalize_input,
analyze_python_ast,
scan_file,
ZERO_WIDTH_CHARS,
HOMOGLYPH_MAP,
)
class TestNormalizeInput:
    """Test input normalization for obfuscation removal."""

    def test_zero_width_removal(self):
        """Test removal of zero-width characters."""
        cases = [
            ("ev\u200bal", "eval"),              # U+200B zero-width space
            ("e\u200bx\u200ce\u200dc", "exec"),  # several zero-width chars
            ("\ufeffeval", "eval"),              # U+FEFF BOM
        ]
        for raw, expected in cases:
            got = normalize_input(raw)
            assert got == expected, f"Expected '{expected}', got '{got}'"
        print("✓ Zero-width character removal tests passed")

    def test_case_folding(self):
        """Test case folding (lowercase conversion)."""
        cases = [
            ("EvAl", "eval"),
            ("EXEC", "exec"),
            ("CoMpIlE", "compile"),
            ("GetAttr", "getattr"),
        ]
        for raw, expected in cases:
            got = normalize_input(raw)
            assert got == expected, f"Expected '{expected}', got '{got}'"
        print("✓ Case folding tests passed")

    def test_fullwidth_normalization(self):
        """Test fullwidth character normalization."""
        cases = [
            ("\uff45\uff56\uff41\uff4c", "eval"),  # fullwidth lowercase
            ("\uff25\uff36\uff21\uff2c", "eval"),  # fullwidth uppercase
            ("\uff45\uff58\uff45\uff43", "exec"),
            ("\uff4f\uff53", "os"),
        ]
        for raw, expected in cases:
            got = normalize_input(raw)
            assert got == expected, f"Expected '{expected}', got '{got}'"
        print("✓ Fullwidth normalization tests passed")

    def test_cyrillic_homoglyphs(self):
        """Test Cyrillic lookalike character normalization."""
        cases = [
            ("\u0435val", "eval"),                 # Cyrillic е
            ("\u0435x\u0435c", "exec"),            # Cyrillic е's
            ("\u0430\u0435\u0456\u043e", "aeio"),  # all Cyrillic
            ("g\u0435tattr", "getattr"),           # Cyrillic е
        ]
        for raw, expected in cases:
            got = normalize_input(raw)
            assert got == expected, f"Expected '{expected}', got '{got}'"
        print("✓ Cyrillic homoglyph tests passed")

    def test_combined_obfuscation(self):
        """Test combined obfuscation techniques."""
        # Mix of case, zero-width, and homoglyphs:
        # E + ZWS + V + Cyrillic а + L
        got = normalize_input("E\u200bV\u0430L")
        assert got == "eval", f"Expected 'eval', got '{got}'"
        print("✓ Combined obfuscation tests passed")
class TestASTAnalysis:
    """Test AST-based security analysis.

    Each test feeds a small snippet to analyze_python_ast() and matches
    findings by pattern_id substring.
    """

    def test_direct_dangerous_calls(self):
        """Test detection of direct eval/exec/compile calls."""
        code = "eval('1+1')"
        findings = analyze_python_ast(code, "test.py")
        assert any("eval" in f.pattern_id for f in findings), "Should detect eval() call"
        code = "exec('print(1)')"
        findings = analyze_python_ast(code, "test.py")
        assert any("exec" in f.pattern_id for f in findings), "Should detect exec() call"
        code = "compile('x', '<string>', 'exec')"
        findings = analyze_python_ast(code, "test.py")
        assert any("compile" in f.pattern_id for f in findings), "Should detect compile() call"
        print("✓ Direct dangerous call detection tests passed")

    def test_getattr_builtins_pattern(self):
        """Test detection of getattr(__builtins__, ...) pattern."""
        code = "getattr(__builtins__, 'eval')"
        findings = analyze_python_ast(code, "test.py")
        assert any("getattr_builtins" in f.pattern_id for f in findings), \
            "Should detect getattr(__builtins__, ...) pattern"
        code = "getattr(__builtins__, 'exec')"
        findings = analyze_python_ast(code, "test.py")
        assert any("getattr_exec" in f.pattern_id for f in findings), \
            "Should detect getattr(..., 'exec')"
        print("✓ getattr(__builtins__, ...) detection tests passed")

    def test_globals_subscript_pattern(self):
        """Test detection of globals()['eval'] pattern."""
        code = "globals()['eval']('1+1')"
        findings = analyze_python_ast(code, "test.py")
        assert any("globals" in f.pattern_id for f in findings), \
            "Should detect globals()['eval'] pattern"
        code = "locals()['exec']('print(1)')"
        findings = analyze_python_ast(code, "test.py")
        assert any("locals" in f.pattern_id for f in findings), \
            "Should detect locals()['exec'] pattern"
        print("✓ globals()/locals() subscript detection tests passed")

    def test_string_concatenation_obfuscation(self):
        """Test detection of string concatenation obfuscation."""
        # Simple concatenation: ('e'+'v'+'a'+'l')(...)
        code = "('e'+'v'+'a'+'l')('1+1')"
        findings = analyze_python_ast(code, "test.py")
        assert any("concat" in f.pattern_id for f in findings), \
            "Should detect string concatenation obfuscation"
        # Concatenation used as a globals() subscript key
        code = "globals()['e'+'v'+'a'+'l']('1+1')"
        findings = analyze_python_ast(code, "test.py")
        assert any("concat" in f.pattern_id for f in findings), \
            "Should detect concat in globals subscript"
        print("✓ String concatenation obfuscation detection tests passed")

    def test_dynamic_global_call(self):
        """Test detection of dynamic calls via globals()."""
        code = "globals()['eval']('1+1')"
        findings = analyze_python_ast(code, "test.py")
        assert any("dynamic_global" in f.pattern_id for f in findings), \
            "Should detect dynamic global access"
        print("✓ Dynamic global call detection tests passed")

    def test_legitimate_code_not_flagged(self):
        """Test that legitimate code is not flagged."""
        # Ordinary defs/class/import/print — must produce zero
        # "obfuscation"-category findings.
        code = """
def calculate(x, y):
    result = x + y
    return result
class MyClass:
    def method(self):
        return "hello"
import os
print(os.path.join("a", "b"))
"""
        findings = analyze_python_ast(code, "test.py")
        # Only obfuscation findings count; other categories are tolerated.
        obfuscation_findings = [f for f in findings if f.category == "obfuscation"]
        assert len(obfuscation_findings) == 0, \
            f"Legitimate code should not be flagged, got: {[f.description for f in obfuscation_findings]}"
        print("✓ Legitimate code not flagged tests passed")
class TestScanFileIntegration:
    """Integration tests for scan_file with new detection."""

    def _create_temp_file(self, content: str, suffix: str = ".py") -> Path:
        """Create a temporary file with the given content.

        Written as UTF-8 explicitly: these tests embed fullwidth and
        Cyrillic characters, and the platform default encoding (e.g.
        cp1252 on Windows) would raise UnicodeEncodeError on write.
        delete=False lets scan_file reopen the file by path; callers
        unlink it in their finally blocks.
        """
        with tempfile.NamedTemporaryFile(mode='w', suffix=suffix,
                                         delete=False, encoding='utf-8') as f:
            f.write(content)
        return Path(f.name)

    def test_unicode_obfuscation_detection(self):
        """Test that obfuscated eval is detected via normalization."""
        # Fullwidth eval — only detectable after NFKC normalization.
        code = "\uff45\uff56\uff41\uff4c('1+1')"
        path = self._create_temp_file(code)
        try:
            findings = scan_file(path, "test.py")
            # Should detect via regex on normalized content
            assert any("eval" in f.pattern_id.lower() or "eval" in f.description.lower()
                       for f in findings), \
                f"Should detect fullwidth eval, got: {[f.pattern_id for f in findings]}"
        finally:
            path.unlink()
        print("✓ Unicode obfuscation detection tests passed")

    def test_zero_width_character_detection(self):
        """Test detection of zero-width characters."""
        code = "ev\u200bal('1+1')"  # eval with zero-width space
        path = self._create_temp_file(code)
        try:
            findings = scan_file(path, "test.py")
            assert any("invisible_unicode" in f.pattern_id for f in findings), \
                f"Should detect invisible unicode, got: {[f.pattern_id for f in findings]}"
        finally:
            path.unlink()
        print("✓ Zero-width character detection tests passed")

    def test_ast_and_regex_combined(self):
        """Test that both AST and regex detection work together."""
        code = """
# Obfuscated eval via string concat
func = ('e'+'v'+'a'+'l')
result = func('1+1')
# Also fullwidth in comment:
"""
        path = self._create_temp_file(code)
        try:
            findings = scan_file(path, "test.py")
            # AST findings are distinguishable by their "ast_" prefix.
            ast_findings = [f for f in findings if f.pattern_id.startswith("ast_")]
            assert len(ast_findings) > 0, "Should have AST-based findings"
        finally:
            path.unlink()
        print("✓ AST and regex combined detection tests passed")

    def test_cyrillic_in_code_detection(self):
        """Test detection of Cyrillic homoglyphs in code."""
        # Using Cyrillic е (U+0435) instead of Latin e (U+0065)
        code = "\u0435val('1+1')"
        path = self._create_temp_file(code)
        try:
            findings = scan_file(path, "test.py")
            # After normalization, the regex pass should catch this.
            assert any("eval" in f.pattern_id.lower() or "eval" in f.description.lower()
                       for f in findings), \
                f"Should detect Cyrillic obfuscated eval, got: {[f.pattern_id for f in findings]}"
        finally:
            path.unlink()
        print("✓ Cyrillic homoglyph detection tests passed")
class TestBypassTechniques:
    """Test specific bypass techniques mentioned in the vulnerability report."""

    def test_bypass_1_unicode_encoding(self):
        """Bypass 1: Unicode encoding tricks (fullwidth characters)."""
        # Fullwidth 'eval'
        fullwidth_eval = "\uff45\uff56\uff41\uff4c"
        normalized = normalize_input(fullwidth_eval)
        assert normalized == "eval", "Fullwidth should normalize to ASCII"
        # Fullwidth 'exec'
        fullwidth_exec = "\uff45\uff58\uff45\uff43"
        normalized = normalize_input(fullwidth_exec)
        assert normalized == "exec", "Fullwidth exec should normalize"
        print("✓ Bypass 1: Unicode encoding tricks blocked")

    def test_bypass_2_case_manipulation(self):
        """Bypass 2: Case manipulation (EvAl, ExEc)."""
        # NOTE(review): "EVA" looks like a typo for "EVAL"; the assertion
        # still holds because expected is computed as case.lower().
        test_cases = ["EvAl", "ExEc", "CoMpIlE", "EVA", "exec"]
        for case in test_cases:
            normalized = normalize_input(case)
            expected = case.lower()
            assert normalized == expected, f"Case folding failed for {case}"
        print("✓ Bypass 2: Case manipulation blocked")

    def test_bypass_3_zero_width(self):
        """Bypass 3: Zero-width characters (U+200B, U+200C, U+200D, U+FEFF)."""
        # Every character in the guard's ZERO_WIDTH_CHARS set must be
        # stripped when injected mid-token.
        for char in ZERO_WIDTH_CHARS:
            obfuscated = f"ev{char}al"
            normalized = normalize_input(obfuscated)
            assert normalized == "eval", f"Zero-width char U+{ord(char):04X} not removed"
        print("✓ Bypass 3: Zero-width character injection blocked")

    def test_bypass_4_dynamic_execution(self):
        """Bypass 4: Dynamic execution obfuscation."""
        # globals()['eval']
        code1 = "globals()['eval']('1+1')"
        findings1 = analyze_python_ast(code1, "test.py")
        assert len([f for f in findings1 if "globals" in f.pattern_id]) > 0, \
            "globals()['eval'] should be detected"
        # getattr(__builtins__, 'exec')
        code2 = "getattr(__builtins__, 'exec')"
        findings2 = analyze_python_ast(code2, "test.py")
        assert any("getattr_builtins" in f.pattern_id for f in findings2), \
            "getattr(__builtins__, ...) should be detected"
        print("✓ Bypass 4: Dynamic execution obfuscation blocked")

    def test_bypass_5_string_concatenation(self):
        """Bypass 5: String concatenation ('e'+'v'+'a'+'l')."""
        # AST analysis should detect the concatenated name directly.
        code = "('e'+'v'+'a'+'l')('1+1')"
        findings = analyze_python_ast(code, "test.py")
        assert any("concat" in f.pattern_id for f in findings), \
            "String concatenation obfuscation should be detected"
        # Also when used as a globals() subscript key.
        code2 = "globals()['e'+'v'+'a'+'l']('1+1')"
        findings2 = analyze_python_ast(code2, "test.py")
        assert any("concat" in f.pattern_id for f in findings2), \
            "Concat in globals subscript should be detected"
        print("✓ Bypass 5: String concatenation obfuscation blocked")

    def test_cyrillic_homoglyph_bypass(self):
        """Test Cyrillic homoglyph bypass (е vs e)."""
        # е (U+0435) vs e (U+0065) — distinct code points...
        cyrillic_e = "\u0435"
        latin_e = "e"
        assert cyrillic_e != latin_e, "Cyrillic and Latin e should be different"
        # ...that must collapse to the same character after normalization.
        normalized_cyrillic = normalize_input(cyrillic_e)
        normalized_latin = normalize_input(latin_e)
        assert normalized_cyrillic == normalized_latin == "e", \
            "Cyrillic е should normalize to Latin e"
        # Full word: еval (with Cyrillic е)
        cyrillic_eval = "\u0435val"
        normalized = normalize_input(cyrillic_eval)
        assert normalized == "eval", "Cyrillic eval should normalize"
        print("✓ Cyrillic homoglyph bypass blocked")
def run_all_tests():
    """Run all tests.

    Instantiates each test class, runs every method whose name starts with
    ``test_``, tallies pass/fail counts, prints a summary, and exits with
    status 1 on any failure (0 on full success).
    """
    print("=" * 60)
    print("V-011 Skills Guard Bypass Fix Tests")
    print("=" * 60)
    test_classes = [
        TestNormalizeInput,
        TestASTAnalysis,
        TestScanFileIntegration,
        TestBypassTechniques,
    ]
    passed = 0
    failed = 0
    for test_class in test_classes:
        print(f"\n--- {test_class.__name__} ---")
        instance = test_class()
        # dir() returns names sorted alphabetically, so tests run in
        # name order, not definition order.
        for method_name in dir(instance):
            if method_name.startswith("test_"):
                try:
                    method = getattr(instance, method_name)
                    method()
                    passed += 1
                except AssertionError as e:
                    # Assertion failures are test failures...
                    print(f" ✗ FAILED: {method_name}: {e}")
                    failed += 1
                except Exception as e:
                    # ...any other exception is reported as an error, but
                    # both count toward the failed total.
                    print(f" ✗ ERROR: {method_name}: {e}")
                    failed += 1
    print("\n" + "=" * 60)
    print(f"Results: {passed} passed, {failed} failed")
    print("=" * 60)
    if failed > 0:
        sys.exit(1)
    else:
        print("\n✓ All V-011 bypass protection tests passed!")
        sys.exit(0)
# Allow running this module directly as a standalone test script.
if __name__ == "__main__":
    run_all_tests()