# hermes-agent/tools/test_skills_guard_v011.py

#!/usr/bin/env python3
"""
Tests for V-011 Skills Guard Bypass fix.

Tests all bypass techniques:
1. Unicode encoding tricks (fullwidth characters, Cyrillic homoglyphs)
2. Case manipulation (EvAl, ExEc)
3. Zero-width characters (U+200B, U+200C, U+200D, U+FEFF)
4. Dynamic execution obfuscation: globals()['ev'+'al'], getattr(__builtins__, 'exec')
5. String concatenation: 'e'+'v'+'a'+'l'
"""

import sys
import tempfile
from pathlib import Path

# Add parent directory to path
sys.path.insert(0, str(Path(__file__).parent))

from skills_guard import (
    normalize_input,
    analyze_python_ast,
    scan_file,
    ZERO_WIDTH_CHARS,
    HOMOGLYPH_MAP,
)


class TestNormalizeInput:
    """Check that ``normalize_input`` maps obfuscated text to plain ASCII.

    Every test feeds an obfuscated spelling of a sensitive identifier to
    ``normalize_input`` and asserts the exact lowercase ASCII result.
    """

    def test_zero_width_removal(self):
        """Zero-width code points must be absent from the normalized text."""
        samples = (
            ("ev\u200bal", "eval"),  # single U+200B zero-width space
            ("e\u200bx\u200ce\u200dc", "exec"),  # U+200B, U+200C and U+200D mixed
            ("\ufeffeval", "eval"),  # leading U+FEFF BOM
        )
        for raw, wanted in samples:
            got = normalize_input(raw)
            assert got == wanted, f"Expected '{wanted}', got '{got}'"

        print("✓ Zero-width character removal tests passed")

    def test_case_folding(self):
        """Mixed-case spellings must come back fully lowercased."""
        expectations = {
            "EvAl": "eval",
            "EXEC": "exec",
            "CoMpIlE": "compile",
            "GetAttr": "getattr",
        }
        for raw, wanted in expectations.items():
            got = normalize_input(raw)
            assert got == wanted, f"Expected '{wanted}', got '{got}'"

        print("✓ Case folding tests passed")

    def test_fullwidth_normalization(self):
        """Fullwidth Latin letters must collapse to their ASCII counterparts."""
        samples = (
            ("\uff45\uff56\uff41\uff4c", "eval"),  # ｅｖａｌ
            ("\uff25\uff36\uff21\uff2c", "eval"),  # ＥＶＡＬ (uppercase fullwidth)
            ("\uff45\uff58\uff45\uff43", "exec"),  # ｅｘｅｃ
            ("\uff4f\uff53", "os"),  # ｏｓ
        )
        for raw, wanted in samples:
            got = normalize_input(raw)
            assert got == wanted, f"Expected '{wanted}', got '{got}'"

        print("✓ Fullwidth normalization tests passed")

    def test_cyrillic_homoglyphs(self):
        """Cyrillic lookalikes must be replaced by the Latin letters they mimic."""
        # Cyrillic е (U+0435) is visually identical to Latin e (U+0065).
        samples = (
            ("\u0435val", "eval"),  # еval (Cyrillic е)
            ("\u0435x\u0435c", "exec"),  # еxеc (two Cyrillic е)
            ("\u0430\u0435\u0456\u043e", "aeio"),  # аеіо (entirely Cyrillic)
            ("g\u0435tattr", "getattr"),  # gеtattr (Cyrillic е)
        )
        for raw, wanted in samples:
            got = normalize_input(raw)
            assert got == wanted, f"Expected '{wanted}', got '{got}'"

        print("✓ Cyrillic homoglyph tests passed")

    def test_combined_obfuscation(self):
        """Several obfuscation tricks stacked in one string are all undone."""
        # E + zero-width space + V + Cyrillic а (U+0430) + L
        got = normalize_input("E\u200bV\u0430L")
        assert got == "eval", f"Expected 'eval', got '{got}'"

        print("✓ Combined obfuscation tests passed")


class TestASTAnalysis:
    """Exercise ``analyze_python_ast`` on dangerous and benign snippets.

    A snippet counts as detected when at least one returned finding has a
    ``pattern_id`` containing the expected marker substring.
    """

    @staticmethod
    def _expect_pattern(snippet, marker, message):
        """Analyze *snippet* and assert some finding's pattern_id contains *marker*."""
        findings = analyze_python_ast(snippet, "test.py")
        assert any(marker in finding.pattern_id for finding in findings), message

    def test_direct_dangerous_calls(self):
        """Plain eval/exec/compile calls must each produce a matching finding."""
        cases = (
            ("eval('1+1')", "eval", "Should detect eval() call"),
            ("exec('print(1)')", "exec", "Should detect exec() call"),
            ("compile('x', '<string>', 'exec')", "compile", "Should detect compile() call"),
        )
        for snippet, marker, message in cases:
            self._expect_pattern(snippet, marker, message)

        print("✓ Direct dangerous call detection tests passed")

    def test_getattr_builtins_pattern(self):
        """Lookups through getattr(__builtins__, ...) must be reported."""
        self._expect_pattern(
            "getattr(__builtins__, 'eval')",
            "getattr_builtins",
            "Should detect getattr(__builtins__, ...) pattern",
        )
        self._expect_pattern(
            "getattr(__builtins__, 'exec')",
            "getattr_exec",
            "Should detect getattr(..., 'exec')",
        )

        print("✓ getattr(__builtins__, ...) detection tests passed")

    def test_globals_subscript_pattern(self):
        """Subscripting globals()/locals() with a dangerous name must be reported."""
        self._expect_pattern(
            "globals()['eval']('1+1')",
            "globals",
            "Should detect globals()['eval'] pattern",
        )
        self._expect_pattern(
            "locals()['exec']('print(1)')",
            "locals",
            "Should detect locals()['exec'] pattern",
        )

        print("✓ globals()/locals() subscript detection tests passed")

    def test_string_concatenation_obfuscation(self):
        """Names assembled from concatenated string pieces must be reported."""
        # Bare concatenation used as the callee.
        self._expect_pattern(
            "('e'+'v'+'a'+'l')('1+1')",
            "concat",
            "Should detect string concatenation obfuscation",
        )
        # The same concatenation used as a globals() subscript key.
        self._expect_pattern(
            "globals()['e'+'v'+'a'+'l']('1+1')",
            "concat",
            "Should detect concat in globals subscript",
        )

        print("✓ String concatenation obfuscation detection tests passed")

    def test_dynamic_global_call(self):
        """Calling through a globals() lookup must yield a dynamic_global finding."""
        self._expect_pattern(
            "globals()['eval']('1+1')",
            "dynamic_global",
            "Should detect dynamic global access",
        )

        print("✓ Dynamic global call detection tests passed")

    def test_legitimate_code_not_flagged(self):
        """Ordinary code must produce no findings in the obfuscation category."""
        # A plain function, a class and a standard os.path call.
        benign_source = """
def calculate(x, y):
    result = x + y
    return result

class MyClass:
    def method(self):
        return "hello"

import os
print(os.path.join("a", "b"))
"""
        # Only obfuscation findings matter here; other categories are ignored.
        flagged = [
            finding
            for finding in analyze_python_ast(benign_source, "test.py")
            if finding.category == "obfuscation"
        ]
        assert not flagged, \
            f"Legitimate code should not be flagged, got: {[f.description for f in flagged]}"

        print("✓ Legitimate code not flagged tests passed")


class TestScanFileIntegration:
    """Integration tests for scan_file with new detection."""

    def _create_temp_file(self, content: str, suffix: str = ".py") -> Path:
        """Create a temporary file with the given content."""
        with tempfile.NamedTemporaryFile(mode='w', suffix=suffix, delete=False) as f:
            f.write(content)
            return Path(f.name)

    def test_unicode_obfuscation_detection(self):
        """Test that obfuscated eval is detected via normalization."""
        # Fullwidth eval
        code = "\uff45\uff56\uff41\uff4c('1+1')"  # ｅｖａｌ
        path = self._create_temp_file(code)
        try:
            findings = scan_file(path, "test.py")
            # Should detect via regex on normalized content
            assert any("eval" in f.pattern_id.lower() or "eval" in f.description.lower()
                      for f in findings), \
                f"Should detect fullwidth eval, got: {[f.pattern_id for f in findings]}"
        finally:
            path.unlink()

        print("✓ Unicode obfuscation detection tests passed")

    def test_zero_width_character_detection(self):
        """Test detection of zero-width characters."""
        code = "ev\u200bal('1+1')"  # eval with zero-width space
        path = self._create_temp_file(code)
        try:
            findings = scan_file(path, "test.py")
            assert any("invisible_unicode" in f.pattern_id for f in findings), \
                f"Should detect invisible unicode, got: {[f.pattern_id for f in findings]}"
        finally:
            path.unlink()

        print("✓ Zero-width character detection tests passed")

    def test_ast_and_regex_combined(self):
        """Test that both AST and regex detection work together."""
        code = """
# Obfuscated eval via string concat
func = ('e'+'v'+'a'+'l')
result = func('1+1')

# Also fullwidth in comment: ｅｖａｌ
"""
        path = self._create_temp_file(code)
        try:
            findings = scan_file(path, "test.py")
            ast_findings = [f for f in findings if f.pattern_id.startswith("ast_")]
            assert len(ast_findings) > 0, "Should have AST-based findings"
        finally:
            path.unlink()

        print("✓ AST and regex combined detection tests passed")

    def test_cyrillic_in_code_detection(self):
        """Test detection of Cyrillic homoglyphs in code."""
        # Using Cyrillic е (U+0435) instead of Latin e (U+0065)
        code = "\u0435val('1+1')"  # еval with Cyrillic е
        path = self._create_temp_file(code)
        try:
            findings = scan_file(path, "test.py")
            # After normalization, regex should catch this
            assert any("eval" in f.pattern_id.lower() or "eval" in f.description.lower()
                      for f in findings), \
                f"Should detect Cyrillic obfuscated eval, got: {[f.pattern_id for f in findings]}"
        finally:
            path.unlink()

        print("✓ Cyrillic homoglyph detection tests passed")


class TestBypassTechniques:
    """One test per bypass technique listed in the vulnerability report."""

    def test_bypass_1_unicode_encoding(self):
        """Bypass 1: fullwidth spellings must normalize to plain ASCII."""
        samples = (
            # ｅｖａｌ
            ("\uff45\uff56\uff41\uff4c", "eval", "Fullwidth should normalize to ASCII"),
            # ｅｘｅｃ
            ("\uff45\uff58\uff45\uff43", "exec", "Fullwidth exec should normalize"),
        )
        for raw, wanted, message in samples:
            assert normalize_input(raw) == wanted, message

        print("✓ Bypass 1: Unicode encoding tricks blocked")

    def test_bypass_2_case_manipulation(self):
        """Bypass 2: mixed-case spellings must normalize to lowercase."""
        for variant in ("EvAl", "ExEc", "CoMpIlE", "EVA", "exec"):
            assert normalize_input(variant) == variant.lower(), \
                f"Case folding failed for {variant}"

        print("✓ Bypass 2: Case manipulation blocked")

    def test_bypass_3_zero_width(self):
        """Bypass 3: every entry of ZERO_WIDTH_CHARS must be stripped."""
        for zw in ZERO_WIDTH_CHARS:
            # Place the invisible character in the middle of the keyword.
            cleaned = normalize_input(f"ev{zw}al")
            assert cleaned == "eval", f"Zero-width char U+{ord(zw):04X} not removed"

        print("✓ Bypass 3: Zero-width character injection blocked")

    def test_bypass_4_dynamic_execution(self):
        """Bypass 4: dynamic lookups of dangerous builtins must be reported."""
        # Lookup through globals()['eval'].
        via_globals = analyze_python_ast("globals()['eval']('1+1')", "test.py")
        globals_hits = [f for f in via_globals if "globals" in f.pattern_id]
        assert globals_hits, "globals()['eval'] should be detected"

        # Lookup through getattr(__builtins__, 'exec').
        via_getattr = analyze_python_ast("getattr(__builtins__, 'exec')", "test.py")
        assert any("getattr_builtins" in f.pattern_id for f in via_getattr), \
            "getattr(__builtins__, ...) should be detected"

        print("✓ Bypass 4: Dynamic execution obfuscation blocked")

    def test_bypass_5_string_concatenation(self):
        """Bypass 5: names built by string concatenation must be reported."""
        cases = (
            # Concatenated name called directly.
            ("('e'+'v'+'a'+'l')('1+1')",
             "String concatenation obfuscation should be detected"),
            # Concatenated name used as a globals() key.
            ("globals()['e'+'v'+'a'+'l']('1+1')",
             "Concat in globals subscript should be detected"),
        )
        for snippet, message in cases:
            findings = analyze_python_ast(snippet, "test.py")
            assert any("concat" in f.pattern_id for f in findings), message

        print("✓ Bypass 5: String concatenation obfuscation blocked")

    def test_cyrillic_homoglyph_bypass(self):
        """Cyrillic е and Latin e differ raw but must match after normalization."""
        # е (U+0435) versus e (U+0065)
        cyr, lat = "\u0435", "e"
        assert cyr != lat, "Cyrillic and Latin e should be different"

        # Both letters are expected to normalize to the same Latin character.
        from_cyr = normalize_input(cyr)
        from_lat = normalize_input(lat)
        assert from_cyr == from_lat == "e", \
            "Cyrillic е should normalize to Latin e"

        # Whole word: еval spelled with a leading Cyrillic е.
        assert normalize_input("\u0435val") == "eval", "Cyrillic eval should normalize"

        print("✓ Cyrillic homoglyph bypass blocked")


def run_all_tests():
    """Run every ``test_*`` method of the test classes and exit with a status.

    Each class is instantiated once and its ``test_*`` attributes are called
    in ``dir()`` order. Assertion failures and other exceptions are both
    counted as failures but reported with different labels. The process
    exits with status 1 if anything failed, otherwise 0; this function never
    returns normally.
    """
    rule = "=" * 60
    print(rule)
    print("V-011 Skills Guard Bypass Fix Tests")
    print(rule)

    suites = (
        TestNormalizeInput,
        TestASTAnalysis,
        TestScanFileIntegration,
        TestBypassTechniques,
    )

    passed = 0
    failed = 0

    for suite in suites:
        print(f"\n--- {suite.__name__} ---")
        case = suite()
        for name in dir(case):
            if not name.startswith("test_"):
                continue
            try:
                getattr(case, name)()
            except AssertionError as exc:
                # A failed expectation inside the test.
                print(f"  ✗ FAILED: {name}: {exc}")
                failed += 1
            except Exception as exc:
                # Anything else is reported as an error rather than a failure.
                print(f"  ✗ ERROR: {name}: {exc}")
                failed += 1
            else:
                passed += 1

    print("\n" + rule)
    print(f"Results: {passed} passed, {failed} failed")
    print(rule)

    if failed > 0:
        sys.exit(1)

    print("\n✓ All V-011 bypass protection tests passed!")
    sys.exit(0)


# Invoke the built-in runner only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    run_all_tests()