fix(security): normalize input before dangerous command detection (#3260)
detect_dangerous_command() ran regex patterns against raw command strings without normalization, allowing bypass via Unicode fullwidth chars, ANSI escape codes, null bytes, and 8-bit C1 controls. Adds _normalize_command_for_detection() that: - Strips ANSI escapes using the full ECMA-48 strip_ansi() from tools/ansi_strip (CSI, OSC, DCS, 8-bit C1, nF sequences) - Removes null bytes - Normalizes Unicode via NFKC (fullwidth Latin → ASCII, etc.) Includes 12 regression tests covering fullwidth, ANSI, C1, null byte, and combined obfuscation bypasses. Salvaged from PR #3089 by thakoreh — improved ANSI stripping to use existing comprehensive strip_ansi() instead of a weaker hand-rolled regex, and added test coverage. Co-authored-by: Hiren <hiren.thakore58@gmail.com>
This commit is contained in:
@@ -512,3 +512,73 @@ class TestGatewayProtection:
|
||||
dangerous, key, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is False
|
||||
|
||||
|
||||
class TestNormalizationBypass:
|
||||
"""Obfuscation techniques must not bypass dangerous command detection."""
|
||||
|
||||
def test_fullwidth_unicode_rm(self):
|
||||
"""Fullwidth Unicode 'rm -rf /' must be caught after NFKC normalization."""
|
||||
cmd = "\uff52\uff4d -\uff52\uff46 /" # rm -rf /
|
||||
dangerous, key, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True, f"Fullwidth 'rm -rf /' was not detected: {cmd!r}"
|
||||
|
||||
def test_fullwidth_unicode_dd(self):
|
||||
"""Fullwidth 'dd if=/dev/zero' must be caught."""
|
||||
cmd = "\uff44\uff44 if=/dev/zero of=/dev/sda"
|
||||
dangerous, key, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
|
||||
def test_fullwidth_unicode_chmod(self):
|
||||
"""Fullwidth 'chmod 777' must be caught."""
|
||||
cmd = "\uff43\uff48\uff4d\uff4f\uff44 777 /tmp/test"
|
||||
dangerous, key, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
|
||||
def test_ansi_csi_wrapped_rm(self):
|
||||
"""ANSI CSI color codes wrapping 'rm' must be stripped and caught."""
|
||||
cmd = "\x1b[31mrm\x1b[0m -rf /"
|
||||
dangerous, key, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True, f"ANSI-wrapped 'rm -rf /' was not detected"
|
||||
|
||||
def test_ansi_osc_embedded_rm(self):
|
||||
"""ANSI OSC sequences embedded in command must be stripped."""
|
||||
cmd = "\x1b]0;title\x07rm -rf /"
|
||||
dangerous, key, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
|
||||
def test_ansi_8bit_c1_wrapped_rm(self):
|
||||
"""8-bit C1 CSI (0x9b) wrapping 'rm' must be stripped and caught."""
|
||||
cmd = "\x9b31mrm\x9b0m -rf /"
|
||||
dangerous, key, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True, "8-bit C1 CSI bypass was not caught"
|
||||
|
||||
def test_null_byte_in_rm(self):
|
||||
"""Null bytes injected into 'rm' must be stripped and caught."""
|
||||
cmd = "r\x00m -rf /"
|
||||
dangerous, key, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True, f"Null-byte 'rm' was not detected: {cmd!r}"
|
||||
|
||||
def test_null_byte_in_dd(self):
|
||||
"""Null bytes in 'dd' must be stripped."""
|
||||
cmd = "d\x00d if=/dev/sda"
|
||||
dangerous, key, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
|
||||
def test_mixed_fullwidth_and_ansi(self):
|
||||
"""Combined fullwidth + ANSI obfuscation must still be caught."""
|
||||
cmd = "\x1b[1m\uff52\uff4d\x1b[0m -rf /"
|
||||
dangerous, key, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is True
|
||||
|
||||
def test_safe_command_after_normalization(self):
|
||||
"""Normal safe commands must not be flagged after normalization."""
|
||||
cmd = "ls -la /tmp"
|
||||
dangerous, key, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is False
|
||||
|
||||
def test_fullwidth_safe_command_not_flagged(self):
|
||||
"""Fullwidth 'ls -la' is safe and must not be flagged."""
|
||||
cmd = "\uff4c\uff53 -\uff4c\uff41 /tmp"
|
||||
dangerous, key, desc = detect_dangerous_command(cmd)
|
||||
assert dangerous is False
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ import os
|
||||
import re
|
||||
import sys
|
||||
import threading
|
||||
import unicodedata
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -82,13 +83,31 @@ def _approval_key_aliases(pattern_key: str) -> set[str]:
|
||||
# Detection
|
||||
# =========================================================================
|
||||
|
||||
def _normalize_command_for_detection(command: str) -> str:
|
||||
"""Normalize a command string before dangerous-pattern matching.
|
||||
|
||||
Strips ANSI escape sequences (full ECMA-48 via tools.ansi_strip),
|
||||
null bytes, and normalizes Unicode fullwidth characters so that
|
||||
obfuscation techniques cannot bypass the pattern-based detection.
|
||||
"""
|
||||
from tools.ansi_strip import strip_ansi
|
||||
|
||||
# Strip all ANSI escape sequences (CSI, OSC, DCS, 8-bit C1, etc.)
|
||||
command = strip_ansi(command)
|
||||
# Strip null bytes
|
||||
command = command.replace('\x00', '')
|
||||
# Normalize Unicode (fullwidth Latin, halfwidth Katakana, etc.)
|
||||
command = unicodedata.normalize('NFKC', command)
|
||||
return command
|
||||
|
||||
|
||||
def detect_dangerous_command(command: str) -> tuple:
|
||||
"""Check if a command matches any dangerous patterns.
|
||||
|
||||
Returns:
|
||||
(is_dangerous, pattern_key, description) or (False, None, None)
|
||||
"""
|
||||
command_lower = command.lower()
|
||||
command_lower = _normalize_command_for_detection(command).lower()
|
||||
for pattern, description in DANGEROUS_PATTERNS:
|
||||
if re.search(pattern, command_lower, re.IGNORECASE | re.DOTALL):
|
||||
pattern_key = description
|
||||
|
||||
Reference in New Issue
Block a user