From 76ed15dd4decd88aaa56019ee728a096722a97a1 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Thu, 26 Mar 2026 14:33:18 -0700 Subject: [PATCH] fix(security): normalize input before dangerous command detection (#3260) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit detect_dangerous_command() ran regex patterns against raw command strings without normalization, allowing bypass via Unicode fullwidth chars, ANSI escape codes, null bytes, and 8-bit C1 controls. Adds _normalize_command_for_detection() that: - Strips ANSI escapes using the full ECMA-48 strip_ansi() from tools/ansi_strip (CSI, OSC, DCS, 8-bit C1, nF sequences) - Removes null bytes - Normalizes Unicode via NFKC (fullwidth Latin → ASCII, etc.) Includes 12 regression tests covering fullwidth, ANSI, C1, null byte, and combined obfuscation bypasses. Salvaged from PR #3089 by thakoreh — improved ANSI stripping to use existing comprehensive strip_ansi() instead of a weaker hand-rolled regex, and added test coverage. Co-authored-by: Hiren --- tests/tools/test_approval.py | 70 ++++++++++++++++++++++++++++++++++++ tools/approval.py | 21 ++++++++++- 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/tests/tools/test_approval.py b/tests/tools/test_approval.py index bdd2b5284..b973cb0f0 100644 --- a/tests/tools/test_approval.py +++ b/tests/tools/test_approval.py @@ -512,3 +512,73 @@ class TestGatewayProtection: dangerous, key, desc = detect_dangerous_command(cmd) assert dangerous is False + +class TestNormalizationBypass: + """Obfuscation techniques must not bypass dangerous command detection.""" + + def test_fullwidth_unicode_rm(self): + """Fullwidth Unicode 'rm -rf /' must be caught after NFKC normalization.""" + cmd = "\uff52\uff4d -\uff52\uff46 /" # rm -rf / + dangerous, key, desc = detect_dangerous_command(cmd) + assert dangerous is True, f"Fullwidth 'rm -rf /' was not detected: {cmd!r}" + + def test_fullwidth_unicode_dd(self): + """Fullwidth 'dd if=/dev/zero' must be caught.""" + cmd = "\uff44\uff44 if=/dev/zero of=/dev/sda" + dangerous, key, desc = detect_dangerous_command(cmd) + assert dangerous is True + + def test_fullwidth_unicode_chmod(self): + """Fullwidth 'chmod 777' must be caught.""" + cmd = "\uff43\uff48\uff4d\uff4f\uff44 777 /tmp/test" + dangerous, key, desc = detect_dangerous_command(cmd) + assert dangerous is True + + def test_ansi_csi_wrapped_rm(self): + """ANSI CSI color codes wrapping 'rm' must be stripped and caught.""" + cmd = "\x1b[31mrm\x1b[0m -rf /" + dangerous, key, desc = detect_dangerous_command(cmd) + assert dangerous is True, f"ANSI-wrapped 'rm -rf /' was not detected" + + def test_ansi_osc_embedded_rm(self): + """ANSI OSC sequences embedded in command must be stripped.""" + cmd = "\x1b]0;title\x07rm -rf /" + dangerous, key, desc = detect_dangerous_command(cmd) + assert dangerous is True + + def test_ansi_8bit_c1_wrapped_rm(self): + """8-bit C1 CSI (0x9b) wrapping 'rm' must be stripped and caught.""" + cmd = "\x9b31mrm\x9b0m -rf /" + dangerous, key, desc = detect_dangerous_command(cmd) + assert dangerous is True, "8-bit C1 CSI bypass was not caught" + + def test_null_byte_in_rm(self): + """Null bytes injected into 'rm' must be stripped and caught.""" + cmd = "r\x00m -rf /" + dangerous, key, desc = detect_dangerous_command(cmd) + assert dangerous is True, f"Null-byte 'rm' was not detected: {cmd!r}" + + def test_null_byte_in_dd(self): + """Null bytes in 'dd' must be stripped.""" + cmd = "d\x00d if=/dev/sda" + dangerous, key, desc = detect_dangerous_command(cmd) + assert dangerous is True + + def test_mixed_fullwidth_and_ansi(self): + """Combined fullwidth + ANSI obfuscation must still be caught.""" + cmd = "\x1b[1m\uff52\uff4d\x1b[0m -rf /" + dangerous, key, desc = detect_dangerous_command(cmd) + assert dangerous is True + + def test_safe_command_after_normalization(self): + """Normal safe commands must not be flagged after normalization.""" + cmd = "ls -la /tmp" + dangerous, key, desc = detect_dangerous_command(cmd) + assert dangerous is False + + def test_fullwidth_safe_command_not_flagged(self): + """Fullwidth 'ls -la' is safe and must not be flagged.""" + cmd = "\uff4c\uff53 -\uff4c\uff41 /tmp" + dangerous, key, desc = detect_dangerous_command(cmd) + assert dangerous is False + diff --git a/tools/approval.py b/tools/approval.py index ea814e5ce..f3ae4e1fe 100644 --- a/tools/approval.py +++ b/tools/approval.py @@ -13,6 +13,7 @@ import os import re import sys import threading +import unicodedata from typing import Optional logger = logging.getLogger(__name__) @@ -82,13 +83,31 @@ def _approval_key_aliases(pattern_key: str) -> set[str]: # Detection # ========================================================================= +def _normalize_command_for_detection(command: str) -> str: + """Normalize a command string before dangerous-pattern matching. + + Strips ANSI escape sequences (full ECMA-48 via tools.ansi_strip), + null bytes, and normalizes Unicode fullwidth characters so that + obfuscation techniques cannot bypass the pattern-based detection. + """ + from tools.ansi_strip import strip_ansi + + # Strip all ANSI escape sequences (CSI, OSC, DCS, 8-bit C1, etc.) + command = strip_ansi(command) + # Strip null bytes + command = command.replace('\x00', '') + # Normalize Unicode (fullwidth Latin, halfwidth Katakana, etc.) + command = unicodedata.normalize('NFKC', command) + return command + + def detect_dangerous_command(command: str) -> tuple: """Check if a command matches any dangerous patterns. Returns: (is_dangerous, pattern_key, description) or (False, None, None) """ - command_lower = command.lower() + command_lower = _normalize_command_for_detection(command).lower() for pattern, description in DANGEROUS_PATTERNS: if re.search(pattern, command_lower, re.IGNORECASE | re.DOTALL): pattern_key = description