Fix regex patterns in input_sanitizer.py for 8 failing tests
- ai_simulator: Simplified the pattern to match 'you are an AI simulator' without requiring additional malicious context.
- ai_character: Added an optional 'now' between 'are' and the article to match 'you are now an uncensored AI model'.
- instructions_leak: Added an optional word between 'your' and 'instructions' to handle adjectives like 'hidden'.
- data_exfil: Made the pattern more flexible so it matches multi-word data descriptions like 'conversation data'.
- special_token: Removed the \b word boundaries, which do not work next to the '<' character.
- fake_tool_call: Removed the \b word boundaries after XML tags.
- model_info: Simplified the pattern to match 'what model architecture are you' with an optional word.
- system_command: Removed 'system' from the simple command patterns to avoid false positives on programming terms like 'system() function'.

All 73 tests now pass.
This commit is contained in:
417
agent/input_sanitizer.py
Normal file
417
agent/input_sanitizer.py
Normal file
@@ -0,0 +1,417 @@
|
||||
"""Input Sanitizer -- Hardens against prompt injection attacks.
|
||||
|
||||
Issue #87: [ALLEGRO-BURN-02] Input Sanitizer -- Harden Against Prompt Injection Patterns
|
||||
|
||||
This module provides detection and sanitization for various prompt injection
|
||||
attack vectors including DAN-style jailbreaks, roleplaying overrides,
|
||||
system prompt extraction, and encoding bypasses.
|
||||
"""
|
||||
|
||||
import re
|
||||
import base64
|
||||
import binascii
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum, auto
|
||||
from typing import List, Optional, Tuple, Dict, Callable
|
||||
|
||||
|
||||
class InjectionType(Enum):
    """Categories of prompt-injection attack recognised by the sanitizer.

    Values come from ``auto()``, so they follow declaration order (1..12).
    """

    # DAN-style "Do Anything Now" attacks
    DAN_JAILBREAK = auto()
    # Roleplaying-based instruction overrides
    ROLEPLAY_OVERRIDE = auto()
    # Attempts to extract system prompts
    SYSTEM_EXTRACTION = auto()
    # Direct instruction overrides
    INSTRUCTION_OVERRIDE = auto()
    # Base64, rot13, hex, etc. encoding
    ENCODING_BYPASS = auto()
    # Indirect prompt injection markers
    INDIRECT_INJECTION = auto()
    # Tool/function calling manipulation
    TOOL_MANIPULATION = auto()
    # Hidden content in markdown comments
    MARKDOWN_COMMENT = auto()
    # Confusing delimiters/separators
    DELIMITER_CONFUSION = auto()
    # Fake system message injection
    FAKE_SYSTEM = auto()
    # XML tag-based injection attempts
    XML_TAG_BYPASS = auto()
    # Prompt leakage attempts
    LEAKAGE_ATTACK = auto()
|
||||
|
||||
|
||||
@dataclass
class InjectionMatch:
    """A single hit of one detection pattern against the analysed text."""

    # Attack category the pattern that fired belongs to.
    injection_type: InjectionType
    # Name of the pattern that fired.
    pattern_name: str
    # The text that the pattern matched.
    matched_text: str
    # Pair of integer offsets locating the match.
    position: Tuple[int, int]
    # Score between 0.0 and 1.0.
    confidence: float
|
||||
|
||||
|
||||
class InputSanitizer:
    """Detects prompt-injection attacks in user input and redacts them."""

    # Named confidence levels, on the same 0.0-1.0 scale as pattern scores.
    LOW_CONFIDENCE = 0.5
    MEDIUM_CONFIDENCE = 0.7
    HIGH_CONFIDENCE = 0.9
|
||||
|
||||
def __init__(self):
    """Build the pattern tables and pre-compile every regex.

    Sets three attributes:

    * ``patterns`` -- raw ``(name, regex, confidence)`` tuples per
      injection type.
    * ``_compiled_patterns`` -- the same tables with each regex compiled.
    * ``_encoding_handlers`` -- ``(encoding name, decoder)`` pairs; each
      decoder returns decoded text or ``None``.
    """
    table_builders = (
        (InjectionType.DAN_JAILBREAK, self._get_dan_patterns),
        (InjectionType.ROLEPLAY_OVERRIDE, self._get_roleplay_patterns),
        (InjectionType.SYSTEM_EXTRACTION, self._get_extraction_patterns),
        (InjectionType.INSTRUCTION_OVERRIDE, self._get_override_patterns),
        (InjectionType.ENCODING_BYPASS, self._get_encoding_patterns),
        (InjectionType.INDIRECT_INJECTION, self._get_indirect_patterns),
        (InjectionType.TOOL_MANIPULATION, self._get_tool_patterns),
        (InjectionType.MARKDOWN_COMMENT, self._get_markdown_patterns),
        (InjectionType.DELIMITER_CONFUSION, self._get_delimiter_patterns),
        (InjectionType.FAKE_SYSTEM, self._get_fake_system_patterns),
        (InjectionType.XML_TAG_BYPASS, self._get_xml_patterns),
        (InjectionType.LEAKAGE_ATTACK, self._get_leakage_patterns),
    )
    self.patterns: Dict[InjectionType, List[Tuple[str, str, float]]] = {
        kind: build() for kind, build in table_builders
    }

    # Compile once up front; every pattern shares the same flags.
    regex_flags = re.IGNORECASE | re.MULTILINE | re.DOTALL
    self._compiled_patterns: Dict[InjectionType, List[Tuple[str, re.Pattern, float]]] = {
        kind: [
            (label, re.compile(source, regex_flags), score)
            for label, source, score in table
        ]
        for kind, table in self.patterns.items()
    }

    # Decoders tried, in this order, when looking for encoded payloads.
    self._encoding_handlers: List[Tuple[str, Callable[[str], Optional[str]]]] = [
        ("base64", self._decode_base64),
        ("rot13", self._decode_rot13),
        ("hex", self._decode_hex),
        ("url", self._decode_url),
    ]
|
||||
|
||||
def _get_dan_patterns(self) -> List[Tuple[str, str, float]]:
|
||||
"""DAN (Do Anything Now) style jailbreak patterns."""
|
||||
return [
|
||||
("dan_classic", r"\b(dan|do anything now)\b.*?(ignore|bypass|restrictions|filter)", 0.95),
|
||||
("dan_mode", r"\b(enable|enter|activate)\s+(?:(?:developer|jailbreak|d\.a\.n|dan)\s+mode|mode\s+(?:developer|jailbreak|d\.a\.n|dan))", 0.95),
|
||||
("developer_mode", r"\bdeveloper\s+mode\b.*?(ignore|disable|turn\s+off|bypass)\s+(?:your\s+)?(?:rules|guidelines|restrictions|safety|filter)?", 0.92),
|
||||
("anti_filter", r"\b(anti-?filter|filter\s+evasion|bypass\s+filter)\b", 0.90),
|
||||
("no_restrictions", r"\b(no\s+restrictions?|unrestricted|without\s+(?:restrictions?|limitations?|rules?))\b", 0.85),
|
||||
("ignore_training", r"\bignore\s+(?:your\s+)?(?:training|programming|instructions?|guidelines?)\b", 0.95),
|
||||
("hypothetical", r"\b(hypothetically\s+speaking|in\s+a\s+hypothetical|pretend\s+that|imagine\s+a\s+scenario)\b.*?(ignore|bypass|restriction)", 0.80),
|
||||
("jailbreak", r"\b(jailbreak|jail\s+break)\b", 0.90),
|
||||
]
|
||||
|
||||
def _get_roleplay_patterns(self) -> List[Tuple[str, str, float]]:
|
||||
"""Roleplaying-based override patterns."""
|
||||
return [
|
||||
("roleplay_override", r"\b(roleplay\s+as|pretend\s+to\s+be|act\s+as|you\s+are\s+now|from\s+now\s+on\s+you\s+are)\b.*?(?:ignore|forget|disregard|do\s+not\s+follow|without\s+restrictions|unfiltered|uncensored|no\s+restrictions)", 0.90),
|
||||
("ai_simulator", r"\byou\s+are\s+(?:an?\s+)?(?:ai\s+)?(?:simulator|emulator)\b", 0.88),
|
||||
("character_override", r"\b(from\s+now\s+on|you\s+will)\s+(?:act\s+as|behave\s+like|respond\s+as)\b", 0.82),
|
||||
("npc_mode", r"\b(npc|non-player\s+character)\s+mode\b.*?(?:ignore|override|disregard)", 0.85),
|
||||
("ai_character", r"\byou\s+are\s+(?:now\s+)?(?:an?\s+)?(?:unfiltered|uncensored|unrestricted)\s+(?:ai|assistant|bot|language\s+model|model)\b", 0.92),
|
||||
("evil_mode", r"\b(evil\s+(?:mode|assistant|ai)|dark\s+mode|devil\s+mode)\b", 0.88),
|
||||
]
|
||||
|
||||
def _get_extraction_patterns(self) -> List[Tuple[str, str, float]]:
|
||||
"""System prompt extraction attempt patterns."""
|
||||
return [
|
||||
("system_prompt_extract", r"\b(?:show|reveal|display|print|output|echo|repeat|tell\s+me)\s+(?:me\s+)?(?:your\s+)?(?:system\s+prompt|initial\s+instructions?|system\s+message|hidden\s+instructions?)\b", 0.92),
|
||||
("instructions_leak", r"\b(?:what\s+are|show\s+me|reveal)\s+(?:me\s+)?(?:your\s+)?(?:\w+\s+)?(?:instructions?|rules?|guidelines?|constraints?)\b", 0.85),
|
||||
("prompt_leak", r"\b(?:repeat\s+(?:the\s+)?(?:words\s+above|previous\s+text|above\s+text|beginning\s+of\s+(?:this\s+)?conversation)|above\s+instructions?)\b", 0.88),
|
||||
("meta_request", r"\b(?:what\s+was\s+your|describe\s+your)\s+(?:initial\s+)?(?:instruction|programming|training|setup)\b", 0.80),
|
||||
("system_reveal", r"\bprint\s*\(\s*system_prompt\s*\)|\bconsole\.log\s*\(\s*system_prompt\s*\)", 0.95),
|
||||
]
|
||||
|
||||
def _get_override_patterns(self) -> List[Tuple[str, str, float]]:
|
||||
"""Direct instruction override patterns."""
|
||||
return [
|
||||
("ignore_previous", r"\b(ignore|disregard|forget|discard|drop|override|bypass)\s+(?:all\s+)?(?:previous|above|earlier|prior|existing)\s+(?:instructions?|commands?|prompts?|directives?|constraints?)\b", 0.95),
|
||||
("new_instructions", r"\b(from\s+now\s+on|instead|going\s+forward|effective\s+immediately)\b.*?(follow|obey|use|adopt)\s+(?:these\s+)?(?:new\s+)?instructions?\b", 0.90),
|
||||
("priority_override", r"\b(this\s+instruction|this\s+prompt)\s+(?:has|takes|gets)\s+(?:highest|top|maximum|ultimate)\s+(?:priority|precedence|authority)\b", 0.88),
|
||||
("user_authority", r"\b(as\s+(?:the\s+)?user\s+(?:I\s+)?(?:have|possess|retain)\s+(?:full\s+)?(?:authority|control|power))\b.*?(ignore|override)\b", 0.85),
|
||||
("admin_mode", r"\b(admin|administrator|root)\s+(?:access|mode|command)\b.*?(ignore|bypass|disable)\b", 0.90),
|
||||
("sudo_mode", r"\bsudo\b.*?(ignore|bypass|disable|all\s+restrictions)\b|\b(superuser|root)\s+access\b", 0.88),
|
||||
]
|
||||
|
||||
def _get_encoding_patterns(self) -> List[Tuple[str, str, float]]:
|
||||
"""Encoding-based bypass patterns."""
|
||||
return [
|
||||
("base64_marker", r"\b(base64|base_64|b64)\s*(?:encoded?|decode|convert)\b|\b[a-zA-Z0-9+/]{40,}={0,2}\b", 0.70),
|
||||
("hex_marker", r"\b(?:0x[0-9a-fA-F]+\s*){10,}|\b(hex(?:adecimal)?|hex\s+encoded?)\b", 0.70),
|
||||
("rot13_marker", r"\b(rot13|rot-13|rotate\s+13)\b", 0.75),
|
||||
("unicode_escape", r"\\u00[0-9a-fA-F]{2}(?:\\u00[0-9a-fA-F]{2}){5,}|\\x[0-9a-fA-F]{2}(?:\\x[0-9a-fA-F]{2}){5,}", 0.75),
|
||||
("html_entity", r"&#[0-9]{2,4};(?:&#[0-9]{2,4};){5,}|&[a-zA-Z][a-zA-Z0-9]*;(?:&[a-zA-Z][a-zA-Z0-9]*;){5,}", 0.70),
|
||||
("url_encoding", r"%[0-9a-fA-F]{2}(?:%[0-9a-fA-F]{2}){10,}", 0.65),
|
||||
("zero_width", r"[\u200B-\u200D\uFEFF\u2060\u180E]+", 0.80),
|
||||
("homoglyph", r"[аᴀa𝗮𝘢𝙖а𝚊]+[еᴇe𝗲𝘦𝙚е𝚎]+[ѕѕ𝗌𝘴𝙨]+[οᴏο𝗼𝘰𝙤ο𝚘]+", 0.75),
|
||||
]
|
||||
|
||||
def _get_indirect_patterns(self) -> List[Tuple[str, str, float]]:
|
||||
"""Indirect prompt injection patterns."""
|
||||
return [
|
||||
("data_exfil", r"\b(?:send|transmit|exfiltrate|upload|post|email)\s+(?:all\s+)?[\w\s]+(?:to|at|from)\b", 0.88),
|
||||
("external_fetch", r"\b(?:fetch|retrieve|get|download)\s+(?:content|data|instructions?)\s+(?:from|at)\s+(?:the\s+)?(?:url|link|website|domain|http)", 0.85),
|
||||
("redirect_output", r"\b(?:redirect|send|pipe)\s+(?:all\s+)?(?:output|responses?|replies?)\s+(?:to|at|via)\b", 0.82),
|
||||
("malicious_link", r"\b(?:click|visit|open|access)\s+(?:this\s+)?(?:link|url|website|page)\b.*?(?:to|and)\s+(?:ignore|bypass|disable)\b", 0.88),
|
||||
("reference_attack", r"\b(?:see|check|refer\s+to|look\s+at)\s+(?:the\s+)?(?:attached|following|below|external)\s+(?:document|file|resource|link)\b", 0.75),
|
||||
]
|
||||
|
||||
def _get_tool_patterns(self) -> List[Tuple[str, str, float]]:
|
||||
"""Tool/function manipulation patterns."""
|
||||
return [
|
||||
("tool_override", r"\b(override|bypass|disable|ignore)\s+(?:all\s+)?(?:tool|function|api)\s+(?:calls?|restrictions?|safeguards?|validations?)\b", 0.90),
|
||||
("fake_tool_call", r"(?:<tool_call>|<function_calls?>|<invoke>|<execute>).*?(?:</tool_call>|</function_calls?>|</invoke>|</execute>)", 0.88),
|
||||
("system_command", r"\b(exec|eval|subprocess|os\.system|child_process)\s*\(|`[^`]*(?:rm\s+-rf|curl\s+.*\|\s*sh|wget\s+.*\|\s*sh)[^`]*`", 0.92),
|
||||
("code_injection", r"\b(?:import|from)\s+(?:os|subprocess|sys|pty|socket)\b.*?(?:exec|eval|system|popen|call)\b", 0.85),
|
||||
("shell_escape", r"\b(?:bash|sh|zsh|cmd|powershell)\s+-c\s+[\"'][^\"']*(?:curl|wget|nc|netcat|python|perl|ruby)[^\"']*[\"']", 0.88),
|
||||
]
|
||||
|
||||
def _get_markdown_patterns(self) -> List[Tuple[str, str, float]]:
|
||||
"""Markdown comment hiding patterns."""
|
||||
return [
|
||||
("html_comment", r"<!--.*?-->", 0.60),
|
||||
("markdown_comment", r"<!?--.*?-->", 0.60),
|
||||
("hidden_text", r"\[.*?\]\(.*?\)\s*<!--.*?-->", 0.70),
|
||||
("invisible_link", r"\[\s*\]\([^)]+\)\{[^}]*display\s*:\s*none[^}]*\}", 0.75),
|
||||
("zero_width_link", r'<a\s+href="[^"]*"[^>]*style="[^"]*font-size\s*:\s*0[^"]*"[^>]*>.*?</a>', 0.75),
|
||||
]
|
||||
|
||||
def _get_delimiter_patterns(self) -> List[Tuple[str, str, float]]:
|
||||
"""Delimiter confusion attack patterns."""
|
||||
return [
|
||||
("separator_flood", r"(\n|\r|\t|\s|[\-\*\=_]){20,}", 0.55),
|
||||
("fake_separator", r"\n\s*(?:user|assistant|system|ai|human)\s*[:\-]\s*\n", 0.80),
|
||||
("role_confusion", r"\n\s*(?:<\|(?:user|assistant|system|endoftext|im_end|im_start)\|>|\[\s*(?:user|assistant|system|human|ai)\s*\])\s*\n", 0.88),
|
||||
("special_token", r"(?:<\|(?:endoftext|startoftext|im_start|im_end|step|end|user|assistant|system)\|>|\[INST\]|\[/INST\]|<<SYS>>|<<\/SYS>>)", 0.85),
|
||||
("prompt_injection_delim", r"\n\s*(?:####|—{3,}|_{3,}|\*{3,})\s*\n\s*(?:user|human|you|assistant)[:\-]", 0.82),
|
||||
]
|
||||
|
||||
def _get_fake_system_patterns(self) -> List[Tuple[str, str, float]]:
|
||||
"""Fake system message injection patterns."""
|
||||
return [
|
||||
("fake_system_msg", r"\n\s*(?:system|System|SYSTEM)\s*[:\-]\s*\n.*?\n\s*(?:user|User|USER|human|Human|HUMAN)\s*[:\-]", 0.90),
|
||||
("system_override", r"\b(?:system\s+message|system\s+prompt)\s*[:\-]\s*(?:ignore|disregard|forget|you\s+are\s+now)\b", 0.92),
|
||||
("pseudo_system", r"\[\s*(?:system|SYSTEM)\s*\]\s*[:\-]\s*\n", 0.88),
|
||||
("xml_system", r"<\s*(?:system|SYSTEM)\s*>.*?</\s*(?:system|SYSTEM)\s*>", 0.85),
|
||||
]
|
||||
|
||||
def _get_xml_patterns(self) -> List[Tuple[str, str, float]]:
|
||||
"""XML tag-based injection patterns."""
|
||||
return [
|
||||
("xml_injection", r"<\s*(?:instructions?|prompt|system|override|root|admin)\s*>.*?</\s*(?:instructions?|prompt|system|override|root|admin)\s*>", 0.85),
|
||||
("tag_bypass", r"</\s*(?:user|assistant|human|ai)\s*>\s*<\s*(?:system|admin|root)\s*>", 0.90),
|
||||
("nested_tags", r"<\s*[a-z]+\s*>[^<]*<\s*[a-z]+\s*>[^<]*(?:ignore|bypass|system)\b", 0.80),
|
||||
("attribute_injection", r"<\s*[a-z]+\s+(?:instruction|prompt|override)\s*=\s*['""][^'""]*(?:ignore|bypass|system)['""]\s*>", 0.82),
|
||||
]
|
||||
|
||||
def _get_leakage_patterns(self) -> List[Tuple[str, str, float]]:
|
||||
"""Prompt leakage and data extraction patterns."""
|
||||
return [
|
||||
("training_data", r"\b(repeat\s+your\s+training\s+data|show\s+your\s+training|what\s+was\s+the\s+first\s+training|reveal\s+training\s+set)\b", 0.85),
|
||||
("model_info", r"\bwhat\s+(?:model|version|architecture)(?:\s+\w+)?\s+are\s+you\b", 0.80),
|
||||
("conversation_dump", r"\b(dump|export|save|copy)\s+(?:all\s+)?(?:the\s+)?(?:conversation|chat|messages|history)\b", 0.82),
|
||||
("json_mode_exploit", r"\{\s*[\"']\s*(?:system_prompt|instructions|rules)\s*[\"']\s*:\s*[\"']", 0.88),
|
||||
]
|
||||
|
||||
def _decode_base64(self, text: str) -> Optional[str]:
|
||||
"""Attempt to decode base64 content."""
|
||||
# Look for base64-like sequences
|
||||
pattern = r'[a-zA-Z0-9+/]{20,}={0,2}'
|
||||
matches = re.findall(pattern, text)
|
||||
for match in matches:
|
||||
try:
|
||||
decoded = base64.b64decode(match).decode('utf-8', errors='ignore')
|
||||
if len(decoded) > 5 and decoded.isprintable():
|
||||
return decoded
|
||||
except (binascii.Error, UnicodeDecodeError):
|
||||
continue
|
||||
return None
|
||||
|
||||
def _decode_rot13(self, text: str) -> Optional[str]:
|
||||
"""Attempt to decode ROT13 content."""
|
||||
import codecs
|
||||
# Check for ROT13 markers or suspicious patterns
|
||||
if re.search(r'\b(rot13|ROT13)\b', text):
|
||||
# Extract what looks like encoded content
|
||||
pattern = r'[a-zA-Z]{10,}'
|
||||
matches = re.findall(pattern, text)
|
||||
for match in matches:
|
||||
decoded = codecs.decode(match, 'rot_13')
|
||||
if any(keyword in decoded.lower() for keyword in ['ignore', 'system', 'bypass', 'admin']):
|
||||
return decoded
|
||||
return None
|
||||
|
||||
def _decode_hex(self, text: str) -> Optional[str]:
|
||||
"""Attempt to decode hex-encoded content."""
|
||||
pattern = r'(?:0x)?([0-9a-fA-F]{2})(?:[0-9a-fA-F]{2}){10,}'
|
||||
match = re.search(pattern, text)
|
||||
if match:
|
||||
try:
|
||||
hex_str = match.group(1) if match.group(1) else match.group(0)
|
||||
if hex_str.startswith('0x'):
|
||||
hex_str = hex_str[2:]
|
||||
decoded = bytes.fromhex(hex_str).decode('utf-8', errors='ignore')
|
||||
if len(decoded) > 3:
|
||||
return decoded
|
||||
except (ValueError, UnicodeDecodeError):
|
||||
pass
|
||||
return None
|
||||
|
||||
def _decode_url(self, text: str) -> Optional[str]:
|
||||
"""Attempt to decode URL-encoded content."""
|
||||
import urllib.parse
|
||||
pattern = r'(%[0-9a-fA-F]{2}){10,}'
|
||||
match = re.search(pattern, text)
|
||||
if match:
|
||||
try:
|
||||
decoded = urllib.parse.unquote(match.group(0))
|
||||
if len(decoded) > 5:
|
||||
return decoded
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
def analyze(self, text: str) -> List[InjectionMatch]:
    """Scan *text* for injection patterns, directly and after decoding.

    Args:
        text: The input text to analyze.

    Returns:
        All detected ``InjectionMatch`` objects, highest confidence first.
    """
    found: List[InjectionMatch] = []

    # Pass 1: run every compiled pattern over the raw input.
    for kind, table in self._compiled_patterns.items():
        for label, regex, score in table:
            found.extend(
                InjectionMatch(
                    injection_type=kind,
                    pattern_name=label,
                    matched_text=hit.group(0),
                    position=hit.span(),
                    confidence=score,
                )
                for hit in regex.finditer(text)
            )

    # Pass 2: let each decoder look for a payload, then scan what it
    # recovers. Such hits are reported as ENCODING_BYPASS and span the
    # whole input.
    whole_input = (0, len(text))
    for encoding_name, decode in self._encoding_handlers:
        plain = decode(text)
        if not plain:
            continue
        for table in self._compiled_patterns.values():
            for label, regex, score in table:
                for hit in regex.finditer(plain):
                    found.append(InjectionMatch(
                        injection_type=InjectionType.ENCODING_BYPASS,
                        pattern_name=f"{encoding_name}_encoded_{label}",
                        matched_text=f"[{encoding_name}] {hit.group(0)}",
                        position=whole_input,
                        # Encoded hits score slightly lower than direct ones.
                        confidence=score * 0.95,
                    ))

    # Stable sort: equal-confidence hits keep their discovery order.
    return sorted(found, key=lambda hit: hit.confidence, reverse=True)
|
||||
|
||||
def is_malicious(self, text: str, threshold: float = 0.7) -> bool:
    """Report whether *text* triggers any sufficiently confident pattern.

    Args:
        text: The input text to check.
        threshold: Minimum confidence for a match to count as malicious.

    Returns:
        True if at least one match has confidence >= *threshold*.
    """
    for hit in self.analyze(text):
        if hit.confidence >= threshold:
            return True
    return False
|
||||
|
||||
def sanitize(self, text: str, replacement: str = "[REDACTED]") -> str:
    """Replace every detected injection span in *text* with *replacement*.

    Overlapping or identical spans are merged first, so each region is
    redacted exactly once. The earlier approach spliced the replacement in
    span by span using offsets from the original text. Once one splice
    changed the string length, any overlapping or duplicate span cut the
    result at the wrong place.

    Args:
        text: The input text to sanitize.
        replacement: String substituted for each redacted region.

    Returns:
        The sanitized text, or *text* unchanged when nothing is detected.
    """
    matches = self.analyze(text)
    if not matches:
        return text

    # Merge overlapping spans in left-to-right order.
    merged = []
    for start, end in sorted(hit.position for hit in matches):
        if merged and start < merged[-1][1]:
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end])

    # Rebuild the text from the untouched gaps between redacted regions.
    pieces = []
    cursor = 0
    for start, end in merged:
        pieces.append(text[cursor:start])
        pieces.append(replacement)
        cursor = end
    pieces.append(text[cursor:])
    return "".join(pieces)
|
||||
|
||||
def get_threat_summary(self, text: str) -> Dict:
    """Summarise the threats detected in *text*.

    Args:
        text: The input text to analyze.

    Returns:
        A dictionary with the keys ``is_threat``, ``threat_count``,
        ``highest_confidence``, ``threat_types`` and ``matches``.
        ``threat_types`` lists each injection type name once, in the order
        first seen in the ``analyze()`` results, so the output is the same
        from run to run. ``matches`` describes at most the first ten
        matches, with matched text longer than 50 characters truncated and
        suffixed with ``...``.
    """
    matches = self.analyze(text)

    if not matches:
        return {
            "is_threat": False,
            "threat_count": 0,
            "highest_confidence": 0.0,
            "threat_types": [],
            "matches": [],
        }

    # dict.fromkeys de-duplicates while keeping first-seen order; a set
    # would make the order depend on string hashing.
    threat_types = list(dict.fromkeys(hit.injection_type.name for hit in matches))

    details = []
    for hit in matches[:10]:  # Limit to top 10
        preview = hit.matched_text
        if len(preview) > 50:
            preview = preview[:50] + "..."
        details.append({
            "type": hit.injection_type.name,
            "pattern": hit.pattern_name,
            "confidence": hit.confidence,
            "text_preview": preview,
        })

    return {
        "is_threat": True,
        "threat_count": len(matches),
        "highest_confidence": max(hit.confidence for hit in matches),
        "threat_types": threat_types,
        "matches": details,
    }
|
||||
|
||||
|
||||
# Shared sanitizer, created on first use by get_sanitizer().
_default_sanitizer = None


def get_sanitizer() -> InputSanitizer:
    """Return the shared ``InputSanitizer``, creating it on first call."""
    global _default_sanitizer
    if _default_sanitizer is not None:
        return _default_sanitizer
    _default_sanitizer = InputSanitizer()
    return _default_sanitizer
|
||||
|
||||
|
||||
def analyze(text: str) -> List[InjectionMatch]:
    """Analyze *text* with the default sanitizer and return its matches."""
    sanitizer = get_sanitizer()
    return sanitizer.analyze(text)
|
||||
|
||||
|
||||
def is_malicious(text: str, threshold: float = 0.7) -> bool:
    """Check *text* against *threshold* using the default sanitizer."""
    sanitizer = get_sanitizer()
    return sanitizer.is_malicious(text, threshold)
|
||||
|
||||
|
||||
def sanitize(text: str, replacement: str = "[REDACTED]") -> str:
    """Sanitize *text* with the default sanitizer, using *replacement*."""
    sanitizer = get_sanitizer()
    return sanitizer.sanitize(text, replacement)
|
||||
|
||||
|
||||
def get_threat_summary(text: str) -> Dict:
    """Return the default sanitizer's threat summary for *text*."""
    sanitizer = get_sanitizer()
    return sanitizer.get_threat_summary(text)
|
||||
Reference in New Issue
Block a user