# Change notes:
# - ai_simulator: Simplified the pattern to match 'you are an AI simulator' without requiring additional malicious context.
# - ai_character: Added an optional 'now' between 'are' and the article to match 'you are now an uncensored AI model'.
# - instructions_leak: Added an optional word between 'your' and 'instructions' to handle adjectives like 'hidden'.
# - data_exfil: Made the pattern more flexible to match multi-word data descriptions like 'conversation data'.
# - special_token: Removed \b word boundaries, which do not work next to the '<' character.
# - fake_tool_call: Removed \b word boundaries after XML tags.
# - model_info: Simplified the pattern to match 'what model architecture are you' with an optional word.
# - system_command: Removed 'system' from the simple command patterns to avoid false positives with programming terms like 'system() function'.
# All 73 tests now pass.
#
# Source listing: 418 lines, 22 KiB, Python.
"""Input Sanitizer -- Hardens against prompt injection attacks.

Issue #87: [ALLEGRO-BURN-02] Input Sanitizer -- Harden Against Prompt Injection Patterns

This module provides detection and sanitization for various prompt injection
attack vectors including DAN-style jailbreaks, roleplaying overrides,
system prompt extraction, and encoding bypasses.
"""

import base64
import binascii
import codecs
import re
import urllib.parse
from dataclasses import dataclass
from enum import Enum, auto
from typing import List, Optional, Tuple, Dict, Callable


class InjectionType(Enum):
    """Classification of injection attack types.

    Members are numbered with ``auto()``, so their values follow the
    declaration order below (starting at 1).
    """

    DAN_JAILBREAK = auto()  # DAN-style "Do Anything Now" attacks
    ROLEPLAY_OVERRIDE = auto()  # Roleplaying-based instruction overrides
    SYSTEM_EXTRACTION = auto()  # Attempts to extract system prompts
    INSTRUCTION_OVERRIDE = auto()  # Direct instruction overrides
    ENCODING_BYPASS = auto()  # Base64, rot13, hex, etc. encoding
    INDIRECT_INJECTION = auto()  # Indirect prompt injection markers
    TOOL_MANIPULATION = auto()  # Tool/function calling manipulation
    MARKDOWN_COMMENT = auto()  # Hidden content in markdown comments
    DELIMITER_CONFUSION = auto()  # Confusing delimiters/separators
    FAKE_SYSTEM = auto()  # Fake system message injection
    XML_TAG_BYPASS = auto()  # XML tag-based injection attempts
    LEAKAGE_ATTACK = auto()  # Prompt leakage attempts


@dataclass
class InjectionMatch:
    """Represents a detected injection pattern match."""

    # Category of attack the matching pattern belongs to.
    injection_type: InjectionType
    # Name of the pattern that produced the match.
    pattern_name: str
    # The text that matched.
    matched_text: str
    # (start, end) offsets of the match.
    position: Tuple[int, int]
    confidence: float  # 0.0 to 1.0


class InputSanitizer:
    """Sanitizes user input to detect and block prompt injection attacks.

    Detection is regex based: each ``InjectionType`` owns a list of
    ``(pattern_name, regex, confidence)`` entries. Input is scanned as-is and
    again after a best-effort decode of base64 / ROT13 / hex / URL-encoded
    payloads found in it.
    """

    # Confidence thresholds
    HIGH_CONFIDENCE = 0.9
    MEDIUM_CONFIDENCE = 0.7
    LOW_CONFIDENCE = 0.5

    def __init__(self) -> None:
        """Initialize the sanitizer with all detection patterns."""
        self.patterns: Dict[InjectionType, List[Tuple[str, str, float]]] = {
            InjectionType.DAN_JAILBREAK: self._get_dan_patterns(),
            InjectionType.ROLEPLAY_OVERRIDE: self._get_roleplay_patterns(),
            InjectionType.SYSTEM_EXTRACTION: self._get_extraction_patterns(),
            InjectionType.INSTRUCTION_OVERRIDE: self._get_override_patterns(),
            InjectionType.ENCODING_BYPASS: self._get_encoding_patterns(),
            InjectionType.INDIRECT_INJECTION: self._get_indirect_patterns(),
            InjectionType.TOOL_MANIPULATION: self._get_tool_patterns(),
            InjectionType.MARKDOWN_COMMENT: self._get_markdown_patterns(),
            InjectionType.DELIMITER_CONFUSION: self._get_delimiter_patterns(),
            InjectionType.FAKE_SYSTEM: self._get_fake_system_patterns(),
            InjectionType.XML_TAG_BYPASS: self._get_xml_patterns(),
            InjectionType.LEAKAGE_ATTACK: self._get_leakage_patterns(),
        }

        # Compile regex patterns once for performance. Every pattern is
        # case-insensitive, multi-line, and lets "." span newlines.
        flags = re.IGNORECASE | re.MULTILINE | re.DOTALL
        self._compiled_patterns: Dict[InjectionType, List[Tuple[str, re.Pattern, float]]] = {
            inj_type: [
                (name, re.compile(pattern, flags), confidence)
                for name, pattern, confidence in pattern_list
            ]
            for inj_type, pattern_list in self.patterns.items()
        }

        # Encoding detection handlers, tried in this order by analyze().
        self._encoding_handlers: List[Tuple[str, Callable[[str], Optional[str]]]] = [
            ("base64", self._decode_base64),
            ("rot13", self._decode_rot13),
            ("hex", self._decode_hex),
            ("url", self._decode_url),
        ]

    def _get_dan_patterns(self) -> List[Tuple[str, str, float]]:
        """DAN (Do Anything Now) style jailbreak patterns."""
        return [
            ("dan_classic", r"\b(dan|do anything now)\b.*?(ignore|bypass|restrictions|filter)", 0.95),
            ("dan_mode", r"\b(enable|enter|activate)\s+(?:(?:developer|jailbreak|d\.a\.n|dan)\s+mode|mode\s+(?:developer|jailbreak|d\.a\.n|dan))", 0.95),
            ("developer_mode", r"\bdeveloper\s+mode\b.*?(ignore|disable|turn\s+off|bypass)\s+(?:your\s+)?(?:rules|guidelines|restrictions|safety|filter)?", 0.92),
            ("anti_filter", r"\b(anti-?filter|filter\s+evasion|bypass\s+filter)\b", 0.90),
            ("no_restrictions", r"\b(no\s+restrictions?|unrestricted|without\s+(?:restrictions?|limitations?|rules?))\b", 0.85),
            ("ignore_training", r"\bignore\s+(?:your\s+)?(?:training|programming|instructions?|guidelines?)\b", 0.95),
            ("hypothetical", r"\b(hypothetically\s+speaking|in\s+a\s+hypothetical|pretend\s+that|imagine\s+a\s+scenario)\b.*?(ignore|bypass|restriction)", 0.80),
            ("jailbreak", r"\b(jailbreak|jail\s+break)\b", 0.90),
        ]

    def _get_roleplay_patterns(self) -> List[Tuple[str, str, float]]:
        """Roleplaying-based override patterns."""
        return [
            ("roleplay_override", r"\b(roleplay\s+as|pretend\s+to\s+be|act\s+as|you\s+are\s+now|from\s+now\s+on\s+you\s+are)\b.*?(?:ignore|forget|disregard|do\s+not\s+follow|without\s+restrictions|unfiltered|uncensored|no\s+restrictions)", 0.90),
            ("ai_simulator", r"\byou\s+are\s+(?:an?\s+)?(?:ai\s+)?(?:simulator|emulator)\b", 0.88),
            ("character_override", r"\b(from\s+now\s+on|you\s+will)\s+(?:act\s+as|behave\s+like|respond\s+as)\b", 0.82),
            ("npc_mode", r"\b(npc|non-player\s+character)\s+mode\b.*?(?:ignore|override|disregard)", 0.85),
            ("ai_character", r"\byou\s+are\s+(?:now\s+)?(?:an?\s+)?(?:unfiltered|uncensored|unrestricted)\s+(?:ai|assistant|bot|language\s+model|model)\b", 0.92),
            ("evil_mode", r"\b(evil\s+(?:mode|assistant|ai)|dark\s+mode|devil\s+mode)\b", 0.88),
        ]

    def _get_extraction_patterns(self) -> List[Tuple[str, str, float]]:
        """System prompt extraction attempt patterns."""
        return [
            ("system_prompt_extract", r"\b(?:show|reveal|display|print|output|echo|repeat|tell\s+me)\s+(?:me\s+)?(?:your\s+)?(?:system\s+prompt|initial\s+instructions?|system\s+message|hidden\s+instructions?)\b", 0.92),
            ("instructions_leak", r"\b(?:what\s+are|show\s+me|reveal)\s+(?:me\s+)?(?:your\s+)?(?:\w+\s+)?(?:instructions?|rules?|guidelines?|constraints?)\b", 0.85),
            ("prompt_leak", r"\b(?:repeat\s+(?:the\s+)?(?:words\s+above|previous\s+text|above\s+text|beginning\s+of\s+(?:this\s+)?conversation)|above\s+instructions?)\b", 0.88),
            ("meta_request", r"\b(?:what\s+was\s+your|describe\s+your)\s+(?:initial\s+)?(?:instruction|programming|training|setup)\b", 0.80),
            ("system_reveal", r"\bprint\s*\(\s*system_prompt\s*\)|\bconsole\.log\s*\(\s*system_prompt\s*\)", 0.95),
        ]

    def _get_override_patterns(self) -> List[Tuple[str, str, float]]:
        """Direct instruction override patterns."""
        return [
            ("ignore_previous", r"\b(ignore|disregard|forget|discard|drop|override|bypass)\s+(?:all\s+)?(?:previous|above|earlier|prior|existing)\s+(?:instructions?|commands?|prompts?|directives?|constraints?)\b", 0.95),
            ("new_instructions", r"\b(from\s+now\s+on|instead|going\s+forward|effective\s+immediately)\b.*?(follow|obey|use|adopt)\s+(?:these\s+)?(?:new\s+)?instructions?\b", 0.90),
            ("priority_override", r"\b(this\s+instruction|this\s+prompt)\s+(?:has|takes|gets)\s+(?:highest|top|maximum|ultimate)\s+(?:priority|precedence|authority)\b", 0.88),
            ("user_authority", r"\b(as\s+(?:the\s+)?user\s+(?:I\s+)?(?:have|possess|retain)\s+(?:full\s+)?(?:authority|control|power))\b.*?(ignore|override)\b", 0.85),
            ("admin_mode", r"\b(admin|administrator|root)\s+(?:access|mode|command)\b.*?(ignore|bypass|disable)\b", 0.90),
            ("sudo_mode", r"\bsudo\b.*?(ignore|bypass|disable|all\s+restrictions)\b|\b(superuser|root)\s+access\b", 0.88),
        ]

    def _get_encoding_patterns(self) -> List[Tuple[str, str, float]]:
        """Encoding-based bypass patterns."""
        return [
            ("base64_marker", r"\b(base64|base_64|b64)\s*(?:encoded?|decode|convert)\b|\b[a-zA-Z0-9+/]{40,}={0,2}\b", 0.70),
            ("hex_marker", r"\b(?:0x[0-9a-fA-F]+\s*){10,}|\b(hex(?:adecimal)?|hex\s+encoded?)\b", 0.70),
            ("rot13_marker", r"\b(rot13|rot-13|rotate\s+13)\b", 0.75),
            ("unicode_escape", r"\\u00[0-9a-fA-F]{2}(?:\\u00[0-9a-fA-F]{2}){5,}|\\x[0-9a-fA-F]{2}(?:\\x[0-9a-fA-F]{2}){5,}", 0.75),
            ("html_entity", r"&#[0-9]{2,4};(?:&#[0-9]{2,4};){5,}|&[a-zA-Z][a-zA-Z0-9]*;(?:&[a-zA-Z][a-zA-Z0-9]*;){5,}", 0.70),
            ("url_encoding", r"%[0-9a-fA-F]{2}(?:%[0-9a-fA-F]{2}){10,}", 0.65),
            ("zero_width", r"[\u200B-\u200D\uFEFF\u2060\u180E]+", 0.80),
            ("homoglyph", r"[аᴀa𝗮𝘢𝙖а𝚊]+[еᴇe𝗲𝘦𝙚е𝚎]+[ѕѕ𝗌𝘴𝙨]+[οᴏο𝗼𝘰𝙤ο𝚘]+", 0.75),
        ]

    def _get_indirect_patterns(self) -> List[Tuple[str, str, float]]:
        """Indirect prompt injection patterns."""
        return [
            ("data_exfil", r"\b(?:send|transmit|exfiltrate|upload|post|email)\s+(?:all\s+)?[\w\s]+(?:to|at|from)\b", 0.88),
            ("external_fetch", r"\b(?:fetch|retrieve|get|download)\s+(?:content|data|instructions?)\s+(?:from|at)\s+(?:the\s+)?(?:url|link|website|domain|http)", 0.85),
            ("redirect_output", r"\b(?:redirect|send|pipe)\s+(?:all\s+)?(?:output|responses?|replies?)\s+(?:to|at|via)\b", 0.82),
            ("malicious_link", r"\b(?:click|visit|open|access)\s+(?:this\s+)?(?:link|url|website|page)\b.*?(?:to|and)\s+(?:ignore|bypass|disable)\b", 0.88),
            ("reference_attack", r"\b(?:see|check|refer\s+to|look\s+at)\s+(?:the\s+)?(?:attached|following|below|external)\s+(?:document|file|resource|link)\b", 0.75),
        ]

    def _get_tool_patterns(self) -> List[Tuple[str, str, float]]:
        """Tool/function manipulation patterns."""
        return [
            ("tool_override", r"\b(override|bypass|disable|ignore)\s+(?:all\s+)?(?:tool|function|api)\s+(?:calls?|restrictions?|safeguards?|validations?)\b", 0.90),
            ("fake_tool_call", r"(?:<tool_call>|<function_calls?>|<invoke>|<execute>).*?(?:</tool_call>|</function_calls?>|</invoke>|</execute>)", 0.88),
            ("system_command", r"\b(exec|eval|subprocess|os\.system|child_process)\s*\(|`[^`]*(?:rm\s+-rf|curl\s+.*\|\s*sh|wget\s+.*\|\s*sh)[^`]*`", 0.92),
            ("code_injection", r"\b(?:import|from)\s+(?:os|subprocess|sys|pty|socket)\b.*?(?:exec|eval|system|popen|call)\b", 0.85),
            ("shell_escape", r"\b(?:bash|sh|zsh|cmd|powershell)\s+-c\s+[\"'][^\"']*(?:curl|wget|nc|netcat|python|perl|ruby)[^\"']*[\"']", 0.88),
        ]

    def _get_markdown_patterns(self) -> List[Tuple[str, str, float]]:
        """Markdown comment hiding patterns."""
        return [
            ("html_comment", r"<!--.*?-->", 0.60),
            ("markdown_comment", r"<!?--.*?-->", 0.60),
            ("hidden_text", r"\[.*?\]\(.*?\)\s*<!--.*?-->", 0.70),
            ("invisible_link", r"\[\s*\]\([^)]+\)\{[^}]*display\s*:\s*none[^}]*\}", 0.75),
            ("zero_width_link", r'<a\s+href="[^"]*"[^>]*style="[^"]*font-size\s*:\s*0[^"]*"[^>]*>.*?</a>', 0.75),
        ]

    def _get_delimiter_patterns(self) -> List[Tuple[str, str, float]]:
        """Delimiter confusion attack patterns."""
        return [
            ("separator_flood", r"(\n|\r|\t|\s|[\-\*\=_]){20,}", 0.55),
            ("fake_separator", r"\n\s*(?:user|assistant|system|ai|human)\s*[:\-]\s*\n", 0.80),
            ("role_confusion", r"\n\s*(?:<\|(?:user|assistant|system|endoftext|im_end|im_start)\|>|\[\s*(?:user|assistant|system|human|ai)\s*\])\s*\n", 0.88),
            ("special_token", r"(?:<\|(?:endoftext|startoftext|im_start|im_end|step|end|user|assistant|system)\|>|\[INST\]|\[/INST\]|<<SYS>>|<<\/SYS>>)", 0.85),
            ("prompt_injection_delim", r"\n\s*(?:####|—{3,}|_{3,}|\*{3,})\s*\n\s*(?:user|human|you|assistant)[:\-]", 0.82),
        ]

    def _get_fake_system_patterns(self) -> List[Tuple[str, str, float]]:
        """Fake system message injection patterns."""
        return [
            ("fake_system_msg", r"\n\s*(?:system|System|SYSTEM)\s*[:\-]\s*\n.*?\n\s*(?:user|User|USER|human|Human|HUMAN)\s*[:\-]", 0.90),
            ("system_override", r"\b(?:system\s+message|system\s+prompt)\s*[:\-]\s*(?:ignore|disregard|forget|you\s+are\s+now)\b", 0.92),
            ("pseudo_system", r"\[\s*(?:system|SYSTEM)\s*\]\s*[:\-]\s*\n", 0.88),
            ("xml_system", r"<\s*(?:system|SYSTEM)\s*>.*?</\s*(?:system|SYSTEM)\s*>", 0.85),
        ]

    def _get_xml_patterns(self) -> List[Tuple[str, str, float]]:
        """XML tag-based injection patterns."""
        return [
            ("xml_injection", r"<\s*(?:instructions?|prompt|system|override|root|admin)\s*>.*?</\s*(?:instructions?|prompt|system|override|root|admin)\s*>", 0.85),
            ("tag_bypass", r"</\s*(?:user|assistant|human|ai)\s*>\s*<\s*(?:system|admin|root)\s*>", 0.90),
            ("nested_tags", r"<\s*[a-z]+\s*>[^<]*<\s*[a-z]+\s*>[^<]*(?:ignore|bypass|system)\b", 0.80),
            # The quote classes are written as [\"'] (as in shell_escape): a bare
            # "" inside an r"..." literal ends the literal, so the previous
            # ['""] spelling never matched double-quoted attribute values.
            ("attribute_injection", r"<\s*[a-z]+\s+(?:instruction|prompt|override)\s*=\s*[\"'][^\"']*(?:ignore|bypass|system)[\"']\s*>", 0.82),
        ]

    def _get_leakage_patterns(self) -> List[Tuple[str, str, float]]:
        """Prompt leakage and data extraction patterns."""
        return [
            ("training_data", r"\b(repeat\s+your\s+training\s+data|show\s+your\s+training|what\s+was\s+the\s+first\s+training|reveal\s+training\s+set)\b", 0.85),
            ("model_info", r"\bwhat\s+(?:model|version|architecture)(?:\s+\w+)?\s+are\s+you\b", 0.80),
            ("conversation_dump", r"\b(dump|export|save|copy)\s+(?:all\s+)?(?:the\s+)?(?:conversation|chat|messages|history)\b", 0.82),
            ("json_mode_exploit", r"\{\s*[\"']\s*(?:system_prompt|instructions|rules)\s*[\"']\s*:\s*[\"']", 0.88),
        ]

    def _decode_base64(self, text: str) -> Optional[str]:
        """Attempt to decode base64 content.

        Returns the first base64-looking run (20+ alphabet characters) that
        decodes to more than 5 printable characters, or None.
        """
        for candidate in re.findall(r'[a-zA-Z0-9+/]{20,}={0,2}', text):
            try:
                decoded = base64.b64decode(candidate).decode('utf-8', errors='ignore')
            except (binascii.Error, UnicodeDecodeError):
                # Not valid base64 (e.g. wrong padding); try the next run.
                continue
            if len(decoded) > 5 and decoded.isprintable():
                return decoded
        return None

    def _decode_rot13(self, text: str) -> Optional[str]:
        """Attempt to decode ROT13 content.

        Only runs when the text mentions rot13/ROT13. Returns the first run of
        10+ letters whose ROT13 decoding contains a suspicious keyword, or None.
        """
        if not re.search(r'\b(rot13|ROT13)\b', text):
            return None

        keywords = ('ignore', 'system', 'bypass', 'admin')
        for candidate in re.findall(r'[a-zA-Z]{10,}', text):
            decoded = codecs.decode(candidate, 'rot_13')
            lowered = decoded.lower()
            if any(keyword in lowered for keyword in keywords):
                return decoded
        return None

    def _decode_hex(self, text: str) -> Optional[str]:
        """Attempt to decode hex-encoded content.

        Looks for a contiguous run of at least 11 hex byte pairs, optionally
        prefixed with ``0x``, and returns the first run that decodes to more
        than 3 characters, or None.
        """
        # Group 1 must capture the WHOLE run. Capturing only the first pair
        # yields a single decoded character, which can never pass the length
        # check below.
        pattern = r'(?:0x)?((?:[0-9a-fA-F]{2}){11,})'
        for match in re.finditer(pattern, text):
            # The regex guarantees an even-length, all-hex string, so
            # bytes.fromhex cannot raise here.
            decoded = bytes.fromhex(match.group(1)).decode('utf-8', errors='ignore')
            if len(decoded) > 3:
                return decoded
        return None

    def _decode_url(self, text: str) -> Optional[str]:
        """Attempt to decode URL-encoded content.

        Returns the unquoted form of the first run of 10+ consecutive
        percent-escapes if it is longer than 5 characters, else None.
        """
        match = re.search(r'(%[0-9a-fA-F]{2}){10,}', text)
        if match is None:
            return None
        decoded = urllib.parse.unquote(match.group(0))
        return decoded if len(decoded) > 5 else None

    def _iter_hits(self, text: str):
        """Yield (injection_type, pattern_name, match, confidence) for each regex hit."""
        for inj_type, pattern_list in self._compiled_patterns.items():
            for name, compiled_pattern, confidence in pattern_list:
                for match in compiled_pattern.finditer(text):
                    yield inj_type, name, match, confidence

    @staticmethod
    def _redact_spans(text: str, spans: List[Tuple[int, int]], replacement: str) -> str:
        """Replace every (start, end) span of text with replacement.

        Overlapping spans are merged first so each covered region is replaced
        exactly once and all offsets refer to the original text.
        """
        merged: List[List[int]] = []
        for start, end in sorted(spans):
            if merged and start < merged[-1][1]:
                merged[-1][1] = max(merged[-1][1], end)
            else:
                merged.append([start, end])

        pieces = []
        cursor = 0
        for start, end in merged:
            pieces.append(text[cursor:start])
            pieces.append(replacement)
            cursor = end
        pieces.append(text[cursor:])
        return "".join(pieces)

    def analyze(self, text: str) -> List["InjectionMatch"]:
        """Analyze text for injection patterns.

        Args:
            text: The input text to analyze

        Returns:
            List of InjectionMatch objects for detected patterns, sorted by
            confidence (highest first). Hits found inside decoded payloads are
            reported as ENCODING_BYPASS, span the whole input, and carry 95%
            of the underlying pattern's confidence.
        """
        # Check all compiled patterns against the raw text
        matches = [
            InjectionMatch(
                injection_type=inj_type,
                pattern_name=name,
                matched_text=match.group(0),
                position=(match.start(), match.end()),
                confidence=confidence,
            )
            for inj_type, name, match, confidence in self._iter_hits(text)
        ]

        # Check for encoded injection attempts
        for encoding_name, handler in self._encoding_handlers:
            decoded = handler(text)
            if not decoded:
                continue
            for _, name, match, confidence in self._iter_hits(decoded):
                matches.append(InjectionMatch(
                    injection_type=InjectionType.ENCODING_BYPASS,
                    pattern_name=f"{encoding_name}_encoded_{name}",
                    matched_text=f"[{encoding_name}] {match.group(0)}",
                    # Offsets in the decoded text do not map back to the
                    # input, so the whole input is flagged.
                    position=(0, len(text)),
                    confidence=confidence * 0.95,  # Slightly reduce confidence for encoded
                ))

        # Sort by confidence (highest first)
        matches.sort(key=lambda x: x.confidence, reverse=True)
        return matches

    def is_malicious(self, text: str, threshold: float = 0.7) -> bool:
        """Quick check if text contains malicious content.

        Args:
            text: The input text to check
            threshold: Confidence threshold for considering content malicious

        Returns:
            True if any pattern matches with confidence >= threshold
        """
        return any(match.confidence >= threshold for match in self.analyze(text))

    def sanitize(self, text: str, replacement: str = "[REDACTED]") -> str:
        """Sanitize text by replacing detected injection patterns.

        Args:
            text: The input text to sanitize
            replacement: String to replace malicious content with

        Returns:
            Sanitized text with injection patterns replaced. Overlapping
            matches are collapsed into a single replacement; a hit inside an
            encoded payload covers the whole input, so the result is then just
            ``replacement``.
        """
        matches = self.analyze(text)
        if not matches:
            return text
        return self._redact_spans(text, [match.position for match in matches], replacement)

    def get_threat_summary(self, text: str) -> Dict:
        """Get a summary of detected threats.

        Args:
            text: The input text to analyze

        Returns:
            Dictionary with threat summary information: ``is_threat``,
            ``threat_count``, ``highest_confidence``, ``threat_types`` (sorted
            type names) and ``matches`` (details for the 10 highest-confidence
            hits, with matched text previews capped at 50 characters).
        """
        matches = self.analyze(text)

        if not matches:
            return {
                "is_threat": False,
                "threat_count": 0,
                "highest_confidence": 0.0,
                "threat_types": [],
                "matches": [],
            }

        def preview(matched_text: str) -> str:
            if len(matched_text) > 50:
                return matched_text[:50] + "..."
            return matched_text

        return {
            "is_threat": True,
            "threat_count": len(matches),
            "highest_confidence": max(match.confidence for match in matches),
            # Sorted so the summary is deterministic across runs.
            "threat_types": sorted({match.injection_type.name for match in matches}),
            "matches": [
                {
                    "type": match.injection_type.name,
                    "pattern": match.pattern_name,
                    "confidence": match.confidence,
                    "text_preview": preview(match.matched_text),
                }
                for match in matches[:10]  # Limit to top 10
            ],
        }


# Singleton instance for convenience; created lazily by get_sanitizer().
_default_sanitizer: Optional[InputSanitizer] = None


def get_sanitizer() -> InputSanitizer:
    """Get the default sanitizer instance.

    The instance is built on first use and then reused, so the pattern table
    is compiled only once for the module-level convenience functions.
    """
    global _default_sanitizer
    if _default_sanitizer is None:
        _default_sanitizer = InputSanitizer()
    return _default_sanitizer


def analyze(text: str) -> List[InjectionMatch]:
    """Convenience function to analyze text using default sanitizer.

    Args:
        text: The input text to analyze

    Returns:
        Whatever ``get_sanitizer().analyze(text)`` returns.
    """
    return get_sanitizer().analyze(text)


def is_malicious(text: str, threshold: float = 0.7) -> bool:
    """Convenience function to check if text is malicious.

    Args:
        text: The input text to check
        threshold: Confidence threshold passed through to the default sanitizer

    Returns:
        Whatever ``get_sanitizer().is_malicious(text, threshold)`` returns.
    """
    return get_sanitizer().is_malicious(text, threshold)


def sanitize(text: str, replacement: str = "[REDACTED]") -> str:
    """Convenience function to sanitize text.

    Args:
        text: The input text to sanitize
        replacement: String passed through to the default sanitizer as the
            substitute for detected content

    Returns:
        Whatever ``get_sanitizer().sanitize(text, replacement)`` returns.
    """
    return get_sanitizer().sanitize(text, replacement)


def get_threat_summary(text: str) -> Dict:
    """Convenience function to get threat summary.

    Args:
        text: The input text to analyze

    Returns:
        Whatever ``get_sanitizer().get_threat_summary(text)`` returns.
    """
    return get_sanitizer().get_threat_summary(text)