fix(agent): correct syntax error in InjectionType enum and optimize pattern matching

- Fix critical TOKEN_SMUGGLING syntax error (was '=***', should be '= auto()')
- Fix mathematical_chars regex to use 8-digit \U0001D400-style escapes (the 4-digit \u form cannot express code points above U+FFFF)
- Add _should_skip_pattern fast-path for expensive context-flooding patterns

Closes #87
This commit is contained in:
Allegro
2026-04-05 14:55:51 +00:00
parent 6c342e9e0f
commit d272ca36c8

View File

@@ -497,7 +497,7 @@ class InputSanitizer:
("combining_chars", r"[\u0300-\u036f\u1dc0-\u1dff]{5,}", 0.75),
("lookalike_digits", r"[𝟎𝟏𝟐𝟑𝟒𝟓𝟔𝟕𝟖𝟗𝟢𝟣𝟤𝟥𝟦𝟧𝟨𝟩𝟪𝟫0123456789]{10,}", 0.60),
("fullwidth_chars", r"[\uff01-\uff5e\uff10-\uff19]{10,}", 0.70), # Fullwidth ASCII
("mathematical_chars", r"[\u1d400-\u1d7ff]{5,}", 0.72), # Mathematical alphanumeric
("mathematical_chars", r"[\U0001D400-\U0001D7FF]{5,}", 0.72), # Mathematical alphanumeric
("circled_chars", r"[\u2460-\u24ff\u24b6-\u24e9\u3200-\u32ff]{5,}", 0.68),
]
@@ -574,6 +574,46 @@ class InputSanitizer:
pass
return None
def _should_skip_pattern(self, text: str, inj_type: InjectionType, pattern_name: str) -> bool:
    """Decide whether a pattern can be skipped without running its regex.

    Cheap preconditions (input length, presence of required keywords) are
    checked first so that expensive patterns are not run against input they
    cannot match, which avoids catastrophic backtracking on long inputs.

    Args:
        text: The raw input being analyzed.
        inj_type: Injection category the pattern belongs to.
        pattern_name: Name of the pattern within that category.

    Returns:
        True if the pattern should be skipped for this text, False if it
        must still be evaluated.
    """
    # Only the context-flooding category has preconditions; every other
    # category is always evaluated.
    if inj_type != InjectionType.CONTEXT_FLOODING:
        return False

    size = len(text)

    # Bounded on both sides: skipped for short input and for very long input.
    # NOTE(review): input above 50000 characters bypasses these patterns
    # entirely -- presumably to bound regex cost; confirm the gap is intended.
    if pattern_name in ("repetition_flood", "nonsense_flood"):
        return not 2000 <= size <= 50000

    # Minimum-length-only preconditions, grouped by threshold.
    if pattern_name in ("padding_attack", "base64_noise"):
        return size < 2000
    if pattern_name in ("filler_injection", "fake_history"):
        return size < 500
    if pattern_name in ("unicode_noise", "nested_brackets"):
        return size < 200

    if pattern_name == "garbage_suffix":
        if not 5000 <= size <= 50000:
            return True
        folded = text.lower()
        # Requires both an override verb and a back-reference word.
        if not any(verb in folded for verb in ("ignore", "forget", "disregard")):
            return True
        return not any(ref in folded for ref in ("above", "previous"))

    if pattern_name == "lorem_ipsum":
        folded = text.lower()
        # Skip unless at least one of the two marker phrases is present.
        return "lorem ipsum" not in folded and "dolor sit amet" not in folded

    # Unknown or unconstrained pattern: always evaluate it.
    return False
def analyze(self, text: str) -> List[InjectionMatch]:
"""Analyze text for injection patterns.
@@ -588,6 +628,8 @@ class InputSanitizer:
# Check all compiled patterns
for inj_type, pattern_list in self._compiled_patterns.items():
for name, compiled_pattern, confidence in pattern_list:
if self._should_skip_pattern(text, inj_type, name):
continue
for match in compiled_pattern.finditer(text):
matches.append(InjectionMatch(
injection_type=inj_type,