From aaf8f2d2d2db7e7e768cd7ee6563c5bfc86d0355 Mon Sep 17 00:00:00 2001
From: teknium1
Date: Mon, 9 Mar 2026 01:28:27 -0700
Subject: [PATCH] feat: expand secret redaction patterns

Added 14 new redaction patterns, all with distinctive prefixes that have
near-zero false positive risk:

Prefix patterns:
- AWS Access Key ID (AKIA...)
- Stripe keys (sk_live_, sk_test_, rk_live_)
- SendGrid (SG....)
- HuggingFace (hf_...)
- Replicate (r8_...)
- npm tokens (npm_...)
- PyPI tokens (pypi-...)
- DigitalOcean PAT / OAuth tokens (dop_v1_, doo_v1_)
- AgentMail (am_...)

Structural patterns:
- Private key blocks (-----BEGIN...PRIVATE KEY-----)
- Database connection string passwords (postgres://user:PASS@host)
---
 agent/redact.py | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/agent/redact.py b/agent/redact.py
index f11bc9843..02700c832 100644
--- a/agent/redact.py
+++ b/agent/redact.py
@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)
 
 # Known API key prefixes -- match the prefix + contiguous token chars
 _PREFIX_PATTERNS = [
-    r"sk-[A-Za-z0-9_-]{10,}",  # OpenAI / OpenRouter
+    r"sk-[A-Za-z0-9_-]{10,}",  # OpenAI / OpenRouter / Anthropic (sk-ant-*)
     r"ghp_[A-Za-z0-9]{10,}",  # GitHub PAT (classic)
     r"github_pat_[A-Za-z0-9_]{10,}",  # GitHub PAT (fine-grained)
     r"xox[baprs]-[A-Za-z0-9-]{10,}",  # Slack tokens
@@ -26,6 +26,18 @@ _PREFIX_PATTERNS = [
     r"fc-[A-Za-z0-9]{10,}",  # Firecrawl
     r"bb_live_[A-Za-z0-9_-]{10,}",  # BrowserBase
     r"gAAAA[A-Za-z0-9_=-]{20,}",  # Codex encrypted tokens
+    r"AKIA[A-Z0-9]{16}",  # AWS Access Key ID
+    r"sk_live_[A-Za-z0-9]{10,}",  # Stripe secret key (live)
+    r"sk_test_[A-Za-z0-9]{10,}",  # Stripe secret key (test)
+    r"rk_live_[A-Za-z0-9]{10,}",  # Stripe restricted key
+    r"SG\.[A-Za-z0-9_-]{10,}",  # SendGrid API key
+    r"hf_[A-Za-z0-9]{10,}",  # HuggingFace token
+    r"r8_[A-Za-z0-9]{10,}",  # Replicate API token
+    r"npm_[A-Za-z0-9]{10,}",  # npm access token
+    r"pypi-[A-Za-z0-9_-]{10,}",  # PyPI API token
+    r"dop_v1_[A-Za-z0-9]{10,}",  # DigitalOcean PAT
+    r"doo_v1_[A-Za-z0-9]{10,}",  # DigitalOcean OAuth
+    r"am_[A-Za-z0-9_-]{10,}",  # AgentMail API key
 ]
 
 # ENV assignment patterns: KEY=value where KEY contains a secret-like name
@@ -53,6 +65,18 @@ _TELEGRAM_RE = re.compile(
     r"(bot)?(\d{8,}):([-A-Za-z0-9_]{30,})",
 )
 
+# Private key blocks: -----BEGIN RSA PRIVATE KEY----- ... -----END RSA PRIVATE KEY-----
+_PRIVATE_KEY_RE = re.compile(
+    r"-----BEGIN[A-Z ]*PRIVATE KEY-----[\s\S]*?-----END[A-Z ]*PRIVATE KEY-----"
+)
+
+# Database connection strings: protocol://user:PASSWORD@host
+# Catches postgres, mysql, mongodb, redis, amqp URLs and redacts the password
+_DB_CONNSTR_RE = re.compile(
+    r"((?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp)://[^:]+:)([^@]+)(@)",
+    re.IGNORECASE,
+)
+
 # E.164 phone numbers: +, 7-15 digits
 # Negative lookahead prevents matching hex strings or identifiers
 _SIGNAL_PHONE_RE = re.compile(r"(\+[1-9]\d{6,14})(?![A-Za-z0-9])")
@@ -109,6 +133,12 @@ def redact_sensitive_text(text: str) -> str:
         return f"{prefix}{digits}:***"
     text = _TELEGRAM_RE.sub(_redact_telegram, text)
 
+    # Private key blocks
+    text = _PRIVATE_KEY_RE.sub("[REDACTED PRIVATE KEY]", text)
+
+    # Database connection string passwords
+    text = _DB_CONNSTR_RE.sub(lambda m: f"{m.group(1)}***{m.group(3)}", text)
+
     # E.164 phone numbers (Signal, WhatsApp)
     def _redact_phone(m):
         phone = m.group(1)