# agent/redact.py  (new file in this change set)
"""Regex-based secret redaction for logs and tool output.

Applies pattern matching to mask API keys, tokens, and credentials
before they reach log files, verbose output, or gateway logs.

Short tokens (< 18 chars) are fully masked. Longer tokens preserve
the first 6 and last 4 characters for debuggability.

Only the secret itself is rewritten: key names, separators, quotes and
surrounding whitespace are left exactly as they appeared in the input.
"""

import logging
import re
from typing import Optional

logger = logging.getLogger(__name__)

# Known API key prefixes -- match the prefix + contiguous token chars
_PREFIX_PATTERNS = [
    r"sk-[A-Za-z0-9_-]{10,}",           # OpenAI / OpenRouter
    r"ghp_[A-Za-z0-9]{10,}",            # GitHub PAT (classic)
    r"github_pat_[A-Za-z0-9_]{10,}",    # GitHub PAT (fine-grained)
    r"xox[baprs]-[A-Za-z0-9-]{10,}",    # Slack tokens
    r"AIza[A-Za-z0-9_-]{30,}",          # Google API keys
    r"pplx-[A-Za-z0-9]{10,}",           # Perplexity
    r"fal_[A-Za-z0-9_-]{10,}",          # Fal.ai
    r"fc-[A-Za-z0-9]{10,}",             # Firecrawl
    r"bb_live_[A-Za-z0-9_-]{10,}",      # BrowserBase
    r"gAAAA[A-Za-z0-9_=-]{20,}",        # Codex encrypted tokens
]

# ENV assignment patterns: KEY=value where KEY contains a secret-like name.
# Groups: 1 = variable name, 2 = optional opening quote, 3 = value.
_SECRET_ENV_NAMES = r"(?:API_?KEY|TOKEN|SECRET|PASSWORD|PASSWD|CREDENTIAL|AUTH)"
_ENV_ASSIGN_RE = re.compile(
    rf"([A-Z_]*{_SECRET_ENV_NAMES}[A-Z_]*)\s*=\s*(['\"]?)(\S+)\2",
    re.IGNORECASE,
)

# JSON field patterns: "apiKey": "value", "token": "value", etc.
# Groups: 1 = quoted key, 2 = value (without its quotes).
_JSON_KEY_NAMES = r"(?:api_?[Kk]ey|token|secret|password|access_token|refresh_token|auth_token|bearer)"
_JSON_FIELD_RE = re.compile(
    rf'("{_JSON_KEY_NAMES}")\s*:\s*"([^"]+)"',
    re.IGNORECASE,
)

# Authorization headers
# Groups: 1 = "Authorization: Bearer " lead-in, 2 = credential.
_AUTH_HEADER_RE = re.compile(
    r"(Authorization:\s*Bearer\s+)(\S+)",
    re.IGNORECASE,
)

# Telegram bot tokens: bot<digits>:<token> or <digits>:<token>
# Groups: 1 = optional "bot" prefix, 2 = numeric id, 3 = token.
_TELEGRAM_RE = re.compile(
    r"(bot)?(\d{8,}):([-A-Za-z0-9_]{30,})",
)

# Compile known prefix patterns into one alternation. Group 1 is the token.
# NOTE(review): this expression was truncated in the reviewed text. The single
# capture group is required by the m.group(1) call below; the surrounding
# look-arounds are reconstructed -- confirm against the repository.
_PREFIX_RE = re.compile(
    r"(?<![A-Za-z0-9_-])(" + "|".join(_PREFIX_PATTERNS) + r")(?![A-Za-z0-9_-])"
)


def _mask_token(token: str) -> str:
    """Mask a token, preserving prefix for long tokens.

    Tokens shorter than 18 characters become ``"***"``; longer ones keep
    their first 6 and last 4 characters around a literal ``"..."``.
    """
    if len(token) < 18:
        return "***"
    return f"{token[:6]}...{token[-4:]}"


def _mask_group(match: "re.Match", group: int) -> str:
    """Return the full matched text with only *group* replaced by its mask.

    Everything else in the match (key name, separator, quotes, whitespace)
    is copied through untouched so redaction never reformats log lines.
    """
    whole = match.group(0)
    base = match.start()
    head = whole[: match.start(group) - base]
    tail = whole[match.end(group) - base:]
    return f"{head}{_mask_token(match.group(group))}{tail}"


def _redact_telegram(match: "re.Match") -> str:
    """Keep the optional ``bot`` prefix and numeric id; hide the token part."""
    prefix = match.group(1) or ""
    return f"{prefix}{match.group(2)}:***"


def redact_sensitive_text(text: str) -> str:
    """Apply all redaction patterns to a block of text.

    Safe to call on any string -- non-matching text passes through unchanged,
    and falsy input (``""`` or ``None``) is returned as-is.

    Args:
        text: Arbitrary log or tool output.

    Returns:
        The text with recognised secrets replaced by their masked form.
    """
    if not text:
        return text

    # Context-anchored patterns run first. If the bare-prefix pass ran before
    # them, its 13-character output ("sk-pro...1234") would be picked up again
    # as the *value* of an ENV assignment / JSON field / Authorization header
    # and collapsed to "***", losing the documented head/tail hint.

    # ENV assignments: OPENAI_API_KEY=sk-abc...
    text = _ENV_ASSIGN_RE.sub(lambda m: _mask_group(m, 3), text)

    # JSON fields: "apiKey": "value"
    text = _JSON_FIELD_RE.sub(lambda m: _mask_group(m, 2), text)

    # Authorization headers
    text = _AUTH_HEADER_RE.sub(lambda m: _mask_group(m, 2), text)

    # Telegram bot tokens
    text = _TELEGRAM_RE.sub(_redact_telegram, text)

    # Known prefixes (sk-, ghp_, etc.) anywhere else in the text. A masked
    # value keeps only 6 leading characters, too few for any prefix pattern
    # to match again, so this pass cannot re-mask earlier output.
    text = _PREFIX_RE.sub(lambda m: _mask_token(m.group(1)), text)

    return text


class RedactingFormatter(logging.Formatter):
    """Log formatter that redacts secrets from all log messages.

    Accepts the same constructor arguments as ``logging.Formatter``. The
    record is formatted by the base class first, then the complete output
    string is passed through ``redact_sensitive_text``.
    """

    def format(self, record: logging.LogRecord) -> str:
        """Format *record* with the base class, then redact the result."""
        return redact_sensitive_text(super().format(record))


# ---------------------------------------------------------------------------
# Call-site wiring from the same change set. These hunks patch other files
# and are kept here verbatim (as unified-diff text) for reference only.
#
# diff --git a/gateway/run.py b/gateway/run.py
# index c21cf8b9e..8d8e29e39 100644
# --- a/gateway/run.py
# +++ b/gateway/run.py
# @@ -2027,7 +2027,8 @@ async def start_gateway(config: Optional[GatewayConfig] = None) -> bool:
#              maxBytes=5 * 1024 * 1024,
#              backupCount=3,
#          )
# -        file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
# +        from agent.redact import RedactingFormatter
# +        file_handler.setFormatter(RedactingFormatter('%(asctime)s %(levelname)s %(name)s: %(message)s'))
#          logging.getLogger().addHandler(file_handler)
#          logging.getLogger().setLevel(logging.INFO)
#
# diff --git a/run_agent.py b/run_agent.py
# index f025e6b98..b1e1f1bea 100644
# --- a/run_agent.py
# +++ b/run_agent.py
# @@ -247,13 +247,19 @@ class AIAgent:
#          self._use_prompt_caching = is_openrouter and is_claude
#          self._cache_ttl = "5m"  # Default 5-minute TTL (1.25x write cost)
#
# -        # Configure logging
# +        # Configure logging with secret redaction
# +        from agent.redact import RedactingFormatter
#          if self.verbose_logging:
#              logging.basicConfig(
#                  level=logging.DEBUG,
#                  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
#                  datefmt='%H:%M:%S'
#              )
# +            for handler in logging.getLogger().handlers:
# +                handler.setFormatter(RedactingFormatter(
# +                    '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
# +                    datefmt='%H:%M:%S',
# +                ))
#              # Keep third-party libraries at WARNING level to reduce noise
#              # We have our own retry and error logging that's more informative
#              logging.getLogger('openai').setLevel(logging.WARNING)
#
# NOTE(review): the loop in the run_agent.py hunk replaces the formatter on
# every handler currently attached to the root logger, not only the one
# basicConfig may have created -- confirm that is intended.
# ---------------------------------------------------------------------------