#!/usr/bin/env python3
"""
Secret leak detection script for pre-commit hooks.

Detects common secret patterns in staged files:
- API keys (sk-*, pk_*, etc.)
- Private keys (-----BEGIN PRIVATE KEY-----)
- Passwords in config files
- GitHub/Gitea tokens
- Database connection strings with credentials
"""

import argparse
import fnmatch
import os
import re
import sys
from pathlib import Path
from typing import Dict, Iterable, Iterator, List, Optional, Tuple

# Secret patterns to detect.
# Maps a pattern name to {"pattern": <regex source>, "description": <label>}.
SECRET_PATTERNS = {
    "openai_api_key": {
        "pattern": r"sk-[a-zA-Z0-9]{20,}",
        "description": "OpenAI API key",
    },
    "anthropic_api_key": {
        # '-' and '_' are allowed after the prefix so that keys of the form
        # "sk-ant-api03-..." are matched as well.
        "pattern": r"sk-ant-[a-zA-Z0-9_\-]{32,}",
        "description": "Anthropic API key",
    },
    "generic_api_key": {
        "pattern": r"(?i)(api[_-]?key|apikey)\s*[:=]\s*['\"]?([a-zA-Z0-9_\-]{16,})['\"]?",
        "description": "Generic API key",
    },
    "private_key": {
        "pattern": r"-----BEGIN (RSA |DSA |EC |OPENSSH )?PRIVATE KEY-----",
        "description": "Private key",
    },
    "github_token": {
        "pattern": r"gh[pousr]_[A-Za-z0-9_]{36,}",
        "description": "GitHub token",
    },
    "gitea_token": {
        "pattern": r"gitea_[a-f0-9]{40}",
        "description": "Gitea token",
    },
    "aws_access_key": {
        "pattern": r"AKIA[0-9A-Z]{16}",
        "description": "AWS Access Key ID",
    },
    "aws_secret_key": {
        "pattern": r"(?i)aws[_-]?secret[_-]?(access)?[_-]?key\s*[:=]\s*['\"]?([a-zA-Z0-9/+=]{40})['\"]?",
        "description": "AWS Secret Access Key",
    },
    "database_connection_string": {
        # user and password may not contain whitespace, '/' or '@', so the
        # match cannot run past the authority part of the URL into unrelated
        # text later on the line (e.g. an e-mail address in a comment).
        # The user part may be empty ("redis://:password@host").
        "pattern": r"(?i)(mongodb|mysql|postgresql|postgres|redis)://[^:\s/@]*:[^@\s/]+@[^/\s]+",
        "description": "Database connection string with credentials",
    },
    "password_in_config": {
        "pattern": r"(?i)(password|passwd|pwd)\s*[:=]\s*['\"]([^'\"]{4,})['\"]",
        "description": "Hardcoded password",
    },
    "stripe_key": {
        "pattern": r"sk_(live|test)_[0-9a-zA-Z]{24,}",
        "description": "Stripe API key",
    },
    "slack_token": {
        "pattern": r"xox[baprs]-[0-9a-zA-Z]{10,}",
        "description": "Slack token",
    },
    "telegram_bot_token": {
        "pattern": r"[0-9]{8,10}:[a-zA-Z0-9_-]{35}",
        "description": "Telegram bot token",
    },
    "jwt_token": {
        "pattern": r"eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*",
        "description": "JWT token",
    },
    "bearer_token": {
        "pattern": r"(?i)bearer\s+[a-zA-Z0-9_\-\.=]{20,}",
        "description": "Bearer token",
    },
}

# Files/patterns to exclude from scanning
EXCLUSIONS = {
    # Exact file names (compared against the basename).
    "files": {
        ".pre-commit-hooks.yaml",
        ".gitignore",
        "poetry.lock",
        "package-lock.json",
        "yarn.lock",
        "Pipfile.lock",
        ".secrets.baseline",
    },
    # File suffixes (compared case-insensitively).
    "extensions": {
        ".md",
        ".svg",
        ".png",
        ".jpg",
        ".jpeg",
        ".gif",
        ".ico",
        ".woff",
        ".woff2",
        ".ttf",
        ".eot",
    },
    # Directory fragments; a file below any of these is skipped.
    "paths": {
        ".git/",
        "node_modules/",
        "__pycache__/",
        ".pytest_cache/",
        ".mypy_cache/",
        ".venv/",
        "venv/",
        ".tox/",
        "dist/",
        "build/",
        ".eggs/",
    },
    # Regexes searched (case-insensitively) in the whole line; a hit
    # suppresses every finding on that line.
    "patterns": {
        r"your_[a-z_]+_here",
        r"example_[a-z_]+",
        r"dummy_[a-z_]+",
        r"test_[a-z_]+",
        r"fake_[a-z_]+",
        r"password\s*[=:]\s*['\"]?(changeme|password|123456|admin)['\"]?",
        r"#.*(?:example|placeholder|sample)",
        # Scheme list kept in sync with the "database_connection_string"
        # detection pattern above.
        r"(mongodb|mysql|postgresql|postgres|redis)://[^:]+:[^@]+@localhost",
        r"(mongodb|mysql|postgresql|postgres|redis)://[^:]+:[^@]+@127\.0\.0\.1",
    },
}

# Markers for inline exclusions
EXCLUSION_MARKERS = [
    "# pragma: allowlist secret",
    "# noqa: secret",
    "// pragma: allowlist secret",
    "/* pragma: allowlist secret */",
    "# secret-detection:ignore",
]


def should_exclude_file(file_path: str) -> bool:
    """Check if file should be excluded from scanning.

    A file is excluded when its basename is listed in EXCLUSIONS["files"],
    its suffix is listed in EXCLUSIONS["extensions"], or it lives below one
    of the directories listed in EXCLUSIONS["paths"].
    """
    path = Path(file_path)

    if path.name in EXCLUSIONS["files"]:
        return True

    if path.suffix.lower() in EXCLUSIONS["extensions"]:
        return True

    # Anchor both sides with "/" so an entry only matches whole path
    # components: "build/" must not exclude "rebuild/app.py". as_posix()
    # normalises the platform separator to "/".
    anchored = "/" + path.as_posix()
    return any("/" + excluded in anchored for excluded in EXCLUSIONS["paths"])


def has_exclusion_marker(line: str) -> bool:
    """Check if line has an exclusion marker."""
    return any(marker in line for marker in EXCLUSION_MARKERS)


def is_excluded_match(line: str, match_str: str) -> bool:
    """Check if the match should be excluded.

    The decision is made on the whole line: it returns True when any regex
    in EXCLUSIONS["patterns"] is found in the line, or when the line
    contains an obvious placeholder word as a quoted value.

    match_str is accepted for interface compatibility; it is currently not
    consulted.
    """
    for pattern in EXCLUSIONS["patterns"]:
        if re.search(pattern, line, re.IGNORECASE):
            return True

    if re.search(
        r"['\"](fake|test|dummy|example|placeholder|changeme)['\"]",
        line,
        re.IGNORECASE,
    ):
        return True

    return False


def scan_file(file_path: str) -> List[Tuple[int, str, str, str]]:
    """Scan a single file for secrets.

    Returns list of tuples:
    (line_number, line_content, pattern_name, description)

    Each pattern is reported at most once per line. A file that cannot be
    opened produces a warning on stderr and an empty result.
    """
    findings: List[Tuple[int, str, str, str]] = []

    try:
        # errors="ignore" drops undecodable bytes so binary-ish files do not
        # abort the scan.
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            lines = f.readlines()
    except OSError as e:
        print(f"Warning: Could not read {file_path}: {e}", file=sys.stderr)
        return findings

    for line_num, line in enumerate(lines, 1):
        if has_exclusion_marker(line):
            continue

        for pattern_name, pattern_info in SECRET_PATTERNS.items():
            for match in re.finditer(pattern_info["pattern"], line):
                if is_excluded_match(line, match.group(0)):
                    continue
                findings.append(
                    (line_num, line.strip(), pattern_name, pattern_info["description"])
                )
                # Further matches of the same pattern on this line would
                # only add identical tuples.
                break

    return findings


def scan_files(file_paths: List[str]) -> Dict[str, List[Tuple[int, str, str, str]]]:
    """Scan multiple files for secrets.

    Returns dict: {file_path: [(line_num, line, pattern, description), ...]}

    Excluded files and files without findings are left out of the result.
    """
    results: Dict[str, List[Tuple[int, str, str, str]]] = {}

    for file_path in file_paths:
        if should_exclude_file(file_path):
            continue

        findings = scan_file(file_path)
        if findings:
            results[file_path] = findings

    return results


def print_findings(results: dict) -> None:
    """Print secret findings in a readable format.

    Prints nothing when results is empty. Each finding shows at most the
    first 100 characters of the offending line.
    """
    if not results:
        return

    print("=" * 80)
    print("POTENTIAL SECRETS DETECTED!")
    print("=" * 80)
    print()

    total_findings = 0
    for file_path, findings in results.items():
        print(f"\nFILE: {file_path}")
        print("-" * 40)
        for line_num, line, pattern_name, description in findings:
            total_findings += 1
            print(f" Line {line_num}: {description}")
            print(f" Pattern: {pattern_name}")
            print(f" Content: {line[:100]}{'...' if len(line) > 100 else ''}")
            print()

    print("=" * 80)
    print(f"Total findings: {total_findings}")
    print("=" * 80)
    print()
    print("To fix this:")
    print(" 1. Remove the secret from the file")
    print(" 2. Use environment variables or a secrets manager")
    print(" 3. If this is a false positive, add an exclusion marker:")
    print(" - Add '# pragma: allowlist secret' to the end of the line")
    print(" - Or add '# secret-detection:ignore' to the end of the line")
    print()


def _matches_any_glob(file_path: str, patterns: Iterable[str]) -> bool:
    """Return True if the path or its basename matches any fnmatch pattern."""
    name = Path(file_path).name
    return any(
        fnmatch.fnmatch(file_path, pattern) or fnmatch.fnmatch(name, pattern)
        for pattern in patterns
    )


def _expand_paths(paths: Iterable[str]) -> Iterator[str]:
    """Yield file paths, replacing each directory by the files below it."""
    # Directory names that are never descended into.
    pruned = {entry.rstrip("/") for entry in EXCLUSIONS["paths"]}

    for raw in paths:
        if not os.path.isdir(raw):
            # Regular files and nonexistent paths are passed through;
            # scan_file reports unreadable ones.
            yield raw
            continue

        for root, dirs, files in os.walk(raw):
            # In-place assignment tells os.walk which subdirectories to
            # skip; sorting keeps the output order deterministic.
            dirs[:] = sorted(d for d in dirs if d not in pruned)
            for name in sorted(files):
                yield os.path.join(root, name)


def main(argv: Optional[List[str]] = None) -> int:
    """Main entry point.

    Args:
        argv: Command-line arguments without the program name. When None,
            argparse reads them from sys.argv.

    Returns:
        0 when no secrets were found, 1 when at least one was found.
        Invalid command-line usage makes argparse exit with status 2.
    """
    parser = argparse.ArgumentParser(
        description="Detect secrets in files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s file1.py file2.yaml
  %(prog)s --exclude "*.md" src/

Exit codes:
  0 - No secrets found
  1 - Secrets detected
  2 - Error
""",
    )
    parser.add_argument(
        "files",
        nargs="+",
        help="Files or directories to scan",
    )
    parser.add_argument(
        "--exclude",
        action="append",
        default=[],
        help="Additional file patterns to exclude",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Print verbose output",
    )

    args = parser.parse_args(argv)

    files_to_scan = []
    for file_path in _expand_paths(args.files):
        if should_exclude_file(file_path) or _matches_any_glob(file_path, args.exclude):
            if args.verbose:
                print(f"Skipping excluded file: {file_path}")
            continue
        files_to_scan.append(file_path)

    if args.verbose:
        print(f"Scanning {len(files_to_scan)} files...")

    results = scan_files(files_to_scan)

    if results:
        print_findings(results)
        return 1

    if args.verbose:
        print("No secrets detected!")

    return 0


if __name__ == "__main__":
    sys.exit(main())