Files
timmy-home/scripts/detect_secrets.py

324 lines
8.5 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
"""
Secret leak detection script for pre-commit hooks.
Detects common secret patterns in staged files:
- API keys (sk-*, pk_*, etc.)
- Private keys (-----BEGIN PRIVATE KEY-----)
- Passwords in config files
- GitHub/Gitea tokens
- Database connection strings with credentials
"""
import argparse
import fnmatch
import re
import sys
from pathlib import Path
from typing import List, Tuple
# Secret patterns to detect.
#
# Each entry maps a short pattern name to a dict with:
#   "pattern"     - regular-expression source; scan_file() applies it to every
#                   line with re.finditer().
#   "description" - human-readable label printed in the findings report.
#
# Insertion order is the order in which findings for one line are reported.
SECRET_PATTERNS = {
    "openai_api_key": {
        # Classic keys are "sk-" followed by alphanumerics only.  Project,
        # service-account and admin keys carry an extra "proj-" / "svcacct-" /
        # "admin-" prefix and a body that may contain "-" and "_", which the
        # alphanumeric-only class could never match.  The hyphenated form is
        # only accepted behind one of those prefixes so that ordinary
        # hyphenated identifiers containing "sk-" are not flagged.
        "pattern": r"sk-(?:(?:proj|svcacct|admin)-[a-zA-Z0-9_\-]{20,}|[a-zA-Z0-9]{20,})",
        "description": "OpenAI API key",
    },
    "anthropic_api_key": {
        # Keys look like "sk-ant-api03-<base64url>", i.e. the part after
        # "sk-ant-" contains "-" and "_"; both must be in the character class
        # or real keys are missed.
        "pattern": r"sk-ant-[a-zA-Z0-9_\-]{32,}",
        "description": "Anthropic API key",
    },
    "generic_api_key": {
        "pattern": r"(?i)(api[_-]?key|apikey)\s*[:=]\s*['\"]?([a-zA-Z0-9_\-]{16,})['\"]?",
        "description": "Generic API key",
    },
    "private_key": {
        "pattern": r"-----BEGIN (RSA |DSA |EC |OPENSSH )?PRIVATE KEY-----",
        "description": "Private key",
    },
    "github_token": {
        "pattern": r"gh[pousr]_[A-Za-z0-9_]{36,}",
        "description": "GitHub token",
    },
    "gitea_token": {
        "pattern": r"gitea_[a-f0-9]{40}",
        "description": "Gitea token",
    },
    "aws_access_key": {
        "pattern": r"AKIA[0-9A-Z]{16}",
        "description": "AWS Access Key ID",
    },
    "aws_secret_key": {
        "pattern": r"(?i)aws[_-]?secret[_-]?(access)?[_-]?key\s*[:=]\s*['\"]?([a-zA-Z0-9/+=]{40})['\"]?",
        "description": "AWS Secret Access Key",
    },
    "database_connection_string": {
        "pattern": r"(?i)(mongodb|mysql|postgresql|postgres|redis)://[^:]+:[^@]+@[^/]+",
        "description": "Database connection string with credentials",
    },
    "password_in_config": {
        "pattern": r"(?i)(password|passwd|pwd)\s*[:=]\s*['\"]([^'\"]{4,})['\"]",
        "description": "Hardcoded password",
    },
    "stripe_key": {
        "pattern": r"sk_(live|test)_[0-9a-zA-Z]{24,}",
        "description": "Stripe API key",
    },
    "slack_token": {
        "pattern": r"xox[baprs]-[0-9a-zA-Z]{10,}",
        "description": "Slack token",
    },
    "telegram_bot_token": {
        "pattern": r"[0-9]{8,10}:[a-zA-Z0-9_-]{35}",
        "description": "Telegram bot token",
    },
    "jwt_token": {
        "pattern": r"eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*",
        "description": "JWT token",
    },
    "bearer_token": {
        "pattern": r"(?i)bearer\s+[a-zA-Z0-9_\-\.=]{20,}",
        "description": "Bearer token",
    },
}
# Files/patterns to exclude from scanning.
#
#   "files"      - exact base names that are never scanned.
#   "extensions" - file suffixes (compared lower-cased) that are never scanned.
#   "paths"      - directory names, written with a trailing slash.
#   "patterns"   - regular expressions searched case-insensitively against the
#                  whole line; a hit suppresses every finding on that line.
EXCLUSIONS = {
    "files": {
        ".pre-commit-hooks.yaml",
        ".gitignore",
        "poetry.lock",
        "package-lock.json",
        "yarn.lock",
        "Pipfile.lock",
        ".secrets.baseline",
    },
    "extensions": {
        ".md",
        ".svg",
        ".png",
        ".jpg",
        ".jpeg",
        ".gif",
        ".ico",
        ".woff",
        ".woff2",
        ".ttf",
        ".eot",
    },
    "paths": {
        ".git/",
        "node_modules/",
        "__pycache__/",
        ".pytest_cache/",
        ".mypy_cache/",
        ".venv/",
        "venv/",
        ".tox/",
        "dist/",
        "build/",
        ".eggs/",
    },
    "patterns": {
        r"your_[a-z_]+_here",
        r"example_[a-z_]+",
        r"dummy_[a-z_]+",
        r"test_[a-z_]+",
        r"fake_[a-z_]+",
        r"password\s*[=:]\s*['\"]?(changeme|password|123456|admin)['\"]?",
        r"#.*(?:example|placeholder|sample)",
        # Local development connection strings.  The scheme list mirrors the
        # one used by the "database_connection_string" detector so that
        # postgres:// and redis:// URLs pointing at localhost are treated the
        # same way as the other schemes.
        r"(mongodb|mysql|postgresql|postgres|redis)://[^:]+:[^@]+@localhost",
        r"(mongodb|mysql|postgresql|postgres|redis)://[^:]+:[^@]+@127\.0\.0\.1",
    },
}
# Inline opt-out markers: a line that contains any of these substrings is
# skipped by the scanner.  Several comment syntaxes are covered so the same
# convention works in Python/YAML/shell ("#"), C-like ("//") and block-comment
# ("/* */") files.
EXCLUSION_MARKERS = [
    "# pragma: allowlist secret",
    "# noqa: secret",
    "// pragma: allowlist secret",
    "/* pragma: allowlist secret */",
    "# secret-detection:ignore",
]
def should_exclude_file(file_path: str) -> bool:
    """Return True when *file_path* must be skipped by the scanner.

    A path is excluded when any of the following holds:

    * its base name is listed in ``EXCLUSIONS["files"]``;
    * its lower-cased suffix is listed in ``EXCLUSIONS["extensions"]``;
    * one of its parent directories is named in ``EXCLUSIONS["paths"]``.

    Directory entries are compared against whole path components rather than
    as substrings of the path, so ``rebuild/app.py`` or ``myvenv/x.py`` are
    still scanned even though their text contains ``build/`` / ``venv/``.

    Args:
        file_path: Path of the candidate file, relative or absolute.

    Returns:
        True if the file should not be scanned, False otherwise.
    """
    path = Path(file_path)

    if path.name in EXCLUSIONS["files"]:
        return True
    if path.suffix.lower() in EXCLUSIONS["extensions"]:
        return True

    # Entries are written with a trailing slash ("dist/"); strip it so they
    # can be compared with individual path components.
    excluded_dirs = {entry.strip("/") for entry in EXCLUSIONS["paths"]}

    # The last component is the file itself, so only its parents are checked.
    return any(part in excluded_dirs for part in path.parts[:-1])
def has_exclusion_marker(line: str) -> bool:
    """Tell whether *line* carries an inline opt-out marker.

    Args:
        line: One line of text from a scanned file.

    Returns:
        True if any entry of ``EXCLUSION_MARKERS`` occurs as a substring of
        *line*, False otherwise.
    """
    for marker in EXCLUSION_MARKERS:
        if marker in line:
            return True
    return False
def is_excluded_match(line: str, match_str: str) -> bool:
    """Decide whether a finding on *line* should be suppressed.

    The decision is made on the whole line: it is suppressed when the line
    matches any regex in ``EXCLUSIONS["patterns"]`` (case-insensitively) or
    contains a quoted placeholder word such as ``"example"`` or ``'changeme'``.

    Args:
        line: The full line on which a secret pattern matched.
        match_str: The matched text.  It is accepted for interface
            compatibility but is not consulted by the current logic.

    Returns:
        True if the finding should be ignored, False otherwise.
    """
    if any(re.search(rx, line, re.IGNORECASE) for rx in EXCLUSIONS["patterns"]):
        return True

    # A quoted, obviously fake value anywhere on the line.
    placeholder_literal = r"['\"](fake|test|dummy|example|placeholder|changeme)['\"]"
    return re.search(placeholder_literal, line, re.IGNORECASE) is not None
def scan_file(file_path: str) -> List[Tuple[int, str, str, str]]:
    """Scan a single file for secrets.

    The file is read as UTF-8 with undecodable bytes dropped.  Lines carrying
    an inline exclusion marker are skipped entirely.  Every remaining line is
    tested against each entry of ``SECRET_PATTERNS``.

    A pattern is reported at most once per line: further matches of the same
    pattern on the same line would only produce byte-identical entries and
    inflate the findings count.

    Args:
        file_path: Path of the file to scan.

    Returns:
        A list of ``(line_number, line_content, pattern_name, description)``
        tuples, where ``line_number`` is 1-based and ``line_content`` is the
        line with surrounding whitespace stripped.  The list is empty when
        nothing is found or when the file cannot be read (a warning is then
        written to stderr).
    """
    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            lines = f.readlines()
    except OSError as e:  # IOError is an alias of OSError on Python 3
        print(f"Warning: Could not read {file_path}: {e}", file=sys.stderr)
        return []

    findings: List[Tuple[int, str, str, str]] = []
    for line_num, line in enumerate(lines, 1):
        if has_exclusion_marker(line):
            continue

        stripped = line.strip()
        for pattern_name, pattern_info in SECRET_PATTERNS.items():
            for match in re.finditer(pattern_info["pattern"], line):
                if is_excluded_match(line, match.group(0)):
                    continue
                findings.append(
                    (line_num, stripped, pattern_name, pattern_info["description"])
                )
                # First non-excluded match is enough for this (line, pattern).
                break

    return findings
def scan_files(file_paths: List[str]) -> dict:
    """Scan several files and collect the findings per file.

    Files rejected by ``should_exclude_file`` are not opened.  Files without
    findings do not appear in the result.

    Args:
        file_paths: Paths of the files to scan.

    Returns:
        A dict of the form
        ``{file_path: [(line_num, line, pattern, description), ...]}``.
    """
    # Lazy pipeline: each path is filtered, scanned and kept/dropped in turn,
    # so files are processed in the order given.
    scannable = (path for path in file_paths if not should_exclude_file(path))
    scanned = ((path, scan_file(path)) for path in scannable)
    return {path: hits for path, hits in scanned if hits}
def print_findings(results: dict) -> None:
    """Write a human-readable report of *results* to stdout.

    Nothing is printed when *results* is empty.  Otherwise the report holds a
    banner, one section per file listing each finding (line number,
    description, pattern name and the first 100 characters of the line), the
    total number of findings and remediation hints.

    NOTE(review): the offending line is echoed, so the suspected secret itself
    ends up in the hook output - consider redacting it.

    Args:
        results: Mapping ``{file_path: [(line_num, line, pattern, description), ...]}``
            as produced by ``scan_files``.
    """
    if not results:
        return

    banner = "=" * 80

    print(banner)
    print("POTENTIAL SECRETS DETECTED!")
    print(banner)
    print()

    shown = 0
    for path, hits in results.items():
        print(f"\nFILE: {path}")
        print("-" * 40)
        for lineno, content, rule, label in hits:
            shown += 1
            # Long lines are cut to 100 characters and marked with "...".
            snippet = content[:100]
            if len(content) > 100:
                snippet += "..."
            print(f" Line {lineno}: {label}")
            print(f" Pattern: {rule}")
            print(f" Content: {snippet}")
            print()

    print(banner)
    print(f"Total findings: {shown}")
    print(banner)
    print()

    remediation = (
        "To fix this:",
        " 1. Remove the secret from the file",
        " 2. Use environment variables or a secrets manager",
        " 3. If this is a false positive, add an exclusion marker:",
        " - Add '# pragma: allowlist secret' to the end of the line",
        " - Or add '# secret-detection:ignore' to the end of the line",
    )
    for hint in remediation:
        print(hint)
    print()
def main() -> int:
    """Parse the command line, scan the requested files and report findings.

    Files are skipped when they match the built-in exclusions
    (``should_exclude_file``) or any glob supplied through ``--exclude``.  A
    glob is tested against both the path as given and its base name, so
    ``--exclude "*.md"`` and ``--exclude README.txt`` also apply to files in
    sub-directories.

    Returns:
        0 when nothing was found, 1 when at least one potential secret was
        reported.  (argparse itself exits with status 2 on usage errors.)
    """
    parser = argparse.ArgumentParser(
        description="Detect secrets in files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
%(prog)s file1.py file2.yaml
%(prog)s --exclude "*.md" src/
Exit codes:
0 - No secrets found
1 - Secrets detected
2 - Error
""",
    )
    parser.add_argument(
        "files",
        nargs="+",
        help="Files to scan",
    )
    parser.add_argument(
        "--exclude",
        action="append",
        default=[],
        help="Additional file patterns to exclude",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Print verbose output",
    )
    args = parser.parse_args()

    def matches_user_exclude(candidate: str) -> bool:
        """Return True if *candidate* matches any ``--exclude`` glob."""
        base_name = Path(candidate).name
        return any(
            fnmatch.fnmatch(candidate, pattern)
            or fnmatch.fnmatch(base_name, pattern)
            for pattern in args.exclude
        )

    files_to_scan = []
    for file_path in args.files:
        # Built-in exclusions first, then the user-supplied globs, which were
        # previously parsed but never applied.
        if should_exclude_file(file_path) or matches_user_exclude(file_path):
            if args.verbose:
                print(f"Skipping excluded file: {file_path}")
            continue
        files_to_scan.append(file_path)

    if args.verbose:
        print(f"Scanning {len(files_to_scan)} files...")

    results = scan_files(files_to_scan)
    if results:
        print_findings(results)
        return 1

    if args.verbose:
        print("No secrets detected!")
    return 0
# Script entry point: the return value of main() becomes the process exit
# status (0 = clean, 1 = secrets detected).
if __name__ == "__main__":
    raise SystemExit(main())