324 lines
8.5 KiB
Python
324 lines
8.5 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
Secret leak detection script for pre-commit hooks.

Detects common secret patterns in staged files:
- API keys (sk-*, pk_*, etc.)
- Private keys (-----BEGIN PRIVATE KEY-----)
- Passwords in config files
- GitHub/Gitea tokens
- Database connection strings with credentials
|
||
|
|
"""
|
||
|
|
|
||
|
|
import argparse
|
||
|
|
import re
|
||
|
|
import sys
|
||
|
|
from pathlib import Path
|
||
|
|
from typing import List, Tuple
|
||
|
|
|
||
|
|
|
||
|
|
# Secret patterns to detect.
#
# Maps a short pattern name to a dict with two keys:
#   "pattern"     - regular expression source, searched in one line at a time
#   "description" - human-readable label printed when the pattern matches
SECRET_PATTERNS = {
    "openai_api_key": {
        # Two shapes are accepted: the plain `sk-` + alphanumerics form, and
        # the `sk-proj-` form whose body may also contain `_` and `-` (the
        # plain form alone can never match past the second hyphen).
        "pattern": r"sk-(?:proj-[a-zA-Z0-9_\-]{20,}|[a-zA-Z0-9]{20,})",
        "description": "OpenAI API key",
    },
    "anthropic_api_key": {
        # The body after `sk-ant-` may contain `_` and `-` (for example the
        # `api03-` segment), so a purely alphanumeric class would miss it.
        "pattern": r"sk-ant-[a-zA-Z0-9_\-]{32,}",
        "description": "Anthropic API key",
    },
    "generic_api_key": {
        "pattern": r"(?i)(api[_-]?key|apikey)\s*[:=]\s*['\"]?([a-zA-Z0-9_\-]{16,})['\"]?",
        "description": "Generic API key",
    },
    "private_key": {
        "pattern": r"-----BEGIN (RSA |DSA |EC |OPENSSH )?PRIVATE KEY-----",
        "description": "Private key",
    },
    "github_token": {
        "pattern": r"gh[pousr]_[A-Za-z0-9_]{36,}",
        "description": "GitHub token",
    },
    "gitea_token": {
        "pattern": r"gitea_[a-f0-9]{40}",
        "description": "Gitea token",
    },
    "aws_access_key": {
        "pattern": r"AKIA[0-9A-Z]{16}",
        "description": "AWS Access Key ID",
    },
    "aws_secret_key": {
        "pattern": r"(?i)aws[_-]?secret[_-]?(access)?[_-]?key\s*[:=]\s*['\"]?([a-zA-Z0-9/+=]{40})['\"]?",
        "description": "AWS Secret Access Key",
    },
    "database_connection_string": {
        # Only URLs that embed `user:password@` are matched.
        "pattern": r"(?i)(mongodb|mysql|postgresql|postgres|redis)://[^:]+:[^@]+@[^/]+",
        "description": "Database connection string with credentials",
    },
    "password_in_config": {
        # Requires a quoted value of at least four characters.
        "pattern": r"(?i)(password|passwd|pwd)\s*[:=]\s*['\"]([^'\"]{4,})['\"]",
        "description": "Hardcoded password",
    },
    "stripe_key": {
        "pattern": r"sk_(live|test)_[0-9a-zA-Z]{24,}",
        "description": "Stripe API key",
    },
    "slack_token": {
        "pattern": r"xox[baprs]-[0-9a-zA-Z]{10,}",
        "description": "Slack token",
    },
    "telegram_bot_token": {
        "pattern": r"[0-9]{8,10}:[a-zA-Z0-9_-]{35}",
        "description": "Telegram bot token",
    },
    "jwt_token": {
        "pattern": r"eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*",
        "description": "JWT token",
    },
    "bearer_token": {
        "pattern": r"(?i)bearer\s+[a-zA-Z0-9_\-\.=]{20,}",
        "description": "Bearer token",
    },
}
|
||
|
|
|
||
|
|
# Files/patterns to exclude from scanning.
#
#   "files"      - exact base names that are never scanned
#   "extensions" - file suffixes (lower case, with the dot) that are never scanned
#   "paths"      - directory names; files below them are never scanned
#   "patterns"   - regexes; a line matching any of them is treated as a
#                  placeholder / local-development value and not reported
EXCLUSIONS = {
    "files": {
        ".pre-commit-hooks.yaml",
        ".gitignore",
        "poetry.lock",
        "package-lock.json",
        "yarn.lock",
        "Pipfile.lock",
        ".secrets.baseline",
    },
    "extensions": {
        ".md",
        ".svg",
        ".png",
        ".jpg",
        ".jpeg",
        ".gif",
        ".ico",
        ".woff",
        ".woff2",
        ".ttf",
        ".eot",
    },
    "paths": {
        ".git/",
        "node_modules/",
        "__pycache__/",
        ".pytest_cache/",
        ".mypy_cache/",
        ".venv/",
        "venv/",
        ".tox/",
        "dist/",
        "build/",
        ".eggs/",
    },
    "patterns": {
        r"your_[a-z_]+_here",
        r"example_[a-z_]+",
        r"dummy_[a-z_]+",
        r"test_[a-z_]+",
        r"fake_[a-z_]+",
        r"password\s*[=:]\s*['\"]?(changeme|password|123456|admin)['\"]?",
        r"#.*(?:example|placeholder|sample)",
        # Local-development database URLs. The scheme list mirrors the one in
        # the database connection-string detection pattern so that every
        # scheme that can be flagged can also be recognised as local.
        r"(mongodb|mysql|postgresql|postgres|redis)://[^:]+:[^@]+@localhost",
        r"(mongodb|mysql|postgresql|postgres|redis)://[^:]+:[^@]+@127\.0\.0\.1",
    },
}
|
||
|
|
|
||
|
|
# Inline markers that switch off detection for the line they appear on.
# A line containing any of these substrings is skipped by the scanner.
EXCLUSION_MARKERS = [
    # hash-comment languages
    "# pragma: allowlist secret",
    "# noqa: secret",
    # C-style line and block comments
    "// pragma: allowlist secret",
    "/* pragma: allowlist secret */",
    # tool-specific marker
    "# secret-detection:ignore",
]
|
||
|
|
|
||
|
|
|
||
|
|
def should_exclude_file(file_path: str) -> bool:
    """Check if file should be excluded from scanning.

    A file is excluded when its base name is listed in EXCLUSIONS["files"],
    when its lower-cased suffix is listed in EXCLUSIONS["extensions"], or
    when one of its parent directories matches an entry of
    EXCLUSIONS["paths"].

    Directory entries are compared against whole path components rather than
    as raw substrings, so "build/" excludes "build/out.py" and
    "pkg/build/out.py" but not "rebuild/out.py". Comparing components also
    makes the check independent of the platform's path separator.

    Args:
        file_path: Path of the candidate file, relative or absolute.

    Returns:
        True if the file must be skipped, False if it should be scanned.
    """
    path = Path(file_path)

    if path.name in EXCLUSIONS["files"]:
        return True

    if path.suffix.lower() in EXCLUSIONS["extensions"]:
        return True

    # Entries end with "/" and therefore only ever describe directories, so
    # the final component (the file name itself) is left out of the search.
    dir_parts = path.parts[:-1]
    for excluded_path in EXCLUSIONS["paths"]:
        wanted = tuple(part for part in excluded_path.split("/") if part)
        if not wanted:
            continue
        width = len(wanted)
        last_start = len(dir_parts) - width
        # Look for the entry as a contiguous run of directory components.
        if any(
            dir_parts[start:start + width] == wanted
            for start in range(last_start + 1)
        ):
            return True

    return False
|
||
|
|
|
||
|
|
|
||
|
|
def has_exclusion_marker(line: str) -> bool:
    """Tell whether *line* carries an inline exclusion marker.

    Returns True as soon as any entry of EXCLUSION_MARKERS occurs as a
    substring of the line, and False when none does.
    """
    for marker in EXCLUSION_MARKERS:
        if marker in line:
            return True
    return False
|
||
|
|
|
||
|
|
|
||
|
|
def is_excluded_match(line: str, match_str: str) -> bool:
    """Decide whether a finding on *line* is a known placeholder.

    The whole line is searched, case-insensitively, for every regex in
    EXCLUSIONS["patterns"] and for a quoted placeholder word such as
    'test' or "changeme". The first hit makes the finding excluded.

    match_str is part of the signature but is not consulted; every check
    runs against the full line.
    """
    quoted_placeholder = r"['\"](fake|test|dummy|example|placeholder|changeme)['\"]"
    candidate_regexes = [*EXCLUSIONS["patterns"], quoted_placeholder]
    return any(
        re.search(regex, line, re.IGNORECASE) is not None
        for regex in candidate_regexes
    )
|
||
|
|
|
||
|
|
|
||
|
|
def scan_file(file_path: str) -> List[Tuple[int, str, str, str]]:
    """Scan one file, line by line, for secret patterns.

    Lines carrying an inline exclusion marker are skipped entirely. Every
    remaining line is searched with each entry of SECRET_PATTERNS, and each
    regex match that is not a known placeholder yields one finding.

    A file that cannot be opened or read produces a warning on stderr and
    an empty result. Undecodable bytes are dropped while reading.

    Returns list of tuples: (line_number, line_content, pattern_name, description)
    where line_number is 1-based and line_content is the stripped line.
    """
    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as handle:
            content = handle.readlines()
    except (IOError, OSError) as exc:
        print(f"Warning: Could not read {file_path}: {exc}", file=sys.stderr)
        return []

    hits = []
    for number, text in enumerate(content, start=1):
        if has_exclusion_marker(text):
            continue

        for name, info in SECRET_PATTERNS.items():
            for found in re.finditer(info["pattern"], text):
                if not is_excluded_match(text, found.group(0)):
                    hits.append((number, text.strip(), name, info["description"]))

    return hits
|
||
|
|
|
||
|
|
|
||
|
|
def scan_files(file_paths: List[str]) -> dict:
    """Scan several files and collect the findings per file.

    Paths rejected by should_exclude_file are not opened. Files without
    findings are left out of the result.

    Returns dict: {file_path: [(line_num, line, pattern, description), ...]}
    """
    # Lazy pairs of (path, findings); each path is checked and scanned in turn.
    scanned = (
        (candidate, scan_file(candidate))
        for candidate in file_paths
        if not should_exclude_file(candidate)
    )
    return {candidate: hits for candidate, hits in scanned if hits}
|
||
|
|
|
||
|
|
|
||
|
|
def print_findings(results: dict) -> None:
    """Write a human-readable report of the findings to stdout.

    Nothing is printed when *results* is empty. Otherwise the report has a
    banner, one section per file listing each finding (with the line
    content cut to 100 characters), the total count, and remediation hints.
    """
    if not results:
        return

    rule = "=" * 80

    for text in (rule, "POTENTIAL SECRETS DETECTED!", rule, ""):
        print(text)

    total_findings = 0
    for file_path, findings in results.items():
        print(f"\nFILE: {file_path}")
        print("-" * 40)
        for line_num, line, pattern_name, description in findings:
            total_findings += 1
            suffix = "..." if len(line) > 100 else ""
            print(f" Line {line_num}: {description}")
            print(f" Pattern: {pattern_name}")
            print(f" Content: {line[:100]}{suffix}")
            print()

    closing = (
        rule,
        f"Total findings: {total_findings}",
        rule,
        "",
        "To fix this:",
        " 1. Remove the secret from the file",
        " 2. Use environment variables or a secrets manager",
        " 3. If this is a false positive, add an exclusion marker:",
        " - Add '# pragma: allowlist secret' to the end of the line",
        " - Or add '# secret-detection:ignore' to the end of the line",
        "",
    )
    for text in closing:
        print(text)
|
||
|
|
|
||
|
|
|
||
|
|
def main() -> int:
    """Main entry point.

    Parses the command line, drops files that are excluded either by the
    built-in rules (should_exclude_file) or by a user-supplied --exclude
    glob, scans the rest and prints a report.

    Each --exclude value is a shell-style glob that is tried against both
    the path as given on the command line and its base name, so "*.md"
    and "docs/*" both work.

    Returns:
        0 when no secrets were found, 1 when at least one was reported.
        (argparse itself exits with status 2 on invalid usage.)
    """
    # Imported locally so this function does not depend on the module's
    # import block being changed.
    import fnmatch

    parser = argparse.ArgumentParser(
        description="Detect secrets in files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
%(prog)s file1.py file2.yaml
%(prog)s --exclude "*.md" src/

Exit codes:
0 - No secrets found
1 - Secrets detected
2 - Error
""",
    )
    parser.add_argument(
        "files",
        nargs="+",
        help="Files to scan",
    )
    parser.add_argument(
        "--exclude",
        action="append",
        default=[],
        help="Additional file patterns to exclude",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Print verbose output",
    )

    args = parser.parse_args()

    def matches_user_exclude(candidate: str) -> bool:
        """Return True if *candidate* matches any --exclude glob."""
        base_name = Path(candidate).name
        return any(
            fnmatch.fnmatch(candidate, glob) or fnmatch.fnmatch(base_name, glob)
            for glob in args.exclude
        )

    files_to_scan = []
    for file_path in args.files:
        if should_exclude_file(file_path) or matches_user_exclude(file_path):
            if args.verbose:
                print(f"Skipping excluded file: {file_path}")
            continue
        files_to_scan.append(file_path)

    if args.verbose:
        print(f"Scanning {len(files_to_scan)} files...")

    results = scan_files(files_to_scan)

    if results:
        print_findings(results)
        return 1

    if args.verbose:
        print("No secrets detected!")

    return 0
|
||
|
|
|
||
|
|
|
||
|
|
# Script entry point: the process exit status is whatever main() returns.
if __name__ == "__main__":
    raise SystemExit(main())
|