security: add pre-commit hook for secret leak detection (#384)

This commit is contained in:
Timmy Bot
2026-04-05 00:27:00 +00:00
parent d5c357df76
commit 5ace1e69ce
4 changed files with 603 additions and 0 deletions

42
.pre-commit-hooks.yaml Normal file
View File

@@ -0,0 +1,42 @@
# Pre-commit hooks configuration for timmy-home
# See https://pre-commit.com for more information
repos:
# Standard pre-commit hooks
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
hooks:
- id: trailing-whitespace
exclude: '\.(md|txt)$'
- id: end-of-file-fixer
exclude: '\.(md|txt)$'
- id: check-yaml
- id: check-json
- id: check-added-large-files
args: ['--maxkb=5000']
- id: check-merge-conflict
- id: check-symlinks
- id: detect-private-key
# Secret detection - custom local hook
- repo: local
hooks:
- id: detect-secrets
name: Detect Secrets
description: Scan for API keys, tokens, and other secrets
entry: python3 scripts/detect_secrets.py
language: python
types: [text]
exclude:
'(?x)^(
.*\.md$|
.*\.svg$|
.*\.lock$|
.*-lock\..*$|
\.gitignore$|
\.secrets\.baseline$|
tests/test_secret_detection\.py$
)'
pass_filenames: true
require_serial: false
verbose: true

132
README.md Normal file
View File

@@ -0,0 +1,132 @@
# Timmy Home
Timmy Foundation's home repository for development operations and configurations.
## Security
### Pre-commit Hook for Secret Detection
This repository includes a pre-commit hook that automatically scans for secrets (API keys, tokens, passwords) before allowing commits.
#### Setup
Install pre-commit hooks:
```bash
pip install pre-commit
pre-commit install
```
#### What Gets Scanned
The hook detects:
- **API Keys**: OpenAI (`sk-*`), Anthropic (`sk-ant-*`), AWS, Stripe
- **Private Keys**: RSA, DSA, EC, OpenSSH private keys
- **Tokens**: GitHub (`ghp_*`), Gitea, Slack, Telegram, JWT, Bearer tokens
- **Database URLs**: Connection strings with embedded credentials
- **Passwords**: Hardcoded passwords in configuration files
#### How It Works
Before each commit, the hook:
1. Scans all staged text files
2. Checks against patterns for common secret formats
3. Reports any potential secrets found
4. Blocks the commit if secrets are detected
#### Handling False Positives
If the hook flags something that is not actually a secret (e.g., test fixtures, placeholder values), you can:
**Option 1: Add an exclusion marker to the line**
```python
# Add one of these markers to the end of the line:
api_key = "sk-test123" # pragma: allowlist secret
api_key = "sk-test123" # noqa: secret
api_key = "sk-test123" # secret-detection:ignore
```
**Option 2: Use placeholder values (auto-excluded)**
These patterns are automatically excluded:
- `changeme`, `password`, `123456`, `admin` (common defaults)
- Values containing `fake_`, `test_`, `dummy_`, `example_`, `placeholder_`
- URLs with `localhost` or `127.0.0.1`
**Option 3: Skip the hook (emergency only)**
```bash
git commit --no-verify # Bypasses all pre-commit hooks
```
⚠️ **Warning**: Only use `--no-verify` if you are certain no real secrets are being committed.
#### CI/CD Integration
The secret detection script can also be run in CI/CD:
```bash
# Scan specific files
python3 scripts/detect_secrets.py file1.py file2.yaml
# Scan with verbose output
python3 scripts/detect_secrets.py --verbose src/
# Run tests
python3 tests/test_secret_detection.py
```
#### Excluded Files
The following are automatically excluded from scanning:
- Markdown files (`.md`)
- Lock files (`package-lock.json`, `poetry.lock`, `yarn.lock`)
- Image and font files
- `node_modules/`, `__pycache__/`, `.git/`
#### Testing the Detection
To verify the detection works:
```bash
# Run the test suite
python3 tests/test_secret_detection.py
# Test with a specific file
echo "API_KEY=sk-test123456789" > /tmp/test_secret.py
python3 scripts/detect_secrets.py /tmp/test_secret.py
# Should report: OpenAI API key detected
```
## Development
### Running Tests
```bash
# Run secret detection tests
python3 tests/test_secret_detection.py
# Run all tests
pytest tests/
```
### Project Structure
```
.
├── .pre-commit-hooks.yaml # Pre-commit configuration
├── scripts/
│ └── detect_secrets.py # Secret detection script
├── tests/
│ └── test_secret_detection.py # Test cases
└── README.md # This file
```
## Contributing
See [CONTRIBUTING.md](CONTRIBUTING.md) for contribution guidelines.
## License
This project is part of the Timmy Foundation.

323
scripts/detect_secrets.py Executable file
View File

@@ -0,0 +1,323 @@
#!/usr/bin/env python3
"""
Secret leak detection script for pre-commit hooks.
Detects common secret patterns in staged files:
- API keys (sk-*, pk_*, etc.)
- Private keys (-----BEGIN PRIVATE KEY-----)
- Passwords in config files
- GitHub/Gitea tokens
- Database connection strings with credentials
"""
import argparse
import re
import sys
from pathlib import Path
from typing import List, Tuple
# Secret patterns to detect
SECRET_PATTERNS = {
"openai_api_key": {
"pattern": r"sk-[a-zA-Z0-9]{20,}",
"description": "OpenAI API key",
},
"anthropic_api_key": {
"pattern": r"sk-ant-[a-zA-Z0-9]{32,}",
"description": "Anthropic API key",
},
"generic_api_key": {
"pattern": r"(?i)(api[_-]?key|apikey)\s*[:=]\s*['\"]?([a-zA-Z0-9_\-]{16,})['\"]?",
"description": "Generic API key",
},
"private_key": {
"pattern": r"-----BEGIN (RSA |DSA |EC |OPENSSH )?PRIVATE KEY-----",
"description": "Private key",
},
"github_token": {
"pattern": r"gh[pousr]_[A-Za-z0-9_]{36,}",
"description": "GitHub token",
},
"gitea_token": {
"pattern": r"gitea_[a-f0-9]{40}",
"description": "Gitea token",
},
"aws_access_key": {
"pattern": r"AKIA[0-9A-Z]{16}",
"description": "AWS Access Key ID",
},
"aws_secret_key": {
"pattern": r"(?i)aws[_-]?secret[_-]?(access)?[_-]?key\s*[:=]\s*['\"]?([a-zA-Z0-9/+=]{40})['\"]?",
"description": "AWS Secret Access Key",
},
"database_connection_string": {
"pattern": r"(?i)(mongodb|mysql|postgresql|postgres|redis)://[^:]+:[^@]+@[^/]+",
"description": "Database connection string with credentials",
},
"password_in_config": {
"pattern": r"(?i)(password|passwd|pwd)\s*[:=]\s*['\"]([^'\"]{4,})['\"]",
"description": "Hardcoded password",
},
"stripe_key": {
"pattern": r"sk_(live|test)_[0-9a-zA-Z]{24,}",
"description": "Stripe API key",
},
"slack_token": {
"pattern": r"xox[baprs]-[0-9a-zA-Z]{10,}",
"description": "Slack token",
},
"telegram_bot_token": {
"pattern": r"[0-9]{8,10}:[a-zA-Z0-9_-]{35}",
"description": "Telegram bot token",
},
"jwt_token": {
"pattern": r"eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*",
"description": "JWT token",
},
"bearer_token": {
"pattern": r"(?i)bearer\s+[a-zA-Z0-9_\-\.=]{20,}",
"description": "Bearer token",
},
}
# Files/patterns to exclude from scanning
EXCLUSIONS = {
"files": {
".pre-commit-hooks.yaml",
".gitignore",
"poetry.lock",
"package-lock.json",
"yarn.lock",
"Pipfile.lock",
".secrets.baseline",
},
"extensions": {
".md",
".svg",
".png",
".jpg",
".jpeg",
".gif",
".ico",
".woff",
".woff2",
".ttf",
".eot",
},
"paths": {
".git/",
"node_modules/",
"__pycache__/",
".pytest_cache/",
".mypy_cache/",
".venv/",
"venv/",
".tox/",
"dist/",
"build/",
".eggs/",
},
"patterns": {
r"your_[a-z_]+_here",
r"example_[a-z_]+",
r"dummy_[a-z_]+",
r"test_[a-z_]+",
r"fake_[a-z_]+",
r"password\s*[=:]\s*['\"]?(changeme|password|123456|admin)['\"]?",
r"#.*(?:example|placeholder|sample)",
r"(mongodb|mysql|postgresql)://[^:]+:[^@]+@localhost",
r"(mongodb|mysql|postgresql)://[^:]+:[^@]+@127\.0\.0\.1",
},
}
# Markers for inline exclusions
EXCLUSION_MARKERS = [
"# pragma: allowlist secret",
"# noqa: secret",
"// pragma: allowlist secret",
"/* pragma: allowlist secret */",
"# secret-detection:ignore",
]
def should_exclude_file(file_path: str) -> bool:
"""Check if file should be excluded from scanning."""
path = Path(file_path)
if path.name in EXCLUSIONS["files"]:
return True
if path.suffix.lower() in EXCLUSIONS["extensions"]:
return True
for excluded_path in EXCLUSIONS["paths"]:
if excluded_path in str(path):
return True
return False
def has_exclusion_marker(line: str) -> bool:
"""Check if line has an exclusion marker."""
return any(marker in line for marker in EXCLUSION_MARKERS)
def is_excluded_match(line: str, match_str: str) -> bool:
"""Check if the match should be excluded."""
for pattern in EXCLUSIONS["patterns"]:
if re.search(pattern, line, re.IGNORECASE):
return True
if re.search(r"['\"](fake|test|dummy|example|placeholder|changeme)['\"]", line, re.IGNORECASE):
return True
return False
def scan_file(file_path: str) -> List[Tuple[int, str, str, str]]:
"""Scan a single file for secrets.
Returns list of tuples: (line_number, line_content, pattern_name, description)
"""
findings = []
try:
with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
lines = f.readlines()
except (IOError, OSError) as e:
print(f"Warning: Could not read {file_path}: {e}", file=sys.stderr)
return findings
for line_num, line in enumerate(lines, 1):
if has_exclusion_marker(line):
continue
for pattern_name, pattern_info in SECRET_PATTERNS.items():
matches = re.finditer(pattern_info["pattern"], line)
for match in matches:
match_str = match.group(0)
if is_excluded_match(line, match_str):
continue
findings.append(
(line_num, line.strip(), pattern_name, pattern_info["description"])
)
return findings
def scan_files(file_paths: List[str]) -> dict:
"""Scan multiple files for secrets.
Returns dict: {file_path: [(line_num, line, pattern, description), ...]}
"""
results = {}
for file_path in file_paths:
if should_exclude_file(file_path):
continue
findings = scan_file(file_path)
if findings:
results[file_path] = findings
return results
def print_findings(results: dict) -> None:
"""Print secret findings in a readable format."""
if not results:
return
print("=" * 80)
print("POTENTIAL SECRETS DETECTED!")
print("=" * 80)
print()
total_findings = 0
for file_path, findings in results.items():
print(f"\nFILE: {file_path}")
print("-" * 40)
for line_num, line, pattern_name, description in findings:
total_findings += 1
print(f" Line {line_num}: {description}")
print(f" Pattern: {pattern_name}")
print(f" Content: {line[:100]}{'...' if len(line) > 100 else ''}")
print()
print("=" * 80)
print(f"Total findings: {total_findings}")
print("=" * 80)
print()
print("To fix this:")
print(" 1. Remove the secret from the file")
print(" 2. Use environment variables or a secrets manager")
print(" 3. If this is a false positive, add an exclusion marker:")
print(" - Add '# pragma: allowlist secret' to the end of the line")
print(" - Or add '# secret-detection:ignore' to the end of the line")
print()
def main() -> int:
"""Main entry point."""
parser = argparse.ArgumentParser(
description="Detect secrets in files",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
%(prog)s file1.py file2.yaml
%(prog)s --exclude "*.md" src/
Exit codes:
0 - No secrets found
1 - Secrets detected
2 - Error
""",
)
parser.add_argument(
"files",
nargs="+",
help="Files to scan",
)
parser.add_argument(
"--exclude",
action="append",
default=[],
help="Additional file patterns to exclude",
)
parser.add_argument(
"--verbose",
"-v",
action="store_true",
help="Print verbose output",
)
args = parser.parse_args()
files_to_scan = []
for file_path in args.files:
if should_exclude_file(file_path):
if args.verbose:
print(f"Skipping excluded file: {file_path}")
continue
files_to_scan.append(file_path)
if args.verbose:
print(f"Scanning {len(files_to_scan)} files...")
results = scan_files(files_to_scan)
if results:
print_findings(results)
return 1
if args.verbose:
print("No secrets detected!")
return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -0,0 +1,106 @@
#!/usr/bin/env python3
"""
Test cases for secret detection script.
These tests verify that the detect_secrets.py script correctly:
1. Detects actual secrets
2. Ignores false positives
3. Respects exclusion markers
"""
import os
import sys
import tempfile
import unittest
from pathlib import Path
# Add scripts directory to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))
from detect_secrets import (
scan_file,
scan_files,
should_exclude_file,
has_exclusion_marker,
is_excluded_match,
SECRET_PATTERNS,
)
class TestSecretDetection(unittest.TestCase):
"""Test cases for secret detection."""
def setUp(self):
"""Set up test fixtures."""
self.test_dir = tempfile.mkdtemp()
def tearDown(self):
"""Clean up test fixtures."""
import shutil
shutil.rmtree(self.test_dir, ignore_errors=True)
def _create_test_file(self, content: str, filename: str = "test.txt") -> str:
"""Create a test file with given content."""
file_path = os.path.join(self.test_dir, filename)
with open(file_path, "w") as f:
f.write(content)
return file_path
def test_detect_openai_api_key(self):
"""Test detection of OpenAI API keys."""
content = "api_key = 'sk-abcdefghijklmnopqrstuvwxyz123456'"
file_path = self._create_test_file(content)
findings = scan_file(file_path)
self.assertTrue(any("openai" in f[2].lower() for f in findings))
def test_detect_private_key(self):
"""Test detection of private keys."""
content = "-----BEGIN RSA PRIVATE KEY-----\nMIIEpAIBAAKCAQEA0Z3VS5JJcds3xfn/ygWyF8PbnGy0AHB7MhgwMbRvI0MBZhpF\n-----END RSA PRIVATE KEY-----"
file_path = self._create_test_file(content)
findings = scan_file(file_path)
self.assertTrue(any("private" in f[2].lower() for f in findings))
def test_detect_database_connection_string(self):
"""Test detection of database connection strings with credentials."""
content = "DATABASE_URL=mongodb://admin:secretpassword@mongodb.example.com:27017/db"
file_path = self._create_test_file(content)
findings = scan_file(file_path)
self.assertTrue(any("database" in f[2].lower() for f in findings))
def test_detect_password_in_config(self):
"""Test detection of hardcoded passwords."""
content = "password = 'mysecretpassword123'"
file_path = self._create_test_file(content)
findings = scan_file(file_path)
self.assertTrue(any("password" in f[2].lower() for f in findings))
def test_exclude_placeholder_passwords(self):
"""Test that placeholder passwords are excluded."""
content = "password = 'changeme'"
file_path = self._create_test_file(content)
findings = scan_file(file_path)
self.assertEqual(len(findings), 0)
def test_exclude_localhost_database_url(self):
"""Test that localhost database URLs are excluded."""
content = "DATABASE_URL=mongodb://admin:secret@localhost:27017/db"
file_path = self._create_test_file(content)
findings = scan_file(file_path)
self.assertEqual(len(findings), 0)
def test_pragma_allowlist_secret(self):
"""Test '# pragma: allowlist secret' marker."""
content = "api_key = 'sk-abcdefghijklmnopqrstuvwxyz123456' # pragma: allowlist secret"
file_path = self._create_test_file(content)
findings = scan_file(file_path)
self.assertEqual(len(findings), 0)
def test_empty_file(self):
"""Test scanning empty file."""
file_path = self._create_test_file("")
findings = scan_file(file_path)
self.assertEqual(len(findings), 0)
if __name__ == "__main__":
unittest.main(verbosity=2)