diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml new file mode 100644 index 0000000..36dea3f --- /dev/null +++ b/.pre-commit-hooks.yaml @@ -0,0 +1,42 @@ +# Pre-commit hooks configuration for timmy-home +# See https://pre-commit.com for more information + +repos: + # Standard pre-commit hooks + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + hooks: + - id: trailing-whitespace + exclude: '\.(md|txt)$' + - id: end-of-file-fixer + exclude: '\.(md|txt)$' + - id: check-yaml + - id: check-json + - id: check-added-large-files + args: ['--maxkb=5000'] + - id: check-merge-conflict + - id: check-symlinks + - id: detect-private-key + + # Secret detection - custom local hook + - repo: local + hooks: + - id: detect-secrets + name: Detect Secrets + description: Scan for API keys, tokens, and other secrets + entry: python3 scripts/detect_secrets.py + language: python + types: [text] + exclude: + '(?x)^( + .*\.md$| + .*\.svg$| + .*\.lock$| + .*-lock\..*$| + \.gitignore$| + \.secrets\.baseline$| + tests/test_secret_detection\.py$ + )' + pass_filenames: true + require_serial: false + verbose: true diff --git a/README.md b/README.md new file mode 100644 index 0000000..0db9d13 --- /dev/null +++ b/README.md @@ -0,0 +1,132 @@ +# Timmy Home + +Timmy Foundation's home repository for development operations and configurations. + +## Security + +### Pre-commit Hook for Secret Detection + +This repository includes a pre-commit hook that automatically scans for secrets (API keys, tokens, passwords) before allowing commits. 
+ +#### Setup + +Install pre-commit hooks: + +```bash +pip install pre-commit +pre-commit install +``` + +#### What Gets Scanned + +The hook detects: +- **API Keys**: OpenAI (`sk-*`), Anthropic (`sk-ant-*`), AWS, Stripe +- **Private Keys**: RSA, DSA, EC, OpenSSH private keys +- **Tokens**: GitHub (`ghp_*`), Gitea, Slack, Telegram, JWT, Bearer tokens +- **Database URLs**: Connection strings with embedded credentials +- **Passwords**: Hardcoded passwords in configuration files + +#### How It Works + +Before each commit, the hook: +1. Scans all staged text files +2. Checks against patterns for common secret formats +3. Reports any potential secrets found +4. Blocks the commit if secrets are detected + +#### Handling False Positives + +If the hook flags something that is not actually a secret (e.g., test fixtures, placeholder values), you can: + +**Option 1: Add an exclusion marker to the line** + +```python +# Add one of these markers to the end of the line: +api_key = "sk-test123" # pragma: allowlist secret +api_key = "sk-test123" # noqa: secret +api_key = "sk-test123" # secret-detection:ignore +``` + +**Option 2: Use placeholder values (auto-excluded)** + +These patterns are automatically excluded: +- `changeme`, `password`, `123456`, `admin` (common defaults) +- Values containing `fake_`, `test_`, `dummy_`, `example_`, `placeholder_` +- URLs with `localhost` or `127.0.0.1` + +**Option 3: Skip the hook (emergency only)** + +```bash +git commit --no-verify # Bypasses all pre-commit hooks +``` + +⚠️ **Warning**: Only use `--no-verify` if you are certain no real secrets are being committed. 
+ +#### CI/CD Integration + +The secret detection script can also be run in CI/CD: + +```bash +# Scan specific files +python3 scripts/detect_secrets.py file1.py file2.yaml + +# Scan with verbose output +python3 scripts/detect_secrets.py --verbose src/ + +# Run tests +python3 tests/test_secret_detection.py +``` + +#### Excluded Files + +The following are automatically excluded from scanning: +- Markdown files (`.md`) +- Lock files (`package-lock.json`, `poetry.lock`, `yarn.lock`) +- Image and font files +- `node_modules/`, `__pycache__/`, `.git/` + +#### Testing the Detection + +To verify the detection works: + +```bash +# Run the test suite +python3 tests/test_secret_detection.py + +# Test with a specific file +echo "API_KEY=sk-test123456789" > /tmp/test_secret.py +python3 scripts/detect_secrets.py /tmp/test_secret.py +# Should report: Generic API key (the value is too short to match the OpenAI `sk-` pattern, which needs 20+ characters) +``` + +## Development + +### Running Tests + +```bash +# Run secret detection tests +python3 tests/test_secret_detection.py + +# Run all tests +pytest tests/ +``` + +### Project Structure + +``` +. +├── .pre-commit-hooks.yaml # Pre-commit configuration +├── scripts/ +│ └── detect_secrets.py # Secret detection script +├── tests/ +│ └── test_secret_detection.py # Test cases +└── README.md # This file +``` + +## Contributing + +See [CONTRIBUTING.md](CONTRIBUTING.md) for contribution guidelines. + +## License + +This project is part of the Timmy Foundation. diff --git a/scripts/detect_secrets.py b/scripts/detect_secrets.py new file mode 100755 index 0000000..0db1d78 --- /dev/null +++ b/scripts/detect_secrets.py @@ -0,0 +1,323 @@ +#!/usr/bin/env python3 +""" +Secret leak detection script for pre-commit hooks. + +Detects common secret patterns in staged files: +- API keys (sk-*, pk_*, etc.) 
"""
Secret leak detection script for pre-commit hooks.

Detects common secret patterns in staged files:
- API keys (sk-*, pk_*, etc.)
- Private keys (-----BEGIN PRIVATE KEY-----)
- Passwords in config files
- GitHub/Gitea tokens
- Database connection strings with credentials

Command-line arguments may be files or directories; directories are walked
recursively. Exit status is 0 when nothing is found and 1 when at least one
potential secret is reported (argparse itself exits with 2 on usage errors).
"""

import argparse
import fnmatch
import os
import re
import sys
from pathlib import Path
from typing import Dict, Iterable, Iterator, List, Optional, Sequence, Tuple

# One finding: (line_number, stripped_line, pattern_name, description).
Finding = Tuple[int, str, str, str]

# Secret patterns to detect
SECRET_PATTERNS = {
    "openai_api_key": {
        "pattern": r"sk-[a-zA-Z0-9]{20,}",
        "description": "OpenAI API key",
    },
    "anthropic_api_key": {
        # Real keys look like "sk-ant-api03-<base64url>", so "-" and "_" must
        # be allowed after the prefix or the pattern can never match them.
        "pattern": r"sk-ant-[a-zA-Z0-9_\-]{32,}",
        "description": "Anthropic API key",
    },
    "generic_api_key": {
        "pattern": r"(?i)(api[_-]?key|apikey)\s*[:=]\s*['\"]?([a-zA-Z0-9_\-]{16,})['\"]?",
        "description": "Generic API key",
    },
    "private_key": {
        "pattern": r"-----BEGIN (RSA |DSA |EC |OPENSSH )?PRIVATE KEY-----",
        "description": "Private key",
    },
    "github_token": {
        "pattern": r"gh[pousr]_[A-Za-z0-9_]{36,}",
        "description": "GitHub token",
    },
    "gitea_token": {
        "pattern": r"gitea_[a-f0-9]{40}",
        "description": "Gitea token",
    },
    "aws_access_key": {
        "pattern": r"AKIA[0-9A-Z]{16}",
        "description": "AWS Access Key ID",
    },
    "aws_secret_key": {
        "pattern": r"(?i)aws[_-]?secret[_-]?(access)?[_-]?key\s*[:=]\s*['\"]?([a-zA-Z0-9/+=]{40})['\"]?",
        "description": "AWS Secret Access Key",
    },
    "database_connection_string": {
        "pattern": r"(?i)(mongodb|mysql|postgresql|postgres|redis)://[^:]+:[^@]+@[^/]+",
        "description": "Database connection string with credentials",
    },
    "password_in_config": {
        "pattern": r"(?i)(password|passwd|pwd)\s*[:=]\s*['\"]([^'\"]{4,})['\"]",
        "description": "Hardcoded password",
    },
    "stripe_key": {
        "pattern": r"sk_(live|test)_[0-9a-zA-Z]{24,}",
        "description": "Stripe API key",
    },
    "slack_token": {
        "pattern": r"xox[baprs]-[0-9a-zA-Z]{10,}",
        "description": "Slack token",
    },
    "telegram_bot_token": {
        "pattern": r"[0-9]{8,10}:[a-zA-Z0-9_-]{35}",
        "description": "Telegram bot token",
    },
    "jwt_token": {
        "pattern": r"eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*",
        "description": "JWT token",
    },
    "bearer_token": {
        "pattern": r"(?i)bearer\s+[a-zA-Z0-9_\-\.=]{20,}",
        "description": "Bearer token",
    },
}

# Files/patterns to exclude from scanning
EXCLUSIONS = {
    "files": {
        ".pre-commit-hooks.yaml",
        ".gitignore",
        "poetry.lock",
        "package-lock.json",
        "yarn.lock",
        "Pipfile.lock",
        ".secrets.baseline",
    },
    "extensions": {
        ".md",
        ".svg",
        ".png",
        ".jpg",
        ".jpeg",
        ".gif",
        ".ico",
        ".woff",
        ".woff2",
        ".ttf",
        ".eot",
    },
    # Directory names; a file is skipped when any of its parent directories
    # has exactly one of these names.
    "paths": {
        ".git/",
        "node_modules/",
        "__pycache__/",
        ".pytest_cache/",
        ".mypy_cache/",
        ".venv/",
        "venv/",
        ".tox/",
        "dist/",
        "build/",
        ".eggs/",
    },
    # Line-level regexes (searched case-insensitively) that mark a line as a
    # placeholder/example rather than a real secret.
    "patterns": {
        r"your_[a-z_]+_here",
        r"example_[a-z_]+",
        r"dummy_[a-z_]+",
        r"test_[a-z_]+",
        r"fake_[a-z_]+",
        # The lookahead requires the default value to END there, so
        # "admin123secret" is not mistaken for the placeholder "admin".
        r"password\s*[=:]\s*['\"]?(changeme|password|123456|admin)(?=['\"\s,;]|$)",
        r"#.*(?:example|placeholder|sample)",
        # Same schemes as the detector; the lookahead stops hosts such as
        # "localhost.example.com" from being treated as loopback.
        r"(mongodb|mysql|postgresql|postgres|redis)://[^:]+:[^@]+@localhost(?=[:/?\s'\"]|$)",
        r"(mongodb|mysql|postgresql|postgres|redis)://[^:]+:[^@]+@127\.0\.0\.1(?=[:/?\s'\"]|$)",
    },
}

# Markers for inline exclusions
EXCLUSION_MARKERS = [
    "# pragma: allowlist secret",
    "# noqa: secret",
    "// pragma: allowlist secret",
    "/* pragma: allowlist secret */",
    "# secret-detection:ignore",
]


def _excluded_dir_names() -> set:
    """Return the excluded directory names without their trailing slash."""
    return {entry.strip("/") for entry in EXCLUSIONS["paths"]}


def should_exclude_file(file_path: str) -> bool:
    """Check if file should be excluded from scanning.

    A file is excluded when its name is in ``EXCLUSIONS["files"]``, its
    (lower-cased) suffix is in ``EXCLUSIONS["extensions"]``, or one of its
    parent directories is named in ``EXCLUSIONS["paths"]``.

    Directory names are compared component by component, not by substring,
    so ``rebuild/app.py`` is not skipped just because it contains ``build/``.
    """
    path = Path(file_path)

    if path.name in EXCLUSIONS["files"]:
        return True

    if path.suffix.lower() in EXCLUSIONS["extensions"]:
        return True

    skip_dirs = _excluded_dir_names()
    # parts[:-1] are the directories; the final part is the file name itself.
    return any(part in skip_dirs for part in path.parts[:-1])


def has_exclusion_marker(line: str) -> bool:
    """Check if line has an exclusion marker."""
    return any(marker in line for marker in EXCLUSION_MARKERS)


def is_excluded_match(line: str, match_str: str) -> bool:
    """Check if the match should be excluded.

    The decision is made on the whole ``line``: it is excluded when any
    regex in ``EXCLUSIONS["patterns"]`` is found in it (case-insensitive), or
    when it contains a quoted placeholder word such as ``'test'``.

    ``match_str`` is accepted for interface compatibility and is not used.
    """
    for pattern in EXCLUSIONS["patterns"]:
        if re.search(pattern, line, re.IGNORECASE):
            return True

    if re.search(
        r"['\"](fake|test|dummy|example|placeholder|changeme)['\"]",
        line,
        re.IGNORECASE,
    ):
        return True

    return False


def scan_file(file_path: str) -> List[Finding]:
    """Scan a single file for secrets.

    Returns list of tuples: (line_number, line_content, pattern_name, description)

    Undecodable bytes are ignored. If the file cannot be read, a warning is
    written to stderr and an empty list is returned.
    """
    findings: List[Finding] = []

    try:
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            lines = f.readlines()
    except (IOError, OSError) as e:
        print(f"Warning: Could not read {file_path}: {e}", file=sys.stderr)
        return findings

    for line_num, line in enumerate(lines, 1):
        if has_exclusion_marker(line):
            continue

        for pattern_name, pattern_info in SECRET_PATTERNS.items():
            for match in re.finditer(pattern_info["pattern"], line):
                if is_excluded_match(line, match.group(0)):
                    continue

                findings.append(
                    (line_num, line.strip(), pattern_name, pattern_info["description"])
                )

    return findings


def scan_files(file_paths: List[str]) -> Dict[str, List[Finding]]:
    """Scan multiple files for secrets.

    Returns dict: {file_path: [(line_num, line, pattern, description), ...]}

    Files rejected by ``should_exclude_file`` are skipped, and files without
    findings are left out of the result.
    """
    results: Dict[str, List[Finding]] = {}

    for file_path in file_paths:
        if should_exclude_file(file_path):
            continue

        findings = scan_file(file_path)
        if findings:
            results[file_path] = findings

    return results


def print_findings(results: dict) -> None:
    """Print secret findings in a readable format.

    Prints nothing when ``results`` is empty.
    """
    if not results:
        return

    print("=" * 80)
    print("POTENTIAL SECRETS DETECTED!")
    print("=" * 80)
    print()

    total_findings = 0
    for file_path, findings in results.items():
        print(f"\nFILE: {file_path}")
        print("-" * 40)
        for line_num, line, pattern_name, description in findings:
            total_findings += 1
            print(f"  Line {line_num}: {description}")
            print(f"    Pattern: {pattern_name}")
            print(f"    Content: {line[:100]}{'...' if len(line) > 100 else ''}")
            print()

    print("=" * 80)
    print(f"Total findings: {total_findings}")
    print("=" * 80)
    print()
    print("To fix this:")
    print("  1. Remove the secret from the file")
    print("  2. Use environment variables or a secrets manager")
    print("  3. If this is a false positive, add an exclusion marker:")
    print("     - Add '# pragma: allowlist secret' to the end of the line")
    print("     - Or add '# secret-detection:ignore' to the end of the line")
    print()


def _expand_paths(paths: Iterable[str]) -> Iterator[str]:
    """Yield file paths, walking any directory arguments recursively.

    Excluded directories are pruned during the walk so that trees such as
    ``node_modules/`` are never descended into. Non-directory arguments are
    yielded unchanged; a missing file is reported later by ``scan_file``.
    """
    skip_dirs = _excluded_dir_names()
    for raw_path in paths:
        if not os.path.isdir(raw_path):
            yield raw_path
            continue
        for root, dirs, files in os.walk(raw_path):
            # In-place assignment is what tells os.walk to skip these subtrees.
            dirs[:] = sorted(d for d in dirs if d not in skip_dirs)
            for name in sorted(files):
                yield os.path.join(root, name)


def _matches_user_exclude(file_path: str, patterns: Iterable[str]) -> bool:
    """Return True when a ``--exclude`` glob matches the path or its basename."""
    name = Path(file_path).name
    return any(
        fnmatch.fnmatch(file_path, pattern) or fnmatch.fnmatch(name, pattern)
        for pattern in patterns
    )


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Main entry point.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.

    Returns:
        0 when no secrets are found, 1 when at least one is reported.
    """
    parser = argparse.ArgumentParser(
        description="Detect secrets in files",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  %(prog)s file1.py file2.yaml
  %(prog)s --exclude "*.md" src/

Exit codes:
  0 - No secrets found
  1 - Secrets detected
  2 - Error
        """,
    )
    parser.add_argument(
        "files",
        nargs="+",
        help="Files or directories to scan",
    )
    parser.add_argument(
        "--exclude",
        action="append",
        default=[],
        help="Additional file patterns (globs) to exclude; may be repeated",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Print verbose output",
    )

    args = parser.parse_args(argv)

    files_to_scan = []
    for file_path in _expand_paths(args.files):
        if should_exclude_file(file_path) or _matches_user_exclude(
            file_path, args.exclude
        ):
            if args.verbose:
                print(f"Skipping excluded file: {file_path}")
            continue
        files_to_scan.append(file_path)

    if args.verbose:
        print(f"Scanning {len(files_to_scan)} files...")

    results = scan_files(files_to_scan)

    if results:
        print_findings(results)
        return 1

    if args.verbose:
        print("No secrets detected!")

    return 0


if __name__ == "__main__":
    sys.exit(main())
import os
import sys
import tempfile
import unittest
from pathlib import Path

# Add scripts directory to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "scripts"))

from detect_secrets import (
    scan_file,
    scan_files,
    should_exclude_file,
    has_exclusion_marker,
    is_excluded_match,
    SECRET_PATTERNS,
)


class TestSecretDetection(unittest.TestCase):
    """Exercise scan_file against secrets, placeholders and exclusion markers."""

    def setUp(self):
        """Create a scratch directory for the files written by each test."""
        self.test_dir = tempfile.mkdtemp()

    def tearDown(self):
        """Remove the scratch directory, ignoring cleanup errors."""
        import shutil

        shutil.rmtree(self.test_dir, ignore_errors=True)

    def _create_test_file(self, content: str, filename: str = "test.txt") -> str:
        """Write ``content`` to ``filename`` in the scratch directory; return its path."""
        target = os.path.join(self.test_dir, filename)
        with open(target, "w") as handle:
            handle.write(content)
        return target

    def _scan(self, content: str):
        """Write ``content`` to a scratch file and return scan_file's findings."""
        return scan_file(self._create_test_file(content))

    def _assert_pattern_reported(self, content: str, keyword: str) -> None:
        """Assert that at least one finding's pattern name contains ``keyword``."""
        reported = [finding[2].lower() for finding in self._scan(content)]
        self.assertTrue(any(keyword in name for name in reported))

    def _assert_no_findings(self, content: str) -> None:
        """Assert that scanning ``content`` reports nothing."""
        self.assertEqual(len(self._scan(content)), 0)

    def test_detect_openai_api_key(self):
        """Test detection of OpenAI API keys."""
        self._assert_pattern_reported(
            "api_key = 'sk-abcdefghijklmnopqrstuvwxyz123456'",
            "openai",
        )

    def test_detect_private_key(self):
        """Test detection of private keys."""
        pem = (
            "-----BEGIN RSA PRIVATE KEY-----\n"
            "MIIEpAIBAAKCAQEA0Z3VS5JJcds3xfn/ygWyF8PbnGy0AHB7MhgwMbRvI0MBZhpF\n"
            "-----END RSA PRIVATE KEY-----"
        )
        self._assert_pattern_reported(pem, "private")

    def test_detect_database_connection_string(self):
        """Test detection of database connection strings with credentials."""
        self._assert_pattern_reported(
            "DATABASE_URL=mongodb://admin:secretpassword@mongodb.example.com:27017/db",
            "database",
        )

    def test_detect_password_in_config(self):
        """Test detection of hardcoded passwords."""
        self._assert_pattern_reported("password = 'mysecretpassword123'", "password")

    def test_exclude_placeholder_passwords(self):
        """Test that placeholder passwords are excluded."""
        self._assert_no_findings("password = 'changeme'")

    def test_exclude_localhost_database_url(self):
        """Test that localhost database URLs are excluded."""
        self._assert_no_findings(
            "DATABASE_URL=mongodb://admin:secret@localhost:27017/db"
        )

    def test_pragma_allowlist_secret(self):
        """Test '# pragma: allowlist secret' marker."""
        self._assert_no_findings(
            "api_key = 'sk-abcdefghijklmnopqrstuvwxyz123456' # pragma: allowlist secret"
        )

    def test_empty_file(self):
        """Test scanning empty file."""
        self._assert_no_findings("")


if __name__ == "__main__":
    unittest.main(verbosity=2)