#!/usr/bin/env python3
"""
Fast secret leak scanner for the repository.
Checks for common patterns that should never be committed.

Usage as CLI:
    python -m devkit.secret_scan
    python -m devkit.secret_scan --path /some/repo --fail-on-find

Usage as module:
    from devkit.secret_scan import scan
    findings = scan("/path/to/repo")
"""

import argparse
import json
import os
import re
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional

# Patterns to flag. Keys are the names reported in each finding's "pattern"
# field; values are compiled regexes run over the full text of every file.
PATTERNS = {
    "aws_access_key_id": re.compile(r"AKIA[0-9A-Z]{16}"),
    # Any 40-char base64-ish run delimited by quotes or whitespace.
    "aws_secret_key": re.compile(r"['\"\s][0-9a-zA-Z/+]{40}['\"\s]"),
    "generic_api_key": re.compile(
        r"api[_-]?key\s*[:=]\s*['\"][a-zA-Z0-9_\-]{20,}['\"]", re.IGNORECASE
    ),
    "private_key": re.compile(r"-----BEGIN (RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----"),
    "github_token": re.compile(r"gh[pousr]_[A-Za-z0-9_]{36,}"),
    # heuristic for long hex strings after "token"
    # NOTE(review): the regex itself does not require the word "token", so it
    # matches ANY 40-char lowercase hex run (e.g. git commit SHAs). Confirm
    # whether that breadth is intended before tightening it.
    "gitea_token": re.compile(r"[0-9a-f]{40}"),
    "telegram_bot_token": re.compile(r"[0-9]{9,}:[A-Za-z0-9_-]{35,}"),
}

# Files and paths to skip: a path is ignored when any one of its components
# is exactly equal to one of these names.
SKIP_PATHS = [
    ".git",
    "__pycache__",
    ".pytest_cache",
    "node_modules",
    "venv",
    ".env",
    ".agent-skills",
]

# Max file size to scan (bytes)
MAX_FILE_SIZE = 1024 * 1024


def _should_skip(path: Path) -> bool:
    """Return True when any component of *path* is listed in SKIP_PATHS.

    The check is an exact match on whole path components, not a substring
    test. Callers should pass a path relative to the scan root so that
    directory names *above* the root cannot trigger a skip.
    """
    return any(part in SKIP_PATHS for part in path.parts)


def _findings_in_text(text: str, file_label: str) -> List[Dict[str, Any]]:
    """Run every pattern in PATTERNS over *text* and collect the matches.

    Args:
        text: Full decoded content of one file.
        file_label: Value stored in each finding's "file" field.

    Returns:
        One dict per match with the keys "file", "pattern", "line"
        (1-based line of the match start) and "context" (up to 40 characters
        either side of the match, with newlines replaced by spaces).
    """
    results: List[Dict[str, Any]] = []
    for pattern_name, pattern in PATTERNS.items():
        for match in pattern.finditer(text):
            # Simple context: a fixed window of characters around the match.
            window_start = max(0, match.start() - 40)
            window_end = min(len(text), match.end() + 40)
            results.append({
                "file": file_label,
                "pattern": pattern_name,
                # Count newlines in place rather than slicing a copy of the
                # prefix for every single match.
                "line": text.count("\n", 0, match.start()) + 1,
                "context": text[window_start:window_end].replace("\n", " "),
            })
    return results


def scan(root: str = ".") -> List[Dict[str, Any]]:
    """Scan every regular file under *root* for secret-looking strings.

    Files are skipped when a component of their path (relative to *root*)
    is in SKIP_PATHS, when they are larger than MAX_FILE_SIZE bytes, or when
    they cannot be stat'ed or read. Content is decoded as UTF-8 with
    undecodable bytes dropped.

    Args:
        root: Directory to scan; resolved to an absolute path first.

    Returns:
        A list of finding dicts with the keys "file" (path relative to
        *root*), "pattern", "line" and "context".
    """
    root_path = Path(root).resolve()
    findings: List[Dict[str, Any]] = []
    for file_path in root_path.rglob("*"):
        rel_path = file_path.relative_to(root_path)
        # Test the path relative to the root: using the absolute path would
        # skip the entire repository whenever it lives beneath a directory
        # that happens to be called e.g. "venv" or ".env".
        if _should_skip(rel_path):
            continue
        try:
            if not file_path.is_file():
                continue
            if file_path.stat().st_size > MAX_FILE_SIZE:
                continue
            text = file_path.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            # Best effort: a file that vanished or is unreadable must not
            # abort the whole scan.
            continue
        findings.extend(_findings_in_text(text, str(rel_path)))
    return findings


def main(argv: Optional[List[str]] = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list without the program name. ``None`` means "use
            ``sys.argv[1:]``"; an explicit empty list is honoured as
            "no arguments".

    Returns:
        1 when ``--fail-on-find`` was given and at least one finding was
        reported, otherwise 0.
    """
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description="Secret leak scanner")
    parser.add_argument("--path", default=".", help="Repository root to scan")
    parser.add_argument("--fail-on-find", action="store_true", help="Exit non-zero if secrets found")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    args = parser.parse_args(argv)

    findings = scan(args.path)
    if args.json:
        print(json.dumps({"findings": findings, "count": len(findings)}, indent=2))
    else:
        print(f"Scanned {args.path}")
        print(f"Findings: {len(findings)}")
        for finding in findings:
            print(f" [{finding['pattern']}] {finding['file']}:{finding['line']} -> ...{finding['context']}...")

    if args.fail_on_find and findings:
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())