Some checks failed
Notebook CI / notebook-smoke (pull_request) Failing after 2s
- gitea_client.py — reusable Gitea API client for issues, PRs, comments
- health.py — fleet health monitor (load, disk, memory, processes)
- notebook_runner.py — Papermill wrapper with JSON reporting
- smoke_test.py — fast smoke tests and bare green-path e2e
- secret_scan.py — secret leak scanner for CI gating
- wizard_env.py — environment validator for bootstrapping agents
- README.md — usage guide for all tools

These tools are designed to be used by any wizard via `python -m devkit.<tool>`. Rising up as a platform, not a silo.
109 lines
3.3 KiB
Python
109 lines
3.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Fast secret leak scanner for the repository.
|
|
Checks for common patterns that should never be committed.
|
|
|
|
Usage as CLI:
|
|
python -m devkit.secret_scan
|
|
python -m devkit.secret_scan --path /some/repo --fail-on-find
|
|
|
|
Usage as module:
|
|
from devkit.secret_scan import scan
|
|
findings = scan("/path/to/repo")
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
import re
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List
|
|
|
|
# Patterns to flag
|
|
# Maps a short pattern name (reported in each finding) to the compiled regex
# that detects it. All patterns are applied to the full text of every file.
PATTERNS = {
    # AWS access key IDs: fixed "AKIA" prefix followed by 16 upper-case
    # alphanumerics.
    "aws_access_key_id": re.compile(r"AKIA[0-9A-Z]{16}"),
    # Any 40-character base64-ish run delimited by quotes or whitespace.
    "aws_secret_key": re.compile(r"['\"\s][0-9a-zA-Z/+]{40}['\"\s]"),
    # `api_key = "..."` / `api-key: '...'` style assignments (case-insensitive).
    "generic_api_key": re.compile(
        r"api[_-]?key\s*[:=]\s*['\"][a-zA-Z0-9_\-]{20,}['\"]", re.IGNORECASE
    ),
    # PEM private-key header, with or without an algorithm prefix.
    "private_key": re.compile(r"-----BEGIN (RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----"),
    "github_token": re.compile(r"gh[pousr]_[A-Za-z0-9_]{36,}"),
    # Heuristic: a 40-character lower-case hex string that follows the word
    # "token" (any case), optionally separated by quotes, whitespace and a
    # ":" or "=". Requiring the keyword keeps ordinary git commit SHA-1s,
    # which are also 40 hex characters, from being reported as tokens.
    "gitea_token": re.compile(r"(?i:token)['\"]?\s*[:=]?\s*['\"]?[0-9a-f]{40}"),
    "telegram_bot_token": re.compile(r"[0-9]{9,}:[A-Za-z0-9_-]{35,}"),
}
|
|
|
|
# Files and paths to skip
|
|
# Each entry is compared against individual path components, so an entry
# excludes any file or directory with exactly that name at any depth.
SKIP_PATHS = [
    # VCS metadata and tool caches
    ".git", "__pycache__", ".pytest_cache",
    # Dependency trees and virtual environments
    "node_modules", "venv", ".env",
    # Agent tooling
    ".agent-skills",
]
|
|
|
|
# Max file size to scan (bytes)
|
|
MAX_FILE_SIZE = 1 << 20  # 1 MiB; larger files are not read at all
|
|
|
|
|
|
def _should_skip(path: Path) -> bool:
    """Return True when any component of *path* is listed in SKIP_PATHS.

    The comparison is per path component (directory names and the final
    file name), not a substring match on the whole path.
    """
    components = path.parts
    return any(name in components for name in SKIP_PATHS)
|
|
|
|
|
|
def scan(root: str = ".") -> List[Dict[str, Any]]:
    """Scan every regular file under *root* for secret-like patterns.

    Files are skipped when a path component inside the scanned tree is
    listed in ``SKIP_PATHS``, when they are larger than ``MAX_FILE_SIZE``
    bytes, or when they cannot be stat'ed or read.

    Args:
        root: Directory to walk recursively. It is resolved to an absolute
            path before walking.

    Returns:
        One dict per regex match, with the keys:
        ``file`` (path relative to *root*), ``pattern`` (the key in
        ``PATTERNS`` that matched), ``line`` (1-based line number where the
        match starts) and ``context`` (the match plus up to 40 characters on
        either side, with newlines replaced by spaces).
    """
    root_path = Path(root).resolve()
    findings = []
    for file_path in root_path.rglob("*"):
        # Decide skipping from the path *inside* the scanned tree only. Using
        # the absolute path would skip the whole repository whenever one of
        # its ancestor directories happened to be called e.g. "venv".
        rel_path = file_path.relative_to(root_path)
        if _should_skip(rel_path):
            continue
        try:
            # is_file()/stat() can fail (permissions, file removed mid-scan);
            # treat that the same as an unreadable file instead of aborting.
            if not file_path.is_file():
                continue
            if file_path.stat().st_size > MAX_FILE_SIZE:
                continue
            # Undecodable bytes are dropped so binary-ish files still get scanned.
            text = file_path.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            continue
        for pattern_name, pattern in PATTERNS.items():
            for match in pattern.finditer(text):
                # Context window: up to 40 characters before and after the
                # match, flattened onto a single line.
                start = max(0, match.start() - 40)
                end = min(len(text), match.end() + 40)
                context = text[start:end].replace("\n", " ")
                findings.append({
                    "file": str(rel_path),
                    "pattern": pattern_name,
                    # Newlines before the match start + 1 = 1-based line number.
                    "line": text[:match.start()].count("\n") + 1,
                    "context": context,
                })
    return findings
|
|
|
|
|
|
def main(argv: List[str] = None) -> int:
    """Run the secret scanner from the command line.

    Args:
        argv: Argument list to parse (without the program name). When
            ``None``, ``sys.argv[1:]`` is used. An explicit empty list means
            "no arguments" and is parsed as such.

    Returns:
        ``1`` when ``--fail-on-find`` was given and at least one finding was
        reported, otherwise ``0``.
    """
    # Test against None explicitly: `argv or sys.argv[1:]` would treat an
    # empty list as "not provided" and silently parse the real command line.
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(description="Secret leak scanner")
    parser.add_argument("--path", default=".", help="Repository root to scan")
    parser.add_argument("--fail-on-find", action="store_true", help="Exit non-zero if secrets found")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    args = parser.parse_args(argv)

    findings = scan(args.path)
    if args.json:
        print(json.dumps({"findings": findings, "count": len(findings)}, indent=2))
    else:
        print(f"Scanned {args.path}")
        print(f"Findings: {len(findings)}")
        for finding in findings:
            print(f" [{finding['pattern']}] {finding['file']}:{finding['line']} -> ...{finding['context']}...")

    if args.fail_on_find and findings:
        return 1
    return 0
|
|
|
|
|
|
# Script entry point: propagate main()'s return value as the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())
|