#!/usr/bin/env python3
"""
Dependency Bloat Detector — find declared packages never imported

Usage:
    python3 scripts/dependency_bloat_detector.py
    python3 scripts/dependency_bloat_detector.py --output json
"""

import argparse
import ast
import json
import re
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple

# Directory names (relative to the scanned repo) that are never scanned.
_EXCLUDE_DIRS = frozenset({
    ".git", "venv", ".venv", "__pycache__", "node_modules",
    "dist", "build", ".tox", "vendor",
})

# pip only treats '#' as a comment at line start or after whitespace, so a
# URL fragment such as "pkg.zip#sha256=..." must survive comment stripping.
_COMMENT_RE = re.compile(r"(^|\s+)#.*$")

# A distribution name followed by end-of-line or the start of an extras /
# version / marker / URL clause. Anything else (e.g. "git+https://...") is
# not a plain named requirement and is skipped.
_REQUIREMENT_NAME_RE = re.compile(
    r"^([A-Za-z0-9][A-Za-z0-9._-]*)\s*(?=$|[\[<>=!~;@(])"
)

# Well-known distributions whose import name differs from the package name.
# Keys are lowercase with runs of "-", "_" and "." collapsed to "-"; values
# are lowercase import names. Deliberately small — extend as needed.
_KNOWN_IMPORT_NAMES: Dict[str, str] = {
    "pyyaml": "yaml",
    "pillow": "pil",
    "scikit-learn": "sklearn",
    "beautifulsoup4": "bs4",
    "opencv-python": "cv2",
    "python-dateutil": "dateutil",
    "python-dotenv": "dotenv",
    "pyjwt": "jwt",
}


def _iter_python_files(repo_path: Path) -> List[Path]:
    """Return the sorted ``*.py`` files under *repo_path*, minus excluded dirs."""
    files = []
    for fpath in repo_path.rglob("*.py"):
        # Only inspect components *below* repo_path: a checkout that itself
        # lives under e.g. ".../build/" must not exclude every file.
        parent_parts = fpath.relative_to(repo_path).parts[:-1]
        if any(part in _EXCLUDE_DIRS for part in parent_parts):
            continue
        if fpath.is_file():  # rglob also matches directories named "*.py"
            files.append(fpath)
    return sorted(files)


def extract_imports_from_py_files(repo_path: Path) -> Set[str]:
    """Walk the repo and return the set of top-level imported module names.

    Files that cannot be read or parsed are skipped (best effort). Relative
    imports (``from . import x``, ``from .pkg import y``) are ignored because
    they always refer to first-party code, never to a declared dependency.
    """
    imports: Set[str] = set()
    for fpath in _iter_python_files(repo_path):
        try:
            # utf-8-sig drops a leading BOM; undecodable bytes are ignored so
            # one odd file does not abort the scan.
            content = fpath.read_text(encoding="utf-8-sig", errors="ignore")
            tree = ast.parse(content)
        except (OSError, SyntaxError, ValueError, RecursionError):
            continue
        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    imports.add(alias.name.split(".")[0])
            elif isinstance(node, ast.ImportFrom):
                # level > 0 marks a relative import.
                if node.module and node.level == 0:
                    imports.add(node.module.split(".")[0])
    return imports


def parse_requirements_txt(req_path: Path) -> List[Tuple[str, str]]:
    """
    Parse requirements.txt and return list of (package_name, raw_line).

    ``package_name`` is the distribution name exactly as written (extras,
    version specifiers and markers stripped); ``raw_line`` is the line with
    comments removed. Blank lines, comments, pip option lines (``-r``,
    ``-e``, ``--index-url`` ...) and entries that do not start with a plain
    package name (bare URLs, local paths) are skipped. Returns an empty list
    when *req_path* is not an existing file.
    """
    if not req_path.is_file():
        return []
    declared = []
    text = req_path.read_text(encoding="utf-8", errors="ignore")
    for raw in text.splitlines():
        line = _COMMENT_RE.sub("", raw).strip()
        if not line or line.startswith("-"):
            continue
        match = _REQUIREMENT_NAME_RE.match(line)
        if match:
            declared.append((match.group(1), line))
    return declared


def _import_candidates(package: str) -> Set[str]:
    """Return lowercase import names that plausibly belong to *package*."""
    lowered = package.lower()
    candidates = {
        # "my-pkg" / "my.pkg" -> "my_pkg"
        re.sub(r"[-.]+", "_", lowered),
        # "zope.interface" -> "zope" (top-level of a dotted distribution name)
        lowered.split(".")[0].replace("-", "_"),
    }
    alias = _KNOWN_IMPORT_NAMES.get(re.sub(r"[-_.]+", "-", lowered))
    if alias:
        candidates.add(alias)
    return candidates


def _local_module_names(repo_path: Path, py_files: List[Path]) -> Set[str]:
    """Return names that look like first-party modules or packages.

    Heuristic: every directory on the path to a scanned file and every file
    stem counts as local. This is intentionally generous so that ``src/``
    layouts and sibling-script imports are not reported as undeclared.
    """
    names: Set[str] = set()
    for fpath in py_files:
        rel = fpath.relative_to(repo_path)
        names.update(rel.parts[:-1])
        names.add(rel.stem)
    return names


def _analyze(repo_path: Path, req_path: Path) -> Dict[str, Any]:
    """Compare declared requirements with actual imports and build a report."""
    py_files = _iter_python_files(repo_path)
    used = extract_imports_from_py_files(repo_path)
    declared = parse_requirements_txt(req_path)

    # Import names are compared case-insensitively (e.g. Pillow -> PIL).
    used_lower = {name.lower() for name in used}
    covered: Set[str] = set()
    unused = []
    for pkg, raw in declared:
        candidates = _import_candidates(pkg)
        covered |= candidates
        if not candidates & used_lower:
            unused.append(raw)

    # sys.stdlib_module_names exists on Python 3.10+; on older interpreters
    # stdlib modules cannot be filtered and will be listed as undeclared.
    stdlib = getattr(sys, "stdlib_module_names", frozenset())
    local = _local_module_names(repo_path, py_files)
    undeclared = sorted(
        imp for imp in used
        if imp.lower() not in covered and imp not in stdlib and imp not in local
    )

    return {
        "repository": repo_path.name,
        "requirements": str(req_path),
        "python_files": len(py_files),
        "declared": [pkg for pkg, _ in declared],
        "imported": sorted(used),
        "unused": unused,
        "undeclared": undeclared,
    }


def _print_text_report(report: Dict[str, Any]) -> None:
    """Print the human-readable report to stdout."""
    declared_names = report["declared"]
    imported = report["imported"]
    print("=" * 60)
    print(" DEPENDENCY BLOAT DETECTOR")
    print("=" * 60)
    print(f" Repository: {report['repository']}")
    print(f" Requirements: {report['requirements']}")
    print(f" Python files: {report['python_files']}")
    print()
    print(f" Declared packages ({len(declared_names)}): {declared_names}")
    print(f" Imported packages ({len(imported)}): {imported}")
    print()
    if report["unused"]:
        print(" UNUSED DEPENDENCIES (bloat):")
        for raw in report["unused"]:
            print(f" ✗ {raw}")
    else:
        print(" No unused dependencies detected.")
    print()
    if report["undeclared"]:
        print(" UNDECLARED IMPORTS (used but not in requirements.txt):")
        for imp in report["undeclared"]:
            print(f" ! {imp}")
        print()
    print("=" * 60)


def main(argv: Optional[List[str]] = None) -> None:
    """Scan the current directory and report unused / undeclared dependencies.

    Args:
        argv: Command-line arguments without the program name; defaults to
            ``sys.argv[1:]``. Supports ``--output {text,json}``.

    Raises:
        SystemExit: always — code 1 if unused dependencies were found,
            0 otherwise (2 on invalid arguments, raised by argparse).
    """
    parser = argparse.ArgumentParser(
        description="Find declared packages that are never imported."
    )
    parser.add_argument(
        "--output",
        choices=("text", "json"),
        default="text",
        help="report format (default: text)",
    )
    args = parser.parse_args(argv)

    repo_path = Path(".").resolve()
    req_path = repo_path / "requirements.txt"
    report = _analyze(repo_path, req_path)

    if args.output == "json":
        print(json.dumps(report, indent=2))
    else:
        _print_text_report(report)

    # Exit code: 0 if no bloat, 1 if unused deps found
    sys.exit(1 if report["unused"] else 0)


if __name__ == "__main__":
    main()