5.6: Add dependency bloat detector

Scans all .py files for import statements, compares them against requirements.txt, and reports declared packages that are never imported.

Acceptance:
- Lists all imports in code
- Compares against declared deps
- Reports: unused deps
- Output: bloat report

Refs #112
2026-04-26 00:50:36 -04:00
3 changed files with 112 additions and 285 deletions
--- a/scripts/coverage_checker.py
+++ b/scripts/coverage_checker.py
@@ -1,169 +0,0 @@
-#!/usr/bin/env python3
-"""
-Test Coverage Checker — 6.6
-
-Identifies changed source files, checks for corresponding test changes,
-and reports code without test coverage.
-
-Usage:
-    python3 scripts/test_coverage_checker.py
-    python3 scripts/test_coverage_checker.py --format json
-    python3 scripts/test_coverage_checker.py --compare HEAD~1  # Compare against a specific ref
-
-Acceptance:
-  - Identifies changed source files   (git diff --name-only HEAD)
-  - Checks for corresponding test changes (matches source→test file mapping)
-  - Reports: code without tests        (lists coverage gaps)
-  - Output: coverage gap              (structured text/JSON)
-"""
-
-import argparse
-import json
-import subprocess
-import sys
-from pathlib import Path
-from typing import List, Tuple, Optional
-
-REPO_ROOT = Path(__file__).resolve().parent.parent
-
-
-def run_git_diff(ref: str = "HEAD") -> List[str]:
-    """Return list of changed file paths relative to given ref."""
-    result = subprocess.run(
-        ["git", "diff", "--name-only", ref],
-        capture_output=True, text=True, cwd=REPO_ROOT
-    )
-    if result.returncode != 0:
-        print(f"ERROR: git diff failed: {result.stderr}")
-        sys.exit(1)
-    return [p for p in result.stdout.splitlines() if p.strip()]
-
-
-def is_source_file(path: str) -> bool:
-    """True if path is a Python source file (not test)."""
-    return path.endswith(".py") and not path.startswith("tests/") and "/test" not in Path(path).name
-
-
-def is_test_file(path: str) -> bool:
-    """True if path is a test file."""
-    if not path.endswith(".py"):
-        return False
-    name = Path(path).name
-    # Test files: test_*.py or *_test.py or in tests/ directory
-    return (name.startswith("test_") or name.endswith("_test.py") or path.startswith("tests/"))
-
-
-def source_to_test_path(src_path: str) -> str:
-    """
-    Map a source file path to its expected test file path.
-    Convention: scripts/<name>.py -> tests/test_<name>.py
-                <module>.py -> tests/test_<module>.py
-    """
-    name = Path(src_path).name
-    stem = Path(name).stem  # without .py
-    # Common mapping: script name -> test_ prefix in tests/
-    test_name = f"test_{stem}.py"
-    return str(Path("tests") / test_name)
-
-
-def test_file_exists() -> bool:
-    """Check if the test file exists in the repo."""
-    return (REPO_ROOT / test_rel).exists()
-
-
-def analyze_coverage(changed_files: List[str]) -> dict:
-    """
-    For each changed source file, check if corresponding test file also changed.
-    Returns structured coverage gap report.
-    """
-    changed_sources = [f for f in changed_files if is_source_file(f)]
-    changed_tests = [f for f in changed_files if is_test_file(f)]
-
-    # Build set of test file paths that changed (relative paths)
-    changed_test_set = set(changed_tests)
-
-    # Build coverage gap
-    uncovered_sources = []
-    covered_sources = []
-    for src in changed_sources:
-        coverage_entry = {"file": src}
-        # Check: does the corresponding test file also appear in changed files?
-        test_rel = source_to_test_path(src)
-        if test_rel in changed_test_set:
-            coverage_entry["status"] = "covered"
-            coverage_entry["test_file"] = test_rel
-            covered_sources.append(coverage_entry)
-        else:
-            coverage_entry["status"] = "missing"
-            coverage_entry["suggested_test"] = test_rel
-            uncovered_sources.append(coverage_entry)
-
-    return {
-        "repo": REPO_ROOT.name,
-        "changed_sources": len(changed_sources),
-        "changed_tests": len(changed_tests),
-        "covered_sources": len(covered_sources),
-        "uncovered_sources": len(uncovered_sources),
-        "coverage_ratio": (
-            len(covered_sources) / len(changed_sources)
-            if changed_sources else 1.0
-        ),
-        "covered": covered_sources,
-        "uncovered": uncovered_sources,
-        "all_changed": changed_files,
-    }
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Test Coverage Checker")
-    parser.add_argument("--format", choices=["text", "json"], default="text",
-                        help="Output format")
-    parser.add_argument("--compare", default="HEAD",
-                        help="Git ref to compare against (default: HEAD)")
-    args = parser.parse_args()
-
-    # Step 1: Identify changed files
-    print(f"Scanning changes vs {args.compare}...")
-    changed_files = run_git_diff(args.compare)
-    if not changed_files:
-        print("No changed files detected.")
-        sys.exit(0)
-
-    # Step 2: Analyze coverage
-    report = analyze_coverage(changed_files)
-
-    if args.format == "json":
-        print(json.dumps(report, indent=2))
-        sys.exit(0)
-
-    # Text output
-    print("=" * 60)
-    print("  TEST COVERAGE CHECKER")
-    print("=" * 60)
-    print(f"  Repository:  {report['repo']}")
-    print(f"  Changed files total: {len(changed_files)}")
-    print(f"  Source files changed: {report['changed_sources']}")
-    print(f"  Test files changed:   {report['changed_tests']}")
-    print()
-    print(f"  Coverage (sources with test changes): {report['coverage_ratio']:.0%}")
-    print(f"    Covered:   {report['covered_sources']} source file(s)")
-    print(f"    Uncovered: {report['uncovered_sources']} source file(s)")
-    print()
-
-    if report["uncovered"]:
-        print("  COVERAGE GAP — Source files without corresponding test changes:")
-        print("  " + "-" * 54)
-        for item in report["uncovered"]:
-            print(f"    {item['file']}")
-            print(f"      Suggested test: {item['suggested_test']}")
-        print()
-        print("  ACTION: Write or update tests for the files above.")
-        sys.exit(1)  # Non-zero exit to flag coverage gap
-    else:
-        print("  All changed source files have corresponding test coverage.")
-
-    print("=" * 60)
-
-
-if __name__ == "__main__":
-    main()
--- a/scripts/dependency_bloat_detector.py
+++ b/scripts/dependency_bloat_detector.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python3
+"""
+Dependency Bloat Detector — find declared packages never imported
+
+Usage:
+  python3 scripts/dependency_bloat_detector.py
+  (run from the repository root; exit status is 1 when unused deps are found)
+"""
+
+import ast
+import json
+import re
+import sys
+from pathlib import Path
+from typing import Set, List, Tuple
+
+
+def extract_imports_from_py_files(repo_path: Path) -> Set[str]:
+    """Return the top-level names of all absolute imports under *repo_path*.
+
+    Files inside an excluded directory, and files that cannot be read or
+    parsed, are skipped.  Relative imports (``from .x import y``) refer to
+    project-local modules and are not collected.
+    """
+    imports: Set[str] = set()
+    exclude_dirs = {".git", "venv", ".venv", "__pycache__", "node_modules",
+                    "dist", "build", ".tox", "vendor"}
+    for fpath in repo_path.rglob("*.py"):
+        # Match on the path *inside* the repo so that a checkout living in
+        # e.g. /home/ci/build/<repo> does not exclude every file.
+        if exclude_dirs.intersection(fpath.relative_to(repo_path).parts):
+            continue
+        try:
+            tree = ast.parse(fpath.read_text(encoding="utf-8", errors="ignore"))
+        except (OSError, SyntaxError, ValueError, RecursionError):
+            continue  # best effort: unreadable/unparsable files are skipped
+        for node in ast.walk(tree):
+            if isinstance(node, ast.Import):
+                imports.update(alias.name.split(".")[0] for alias in node.names)
+            elif isinstance(node, ast.ImportFrom) and node.module and not node.level:
+                imports.add(node.module.split(".")[0])
+    return imports
+
+
+def parse_requirements_txt(req_path: Path) -> List[Tuple[str, str]]:
+    """Parse requirements.txt into ``(package_name, requirement_line)`` pairs.
+
+    Blank lines, comments and pip option lines (``-r``, ``-e``, ``--index-url``)
+    are skipped; the returned line has any trailing comment removed.
+    Returns an empty list when *req_path* does not exist.
+    """
+    if not req_path.exists():
+        return []
+    declared = []
+    for line in req_path.read_text(encoding="utf-8-sig", errors="replace").splitlines():
+        # pip treats '#' as a comment only at line start or after whitespace,
+        # so URL fragments such as '#egg=name' must stay part of the line.
+        line = re.split(r'(?:^|\s)#', line, maxsplit=1)[0].strip()
+        if not line or line.startswith('-'):  # '-' introduces a pip option
+            continue
+        pkg_match = re.match(r'[A-Za-z0-9][A-Za-z0-9_-]*', line)
+        if pkg_match:
+            declared.append((pkg_match.group(0), line))
+    return declared
+
+
+def main():
+    """Print the bloat report for the current directory; exit 1 on bloat.
+
+    Names compare case-insensitively with '-' == '_'; stdlib is never 'undeclared'.
+    """
+    repo_path = Path('.').resolve()
+    req_path = repo_path / 'requirements.txt'
+
+    def norm(name):
+        return name.lower().replace('-', '_')
+
+    used = extract_imports_from_py_files(repo_path)
+    declared = parse_requirements_txt(req_path)
+    declared_names = [pkg for pkg, _ in declared]
+    used_norm = {norm(imp) for imp in used}
+    declared_norm = {norm(pkg) for pkg in declared_names}
+    stdlib = getattr(sys, 'stdlib_module_names', frozenset())  # Python 3.10+
+    unused = [(raw, pkg) for pkg, raw in declared if norm(pkg) not in used_norm]
+    missing_from_req = sorted(i for i in used - stdlib if norm(i) not in declared_norm)
+
+    print("=" * 60)
+    print("  DEPENDENCY BLOAT DETECTOR")
+    print("=" * 60)
+    print(f"  Repository:     {repo_path.name}")
+    print(f"  Requirements:   {req_path}")
+    print(f"  Python files:   {len(list(repo_path.rglob('*.py')))}\n")
+    print(f"  Declared packages  ({len(declared_names)}): {declared_names}")
+    print(f"  Imported packages  ({len(used)}): {sorted(used)}\n")
+    if unused:
+        print("  UNUSED DEPENDENCIES (bloat):")
+        for raw, pkg in unused:
+            print(f"    ✗ {raw}")
+    else:
+        print("  No unused dependencies detected.")
+    print()
+    if missing_from_req:
+        print("  UNDECLARED IMPORTS (used but not in requirements.txt):")
+        for imp in missing_from_req:
+            print(f"    ! {imp}")
+    print()
+    print("=" * 60)
+    sys.exit(1 if unused else 0)  # exit 1 if unused deps were found, else 0
+
+
+if __name__ == "__main__":  # run the report only when executed as a script
+    main()
--- a/tests/test_coverage_checker.py
+++ b/tests/test_coverage_checker.py
@@ -1,116 +0,0 @@
-#!/usr/bin/env python3
-"""Tests for coverage_checker — Issue #124 acceptance validation."""
-
-import subprocess
-import sys
-from pathlib import Path
-
-sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))
-
-from coverage_checker import (
-    is_source_file,
-    is_test_file,
-    source_to_test_path,
-    analyze_coverage,
-)
-
-
-class TestSourceFileDetection:
-    def test_script_in_scripts_dir(self):
-        assert is_source_file("scripts/freshness.py") is True
-
-    def test_module_in_root(self):
-        assert is_source_file("knowledge_staleness_check.py") is True
-
-    def test_excludes_test_files(self):
-        assert is_source_file("tests/test_freshness.py") is False
-
-    def test_excludes_non_py(self):
-        assert is_source_file("README.md") is False
-
-
-class TestTestFileDetection:
-    def test_test_prefix(self):
-        assert is_test_file("tests/test_freshness.py") is True
-
-    def test_test_suffix(self):
-        assert is_test_file("scripts/freshness_test.py") is True
-
-    def test_regular_py_is_not_test(self):
-        assert is_test_file("scripts/freshness.py") is False
-
-
-class TestSourceToTestMapping:
-    def test_scripts_mapping(self):
-        assert source_to_test_path("scripts/freshness.py") == "tests/test_freshness.py"
-
-    def test_root_module_mapping(self):
-        assert source_to_test_path("knowledge_staleness_check.py") == "tests/test_knowledge_staleness_check.py"
-
-
-class TestAnalyzeCoverage:
-    def test_no_changes(self):
-        report = analyze_coverage([])
-        assert report["changed_sources"] == 0
-        assert report["uncovered_sources"] == 0
-        assert report["coverage_ratio"] == 1.0
-
-    def test_all_covered(self):
-        changed = [
-            "scripts/freshness.py",
-            "tests/test_freshness.py",
-            "scripts/dedup.py",
-            "tests/test_dedup.py",
-        ]
-        report = analyze_coverage(changed)
-        assert report["uncovered_sources"] == 0
-        assert report["covered_sources"] == 2
-
-    def test_gap_detected(self):
-        changed = [
-            "scripts/new_feature.py",
-            "README.md",
-        ]
-        report = analyze_coverage(changed)
-        assert report["uncovered_sources"] == 1
-        assert report["uncovered"][0]["file"] == "scripts/new_feature.py"
-        assert report["uncovered"][0]["suggested_test"] == "tests/test_new_feature.py"
-
-    def test_mixed_coverage(self):
-        changed = [
-            "scripts/covered.py",
-            "tests/test_covered.py",
-            "scripts/uncovered.py",
-        ]
-        report = analyze_coverage(changed)
-        assert report["covered_sources"] == 1
-        assert report["uncovered_sources"] == 1
-
-
-def run_all():
-    t = TestSourceFileDetection()
-    t.test_script_in_scripts_dir()
-    t.test_module_in_root()
-    t.test_excludes_test_files()
-    t.test_excludes_non_py()
-
-    t2 = TestTestFileDetection()
-    t2.test_test_prefix()
-    t2.test_test_suffix()
-    t2.test_regular_py_is_not_test()
-
-    t3 = TestSourceToTestMapping()
-    t3.test_scripts_mapping()
-    t3.test_root_module_mapping()
-
-    t4 = TestAnalyzeCoverage()
-    t4.test_no_changes()
-    t4.test_all_covered()
-    t4.test_gap_detected()
-    t4.test_mixed_coverage()
-
-    print("All 11 tests passed!")
-
-
-if __name__ == "__main__":
-    run_all()