#!/usr/bin/env python3
"""
Dead Code Detector for Python Codebases

AST-based analysis to find defined but never-called functions and classes.
Excludes entry points, plugin hooks, __init__ exports.

Usage:
    python3 scripts/dead_code_detector.py /path/to/repo/
    python3 scripts/dead_code_detector.py hermes-agent/ --format json
    python3 scripts/dead_code_detector.py . --exclude tests/,venv/

Output: file:line, function/class name, last git author (if available)
"""

import argparse
import ast
import json
import os
import subprocess
import sys
from collections import defaultdict
from pathlib import Path
from typing import Optional

# Names that are expected to be unused (entry points, protocol methods, etc.).
# Entries ending in a single trailing "_" (e.g. "test_") are PREFIX patterns;
# everything else — including dunders, which end in "__" — matches exactly.
SAFE_UNUSED_PATTERNS = {
    # Python dunders
    "__init__", "__str__", "__repr__", "__eq__", "__hash__", "__len__",
    "__getitem__", "__setitem__", "__contains__", "__iter__", "__next__",
    "__enter__", "__exit__", "__call__", "__bool__", "__del__",
    "__post_init__", "__class_getitem__",
    # Common entry points
    "main", "app", "handler", "setup", "teardown",
    "fixture",  # pytest
    "conftest", "test_", "pytest_",  # prefix patterns
    # Protocols / abstract
    "abstractmethod", "abc_",
}

# Split once at import time. A pattern is a prefix iff it ends in exactly one
# underscore; comparing startswith() against every pattern (the old behavior)
# wrongly whitelisted names like "application_x" (via "app") or
# "setup_logging" (via "setup").
_PREFIX_PATTERNS = tuple(
    p for p in SAFE_UNUSED_PATTERNS if p.endswith("_") and not p.endswith("__")
)
_EXACT_PATTERNS = SAFE_UNUSED_PATTERNS - set(_PREFIX_PATTERNS)


def is_safe_unused(name: str, filepath: str) -> bool:
    """Return True if *name* defined in *filepath* is expected to be unused.

    Covers test files, ``__init__.py`` re-exports, dunders, well-known entry
    points, and prefix conventions such as ``test_``/``pytest_``.
    """
    # Test files are exempt
    if "test" in filepath.lower():
        return True
    # __init__.py exports are often unused internally
    if filepath.endswith("__init__.py"):
        return True
    # Exact names (dunders, entry points) ...
    if name in _EXACT_PATTERNS:
        return True
    # ... and prefix conventions ("test_", "pytest_", "abc_").
    return name.startswith(_PREFIX_PATTERNS)


def get_git_blame(filepath: str, lineno: int) -> Optional[str]:
    """Return the last author of line *lineno* in *filepath* via git blame.

    Returns None when git is missing, the file is untracked/outside a work
    tree, the blame call fails, or it exceeds the 5 s timeout.
    """
    abs_path = os.path.abspath(filepath)
    try:
        result = subprocess.run(
            ["git", "blame", "-L", f"{lineno},{lineno}", "--porcelain", abs_path],
            capture_output=True,
            text=True,
            timeout=5,
            # Run from the file's directory so git can locate the work tree
            # even when this tool is invoked from outside the repository.
            cwd=os.path.dirname(abs_path) or None,
        )
    except (OSError, subprocess.SubprocessError):
        return None
    if result.returncode != 0:
        return None
    # Porcelain format emits one header line of the form "author <name>".
    for line in result.stdout.splitlines():
        if line.startswith("author "):
            return line[len("author "):]
    return None


class DefinitionCollector(ast.NodeVisitor):
    """Collect all function and class definitions in a module."""

    def __init__(self) -> None:
        # Each entry: (name, "function" | "async_function" | "class", lineno)
        self.definitions: list = []

    def _record(self, node, def_type: str) -> None:
        """Append one definition and keep walking for nested defs."""
        self.definitions.append((node.name, def_type, node.lineno))
        self.generic_visit(node)

    def visit_FunctionDef(self, node):
        self._record(node, "function")

    def visit_AsyncFunctionDef(self, node):
        self._record(node, "async_function")

    def visit_ClassDef(self, node):
        self._record(node, "class")


class NameUsageCollector(ast.NodeVisitor):
    """Collect all name references (calls, imports, attribute access)."""

    def __init__(self) -> None:
        self.names: set = set()    # bare Name references and attribute bases
        self.calls: set = set()    # called function/method names
        self.imports: set = set()  # imported (or aliased) names

    def visit_Name(self, node):
        self.names.add(node.id)
        self.generic_visit(node)

    def visit_Attribute(self, node):
        # Record the base object of obj.attr accesses.
        if isinstance(node.value, ast.Name):
            self.names.add(node.value.id)
        self.generic_visit(node)

    def visit_Call(self, node):
        if isinstance(node.func, ast.Name):
            self.calls.add(node.func.id)
        elif isinstance(node.func, ast.Attribute):
            # obj.method(...): count both the base name and the method name.
            if isinstance(node.func.value, ast.Name):
                self.names.add(node.func.value.id)
            self.calls.add(node.func.attr)
        self.generic_visit(node)

    def visit_Import(self, node):
        for alias in node.names:
            self.imports.add(alias.asname or alias.name)
        self.generic_visit(node)

    def visit_ImportFrom(self, node):
        for alias in node.names:
            self.imports.add(alias.asname or alias.name)
        self.generic_visit(node)


def analyze_file(filepath: str) -> dict:
    """Analyze a single Python file for dead code.

    Only intra-file usage is considered here; cross-file references are
    handled at the repo level by scan_repo(). Returns ``{"error": ...}`` when
    the file cannot be parsed, otherwise ``{"definitions": int, "dead": [...]}``.
    """
    path = Path(filepath)
    try:
        content = path.read_text()
        tree = ast.parse(content, filename=str(filepath))
    except (SyntaxError, UnicodeDecodeError):
        return {"error": f"Could not parse {filepath}"}

    # Collect definitions
    def_collector = DefinitionCollector()
    def_collector.visit(tree)
    definitions = def_collector.definitions

    # Collect usage
    usage_collector = NameUsageCollector()
    usage_collector.visit(tree)
    used_names = usage_collector.names | usage_collector.calls | usage_collector.imports

    dead = []
    for name, def_type, lineno in definitions:
        if name not in used_names and not is_safe_unused(name, filepath):
            dead.append({
                "name": name,
                "type": def_type,
                "file": filepath,
                "line": lineno,
            })
    return {"definitions": len(definitions), "dead": dead}


def scan_repo(repo_path: str, exclude_patterns: Optional[list] = None) -> dict:
    """Scan an entire repo for dead code.

    Pass one parses every ``*.py`` file (skipping excluded directories and
    hidden files) and records all definitions; pass two reuses the cached
    ASTs to gather every referenced name; anything defined but never
    referenced — and not whitelisted by is_safe_unused() — is reported.
    """
    path = Path(repo_path)
    exclude = exclude_patterns or ["venv", ".venv", "node_modules", "__pycache__",
                                   ".git", "dist", "build", ".tox", "vendor"]

    all_definitions = defaultdict(list)  # name -> [{file, line, type}]
    parsed_trees = []  # (path str, ast tree) — cached so pass two needn't re-parse
    dead_code = []

    # First pass: collect all definitions across the repo.
    for fpath in path.rglob("*.py"):
        if any(ex in fpath.parts for ex in exclude):
            continue
        if fpath.name.startswith("."):
            continue
        try:
            content = fpath.read_text(errors="ignore")
            tree = ast.parse(content, filename=str(fpath))
        except (SyntaxError, ValueError, OSError):
            continue
        parsed_trees.append((str(fpath), tree))
        collector = DefinitionCollector()
        collector.visit(tree)
        rel_path = str(fpath.relative_to(path))
        for name, def_type, lineno in collector.definitions:
            all_definitions[name].append({
                "file": rel_path,
                "line": lineno,
                "type": def_type,
            })

    # Second pass: every name referenced anywhere in the repo.
    all_used_names = set()
    for _, tree in parsed_trees:
        usage = NameUsageCollector()
        usage.visit(tree)
        all_used_names |= usage.names | usage.calls | usage.imports

    # Find dead code
    for name, locations in all_definitions.items():
        if name in all_used_names:
            continue
        for loc in locations:
            if not is_safe_unused(name, loc["file"]):
                dead_code.append({
                    "name": name,
                    "type": loc["type"],
                    "file": loc["file"],
                    "line": loc["line"],
                })

    return {
        "repo": path.name,
        "files_scanned": len(parsed_trees),
        "total_definitions": sum(len(v) for v in all_definitions.values()),
        "dead_code_count": len(dead_code),
        "dead_code": sorted(dead_code, key=lambda x: (x["file"], x["line"])),
    }


def main():
    """CLI entry point: scan a repo and print a text or JSON report."""
    parser = argparse.ArgumentParser(description="Find dead code in Python codebases")
    parser.add_argument("repo", help="Repository path to scan")
    parser.add_argument("--format", choices=["text", "json"], default="text")
    parser.add_argument("--exclude", help="Comma-separated patterns to exclude")
    parser.add_argument("--git-blame", action="store_true", help="Include git blame info")
    args = parser.parse_args()

    exclude = args.exclude.split(",") if args.exclude else None
    result = scan_repo(args.repo, exclude)

    if args.format == "json":
        print(json.dumps(result, indent=2))
    else:
        print(f"Dead Code Report: {result['repo']}")
        print(f"Files scanned: {result['files_scanned']}")
        print(f"Total definitions: {result['total_definitions']}")
        print(f"Dead code found: {result['dead_code_count']}")
        print()
        if result["dead_code"]:
            print(f"{'File':<45} {'Line':>4} {'Type':<10} {'Name'}")
            print("-" * 85)
            for item in result["dead_code"]:
                author = ""
                if args.git_blame:
                    author = get_git_blame(
                        os.path.join(args.repo, item["file"]), item["line"]
                    ) or ""
                    author = f" ({author})" if author else ""
                print(f"{item['file']:<45} {item['line']:>4} {item['type']:<10} {item['name']}{author}")
        else:
            print("No dead code detected!")


if __name__ == "__main__":
    main()