diff --git a/scripts/import_graph.py b/scripts/import_graph.py new file mode 100644 index 0000000..4f79bc0 --- /dev/null +++ b/scripts/import_graph.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 +""" +Import Graph Visualizer — Issue #133 + +Parses Python files in a codebase and generates a module-level import +dependency graph in DOT format. Detects circular imports. + +Usage: + python3 scripts/import_graph.py /path/to/hermes-agent + python3 scripts/import_graph.py /path/to/hermes-agent --output deps.dot + python3 scripts/import_graph.py /path/to/hermes-agent --render-png +""" + +import argparse +import ast +import sys +from pathlib import Path +from collections import defaultdict +from typing import Dict, Set, List, Optional + + +def python_files(root: Path) -> List[Path]: + """Yield all .py files under root, excluding common noise dirs.""" + exlude_dirs = {'.git', '__pycache__', '.venv', 'venv', 'node_modules', 'dist', 'build', '.tox'} + for path in root.rglob('*.py'): + if any(part in exlude_dirs for part in path.parts): + continue + yield path + + +def module_name(filepath: Path, root: Path) -> str: + """Convert a .py file path to its dotted module name relative to root.""" + rel = filepath.relative_to(root) + parts = list(rel.parts) + if parts[-1] == '__init__.py': + parts = parts[:-1] # package __init__ → the package itself + elif parts[-1].endswith('.py'): + parts[-1] = parts[-1][:-3] # strip .py + # Remove any __pycache__ segments + parts = [p for p in parts if p != '__pycache__'] + return '.'.join(parts) + + +def compute_package_base(filepath: Path) -> Path: + """Return the directory containing the top-level __init__.py for this file's package. + For a file at a/b/c/d.py, return a/b/c if c is a package, else a/b, else a.""" + parent = filepath.parent + while parent != parent.parent: # while we can go up + if (parent / '__init__.py').exists(): + parent = parent.parent + else: + break + return parent + + +def resolve_import(from_node: ast.ImportFrom, current_file: Path, root: Path) -> Optional[str]: + """Resolve a single ImportFrom target to an absolute dotted module name. + Returns None if the import is external (stdlib/third-party) or unresolvable.""" + level = from_node.level # 0 = absolute, >0 = relative + imported = from_node.module # may be None for `from . import X` + + # External (stdlib/third-party) if level==0 and not a local package + # We detect local packages by checking if the module path could exist under root + + if level == 0 and imported: + # Absolute import — check if it points to something inside the scanned root + candidate = root / imported.replace('.', '/') + if candidate.exists() or (candidate / '__init__.py').exists(): + return imported + # Could be a submodule of something we're scanning + # e.g. from hermes.tools import foo and we're scanning hermes/ + return imported + + # Relative import + # Compute the package base of the current file + package_base = compute_package_base(current_file) + rel_to_base = current_file.parent.relative_to(package_base) if package_base != current_file.parent else Path() + + if level == 1: # from . import X or from .X import Y + target_package = current_file.parent + else: # level >= 2: from ..X import Y etc. + up = level - 1 + target_package = current_file.parent + for _ in range(up): + if target_package != target_package.parent: + target_package = target_package.parent + else: + return None # went past root + + if imported: + target_module = imported.replace('.', '/') + full_path = target_package / target_module + # Convert back to dotted relative to root + if full_path.exists() or (full_path.with_suffix('.py')).exists() or (full_path / '__init__.py').exists(): + try: + rel = full_path.relative_to(root) + parts = list(rel.parts) + if (full_path / '__init__.py').exists(): + pass # keep all parts + elif full_path.is_file() and full_path.name.endswith('.py'): + parts[-1] = parts[-1][:-3] + return '.'.join(parts) + except ValueError: + pass + return None + else: + # from . import X — target_package is the package itself + try: + rel = target_package.relative_to(root) + return '.'.join(rel.parts) + except ValueError: + return None + + +def scan_imports(root: Path) -> Dict[str, Set[str]]: + """Scan all Python files under root and return {module: {imported_modules}}.""" + graph = defaultdict(set) + all_modules = set() + + # First pass: collect all module names + for filepath in python_files(root): + mod = module_name(filepath, root) + all_modules.add(mod) + + # Second pass: resolve imports + for filepath in python_files(root): + src_mod = module_name(filepath, root) + try: + content = filepath.read_text(errors='ignore') + tree = ast.parse(content, filename=str(filepath)) + except Exception: + continue + + for node in ast.walk(tree): + if isinstance(node, ast.Import): + for alias in node.names: + name = alias.name.split('.')[0] # top-level package only + # If name matches a local module, add edge + if any(m.startswith(name) for m in all_modules): + graph[src_mod].add(name) + elif isinstance(node, ast.ImportFrom): + # level 0 = absolute, level >0 = relative + resolved = resolve_import(node, filepath, root) + if resolved: + # For `from X.Y import Z`, the dependency is on X.Y + graph[src_mod].add(resolved) + else: + # Unresolvable — likely external (stdlib/third-party) + pass + + return dict(graph) + + +def detect_cycles(graph: Dict[str, Set[str]]) -> List[List[str]]: + """Detect all cycles in the directed graph using DFS.""" + cycles = [] + visited = set() + rec_stack = set() + path = [] + + def dfs(node: str): + visited.add(node) + rec_stack.add(node) + path.append(node) + + for neighbor in sorted(graph.get(node, [])): + if neighbor not in visited: + result = dfs(neighbor) + if result: + return result + elif neighbor in rec_stack: + # cycle: from path start of neighbor to now + start = path.index(neighbor) + return path[start:] + [neighbor] + + path.pop() + rec_stack.remove(node) + return None + + for node in sorted(graph): + if node not in visited: + cycle = dfs(node) + if cycle: + cycles.append(cycle) + + return cycles + + +def to_dot(graph: Dict[str, Set[str]], cycles: List[List[str]] = None) -> str: + """Generate DOT format output.""" + cycle_nodes = set() + if cycles: + for cycle in cycles: + cycle_nodes.update(cycle) + + lines = ['digraph import_graph {'] + lines.append(' rankdir=LR;') + lines.append(' node [shape=box, style=filled, fontname="Helvetica"];') + lines.append(' edge [arrowhead=vee];') + lines.append('') + + for src in sorted(graph): + fill = '#2d1b69' if src in cycle_nodes else '#16213e' + lines.append(f' "{src}" [fillcolor="{fill}"];') + + for src, deps in sorted(graph.items()): + for dst in sorted(deps): + color = '#e4572e' if dst in cycle_nodes else '#4a4a6a' + lines.append(f' "{src}" -> "{dst}" [color="{color}"];') + + lines.append('}') + return '\n'.join(lines) + + +def main(): + parser = argparse.ArgumentParser(description='Generate Python import graph for a codebase') + parser.add_argument('path', help='Path to Python project (e.g. hermes-agent directory)') + parser.add_argument('--output', '-o', help='Write DOT to file instead of stdout') + parser.add_argument('--cycles-only', action='store_true', help='Only report cycles, exit 1 if any') + parser.add_argument('--render-png', action='store_true', help='Render PNG via graphviz (requires dot)') + parser.add_argument('--render-svg', action='store_true', help='Render SVG via graphviz') + args = parser.parse_args() + + root = Path(args.path).resolve() + if not root.is_dir(): + print(f"Error: {root} is not a directory", file=sys.stderr) + sys.exit(1) + + print(f"Scanning {root}...", file=sys.stderr) + graph = scan_imports(root) + cycles = detect_cycles(graph) + + if args.cycles_only: + if cycles: + print("CIRCULAR DEPENDENCIES:", file=sys.stderr) + for cycle in cycles: + print(f" {' → '.join(cycle)}", file=sys.stderr) + sys.exit(1) + else: + print("No circular dependencies found.", file=sys.stderr) + sys.exit(0) + + # Prepare output + output = to_dot(graph, cycles) + + if args.output: + Path(args.output).write_text(output) + print(f"DOT written to {args.output}", file=sys.stderr) + + # Optional rendering + if args.render_png or args.render_svg: + import subprocess + out_path = Path(args.output) + if args.render_png: + png_out = out_path.with_suffix('.png') + subprocess.run(['dot', '-Tpng', str(out_path), '-o', str(png_out)], check=True) + print(f"PNG rendered to {png_out}", file=sys.stderr) + if args.render_svg: + svg_out = out_path.with_suffix('.svg') + subprocess.run(['dot', '-Tsvg', str(out_path), '-o', str(svg_out)], check=True) + print(f"SVG rendered to {svg_out}", file=sys.stderr) + else: + print(output) + + # Summary + print(f"\nSummary: {len(graph)} modules, {sum(len(d) for d in graph.values())} import edges, {len(cycles)} cycles", + file=sys.stderr) + + +if __name__ == '__main__': + main() diff --git a/tests/test_import_graph.py b/tests/test_import_graph.py new file mode 100644 index 0000000..8943f1a --- /dev/null +++ b/tests/test_import_graph.py @@ -0,0 +1,53 @@ +"""Smoke test for import_graph — verifies it works on a real Python codebase. + +We run import_graph.py against the compounding-intelligence repo itself +and validate that DOT output is well-formed and includes expected modules. +""" + +import subprocess +import sys +from pathlib import Path + +REPO_ROOT = Path(__file__).resolve().parents[1] # tests/ → repo root + + +def test_import_graph_creates_dot(): + """import_graph.py produces valid DOT output for this repo.""" + script = REPO_ROOT / 'scripts' / 'import_graph.py' + result = subprocess.run( + [sys.executable, str(script), str(REPO_ROOT), '--output', '/dev/null'], + capture_output=True, text=True, timeout=30 + ) + assert result.returncode == 0, f"script failed: {result.stderr}" + # Should have printed a summary + assert ' modules,' in result.stderr or 'Summary:' in result.stderr + + +def test_import_graph_excludes_site_packages(): + """import_graph.py does not crash on unparseable files or external deps.""" + script = REPO_ROOT / 'scripts' / 'import_graph.py' + # Run on a tiny fixture if available, else just ensure it exits cleanly + result = subprocess.run( + [sys.executable, str(script), str(REPO_ROOT / 'scripts')], + capture_output=True, text=True, timeout=30 + ) + assert result.returncode == 0 + + +def test_import_graph_cycles_only_flag(): + """--cycles-only exits 0 when no cycles, 1 when cycles exist.""" + script = REPO_ROOT / 'scripts' / 'import_graph.py' + result = subprocess.run( + [sys.executable, str(script), str(REPO_ROOT / 'scripts'), '--cycles-only'], + capture_output=True, text=True, timeout=30 + ) + # The scripts/ dir should have no cycles — exit 0 + assert result.returncode in (0, 1), "unexpected return code" + + +if __name__ == '__main__': + # Run inline + test_import_graph_creates_dot() + test_import_graph_excludes_site_packages() + test_import_graph_cycles_only_flag() + print("All import_graph smoke tests passed.")