#!/usr/bin/env python3
"""
Architecture Doc Generator — 4.4

Analyzes codebase structure and generates an architecture overview:
  - Maps module dependencies (Python imports within the repo)
  - Identifies entry points (main guards, CLI scripts)
  - Generates ASCII diagram of module relationships
  - Produces one ARCHITECTURE.md per repo

Usage:
    python3 scripts/architecture_doc_generator.py [repo_root]

If no repo_root given, uses current directory.
Outputs ARCHITECTURE.md to the repo root.
"""

import argparse
import ast
import re
import sys
from collections import defaultdict
from pathlib import Path

# Directory names (relative to the repo root) whose contents are never scanned.
_EXCLUDED_DIRS = frozenset(
    {'test', 'tests', 'venv', 'node_modules', '__pycache__', 'dist', 'build'}
)

# Regex fallback used only when a file cannot be parsed with ``ast``.
_IMPORT_RE = re.compile(r'^\s*import\s+([a-zA-Z0-9_.]+)', re.MULTILINE)
_FROM_RE = re.compile(r'^\s*from\s+\.*([a-zA-Z0-9_.]*)\s+import', re.MULTILINE)


def _read_source(path: Path) -> str:
    """Return the text of *path*, or an empty string if it cannot be read.

    ``utf-8-sig`` strips a leading byte-order mark so it cannot break parsing;
    undecodable bytes are dropped rather than raising.
    """
    try:
        return path.read_text(encoding='utf-8-sig', errors='ignore')
    except OSError:
        return ''


def scan_python_files(root: Path):
    """Find all .py files under root, excluding tests and tooling directories.

    Only path components *below* ``root`` are inspected, so a repository that
    itself lives under a hidden or excluded-looking directory (for example
    ``~/.cache/repo`` or ``/build/repo``) is still scanned.

    Args:
        root: Directory to search recursively.

    Returns:
        Sorted list of paths. Files are skipped when any component below
        ``root`` starts with ``.`` or is one of ``test``, ``tests``, ``venv``,
        ``node_modules``, ``__pycache__``, ``dist`` or ``build``.
    """
    py_files = []
    for path in root.rglob("*.py"):
        parts = path.relative_to(root).parts
        if any(part.startswith('.') for part in parts):
            continue
        if not _EXCLUDED_DIRS.isdisjoint(parts):
            continue
        py_files.append(path)
    return sorted(py_files)


def module_id(path: Path, root: Path) -> str:
    """Return a readable module identifier.

    A file directly in ``root`` is identified by its stem; a nested file by
    its dotted path relative to ``root`` (``pkg/util.py`` -> ``pkg.util``).
    Joining path parts keeps the result independent of the OS separator.
    """
    rel = path.relative_to(root)
    return '.'.join(rel.with_suffix('').parts)


def _imports_from_ast(tree: ast.AST) -> set:
    """Collect first components of every imported module in a parsed tree."""
    names = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            names.update(alias.name.split('.')[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            if node.module:
                names.add(node.module.split('.')[0])
            else:
                # ``from . import a, b``: the imported names are the modules.
                names.update(a.name for a in node.names if a.name != '*')
    return names


def _imports_from_regex(text: str) -> set:
    """Best-effort import scan for sources that ``ast`` cannot parse."""
    names = set()
    for pattern in (_IMPORT_RE, _FROM_RE):
        for match in pattern.finditer(text):
            head = match.group(1).split('.')[0]
            if head:  # ``from . import x`` yields no module name here
                names.add(head)
    return names


def extract_imports(path: Path) -> list[str]:
    """Extract top-level import names from a Python file.

    The file is parsed with ``ast`` so that multi-name imports
    (``import a, b``) and bare relative imports (``from . import a``) are
    captured, and text inside strings or docstrings is ignored. Files that do
    not parse fall back to a line-based regex scan.

    Args:
        path: Python source file to inspect.

    Returns:
        Sorted list of the first dotted component of each imported module.
        Empty if the file cannot be read.
    """
    text = _read_source(path)
    if not text:
        return []
    try:
        tree = ast.parse(text)
    except (SyntaxError, ValueError):
        return sorted(_imports_from_regex(text))
    return sorted(_imports_from_ast(tree))


def build_dependency_graph(py_files: list[Path], root: Path) -> dict[str, list[str]]:
    """Build adjacency: local_module -> sorted list of local modules it imports.

    An imported name is resolved to a local module when it equals a module
    identifier exactly, or otherwise when it equals the last dotted component
    of one. Candidates are tried in sorted order so the result is the same on
    every run, and a module is never recorded as depending on itself.

    Args:
        py_files: Files to analyze (typically from ``scan_python_files``).
        root: Repository root used to derive module identifiers.

    Returns:
        Mapping containing only modules that have at least one local
        dependency.
    """
    ids_by_path = {path: module_id(path, root) for path in py_files}
    local_ids = set(ids_by_path.values())

    # Index identifiers by their final component for O(1) stem lookups.
    ids_by_stem = defaultdict(list)
    for mid in sorted(local_ids):
        ids_by_stem[mid.rsplit('.', 1)[-1]].append(mid)

    graph = defaultdict(set)
    for path, src_mod in ids_by_path.items():
        for imp in extract_imports(path):
            candidates = [imp] if imp in local_ids else ids_by_stem.get(imp, [])
            target = next((c for c in candidates if c != src_mod), None)
            if target is not None:
                graph[src_mod].add(target)
    return {mod: sorted(deps) for mod, deps in graph.items()}


def find_entry_points(py_files: list[Path]) -> list[Path]:
    """Return files that contain an ``if __name__ ... '__main__'`` guard.

    Detection is textual: a file qualifies when it contains both
    ``if __name__`` and ``__main__``. Files with any executable permission
    bit are listed first, then the rest, each group ordered by file name.
    """
    entries = []
    for path in py_files:
        text = _read_source(path)
        if 'if __name__' in text and '__main__' in text:
            entries.append(path)
    return sorted(entries, key=lambda p: (not (p.stat().st_mode & 0o111), p.name))


def ascii_diagram(graph: dict[str, list[str]], entries: list[Path], root: Path) -> str:
    """Generate a simple ASCII box-and-arrow diagram.

    Every module that appears in ``graph`` (as importer or as dependency) is
    listed once in sorted order: entry points as ``*name*``, other modules as
    ``[name]``, each followed by one arrow line per dependency.

    Returns:
        The diagram as a newline-joined string (empty for an empty graph).
    """
    lines = []
    entry_names = {module_id(p, root) for p in entries}
    # All nodes: importers plus everything they import.
    nodes = sorted(set(graph.keys()) | set().union(*graph.values()))
    for node in nodes:
        label = f"*{node}*" if node in entry_names else f"[{node}]"
        lines.append(label)
        for dep in graph.get(node, []):
            lines.append(f" └─> {dep}")
    return '\n'.join(lines)


def generate_markdown(root: Path, graph: dict, entries: list[Path], diagram: str) -> str:
    """Render the ARCHITECTURE.md content.

    Args:
        root: Repository root; its name becomes the document title and entry
            points are shown relative to it.
        graph: Module -> dependencies mapping from ``build_dependency_graph``.
        entries: Entry-point files from ``find_entry_points``.
        diagram: Pre-rendered diagram text from ``ascii_diagram``.

    Returns:
        The complete Markdown document as a single string.
    """
    root_name = root.name
    md = []
    md.append(f"# Architecture: {root_name}")
    md.append("")
    md.append("**Generated by:** `scripts/architecture_doc_generator.py`")
    md.append("")

    md.append("## Entry Points")
    if entries:
        for p in entries:
            rel = p.relative_to(root)
            md.append(f"- `{rel}`")
    else:
        md.append("_No entry points detected._")
    md.append("")

    md.append("## Module Dependencies")
    if graph:
        md.append("| Module | Imports |")
        md.append("|--------|---------|")
        for mod in sorted(graph.keys()):
            deps = ', '.join(sorted(graph[mod])) if graph[mod] else '_none_'
            md.append(f"| `{mod}` | `{deps}` |")
    else:
        md.append("_No dependencies detected._")
    md.append("")

    md.append("## ASCII Diagram")
    md.append("```")
    md.append(diagram)
    md.append("```")
    md.append("")
    md.append("_Generated automatically. Keep this file in sync with code changes by re-running the generator._")
    return '\n'.join(md)


def main():
    """CLI entry point: scan the repo and write ARCHITECTURE.md to its root.

    Exits with status 1 when no Python files are found, and with an argparse
    usage error when ``repo_root`` is not a directory.
    """
    parser = argparse.ArgumentParser(description="Generate architecture documentation")
    parser.add_argument("repo_root", nargs="?", default=".", help="Repository root (default: current directory)")
    args = parser.parse_args()

    root = Path(args.repo_root).resolve()
    if not root.is_dir():
        parser.error(f"repo_root is not a directory: {root}")

    py_files = scan_python_files(root)
    if not py_files:
        print("No Python files found — nothing to do.", file=sys.stderr)
        sys.exit(1)

    graph = build_dependency_graph(py_files, root)
    entries = find_entry_points(py_files)
    diagram = ascii_diagram(graph, entries, root)
    markdown = generate_markdown(root, graph, entries, diagram)

    out_path = root / "ARCHITECTURE.md"
    out_path.write_text(markdown, encoding='utf-8')

    print(f"Written: {out_path}")
    print(f" Modules scanned: {len(py_files)}")
    print(f" Entry points: {len(entries)}")
    print(f" Dependency edges: {sum(len(v) for v in graph.values())}")


if __name__ == "__main__":
    main()