diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md new file mode 100644 index 0000000..480b157 --- /dev/null +++ b/ARCHITECTURE.md @@ -0,0 +1,95 @@ +# Architecture: STEP35-compounding-intelligence-99 + +**Generated by:** `scripts/architecture_doc_generator.py` + +## Entry Points +- `scripts/architecture_doc_generator.py` +- `scripts/refactoring_opportunity_finder.py` +- `scripts/automation_opportunity_finder.py` +- `scripts/bootstrapper.py` +- `scripts/dead_code_detector.py` +- `scripts/dedup.py` +- `scripts/dependency_graph.py` +- `scripts/freshness.py` +- `scripts/gitea_issue_parser.py` +- `scripts/harvester.py` +- `scripts/improvement_proposals.py` +- `scripts/knowledge_staleness_check.py` +- `scripts/perf_bottleneck_finder.py` +- `scripts/pr_complexity_scorer.py` +- `scripts/priority_rebalancer.py` +- `quality_gate.py` +- `scripts/sampler.py` +- `scripts/session_metadata.py` +- `scripts/session_pair_harvester.py` +- `scripts/session_reader.py` +- `scripts/test_automation_opportunity_finder.py` +- `scripts/test_bootstrapper.py` +- `scripts/test_diff_analyzer.py` +- `tests/test_freshness.py` +- `scripts/test_gitea_issue_parser.py` +- `scripts/test_harvest_prompt.py` +- `scripts/test_harvest_prompt_comprehensive.py` +- `scripts/test_harvester_pipeline.py` +- `scripts/test_improvement_proposals.py` +- `tests/test_knowledge_gap_identifier.py` +- `scripts/test_knowledge_staleness.py` +- `tests/test_quality_gate.py` +- `scripts/test_refactoring_opportunity_finder.py` +- `scripts/test_session_pair_harvester.py` +- `scripts/validate_knowledge.py` + +## Module Dependencies +| Module | Imports | +|--------|---------| +| `quality_gate` | `quality_gate` | +| `scripts.harvester` | `scripts.session_reader` | +| `scripts.session_metadata` | `scripts.session_reader` | +| `scripts.test_bootstrapper` | `scripts.bootstrapper` | +| `scripts.test_harvester_pipeline` | `scripts.harvester, scripts.session_reader` | +| `scripts.test_pr_complexity_scorer` | `scripts.pr_complexity_scorer` | 
+| `scripts.test_priority_rebalancer` | `scripts.priority_rebalancer` | +| `scripts.test_session_pair_harvester` | `scripts.session_pair_harvester` | +| `tests.test_dedup` | `scripts.dedup` | +| `tests.test_knowledge_gap_identifier` | `scripts.knowledge_gap_identifier` | +| `tests.test_perf_bottleneck_finder` | `scripts.perf_bottleneck_finder` | +| `tests.test_quality_gate` | `quality_gate` | + +## ASCII Diagram +``` +*quality_gate* + └─> quality_gate +*scripts.bootstrapper* +*scripts.dedup* +*scripts.harvester* + └─> scripts.session_reader +[scripts.knowledge_gap_identifier] +*scripts.perf_bottleneck_finder* +*scripts.pr_complexity_scorer* +*scripts.priority_rebalancer* +*scripts.session_metadata* + └─> scripts.session_reader +*scripts.session_pair_harvester* +*scripts.session_reader* +*scripts.test_bootstrapper* + └─> scripts.bootstrapper +*scripts.test_harvester_pipeline* + └─> scripts.harvester + └─> scripts.session_reader +[scripts.test_pr_complexity_scorer] + └─> scripts.pr_complexity_scorer +[scripts.test_priority_rebalancer] + └─> scripts.priority_rebalancer +*scripts.test_session_pair_harvester* + └─> scripts.session_pair_harvester +[tests.test_dedup] + └─> scripts.dedup +*tests.test_knowledge_gap_identifier* + └─> scripts.knowledge_gap_identifier +[tests.test_perf_bottleneck_finder] + └─> scripts.perf_bottleneck_finder +*tests.test_quality_gate* + └─> quality_gate +``` + +_Generated automatically. 
#!/usr/bin/env python3
"""
Architecture Doc Generator — 4.4

Analyzes codebase structure and generates an architecture overview:
- Maps module dependencies (Python imports within the repo)
- Identifies entry points (main guards)
- Generates ASCII diagram of module relationships
- Produces one ARCHITECTURE.md per repo

Usage:
    python3 scripts/architecture_doc_generator.py [repo_root]

If no repo_root given, uses current directory.
Outputs ARCHITECTURE.md to the repo root.
"""

import argparse
import ast
import re
import sys
from collections import defaultdict
from pathlib import Path

# Directory names (relative to the repo root) whose contents are never scanned:
# dedicated test directories plus environment / build artefacts.
_EXCLUDED_DIRS = frozenset(
    {'test', 'tests', 'venv', 'node_modules', '__pycache__', 'dist', 'build'}
)

# Regex fallback used only for files that `ast` cannot parse.
_IMPORT_RE = re.compile(r'^\s*import\s+([a-zA-Z0-9_.]+)', re.MULTILINE)
_FROM_RE = re.compile(r'^\s*from\s+(\.*)([a-zA-Z0-9_.]*)\s+import', re.MULTILINE)


def scan_python_files(root: Path) -> list[Path]:
    """Find all .py files under root, excluding test/, tests/, hidden and build dirs.

    Filtering is applied to the path *relative to root*, so the location of the
    repository itself (e.g. inside ``~/.cache`` or ``/srv/build``) never causes
    files to be skipped.

    Args:
        root: Directory to scan recursively.

    Returns:
        Sorted list of matching file paths (each still prefixed with ``root``).
    """
    py_files = []
    for path in root.rglob("*.py"):
        parts = path.relative_to(root).parts
        # Hidden files/directories (.git, .venv, .tox, ...).
        if any(part.startswith('.') for part in parts):
            continue
        # Only directory components are compared against the exclusion list.
        if any(part in _EXCLUDED_DIRS for part in parts[:-1]):
            continue
        if not path.is_file():
            continue
        py_files.append(path)
    return sorted(py_files)


def module_id(path: Path, root: Path) -> str:
    """Return a readable dotted module identifier for *path* relative to *root*.

    ``root/scripts/harvester.py`` becomes ``scripts.harvester``; a file directly
    in ``root`` becomes just its stem. Built from path components, so the result
    does not depend on the platform's path separator.
    """
    return '.'.join(path.relative_to(root).with_suffix('').parts)


def _read_source(path: Path) -> "str | None":
    """Return the text of *path* (undecodable bytes dropped), or None if unreadable."""
    try:
        return path.read_text(encoding='utf-8', errors='ignore')
    except OSError:
        return None


def _parse_imports_regex(text: str) -> list[tuple[int, str]]:
    """Best-effort line-based import scan for sources that are not valid Python 3."""
    found = []
    for match in _IMPORT_RE.finditer(text):
        dotted = match.group(1).strip('.')
        if dotted:
            found.append((0, dotted))
    for match in _FROM_RE.finditer(text):
        dotted = match.group(2).strip('.')
        # `from . import x` has no module part; the fallback skips it.
        if dotted:
            found.append((len(match.group(1)), dotted))
    return found


def _parse_imports(text: str) -> list[tuple[int, str]]:
    """Return ``(relative_level, dotted_name)`` for every import found in *text*.

    ``relative_level`` is the number of leading dots (0 for absolute imports).
    For ``from a.b import c`` the dotted name is ``a.b.c`` so that callers can
    resolve either the submodule ``a.b.c`` or the module ``a.b``.
    """
    try:
        tree = ast.parse(text)
    except (SyntaxError, ValueError, RecursionError):
        return _parse_imports_regex(text)

    found = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            found.extend((0, alias.name) for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            base = node.module or ''
            for alias in node.names:
                if alias.name == '*':
                    dotted = base
                elif base:
                    dotted = f"{base}.{alias.name}"
                else:
                    dotted = alias.name
                if dotted:
                    found.append((node.level, dotted))
    return found


def extract_imports(path: Path) -> list[str]:
    """Extract top-level import names from a Python file.

    Args:
        path: File to inspect.

    Returns:
        Sorted, de-duplicated first components of every imported name
        (``import a.b`` and ``from a.b import c`` both yield ``a``;
        ``from . import x`` yields ``x``). Empty list if the file cannot be read.
    """
    text = _read_source(path)
    if text is None:
        return []
    top_level = {dotted.split('.')[0] for _, dotted in _parse_imports(text)}
    top_level.discard('')
    return sorted(top_level)


def _resolve_import(src_mod, level, dotted, local_ids, by_stem):
    """Map one import to a local module id, or None if it is not a repo module."""
    parts = dotted.split('.')
    # For relative imports, anchor the name at the importing module's package.
    anchor = src_mod.split('.')[:-level] if level else []
    full = anchor + parts
    # Longest prefix first: `from a.b import c` prefers a.b.c, then a.b, then a.
    for end in range(len(full), len(anchor), -1):
        candidate = '.'.join(full[:end])
        if candidate in local_ids:
            return candidate
        package_init = f"{candidate}.__init__"
        if package_init in local_ids:
            return package_init
    # Scripts commonly extend sys.path and import siblings by bare file name.
    return by_stem.get(parts[0])


def build_dependency_graph(py_files: list[Path], root: Path) -> dict[str, list[str]]:
    """Build adjacency: local_module -> sorted list of local modules it imports.

    Imports are resolved by full dotted path first (including relative imports),
    then by bare file stem. Modules without local dependencies are omitted and a
    module is never reported as depending on itself.

    Args:
        py_files: Files to analyse (typically from ``scan_python_files``).
        root: Repository root used to derive module identifiers.

    Returns:
        Mapping of module id to its sorted local dependencies.
    """
    ids_by_path = {path: module_id(path, root) for path in py_files}
    local_ids = set(ids_by_path.values())

    # Deterministic stem index: on a stem collision the alphabetically first id wins.
    by_stem = {}
    for mid in sorted(local_ids):
        by_stem.setdefault(mid.rsplit('.', 1)[-1], mid)

    graph = defaultdict(set)
    for path, src_mod in ids_by_path.items():
        text = _read_source(path)
        if text is None:
            continue
        for level, dotted in _parse_imports(text):
            target = _resolve_import(src_mod, level, dotted, local_ids, by_stem)
            if target and target != src_mod:
                graph[src_mod].add(target)

    return {mod: sorted(deps) for mod, deps in graph.items()}


def find_entry_points(py_files: list[Path]) -> list[Path]:
    """Return files that contain an ``if __name__ ... __main__`` guard.

    Files with an executable bit are listed first, then by file name.
    Unreadable files are skipped.
    """
    entries = []
    for path in py_files:
        text = _read_source(path)
        if text is None:
            continue
        if 'if __name__' in text and '__main__' in text:
            entries.append(path)
    return sorted(entries, key=lambda p: (not (p.stat().st_mode & 0o111), p.name))


def ascii_diagram(graph: dict[str, list[str]], entries: list[Path], root: Path) -> str:
    """Generate a simple ASCII listing of modules and their dependencies.

    Every module that appears in *graph* (as importer or importee) gets one
    line: ``*name*`` for entry points, ``[name]`` otherwise, followed by one
    arrow line per dependency.
    """
    entry_names = {module_id(p, root) for p in entries}
    nodes = set(graph)
    for deps in graph.values():
        nodes.update(deps)

    lines = []
    for node in sorted(nodes):
        lines.append(f"*{node}*" if node in entry_names else f"[{node}]")
        lines.extend(f" └─> {dep}" for dep in graph.get(node, []))
    return '\n'.join(lines)


def generate_markdown(root: Path, graph: dict, entries: list[Path], diagram: str) -> str:
    """Render the ARCHITECTURE.md document.

    Args:
        root: Repository root; its name becomes the document title.
        graph: Output of ``build_dependency_graph``.
        entries: Output of ``find_entry_points``.
        diagram: Output of ``ascii_diagram``.

    Returns:
        Markdown text terminated by a single newline.
    """
    md = [
        f"# Architecture: {root.name}",
        "",
        "**Generated by:** `scripts/architecture_doc_generator.py`",
        "",
        "## Entry Points",
    ]
    if entries:
        # POSIX separators keep the document identical across platforms.
        md.extend(f"- `{p.relative_to(root).as_posix()}`" for p in entries)
    else:
        md.append("_No entry points detected._")
    md.append("")
    md.append("## Module Dependencies")
    if graph:
        md.append("| Module | Imports |")
        md.append("|--------|---------|")
        for mod in sorted(graph):
            deps = sorted(graph[mod])
            # One code span per dependency; emphasis must stay outside backticks.
            cell = ', '.join(f"`{dep}`" for dep in deps) if deps else '_none_'
            md.append(f"| `{mod}` | {cell} |")
    else:
        md.append("_No dependencies detected._")
    md.extend([
        "",
        "## ASCII Diagram",
        "```",
        diagram,
        "```",
        "",
        "_Generated automatically. Keep this file in sync with code changes by re-running the generator._",
    ])
    return '\n'.join(md) + '\n'


def main():
    """CLI entry point: scan the repo and write ARCHITECTURE.md into its root."""
    parser = argparse.ArgumentParser(description="Generate architecture documentation")
    parser.add_argument("repo_root", nargs="?", default=".", help="Repository root (default: current directory)")
    args = parser.parse_args()

    root = Path(args.repo_root).resolve()
    if not root.is_dir():
        parser.error(f"repo_root is not a directory: {root}")

    py_files = scan_python_files(root)
    if not py_files:
        print("No Python files found — nothing to do.", file=sys.stderr)
        sys.exit(1)

    graph = build_dependency_graph(py_files, root)
    entries = find_entry_points(py_files)
    diagram = ascii_diagram(graph, entries, root)
    markdown = generate_markdown(root, graph, entries, diagram)

    out_path = root / "ARCHITECTURE.md"
    out_path.write_text(markdown, encoding='utf-8')
    print(f"Written: {out_path}")
    print(f"  Modules scanned: {len(py_files)}")
    print(f"  Entry points: {len(entries)}")
    print(f"  Dependency edges: {sum(len(v) for v in graph.values())}")


if __name__ == "__main__":
    main()