#!/usr/bin/env python3 """ API Doc Generator — Issue #98 Scans all Python modules in `scripts/`, extracts their public API surface (module docstring + public function signatures + first-line doc summaries), and produces a single markdown reference document at `docs/API.md`. Usage: python3 scripts/api_doc_generator.py # Write docs/API.md python3 scripts/api_doc_generator.py --check # Verify docs/API.md is up-to-date python3 scripts/api_doc_generator.py --json # Emit JSON for downstream tooling """ from __future__ import annotations import ast import os import sys from datetime import datetime, timezone from pathlib import Path from typing import TypedDict, List, Optional # ─── Paths ──────────────────────────────────────────────────────────────────── SCRIPT_DIR = Path(__file__).resolve().parent REPO_ROOT = SCRIPT_DIR.parent SCRIPTS_DIR = REPO_ROOT / "scripts" DOCS_DIR = REPO_ROOT / "docs" OUTPUT_PATH = DOCS_DIR / "API.md" # ─── Data structures ─────────────────────────────────────────────────────────── class FunctionInfo(TypedDict): name: str signature: str summary: str class ModuleInfo(TypedDict): path: str # relative to repo root, e.g. "scripts/harvester.py" docstring: str functions: List[FunctionInfo] # ─── AST extraction ──────────────────────────────────────────────────────────── def extract_functions_from_ast(tree: ast.AST, file_rel: str) -> List[FunctionInfo]: """Extract public function names, signatures, and first-line doc summaries.""" funcs: list[FunctionInfo] = [] for node in ast.iter_child_nodes(tree): if not isinstance(node, ast.FunctionDef): continue # Skip private functions if node.name.startswith("_"): continue # Build signature: arg1, arg2=default, *args, **kwargs args = [] for arg in node.args.args: args.append(arg.arg) if node.args.vararg: args.append(f"*{node.args.vararg.arg}") if node.args.kwarg: args.append(f"**{node.args.kwarg.arg}") # Get first line of docstring summary = "" if (node.body and isinstance(node.body[0], ast.Expr) and isinstance(node.body[0].value, ast.Constant) and isinstance(node.body[0].value.value, str)): raw = node.body[0].value.value.strip() summary = raw.split("\n")[0].strip() if len(summary) > 100: summary = summary[:97] + "..." funcs.append({ "name": node.name, "signature": ", ".join(args), "summary": summary, }) return funcs def parse_module(filepath: Path) -> Optional[ModuleInfo]: """Parse a Python file and return its module-level docstring and public functions.""" try: with open(filepath, "r", encoding="utf-8") as f: source = f.read() tree = ast.parse(source, filename=str(filepath)) except Exception as e: print(f"WARNING: Could not parse {filepath}: {e}", file=sys.stderr) return None # Module docstring module_doc = ast.get_docstring(tree) or "" module_doc = module_doc.strip().split("\n")[0] # first line only # Public functions functions = extract_functions_from_ast(tree, filepath.name) rel = filepath.relative_to(REPO_ROOT) return { "path": str(rel), "docstring": module_doc, "functions": functions, } # ─── Scanning ────────────────────────────────────────────────────────────────── def scan_scripts_dir(scripts_dir: Path) -> List[ModuleInfo]: """Scan all .py files in scripts/ and extract API info.""" modules: list[ModuleInfo] = [] for pyfile in sorted(scripts_dir.glob("*.py")): info = parse_module(pyfile) if info is not None: modules.append(info) return modules # ─── Markdown rendering ───────────────────────────────────────────────────────── def render_markdown(modules: List[ModuleInfo]) -> str: """Generate full docs/API.md content from the scanned modules.""" lines = [ "# Compounding Intelligence — Scripts API Reference", "", f"*Generated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}*", "", "This document auto-documents the public API surface of all scripts", "in `scripts/`. Each section covers one script: module purpose,", "public functions, and their signatures.", "", "---", "", ] for mod in modules: rel = mod["path"] name = Path(rel).stem # e.g. harvester lines.append(f"## `{rel}`") lines.append("") if mod["docstring"]: lines.append(mod["docstring"]) lines.append("") if mod["functions"]: lines.append("| Function | Signature | Description |") lines.append("|----------|-----------|-------------|") for fn in mod["functions"]: sig = fn["name"] + "(" + fn["signature"] + ")" desc = fn["summary"] or "-" lines.append(f"| `{fn['name']}` | `{sig}` | {desc} |") lines.append("") else: lines.append("*(no public functions — script runs as `main()` only)*") lines.append("") lines.extend([ "", "---", "", f"**Total scripts documented:** {len(modules)}", "", "*Generated by `scripts/api_doc_generator.py` (Issue #98)*", ]) return "\n".join(lines) # ─── JSON output (optional, for automation) ─────────────────────────────────── def render_json(modules: List[ModuleInfo]) -> str: """Emit machine-readable JSON version of the API reference.""" import json payload = { "generated_at": datetime.now(timezone.utc).isoformat(), "generator": "scripts/api_doc_generator.py", "repo": "Timmy_Foundation/compounding-intelligence", "modules": modules, } return json.dumps(payload, indent=2) # ─── Main ────────────────────────────────────────────────────────────────────── def main() -> int: import argparse parser = argparse.ArgumentParser(description="Generate API docs for scripts/") parser.add_argument("--check", action="store_true", help="Exit 1 if docs/API.md is out-of-date") parser.add_argument("--json", action="store_true", help="Emit JSON to stdout instead of writing markdown") args = parser.parse_args() modules = scan_scripts_dir(SCRIPTS_DIR) modules.sort(key=lambda m: m["path"]) if args.json: print(render_json(modules)) return 0 md = render_markdown(modules) if args.check: if OUTPUT_PATH.exists(): existing = OUTPUT_PATH.read_text(encoding="utf-8") if existing == md: print("✅ docs/API.md is up-to-date") return 0 print("❌ docs/API.md is missing or out-of-date — regenerate with " "`python3 scripts/api_doc_generator.py`", file=sys.stderr) return 1 DOCS_DIR.mkdir(parents=True, exist_ok=True) OUTPUT_PATH.write_text(md, encoding="utf-8") print(f"✅ Wrote {OUTPUT_PATH} ({len(modules)} modules documented)") return 0 if __name__ == "__main__": sys.exit(main())