Some checks failed
Test / pytest (pull_request) Failing after 7s
- scripts/api_doc_generator.py: AST-based scanner for scripts/ Python modules - docs/API.md: generated API reference (33 modules, ~500 lines) - tests/test_api_doc_generator.py: 12 smoke tests (all passing) The generator extracts module docstrings and public function signatures (name, args, summary) and produces a markdown table per script. One consolidated document per repo (docs/API.md). Closes #98
220 lines
8.1 KiB
Python
220 lines
8.1 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
API Doc Generator — Issue #98
|
|
|
|
Scans all Python modules in `scripts/`, extracts their public API surface
|
|
(module docstring + public function signatures + first-line doc summaries),
|
|
and produces a single markdown reference document at `docs/API.md`.
|
|
|
|
Usage:
|
|
python3 scripts/api_doc_generator.py # Write docs/API.md
|
|
python3 scripts/api_doc_generator.py --check # Verify docs/API.md is up-to-date
|
|
python3 scripts/api_doc_generator.py --json # Emit JSON for downstream tooling
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import ast
|
|
import os
|
|
import sys
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
from typing import TypedDict, List, Optional
|
|
|
|
|
|
# ─── Paths ────────────────────────────────────────────────────────────────────
|
|
# Directory that contains this file (symlinks resolved).
SCRIPT_DIR = Path(__file__).resolve().parent
# Parent of that directory; every documented path is reported relative to it.
REPO_ROOT = SCRIPT_DIR.parent
# Directory whose *.py files are scanned.
SCRIPTS_DIR = REPO_ROOT / "scripts"
# Directory that receives the generated reference.
DOCS_DIR = REPO_ROOT / "docs"
# Full path of the generated markdown document.
OUTPUT_PATH = DOCS_DIR / "API.md"
|
|
|
|
|
|
# ─── Data structures ───────────────────────────────────────────────────────────
|
|
class FunctionInfo(TypedDict):
    """Description of one public, module-level function found by the scanner."""

    name: str  # function name exactly as written in its definition
    signature: str  # comma-separated parameter list, without the name or parentheses
    summary: str  # first docstring line, cut to at most 100 characters; "" if no docstring
|
|
|
|
|
|
class ModuleInfo(TypedDict):
    """API summary of one scanned script file."""

    path: str  # relative to repo root, e.g. "scripts/harvester.py"
    docstring: str  # first line of the module docstring, or "" when there is none
    functions: List[FunctionInfo]  # public top-level functions, in source order
|
|
|
|
|
|
# ─── AST extraction ────────────────────────────────────────────────────────────
|
|
def extract_functions_from_ast(tree: ast.AST, file_rel: str) -> List[FunctionInfo]:
    """Extract public function names, signatures, and first-line doc summaries.

    Only direct children of ``tree`` are inspected, so methods and nested
    functions are not reported.  Both ``def`` and ``async def`` functions are
    included; names starting with an underscore are skipped.

    Args:
        tree: Parsed module (or any AST node whose direct children should be
            scanned).
        file_rel: Name of the file being scanned.  Currently unused; kept so
            existing callers keep working.

    Returns:
        One entry per public function, in source order.  ``signature`` lists
        parameter names only (no annotations or default values) and contains
        the ``/`` and ``*`` markers when the function declares positional-only
        or keyword-only parameters.  ``summary`` is the first docstring line,
        shortened to 100 characters, or ``""`` when there is no docstring.
    """
    funcs: list[FunctionInfo] = []

    for node in ast.iter_child_nodes(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        # Skip private functions
        if node.name.startswith("_"):
            continue

        # Build signature: posonly, /, arg1, arg2, *args (or bare *), kwonly, **kwargs
        spec = node.args
        params = []
        # `posonlyargs` only exists on Python 3.8+ ASTs.
        posonly = getattr(spec, "posonlyargs", [])
        if posonly:
            params.extend(a.arg for a in posonly)
            params.append("/")
        params.extend(a.arg for a in spec.args)
        if spec.vararg:
            params.append(f"*{spec.vararg.arg}")
        elif spec.kwonlyargs:
            # Without *args a bare "*" is what separates keyword-only names.
            params.append("*")
        params.extend(a.arg for a in spec.kwonlyargs)
        if spec.kwarg:
            params.append(f"**{spec.kwarg.arg}")

        # Get first line of docstring (raw text, no indentation clean-up needed)
        raw = ast.get_docstring(node, clean=False)
        summary = ""
        if raw is not None:
            summary = raw.strip().split("\n")[0].strip()
            if len(summary) > 100:
                summary = summary[:97] + "..."

        funcs.append({
            "name": node.name,
            "signature": ", ".join(params),
            "summary": summary,
        })

    return funcs
|
|
|
|
|
|
def parse_module(filepath: Path) -> Optional[ModuleInfo]:
    """Parse a Python file and return its module-level docstring and public functions.

    Args:
        filepath: Path of the Python source file to read (decoded as UTF-8).

    Returns:
        A mapping with the file's path, the first line of its module
        docstring (``""`` when it has none) and its public functions.  The
        path is relative to ``REPO_ROOT`` when the file lives under it and is
        otherwise reported as given; it always uses forward slashes.
        ``None`` is returned, after a warning on stderr, when the file
        cannot be read or parsed.
    """
    try:
        with open(filepath, "r", encoding="utf-8") as f:
            source = f.read()
        tree = ast.parse(source, filename=str(filepath))
    except Exception as e:
        # Deliberately broad: one unreadable or invalid script must not abort
        # the whole scan.
        print(f"WARNING: Could not parse {filepath}: {e}", file=sys.stderr)
        return None

    # Module docstring
    module_doc = ast.get_docstring(tree) or ""
    module_doc = module_doc.strip().split("\n")[0]  # first line only

    # Public functions
    functions = extract_functions_from_ast(tree, filepath.name)

    try:
        rel = filepath.relative_to(REPO_ROOT)
    except ValueError:
        # The file is outside the repository (e.g. a caller-supplied
        # directory); report the path unchanged instead of crashing.
        rel = filepath
    return {
        # as_posix() keeps the generated output identical across platforms.
        "path": rel.as_posix(),
        "docstring": module_doc,
        "functions": functions,
    }
|
|
|
|
|
|
# ─── Scanning ──────────────────────────────────────────────────────────────────
|
|
def scan_scripts_dir(scripts_dir: Path) -> List[ModuleInfo]:
|
|
"""Scan all .py files in scripts/ and extract API info."""
|
|
modules: list[ModuleInfo] = []
|
|
for pyfile in sorted(scripts_dir.glob("*.py")):
|
|
info = parse_module(pyfile)
|
|
if info is not None:
|
|
modules.append(info)
|
|
return modules
|
|
|
|
|
|
# ─── Markdown rendering ─────────────────────────────────────────────────────────
|
|
def render_markdown(modules: List[ModuleInfo]) -> str:
    """Generate full docs/API.md content from the scanned modules.

    The document starts with a title and a ``*Generated: ...*`` line holding
    the current UTC time (minute resolution), followed by one ``##`` section
    per module and a footer with the module count.

    Args:
        modules: Scanned modules, rendered in the order given.

    Returns:
        The markdown text, lines joined with ``"\\n"`` and no trailing
        newline.
    """
    lines = [
        "# Compounding Intelligence — Scripts API Reference",
        "",
        f"*Generated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}*",
        "",
        "This document auto-documents the public API surface of all scripts",
        "in `scripts/`. Each section covers one script: module purpose,",
        "public functions, and their signatures.",
        "",
        "---",
        "",
    ]

    for mod in modules:
        rel = mod["path"]
        lines.append(f"## `{rel}`")
        lines.append("")
        if mod["docstring"]:
            lines.append(mod["docstring"])
            lines.append("")

        if mod["functions"]:
            lines.append("| Function | Signature | Description |")
            lines.append("|----------|-----------|-------------|")
            for fn in mod["functions"]:
                sig = fn["name"] + "(" + fn["signature"] + ")"
                # A literal "|" would end the table cell early, so escape it.
                desc = (fn["summary"] or "-").replace("|", "\\|")
                lines.append(f"| `{fn['name']}` | `{sig}` | {desc} |")
            lines.append("")
        else:
            lines.append("*(no public functions — script runs as `main()` only)*")
            lines.append("")

    lines.extend([
        "",
        "---",
        "",
        f"**Total scripts documented:** {len(modules)}",
        "",
        "*Generated by `scripts/api_doc_generator.py` (Issue #98)*",
    ])
    return "\n".join(lines)
|
|
|
|
|
|
# ─── JSON output (optional, for automation) ───────────────────────────────────
|
|
def render_json(modules: List[ModuleInfo]) -> str:
    """Serialize the scanned modules, plus generation metadata, as JSON text.

    The top-level object carries the current UTC time in ISO format, the
    generator script path, the repository name, and the ``modules`` list
    unchanged.  Output is indented by two spaces.
    """
    import json

    stamp = datetime.now(timezone.utc).isoformat()
    document = dict(
        generated_at=stamp,
        generator="scripts/api_doc_generator.py",
        repo="Timmy_Foundation/compounding-intelligence",
        modules=modules,
    )
    return json.dumps(document, indent=2)
|
|
|
|
|
|
# ─── Main ──────────────────────────────────────────────────────────────────────
|
|
def _without_timestamp(text: str) -> str:
    """Return ``text`` minus its ``*Generated: ...*`` line, for comparisons."""
    return "\n".join(
        line for line in text.split("\n") if not line.startswith("*Generated: ")
    )


def main() -> int:
    """Command-line entry point.

    Without flags, regenerates ``docs/API.md``.  With ``--json``, prints the
    JSON rendering to stdout and writes nothing.  With ``--check``, compares
    the existing document with a fresh render and writes nothing.

    Returns:
        0 on success; 1 when ``--check`` finds the document missing or
        out-of-date.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Generate API docs for scripts/")
    parser.add_argument("--check", action="store_true",
                        help="Exit 1 if docs/API.md is out-of-date")
    parser.add_argument("--json", action="store_true",
                        help="Emit JSON to stdout instead of writing markdown")
    args = parser.parse_args()

    modules = scan_scripts_dir(SCRIPTS_DIR)
    modules.sort(key=lambda m: m["path"])

    if args.json:
        print(render_json(modules))
        return 0

    md = render_markdown(modules)

    if args.check:
        if OUTPUT_PATH.exists():
            existing = OUTPUT_PATH.read_text(encoding="utf-8")
            # The render embeds the current time, so a byte-for-byte
            # comparison would fail whenever the clock minute has changed.
            # Compare everything except that timestamp line.
            if _without_timestamp(existing) == _without_timestamp(md):
                print("✅ docs/API.md is up-to-date")
                return 0
        print("❌ docs/API.md is missing or out-of-date — regenerate with "
              "`python3 scripts/api_doc_generator.py`", file=sys.stderr)
        return 1

    DOCS_DIR.mkdir(parents=True, exist_ok=True)
    OUTPUT_PATH.write_text(md, encoding="utf-8")
    print(f"✅ Wrote {OUTPUT_PATH} ({len(modules)} modules documented)")
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the exit status.
    raise SystemExit(main())
|