#!/usr/bin/env python3
"""
Doc Freshness Checker — Issue #104

Compare docs to code. Flag docs that reference removed functions or outdated APIs.

Usage:
    python3 scripts/doc_freshness.py [--root .] [--docs-dir .] [--json]

Outputs:
    Human-readable report by default listing missing references.
    JSON output with --json for machine consumption.
"""

import argparse
import ast
import builtins
import json
import os
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Set, List, Tuple, Dict, Any

# Directories pruned from BOTH the code walk and the docs walk. The docs
# directory defaults to the repo root, so vendored markdown (node_modules,
# virtualenvs) must be skipped there too or it is reported as stale.
_SKIP_DIRS = frozenset({'.git', '__pycache__', '.venv', 'venv', 'node_modules'})

# Names that are always available without being defined in the repo
# (len, print, ValueError, True, ...). A doc that mentions them is not stale.
_BUILTIN_NAMES = frozenset(dir(builtins))

_DEFINITION_NODES = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)

_BACKTICK_PAT = re.compile(r'`([^`]+)`')
_FUNC_PAT = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
_CLASS_PAT = re.compile(r'^[A-Z][a-zA-Z0-9_]*$')


def collect_python_symbols(repo_root: str) -> Set[str]:
    """Collect the names of all functions and classes defined in Python files.

    Every ``def``, ``async def`` and ``class`` statement is included, at any
    nesting depth (methods and nested definitions too, not only top-level
    ones). Files that cannot be read or parsed are skipped.

    Args:
        repo_root: Directory to walk recursively for ``*.py`` files.

    Returns:
        The set of bare definition names (no module or class qualification).
    """
    symbols: Set[str] = set()
    for root, dirs, files in os.walk(repo_root):
        # In-place assignment prunes the walk; sorting keeps it deterministic.
        dirs[:] = sorted(d for d in dirs if d not in _SKIP_DIRS)
        for fname in sorted(files):
            if not fname.endswith('.py'):
                continue
            path = os.path.join(root, fname)
            try:
                # Read bytes so the parser itself handles a BOM or a PEP 263
                # coding declaration instead of us forcing UTF-8.
                with open(path, 'rb') as f:
                    tree = ast.parse(f.read())
            except (OSError, SyntaxError, ValueError, RecursionError):
                # Best effort: unreadable or unparsable files contribute nothing.
                continue
            for node in ast.walk(tree):
                if isinstance(node, _DEFINITION_NODES):
                    symbols.add(node.name)
    return symbols


def _references_in_line(line: str) -> List[str]:
    """Return the API names referenced in backticks on one markdown line."""
    names: List[str] = []
    for match in _BACKTICK_PAT.finditer(line):
        raw = match.group(1).strip()
        if raw.endswith('()'):
            # Function call: a bare identifier followed by empty parentheses.
            name = raw[:-2].strip()
            if _FUNC_PAT.fullmatch(name):
                names.append(name)
        elif _CLASS_PAT.fullmatch(raw):
            # Class reference: identifier starting with an uppercase letter.
            names.append(raw)
    return names


def extract_doc_references(docs_dir: str) -> List[Tuple[str, str, int]]:
    """Walk markdown files and extract function/class references.

    Only considers backticked content that is clearly a function call
    (a bare identifier ending with ``()``) or an identifier starting with an
    uppercase letter (PascalCase class name). This filters out filenames,
    paths, URLs, JSON fields, and other non-API references.

    Args:
        docs_dir: Directory to walk recursively for ``*.md`` files.

    Returns:
        One ``(name, path relative to docs_dir, 1-based line number)`` tuple
        per occurrence, in sorted-walk order then order of appearance.
        Unreadable files are skipped.
    """
    refs: List[Tuple[str, str, int]] = []
    for root, dirs, files in os.walk(docs_dir):
        dirs[:] = sorted(d for d in dirs if d not in _SKIP_DIRS)
        for fname in sorted(files):
            if not fname.endswith('.md'):
                continue
            path = os.path.join(root, fname)
            rel_path = os.path.relpath(path, docs_dir)
            try:
                # errors='replace' so one undecodable byte does not abort the
                # file half-way and leave a partial set of references.
                with open(path, 'r', encoding='utf-8', errors='replace') as fh:
                    for lineno, line in enumerate(fh, 1):
                        for name in _references_in_line(line):
                            refs.append((name, rel_path, lineno))
            except OSError:
                continue
    return refs


def check_doc_freshness(repo_root: str, docs_dir: str) -> Dict[str, Any]:
    """Run the full check and return structured results.

    Args:
        repo_root: Directory whose Python files define the known symbols.
        docs_dir: Directory whose markdown files are checked.

    Returns:
        A JSON-serialisable dict. ``missing`` and ``found`` list every
        occurrence (with line numbers); ``missing_count``, ``found_count`` and
        ``total_unique_references`` count unique ``(reference, file)`` pairs.
        ``defined_symbols`` counts symbols defined in the repo only.
    """
    symbols = collect_python_symbols(repo_root)
    refs = extract_doc_references(docs_dir)

    # Builtins are never defined in the repo but are not stale references.
    known = symbols | _BUILTIN_NAMES

    missing: List[Dict[str, Any]] = []
    found: List[Dict[str, Any]] = []
    for ref, file, lineno in refs:
        entry = {"reference": ref, "file": file, "line": lineno}
        if ref in known:
            found.append(entry)
        else:
            missing.append(entry)

    # The lists keep every occurrence; only the counts are deduplicated,
    # by (reference, file).
    missing_keys = {(item["reference"], item["file"]) for item in missing}
    total_unique_refs = len({(r, f) for r, f, _ in refs})

    return {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "repo_root": repo_root,
        "docs_dir": docs_dir,
        "total_unique_references": total_unique_refs,
        "defined_symbols": len(symbols),
        "missing": missing,
        "found": found,
        "missing_count": len(missing_keys),
        "found_count": total_unique_refs - len(missing_keys),
    }


def format_report(result: Dict[str, Any]) -> str:
    """Format check results as a human-readable report.

    Args:
        result: A dict as returned by ``check_doc_freshness``.

    Returns:
        The multi-line report text, stale references grouped by file.
    """
    lines = [
        "Doc Freshness Report",
        "=" * 50,
        f"Repo: {result['repo_root']}",
        f"Docs: {result['docs_dir']}",
        f"Defined Python symbols: {result['defined_symbols']}",
        f"References found: {result['total_unique_references']}",
        f"Stale references: {result['missing_count']}",
        "",
    ]

    if result["missing"]:
        lines.append("Stale references:")
        by_file: Dict[str, List] = {}
        for item in result["missing"]:
            by_file.setdefault(item["file"], []).append(item)
        for fname in sorted(by_file):
            lines.append(f"\n  {fname}:")
            for item in by_file[fname]:
                lines.append(f"    line {item['line']}: {item['reference']}")
    else:
        lines.append("All references are current.")

    lines.append("")
    lines.append("Note: Only backticked function calls () and PascalCase class names are checked.")
    return "\n".join(lines)


def main() -> None:
    """Parse CLI arguments, run the check, print it, and set the exit code."""
    parser = argparse.ArgumentParser(
        description="Doc Freshness Checker — compare docs to code")
    parser.add_argument("--root", default=".",
                        help="Repository root (code location)")
    parser.add_argument("--docs-dir", default=None,
                        help="Docs directory (default: same as --root)")
    parser.add_argument("--json", action="store_true",
                        help="Machine-readable output")
    args = parser.parse_args()

    docs_dir = args.docs_dir or args.root
    result = check_doc_freshness(args.root, docs_dir)

    if args.json:
        print(json.dumps(result, indent=2))
    else:
        print(format_report(result))

    # Exit non-zero if stale references found
    sys.exit(1 if result["missing_count"] > 0 else 0)


if __name__ == "__main__":
    main()