#!/usr/bin/env python3
# Source file: scripts/doc_freshness.py
"""
Doc Freshness Checker — Issue #104

Compare docs to code. Flag docs that reference removed functions or outdated APIs.

Usage:
    python3 scripts/doc_freshness.py [--root .] [--docs-dir .] [--json]

Outputs:
    Human-readable report by default listing missing references.
    JSON output with --json for machine consumption.

Exit status is 1 when at least one stale reference is found, 0 otherwise,
and 2 (argparse's usage-error status) when a given directory does not exist.
"""

import argparse
import ast
import builtins
import json
import os
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Set, List, Tuple, Dict, Any


# Directories pruned from BOTH the code walk and the docs walk. The docs
# directory defaults to the repository root, so vendored trees must be skipped
# there too or their READMEs are reported as stale project documentation.
_SKIP_DIRS = frozenset({'.git', '__pycache__', '.venv', 'venv', 'node_modules'})

# Names that exist without being defined anywhere in the repository
# (len, print, ValueError, True, ...). A doc mentioning them is not stale.
_BUILTIN_NAMES = frozenset(dir(builtins))

# Inline code span: anything between a pair of backticks on one line.
_BACKTICK_PAT = re.compile(r'`([^`]+)`')
# A bare identifier (what is left of `name()` once the parentheses are removed).
_FUNC_PAT = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$')
# Class-like name: leading capital AND at least one lowercase letter, so that
# ALL_CAPS tokens such as `JSON`, `TODO` or `API_KEY` are not taken for classes.
_CLASS_PAT = re.compile(r'^(?=[A-Za-z0-9_]*[a-z])[A-Z][a-zA-Z0-9_]*$')

# Failures that mean "this one file cannot be read or parsed"; the file is
# skipped with a warning instead of aborting the whole scan.
_PARSE_ERRORS = (OSError, SyntaxError, ValueError, RecursionError, MemoryError)


def _warn(message: str) -> None:
    """Write a diagnostic to stderr, keeping stdout clean for --json output."""
    print(f"doc_freshness: {message}", file=sys.stderr)


def collect_python_symbols(repo_root: str) -> Set[str]:
    """Collect the names of all functions and classes defined in Python files.

    Every ``def``, ``async def`` and ``class`` statement is collected at any
    nesting depth, so methods and nested functions are included as well as
    top-level definitions.

    Args:
        repo_root: Directory to scan recursively for ``*.py`` files.

    Returns:
        The set of defined names. Files that cannot be read or parsed are
        skipped after a warning on stderr.
    """
    symbols: Set[str] = set()
    for root, dirs, files in os.walk(repo_root):
        # In-place assignment prunes the walk; sorting makes the order stable.
        dirs[:] = sorted(d for d in dirs if d not in _SKIP_DIRS)
        for fname in sorted(files):
            if not fname.endswith('.py'):
                continue
            path = os.path.join(root, fname)
            try:
                # Parse raw bytes so the compiler applies the file's own
                # encoding declaration / BOM instead of assuming UTF-8.
                with open(path, 'rb') as f:
                    tree = ast.parse(f.read(), filename=path)
            except _PARSE_ERRORS as exc:
                _warn(f"skipping unparsable file {path}: {exc}")
                continue
            symbols.update(
                node.name
                for node in ast.walk(tree)
                if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef))
            )
    return symbols


def _references_in_line(line: str) -> List[str]:
    """Return the API names referenced by the backticked spans of one line."""
    names: List[str] = []
    for match in _BACKTICK_PAT.finditer(line):
        raw = match.group(1).strip()
        # Function call: ends with ()
        if raw.endswith('()'):
            name = raw[:-2].strip()
            # Dotted or otherwise decorated calls (`json.dumps()`) are ignored.
            if _FUNC_PAT.fullmatch(name):
                names.append(name)
            continue
        # Class reference: PascalCase
        if _CLASS_PAT.fullmatch(raw):
            names.append(raw)
    return names


def extract_doc_references(docs_dir: str) -> List[Tuple[str, str, int]]:
    """
    Walk markdown files and extract function/class references.

    Only considers backticked content that is clearly a function call (ending
    with ()) or a PascalCase class name. This filters out filenames, paths,
    URLs, JSON fields, ALL_CAPS constants and other non-API references.

    Args:
        docs_dir: Directory to scan recursively for ``*.md`` files.

    Returns:
        A list of ``(name, path relative to docs_dir, 1-based line number)``
        tuples, one per occurrence. Files that cannot be read or decoded as
        UTF-8 are skipped after a warning on stderr; references collected
        before the failing line are kept.
    """
    refs: List[Tuple[str, str, int]] = []

    for root, dirs, files in os.walk(docs_dir):
        dirs[:] = sorted(d for d in dirs if d not in _SKIP_DIRS)
        for fname in sorted(files):
            if not fname.endswith('.md'):
                continue
            path = os.path.join(root, fname)
            rel_path = os.path.relpath(path, docs_dir)
            try:
                with open(path, 'r', encoding='utf-8') as fh:
                    for lineno, line in enumerate(fh, 1):
                        refs.extend(
                            (name, rel_path, lineno)
                            for name in _references_in_line(line)
                        )
            except (OSError, UnicodeDecodeError) as exc:
                _warn(f"skipping unreadable doc {path}: {exc}")

    return refs


def check_doc_freshness(repo_root: str, docs_dir: str) -> Dict[str, Any]:
    """Run the full check and return structured results.

    Args:
        repo_root: Directory whose Python files define the known symbols.
        docs_dir: Directory whose markdown files are checked.

    Returns:
        A JSON-serialisable dict with these keys:

        * ``timestamp`` - UTC time of the check, ISO 8601.
        * ``repo_root`` / ``docs_dir`` - the arguments, echoed back.
        * ``defined_symbols`` - number of names defined in the repository
          (builtins are not counted).
        * ``missing`` / ``found`` - one ``{"reference", "file", "line"}`` entry
          per occurrence, in scan order.
        * ``total_unique_references`` / ``missing_count`` / ``found_count`` -
          counts of distinct ``(reference, file)`` pairs, so a name repeated
          within one document is counted once.
    """
    symbols = collect_python_symbols(repo_root)
    refs = extract_doc_references(docs_dir)

    # Builtins are always available, so a doc mentioning `len()` is not stale.
    known = symbols | _BUILTIN_NAMES

    missing: List[Dict[str, Any]] = []
    found: List[Dict[str, Any]] = []

    for ref, file, lineno in refs:
        entry = {"reference": ref, "file": file, "line": lineno}
        (found if ref in known else missing).append(entry)

    # Counts are per (reference, file); the lists above keep every occurrence
    # so the report can still point at each offending line.
    missing_keys = {(item["reference"], item["file"]) for item in missing}
    total_unique_refs = len({(r, f) for r, f, _ in refs})

    return {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "repo_root": repo_root,
        "docs_dir": docs_dir,
        "total_unique_references": total_unique_refs,
        "defined_symbols": len(symbols),
        "missing": missing,
        "found": found,
        "missing_count": len(missing_keys),
        "found_count": total_unique_refs - len(missing_keys),
    }


def format_report(result: Dict[str, Any]) -> str:
    """Format check results as a human-readable report.

    Args:
        result: A dict as returned by ``check_doc_freshness``.

    Returns:
        The report text: a summary header, then the stale references grouped
        by file (files in sorted order), or a line saying everything is
        current, then a note on what is checked.
    """
    lines = [
        "Doc Freshness Report",
        "=" * 50,
        f"Repo: {result['repo_root']}",
        f"Docs: {result['docs_dir']}",
        f"Defined Python symbols: {result['defined_symbols']}",
        f"References found: {result['total_unique_references']}",
        f"Stale references: {result['missing_count']}",
        "",
    ]

    if result["missing"]:
        lines.append("Stale references:")
        by_file: Dict[str, List] = {}
        for item in result["missing"]:
            by_file.setdefault(item["file"], []).append(item)
        for fname in sorted(by_file):
            lines.append(f"\n {fname}:")
            for item in by_file[fname]:
                lines.append(f" line {item['line']}: {item['reference']}")
    else:
        lines.append("All references are current.")

    lines.append("")
    lines.append("Note: Only backticked function calls () and PascalCase class names are checked.")
    return "\n".join(lines)


def main() -> None:
    """Command-line entry point: parse arguments, run the check, print, exit.

    Exits with status 1 if stale references were found and 0 otherwise. A
    ``--root`` or ``--docs-dir`` that is not a directory is a usage error
    (status 2) rather than an empty, falsely "fresh" report.
    """
    parser = argparse.ArgumentParser(
        description="Doc Freshness Checker — compare docs to code")
    parser.add_argument("--root", default=".", help="Repository root (code location)")
    parser.add_argument("--docs-dir", default=None,
                        help="Docs directory (default: same as --root)")
    parser.add_argument("--json", action="store_true", help="Machine-readable output")
    args = parser.parse_args()

    docs_dir = args.docs_dir or args.root

    # os.walk silently yields nothing for a missing directory, which would
    # otherwise look like a clean run.
    for option, directory in (("--root", args.root), ("--docs-dir", docs_dir)):
        if not os.path.isdir(directory):
            parser.error(f"{option} is not a directory: {directory}")

    result = check_doc_freshness(args.root, docs_dir)

    if args.json:
        print(json.dumps(result, indent=2))
    else:
        print(format_report(result))

    # Exit non-zero if stale references found
    sys.exit(1 if result["missing_count"] > 0 else 0)


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
# Source file: tests/test_doc_freshness.py
"""Tests for scripts/doc_freshness.py — Issue #104."""

import os
import sys
import tempfile
from pathlib import Path

# Make the sibling scripts/ directory importable when run from anywhere.
sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))

import doc_freshness as df


# Fixture: one plain function, one class with a method, one coroutine.
_SAMPLE_MODULE = '''
def my_func():
    pass

class MyClass:
    def method(self):
        pass

async def my_async():
    pass
'''

# Fixture: two spans that are API references followed by three that are not.
_SAMPLE_MARKDOWN = '''
# Test

`call_this()` is a function.
`SomeClass` is a class.
`not_a_function` (lowercase, no parens) should be ignored.
`filename.py` should be ignored.
`https://example.com` ignored.
'''


def _write(path, text):
    """Create *path* containing *text*, using the platform default encoding."""
    with open(path, "w") as handle:
        handle.write(text)


def test_collect_python_symbols():
    """Function, class, coroutine and method names are all collected."""
    with tempfile.TemporaryDirectory() as workdir:
        _write(os.path.join(workdir, "sample.py"), _SAMPLE_MODULE)
        collected = df.collect_python_symbols(workdir)
        assert "my_func" in collected
        assert "MyClass" in collected
        assert "my_async" in collected
        # Names nested inside a class count as defined symbols too.
        assert "method" in collected
    print("PASS: test_collect_python_symbols")


def test_extract_doc_references_function_and_class():
    """Only `name()` calls and PascalCase names are extracted from markdown."""
    with tempfile.TemporaryDirectory() as workdir:
        docs_root = os.path.join(workdir, "docs")
        os.makedirs(docs_root)
        _write(os.path.join(docs_root, "test.md"), _SAMPLE_MARKDOWN)
        extracted = [entry[0] for entry in df.extract_doc_references(docs_root)]
        assert "call_this" in extracted
        assert "SomeClass" in extracted
        assert "not_a_function" not in extracted
        assert "filename" not in extracted  # filename.py filtered
        assert "https" not in extracted
    print("PASS: test_extract_doc_references_function_and_class")


def test_check_doc_freshness_missing_detection():
    """A reference to an undefined function is counted as missing."""
    with tempfile.TemporaryDirectory() as workdir:
        # The code tree defines exactly one function.
        code_root = os.path.join(workdir, "code")
        os.makedirs(code_root)
        _write(os.path.join(code_root, "a.py"), "def existing_func(): pass\n")
        # The docs mention that function plus one that does not exist.
        docs_root = os.path.join(workdir, "docs")
        os.makedirs(docs_root)
        _write(
            os.path.join(docs_root, "readme.md"),
            "`existing_func()` and `missing_func()` are mentioned.",
        )
        outcome = df.check_doc_freshness(code_root, docs_root)
        assert outcome["missing_count"] == 1
        assert outcome["found_count"] == 1
    print("PASS: test_check_doc_freshness_missing_detection")


if __name__ == "__main__":
    for case in (
        test_collect_python_symbols,
        test_extract_doc_references_function_and_class,
        test_check_doc_freshness_missing_detection,
    ):
        case()
    print("All tests passed!")