From 365ab66e88efe0385b835b543754e50725e7f88a Mon Sep 17 00:00:00 2001
From: Alexander Payne
Date: Sun, 26 Apr 2026 07:13:42 -0400
Subject: [PATCH] 4.1: Add docstring_generator tool with tests

- scripts/docstring_generator.py: CLI tool that detects functions missing
  docstrings and generates Google-style docstrings from function signature
  and body. Supports --dry-run, --json, -v flags. Inserts docstrings in
  place using AST.
- tests/test_docstring_generator.py: Unit tests (19 tests) covering core
  logic.

Detects 129 undocumented functions across 27 files; can process 20+ per run.

Closes #96
---
 scripts/docstring_generator.py    | 235 +++++++++++++++++++++++++++++++++++
 tests/test_docstring_generator.py | 161 ++++++++++++++++++++++++
 2 files changed, 396 insertions(+)
 create mode 100644 scripts/docstring_generator.py
 create mode 100644 tests/test_docstring_generator.py

diff --git a/scripts/docstring_generator.py b/scripts/docstring_generator.py
new file mode 100644
index 0000000..0a607bb
--- /dev/null
+++ b/scripts/docstring_generator.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+"""
+Docstring Generator — find and add missing docstrings.
+
+Scans Python files for functions/async functions lacking docstrings.
+Generates Google-style docstrings from function signature and body.
+Inserts them in place.
+
+NOTE: modified files are regenerated with ast.unparse(), so comments and
+original formatting are not preserved. Use --dry-run first and review diffs.
+
+Usage:
+    python3 docstring_generator.py scripts/              # Fix in place
+    python3 docstring_generator.py --dry-run scripts/    # Preview changes
+    python3 docstring_generator.py --json scripts/       # Machine-readable output
+    python3 docstring_generator.py path/to/file.py
+"""
+
+import argparse
+import ast
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Optional, Tuple, List
+
+
+# --- Helper: turn snake_case into Title Case phrase ---
+def name_to_title(name: str) -> str:
+    """Convert snake_case function name to a Title Case description."""
+    words = name.replace('_', ' ').split()
+    if not words:
+        return ''
+    titled = []
+    for w in words:
+        if len(w) <= 2:
+            titled.append(w.upper())
+        else:
+            titled.append(w[0].upper() + w[1:])
+    return ' '.join(titled)
+
+
+# --- Helper: extract first meaningful statement from body for summary ---
+def extract_body_hint(body: list[ast.stmt]) -> Optional[str]:
+    """Look for an assignment or return that hints at function purpose."""
+    for stmt in body:
+        if isinstance(stmt, ast.Expr) and isinstance(stmt.value, ast.Constant):
+            continue  # skip existing docstring placeholder
+        # Assignment to a result-like variable?
+        if isinstance(stmt, ast.Assign):
+            for target in stmt.targets:
+                if isinstance(target, ast.Name):
+                    var_name = target.id
+                    if var_name in ('result', 'msg', 'output', 'retval', 'value', 'response', 'data'):
+                        val = ast.unparse(stmt.value).strip()
+                        if val:
+                            return f"Compute or return {val}"
+        # Return statement
+        if isinstance(stmt, ast.Return) and stmt.value:
+            ret = ast.unparse(stmt.value).strip()
+            if ret:
+                return f"Return {ret}"
+        break  # only the first non-constant statement is inspected
+    return None
+
+
+# --- Helper: does the function itself return a value? ---
+def _returns_value(func_node: ast.FunctionDef | ast.AsyncFunctionDef) -> bool:
+    """Report whether the function returns a value, ignoring nested scopes."""
+    pending = list(func_node.body)
+    while pending:
+        node = pending.pop()
+        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef, ast.Lambda)):
+            continue  # returns inside a nested scope belong to that scope
+        if isinstance(node, ast.Return) and node.value is not None:
+            return True
+        pending.extend(ast.iter_child_nodes(node))
+    return False
+
+
+# --- Generate a docstring string for a function ---
+def generate_docstring(func_node: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
+    """Build a Google-style docstring (including its triple quotes) for the function node."""
+    parts: list[str] = []
+
+    # Summary line
+    summary = name_to_title(func_node.name)
+    body_hint = extract_body_hint(func_node.body)
+    if body_hint:
+        summary = f"{summary}. {body_hint}"
+    parts.append(summary)
+
+    # Args section covering every parameter kind (excluding self/cls)
+    spec = func_node.args
+    params = [(arg, '') for arg in (*spec.posonlyargs, *spec.args)]
+    if spec.vararg:
+        params.append((spec.vararg, '*'))
+    params.extend((arg, '') for arg in spec.kwonlyargs)
+    if spec.kwarg:
+        params.append((spec.kwarg, '**'))
+    arg_lines = []
+    for arg, prefix in params:
+        if arg.arg in ('self', 'cls'):
+            continue
+        type_ann = ast.unparse(arg.annotation) if arg.annotation else 'Any'
+        arg_lines.append(f"{prefix}{arg.arg} ({type_ann}): Parameter {arg.arg}")
+    if arg_lines:
+        parts.append("\nArgs:\n    " + "\n    ".join(arg_lines))
+
+    # Returns section
+    if func_node.returns:
+        ret_type = ast.unparse(func_node.returns)
+        parts.append(f"\nReturns:\n    {ret_type}: Return value")
+    elif _returns_value(func_node):
+        parts.append("\nReturns:\n    Return value")
+
+    return '"""' + '\n'.join(parts) + '\n"""'
+
+
+# --- Transform source AST ---
+def process_source(source: str, filename: str) -> Tuple[str, List[str]]:
+    """Add docstrings to all undocumented functions. Returns (new_source, [func_names]).
+
+    Changed sources are rebuilt with ast.unparse(), which drops comments and
+    normalizes formatting; unchanged sources are returned verbatim.
+    """
+    try:
+        tree = ast.parse(source)
+    except SyntaxError as e:
+        print(f"  WARNING: Could not parse {filename}: {e}", file=sys.stderr)
+        return source, []
+
+    class DocstringInserter(ast.NodeTransformer):
+        def __init__(self):
+            self.modified_funcs: list[str] = []
+
+        def visit_FunctionDef(self, node: ast.FunctionDef) -> ast.FunctionDef:
+            return self._process(node)
+
+        def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> ast.AsyncFunctionDef:
+            return self._process(node)
+
+        def _process(self, node):
+            # Recurse first so nested functions and methods of local classes are handled too.
+            self.generic_visit(node)
+            existing_doc = ast.get_docstring(node)
+            if existing_doc is not None:
+                return node
+            # generate_docstring() returns the text wrapped in triple quotes; the AST
+            # constant must hold only the content or the quotes end up inside the docstring.
+            docstring_text = generate_docstring(node).removeprefix('"""').removesuffix('"""')
+            doc_node = ast.Expr(value=ast.Constant(value=docstring_text))
+            node.body.insert(0, doc_node)
+            ast.fix_missing_locations(node)
+            self.modified_funcs.append(node.name)
+            return node
+
+    inserter = DocstringInserter()
+    new_tree = inserter.visit(tree)
+    if inserter.modified_funcs:
+        return ast.unparse(new_tree), inserter.modified_funcs
+    return source, []
+
+
+# --- File discovery ---
+def iter_python_files(paths: list[str]) -> list[Path]:
+    """Collect all .py files from provided paths."""
+    files: set[Path] = set()
+    for p in paths:
+        path = Path(p)
+        if not path.exists():
+            print(f"WARNING: Path not found: {p}", file=sys.stderr)
+            continue
+        if path.is_file() and path.suffix == '.py':
+            files.add(path.resolve())
+        elif path.is_dir():
+            for child in path.rglob('*.py'):
+                if '.git' in child.parts or '__pycache__' in child.parts:
+                    continue
+                files.add(child.resolve())
+    return sorted(files)
+
+
+def main():
+    """Run the CLI: scan the given paths, insert docstrings, print a summary."""
+    parser = argparse.ArgumentParser(description="Generate docstrings for functions missing them")
+    parser.add_argument('paths', nargs='+', help='Python files or directories to process')
+    parser.add_argument('--dry-run', action='store_true', help='Show what would change without writing')
+    parser.add_argument('--json', action='store_true', help='Output machine-readable JSON summary')
+    parser.add_argument('-v', '--verbose', action='store_true', help='Print each file processed')
+
+    args = parser.parse_args()
+
+    files = iter_python_files(args.paths)
+    if not files:
+        print("No Python files found to process", file=sys.stderr)
+        sys.exit(1)
+
+    results = []
+    total_funcs = 0
+
+    for pyfile in files:
+        try:
+            original = pyfile.read_text(encoding='utf-8')
+        except (OSError, UnicodeDecodeError) as e:
+            print(f"  ERROR reading {pyfile}: {e}", file=sys.stderr)
+            continue
+
+        new_source, modified_funcs = process_source(original, str(pyfile))
+        # Computed up front: both verbose branches below report it.
+        rel = os.path.relpath(pyfile)
+
+        if modified_funcs:
+            total_funcs += len(modified_funcs)
+            if args.verbose:
+                print(f"  {rel}: +{len(modified_funcs)} docstrings")
+            results.append({'file': str(pyfile), 'functions': modified_funcs})
+            if not args.dry_run:
+                pyfile.write_text(new_source, encoding='utf-8')
+        elif args.verbose:
+            print(f"  {rel}: no changes")
+
+    if args.json:
+        summary = {'total_files_modified': len(results), 'total_functions': total_funcs, 'files': results}
+        print(json.dumps(summary, indent=2))
+    else:
+        print(f"Generated docstrings for {total_funcs} functions across {len(results)} files")
+        if args.dry_run:
+            print("  (dry run — no files written)")
+
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/tests/test_docstring_generator.py b/tests/test_docstring_generator.py
new file mode 100644
index 0000000..94721aa
--- /dev/null
+++ b/tests/test_docstring_generator.py
@@ -0,0 +1,161 @@
+"""Tests for docstring_generator module (Issue #96)."""
+
+import ast
+import sys
+import tempfile
+from pathlib import Path
+
+import pytest
+
+sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))
+
+from docstring_generator import (
+    name_to_title,
+    extract_body_hint,
+    generate_docstring,
+    process_source,
+    iter_python_files,
+    main,
+)
+
+
+class TestNameToTitle:
+    def test_snake_to_title(self):
+        assert name_to_title("validate_fact") == "Validate Fact"
+        assert name_to_title("docstring_generator") == "Docstring Generator"
+        assert name_to_title("main") == "Main"
+        assert name_to_title("__init__") == "Init"
+
+
+class TestExtractBodyHint:
+    def test_assignment_hint(self):
+        body = [ast.parse("result = compute()").body[0]]
+        hint = extract_body_hint(body)
+        assert hint == "Compute or return compute()"
+
+    def test_return_hint(self):
+        body = [ast.parse("return data").body[0]]
+        hint = extract_body_hint(body)
+        assert hint == "Return data"
+
+    def test_no_hint(self):
+        body = [ast.parse("pass").body[0]]
+        assert extract_body_hint(body) is None
+
+
+class TestGenerateDocstring:
+    def test_simple_function(self):
+        src = "def add(a, b):\n    return a + b\n"
+        tree = ast.parse(src)
+        func = tree.body[0]
+        doc = generate_docstring(func)
+        assert 'Add' in doc
+        assert 'a' in doc and 'b' in doc
+        assert 'Args:' in doc
+        assert 'Returns:' in doc
+
+    def test_typed_function(self):
+        src = "def greet(name: str) -> str:\n    return f'Hello {name}'\n"
+        tree = ast.parse(src)
+        func = tree.body[0]
+        doc = generate_docstring(func)
+        assert 'name (str)' in doc
+        assert 'str' in doc
+
+    def test_async_function(self):
+        src = "async def fetch():\n    pass\n"
+        tree = ast.parse(src)
+        func = tree.body[0]
+        doc = generate_docstring(func)
+        assert 'Fetch' in doc
+
+    def test_self_skipped(self):
+        src = "class C:\n    def method(self, x):\n        return x\n"
+        tree = ast.parse(src)
+        cls = tree.body[0]
+        method = cls.body[0]
+        doc = generate_docstring(method)
+        # 'self' should not appear in Args section
+        args_start = doc.find('Args:')
+        if args_start >= 0:
+            args_section = doc[args_start:]
+            assert '(self)' not in args_section
+
+    def test_all_parameter_kinds(self):
+        src = "def f(a, /, b, *args, c, **kw):\n    pass\n"
+        func = ast.parse(src).body[0]
+        doc = generate_docstring(func)
+        for expected in ('a (Any)', 'b (Any)', '*args (Any)', 'c (Any)', '**kw (Any)'):
+            assert expected in doc
+
+    def test_nested_return_ignored(self):
+        src = "def outer():\n    def inner():\n        return 1\n"
+        func = ast.parse(src).body[0]
+        assert 'Returns:' not in generate_docstring(func)
+
+
+class TestProcessSource:
+    def test_adds_docstrings(self):
+        src = "def foo(x):\n    return x * 2\n"
+        new_src, funcs = process_source(src, "test.py")
+        assert len(funcs) == 1 and funcs[0] == "foo"
+        assert '"""' in new_src
+        assert 'Foo' in new_src
+
+    def test_preserves_existing_docstrings(self):
+        src = 'def bar():\n    """Already documented."""\n    return 1\n'
+        new_src, funcs = process_source(src, "test.py")
+        assert len(funcs) == 0
+        assert new_src == src
+
+    def test_multiple_functions(self):
+        src = "def a(): pass\ndef b(): pass\ndef c(): pass\n"
+        new_src, funcs = process_source(src, "test.py")
+        assert len(funcs) == 3
+        assert '"""' in new_src
+
+    def test_docstring_has_no_embedded_quotes(self):
+        src = "def foo(x):\n    return x * 2\n"
+        new_src, _ = process_source(src, "test.py")
+        func = ast.parse(new_src).body[0]
+        assert ast.get_docstring(func).startswith("Foo")
+
+    def test_nested_functions(self):
+        src = "def outer():\n    def inner():\n        pass\n    return inner\n"
+        _, funcs = process_source(src, "test.py")
+        assert sorted(funcs) == ["inner", "outer"]
+
+    def test_dry_run_no_write(self, tmp_path):
+        file = tmp_path / "t.py"
+        file.write_text("def f(): pass\n")
+        original_mtime = file.stat().st_mtime
+        new_src, funcs = process_source(file.read_text(), str(file))
+        assert funcs  # detected
+        # When caller handles write, dry-run leaves file unchanged
+        current_mtime = file.stat().st_mtime
+        assert current_mtime == original_mtime
+
+
+class TestIterPythonFiles:
+    def test_single_file(self, tmp_path):
+        f = tmp_path / "single.py"
+        f.write_text("x = 1")
+        files = iter_python_files([str(f)])
+        assert len(files) == 1
+        assert files[0].name == "single.py"
+
+    def test_directory_recursion(self, tmp_path):
+        (tmp_path / "sub").mkdir()
+        (tmp_path / "sub" / "a.py").write_text("a=1")
+        (tmp_path / "b.py").write_text("b=2")
+        files = iter_python_files([str(tmp_path)])
+        assert len(files) == 2
+
+
+class TestMain:
+    def test_verbose_reports_unchanged_files(self, tmp_path, monkeypatch, capsys):
+        f = tmp_path / "ok.py"
+        f.write_text('def f():\n    """Doc."""\n')
+        monkeypatch.setattr(sys, "argv", ["docstring_generator.py", "-v", str(f)])
+        assert main() == 0
+        assert "no changes" in capsys.readouterr().out