Compare commits
1 Commits
step35/112
...
step35/96-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
365ab66e88 |
@@ -1,112 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Dependency Bloat Detector — find declared packages never imported
|
||||
|
||||
Usage:
|
||||
python3 scripts/dependency_bloat_detector.py
|
||||
python3 scripts/dependency_bloat_detector.py --output json
|
||||
"""
|
||||
|
||||
import ast
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Set, List, Tuple
|
||||
|
||||
|
||||
def extract_imports_from_py_files(repo_path: Path) -> Set[str]:
    """Walk the repo and return the set of top-level imported module names.

    Args:
        repo_path: Root directory that is searched recursively for ``*.py``
            files.

    Returns:
        The first dotted component of every absolute import found
        (``import a.b`` and ``from a.b import c`` both contribute ``"a"``).
        Relative imports (``from .x import y``) are ignored because they
        name modules of the repository itself, not installed packages.
        Files that cannot be read or parsed are skipped.
    """
    exclude_dirs = {".git", "venv", ".venv", "__pycache__", "node_modules",
                    "dist", "build", ".tox", "vendor"}
    imports: Set[str] = set()

    for fpath in repo_path.rglob("*.py"):
        # Test only the path *inside* the repo. Looking at the absolute path
        # would exclude every file when the checkout itself lives under a
        # directory called e.g. "build" or "vendor".
        if any(part in exclude_dirs for part in fpath.relative_to(repo_path).parts):
            continue

        try:
            tree = ast.parse(fpath.read_text(errors="ignore"))
        except (OSError, SyntaxError, ValueError, RecursionError):
            # Best effort: one unreadable/unparsable file must not abort the scan.
            continue

        for node in ast.walk(tree):
            if isinstance(node, ast.Import):
                for alias in node.names:
                    imports.add(alias.name.split('.')[0])
            elif isinstance(node, ast.ImportFrom):
                # level > 0 marks a relative import; module is None for
                # ``from . import x``.
                if node.level == 0 and node.module:
                    imports.add(node.module.split('.')[0])

    return imports
|
||||
|
||||
|
||||
def parse_requirements_txt(req_path: Path) -> List[Tuple[str, str]]:
    """Parse requirements.txt and return a list of (package_name, raw_line).

    Args:
        req_path: Path of the requirements file.

    Returns:
        One ``(name, line)`` tuple per requirement, in file order, where
        ``line`` is the requirement with comments removed and ``name`` is the
        project name without extras, version specifiers or markers. An empty
        list is returned when the file does not exist.

        Comment lines, blank lines, pip option lines (``-r``, ``-e``,
        ``--index-url`` ...) and bare URL requirements are skipped because
        they do not start with a project name.
    """
    if not req_path.exists():
        return []

    declared: List[Tuple[str, str]] = []
    for raw in req_path.read_text(encoding="utf-8").splitlines():
        # A '#' starts a comment only at line start or after whitespace, so a
        # URL fragment such as "#egg=name" is left intact.
        line = re.sub(r'(^|\s+)#.*$', '', raw).strip()
        if not line:
            continue
        # Option lines begin with '-'; they are not packages.
        if line.startswith('-'):
            continue
        # Bare VCS/URL requirements carry no leading project name.
        if re.match(r'[A-Za-z][A-Za-z0-9+.-]*://', line):
            continue
        # Project names may contain '.', '_' and '-' but must start with an
        # alphanumeric character.
        pkg_match = re.match(r'[A-Za-z0-9][A-Za-z0-9._-]*', line)
        if pkg_match:
            declared.append((pkg_match.group(0), line))
    return declared
|
||||
|
||||
|
||||
def main():
    """Compare imports in the current directory against requirements.txt.

    Prints a text report by default, or a JSON document when invoked with
    ``--output json`` (the form advertised in the module usage text).

    Package and module names are compared case-insensitively with ``-``
    treated as ``_``. Distributions whose import name differs entirely from
    the project name (for example ``PyYAML`` / ``yaml``) are still reported
    and need manual review. Standard-library modules are left out of the
    "undeclared" list when the interpreter exposes
    ``sys.stdlib_module_names``.

    Raises:
        SystemExit: Always. Status 1 when unused dependencies were found,
            0 otherwise (2 on invalid command-line arguments).
    """
    import argparse  # local import: only this entry point needs it

    parser = argparse.ArgumentParser(
        description="Find declared packages that are never imported")
    parser.add_argument('--output', choices=('text', 'json'), default='text',
                        help='Report format (default: text)')
    args = parser.parse_args()

    repo_path = Path('.').resolve()
    req_path = repo_path / 'requirements.txt'

    # 1. Scan imports
    used = extract_imports_from_py_files(repo_path)

    # 2. Parse declared deps
    declared = parse_requirements_txt(req_path)
    declared_names = [pkg for pkg, _ in declared]

    # 3. Compare on normalized names
    def _norm(name):
        return name.lower().replace('-', '_')

    used_norm = {_norm(imp) for imp in used}
    declared_norm = {_norm(pkg) for pkg in declared_names}
    # Attribute exists on Python 3.10+; fall back to "filter nothing".
    stdlib = getattr(sys, 'stdlib_module_names', frozenset())

    unused = [(raw, pkg) for pkg, raw in declared if _norm(pkg) not in used_norm]
    # Sorted so the report is stable between runs (set order is arbitrary).
    missing_from_req = sorted(
        imp for imp in used
        if imp not in stdlib and _norm(imp) not in declared_norm
    )

    # 4. Output
    if args.output == 'json':
        print(json.dumps({
            'repository': repo_path.name,
            'requirements': str(req_path),
            'declared': declared_names,
            'imported': sorted(used),
            'unused': [pkg for _, pkg in unused],
            'undeclared': missing_from_req,
        }, indent=2))
        sys.exit(1 if unused else 0)

    print("=" * 60)
    print(" DEPENDENCY BLOAT DETECTOR")
    print("=" * 60)
    print(f" Repository: {repo_path.name}")
    print(f" Requirements: {req_path}")
    print(f" Python files: {len(list(repo_path.rglob('*.py')))}")
    print()
    print(f" Declared packages ({len(declared_names)}): {declared_names}")
    print(f" Imported packages ({len(used)}): {sorted(used)}")
    print()
    if unused:
        print(" UNUSED DEPENDENCIES (bloat):")
        for raw, pkg in unused:
            print(f" ✗ {raw}")
    else:
        print(" No unused dependencies detected.")
    print()
    if missing_from_req:
        print(" UNDECLARED IMPORTS (used but not in requirements.txt):")
        for imp in missing_from_req:
            print(f" ! {imp}")
        print()
    print("=" * 60)

    # Exit code: 0 if no bloat, 1 if unused deps found
    sys.exit(1 if unused else 0)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
203
scripts/docstring_generator.py
Normal file
203
scripts/docstring_generator.py
Normal file
@@ -0,0 +1,203 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Docstring Generator — find and add missing docstrings.
|
||||
|
||||
Scans Python files for functions/async functions lacking docstrings.
|
||||
Generates Google-style docstrings from function signature and body.
|
||||
Inserts them in place.
|
||||
|
||||
Usage:
|
||||
python3 docstring_generator.py scripts/ # Fix in place
|
||||
python3 docstring_generator.py --dry-run scripts/ # Preview changes
|
||||
python3 docstring_generator.py --json scripts/ # Machine-readable output
|
||||
python3 docstring_generator.py path/to/file.py
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import ast
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from typing import Optional, Tuple, List
|
||||
|
||||
|
||||
# --- Helper: turn snake_case into Title Case phrase ---
|
||||
def name_to_title(name: str) -> str:
    """Turn a snake_case identifier into a space-separated title phrase.

    Underscores become word breaks. Words of one or two characters are
    upper-cased entirely; longer words only get their first character
    upper-cased and keep the rest unchanged. A name without any word
    characters yields an empty string.
    """
    pieces = [
        chunk.upper() if len(chunk) <= 2 else chunk[:1].upper() + chunk[1:]
        for chunk in name.replace('_', ' ').split()
    ]
    return ' '.join(pieces)
|
||||
|
||||
|
||||
# --- Helper: extract first meaningful statement from body for summary ---
|
||||
def extract_body_hint(body: list[ast.stmt]) -> Optional[str]:
    """Derive a short purpose hint from the first meaningful statement.

    Leading constant expressions (such as a docstring) are skipped. Only the
    first remaining statement is inspected: an assignment to a result-like
    name or a ``return`` with a value produces a hint, anything else gives
    ``None``.
    """
    result_like = ('result', 'msg', 'output', 'retval', 'value', 'response', 'data')

    # Locate the first statement that is not a bare constant expression.
    first = next(
        (s for s in body
         if not (isinstance(s, ast.Expr) and isinstance(s.value, ast.Constant))),
        None,
    )
    if first is None:
        return None

    if isinstance(first, ast.Assign):
        for target in first.targets:
            if isinstance(target, ast.Name) and target.id in result_like:
                rendered = ast.unparse(first.value).strip()
                if rendered:
                    return f"Compute or return {rendered}"
        return None

    if isinstance(first, ast.Return) and first.value:
        rendered = ast.unparse(first.value).strip()
        if rendered:
            return f"Return {rendered}"

    return None
|
||||
|
||||
|
||||
# --- Generate a docstring string for a function ---
|
||||
def generate_docstring(func_node: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
    """Build a Google-style docstring for the given function node.

    Args:
        func_node: Parsed ``def`` / ``async def`` node to describe.

    Returns:
        The docstring as source text, *including* the surrounding
        triple double quotes. It consists of a summary line derived from the
        function name (plus a body hint when one is found), an ``Args:``
        section covering every parameter kind except ``self``/``cls``, and a
        ``Returns:`` section when the function is annotated with a non-None
        return type or, if unannotated, returns a value itself.
    """
    def _own_nodes(root):
        """Yield nodes under *root* without entering nested defs/classes."""
        pending = list(root.body)
        while pending:
            node = pending.pop()
            yield node
            # A return inside a nested scope belongs to that scope.
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
                continue
            pending.extend(ast.iter_child_nodes(node))

    parts: list[str] = []

    # Summary line
    summary = name_to_title(func_node.name)
    body_hint = extract_body_hint(func_node.body)
    if body_hint:
        summary = f"{summary}. {body_hint}"
    parts.append(summary)

    # Args section: positional-only, regular, *args, keyword-only, **kwargs
    spec = func_node.args
    params = [(a.arg, a) for a in spec.posonlyargs + spec.args]
    if spec.vararg:
        params.append(('*' + spec.vararg.arg, spec.vararg))
    params.extend((a.arg, a) for a in spec.kwonlyargs)
    if spec.kwarg:
        params.append(('**' + spec.kwarg.arg, spec.kwarg))

    arg_lines = []
    for label, arg in params:
        if arg.arg in ('self', 'cls'):
            continue
        type_ann = ast.unparse(arg.annotation) if arg.annotation else 'Any'
        arg_lines.append(f"{label} ({type_ann}): Parameter {arg.arg}")
    if arg_lines:
        parts.append("\nArgs:\n    " + "\n    ".join(arg_lines))

    # Returns section
    returns = func_node.returns
    annotated_none = isinstance(returns, ast.Constant) and returns.value is None
    if returns is not None:
        # "-> None" documents that nothing is returned; omit the section.
        if not annotated_none:
            parts.append(f"\nReturns:\n    {ast.unparse(returns)}: Return value")
    elif any(isinstance(s, ast.Return) and s.value is not None
             for s in _own_nodes(func_node)):
        parts.append("\nReturns:\n    Return value")

    return '"""' + '\n'.join(parts) + '\n"""'
|
||||
|
||||
|
||||
# --- Transform source AST ---
|
||||
def process_source(source: str, filename: str) -> Tuple[str, List[str]]:
    """Add docstrings to all undocumented functions.

    Args:
        source: Python source text to process.
        filename: Name used only in the warning printed when parsing fails.

    Returns:
        ``(new_source, func_names)``. ``func_names`` lists every function
        (including nested functions and methods) that received a docstring.
        When nothing was added, or the source cannot be parsed, the original
        ``source`` is returned unchanged together with an empty list.

    Note:
        Modified sources are regenerated with ``ast.unparse``, so comments
        and the original formatting of the file are not preserved.
    """
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError) as e:
        # ValueError: e.g. source containing null bytes on older interpreters.
        print(f" WARNING: Could not parse {filename}: {e}", file=sys.stderr)
        return source, []

    class DocstringInserter(ast.NodeTransformer):
        """Insert a generated docstring into every function lacking one."""

        def __init__(self):
            self.modified_funcs: list[str] = []

        def visit_FunctionDef(self, node: ast.FunctionDef) -> ast.FunctionDef:
            return self._process(node)

        def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> ast.AsyncFunctionDef:
            return self._process(node)

        def _process(self, node):
            if ast.get_docstring(node) is None:
                docstring_text = generate_docstring(node)
                # generate_docstring returns a quoted literal. The Constant
                # must hold only the text, otherwise the quotes become part
                # of the function's __doc__.
                if (len(docstring_text) >= 6
                        and docstring_text.startswith('"""')
                        and docstring_text.endswith('"""')):
                    docstring_text = docstring_text[3:-3]
                doc_node = ast.Expr(value=ast.Constant(value=docstring_text))
                node.body.insert(0, doc_node)
                ast.fix_missing_locations(node)
                self.modified_funcs.append(node.name)
            # Descend so nested functions and methods of nested classes are
            # handled too, whether or not this function was already documented.
            self.generic_visit(node)
            return node

    inserter = DocstringInserter()
    new_tree = inserter.visit(tree)
    if inserter.modified_funcs:
        return ast.unparse(new_tree), inserter.modified_funcs
    return source, []
|
||||
|
||||
|
||||
# --- File discovery ---
|
||||
def iter_python_files(paths: list[str]) -> list[Path]:
    """Resolve the given files/directories to a sorted list of .py files.

    A ``.py`` file is taken as-is; a directory is searched recursively,
    leaving out anything located under ``.git`` or ``__pycache__``. Paths
    that do not exist produce a warning on stderr. Results are resolved to
    absolute paths and de-duplicated.
    """
    skipped = ('.git', '__pycache__')
    found: set[Path] = set()

    for raw in paths:
        candidate = Path(raw)
        if not candidate.exists():
            print(f"WARNING: Path not found: {raw}", file=sys.stderr)
        elif candidate.is_file() and candidate.suffix == '.py':
            found.add(candidate.resolve())
        elif candidate.is_dir():
            found.update(
                hit.resolve()
                for hit in candidate.rglob('*.py')
                if not any(name in hit.parts for name in skipped)
            )

    return sorted(found)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: add missing docstrings to the given paths.

    Parses the command line, processes every discovered Python file and,
    unless ``--dry-run`` is given, writes modified sources back in place.
    A summary is printed as text or, with ``--json``, as a JSON document on
    stdout. In that case per-file ``--verbose`` lines go to stderr so stdout
    stays parseable.

    Returns:
        0 after processing.

    Raises:
        SystemExit: With status 1 when no Python files are found.
    """
    parser = argparse.ArgumentParser(description="Generate docstrings for functions missing them")
    parser.add_argument('paths', nargs='+', help='Python files or directories to process')
    parser.add_argument('--dry-run', action='store_true', help='Show what would change without writing')
    parser.add_argument('--json', action='store_true', help='Output machine-readable JSON summary')
    parser.add_argument('-v', '--verbose', action='store_true', help='Print each file processed')

    args = parser.parse_args()

    files = iter_python_files(args.paths)
    if not files:
        print("No Python files found to process", file=sys.stderr)
        sys.exit(1)

    # Keep stdout clean for the JSON document when --json is requested.
    log_stream = sys.stderr if args.json else sys.stdout

    results = []
    total_funcs = 0

    for pyfile in files:
        try:
            original = pyfile.read_text(encoding='utf-8')
        except (OSError, UnicodeDecodeError) as e:
            print(f" ERROR reading {pyfile}: {e}", file=sys.stderr)
            continue

        new_source, modified_funcs = process_source(original, str(pyfile))

        # Computed for every file: the "no changes" message needs it as well.
        try:
            rel = os.path.relpath(pyfile)
        except ValueError:
            # No relative path exists (e.g. different drive on Windows).
            rel = str(pyfile)

        if not modified_funcs:
            if args.verbose:
                print(f" {rel}: no changes", file=log_stream)
            continue

        total_funcs += len(modified_funcs)
        if args.verbose:
            print(f" {rel}: +{len(modified_funcs)} docstrings", file=log_stream)
        results.append({'file': str(pyfile), 'functions': modified_funcs})
        if not args.dry_run:
            pyfile.write_text(new_source, encoding='utf-8')

    if args.json:
        summary = {'total_files_modified': len(results), 'total_functions': total_funcs, 'files': results}
        print(json.dumps(summary, indent=2))
    else:
        print(f"Generated docstrings for {total_funcs} functions across {len(results)} files")
        if args.dry_run:
            print(" (dry run — no files written)")

    return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
128
tests/test_docstring_generator.py
Normal file
128
tests/test_docstring_generator.py
Normal file
@@ -0,0 +1,128 @@
|
||||
"""Tests for docstring_generator module (Issue #96)."""
|
||||
|
||||
import ast
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))
|
||||
|
||||
from docstring_generator import (
|
||||
name_to_title,
|
||||
extract_body_hint,
|
||||
generate_docstring,
|
||||
process_source,
|
||||
iter_python_files,
|
||||
)
|
||||
|
||||
|
||||
class TestNameToTitle:
    """Checks for the snake_case -> title conversion helper."""

    def test_snake_to_title(self):
        # identifier -> expected title phrase
        expectations = {
            "validate_fact": "Validate Fact",
            "docstring_generator": "Docstring Generator",
            "main": "Main",
            "__init__": "Init",
        }
        for identifier, wanted in expectations.items():
            assert name_to_title(identifier) == wanted
|
||||
|
||||
|
||||
class TestExtractBodyHint:
    """Checks for the body-hint extraction on single-statement bodies."""

    @staticmethod
    def _hint_for(snippet):
        """Parse *snippet* and run the extractor on its first statement."""
        first_stmt = ast.parse(snippet).body[0]
        return extract_body_hint([first_stmt])

    def test_assignment_hint(self):
        assert self._hint_for("result = compute()") == "Compute or return compute()"

    def test_return_hint(self):
        assert self._hint_for("return data") == "Return data"

    def test_no_hint(self):
        assert self._hint_for("pass") is None
|
||||
|
||||
|
||||
class TestGenerateDocstring:
    """Checks for the generated docstring text of individual functions."""

    def test_simple_function(self):
        src = "def add(a, b):\n return a + b\n"
        tree = ast.parse(src)
        func = tree.body[0]
        doc = generate_docstring(func)
        assert 'Add' in doc
        assert 'Args:' in doc
        # Match whole parameter entries: a bare 'a' / 'b' substring check is
        # already satisfied by the words "Add" and "Args".
        assert 'a (Any)' in doc and 'b (Any)' in doc
        assert 'Returns:' in doc

    def test_typed_function(self):
        src = "def greet(name: str) -> str:\n return f'Hello {name}'\n"
        tree = ast.parse(src)
        func = tree.body[0]
        doc = generate_docstring(func)
        assert 'name (str)' in doc
        # The return annotation must show up in the Returns entry itself.
        assert 'str: Return value' in doc

    def test_async_function(self):
        src = "async def fetch():\n pass\n"
        tree = ast.parse(src)
        func = tree.body[0]
        doc = generate_docstring(func)
        assert 'Fetch' in doc

    def test_self_skipped(self):
        src = "class C:\n def method(self, x):\n return x\n".replace(
            "\n return x", "\n  return x")
        tree = ast.parse(src)
        cls = tree.body[0]
        method = cls.body[0]
        doc = generate_docstring(method)
        # 'x' is a real parameter, so an Args section must exist.
        assert 'Args:' in doc
        assert 'x (Any)' in doc
        # Entries are rendered as "name (type)", so that is the form to
        # exclude for 'self'.
        assert 'self (' not in doc
|
||||
|
||||
|
||||
class TestProcessSource:
    """Checks for whole-source docstring insertion."""

    def test_adds_docstrings(self):
        updated, touched = process_source("def foo(x):\n return x * 2\n", "test.py")
        assert touched == ["foo"]
        for marker in ('"""', 'Foo'):
            assert marker in updated

    def test_preserves_existing_docstrings(self):
        documented = 'def bar():\n """Already documented."""\n return 1\n'
        updated, touched = process_source(documented, "test.py")
        assert len(touched) == 0
        # Untouched input must come back unchanged.
        assert updated == documented

    def test_multiple_functions(self):
        three_defs = "def a(): pass\ndef b(): pass\ndef c(): pass\n"
        updated, touched = process_source(three_defs, "test.py")
        assert len(touched) == 3
        assert '"""' in updated

    def test_dry_run_no_write(self, tmp_path):
        target = tmp_path / "t.py"
        target.write_text("def f(): pass\n")
        mtime_before = target.stat().st_mtime
        _, touched = process_source(target.read_text(), str(target))
        assert touched  # detected
        # process_source itself never writes; writing is left to the caller.
        assert target.stat().st_mtime == mtime_before
|
||||
|
||||
|
||||
class TestIterPythonFiles:
    """Checks for file discovery from explicit files and directories."""

    def test_single_file(self, tmp_path):
        lone = tmp_path / "single.py"
        lone.write_text("x = 1")
        discovered = iter_python_files([str(lone)])
        assert len(discovered) == 1
        assert discovered[0].name == "single.py"

    def test_directory_recursion(self, tmp_path):
        nested_dir = tmp_path / "sub"
        nested_dir.mkdir()
        (nested_dir / "a.py").write_text("a=1")
        (tmp_path / "b.py").write_text("b=2")
        # One file at the top level plus one in the subdirectory.
        assert len(iter_python_files([str(tmp_path)])) == 2
|
||||
Reference in New Issue
Block a user