Some checks failed
Test / pytest (pull_request) Failing after 8s
Adds scripts/readme_generator.py — a tool that scans a Python codebase and extracts module docstrings, entry points (argparse-based mains), and the directory structure to generate a standard README.md containing a description, installation instructions, usage, and a list of scripts.

Acceptance for #97:
- Reads codebase structure (AST-based Python file scanner)
- Generates README sections (Description, Installation, Usage, Scripts, Directory)
- Updates existing README (replaces it on each run — idempotent)
- Processes one repository per run (single repo per invocation)

Usage:
  python3 scripts/readme_generator.py            # generate in-place
  python3 scripts/readme_generator.py --dry-run  # preview stats
  python3 scripts/readme_generator.py --dir /path

Closes #97
153 lines
5.5 KiB
Python
Executable File
153 lines
5.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
README Generator — Scan codebase and generate/update README.md.
|
|
|
|
Reads codebase structure, extracts module docstrings and main entry points,
|
|
produces a README with: description, installation, usage, API/scripts list.
|
|
|
|
Usage:
|
|
python3 scripts/readme_generator.py
|
|
python3 scripts/readme_generator.py --dir /path/to/repo
|
|
python3 scripts/readme_generator.py --dry-run # preview without writing
|
|
"""
|
|
import argparse
|
|
import ast
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import List, Dict, Optional
|
|
|
|
def read_file(path: Path) -> str:
    """Return the text of *path*, or an empty string if it cannot be read.

    The file is decoded as UTF-8 instead of the platform's locale encoding so
    the result does not depend on the machine the generator runs on.  The
    ``utf-8-sig`` codec is used so that a leading byte-order mark, if present,
    is dropped rather than handed on to callers that parse the text.

    Reading is deliberately best-effort: a missing, unreadable, or
    undecodable file yields ``""`` instead of raising.

    Args:
        path: File to read.

    Returns:
        The decoded file contents, or ``""`` on any read/decode failure.
    """
    try:
        return path.read_text(encoding="utf-8-sig")
    except (OSError, ValueError):
        # OSError: missing file, a directory, permission problems.
        # ValueError: UnicodeDecodeError (a subclass) or an invalid path such
        # as one containing an embedded NUL byte.
        return ""
|
|
|
|
def extract_module_docstring(path: Path) -> str:
    """Return the module-level docstring of the file at *path*.

    The file text is obtained through ``read_file`` and parsed with ``ast``.
    Any failure along the way (for example a syntax error in the file), as
    well as a module without a docstring, results in an empty string.
    """
    try:
        module_doc = ast.get_docstring(ast.parse(read_file(path)))
    except Exception:
        return ""
    return module_doc if module_doc else ""
|
|
|
|
def _is_argument_parser_call(node: ast.AST) -> bool:
    """Return True if *node* is a call to ``ArgumentParser`` or ``<x>.ArgumentParser``."""
    if not isinstance(node, ast.Call):
        return False
    func = node.func
    if isinstance(func, ast.Name):
        return func.id == "ArgumentParser"
    return isinstance(func, ast.Attribute) and func.attr == "ArgumentParser"


def extract_parser_description(path: Path) -> str:
    """Extract the first ArgumentParser description found in the file.

    The file is parsed with ``ast`` and the ``description=`` keyword of the
    earliest ``ArgumentParser(...)`` call (by source position) that carries a
    usable description is returned.  Working on the syntax tree rather than
    on raw lines means the result is the actual string value: no stray
    quotes, closing parentheses or trailing arguments, and multi-line or
    implicitly concatenated string literals are handled.

    Supported description values:
      * a string literal -> its value;
      * the name ``__doc__`` -> the module docstring.
    Any other expression (f-string, variable, function call) cannot be
    resolved statically and is skipped.

    Args:
        path: Python source file to inspect.

    Returns:
        The description with surrounding whitespace stripped, or ``""`` when
        the file cannot be read or parsed, or no usable description exists.
    """
    source = read_file(path)
    if "ArgumentParser" not in source:
        # Cheap pre-check: avoid parsing files that cannot possibly match.
        return ""
    try:
        tree = ast.parse(source)
    except (SyntaxError, ValueError, RecursionError):
        # Best-effort scan: files that are not valid Python are ignored.
        return ""

    # ast.walk is breadth-first, so order the calls by position to honour
    # "first found in the file".
    parser_calls = sorted(
        (node for node in ast.walk(tree) if _is_argument_parser_call(node)),
        key=lambda call: (call.lineno, call.col_offset),
    )
    for call in parser_calls:
        for keyword in call.keywords:
            if keyword.arg != "description":
                continue
            value = keyword.value
            if isinstance(value, ast.Constant) and isinstance(value.value, str):
                text = value.value.strip()
            elif isinstance(value, ast.Name) and value.id == "__doc__":
                text = (ast.get_docstring(tree) or "").strip()
            else:
                text = ""
            if text:
                return text
    return ""
|
|
|
|
# Path components (directory or file names) that are never scanned.
_EXCLUDED_PARTS = frozenset({
    '__pycache__', '.git', 'venv', '.venv', '.pytest_cache',
    'node_modules', 'tests', 'test', 'conftest.py',
})


def _is_excluded_part(part: str) -> bool:
    """Return True if a single path component marks a test or tooling path."""
    return (
        part in _EXCLUDED_PARTS
        or part.startswith('test_')
        or part.endswith('_test.py')
    )


def scan_python_files(root: Path) -> List[Dict]:
    """Collect Python files (exclude tests) with basic metadata.

    Walks *root* recursively and skips every ``*.py`` path that has a
    component identifying tests (``test_*``, ``*_test.py``, ``tests/``,
    ``test/``, ``conftest.py``) or tooling directories (``__pycache__``,
    ``.git``, virtualenvs, ``.pytest_cache``, ``node_modules``).

    Args:
        root: Directory to scan.

    Returns:
        A list of dicts sorted by ``'path'``, each with the keys:
          * ``'path'``: path relative to *root*, always with ``/`` separators
            so generated output is identical on every platform;
          * ``'docstring'``: module docstring (``""`` if none);
          * ``'parser_desc'``: ArgumentParser description (``""`` if none);
          * ``'name'``: the file name.
    """
    files = []
    for path in root.rglob('*.py'):
        # rglob matches on the name only; ignore directories called "x.py".
        if not path.is_file():
            continue
        rel = path.relative_to(root)
        if any(_is_excluded_part(part) for part in rel.parts):
            continue
        files.append({
            'path': rel.as_posix(),
            'docstring': extract_module_docstring(path),
            'parser_desc': extract_parser_description(path),
            'name': path.name,
        })
    return sorted(files, key=lambda info: info['path'])
|
|
|
|
def detect_entry_point(file_info: Dict) -> bool:
    """Return True if the scanned file looks like a runnable entry point.

    A file qualifies when any of the following holds:
      * it is named ``__main__.py`` or ``main.py``;
      * an ArgumentParser description was extracted for it (a non-empty
        ``'parser_desc'`` entry);
      * its first path component is ``bin``.

    Note that the file contents are not inspected here; in particular no
    check for an ``if __name__ == "__main__":`` block is made.

    Args:
        file_info: Metadata dict with a ``'path'`` entry (relative path) and,
            optionally, a ``'parser_desc'`` entry.

    Returns:
        True when the file is considered an entry point, False otherwise.
    """
    path = Path(file_info.get('path') or '')
    if path.name in ('__main__.py', 'main.py'):
        return True
    if file_info.get('parser_desc'):
        return True
    # ``parts`` is empty for an empty path, so slice instead of indexing.
    return path.parts[:1] == ('bin',)
|
|
|
|
def generate_readme(root_dir: str, output_path: Optional[str] = None, dry_run: bool = False) -> str:
    """Generate a README for the repository at *root_dir*.

    The README consists of a title (the directory name), a description (the
    docstring of the first scanned Python file, or a generic fallback), and
    the sections Installation, Usage, Scripts and Directory Structure.

    Args:
        root_dir: Repository directory to scan.
        output_path: Where to write the README.  Defaults to ``README.md``
            inside *root_dir*.
        dry_run: When true, nothing is written; a JSON summary (repo name,
            section count, character count, number of Python files and
            entry scripts) is printed instead.

    Returns:
        The path of the written README as a string, or ``""`` for a dry run.

    Raises:
        OSError: If *root_dir* cannot be listed or the README cannot be
            written.
    """
    root = Path(root_dir).resolve()
    py_files = scan_python_files(root)
    entry_scripts = [info for info in py_files if detect_entry_point(info)]
    repo_name = root.name

    sections = [f"# {repo_name}\n"]

    # Description: docstring of the first file (sorted by path), if any.
    main_doc = py_files[0]['docstring'].strip() if py_files else ""
    sections.append((main_doc or "A Python project.") + "\n")

    sections.append("## Installation\n")
    if (root / "requirements.txt").exists():
        sections.append("```bash\ncp .env.example .env # if present\npip install -r requirements.txt\n```\n")
    else:
        # Same instruction whether or not a pyproject.toml is present.
        sections.append("```bash\npip install -e .\n```\n")

    sections.append("## Usage\n")
    if entry_scripts:
        # Only the first 8 entry points are shown to keep the section short.
        for info in entry_scripts[:8]:
            name = info['name']
            if info['parser_desc']:
                sections.append(f"### {name}\n{info['parser_desc']}\n")
            else:
                sections.append(f"### {name}\n```bash\npython3 {info['path']}\n```\n")
    else:
        sections.append("See `scripts/` directory for available tools.\n")

    sections.append("## Scripts\n")
    if entry_scripts:
        for info in entry_scripts[:15]:
            doc = info['docstring'].strip()
            # First docstring line serves as the one-line summary.
            desc = doc.split('\n')[0] if doc else "Utility script."
            sections.append(f"- **{info['name']}**: {desc}")
    else:
        sections.append("- No entry-point scripts detected.\n")

    sections.append("\n## Directory Structure\n")
    top_dirs = sorted(
        entry.name for entry in root.iterdir()
        if entry.is_dir()
        and not entry.name.startswith('.')
        and entry.name not in ('__pycache__', 'venv', '.venv', 'node_modules')
    )
    sections.append("```\n")
    sections.extend(f"{name}/" for name in top_dirs[:12])
    sections.append("```\n")

    readme_content = "\n".join(sections)

    if dry_run:
        print(json.dumps({
            "repo": repo_name,
            "sections": len(sections),
            "chars": len(readme_content),
            "python_files": len(py_files),
            "entry_scripts": len(entry_scripts),
        }, indent=2))
        return ""

    target = root / "README.md" if output_path is None else Path(output_path)

    # Record existence BEFORE writing; afterwards the file always exists and
    # the message could never say "created".
    already_existed = target.exists()
    target.write_text(readme_content, encoding="utf-8")

    action = 'updated' if already_existed else 'created'
    size = len(readme_content.encode("utf-8"))  # real byte count, not chars
    print(f"README {action}: {target} ({size} bytes)")
    return str(target)
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to
    # generate_readme().
    cli = argparse.ArgumentParser(
        description="Generate or update README.md from codebase structure.",
    )
    cli.add_argument(
        "--dir",
        default=".",
        help="Directory to scan (default: current)",
    )
    cli.add_argument(
        "--output",
        help="Output README path (default: README.md in scanned dir)",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview without writing",
    )
    options = cli.parse_args()

    generate_readme(
        root_dir=options.dir,
        output_path=options.output,
        dry_run=options.dry_run,
    )
|