Files
compounding-intelligence/scripts/readme_generator.py
Timmy Agent 425a87bcce
Some checks failed
Test / pytest (pull_request) Failing after 8s
feat(scripts): add readme_generator — auto-generate README from codebase
Adds scripts/readme_generator.py — a tool that scans Python codebases,
extracts module docstrings, entry points (argparse main), and directory
structure to generate a standard README.md with: description, installation,
usage, scripts list.

Acceptance for #97:
- Reads codebase structure (AST-based Python file scanner)
- Generates README sections (Description, Installation, Usage, Scripts, Directory)
- Updates an existing README (the file is overwritten on each run, so re-running is idempotent)
- Processes one repository per run (single repo per invocation)

Usage:
    python3 scripts/readme_generator.py              # generate in-place
    python3 scripts/readme_generator.py --dry-run    # preview stats
    python3 scripts/readme_generator.py --dir /path

Closes #97
2026-04-26 00:20:11 -04:00

153 lines
5.5 KiB
Python
Executable File

#!/usr/bin/env python3
"""
README Generator — Scan codebase and generate/update README.md.
Reads codebase structure, extracts module docstrings and main entry points,
produces a README with: description, installation, usage, API/scripts list.
Usage:
python3 scripts/readme_generator.py
python3 scripts/readme_generator.py --dir /path/to/repo
python3 scripts/readme_generator.py --dry-run # preview without writing
"""
import argparse
import ast
import json
import sys
from pathlib import Path
from typing import List, Dict, Optional
def read_file(path: Path) -> str:
    """Return the text of ``path``, or ``""`` if it cannot be read.

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's locale encoding. The ``utf-8-sig`` codec is used so that a
    leading byte-order mark, if present, is dropped instead of ending up as
    the first character of the returned text.

    Reading is best-effort: a missing or unreadable file, an invalid path, or
    content that is not valid UTF-8 yields an empty string rather than an
    exception.

    Args:
        path: File to read.

    Returns:
        The decoded file content, or ``""`` on any read or decode failure.
    """
    try:
        return path.read_text(encoding="utf-8-sig")
    except (OSError, ValueError):
        # OSError: missing file, permissions, path is a directory, ...
        # ValueError: undecodable bytes (UnicodeDecodeError) or a path
        # containing an embedded NUL byte.
        return ""
def extract_module_docstring(path: Path) -> str:
    """Return the module-level docstring of the Python file at ``path``.

    The file content is parsed with ``ast``. An empty string is returned when
    the module has no docstring, or when reading or parsing fails for any
    reason.
    """
    try:
        module = ast.parse(read_file(path))
        docstring = ast.get_docstring(module)
    except Exception:
        # Best-effort scan: any failure means "no docstring available".
        return ""
    if docstring:
        return docstring
    return ""
def extract_parser_description(path: Path) -> str:
    """Extract the first ArgumentParser description found in the file.

    The file is parsed with ``ast`` and searched for calls whose callee is
    named ``ArgumentParser`` (either the bare name or an attribute access such
    as ``argparse.ArgumentParser``). Calls are examined in source order and
    the first usable ``description=`` keyword wins:

    * a string literal (including implicitly concatenated or multi-line
      literals) is returned with surrounding whitespace stripped;
    * ``description=__doc__`` resolves to the module docstring;
    * any other expression cannot be evaluated statically and is skipped.

    Args:
        path: Python source file to inspect.

    Returns:
        The description text, or ``""`` when the file cannot be read or
        parsed, contains no ``ArgumentParser`` call, or none of the calls has
        a statically known description.
    """
    content = read_file(path)
    # Cheap pre-filter: skip the parse for files that never mention argparse.
    if "ArgumentParser" not in content:
        return ""

    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError, RecursionError):
        # Unparseable source (e.g. Python 2 syntax or NUL bytes): best-effort.
        return ""

    parser_calls = []
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        func = node.func
        if isinstance(func, ast.Attribute):
            callee = func.attr
        elif isinstance(func, ast.Name):
            callee = func.id
        else:
            callee = ""
        if callee == "ArgumentParser":
            parser_calls.append(node)

    # ast.walk is breadth-first; sort so "first" means first in the source.
    parser_calls.sort(key=lambda call: (call.lineno, call.col_offset))

    for call in parser_calls:
        for keyword in call.keywords:
            if keyword.arg != "description":
                continue
            value = keyword.value
            if isinstance(value, ast.Constant) and isinstance(value.value, str):
                desc = value.value.strip()
            elif isinstance(value, ast.Name) and value.id == "__doc__":
                desc = (ast.get_docstring(tree) or "").strip()
            else:
                desc = ""
            if desc:
                return desc
    return ""
def scan_python_files(root: Path) -> List[Dict]:
    """Collect Python files (exclude tests) with basic metadata.

    Walks ``root`` recursively for ``*.py`` files and skips:

    * any path with a component starting with ``test_``;
    * anything inside cache, VCS, virtual-environment, ``node_modules`` or
      ``tests`` directories;
    * pytest support files (``conftest.py``) and ``*_test.py`` modules;
    * directories that merely happen to be named ``*.py``.

    Args:
        root: Directory to scan.

    Returns:
        A list of dicts sorted by ``path``, each with the keys ``path``
        (POSIX-style path relative to ``root``), ``docstring`` (module
        docstring or ``""``), ``parser_desc`` (argparse description or ``""``)
        and ``name`` (file name).
    """
    skipped_names = {
        '__pycache__', '.git', 'venv', '.venv', '.pytest_cache',
        'node_modules', 'tests',
    }
    files = []
    for path in root.rglob('*.py'):
        rel = path.relative_to(root)
        if any(p.startswith('test_') or p in skipped_names for p in rel.parts):
            continue
        if path.name == 'conftest.py' or path.name.endswith('_test.py'):
            continue
        if not path.is_file():
            # rglob matches on name only, so a directory called "x.py" shows up too.
            continue
        files.append({
            # POSIX separators keep generated README paths identical across platforms.
            'path': rel.as_posix(),
            'docstring': extract_module_docstring(path),
            'parser_desc': extract_parser_description(path),
            'name': path.name,
        })
    return sorted(files, key=lambda x: x['path'])
def detect_entry_point(file_info: Dict) -> bool:
    """Decide whether a scanned file should be listed as an entry point.

    A file qualifies when it is named ``__main__.py`` or ``main.py``, when an
    argparse description was extracted for it (``parser_desc`` is non-empty),
    or when the first component of its relative path is ``bin``.
    """
    location = Path(file_info['path'])
    if location.name in ('__main__.py', 'main.py'):
        return True
    if file_info['parser_desc']:
        return True
    return location.parts[0] == 'bin'
def generate_readme(root_dir: str, output_path: Optional[str] = None, dry_run: bool = False) -> str:
    """Generate a README for the repository at ``root_dir``.

    The README is assembled from a title (the directory name), a description
    (docstring of the first scanned Python file, sorted by path), and the
    sections Installation, Usage, Scripts and Directory Structure.

    Args:
        root_dir: Repository directory to scan.
        output_path: Where to write the README. Defaults to ``README.md``
            inside ``root_dir``.
        dry_run: When true, print JSON statistics about the would-be README
            to stdout and write nothing.

    Returns:
        The path of the written README as a string, or ``""`` for a dry run.

    Raises:
        OSError: If ``root_dir`` cannot be listed or the README cannot be
            written.
    """
    root = Path(root_dir).resolve()
    py_files = scan_python_files(root)
    # Computed once and reused by Usage, Scripts and the dry-run statistics.
    entry_scripts = [f for f in py_files if detect_entry_point(f)]
    repo_name = root.name

    sections = [f"# {repo_name}\n"]

    # Description: fall back to a generic line when nothing was scanned or the
    # first file has no docstring.
    main_doc = py_files[0]['docstring'].strip() if py_files else ""
    sections.append(main_doc + "\n" if main_doc else "A Python project.\n")

    sections.append("## Installation\n")
    if (root / "requirements.txt").exists():
        sections.append("```bash\ncp .env.example .env # if present\npip install -r requirements.txt\n```\n")
    else:
        # Same instruction whether or not pyproject.toml is present.
        sections.append("```bash\npip install -e .\n```\n")

    sections.append("## Usage\n")
    if entry_scripts:
        for f in entry_scripts[:8]:
            name = f['name']
            if f['parser_desc']:
                sections.append(f"### {name}\n{f['parser_desc']}\n")
            else:
                sections.append(f"### {name}\n```bash\npython3 {f['path']}\n```\n")
    else:
        sections.append("See `scripts/` directory for available tools.\n")

    sections.append("## Scripts\n")
    if entry_scripts:
        for f in entry_scripts[:15]:
            doc = f['docstring'].strip()
            desc = doc.split('\n')[0] if doc else "Utility script."
            sections.append(f"- **{f['name']}**: {desc}")
    else:
        # No trailing newline: the next heading already starts with one.
        sections.append("- No entry-point scripts detected.")

    sections.append("\n## Directory Structure\n")
    top_dirs = sorted(
        d.name for d in root.iterdir()
        if d.is_dir() and not d.name.startswith('.') and d.name not in ('__pycache__', 'venv', '.venv', 'node_modules')
    )
    # The opening fence carries no newline of its own; joining with "\n"
    # would otherwise put an empty line at the top of the code block.
    sections.append("```")
    for d in top_dirs[:12]:
        sections.append(f"{d}/")
    sections.append("```\n")

    readme_content = "\n".join(sections)

    if dry_run:
        print(json.dumps({
            "repo": repo_name,
            "sections": len(sections),
            "chars": len(readme_content),
            "python_files": len(py_files),
            "entry_scripts": len(entry_scripts),
        }, indent=2))
        return ""

    destination = root / "README.md" if output_path is None else Path(output_path)
    # Must be checked before writing; afterwards the file always exists.
    already_existed = destination.exists()
    destination.write_text(readme_content, encoding="utf-8")
    byte_count = len(readme_content.encode("utf-8"))
    print(f"README {'updated' if already_existed else 'created'}: {destination} ({byte_count} bytes)")
    return str(destination)
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to generate_readme.
    cli = argparse.ArgumentParser(
        description="Generate or update README.md from codebase structure.",
    )
    cli.add_argument(
        "--dir",
        default=".",
        help="Directory to scan (default: current)",
    )
    cli.add_argument(
        "--output",
        help="Output README path (default: README.md in scanned dir)",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview without writing",
    )
    options = cli.parse_args()
    generate_readme(options.dir, options.output, options.dry_run)