#!/usr/bin/env python3
"""
README Generator — Scan codebase and generate/update README.md.

Reads codebase structure, extracts module docstrings and main entry points,
produces a README with: description, installation, usage, API/scripts list.

Usage:
    python3 scripts/readme_generator.py
    python3 scripts/readme_generator.py --dir /path/to/repo
    python3 scripts/readme_generator.py --dry-run  # preview without writing
"""

import argparse
import ast
import json
import sys
from pathlib import Path
from typing import List, Dict, Optional

# Path components that exclude a file from the scan.
_EXCLUDED_PARTS = frozenset({'__pycache__', '.git', 'venv', '.venv', '.pytest_cache'})

# Top-level directories never listed in the "Directory Structure" section.
_HIDDEN_TOP_DIRS = frozenset({'__pycache__', 'venv', '.venv', 'node_modules'})

# Caps on how many entries each generated section lists.
_MAX_USAGE_ENTRIES = 8
_MAX_SCRIPT_ENTRIES = 15
_MAX_TOP_DIRS = 12


def read_file(path: Path) -> str:
    """Return the text of *path*, or "" if it cannot be read.

    The file is decoded as UTF-8 with undecodable bytes replaced, so one
    badly-encoded file does not abort a scan. ``utf-8-sig`` drops a leading
    BOM, which ``ast.parse`` would otherwise reject.
    """
    try:
        return path.read_text(encoding="utf-8-sig", errors="replace")
    except (OSError, ValueError):
        return ""


def _parse_module(path: Path) -> Optional[ast.Module]:
    """Parse *path* as Python source; return None if it does not parse."""
    try:
        return ast.parse(read_file(path))
    except (SyntaxError, ValueError, RecursionError):
        return None


def _is_argument_parser_call(node: ast.AST) -> bool:
    """Tell whether *node* is a call to ``ArgumentParser`` (bare or dotted)."""
    if not isinstance(node, ast.Call):
        return False
    func = node.func
    if isinstance(func, ast.Name):
        return func.id == "ArgumentParser"
    return isinstance(func, ast.Attribute) and func.attr == "ArgumentParser"


def _parser_description_from_tree(tree: ast.Module) -> str:
    """Return the first resolvable ``ArgumentParser(description=...)`` text."""
    # ast.walk is breadth-first; sort so "first" means first in the source.
    calls = sorted(
        (node for node in ast.walk(tree) if _is_argument_parser_call(node)),
        key=lambda node: (node.lineno, node.col_offset),
    )
    for call in calls:
        for keyword in call.keywords:
            if keyword.arg != "description":
                continue
            value = keyword.value
            desc = ""
            if isinstance(value, ast.Constant) and isinstance(value.value, str):
                desc = value.value.strip()
            elif isinstance(value, ast.Name) and value.id == "__doc__":
                # description=__doc__ means "use the module docstring".
                desc = (ast.get_docstring(tree) or "").strip()
            if desc:
                return desc
    return ""


def extract_module_docstring(path: Path) -> str:
    """Return the module docstring of *path*, or "" if absent or unparsable."""
    tree = _parse_module(path)
    if tree is None:
        return ""
    return ast.get_docstring(tree) or ""


def extract_parser_description(path: Path) -> str:
    """Extract the first ArgumentParser description found in the file.

    Only string-literal descriptions and ``description=__doc__`` are
    resolved; anything else (variables, f-strings) yields "". Returns ""
    as well when the file cannot be read or parsed.
    """
    tree = _parse_module(path)
    if tree is None:
        return ""
    return _parser_description_from_tree(tree)


def scan_python_files(root: Path) -> List[Dict]:
    """Collect Python files (exclude tests) with basic metadata.

    Files are skipped when any component of their path relative to *root*
    starts with ``test_`` or names a cache/VCS/virtualenv directory.

    Returns:
        A list of dicts with keys ``path`` (POSIX-style path relative to
        *root*), ``docstring``, ``parser_desc`` and ``name``, sorted by
        ``path``.
    """
    files = []
    for path in root.rglob('*.py'):
        if not path.is_file():
            continue
        rel = path.relative_to(root)
        if any(part.startswith('test_') or part in _EXCLUDED_PARTS for part in rel.parts):
            continue
        # Parse once and reuse the tree for both pieces of metadata.
        tree = _parse_module(path)
        files.append({
            'path': rel.as_posix(),
            'docstring': (ast.get_docstring(tree) or "") if tree is not None else "",
            'parser_desc': _parser_description_from_tree(tree) if tree is not None else "",
            'name': path.name,
        })
    return sorted(files, key=lambda info: info['path'])


def detect_entry_point(file_info: Dict) -> bool:
    """Tell whether a scanned file looks like a runnable entry point.

    A file qualifies when it is named ``__main__.py`` or ``main.py``, when
    an ArgumentParser description was extracted from it, or when it lives
    under a top-level ``bin`` directory.
    """
    path = Path(file_info['path'])
    if path.name in ('__main__.py', 'main.py'):
        return True
    if file_info['parser_desc']:
        return True
    # Slice instead of indexing so an empty path cannot raise IndexError.
    return path.parts[:1] == ('bin',)


def _build_sections(root: Path, py_files: List[Dict], entry_scripts: List[Dict]) -> List[str]:
    """Render the README as a list of sections to be joined with newlines."""
    sections = [f"# {root.name}\n"]

    # Project description: docstring of the first scanned file, if it has one.
    main_doc = py_files[0]['docstring'].strip() if py_files else ""
    sections.append(main_doc + "\n" if main_doc else "A Python project.\n")

    sections.append("## Installation\n")
    if (root / "requirements.txt").exists():
        sections.append("```bash\ncp .env.example .env # if present\npip install -r requirements.txt\n```\n")
    else:
        sections.append("```bash\npip install -e .\n```\n")

    sections.append("## Usage\n")
    if entry_scripts:
        for info in entry_scripts[:_MAX_USAGE_ENTRIES]:
            name = info['name']
            if info['parser_desc']:
                sections.append(f"### {name}\n{info['parser_desc']}\n")
            else:
                sections.append(f"### {name}\n```bash\npython3 {info['path']}\n```\n")
    else:
        sections.append("See `scripts/` directory for available tools.\n")

    sections.append("## Scripts\n")
    if entry_scripts:
        for info in entry_scripts[:_MAX_SCRIPT_ENTRIES]:
            doc = info['docstring'].strip()
            desc = doc.split('\n')[0] if doc else "Utility script."
            sections.append(f"- **{info['name']}**: {desc}")
    else:
        sections.append("- No entry-point scripts detected.\n")

    sections.append("\n## Directory Structure\n")
    top_dirs = sorted(
        entry.name
        for entry in root.iterdir()
        if entry.is_dir()
        and not entry.name.startswith('.')
        and entry.name not in _HIDDEN_TOP_DIRS
    )
    # The opening fence has no trailing newline: the join supplies it, so the
    # first directory sits directly under the fence with no blank line.
    sections.append("```")
    sections.extend(f"{name}/" for name in top_dirs[:_MAX_TOP_DIRS])
    sections.append("```\n")
    return sections


def generate_readme(root_dir: str, output_path: Optional[str] = None, dry_run: bool = False) -> str:
    """Generate README content for *root_dir* and write it to disk.

    Args:
        root_dir: Directory to scan.
        output_path: Where to write the README; defaults to ``README.md``
            inside *root_dir*.
        dry_run: When true, print a JSON summary and write nothing.

    Returns:
        The path written, as a string, or "" for a dry run.
    """
    root = Path(root_dir).resolve()
    py_files = scan_python_files(root)
    entry_scripts = [info for info in py_files if detect_entry_point(info)]
    sections = _build_sections(root, py_files, entry_scripts)
    readme_content = "\n".join(sections)

    if dry_run:
        print(json.dumps({
            "repo": root.name,
            "sections": len(sections),
            "chars": len(readme_content),
            "python_files": len(py_files),
            "entry_scripts": len(entry_scripts),
        }, indent=2))
        return ""

    target = root / "README.md" if output_path is None else Path(output_path)
    # Must be checked before writing; afterwards the file always exists.
    existed = target.exists()
    target.write_text(readme_content, encoding="utf-8")
    size = len(readme_content.encode("utf-8"))
    print(f"README {'updated' if existed else 'created'}: {target} ({size} bytes)")
    return str(target)


def main(argv: Optional[List[str]] = None) -> int:
    """Parse command-line arguments and run the generator."""
    parser = argparse.ArgumentParser(description="Generate or update README.md from codebase structure.")
    parser.add_argument("--dir", default=".", help="Directory to scan (default: current)")
    parser.add_argument("--output", help="Output README path (default: README.md in scanned dir)")
    parser.add_argument("--dry-run", action="store_true", help="Preview without writing")
    args = parser.parse_args(argv)
    generate_readme(args.dir, args.output, args.dry_run)
    return 0


if __name__ == "__main__":
    sys.exit(main())