#!/usr/bin/env python3
"""
README Generator — Scan codebase and generate/update README.md.

Reads codebase structure, extracts module docstrings and main entry points,
produces a README with: description, installation, usage, API/scripts list.

Usage:
    python3 scripts/readme_generator.py
    python3 scripts/readme_generator.py --dir /path/to/repo
    python3 scripts/readme_generator.py --dry-run   # preview without writing
"""
import argparse
import ast
import json
import sys
from pathlib import Path
from typing import List, Dict, Optional


def read_file(path: Path) -> str:
    """Return the file's text decoded as UTF-8, or "" if it cannot be read.

    Undecodable bytes are replaced rather than raising, so one bad file
    never aborts a whole scan.
    """
    try:
        return path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        return ""


def extract_module_docstring(path: Path) -> str:
    """Return the module-level docstring of a Python file, or "" on any failure."""
    try:
        tree = ast.parse(read_file(path))
        return ast.get_docstring(tree) or ""
    except (SyntaxError, ValueError):
        # Unparseable source (e.g. Python 2 files, templates) — skip quietly.
        return ""


def extract_parser_description(path: Path) -> str:
    """Return the ``description=`` of the first ``ArgumentParser(...)`` call.

    Parses the file with :mod:`ast` rather than string matching, so it is
    unaffected by line layout, commas inside the description, or the closing
    parenthesis of the call. Returns "" when the file does not parse, has no
    ``ArgumentParser`` call, or the description is not a string literal.
    """
    try:
        tree = ast.parse(read_file(path))
    except (SyntaxError, ValueError):
        return ""
    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue
        func = node.func
        # Match both `argparse.ArgumentParser(...)` and bare `ArgumentParser(...)`.
        name = func.attr if isinstance(func, ast.Attribute) else getattr(func, "id", "")
        if name != "ArgumentParser":
            continue
        for kw in node.keywords:
            if (
                kw.arg == "description"
                and isinstance(kw.value, ast.Constant)
                and isinstance(kw.value.value, str)
            ):
                return kw.value.value
    return ""


def scan_python_files(root: Path) -> List[Dict]:
    """Collect Python files under *root* (excluding tests/venvs) with metadata.

    Returns a list of dicts sorted by relative path, each with keys:
    ``path`` (relative, str), ``docstring``, ``parser_desc``, ``name``.
    """
    files = []
    for path in root.rglob('*.py'):
        rel = path.relative_to(root)
        parts = rel.parts
        # Skip test files and any path that passes through a tooling/venv dir.
        if any(
            p.startswith('test_')
            or p in ('__pycache__', '.git', 'venv', '.venv', '.pytest_cache')
            for p in parts
        ):
            continue
        files.append({
            'path': str(rel),
            'docstring': extract_module_docstring(path),
            'parser_desc': extract_parser_description(path),
            'name': path.name,
        })
    return sorted(files, key=lambda x: x['path'])


def detect_entry_point(file_info: Dict) -> bool:
    """Return True if the file looks runnable: conventional name, argparse, or bin/."""
    path = Path(file_info['path'])
    name = path.name
    return (
        name in ('__main__.py', 'main.py')
        or bool(file_info['parser_desc'])
        or path.parts[0] == 'bin'
    )


def generate_readme(root_dir: str, output_path: Optional[str] = None, dry_run: bool = False) -> str:
    """Generate a README.md for the project rooted at *root_dir*.

    Args:
        root_dir: Directory to scan.
        output_path: Where to write the README (default: README.md in *root_dir*).
        dry_run: If True, print a JSON summary instead of writing, and return "".

    Returns:
        The path written to as a string, or "" in dry-run mode.
    """
    root = Path(root_dir).resolve()
    py_files = scan_python_files(root)

    sections = []
    repo_name = root.name

    sections.append(f"# {repo_name}\n")

    # Project description: first scanned file's module docstring, if any.
    main_doc = py_files[0]['docstring'].strip() if py_files else ""
    sections.append((main_doc or "A Python project.") + "\n")

    sections.append("## Installation\n")
    if (root / "requirements.txt").exists():
        sections.append("```bash\ncp .env.example .env  # if present\npip install -r requirements.txt\n```\n")
    else:
        # pyproject.toml or nothing detected: editable install is the safest default.
        sections.append("```bash\npip install -e .\n```\n")

    sections.append("## Usage\n")
    entry_scripts = [f for f in py_files if detect_entry_point(f)]
    if entry_scripts:
        for f in entry_scripts[:8]:
            name = f['name']
            if f['parser_desc']:
                sections.append(f"### {name}\n{f['parser_desc']}\n")
            else:
                sections.append(f"### {name}\n```bash\npython3 {f['path']}\n```\n")
    else:
        sections.append("See `scripts/` directory for available tools.\n")

    sections.append("## Scripts\n")
    if entry_scripts:
        for f in entry_scripts[:15]:
            # First docstring line as a one-line summary; generic fallback.
            doc = f['docstring'].strip()
            desc = doc.split('\n')[0] if doc else "Utility script."
            sections.append(f"- **{f['name']}**: {desc}")
    else:
        sections.append("- No entry-point scripts detected.\n")

    sections.append("\n## Directory Structure\n")
    top_dirs = sorted(
        d.name for d in root.iterdir()
        if d.is_dir()
        and not d.name.startswith('.')
        and d.name not in ('__pycache__', 'venv', '.venv', 'node_modules')
    )
    sections.append("```\n")
    for d in top_dirs[:12]:
        sections.append(f"{d}/")
    sections.append("```\n")

    readme_content = "\n".join(sections)

    if dry_run:
        print(json.dumps({
            "repo": repo_name,
            "sections": len(sections),
            "chars": len(readme_content),
            "python_files": len(py_files),
            "entry_scripts": len(entry_scripts),
        }, indent=2))
        return ""

    if output_path is None:
        output_path = root / "README.md"
    else:
        output_path = Path(output_path)

    # Check existence BEFORE writing, otherwise the message always says "updated".
    existed = output_path.exists()
    output_path.write_text(readme_content, encoding="utf-8")
    print(f"README {'updated' if existed else 'created'}: {output_path} ({len(readme_content)} chars)")
    return str(output_path)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Generate or update README.md from codebase structure.")
    parser.add_argument("--dir", default=".", help="Directory to scan (default: current)")
    parser.add_argument("--output", help="Output README path (default: README.md in scanned dir)")
    parser.add_argument("--dry-run", action="store_true", help="Preview without writing")
    args = parser.parse_args()

    generate_readme(args.dir, args.output, args.dry_run)