compounding-intelligence/scripts/readme_generator.py

#!/usr/bin/env python3
"""
README Generator — Scan codebase and generate/update README.md.

Reads codebase structure, extracts module docstrings and main entry points,
produces a README with: description, installation, usage, API/scripts list.

Usage:
    python3 scripts/readme_generator.py
    python3 scripts/readme_generator.py --dir /path/to/repo
    python3 scripts/readme_generator.py --dry-run  # preview without writing
"""
import argparse
import ast
import json
import sys
from pathlib import Path
from typing import List, Dict, Optional

def read_file(path: Path) -> str:
    """Return the text of *path* decoded as UTF-8, or "" if it cannot be read.

    Reading is best-effort: a missing or unreadable file, a directory, or
    content that is not valid UTF-8 all yield an empty string instead of
    raising.

    Args:
        path: File to read.

    Returns:
        The decoded file content (without a leading UTF-8 BOM), or "".
    """
    try:
        # Pin the encoding instead of relying on the platform default, and use
        # the "-sig" variant so a leading BOM is dropped rather than returned
        # as part of the text (plain UTF-8 files decode identically).
        return path.read_text(encoding="utf-8-sig")
    except (OSError, ValueError):
        # OSError: missing file, permission problem, path is a directory.
        # ValueError: UnicodeDecodeError (a subclass) or an invalid path.
        return ""

def extract_module_docstring(path: Path) -> str:
    """Return the module-level docstring of the Python file at *path*.

    The file is read with ``read_file`` and parsed with ``ast``. Any failure
    along the way, or a module without a docstring, yields "".
    """
    try:
        module = ast.parse(read_file(path))
        docstring = ast.get_docstring(module)
    except Exception:
        return ""
    if docstring:
        return docstring
    return ""

def _is_argument_parser_call(node: ast.AST) -> bool:
    """Return True for a call written ``ArgumentParser(...)`` or ``<x>.ArgumentParser(...)``."""
    if not isinstance(node, ast.Call):
        return False
    func = node.func
    if isinstance(func, ast.Name):
        return func.id == 'ArgumentParser'
    return isinstance(func, ast.Attribute) and func.attr == 'ArgumentParser'

def extract_parser_description(path: Path) -> str:
    """Extract the first ArgumentParser description found in the file.

    The file is parsed with ``ast`` and the ``description=`` keyword of each
    ``ArgumentParser(...)`` call is inspected in source order. Working on the
    syntax tree (rather than scanning text lines) means that:

    * the string value is returned exactly, with no leftover quotes or
      closing parentheses;
    * occurrences of ``description=`` inside unrelated string literals or
      comments are ignored;
    * multi-line calls and implicitly concatenated strings are handled.

    Args:
        path: Python source file to inspect.

    Returns:
        The description with runs of whitespace collapsed to single spaces.
        ``description=__doc__`` resolves to the first line of the module
        docstring. Returns "" when the file cannot be read or parsed, has no
        ArgumentParser call, or only uses descriptions that are not string
        literals (variables, f-strings, function calls).
    """
    content = read_file(path)
    # Cheap pre-check: skip parsing files that cannot contain a parser.
    if 'ArgumentParser' not in content:
        return ""
    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError, RecursionError):
        return ""

    # ast.walk is breadth-first; sort so "first" means first in the source.
    parser_calls = sorted(
        (node for node in ast.walk(tree) if _is_argument_parser_call(node)),
        key=lambda call: (call.lineno, call.col_offset),
    )
    for call in parser_calls:
        for keyword in call.keywords:
            if keyword.arg != 'description':
                continue
            value = keyword.value
            if isinstance(value, ast.Constant) and isinstance(value.value, str):
                text = value.value
            elif isinstance(value, ast.Name) and value.id == '__doc__':
                # The parser reuses the module docstring; keep its summary line.
                text = (ast.get_docstring(tree) or "").split('\n')[0]
            else:
                # Not statically known (variable, f-string, call): try the next one.
                continue
            description = " ".join(text.split())
            if description:
                return description
    return ""

def scan_python_files(root: Path) -> List[Dict]:
    """Collect Python files (exclude tests) with basic metadata.

    Walks *root* recursively and skips:

    * anything inside tooling/vendor directories (``__pycache__``, ``.git``,
      ``venv``, ``.venv``, ``.pytest_cache``, ``node_modules``);
    * test code: ``tests``/``test`` directories, any path component starting
      with ``test_``, files ending in ``_test.py`` and ``conftest.py``;
    * directories whose name happens to end in ``.py``.

    Args:
        root: Directory to scan.

    Returns:
        A list of dicts sorted by ``path``, each with the keys ``path``
        (relative to *root*), ``docstring`` (module docstring or ""),
        ``parser_desc`` (ArgumentParser description or "") and ``name``
        (file name).
    """
    skipped_parts = {
        '__pycache__', '.git', 'venv', '.venv', '.pytest_cache',
        'node_modules', 'tests', 'test',
    }
    files = []
    for path in root.rglob('*.py'):
        rel = path.relative_to(root)
        if any(p.startswith('test_') or p in skipped_parts for p in rel.parts):
            continue
        # Test modules that do not follow the test_ prefix convention.
        if path.name.endswith('_test.py') or path.name == 'conftest.py':
            continue
        # rglob matches on the name only, so a directory "foo.py" would match too.
        if not path.is_file():
            continue
        files.append({
            'path': str(rel),
            'docstring': extract_module_docstring(path),
            'parser_desc': extract_parser_description(path),
            'name': path.name,
        })
    return sorted(files, key=lambda x: x['path'])

def detect_entry_point(file_info: Dict) -> bool:
    """Return True if *file_info* describes a likely entry-point script.

    A file counts as an entry point when any of the following holds:

    * its file name is ``__main__.py`` or ``main.py``;
    * it has a non-empty ``parser_desc`` (an argparse description was found);
    * the first component of its relative path is ``bin``.

    The file's contents are not inspected here, so a bare
    ``if __name__ == "__main__":`` block alone does not qualify.

    Args:
        file_info: Mapping with a ``path`` key (path relative to the scanned
            root) and, optionally, a ``parser_desc`` key.

    Returns:
        Whether the file should be listed as an entry point.
    """
    path = Path(file_info['path'])
    if path.name in ('__main__.py', 'main.py'):
        return True
    if file_info.get('parser_desc'):
        return True
    # Slice instead of indexing: an empty path has no parts and must not raise.
    return path.parts[:1] == ('bin',)

def generate_readme(root_dir: str, output_path: Optional[str] = None, dry_run: bool = False) -> str:
    """Build README content for the repository at *root_dir* and write it out.

    The README consists of a title (the directory name), a description (the
    module docstring of the first scanned file in path order, or a generic
    fallback), installation instructions, usage for up to 8 entry-point
    scripts, a list of up to 15 entry-point scripts and up to 12 top-level
    directories.

    Args:
        root_dir: Directory to scan.
        output_path: Where to write the README. Defaults to ``README.md``
            inside *root_dir*.
        dry_run: When True, print a JSON summary to stdout and write nothing.

    Returns:
        The path of the written README as a string, or "" for a dry run.
    """
    root = Path(root_dir).resolve()
    py_files = scan_python_files(root)
    # Computed once; reused by the Usage/Scripts sections and the dry-run summary.
    entry_scripts = [f for f in py_files if detect_entry_point(f)]

    sections = []
    repo_name = root.name

    sections.append(f"# {repo_name}\n")

    main_doc = py_files[0]['docstring'].strip() if py_files else ""
    sections.append((main_doc or "A Python project.") + "\n")

    sections.append("## Installation\n")
    if (root / "requirements.txt").exists():
        sections.append("```bash\ncp .env.example .env  # if present\npip install -r requirements.txt\n```\n")
    else:
        # Same instructions whether or not pyproject.toml is present.
        sections.append("```bash\npip install -e .\n```\n")

    sections.append("## Usage\n")
    if entry_scripts:
        for f in entry_scripts[:8]:
            name = f['name']
            if f['parser_desc']:
                sections.append(f"### {name}\n{f['parser_desc']}\n")
            else:
                sections.append(f"### {name}\n```bash\npython3 {f['path']}\n```\n")
    else:
        sections.append("See `scripts/` directory for available tools.\n")

    sections.append("## Scripts\n")
    if entry_scripts:
        for f in entry_scripts[:15]:
            desc = f['docstring'].strip().split('\n')[0] if f['docstring'].strip() else "Utility script."
            sections.append(f"- **{f['name']}**: {desc}")
    else:
        sections.append("- No entry-point scripts detected.\n")

    sections.append("\n## Directory Structure\n")
    top_dirs = sorted(
        d.name for d in root.iterdir()
        if d.is_dir() and not d.name.startswith('.') and d.name not in ('__pycache__', 'venv', '.venv', 'node_modules')
    )
    # No trailing newline on the opening fence: the join below supplies it, and
    # an extra one would render as an empty first line inside the code block.
    sections.append("```")
    for d in top_dirs[:12]:
        sections.append(f"{d}/")
    sections.append("```\n")

    readme_content = "\n".join(sections)

    if dry_run:
        print(json.dumps({
            "repo": repo_name,
            "sections": len(sections),
            "chars": len(readme_content),
            "python_files": len(py_files),
            "entry_scripts": len(entry_scripts),
        }, indent=2))
        return ""

    target = root / "README.md" if output_path is None else Path(output_path)

    # Check before writing; afterwards the file always exists.
    already_existed = target.exists()
    target.write_text(readme_content, encoding="utf-8")
    # Report the real on-disk size rather than the character count.
    written_bytes = target.stat().st_size
    print(f"README {'updated' if already_existed else 'created'}: {target} ({written_bytes} bytes)")
    return str(target)

if __name__ == "__main__":
    # Command-line entry point: parse the options, then generate the README.
    cli = argparse.ArgumentParser(
        description="Generate or update README.md from codebase structure.",
    )
    cli.add_argument(
        "--dir",
        default=".",
        help="Directory to scan (default: current)",
    )
    cli.add_argument(
        "--output",
        help="Output README path (default: README.md in scanned dir)",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview without writing",
    )
    options = cli.parse_args()

    generate_readme(
        root_dir=options.dir,
        output_path=options.output,
        dry_run=options.dry_run,
    )