# timmy-home/codebase_genome.py

#!/usr/bin/env python3
"""
codebase_genome.py — Analyze a repo and generate test stubs for uncovered functions.

Scans Python files, extracts function/class/method signatures via AST,
and generates pytest test cases with edge cases.

Usage:
    python3 codebase_genome.py /path/to/repo
    python3 codebase_genome.py /path/to/repo --output tests/test_genome_generated.py
"""
import ast
import os
import sys
import argparse
from pathlib import Path


class FunctionInfo:
    """Metadata describing one function or method definition.

    Attributes:
        name: Bare function name from the ``def`` statement.
        filepath: Path of the file that contains the definition.
        lineno: Line number of the definition.
        args: List of argument names.
        returns: Return annotation as a string, or ``None``.
        decorators: List of decorator names.
        is_method: ``True`` when the definition sits inside a class.
        class_name: Name of the enclosing class, or ``None``.
    """

    def __init__(self, name, filepath, lineno, args, returns, decorators, is_method=False, class_name=None):
        self.name = name
        self.filepath = filepath
        self.lineno = lineno
        self.args = args  # list of arg names
        self.returns = returns  # return annotation or None
        self.decorators = decorators
        self.is_method = is_method
        self.class_name = class_name

    def __repr__(self):
        return (
            f"{type(self).__name__}(qualified_name={self.qualified_name!r}, "
            f"filepath={self.filepath!r}, lineno={self.lineno!r})"
        )

    @property
    def qualified_name(self):
        """Return ``Class.name`` for methods and plain ``name`` otherwise."""
        if self.class_name:
            return f"{self.class_name}.{self.name}"
        return self.name

    @property
    def module_path(self):
        """Dotted module path derived from ``filepath`` (e.g. 'mymodule.sub').

        The file suffix is dropped and a leading ``src`` or ``lib``
        component is removed.
        """
        parts = list(Path(self.filepath).with_suffix('').parts)
        # Remove common source-root prefixes.
        if parts and parts[0] in ('src', 'lib'):
            parts = parts[1:]
        return '.'.join(parts)

    @property
    def import_path(self):
        """Module path for import (e.g., 'mymodule.sub.Class.method')."""
        # Built from the two properties above so the prefix-stripping rule
        # lives in exactly one place.
        return f"{self.module_path}.{self.qualified_name}"


def extract_functions(filepath: str) -> list:
    """Extract all function definitions from a Python file via AST.

    Args:
        filepath: Path of the Python source file to parse.

    Returns:
        A list of ``FunctionInfo`` objects, one per ``def`` / ``async def``
        found in the file (nested definitions included). An empty list is
        returned when the file cannot be read or parsed.
    """
    try:
        # Read bytes and let the parser decode them: that honours coding
        # cookies and BOMs instead of depending on the locale encoding, and
        # the context manager closes the handle promptly.
        with open(filepath, 'rb') as fh:
            source = fh.read()
        tree = ast.parse(source, filename=filepath)
    except (OSError, SyntaxError, ValueError):
        # ValueError covers UnicodeDecodeError and the "null bytes" error
        # raised by some interpreter versions; unreadable files are skipped
        # rather than aborting the whole scan.
        return []

    functions = []

    class FuncVisitor(ast.NodeVisitor):
        """Collect a FunctionInfo for every function definition visited."""

        def __init__(self):
            self.current_class = None

        def visit_ClassDef(self, node):
            # Save/restore so the name of a nested class does not leak into
            # definitions that follow it in the outer scope.
            old_class = self.current_class
            self.current_class = node.name
            self.generic_visit(node)
            self.current_class = old_class

        def visit_FunctionDef(self, node):
            # Positional-only parameters are stored separately from the
            # regular positional ones; both can be passed positionally.
            positional = list(getattr(node.args, 'posonlyargs', [])) + list(node.args.args)
            args = [a.arg for a in positional]
            in_class = self.current_class is not None
            # Drop the implicit receiver: ``self`` as before, plus ``cls``
            # for definitions inside a class, since it is bound by the call.
            if args and (args[0] == 'self' or (in_class and args[0] == 'cls')):
                args = args[1:]

            returns = None
            if node.returns:
                if isinstance(node.returns, ast.Name):
                    returns = node.returns.id
                elif isinstance(node.returns, ast.Constant):
                    returns = str(node.returns.value)

            decorators = []
            for d in node.decorator_list:
                if isinstance(d, ast.Name):
                    decorators.append(d.id)
                elif isinstance(d, ast.Attribute):
                    decorators.append(d.attr)

            functions.append(FunctionInfo(
                name=node.name,
                filepath=filepath,
                lineno=node.lineno,
                args=args,
                returns=returns,
                decorators=decorators,
                is_method=in_class,
                class_name=self.current_class,
            ))
            # NOTE: definitions nested inside this function are visited with
            # the same current_class, so they are reported under it as well.
            self.generic_visit(node)

        visit_AsyncFunctionDef = visit_FunctionDef

    visitor = FuncVisitor()
    visitor.visit(tree)
    return functions


def generate_test(func: FunctionInfo, existing_tests: set) -> str:
    """Generate a pytest test function for a given function."""
    if func.name in existing_tests:
        return ''

    # Skip private/dunder methods
    if func.name.startswith('_') and not func.name.startswith('__'):
        return ''
    if func.name.startswith('__') and func.name.endswith('__'):
        return ''

    lines = []

    # Generate imports
    module = func.module_path.replace('/', '.').lstrip('.')
    if func.class_name:
        lines.append(f"from {module} import {func.class_name}")
    else:
        lines.append(f"from {module} import {func.name}")
    lines.append('')
    lines.append('')

    # Test function name
    test_name = f"test_{func.qualified_name.replace('.', '_')}"

    # Determine args for the test call
    args_str = ', '.join(func.args)

    lines.append(f"def {test_name}():")
    lines.append(f'    """Test {func.qualified_name} (line {func.lineno} in {func.filepath})."""')

    if func.is_method:
        lines.append(f"    # TODO: instantiate {func.class_name} with valid args")
        lines.append(f"    obj = {func.class_name}()")
        lines.append(f"    result = obj.{func.name}({', '.join('None' for _ in func.args) if func.args else ''})")
    else:
        if func.args:
            lines.append(f"    # TODO: provide valid arguments for: {args_str}")
            lines.append(f"    result = {func.name}({', '.join('None' for _ in func.args)})")
        else:
            lines.append(f"    result = {func.name}()")

    lines.append(f"    assert result is not None or result is None  # TODO: real assertion")
    lines.append('')
    lines.append('')

    # Edge cases
    lines.append(f"def {test_name}_edge_cases():")
    lines.append(f'    """Edge cases for {func.qualified_name}."""')
    if func.args:
        lines.append(f"    # Test with empty/zero/None args")
        if func.is_method:
            lines.append(f"    obj = {func.class_name}()")
            for arg in func.args:
                lines.append(f"    # obj.{func.name}({arg}=...)  # TODO: test with invalid {arg}")
        else:
            for arg in func.args:
                lines.append(f"    # {func.name}({arg}=...)  # TODO: test with invalid {arg}")
    else:
        lines.append(f"    # {func.qualified_name} takes no args — test idempotency")
        if func.is_method:
            lines.append(f"    obj = {func.class_name}()")
            lines.append(f"    r1 = obj.{func.name}()")
            lines.append(f"    r2 = obj.{func.name}()")
            lines.append(f"    # assert r1 == r2  # TODO: uncomment if deterministic")
        else:
            lines.append(f"    r1 = {func.name}()")
            lines.append(f"    r2 = {func.name}()")
            lines.append(f"    # assert r1 == r2  # TODO: uncomment if deterministic")
    lines.append('')
    lines.append('')

    return '\n'.join(lines)


def scan_repo(repo_path: str) -> list:
    """Scan the Python files in a repo and extract their functions.

    Hidden directories and ``__pycache__``/``venv``/``env``/``node_modules``
    are not entered. Files whose name starts with an underscore are skipped,
    as are test modules (``test_*.py``, ``*_test.py``, ``conftest.py``), so
    that no stubs are generated for tests themselves.

    Args:
        repo_path: Root directory to walk.

    Returns:
        A list of the objects returned by ``extract_functions``, with each
        ``filepath`` rewritten to be relative to ``repo_path``. Directories
        and files are visited in sorted order so the result is stable.
    """
    skipped_dirs = ('__pycache__', 'venv', 'node_modules', 'env')
    all_functions = []
    for root, dirs, files in os.walk(repo_path):
        # Prune in place so os.walk does not descend into skipped dirs;
        # sorting keeps traversal independent of filesystem listing order.
        dirs[:] = sorted(
            d for d in dirs if not d.startswith('.') and d not in skipped_dirs
        )
        for f in sorted(files):
            if not f.endswith('.py') or f.startswith('_'):
                continue
            # Test modules hold tests, not code that needs test stubs.
            if f.startswith('test_') or f.endswith('_test.py') or f == 'conftest.py':
                continue
            filepath = os.path.join(root, f)
            relpath = os.path.relpath(filepath, repo_path)
            funcs = extract_functions(filepath)
            # Update filepath to relative
            for func in funcs:
                func.filepath = relpath
            all_functions.extend(funcs)
    return all_functions


def find_existing_tests(repo_path: str) -> set:
    """Find function names that already have tests."""
    tested = set()
    tests_dir = os.path.join(repo_path, 'tests')
    if not os.path.isdir(tests_dir):
        return tested
    for root, dirs, files in os.walk(tests_dir):
        for f in files:
            if f.startswith('test_') and f.endswith('.py'):
                try:
                    source = open(os.path.join(root, f)).read()
                    tree = ast.parse(source)
                    for node in ast.walk(tree):
                        if isinstance(node, ast.FunctionDef) and node.name.startswith('test_'):
                            # Extract function name from test name
                            name = node.name[5:]  # strip 'test_'
                            tested.add(name)
                except (SyntaxError, UnicodeDecodeError):
                    pass
    return tested


def main():
    """Command-line entry point: scan a repo and emit pytest stubs.

    Parses the ``repo``, ``--output`` and ``--limit`` arguments, collects the
    functions of the repo, drops those that are underscore-prefixed or that
    already have a test, and writes stubs for at most ``--limit`` of the
    rest to the output file (or stdout). Progress messages go to stderr.

    Exits with status 1 when ``repo`` is not a directory, and with an
    argparse usage error when ``--limit`` is negative.
    """
    parser = argparse.ArgumentParser(description='Generate test stubs for uncovered functions')
    parser.add_argument('repo', help='Path to repository')
    parser.add_argument('--output', '-o', default=None, help='Output file (default: stdout)')
    parser.add_argument('--limit', '-n', type=int, default=50, help='Max tests to generate')
    args = parser.parse_args()

    # A negative value would slice from the end and silently drop entries.
    if args.limit < 0:
        parser.error('--limit must be non-negative')

    repo = os.path.abspath(args.repo)
    if not os.path.isdir(repo):
        print(f"Error: {repo} is not a directory", file=sys.stderr)
        sys.exit(1)

    functions = scan_repo(repo)
    existing = find_existing_tests(repo)

    # Filter to untested functions. Method stubs are named
    # test_<Class>_<method>, so that spelling counts as "tested" too.
    untested = [
        f for f in functions
        if not f.name.startswith('_')
        and f.name not in existing
        and f.qualified_name.replace('.', '_') not in existing
    ]
    print(f"Found {len(functions)} functions, {len(untested)} untested", file=sys.stderr)

    # Generate tests
    output = []
    output.append('"""Auto-generated test stubs from codebase_genome.py.\n')
    output.append('These are starting points — fill in real assertions and args.\n"""')
    output.append('import pytest')
    output.append('')

    generated = 0
    for func in untested[:args.limit]:
        test = generate_test(func, existing)
        if test:
            output.append(test)
            generated += 1

    content = '\n'.join(output)

    if args.output:
        out_dir = os.path.dirname(args.output)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        # The content contains non-ASCII characters; Python reads source as
        # UTF-8 by default, so write it as UTF-8 regardless of the locale.
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(content)
        print(f"Generated {generated} test stubs → {args.output}", file=sys.stderr)
    else:
        print(content)


# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()