#!/usr/bin/env python3 """ codebase_genome.py — Analyze a repo and generate test stubs for uncovered functions. Scans Python files, extracts function/class/method signatures via AST, and generates pytest test cases with edge cases. Usage: python3 codebase_genome.py /path/to/repo python3 codebase_genome.py /path/to/repo --output tests/test_genome_generated.py """ import ast import os import sys import argparse from pathlib import Path class FunctionInfo: def __init__(self, name, filepath, lineno, args, returns, decorators, is_method=False, class_name=None): self.name = name self.filepath = filepath self.lineno = lineno self.args = args # list of arg names self.returns = returns # return annotation or None self.decorators = decorators self.is_method = is_method self.class_name = class_name @property def qualified_name(self): if self.class_name: return f"{self.class_name}.{self.name}" return self.name @property def import_path(self): """Module path for import (e.g., 'mymodule.sub.Class.method').""" rel = Path(self.filepath).with_suffix('') parts = list(rel.parts) # Remove common prefixes if parts and parts[0] in ('src', 'lib'): parts = parts[1:] module = '.'.join(parts) if self.class_name: return f"{module}.{self.class_name}.{self.name}" return f"{module}.{self.name}" @property def module_path(self): rel = Path(self.filepath).with_suffix('') parts = list(rel.parts) if parts and parts[0] in ('src', 'lib'): parts = parts[1:] return '.'.join(parts) def extract_functions(filepath: str) -> list: """Extract all function definitions from a Python file via AST.""" try: source = open(filepath).read() tree = ast.parse(source, filename=filepath) except (SyntaxError, UnicodeDecodeError): return [] functions = [] class FuncVisitor(ast.NodeVisitor): def __init__(self): self.current_class = None def visit_ClassDef(self, node): old_class = self.current_class self.current_class = node.name self.generic_visit(node) self.current_class = old_class def visit_FunctionDef(self, node): args = [a.arg for a in node.args.args] if args and args[0] == 'self': args = args[1:] returns = None if node.returns: if isinstance(node.returns, ast.Name): returns = node.returns.id elif isinstance(node.returns, ast.Constant): returns = str(node.returns.value) decorators = [] for d in node.decorator_list: if isinstance(d, ast.Name): decorators.append(d.id) elif isinstance(d, ast.Attribute): decorators.append(d.attr) functions.append(FunctionInfo( name=node.name, filepath=filepath, lineno=node.lineno, args=args, returns=returns, decorators=decorators, is_method=self.current_class is not None, class_name=self.current_class, )) self.generic_visit(node) visit_AsyncFunctionDef = visit_FunctionDef visitor = FuncVisitor() visitor.visit(tree) return functions def generate_test(func: FunctionInfo, existing_tests: set) -> str: """Generate a pytest test function for a given function.""" if func.name in existing_tests: return '' # Skip private/dunder methods if func.name.startswith('_') and not func.name.startswith('__'): return '' if func.name.startswith('__') and func.name.endswith('__'): return '' lines = [] # Generate imports module = func.module_path.replace('/', '.').lstrip('.') if func.class_name: lines.append(f"from {module} import {func.class_name}") else: lines.append(f"from {module} import {func.name}") lines.append('') lines.append('') # Test function name test_name = f"test_{func.qualified_name.replace('.', '_')}" # Determine args for the test call args_str = ', '.join(func.args) lines.append(f"def {test_name}():") lines.append(f' """Test {func.qualified_name} (line {func.lineno} in {func.filepath})."""') if func.is_method: lines.append(f" # TODO: instantiate {func.class_name} with valid args") lines.append(f" obj = {func.class_name}()") lines.append(f" result = obj.{func.name}({', '.join('None' for _ in func.args) if func.args else ''})") else: if func.args: lines.append(f" # TODO: provide valid arguments for: {args_str}") lines.append(f" result = {func.name}({', '.join('None' for _ in func.args)})") else: lines.append(f" result = {func.name}()") lines.append(f" assert result is not None or result is None # TODO: real assertion") lines.append('') lines.append('') # Edge cases lines.append(f"def {test_name}_edge_cases():") lines.append(f' """Edge cases for {func.qualified_name}."""') if func.args: lines.append(f" # Test with empty/zero/None args") if func.is_method: lines.append(f" obj = {func.class_name}()") for arg in func.args: lines.append(f" # obj.{func.name}({arg}=...) # TODO: test with invalid {arg}") else: for arg in func.args: lines.append(f" # {func.name}({arg}=...) # TODO: test with invalid {arg}") else: lines.append(f" # {func.qualified_name} takes no args — test idempotency") if func.is_method: lines.append(f" obj = {func.class_name}()") lines.append(f" r1 = obj.{func.name}()") lines.append(f" r2 = obj.{func.name}()") lines.append(f" # assert r1 == r2 # TODO: uncomment if deterministic") else: lines.append(f" r1 = {func.name}()") lines.append(f" r2 = {func.name}()") lines.append(f" # assert r1 == r2 # TODO: uncomment if deterministic") lines.append('') lines.append('') return '\n'.join(lines) def scan_repo(repo_path: str) -> list: """Scan all Python files in a repo and extract functions.""" all_functions = [] for root, dirs, files in os.walk(repo_path): # Skip hidden dirs, __pycache__, .git, venv, node_modules dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ('__pycache__', 'venv', 'node_modules', 'env')] for f in files: if f.endswith('.py') and not f.startswith('_'): filepath = os.path.join(root, f) relpath = os.path.relpath(filepath, repo_path) funcs = extract_functions(filepath) # Update filepath to relative for func in funcs: func.filepath = relpath all_functions.extend(funcs) return all_functions def find_existing_tests(repo_path: str) -> set: """Find function names that already have tests.""" tested = set() tests_dir = os.path.join(repo_path, 'tests') if not os.path.isdir(tests_dir): return tested for root, dirs, files in os.walk(tests_dir): for f in files: if f.startswith('test_') and f.endswith('.py'): try: source = open(os.path.join(root, f)).read() tree = ast.parse(source) for node in ast.walk(tree): if isinstance(node, ast.FunctionDef) and node.name.startswith('test_'): # Extract function name from test name name = node.name[5:] # strip 'test_' tested.add(name) except (SyntaxError, UnicodeDecodeError): pass return tested def main(): parser = argparse.ArgumentParser(description='Generate test stubs for uncovered functions') parser.add_argument('repo', help='Path to repository') parser.add_argument('--output', '-o', default=None, help='Output file (default: stdout)') parser.add_argument('--limit', '-n', type=int, default=50, help='Max tests to generate') args = parser.parse_args() repo = os.path.abspath(args.repo) if not os.path.isdir(repo): print(f"Error: {repo} is not a directory", file=sys.stderr) sys.exit(1) functions = scan_repo(repo) existing = find_existing_tests(repo) # Filter to untested functions untested = [f for f in functions if f.name not in existing and not f.name.startswith('_')] print(f"Found {len(functions)} functions, {len(untested)} untested", file=sys.stderr) # Generate tests output = [] output.append('"""Auto-generated test stubs from codebase_genome.py.\n') output.append('These are starting points — fill in real assertions and args.\n"""') output.append('import pytest') output.append('') generated = 0 for func in untested[:args.limit]: test = generate_test(func, set()) if test: output.append(test) generated += 1 content = '\n'.join(output) if args.output: with open(args.output, 'w') as f: f.write(content) print(f"Generated {generated} test stubs → {args.output}", file=sys.stderr) else: print(content) if __name__ == '__main__': main()