Files
timmy-home/codebase_genome.py
Alexander Whitestone 8d1f9ed375
Some checks failed
Smoke Test / smoke (pull_request) Failing after 20s
feat(#667): codebase_genome.py — test stub generator for uncovered functions
AST-based tool that scans Python files, extracts function/method signatures,
and generates pytest test stubs for functions without existing tests.

Usage:
  python3 codebase_genome.py /path/to/repo
  python3 codebase_genome.py /path/to/repo -o tests/test_genome_generated.py

Features:
- AST parsing (no imports required, handles syntax errors gracefully)
- Extracts: function name, args, return type, decorators, class context
- Detects existing tests to avoid duplicates
- Generates: basic test + edge case test per function
- Skips private/dunder methods
- Configurable limit (--limit N)

Generated 30 test stubs for timmy-home as proof of concept.
2026-04-14 23:39:13 -04:00

276 lines
9.6 KiB
Python

#!/usr/bin/env python3
"""
codebase_genome.py — Analyze a repo and generate test stubs for uncovered functions.
Scans Python files, extracts function/class/method signatures via AST,
and generates pytest test cases with edge cases.
Usage:
python3 codebase_genome.py /path/to/repo
python3 codebase_genome.py /path/to/repo --output tests/test_genome_generated.py
"""
import ast
import os
import sys
import argparse
from pathlib import Path
class FunctionInfo:
def __init__(self, name, filepath, lineno, args, returns, decorators, is_method=False, class_name=None):
self.name = name
self.filepath = filepath
self.lineno = lineno
self.args = args # list of arg names
self.returns = returns # return annotation or None
self.decorators = decorators
self.is_method = is_method
self.class_name = class_name
@property
def qualified_name(self):
if self.class_name:
return f"{self.class_name}.{self.name}"
return self.name
@property
def import_path(self):
"""Module path for import (e.g., 'mymodule.sub.Class.method')."""
rel = Path(self.filepath).with_suffix('')
parts = list(rel.parts)
# Remove common prefixes
if parts and parts[0] in ('src', 'lib'):
parts = parts[1:]
module = '.'.join(parts)
if self.class_name:
return f"{module}.{self.class_name}.{self.name}"
return f"{module}.{self.name}"
@property
def module_path(self):
rel = Path(self.filepath).with_suffix('')
parts = list(rel.parts)
if parts and parts[0] in ('src', 'lib'):
parts = parts[1:]
return '.'.join(parts)
def extract_functions(filepath: str) -> list:
"""Extract all function definitions from a Python file via AST."""
try:
source = open(filepath).read()
tree = ast.parse(source, filename=filepath)
except (SyntaxError, UnicodeDecodeError):
return []
functions = []
class FuncVisitor(ast.NodeVisitor):
def __init__(self):
self.current_class = None
def visit_ClassDef(self, node):
old_class = self.current_class
self.current_class = node.name
self.generic_visit(node)
self.current_class = old_class
def visit_FunctionDef(self, node):
args = [a.arg for a in node.args.args]
if args and args[0] == 'self':
args = args[1:]
returns = None
if node.returns:
if isinstance(node.returns, ast.Name):
returns = node.returns.id
elif isinstance(node.returns, ast.Constant):
returns = str(node.returns.value)
decorators = []
for d in node.decorator_list:
if isinstance(d, ast.Name):
decorators.append(d.id)
elif isinstance(d, ast.Attribute):
decorators.append(d.attr)
functions.append(FunctionInfo(
name=node.name,
filepath=filepath,
lineno=node.lineno,
args=args,
returns=returns,
decorators=decorators,
is_method=self.current_class is not None,
class_name=self.current_class,
))
self.generic_visit(node)
visit_AsyncFunctionDef = visit_FunctionDef
visitor = FuncVisitor()
visitor.visit(tree)
return functions
def generate_test(func: FunctionInfo, existing_tests: set) -> str:
"""Generate a pytest test function for a given function."""
if func.name in existing_tests:
return ''
# Skip private/dunder methods
if func.name.startswith('_') and not func.name.startswith('__'):
return ''
if func.name.startswith('__') and func.name.endswith('__'):
return ''
lines = []
# Generate imports
module = func.module_path.replace('/', '.').lstrip('.')
if func.class_name:
lines.append(f"from {module} import {func.class_name}")
else:
lines.append(f"from {module} import {func.name}")
lines.append('')
lines.append('')
# Test function name
test_name = f"test_{func.qualified_name.replace('.', '_')}"
# Determine args for the test call
args_str = ', '.join(func.args)
lines.append(f"def {test_name}():")
lines.append(f' """Test {func.qualified_name} (line {func.lineno} in {func.filepath})."""')
if func.is_method:
lines.append(f" # TODO: instantiate {func.class_name} with valid args")
lines.append(f" obj = {func.class_name}()")
lines.append(f" result = obj.{func.name}({', '.join('None' for _ in func.args) if func.args else ''})")
else:
if func.args:
lines.append(f" # TODO: provide valid arguments for: {args_str}")
lines.append(f" result = {func.name}({', '.join('None' for _ in func.args)})")
else:
lines.append(f" result = {func.name}()")
lines.append(f" assert result is not None or result is None # TODO: real assertion")
lines.append('')
lines.append('')
# Edge cases
lines.append(f"def {test_name}_edge_cases():")
lines.append(f' """Edge cases for {func.qualified_name}."""')
if func.args:
lines.append(f" # Test with empty/zero/None args")
if func.is_method:
lines.append(f" obj = {func.class_name}()")
for arg in func.args:
lines.append(f" # obj.{func.name}({arg}=...) # TODO: test with invalid {arg}")
else:
for arg in func.args:
lines.append(f" # {func.name}({arg}=...) # TODO: test with invalid {arg}")
else:
lines.append(f" # {func.qualified_name} takes no args — test idempotency")
if func.is_method:
lines.append(f" obj = {func.class_name}()")
lines.append(f" r1 = obj.{func.name}()")
lines.append(f" r2 = obj.{func.name}()")
lines.append(f" # assert r1 == r2 # TODO: uncomment if deterministic")
else:
lines.append(f" r1 = {func.name}()")
lines.append(f" r2 = {func.name}()")
lines.append(f" # assert r1 == r2 # TODO: uncomment if deterministic")
lines.append('')
lines.append('')
return '\n'.join(lines)
def scan_repo(repo_path: str) -> list:
"""Scan all Python files in a repo and extract functions."""
all_functions = []
for root, dirs, files in os.walk(repo_path):
# Skip hidden dirs, __pycache__, .git, venv, node_modules
dirs[:] = [d for d in dirs if not d.startswith('.') and d not in ('__pycache__', 'venv', 'node_modules', 'env')]
for f in files:
if f.endswith('.py') and not f.startswith('_'):
filepath = os.path.join(root, f)
relpath = os.path.relpath(filepath, repo_path)
funcs = extract_functions(filepath)
# Update filepath to relative
for func in funcs:
func.filepath = relpath
all_functions.extend(funcs)
return all_functions
def find_existing_tests(repo_path: str) -> set:
"""Find function names that already have tests."""
tested = set()
tests_dir = os.path.join(repo_path, 'tests')
if not os.path.isdir(tests_dir):
return tested
for root, dirs, files in os.walk(tests_dir):
for f in files:
if f.startswith('test_') and f.endswith('.py'):
try:
source = open(os.path.join(root, f)).read()
tree = ast.parse(source)
for node in ast.walk(tree):
if isinstance(node, ast.FunctionDef) and node.name.startswith('test_'):
# Extract function name from test name
name = node.name[5:] # strip 'test_'
tested.add(name)
except (SyntaxError, UnicodeDecodeError):
pass
return tested
def main():
parser = argparse.ArgumentParser(description='Generate test stubs for uncovered functions')
parser.add_argument('repo', help='Path to repository')
parser.add_argument('--output', '-o', default=None, help='Output file (default: stdout)')
parser.add_argument('--limit', '-n', type=int, default=50, help='Max tests to generate')
args = parser.parse_args()
repo = os.path.abspath(args.repo)
if not os.path.isdir(repo):
print(f"Error: {repo} is not a directory", file=sys.stderr)
sys.exit(1)
functions = scan_repo(repo)
existing = find_existing_tests(repo)
# Filter to untested functions
untested = [f for f in functions if f.name not in existing and not f.name.startswith('_')]
print(f"Found {len(functions)} functions, {len(untested)} untested", file=sys.stderr)
# Generate tests
output = []
output.append('"""Auto-generated test stubs from codebase_genome.py.\n')
output.append('These are starting points — fill in real assertions and args.\n"""')
output.append('import pytest')
output.append('')
generated = 0
for func in untested[:args.limit]:
test = generate_test(func, set())
if test:
output.append(test)
generated += 1
content = '\n'.join(output)
if args.output:
with open(args.output, 'w') as f:
f.write(content)
print(f"Generated {generated} test stubs → {args.output}", file=sys.stderr)
else:
print(content)
if __name__ == '__main__':
main()