#!/usr/bin/env python3
"""Codebase Test Generator — Fill Coverage Gaps (#667)."""

import ast
import os
import sys
import argparse
from dataclasses import dataclass, field
from pathlib import Path
from typing import Dict, List, Optional, Set, Tuple


@dataclass
class FunctionInfo:
    """Static facts about one function or method collected from the AST."""

    name: str
    module_path: str  # path of the defining file, relative to the scan root
    class_name: Optional[str] = None  # innermost enclosing class, if any
    lineno: int = 0
    args: List[str] = field(default_factory=list)  # positional params minus self/cls
    is_async: bool = False
    is_private: bool = False  # single leading underscore only
    is_property: bool = False
    docstring: Optional[str] = None
    has_return: bool = False  # contains a `return <value>` somewhere in its body
    raises: List[str] = field(default_factory=list)  # names used as `raise Name(...)`
    decorators: List[str] = field(default_factory=list)

    @property
    def qualified_name(self) -> str:
        """Return ``Class.name`` for methods and plain ``name`` otherwise."""
        if self.class_name:
            return f"{self.class_name}.{self.name}"
        return self.name

    @property
    def test_name(self) -> str:
        """Return the name of the generated test: ``test_<module>_<Class>_<name>``."""
        safe_mod = self.module_path.replace("/", "_").replace(".py", "").replace("-", "_")
        safe_cls = self.class_name + "_" if self.class_name else ""
        return f"test_{safe_mod}_{safe_cls}{self.name}"


@dataclass
class CoverageGap:
    """A function without a matching test, plus why and how urgent it is."""

    func: FunctionInfo
    reason: str
    test_priority: int  # 1 = high, 2 = medium, 3 = low (see identify_gaps)


class SourceAnalyzer(ast.NodeVisitor):
    """AST visitor that records a FunctionInfo for every def / async def."""

    def __init__(self, module_path: str):
        self.module_path = module_path
        self.functions: List[FunctionInfo] = []
        self._class_stack: List[str] = []

    def visit_ClassDef(self, node):
        """Track the enclosing class while visiting its body."""
        self._class_stack.append(node.name)
        self.generic_visit(node)
        self._class_stack.pop()

    def visit_FunctionDef(self, node):
        """Record a synchronous function, then descend into nested definitions."""
        self._collect(node, False)
        self.generic_visit(node)

    def visit_AsyncFunctionDef(self, node):
        """Record a coroutine function, then descend into nested definitions."""
        self._collect(node, True)
        self.generic_visit(node)

    def _collect(self, node, is_async):
        """Build a FunctionInfo from *node* and append it to ``self.functions``.

        The class name is taken from the top of the class stack, so a function
        nested inside a method is attributed to that method's class.
        ``ast.walk`` also descends into nested definitions, so returns and
        raises of inner functions count towards the outer one.
        """
        cls = self._class_stack[-1] if self._class_stack else None
        args = [a.arg for a in node.args.args if a.arg not in ("self", "cls")]
        has_ret = any(
            isinstance(child, ast.Return) and child.value is not None
            for child in ast.walk(node)
        )

        # Only the `raise Name(...)` form is recognised; re-raises, bare names
        # and dotted exception paths are ignored.
        raised = [
            child.exc.func.id
            for child in ast.walk(node)
            if isinstance(child, ast.Raise)
            and isinstance(child.exc, ast.Call)
            and isinstance(child.exc.func, ast.Name)
        ]
        # Drop duplicates while keeping first-seen order.
        raises = list(dict.fromkeys(raised))

        decos = []
        for deco in node.decorator_list:
            if isinstance(deco, ast.Name):
                decos.append(deco.id)
            elif isinstance(deco, ast.Attribute):
                decos.append(deco.attr)

        self.functions.append(FunctionInfo(
            name=node.name,
            module_path=self.module_path,
            class_name=cls,
            lineno=node.lineno,
            args=args,
            is_async=is_async,
            is_private=node.name.startswith("_") and not node.name.startswith("__"),
            is_property="property" in decos,
            docstring=ast.get_docstring(node),
            has_return=has_ret,
            raises=raises,
            decorators=decos,
        ))


def analyze_file(filepath, base_dir):
    """Parse *filepath* and return the FunctionInfo list for its definitions.

    ``module_path`` of each result is *filepath* relative to *base_dir*.
    Files that cannot be parsed yield an empty list instead of an error.
    """
    module_path = os.path.relpath(filepath, base_dir)
    try:
        # Explicit UTF-8 so the result does not depend on the locale;
        # undecodable bytes are replaced rather than aborting the scan.
        with open(filepath, "r", encoding="utf-8", errors="replace") as f:
            tree = ast.parse(f.read(), filename=filepath)
    except (SyntaxError, UnicodeDecodeError, ValueError):
        # ValueError: older interpreters raise it for source containing NULs.
        return []
    analyzer = SourceAnalyzer(module_path)
    analyzer.visit(tree)
    return analyzer.functions


def find_source_files(source_dir):
    """Return the sorted paths of candidate ``.py`` files under *source_dir*.

    Skips hidden directories, the directories named in ``exclude``,
    ``__init__.py`` and any file whose name starts with ``test_``.
    """
    exclude = {"__pycache__", ".git", "venv", ".venv", "node_modules", ".tox", "build", "dist"}
    files = []
    for root, dirs, names in os.walk(source_dir):
        # Prune in place so os.walk does not descend into skipped directories.
        dirs[:] = [d for d in dirs if d not in exclude and not d.startswith(".")]
        for name in names:
            if name.endswith(".py") and name != "__init__.py" and not name.startswith("test_"):
                files.append(os.path.join(root, name))
    return sorted(files)


def find_existing_tests(test_dir):
    """Return the set of ``test_*`` function names defined under *test_dir*.

    Only files named ``test_*.py`` are read. Both ``def`` and ``async def``
    tests are collected. Unparseable files are skipped; a missing directory
    yields an empty set because ``os.walk`` produces nothing for it.
    """
    existing = set()
    for root, _dirs, names in os.walk(test_dir):
        for name in names:
            if not (name.startswith("test_") and name.endswith(".py")):
                continue
            try:
                with open(os.path.join(root, name), encoding="utf-8") as fh:
                    tree = ast.parse(fh.read())
            except (SyntaxError, UnicodeDecodeError, ValueError):
                continue
            for node in ast.walk(tree):
                if (
                    isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
                    and node.name.startswith("test_")
                ):
                    existing.add(node.name)
    return existing


def _has_matching_test(func_name, existing_tests):
    """Return True if any test name contains *func_name* as a whole token.

    Tokens are delimited by underscores, so ``get`` matches ``test_get`` and
    ``test_mod_get_default`` but not ``test_target``. Leading and trailing
    underscores of *func_name* are ignored (``__init__`` is matched as
    ``init``).
    """
    token = func_name.strip("_")
    if not token:
        return False
    needle = f"_{token}_"
    # Appending "_" lets a token at the very end of the test name match too.
    return any(needle in f"{test}_" for test in existing_tests)


def identify_gaps(functions, existing_tests):
    """Return a CoverageGap for every function with no matching test.

    Dunder methods other than ``__init__`` are ignored. Priority is 3 for
    private functions, 1 for functions that raise or return a value, and 2
    otherwise. The result is sorted by priority, module path, then name.
    """
    gaps = []
    for func in functions:
        if func.name.startswith("__") and func.name != "__init__":
            continue
        if _has_matching_test(func.name, existing_tests):
            continue
        if func.is_private:
            pri = 3
        elif func.raises or func.has_return:
            pri = 1
        else:
            pri = 2
        gaps.append(CoverageGap(func=func, reason="no test found", test_priority=pri))
    gaps.sort(key=lambda g: (g.test_priority, g.func.module_path, g.func.name))
    return gaps
def generate_test(gap): func = gap.func lines = [] lines.append(f" # AUTO-GENERATED -- review before merging") lines.append(f" # Source: {func.module_path}:{func.lineno}") lines.append(f" # Function: {func.qualified_name}") lines.append("") mod_imp = func.module_path.replace("/", ".").replace("-", "_").replace(".py", "") call_args = [] for a in func.args: if a in ("self", "cls"): continue if "path" in a or "file" in a or "dir" in a: call_args.append(f"{a}='/tmp/test'") elif "name" in a: call_args.append(f"{a}='test'") elif "id" in a or "key" in a: call_args.append(f"{a}='test_id'") elif "message" in a or "text" in a: call_args.append(f"{a}='test msg'") elif "count" in a or "num" in a or "size" in a: call_args.append(f"{a}=1") elif "flag" in a or "enabled" in a or "verbose" in a: call_args.append(f"{a}=False") else: call_args.append(f"{a}=None") args_str = ", ".join(call_args) if func.is_async: lines.append(" @pytest.mark.asyncio") lines.append(f" def {func.test_name}(self):") lines.append(f' """Test {func.qualified_name} -- auto-generated."""') if func.class_name: lines.append(f" try:") lines.append(f" from {mod_imp} import {func.class_name}") if func.is_private: lines.append(f" pytest.skip('Private method')") elif func.is_property: lines.append(f" obj = {func.class_name}()") lines.append(f" _ = obj.{func.name}") else: if func.raises: lines.append(f" with pytest.raises(({', '.join(func.raises)})):") lines.append(f" {func.class_name}().{func.name}({args_str})") else: lines.append(f" obj = {func.class_name}()") lines.append(f" result = obj.{func.name}({args_str})") if func.has_return: lines.append(f" assert result is not None or result is None # Placeholder") lines.append(f" except ImportError:") lines.append(f" pytest.skip('Module not importable')") else: lines.append(f" try:") lines.append(f" from {mod_imp} import {func.name}") if func.is_private: lines.append(f" pytest.skip('Private function')") else: if func.raises: lines.append(f" with pytest.raises(({', 
'.join(func.raises)})):") lines.append(f" {func.name}({args_str})") else: lines.append(f" result = {func.name}({args_str})") if func.has_return: lines.append(f" assert result is not None or result is None # Placeholder") lines.append(f" except ImportError:") lines.append(f" pytest.skip('Module not importable')") return chr(10).join(lines) def generate_test_suite(gaps, max_tests=50): by_module = {} for gap in gaps[:max_tests]: by_module.setdefault(gap.func.module_path, []).append(gap) lines = [] lines.append('"""Auto-generated test suite -- Codebase Genome (#667).') lines.append("") lines.append("Generated by scripts/codebase_test_generator.py") lines.append("Coverage gaps identified from AST analysis.") lines.append("") lines.append("These tests are starting points. Review before merging.") lines.append('"""') lines.append("") lines.append("import pytest") lines.append("from unittest.mock import MagicMock, patch") lines.append("") lines.append("") lines.append("# AUTO-GENERATED -- DO NOT EDIT WITHOUT REVIEW") for module, mgaps in sorted(by_module.items()): safe = module.replace("/", "_").replace(".py", "").replace("-", "_") cls_name = "".join(w.title() for w in safe.split("_")) lines.append("") lines.append(f"class Test{cls_name}Generated:") lines.append(f' """Auto-generated tests for {module}."""') for gap in mgaps: lines.append("") lines.append(generate_test(gap)) lines.append("") return chr(10).join(lines) def main(): parser = argparse.ArgumentParser(description="Codebase Test Generator") parser.add_argument("--source", default=".") parser.add_argument("--output", default="tests/test_genome_generated.py") parser.add_argument("--max-tests", type=int, default=50) parser.add_argument("--dry-run", action="store_true") parser.add_argument("--include-private", action="store_true") args = parser.parse_args() source_dir = os.path.abspath(args.source) test_dir = os.path.join(source_dir, "tests") print(f"Scanning: {source_dir}") source_files = 
find_source_files(source_dir) print(f"Source files: {len(source_files)}") all_funcs = [] for f in source_files: all_funcs.extend(analyze_file(f, source_dir)) print(f"Functions/methods: {len(all_funcs)}") existing = find_existing_tests(test_dir) print(f"Existing tests: {len(existing)}") gaps = identify_gaps(all_funcs, existing) if not args.include_private: gaps = [g for g in gaps if not g.func.is_private] print(f"Coverage gaps: {len(gaps)}") by_pri = {1: 0, 2: 0, 3: 0} for g in gaps: by_pri[g.test_priority] += 1 print(f" High: {by_pri[1]}, Medium: {by_pri[2]}, Low: {by_pri[3]}") if args.dry_run: for g in gaps[:10]: print(f" {g.func.module_path}:{g.func.lineno} {g.func.qualified_name}") return if gaps: content = generate_test_suite(gaps, max_tests=args.max-tests if hasattr(args, 'max-tests') else args.max_tests) out = os.path.join(source_dir, args.output) os.makedirs(os.path.dirname(out), exist_ok=True) with open(out, "w") as f: f.write(content) print(f"Generated {min(len(gaps), args.max_tests)} tests -> {args.output}") else: print("No gaps found!") if __name__ == "__main__": main()