#!/usr/bin/env python3
"""
Refactoring Opportunity Finder — Engine 10.4

Walks a repository, computes per-file static metrics for Python sources
(cyclomatic complexity, size, optional churn/coverage inputs), scores each
file's refactoring urgency on a 0–100 scale, and emits files above a
threshold as JSON improvement proposals.

Usage:
    python3 scripts/refactoring_opportunity_finder.py --output proposals/refactoring_opportunity_finder.json
    python3 scripts/refactoring_opportunity_finder.py --output proposals/refactoring_opportunity_finder.json --dry-run
"""

import argparse
import ast
import json
import os
import sys
from dataclasses import dataclass, asdict
from datetime import datetime, timezone
from typing import List, Optional, Tuple


@dataclass
class FileMetrics:
    """Per-file static metrics feeding the refactoring score."""
    path: str                  # path relative to the scanned repo root
    lines: int                 # number of source lines
    complexity: float          # mean cyclomatic complexity across functions
    max_complexity: int        # worst single function's complexity
    functions: int             # function / async-function count
    classes: int               # class count
    churn_30d: int = 0         # churn inputs; not populated by this script yet
    churn_90d: int = 0
    test_coverage: Optional[float] = None  # 0.0–1.0, None = unknown
    refactoring_score: float = 0.0          # filled in after scoring


class ComplexityVisitor(ast.NodeVisitor):
    """Collect (name, cyclomatic complexity, lineno) per function plus a class count.

    NOTE(review): ``ast.walk(node)`` also descends into nested functions,
    so an inner function's branches are counted toward the outer function
    AND again for the inner one when ``generic_visit`` reaches it — confirm
    this double-counting is intended before relying on exact values.
    """

    def __init__(self) -> None:
        self.functions: List[Tuple[str, int, int]] = []
        self.classes: int = 0

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        # Cyclomatic complexity: 1 for the entry path + 1 per decision point.
        complexity = 1
        for child in ast.walk(node):
            if isinstance(child, (ast.If, ast.While, ast.For)):
                complexity += 1
            elif isinstance(child, ast.BoolOp):
                # Each additional and/or operand introduces a short-circuit branch.
                complexity += len(child.values) - 1
            elif isinstance(child, ast.ExceptHandler):
                complexity += 1
            elif isinstance(child, ast.Assert):
                complexity += 1
        self.functions.append((node.name, complexity, node.lineno))
        self.generic_visit(node)

    def visit_AsyncFunctionDef(self, node) -> None:
        # Async defs are scored identically to sync ones.
        self.visit_FunctionDef(node)

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        self.classes += 1
        self.generic_visit(node)


def compute_file_complexity(filepath):
    """Parse *filepath* and return ``(avg_complexity, max_complexity, functions, classes, lines)``.

    Unreadable, non-UTF-8, or syntactically invalid files yield all zeros
    instead of raising, so one bad file cannot abort a repo-wide scan.
    """
    try:
        # Fix: explicit encoding + catching UnicodeDecodeError. Previously a
        # single non-UTF-8 source file raised out of the narrow
        # (FileNotFoundError, IsADirectoryError, PermissionError) tuple and
        # crashed the entire walk. OSError subsumes that whole tuple.
        with open(filepath, encoding="utf-8") as f:
            source = f.read()
    except (OSError, UnicodeDecodeError):
        return (0.0, 0, 0, 0, 0)
    try:
        tree = ast.parse(source)
    except SyntaxError:
        return (0.0, 0, 0, 0, 0)
    # Fix: splitlines() avoids the off-by-one of count("\n") + 1 on files
    # that end with a trailing newline (the common case).
    lines = len(source.splitlines()) if source.strip() else 0
    visitor = ComplexityVisitor()
    visitor.visit(tree)
    if not visitor.functions:
        return (0.0, 0, 0, visitor.classes, lines)
    complexities = [c for _, c, _ in visitor.functions]
    avg = sum(complexities) / len(complexities)
    return (round(avg, 1), max(complexities), len(visitor.functions), visitor.classes, lines)


def calculate_refactoring_score(metrics):
    """Score *metrics* on a 0–100 scale; higher means more urgent to refactor.

    Components and caps: complexity (≤40), file size (≤20), churn (≤30),
    missing/low test coverage (≤10).
    """
    score = 0.0
    # Complexity: 4 points per unit of average complexity, capped at 40,
    # plus a bonus when any single function exceeds complexity 10.
    complexity_score = min(40, metrics.complexity * 4)
    if metrics.max_complexity > 10:
        complexity_score = min(40, complexity_score + (metrics.max_complexity - 10))
    score += complexity_score
    # Size: piecewise-linear ramp that flattens out for very large files.
    if metrics.lines <= 0:
        pass
    elif metrics.lines <= 100:
        score += metrics.lines * 0.1
    elif metrics.lines <= 500:
        score += 10 + (metrics.lines - 100) * 0.0125
    else:
        score += min(20, 15 + (metrics.lines - 500) * 0.01)
    # Churn: recent edits weigh 4x older ones; capped at 30.
    score += min(30, (metrics.churn_30d * 2) + (metrics.churn_90d * 0.5))
    # Coverage: unknown coverage earns a mild penalty; low coverage more.
    if metrics.test_coverage is None:
        score += 5
    elif metrics.test_coverage < 0.3:
        score += 10
    elif metrics.test_coverage < 0.6:
        score += 7
    elif metrics.test_coverage < 0.8:
        score += 4
    else:
        score += 1
    return round(min(100, max(0, score)), 1)


def generate_proposals(repo_path=".", threshold=30.0):
    """Scan *repo_path* for ``.py`` files and return proposals sorted by score, descending.

    Only files whose score reaches *threshold* are included.
    """
    proposals = []
    skip_prefixes = (".", "__pycache__", "node_modules", ".git", "venv")
    for root, dirs, files in os.walk(repo_path):
        # Prune hidden/vendored/cache directories in place so os.walk
        # never descends into them.
        dirs[:] = [d for d in dirs if not d.startswith(skip_prefixes)]
        for fname in files:
            if not fname.endswith(".py"):
                continue
            filepath = os.path.join(root, fname)
            relpath = os.path.relpath(filepath, repo_path)
            avg, max_c, funcs, classes, lines = compute_file_complexity(filepath)
            if funcs == 0 and classes == 0:
                continue  # empty or unparseable files carry no signal
            metrics = FileMetrics(
                path=relpath,
                lines=lines,
                complexity=avg,
                max_complexity=max_c,
                functions=funcs,
                classes=classes,
            )
            score = calculate_refactoring_score(metrics)
            metrics.refactoring_score = score
            if score >= threshold:
                proposals.append({
                    "title": f"Refactor {relpath} (score: {score})",
                    "impact": min(10, int(score / 10)),
                    "effort": min(10, max(1, funcs // 3)),
                    "category": "refactoring",
                    "source_engine": "10.4",
                    "timestamp": datetime.now(timezone.utc).isoformat(),
                    "metrics": asdict(metrics),
                })
    # Every entry was built with a "metrics" dict above, so index directly.
    return sorted(proposals, key=lambda p: p["metrics"]["refactoring_score"], reverse=True)


def main():
    """CLI entry point: scan, score, and write (or preview) proposals."""
    parser = argparse.ArgumentParser(description="Find refactoring opportunities")
    parser.add_argument("--repo", default=".")
    parser.add_argument("--output", required=True)
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--threshold", type=float, default=30.0)
    args = parser.parse_args()
    proposals = generate_proposals(args.repo, args.threshold)
    if not args.dry_run:
        with open(args.output, "w") as f:
            json.dump({"proposals": proposals}, f, indent=2)
        print(f"Generated {len(proposals)} proposals -> {args.output}")
    else:
        print(f"Would generate {len(proposals)} proposals")
        for p in proposals:
            print(f"  - {p['title']}")


if __name__ == "__main__":
    main()