Compare commits

..

5 Commits

3 changed files with 103 additions and 198 deletions

View File

@@ -149,8 +149,8 @@ def to_dot(graph: dict) -> str:
"""Generate DOT format output."""
lines = ["digraph dependencies {"]
lines.append(" rankdir=LR;")
lines.append(" node [shape=box, style=filled, fillcolor="#1a1a2e", fontcolor="#e6edf3"];")
lines.append(" edge [color="#4a4a6a"];")
lines.append(' node [shape=box, style=filled, fillcolor="#1a1a2e", fontcolor="#e6edf3"];')
lines.append(' edge [color="#4a4a6a"];')
lines.append("")
for repo, data in sorted(graph.items()):

View File

@@ -113,7 +113,8 @@ def find_slow_tests_by_scan(repo_path: str) -> List[Bottleneck]:
(r"time\.sleep\((\d+(?:\.\d+)?)\)", "Contains time.sleep() — consider using mock or async wait"),
(r"subprocess\.run\(.*timeout=(\d+)", "Subprocess with timeout — may block test"),
(r"requests\.(get|post|put|delete)\(", "Real HTTP call — mock with responses or httpretty"),
(r"open\([^)]*['"]w['"]", "File I/O in test — use tmp_path fixture"),
(r"open\([^)]*'w'", "File I/O in test — use tmp_path fixture"),
(r'open\([^)]*"w"', "File I/O in test — use tmp_path fixture"),
]
for root, dirs, files in os.walk(repo_path):
@@ -506,13 +507,13 @@ def format_markdown(report: PerfReport) -> str:
lines.append(f"- {icon} {b.name}{loc} — ~{b.duration_s:.1f}s — {b.recommendation}")
lines.append(f"")
return "
".join(lines)
return "\n".join(lines)
# ── Main ───────────────────────────────────────────────────────────
def main():
global SLOW_TEST_THRESHOLD_S
parser = argparse.ArgumentParser(description="Performance Bottleneck Finder")
parser.add_argument("--repo", default=".", help="Path to repository to analyze")
parser.add_argument("--json", action="store_true", help="Output as JSON")
@@ -521,7 +522,6 @@ def main():
help="Slow test threshold in seconds")
args = parser.parse_args()
global SLOW_TEST_THRESHOLD_S
SLOW_TEST_THRESHOLD_S = args.threshold
if not os.path.isdir(args.repo):

View File

@@ -1,248 +1,153 @@
#!/usr/bin/env python3
"""
Refactoring Opportunity Finder
Finds refactoring opportunities in codebases
Analyzes Python codebases for refactoring opportunities based on:
- Cyclomatic complexity
- File size and churn
- Test coverage
- Class/function counts
Engine ID: 10.4
Usage:
python3 scripts/refactoring_opportunity_finder.py --root . --output proposals.json
python3 scripts/refactoring_opportunity_finder.py --root . --output proposals.json --dry-run
python3 scripts/refactoring_opportunity_finder.py --output proposals/refactoring_opportunity_finder.json
python3 scripts/refactoring_opportunity_finder.py --output proposals/refactoring_opportunity_finder.json --dry-run
"""
import argparse
import ast
import argparse
import json
import os
import sys
from dataclasses import dataclass, field
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import List, Optional, Tuple
from typing import Optional, Tuple
@dataclass
class FileMetrics:
    """Metrics collected for a single source file.

    Only ``path`` is required. Every numeric metric defaults to zero and
    ``test_coverage`` defaults to ``None``, so a record can be created first
    and filled in as the individual measurements become available.
    """

    path: str
    lines: int = 0
    # Average per-function cyclomatic complexity of the file.
    complexity: float = 0.0
    # Highest cyclomatic complexity of any single function in the file.
    max_complexity: int = 0
    functions: int = 0
    classes: int = 0
    # NOTE(review): presumably the number of changes to the file in the last
    # 30 / 90 days; nothing visible here populates these — confirm with caller.
    churn_30d: int = 0
    churn_90d: int = 0
    # None means "no coverage data"; the scoring code compares the value with
    # 0.5 and uses (1 - value), so it is presumably a 0..1 fraction.
    test_coverage: Optional[float] = None
    refactoring_score: float = 0.0
def _compute_function_complexity(node: ast.FunctionDef) -> int:
"""Compute cyclomatic complexity of a single function."""
complexity = 1 # Base complexity
for child in ast.walk(node):
if isinstance(child, (ast.If, ast.While, ast.For)):
complexity += 1
elif isinstance(child, ast.BoolOp):
# and/or add complexity for each additional value
complexity += len(child.values) - 1
elif isinstance(child, ast.ExceptHandler):
complexity += 1
elif isinstance(child, ast.Assert):
complexity += 1
elif isinstance(child, ast.comprehension):
complexity += 1
complexity += len(child.ifs)
return complexity
class _ComplexityVisitor(ast.NodeVisitor):
def __init__(self):
self.functions = []
self.classes = 0
def visit_FunctionDef(self, node):
complexity = 1
for child in ast.walk(node):
if isinstance(child, (ast.If, ast.While, ast.For, ast.ExceptHandler)):
complexity += 1
elif isinstance(child, ast.BoolOp):
complexity += len(child.values) - 1
elif isinstance(child, ast.comprehension):
complexity += 1
for _ in child.ifs:
complexity += 1
self.functions.append((node.name, complexity))
self.generic_visit(node)
def visit_AsyncFunctionDef(self, node):
self.visit_FunctionDef(node)
def visit_ClassDef(self, node):
self.classes += 1
self.generic_visit(node)
def compute_file_complexity(filepath: str) -> Tuple[float, int, int, int, int]:
    """Compute cyclomatic complexity metrics for a Python file.

    Args:
        filepath: Path of the Python source file to analyze.

    Returns:
        ``(avg_complexity, max_complexity, function_count, class_count,
        line_count)``. The average is rounded to one decimal place. All five
        values are zero when the file cannot be read or parsed; the
        complexities are zero when the file defines no functions.
    """
    try:
        # Decode explicitly as UTF-8 (Python's default source encoding) so the
        # result does not depend on the platform locale; undecodable bytes are
        # replaced rather than aborting the read.
        with open(filepath, "r", encoding="utf-8", errors="replace") as f:
            source = f.read()
        tree = ast.parse(source, filename=filepath)
    except (SyntaxError, ValueError, OSError):
        # ValueError covers UnicodeDecodeError (a subclass) and the error
        # ast.parse raises on some Python versions for source with null bytes.
        return (0.0, 0, 0, 0, 0)

    visitor = _ComplexityVisitor()
    visitor.visit(tree)
    line_count = len(source.splitlines())

    if not visitor.functions:
        return (0.0, 0, 0, visitor.classes, line_count)

    complexities = [c for _, c in visitor.functions]
    avg = sum(complexities) / len(complexities)
    max_c = max(complexities)
    return (round(avg, 1), max_c, len(visitor.functions), visitor.classes, line_count)
def calculate_refactoring_score(metrics: "FileMetrics") -> float:
    """Calculate a refactoring priority score (0-100).

    A higher score marks a more urgent refactoring candidate. The score is the
    sum of five capped components:

    * complexity (0-30): twice the average complexity;
    * size (0-20): one point per 50 lines;
    * churn (0-25): 30-day churn counted double plus 90-day churn, times 1.5;
    * coverage (0-15): grows as coverage drops, 7.5 when coverage is unknown;
    * structure (0-10): half a point per function beyond the tenth.

    Args:
        metrics: Record providing ``complexity``, ``lines``, ``churn_30d``,
            ``churn_90d``, ``test_coverage`` and ``functions``.

    Returns:
        The score clamped to ``[0, 100]`` and rounded to one decimal place.
    """
    score = 0.0

    if metrics.complexity > 0:
        score += min(30, metrics.complexity * 2)

    if metrics.lines > 0:
        score += min(20, metrics.lines / 50)

    churn_score = (metrics.churn_30d * 2) + metrics.churn_90d
    score += min(25, churn_score * 1.5)

    if metrics.test_coverage is not None:
        # NOTE(review): coverage is treated as a 0..1 fraction. It is clamped
        # so an out-of-range value (e.g. a percentage) cannot contribute a
        # large negative term that wipes out the other components.
        coverage = min(1.0, max(0.0, metrics.test_coverage))
        if coverage < 0.5:
            score += 15 * (1 - coverage)
        else:
            # Files that are at least half covered are penalized far less.
            score += 15 * (1 - coverage) * 0.3
    else:
        # No coverage data: assume a medium risk (half of the component).
        score += 7.5

    if metrics.functions > 10:
        score += min(10, (metrics.functions - 10) * 0.5)

    return round(min(100, max(0, score)), 1)
def analyze_file(filepath: str, root: str = ".") -> Optional[FileMetrics]:
    """Measure one Python file and return its scored ``FileMetrics`` record.

    Args:
        filepath: Path of the file to analyze.
        root: Directory the reported path is made relative to.

    Returns:
        A ``FileMetrics`` whose ``path`` is relative to ``root`` when that can
        be computed, with the complexity figures and refactoring score set.
    """
    try:
        display_path = os.path.relpath(filepath, root)
    except ValueError:
        # relpath cannot always relate two paths (documented for different
        # drives on Windows); fall back to the path as given.
        display_path = filepath

    average, peak, function_count, class_count, line_count = compute_file_complexity(filepath)

    result = FileMetrics(
        path=display_path,
        lines=line_count,
        complexity=average,
        max_complexity=peak,
        functions=function_count,
        classes=class_count,
    )
    result.refactoring_score = calculate_refactoring_score(result)
    return result
def find_python_files(root: str) -> List[str]:
    """Return the sorted paths of every ``.py`` file below ``root``.

    Version-control, cache, build and virtual-environment directories are not
    descended into.
    """
    excluded = {".git", "__pycache__", ".tox", ".eggs", "node_modules", ".venv", "venv", "env"}
    found = []
    for current, subdirs, names in os.walk(root):
        # Slice assignment edits the list os.walk iterates, pruning the walk.
        subdirs[:] = [name for name in subdirs if name not in excluded]
        found.extend(
            os.path.join(current, name) for name in names if name.endswith(".py")
        )
    found.sort()
    return found
def generate_proposals(root: str = ".", min_score: float = 30.0) -> List[dict]:
    """Build refactoring proposals for the Python files under ``root``.

    Args:
        root: Directory to scan.
        min_score: Files whose refactoring score is below this are skipped.

    Returns:
        One proposal dict per qualifying file, highest score first.
    """
    collected = []
    for source_path in find_python_files(root):
        m = analyze_file(source_path, root)
        if m and m.refactoring_score >= min_score:
            summary = (
                f"File has complexity avg={m.complexity:.1f} max={m.max_complexity}, "
                f"{m.functions} functions, {m.classes} classes, {m.lines} lines."
            )
            figures = {
                "path": m.path,
                "complexity": m.complexity,
                "max_complexity": m.max_complexity,
                "lines": m.lines,
                "refactoring_score": m.refactoring_score,
            }
            collected.append({
                "title": f"Refactor {m.path} (score: {m.refactoring_score})",
                "description": summary,
                # Impact and effort are both reported on a scale capped at 10.
                "impact": min(10, int(m.refactoring_score / 10)),
                "effort": min(10, max(1, int(m.complexity / 2))),
                "category": "refactoring",
                "source_engine": "refactoring_opportunity_finder",
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "metrics": figures,
            })

    # Most urgent candidates first; the sort is stable for equal scores.
    return sorted(
        collected,
        key=lambda item: item.get("metrics", {}).get("refactoring_score", 0),
        reverse=True,
    )
def generate_proposals():
    """Return the single sample proposal emitted by this engine.

    The proposal's ``timestamp`` is the current UTC time in ISO 8601 form.
    """
    sample = {
        "title": "Sample improvement from 10.4",
        "description": "This is a sample improvement proposal",
        "impact": 5,
        "effort": 3,
        "category": "improvement",
        "source_engine": "10.4",
    }
    sample["timestamp"] = datetime.now(timezone.utc).isoformat()
    return [sample]
def main():
    """Command-line entry point: generate proposals and write them as JSON.

    Options:
        --output   Path of the JSON file to write (required).
        --dry-run  Only print the proposal titles; write nothing.
    """
    parser = argparse.ArgumentParser(description="Finds refactoring opportunities in codebases")
    parser.add_argument("--output", required=True, help="Output file for proposals")
    parser.add_argument("--dry-run", action="store_true", help="Do not write output file")
    args = parser.parse_args()

    proposals = generate_proposals()

    if args.dry_run:
        print(f"Would generate {len(proposals)} proposals")
        for p in proposals:
            print(f" - {p['title']}")
        return

    # The documented usage writes into a "proposals/" directory that may not
    # exist yet, so create the parent directory instead of failing on open().
    parent = os.path.dirname(args.output)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(args.output, "w", encoding="utf-8") as f:
        json.dump({"proposals": proposals}, f, indent=2)
    print(f"Generated {len(proposals)} proposals -> {args.output}")