fix(#211): remove corrupted file

2026-04-21 11:19:07 +00:00
parent 0e6d5bffc8
commit bd8e044fb8
1 changed files with 0 additions and 551 deletions
--- a/scripts/perf_bottleneck_finder.py
+++ b/scripts/perf_bottleneck_finder.py
@@ -1,551 +0,0 @@
-#!/usr/bin/env python3
-"""
-Performance Bottleneck Finder — Identify slow tests, builds, and CI steps.
-
-Analyzes:
-  1. Pytest output for slow tests
-  2. Build logs for slow steps
-  3. CI workflow durations
-  4. File system for large/slow artifacts
-
-Usage:
-    python3 scripts/perf_bottleneck_finder.py --repo /path/to/repo
-    python3 scripts/perf_bottleneck_finder.py --repo /path/to/repo --json
-    python3 scripts/perf_bottleneck_finder.py --repo /path/to/repo --report metrics/perf_report.md
-
-Weekly cron:
-    0 9 * * 1 cd /path/to/compounding-intelligence && python3 scripts/perf_bottleneck_finder.py --repo /path/to/target --report metrics/perf_report.md
-"""
-
-import argparse
-import json
-import os
-import re
-import subprocess
-import sys
-from collections import defaultdict
-from dataclasses import dataclass, field, asdict
-from datetime import datetime, timezone
-from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
-
-
-# ── Configuration ──────────────────────────────────────────────────
-
-SLOW_TEST_THRESHOLD_S = 2.0      # Tests slower than this are flagged
-SLOW_BUILD_STEP_THRESHOLD_S = 10.0
-TOP_N_BOTTLENECKS = 10           # Report top N bottlenecks
-PYTEST_DURATIONS_COUNT = 20      # Number of slow tests to collect
-LOG_EXTENSIONS = {".log", ".txt"}
-
-
-@dataclass
-class Bottleneck:
-    """A single performance bottleneck."""
-    category: str          # "test", "build", "ci", "artifact", "import"
-    name: str              # What's slow
-    duration_s: float      # How long it takes
-    severity: str          # "critical", "warning", "info"
-    recommendation: str    # How to fix
-    file_path: Optional[str] = None
-    line_number: Optional[int] = None
-
-
-@dataclass
-class PerfReport:
-    """Full performance report."""
-    timestamp: str
-    repo_path: str
-    bottlenecks: List[Bottleneck] = field(default_factory=list)
-    summary: Dict[str, Any] = field(default_factory=dict)
-    test_stats: Dict[str, Any] = field(default_factory=dict)
-    build_stats: Dict[str, Any] = field(default_factory=dict)
-    ci_stats: Dict[str, Any] = field(default_factory=dict)
-
-    def to_dict(self) -> dict:
-        d = asdict(self)
-        return d
-
-
-# ── Test Analysis ──────────────────────────────────────────────────
-
-def find_slow_tests_pytest(repo_path: str) -> List[Bottleneck]:
-    """Run pytest --durations and parse slow tests."""
-    bottlenecks = []
-
-    # Try to run pytest with durations
-    try:
-        result = subprocess.run(
-            ["python3", "-m", "pytest", "--co", "-q", "--durations=0"],
-            cwd=repo_path, capture_output=True, text=True, timeout=30
-        )
-        # If tests exist, try to get durations from last run
-        durations_file = os.path.join(repo_path, ".pytest_cache", "v", "cache", "durations")
-        if os.path.exists(durations_file):
-            with open(durations_file) as f:
-                for line in f:
-                    parts = line.strip().split()
-                    if len(parts) >= 2:
-                        try:
-                            duration = float(parts[0])
-                            test_name = " ".join(parts[1:])
-                            if duration > SLOW_TEST_THRESHOLD_S:
-                                severity = "critical" if duration > 10 else "warning"
-                                bottlenecks.append(Bottleneck(
-                                    category="test",
-                                    name=test_name,
-                                    duration_s=duration,
-                                    severity=severity,
-                                    recommendation=f"Test takes {duration:.1f}s. Consider mocking slow I/O, using fixtures, or marking with @pytest.mark.slow."
-                                ))
-                        except ValueError:
-                            continue
-    except (subprocess.TimeoutExpired, FileNotFoundError):
-        pass
-
-    return bottlenecks
-
-
-def find_slow_tests_by_scan(repo_path: str) -> List[Bottleneck]:
-    """Scan test files for patterns that indicate slow tests."""
-    bottlenecks = []
-    test_patterns = [
-        (r"time\.sleep\((\d+(?:\.\d+)?)\)", "Contains time.sleep() — consider using mock or async wait"),
-        (r"subprocess\.run\(.*timeout=(\d+)", "Subprocess with timeout — may block test"),
-        (r"requests\.(get|post|put|delete)\(", "Real HTTP call — mock with responses or httpretty"),
-        ("open\\([^)]*)[\047\042]w[\047\042]", "File I/O in test — use tmp_path fixture"),
-    ]
-
-    for root, dirs, files in os.walk(repo_path):
-        # Skip hidden and cache dirs
-        dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__', 'node_modules', '.git'))]
-
-        for fname in files:
-            if not (fname.startswith("test_") or fname.endswith("_test.py")):
-                continue
-            if not fname.endswith(".py"):
-                continue
-
-            fpath = os.path.join(root, fname)
-            rel_path = os.path.relpath(fpath, repo_path)
-
-            try:
-                with open(fpath) as f:
-                    lines = f.readlines()
-            except (PermissionError, UnicodeDecodeError):
-                continue
-
-            for i, line in enumerate(lines):
-                for pattern, recommendation in test_patterns:
-                    match = re.search(pattern, line)
-                    if match:
-                        duration = 1.0  # Default estimate
-                        if "sleep" in pattern:
-                            try:
-                                duration = float(match.group(1))
-                            except (ValueError, IndexError):
-                                duration = 1.0
-                        elif "timeout" in pattern:
-                            try:
-                                duration = float(match.group(1))
-                            except (ValueError, IndexError):
-                                duration = 10.0
-                        else:
-                            duration = 2.0  # Estimated
-
-                        bottlenecks.append(Bottleneck(
-                            category="test",
-                            name=f"{rel_path}:{i+1}",
-                            duration_s=duration,
-                            severity="warning" if duration < 5 else "critical",
-                            recommendation=recommendation,
-                            file_path=rel_path,
-                            line_number=i + 1
-                        ))
-
-    return bottlenecks
-
-
-# ── Build Analysis ─────────────────────────────────────────────────
-
-def analyze_build_artifacts(repo_path: str) -> List[Bottleneck]:
-    """Find large build artifacts that slow down builds."""
-    bottlenecks = []
-    large_dirs = {
-        "node_modules": "Consider using npm ci --production or yarn --production",
-        "__pycache__": "Consider .gitignore and cleaning before builds",
-        ".tox": "Consider caching tox environments",
-        ".pytest_cache": "Consider cleaning between CI runs",
-        "dist": "Check if dist/ artifacts are being rebuilt unnecessarily",
-        "build": "Check if build/ artifacts are being rebuilt unnecessarily",
-        ".next": "Next.js cache — consider incremental builds",
-        "venv": "Virtual env in repo — move outside or use Docker",
-    }
-
-    for dirname, recommendation in large_dirs.items():
-        dirpath = os.path.join(repo_path, dirname)
-        if os.path.isdir(dirpath):
-            total_size = 0
-            file_count = 0
-            for root, dirs, files in os.walk(dirpath):
-                for f in files:
-                    try:
-                        fpath = os.path.join(root, f)
-                        total_size += os.path.getsize(fpath)
-                        file_count += 1
-                    except OSError:
-                        pass
-
-            if total_size > 10 * 1024 * 1024:  # > 10MB
-                size_mb = total_size / (1024 * 1024)
-                bottlenecks.append(Bottleneck(
-                    category="build",
-                    name=f"{dirname}/ ({size_mb:.1f}MB, {file_count} files)",
-                    duration_s=size_mb * 0.5,  # Rough estimate
-                    severity="critical" if size_mb > 100 else "warning",
-                    recommendation=recommendation
-                ))
-
-    return bottlenecks
-
-
-def analyze_makefile_targets(repo_path: str) -> List[Bottleneck]:
-    """Analyze Makefile for potentially slow targets."""
-    bottlenecks = []
-    makefiles = []
-
-    for root, dirs, files in os.walk(repo_path):
-        dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__'))]
-        for f in files:
-            if f in ("Makefile", "makefile", "GNUmakefile"):
-                makefiles.append(os.path.join(root, f))
-
-    slow_patterns = [
-        (r"pip install", "pip install without --no-deps or constraints"),
-        (r"npm install(?!.*--production)", "npm install without --production flag"),
-        (r"docker build", "Docker build — consider multi-stage and layer caching"),
-        (r"pytest(?!.*-x|--maxfail)", "pytest without early exit on failure"),
-        (r"mypy|mypy --strict", "Type checking — consider incremental mode"),
-    ]
-
-    for mfile in makefiles:
-        rel_path = os.path.relpath(mfile, repo_path)
-        try:
-            with open(mfile) as f:
-                content = f.read()
-        except (PermissionError, UnicodeDecodeError):
-            continue
-
-        for pattern, recommendation in slow_patterns:
-            if re.search(pattern, content):
-                bottlenecks.append(Bottleneck(
-                    category="build",
-                    name=f"{rel_path}: {pattern}",
-                    duration_s=5.0,
-                    severity="info",
-                    recommendation=recommendation,
-                    file_path=rel_path
-                ))
-
-    return bottlenecks
-
-
-# ── CI Analysis ────────────────────────────────────────────────────
-
-def analyze_github_actions(repo_path: str) -> List[Bottleneck]:
-    """Analyze GitHub Actions workflow files for inefficiencies."""
-    bottlenecks = []
-    workflow_dir = os.path.join(repo_path, ".github", "workflows")
-
-    if not os.path.isdir(workflow_dir):
-        return bottlenecks
-
-    slow_patterns = [
-        (r"runs-on:\s*ubuntu-latest", 0, "Consider caching dependencies between runs"),
-        (r"npm install", 2, "Use npm ci instead of npm install for reproducible builds"),
-        (r"pip install(?!.*--cache-dir)", 2, "Add --cache-dir or use pip cache action"),
-        (r"docker build(?!.*--cache-from)", 5, "Use Docker layer caching"),
-        (r"python -m pytest(?!.*-n|--numprocesses)", 3, "Consider pytest-xdist for parallel test execution"),
-    ]
-
-    for fname in os.listdir(workflow_dir):
-        if not fname.endswith(('.yml', '.yaml')):
-            continue
-
-        fpath = os.path.join(workflow_dir, fname)
-        try:
-            with open(fpath) as f:
-                content = f.read()
-        except (PermissionError, UnicodeDecodeError):
-            continue
-
-        for pattern, est_savings, recommendation in slow_patterns:
-            if re.search(pattern, content):
-                bottlenecks.append(Bottleneck(
-                    category="ci",
-                    name=f"{fname}: {pattern}",
-                    duration_s=est_savings,
-                    severity="info",
-                    recommendation=recommendation,
-                    file_path=f".github/workflows/{fname}"
-                ))
-
-    return bottlenecks
-
-
-def analyze_gitea_ci(repo_path: str) -> List[Bottleneck]:
-    """Analyze Gitea/Drone CI config files."""
-    bottlenecks = []
-    ci_files = [".gitea/workflows", ".drone.yml", ".woodpecker.yml"]
-
-    for ci_path in ci_files:
-        full_path = os.path.join(repo_path, ci_path)
-        if os.path.isfile(full_path):
-            try:
-                with open(full_path) as f:
-                    content = f.read()
-            except (PermissionError, UnicodeDecodeError):
-                continue
-
-            if "pip install" in content and "--cache-dir" not in content:
-                bottlenecks.append(Bottleneck(
-                    category="ci",
-                    name=f"{ci_path}: pip without cache",
-                    duration_s=5.0,
-                    severity="warning",
-                    recommendation="Add --cache-dir or mount pip cache volume",
-                    file_path=ci_path
-                ))
-
-        elif os.path.isdir(full_path):
-            for fname in os.listdir(full_path):
-                if not fname.endswith(('.yml', '.yaml')):
-                    continue
-                fpath = os.path.join(full_path, fname)
-                try:
-                    with open(fpath) as f:
-                        content = f.read()
-                except (PermissionError, UnicodeDecodeError):
-                    continue
-
-                if "pip install" in content and "--cache-dir" not in content:
-                    bottlenecks.append(Bottleneck(
-                        category="ci",
-                        name=f"{ci_path}/{fname}: pip without cache",
-                        duration_s=5.0,
-                        severity="warning",
-                        recommendation="Add --cache-dir or mount pip cache volume",
-                        file_path=f"{ci_path}/{fname}"
-                    ))
-
-    return bottlenecks
-
-
-# ── Import Analysis ────────────────────────────────────────────────
-
-def find_slow_imports(repo_path: str) -> List[Bottleneck]:
-    """Find Python files with heavy import chains."""
-    bottlenecks = []
-    heavy_imports = {
-        "pandas": 0.5,
-        "numpy": 0.3,
-        "torch": 2.0,
-        "tensorflow": 3.0,
-        "scipy": 0.5,
-        "matplotlib": 0.8,
-        "sklearn": 0.5,
-        "transformers": 1.5,
-    }
-
-    for root, dirs, files in os.walk(repo_path):
-        dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__', 'node_modules'))]
-        for fname in files:
-            if not fname.endswith(".py"):
-                continue
-
-            fpath = os.path.join(root, fname)
-            rel_path = os.path.relpath(fpath, repo_path)
-
-            try:
-                with open(fpath) as f:
-                    lines = f.readlines()
-            except (PermissionError, UnicodeDecodeError):
-                continue
-
-            for i, line in enumerate(lines):
-                stripped = line.strip()
-                if stripped.startswith("import ") or stripped.startswith("from "):
-                    for heavy, est_time in heavy_imports.items():
-                        if heavy in stripped:
-                            bottlenecks.append(Bottleneck(
-                                category="import",
-                                name=f"{rel_path}:{i+1}: import {heavy}",
-                                duration_s=est_time,
-                                severity="info" if est_time < 1.0 else "warning",
-                                recommendation=f"Heavy import ({heavy} ~{est_time}s). Consider lazy import or conditional import.",
-                                file_path=rel_path,
-                                line_number=i + 1
-                            ))
-
-    return bottlenecks
-
-
-# ── Report Generation ──────────────────────────────────────────────
-
-def severity_sort_key(b: Bottleneck) -> Tuple[int, float]:
-    """Sort by severity then duration."""
-    sev_order = {"critical": 0, "warning": 1, "info": 2}
-    return (sev_order.get(b.severity, 3), -b.duration_s)
-
-
-def generate_report(repo_path: str) -> PerfReport:
-    """Run all analyses and generate a performance report."""
-    report = PerfReport(
-        timestamp=datetime.now(timezone.utc).isoformat(),
-        repo_path=os.path.abspath(repo_path)
-    )
-
-    # Collect all bottlenecks
-    all_bottlenecks = []
-
-    print("Scanning for slow tests (pytest cache)...")
-    all_bottlenecks.extend(find_slow_tests_pytest(repo_path))
-
-    print("Scanning for slow test patterns...")
-    all_bottlenecks.extend(find_slow_tests_by_scan(repo_path))
-
-    print("Analyzing build artifacts...")
-    all_bottlenecks.extend(analyze_build_artifacts(repo_path))
-
-    print("Analyzing Makefiles...")
-    all_bottlenecks.extend(analyze_makefile_targets(repo_path))
-
-    print("Analyzing CI workflows...")
-    all_bottlenecks.extend(analyze_github_actions(repo_path))
-    all_bottlenecks.extend(analyze_gitea_ci(repo_path))
-
-    print("Scanning for heavy imports...")
-    all_bottlenecks.extend(find_slow_imports(repo_path))
-
-    # Sort by severity and duration
-    all_bottlenecks.sort(key=severity_sort_key)
-    report.bottlenecks = all_bottlenecks[:TOP_N_BOTTLENECKS * 2]  # Keep more for stats
-
-    # Compute summary
-    by_category = defaultdict(list)
-    for b in all_bottlenecks:
-        by_category[b.category].append(b)
-
-    report.summary = {
-        "total_bottlenecks": len(all_bottlenecks),
-        "critical": sum(1 for b in all_bottlenecks if b.severity == "critical"),
-        "warning": sum(1 for b in all_bottlenecks if b.severity == "warning"),
-        "info": sum(1 for b in all_bottlenecks if b.severity == "info"),
-        "estimated_total_slowdown_s": sum(b.duration_s for b in all_bottlenecks),
-        "by_category": {cat: len(items) for cat, items in by_category.items()},
-    }
-
-    report.test_stats = {
-        "slow_tests": len(by_category.get("test", [])),
-        "total_estimated_s": sum(b.duration_s for b in by_category.get("test", [])),
-    }
-
-    report.build_stats = {
-        "build_issues": len(by_category.get("build", [])),
-        "total_estimated_s": sum(b.duration_s for b in by_category.get("build", [])),
-    }
-
-    report.ci_stats = {
-        "ci_issues": len(by_category.get("ci", [])),
-        "total_estimated_s": sum(b.duration_s for b in by_category.get("ci", [])),
-    }
-
-    return report
-
-
-def format_markdown(report: PerfReport) -> str:
-    """Format report as markdown."""
-    lines = []
-    lines.append(f"# Performance Bottleneck Report")
-    lines.append(f"")
-    lines.append(f"Generated: {report.timestamp}")
-    lines.append(f"Repository: {report.repo_path}")
-    lines.append(f"")
-
-    # Summary
-    s = report.summary
-    lines.append(f"## Summary")
-    lines.append(f"")
-    lines.append(f"- **Total bottlenecks:** {s['total_bottlenecks']}")
-    lines.append(f"- **Critical:** {s['critical']} | **Warning:** {s['warning']} | **Info:** {s['info']}")
-    lines.append(f"- **Estimated total slowdown:** {s['estimated_total_slowdown_s']:.1f}s")
-    lines.append(f"- **By category:** {', '.join(f'{k}: {v}' for k, v in s['by_category'].items())}")
-    lines.append(f"")
-
-    # Top bottlenecks
-    lines.append(f"## Top {min(TOP_N_BOTTLENECKS, len(report.bottlenecks))} Bottlenecks")
-    lines.append(f"")
-
-    for i, b in enumerate(report.bottlenecks[:TOP_N_BOTTLENECKS], 1):
-        icon = {"critical": "🔴", "warning": "🟡", "info": "🔵"}.get(b.severity, "⚪")
-        loc = f" ({b.file_path}:{b.line_number})" if b.file_path else ""
-        lines.append(f"{i}. {icon} **{b.category}** — {b.name}{loc}")
-        lines.append(f"   - Duration: ~{b.duration_s:.1f}s | Severity: {b.severity}")
-        lines.append(f"   - Fix: {b.recommendation}")
-        lines.append(f"")
-
-    # Category breakdowns
-    for cat in ["test", "build", "ci", "import"]:
-        items = [b for b in report.bottlenecks if b.category == cat]
-        if items:
-            lines.append(f"## {cat.title()} Bottlenecks")
-            lines.append(f"")
-            for b in items:
-                icon = {"critical": "🔴", "warning": "🟡", "info": "🔵"}.get(b.severity, "⚪")
-                loc = f" ({b.file_path}:{b.line_number})" if b.file_path else ""
-                lines.append(f"- {icon} {b.name}{loc} — ~{b.duration_s:.1f}s — {b.recommendation}")
-            lines.append(f"")
-
-    return "
-".join(lines)
-
-
-# ── Main ───────────────────────────────────────────────────────────
-
-def main():
-    parser = argparse.ArgumentParser(description="Performance Bottleneck Finder")
-    parser.add_argument("--repo", default=".", help="Path to repository to analyze")
-    parser.add_argument("--json", action="store_true", help="Output as JSON")
-    parser.add_argument("--report", help="Write markdown report to file")
-    parser.add_argument("--threshold", type=float, default=SLOW_TEST_THRESHOLD_S,
-                        help="Slow test threshold in seconds")
-    args = parser.parse_args()
-
-    global SLOW_TEST_THRESHOLD_S
-    SLOW_TEST_THRESHOLD_S = args.threshold
-
-    if not os.path.isdir(args.repo):
-        print(f"Error: {args.repo} is not a directory", file=sys.stderr)
-        sys.exit(1)
-
-    report = generate_report(args.repo)
-
-    if args.json:
-        print(json.dumps(report.to_dict(), indent=2))
-    else:
-        md = format_markdown(report)
-        if args.report:
-            os.makedirs(os.path.dirname(args.report) or ".", exist_ok=True)
-            with open(args.report, "w") as f:
-                f.write(md)
-            print(f"Report written to {args.report}")
-        else:
-            print(md)
-
-    # Exit code: 1 if critical bottlenecks found
-    if report.summary.get("critical", 0) > 0:
-        sys.exit(1)
-
-
-if __name__ == "__main__":
-    main()