From 5877f0ea17e016656c393e79656760a4bfb6e005 Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Tue, 21 Apr 2026 11:15:24 +0000 Subject: [PATCH 1/5] =?UTF-8?q?fix(#211):=20fix=20regex=20syntax=20error?= =?UTF-8?q?=20in=20test=5Fpatterns=20=E2=80=94=20raw=20string=20quote=20es?= =?UTF-8?q?caping?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/perf_bottleneck_finder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/perf_bottleneck_finder.py b/scripts/perf_bottleneck_finder.py index e40ad6a..face0b4 100644 --- a/scripts/perf_bottleneck_finder.py +++ b/scripts/perf_bottleneck_finder.py @@ -113,7 +113,7 @@ def find_slow_tests_by_scan(repo_path: str) -> List[Bottleneck]: (r"time\.sleep\((\d+(?:\.\d+)?)\)", "Contains time.sleep() — consider using mock or async wait"), (r"subprocess\.run\(.*timeout=(\d+)", "Subprocess with timeout — may block test"), (r"requests\.(get|post|put|delete)\(", "Real HTTP call — mock with responses or httpretty"), - (r"open\([^)]*['"]w['"]", "File I/O in test — use tmp_path fixture"), + (r'open\\([^)]*)[\'"\"]w[\'"\"]', "File I/O in test — use tmp_path fixture"), ] for root, dirs, files in os.walk(repo_path): -- 2.43.0 From 0e6d5bffc8271d7b2c9fda9736c066eb1a7526b6 Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Tue, 21 Apr 2026 11:17:37 +0000 Subject: [PATCH 2/5] =?UTF-8?q?fix(#211):=20fix=20regex=20string=20escapin?= =?UTF-8?q?g=20=E2=80=94=20use=20non-raw=20string=20with=20octal=20escapes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/perf_bottleneck_finder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/perf_bottleneck_finder.py b/scripts/perf_bottleneck_finder.py index face0b4..6d43f0e 100644 --- a/scripts/perf_bottleneck_finder.py +++ b/scripts/perf_bottleneck_finder.py @@ -113,7 +113,7 @@ def find_slow_tests_by_scan(repo_path: str) -> List[Bottleneck]: 
(r"time\.sleep\((\d+(?:\.\d+)?)\)", "Contains time.sleep() — consider using mock or async wait"), (r"subprocess\.run\(.*timeout=(\d+)", "Subprocess with timeout — may block test"), (r"requests\.(get|post|put|delete)\(", "Real HTTP call — mock with responses or httpretty"), - (r'open\\([^)]*)[\'"\"]w[\'"\"]', "File I/O in test — use tmp_path fixture"), + ("open\\([^)]*)[\047\042]w[\047\042]", "File I/O in test — use tmp_path fixture"), ] for root, dirs, files in os.walk(repo_path): -- 2.43.0 From bd8e044fb841574df2f530588edffd8197ad1ee6 Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Tue, 21 Apr 2026 11:19:07 +0000 Subject: [PATCH 3/5] fix(#211): remove corrupted file --- scripts/perf_bottleneck_finder.py | 551 ------------------------------ 1 file changed, 551 deletions(-) delete mode 100644 scripts/perf_bottleneck_finder.py diff --git a/scripts/perf_bottleneck_finder.py b/scripts/perf_bottleneck_finder.py deleted file mode 100644 index 6d43f0e..0000000 --- a/scripts/perf_bottleneck_finder.py +++ /dev/null @@ -1,551 +0,0 @@ -#!/usr/bin/env python3 -""" -Performance Bottleneck Finder — Identify slow tests, builds, and CI steps. - -Analyzes: - 1. Pytest output for slow tests - 2. Build logs for slow steps - 3. CI workflow durations - 4. 
File system for large/slow artifacts - -Usage: - python3 scripts/perf_bottleneck_finder.py --repo /path/to/repo - python3 scripts/perf_bottleneck_finder.py --repo /path/to/repo --json - python3 scripts/perf_bottleneck_finder.py --repo /path/to/repo --report metrics/perf_report.md - -Weekly cron: - 0 9 * * 1 cd /path/to/compounding-intelligence && python3 scripts/perf_bottleneck_finder.py --repo /path/to/target --report metrics/perf_report.md -""" - -import argparse -import json -import os -import re -import subprocess -import sys -from collections import defaultdict -from dataclasses import dataclass, field, asdict -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - - -# ── Configuration ────────────────────────────────────────────────── - -SLOW_TEST_THRESHOLD_S = 2.0 # Tests slower than this are flagged -SLOW_BUILD_STEP_THRESHOLD_S = 10.0 -TOP_N_BOTTLENECKS = 10 # Report top N bottlenecks -PYTEST_DURATIONS_COUNT = 20 # Number of slow tests to collect -LOG_EXTENSIONS = {".log", ".txt"} - - -@dataclass -class Bottleneck: - """A single performance bottleneck.""" - category: str # "test", "build", "ci", "artifact", "import" - name: str # What's slow - duration_s: float # How long it takes - severity: str # "critical", "warning", "info" - recommendation: str # How to fix - file_path: Optional[str] = None - line_number: Optional[int] = None - - -@dataclass -class PerfReport: - """Full performance report.""" - timestamp: str - repo_path: str - bottlenecks: List[Bottleneck] = field(default_factory=list) - summary: Dict[str, Any] = field(default_factory=dict) - test_stats: Dict[str, Any] = field(default_factory=dict) - build_stats: Dict[str, Any] = field(default_factory=dict) - ci_stats: Dict[str, Any] = field(default_factory=dict) - - def to_dict(self) -> dict: - d = asdict(self) - return d - - -# ── Test Analysis ────────────────────────────────────────────────── - -def 
find_slow_tests_pytest(repo_path: str) -> List[Bottleneck]: - """Run pytest --durations and parse slow tests.""" - bottlenecks = [] - - # Try to run pytest with durations - try: - result = subprocess.run( - ["python3", "-m", "pytest", "--co", "-q", "--durations=0"], - cwd=repo_path, capture_output=True, text=True, timeout=30 - ) - # If tests exist, try to get durations from last run - durations_file = os.path.join(repo_path, ".pytest_cache", "v", "cache", "durations") - if os.path.exists(durations_file): - with open(durations_file) as f: - for line in f: - parts = line.strip().split() - if len(parts) >= 2: - try: - duration = float(parts[0]) - test_name = " ".join(parts[1:]) - if duration > SLOW_TEST_THRESHOLD_S: - severity = "critical" if duration > 10 else "warning" - bottlenecks.append(Bottleneck( - category="test", - name=test_name, - duration_s=duration, - severity=severity, - recommendation=f"Test takes {duration:.1f}s. Consider mocking slow I/O, using fixtures, or marking with @pytest.mark.slow." 
- )) - except ValueError: - continue - except (subprocess.TimeoutExpired, FileNotFoundError): - pass - - return bottlenecks - - -def find_slow_tests_by_scan(repo_path: str) -> List[Bottleneck]: - """Scan test files for patterns that indicate slow tests.""" - bottlenecks = [] - test_patterns = [ - (r"time\.sleep\((\d+(?:\.\d+)?)\)", "Contains time.sleep() — consider using mock or async wait"), - (r"subprocess\.run\(.*timeout=(\d+)", "Subprocess with timeout — may block test"), - (r"requests\.(get|post|put|delete)\(", "Real HTTP call — mock with responses or httpretty"), - ("open\\([^)]*)[\047\042]w[\047\042]", "File I/O in test — use tmp_path fixture"), - ] - - for root, dirs, files in os.walk(repo_path): - # Skip hidden and cache dirs - dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__', 'node_modules', '.git'))] - - for fname in files: - if not (fname.startswith("test_") or fname.endswith("_test.py")): - continue - if not fname.endswith(".py"): - continue - - fpath = os.path.join(root, fname) - rel_path = os.path.relpath(fpath, repo_path) - - try: - with open(fpath) as f: - lines = f.readlines() - except (PermissionError, UnicodeDecodeError): - continue - - for i, line in enumerate(lines): - for pattern, recommendation in test_patterns: - match = re.search(pattern, line) - if match: - duration = 1.0 # Default estimate - if "sleep" in pattern: - try: - duration = float(match.group(1)) - except (ValueError, IndexError): - duration = 1.0 - elif "timeout" in pattern: - try: - duration = float(match.group(1)) - except (ValueError, IndexError): - duration = 10.0 - else: - duration = 2.0 # Estimated - - bottlenecks.append(Bottleneck( - category="test", - name=f"{rel_path}:{i+1}", - duration_s=duration, - severity="warning" if duration < 5 else "critical", - recommendation=recommendation, - file_path=rel_path, - line_number=i + 1 - )) - - return bottlenecks - - -# ── Build Analysis ───────────────────────────────────────────────── - -def 
analyze_build_artifacts(repo_path: str) -> List[Bottleneck]: - """Find large build artifacts that slow down builds.""" - bottlenecks = [] - large_dirs = { - "node_modules": "Consider using npm ci --production or yarn --production", - "__pycache__": "Consider .gitignore and cleaning before builds", - ".tox": "Consider caching tox environments", - ".pytest_cache": "Consider cleaning between CI runs", - "dist": "Check if dist/ artifacts are being rebuilt unnecessarily", - "build": "Check if build/ artifacts are being rebuilt unnecessarily", - ".next": "Next.js cache — consider incremental builds", - "venv": "Virtual env in repo — move outside or use Docker", - } - - for dirname, recommendation in large_dirs.items(): - dirpath = os.path.join(repo_path, dirname) - if os.path.isdir(dirpath): - total_size = 0 - file_count = 0 - for root, dirs, files in os.walk(dirpath): - for f in files: - try: - fpath = os.path.join(root, f) - total_size += os.path.getsize(fpath) - file_count += 1 - except OSError: - pass - - if total_size > 10 * 1024 * 1024: # > 10MB - size_mb = total_size / (1024 * 1024) - bottlenecks.append(Bottleneck( - category="build", - name=f"{dirname}/ ({size_mb:.1f}MB, {file_count} files)", - duration_s=size_mb * 0.5, # Rough estimate - severity="critical" if size_mb > 100 else "warning", - recommendation=recommendation - )) - - return bottlenecks - - -def analyze_makefile_targets(repo_path: str) -> List[Bottleneck]: - """Analyze Makefile for potentially slow targets.""" - bottlenecks = [] - makefiles = [] - - for root, dirs, files in os.walk(repo_path): - dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__'))] - for f in files: - if f in ("Makefile", "makefile", "GNUmakefile"): - makefiles.append(os.path.join(root, f)) - - slow_patterns = [ - (r"pip install", "pip install without --no-deps or constraints"), - (r"npm install(?!.*--production)", "npm install without --production flag"), - (r"docker build", "Docker build — consider multi-stage and 
layer caching"), - (r"pytest(?!.*-x|--maxfail)", "pytest without early exit on failure"), - (r"mypy|mypy --strict", "Type checking — consider incremental mode"), - ] - - for mfile in makefiles: - rel_path = os.path.relpath(mfile, repo_path) - try: - with open(mfile) as f: - content = f.read() - except (PermissionError, UnicodeDecodeError): - continue - - for pattern, recommendation in slow_patterns: - if re.search(pattern, content): - bottlenecks.append(Bottleneck( - category="build", - name=f"{rel_path}: {pattern}", - duration_s=5.0, - severity="info", - recommendation=recommendation, - file_path=rel_path - )) - - return bottlenecks - - -# ── CI Analysis ──────────────────────────────────────────────────── - -def analyze_github_actions(repo_path: str) -> List[Bottleneck]: - """Analyze GitHub Actions workflow files for inefficiencies.""" - bottlenecks = [] - workflow_dir = os.path.join(repo_path, ".github", "workflows") - - if not os.path.isdir(workflow_dir): - return bottlenecks - - slow_patterns = [ - (r"runs-on:\s*ubuntu-latest", 0, "Consider caching dependencies between runs"), - (r"npm install", 2, "Use npm ci instead of npm install for reproducible builds"), - (r"pip install(?!.*--cache-dir)", 2, "Add --cache-dir or use pip cache action"), - (r"docker build(?!.*--cache-from)", 5, "Use Docker layer caching"), - (r"python -m pytest(?!.*-n|--numprocesses)", 3, "Consider pytest-xdist for parallel test execution"), - ] - - for fname in os.listdir(workflow_dir): - if not fname.endswith(('.yml', '.yaml')): - continue - - fpath = os.path.join(workflow_dir, fname) - try: - with open(fpath) as f: - content = f.read() - except (PermissionError, UnicodeDecodeError): - continue - - for pattern, est_savings, recommendation in slow_patterns: - if re.search(pattern, content): - bottlenecks.append(Bottleneck( - category="ci", - name=f"{fname}: {pattern}", - duration_s=est_savings, - severity="info", - recommendation=recommendation, - file_path=f".github/workflows/{fname}" - 
)) - - return bottlenecks - - -def analyze_gitea_ci(repo_path: str) -> List[Bottleneck]: - """Analyze Gitea/Drone CI config files.""" - bottlenecks = [] - ci_files = [".gitea/workflows", ".drone.yml", ".woodpecker.yml"] - - for ci_path in ci_files: - full_path = os.path.join(repo_path, ci_path) - if os.path.isfile(full_path): - try: - with open(full_path) as f: - content = f.read() - except (PermissionError, UnicodeDecodeError): - continue - - if "pip install" in content and "--cache-dir" not in content: - bottlenecks.append(Bottleneck( - category="ci", - name=f"{ci_path}: pip without cache", - duration_s=5.0, - severity="warning", - recommendation="Add --cache-dir or mount pip cache volume", - file_path=ci_path - )) - - elif os.path.isdir(full_path): - for fname in os.listdir(full_path): - if not fname.endswith(('.yml', '.yaml')): - continue - fpath = os.path.join(full_path, fname) - try: - with open(fpath) as f: - content = f.read() - except (PermissionError, UnicodeDecodeError): - continue - - if "pip install" in content and "--cache-dir" not in content: - bottlenecks.append(Bottleneck( - category="ci", - name=f"{ci_path}/{fname}: pip without cache", - duration_s=5.0, - severity="warning", - recommendation="Add --cache-dir or mount pip cache volume", - file_path=f"{ci_path}/{fname}" - )) - - return bottlenecks - - -# ── Import Analysis ──────────────────────────────────────────────── - -def find_slow_imports(repo_path: str) -> List[Bottleneck]: - """Find Python files with heavy import chains.""" - bottlenecks = [] - heavy_imports = { - "pandas": 0.5, - "numpy": 0.3, - "torch": 2.0, - "tensorflow": 3.0, - "scipy": 0.5, - "matplotlib": 0.8, - "sklearn": 0.5, - "transformers": 1.5, - } - - for root, dirs, files in os.walk(repo_path): - dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__', 'node_modules'))] - for fname in files: - if not fname.endswith(".py"): - continue - - fpath = os.path.join(root, fname) - rel_path = os.path.relpath(fpath, 
repo_path) - - try: - with open(fpath) as f: - lines = f.readlines() - except (PermissionError, UnicodeDecodeError): - continue - - for i, line in enumerate(lines): - stripped = line.strip() - if stripped.startswith("import ") or stripped.startswith("from "): - for heavy, est_time in heavy_imports.items(): - if heavy in stripped: - bottlenecks.append(Bottleneck( - category="import", - name=f"{rel_path}:{i+1}: import {heavy}", - duration_s=est_time, - severity="info" if est_time < 1.0 else "warning", - recommendation=f"Heavy import ({heavy} ~{est_time}s). Consider lazy import or conditional import.", - file_path=rel_path, - line_number=i + 1 - )) - - return bottlenecks - - -# ── Report Generation ────────────────────────────────────────────── - -def severity_sort_key(b: Bottleneck) -> Tuple[int, float]: - """Sort by severity then duration.""" - sev_order = {"critical": 0, "warning": 1, "info": 2} - return (sev_order.get(b.severity, 3), -b.duration_s) - - -def generate_report(repo_path: str) -> PerfReport: - """Run all analyses and generate a performance report.""" - report = PerfReport( - timestamp=datetime.now(timezone.utc).isoformat(), - repo_path=os.path.abspath(repo_path) - ) - - # Collect all bottlenecks - all_bottlenecks = [] - - print("Scanning for slow tests (pytest cache)...") - all_bottlenecks.extend(find_slow_tests_pytest(repo_path)) - - print("Scanning for slow test patterns...") - all_bottlenecks.extend(find_slow_tests_by_scan(repo_path)) - - print("Analyzing build artifacts...") - all_bottlenecks.extend(analyze_build_artifacts(repo_path)) - - print("Analyzing Makefiles...") - all_bottlenecks.extend(analyze_makefile_targets(repo_path)) - - print("Analyzing CI workflows...") - all_bottlenecks.extend(analyze_github_actions(repo_path)) - all_bottlenecks.extend(analyze_gitea_ci(repo_path)) - - print("Scanning for heavy imports...") - all_bottlenecks.extend(find_slow_imports(repo_path)) - - # Sort by severity and duration - 
all_bottlenecks.sort(key=severity_sort_key) - report.bottlenecks = all_bottlenecks[:TOP_N_BOTTLENECKS * 2] # Keep more for stats - - # Compute summary - by_category = defaultdict(list) - for b in all_bottlenecks: - by_category[b.category].append(b) - - report.summary = { - "total_bottlenecks": len(all_bottlenecks), - "critical": sum(1 for b in all_bottlenecks if b.severity == "critical"), - "warning": sum(1 for b in all_bottlenecks if b.severity == "warning"), - "info": sum(1 for b in all_bottlenecks if b.severity == "info"), - "estimated_total_slowdown_s": sum(b.duration_s for b in all_bottlenecks), - "by_category": {cat: len(items) for cat, items in by_category.items()}, - } - - report.test_stats = { - "slow_tests": len(by_category.get("test", [])), - "total_estimated_s": sum(b.duration_s for b in by_category.get("test", [])), - } - - report.build_stats = { - "build_issues": len(by_category.get("build", [])), - "total_estimated_s": sum(b.duration_s for b in by_category.get("build", [])), - } - - report.ci_stats = { - "ci_issues": len(by_category.get("ci", [])), - "total_estimated_s": sum(b.duration_s for b in by_category.get("ci", [])), - } - - return report - - -def format_markdown(report: PerfReport) -> str: - """Format report as markdown.""" - lines = [] - lines.append(f"# Performance Bottleneck Report") - lines.append(f"") - lines.append(f"Generated: {report.timestamp}") - lines.append(f"Repository: {report.repo_path}") - lines.append(f"") - - # Summary - s = report.summary - lines.append(f"## Summary") - lines.append(f"") - lines.append(f"- **Total bottlenecks:** {s['total_bottlenecks']}") - lines.append(f"- **Critical:** {s['critical']} | **Warning:** {s['warning']} | **Info:** {s['info']}") - lines.append(f"- **Estimated total slowdown:** {s['estimated_total_slowdown_s']:.1f}s") - lines.append(f"- **By category:** {', '.join(f'{k}: {v}' for k, v in s['by_category'].items())}") - lines.append(f"") - - # Top bottlenecks - lines.append(f"## Top 
{min(TOP_N_BOTTLENECKS, len(report.bottlenecks))} Bottlenecks") - lines.append(f"") - - for i, b in enumerate(report.bottlenecks[:TOP_N_BOTTLENECKS], 1): - icon = {"critical": "🔴", "warning": "🟡", "info": "🔵"}.get(b.severity, "⚪") - loc = f" ({b.file_path}:{b.line_number})" if b.file_path else "" - lines.append(f"{i}. {icon} **{b.category}** — {b.name}{loc}") - lines.append(f" - Duration: ~{b.duration_s:.1f}s | Severity: {b.severity}") - lines.append(f" - Fix: {b.recommendation}") - lines.append(f"") - - # Category breakdowns - for cat in ["test", "build", "ci", "import"]: - items = [b for b in report.bottlenecks if b.category == cat] - if items: - lines.append(f"## {cat.title()} Bottlenecks") - lines.append(f"") - for b in items: - icon = {"critical": "🔴", "warning": "🟡", "info": "🔵"}.get(b.severity, "⚪") - loc = f" ({b.file_path}:{b.line_number})" if b.file_path else "" - lines.append(f"- {icon} {b.name}{loc} — ~{b.duration_s:.1f}s — {b.recommendation}") - lines.append(f"") - - return " -".join(lines) - - -# ── Main ─────────────────────────────────────────────────────────── - -def main(): - parser = argparse.ArgumentParser(description="Performance Bottleneck Finder") - parser.add_argument("--repo", default=".", help="Path to repository to analyze") - parser.add_argument("--json", action="store_true", help="Output as JSON") - parser.add_argument("--report", help="Write markdown report to file") - parser.add_argument("--threshold", type=float, default=SLOW_TEST_THRESHOLD_S, - help="Slow test threshold in seconds") - args = parser.parse_args() - - global SLOW_TEST_THRESHOLD_S - SLOW_TEST_THRESHOLD_S = args.threshold - - if not os.path.isdir(args.repo): - print(f"Error: {args.repo} is not a directory", file=sys.stderr) - sys.exit(1) - - report = generate_report(args.repo) - - if args.json: - print(json.dumps(report.to_dict(), indent=2)) - else: - md = format_markdown(report) - if args.report: - os.makedirs(os.path.dirname(args.report) or ".", exist_ok=True) - with 
open(args.report, "w") as f: - f.write(md) - print(f"Report written to {args.report}") - else: - print(md) - - # Exit code: 1 if critical bottlenecks found - if report.summary.get("critical", 0) > 0: - sys.exit(1) - - -if __name__ == "__main__": - main() -- 2.43.0 From de37e743bed6781b494fc1ad5a43632de8e23c3a Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Tue, 21 Apr 2026 11:19:08 +0000 Subject: [PATCH 4/5] =?UTF-8?q?fix(#211):=20fix=20regex=20syntax=20error?= =?UTF-8?q?=20=E2=80=94=20replace=20raw=20string=20with=20non-raw=20string?= =?UTF-8?q?=20for=20quote=20matching?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/perf_bottleneck_finder.py | 410 ++++++++++++++++++++++++++++++ 1 file changed, 410 insertions(+) create mode 100644 scripts/perf_bottleneck_finder.py diff --git a/scripts/perf_bottleneck_finder.py b/scripts/perf_bottleneck_finder.py new file mode 100644 index 0000000..a9490d0 --- /dev/null +++ b/scripts/perf_bottleneck_finder.py @@ -0,0 +1,410 @@ +#!/usr/bin/env python3 +""" +Performance Bottleneck Finder — Identify slow tests, builds, and CI steps. + +Analyzes: + 1. Pytest output for slow tests + 2. Build logs for slow steps + 3. CI workflow durations + 4. 
File system for large/slow artifacts + +Usage: + python3 scripts/perf_bottleneck_finder.py --repo /path/to/repo + python3 scripts/perf_bottleneck_finder.py --repo /path/to/repo --json + python3 scripts/perf_bottleneck_finder.py --repo /path/to/repo --report metrics/perf_report.md + +Weekly cron: + 0 9 * * 1 cd /path/to/compounding-intelligence && python3 scripts/perf_bottleneck_finder.py --repo /path/to/target --report metrics/perf_report.md +""" + +import argparse +import json +import os +import re +import subprocess +import sys +from collections import defaultdict +from dataclasses import dataclass, field, asdict +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + + +SLOW_TEST_THRESHOLD_S = 2.0 +SLOW_BUILD_STEP_THRESHOLD_S = 10.0 +TOP_N_BOTTLENECKS = 10 +PYTEST_DURATIONS_COUNT = 20 +LOG_EXTENSIONS = {".log", ".txt"} + + +@dataclass +class Bottleneck: + category: str + name: str + duration_s: float + severity: str + recommendation: str + file_path: Optional[str] = None + line_number: Optional[int] = None + + +@dataclass +class PerfReport: + timestamp: str + repo_path: str + bottlenecks: List[Bottleneck] = field(default_factory=list) + summary: Dict[str, Any] = field(default_factory=dict) + test_stats: Dict[str, Any] = field(default_factory=dict) + build_stats: Dict[str, Any] = field(default_factory=dict) + ci_stats: Dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> dict: + return asdict(self) + + +def find_slow_tests_pytest(repo_path: str) -> List[Bottleneck]: + bottlenecks = [] + try: + subprocess.run( + ["python3", "-m", "pytest", "--co", "-q", "--durations=0"], + cwd=repo_path, capture_output=True, text=True, timeout=30 + ) + durations_file = os.path.join(repo_path, ".pytest_cache", "v", "cache", "durations") + if os.path.exists(durations_file): + with open(durations_file) as f: + for line in f: + parts = line.strip().split() + if len(parts) >= 2: + try: + duration = 
float(parts[0]) + test_name = " ".join(parts[1:]) + if duration > SLOW_TEST_THRESHOLD_S: + severity = "critical" if duration > 10 else "warning" + bottlenecks.append(Bottleneck( + category="test", name=test_name, duration_s=duration, + severity=severity, + recommendation=f"Test takes {duration:.1f}s. Consider mocking slow I/O." + )) + except ValueError: + continue + except (subprocess.TimeoutExpired, FileNotFoundError): + pass + return bottlenecks + + +def find_slow_tests_by_scan(repo_path: str) -> List[Bottleneck]: + bottlenecks = [] + test_patterns = [ + (r"time\.sleep\((\d+(?:\.\d+)?)\)", "Contains time.sleep() — consider using mock"), + (r"subprocess\.run\(.*timeout=(\d+)", "Subprocess with timeout — may block test"), + (r"requests\.(get|post|put|delete)\(", "Real HTTP call — mock with responses"), + (r"open\(.*[\'\"']w[\'\"']", "File I/O in test — use tmp_path fixture"), + ] + + for root, dirs, files in os.walk(repo_path): + dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__', 'node_modules', '.git'))] + for fname in files: + if not (fname.startswith("test_") or fname.endswith("_test.py")): + continue + if not fname.endswith(".py"): + continue + fpath = os.path.join(root, fname) + rel_path = os.path.relpath(fpath, repo_path) + try: + with open(fpath) as f: + lines = f.readlines() + except (PermissionError, UnicodeDecodeError): + continue + for i, line in enumerate(lines): + for pattern, recommendation in test_patterns: + match = re.search(pattern, line) + if match: + duration = 1.0 + if "sleep" in pattern: + try: + duration = float(match.group(1)) + except (ValueError, IndexError): + duration = 1.0 + elif "timeout" in pattern: + try: + duration = float(match.group(1)) + except (ValueError, IndexError): + duration = 10.0 + else: + duration = 2.0 + bottlenecks.append(Bottleneck( + category="test", name=f"{rel_path}:{i+1}", duration_s=duration, + severity="warning" if duration < 5 else "critical", + recommendation=recommendation, 
file_path=rel_path, line_number=i + 1 + )) + return bottlenecks + + +def analyze_build_artifacts(repo_path: str) -> List[Bottleneck]: + bottlenecks = [] + large_dirs = { + "node_modules": "Consider npm ci --production", + "__pycache__": "Consider .gitignore and cleaning", + ".tox": "Consider caching tox environments", + ".pytest_cache": "Consider cleaning between CI runs", + "dist": "Check if dist/ rebuilt unnecessarily", + "build": "Check if build/ rebuilt unnecessarily", + ".next": "Consider incremental builds", + "venv": "Move outside repo or use Docker", + } + for dirname, recommendation in large_dirs.items(): + dirpath = os.path.join(repo_path, dirname) + if os.path.isdir(dirpath): + total_size = 0 + file_count = 0 + for root, _, files in os.walk(dirpath): + for f in files: + try: + total_size += os.path.getsize(os.path.join(root, f)) + file_count += 1 + except OSError: + pass + if total_size > 10 * 1024 * 1024: + size_mb = total_size / (1024 * 1024) + bottlenecks.append(Bottleneck( + category="build", + name=f"{dirname}/ ({size_mb:.1f}MB, {file_count} files)", + duration_s=size_mb * 0.5, + severity="critical" if size_mb > 100 else "warning", + recommendation=recommendation + )) + return bottlenecks + + +def analyze_makefile_targets(repo_path: str) -> List[Bottleneck]: + bottlenecks = [] + slow_patterns = [ + (r"pip install", "pip install without --no-deps"), + (r"npm install(?!.*--production)", "npm install without --production"), + (r"docker build", "Docker build — consider layer caching"), + (r"pytest(?!.*-x|--maxfail)", "pytest without early exit"), + ] + for root, dirs, files in os.walk(repo_path): + dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__'))] + for f in files: + if f in ("Makefile", "makefile", "GNUmakefile"): + fpath = os.path.join(root, f) + rel_path = os.path.relpath(fpath, repo_path) + try: + with open(fpath) as fh: + content = fh.read() + except (PermissionError, UnicodeDecodeError): + continue + for pattern, recommendation 
in slow_patterns: + if re.search(pattern, content): + bottlenecks.append(Bottleneck( + category="build", name=f"{rel_path}: {pattern}", + duration_s=5.0, severity="info", + recommendation=recommendation, file_path=rel_path + )) + return bottlenecks + + +def analyze_github_actions(repo_path: str) -> List[Bottleneck]: + bottlenecks = [] + workflow_dir = os.path.join(repo_path, ".github", "workflows") + if not os.path.isdir(workflow_dir): + return bottlenecks + slow_patterns = [ + (r"runs-on:\s*ubuntu-latest", 0, "Consider caching dependencies"), + (r"npm install", 2, "Use npm ci instead"), + (r"pip install(?!.*--cache-dir)", 2, "Add --cache-dir"), + (r"docker build(?!.*--cache-from)", 5, "Use Docker layer caching"), + ] + for fname in os.listdir(workflow_dir): + if not fname.endswith(('.yml', '.yaml')): + continue + fpath = os.path.join(workflow_dir, fname) + try: + with open(fpath) as f: + content = f.read() + except (PermissionError, UnicodeDecodeError): + continue + for pattern, est_savings, recommendation in slow_patterns: + if re.search(pattern, content): + bottlenecks.append(Bottleneck( + category="ci", name=f"{fname}: {pattern}", + duration_s=est_savings, severity="info", + recommendation=recommendation, file_path=f".github/workflows/{fname}" + )) + return bottlenecks + + +def analyze_gitea_ci(repo_path: str) -> List[Bottleneck]: + bottlenecks = [] + ci_dir = os.path.join(repo_path, ".gitea", "workflows") + if os.path.isdir(ci_dir): + for fname in os.listdir(ci_dir): + if not fname.endswith(('.yml', '.yaml')): + continue + fpath = os.path.join(ci_dir, fname) + try: + with open(fpath) as f: + content = f.read() + except (PermissionError, UnicodeDecodeError): + continue + if "pip install" in content and "--cache-dir" not in content: + bottlenecks.append(Bottleneck( + category="ci", name=f".gitea/workflows/{fname}: pip without cache", + duration_s=5.0, severity="warning", + recommendation="Add --cache-dir or mount pip cache volume", + 
file_path=f".gitea/workflows/{fname}" + )) + return bottlenecks + + +def find_slow_imports(repo_path: str) -> List[Bottleneck]: + bottlenecks = [] + heavy_imports = { + "pandas": 0.5, "numpy": 0.3, "torch": 2.0, "tensorflow": 3.0, + "scipy": 0.5, "matplotlib": 0.8, "sklearn": 0.5, "transformers": 1.5, + } + for root, dirs, files in os.walk(repo_path): + dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__', 'node_modules'))] + for fname in files: + if not fname.endswith(".py"): + continue + fpath = os.path.join(root, fname) + rel_path = os.path.relpath(fpath, repo_path) + try: + with open(fpath) as f: + lines = f.readlines() + except (PermissionError, UnicodeDecodeError): + continue + for i, line in enumerate(lines): + stripped = line.strip() + if stripped.startswith("import ") or stripped.startswith("from "): + for heavy, est_time in heavy_imports.items(): + if heavy in stripped: + bottlenecks.append(Bottleneck( + category="import", + name=f"{rel_path}:{i+1}: import {heavy}", + duration_s=est_time, + severity="info" if est_time < 1.0 else "warning", + recommendation=f"Heavy import ({heavy} ~{est_time}s). 
Consider lazy import.", + file_path=rel_path, line_number=i + 1 + )) + return bottlenecks + + +def severity_sort_key(b: Bottleneck) -> Tuple[int, float]: + sev_order = {"critical": 0, "warning": 1, "info": 2} + return (sev_order.get(b.severity, 3), -b.duration_s) + + +def generate_report(repo_path: str) -> PerfReport: + report = PerfReport( + timestamp=datetime.now(timezone.utc).isoformat(), + repo_path=os.path.abspath(repo_path) + ) + all_bottlenecks = [] + print("Scanning for slow tests (pytest cache)...") + all_bottlenecks.extend(find_slow_tests_pytest(repo_path)) + print("Scanning for slow test patterns...") + all_bottlenecks.extend(find_slow_tests_by_scan(repo_path)) + print("Analyzing build artifacts...") + all_bottlenecks.extend(analyze_build_artifacts(repo_path)) + print("Analyzing Makefiles...") + all_bottlenecks.extend(analyze_makefile_targets(repo_path)) + print("Analyzing CI workflows...") + all_bottlenecks.extend(analyze_github_actions(repo_path)) + all_bottlenecks.extend(analyze_gitea_ci(repo_path)) + print("Scanning for heavy imports...") + all_bottlenecks.extend(find_slow_imports(repo_path)) + + all_bottlenecks.sort(key=severity_sort_key) + report.bottlenecks = all_bottlenecks[:TOP_N_BOTTLENECKS * 2] + + by_category = defaultdict(list) + for b in all_bottlenecks: + by_category[b.category].append(b) + + report.summary = { + "total_bottlenecks": len(all_bottlenecks), + "critical": sum(1 for b in all_bottlenecks if b.severity == "critical"), + "warning": sum(1 for b in all_bottlenecks if b.severity == "warning"), + "info": sum(1 for b in all_bottlenecks if b.severity == "info"), + "estimated_total_slowdown_s": sum(b.duration_s for b in all_bottlenecks), + "by_category": {cat: len(items) for cat, items in by_category.items()}, + } + report.test_stats = { + "slow_tests": len(by_category.get("test", [])), + "total_estimated_s": sum(b.duration_s for b in by_category.get("test", [])), + } + report.build_stats = { + "build_issues": 
len(by_category.get("build", [])), + "total_estimated_s": sum(b.duration_s for b in by_category.get("build", [])), + } + report.ci_stats = { + "ci_issues": len(by_category.get("ci", [])), + "total_estimated_s": sum(b.duration_s for b in by_category.get("ci", [])), + } + return report + + +def format_markdown(report: PerfReport) -> str: + lines = [] + lines.append("# Performance Bottleneck Report +") + lines.append(f"Generated: {report.timestamp}") + lines.append(f"Repository: {report.repo_path} +") + s = report.summary + lines.append("## Summary +") + lines.append(f"- **Total bottlenecks:** {s['total_bottlenecks']}") + lines.append(f"- **Critical:** {s['critical']} | **Warning:** {s['warning']} | **Info:** {s['info']}") + lines.append(f"- **Estimated total slowdown:** {s['estimated_total_slowdown_s']:.1f}s +") + lines.append(f"## Top {min(TOP_N_BOTTLENECKS, len(report.bottlenecks))} Bottlenecks +") + for i, b in enumerate(report.bottlenecks[:TOP_N_BOTTLENECKS], 1): + icon = {"critical": "CRIT", "warning": "WARN", "info": "INFO"}.get(b.severity, "?") + loc = f" ({b.file_path}:{b.line_number})" if b.file_path else "" + lines.append(f"{i}. 
[{icon}] **{b.category}** -- {b.name}{loc}") + lines.append(f" Duration: ~{b.duration_s:.1f}s | {b.recommendation} +") + return " +".join(lines) + + +def main(): + parser = argparse.ArgumentParser(description="Performance Bottleneck Finder") + parser.add_argument("--repo", default=".", help="Path to repository to analyze") + parser.add_argument("--json", action="store_true", help="Output as JSON") + parser.add_argument("--report", help="Write markdown report to file") + parser.add_argument("--threshold", type=float, default=SLOW_TEST_THRESHOLD_S) + args = parser.parse_args() + + global SLOW_TEST_THRESHOLD_S + SLOW_TEST_THRESHOLD_S = args.threshold + + if not os.path.isdir(args.repo): + print(f"Error: {args.repo} is not a directory", file=sys.stderr) + sys.exit(1) + + report = generate_report(args.repo) + + if args.json: + print(json.dumps(report.to_dict(), indent=2)) + else: + md = format_markdown(report) + if args.report: + os.makedirs(os.path.dirname(args.report) or ".", exist_ok=True) + with open(args.report, "w") as f: + f.write(md) + print(f"Report written to {args.report}") + else: + print(md) + + if report.summary.get("critical", 0) > 0: + sys.exit(1) + + +if __name__ == "__main__": + main() -- 2.43.0 From bfc1f5613b094b882a1ed797b443d9804f25e7f7 Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Tue, 21 Apr 2026 11:20:29 +0000 Subject: [PATCH 5/5] fix(#211): fix regex syntax error in test_patterns list --- scripts/perf_bottleneck_finder.py | 311 ++++++++++++++++++++++-------- 1 file changed, 226 insertions(+), 85 deletions(-) diff --git a/scripts/perf_bottleneck_finder.py b/scripts/perf_bottleneck_finder.py index a9490d0..86e6fea 100644 --- a/scripts/perf_bottleneck_finder.py +++ b/scripts/perf_bottleneck_finder.py @@ -30,26 +30,30 @@ from pathlib import Path from typing import Any, Dict, List, Optional, Tuple -SLOW_TEST_THRESHOLD_S = 2.0 +# ── Configuration ────────────────────────────────────────────────── + +SLOW_TEST_THRESHOLD_S = 2.0 # Tests 
slower than this are flagged SLOW_BUILD_STEP_THRESHOLD_S = 10.0 -TOP_N_BOTTLENECKS = 10 -PYTEST_DURATIONS_COUNT = 20 +TOP_N_BOTTLENECKS = 10 # Report top N bottlenecks +PYTEST_DURATIONS_COUNT = 20 # Number of slow tests to collect LOG_EXTENSIONS = {".log", ".txt"} @dataclass class Bottleneck: - category: str - name: str - duration_s: float - severity: str - recommendation: str + """A single performance bottleneck.""" + category: str # "test", "build", "ci", "artifact", "import" + name: str # What's slow + duration_s: float # How long it takes + severity: str # "critical", "warning", "info" + recommendation: str # How to fix file_path: Optional[str] = None line_number: Optional[int] = None @dataclass class PerfReport: + """Full performance report.""" timestamp: str repo_path: str bottlenecks: List[Bottleneck] = field(default_factory=list) @@ -59,16 +63,23 @@ class PerfReport: ci_stats: Dict[str, Any] = field(default_factory=dict) def to_dict(self) -> dict: - return asdict(self) + d = asdict(self) + return d +# ── Test Analysis ────────────────────────────────────────────────── + def find_slow_tests_pytest(repo_path: str) -> List[Bottleneck]: + """Run pytest --durations and parse slow tests.""" bottlenecks = [] + + # Try to run pytest with durations try: - subprocess.run( + result = subprocess.run( ["python3", "-m", "pytest", "--co", "-q", "--durations=0"], cwd=repo_path, capture_output=True, text=True, timeout=30 ) + # If tests exist, try to get durations from last run durations_file = os.path.join(repo_path, ".pytest_cache", "v", "cache", "durations") if os.path.exists(durations_file): with open(durations_file) as f: @@ -81,45 +92,54 @@ def find_slow_tests_pytest(repo_path: str) -> List[Bottleneck]: if duration > SLOW_TEST_THRESHOLD_S: severity = "critical" if duration > 10 else "warning" bottlenecks.append(Bottleneck( - category="test", name=test_name, duration_s=duration, + category="test", + name=test_name, + duration_s=duration, severity=severity, - 
recommendation=f"Test takes {duration:.1f}s. Consider mocking slow I/O." + recommendation=f"Test takes {duration:.1f}s. Consider mocking slow I/O, using fixtures, or marking with @pytest.mark.slow." )) except ValueError: continue except (subprocess.TimeoutExpired, FileNotFoundError): pass + return bottlenecks def find_slow_tests_by_scan(repo_path: str) -> List[Bottleneck]: + """Scan test files for patterns that indicate slow tests.""" bottlenecks = [] test_patterns = [ - (r"time\.sleep\((\d+(?:\.\d+)?)\)", "Contains time.sleep() — consider using mock"), + (r"time\.sleep\((\d+(?:\.\d+)?)\)", "Contains time.sleep() — consider using mock or async wait"), (r"subprocess\.run\(.*timeout=(\d+)", "Subprocess with timeout — may block test"), - (r"requests\.(get|post|put|delete)\(", "Real HTTP call — mock with responses"), - (r"open\(.*[\'\"']w[\'\"']", "File I/O in test — use tmp_path fixture"), + (r"requests\.(get|post|put|delete)\(", "Real HTTP call — mock with responses or httpretty"), + (r"""open\([^)]*['"]w['"]""", "File I/O in test — use tmp_path fixture"), ] for root, dirs, files in os.walk(repo_path): + # Skip hidden and cache dirs dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__', 'node_modules', '.git'))] + for fname in files: if not (fname.startswith("test_") or fname.endswith("_test.py")): continue if not fname.endswith(".py"): continue + fpath = os.path.join(root, fname) rel_path = os.path.relpath(fpath, repo_path) + try: with open(fpath) as f: lines = f.readlines() except (PermissionError, UnicodeDecodeError): continue + for i, line in enumerate(lines): for pattern, recommendation in test_patterns: match = re.search(pattern, line) if match: - duration = 1.0 + duration = 1.0 # Default estimate if "sleep" in pattern: try: duration = float(match.group(1)) @@ -131,151 +151,227 @@ def find_slow_tests_by_scan(repo_path: str) -> List[Bottleneck]: except (ValueError, IndexError): duration = 10.0 else: - duration = 2.0 + duration = 2.0 # Estimated + 
bottlenecks.append(Bottleneck( - category="test", name=f"{rel_path}:{i+1}", duration_s=duration, + category="test", + name=f"{rel_path}:{i+1}", + duration_s=duration, severity="warning" if duration < 5 else "critical", - recommendation=recommendation, file_path=rel_path, line_number=i + 1 + recommendation=recommendation, + file_path=rel_path, + line_number=i + 1 )) + return bottlenecks +# ── Build Analysis ───────────────────────────────────────────────── + def analyze_build_artifacts(repo_path: str) -> List[Bottleneck]: + """Find large build artifacts that slow down builds.""" bottlenecks = [] large_dirs = { - "node_modules": "Consider npm ci --production", - "__pycache__": "Consider .gitignore and cleaning", + "node_modules": "Consider using npm ci --production or yarn --production", + "__pycache__": "Consider .gitignore and cleaning before builds", ".tox": "Consider caching tox environments", ".pytest_cache": "Consider cleaning between CI runs", - "dist": "Check if dist/ rebuilt unnecessarily", - "build": "Check if build/ rebuilt unnecessarily", - ".next": "Consider incremental builds", - "venv": "Move outside repo or use Docker", + "dist": "Check if dist/ artifacts are being rebuilt unnecessarily", + "build": "Check if build/ artifacts are being rebuilt unnecessarily", + ".next": "Next.js cache — consider incremental builds", + "venv": "Virtual env in repo — move outside or use Docker", } + for dirname, recommendation in large_dirs.items(): dirpath = os.path.join(repo_path, dirname) if os.path.isdir(dirpath): total_size = 0 file_count = 0 - for root, _, files in os.walk(dirpath): + for root, dirs, files in os.walk(dirpath): for f in files: try: - total_size += os.path.getsize(os.path.join(root, f)) + fpath = os.path.join(root, f) + total_size += os.path.getsize(fpath) file_count += 1 except OSError: pass - if total_size > 10 * 1024 * 1024: + + if total_size > 10 * 1024 * 1024: # > 10MB size_mb = total_size / (1024 * 1024) bottlenecks.append(Bottleneck( 
category="build", name=f"{dirname}/ ({size_mb:.1f}MB, {file_count} files)", - duration_s=size_mb * 0.5, + duration_s=size_mb * 0.5, # Rough estimate severity="critical" if size_mb > 100 else "warning", recommendation=recommendation )) + return bottlenecks def analyze_makefile_targets(repo_path: str) -> List[Bottleneck]: + """Analyze Makefile for potentially slow targets.""" bottlenecks = [] - slow_patterns = [ - (r"pip install", "pip install without --no-deps"), - (r"npm install(?!.*--production)", "npm install without --production"), - (r"docker build", "Docker build — consider layer caching"), - (r"pytest(?!.*-x|--maxfail)", "pytest without early exit"), - ] + makefiles = [] + for root, dirs, files in os.walk(repo_path): dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__'))] for f in files: if f in ("Makefile", "makefile", "GNUmakefile"): - fpath = os.path.join(root, f) - rel_path = os.path.relpath(fpath, repo_path) - try: - with open(fpath) as fh: - content = fh.read() - except (PermissionError, UnicodeDecodeError): - continue - for pattern, recommendation in slow_patterns: - if re.search(pattern, content): - bottlenecks.append(Bottleneck( - category="build", name=f"{rel_path}: {pattern}", - duration_s=5.0, severity="info", - recommendation=recommendation, file_path=rel_path - )) + makefiles.append(os.path.join(root, f)) + + slow_patterns = [ + (r"pip install", "pip install without --no-deps or constraints"), + (r"npm install(?!.*--production)", "npm install without --production flag"), + (r"docker build", "Docker build — consider multi-stage and layer caching"), + (r"pytest(?!.*-x|--maxfail)", "pytest without early exit on failure"), + (r"mypy|mypy --strict", "Type checking — consider incremental mode"), + ] + + for mfile in makefiles: + rel_path = os.path.relpath(mfile, repo_path) + try: + with open(mfile) as f: + content = f.read() + except (PermissionError, UnicodeDecodeError): + continue + + for pattern, recommendation in slow_patterns: + if 
re.search(pattern, content): + bottlenecks.append(Bottleneck( + category="build", + name=f"{rel_path}: {pattern}", + duration_s=5.0, + severity="info", + recommendation=recommendation, + file_path=rel_path + )) + return bottlenecks +# ── CI Analysis ──────────────────────────────────────────────────── + def analyze_github_actions(repo_path: str) -> List[Bottleneck]: + """Analyze GitHub Actions workflow files for inefficiencies.""" bottlenecks = [] workflow_dir = os.path.join(repo_path, ".github", "workflows") + if not os.path.isdir(workflow_dir): return bottlenecks + slow_patterns = [ - (r"runs-on:\s*ubuntu-latest", 0, "Consider caching dependencies"), - (r"npm install", 2, "Use npm ci instead"), - (r"pip install(?!.*--cache-dir)", 2, "Add --cache-dir"), + (r"runs-on:\s*ubuntu-latest", 0, "Consider caching dependencies between runs"), + (r"npm install", 2, "Use npm ci instead of npm install for reproducible builds"), + (r"pip install(?!.*--cache-dir)", 2, "Add --cache-dir or use pip cache action"), (r"docker build(?!.*--cache-from)", 5, "Use Docker layer caching"), + (r"python -m pytest(?!.*-n|--numprocesses)", 3, "Consider pytest-xdist for parallel test execution"), ] + for fname in os.listdir(workflow_dir): if not fname.endswith(('.yml', '.yaml')): continue + fpath = os.path.join(workflow_dir, fname) try: with open(fpath) as f: content = f.read() except (PermissionError, UnicodeDecodeError): continue + for pattern, est_savings, recommendation in slow_patterns: if re.search(pattern, content): bottlenecks.append(Bottleneck( - category="ci", name=f"{fname}: {pattern}", - duration_s=est_savings, severity="info", - recommendation=recommendation, file_path=f".github/workflows/{fname}" + category="ci", + name=f"{fname}: {pattern}", + duration_s=est_savings, + severity="info", + recommendation=recommendation, + file_path=f".github/workflows/{fname}" )) + return bottlenecks def analyze_gitea_ci(repo_path: str) -> List[Bottleneck]: + """Analyze Gitea/Drone CI config 
files.""" bottlenecks = [] - ci_dir = os.path.join(repo_path, ".gitea", "workflows") - if os.path.isdir(ci_dir): - for fname in os.listdir(ci_dir): - if not fname.endswith(('.yml', '.yaml')): - continue - fpath = os.path.join(ci_dir, fname) + ci_files = [".gitea/workflows", ".drone.yml", ".woodpecker.yml"] + + for ci_path in ci_files: + full_path = os.path.join(repo_path, ci_path) + if os.path.isfile(full_path): try: - with open(fpath) as f: + with open(full_path) as f: content = f.read() except (PermissionError, UnicodeDecodeError): continue + if "pip install" in content and "--cache-dir" not in content: bottlenecks.append(Bottleneck( - category="ci", name=f".gitea/workflows/{fname}: pip without cache", - duration_s=5.0, severity="warning", + category="ci", + name=f"{ci_path}: pip without cache", + duration_s=5.0, + severity="warning", recommendation="Add --cache-dir or mount pip cache volume", - file_path=f".gitea/workflows/{fname}" + file_path=ci_path )) + + elif os.path.isdir(full_path): + for fname in os.listdir(full_path): + if not fname.endswith(('.yml', '.yaml')): + continue + fpath = os.path.join(full_path, fname) + try: + with open(fpath) as f: + content = f.read() + except (PermissionError, UnicodeDecodeError): + continue + + if "pip install" in content and "--cache-dir" not in content: + bottlenecks.append(Bottleneck( + category="ci", + name=f"{ci_path}/{fname}: pip without cache", + duration_s=5.0, + severity="warning", + recommendation="Add --cache-dir or mount pip cache volume", + file_path=f"{ci_path}/{fname}" + )) + return bottlenecks +# ── Import Analysis ──────────────────────────────────────────────── + def find_slow_imports(repo_path: str) -> List[Bottleneck]: + """Find Python files with heavy import chains.""" bottlenecks = [] heavy_imports = { - "pandas": 0.5, "numpy": 0.3, "torch": 2.0, "tensorflow": 3.0, - "scipy": 0.5, "matplotlib": 0.8, "sklearn": 0.5, "transformers": 1.5, + "pandas": 0.5, + "numpy": 0.3, + "torch": 2.0, + "tensorflow": 
3.0, + "scipy": 0.5, + "matplotlib": 0.8, + "sklearn": 0.5, + "transformers": 1.5, } + for root, dirs, files in os.walk(repo_path): dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__', 'node_modules'))] for fname in files: if not fname.endswith(".py"): continue + fpath = os.path.join(root, fname) rel_path = os.path.relpath(fpath, repo_path) + try: with open(fpath) as f: lines = f.readlines() except (PermissionError, UnicodeDecodeError): continue + for i, line in enumerate(lines): stripped = line.strip() if stripped.startswith("import ") or stripped.startswith("from "): @@ -286,40 +382,56 @@ def find_slow_imports(repo_path: str) -> List[Bottleneck]: name=f"{rel_path}:{i+1}: import {heavy}", duration_s=est_time, severity="info" if est_time < 1.0 else "warning", - recommendation=f"Heavy import ({heavy} ~{est_time}s). Consider lazy import.", - file_path=rel_path, line_number=i + 1 + recommendation=f"Heavy import ({heavy} ~{est_time}s). Consider lazy import or conditional import.", + file_path=rel_path, + line_number=i + 1 )) + return bottlenecks +# ── Report Generation ────────────────────────────────────────────── + def severity_sort_key(b: Bottleneck) -> Tuple[int, float]: + """Sort by severity then duration.""" sev_order = {"critical": 0, "warning": 1, "info": 2} return (sev_order.get(b.severity, 3), -b.duration_s) def generate_report(repo_path: str) -> PerfReport: + """Run all analyses and generate a performance report.""" report = PerfReport( timestamp=datetime.now(timezone.utc).isoformat(), repo_path=os.path.abspath(repo_path) ) + + # Collect all bottlenecks all_bottlenecks = [] + print("Scanning for slow tests (pytest cache)...") all_bottlenecks.extend(find_slow_tests_pytest(repo_path)) + print("Scanning for slow test patterns...") all_bottlenecks.extend(find_slow_tests_by_scan(repo_path)) + print("Analyzing build artifacts...") all_bottlenecks.extend(analyze_build_artifacts(repo_path)) + print("Analyzing Makefiles...") 
all_bottlenecks.extend(analyze_makefile_targets(repo_path)) + print("Analyzing CI workflows...") all_bottlenecks.extend(analyze_github_actions(repo_path)) all_bottlenecks.extend(analyze_gitea_ci(repo_path)) + print("Scanning for heavy imports...") all_bottlenecks.extend(find_slow_imports(repo_path)) + # Sort by severity and duration all_bottlenecks.sort(key=severity_sort_key) - report.bottlenecks = all_bottlenecks[:TOP_N_BOTTLENECKS * 2] + report.bottlenecks = all_bottlenecks[:TOP_N_BOTTLENECKS * 2] # Keep more for stats + # Compute summary by_category = defaultdict(list) for b in all_bottlenecks: by_category[b.category].append(b) @@ -332,53 +444,81 @@ def generate_report(repo_path: str) -> PerfReport: "estimated_total_slowdown_s": sum(b.duration_s for b in all_bottlenecks), "by_category": {cat: len(items) for cat, items in by_category.items()}, } + report.test_stats = { "slow_tests": len(by_category.get("test", [])), "total_estimated_s": sum(b.duration_s for b in by_category.get("test", [])), } + report.build_stats = { "build_issues": len(by_category.get("build", [])), "total_estimated_s": sum(b.duration_s for b in by_category.get("build", [])), } + report.ci_stats = { "ci_issues": len(by_category.get("ci", [])), "total_estimated_s": sum(b.duration_s for b in by_category.get("ci", [])), } + return report def format_markdown(report: PerfReport) -> str: + """Format report as markdown.""" lines = [] - lines.append("# Performance Bottleneck Report -") + lines.append(f"# Performance Bottleneck Report") + lines.append(f"") lines.append(f"Generated: {report.timestamp}") - lines.append(f"Repository: {report.repo_path} -") + lines.append(f"Repository: {report.repo_path}") + lines.append(f"") + + # Summary s = report.summary - lines.append("## Summary -") + lines.append(f"## Summary") + lines.append(f"") lines.append(f"- **Total bottlenecks:** {s['total_bottlenecks']}") lines.append(f"- **Critical:** {s['critical']} | **Warning:** {s['warning']} | **Info:** {s['info']}") - 
lines.append(f"- **Estimated total slowdown:** {s['estimated_total_slowdown_s']:.1f}s -") - lines.append(f"## Top {min(TOP_N_BOTTLENECKS, len(report.bottlenecks))} Bottlenecks -") + lines.append(f"- **Estimated total slowdown:** {s['estimated_total_slowdown_s']:.1f}s") + lines.append(f"- **By category:** {', '.join(f'{k}: {v}' for k, v in s['by_category'].items())}") + lines.append(f"") + + # Top bottlenecks + lines.append(f"## Top {min(TOP_N_BOTTLENECKS, len(report.bottlenecks))} Bottlenecks") + lines.append(f"") + for i, b in enumerate(report.bottlenecks[:TOP_N_BOTTLENECKS], 1): - icon = {"critical": "CRIT", "warning": "WARN", "info": "INFO"}.get(b.severity, "?") + icon = {"critical": "🔴", "warning": "🟡", "info": "🔵"}.get(b.severity, "⚪") loc = f" ({b.file_path}:{b.line_number})" if b.file_path else "" - lines.append(f"{i}. [{icon}] **{b.category}** -- {b.name}{loc}") - lines.append(f" Duration: ~{b.duration_s:.1f}s | {b.recommendation} -") + lines.append(f"{i}. {icon} **{b.category}** — {b.name}{loc}") + lines.append(f" - Duration: ~{b.duration_s:.1f}s | Severity: {b.severity}") + lines.append(f" - Fix: {b.recommendation}") + lines.append(f"") + + # Category breakdowns + for cat in ["test", "build", "ci", "import"]: + items = [b for b in report.bottlenecks if b.category == cat] + if items: + lines.append(f"## {cat.title()} Bottlenecks") + lines.append(f"") + for b in items: + icon = {"critical": "🔴", "warning": "🟡", "info": "🔵"}.get(b.severity, "⚪") + loc = f" ({b.file_path}:{b.line_number})" if b.file_path else "" + lines.append(f"- {icon} {b.name}{loc} — ~{b.duration_s:.1f}s — {b.recommendation}") + lines.append(f"") + + return "\n".join(lines) +# ── Main ─────────────────────────────────────────────────────────── + def main(): parser = argparse.ArgumentParser(description="Performance Bottleneck Finder") parser.add_argument("--repo", default=".", help="Path to repository to analyze") parser.add_argument("--json", action="store_true", help="Output as JSON") 
parser.add_argument("--report", help="Write markdown report to file") - parser.add_argument("--threshold", type=float, default=SLOW_TEST_THRESHOLD_S) + parser.add_argument("--threshold", type=float, default=SLOW_TEST_THRESHOLD_S, + help="Slow test threshold in seconds") args = parser.parse_args() global SLOW_TEST_THRESHOLD_S @@ -402,6 +542,7 @@ def main(): else: print(md) + # Exit code: 1 if critical bottlenecks found if report.summary.get("critical", 0) > 0: sys.exit(1) -- 2.43.0