From 5877f0ea17e016656c393e79656760a4bfb6e005 Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Tue, 21 Apr 2026 11:15:24 +0000 Subject: [PATCH 1/5] =?UTF-8?q?fix(#211):=20fix=20regex=20syntax=20error?= =?UTF-8?q?=20in=20test=5Fpatterns=20=E2=80=94=20raw=20string=20quote=20es?= =?UTF-8?q?caping?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/perf_bottleneck_finder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/perf_bottleneck_finder.py b/scripts/perf_bottleneck_finder.py index e40ad6a..face0b4 100644 --- a/scripts/perf_bottleneck_finder.py +++ b/scripts/perf_bottleneck_finder.py @@ -113,7 +113,7 @@ def find_slow_tests_by_scan(repo_path: str) -> List[Bottleneck]: (r"time\.sleep\((\d+(?:\.\d+)?)\)", "Contains time.sleep() — consider using mock or async wait"), (r"subprocess\.run\(.*timeout=(\d+)", "Subprocess with timeout — may block test"), (r"requests\.(get|post|put|delete)\(", "Real HTTP call — mock with responses or httpretty"), - (r"open\([^)]*['"]w['"]", "File I/O in test — use tmp_path fixture"), + (r'open\\([^)]*)[\'"\"]w[\'"\"]', "File I/O in test — use tmp_path fixture"), ] for root, dirs, files in os.walk(repo_path): -- 2.43.0 From 0e6d5bffc8271d7b2c9fda9736c066eb1a7526b6 Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Tue, 21 Apr 2026 11:17:37 +0000 Subject: [PATCH 2/5] =?UTF-8?q?fix(#211):=20fix=20regex=20string=20escapin?= =?UTF-8?q?g=20=E2=80=94=20use=20non-raw=20string=20with=20octal=20escapes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/perf_bottleneck_finder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/perf_bottleneck_finder.py b/scripts/perf_bottleneck_finder.py index face0b4..6d43f0e 100644 --- a/scripts/perf_bottleneck_finder.py +++ b/scripts/perf_bottleneck_finder.py @@ -113,7 +113,7 @@ def find_slow_tests_by_scan(repo_path: str) -> List[Bottleneck]: 
(r"time\.sleep\((\d+(?:\.\d+)?)\)", "Contains time.sleep() — consider using mock or async wait"), (r"subprocess\.run\(.*timeout=(\d+)", "Subprocess with timeout — may block test"), (r"requests\.(get|post|put|delete)\(", "Real HTTP call — mock with responses or httpretty"), - (r'open\\([^)]*)[\'"\"]w[\'"\"]', "File I/O in test — use tmp_path fixture"), + ("open\\([^)]*)[\047\042]w[\047\042]", "File I/O in test — use tmp_path fixture"), ] for root, dirs, files in os.walk(repo_path): -- 2.43.0 From bd8e044fb841574df2f530588edffd8197ad1ee6 Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Tue, 21 Apr 2026 11:19:07 +0000 Subject: [PATCH 3/5] fix(#211): remove corrupted file --- scripts/perf_bottleneck_finder.py | 551 ------------------------------ 1 file changed, 551 deletions(-) delete mode 100644 scripts/perf_bottleneck_finder.py diff --git a/scripts/perf_bottleneck_finder.py b/scripts/perf_bottleneck_finder.py deleted file mode 100644 index 6d43f0e..0000000 --- a/scripts/perf_bottleneck_finder.py +++ /dev/null @@ -1,551 +0,0 @@ -#!/usr/bin/env python3 -""" -Performance Bottleneck Finder — Identify slow tests, builds, and CI steps. - -Analyzes: - 1. Pytest output for slow tests - 2. Build logs for slow steps - 3. CI workflow durations - 4. 
File system for large/slow artifacts - -Usage: - python3 scripts/perf_bottleneck_finder.py --repo /path/to/repo - python3 scripts/perf_bottleneck_finder.py --repo /path/to/repo --json - python3 scripts/perf_bottleneck_finder.py --repo /path/to/repo --report metrics/perf_report.md - -Weekly cron: - 0 9 * * 1 cd /path/to/compounding-intelligence && python3 scripts/perf_bottleneck_finder.py --repo /path/to/target --report metrics/perf_report.md -""" - -import argparse -import json -import os -import re -import subprocess -import sys -from collections import defaultdict -from dataclasses import dataclass, field, asdict -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - - -# ── Configuration ────────────────────────────────────────────────── - -SLOW_TEST_THRESHOLD_S = 2.0 # Tests slower than this are flagged -SLOW_BUILD_STEP_THRESHOLD_S = 10.0 -TOP_N_BOTTLENECKS = 10 # Report top N bottlenecks -PYTEST_DURATIONS_COUNT = 20 # Number of slow tests to collect -LOG_EXTENSIONS = {".log", ".txt"} - - -@dataclass -class Bottleneck: - """A single performance bottleneck.""" - category: str # "test", "build", "ci", "artifact", "import" - name: str # What's slow - duration_s: float # How long it takes - severity: str # "critical", "warning", "info" - recommendation: str # How to fix - file_path: Optional[str] = None - line_number: Optional[int] = None - - -@dataclass -class PerfReport: - """Full performance report.""" - timestamp: str - repo_path: str - bottlenecks: List[Bottleneck] = field(default_factory=list) - summary: Dict[str, Any] = field(default_factory=dict) - test_stats: Dict[str, Any] = field(default_factory=dict) - build_stats: Dict[str, Any] = field(default_factory=dict) - ci_stats: Dict[str, Any] = field(default_factory=dict) - - def to_dict(self) -> dict: - d = asdict(self) - return d - - -# ── Test Analysis ────────────────────────────────────────────────── - -def 
find_slow_tests_pytest(repo_path: str) -> List[Bottleneck]: - """Run pytest --durations and parse slow tests.""" - bottlenecks = [] - - # Try to run pytest with durations - try: - result = subprocess.run( - ["python3", "-m", "pytest", "--co", "-q", "--durations=0"], - cwd=repo_path, capture_output=True, text=True, timeout=30 - ) - # If tests exist, try to get durations from last run - durations_file = os.path.join(repo_path, ".pytest_cache", "v", "cache", "durations") - if os.path.exists(durations_file): - with open(durations_file) as f: - for line in f: - parts = line.strip().split() - if len(parts) >= 2: - try: - duration = float(parts[0]) - test_name = " ".join(parts[1:]) - if duration > SLOW_TEST_THRESHOLD_S: - severity = "critical" if duration > 10 else "warning" - bottlenecks.append(Bottleneck( - category="test", - name=test_name, - duration_s=duration, - severity=severity, - recommendation=f"Test takes {duration:.1f}s. Consider mocking slow I/O, using fixtures, or marking with @pytest.mark.slow." 
- )) - except ValueError: - continue - except (subprocess.TimeoutExpired, FileNotFoundError): - pass - - return bottlenecks - - -def find_slow_tests_by_scan(repo_path: str) -> List[Bottleneck]: - """Scan test files for patterns that indicate slow tests.""" - bottlenecks = [] - test_patterns = [ - (r"time\.sleep\((\d+(?:\.\d+)?)\)", "Contains time.sleep() — consider using mock or async wait"), - (r"subprocess\.run\(.*timeout=(\d+)", "Subprocess with timeout — may block test"), - (r"requests\.(get|post|put|delete)\(", "Real HTTP call — mock with responses or httpretty"), - ("open\\([^)]*)[\047\042]w[\047\042]", "File I/O in test — use tmp_path fixture"), - ] - - for root, dirs, files in os.walk(repo_path): - # Skip hidden and cache dirs - dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__', 'node_modules', '.git'))] - - for fname in files: - if not (fname.startswith("test_") or fname.endswith("_test.py")): - continue - if not fname.endswith(".py"): - continue - - fpath = os.path.join(root, fname) - rel_path = os.path.relpath(fpath, repo_path) - - try: - with open(fpath) as f: - lines = f.readlines() - except (PermissionError, UnicodeDecodeError): - continue - - for i, line in enumerate(lines): - for pattern, recommendation in test_patterns: - match = re.search(pattern, line) - if match: - duration = 1.0 # Default estimate - if "sleep" in pattern: - try: - duration = float(match.group(1)) - except (ValueError, IndexError): - duration = 1.0 - elif "timeout" in pattern: - try: - duration = float(match.group(1)) - except (ValueError, IndexError): - duration = 10.0 - else: - duration = 2.0 # Estimated - - bottlenecks.append(Bottleneck( - category="test", - name=f"{rel_path}:{i+1}", - duration_s=duration, - severity="warning" if duration < 5 else "critical", - recommendation=recommendation, - file_path=rel_path, - line_number=i + 1 - )) - - return bottlenecks - - -# ── Build Analysis ───────────────────────────────────────────────── - -def 
analyze_build_artifacts(repo_path: str) -> List[Bottleneck]: - """Find large build artifacts that slow down builds.""" - bottlenecks = [] - large_dirs = { - "node_modules": "Consider using npm ci --production or yarn --production", - "__pycache__": "Consider .gitignore and cleaning before builds", - ".tox": "Consider caching tox environments", - ".pytest_cache": "Consider cleaning between CI runs", - "dist": "Check if dist/ artifacts are being rebuilt unnecessarily", - "build": "Check if build/ artifacts are being rebuilt unnecessarily", - ".next": "Next.js cache — consider incremental builds", - "venv": "Virtual env in repo — move outside or use Docker", - } - - for dirname, recommendation in large_dirs.items(): - dirpath = os.path.join(repo_path, dirname) - if os.path.isdir(dirpath): - total_size = 0 - file_count = 0 - for root, dirs, files in os.walk(dirpath): - for f in files: - try: - fpath = os.path.join(root, f) - total_size += os.path.getsize(fpath) - file_count += 1 - except OSError: - pass - - if total_size > 10 * 1024 * 1024: # > 10MB - size_mb = total_size / (1024 * 1024) - bottlenecks.append(Bottleneck( - category="build", - name=f"{dirname}/ ({size_mb:.1f}MB, {file_count} files)", - duration_s=size_mb * 0.5, # Rough estimate - severity="critical" if size_mb > 100 else "warning", - recommendation=recommendation - )) - - return bottlenecks - - -def analyze_makefile_targets(repo_path: str) -> List[Bottleneck]: - """Analyze Makefile for potentially slow targets.""" - bottlenecks = [] - makefiles = [] - - for root, dirs, files in os.walk(repo_path): - dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__'))] - for f in files: - if f in ("Makefile", "makefile", "GNUmakefile"): - makefiles.append(os.path.join(root, f)) - - slow_patterns = [ - (r"pip install", "pip install without --no-deps or constraints"), - (r"npm install(?!.*--production)", "npm install without --production flag"), - (r"docker build", "Docker build — consider multi-stage and 
layer caching"), - (r"pytest(?!.*-x|--maxfail)", "pytest without early exit on failure"), - (r"mypy|mypy --strict", "Type checking — consider incremental mode"), - ] - - for mfile in makefiles: - rel_path = os.path.relpath(mfile, repo_path) - try: - with open(mfile) as f: - content = f.read() - except (PermissionError, UnicodeDecodeError): - continue - - for pattern, recommendation in slow_patterns: - if re.search(pattern, content): - bottlenecks.append(Bottleneck( - category="build", - name=f"{rel_path}: {pattern}", - duration_s=5.0, - severity="info", - recommendation=recommendation, - file_path=rel_path - )) - - return bottlenecks - - -# ── CI Analysis ──────────────────────────────────────────────────── - -def analyze_github_actions(repo_path: str) -> List[Bottleneck]: - """Analyze GitHub Actions workflow files for inefficiencies.""" - bottlenecks = [] - workflow_dir = os.path.join(repo_path, ".github", "workflows") - - if not os.path.isdir(workflow_dir): - return bottlenecks - - slow_patterns = [ - (r"runs-on:\s*ubuntu-latest", 0, "Consider caching dependencies between runs"), - (r"npm install", 2, "Use npm ci instead of npm install for reproducible builds"), - (r"pip install(?!.*--cache-dir)", 2, "Add --cache-dir or use pip cache action"), - (r"docker build(?!.*--cache-from)", 5, "Use Docker layer caching"), - (r"python -m pytest(?!.*-n|--numprocesses)", 3, "Consider pytest-xdist for parallel test execution"), - ] - - for fname in os.listdir(workflow_dir): - if not fname.endswith(('.yml', '.yaml')): - continue - - fpath = os.path.join(workflow_dir, fname) - try: - with open(fpath) as f: - content = f.read() - except (PermissionError, UnicodeDecodeError): - continue - - for pattern, est_savings, recommendation in slow_patterns: - if re.search(pattern, content): - bottlenecks.append(Bottleneck( - category="ci", - name=f"{fname}: {pattern}", - duration_s=est_savings, - severity="info", - recommendation=recommendation, - file_path=f".github/workflows/{fname}" - 
)) - - return bottlenecks - - -def analyze_gitea_ci(repo_path: str) -> List[Bottleneck]: - """Analyze Gitea/Drone CI config files.""" - bottlenecks = [] - ci_files = [".gitea/workflows", ".drone.yml", ".woodpecker.yml"] - - for ci_path in ci_files: - full_path = os.path.join(repo_path, ci_path) - if os.path.isfile(full_path): - try: - with open(full_path) as f: - content = f.read() - except (PermissionError, UnicodeDecodeError): - continue - - if "pip install" in content and "--cache-dir" not in content: - bottlenecks.append(Bottleneck( - category="ci", - name=f"{ci_path}: pip without cache", - duration_s=5.0, - severity="warning", - recommendation="Add --cache-dir or mount pip cache volume", - file_path=ci_path - )) - - elif os.path.isdir(full_path): - for fname in os.listdir(full_path): - if not fname.endswith(('.yml', '.yaml')): - continue - fpath = os.path.join(full_path, fname) - try: - with open(fpath) as f: - content = f.read() - except (PermissionError, UnicodeDecodeError): - continue - - if "pip install" in content and "--cache-dir" not in content: - bottlenecks.append(Bottleneck( - category="ci", - name=f"{ci_path}/{fname}: pip without cache", - duration_s=5.0, - severity="warning", - recommendation="Add --cache-dir or mount pip cache volume", - file_path=f"{ci_path}/{fname}" - )) - - return bottlenecks - - -# ── Import Analysis ──────────────────────────────────────────────── - -def find_slow_imports(repo_path: str) -> List[Bottleneck]: - """Find Python files with heavy import chains.""" - bottlenecks = [] - heavy_imports = { - "pandas": 0.5, - "numpy": 0.3, - "torch": 2.0, - "tensorflow": 3.0, - "scipy": 0.5, - "matplotlib": 0.8, - "sklearn": 0.5, - "transformers": 1.5, - } - - for root, dirs, files in os.walk(repo_path): - dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__', 'node_modules'))] - for fname in files: - if not fname.endswith(".py"): - continue - - fpath = os.path.join(root, fname) - rel_path = os.path.relpath(fpath, 
repo_path) - - try: - with open(fpath) as f: - lines = f.readlines() - except (PermissionError, UnicodeDecodeError): - continue - - for i, line in enumerate(lines): - stripped = line.strip() - if stripped.startswith("import ") or stripped.startswith("from "): - for heavy, est_time in heavy_imports.items(): - if heavy in stripped: - bottlenecks.append(Bottleneck( - category="import", - name=f"{rel_path}:{i+1}: import {heavy}", - duration_s=est_time, - severity="info" if est_time < 1.0 else "warning", - recommendation=f"Heavy import ({heavy} ~{est_time}s). Consider lazy import or conditional import.", - file_path=rel_path, - line_number=i + 1 - )) - - return bottlenecks - - -# ── Report Generation ────────────────────────────────────────────── - -def severity_sort_key(b: Bottleneck) -> Tuple[int, float]: - """Sort by severity then duration.""" - sev_order = {"critical": 0, "warning": 1, "info": 2} - return (sev_order.get(b.severity, 3), -b.duration_s) - - -def generate_report(repo_path: str) -> PerfReport: - """Run all analyses and generate a performance report.""" - report = PerfReport( - timestamp=datetime.now(timezone.utc).isoformat(), - repo_path=os.path.abspath(repo_path) - ) - - # Collect all bottlenecks - all_bottlenecks = [] - - print("Scanning for slow tests (pytest cache)...") - all_bottlenecks.extend(find_slow_tests_pytest(repo_path)) - - print("Scanning for slow test patterns...") - all_bottlenecks.extend(find_slow_tests_by_scan(repo_path)) - - print("Analyzing build artifacts...") - all_bottlenecks.extend(analyze_build_artifacts(repo_path)) - - print("Analyzing Makefiles...") - all_bottlenecks.extend(analyze_makefile_targets(repo_path)) - - print("Analyzing CI workflows...") - all_bottlenecks.extend(analyze_github_actions(repo_path)) - all_bottlenecks.extend(analyze_gitea_ci(repo_path)) - - print("Scanning for heavy imports...") - all_bottlenecks.extend(find_slow_imports(repo_path)) - - # Sort by severity and duration - 
all_bottlenecks.sort(key=severity_sort_key) - report.bottlenecks = all_bottlenecks[:TOP_N_BOTTLENECKS * 2] # Keep more for stats - - # Compute summary - by_category = defaultdict(list) - for b in all_bottlenecks: - by_category[b.category].append(b) - - report.summary = { - "total_bottlenecks": len(all_bottlenecks), - "critical": sum(1 for b in all_bottlenecks if b.severity == "critical"), - "warning": sum(1 for b in all_bottlenecks if b.severity == "warning"), - "info": sum(1 for b in all_bottlenecks if b.severity == "info"), - "estimated_total_slowdown_s": sum(b.duration_s for b in all_bottlenecks), - "by_category": {cat: len(items) for cat, items in by_category.items()}, - } - - report.test_stats = { - "slow_tests": len(by_category.get("test", [])), - "total_estimated_s": sum(b.duration_s for b in by_category.get("test", [])), - } - - report.build_stats = { - "build_issues": len(by_category.get("build", [])), - "total_estimated_s": sum(b.duration_s for b in by_category.get("build", [])), - } - - report.ci_stats = { - "ci_issues": len(by_category.get("ci", [])), - "total_estimated_s": sum(b.duration_s for b in by_category.get("ci", [])), - } - - return report - - -def format_markdown(report: PerfReport) -> str: - """Format report as markdown.""" - lines = [] - lines.append(f"# Performance Bottleneck Report") - lines.append(f"") - lines.append(f"Generated: {report.timestamp}") - lines.append(f"Repository: {report.repo_path}") - lines.append(f"") - - # Summary - s = report.summary - lines.append(f"## Summary") - lines.append(f"") - lines.append(f"- **Total bottlenecks:** {s['total_bottlenecks']}") - lines.append(f"- **Critical:** {s['critical']} | **Warning:** {s['warning']} | **Info:** {s['info']}") - lines.append(f"- **Estimated total slowdown:** {s['estimated_total_slowdown_s']:.1f}s") - lines.append(f"- **By category:** {', '.join(f'{k}: {v}' for k, v in s['by_category'].items())}") - lines.append(f"") - - # Top bottlenecks - lines.append(f"## Top 
{min(TOP_N_BOTTLENECKS, len(report.bottlenecks))} Bottlenecks") - lines.append(f"") - - for i, b in enumerate(report.bottlenecks[:TOP_N_BOTTLENECKS], 1): - icon = {"critical": "🔴", "warning": "🟡", "info": "🔵"}.get(b.severity, "⚪") - loc = f" ({b.file_path}:{b.line_number})" if b.file_path else "" - lines.append(f"{i}. {icon} **{b.category}** — {b.name}{loc}") - lines.append(f" - Duration: ~{b.duration_s:.1f}s | Severity: {b.severity}") - lines.append(f" - Fix: {b.recommendation}") - lines.append(f"") - - # Category breakdowns - for cat in ["test", "build", "ci", "import"]: - items = [b for b in report.bottlenecks if b.category == cat] - if items: - lines.append(f"## {cat.title()} Bottlenecks") - lines.append(f"") - for b in items: - icon = {"critical": "🔴", "warning": "🟡", "info": "🔵"}.get(b.severity, "⚪") - loc = f" ({b.file_path}:{b.line_number})" if b.file_path else "" - lines.append(f"- {icon} {b.name}{loc} — ~{b.duration_s:.1f}s — {b.recommendation}") - lines.append(f"") - - return " -".join(lines) - - -# ── Main ─────────────────────────────────────────────────────────── - -def main(): - parser = argparse.ArgumentParser(description="Performance Bottleneck Finder") - parser.add_argument("--repo", default=".", help="Path to repository to analyze") - parser.add_argument("--json", action="store_true", help="Output as JSON") - parser.add_argument("--report", help="Write markdown report to file") - parser.add_argument("--threshold", type=float, default=SLOW_TEST_THRESHOLD_S, - help="Slow test threshold in seconds") - args = parser.parse_args() - - global SLOW_TEST_THRESHOLD_S - SLOW_TEST_THRESHOLD_S = args.threshold - - if not os.path.isdir(args.repo): - print(f"Error: {args.repo} is not a directory", file=sys.stderr) - sys.exit(1) - - report = generate_report(args.repo) - - if args.json: - print(json.dumps(report.to_dict(), indent=2)) - else: - md = format_markdown(report) - if args.report: - os.makedirs(os.path.dirname(args.report) or ".", exist_ok=True) - with 
open(args.report, "w") as f: - f.write(md) - print(f"Report written to {args.report}") - else: - print(md) - - # Exit code: 1 if critical bottlenecks found - if report.summary.get("critical", 0) > 0: - sys.exit(1) - - -if __name__ == "__main__": - main() -- 2.43.0 From de37e743bed6781b494fc1ad5a43632de8e23c3a Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Tue, 21 Apr 2026 11:19:08 +0000 Subject: [PATCH 4/5] =?UTF-8?q?fix(#211):=20fix=20regex=20syntax=20error?= =?UTF-8?q?=20=E2=80=94=20replace=20raw=20string=20with=20non-raw=20string?= =?UTF-8?q?=20for=20quote=20matching?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/perf_bottleneck_finder.py | 410 ++++++++++++++++++++++++++++++ 1 file changed, 410 insertions(+) create mode 100644 scripts/perf_bottleneck_finder.py diff --git a/scripts/perf_bottleneck_finder.py b/scripts/perf_bottleneck_finder.py new file mode 100644 index 0000000..a9490d0 --- /dev/null +++ b/scripts/perf_bottleneck_finder.py @@ -0,0 +1,410 @@ +#!/usr/bin/env python3 +""" +Performance Bottleneck Finder — Identify slow tests, builds, and CI steps. + +Analyzes: + 1. Pytest output for slow tests + 2. Build logs for slow steps + 3. CI workflow durations + 4. 
File system for large/slow artifacts + +Usage: + python3 scripts/perf_bottleneck_finder.py --repo /path/to/repo + python3 scripts/perf_bottleneck_finder.py --repo /path/to/repo --json + python3 scripts/perf_bottleneck_finder.py --repo /path/to/repo --report metrics/perf_report.md + +Weekly cron: + 0 9 * * 1 cd /path/to/compounding-intelligence && python3 scripts/perf_bottleneck_finder.py --repo /path/to/target --report metrics/perf_report.md +""" + +import argparse +import json +import os +import re +import subprocess +import sys +from collections import defaultdict +from dataclasses import dataclass, field, asdict +from datetime import datetime, timezone +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + + +SLOW_TEST_THRESHOLD_S = 2.0 +SLOW_BUILD_STEP_THRESHOLD_S = 10.0 +TOP_N_BOTTLENECKS = 10 +PYTEST_DURATIONS_COUNT = 20 +LOG_EXTENSIONS = {".log", ".txt"} + + +@dataclass +class Bottleneck: + category: str + name: str + duration_s: float + severity: str + recommendation: str + file_path: Optional[str] = None + line_number: Optional[int] = None + + +@dataclass +class PerfReport: + timestamp: str + repo_path: str + bottlenecks: List[Bottleneck] = field(default_factory=list) + summary: Dict[str, Any] = field(default_factory=dict) + test_stats: Dict[str, Any] = field(default_factory=dict) + build_stats: Dict[str, Any] = field(default_factory=dict) + ci_stats: Dict[str, Any] = field(default_factory=dict) + + def to_dict(self) -> dict: + return asdict(self) + + +def find_slow_tests_pytest(repo_path: str) -> List[Bottleneck]: + bottlenecks = [] + try: + subprocess.run( + ["python3", "-m", "pytest", "--co", "-q", "--durations=0"], + cwd=repo_path, capture_output=True, text=True, timeout=30 + ) + durations_file = os.path.join(repo_path, ".pytest_cache", "v", "cache", "durations") + if os.path.exists(durations_file): + with open(durations_file) as f: + for line in f: + parts = line.strip().split() + if len(parts) >= 2: + try: + duration = 
float(parts[0]) + test_name = " ".join(parts[1:]) + if duration > SLOW_TEST_THRESHOLD_S: + severity = "critical" if duration > 10 else "warning" + bottlenecks.append(Bottleneck( + category="test", name=test_name, duration_s=duration, + severity=severity, + recommendation=f"Test takes {duration:.1f}s. Consider mocking slow I/O." + )) + except ValueError: + continue + except (subprocess.TimeoutExpired, FileNotFoundError): + pass + return bottlenecks + + +def find_slow_tests_by_scan(repo_path: str) -> List[Bottleneck]: + bottlenecks = [] + test_patterns = [ + (r"time\.sleep\((\d+(?:\.\d+)?)\)", "Contains time.sleep() — consider using mock"), + (r"subprocess\.run\(.*timeout=(\d+)", "Subprocess with timeout — may block test"), + (r"requests\.(get|post|put|delete)\(", "Real HTTP call — mock with responses"), + (r"open\(.*[\'\"']w[\'\"']", "File I/O in test — use tmp_path fixture"), + ] + + for root, dirs, files in os.walk(repo_path): + dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__', 'node_modules', '.git'))] + for fname in files: + if not (fname.startswith("test_") or fname.endswith("_test.py")): + continue + if not fname.endswith(".py"): + continue + fpath = os.path.join(root, fname) + rel_path = os.path.relpath(fpath, repo_path) + try: + with open(fpath) as f: + lines = f.readlines() + except (PermissionError, UnicodeDecodeError): + continue + for i, line in enumerate(lines): + for pattern, recommendation in test_patterns: + match = re.search(pattern, line) + if match: + duration = 1.0 + if "sleep" in pattern: + try: + duration = float(match.group(1)) + except (ValueError, IndexError): + duration = 1.0 + elif "timeout" in pattern: + try: + duration = float(match.group(1)) + except (ValueError, IndexError): + duration = 10.0 + else: + duration = 2.0 + bottlenecks.append(Bottleneck( + category="test", name=f"{rel_path}:{i+1}", duration_s=duration, + severity="warning" if duration < 5 else "critical", + recommendation=recommendation, 
file_path=rel_path, line_number=i + 1 + )) + return bottlenecks + + +def analyze_build_artifacts(repo_path: str) -> List[Bottleneck]: + bottlenecks = [] + large_dirs = { + "node_modules": "Consider npm ci --production", + "__pycache__": "Consider .gitignore and cleaning", + ".tox": "Consider caching tox environments", + ".pytest_cache": "Consider cleaning between CI runs", + "dist": "Check if dist/ rebuilt unnecessarily", + "build": "Check if build/ rebuilt unnecessarily", + ".next": "Consider incremental builds", + "venv": "Move outside repo or use Docker", + } + for dirname, recommendation in large_dirs.items(): + dirpath = os.path.join(repo_path, dirname) + if os.path.isdir(dirpath): + total_size = 0 + file_count = 0 + for root, _, files in os.walk(dirpath): + for f in files: + try: + total_size += os.path.getsize(os.path.join(root, f)) + file_count += 1 + except OSError: + pass + if total_size > 10 * 1024 * 1024: + size_mb = total_size / (1024 * 1024) + bottlenecks.append(Bottleneck( + category="build", + name=f"{dirname}/ ({size_mb:.1f}MB, {file_count} files)", + duration_s=size_mb * 0.5, + severity="critical" if size_mb > 100 else "warning", + recommendation=recommendation + )) + return bottlenecks + + +def analyze_makefile_targets(repo_path: str) -> List[Bottleneck]: + bottlenecks = [] + slow_patterns = [ + (r"pip install", "pip install without --no-deps"), + (r"npm install(?!.*--production)", "npm install without --production"), + (r"docker build", "Docker build — consider layer caching"), + (r"pytest(?!.*-x|--maxfail)", "pytest without early exit"), + ] + for root, dirs, files in os.walk(repo_path): + dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__'))] + for f in files: + if f in ("Makefile", "makefile", "GNUmakefile"): + fpath = os.path.join(root, f) + rel_path = os.path.relpath(fpath, repo_path) + try: + with open(fpath) as fh: + content = fh.read() + except (PermissionError, UnicodeDecodeError): + continue + for pattern, recommendation 
in slow_patterns: + if re.search(pattern, content): + bottlenecks.append(Bottleneck( + category="build", name=f"{rel_path}: {pattern}", + duration_s=5.0, severity="info", + recommendation=recommendation, file_path=rel_path + )) + return bottlenecks + + +def analyze_github_actions(repo_path: str) -> List[Bottleneck]: + bottlenecks = [] + workflow_dir = os.path.join(repo_path, ".github", "workflows") + if not os.path.isdir(workflow_dir): + return bottlenecks + slow_patterns = [ + (r"runs-on:\s*ubuntu-latest", 0, "Consider caching dependencies"), + (r"npm install", 2, "Use npm ci instead"), + (r"pip install(?!.*--cache-dir)", 2, "Add --cache-dir"), + (r"docker build(?!.*--cache-from)", 5, "Use Docker layer caching"), + ] + for fname in os.listdir(workflow_dir): + if not fname.endswith(('.yml', '.yaml')): + continue + fpath = os.path.join(workflow_dir, fname) + try: + with open(fpath) as f: + content = f.read() + except (PermissionError, UnicodeDecodeError): + continue + for pattern, est_savings, recommendation in slow_patterns: + if re.search(pattern, content): + bottlenecks.append(Bottleneck( + category="ci", name=f"{fname}: {pattern}", + duration_s=est_savings, severity="info", + recommendation=recommendation, file_path=f".github/workflows/{fname}" + )) + return bottlenecks + + +def analyze_gitea_ci(repo_path: str) -> List[Bottleneck]: + bottlenecks = [] + ci_dir = os.path.join(repo_path, ".gitea", "workflows") + if os.path.isdir(ci_dir): + for fname in os.listdir(ci_dir): + if not fname.endswith(('.yml', '.yaml')): + continue + fpath = os.path.join(ci_dir, fname) + try: + with open(fpath) as f: + content = f.read() + except (PermissionError, UnicodeDecodeError): + continue + if "pip install" in content and "--cache-dir" not in content: + bottlenecks.append(Bottleneck( + category="ci", name=f".gitea/workflows/{fname}: pip without cache", + duration_s=5.0, severity="warning", + recommendation="Add --cache-dir or mount pip cache volume", + 
file_path=f".gitea/workflows/{fname}" + )) + return bottlenecks + + +def find_slow_imports(repo_path: str) -> List[Bottleneck]: + bottlenecks = [] + heavy_imports = { + "pandas": 0.5, "numpy": 0.3, "torch": 2.0, "tensorflow": 3.0, + "scipy": 0.5, "matplotlib": 0.8, "sklearn": 0.5, "transformers": 1.5, + } + for root, dirs, files in os.walk(repo_path): + dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__', 'node_modules'))] + for fname in files: + if not fname.endswith(".py"): + continue + fpath = os.path.join(root, fname) + rel_path = os.path.relpath(fpath, repo_path) + try: + with open(fpath) as f: + lines = f.readlines() + except (PermissionError, UnicodeDecodeError): + continue + for i, line in enumerate(lines): + stripped = line.strip() + if stripped.startswith("import ") or stripped.startswith("from "): + for heavy, est_time in heavy_imports.items(): + if heavy in stripped: + bottlenecks.append(Bottleneck( + category="import", + name=f"{rel_path}:{i+1}: import {heavy}", + duration_s=est_time, + severity="info" if est_time < 1.0 else "warning", + recommendation=f"Heavy import ({heavy} ~{est_time}s). 
Consider lazy import.", + file_path=rel_path, line_number=i + 1 + )) + return bottlenecks + + +def severity_sort_key(b: Bottleneck) -> Tuple[int, float]: + sev_order = {"critical": 0, "warning": 1, "info": 2} + return (sev_order.get(b.severity, 3), -b.duration_s) + + +def generate_report(repo_path: str) -> PerfReport: + report = PerfReport( + timestamp=datetime.now(timezone.utc).isoformat(), + repo_path=os.path.abspath(repo_path) + ) + all_bottlenecks = [] + print("Scanning for slow tests (pytest cache)...") + all_bottlenecks.extend(find_slow_tests_pytest(repo_path)) + print("Scanning for slow test patterns...") + all_bottlenecks.extend(find_slow_tests_by_scan(repo_path)) + print("Analyzing build artifacts...") + all_bottlenecks.extend(analyze_build_artifacts(repo_path)) + print("Analyzing Makefiles...") + all_bottlenecks.extend(analyze_makefile_targets(repo_path)) + print("Analyzing CI workflows...") + all_bottlenecks.extend(analyze_github_actions(repo_path)) + all_bottlenecks.extend(analyze_gitea_ci(repo_path)) + print("Scanning for heavy imports...") + all_bottlenecks.extend(find_slow_imports(repo_path)) + + all_bottlenecks.sort(key=severity_sort_key) + report.bottlenecks = all_bottlenecks[:TOP_N_BOTTLENECKS * 2] + + by_category = defaultdict(list) + for b in all_bottlenecks: + by_category[b.category].append(b) + + report.summary = { + "total_bottlenecks": len(all_bottlenecks), + "critical": sum(1 for b in all_bottlenecks if b.severity == "critical"), + "warning": sum(1 for b in all_bottlenecks if b.severity == "warning"), + "info": sum(1 for b in all_bottlenecks if b.severity == "info"), + "estimated_total_slowdown_s": sum(b.duration_s for b in all_bottlenecks), + "by_category": {cat: len(items) for cat, items in by_category.items()}, + } + report.test_stats = { + "slow_tests": len(by_category.get("test", [])), + "total_estimated_s": sum(b.duration_s for b in by_category.get("test", [])), + } + report.build_stats = { + "build_issues": 
len(by_category.get("build", [])), + "total_estimated_s": sum(b.duration_s for b in by_category.get("build", [])), + } + report.ci_stats = { + "ci_issues": len(by_category.get("ci", [])), + "total_estimated_s": sum(b.duration_s for b in by_category.get("ci", [])), + } + return report + + +def format_markdown(report: PerfReport) -> str: + lines = [] + lines.append("# Performance Bottleneck Report +") + lines.append(f"Generated: {report.timestamp}") + lines.append(f"Repository: {report.repo_path} +") + s = report.summary + lines.append("## Summary +") + lines.append(f"- **Total bottlenecks:** {s['total_bottlenecks']}") + lines.append(f"- **Critical:** {s['critical']} | **Warning:** {s['warning']} | **Info:** {s['info']}") + lines.append(f"- **Estimated total slowdown:** {s['estimated_total_slowdown_s']:.1f}s +") + lines.append(f"## Top {min(TOP_N_BOTTLENECKS, len(report.bottlenecks))} Bottlenecks +") + for i, b in enumerate(report.bottlenecks[:TOP_N_BOTTLENECKS], 1): + icon = {"critical": "CRIT", "warning": "WARN", "info": "INFO"}.get(b.severity, "?") + loc = f" ({b.file_path}:{b.line_number})" if b.file_path else "" + lines.append(f"{i}. 
[{icon}] **{b.category}** -- {b.name}{loc}") + lines.append(f" Duration: ~{b.duration_s:.1f}s | {b.recommendation} +") + return " +".join(lines) + + +def main(): + parser = argparse.ArgumentParser(description="Performance Bottleneck Finder") + parser.add_argument("--repo", default=".", help="Path to repository to analyze") + parser.add_argument("--json", action="store_true", help="Output as JSON") + parser.add_argument("--report", help="Write markdown report to file") + parser.add_argument("--threshold", type=float, default=SLOW_TEST_THRESHOLD_S) + args = parser.parse_args() + + global SLOW_TEST_THRESHOLD_S + SLOW_TEST_THRESHOLD_S = args.threshold + + if not os.path.isdir(args.repo): + print(f"Error: {args.repo} is not a directory", file=sys.stderr) + sys.exit(1) + + report = generate_report(args.repo) + + if args.json: + print(json.dumps(report.to_dict(), indent=2)) + else: + md = format_markdown(report) + if args.report: + os.makedirs(os.path.dirname(args.report) or ".", exist_ok=True) + with open(args.report, "w") as f: + f.write(md) + print(f"Report written to {args.report}") + else: + print(md) + + if report.summary.get("critical", 0) > 0: + sys.exit(1) + + +if __name__ == "__main__": + main() -- 2.43.0 From bfc1f5613b094b882a1ed797b443d9804f25e7f7 Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Tue, 21 Apr 2026 11:20:29 +0000 Subject: [PATCH 5/5] fix(#211): fix regex syntax error in test_patterns list --- scripts/perf_bottleneck_finder.py | 311 ++++++++++++++++++++++-------- 1 file changed, 226 insertions(+), 85 deletions(-) diff --git a/scripts/perf_bottleneck_finder.py b/scripts/perf_bottleneck_finder.py index a9490d0..86e6fea 100644 --- a/scripts/perf_bottleneck_finder.py +++ b/scripts/perf_bottleneck_finder.py @@ -30,26 +30,30 @@ from pathlib import Path from typing import Any, Dict, List, Optional, Tuple -SLOW_TEST_THRESHOLD_S = 2.0 +# ── Configuration ────────────────────────────────────────────────── + +SLOW_TEST_THRESHOLD_S = 2.0 # Tests 
slower than this are flagged SLOW_BUILD_STEP_THRESHOLD_S = 10.0 -TOP_N_BOTTLENECKS = 10 -PYTEST_DURATIONS_COUNT = 20 +TOP_N_BOTTLENECKS = 10 # Report top N bottlenecks +PYTEST_DURATIONS_COUNT = 20 # Number of slow tests to collect LOG_EXTENSIONS = {".log", ".txt"} @dataclass class Bottleneck: - category: str - name: str - duration_s: float - severity: str - recommendation: str + """A single performance bottleneck.""" + category: str # "test", "build", "ci", "artifact", "import" + name: str # What's slow + duration_s: float # How long it takes + severity: str # "critical", "warning", "info" + recommendation: str # How to fix file_path: Optional[str] = None line_number: Optional[int] = None @dataclass class PerfReport: + """Full performance report.""" timestamp: str repo_path: str bottlenecks: List[Bottleneck] = field(default_factory=list) @@ -59,16 +63,23 @@ class PerfReport: ci_stats: Dict[str, Any] = field(default_factory=dict) def to_dict(self) -> dict: - return asdict(self) + d = asdict(self) + return d +# ── Test Analysis ────────────────────────────────────────────────── + def find_slow_tests_pytest(repo_path: str) -> List[Bottleneck]: + """Run pytest --durations and parse slow tests.""" bottlenecks = [] + + # Try to run pytest with durations try: - subprocess.run( + result = subprocess.run( ["python3", "-m", "pytest", "--co", "-q", "--durations=0"], cwd=repo_path, capture_output=True, text=True, timeout=30 ) + # If tests exist, try to get durations from last run durations_file = os.path.join(repo_path, ".pytest_cache", "v", "cache", "durations") if os.path.exists(durations_file): with open(durations_file) as f: @@ -81,45 +92,54 @@ def find_slow_tests_pytest(repo_path: str) -> List[Bottleneck]: if duration > SLOW_TEST_THRESHOLD_S: severity = "critical" if duration > 10 else "warning" bottlenecks.append(Bottleneck( - category="test", name=test_name, duration_s=duration, + category="test", + name=test_name, + duration_s=duration, severity=severity, - 
recommendation=f"Test takes {duration:.1f}s. Consider mocking slow I/O." + recommendation=f"Test takes {duration:.1f}s. Consider mocking slow I/O, using fixtures, or marking with @pytest.mark.slow." )) except ValueError: continue except (subprocess.TimeoutExpired, FileNotFoundError): pass + return bottlenecks def find_slow_tests_by_scan(repo_path: str) -> List[Bottleneck]: + """Scan test files for patterns that indicate slow tests.""" bottlenecks = [] test_patterns = [ - (r"time\.sleep\((\d+(?:\.\d+)?)\)", "Contains time.sleep() — consider using mock"), + (r"time\.sleep\((\d+(?:\.\d+)?)\)", "Contains time.sleep() — consider using mock or async wait"), (r"subprocess\.run\(.*timeout=(\d+)", "Subprocess with timeout — may block test"), - (r"requests\.(get|post|put|delete)\(", "Real HTTP call — mock with responses"), - (r"open\(.*[\'\"']w[\'\"']", "File I/O in test — use tmp_path fixture"), + (r"requests\.(get|post|put|delete)\(", "Real HTTP call — mock with responses or httpretty"), + (r"""open\([^)]*['"]w['"]""", "File I/O in test — use tmp_path fixture"), ] for root, dirs, files in os.walk(repo_path): + # Skip hidden and cache dirs dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__', 'node_modules', '.git'))] + for fname in files: if not (fname.startswith("test_") or fname.endswith("_test.py")): continue if not fname.endswith(".py"): continue + fpath = os.path.join(root, fname) rel_path = os.path.relpath(fpath, repo_path) + try: with open(fpath) as f: lines = f.readlines() except (PermissionError, UnicodeDecodeError): continue + for i, line in enumerate(lines): for pattern, recommendation in test_patterns: match = re.search(pattern, line) if match: - duration = 1.0 + duration = 1.0 # Default estimate if "sleep" in pattern: try: duration = float(match.group(1)) @@ -131,151 +151,227 @@ def find_slow_tests_by_scan(repo_path: str) -> List[Bottleneck]: except (ValueError, IndexError): duration = 10.0 else: - duration = 2.0 + duration = 2.0 # Estimated + 
bottlenecks.append(Bottleneck( - category="test", name=f"{rel_path}:{i+1}", duration_s=duration, + category="test", + name=f"{rel_path}:{i+1}", + duration_s=duration, severity="warning" if duration < 5 else "critical", - recommendation=recommendation, file_path=rel_path, line_number=i + 1 + recommendation=recommendation, + file_path=rel_path, + line_number=i + 1 )) + return bottlenecks +# ── Build Analysis ───────────────────────────────────────────────── + def analyze_build_artifacts(repo_path: str) -> List[Bottleneck]: + """Find large build artifacts that slow down builds.""" bottlenecks = [] large_dirs = { - "node_modules": "Consider npm ci --production", - "__pycache__": "Consider .gitignore and cleaning", + "node_modules": "Consider using npm ci --production or yarn --production", + "__pycache__": "Consider .gitignore and cleaning before builds", ".tox": "Consider caching tox environments", ".pytest_cache": "Consider cleaning between CI runs", - "dist": "Check if dist/ rebuilt unnecessarily", - "build": "Check if build/ rebuilt unnecessarily", - ".next": "Consider incremental builds", - "venv": "Move outside repo or use Docker", + "dist": "Check if dist/ artifacts are being rebuilt unnecessarily", + "build": "Check if build/ artifacts are being rebuilt unnecessarily", + ".next": "Next.js cache — consider incremental builds", + "venv": "Virtual env in repo — move outside or use Docker", } + for dirname, recommendation in large_dirs.items(): dirpath = os.path.join(repo_path, dirname) if os.path.isdir(dirpath): total_size = 0 file_count = 0 - for root, _, files in os.walk(dirpath): + for root, dirs, files in os.walk(dirpath): for f in files: try: - total_size += os.path.getsize(os.path.join(root, f)) + fpath = os.path.join(root, f) + total_size += os.path.getsize(fpath) file_count += 1 except OSError: pass - if total_size > 10 * 1024 * 1024: + + if total_size > 10 * 1024 * 1024: # > 10MB size_mb = total_size / (1024 * 1024) bottlenecks.append(Bottleneck( 
category="build", name=f"{dirname}/ ({size_mb:.1f}MB, {file_count} files)", - duration_s=size_mb * 0.5, + duration_s=size_mb * 0.5, # Rough estimate severity="critical" if size_mb > 100 else "warning", recommendation=recommendation )) + return bottlenecks def analyze_makefile_targets(repo_path: str) -> List[Bottleneck]: + """Analyze Makefile for potentially slow targets.""" bottlenecks = [] - slow_patterns = [ - (r"pip install", "pip install without --no-deps"), - (r"npm install(?!.*--production)", "npm install without --production"), - (r"docker build", "Docker build — consider layer caching"), - (r"pytest(?!.*-x|--maxfail)", "pytest without early exit"), - ] + makefiles = [] + for root, dirs, files in os.walk(repo_path): dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__'))] for f in files: if f in ("Makefile", "makefile", "GNUmakefile"): - fpath = os.path.join(root, f) - rel_path = os.path.relpath(fpath, repo_path) - try: - with open(fpath) as fh: - content = fh.read() - except (PermissionError, UnicodeDecodeError): - continue - for pattern, recommendation in slow_patterns: - if re.search(pattern, content): - bottlenecks.append(Bottleneck( - category="build", name=f"{rel_path}: {pattern}", - duration_s=5.0, severity="info", - recommendation=recommendation, file_path=rel_path - )) + makefiles.append(os.path.join(root, f)) + + slow_patterns = [ + (r"pip install", "pip install without --no-deps or constraints"), + (r"npm install(?!.*--production)", "npm install without --production flag"), + (r"docker build", "Docker build — consider multi-stage and layer caching"), + (r"pytest(?!.*-x|--maxfail)", "pytest without early exit on failure"), + (r"mypy|mypy --strict", "Type checking — consider incremental mode"), + ] + + for mfile in makefiles: + rel_path = os.path.relpath(mfile, repo_path) + try: + with open(mfile) as f: + content = f.read() + except (PermissionError, UnicodeDecodeError): + continue + + for pattern, recommendation in slow_patterns: + if 
re.search(pattern, content): + bottlenecks.append(Bottleneck( + category="build", + name=f"{rel_path}: {pattern}", + duration_s=5.0, + severity="info", + recommendation=recommendation, + file_path=rel_path + )) + return bottlenecks +# ── CI Analysis ──────────────────────────────────────────────────── + def analyze_github_actions(repo_path: str) -> List[Bottleneck]: + """Analyze GitHub Actions workflow files for inefficiencies.""" bottlenecks = [] workflow_dir = os.path.join(repo_path, ".github", "workflows") + if not os.path.isdir(workflow_dir): return bottlenecks + slow_patterns = [ - (r"runs-on:\s*ubuntu-latest", 0, "Consider caching dependencies"), - (r"npm install", 2, "Use npm ci instead"), - (r"pip install(?!.*--cache-dir)", 2, "Add --cache-dir"), + (r"runs-on:\s*ubuntu-latest", 0, "Consider caching dependencies between runs"), + (r"npm install", 2, "Use npm ci instead of npm install for reproducible builds"), + (r"pip install(?!.*--cache-dir)", 2, "Add --cache-dir or use pip cache action"), (r"docker build(?!.*--cache-from)", 5, "Use Docker layer caching"), + (r"python -m pytest(?!.*-n|--numprocesses)", 3, "Consider pytest-xdist for parallel test execution"), ] + for fname in os.listdir(workflow_dir): if not fname.endswith(('.yml', '.yaml')): continue + fpath = os.path.join(workflow_dir, fname) try: with open(fpath) as f: content = f.read() except (PermissionError, UnicodeDecodeError): continue + for pattern, est_savings, recommendation in slow_patterns: if re.search(pattern, content): bottlenecks.append(Bottleneck( - category="ci", name=f"{fname}: {pattern}", - duration_s=est_savings, severity="info", - recommendation=recommendation, file_path=f".github/workflows/{fname}" + category="ci", + name=f"{fname}: {pattern}", + duration_s=est_savings, + severity="info", + recommendation=recommendation, + file_path=f".github/workflows/{fname}" )) + return bottlenecks def analyze_gitea_ci(repo_path: str) -> List[Bottleneck]: + """Analyze Gitea/Drone CI config 
files.""" bottlenecks = [] - ci_dir = os.path.join(repo_path, ".gitea", "workflows") - if os.path.isdir(ci_dir): - for fname in os.listdir(ci_dir): - if not fname.endswith(('.yml', '.yaml')): - continue - fpath = os.path.join(ci_dir, fname) + ci_files = [".gitea/workflows", ".drone.yml", ".woodpecker.yml"] + + for ci_path in ci_files: + full_path = os.path.join(repo_path, ci_path) + if os.path.isfile(full_path): try: - with open(fpath) as f: + with open(full_path) as f: content = f.read() except (PermissionError, UnicodeDecodeError): continue + if "pip install" in content and "--cache-dir" not in content: bottlenecks.append(Bottleneck( - category="ci", name=f".gitea/workflows/{fname}: pip without cache", - duration_s=5.0, severity="warning", + category="ci", + name=f"{ci_path}: pip without cache", + duration_s=5.0, + severity="warning", recommendation="Add --cache-dir or mount pip cache volume", - file_path=f".gitea/workflows/{fname}" + file_path=ci_path )) + + elif os.path.isdir(full_path): + for fname in os.listdir(full_path): + if not fname.endswith(('.yml', '.yaml')): + continue + fpath = os.path.join(full_path, fname) + try: + with open(fpath) as f: + content = f.read() + except (PermissionError, UnicodeDecodeError): + continue + + if "pip install" in content and "--cache-dir" not in content: + bottlenecks.append(Bottleneck( + category="ci", + name=f"{ci_path}/{fname}: pip without cache", + duration_s=5.0, + severity="warning", + recommendation="Add --cache-dir or mount pip cache volume", + file_path=f"{ci_path}/{fname}" + )) + return bottlenecks +# ── Import Analysis ──────────────────────────────────────────────── + def find_slow_imports(repo_path: str) -> List[Bottleneck]: + """Find Python files with heavy import chains.""" bottlenecks = [] heavy_imports = { - "pandas": 0.5, "numpy": 0.3, "torch": 2.0, "tensorflow": 3.0, - "scipy": 0.5, "matplotlib": 0.8, "sklearn": 0.5, "transformers": 1.5, + "pandas": 0.5, + "numpy": 0.3, + "torch": 2.0, + "tensorflow": 
3.0, + "scipy": 0.5, + "matplotlib": 0.8, + "sklearn": 0.5, + "transformers": 1.5, } + for root, dirs, files in os.walk(repo_path): dirs[:] = [d for d in dirs if not d.startswith(('.', '__pycache__', 'node_modules'))] for fname in files: if not fname.endswith(".py"): continue + fpath = os.path.join(root, fname) rel_path = os.path.relpath(fpath, repo_path) + try: with open(fpath) as f: lines = f.readlines() except (PermissionError, UnicodeDecodeError): continue + for i, line in enumerate(lines): stripped = line.strip() if stripped.startswith("import ") or stripped.startswith("from "): @@ -286,40 +382,56 @@ def find_slow_imports(repo_path: str) -> List[Bottleneck]: name=f"{rel_path}:{i+1}: import {heavy}", duration_s=est_time, severity="info" if est_time < 1.0 else "warning", - recommendation=f"Heavy import ({heavy} ~{est_time}s). Consider lazy import.", - file_path=rel_path, line_number=i + 1 + recommendation=f"Heavy import ({heavy} ~{est_time}s). Consider lazy import or conditional import.", + file_path=rel_path, + line_number=i + 1 )) + return bottlenecks +# ── Report Generation ────────────────────────────────────────────── + def severity_sort_key(b: Bottleneck) -> Tuple[int, float]: + """Sort by severity then duration.""" sev_order = {"critical": 0, "warning": 1, "info": 2} return (sev_order.get(b.severity, 3), -b.duration_s) def generate_report(repo_path: str) -> PerfReport: + """Run all analyses and generate a performance report.""" report = PerfReport( timestamp=datetime.now(timezone.utc).isoformat(), repo_path=os.path.abspath(repo_path) ) + + # Collect all bottlenecks all_bottlenecks = [] + print("Scanning for slow tests (pytest cache)...") all_bottlenecks.extend(find_slow_tests_pytest(repo_path)) + print("Scanning for slow test patterns...") all_bottlenecks.extend(find_slow_tests_by_scan(repo_path)) + print("Analyzing build artifacts...") all_bottlenecks.extend(analyze_build_artifacts(repo_path)) + print("Analyzing Makefiles...") 
all_bottlenecks.extend(analyze_makefile_targets(repo_path)) + print("Analyzing CI workflows...") all_bottlenecks.extend(analyze_github_actions(repo_path)) all_bottlenecks.extend(analyze_gitea_ci(repo_path)) + print("Scanning for heavy imports...") all_bottlenecks.extend(find_slow_imports(repo_path)) + # Sort by severity and duration all_bottlenecks.sort(key=severity_sort_key) - report.bottlenecks = all_bottlenecks[:TOP_N_BOTTLENECKS * 2] + report.bottlenecks = all_bottlenecks[:TOP_N_BOTTLENECKS * 2] # Keep more for stats + # Compute summary by_category = defaultdict(list) for b in all_bottlenecks: by_category[b.category].append(b) @@ -332,53 +444,81 @@ def generate_report(repo_path: str) -> PerfReport: "estimated_total_slowdown_s": sum(b.duration_s for b in all_bottlenecks), "by_category": {cat: len(items) for cat, items in by_category.items()}, } + report.test_stats = { "slow_tests": len(by_category.get("test", [])), "total_estimated_s": sum(b.duration_s for b in by_category.get("test", [])), } + report.build_stats = { "build_issues": len(by_category.get("build", [])), "total_estimated_s": sum(b.duration_s for b in by_category.get("build", [])), } + report.ci_stats = { "ci_issues": len(by_category.get("ci", [])), "total_estimated_s": sum(b.duration_s for b in by_category.get("ci", [])), } + return report def format_markdown(report: PerfReport) -> str: + """Format report as markdown.""" lines = [] - lines.append("# Performance Bottleneck Report -") + lines.append(f"# Performance Bottleneck Report") + lines.append(f"") lines.append(f"Generated: {report.timestamp}") - lines.append(f"Repository: {report.repo_path} -") + lines.append(f"Repository: {report.repo_path}") + lines.append(f"") + + # Summary s = report.summary - lines.append("## Summary -") + lines.append(f"## Summary") + lines.append(f"") lines.append(f"- **Total bottlenecks:** {s['total_bottlenecks']}") lines.append(f"- **Critical:** {s['critical']} | **Warning:** {s['warning']} | **Info:** {s['info']}") - 
lines.append(f"- **Estimated total slowdown:** {s['estimated_total_slowdown_s']:.1f}s -") - lines.append(f"## Top {min(TOP_N_BOTTLENECKS, len(report.bottlenecks))} Bottlenecks -") + lines.append(f"- **Estimated total slowdown:** {s['estimated_total_slowdown_s']:.1f}s") + lines.append(f"- **By category:** {', '.join(f'{k}: {v}' for k, v in s['by_category'].items())}") + lines.append(f"") + + # Top bottlenecks + lines.append(f"## Top {min(TOP_N_BOTTLENECKS, len(report.bottlenecks))} Bottlenecks") + lines.append(f"") + for i, b in enumerate(report.bottlenecks[:TOP_N_BOTTLENECKS], 1): - icon = {"critical": "CRIT", "warning": "WARN", "info": "INFO"}.get(b.severity, "?") + icon = {"critical": "🔴", "warning": "🟡", "info": "🔵"}.get(b.severity, "⚪") loc = f" ({b.file_path}:{b.line_number})" if b.file_path else "" - lines.append(f"{i}. [{icon}] **{b.category}** -- {b.name}{loc}") - lines.append(f" Duration: ~{b.duration_s:.1f}s | {b.recommendation} -") + lines.append(f"{i}. {icon} **{b.category}** — {b.name}{loc}") + lines.append(f" - Duration: ~{b.duration_s:.1f}s | Severity: {b.severity}") + lines.append(f" - Fix: {b.recommendation}") + lines.append(f"") + + # Category breakdowns + for cat in ["test", "build", "ci", "import"]: + items = [b for b in report.bottlenecks if b.category == cat] + if items: + lines.append(f"## {cat.title()} Bottlenecks") + lines.append(f"") + for b in items: + icon = {"critical": "🔴", "warning": "🟡", "info": "🔵"}.get(b.severity, "⚪") + loc = f" ({b.file_path}:{b.line_number})" if b.file_path else "" + lines.append(f"- {icon} {b.name}{loc} — ~{b.duration_s:.1f}s — {b.recommendation}") + lines.append(f"") + + return "\n".join(lines) +# ── Main ─────────────────────────────────────────────────────────── + def main(): parser = argparse.ArgumentParser(description="Performance Bottleneck Finder") parser.add_argument("--repo", default=".", help="Path to repository to analyze") parser.add_argument("--json", action="store_true", help="Output as JSON") 
parser.add_argument("--report", help="Write markdown report to file") - parser.add_argument("--threshold", type=float, default=SLOW_TEST_THRESHOLD_S) + parser.add_argument("--threshold", type=float, default=SLOW_TEST_THRESHOLD_S, + help="Slow test threshold in seconds") args = parser.parse_args() global SLOW_TEST_THRESHOLD_S @@ -402,6 +542,7 @@ def main(): else: print(md) + # Exit code: 1 if critical bottlenecks found if report.summary.get("critical", 0) > 0: sys.exit(1) -- 2.43.0