#!/usr/bin/env python3
"""
Progress Tracker — Pipeline 10.8
Track improvement metrics over time. Are we getting better?

Metrics tracked:
  1. Test coverage  — test:source file ratio, plus line coverage if a
                      `.coverage` data file is available
  2. Doc coverage   — % of Python callables (functions and classes) with
                      docstrings (AST-based)
  3. Issue close rate — closed / (opened + closed) per week (Gitea API)
  4. Dep freshness  — % of requirements not reported outdated
                      (pip list --outdated)

Output:
  - metrics/snapshots/YYYY-MM-DD.json — one snapshot per run
  - metrics/TRENDS.md — cumulative markdown table
  - stdout summary

Usage:
  python3 scripts/progress_tracker.py
  python3 scripts/progress_tracker.py --json
  python3 scripts/progress_tracker.py --output metrics/TRENDS.md

Weekly cron:
  0 9 * * 1 cd /path/to/compounding-intelligence && python3 scripts/progress_tracker.py
"""

import argparse
import io
import json
import os
import re
import subprocess
import sys
from collections import defaultdict
from datetime import datetime, timezone, timedelta
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple
from urllib.parse import urlencode
from urllib.request import Request, urlopen

# ── Configuration ──────────────────────────────────────────────────────────

SCRIPT_DIR = Path(__file__).resolve().parent
REPO_ROOT = SCRIPT_DIR.parent
METRICS_DIR = REPO_ROOT / "metrics"
SNAPSHOTS_DIR = METRICS_DIR / "snapshots"
TOKEN_PATH = Path.home() / ".config" / "gitea" / "token"
GITEA_API_BASE = "https://forge.alexanderwhitestone.com/api/v1"
ORG = "Timmy_Foundation"
REPO_NAME = "compounding-intelligence"

# Directory names that never contain project code.
_SKIPPED_DIR_NAMES = frozenset(
    {"node_modules", "venv", ".venv", "env", ".pytest_cache"}
)
# Leading distribution name of a requirements.txt line.
_REQ_NAME_RE = re.compile(r"^([A-Za-z0-9][A-Za-z0-9_.-]*)")
# Upper bound on Gitea result pages fetched per run (keeps a cron run bounded).
_MAX_ISSUE_PAGES = 20
_ISSUE_PAGE_SIZE = 50

# Ensure paths exist. Best effort only: importing this module from a
# read-only checkout must not crash; save_snapshot() creates the directory
# again right before writing.
try:
    SNAPSHOTS_DIR.mkdir(parents=True, exist_ok=True)
except OSError:
    pass


# ── Helpers ─────────────────────────────────────────────────────────────────

def run_cmd(cmd: List[str], cwd: Path = REPO_ROOT, timeout: float = 30) -> str:
    """Run a command and return its stripped stdout.

    stderr is captured and discarded. An empty string is returned when the
    command exits non-zero, cannot be started (e.g. the binary is not
    installed), or exceeds *timeout* seconds, so callers can treat "" as
    "no usable output".
    """
    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, cwd=cwd, timeout=timeout
        )
    except (OSError, subprocess.TimeoutExpired):
        return ""
    if result.returncode != 0:
        return ""
    return result.stdout.strip()


def slugify_date(dt: datetime) -> str:
    """Format *dt* as ``YYYY-MM-DD``."""
    return dt.strftime("%Y-%m-%d")


def snapshot_path(dt: datetime) -> Path:
    """Return the snapshot file path for the date of *dt*."""
    return SNAPSHOTS_DIR / f"{slugify_date(dt)}.json"


def load_snapshots() -> List[Dict[str, Any]]:
    """Load all existing snapshots sorted by date.

    File names are ISO dates, so lexical order is chronological order.
    Unreadable files, invalid JSON, and payloads that are not snapshot
    dicts (missing ``date`` or ``metrics``) are skipped.
    """
    snapshots = []
    for path in sorted(SNAPSHOTS_DIR.glob("*.json")):
        try:
            with open(path, encoding="utf-8") as fp:
                data = json.load(fp)
        except (OSError, ValueError):
            continue
        if (
            isinstance(data, dict)
            and "date" in data
            and isinstance(data.get("metrics"), dict)
        ):
            snapshots.append(data)
    return snapshots


def _discover_python_files() -> Tuple[List[Path], List[Path]]:
    """Return ``(source_files, test_files)`` found under the repo root.

    A path is ignored when any component relative to the repo root starts
    with ``.`` or ``__`` or is a known environment/cache directory. The file
    name is one of those components, so files such as ``__init__.py`` are
    ignored as well.
    """
    source_files: List[Path] = []
    test_files: List[Path] = []
    for path in REPO_ROOT.rglob("*.py"):
        try:
            rel_parts = path.relative_to(REPO_ROOT).parts
        except ValueError:
            continue
        if any(part.startswith((".", "__")) for part in rel_parts):
            continue
        if any(part in _SKIPPED_DIR_NAMES for part in rel_parts):
            continue
        if path.name.startswith("test_") or path.name.endswith("_test.py"):
            test_files.append(path)
        else:
            source_files.append(path)
    return source_files, test_files


# ── Metric 1: Test Coverage ─────────────────────────────────────────────────

def _read_line_coverage() -> Tuple[Optional[float], Optional[str]]:
    """Return ``(coverage_percent, coverage_tool)`` from ``.coverage``.

    Tries the coverage.py API first and falls back to parsing the output of
    ``coverage report``. Returns ``(None, None)`` when no figure is available.
    """
    coverage_file = REPO_ROOT / ".coverage"
    if not coverage_file.exists():
        return None, None

    try:
        import coverage  # type: ignore

        cov = coverage.Coverage(data_file=str(coverage_file))
        cov.load()
        # report() prints a table; send it to a buffer so stdout (and
        # `--json` output in particular) stays clean.
        total = cov.report(file=io.StringIO())
        if isinstance(total, (int, float)):
            return float(total), "coverage"
    except Exception:
        # Deliberately broad: coverage may be missing or raise its own
        # exception types (e.g. no data); the CLI fallback below takes over.
        pass

    out = run_cmd(["coverage", "report", "--skip-empty"])
    for line in out.splitlines():
        if "TOTAL" not in line:
            continue
        parts = line.split()
        if len(parts) < 2:
            continue
        try:
            return float(parts[-1].rstrip("%")), "coverage"
        except ValueError:
            continue
    return None, None


def collect_test_coverage() -> Dict[str, Any]:
    """
    Compute test coverage metrics.
    Counts test_*.py and *_test.py files vs non-test .py source files.
    Also attempts to read .coverage if present.
    """
    source_files, test_files = _discover_python_files()
    coverage_percent, coverage_tool = _read_line_coverage()

    ratio = round(len(test_files) / len(source_files), 4) if source_files else 0.0
    return {
        "test_files": len(test_files),
        "source_files": len(source_files),
        "test_to_source_ratio": ratio,
        "coverage_tool": coverage_tool,
        "coverage_percent": coverage_percent,
    }


# ── Metric 2: Doc Coverage ──────────────────────────────────────────────────

def collect_doc_coverage() -> Dict[str, Any]:
    """
    Check AST of Python files for docstrings.
    Functions, async functions and classes are counted, in source and test
    files alike. Files that cannot be read or parsed are skipped.
    Returns: callables_total, callables_with_doc, doc_coverage_percent
    """
    import ast

    source_files, test_files = _discover_python_files()
    documentable = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)

    total_callables = 0
    with_doc = 0

    for path in source_files + test_files:
        try:
            with open(path, encoding="utf-8") as f:
                tree = ast.parse(f.read(), filename=str(path))
        except (OSError, SyntaxError, ValueError, RecursionError):
            continue
        for node in ast.walk(tree):
            if not isinstance(node, documentable):
                continue
            total_callables += 1
            doc = ast.get_docstring(node)
            if doc and doc.strip():
                with_doc += 1

    percent = (with_doc / total_callables * 100) if total_callables else 0.0
    return {
        "callables_total": total_callables,
        "callables_with_doc": with_doc,
        "doc_coverage_percent": round(percent, 2),
    }


# ── Metric 3: Issue Close Rate ──────────────────────────────────────────────

def _parse_api_time(value: str) -> datetime:
    """Parse an ISO-8601 API timestamp into an aware datetime."""
    parsed = datetime.fromisoformat(value.replace("Z", "+00:00"))
    if parsed.tzinfo is None:
        # A naive value cannot be compared with the aware cutoff; treat as UTC.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def count_recent_issues(
    issues: Iterable[Dict[str, Any]], cutoff: datetime
) -> Tuple[int, int]:
    """Return ``(opened, closed)`` counts of issues at or after *cutoff*.

    An issue counts as opened when ``created_at >= cutoff`` and as closed
    when its state is ``closed`` and ``closed_at >= cutoff``.
    """
    opened = 0
    closed = 0
    for issue in issues:
        if _parse_api_time(issue["created_at"]) >= cutoff:
            opened += 1
        if issue.get("state") == "closed":
            closed_at = issue.get("closed_at")
            if closed_at and _parse_api_time(closed_at) >= cutoff:
                closed += 1
    return opened, closed


def collect_issue_metrics() -> Dict[str, Any]:
    """
    Use Gitea API to get issue open/close stats for the last 7 days.
    Returns counts and close rate. Every value is None (with a ``note``)
    when no token is configured or the API cannot be reached.
    """
    token = ""
    if TOKEN_PATH.exists():
        token = TOKEN_PATH.read_text().strip()

    if not token:
        return {
            "opened_last_7d": None,
            "closed_last_7d": None,
            "close_rate": None,
            "total_open": None,
            "note": "Gitea token not available"
        }

    now = datetime.now(timezone.utc)
    week_ago = now - timedelta(days=7)
    # Gitea expects `since` as an RFC 3339 timestamp, not a bare date.
    since = week_ago.strftime("%Y-%m-%dT%H:%M:%SZ")

    headers = {"Authorization": f"token {token}"}
    base_url = f"{GITEA_API_BASE}/repos/{ORG}/{REPO_NAME}/issues"

    try:
        # Issues updated in the last 7 days. `type=issues` keeps pull
        # requests out of the counts; Gitea paginates with `page`/`limit`.
        issues: List[Dict[str, Any]] = []
        for page in range(1, _MAX_ISSUE_PAGES + 1):
            query = urlencode({
                "state": "all",
                "type": "issues",
                "since": since,
                "limit": _ISSUE_PAGE_SIZE,
                "page": page,
            })
            req = Request(f"{base_url}?{query}", headers=headers)
            with urlopen(req, timeout=15) as resp:
                batch = json.loads(resp.read())
            if not batch:
                break
            issues.extend(batch)

        opened, closed = count_recent_issues(issues, week_ago)

        # Total open issues: only the X-Total-Count header is needed.
        query = urlencode({"state": "open", "type": "issues", "limit": 1})
        req2 = Request(f"{base_url}?{query}", headers=headers)
        with urlopen(req2, timeout=15) as resp:
            total_open = int(resp.headers.get("X-Total-Count", "0"))

        total = opened + closed
        close_rate = closed / total if total > 0 else 0.0

        return {
            "opened_last_7d": opened,
            "closed_last_7d": closed,
            "close_rate": round(close_rate, 4),
            "total_open": total_open,
        }
    except Exception as e:
        # Deliberate best effort: network, HTTP, and payload-shape failures
        # must not abort the other metrics; the error is recorded instead.
        return {
            "opened_last_7d": None,
            "closed_last_7d": None,
            "close_rate": None,
            "total_open": None,
            "error": str(e)[:100],
            "note": "Gitea API unavailable"
        }


# ── Metric 4: Dependency Freshness ─────────────────────────────────────────

def _normalize_package_name(name: str) -> str:
    """Normalize a distribution name so ``My_Pkg`` and ``my-pkg`` compare equal."""
    return re.sub(r"[-_.]+", "-", name).lower()


def parse_requirement_names(lines: Iterable[str]) -> List[str]:
    """Extract distribution names from requirements.txt lines.

    Blank lines, comments, and option lines (``-r other.txt``, ``-e .``,
    ``--index-url ...``) are skipped; for everything else the leading name
    before any extras or version specifier is returned.
    """
    names = []
    for raw in lines:
        line = raw.strip()
        if not line or line.startswith(("#", "-")):
            continue
        match = _REQ_NAME_RE.match(line)
        if match:
            names.append(match.group(1))
    return names


def collect_dep_freshness() -> Dict[str, Any]:
    """
    Check requirements.txt for outdated dependencies using pip list --outdated.
    Returns freshness percentage and outdated list. When pip cannot be
    queried, the figures assume nothing is outdated and a ``note`` says so.
    """
    req_file = REPO_ROOT / "requirements.txt"
    if not req_file.exists():
        return {
            "total_deps": 0,
            "outdated_deps": 0,
            "freshness_percent": 100.0,
            "outdated_list": [],
            "note": "requirements.txt not found"
        }

    with open(req_file, encoding="utf-8") as f:
        reqs = parse_requirement_names(f)

    if not reqs:
        return {"total_deps": 0, "outdated_deps": 0, "freshness_percent": 100.0, "outdated_list": []}

    # Query pip through the running interpreter so the same environment is
    # inspected. pip prints "[]" when nothing is outdated, so empty output
    # means the query itself failed.
    outdated_names = set()
    pip_ok = False
    out = run_cmd(
        [sys.executable, "-m", "pip", "list", "--outdated", "--format=json"],
        timeout=120,
    )
    if out:
        try:
            outdated_names = {
                _normalize_package_name(item["name"]) for item in json.loads(out)
            }
            pip_ok = True
        except (ValueError, KeyError, TypeError):
            outdated_names = set()

    outdated = [p for p in reqs if _normalize_package_name(p) in outdated_names]
    total = len(reqs)
    outdated_count = len(outdated)
    freshness = round((total - outdated_count) / total * 100, 1)

    result: Dict[str, Any] = {
        "total_deps": total,
        "outdated_deps": outdated_count,
        "freshness_percent": freshness,
        "outdated_list": outdated,
    }
    if not pip_ok:
        result["note"] = "pip unavailable — freshness unverified"
    return result


# ── Snapshot & Trends ───────────────────────────────────────────────────────

def take_snapshot() -> Dict[str, Any]:
    """Collect all metrics and return a snapshot dict."""
    now = datetime.now(timezone.utc)
    test_cov = collect_test_coverage()
    doc_cov = collect_doc_coverage()
    issues = collect_issue_metrics()
    deps = collect_dep_freshness()

    return {
        "timestamp": now.isoformat(),
        "date": slugify_date(now),
        "metrics": {
            "test_coverage": test_cov,
            "doc_coverage": doc_cov,
            "issues": issues,
            "dependencies": deps,
        }
    }


def save_snapshot(snapshot: Dict[str, Any]) -> Path:
    """Write *snapshot* to its dated JSON file and return the path.

    A snapshot already stored for the same date is overwritten.
    """
    path = snapshot_path(datetime.fromisoformat(snapshot["timestamp"]))
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        json.dump(snapshot, f, indent=2)
    return path


def _or_placeholder(value: Any, placeholder: str = "-") -> str:
    """Render *value* as text, substituting *placeholder* for None."""
    return placeholder if value is None else str(value)


def generate_trends(snapshots: List[Dict[str, Any]], output_path: Optional[Path] = None) -> str:
    """Generate markdown trends table; optionally write to file.

    *snapshots* is expected oldest-first; the table lists them newest-first
    and the summary section describes the last (most recent) one.
    """
    if not snapshots:
        msg = "# Progress Tracker — Trends\n\nNo snapshots yet. Run `progress_tracker.py` to create the first snapshot."
        if output_path:
            output_path.parent.mkdir(parents=True, exist_ok=True)
            output_path.write_text(msg, encoding="utf-8")
        return msg

    lines = [
        "# Progress Tracker — Trends",
        f"\nLast updated: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M UTC')}",
        f"\nSnapshots: {len(snapshots)}\n",
        "| Date | Test Files → Source | Doc Coverage | Issues Closed/Opened (7d) | Dep Freshness |",
        "|------|---------------------|--------------|---------------------------|---------------|",
    ]

    for snap in reversed(snapshots):  # newest first
        m = snap.get("metrics", {})
        tc = m.get("test_coverage", {})
        ratio = tc.get("test_to_source_ratio") or 0.0
        doc_percent = m.get("doc_coverage", {}).get("doc_coverage_percent") or 0.0
        issues = m.get("issues", {})
        deps = m.get("dependencies", {})

        test_str = f"{tc.get('test_files', 0)}/{tc.get('source_files', 0)} ({ratio:.2f})"
        doc_str = f"{doc_percent:.1f}%"
        # The keys exist with value None when Gitea is unavailable, so a
        # .get() default alone would still print "None".
        closed_str = _or_placeholder(issues.get("closed_last_7d"))
        opened_str = _or_placeholder(issues.get("opened_last_7d"))
        dep_str = f"{_or_placeholder(deps.get('freshness_percent'), '?')}%"
        lines.append(
            f"| {snap.get('date', '?')} | {test_str} | {doc_str} | {closed_str}/{opened_str} | {dep_str} |"
        )

    # Current snapshot summary
    cur = snapshots[-1]
    cm = cur.get("metrics", {})
    lines.append(f"\n## Current Snapshot ({cur.get('date', '?')})\n")

    tc = cm.get("test_coverage", {})
    coverage_percent = tc.get("coverage_percent")
    # `is not None`: a measured 0.0% is still a measurement.
    if coverage_percent is not None:
        lines.append(f"- Test coverage: {coverage_percent:.1f}% (via {tc.get('coverage_tool')})")
    else:
        lines.append("- Test coverage: (pytest-cov not configured)")

    doc_percent = cm.get("doc_coverage", {}).get("doc_coverage_percent") or 0.0
    lines.append(f"- Doc coverage: {doc_percent:.1f}%")

    im = cm.get("issues", {})
    if im.get("close_rate") is not None:
        lines.append(f"- Issue close rate (7d): {im['close_rate']*100:.1f}% ({im.get('closed_last_7d')} closed, {im.get('opened_last_7d')} opened)")
    else:
        lines.append(f"- Issue metrics: {im.get('note','unavailable')}")

    dd = cm.get("dependencies", {})
    dep_line = (
        f"- Dep freshness: {_or_placeholder(dd.get('freshness_percent'), '?')}% up to date "
        f"({dd.get('outdated_deps', 0)}/{dd.get('total_deps', 0)} deps outdated)"
    )
    if dd.get("note"):
        dep_line += f" — {dd['note']}"
    lines.append(dep_line)
    if dd.get('outdated_list'):
        lines.append(f"  Outdated: {', '.join(dd['outdated_list'][:5])}")

    content = "\n".join(lines) + "\n"

    if output_path:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(content, encoding="utf-8")

    return content


# ── Main ─────────────────────────────────────────────────────────────────────

def main() -> int:
    """Take a snapshot, persist it, refresh the trends file, print a summary.

    With ``--json`` the snapshot is still saved but only its JSON is printed
    and the trends file is left untouched.
    """
    parser = argparse.ArgumentParser(description="Progress Tracker — 10.8")
    parser.add_argument("--json", action="store_true", help="Emit snapshot as JSON only")
    parser.add_argument("--output", type=Path, default=METRICS_DIR / "TRENDS.md",
                        help="Write trends markdown to this file")
    args = parser.parse_args()

    snapshot = take_snapshot()
    # A re-run on the same day overwrites that day's file, so drop the stale
    # copy from the history instead of listing the date twice.
    history = [s for s in load_snapshots() if s.get("date") != snapshot["date"]]
    path_written = save_snapshot(snapshot)

    if args.json:
        print(json.dumps(snapshot, indent=2))
        return 0

    generate_trends(history + [snapshot], output_path=args.output)

    # Print current snapshot summary
    print(f"Snapshot saved: {path_written}\n")
    print(f"Progress Tracker — {snapshot['date']}")
    print("=" * 50)

    m = snapshot["metrics"]
    tc = m["test_coverage"]
    print(f"Test files: {tc['test_files']} | Source files: {tc['source_files']} | Ratio: {tc['test_to_source_ratio']:.3f}")
    if tc["coverage_percent"] is not None:
        print(f"Line coverage: {tc['coverage_percent']:.1f}% (via {tc['coverage_tool']})")
    else:
        print("Line coverage: (not available — run `pytest --cov`)")

    print()
    dc = m["doc_coverage"]
    print(f"Callables with docstrings: {dc['callables_with_doc']}/{dc['callables_total']} ({dc['doc_coverage_percent']:.1f}%)")

    print()
    im = m["issues"]
    if im.get("close_rate") is not None:
        print(f"Issues (7d): {im['closed_last_7d']} closed / {im['opened_last_7d']} opened → close rate: {im['close_rate']*100:.1f}%")
        print(f"Total open: {im['total_open']}")
    else:
        print(f"Issues: {im.get('note','unavailable')}")

    print()
    dd = m["dependencies"]
    print(f"Dependencies: {dd.get('total_deps',0)} total, {dd.get('outdated_deps',0)} outdated")
    if dd.get('outdated_list'):
        shown = dd['outdated_list'][:5]
        print(f"Outdated: {', '.join(shown)}" + ("..." if len(dd['outdated_list']) > 5 else ""))
    if dd.get("note"):
        print(f"Note: {dd['note']}")

    print(f"\nTrends written to: {args.output}")
    return 0


if __name__ == "__main__":
    sys.exit(main())