#!/usr/bin/env bash
# kaizen-retro.sh — Automated retrospective after every burn cycle.
#
# Runs daily after the morning report: analyzes success rates by agent,
# repo, and issue type, identifies max-attempts issues, generates ONE
# concrete improvement, and posts the retro to Telegram + the master
# morning-report issue.
#
# Usage:
#   ./bin/kaizen-retro.sh [--dry-run]

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="${SCRIPT_DIR%/bin}"
PYTHON="${PYTHON3:-python3}"

# Pull in local env files so TELEGRAM_BOT_TOKEN etc. are picked up; the
# set -a / set +a window exports everything each file defines.
HOME_DIR="${HOME:-$(eval echo ~$(whoami))}"
for candidate in "$HOME_DIR/.hermes/.env" "$HOME_DIR/.timmy/.env" "$REPO_ROOT/.env"; do
  [ -f "$candidate" ] || continue
  set -a
  # shellcheck source=/dev/null
  source "$candidate"
  set +a
done

# If the configured Gitea URL is unreachable but localhost answers, prefer
# localhost (B is only probed when the configured URL already failed).
if ! curl -sf "${GITEA_URL:-http://localhost:3000}/api/v1/version" >/dev/null 2>&1 \
   && curl -sf http://localhost:3000/api/v1/version >/dev/null 2>&1; then
  export GITEA_URL="http://localhost:3000"
fi

# Ensure the Python implementation exists before handing off.
RETRO_PY="$REPO_ROOT/scripts/kaizen_retro.py"
if [ ! -f "$RETRO_PY" ]; then
  echo "ERROR: kaizen_retro.py not found at $RETRO_PY" >&2
  exit 1
fi

# Replace this shell with the Python script, forwarding all CLI arguments.
exec "$PYTHON" "$RETRO_PY" "$@"
return [Issue.from_dict(i) for i in raw] diff --git a/scripts/kaizen_retro.py b/scripts/kaizen_retro.py new file mode 100644 index 00000000..1f870c2e --- /dev/null +++ b/scripts/kaizen_retro.py @@ -0,0 +1,526 @@ +#!/usr/bin/env python3 +""" +Kaizen Retro — Automated retrospective after every burn cycle. + +Reads overnight Gitea activity, fleet state, and loop logs. +Generates ONE concrete improvement suggestion and posts it. + +Usage: + python3 scripts/kaizen_retro.py [--dry-run] +""" + +from __future__ import annotations + +import argparse +import json +import os +import sys +import urllib.error +import urllib.request +from datetime import datetime, timedelta, timezone +from pathlib import Path +from typing import Any, Optional + +# Ensure repo root is on path so we can import gitea_client +REPO_ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(REPO_ROOT)) + +from gitea_client import GiteaClient, GiteaError + +# --------------------------------------------------------------------------- +# Configuration +# --------------------------------------------------------------------------- + +REPOS = [ + "Timmy_Foundation/the-nexus", + "Timmy_Foundation/timmy-config", + "Timmy_Foundation/timmy-home", + "Timmy_Foundation/the-door", + "Timmy_Foundation/turboquant", + "Timmy_Foundation/hermes-agent", + "Timmy_Foundation/.profile", +] + +HERMES_HOME = Path.home() / ".hermes" +TIMMY_HOME = Path.home() / ".timmy" +WORKFORCE_STATE_PATH = HERMES_HOME / "workforce-state.json" +FLEET_ROUTING_PATH = HERMES_HOME / "fleet-routing.json" +CHANNEL_DIR_PATH = REPO_ROOT / "channel_directory.json" +REPORTS_DIR = REPO_ROOT / "reports" +MORNING_REPORT_REPO = "Timmy_Foundation/timmy-config" + +TELEGRAM_BOT_TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN") +TELEGRAM_CHAT_ID = os.environ.get("TELEGRAM_HOME_CHANNEL", "-1003664764329") +TELEGRAM_MAX_LEN = 4000 # leave headroom below the 4096 hard limit + +STALE_DAYS = 7 +MAX_ATTEMPT_COMMENT_THRESHOLD = 5 + +ISSUE_TYPE_KEYWORDS = { + 
# Keyword lists used to bucket an issue into a coarse type. Dict order is the
# tie-break: with equal scores the first-seen kind wins.
ISSUE_TYPE_KEYWORDS = {
    "bug": ["bug", "fix", "crash", "error", "regression", "broken"],
    "feature": ["feature", "implement", "add", "support", "enable"],
    "docs": ["doc", "readme", "wiki", "guide", "documentation"],
    "kaizen": ["kaizen", "retro", "improvement", "continuous"],
    "devops": ["deploy", "ci", "cd", "docker", "server", "infra"],
}

# Labels that mark an issue as blocked/abandoned rather than actively worked.
BLOCKER_LABELS = {"blocked", "timeout", "stale", "help wanted", "wontfix", "duplicate"}


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def load_json(path: Path) -> Any:
    """Load JSON from *path*, returning None when the file does not exist."""
    if not path.exists():
        return None
    # JSON interchange is defined as UTF-8 (RFC 8259); be explicit so the
    # platform default encoding cannot corrupt the read.
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def iso_day_ago(days: int = 1) -> str:
    """Return a timezone-aware ISO-8601 timestamp *days* days before now (UTC)."""
    return (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()


def classify_issue_type(issue: dict) -> str:
    """Bucket an issue dict into one of ISSUE_TYPE_KEYWORDS' kinds.

    Scores each kind by keyword hits across title + body + label names;
    a keyword hit inside a label name counts +3 because labels are curated.
    Returns "other" when nothing matches.
    """
    title = (issue.get("title", "") or "").lower()
    body = (issue.get("body", "") or "").lower()
    labels = [lbl.get("name", "").lower() for lbl in issue.get("labels", []) or []]
    text = f"{title} {body} {' '.join(labels)}"
    words = set(text.split())

    best = "other"
    best_score = 0
    for kind, keywords in ISSUE_TYPE_KEYWORDS.items():
        # Short keywords (<=3 chars) require a whole-word match to avoid
        # substring false positives such as "ci" inside "specific" or
        # "cd" inside "abcde".
        score = sum(
            1 for kw in keywords
            if (len(kw) <= 3 and kw in words) or (len(kw) > 3 and kw in text)
        )
        # A hit inside a label name is a stronger signal than free text.
        for label in labels:
            label_words = set(label.split())
            if any(
                (len(kw) <= 3 and kw in label_words) or (len(kw) > 3 and kw in label)
                for kw in keywords
            ):
                score += 3
        if score > best_score:
            best_score = score
            best = kind
    return best
STALE_DAYS = 7
MAX_ATTEMPT_COMMENT_THRESHOLD = 5
BLOCKER_LABELS = {"blocked", "timeout", "stale", "help wanted", "wontfix", "duplicate"}
TELEGRAM_MAX_LEN = 4000  # leave headroom below the 4096 hard limit


def is_max_attempts_candidate(issue: dict) -> bool:
    """Heuristic for issues that consumed excessive attempts.

    True when the issue carries a blocker label, has accumulated
    MAX_ATTEMPT_COMMENT_THRESHOLD or more comments, or has been open
    longer than STALE_DAYS. A malformed created_at timestamp is ignored.
    """
    labels = {lbl.get("name", "").lower() for lbl in issue.get("labels", []) or []}
    if labels & BLOCKER_LABELS:
        return True
    if issue.get("comments", 0) >= MAX_ATTEMPT_COMMENT_THRESHOLD:
        return True
    created = issue.get("created_at")
    if created:
        try:
            created_dt = datetime.fromisoformat(created.replace("Z", "+00:00"))
            if datetime.now(timezone.utc) - created_dt > timedelta(days=STALE_DAYS):
                return True
        except Exception:
            pass  # best-effort: an unparseable timestamp is not evidence of staleness
    return False


def telegram_send(text: str, bot_token: str, chat_id: str) -> list[dict]:
    """Post *text* to Telegram, chunking if it exceeds the message limit.

    Splits on newlines to preserve readability. A single line longer than
    TELEGRAM_MAX_LEN is hard-split so no chunk can exceed the API limit
    (previously such a line was emitted as one oversized chunk, which the
    Telegram API rejects). Returns the decoded API response per chunk.
    """
    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
    chunks: list[str] = []
    if len(text) <= TELEGRAM_MAX_LEN:
        chunks = [text]
    else:
        current = ""
        for line in text.splitlines(keepends=True):
            # Hard-split a pathological single line that alone exceeds the limit.
            while len(line) > TELEGRAM_MAX_LEN:
                if current:
                    chunks.append(current)
                    current = ""
                chunks.append(line[:TELEGRAM_MAX_LEN])
                line = line[TELEGRAM_MAX_LEN:]
            if len(current) + len(line) > TELEGRAM_MAX_LEN:
                if current:
                    chunks.append(current)
                current = line
            else:
                current += line
        if current:
            chunks.append(current)
    results = []
    for i, chunk in enumerate(chunks):
        prefix = f"*(part {i + 1}/{len(chunks)})*\n" if len(chunks) > 1 else ""
        payload = {"chat_id": chat_id, "text": prefix + chunk, "parse_mode": "Markdown"}
        data = json.dumps(payload).encode()
        req = urllib.request.Request(url, data=data, headers={"Content-Type": "application/json"})
        with urllib.request.urlopen(req, timeout=30) as resp:
            results.append(json.loads(resp.read().decode()))
    return results
MORNING_REPORT_REPO = "Timmy_Foundation/timmy-config"


def find_latest_morning_report_issue(client: GiteaClient) -> Optional[int]:
    """Return the number of the newest morning-report issue, or None.

    Scans open issues first, then falls back to closed ones. Any Gitea
    error degrades to None — the retro is still delivered via Telegram.
    (The original duplicated the scan loop for each state and tested both
    "good morning report" and "morning report"; the former is a substring
    of the latter, so a single check suffices.)
    """
    def scan(state: str) -> Optional[int]:
        # Newest first; a title containing "morning report" identifies it.
        for issue in client.list_issues(
            MORNING_REPORT_REPO, state=state, sort="created", direction="desc", limit=20
        ):
            if "morning report" in issue.title.lower():
                return issue.number
        return None

    try:
        for state in ("open", "closed"):
            number = scan(state)
            if number is not None:
                return number
    except Exception:
        pass  # best-effort lookup; caller handles the None
    return None


def fmt_pct(num: float, den: float) -> str:
    """Format num/den as a whole-number percentage, or "N/A" when den is zero."""
    if den == 0:
        return "N/A"
    return f"{num/den:.0%}"
def gather_metrics(client: GiteaClient, since: str) -> dict:
    """Collect overnight metrics from Gitea.

    Walks every repo in REPOS and aggregates, per repo / per assignee /
    per issue type: issues closed since *since*, open issues that look
    stuck (via is_max_attempts_candidate), and PRs merged or closed since
    *since*. Closed issues count as "successes"; stuck open issues count
    as "failures". Each repo's fetch is independently best-effort: a
    Gitea error is logged to stderr and that repo's section is skipped.
    """
    metrics = {
        "closed_issues": [],
        "merged_prs": [],
        "closed_prs": [],
        "open_issues": [],
        "max_attempts_issues": [],
        "by_agent": {},
        "by_repo": {},
        "by_type": {},
    }

    for repo in REPOS:
        # "owner/name" -> "name"; all per-repo buckets key on the short name.
        repo_short = repo.split("/")[1]
        metrics["by_repo"][repo_short] = {
            "closed": 0,
            "merged_prs": 0,
            "closed_prs": 0,
            "open": 0,
            "max_attempts": 0,
            "successes": 0,
            "failures": 0,
        }

        # Closed issues since window
        try:
            closed = client.list_issues(repo, state="closed", since=since, sort="updated", direction="desc", limit=100)
            for issue in closed:
                issue_dict = {
                    "number": issue.number,
                    "title": issue.title,
                    "repo": repo_short,
                    "type": classify_issue_type({"title": issue.title, "body": issue.body, "labels": [{"name": lb.name} for lb in issue.labels]}),
                    # First assignee stands in for "the responsible agent".
                    "assignee": issue.assignees[0].login if issue.assignees else "unassigned",
                }
                metrics["closed_issues"].append(issue_dict)
                metrics["by_repo"][repo_short]["closed"] += 1
                metrics["by_repo"][repo_short]["successes"] += 1

                agent = issue_dict["assignee"]
                if agent not in metrics["by_agent"]:
                    metrics["by_agent"][agent] = {"successes": 0, "failures": 0, "closed": 0, "repos": set()}
                metrics["by_agent"][agent]["successes"] += 1
                metrics["by_agent"][agent]["closed"] += 1
                metrics["by_agent"][agent]["repos"].add(repo_short)

                t = issue_dict["type"]
                if t not in metrics["by_type"]:
                    metrics["by_type"][t] = {"successes": 0, "failures": 0, "total": 0}
                metrics["by_type"][t]["successes"] += 1
                metrics["by_type"][t]["total"] += 1
        except Exception as exc:
            print(f"Warning: could not load closed issues for {repo}: {exc}", file=sys.stderr)

        # Open issues (for stale / max-attempts detection)
        try:
            open_issues = client.list_issues(repo, state="open", sort="created", direction="desc", limit=100)
            metrics["by_repo"][repo_short]["open"] = len(open_issues)
            for issue in open_issues:
                # Re-shape into the plain-dict form the heuristics expect.
                issue_raw = {
                    "number": issue.number,
                    "title": issue.title,
                    "labels": [{"name": lb.name} for lb in issue.labels],
                    "comments": issue.comments,
                    "created_at": issue.created_at,
                }
                if is_max_attempts_candidate(issue_raw):
                    metrics["max_attempts_issues"].append({
                        "number": issue.number,
                        "title": issue.title,
                        "repo": repo_short,
                        "type": classify_issue_type({"title": issue.title, "body": issue.body, "labels": issue_raw["labels"]}),
                        "assignee": issue.assignees[0].login if issue.assignees else "unassigned",
                    })
                    metrics["by_repo"][repo_short]["max_attempts"] += 1
                    metrics["by_repo"][repo_short]["failures"] += 1

                    agent = issue.assignees[0].login if issue.assignees else "unassigned"
                    if agent not in metrics["by_agent"]:
                        metrics["by_agent"][agent] = {"successes": 0, "failures": 0, "closed": 0, "repos": set()}
                    metrics["by_agent"][agent]["failures"] += 1
                    metrics["by_agent"][agent]["repos"].add(repo_short)

                    t = classify_issue_type({"title": issue.title, "body": issue.body, "labels": issue_raw["labels"]})
                    if t not in metrics["by_type"]:
                        metrics["by_type"][t] = {"successes": 0, "failures": 0, "total": 0}
                    metrics["by_type"][t]["failures"] += 1
                    metrics["by_type"][t]["total"] += 1
        except Exception as exc:
            print(f"Warning: could not load open issues for {repo}: {exc}", file=sys.stderr)

        # PRs merged / closed since window (filter client-side; Gitea PR API ignores since)
        try:
            prs = client.list_pulls(repo, state="closed", sort="updated", limit=100)
            since_dt = datetime.fromisoformat(since.replace("Z", "+00:00"))
            for pr in prs:
                updated = pr.updated_at or pr.created_at or ""
                try:
                    updated_dt = datetime.fromisoformat(updated.replace("Z", "+00:00"))
                    if updated_dt < since_dt:
                        continue
                except Exception:
                    # Unparseable timestamp: keep the PR rather than drop data.
                    pass
                if pr.merged:
                    metrics["merged_prs"].append({
                        "number": pr.number,
                        "title": pr.title,
                        "repo": repo_short,
                        "user": pr.user.login if pr.user else "unknown",
                    })
                    metrics["by_repo"][repo_short]["merged_prs"] += 1
                else:
                    metrics["closed_prs"].append({
                        "number": pr.number,
                        "title": pr.title,
                        "repo": repo_short,
                        "user": pr.user.login if pr.user else "unknown",
                    })
                    metrics["by_repo"][repo_short]["closed_prs"] += 1
        except Exception as exc:
            print(f"Warning: could not load PRs for {repo}: {exc}", file=sys.stderr)

    # Convert sets to lists for JSON serialization
    for agent in metrics["by_agent"].values():
        agent["repos"] = sorted(agent["repos"])

    return metrics


def load_workforce_state() -> dict:
    """Load ~/.hermes/workforce-state.json, defaulting to an empty dict."""
    return load_json(WORKFORCE_STATE_PATH) or {}


def load_fleet_routing() -> list[dict]:
    """Load the fleet routing agent list, defaulting to an empty list."""
    data = load_json(FLEET_ROUTING_PATH)
    if data and "agents" in data:
        return data["agents"]
    return []
def generate_suggestion(metrics: dict, fleet: list[dict]) -> str:
    """Generate ONE concrete improvement suggestion based on the data.

    Walks a fixed priority ladder — struggling agent, high-friction repo,
    max-attempts cluster, failing issue type, idle agents — and returns
    the first rung that fires; otherwise falls back to a celebration or
    a low-activity nudge.
    """
    agent_stats = metrics["by_agent"]
    repo_stats = metrics["by_repo"]
    type_stats = metrics["by_type"]
    stuck = metrics["max_attempts_issues"]

    # Rung 1: an agent attempting real work but verifying nothing.
    for name, st in agent_stats.items():
        attempts = st["successes"] + st["failures"]
        if attempts >= 3 and st["successes"] == 0:
            covered = ", ".join(st["repos"])
            return (
                f"🎯 **{name}** has a 0% verify rate over the last cycle (0/{attempts}) on repos: {covered}. "
                f"Consider removing these repos from {name}'s routing or providing targeted onboarding."
            )

    # Rung 2: the repo where the most issues got stuck.
    failing = [(r, s) for r, s in repo_stats.items() if s["failures"] > 0]
    if failing:
        worst_name, worst = max(failing, key=lambda item: item[1]["failures"])
        worst_total = worst["successes"] + worst["failures"]
        if worst["failures"] >= 2:
            return (
                f"🎯 **{worst_name}** has the most friction ({worst['failures']} blocked/stale issues, "
                f"{fmt_pct(worst['successes'], worst_total)} success). "
                f"Consider splitting issues in {worst_name} into smaller chunks or assigning a stronger agent."
            )

    # Rung 3: a cluster of stuck issues — look for a dominant type.
    if len(stuck) >= 3:
        counts: dict[str, int] = {}
        for item in stuck:
            counts[item["type"]] = counts.get(item["type"], 0) + 1
        dominant = max(counts, key=counts.get) if counts else "unknown"
        return (
            f"🎯 **{len(stuck)} issues** hit max-attempts or went stale. "
            f"The dominant type is **{dominant}**. "
            f"Consider adding acceptance criteria templates or pre-flight checklists for {dominant} issues."
        )

    # Rung 4: an issue type that never closes.
    for kind, st in type_stats.items():
        if st["total"] >= 3 and st["successes"] == 0:
            return (
                f"🎯 **{kind}** issues have a 0% closure rate ({st['failures']} stale). "
                f"Consider routing all {kind} issues to a specialist agent or creating a dedicated playbook."
            )

    # Rung 5: active agents that received no work this cycle.
    active = {a["name"] for a in fleet if a.get("active")}
    idle = active - set(agent_stats) - {"unassigned"}
    if len(idle) >= 2:
        return (
            f"🎯 **{len(idle)} active agents** have no assignments this cycle: {', '.join(idle)}. "
            f"Consider expanding their repo lists or investigating why they aren't receiving work."
        )

    # Fallback: celebrate a strong cycle, or nudge on a quiet one.
    closed_total = len(metrics["closed_issues"])
    merged_total = len(metrics["merged_prs"])
    if closed_total >= 5 or merged_total >= 3:
        return (
            f"🎯 Strong cycle: {closed_total} issues closed, {merged_total} PRs merged. "
            f"Next improvement: write down the top 3 patterns that made this cycle successful so we can replicate them."
        )

    return (
        "🎯 Low activity this cycle. Next improvement: ensure at least one agent loop is actively polling "
        "for unassigned issues so work doesn't sit idle."
    )
def build_report(metrics: dict, suggestion: str, since: str) -> str:
    """Render the retro as a Markdown string.

    Sections: headline numbers, per-agent / per-repo / per-issue-type
    breakdowns, the max-attempts list (capped at 10 entries), and the
    single concrete improvement.
    """
    stamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

    def activity(st: dict) -> int:
        # Sort key and rate denominator: everything touched this cycle.
        return st["successes"] + st["failures"]

    out: list[str] = [
        f"# 🌀 Kaizen Retro — {stamp}",
        f"*Period: {since[:10]} → now*\n",
        "## Numbers",
        f"- **Issues closed:** {len(metrics['closed_issues'])}",
        f"- **PRs merged:** {len(metrics['merged_prs'])}",
        f"- **PRs closed without merge:** {len(metrics['closed_prs'])}",
        f"- **Max-attempts / stale issues:** {len(metrics['max_attempts_issues'])}",
        "",
        "## By Agent",
    ]

    for agent, st in sorted(metrics["by_agent"].items(), key=lambda kv: activity(kv[1]), reverse=True):
        out.append(
            f"- **{agent}**: {st['successes']} closed, {st['failures']} stale / max-attempts "
            f"— verify rate {fmt_pct(st['successes'], activity(st))}"
        )

    out += ["", "## By Repo"]
    for repo, st in sorted(metrics["by_repo"].items(), key=lambda kv: activity(kv[1]), reverse=True):
        if activity(st) == 0 and st["open"] == 0:
            continue  # nothing happened here; keep the report short
        out.append(
            f"- **{repo}**: {st['successes']} closed, {st['failures']} stale, {st['open']} open "
            f"— verify rate {fmt_pct(st['successes'], activity(st))}"
        )

    out += ["", "## By Issue Type"]
    for kind, st in sorted(metrics["by_type"].items(), key=lambda kv: kv[1]["total"], reverse=True):
        out.append(
            f"- **{kind}**: {st['successes']} closed, {st['failures']} stale "
            f"— verify rate {fmt_pct(st['successes'], st['total'])}"
        )

    stuck = metrics["max_attempts_issues"]
    if stuck:
        out += ["", "## Max-Attempts / Stale Issues"]
        for issue in stuck[:10]:
            out.append(f"- {issue['repo']}#{issue['number']} ({issue['type']}, assignee: {issue['assignee']}) — {issue['title']}")
        if len(stuck) > 10:
            out.append(f"- … and {len(stuck) - 10} more")

    out += ["", "## One Concrete Improvement", suggestion, ""]

    return "\n".join(out)
def main() -> int:
    """CLI entry point: gather metrics, build the retro, persist and publish it.

    Pipeline: parse flags -> gather Gitea metrics for the lookback window ->
    generate the single improvement suggestion -> render Markdown -> save a
    JSON snapshot under reports/ -> (unless --dry-run) post to Telegram and
    comment on the latest morning-report issue. Delivery steps are
    best-effort: failures are logged to stderr and do not affect the exit
    code, which is always 0.
    """
    parser = argparse.ArgumentParser(description="Kaizen Retro — automated burn-cycle retrospective")
    parser.add_argument("--dry-run", action="store_true", help="Print report but do not post")
    parser.add_argument("--since", type=str, help="ISO timestamp for lookback window (default: 24h ago)")
    parser.add_argument("--post-to", type=str, help="Override Telegram chat ID")
    args = parser.parse_args()

    since = args.since or iso_day_ago(1)
    client = GiteaClient()

    print("Gathering metrics since", since)
    metrics = gather_metrics(client, since)
    fleet = load_fleet_routing()
    suggestion = generate_suggestion(metrics, fleet)
    report = build_report(metrics, suggestion, since)

    print(report)

    # Save JSON snapshot (one file per UTC day; a rerun overwrites it)
    REPORTS_DIR.mkdir(parents=True, exist_ok=True)
    snapshot_path = REPORTS_DIR / f"kaizen-retro-{datetime.now(timezone.utc).strftime('%Y%m%d')}.json"
    snapshot = {
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "since": since,
        "metrics": metrics,
        "suggestion": suggestion,
        "report_markdown": report,
    }
    with open(snapshot_path, "w") as f:
        json.dump(snapshot, f, indent=2)
    print(f"\nSnapshot saved to {snapshot_path}")

    if args.dry_run:
        return 0

    # Post to Telegram
    chat_id = args.post_to or TELEGRAM_CHAT_ID
    bot_token = TELEGRAM_BOT_TOKEN
    if bot_token and chat_id:
        try:
            telegram_send(report, bot_token, chat_id)
            print("Posted to Telegram.")
        except Exception as exc:
            print(f"Failed to post to Telegram: {exc}", file=sys.stderr)
    else:
        print("Telegram not configured (set TELEGRAM_BOT_TOKEN and TELEGRAM_HOME_CHANNEL).", file=sys.stderr)

    # Comment on latest morning report issue
    morning_issue = find_latest_morning_report_issue(client)
    if morning_issue:
        try:
            client.create_comment(MORNING_REPORT_REPO, morning_issue, report)
            print(f"Commented on morning report issue #{morning_issue}.")
        except Exception as exc:
            print(f"Failed to comment on morning report issue: {exc}", file=sys.stderr)
    else:
        print("No morning report issue found to comment on.", file=sys.stderr)

    return 0


if __name__ == "__main__":
    sys.exit(main())
@huey.periodic_task(crontab(hour="7", minute="15"))  # 7:15 AM daily, after morning report
def kaizen_retro():
    """Run the automated burn-cycle retrospective.

    Shells out to bin/kaizen-retro.sh and returns a summary dict with the
    exit code and truncated stdout/stderr. A hung script no longer crashes
    the worker: subprocess.TimeoutExpired is caught and reported as a
    structured result instead of propagating.

    NOTE(review): cron/jobs.json also schedules bin/kaizen-retro.sh daily
    ("kaizen-retro-349"); with both enabled the retro is posted twice a
    day. Confirm which scheduler is authoritative and disable the other.
    """
    retro_script = Path(__file__).resolve().parent / "bin" / "kaizen-retro.sh"
    if not retro_script.exists():
        return {"ran": False, "error": "kaizen-retro.sh not found"}

    try:
        result = subprocess.run(
            ["bash", str(retro_script)],
            capture_output=True,
            text=True,
            timeout=300,
        )
    except subprocess.TimeoutExpired as exc:
        # TimeoutExpired.stdout/.stderr may be bytes or None even with
        # text=True depending on version; only keep str payloads.
        return {
            "ran": True,
            "exit_code": None,
            "error": f"timed out after {exc.timeout}s",
            "stdout": exc.stdout[-2000:] if isinstance(exc.stdout, str) else "",
            "stderr": exc.stderr[-1000:] if isinstance(exc.stderr, str) else "",
        }
    return {
        "ran": True,
        "exit_code": result.returncode,
        "stdout": result.stdout[-2000:] if result.stdout else "",
        "stderr": result.stderr[-1000:] if result.stderr else "",
    }
REPO_ROOT = Path(__file__).parent.parent

# Load kaizen_retro.py as a module (it lives in scripts/, not a package)
spec = importlib.util.spec_from_file_location("kaizen_retro", REPO_ROOT / "scripts" / "kaizen_retro.py")
kr = importlib.util.module_from_spec(spec)
spec.loader.exec_module(kr)


# ── classify_issue_type ─────────────────────────────────────────────────────

class TestClassifyIssueType:
    """Keyword/label scoring for the coarse issue-type buckets."""

    def test_classifies_bug_from_title(self):
        issue = {"title": "Fix crash on startup", "body": "", "labels": []}
        assert kr.classify_issue_type(issue) == "bug"

    def test_classifies_feature_from_label(self):
        issue = {"title": "Add dark mode", "body": "", "labels": [{"name": "enhancement"}]}
        # label "enhancement" doesn't match any keyword directly; the
        # whole-word "add" in the title is what scores for "feature"
        assert kr.classify_issue_type(issue) == "feature"

    def test_classifies_docs_from_label(self):
        issue = {"title": "Update guide", "body": "", "labels": [{"name": "documentation"}]}
        assert kr.classify_issue_type(issue) == "docs"

    def test_label_match_stronger_than_title(self):
        issue = {"title": "Something random", "body": "", "labels": [{"name": "bug"}]}
        assert kr.classify_issue_type(issue) == "bug"

    def test_kaizen_takes_precedence_with_both_labels(self):
        issue = {"title": "Process improvement", "body": "", "labels": [{"name": "kaizen"}, {"name": "bug"}]}
        # not actually a tie: kaizen scores 5 (+3 label, +1 "kaizen",
        # +1 "improvement") vs bug's 4 (+3 label, +1 "bug"), so kaizen wins
        assert kr.classify_issue_type(issue) == "kaizen"

    def test_defaults_to_other(self):
        issue = {"title": "Tidy up naming", "body": "No user-facing change", "labels": [{"name": "cleanup"}]}
        assert kr.classify_issue_type(issue) == "other"


# ── is_max_attempts_candidate ───────────────────────────────────────────────

class TestIsMaxAttemptsCandidate:
    """Blocker labels, comment count, and staleness heuristics."""

    def test_blocker_label_returns_true(self):
        issue = {"labels": [{"name": "blocked"}], "comments": 0, "created_at": "2026-04-07T00:00:00Z"}
        assert kr.is_max_attempts_candidate(issue) is True

    def test_timeout_label_returns_true(self):
        issue = {"labels": [{"name": "timeout"}], "comments": 0, "created_at": "2026-04-07T00:00:00Z"}
        assert kr.is_max_attempts_candidate(issue) is True

    def test_high_comment_count_returns_true(self):
        # 5 comments is exactly MAX_ATTEMPT_COMMENT_THRESHOLD (boundary case)
        issue = {"labels": [], "comments": 5, "created_at": "2026-04-07T00:00:00Z"}
        assert kr.is_max_attempts_candidate(issue) is True

    def test_fresh_issue_with_low_comments_returns_false(self):
        now = datetime.now(timezone.utc)
        issue = {"labels": [], "comments": 2, "created_at": now.isoformat()}
        assert kr.is_max_attempts_candidate(issue) is False

    def test_stale_age_returns_true(self):
        # 10 days old > STALE_DAYS (7)
        old = datetime.now(timezone.utc) - timedelta(days=10)
        issue = {"labels": [], "comments": 0, "created_at": old.isoformat()}
        assert kr.is_max_attempts_candidate(issue) is True


# ── fmt_pct ─────────────────────────────────────────────────────────────────

class TestFmtPct:
    """Percentage formatting, including the zero-denominator guard."""

    def test_basic_percentage(self):
        assert kr.fmt_pct(3, 4) == "75%"

    def test_zero_denominator(self):
        assert kr.fmt_pct(0, 0) == "N/A"

    def test_perfect_rate(self):
        assert kr.fmt_pct(10, 10) == "100%"


# ── generate_suggestion ─────────────────────────────────────────────────────

class TestGenerateSuggestion:
    """Each rung of the suggestion priority ladder plus the fallbacks."""

    def test_agent_zero_success_rate(self):
        metrics = {
            "by_agent": {
                "groq": {"successes": 0, "failures": 5, "closed": 0, "repos": ["timmy-home"]},
            },
            "by_repo": {},
            "by_type": {},
            "max_attempts_issues": [],
            "closed_issues": [],
            "merged_prs": [],
            "closed_prs": [],
        }
        suggestion = kr.generate_suggestion(metrics, [])
        assert "groq" in suggestion
        assert "0%" in suggestion or "verify rate" in suggestion

    def test_repo_with_most_failures(self):
        metrics = {
            "by_agent": {},
            "by_repo": {
                "the-nexus": {"successes": 2, "failures": 5, "closed": 2, "open": 3},
            },
            "by_type": {},
            "max_attempts_issues": [],
            "closed_issues": [],
            "merged_prs": [],
            "closed_prs": [],
        }
        suggestion = kr.generate_suggestion(metrics, [])
        assert "the-nexus" in suggestion
        assert "friction" in suggestion

    def test_max_attempts_pattern(self):
        metrics = {
            "by_agent": {},
            "by_repo": {},
            "by_type": {},
            "max_attempts_issues": [
                {"type": "devops"}, {"type": "devops"}, {"type": "feature"}
            ],
            "closed_issues": [],
            "merged_prs": [],
            "closed_prs": [],
        }
        suggestion = kr.generate_suggestion(metrics, [])
        assert "devops" in suggestion
        assert "max-attempts" in suggestion.lower() or "stale" in suggestion.lower()

    def test_idle_agents(self):
        metrics = {
            "by_agent": {},
            "by_repo": {},
            "by_type": {},
            "max_attempts_issues": [],
            "closed_issues": [],
            "merged_prs": [],
            "closed_prs": [],
        }
        fleet = [{"name": "allegro", "active": True}, {"name": "ezra", "active": True}]
        suggestion = kr.generate_suggestion(metrics, fleet)
        assert "idle" in suggestion.lower() or "no assignments" in suggestion.lower()

    def test_fallback_celebration(self):
        metrics = {
            "by_agent": {},
            "by_repo": {},
            "by_type": {},
            "max_attempts_issues": [],
            "closed_issues": [{}, {}, {}, {}, {}],
            "merged_prs": [{}, {}, {}],
            "closed_prs": [],
        }
        suggestion = kr.generate_suggestion(metrics, [])
        assert "Strong cycle" in suggestion

    def test_fallback_low_activity(self):
        metrics = {
            "by_agent": {},
            "by_repo": {},
            "by_type": {},
            "max_attempts_issues": [],
            "closed_issues": [],
            "merged_prs": [],
            "closed_prs": [],
        }
        suggestion = kr.generate_suggestion(metrics, [])
        assert "Low activity" in suggestion or "idle" in suggestion.lower()


# ── build_report ────────────────────────────────────────────────────────────

class TestBuildReport:
    """Markdown rendering: sections, empty-repo skipping, list truncation."""

    def test_report_contains_numbers_section(self):
        metrics = {
            "closed_issues": [{}, {}],
            "merged_prs": [{}],
            "closed_prs": [],
            "max_attempts_issues": [],
            "by_agent": {"ezra": {"successes": 2, "failures": 0, "repos": ["timmy-config"]}},
            "by_repo": {"timmy-config": {"successes": 2, "failures": 0, "open": 1}},
            "by_type": {"feature": {"successes": 2, "failures": 0, "total": 2}},
        }
        report = kr.build_report(metrics, "Do better.", "2026-04-06T00:00:00+00:00")
        assert "## Numbers" in report
        assert "Issues closed:** 2" in report
        assert "PRs merged:** 1" in report
        assert "## By Agent" in report
        assert "## By Repo" in report
        assert "## By Issue Type" in report
        assert "Do better." in report

    def test_report_skips_empty_repos(self):
        metrics = {
            "closed_issues": [],
            "merged_prs": [],
            "closed_prs": [],
            "max_attempts_issues": [],
            "by_agent": {},
            "by_repo": {"unused-repo": {"successes": 0, "failures": 0, "open": 0}},
            "by_type": {},
        }
        report = kr.build_report(metrics, "Nudge.", "2026-04-06T00:00:00+00:00")
        assert "unused-repo" not in report

    def test_report_truncates_max_attempts(self):
        metrics = {
            "closed_issues": [],
            "merged_prs": [],
            "closed_prs": [],
            "max_attempts_issues": [{"repo": "r", "number": i, "type": "bug", "assignee": "a", "title": f"T{i}"} for i in range(15)],
            "by_agent": {},
            "by_repo": {},
            "by_type": {},
        }
        report = kr.build_report(metrics, "Fix it.", "2026-04-06T00:00:00+00:00")
        assert "and 5 more" in report


# ── telegram_send ───────────────────────────────────────────────────────────

class TestTelegramSend:
    """Chunking behavior; urlopen is patched so no network is touched."""

    def test_short_message_sent_in_one_piece(self):
        with patch("urllib.request.urlopen") as mock_urlopen:
            mock_resp = MagicMock()
            mock_resp.read.return_value = b'{"ok": true}'
            mock_urlopen.return_value.__enter__.return_value = mock_resp

            results = kr.telegram_send("Hello", "fake-token", "123")
            assert len(results) == 1
            assert results[0]["ok"] is True
            # Verify payload
            call_args = mock_urlopen.call_args
            req = call_args[0][0]
            payload = json.loads(req.data.decode())
            assert payload["text"] == "Hello"
            assert payload["chat_id"] == "123"

    def test_long_message_chunked(self):
        big_text = "Line\n" * 2000  # ~10k chars
        with patch("urllib.request.urlopen") as mock_urlopen:
            mock_resp = MagicMock()
            mock_resp.read.return_value = b'{"ok": true}'
            mock_urlopen.return_value.__enter__.return_value = mock_resp

            results = kr.telegram_send(big_text, "fake-token", "123")
            assert len(results) >= 2
            # First chunk should have a part prefix
            req = mock_urlopen.call_args_list[0][0][0]
            payload = json.loads(req.data.decode())
            assert "(part 1" in payload["text"]


# ── load helpers ────────────────────────────────────────────────────────────

class TestLoadHelpers:
    """Filesystem-backed helpers using pytest's tmp_path fixture."""

    def test_load_json_missing_returns_none(self, tmp_path):
        missing = tmp_path / "does_not_exist.json"
        assert kr.load_json(missing) is None

    def test_load_json_valid(self, tmp_path):
        p = tmp_path / "data.json"
        p.write_text('{"a": 1}')
        assert kr.load_json(p) == {"a": 1}

    def test_iso_day_ago_format(self):
        s = kr.iso_day_ago(1)
        # Should be a valid ISO timestamp string
        dt = datetime.fromisoformat(s)
        now = datetime.now(timezone.utc)
        assert now - dt < timedelta(days=2)