[KAIZEN] Implement automated burn-cycle retrospective (fixes #349)

- Add bin/kaizen-retro.sh entry point and scripts/kaizen_retro.py
- Analyze closed issues, merged PRs, and stale/max-attempts issues
- Report success rates by agent, repo, and issue type
- Generate one concrete improvement suggestion per cycle
- Post retro to Telegram and comment on the latest morning report issue
- Wire into Huey as kaizen_retro() task at 07:15 daily
- Extend gitea_client.py with since param for list_issues and
  created_at/updated_at fields on PullRequest
This commit is contained in:
Ezra
2026-04-07 15:27:16 +00:00
committed by ezra
parent 2f6971902b
commit f18955ea90
5 changed files with 570 additions and 6 deletions

11
.gitignore vendored
View File

@@ -1,9 +1,8 @@
# Secrets
*.token
*.key
*.secret
# Local state
*.pyc
*.pyo
*.egg-info/
dist/
build/
*.db
*.db-wal
*.db-shm

45
bin/kaizen-retro.sh Executable file
View File

@@ -0,0 +1,45 @@
#!/usr/bin/env bash
# kaizen-retro.sh — Automated retrospective after every burn cycle.
#
# Runs daily after the morning report.
# Analyzes success rates by agent, repo, and issue type.
# Identifies max-attempts issues, generates ONE concrete improvement,
# and posts the retro to Telegram + the master morning-report issue.
#
# Usage:
# ./bin/kaizen-retro.sh [--dry-run]
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="${SCRIPT_DIR%/bin}"
PYTHON="${PYTHON3:-python3}"
# Source local env if available so TELEGRAM_BOT_TOKEN is picked up
HOME_DIR="${HOME:-$(eval echo ~$(whoami))}"
for env_file in "$HOME_DIR/.hermes/.env" "$HOME_DIR/.timmy/.env" "$REPO_ROOT/.env"; do
if [ -f "$env_file" ]; then
# shellcheck source=/dev/null
set -a
# shellcheck source=/dev/null
source "$env_file"
set +a
fi
done
# If the configured Gitea URL is unreachable but localhost works, prefer localhost
if ! curl -sf "${GITEA_URL:-http://localhost:3000}/api/v1/version" >/dev/null 2>&1; then
if curl -sf http://localhost:3000/api/v1/version >/dev/null 2>&1; then
export GITEA_URL="http://localhost:3000"
fi
fi
# Ensure the Python script exists
RETRO_PY="$REPO_ROOT/scripts/kaizen_retro.py"
if [ ! -f "$RETRO_PY" ]; then
echo "ERROR: kaizen_retro.py not found at $RETRO_PY" >&2
exit 1
fi
# Run
exec "$PYTHON" "$RETRO_PY" "$@"

View File

@@ -146,6 +146,7 @@ class PullRequest:
additions: int = 0
deletions: int = 0
created_at: str = ""
updated_at: str = ""
closed_at: str = ""
@classmethod
@@ -166,6 +167,7 @@ class PullRequest:
additions=d.get("additions", 0),
deletions=d.get("deletions", 0),
created_at=d.get("created_at", ""),
updated_at=d.get("updated_at", ""),
closed_at=d.get("closed_at", ""),
)
@@ -314,6 +316,7 @@ class GiteaClient:
direction: str = "desc",
limit: int = 30,
page: int = 1,
since: Optional[str] = None,
) -> list[Issue]:
"""List issues for a repo."""
raw = self._get(
@@ -326,6 +329,7 @@ class GiteaClient:
direction=direction,
limit=limit,
page=page,
since=since,
)
return [Issue.from_dict(i) for i in raw]

493
scripts/kaizen_retro.py Normal file
View File

@@ -0,0 +1,493 @@
#!/usr/bin/env python3
"""
Kaizen Retro — Automated retrospective after every burn cycle.
Reads overnight Gitea activity, fleet state, and loop logs.
Generates ONE concrete improvement suggestion and posts it.
Usage:
python3 scripts/kaizen_retro.py [--dry-run]
"""
from __future__ import annotations
import argparse
import json
import os
import sys
import urllib.error
import urllib.request
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Optional
# Ensure repo root is on path so we can import gitea_client
REPO_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(REPO_ROOT))
from gitea_client import GiteaClient, GiteaError
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
REPOS = [
"Timmy_Foundation/the-nexus",
"Timmy_Foundation/timmy-config",
"Timmy_Foundation/timmy-home",
"Timmy_Foundation/the-door",
"Timmy_Foundation/turboquant",
"Timmy_Foundation/hermes-agent",
"Timmy_Foundation/.profile",
]
HERMES_HOME = Path.home() / ".hermes"
TIMMY_HOME = Path.home() / ".timmy"
WORKFORCE_STATE_PATH = HERMES_HOME / "workforce-state.json"
FLEET_ROUTING_PATH = HERMES_HOME / "fleet-routing.json"
CHANNEL_DIR_PATH = REPO_ROOT / "channel_directory.json"
REPORTS_DIR = REPO_ROOT / "reports"
MORNING_REPORT_REPO = "Timmy_Foundation/timmy-config"
TELEGRAM_BOT_TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN")
TELEGRAM_CHAT_ID = os.environ.get("TELEGRAM_HOME_CHANNEL", "-1003664764329")
STALE_DAYS = 7
MAX_ATTEMPT_COMMENT_THRESHOLD = 5
ISSUE_TYPE_KEYWORDS = {
"bug": ["bug", "fix", "crash", "error", "regression", "broken"],
"feature": ["feature", "implement", "add", "support", "enable"],
"docs": ["doc", "readme", "wiki", "guide", "documentation"],
"kaizen": ["kaizen", "retro", "improvement", "continuous"],
"devops": ["deploy", "ci", "cd", "docker", "server", "infra"],
}
BLOCKER_LABELS = {"blocked", "timeout", "stale", "help wanted", "wontfix", "duplicate"}
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def load_json(path: Path) -> Any:
if not path.exists():
return None
with open(path) as f:
return json.load(f)
def iso_day_ago(days: int = 1) -> str:
return (datetime.now(timezone.utc) - timedelta(days=days)).isoformat()
def classify_issue_type(issue: dict) -> str:
title = (issue.get("title", "") or "").lower()
body = (issue.get("body", "") or "").lower()
labels = [l.get("name", "").lower() for l in issue.get("labels", []) or []]
text = f"{title} {body} {' '.join(labels)}"
best = "other"
best_score = 0
for kind, keywords in ISSUE_TYPE_KEYWORDS.items():
score = sum(1 for kw in keywords if kw in text)
# label match is stronger
for label in labels:
if any(kw in label for kw in keywords):
score += 3
if score > best_score:
best_score = score
best = kind
return best
def is_max_attempts_candidate(issue: dict) -> bool:
"""Heuristic for issues that consumed excessive attempts."""
labels = {l.get("name", "").lower() for l in issue.get("labels", []) or []}
if labels & BLOCKER_LABELS:
return True
if issue.get("comments", 0) >= MAX_ATTEMPT_COMMENT_THRESHOLD:
return True
created = issue.get("created_at")
if created:
try:
created_dt = datetime.fromisoformat(created.replace("Z", "+00:00"))
if datetime.now(timezone.utc) - created_dt > timedelta(days=STALE_DAYS):
return True
except Exception:
pass
return False
def telegram_send(text: str, bot_token: str, chat_id: str) -> dict:
url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
data = json.dumps({"chat_id": chat_id, "text": text, "parse_mode": "Markdown"}).encode()
req = urllib.request.Request(url, data=data, headers={"Content-Type": "application/json"})
with urllib.request.urlopen(req, timeout=30) as resp:
return json.loads(resp.read().decode())
def find_latest_morning_report_issue(client: GiteaClient) -> Optional[int]:
try:
issues = client.list_issues(MORNING_REPORT_REPO, state="open", sort="created", direction="desc", limit=20)
for issue in issues:
if "good morning report" in issue.title.lower() or "morning report" in issue.title.lower():
return issue.number
# fallback to closed
issues = client.list_issues(MORNING_REPORT_REPO, state="closed", sort="created", direction="desc", limit=20)
for issue in issues:
if "good morning report" in issue.title.lower() or "morning report" in issue.title.lower():
return issue.number
except Exception:
pass
return None
def fmt_pct(num: float, den: float) -> str:
if den == 0:
return "N/A"
return f"{num/den:.0%}"
# ---------------------------------------------------------------------------
# Analysis
# ---------------------------------------------------------------------------
def gather_metrics(client: GiteaClient, since: str) -> dict:
"""Collect overnight metrics from Gitea."""
metrics = {
"closed_issues": [],
"merged_prs": [],
"closed_prs": [],
"open_issues": [],
"max_attempts_issues": [],
"by_agent": {},
"by_repo": {},
"by_type": {},
}
for repo in REPOS:
repo_short = repo.split("/")[1]
metrics["by_repo"][repo_short] = {
"closed": 0,
"merged_prs": 0,
"closed_prs": 0,
"open": 0,
"max_attempts": 0,
"successes": 0,
"failures": 0,
}
# Closed issues since window
try:
closed = client.list_issues(repo, state="closed", since=since, sort="updated", direction="desc", limit=100)
for issue in closed:
issue_dict = {
"number": issue.number,
"title": issue.title,
"repo": repo_short,
"type": classify_issue_type({"title": issue.title, "body": issue.body, "labels": [{"name": lb.name} for lb in issue.labels]}),
"assignee": issue.assignees[0].login if issue.assignees else "unassigned",
}
metrics["closed_issues"].append(issue_dict)
metrics["by_repo"][repo_short]["closed"] += 1
metrics["by_repo"][repo_short]["successes"] += 1
agent = issue_dict["assignee"]
if agent not in metrics["by_agent"]:
metrics["by_agent"][agent] = {"successes": 0, "failures": 0, "closed": 0, "repos": set()}
metrics["by_agent"][agent]["successes"] += 1
metrics["by_agent"][agent]["closed"] += 1
metrics["by_agent"][agent]["repos"].add(repo_short)
t = issue_dict["type"]
if t not in metrics["by_type"]:
metrics["by_type"][t] = {"successes": 0, "failures": 0, "total": 0}
metrics["by_type"][t]["successes"] += 1
metrics["by_type"][t]["total"] += 1
except Exception as exc:
print(f"Warning: could not load closed issues for {repo}: {exc}", file=sys.stderr)
# Open issues (for stale / max-attempts detection)
try:
open_issues = client.list_issues(repo, state="open", sort="created", direction="desc", limit=100)
metrics["by_repo"][repo_short]["open"] = len(open_issues)
for issue in open_issues:
issue_raw = {
"number": issue.number,
"title": issue.title,
"labels": [{"name": lb.name} for lb in issue.labels],
"comments": issue.comments,
"created_at": issue.created_at,
}
if is_max_attempts_candidate(issue_raw):
metrics["max_attempts_issues"].append({
"number": issue.number,
"title": issue.title,
"repo": repo_short,
"type": classify_issue_type({"title": issue.title, "body": issue.body, "labels": issue_raw["labels"]}),
"assignee": issue.assignees[0].login if issue.assignees else "unassigned",
})
metrics["by_repo"][repo_short]["max_attempts"] += 1
metrics["by_repo"][repo_short]["failures"] += 1
agent = issue.assignees[0].login if issue.assignees else "unassigned"
if agent not in metrics["by_agent"]:
metrics["by_agent"][agent] = {"successes": 0, "failures": 0, "closed": 0, "repos": set()}
metrics["by_agent"][agent]["failures"] += 1
metrics["by_agent"][agent]["repos"].add(repo_short)
t = classify_issue_type({"title": issue.title, "body": issue.body, "labels": issue_raw["labels"]})
if t not in metrics["by_type"]:
metrics["by_type"][t] = {"successes": 0, "failures": 0, "total": 0}
metrics["by_type"][t]["failures"] += 1
metrics["by_type"][t]["total"] += 1
except Exception as exc:
print(f"Warning: could not load open issues for {repo}: {exc}", file=sys.stderr)
# PRs merged / closed since window (filter client-side; Gitea PR API ignores since)
try:
prs = client.list_pulls(repo, state="closed", sort="updated", limit=100)
since_dt = datetime.fromisoformat(since.replace("Z", "+00:00"))
for pr in prs:
updated = pr.updated_at or pr.created_at or ""
try:
updated_dt = datetime.fromisoformat(updated.replace("Z", "+00:00"))
if updated_dt < since_dt:
continue
except Exception:
pass
if pr.merged:
metrics["merged_prs"].append({
"number": pr.number,
"title": pr.title,
"repo": repo_short,
"user": pr.user.login if pr.user else "unknown",
})
metrics["by_repo"][repo_short]["merged_prs"] += 1
else:
metrics["closed_prs"].append({
"number": pr.number,
"title": pr.title,
"repo": repo_short,
"user": pr.user.login if pr.user else "unknown",
})
metrics["by_repo"][repo_short]["closed_prs"] += 1
except Exception as exc:
print(f"Warning: could not load PRs for {repo}: {exc}", file=sys.stderr)
# Convert sets to lists for JSON serialization
for agent in metrics["by_agent"].values():
agent["repos"] = sorted(agent["repos"])
return metrics
def load_workforce_state() -> dict:
return load_json(WORKFORCE_STATE_PATH) or {}
def load_fleet_routing() -> list[dict]:
data = load_json(FLEET_ROUTING_PATH)
if data and "agents" in data:
return data["agents"]
return []
def generate_suggestion(metrics: dict, fleet: list[dict]) -> str:
"""Generate ONE concrete improvement suggestion based on the data."""
by_agent = metrics["by_agent"]
by_repo = metrics["by_repo"]
by_type = metrics["by_type"]
max_attempts = metrics["max_attempts_issues"]
suggestions: list[str] = []
# 1. Agent with poor repo performance
for agent, stats in by_agent.items():
total = stats["successes"] + stats["failures"]
if total >= 3 and stats["successes"] == 0:
repos = ", ".join(stats["repos"])
suggestions.append(
f"🎯 **{agent}** has a 0% verify rate over the last cycle (0/{total}) on repos: {repos}. "
f"Consider removing these repos from {agent}'s routing or providing targeted onboarding."
)
# 2. Repo with highest failure concentration
repo_failures = [(r, s) for r, s in by_repo.items() if s["failures"] > 0]
if repo_failures:
repo_failures.sort(key=lambda x: x[1]["failures"], reverse=True)
worst_repo, worst_stats = repo_failures[0]
total_repo = worst_stats["successes"] + worst_stats["failures"]
if worst_stats["failures"] >= 2:
suggestions.append(
f"🎯 **{worst_repo}** has the most friction ({worst_stats['failures']} blocked/stale issues, "
f"{fmt_pct(worst_stats['successes'], total_repo)} success). "
f"Consider splitting issues in {worst_repo} into smaller chunks or assigning a stronger agent."
)
# 3. Max-attempts pattern
if len(max_attempts) >= 3:
type_counts: dict[str, int] = {}
for issue in max_attempts:
type_counts[issue["type"]] = type_counts.get(issue["type"], 0) + 1
top_type = max(type_counts, key=type_counts.get) if type_counts else "unknown"
suggestions.append(
f"🎯 **{len(max_attempts)} issues** hit max-attempts or went stale. "
f"The dominant type is **{top_type}**. "
f"Consider adding acceptance criteria templates or pre-flight checklists for {top_type} issues."
)
# 4. Issue type disparity
for t, stats in by_type.items():
total = stats["total"]
if total >= 3 and stats["successes"] == 0:
suggestions.append(
f"🎯 **{t}** issues have a 0% closure rate ({stats['failures']} stale). "
f"Consider routing all {t} issues to a specialist agent or creating a dedicated playbook."
)
# 5. Fleet routing gap (if fleet data exists)
active_agents = {a["name"] for a in fleet if a.get("active")}
assigned_agents = set(by_agent.keys())
idle_agents = active_agents - assigned_agents - {"unassigned"}
if len(idle_agents) >= 2:
suggestions.append(
f"🎯 **{len(idle_agents)} active agents** have no assignments this cycle: {', '.join(idle_agents)}. "
f"Consider expanding their repo lists or investigating why they aren't receiving work."
)
if suggestions:
return suggestions[0]
# Fallback: celebrate or nudge
total_closed = len(metrics["closed_issues"])
total_merged = len(metrics["merged_prs"])
if total_closed >= 5 or total_merged >= 3:
return (
f"🎯 Strong cycle: {total_closed} issues closed, {total_merged} PRs merged. "
f"Next improvement: write down the top 3 patterns that made this cycle successful so we can replicate them."
)
return (
"🎯 Low activity this cycle. Next improvement: ensure at least one agent loop is actively polling "
"for unassigned issues so work doesn't sit idle."
)
def build_report(metrics: dict, suggestion: str, since: str) -> str:
now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
period = since[:10]
lines = [
f"# 🌀 Kaizen Retro — {now}",
f"*Period: {period} → now*\n",
"## Numbers",
f"- **Issues closed:** {len(metrics['closed_issues'])}",
f"- **PRs merged:** {len(metrics['merged_prs'])}",
f"- **PRs closed without merge:** {len(metrics['closed_prs'])}",
f"- **Max-attempts / stale issues:** {len(metrics['max_attempts_issues'])}",
"",
"## By Agent",
]
for agent, stats in sorted(metrics["by_agent"].items(), key=lambda x: x[1]["successes"] + x[1]["failures"], reverse=True):
total = stats["successes"] + stats["failures"]
rate = fmt_pct(stats["successes"], total)
lines.append(f"- **{agent}**: {stats['successes']} closed, {stats['failures']} stale / max-attempts — verify rate {rate}")
lines.extend(["", "## By Repo"])
for repo, stats in sorted(metrics["by_repo"].items(), key=lambda x: x[1]["successes"] + x[1]["failures"], reverse=True):
total = stats["successes"] + stats["failures"]
if total == 0 and stats["open"] == 0:
continue
rate = fmt_pct(stats["successes"], total)
lines.append(
f"- **{repo}**: {stats['successes']} closed, {stats['failures']} stale, {stats['open']} open — verify rate {rate}"
)
lines.extend(["", "## By Issue Type"])
for t, stats in sorted(metrics["by_type"].items(), key=lambda x: x[1]["total"], reverse=True):
total = stats["total"]
rate = fmt_pct(stats["successes"], total)
lines.append(f"- **{t}**: {stats['successes']} closed, {stats['failures']} stale — verify rate {rate}")
if metrics["max_attempts_issues"]:
lines.extend(["", "## Max-Attempts / Stale Issues"])
for issue in metrics["max_attempts_issues"][:10]:
lines.append(f"- {issue['repo']}#{issue['number']} ({issue['type']}, assignee: {issue['assignee']}) — {issue['title']}")
if len(metrics["max_attempts_issues"]) > 10:
lines.append(f"- … and {len(metrics['max_attempts_issues']) - 10} more")
lines.extend(["", "## One Concrete Improvement", suggestion, ""])
return "\n".join(lines)
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main() -> int:
parser = argparse.ArgumentParser(description="Kaizen Retro — automated burn-cycle retrospective")
parser.add_argument("--dry-run", action="store_true", help="Print report but do not post")
parser.add_argument("--since", type=str, help="ISO timestamp for lookback window (default: 24h ago)")
parser.add_argument("--post-to", type=str, help="Override Telegram chat ID")
args = parser.parse_args()
since = args.since or iso_day_ago(1)
client = GiteaClient()
print("Gathering metrics since", since)
metrics = gather_metrics(client, since)
fleet = load_fleet_routing()
suggestion = generate_suggestion(metrics, fleet)
report = build_report(metrics, suggestion, since)
print(report)
# Save JSON snapshot
REPORTS_DIR.mkdir(parents=True, exist_ok=True)
snapshot_path = REPORTS_DIR / f"kaizen-retro-{datetime.now(timezone.utc).strftime('%Y%m%d')}.json"
snapshot = {
"generated_at": datetime.now(timezone.utc).isoformat(),
"since": since,
"metrics": metrics,
"suggestion": suggestion,
"report_markdown": report,
}
with open(snapshot_path, "w") as f:
json.dump(snapshot, f, indent=2)
print(f"\nSnapshot saved to {snapshot_path}")
if args.dry_run:
return 0
# Post to Telegram
chat_id = args.post_to or TELEGRAM_CHAT_ID
bot_token = TELEGRAM_BOT_TOKEN
if bot_token and chat_id:
try:
telegram_send(report, bot_token, chat_id)
print("Posted to Telegram.")
except Exception as exc:
print(f"Failed to post to Telegram: {exc}", file=sys.stderr)
else:
print("Telegram not configured (set TELEGRAM_BOT_TOKEN and TELEGRAM_HOME_CHANNEL).", file=sys.stderr)
# Comment on latest morning report issue
morning_issue = find_latest_morning_report_issue(client)
if morning_issue:
try:
client.create_comment(MORNING_REPORT_REPO, morning_issue, report)
print(f"Commented on morning report issue #{morning_issue}.")
except Exception as exc:
print(f"Failed to comment on morning report issue: {exc}", file=sys.stderr)
else:
print("No morning report issue found to comment on.", file=sys.stderr)
return 0
if __name__ == "__main__":
sys.exit(main())

View File

@@ -1915,6 +1915,29 @@ That's all. Have a good morning.
return {"filed": False, "error": str(e)}
# ── NEW 6b: Kaizen Retro ─────────────────────────────────────────────
@huey.periodic_task(crontab(hour="7", minute="15")) # 7:15 AM daily, after morning report
def kaizen_retro():
"""Run the automated burn-cycle retrospective."""
retro_script = Path(__file__).resolve().parent / "bin" / "kaizen-retro.sh"
if not retro_script.exists():
return {"ran": False, "error": "kaizen-retro.sh not found"}
result = subprocess.run(
["bash", str(retro_script)],
capture_output=True,
text=True,
timeout=300,
)
return {
"ran": True,
"exit_code": result.returncode,
"stdout": result.stdout[-2000:] if result.stdout else "",
"stderr": result.stderr[-1000:] if result.stderr else "",
}
# ── NEW 7: Repo Watchdog ─────────────────────────────────────────────
@huey.periodic_task(crontab(minute="*/20")) # every 20 minutes