diff --git a/cron/jobs.json b/cron/jobs.json index 475c17ff..3acc6ce9 100644 --- a/cron/jobs.json +++ b/cron/jobs.json @@ -137,7 +137,38 @@ "paused_reason": null, "skills": [], "skill": null + }, + { + "id": "kaizen-retro-349", + "name": "Kaizen Retro", + "prompt": "Run the automated burn-cycle retrospective. Execute: cd /root/wizards/ezra/workspace/timmy-config && ./bin/kaizen-retro.sh", + "model": "hermes3:latest", + "provider": "ollama", + "base_url": "http://localhost:11434/v1", + "schedule": { + "kind": "interval", + "minutes": 1440, + "display": "every 1440m" + }, + "schedule_display": "daily at 07:30", + "repeat": { + "times": null, + "completed": 0 + }, + "enabled": true, + "created_at": "2026-04-07T15:30:00.000000Z", + "next_run_at": "2026-04-08T07:30:00.000000Z", + "last_run_at": null, + "last_status": null, + "last_error": null, + "deliver": "local", + "origin": null, + "state": "scheduled", + "paused_at": null, + "paused_reason": null, + "skills": [], + "skill": null } ], "updated_at": "2026-04-07T15:00:00+00:00" -} \ No newline at end of file +} diff --git a/scripts/kaizen_retro.py b/scripts/kaizen_retro.py index 2196e898..1f870c2e 100644 --- a/scripts/kaizen_retro.py +++ b/scripts/kaizen_retro.py @@ -51,6 +51,7 @@ MORNING_REPORT_REPO = "Timmy_Foundation/timmy-config" TELEGRAM_BOT_TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN") TELEGRAM_CHAT_ID = os.environ.get("TELEGRAM_HOME_CHANNEL", "-1003664764329") +TELEGRAM_MAX_LEN = 4000 # leave headroom below the 4096 hard limit STALE_DAYS = 7 MAX_ATTEMPT_COMMENT_THRESHOLD = 5 @@ -86,14 +87,24 @@ def classify_issue_type(issue: dict) -> str: body = (issue.get("body", "") or "").lower() labels = [l.get("name", "").lower() for l in issue.get("labels", []) or []] text = f"{title} {body} {' '.join(labels)}" + words = set(text.split()) best = "other" best_score = 0 for kind, keywords in ISSUE_TYPE_KEYWORDS.items(): - score = sum(1 for kw in keywords if kw in text) + # Short keywords (<=3 chars) require whole-word match 
to avoid false positives like + # "ci" inside "specific" or "cd" inside "abcde". + score = sum( + 1 for kw in keywords + if (len(kw) <= 3 and kw in words) or (len(kw) > 3 and kw in text) + ) # label match is stronger for label in labels: - if any(kw in label for kw in keywords): + label_words = set(label.split()) + if any( + (len(kw) <= 3 and kw in label_words) or (len(kw) > 3 and kw in label) + for kw in keywords + ): score += 3 if score > best_score: best_score = score @@ -119,12 +130,34 @@ def is_max_attempts_candidate(issue: dict) -> bool: return False -def telegram_send(text: str, bot_token: str, chat_id: str) -> dict: +def telegram_send(text: str, bot_token: str, chat_id: str) -> list[dict]: + """Post text to Telegram, chunking if it exceeds the message limit.""" url = f"https://api.telegram.org/bot{bot_token}/sendMessage" - data = json.dumps({"chat_id": chat_id, "text": text, "parse_mode": "Markdown"}).encode() - req = urllib.request.Request(url, data=data, headers={"Content-Type": "application/json"}) - with urllib.request.urlopen(req, timeout=30) as resp: - return json.loads(resp.read().decode()) + chunks = [] + if len(text) <= TELEGRAM_MAX_LEN: + chunks = [text] + else: + # Split on newlines to preserve readability + lines = text.splitlines(keepends=True) + current = "" + for line in lines: + if len(current) + len(line) > TELEGRAM_MAX_LEN: + if current: + chunks.append(current) + current = line + else: + current += line + if current: + chunks.append(current) + results = [] + for i, chunk in enumerate(chunks): + prefix = f"*(part {i + 1}/{len(chunks)})*\n" if len(chunks) > 1 else "" + payload = {"chat_id": chat_id, "text": prefix + chunk, "parse_mode": "Markdown"} + data = json.dumps(payload).encode() + req = urllib.request.Request(url, data=data, headers={"Content-Type": "application/json"}) + with urllib.request.urlopen(req, timeout=30) as resp: + results.append(json.loads(resp.read().decode())) + return results def 
find_latest_morning_report_issue(client: GiteaClient) -> Optional[int]: diff --git a/tests/test_kaizen_retro.py b/tests/test_kaizen_retro.py new file mode 100644 index 00000000..04376df2 --- /dev/null +++ b/tests/test_kaizen_retro.py @@ -0,0 +1,283 @@ +"""Tests for the Kaizen Retro burn-cycle retrospective script.""" + +from __future__ import annotations + +import importlib.util +import json +import sys +from datetime import datetime, timedelta, timezone +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest + +REPO_ROOT = Path(__file__).parent.parent + +# Load kaizen_retro.py as a module (it lives in scripts/, not a package) +spec = importlib.util.spec_from_file_location("kaizen_retro", REPO_ROOT / "scripts" / "kaizen_retro.py") +kr = importlib.util.module_from_spec(spec) +spec.loader.exec_module(kr) + + +# ── classify_issue_type ─────────────────────────────────────────────────── + +class TestClassifyIssueType: + def test_classifies_bug_from_title(self): + issue = {"title": "Fix crash on startup", "body": "", "labels": []} + assert kr.classify_issue_type(issue) == "bug" + + def test_classifies_feature_from_label(self): + issue = {"title": "Add dark mode", "body": "", "labels": [{"name": "enhancement"}]} + # label "enhancement" doesn't match any keyword directly, but "add" is in the title + assert kr.classify_issue_type(issue) == "feature" + + def test_classifies_docs_from_label(self): + issue = {"title": "Update guide", "body": "", "labels": [{"name": "documentation"}]} + assert kr.classify_issue_type(issue) == "docs" + + def test_label_match_stronger_than_title(self): + issue = {"title": "Something random", "body": "", "labels": [{"name": "bug"}]} + assert kr.classify_issue_type(issue) == "bug" + + def test_kaizen_takes_precedence_with_both_labels(self): + issue = {"title": "Process improvement", "body": "", "labels": [{"name": "kaizen"}, {"name": "bug"}]} + # kaizen label gives +3, bug gives +3, tie goes to first seen? 
kaizen appears first in dict + assert kr.classify_issue_type(issue) == "kaizen" + + def test_defaults_to_other(self): + issue = {"title": "Tidy up naming", "body": "No user-facing change", "labels": [{"name": "cleanup"}]} + assert kr.classify_issue_type(issue) == "other" + + +# ── is_max_attempts_candidate ───────────────────────────────────────────── + +class TestIsMaxAttemptsCandidate: + def test_blocker_label_returns_true(self): + issue = {"labels": [{"name": "blocked"}], "comments": 0, "created_at": "2026-04-07T00:00:00Z"} + assert kr.is_max_attempts_candidate(issue) is True + + def test_timeout_label_returns_true(self): + issue = {"labels": [{"name": "timeout"}], "comments": 0, "created_at": "2026-04-07T00:00:00Z"} + assert kr.is_max_attempts_candidate(issue) is True + + def test_high_comment_count_returns_true(self): + issue = {"labels": [], "comments": 5, "created_at": "2026-04-07T00:00:00Z"} + assert kr.is_max_attempts_candidate(issue) is True + + def test_fresh_issue_with_low_comments_returns_false(self): + now = datetime.now(timezone.utc) + issue = {"labels": [], "comments": 2, "created_at": now.isoformat()} + assert kr.is_max_attempts_candidate(issue) is False + + def test_stale_age_returns_true(self): + old = datetime.now(timezone.utc) - timedelta(days=10) + issue = {"labels": [], "comments": 0, "created_at": old.isoformat()} + assert kr.is_max_attempts_candidate(issue) is True + + +# ── fmt_pct ─────────────────────────────────────────────────────────────── + +class TestFmtPct: + def test_basic_percentage(self): + assert kr.fmt_pct(3, 4) == "75%" + + def test_zero_denominator(self): + assert kr.fmt_pct(0, 0) == "N/A" + + def test_perfect_rate(self): + assert kr.fmt_pct(10, 10) == "100%" + + +# ── generate_suggestion ─────────────────────────────────────────────────── + +class TestGenerateSuggestion: + def test_agent_zero_success_rate(self): + metrics = { + "by_agent": { + "groq": {"successes": 0, "failures": 5, "closed": 0, "repos": ["timmy-home"]}, + 
}, + "by_repo": {}, + "by_type": {}, + "max_attempts_issues": [], + "closed_issues": [], + "merged_prs": [], + "closed_prs": [], + } + suggestion = kr.generate_suggestion(metrics, []) + assert "groq" in suggestion + assert "0%" in suggestion or "verify rate" in suggestion + + def test_repo_with_most_failures(self): + metrics = { + "by_agent": {}, + "by_repo": { + "the-nexus": {"successes": 2, "failures": 5, "closed": 2, "open": 3}, + }, + "by_type": {}, + "max_attempts_issues": [], + "closed_issues": [], + "merged_prs": [], + "closed_prs": [], + } + suggestion = kr.generate_suggestion(metrics, []) + assert "the-nexus" in suggestion + assert "friction" in suggestion + + def test_max_attempts_pattern(self): + metrics = { + "by_agent": {}, + "by_repo": {}, + "by_type": {}, + "max_attempts_issues": [ + {"type": "devops"}, {"type": "devops"}, {"type": "feature"} + ], + "closed_issues": [], + "merged_prs": [], + "closed_prs": [], + } + suggestion = kr.generate_suggestion(metrics, []) + assert "devops" in suggestion + assert "max-attempts" in suggestion.lower() or "stale" in suggestion.lower() + + def test_idle_agents(self): + metrics = { + "by_agent": {}, + "by_repo": {}, + "by_type": {}, + "max_attempts_issues": [], + "closed_issues": [], + "merged_prs": [], + "closed_prs": [], + } + fleet = [{"name": "allegro", "active": True}, {"name": "ezra", "active": True}] + suggestion = kr.generate_suggestion(metrics, fleet) + assert "idle" in suggestion.lower() or "no assignments" in suggestion.lower() + + def test_fallback_celebration(self): + metrics = { + "by_agent": {}, + "by_repo": {}, + "by_type": {}, + "max_attempts_issues": [], + "closed_issues": [{}, {}, {}, {}, {}], + "merged_prs": [{}, {}, {}], + "closed_prs": [], + } + suggestion = kr.generate_suggestion(metrics, []) + assert "Strong cycle" in suggestion + + def test_fallback_low_activity(self): + metrics = { + "by_agent": {}, + "by_repo": {}, + "by_type": {}, + "max_attempts_issues": [], + "closed_issues": [], + 
"merged_prs": [], + "closed_prs": [], + } + suggestion = kr.generate_suggestion(metrics, []) + assert "Low activity" in suggestion or "idle" in suggestion.lower() + + +# ── build_report ────────────────────────────────────────────────────────── + +class TestBuildReport: + def test_report_contains_numbers_section(self): + metrics = { + "closed_issues": [{}, {}], + "merged_prs": [{}], + "closed_prs": [], + "max_attempts_issues": [], + "by_agent": {"ezra": {"successes": 2, "failures": 0, "repos": ["timmy-config"]}}, + "by_repo": {"timmy-config": {"successes": 2, "failures": 0, "open": 1}}, + "by_type": {"feature": {"successes": 2, "failures": 0, "total": 2}}, + } + report = kr.build_report(metrics, "Do better.", "2026-04-06T00:00:00+00:00") + assert "## Numbers" in report + assert "Issues closed:** 2" in report + assert "PRs merged:** 1" in report + assert "## By Agent" in report + assert "## By Repo" in report + assert "## By Issue Type" in report + assert "Do better." in report + + def test_report_skips_empty_repos(self): + metrics = { + "closed_issues": [], + "merged_prs": [], + "closed_prs": [], + "max_attempts_issues": [], + "by_agent": {}, + "by_repo": {"unused-repo": {"successes": 0, "failures": 0, "open": 0}}, + "by_type": {}, + } + report = kr.build_report(metrics, "Nudge.", "2026-04-06T00:00:00+00:00") + assert "unused-repo" not in report + + def test_report_truncates_max_attempts(self): + metrics = { + "closed_issues": [], + "merged_prs": [], + "closed_prs": [], + "max_attempts_issues": [{"repo": "r", "number": i, "type": "bug", "assignee": "a", "title": f"T{i}"} for i in range(15)], + "by_agent": {}, + "by_repo": {}, + "by_type": {}, + } + report = kr.build_report(metrics, "Fix it.", "2026-04-06T00:00:00+00:00") + assert "and 5 more" in report + + +# ── telegram_send ───────────────────────────────────────────────────────── + +class TestTelegramSend: + def test_short_message_sent_in_one_piece(self): + with patch("urllib.request.urlopen") as mock_urlopen: + 
mock_resp = MagicMock() + mock_resp.read.return_value = b'{"ok": true}' + mock_urlopen.return_value.__enter__.return_value = mock_resp + + results = kr.telegram_send("Hello", "fake-token", "123") + assert len(results) == 1 + assert results[0]["ok"] is True + # Verify payload + call_args = mock_urlopen.call_args + req = call_args[0][0] + payload = json.loads(req.data.decode()) + assert payload["text"] == "Hello" + assert payload["chat_id"] == "123" + + def test_long_message_chunked(self): + big_text = "Line\n" * 2000 # ~10k chars + with patch("urllib.request.urlopen") as mock_urlopen: + mock_resp = MagicMock() + mock_resp.read.return_value = b'{"ok": true}' + mock_urlopen.return_value.__enter__.return_value = mock_resp + + results = kr.telegram_send(big_text, "fake-token", "123") + assert len(results) >= 2 + # First chunk should have a part prefix + req = mock_urlopen.call_args_list[0][0][0] + payload = json.loads(req.data.decode()) + assert "(part 1" in payload["text"] + + +# ── load helpers ────────────────────────────────────────────────────────── + +class TestLoadHelpers: + def test_load_json_missing_returns_none(self, tmp_path): + missing = tmp_path / "does_not_exist.json" + assert kr.load_json(missing) is None + + def test_load_json_valid(self, tmp_path): + p = tmp_path / "data.json" + p.write_text('{"a": 1}') + assert kr.load_json(p) == {"a": 1} + + def test_iso_day_ago_format(self): + s = kr.iso_day_ago(1) + # Should be a valid ISO timestamp string + dt = datetime.fromisoformat(s) + now = datetime.now(timezone.utc) + assert now - dt < timedelta(days=2)