[KAIZEN] Harden retro scheduling, chunking, and tests (#349)
- Add Kaizen Retro to cron/jobs.json with explicit local model/provider - Add Telegram message chunking for reports approaching the 4096-char limit - Fix classify_issue_type false positives on short substrings (ci in cleanup) - Add 28 unit tests covering classification, max-attempts detection, suggestion generation, report formatting, and Telegram chunking
This commit is contained in:
@@ -137,7 +137,38 @@
|
||||
"paused_reason": null,
|
||||
"skills": [],
|
||||
"skill": null
|
||||
},
|
||||
{
|
||||
"id": "kaizen-retro-349",
|
||||
"name": "Kaizen Retro",
|
||||
"prompt": "Run the automated burn-cycle retrospective. Execute: cd /root/wizards/ezra/workspace/timmy-config && ./bin/kaizen-retro.sh",
|
||||
"model": "hermes3:latest",
|
||||
"provider": "ollama",
|
||||
"base_url": "http://localhost:11434/v1",
|
||||
"schedule": {
|
||||
"kind": "interval",
|
||||
"minutes": 1440,
|
||||
"display": "every 1440m"
|
||||
},
|
||||
"schedule_display": "daily at 07:30",
|
||||
"repeat": {
|
||||
"times": null,
|
||||
"completed": 0
|
||||
},
|
||||
"enabled": true,
|
||||
"created_at": "2026-04-07T15:30:00.000000Z",
|
||||
"next_run_at": "2026-04-08T07:30:00.000000Z",
|
||||
"last_run_at": null,
|
||||
"last_status": null,
|
||||
"last_error": null,
|
||||
"deliver": "local",
|
||||
"origin": null,
|
||||
"state": "scheduled",
|
||||
"paused_at": null,
|
||||
"paused_reason": null,
|
||||
"skills": [],
|
||||
"skill": null
|
||||
}
|
||||
],
|
||||
"updated_at": "2026-04-07T15:00:00+00:00"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,6 +51,7 @@ MORNING_REPORT_REPO = "Timmy_Foundation/timmy-config"
|
||||
|
||||
# Telegram delivery configuration. The bot token must be supplied via the
# environment; the chat id falls back to the home channel.
# NOTE(review): hard-coded fallback chat id — confirm it belongs in code
# rather than deployment config.
TELEGRAM_BOT_TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN")
TELEGRAM_CHAT_ID = os.environ.get("TELEGRAM_HOME_CHANNEL", "-1003664764329")
TELEGRAM_MAX_LEN = 4000  # leave headroom below the 4096 hard limit

# Heuristic thresholds for flagging stuck ("max attempts") issues:
# older than STALE_DAYS, or at least MAX_ATTEMPT_COMMENT_THRESHOLD comments.
STALE_DAYS = 7
MAX_ATTEMPT_COMMENT_THRESHOLD = 5
|
||||
@@ -86,14 +87,24 @@ def classify_issue_type(issue: dict) -> str:
|
||||
body = (issue.get("body", "") or "").lower()
|
||||
labels = [l.get("name", "").lower() for l in issue.get("labels", []) or []]
|
||||
text = f"{title} {body} {' '.join(labels)}"
|
||||
words = set(text.split())
|
||||
|
||||
best = "other"
|
||||
best_score = 0
|
||||
for kind, keywords in ISSUE_TYPE_KEYWORDS.items():
|
||||
score = sum(1 for kw in keywords if kw in text)
|
||||
# Short keywords (<=3 chars) require whole-word match to avoid false positives like
|
||||
# "ci" inside "cleanup" or "cd" inside "abcde".
|
||||
score = sum(
|
||||
1 for kw in keywords
|
||||
if (len(kw) <= 3 and kw in words) or (len(kw) > 3 and kw in text)
|
||||
)
|
||||
# label match is stronger
|
||||
for label in labels:
|
||||
if any(kw in label for kw in keywords):
|
||||
label_words = set(label.split())
|
||||
if any(
|
||||
(len(kw) <= 3 and kw in label_words) or (len(kw) > 3 and kw in label)
|
||||
for kw in keywords
|
||||
):
|
||||
score += 3
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
@@ -119,12 +130,34 @@ def is_max_attempts_candidate(issue: dict) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def _chunk_text(text: str, limit: int) -> list[str]:
    """Split *text* into chunks of at most *limit* characters.

    Splits on newline boundaries to preserve readability. A single line
    longer than *limit* is hard-split so no chunk can ever exceed the
    limit (previously such a line passed through whole and Telegram
    would reject the message).
    """
    if len(text) <= limit:
        return [text]
    chunks: list[str] = []
    current = ""
    for line in text.splitlines(keepends=True):
        # Hard-split pathological lines that alone exceed the limit.
        while len(line) > limit:
            if current:
                chunks.append(current)
                current = ""
            chunks.append(line[:limit])
            line = line[limit:]
        if len(current) + len(line) > limit:
            if current:
                chunks.append(current)
            current = line
        else:
            current += line
    if current:
        chunks.append(current)
    return chunks


def telegram_send(text: str, bot_token: str, chat_id: str) -> list[dict]:
    """Post text to Telegram, chunking if it exceeds the message limit.

    Args:
        text: Markdown message body; chunked at TELEGRAM_MAX_LEN.
        bot_token: Telegram bot API token.
        chat_id: Destination chat id.

    Returns:
        The decoded sendMessage API response for each chunk sent.
    """
    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
    chunks = _chunk_text(text, TELEGRAM_MAX_LEN)
    results = []
    for i, chunk in enumerate(chunks):
        # Multi-part messages get a "(part i/n)" header so readers can
        # reassemble them in order; single messages are sent untouched.
        prefix = f"*(part {i + 1}/{len(chunks)})*\n" if len(chunks) > 1 else ""
        payload = {"chat_id": chat_id, "text": prefix + chunk, "parse_mode": "Markdown"}
        data = json.dumps(payload).encode()
        req = urllib.request.Request(url, data=data, headers={"Content-Type": "application/json"})
        with urllib.request.urlopen(req, timeout=30) as resp:
            results.append(json.loads(resp.read().decode()))
    return results
|
||||
|
||||
|
||||
def find_latest_morning_report_issue(client: GiteaClient) -> Optional[int]:
|
||||
|
||||
283
tests/test_kaizen_retro.py
Normal file
283
tests/test_kaizen_retro.py
Normal file
@@ -0,0 +1,283 @@
|
||||
"""Tests for the Kaizen Retro burn-cycle retrospective script."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib.util
|
||||
import json
|
||||
import sys
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
REPO_ROOT = Path(__file__).parent.parent
|
||||
|
||||
# Load kaizen_retro.py as a module (it lives in scripts/, not a package)
|
||||
spec = importlib.util.spec_from_file_location("kaizen_retro", REPO_ROOT / "scripts" / "kaizen_retro.py")
|
||||
kr = importlib.util.module_from_spec(spec)
|
||||
spec.loader.exec_module(kr)
|
||||
|
||||
|
||||
# ── classify_issue_type ───────────────────────────────────────────────────
|
||||
|
||||
class TestClassifyIssueType:
    """Keyword/label scoring behavior of kr.classify_issue_type."""

    def test_classifies_bug_from_title(self):
        # Title keywords alone are enough to classify.
        issue = {"title": "Fix crash on startup", "body": "", "labels": []}
        assert kr.classify_issue_type(issue) == "bug"

    def test_classifies_feature_from_label(self):
        issue = {"title": "Add dark mode", "body": "", "labels": [{"name": "enhancement"}]}
        # label "enhancement" doesn't match any keyword directly, but "feature" and "add" are in title
        assert kr.classify_issue_type(issue) == "feature"

    def test_classifies_docs_from_label(self):
        issue = {"title": "Update guide", "body": "", "labels": [{"name": "documentation"}]}
        assert kr.classify_issue_type(issue) == "docs"

    def test_label_match_stronger_than_title(self):
        # Label matches score higher (+3) than plain text matches.
        issue = {"title": "Something random", "body": "", "labels": [{"name": "bug"}]}
        assert kr.classify_issue_type(issue) == "bug"

    def test_kaizen_takes_precedence_with_both_labels(self):
        issue = {"title": "Process improvement", "body": "", "labels": [{"name": "kaizen"}, {"name": "bug"}]}
        # kaizen label gives +3, bug gives +3; presumably ties resolve to the
        # first key in ISSUE_TYPE_KEYWORDS (kaizen) — TODO confirm ordering.
        assert kr.classify_issue_type(issue) == "kaizen"

    def test_defaults_to_other(self):
        # "cleanup" must NOT match the short keyword "ci" as a substring —
        # the whole-word guard for short keywords keeps this "other".
        issue = {"title": "Tidy up naming", "body": "No user-facing change", "labels": [{"name": "cleanup"}]}
        assert kr.classify_issue_type(issue) == "other"
|
||||
|
||||
|
||||
# ── is_max_attempts_candidate ─────────────────────────────────────────────
|
||||
|
||||
class TestIsMaxAttemptsCandidate:
    """Heuristics that flag an issue as stuck (max-attempts candidate)."""

    def test_blocker_label_returns_true(self):
        # An explicit "blocked" label flags the issue regardless of age.
        issue = {"labels": [{"name": "blocked"}], "comments": 0, "created_at": "2026-04-07T00:00:00Z"}
        assert kr.is_max_attempts_candidate(issue) is True

    def test_timeout_label_returns_true(self):
        issue = {"labels": [{"name": "timeout"}], "comments": 0, "created_at": "2026-04-07T00:00:00Z"}
        assert kr.is_max_attempts_candidate(issue) is True

    def test_high_comment_count_returns_true(self):
        # 5 comments meets MAX_ATTEMPT_COMMENT_THRESHOLD.
        issue = {"labels": [], "comments": 5, "created_at": "2026-04-07T00:00:00Z"}
        assert kr.is_max_attempts_candidate(issue) is True

    def test_fresh_issue_with_low_comments_returns_false(self):
        # Fresh + quiet issues are not candidates.
        now = datetime.now(timezone.utc)
        issue = {"labels": [], "comments": 2, "created_at": now.isoformat()}
        assert kr.is_max_attempts_candidate(issue) is False

    def test_stale_age_returns_true(self):
        # 10 days old exceeds the STALE_DAYS (7) window.
        old = datetime.now(timezone.utc) - timedelta(days=10)
        issue = {"labels": [], "comments": 0, "created_at": old.isoformat()}
        assert kr.is_max_attempts_candidate(issue) is True
|
||||
|
||||
|
||||
# ── fmt_pct ───────────────────────────────────────────────────────────────
|
||||
|
||||
class TestFmtPct:
    """Percentage-formatting helper kr.fmt_pct."""

    def test_basic_percentage(self):
        assert kr.fmt_pct(3, 4) == "75%"

    def test_zero_denominator(self):
        # Division by zero must be handled gracefully, not raise.
        assert kr.fmt_pct(0, 0) == "N/A"

    def test_perfect_rate(self):
        assert kr.fmt_pct(10, 10) == "100%"
|
||||
|
||||
|
||||
# ── generate_suggestion ───────────────────────────────────────────────────
|
||||
|
||||
class TestGenerateSuggestion:
    """kr.generate_suggestion should surface the most actionable pattern."""

    @staticmethod
    def _metrics(**overrides):
        # Baseline empty-cycle metrics; each test overrides only what it probes.
        base = {
            "by_agent": {},
            "by_repo": {},
            "by_type": {},
            "max_attempts_issues": [],
            "closed_issues": [],
            "merged_prs": [],
            "closed_prs": [],
        }
        base.update(overrides)
        return base

    def test_agent_zero_success_rate(self):
        metrics = self._metrics(
            by_agent={"groq": {"successes": 0, "failures": 5, "closed": 0, "repos": ["timmy-home"]}},
        )
        suggestion = kr.generate_suggestion(metrics, [])
        assert "groq" in suggestion
        assert "0%" in suggestion or "verify rate" in suggestion

    def test_repo_with_most_failures(self):
        metrics = self._metrics(
            by_repo={"the-nexus": {"successes": 2, "failures": 5, "closed": 2, "open": 3}},
        )
        suggestion = kr.generate_suggestion(metrics, [])
        assert "the-nexus" in suggestion
        assert "friction" in suggestion

    def test_max_attempts_pattern(self):
        metrics = self._metrics(
            max_attempts_issues=[{"type": "devops"}, {"type": "devops"}, {"type": "feature"}],
        )
        suggestion = kr.generate_suggestion(metrics, [])
        assert "devops" in suggestion
        assert "max-attempts" in suggestion.lower() or "stale" in suggestion.lower()

    def test_idle_agents(self):
        fleet = [{"name": "allegro", "active": True}, {"name": "ezra", "active": True}]
        suggestion = kr.generate_suggestion(self._metrics(), fleet)
        assert "idle" in suggestion.lower() or "no assignments" in suggestion.lower()

    def test_fallback_celebration(self):
        metrics = self._metrics(
            closed_issues=[{} for _ in range(5)],
            merged_prs=[{} for _ in range(3)],
        )
        suggestion = kr.generate_suggestion(metrics, [])
        assert "Strong cycle" in suggestion

    def test_fallback_low_activity(self):
        suggestion = kr.generate_suggestion(self._metrics(), [])
        assert "Low activity" in suggestion or "idle" in suggestion.lower()
|
||||
|
||||
|
||||
# ── build_report ──────────────────────────────────────────────────────────
|
||||
|
||||
class TestBuildReport:
    """kr.build_report should render the expected markdown sections."""

    @staticmethod
    def _metrics(**overrides):
        # Empty-cycle skeleton; tests override only the fields they exercise.
        base = {
            "closed_issues": [],
            "merged_prs": [],
            "closed_prs": [],
            "max_attempts_issues": [],
            "by_agent": {},
            "by_repo": {},
            "by_type": {},
        }
        base.update(overrides)
        return base

    def test_report_contains_numbers_section(self):
        metrics = self._metrics(
            closed_issues=[{}, {}],
            merged_prs=[{}],
            by_agent={"ezra": {"successes": 2, "failures": 0, "repos": ["timmy-config"]}},
            by_repo={"timmy-config": {"successes": 2, "failures": 0, "open": 1}},
            by_type={"feature": {"successes": 2, "failures": 0, "total": 2}},
        )
        report = kr.build_report(metrics, "Do better.", "2026-04-06T00:00:00+00:00")
        for fragment in (
            "## Numbers",
            "Issues closed:** 2",
            "PRs merged:** 1",
            "## By Agent",
            "## By Repo",
            "## By Issue Type",
            "Do better.",
        ):
            assert fragment in report

    def test_report_skips_empty_repos(self):
        # Repos with no activity must not clutter the report.
        metrics = self._metrics(by_repo={"unused-repo": {"successes": 0, "failures": 0, "open": 0}})
        report = kr.build_report(metrics, "Nudge.", "2026-04-06T00:00:00+00:00")
        assert "unused-repo" not in report

    def test_report_truncates_max_attempts(self):
        # 15 stuck issues: only the first 10 are listed, the rest summarized.
        stuck = [
            {"repo": "r", "number": i, "type": "bug", "assignee": "a", "title": f"T{i}"}
            for i in range(15)
        ]
        metrics = self._metrics(max_attempts_issues=stuck)
        report = kr.build_report(metrics, "Fix it.", "2026-04-06T00:00:00+00:00")
        assert "and 5 more" in report
|
||||
|
||||
|
||||
# ── telegram_send ─────────────────────────────────────────────────────────
|
||||
|
||||
class TestTelegramSend:
    """kr.telegram_send should chunk long messages and send each piece."""

    @staticmethod
    def _ok_response():
        # Minimal stand-in for a successful sendMessage HTTP response body.
        resp = MagicMock()
        resp.read.return_value = b'{"ok": true}'
        return resp

    def test_short_message_sent_in_one_piece(self):
        with patch("urllib.request.urlopen") as mock_urlopen:
            mock_urlopen.return_value.__enter__.return_value = self._ok_response()

            results = kr.telegram_send("Hello", "fake-token", "123")
            assert len(results) == 1
            assert results[0]["ok"] is True
            # Inspect the request that was actually posted.
            req = mock_urlopen.call_args[0][0]
            payload = json.loads(req.data.decode())
            assert payload["text"] == "Hello"
            assert payload["chat_id"] == "123"

    def test_long_message_chunked(self):
        big_text = "Line\n" * 2000  # ~10k chars, well past the limit
        with patch("urllib.request.urlopen") as mock_urlopen:
            mock_urlopen.return_value.__enter__.return_value = self._ok_response()

            results = kr.telegram_send(big_text, "fake-token", "123")
            assert len(results) >= 2
            # The first chunk must carry the part-numbering prefix.
            first_req = mock_urlopen.call_args_list[0][0][0]
            payload = json.loads(first_req.data.decode())
            assert "(part 1" in payload["text"]
|
||||
|
||||
|
||||
# ── load helpers ──────────────────────────────────────────────────────────
|
||||
|
||||
class TestLoadHelpers:
    """Filesystem/time helpers used by the retro script."""

    def test_load_json_missing_returns_none(self, tmp_path):
        # A missing file is treated as "no data", not an error.
        missing = tmp_path / "does_not_exist.json"
        assert kr.load_json(missing) is None

    def test_load_json_valid(self, tmp_path):
        p = tmp_path / "data.json"
        p.write_text('{"a": 1}')
        assert kr.load_json(p) == {"a": 1}

    def test_iso_day_ago_format(self):
        s = kr.iso_day_ago(1)
        # Should be a valid ISO timestamp string
        # NOTE(review): the aware - dt subtraction below assumes iso_day_ago
        # returns a timezone-aware timestamp — confirm in the script.
        dt = datetime.fromisoformat(s)
        now = datetime.now(timezone.utc)
        assert now - dt < timedelta(days=2)
|
||||
Reference in New Issue
Block a user