[KAIZEN] Harden retro scheduling, chunking, and tests (#349)

- Add Kaizen Retro to cron/jobs.json with explicit local model/provider
- Add Telegram message chunking for reports approaching the 4096-char limit
- Fix classify_issue_type false positives on short substrings (e.g. "ci" matching inside "cleanup")
- Add 28 unit tests covering classification, max-attempts detection, suggestion generation, report formatting, and Telegram chunking
2026-04-07 15:54:15 +00:00
parent f18955ea90
commit 2e64b160b5
3 changed files with 355 additions and 8 deletions
--- a/scripts/kaizen_retro.py
+++ b/scripts/kaizen_retro.py
@@ -51,6 +51,7 @@ MORNING_REPORT_REPO = "Timmy_Foundation/timmy-config"

 TELEGRAM_BOT_TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN")
 TELEGRAM_CHAT_ID = os.environ.get("TELEGRAM_HOME_CHANNEL", "-1003664764329")
+TELEGRAM_MAX_LEN = 4000  # leave headroom below the 4096 hard limit

 STALE_DAYS = 7
 MAX_ATTEMPT_COMMENT_THRESHOLD = 5
@@ -86,14 +87,24 @@ def classify_issue_type(issue: dict) -> str:
    body = (issue.get("body", "") or "").lower()
    labels = [l.get("name", "").lower() for l in issue.get("labels", []) or []]
    text = f"{title} {body} {' '.join(labels)}"
+    words = set(text.split())

    best = "other"
    best_score = 0
    for kind, keywords in ISSUE_TYPE_KEYWORDS.items():
-        score = sum(1 for kw in keywords if kw in text)
+        # Short keywords (<=3 chars) require whole-word match to avoid false positives like
+        # "ci" inside "cleanup" or "cd" inside "abcde".
+        score = sum(
+            1 for kw in keywords
+            if (len(kw) <= 3 and kw in words) or (len(kw) > 3 and kw in text)
+        )
        # label match is stronger
        for label in labels:
-            if any(kw in label for kw in keywords):
+            label_words = set(label.split())
+            if any(
+                (len(kw) <= 3 and kw in label_words) or (len(kw) > 3 and kw in label)
+                for kw in keywords
+            ):
                score += 3
        if score > best_score:
            best_score = score
@@ -119,12 +130,34 @@ def is_max_attempts_candidate(issue: dict) -> bool:
    return False


-def telegram_send(text: str, bot_token: str, chat_id: str) -> dict:
+def telegram_send(text: str, bot_token: str, chat_id: str) -> list[dict]:
+    """Post text to Telegram, chunking if it exceeds the message limit."""
    url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
-    data = json.dumps({"chat_id": chat_id, "text": text, "parse_mode": "Markdown"}).encode()
-    req = urllib.request.Request(url, data=data, headers={"Content-Type": "application/json"})
-    with urllib.request.urlopen(req, timeout=30) as resp:
-        return json.loads(resp.read().decode())
+    chunks = []
+    if len(text) <= TELEGRAM_MAX_LEN:
+        chunks = [text]
+    else:
+        # Split on newlines to preserve readability
+        lines = text.splitlines(keepends=True)
+        current = ""
+        for line in lines:
+            if len(current) + len(line) > TELEGRAM_MAX_LEN:
+                if current:
+                    chunks.append(current)
+                current = line
+            else:
+                current += line
+        if current:
+            chunks.append(current)
+    results = []
+    for i, chunk in enumerate(chunks):
+        prefix = f"*(part {i + 1}/{len(chunks)})*\n" if len(chunks) > 1 else ""
+        payload = {"chat_id": chat_id, "text": prefix + chunk, "parse_mode": "Markdown"}
+        data = json.dumps(payload).encode()
+        req = urllib.request.Request(url, data=data, headers={"Content-Type": "application/json"})
+        with urllib.request.urlopen(req, timeout=30) as resp:
+            results.append(json.loads(resp.read().decode()))
+    return results


 def find_latest_morning_report_issue(client: GiteaClient) -> Optional[int]: