[KAIZEN] Harden retro scheduling, chunking, and tests (#349)

- Add Kaizen Retro to cron/jobs.json with explicit local model/provider
- Add Telegram message chunking for reports approaching the 4096-char limit
- Fix classify_issue_type false positives on short substrings (e.g. "ci" in "specific")
- Add 28 unit tests covering classification, max-attempts detection,
  suggestion generation, report formatting, and Telegram chunking
This commit is contained in:
Ezra
2026-04-07 15:54:15 +00:00
committed by ezra
parent f18955ea90
commit 2e64b160b5
3 changed files with 355 additions and 8 deletions

View File

@@ -51,6 +51,7 @@ MORNING_REPORT_REPO = "Timmy_Foundation/timmy-config"
TELEGRAM_BOT_TOKEN = os.environ.get("TELEGRAM_BOT_TOKEN")
TELEGRAM_CHAT_ID = os.environ.get("TELEGRAM_HOME_CHANNEL", "-1003664764329")
TELEGRAM_MAX_LEN = 4000 # leave headroom below the 4096 hard limit
STALE_DAYS = 7
MAX_ATTEMPT_COMMENT_THRESHOLD = 5
@@ -86,14 +87,24 @@ def classify_issue_type(issue: dict) -> str:
body = (issue.get("body", "") or "").lower()
labels = [l.get("name", "").lower() for l in issue.get("labels", []) or []]
text = f"{title} {body} {' '.join(labels)}"
words = set(text.split())
best = "other"
best_score = 0
for kind, keywords in ISSUE_TYPE_KEYWORDS.items():
score = sum(1 for kw in keywords if kw in text)
# Short keywords (<=3 chars) require a whole-word (whitespace-delimited) match to avoid
# false positives like "ci" inside "specific" or "cd" inside "abcde".
score = sum(
1 for kw in keywords
if (len(kw) <= 3 and kw in words) or (len(kw) > 3 and kw in text)
)
# label match is stronger
for label in labels:
if any(kw in label for kw in keywords):
label_words = set(label.split())
if any(
(len(kw) <= 3 and kw in label_words) or (len(kw) > 3 and kw in label)
for kw in keywords
):
score += 3
if score > best_score:
best_score = score
@@ -119,12 +130,34 @@ def is_max_attempts_candidate(issue: dict) -> bool:
return False
def telegram_send(text: str, bot_token: str, chat_id: str) -> dict:
def telegram_send(text: str, bot_token: str, chat_id: str) -> list[dict]:
"""Post text to Telegram, chunking if it exceeds the message limit."""
url = f"https://api.telegram.org/bot{bot_token}/sendMessage"
data = json.dumps({"chat_id": chat_id, "text": text, "parse_mode": "Markdown"}).encode()
req = urllib.request.Request(url, data=data, headers={"Content-Type": "application/json"})
with urllib.request.urlopen(req, timeout=30) as resp:
return json.loads(resp.read().decode())
chunks = []
if len(text) <= TELEGRAM_MAX_LEN:
chunks = [text]
else:
# Split on newlines to preserve readability
lines = text.splitlines(keepends=True)
current = ""
for line in lines:
if len(current) + len(line) > TELEGRAM_MAX_LEN:
if current:
chunks.append(current)
current = line
else:
current += line
if current:
chunks.append(current)
results = []
for i, chunk in enumerate(chunks):
prefix = f"*(part {i + 1}/{len(chunks)})*\n" if len(chunks) > 1 else ""
payload = {"chat_id": chat_id, "text": prefix + chunk, "parse_mode": "Markdown"}
data = json.dumps(payload).encode()
req = urllib.request.Request(url, data=data, headers={"Content-Type": "application/json"})
with urllib.request.urlopen(req, timeout=30) as resp:
results.append(json.loads(resp.read().decode()))
return results
def find_latest_morning_report_issue(client: GiteaClient) -> Optional[int]: