forked from Rockachopa/Timmy-time-dashboard
Compare commits
8 Commits
kimi/issue
...
kimi/issue
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c5e7dc09ae | ||
| cd3dc5d989 | |||
| e4de539bf3 | |||
| b2057f72e1 | |||
| 5f52dd54c0 | |||
| 9ceffd61d1 | |||
| 015d858be5 | |||
| b6d0b5f999 |
@@ -94,12 +94,17 @@ def extract_cycle_number(title: str) -> int | None:
|
|||||||
return int(m.group(1)) if m else None
|
return int(m.group(1)) if m else None
|
||||||
|
|
||||||
|
|
||||||
def extract_issue_number(title: str, body: str) -> int | None:
|
def extract_issue_number(title: str, body: str, pr_number: int | None = None) -> int | None:
|
||||||
# Try body first (usually has "closes #N")
|
"""Extract the issue number from PR body/title, ignoring the PR number itself.
|
||||||
|
|
||||||
|
Gitea appends "(#N)" to PR titles where N is the PR number — skip that
|
||||||
|
so we don't confuse it with the linked issue.
|
||||||
|
"""
|
||||||
for text in [body or "", title]:
|
for text in [body or "", title]:
|
||||||
m = ISSUE_RE.search(text)
|
for m in ISSUE_RE.finditer(text):
|
||||||
if m:
|
num = int(m.group(1))
|
||||||
return int(m.group(1))
|
if num != pr_number:
|
||||||
|
return num
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@@ -140,7 +145,7 @@ def main():
|
|||||||
else:
|
else:
|
||||||
cycle_counter = max(cycle_counter, cycle)
|
cycle_counter = max(cycle_counter, cycle)
|
||||||
|
|
||||||
issue = extract_issue_number(title, body)
|
issue = extract_issue_number(title, body, pr_number=pr_num)
|
||||||
issue_type = classify_pr(title, body)
|
issue_type = classify_pr(title, body)
|
||||||
duration = estimate_duration(pr)
|
duration = estimate_duration(pr)
|
||||||
diff = get_pr_diff_stats(token, pr_num)
|
diff = get_pr_diff_stats(token, pr_num)
|
||||||
|
|||||||
@@ -4,11 +4,26 @@
|
|||||||
Called after each cycle completes (success or failure).
|
Called after each cycle completes (success or failure).
|
||||||
Appends a structured entry to .loop/retro/cycles.jsonl.
|
Appends a structured entry to .loop/retro/cycles.jsonl.
|
||||||
|
|
||||||
|
EPOCH NOTATION (turnover system):
|
||||||
|
Each cycle carries a symbolic epoch tag alongside the raw integer:
|
||||||
|
|
||||||
|
⟳WW.D:NNN
|
||||||
|
|
||||||
|
⟳ turnover glyph — marks epoch-aware cycles
|
||||||
|
WW ISO week-of-year (01–53)
|
||||||
|
D ISO weekday (1=Mon … 7=Sun)
|
||||||
|
NNN daily cycle counter, zero-padded, resets at midnight UTC
|
||||||
|
|
||||||
|
Example: ⟳12.3:042 — Week 12, Wednesday, 42nd cycle of the day.
|
||||||
|
|
||||||
|
The raw `cycle` integer is preserved for backward compatibility.
|
||||||
|
The `epoch` field carries the symbolic notation.
|
||||||
|
|
||||||
SUCCESS DEFINITION:
|
SUCCESS DEFINITION:
|
||||||
A cycle is only "success" if BOTH conditions are met:
|
A cycle is only "success" if BOTH conditions are met:
|
||||||
1. The hermes process exited cleanly (exit code 0)
|
1. The hermes process exited cleanly (exit code 0)
|
||||||
2. Main is green (smoke test passes on main after merge)
|
2. Main is green (smoke test passes on main after merge)
|
||||||
|
|
||||||
A cycle that merges a PR but leaves main red is a FAILURE.
|
A cycle that merges a PR but leaves main red is a FAILURE.
|
||||||
The --main-green flag records the smoke test result.
|
The --main-green flag records the smoke test result.
|
||||||
|
|
||||||
@@ -29,6 +44,8 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -36,10 +53,68 @@ from pathlib import Path
|
|||||||
REPO_ROOT = Path(__file__).resolve().parent.parent
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||||
RETRO_FILE = REPO_ROOT / ".loop" / "retro" / "cycles.jsonl"
|
RETRO_FILE = REPO_ROOT / ".loop" / "retro" / "cycles.jsonl"
|
||||||
SUMMARY_FILE = REPO_ROOT / ".loop" / "retro" / "summary.json"
|
SUMMARY_FILE = REPO_ROOT / ".loop" / "retro" / "summary.json"
|
||||||
|
EPOCH_COUNTER_FILE = REPO_ROOT / ".loop" / "retro" / ".epoch_counter"
|
||||||
|
|
||||||
# How many recent entries to include in rolling summary
|
# How many recent entries to include in rolling summary
|
||||||
SUMMARY_WINDOW = 50
|
SUMMARY_WINDOW = 50
|
||||||
|
|
||||||
|
# Branch patterns that encode an issue number, e.g. kimi/issue-492
|
||||||
|
BRANCH_ISSUE_RE = re.compile(r"issue[/-](\d+)", re.IGNORECASE)
|
||||||
|
|
||||||
|
|
||||||
|
def detect_issue_from_branch() -> int | None:
|
||||||
|
"""Try to extract an issue number from the current git branch name."""
|
||||||
|
try:
|
||||||
|
branch = subprocess.check_output(
|
||||||
|
["git", "rev-parse", "--abbrev-ref", "HEAD"],
|
||||||
|
stderr=subprocess.DEVNULL,
|
||||||
|
text=True,
|
||||||
|
).strip()
|
||||||
|
except (subprocess.CalledProcessError, FileNotFoundError):
|
||||||
|
return None
|
||||||
|
m = BRANCH_ISSUE_RE.search(branch)
|
||||||
|
return int(m.group(1)) if m else None
|
||||||
|
|
||||||
|
|
||||||
|
# ── Epoch turnover ────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def _epoch_tag(now: datetime | None = None) -> tuple[str, dict]:
|
||||||
|
"""Generate the symbolic epoch tag and advance the daily counter.
|
||||||
|
|
||||||
|
Returns (epoch_string, epoch_parts) where epoch_parts is a dict with
|
||||||
|
week, weekday, daily_n for structured storage.
|
||||||
|
|
||||||
|
The daily counter persists in .epoch_counter as a two-line file:
|
||||||
|
line 1: ISO date (YYYY-MM-DD) of the current epoch day
|
||||||
|
line 2: integer count
|
||||||
|
When the date rolls over, the counter resets to 1.
|
||||||
|
"""
|
||||||
|
if now is None:
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
|
||||||
|
iso_cal = now.isocalendar() # (year, week, weekday)
|
||||||
|
week = iso_cal[1]
|
||||||
|
weekday = iso_cal[2]
|
||||||
|
today_str = now.strftime("%Y-%m-%d")
|
||||||
|
|
||||||
|
# Read / reset daily counter
|
||||||
|
daily_n = 1
|
||||||
|
EPOCH_COUNTER_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
if EPOCH_COUNTER_FILE.exists():
|
||||||
|
try:
|
||||||
|
lines = EPOCH_COUNTER_FILE.read_text().strip().splitlines()
|
||||||
|
if len(lines) == 2 and lines[0] == today_str:
|
||||||
|
daily_n = int(lines[1]) + 1
|
||||||
|
except (ValueError, IndexError):
|
||||||
|
pass # corrupt file — reset
|
||||||
|
|
||||||
|
# Persist
|
||||||
|
EPOCH_COUNTER_FILE.write_text(f"{today_str}\n{daily_n}\n")
|
||||||
|
|
||||||
|
tag = f"\u27f3{week:02d}.{weekday}:{daily_n:03d}"
|
||||||
|
parts = {"week": week, "weekday": weekday, "daily_n": daily_n}
|
||||||
|
return tag, parts
|
||||||
|
|
||||||
|
|
||||||
def parse_args() -> argparse.Namespace:
|
def parse_args() -> argparse.Namespace:
|
||||||
p = argparse.ArgumentParser(description="Log a cycle retrospective")
|
p = argparse.ArgumentParser(description="Log a cycle retrospective")
|
||||||
@@ -123,8 +198,30 @@ def update_summary() -> None:
|
|||||||
issue_failures[e["issue"]] = issue_failures.get(e["issue"], 0) + 1
|
issue_failures[e["issue"]] = issue_failures.get(e["issue"], 0) + 1
|
||||||
quarantine_candidates = {k: v for k, v in issue_failures.items() if v >= 2}
|
quarantine_candidates = {k: v for k, v in issue_failures.items() if v >= 2}
|
||||||
|
|
||||||
|
# Epoch turnover stats — cycles per week/day from epoch-tagged entries
|
||||||
|
epoch_entries = [e for e in recent if e.get("epoch")]
|
||||||
|
by_week: dict[int, int] = {}
|
||||||
|
by_weekday: dict[int, int] = {}
|
||||||
|
for e in epoch_entries:
|
||||||
|
w = e.get("epoch_week")
|
||||||
|
d = e.get("epoch_weekday")
|
||||||
|
if w is not None:
|
||||||
|
by_week[w] = by_week.get(w, 0) + 1
|
||||||
|
if d is not None:
|
||||||
|
by_weekday[d] = by_weekday.get(d, 0) + 1
|
||||||
|
|
||||||
|
# Current epoch — latest entry's epoch tag
|
||||||
|
current_epoch = epoch_entries[-1].get("epoch", "") if epoch_entries else ""
|
||||||
|
|
||||||
|
# Weekday names for display
|
||||||
|
weekday_glyphs = {1: "Mon", 2: "Tue", 3: "Wed", 4: "Thu",
|
||||||
|
5: "Fri", 6: "Sat", 7: "Sun"}
|
||||||
|
by_weekday_named = {weekday_glyphs.get(k, str(k)): v
|
||||||
|
for k, v in sorted(by_weekday.items())}
|
||||||
|
|
||||||
summary = {
|
summary = {
|
||||||
"updated_at": datetime.now(timezone.utc).isoformat(),
|
"updated_at": datetime.now(timezone.utc).isoformat(),
|
||||||
|
"current_epoch": current_epoch,
|
||||||
"window": len(recent),
|
"window": len(recent),
|
||||||
"measured_cycles": len(measured),
|
"measured_cycles": len(measured),
|
||||||
"total_cycles": len(entries),
|
"total_cycles": len(entries),
|
||||||
@@ -136,9 +233,12 @@ def update_summary() -> None:
|
|||||||
"total_lines_removed": sum(e.get("lines_removed", 0) for e in recent),
|
"total_lines_removed": sum(e.get("lines_removed", 0) for e in recent),
|
||||||
"total_prs_merged": sum(1 for e in recent if e.get("pr")),
|
"total_prs_merged": sum(1 for e in recent if e.get("pr")),
|
||||||
"by_type": type_stats,
|
"by_type": type_stats,
|
||||||
|
"by_week": dict(sorted(by_week.items())),
|
||||||
|
"by_weekday": by_weekday_named,
|
||||||
"quarantine_candidates": quarantine_candidates,
|
"quarantine_candidates": quarantine_candidates,
|
||||||
"recent_failures": [
|
"recent_failures": [
|
||||||
{"cycle": e["cycle"], "issue": e.get("issue"), "reason": e.get("reason", "")}
|
{"cycle": e["cycle"], "epoch": e.get("epoch", ""),
|
||||||
|
"issue": e.get("issue"), "reason": e.get("reason", "")}
|
||||||
for e in failures[-5:]
|
for e in failures[-5:]
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
@@ -149,6 +249,10 @@ def update_summary() -> None:
|
|||||||
def main() -> None:
|
def main() -> None:
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
|
|
||||||
|
# Auto-detect issue from branch when not explicitly provided
|
||||||
|
if args.issue is None:
|
||||||
|
args.issue = detect_issue_from_branch()
|
||||||
|
|
||||||
# Reject idle cycles — no issue and no duration means nothing happened
|
# Reject idle cycles — no issue and no duration means nothing happened
|
||||||
if not args.issue and args.duration == 0:
|
if not args.issue and args.duration == 0:
|
||||||
print(f"[retro] Cycle {args.cycle} skipped — idle (no issue, no duration)")
|
print(f"[retro] Cycle {args.cycle} skipped — idle (no issue, no duration)")
|
||||||
@@ -157,9 +261,17 @@ def main() -> None:
|
|||||||
# A cycle is only truly successful if hermes exited clean AND main is green
|
# A cycle is only truly successful if hermes exited clean AND main is green
|
||||||
truly_success = args.success and args.main_green
|
truly_success = args.success and args.main_green
|
||||||
|
|
||||||
|
# Generate epoch turnover tag
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
epoch_tag, epoch_parts = _epoch_tag(now)
|
||||||
|
|
||||||
entry = {
|
entry = {
|
||||||
"timestamp": datetime.now(timezone.utc).isoformat(),
|
"timestamp": now.isoformat(),
|
||||||
"cycle": args.cycle,
|
"cycle": args.cycle,
|
||||||
|
"epoch": epoch_tag,
|
||||||
|
"epoch_week": epoch_parts["week"],
|
||||||
|
"epoch_weekday": epoch_parts["weekday"],
|
||||||
|
"epoch_daily_n": epoch_parts["daily_n"],
|
||||||
"issue": args.issue,
|
"issue": args.issue,
|
||||||
"type": args.type,
|
"type": args.type,
|
||||||
"success": truly_success,
|
"success": truly_success,
|
||||||
@@ -184,7 +296,7 @@ def main() -> None:
|
|||||||
update_summary()
|
update_summary()
|
||||||
|
|
||||||
status = "✓ SUCCESS" if args.success else "✗ FAILURE"
|
status = "✓ SUCCESS" if args.success else "✗ FAILURE"
|
||||||
print(f"[retro] Cycle {args.cycle} {status}", end="")
|
print(f"[retro] {epoch_tag} Cycle {args.cycle} {status}", end="")
|
||||||
if args.issue:
|
if args.issue:
|
||||||
print(f" (#{args.issue} {args.type})", end="")
|
print(f" (#{args.issue} {args.type})", end="")
|
||||||
if args.duration:
|
if args.duration:
|
||||||
|
|||||||
407
scripts/loop_introspect.py
Normal file
407
scripts/loop_introspect.py
Normal file
@@ -0,0 +1,407 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Loop introspection — the self-improvement engine.
|
||||||
|
|
||||||
|
Analyzes retro data across time windows to detect trends, extract patterns,
|
||||||
|
and produce structured recommendations. Output is consumed by deep_triage
|
||||||
|
and injected into the loop prompt context.
|
||||||
|
|
||||||
|
This is the piece that closes the feedback loop:
|
||||||
|
cycle_retro → introspect → deep_triage → loop behavior changes
|
||||||
|
|
||||||
|
Run: python3 scripts/loop_introspect.py
|
||||||
|
Output: .loop/retro/insights.json (structured insights + recommendations)
|
||||||
|
Prints human-readable summary to stdout.
|
||||||
|
|
||||||
|
Called by: deep_triage.sh (before the LLM triage), timmy-loop.sh (every 50 cycles)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from collections import defaultdict
|
||||||
|
from datetime import datetime, timezone, timedelta
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
REPO_ROOT = Path(__file__).resolve().parent.parent
|
||||||
|
CYCLES_FILE = REPO_ROOT / ".loop" / "retro" / "cycles.jsonl"
|
||||||
|
DEEP_TRIAGE_FILE = REPO_ROOT / ".loop" / "retro" / "deep-triage.jsonl"
|
||||||
|
TRIAGE_FILE = REPO_ROOT / ".loop" / "retro" / "triage.jsonl"
|
||||||
|
QUARANTINE_FILE = REPO_ROOT / ".loop" / "quarantine.json"
|
||||||
|
INSIGHTS_FILE = REPO_ROOT / ".loop" / "retro" / "insights.json"
|
||||||
|
|
||||||
|
# ── Helpers ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def load_jsonl(path: Path) -> list[dict]:
|
||||||
|
"""Load a JSONL file, skipping bad lines."""
|
||||||
|
if not path.exists():
|
||||||
|
return []
|
||||||
|
entries = []
|
||||||
|
for line in path.read_text().strip().splitlines():
|
||||||
|
try:
|
||||||
|
entries.append(json.loads(line))
|
||||||
|
except (json.JSONDecodeError, ValueError):
|
||||||
|
continue
|
||||||
|
return entries
|
||||||
|
|
||||||
|
|
||||||
|
def parse_ts(ts_str: str) -> datetime | None:
|
||||||
|
"""Parse an ISO timestamp, tolerating missing tz."""
|
||||||
|
if not ts_str:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
dt = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
|
||||||
|
if dt.tzinfo is None:
|
||||||
|
dt = dt.replace(tzinfo=timezone.utc)
|
||||||
|
return dt
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def window(entries: list[dict], days: int) -> list[dict]:
|
||||||
|
"""Filter entries to the last N days."""
|
||||||
|
cutoff = datetime.now(timezone.utc) - timedelta(days=days)
|
||||||
|
result = []
|
||||||
|
for e in entries:
|
||||||
|
ts = parse_ts(e.get("timestamp", ""))
|
||||||
|
if ts and ts >= cutoff:
|
||||||
|
result.append(e)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
# ── Analysis functions ───────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def compute_trends(cycles: list[dict]) -> dict:
|
||||||
|
"""Compare recent window (last 7d) vs older window (7-14d ago)."""
|
||||||
|
recent = window(cycles, 7)
|
||||||
|
older = window(cycles, 14)
|
||||||
|
# Remove recent from older to get the 7-14d window
|
||||||
|
recent_set = {(e.get("cycle"), e.get("timestamp")) for e in recent}
|
||||||
|
older = [e for e in older if (e.get("cycle"), e.get("timestamp")) not in recent_set]
|
||||||
|
|
||||||
|
def stats(entries):
|
||||||
|
if not entries:
|
||||||
|
return {"count": 0, "success_rate": None, "avg_duration": None,
|
||||||
|
"lines_net": 0, "prs_merged": 0}
|
||||||
|
successes = sum(1 for e in entries if e.get("success"))
|
||||||
|
durations = [e["duration"] for e in entries if e.get("duration", 0) > 0]
|
||||||
|
return {
|
||||||
|
"count": len(entries),
|
||||||
|
"success_rate": round(successes / len(entries), 3) if entries else None,
|
||||||
|
"avg_duration": round(sum(durations) / len(durations)) if durations else None,
|
||||||
|
"lines_net": sum(e.get("lines_added", 0) - e.get("lines_removed", 0) for e in entries),
|
||||||
|
"prs_merged": sum(1 for e in entries if e.get("pr")),
|
||||||
|
}
|
||||||
|
|
||||||
|
recent_stats = stats(recent)
|
||||||
|
older_stats = stats(older)
|
||||||
|
|
||||||
|
trend = {
|
||||||
|
"recent_7d": recent_stats,
|
||||||
|
"previous_7d": older_stats,
|
||||||
|
"velocity_change": None,
|
||||||
|
"success_rate_change": None,
|
||||||
|
"duration_change": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
if recent_stats["count"] and older_stats["count"]:
|
||||||
|
trend["velocity_change"] = recent_stats["count"] - older_stats["count"]
|
||||||
|
if recent_stats["success_rate"] is not None and older_stats["success_rate"] is not None:
|
||||||
|
trend["success_rate_change"] = round(
|
||||||
|
recent_stats["success_rate"] - older_stats["success_rate"], 3
|
||||||
|
)
|
||||||
|
if recent_stats["avg_duration"] is not None and older_stats["avg_duration"] is not None:
|
||||||
|
trend["duration_change"] = recent_stats["avg_duration"] - older_stats["avg_duration"]
|
||||||
|
|
||||||
|
return trend
|
||||||
|
|
||||||
|
|
||||||
|
def type_analysis(cycles: list[dict]) -> dict:
|
||||||
|
"""Per-type success rates and durations."""
|
||||||
|
by_type: dict[str, list[dict]] = defaultdict(list)
|
||||||
|
for c in cycles:
|
||||||
|
by_type[c.get("type", "unknown")].append(c)
|
||||||
|
|
||||||
|
result = {}
|
||||||
|
for t, entries in by_type.items():
|
||||||
|
durations = [e["duration"] for e in entries if e.get("duration", 0) > 0]
|
||||||
|
successes = sum(1 for e in entries if e.get("success"))
|
||||||
|
result[t] = {
|
||||||
|
"count": len(entries),
|
||||||
|
"success_rate": round(successes / len(entries), 3) if entries else 0,
|
||||||
|
"avg_duration": round(sum(durations) / len(durations)) if durations else 0,
|
||||||
|
"max_duration": max(durations) if durations else 0,
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def repeat_failures(cycles: list[dict]) -> list[dict]:
|
||||||
|
"""Issues that have failed multiple times — quarantine candidates."""
|
||||||
|
failures: dict[int, list] = defaultdict(list)
|
||||||
|
for c in cycles:
|
||||||
|
if not c.get("success") and c.get("issue"):
|
||||||
|
failures[c["issue"]].append({
|
||||||
|
"cycle": c.get("cycle"),
|
||||||
|
"reason": c.get("reason", ""),
|
||||||
|
"duration": c.get("duration", 0),
|
||||||
|
})
|
||||||
|
# Only issues with 2+ failures
|
||||||
|
return [
|
||||||
|
{"issue": k, "failure_count": len(v), "attempts": v}
|
||||||
|
for k, v in sorted(failures.items(), key=lambda x: -len(x[1]))
|
||||||
|
if len(v) >= 2
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def duration_outliers(cycles: list[dict], threshold_multiple: float = 3.0) -> list[dict]:
|
||||||
|
"""Cycles that took way longer than average — something went wrong."""
|
||||||
|
durations = [c["duration"] for c in cycles if c.get("duration", 0) > 0]
|
||||||
|
if len(durations) < 5:
|
||||||
|
return []
|
||||||
|
avg = sum(durations) / len(durations)
|
||||||
|
threshold = avg * threshold_multiple
|
||||||
|
|
||||||
|
outliers = []
|
||||||
|
for c in cycles:
|
||||||
|
dur = c.get("duration", 0)
|
||||||
|
if dur > threshold:
|
||||||
|
outliers.append({
|
||||||
|
"cycle": c.get("cycle"),
|
||||||
|
"issue": c.get("issue"),
|
||||||
|
"type": c.get("type"),
|
||||||
|
"duration": dur,
|
||||||
|
"avg_duration": round(avg),
|
||||||
|
"multiple": round(dur / avg, 1) if avg > 0 else 0,
|
||||||
|
"reason": c.get("reason", ""),
|
||||||
|
})
|
||||||
|
return outliers
|
||||||
|
|
||||||
|
|
||||||
|
def triage_effectiveness(deep_triages: list[dict]) -> dict:
|
||||||
|
"""How well is the deep triage performing?"""
|
||||||
|
if not deep_triages:
|
||||||
|
return {"runs": 0, "note": "No deep triage data yet"}
|
||||||
|
|
||||||
|
total_reviewed = sum(d.get("issues_reviewed", 0) for d in deep_triages)
|
||||||
|
total_refined = sum(len(d.get("issues_refined", [])) for d in deep_triages)
|
||||||
|
total_created = sum(len(d.get("issues_created", [])) for d in deep_triages)
|
||||||
|
total_closed = sum(len(d.get("issues_closed", [])) for d in deep_triages)
|
||||||
|
timmy_available = sum(1 for d in deep_triages if d.get("timmy_available"))
|
||||||
|
|
||||||
|
# Extract Timmy's feedback themes
|
||||||
|
timmy_themes = []
|
||||||
|
for d in deep_triages:
|
||||||
|
fb = d.get("timmy_feedback", "")
|
||||||
|
if fb:
|
||||||
|
timmy_themes.append(fb[:200])
|
||||||
|
|
||||||
|
return {
|
||||||
|
"runs": len(deep_triages),
|
||||||
|
"total_reviewed": total_reviewed,
|
||||||
|
"total_refined": total_refined,
|
||||||
|
"total_created": total_created,
|
||||||
|
"total_closed": total_closed,
|
||||||
|
"timmy_consultation_rate": round(timmy_available / len(deep_triages), 2),
|
||||||
|
"timmy_recent_feedback": timmy_themes[-1] if timmy_themes else "",
|
||||||
|
"timmy_feedback_history": timmy_themes,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def generate_recommendations(
|
||||||
|
trends: dict,
|
||||||
|
types: dict,
|
||||||
|
repeats: list,
|
||||||
|
outliers: list,
|
||||||
|
triage_eff: dict,
|
||||||
|
) -> list[dict]:
|
||||||
|
"""Produce actionable recommendations from the analysis."""
|
||||||
|
recs = []
|
||||||
|
|
||||||
|
# 1. Success rate declining?
|
||||||
|
src = trends.get("success_rate_change")
|
||||||
|
if src is not None and src < -0.1:
|
||||||
|
recs.append({
|
||||||
|
"severity": "high",
|
||||||
|
"category": "reliability",
|
||||||
|
"finding": f"Success rate dropped {abs(src)*100:.0f}pp in the last 7 days",
|
||||||
|
"recommendation": "Review recent failures. Are issues poorly scoped? "
|
||||||
|
"Is main unstable? Check if triage is producing bad work items.",
|
||||||
|
})
|
||||||
|
|
||||||
|
# 2. Velocity dropping?
|
||||||
|
vc = trends.get("velocity_change")
|
||||||
|
if vc is not None and vc < -5:
|
||||||
|
recs.append({
|
||||||
|
"severity": "medium",
|
||||||
|
"category": "throughput",
|
||||||
|
"finding": f"Velocity dropped by {abs(vc)} cycles vs previous week",
|
||||||
|
"recommendation": "Check for loop stalls, long-running cycles, or queue starvation.",
|
||||||
|
})
|
||||||
|
|
||||||
|
# 3. Duration creep?
|
||||||
|
dc = trends.get("duration_change")
|
||||||
|
if dc is not None and dc > 120: # 2+ minutes longer
|
||||||
|
recs.append({
|
||||||
|
"severity": "medium",
|
||||||
|
"category": "efficiency",
|
||||||
|
"finding": f"Average cycle duration increased by {dc}s vs previous week",
|
||||||
|
"recommendation": "Issues may be growing in scope. Enforce tighter decomposition "
|
||||||
|
"in deep triage. Check if tests are getting slower.",
|
||||||
|
})
|
||||||
|
|
||||||
|
# 4. Type-specific problems
|
||||||
|
for t, info in types.items():
|
||||||
|
if info["count"] >= 3 and info["success_rate"] < 0.5:
|
||||||
|
recs.append({
|
||||||
|
"severity": "high",
|
||||||
|
"category": "type_reliability",
|
||||||
|
"finding": f"'{t}' issues fail {(1-info['success_rate'])*100:.0f}% of the time "
|
||||||
|
f"({info['count']} attempts)",
|
||||||
|
"recommendation": f"'{t}' issues need better scoping or different approach. "
|
||||||
|
f"Consider: tighter acceptance criteria, smaller scope, "
|
||||||
|
f"or delegating to Kimi with more context.",
|
||||||
|
})
|
||||||
|
if info["avg_duration"] > 600 and info["count"] >= 3: # >10 min avg
|
||||||
|
recs.append({
|
||||||
|
"severity": "medium",
|
||||||
|
"category": "type_efficiency",
|
||||||
|
"finding": f"'{t}' issues average {info['avg_duration']//60}m{info['avg_duration']%60}s "
|
||||||
|
f"(max {info['max_duration']//60}m)",
|
||||||
|
"recommendation": f"Break '{t}' issues into smaller pieces. Target <5 min per cycle.",
|
||||||
|
})
|
||||||
|
|
||||||
|
# 5. Repeat failures
|
||||||
|
for rf in repeats[:3]:
|
||||||
|
recs.append({
|
||||||
|
"severity": "high",
|
||||||
|
"category": "repeat_failure",
|
||||||
|
"finding": f"Issue #{rf['issue']} has failed {rf['failure_count']} times",
|
||||||
|
"recommendation": "Quarantine or rewrite this issue. Repeated failure = "
|
||||||
|
"bad scope or missing prerequisite.",
|
||||||
|
})
|
||||||
|
|
||||||
|
# 6. Outliers
|
||||||
|
if len(outliers) > 2:
|
||||||
|
recs.append({
|
||||||
|
"severity": "medium",
|
||||||
|
"category": "outliers",
|
||||||
|
"finding": f"{len(outliers)} cycles took {outliers[0].get('multiple', '?')}x+ "
|
||||||
|
f"longer than average",
|
||||||
|
"recommendation": "Long cycles waste resources. Add timeout enforcement or "
|
||||||
|
"break complex issues earlier.",
|
||||||
|
})
|
||||||
|
|
||||||
|
# 7. Code growth
|
||||||
|
recent = trends.get("recent_7d", {})
|
||||||
|
net = recent.get("lines_net", 0)
|
||||||
|
if net > 500:
|
||||||
|
recs.append({
|
||||||
|
"severity": "low",
|
||||||
|
"category": "code_health",
|
||||||
|
"finding": f"Net +{net} lines added in the last 7 days",
|
||||||
|
"recommendation": "Lines of code is a liability. Balance feature work with "
|
||||||
|
"refactoring. Target net-zero or negative line growth.",
|
||||||
|
})
|
||||||
|
|
||||||
|
# 8. Triage health
|
||||||
|
if triage_eff.get("runs", 0) == 0:
|
||||||
|
recs.append({
|
||||||
|
"severity": "high",
|
||||||
|
"category": "triage",
|
||||||
|
"finding": "Deep triage has never run",
|
||||||
|
"recommendation": "Enable deep triage (every 20 cycles). The loop needs "
|
||||||
|
"LLM-driven issue refinement to stay effective.",
|
||||||
|
})
|
||||||
|
|
||||||
|
# No recommendations = things are healthy
|
||||||
|
if not recs:
|
||||||
|
recs.append({
|
||||||
|
"severity": "info",
|
||||||
|
"category": "health",
|
||||||
|
"finding": "No significant issues detected",
|
||||||
|
"recommendation": "System is healthy. Continue current patterns.",
|
||||||
|
})
|
||||||
|
|
||||||
|
return recs
|
||||||
|
|
||||||
|
|
||||||
|
# ── Main ─────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
cycles = load_jsonl(CYCLES_FILE)
|
||||||
|
deep_triages = load_jsonl(DEEP_TRIAGE_FILE)
|
||||||
|
|
||||||
|
if not cycles:
|
||||||
|
print("[introspect] No cycle data found. Nothing to analyze.")
|
||||||
|
return
|
||||||
|
|
||||||
|
# Run all analyses
|
||||||
|
trends = compute_trends(cycles)
|
||||||
|
types = type_analysis(cycles)
|
||||||
|
repeats = repeat_failures(cycles)
|
||||||
|
outliers = duration_outliers(cycles)
|
||||||
|
triage_eff = triage_effectiveness(deep_triages)
|
||||||
|
recommendations = generate_recommendations(trends, types, repeats, outliers, triage_eff)
|
||||||
|
|
||||||
|
insights = {
|
||||||
|
"generated_at": datetime.now(timezone.utc).isoformat(),
|
||||||
|
"total_cycles_analyzed": len(cycles),
|
||||||
|
"trends": trends,
|
||||||
|
"by_type": types,
|
||||||
|
"repeat_failures": repeats[:5],
|
||||||
|
"duration_outliers": outliers[:5],
|
||||||
|
"triage_effectiveness": triage_eff,
|
||||||
|
"recommendations": recommendations,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Write insights
|
||||||
|
INSIGHTS_FILE.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
INSIGHTS_FILE.write_text(json.dumps(insights, indent=2) + "\n")
|
||||||
|
|
||||||
|
# Current epoch from latest entry
|
||||||
|
latest_epoch = ""
|
||||||
|
for c in reversed(cycles):
|
||||||
|
if c.get("epoch"):
|
||||||
|
latest_epoch = c["epoch"]
|
||||||
|
break
|
||||||
|
|
||||||
|
# Human-readable output
|
||||||
|
header = f"[introspect] Analyzed {len(cycles)} cycles"
|
||||||
|
if latest_epoch:
|
||||||
|
header += f" · current epoch: {latest_epoch}"
|
||||||
|
print(header)
|
||||||
|
|
||||||
|
print(f"\n TRENDS (7d vs previous 7d):")
|
||||||
|
r7 = trends["recent_7d"]
|
||||||
|
p7 = trends["previous_7d"]
|
||||||
|
print(f" Cycles: {r7['count']:>3d} (was {p7['count']})")
|
||||||
|
if r7["success_rate"] is not None:
|
||||||
|
arrow = "↑" if (trends["success_rate_change"] or 0) > 0 else "↓" if (trends["success_rate_change"] or 0) < 0 else "→"
|
||||||
|
print(f" Success rate: {r7['success_rate']*100:>4.0f}% {arrow}")
|
||||||
|
if r7["avg_duration"] is not None:
|
||||||
|
print(f" Avg duration: {r7['avg_duration']//60}m{r7['avg_duration']%60:02d}s")
|
||||||
|
print(f" PRs merged: {r7['prs_merged']:>3d} (was {p7['prs_merged']})")
|
||||||
|
print(f" Lines net: {r7['lines_net']:>+5d}")
|
||||||
|
|
||||||
|
print(f"\n BY TYPE:")
|
||||||
|
for t, info in sorted(types.items(), key=lambda x: -x[1]["count"]):
|
||||||
|
print(f" {t:12s} n={info['count']:>2d} "
|
||||||
|
f"ok={info['success_rate']*100:>3.0f}% "
|
||||||
|
f"avg={info['avg_duration']//60}m{info['avg_duration']%60:02d}s")
|
||||||
|
|
||||||
|
if repeats:
|
||||||
|
print(f"\n REPEAT FAILURES:")
|
||||||
|
for rf in repeats[:3]:
|
||||||
|
print(f" #{rf['issue']} failed {rf['failure_count']}x")
|
||||||
|
|
||||||
|
print(f"\n RECOMMENDATIONS ({len(recommendations)}):")
|
||||||
|
for i, rec in enumerate(recommendations, 1):
|
||||||
|
sev = {"high": "🔴", "medium": "🟡", "low": "🟢", "info": "ℹ️ "}.get(rec["severity"], "?")
|
||||||
|
print(f" {sev} {rec['finding']}")
|
||||||
|
print(f" → {rec['recommendation']}")
|
||||||
|
|
||||||
|
print(f"\n Written to: {INSIGHTS_FILE}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -10,6 +10,11 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
|
|||||||
APP_START_TIME: _datetime = _datetime.now(UTC)
|
APP_START_TIME: _datetime = _datetime.now(UTC)
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_ollama_url(url: str) -> str:
|
||||||
|
"""Replace localhost with 127.0.0.1 to avoid IPv6 resolution delays."""
|
||||||
|
return url.replace("localhost", "127.0.0.1")
|
||||||
|
|
||||||
|
|
||||||
class Settings(BaseSettings):
|
class Settings(BaseSettings):
|
||||||
"""Central configuration — all env-var access goes through this class."""
|
"""Central configuration — all env-var access goes through this class."""
|
||||||
|
|
||||||
@@ -19,6 +24,11 @@ class Settings(BaseSettings):
|
|||||||
# Ollama host — override with OLLAMA_URL env var or .env file
|
# Ollama host — override with OLLAMA_URL env var or .env file
|
||||||
ollama_url: str = "http://localhost:11434"
|
ollama_url: str = "http://localhost:11434"
|
||||||
|
|
||||||
|
@property
|
||||||
|
def normalized_ollama_url(self) -> str:
|
||||||
|
"""Return ollama_url with localhost replaced by 127.0.0.1."""
|
||||||
|
return normalize_ollama_url(self.ollama_url)
|
||||||
|
|
||||||
# LLM model passed to Agno/Ollama — override with OLLAMA_MODEL
|
# LLM model passed to Agno/Ollama — override with OLLAMA_MODEL
|
||||||
# qwen3:30b is the primary model — better reasoning and tool calling
|
# qwen3:30b is the primary model — better reasoning and tool calling
|
||||||
# than llama3.1:8b-instruct while still running locally on modest hardware.
|
# than llama3.1:8b-instruct while still running locally on modest hardware.
|
||||||
@@ -392,7 +402,7 @@ def check_ollama_model_available(model_name: str) -> bool:
|
|||||||
import json
|
import json
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
|
||||||
url = settings.ollama_url.replace("localhost", "127.0.0.1")
|
url = settings.normalized_ollama_url
|
||||||
req = urllib.request.Request(
|
req = urllib.request.Request(
|
||||||
f"{url}/api/tags",
|
f"{url}/api/tags",
|
||||||
method="GET",
|
method="GET",
|
||||||
|
|||||||
@@ -329,33 +329,21 @@ async def _discord_token_watcher() -> None:
|
|||||||
logger.warning("Discord auto-start failed: %s", exc)
|
logger.warning("Discord auto-start failed: %s", exc)
|
||||||
|
|
||||||
|
|
||||||
@asynccontextmanager
|
def _init_services() -> None:
|
||||||
async def lifespan(app: FastAPI):
|
"""Validate config, enable event persistence, and init Spark engine."""
|
||||||
"""Application lifespan manager with non-blocking startup."""
|
|
||||||
|
|
||||||
# Validate security config (no-op in test mode)
|
|
||||||
from config import validate_startup
|
from config import validate_startup
|
||||||
|
from infrastructure.events.bus import init_event_bus_persistence
|
||||||
|
from spark.engine import get_spark_engine
|
||||||
|
|
||||||
validate_startup()
|
validate_startup()
|
||||||
|
|
||||||
# Enable event persistence (unified EventBus + swarm event_log)
|
|
||||||
from infrastructure.events.bus import init_event_bus_persistence
|
|
||||||
|
|
||||||
init_event_bus_persistence()
|
init_event_bus_persistence()
|
||||||
|
|
||||||
# Create all background tasks without waiting for them
|
|
||||||
briefing_task = asyncio.create_task(_briefing_scheduler())
|
|
||||||
thinking_task = asyncio.create_task(_thinking_scheduler())
|
|
||||||
loop_qa_task = asyncio.create_task(_loop_qa_scheduler())
|
|
||||||
presence_task = asyncio.create_task(_presence_watcher())
|
|
||||||
|
|
||||||
# Initialize Spark Intelligence engine
|
|
||||||
from spark.engine import get_spark_engine
|
|
||||||
|
|
||||||
if get_spark_engine().enabled:
|
if get_spark_engine().enabled:
|
||||||
logger.info("Spark Intelligence active — event capture enabled")
|
logger.info("Spark Intelligence active — event capture enabled")
|
||||||
|
|
||||||
# Auto-prune old vector store memories on startup
|
|
||||||
|
def _auto_prune() -> None:
|
||||||
|
"""Run startup housekeeping: prune memories, thoughts, events, and check vault size."""
|
||||||
if settings.memory_prune_days > 0:
|
if settings.memory_prune_days > 0:
|
||||||
try:
|
try:
|
||||||
from timmy.memory_system import prune_memories
|
from timmy.memory_system import prune_memories
|
||||||
@@ -373,7 +361,6 @@ async def lifespan(app: FastAPI):
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("Memory auto-prune skipped: %s", exc)
|
logger.debug("Memory auto-prune skipped: %s", exc)
|
||||||
|
|
||||||
# Auto-prune old thoughts on startup
|
|
||||||
if settings.thoughts_prune_days > 0:
|
if settings.thoughts_prune_days > 0:
|
||||||
try:
|
try:
|
||||||
from timmy.thinking import thinking_engine
|
from timmy.thinking import thinking_engine
|
||||||
@@ -391,7 +378,6 @@ async def lifespan(app: FastAPI):
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("Thought auto-prune skipped: %s", exc)
|
logger.debug("Thought auto-prune skipped: %s", exc)
|
||||||
|
|
||||||
# Auto-prune old system events on startup
|
|
||||||
if settings.events_prune_days > 0:
|
if settings.events_prune_days > 0:
|
||||||
try:
|
try:
|
||||||
from swarm.event_log import prune_old_events
|
from swarm.event_log import prune_old_events
|
||||||
@@ -409,7 +395,6 @@ async def lifespan(app: FastAPI):
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("Event auto-prune skipped: %s", exc)
|
logger.debug("Event auto-prune skipped: %s", exc)
|
||||||
|
|
||||||
# Warn if memory vault exceeds size limit
|
|
||||||
if settings.memory_vault_max_mb > 0:
|
if settings.memory_vault_max_mb > 0:
|
||||||
try:
|
try:
|
||||||
vault_path = Path(settings.repo_root) / "memory" / "notes"
|
vault_path = Path(settings.repo_root) / "memory" / "notes"
|
||||||
@@ -425,6 +410,60 @@ async def lifespan(app: FastAPI):
|
|||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logger.debug("Vault size check skipped: %s", exc)
|
logger.debug("Vault size check skipped: %s", exc)
|
||||||
|
|
||||||
|
|
||||||
|
def _register_error_recorder() -> None:
|
||||||
|
"""Wire the session logger into the error-capture system."""
|
||||||
|
try:
|
||||||
|
from infrastructure.error_capture import register_error_recorder
|
||||||
|
from timmy.session_logger import get_session_logger
|
||||||
|
|
||||||
|
register_error_recorder(get_session_logger().record_error)
|
||||||
|
except Exception:
|
||||||
|
logger.debug("Failed to register error recorder")
|
||||||
|
|
||||||
|
|
||||||
|
async def _shutdown(
|
||||||
|
tasks: list[asyncio.Task],
|
||||||
|
workshop_heartbeat: object,
|
||||||
|
) -> None:
|
||||||
|
"""Stop integrations, close sessions, and cancel background tasks."""
|
||||||
|
from integrations.chat_bridge.vendors.discord import discord_bot
|
||||||
|
from integrations.telegram_bot.bot import telegram_bot
|
||||||
|
|
||||||
|
await discord_bot.stop()
|
||||||
|
await telegram_bot.stop()
|
||||||
|
|
||||||
|
try:
|
||||||
|
from timmy.mcp_tools import close_mcp_sessions
|
||||||
|
|
||||||
|
await close_mcp_sessions()
|
||||||
|
except Exception as exc:
|
||||||
|
logger.debug("MCP shutdown: %s", exc)
|
||||||
|
|
||||||
|
await workshop_heartbeat.stop() # type: ignore[union-attr]
|
||||||
|
|
||||||
|
for task in tasks:
|
||||||
|
task.cancel()
|
||||||
|
try:
|
||||||
|
await task
|
||||||
|
except asyncio.CancelledError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
@asynccontextmanager
|
||||||
|
async def lifespan(app: FastAPI):
|
||||||
|
"""Application lifespan manager with non-blocking startup."""
|
||||||
|
_init_services()
|
||||||
|
_auto_prune()
|
||||||
|
|
||||||
|
# Create all background tasks without waiting for them
|
||||||
|
bg_tasks = [
|
||||||
|
asyncio.create_task(_briefing_scheduler()),
|
||||||
|
asyncio.create_task(_thinking_scheduler()),
|
||||||
|
asyncio.create_task(_loop_qa_scheduler()),
|
||||||
|
asyncio.create_task(_presence_watcher()),
|
||||||
|
]
|
||||||
|
|
||||||
# Start Workshop presence heartbeat with WS relay
|
# Start Workshop presence heartbeat with WS relay
|
||||||
from dashboard.routes.world import broadcast_world_state
|
from dashboard.routes.world import broadcast_world_state
|
||||||
from timmy.workshop_state import WorkshopHeartbeat
|
from timmy.workshop_state import WorkshopHeartbeat
|
||||||
@@ -433,45 +472,14 @@ async def lifespan(app: FastAPI):
|
|||||||
await workshop_heartbeat.start()
|
await workshop_heartbeat.start()
|
||||||
|
|
||||||
# Start chat integrations in background
|
# Start chat integrations in background
|
||||||
chat_task = asyncio.create_task(_start_chat_integrations_background())
|
bg_tasks.append(asyncio.create_task(_start_chat_integrations_background()))
|
||||||
|
|
||||||
# Register session logger with error capture (breaks infrastructure → timmy circular dep)
|
|
||||||
try:
|
|
||||||
from infrastructure.error_capture import register_error_recorder
|
|
||||||
from timmy.session_logger import get_session_logger
|
|
||||||
|
|
||||||
register_error_recorder(get_session_logger().record_error)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
_register_error_recorder()
|
||||||
logger.info("✓ Dashboard ready for requests")
|
logger.info("✓ Dashboard ready for requests")
|
||||||
|
|
||||||
yield
|
yield
|
||||||
|
|
||||||
# Cleanup on shutdown
|
await _shutdown(bg_tasks, workshop_heartbeat)
|
||||||
from integrations.chat_bridge.vendors.discord import discord_bot
|
|
||||||
from integrations.telegram_bot.bot import telegram_bot
|
|
||||||
|
|
||||||
await discord_bot.stop()
|
|
||||||
await telegram_bot.stop()
|
|
||||||
|
|
||||||
# Close MCP tool server sessions
|
|
||||||
try:
|
|
||||||
from timmy.mcp_tools import close_mcp_sessions
|
|
||||||
|
|
||||||
await close_mcp_sessions()
|
|
||||||
except Exception as exc:
|
|
||||||
logger.debug("MCP shutdown: %s", exc)
|
|
||||||
|
|
||||||
await workshop_heartbeat.stop()
|
|
||||||
|
|
||||||
for task in [briefing_task, thinking_task, chat_task, loop_qa_task, presence_task]:
|
|
||||||
if task:
|
|
||||||
task.cancel()
|
|
||||||
try:
|
|
||||||
await task
|
|
||||||
except asyncio.CancelledError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
app = FastAPI(
|
app = FastAPI(
|
||||||
|
|||||||
@@ -91,7 +91,7 @@ async def chat_agent(request: Request, message: str = Form(...)):
|
|||||||
|
|
||||||
thinking_engine.record_user_input()
|
thinking_engine.record_user_input()
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
logger.debug("Failed to record user input for thinking engine")
|
||||||
|
|
||||||
timestamp = datetime.now().strftime("%H:%M:%S")
|
timestamp = datetime.now().strftime("%H:%M:%S")
|
||||||
response_text = None
|
response_text = None
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ async def api_chat(request: Request):
|
|||||||
|
|
||||||
thinking_engine.record_user_input()
|
thinking_engine.record_user_input()
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
logger.debug("Failed to record user input for thinking engine")
|
||||||
|
|
||||||
timestamp = datetime.now().strftime("%H:%M:%S")
|
timestamp = datetime.now().strftime("%H:%M:%S")
|
||||||
|
|
||||||
|
|||||||
@@ -65,7 +65,7 @@ def _check_ollama_sync() -> DependencyStatus:
|
|||||||
try:
|
try:
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
|
||||||
url = settings.ollama_url.replace("localhost", "127.0.0.1")
|
url = settings.normalized_ollama_url
|
||||||
req = urllib.request.Request(
|
req = urllib.request.Request(
|
||||||
f"{url}/api/tags",
|
f"{url}/api/tags",
|
||||||
method="GET",
|
method="GET",
|
||||||
|
|||||||
@@ -166,7 +166,7 @@ async def api_briefing_status():
|
|||||||
if cached:
|
if cached:
|
||||||
last_generated = cached.generated_at.isoformat()
|
last_generated = cached.generated_at.isoformat()
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
logger.debug("Failed to read briefing cache")
|
||||||
|
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
{
|
{
|
||||||
@@ -190,6 +190,7 @@ async def api_memory_status():
|
|||||||
stats = get_memory_stats()
|
stats = get_memory_stats()
|
||||||
indexed_files = stats.get("total_entries", 0)
|
indexed_files = stats.get("total_entries", 0)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
logger.debug("Failed to get memory stats")
|
||||||
indexed_files = 0
|
indexed_files = 0
|
||||||
|
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
@@ -215,7 +216,7 @@ async def api_swarm_status():
|
|||||||
).fetchone()
|
).fetchone()
|
||||||
pending_tasks = row["cnt"] if row else 0
|
pending_tasks = row["cnt"] if row else 0
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
logger.debug("Failed to count pending tasks")
|
||||||
|
|
||||||
return JSONResponse(
|
return JSONResponse(
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -221,7 +221,7 @@ async def _heartbeat(websocket: WebSocket) -> None:
|
|||||||
await asyncio.sleep(_HEARTBEAT_INTERVAL)
|
await asyncio.sleep(_HEARTBEAT_INTERVAL)
|
||||||
await websocket.send_text(json.dumps({"type": "ping"}))
|
await websocket.send_text(json.dumps({"type": "ping"}))
|
||||||
except Exception:
|
except Exception:
|
||||||
pass # connection gone — receive loop will clean up
|
logger.debug("Heartbeat stopped — connection gone")
|
||||||
|
|
||||||
|
|
||||||
@router.websocket("/ws")
|
@router.websocket("/ws")
|
||||||
@@ -250,7 +250,7 @@ async def world_ws(websocket: WebSocket) -> None:
|
|||||||
raw = await websocket.receive_text()
|
raw = await websocket.receive_text()
|
||||||
await _handle_client_message(raw)
|
await _handle_client_message(raw)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
logger.debug("WebSocket receive loop ended")
|
||||||
finally:
|
finally:
|
||||||
ping_task.cancel()
|
ping_task.cancel()
|
||||||
if websocket in _ws_clients:
|
if websocket in _ws_clients:
|
||||||
@@ -265,6 +265,7 @@ async def _broadcast(message: str) -> None:
|
|||||||
try:
|
try:
|
||||||
await ws.send_text(message)
|
await ws.send_text(message)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
logger.debug("Pruning dead WebSocket client")
|
||||||
dead.append(ws)
|
dead.append(ws)
|
||||||
for ws in dead:
|
for ws in dead:
|
||||||
if ws in _ws_clients:
|
if ws in _ws_clients:
|
||||||
@@ -340,7 +341,7 @@ async def _bark_and_broadcast(visitor_text: str) -> None:
|
|||||||
|
|
||||||
pip_familiar.on_event("visitor_spoke")
|
pip_familiar.on_event("visitor_spoke")
|
||||||
except Exception:
|
except Exception:
|
||||||
pass # Pip is optional
|
logger.debug("Pip familiar notification failed (optional)")
|
||||||
|
|
||||||
_refresh_ground(visitor_text)
|
_refresh_ground(visitor_text)
|
||||||
_tick_commitments()
|
_tick_commitments()
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ import logging
|
|||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from enum import Enum, auto
|
from enum import Enum, auto
|
||||||
|
|
||||||
from config import settings
|
from config import normalize_ollama_url, settings
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -307,7 +307,7 @@ class MultiModalManager:
|
|||||||
import json
|
import json
|
||||||
import urllib.request
|
import urllib.request
|
||||||
|
|
||||||
url = self.ollama_url.replace("localhost", "127.0.0.1")
|
url = normalize_ollama_url(self.ollama_url)
|
||||||
req = urllib.request.Request(
|
req = urllib.request.Request(
|
||||||
f"{url}/api/tags",
|
f"{url}/api/tags",
|
||||||
method="GET",
|
method="GET",
|
||||||
@@ -462,7 +462,7 @@ class MultiModalManager:
|
|||||||
|
|
||||||
logger.info("Pulling model: %s", model_name)
|
logger.info("Pulling model: %s", model_name)
|
||||||
|
|
||||||
url = self.ollama_url.replace("localhost", "127.0.0.1")
|
url = normalize_ollama_url(self.ollama_url)
|
||||||
req = urllib.request.Request(
|
req = urllib.request.Request(
|
||||||
f"{url}/api/pull",
|
f"{url}/api/pull",
|
||||||
method="POST",
|
method="POST",
|
||||||
|
|||||||
@@ -388,6 +388,101 @@ class CascadeRouter:
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def _select_model(
|
||||||
|
self, provider: Provider, model: str | None, content_type: ContentType
|
||||||
|
) -> tuple[str | None, bool]:
|
||||||
|
"""Select the best model for the request, with vision fallback.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Tuple of (selected_model, is_fallback_model).
|
||||||
|
"""
|
||||||
|
selected_model = model or provider.get_default_model()
|
||||||
|
is_fallback = False
|
||||||
|
|
||||||
|
if content_type != ContentType.TEXT and selected_model:
|
||||||
|
if provider.type == "ollama" and self._mm_manager:
|
||||||
|
from infrastructure.models.multimodal import ModelCapability
|
||||||
|
|
||||||
|
if content_type == ContentType.VISION:
|
||||||
|
supports = self._mm_manager.model_supports(
|
||||||
|
selected_model, ModelCapability.VISION
|
||||||
|
)
|
||||||
|
if not supports:
|
||||||
|
fallback = self._get_fallback_model(provider, selected_model, content_type)
|
||||||
|
if fallback:
|
||||||
|
logger.info(
|
||||||
|
"Model %s doesn't support vision, falling back to %s",
|
||||||
|
selected_model,
|
||||||
|
fallback,
|
||||||
|
)
|
||||||
|
selected_model = fallback
|
||||||
|
is_fallback = True
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
"No vision-capable model found on %s, trying anyway",
|
||||||
|
provider.name,
|
||||||
|
)
|
||||||
|
|
||||||
|
return selected_model, is_fallback
|
||||||
|
|
||||||
|
async def _attempt_with_retry(
|
||||||
|
self,
|
||||||
|
provider: Provider,
|
||||||
|
messages: list[dict],
|
||||||
|
model: str | None,
|
||||||
|
temperature: float,
|
||||||
|
max_tokens: int | None,
|
||||||
|
content_type: ContentType,
|
||||||
|
) -> dict:
|
||||||
|
"""Try a provider with retries, returning the result dict.
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
RuntimeError: If all retry attempts fail.
|
||||||
|
Returns error strings collected during retries via the exception message.
|
||||||
|
"""
|
||||||
|
errors: list[str] = []
|
||||||
|
for attempt in range(self.config.max_retries_per_provider):
|
||||||
|
try:
|
||||||
|
return await self._try_provider(
|
||||||
|
provider=provider,
|
||||||
|
messages=messages,
|
||||||
|
model=model,
|
||||||
|
temperature=temperature,
|
||||||
|
max_tokens=max_tokens,
|
||||||
|
content_type=content_type,
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
error_msg = str(exc)
|
||||||
|
logger.warning(
|
||||||
|
"Provider %s attempt %d failed: %s",
|
||||||
|
provider.name,
|
||||||
|
attempt + 1,
|
||||||
|
error_msg,
|
||||||
|
)
|
||||||
|
errors.append(f"{provider.name}: {error_msg}")
|
||||||
|
|
||||||
|
if attempt < self.config.max_retries_per_provider - 1:
|
||||||
|
await asyncio.sleep(self.config.retry_delay_seconds)
|
||||||
|
|
||||||
|
raise RuntimeError("; ".join(errors))
|
||||||
|
|
||||||
|
def _is_provider_available(self, provider: Provider) -> bool:
|
||||||
|
"""Check if a provider should be tried (enabled + circuit breaker)."""
|
||||||
|
if not provider.enabled:
|
||||||
|
logger.debug("Skipping %s (disabled)", provider.name)
|
||||||
|
return False
|
||||||
|
|
||||||
|
if provider.status == ProviderStatus.UNHEALTHY:
|
||||||
|
if self._can_close_circuit(provider):
|
||||||
|
provider.circuit_state = CircuitState.HALF_OPEN
|
||||||
|
provider.half_open_calls = 0
|
||||||
|
logger.info("Circuit breaker half-open for %s", provider.name)
|
||||||
|
else:
|
||||||
|
logger.debug("Skipping %s (circuit open)", provider.name)
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
async def complete(
|
async def complete(
|
||||||
self,
|
self,
|
||||||
messages: list[dict],
|
messages: list[dict],
|
||||||
@@ -414,7 +509,6 @@ class CascadeRouter:
|
|||||||
Raises:
|
Raises:
|
||||||
RuntimeError: If all providers fail
|
RuntimeError: If all providers fail
|
||||||
"""
|
"""
|
||||||
# Detect content type for multi-modal routing
|
|
||||||
content_type = self._detect_content_type(messages)
|
content_type = self._detect_content_type(messages)
|
||||||
if content_type != ContentType.TEXT:
|
if content_type != ContentType.TEXT:
|
||||||
logger.debug("Detected %s content, selecting appropriate model", content_type.value)
|
logger.debug("Detected %s content, selecting appropriate model", content_type.value)
|
||||||
@@ -422,93 +516,34 @@ class CascadeRouter:
|
|||||||
errors = []
|
errors = []
|
||||||
|
|
||||||
for provider in self.providers:
|
for provider in self.providers:
|
||||||
# Skip disabled providers
|
if not self._is_provider_available(provider):
|
||||||
if not provider.enabled:
|
|
||||||
logger.debug("Skipping %s (disabled)", provider.name)
|
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Skip unhealthy providers (circuit breaker)
|
selected_model, is_fallback_model = self._select_model(provider, model, content_type)
|
||||||
if provider.status == ProviderStatus.UNHEALTHY:
|
|
||||||
# Check if circuit breaker can close
|
|
||||||
if self._can_close_circuit(provider):
|
|
||||||
provider.circuit_state = CircuitState.HALF_OPEN
|
|
||||||
provider.half_open_calls = 0
|
|
||||||
logger.info("Circuit breaker half-open for %s", provider.name)
|
|
||||||
else:
|
|
||||||
logger.debug("Skipping %s (circuit open)", provider.name)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Determine which model to use
|
try:
|
||||||
selected_model = model or provider.get_default_model()
|
result = await self._attempt_with_retry(
|
||||||
is_fallback_model = False
|
provider,
|
||||||
|
messages,
|
||||||
|
selected_model,
|
||||||
|
temperature,
|
||||||
|
max_tokens,
|
||||||
|
content_type,
|
||||||
|
)
|
||||||
|
except RuntimeError as exc:
|
||||||
|
errors.append(str(exc))
|
||||||
|
self._record_failure(provider)
|
||||||
|
continue
|
||||||
|
|
||||||
# For non-text content, check if model supports it
|
self._record_success(provider, result.get("latency_ms", 0))
|
||||||
if content_type != ContentType.TEXT and selected_model:
|
return {
|
||||||
if provider.type == "ollama" and self._mm_manager:
|
"content": result["content"],
|
||||||
from infrastructure.models.multimodal import ModelCapability
|
"provider": provider.name,
|
||||||
|
"model": result.get("model", selected_model or provider.get_default_model()),
|
||||||
|
"latency_ms": result.get("latency_ms", 0),
|
||||||
|
"is_fallback_model": is_fallback_model,
|
||||||
|
}
|
||||||
|
|
||||||
# Check if selected model supports the required capability
|
|
||||||
if content_type == ContentType.VISION:
|
|
||||||
supports = self._mm_manager.model_supports(
|
|
||||||
selected_model, ModelCapability.VISION
|
|
||||||
)
|
|
||||||
if not supports:
|
|
||||||
# Find fallback model
|
|
||||||
fallback = self._get_fallback_model(
|
|
||||||
provider, selected_model, content_type
|
|
||||||
)
|
|
||||||
if fallback:
|
|
||||||
logger.info(
|
|
||||||
"Model %s doesn't support vision, falling back to %s",
|
|
||||||
selected_model,
|
|
||||||
fallback,
|
|
||||||
)
|
|
||||||
selected_model = fallback
|
|
||||||
is_fallback_model = True
|
|
||||||
else:
|
|
||||||
logger.warning(
|
|
||||||
"No vision-capable model found on %s, trying anyway",
|
|
||||||
provider.name,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Try this provider
|
|
||||||
for attempt in range(self.config.max_retries_per_provider):
|
|
||||||
try:
|
|
||||||
result = await self._try_provider(
|
|
||||||
provider=provider,
|
|
||||||
messages=messages,
|
|
||||||
model=selected_model,
|
|
||||||
temperature=temperature,
|
|
||||||
max_tokens=max_tokens,
|
|
||||||
content_type=content_type,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Success! Update metrics and return
|
|
||||||
self._record_success(provider, result.get("latency_ms", 0))
|
|
||||||
return {
|
|
||||||
"content": result["content"],
|
|
||||||
"provider": provider.name,
|
|
||||||
"model": result.get(
|
|
||||||
"model", selected_model or provider.get_default_model()
|
|
||||||
),
|
|
||||||
"latency_ms": result.get("latency_ms", 0),
|
|
||||||
"is_fallback_model": is_fallback_model,
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as exc:
|
|
||||||
error_msg = str(exc)
|
|
||||||
logger.warning(
|
|
||||||
"Provider %s attempt %d failed: %s", provider.name, attempt + 1, error_msg
|
|
||||||
)
|
|
||||||
errors.append(f"{provider.name}: {error_msg}")
|
|
||||||
|
|
||||||
if attempt < self.config.max_retries_per_provider - 1:
|
|
||||||
await asyncio.sleep(self.config.retry_delay_seconds)
|
|
||||||
|
|
||||||
# All retries failed for this provider
|
|
||||||
self._record_failure(provider)
|
|
||||||
|
|
||||||
# All providers failed
|
|
||||||
raise RuntimeError(f"All providers failed: {'; '.join(errors)}")
|
raise RuntimeError(f"All providers failed: {'; '.join(errors)}")
|
||||||
|
|
||||||
async def _try_provider(
|
async def _try_provider(
|
||||||
@@ -574,7 +609,7 @@ class CascadeRouter:
|
|||||||
"""Call Ollama API with multi-modal support."""
|
"""Call Ollama API with multi-modal support."""
|
||||||
import aiohttp
|
import aiohttp
|
||||||
|
|
||||||
url = f"{provider.url}/api/chat"
|
url = f"{provider.url or settings.ollama_url}/api/chat"
|
||||||
|
|
||||||
# Transform messages for Ollama format (including images)
|
# Transform messages for Ollama format (including images)
|
||||||
transformed_messages = self._transform_messages_for_ollama(messages)
|
transformed_messages = self._transform_messages_for_ollama(messages)
|
||||||
|
|||||||
@@ -63,7 +63,7 @@ def _pull_model(model_name: str) -> bool:
|
|||||||
|
|
||||||
logger.info("Pulling model: %s", model_name)
|
logger.info("Pulling model: %s", model_name)
|
||||||
|
|
||||||
url = settings.ollama_url.replace("localhost", "127.0.0.1")
|
url = settings.normalized_ollama_url
|
||||||
req = urllib.request.Request(
|
req = urllib.request.Request(
|
||||||
f"{url}/api/pull",
|
f"{url}/api/pull",
|
||||||
method="POST",
|
method="POST",
|
||||||
|
|||||||
@@ -95,6 +95,126 @@ def _parse_steps(plan_text: str) -> list[str]:
|
|||||||
return [line.strip() for line in plan_text.strip().splitlines() if line.strip()]
|
return [line.strip() for line in plan_text.strip().splitlines() if line.strip()]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Extracted helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_content(run_result) -> str:
|
||||||
|
"""Extract text content from an agent run result."""
|
||||||
|
return run_result.content if hasattr(run_result, "content") else str(run_result)
|
||||||
|
|
||||||
|
|
||||||
|
def _clean(text: str) -> str:
|
||||||
|
"""Clean a model response using session's response cleaner."""
|
||||||
|
from timmy.session import _clean_response
|
||||||
|
|
||||||
|
return _clean_response(text)
|
||||||
|
|
||||||
|
|
||||||
|
async def _plan_task(
|
||||||
|
agent, task: str, session_id: str, max_steps: int
|
||||||
|
) -> tuple[list[str], bool] | str:
|
||||||
|
"""Run the planning phase — returns (steps, was_truncated) or error string."""
|
||||||
|
plan_prompt = (
|
||||||
|
f"Break this task into numbered steps (max {max_steps}). "
|
||||||
|
f"Return ONLY a numbered list, nothing else.\n\n"
|
||||||
|
f"Task: {task}"
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
plan_run = await asyncio.to_thread(
|
||||||
|
agent.run, plan_prompt, stream=False, session_id=f"{session_id}_plan"
|
||||||
|
)
|
||||||
|
plan_text = _extract_content(plan_run)
|
||||||
|
except Exception as exc: # broad catch intentional: agent.run can raise any error
|
||||||
|
logger.error("Agentic loop: planning failed: %s", exc)
|
||||||
|
return f"Planning failed: {exc}"
|
||||||
|
|
||||||
|
steps = _parse_steps(plan_text)
|
||||||
|
if not steps:
|
||||||
|
return "Planning produced no steps."
|
||||||
|
|
||||||
|
planned_count = len(steps)
|
||||||
|
steps = steps[:max_steps]
|
||||||
|
return steps, planned_count > len(steps)
|
||||||
|
|
||||||
|
|
||||||
|
async def _execute_step(
|
||||||
|
agent,
|
||||||
|
task: str,
|
||||||
|
step_desc: str,
|
||||||
|
step_num: int,
|
||||||
|
total_steps: int,
|
||||||
|
recent_results: list[str],
|
||||||
|
session_id: str,
|
||||||
|
) -> AgenticStep:
|
||||||
|
"""Execute a single step, returning an AgenticStep."""
|
||||||
|
step_start = time.monotonic()
|
||||||
|
context = (
|
||||||
|
f"Task: {task}\n"
|
||||||
|
f"Step {step_num}/{total_steps}: {step_desc}\n"
|
||||||
|
f"Recent progress: {recent_results[-2:] if recent_results else []}\n\n"
|
||||||
|
f"Execute this step and report what you did."
|
||||||
|
)
|
||||||
|
step_run = await asyncio.to_thread(
|
||||||
|
agent.run, context, stream=False, session_id=f"{session_id}_step{step_num}"
|
||||||
|
)
|
||||||
|
step_result = _clean(_extract_content(step_run))
|
||||||
|
return AgenticStep(
|
||||||
|
step_num=step_num,
|
||||||
|
description=step_desc,
|
||||||
|
result=step_result,
|
||||||
|
status="completed",
|
||||||
|
duration_ms=int((time.monotonic() - step_start) * 1000),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def _adapt_step(
|
||||||
|
agent,
|
||||||
|
step_desc: str,
|
||||||
|
step_num: int,
|
||||||
|
error: Exception,
|
||||||
|
step_start: float,
|
||||||
|
session_id: str,
|
||||||
|
) -> AgenticStep:
|
||||||
|
"""Attempt adaptation after a step failure."""
|
||||||
|
adapt_prompt = (
|
||||||
|
f"Step {step_num} failed with error: {error}\n"
|
||||||
|
f"Original step was: {step_desc}\n"
|
||||||
|
f"Adapt the plan and try an alternative approach for this step."
|
||||||
|
)
|
||||||
|
adapt_run = await asyncio.to_thread(
|
||||||
|
agent.run, adapt_prompt, stream=False, session_id=f"{session_id}_adapt{step_num}"
|
||||||
|
)
|
||||||
|
adapt_result = _clean(_extract_content(adapt_run))
|
||||||
|
return AgenticStep(
|
||||||
|
step_num=step_num,
|
||||||
|
description=f"[Adapted] {step_desc}",
|
||||||
|
result=adapt_result,
|
||||||
|
status="adapted",
|
||||||
|
duration_ms=int((time.monotonic() - step_start) * 1000),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _summarize(result: AgenticResult, total_steps: int, was_truncated: bool) -> None:
|
||||||
|
"""Fill in summary and final status on the result object (mutates in place)."""
|
||||||
|
completed = sum(1 for s in result.steps if s.status == "completed")
|
||||||
|
adapted = sum(1 for s in result.steps if s.status == "adapted")
|
||||||
|
failed = sum(1 for s in result.steps if s.status == "failed")
|
||||||
|
|
||||||
|
parts = [f"Completed {completed}/{total_steps} steps"]
|
||||||
|
if adapted:
|
||||||
|
parts.append(f"{adapted} adapted")
|
||||||
|
if failed:
|
||||||
|
parts.append(f"{failed} failed")
|
||||||
|
result.summary = f"{result.task}: {', '.join(parts)}."
|
||||||
|
|
||||||
|
if was_truncated or len(result.steps) < total_steps or failed:
|
||||||
|
result.status = "partial"
|
||||||
|
else:
|
||||||
|
result.status = "completed"
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Core loop
|
# Core loop
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -125,88 +245,41 @@ async def run_agentic_loop(
|
|||||||
|
|
||||||
task_id = str(uuid.uuid4())[:8]
|
task_id = str(uuid.uuid4())[:8]
|
||||||
start_time = time.monotonic()
|
start_time = time.monotonic()
|
||||||
|
|
||||||
agent = _get_loop_agent()
|
agent = _get_loop_agent()
|
||||||
result = AgenticResult(task_id=task_id, task=task, summary="")
|
result = AgenticResult(task_id=task_id, task=task, summary="")
|
||||||
|
|
||||||
# ── Phase 1: Planning ──────────────────────────────────────────────────
|
# Phase 1: Planning
|
||||||
plan_prompt = (
|
plan = await _plan_task(agent, task, session_id, max_steps)
|
||||||
f"Break this task into numbered steps (max {max_steps}). "
|
if isinstance(plan, str):
|
||||||
f"Return ONLY a numbered list, nothing else.\n\n"
|
|
||||||
f"Task: {task}"
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
plan_run = await asyncio.to_thread(
|
|
||||||
agent.run, plan_prompt, stream=False, session_id=f"{session_id}_plan"
|
|
||||||
)
|
|
||||||
plan_text = plan_run.content if hasattr(plan_run, "content") else str(plan_run)
|
|
||||||
except Exception as exc: # broad catch intentional: agent.run can raise any error
|
|
||||||
logger.error("Agentic loop: planning failed: %s", exc)
|
|
||||||
result.status = "failed"
|
result.status = "failed"
|
||||||
result.summary = f"Planning failed: {exc}"
|
result.summary = plan
|
||||||
result.total_duration_ms = int((time.monotonic() - start_time) * 1000)
|
result.total_duration_ms = int((time.monotonic() - start_time) * 1000)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
steps = _parse_steps(plan_text)
|
steps, was_truncated = plan
|
||||||
if not steps:
|
|
||||||
result.status = "failed"
|
|
||||||
result.summary = "Planning produced no steps."
|
|
||||||
result.total_duration_ms = int((time.monotonic() - start_time) * 1000)
|
|
||||||
return result
|
|
||||||
|
|
||||||
# Enforce max_steps — track if we truncated
|
|
||||||
planned_steps = len(steps)
|
|
||||||
steps = steps[:max_steps]
|
|
||||||
total_steps = len(steps)
|
total_steps = len(steps)
|
||||||
was_truncated = planned_steps > total_steps
|
|
||||||
|
|
||||||
# Broadcast plan
|
|
||||||
await _broadcast_progress(
|
await _broadcast_progress(
|
||||||
"agentic.plan_ready",
|
"agentic.plan_ready",
|
||||||
{
|
{"task_id": task_id, "task": task, "steps": steps, "total": total_steps},
|
||||||
"task_id": task_id,
|
|
||||||
"task": task,
|
|
||||||
"steps": steps,
|
|
||||||
"total": total_steps,
|
|
||||||
},
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# ── Phase 2: Execution ─────────────────────────────────────────────────
|
# Phase 2: Execution
|
||||||
completed_results: list[str] = []
|
completed_results: list[str] = []
|
||||||
|
|
||||||
for i, step_desc in enumerate(steps, 1):
|
for i, step_desc in enumerate(steps, 1):
|
||||||
step_start = time.monotonic()
|
step_start = time.monotonic()
|
||||||
|
|
||||||
recent = completed_results[-2:] if completed_results else []
|
|
||||||
context = (
|
|
||||||
f"Task: {task}\n"
|
|
||||||
f"Step {i}/{total_steps}: {step_desc}\n"
|
|
||||||
f"Recent progress: {recent}\n\n"
|
|
||||||
f"Execute this step and report what you did."
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
step_run = await asyncio.to_thread(
|
step = await _execute_step(
|
||||||
agent.run, context, stream=False, session_id=f"{session_id}_step{i}"
|
agent,
|
||||||
)
|
task,
|
||||||
step_result = step_run.content if hasattr(step_run, "content") else str(step_run)
|
step_desc,
|
||||||
|
i,
|
||||||
# Clean the response
|
total_steps,
|
||||||
from timmy.session import _clean_response
|
completed_results,
|
||||||
|
session_id,
|
||||||
step_result = _clean_response(step_result)
|
|
||||||
|
|
||||||
step = AgenticStep(
|
|
||||||
step_num=i,
|
|
||||||
description=step_desc,
|
|
||||||
result=step_result,
|
|
||||||
status="completed",
|
|
||||||
duration_ms=int((time.monotonic() - step_start) * 1000),
|
|
||||||
)
|
)
|
||||||
result.steps.append(step)
|
result.steps.append(step)
|
||||||
completed_results.append(f"Step {i}: {step_result[:200]}")
|
completed_results.append(f"Step {i}: {step.result[:200]}")
|
||||||
|
|
||||||
# Broadcast progress
|
|
||||||
await _broadcast_progress(
|
await _broadcast_progress(
|
||||||
"agentic.step_complete",
|
"agentic.step_complete",
|
||||||
{
|
{
|
||||||
@@ -214,46 +287,18 @@ async def run_agentic_loop(
|
|||||||
"step": i,
|
"step": i,
|
||||||
"total": total_steps,
|
"total": total_steps,
|
||||||
"description": step_desc,
|
"description": step_desc,
|
||||||
"result": step_result[:200],
|
"result": step.result[:200],
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
if on_progress:
|
if on_progress:
|
||||||
await on_progress(step_desc, i, total_steps)
|
await on_progress(step_desc, i, total_steps)
|
||||||
|
|
||||||
except Exception as exc: # broad catch intentional: agent.run can raise any error
|
except Exception as exc: # broad catch intentional: agent.run can raise any error
|
||||||
logger.warning("Agentic loop step %d failed: %s", i, exc)
|
logger.warning("Agentic loop step %d failed: %s", i, exc)
|
||||||
|
|
||||||
# ── Adaptation: ask model to adapt ─────────────────────────────
|
|
||||||
adapt_prompt = (
|
|
||||||
f"Step {i} failed with error: {exc}\n"
|
|
||||||
f"Original step was: {step_desc}\n"
|
|
||||||
f"Adapt the plan and try an alternative approach for this step."
|
|
||||||
)
|
|
||||||
try:
|
try:
|
||||||
adapt_run = await asyncio.to_thread(
|
step = await _adapt_step(agent, step_desc, i, exc, step_start, session_id)
|
||||||
agent.run,
|
|
||||||
adapt_prompt,
|
|
||||||
stream=False,
|
|
||||||
session_id=f"{session_id}_adapt{i}",
|
|
||||||
)
|
|
||||||
adapt_result = (
|
|
||||||
adapt_run.content if hasattr(adapt_run, "content") else str(adapt_run)
|
|
||||||
)
|
|
||||||
from timmy.session import _clean_response
|
|
||||||
|
|
||||||
adapt_result = _clean_response(adapt_result)
|
|
||||||
|
|
||||||
step = AgenticStep(
|
|
||||||
step_num=i,
|
|
||||||
description=f"[Adapted] {step_desc}",
|
|
||||||
result=adapt_result,
|
|
||||||
status="adapted",
|
|
||||||
duration_ms=int((time.monotonic() - step_start) * 1000),
|
|
||||||
)
|
|
||||||
result.steps.append(step)
|
result.steps.append(step)
|
||||||
completed_results.append(f"Step {i} (adapted): {adapt_result[:200]}")
|
completed_results.append(f"Step {i} (adapted): {step.result[:200]}")
|
||||||
|
|
||||||
await _broadcast_progress(
|
await _broadcast_progress(
|
||||||
"agentic.step_adapted",
|
"agentic.step_adapted",
|
||||||
{
|
{
|
||||||
@@ -262,46 +307,26 @@ async def run_agentic_loop(
|
|||||||
"total": total_steps,
|
"total": total_steps,
|
||||||
"description": step_desc,
|
"description": step_desc,
|
||||||
"error": str(exc),
|
"error": str(exc),
|
||||||
"adaptation": adapt_result[:200],
|
"adaptation": step.result[:200],
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
if on_progress:
|
if on_progress:
|
||||||
await on_progress(f"[Adapted] {step_desc}", i, total_steps)
|
await on_progress(f"[Adapted] {step_desc}", i, total_steps)
|
||||||
|
except Exception as adapt_exc: # broad catch intentional
|
||||||
except Exception as adapt_exc: # broad catch intentional: agent.run can raise any error
|
|
||||||
logger.error("Agentic loop adaptation also failed: %s", adapt_exc)
|
logger.error("Agentic loop adaptation also failed: %s", adapt_exc)
|
||||||
step = AgenticStep(
|
result.steps.append(
|
||||||
step_num=i,
|
AgenticStep(
|
||||||
description=step_desc,
|
step_num=i,
|
||||||
result=f"Failed: {exc}; Adaptation also failed: {adapt_exc}",
|
description=step_desc,
|
||||||
status="failed",
|
result=f"Failed: {exc}; Adaptation also failed: {adapt_exc}",
|
||||||
duration_ms=int((time.monotonic() - step_start) * 1000),
|
status="failed",
|
||||||
|
duration_ms=int((time.monotonic() - step_start) * 1000),
|
||||||
|
)
|
||||||
)
|
)
|
||||||
result.steps.append(step)
|
|
||||||
completed_results.append(f"Step {i}: FAILED")
|
completed_results.append(f"Step {i}: FAILED")
|
||||||
|
|
||||||
# ── Phase 3: Summary ───────────────────────────────────────────────────
|
# Phase 3: Summary
|
||||||
completed_count = sum(1 for s in result.steps if s.status == "completed")
|
_summarize(result, total_steps, was_truncated)
|
||||||
adapted_count = sum(1 for s in result.steps if s.status == "adapted")
|
|
||||||
failed_count = sum(1 for s in result.steps if s.status == "failed")
|
|
||||||
parts = [f"Completed {completed_count}/{total_steps} steps"]
|
|
||||||
if adapted_count:
|
|
||||||
parts.append(f"{adapted_count} adapted")
|
|
||||||
if failed_count:
|
|
||||||
parts.append(f"{failed_count} failed")
|
|
||||||
result.summary = f"{task}: {', '.join(parts)}."
|
|
||||||
|
|
||||||
# Determine final status
|
|
||||||
if was_truncated:
|
|
||||||
result.status = "partial"
|
|
||||||
elif len(result.steps) < total_steps:
|
|
||||||
result.status = "partial"
|
|
||||||
elif any(s.status == "failed" for s in result.steps):
|
|
||||||
result.status = "partial"
|
|
||||||
else:
|
|
||||||
result.status = "completed"
|
|
||||||
|
|
||||||
result.total_duration_ms = int((time.monotonic() - start_time) * 1000)
|
result.total_duration_ms = int((time.monotonic() - start_time) * 1000)
|
||||||
|
|
||||||
await _broadcast_progress(
|
await _broadcast_progress(
|
||||||
|
|||||||
@@ -636,7 +636,7 @@ class HotMemory:
|
|||||||
if len(lines) > 1:
|
if len(lines) > 1:
|
||||||
return "\n".join(lines)
|
return "\n".join(lines)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
logger.debug("DB context read failed, falling back to file")
|
||||||
|
|
||||||
# Fallback to file if DB unavailable
|
# Fallback to file if DB unavailable
|
||||||
if self.path.exists():
|
if self.path.exists():
|
||||||
|
|||||||
86
tests/loop/test_cycle_retro.py
Normal file
86
tests/loop/test_cycle_retro.py
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
"""Tests for scripts/cycle_retro.py issue auto-detection."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
# Import the module under test — it's a script so we import the helpers directly
|
||||||
|
import importlib
|
||||||
|
import subprocess
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
SCRIPTS_DIR = Path(__file__).resolve().parent.parent.parent / "scripts"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _add_scripts_to_path(monkeypatch):
|
||||||
|
monkeypatch.syspath_prepend(str(SCRIPTS_DIR))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def mod():
|
||||||
|
"""Import cycle_retro as a module."""
|
||||||
|
return importlib.import_module("cycle_retro")
|
||||||
|
|
||||||
|
|
||||||
|
class TestDetectIssueFromBranch:
|
||||||
|
def test_kimi_issue_branch(self, mod):
|
||||||
|
with patch.object(subprocess, "check_output", return_value="kimi/issue-492\n"):
|
||||||
|
assert mod.detect_issue_from_branch() == 492
|
||||||
|
|
||||||
|
def test_plain_issue_branch(self, mod):
|
||||||
|
with patch.object(subprocess, "check_output", return_value="issue-123\n"):
|
||||||
|
assert mod.detect_issue_from_branch() == 123
|
||||||
|
|
||||||
|
def test_issue_slash_number(self, mod):
|
||||||
|
with patch.object(subprocess, "check_output", return_value="fix/issue/55\n"):
|
||||||
|
assert mod.detect_issue_from_branch() == 55
|
||||||
|
|
||||||
|
def test_no_issue_in_branch(self, mod):
|
||||||
|
with patch.object(subprocess, "check_output", return_value="main\n"):
|
||||||
|
assert mod.detect_issue_from_branch() is None
|
||||||
|
|
||||||
|
def test_feature_branch(self, mod):
|
||||||
|
with patch.object(subprocess, "check_output", return_value="feature/add-widget\n"):
|
||||||
|
assert mod.detect_issue_from_branch() is None
|
||||||
|
|
||||||
|
def test_git_not_available(self, mod):
|
||||||
|
with patch.object(subprocess, "check_output", side_effect=FileNotFoundError):
|
||||||
|
assert mod.detect_issue_from_branch() is None
|
||||||
|
|
||||||
|
def test_git_fails(self, mod):
|
||||||
|
with patch.object(
|
||||||
|
subprocess,
|
||||||
|
"check_output",
|
||||||
|
side_effect=subprocess.CalledProcessError(1, "git"),
|
||||||
|
):
|
||||||
|
assert mod.detect_issue_from_branch() is None
|
||||||
|
|
||||||
|
|
||||||
|
class TestBackfillExtractIssueNumber:
|
||||||
|
"""Tests for backfill_retro.extract_issue_number PR-number filtering."""
|
||||||
|
|
||||||
|
@pytest.fixture()
|
||||||
|
def backfill(self):
|
||||||
|
return importlib.import_module("backfill_retro")
|
||||||
|
|
||||||
|
def test_body_has_issue(self, backfill):
|
||||||
|
assert backfill.extract_issue_number("fix: foo (#491)", "Fixes #490", pr_number=491) == 490
|
||||||
|
|
||||||
|
def test_title_skips_pr_number(self, backfill):
|
||||||
|
assert backfill.extract_issue_number("fix: foo (#491)", "", pr_number=491) is None
|
||||||
|
|
||||||
|
def test_title_with_issue_and_pr(self, backfill):
|
||||||
|
# [loop-cycle-538] refactor: ... (#459) (#481)
|
||||||
|
assert (
|
||||||
|
backfill.extract_issue_number(
|
||||||
|
"[loop-cycle-538] refactor: remove dead airllm (#459) (#481)",
|
||||||
|
"",
|
||||||
|
pr_number=481,
|
||||||
|
)
|
||||||
|
== 459
|
||||||
|
)
|
||||||
|
|
||||||
|
def test_no_pr_number_provided(self, backfill):
|
||||||
|
assert backfill.extract_issue_number("fix: foo (#491)", "") == 491
|
||||||
Reference in New Issue
Block a user