Files
Timmy-time-dashboard/timmy_automations/retrain/training_log.py
Claude (Opus 4.6) 1be1324a0d
Some checks failed
Tests / lint (push) Has been cancelled
Tests / test (push) Has been cancelled
[claude] Implement AutoLoRA continuous improvement loop (#1105) (#1118)
2026-03-23 18:18:32 +00:00

184 lines
5.9 KiB
Python

"""Training log — records each fine-tune cycle with metrics and skill deltas.
Writes to .loop/retrain/training_log.jsonl (one entry per cycle) and
maintains a human-readable .loop/retrain/training_log.md summary.
Each log entry captures:
- Iteration count
- Week processed
- Quality filter stats
- Examples added to dataset
- LoRA train result (loss, duration, adapter path)
- Skill accuracy deltas (from smoke tests)
Refs: #1105
"""
from __future__ import annotations
import json
import logging
from dataclasses import asdict, dataclass, field
from datetime import UTC, datetime
from pathlib import Path
from typing import Any
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Default location of the JSONL cycle log; TrainingLog joins it onto its repo root.
_DEFAULT_LOG_PATH = ".loop/retrain/training_log.jsonl"
# Default location of the Markdown summary; TrainingLog joins it onto its repo root.
_DEFAULT_SUMMARY_PATH = ".loop/retrain/training_log.md"
@dataclass
class CycleMetrics:
    """Record describing one retrain cycle.

    The first three fields are required and identify the cycle; every other
    field has a default so a cycle that skipped training can still be
    recorded. Instances are serialised with ``dataclasses.asdict``, so the
    field names double as the keys of each JSONL log entry.
    """

    # --- Identification (required) ---
    iteration: int
    week: str
    ran_at: str

    # --- Quality-filter statistics ---
    trajectories_total: int = 0
    trajectories_high: int = 0
    trajectories_medium: int = 0
    trajectories_low: int = 0
    trajectories_accepted: int = 0

    # --- Dataset growth ---
    examples_added: int = 0
    dataset_total: int = 0

    # --- LoRA training result ---
    train_status: str = "skipped"
    train_loss: float | None = None
    train_duration_seconds: float = 0.0
    adapter_path: str | None = None
    model_name: str | None = None

    # --- Skill accuracy and deltas (optional, from smoke tests) ---
    # default_factory gives every instance its own dict.
    skill_accuracy: dict[str, float] = field(default_factory=dict)
    skill_delta: dict[str, float] = field(default_factory=dict)

    # --- Free-form, human-readable summary ---
    notes: str = ""
class TrainingLog:
"""Persistent log of all retrain cycles."""
def __init__(
self,
log_path: str | Path | None = None,
summary_path: str | Path | None = None,
repo_root: str | Path | None = None,
):
if repo_root is None:
repo_root = Path(__file__).resolve().parent.parent.parent
self._repo_root = Path(repo_root)
self._log_path = self._repo_root / (log_path or _DEFAULT_LOG_PATH)
self._summary_path = self._repo_root / (summary_path or _DEFAULT_SUMMARY_PATH)
self._log_path.parent.mkdir(parents=True, exist_ok=True)
@property
def log_path(self) -> Path:
return self._log_path
def next_iteration(self) -> int:
"""Return the next iteration number (1-indexed)."""
entries = self.load_all()
if not entries:
return 1
return max(e.get("iteration", 0) for e in entries) + 1
def record(self, metrics: CycleMetrics) -> None:
"""Append a cycle metrics record to the log."""
entry = asdict(metrics)
with open(self._log_path, "a") as f:
f.write(json.dumps(entry) + "\n")
self._update_summary(metrics)
logger.info(
"Training log: iteration=%d week=%s status=%s examples_added=%d",
metrics.iteration,
metrics.week,
metrics.train_status,
metrics.examples_added,
)
def load_all(self) -> list[dict[str, Any]]:
"""Load all cycle records from the log."""
if not self._log_path.exists():
return []
entries: list[dict[str, Any]] = []
with open(self._log_path) as f:
for line in f:
line = line.strip()
if not line:
continue
try:
entries.append(json.loads(line))
except json.JSONDecodeError:
logger.debug("Skipping malformed log entry")
return entries
def latest(self) -> dict[str, Any] | None:
"""Return the most recent cycle record."""
entries = self.load_all()
return entries[-1] if entries else None
def _update_summary(self, metrics: CycleMetrics) -> None:
"""Rewrite the markdown summary with all cycles."""
all_entries = self.load_all()
lines = [
"# AutoLoRA Training Log\n",
f"*Updated: {datetime.now(tz=UTC).isoformat()}*\n",
f"*Total iterations: {len(all_entries)}*\n",
"",
"## Cycles\n",
"| # | Week | Status | Loss | Examples | Duration |",
"|---|------|--------|------|----------|----------|",
]
for entry in reversed(all_entries[-20:]): # Last 20 cycles
loss = f"{entry.get('train_loss', 0.0) or 0.0:.4f}" if entry.get("train_loss") else ""
lines.append(
f"| {entry.get('iteration', '?')} "
f"| {entry.get('week', '?')} "
f"| {entry.get('train_status', '?')} "
f"| {loss} "
f"| +{entry.get('examples_added', 0)} ({entry.get('dataset_total', 0)} total) "
f"| {entry.get('train_duration_seconds', 0.0):.0f}s |"
)
lines.append("")
lines.append("## Skill Accuracy Over Time\n")
# Collect all unique skills
all_skills: set[str] = set()
for entry in all_entries:
all_skills.update(entry.get("skill_accuracy", {}).keys())
if all_skills:
skill_header = "| # | Week | " + " | ".join(sorted(all_skills)) + " |"
skill_sep = "|---|------|" + "|".join("---" for _ in all_skills) + "|"
lines.extend([skill_header, skill_sep])
for entry in reversed(all_entries[-10:]):
acc = entry.get("skill_accuracy", {})
row = f"| {entry.get('iteration', '?')} | {entry.get('week', '?')} | "
row += " | ".join(
f"{acc.get(s, 0.0):.0%}" if s in acc else ""
for s in sorted(all_skills)
)
row += " |"
lines.append(row)
else:
lines.append("*No skill accuracy data yet — run smoke tests after fine-tuning.*")
lines.append("")
if metrics.notes:
lines.append(f"## Latest Notes\n\n{metrics.notes}\n")
self._summary_path.write_text("\n".join(lines))