# Timmy-time-dashboard/timmy_automations/retrain/training_log.py

"""Training log — records each fine-tune cycle with metrics and skill deltas.

Writes to .loop/retrain/training_log.jsonl (one entry per cycle) and
maintains a human-readable .loop/retrain/training_log.md summary.

Each log entry captures:
- Iteration count
- Week processed
- Quality filter stats
- Examples added to dataset
- LoRA train result (loss, duration, adapter path)
- Skill accuracy deltas (from smoke tests)

Refs: #1105
"""

from __future__ import annotations

import json
import logging
from dataclasses import asdict, dataclass, field
from datetime import UTC, datetime
from pathlib import Path
from typing import Any

# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Default locations of the JSONL cycle log and its markdown summary.
# TrainingLog joins these onto its repo root when no explicit path is given.
_DEFAULT_LOG_PATH = ".loop/retrain/training_log.jsonl"
_DEFAULT_SUMMARY_PATH = ".loop/retrain/training_log.md"


@dataclass
class CycleMetrics:
    """Metrics for a single retrain cycle.

    Only ``iteration``, ``week`` and ``ran_at`` are required; every other
    field has a default, so a cycle that never reached training can still be
    recorded (``train_status`` defaults to ``"skipped"``).

    ``TrainingLog.record`` serialises an instance with ``dataclasses.asdict``,
    so the field names below are also the keys of each JSONL log entry.
    """

    # Cycle number; TrainingLog.next_iteration hands these out starting at 1.
    iteration: int
    # Week processed by this cycle; the string format is not enforced here.
    week: str
    # When the cycle ran — presumably a timestamp string; confirm with caller.
    ran_at: str

    # Quality filter stats.
    # NOTE(review): high/medium/low look like per-tier trajectory counts and
    # "accepted" the number that passed the filter — confirm against the
    # code that fills them in.
    trajectories_total: int = 0
    trajectories_high: int = 0
    trajectories_medium: int = 0
    trajectories_low: int = 0
    trajectories_accepted: int = 0

    # Dataset: examples added by this cycle, and the dataset size that the
    # summary table reports as "(N total)".
    examples_added: int = 0
    dataset_total: int = 0

    # Training (LoRA train result). ``train_loss`` stays None when no loss
    # was reported; the summary table renders the duration in seconds.
    train_status: str = "skipped"
    train_loss: float | None = None
    train_duration_seconds: float = 0.0
    adapter_path: str | None = None
    model_name: str | None = None

    # Skill accuracy (optional, from smoke tests), keyed by skill name.
    # The summary formats accuracy values as percentages, i.e. it expects
    # fractions such as 0.5 for 50%.
    # ``skill_delta`` is presumably the change versus an earlier cycle —
    # nothing in this module computes or reads it; confirm with the caller.
    skill_accuracy: dict[str, float] = field(default_factory=dict)
    skill_delta: dict[str, float] = field(default_factory=dict)

    # Human-readable summary; shown under "Latest Notes" in the markdown
    # summary when non-empty.
    notes: str = ""


class TrainingLog:
    """Persistent log of all retrain cycles."""

    def __init__(
        self,
        log_path: str | Path | None = None,
        summary_path: str | Path | None = None,
        repo_root: str | Path | None = None,
    ):
        if repo_root is None:
            repo_root = Path(__file__).resolve().parent.parent.parent
        self._repo_root = Path(repo_root)

        self._log_path = self._repo_root / (log_path or _DEFAULT_LOG_PATH)
        self._summary_path = self._repo_root / (summary_path or _DEFAULT_SUMMARY_PATH)
        self._log_path.parent.mkdir(parents=True, exist_ok=True)

    @property
    def log_path(self) -> Path:
        return self._log_path

    def next_iteration(self) -> int:
        """Return the next iteration number (1-indexed)."""
        entries = self.load_all()
        if not entries:
            return 1
        return max(e.get("iteration", 0) for e in entries) + 1

    def record(self, metrics: CycleMetrics) -> None:
        """Append a cycle metrics record to the log."""
        entry = asdict(metrics)
        with open(self._log_path, "a") as f:
            f.write(json.dumps(entry) + "\n")

        self._update_summary(metrics)
        logger.info(
            "Training log: iteration=%d week=%s status=%s examples_added=%d",
            metrics.iteration,
            metrics.week,
            metrics.train_status,
            metrics.examples_added,
        )

    def load_all(self) -> list[dict[str, Any]]:
        """Load all cycle records from the log."""
        if not self._log_path.exists():
            return []
        entries: list[dict[str, Any]] = []
        with open(self._log_path) as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    entries.append(json.loads(line))
                except json.JSONDecodeError:
                    logger.debug("Skipping malformed log entry")
        return entries

    def latest(self) -> dict[str, Any] | None:
        """Return the most recent cycle record."""
        entries = self.load_all()
        return entries[-1] if entries else None

    def _update_summary(self, metrics: CycleMetrics) -> None:
        """Rewrite the markdown summary with all cycles."""
        all_entries = self.load_all()

        lines = [
            "# AutoLoRA Training Log\n",
            f"*Updated: {datetime.now(tz=UTC).isoformat()}*\n",
            f"*Total iterations: {len(all_entries)}*\n",
            "",
            "## Cycles\n",
            "| # | Week | Status | Loss | Examples | Duration |",
            "|---|------|--------|------|----------|----------|",
        ]

        for entry in reversed(all_entries[-20:]):  # Last 20 cycles
            loss = f"{entry.get('train_loss', 0.0) or 0.0:.4f}" if entry.get("train_loss") else "—"
            lines.append(
                f"| {entry.get('iteration', '?')} "
                f"| {entry.get('week', '?')} "
                f"| {entry.get('train_status', '?')} "
                f"| {loss} "
                f"| +{entry.get('examples_added', 0)} ({entry.get('dataset_total', 0)} total) "
                f"| {entry.get('train_duration_seconds', 0.0):.0f}s |"
            )

        lines.append("")
        lines.append("## Skill Accuracy Over Time\n")

        # Collect all unique skills
        all_skills: set[str] = set()
        for entry in all_entries:
            all_skills.update(entry.get("skill_accuracy", {}).keys())

        if all_skills:
            skill_header = "| # | Week | " + " | ".join(sorted(all_skills)) + " |"
            skill_sep = "|---|------|" + "|".join("---" for _ in all_skills) + "|"
            lines.extend([skill_header, skill_sep])
            for entry in reversed(all_entries[-10:]):
                acc = entry.get("skill_accuracy", {})
                row = f"| {entry.get('iteration', '?')} | {entry.get('week', '?')} | "
                row += " | ".join(
                    f"{acc.get(s, 0.0):.0%}" if s in acc else "—"
                    for s in sorted(all_skills)
                )
                row += " |"
                lines.append(row)
        else:
            lines.append("*No skill accuracy data yet — run smoke tests after fine-tuning.*")

        lines.append("")
        if metrics.notes:
            lines.append(f"## Latest Notes\n\n{metrics.notes}\n")

        self._summary_path.write_text("\n".join(lines))