"""Training log — records each fine-tune cycle with metrics and skill deltas.
|
|
|
|
Writes to .loop/retrain/training_log.jsonl (one entry per cycle) and
|
|
maintains a human-readable .loop/retrain/training_log.md summary.
|
|
|
|
Each log entry captures:
|
|
- Iteration count
|
|
- Week processed
|
|
- Quality filter stats
|
|
- Examples added to dataset
|
|
- LoRA train result (loss, duration, adapter path)
|
|
- Skill accuracy deltas (from smoke tests)
|
|
|
|
Refs: #1105
|
|
"""

from __future__ import annotations

import json
import logging
from dataclasses import asdict, dataclass, field
from datetime import UTC, datetime
from pathlib import Path
from typing import Any

logger = logging.getLogger(__name__)

_DEFAULT_LOG_PATH = ".loop/retrain/training_log.jsonl"
_DEFAULT_SUMMARY_PATH = ".loop/retrain/training_log.md"

|
@dataclass
class CycleMetrics:
    """Metrics for a single retrain cycle.

    Serialized verbatim (via ``dataclasses.asdict``) into the JSONL
    training log, so field names here form the on-disk schema.
    """

    iteration: int  # 1-indexed cycle counter
    week: str  # identifier of the week processed this cycle
    ran_at: str  # timestamp string of when the cycle ran

    # Quality filter: how many trajectories were seen at each quality
    # tier, and how many passed the filter overall.
    trajectories_total: int = 0
    trajectories_high: int = 0
    trajectories_medium: int = 0
    trajectories_low: int = 0
    trajectories_accepted: int = 0

    # Dataset: examples appended this cycle and the running total.
    examples_added: int = 0
    dataset_total: int = 0

    # Training: None/"skipped" defaults mean no fine-tune ran this cycle.
    train_status: str = "skipped"
    train_loss: float | None = None
    train_duration_seconds: float = 0.0
    adapter_path: str | None = None
    model_name: str | None = None

    # Skill accuracy (optional, from smoke tests): per-skill accuracy
    # and delta vs. the previous cycle.
    skill_accuracy: dict[str, float] = field(default_factory=dict)
    skill_delta: dict[str, float] = field(default_factory=dict)

    # Human-readable summary shown in the markdown log.
    notes: str = ""
class TrainingLog:
|
|
"""Persistent log of all retrain cycles."""
|
|
|
|
def __init__(
|
|
self,
|
|
log_path: str | Path | None = None,
|
|
summary_path: str | Path | None = None,
|
|
repo_root: str | Path | None = None,
|
|
):
|
|
if repo_root is None:
|
|
repo_root = Path(__file__).resolve().parent.parent.parent
|
|
self._repo_root = Path(repo_root)
|
|
|
|
self._log_path = self._repo_root / (log_path or _DEFAULT_LOG_PATH)
|
|
self._summary_path = self._repo_root / (summary_path or _DEFAULT_SUMMARY_PATH)
|
|
self._log_path.parent.mkdir(parents=True, exist_ok=True)
|
|
|
|
@property
|
|
def log_path(self) -> Path:
|
|
return self._log_path
|
|
|
|
def next_iteration(self) -> int:
|
|
"""Return the next iteration number (1-indexed)."""
|
|
entries = self.load_all()
|
|
if not entries:
|
|
return 1
|
|
return max(e.get("iteration", 0) for e in entries) + 1
|
|
|
|
def record(self, metrics: CycleMetrics) -> None:
|
|
"""Append a cycle metrics record to the log."""
|
|
entry = asdict(metrics)
|
|
with open(self._log_path, "a") as f:
|
|
f.write(json.dumps(entry) + "\n")
|
|
|
|
self._update_summary(metrics)
|
|
logger.info(
|
|
"Training log: iteration=%d week=%s status=%s examples_added=%d",
|
|
metrics.iteration,
|
|
metrics.week,
|
|
metrics.train_status,
|
|
metrics.examples_added,
|
|
)
|
|
|
|
def load_all(self) -> list[dict[str, Any]]:
|
|
"""Load all cycle records from the log."""
|
|
if not self._log_path.exists():
|
|
return []
|
|
entries: list[dict[str, Any]] = []
|
|
with open(self._log_path) as f:
|
|
for line in f:
|
|
line = line.strip()
|
|
if not line:
|
|
continue
|
|
try:
|
|
entries.append(json.loads(line))
|
|
except json.JSONDecodeError:
|
|
logger.debug("Skipping malformed log entry")
|
|
return entries
|
|
|
|
def latest(self) -> dict[str, Any] | None:
|
|
"""Return the most recent cycle record."""
|
|
entries = self.load_all()
|
|
return entries[-1] if entries else None
|
|
|
|
def _update_summary(self, metrics: CycleMetrics) -> None:
|
|
"""Rewrite the markdown summary with all cycles."""
|
|
all_entries = self.load_all()
|
|
|
|
lines = [
|
|
"# AutoLoRA Training Log\n",
|
|
f"*Updated: {datetime.now(tz=UTC).isoformat()}*\n",
|
|
f"*Total iterations: {len(all_entries)}*\n",
|
|
"",
|
|
"## Cycles\n",
|
|
"| # | Week | Status | Loss | Examples | Duration |",
|
|
"|---|------|--------|------|----------|----------|",
|
|
]
|
|
|
|
for entry in reversed(all_entries[-20:]): # Last 20 cycles
|
|
loss = f"{entry.get('train_loss', 0.0) or 0.0:.4f}" if entry.get("train_loss") else "—"
|
|
lines.append(
|
|
f"| {entry.get('iteration', '?')} "
|
|
f"| {entry.get('week', '?')} "
|
|
f"| {entry.get('train_status', '?')} "
|
|
f"| {loss} "
|
|
f"| +{entry.get('examples_added', 0)} ({entry.get('dataset_total', 0)} total) "
|
|
f"| {entry.get('train_duration_seconds', 0.0):.0f}s |"
|
|
)
|
|
|
|
lines.append("")
|
|
lines.append("## Skill Accuracy Over Time\n")
|
|
|
|
# Collect all unique skills
|
|
all_skills: set[str] = set()
|
|
for entry in all_entries:
|
|
all_skills.update(entry.get("skill_accuracy", {}).keys())
|
|
|
|
if all_skills:
|
|
skill_header = "| # | Week | " + " | ".join(sorted(all_skills)) + " |"
|
|
skill_sep = "|---|------|" + "|".join("---" for _ in all_skills) + "|"
|
|
lines.extend([skill_header, skill_sep])
|
|
for entry in reversed(all_entries[-10:]):
|
|
acc = entry.get("skill_accuracy", {})
|
|
row = f"| {entry.get('iteration', '?')} | {entry.get('week', '?')} | "
|
|
row += " | ".join(
|
|
f"{acc.get(s, 0.0):.0%}" if s in acc else "—"
|
|
for s in sorted(all_skills)
|
|
)
|
|
row += " |"
|
|
lines.append(row)
|
|
else:
|
|
lines.append("*No skill accuracy data yet — run smoke tests after fine-tuning.*")
|
|
|
|
lines.append("")
|
|
if metrics.notes:
|
|
lines.append(f"## Latest Notes\n\n{metrics.notes}\n")
|
|
|
|
self._summary_path.write_text("\n".join(lines))
|