#!/usr/bin/env python3
"""AutoLoRA continuous improvement loop — the sovereignty retrain script.

Implements the weekly retrain cycle end-to-end:

Work → Record trajectories → Export weekly → Filter quality
→ LoRA fine-tune → Load adapter → Model improves → Repeat forever

Run:
    python3 timmy_automations/retrain/retrain.py
    python3 timmy_automations/retrain/retrain.py --dry-run
    python3 timmy_automations/retrain/retrain.py --weeks-ago 1

Epic: #1091 — Project Bannerlord
Pipeline: AutoLoRA Sovereignty Loop (Step 6 of 7)
Refs: #1105
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import sys
|
|
from dataclasses import dataclass
|
|
from datetime import UTC, datetime
|
|
from pathlib import Path
|
|
|
|
# Allow running directly from repo root: when executed as a script (not via
# `python -m`), the package root is not on sys.path, so the
# `timmy_automations.*` imports below would fail without this insertion.
_REPO_ROOT = Path(__file__).resolve().parent.parent.parent
if str(_REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(_REPO_ROOT))
|
|
|
|
from timmy_automations.retrain.lora_trainer import LoRATrainer
|
|
from timmy_automations.retrain.quality_filter import QualityFilter
|
|
from timmy_automations.retrain.training_dataset import TrainingDataset
|
|
from timmy_automations.retrain.training_log import CycleMetrics, TrainingLog
|
|
from timmy_automations.retrain.trajectory_exporter import TrajectoryExporter
|
|
|
|
# Root logging config for the retrain CLI: ISO-style timestamps and
# level-aligned records so cron/systemd logs line up when scanned.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)-8s %(name)s: %(message)s",
    datefmt="%Y-%m-%dT%H:%M:%S",
)
logger = logging.getLogger("retrain")
|
|
|
|
|
|
@dataclass
class RetrainResult:
    """Result of a complete retrain cycle."""

    iteration: int  # cycle number, allocated by TrainingLog.next_iteration()
    week: str  # week tag processed, e.g. "2025-W07"
    trajectories_exported: int  # raw trajectories pulled from session logs
    trajectories_accepted: int  # trajectories that passed the quality filter
    examples_added: int  # new training examples appended this cycle
    dataset_total: int  # dataset size after the append
    train_status: str  # LoRATrainer status string (e.g. trained/skipped/failed)
    adapter_path: str | None  # path to the new LoRA adapter, if one was built
    model_name: str | None  # name of the new Ollama model, if one was created
    train_loss: float | None  # final training loss, if training ran
    duration_seconds: float  # wall-clock duration of the whole cycle
    notes: str  # free-form message from the trainer
|
|
|
|
|
|
class RetrainOrchestrator:
|
|
"""Orchestrates the complete AutoLoRA continuous improvement loop.
|
|
|
|
Step 1: Export this week's conversation trajectories from session logs
|
|
Step 2: Filter for high-quality exchanges
|
|
Step 3: Append to the training dataset
|
|
Step 4: Trigger LoRA fine-tune
|
|
Step 5: Load the new adapter (via Ollama)
|
|
Step 6: Log iteration, loss, skill accuracy
|
|
"""
|
|
|
|
def __init__(
|
|
self,
|
|
base_model: str = "hermes4-14b",
|
|
repo_root: str | Path | None = None,
|
|
dry_run: bool = False,
|
|
):
|
|
if repo_root is None:
|
|
repo_root = _REPO_ROOT
|
|
self._repo_root = Path(repo_root)
|
|
self._dry_run = dry_run
|
|
|
|
self.exporter = TrajectoryExporter(repo_root=self._repo_root)
|
|
self.quality_filter = QualityFilter()
|
|
self.dataset = TrainingDataset(repo_root=self._repo_root)
|
|
self.trainer = LoRATrainer(
|
|
base_model=base_model,
|
|
repo_root=self._repo_root,
|
|
dry_run=dry_run,
|
|
)
|
|
self.log = TrainingLog(repo_root=self._repo_root)
|
|
|
|
def run(self, weeks_ago: int = 1) -> RetrainResult:
|
|
"""Execute one complete retrain cycle.
|
|
|
|
Args:
|
|
weeks_ago: Which week to process. 0 = current week (partial),
|
|
1 = last week (default, Sunday night run), etc.
|
|
|
|
Returns:
|
|
RetrainResult with full cycle summary.
|
|
"""
|
|
started = datetime.now(tz=UTC)
|
|
iteration = self.log.next_iteration()
|
|
|
|
# Determine ISO week tag
|
|
from datetime import timedelta
|
|
now = datetime.now(tz=UTC)
|
|
target_date = now - timedelta(weeks=weeks_ago)
|
|
week_tag = f"{target_date.year}-W{target_date.isocalendar().week:02d}"
|
|
|
|
logger.info(
|
|
"=== AutoLoRA Retrain Cycle %d | Week: %s | dry_run=%s ===",
|
|
iteration,
|
|
week_tag,
|
|
self._dry_run,
|
|
)
|
|
|
|
# Step 1: Export trajectories
|
|
logger.info("Step 1: Exporting trajectories for %s...", week_tag)
|
|
trajectories = self.exporter.export_week(weeks_ago=weeks_ago)
|
|
logger.info("Exported %d raw trajectories", len(trajectories))
|
|
|
|
# Step 2: Quality filter
|
|
logger.info("Step 2: Applying quality filter...")
|
|
trainable, filter_stats = self.quality_filter.filter(trajectories)
|
|
logger.info(
|
|
"Quality filter: %d/%d accepted (high=%d medium=%d low=%d)",
|
|
filter_stats["accepted"],
|
|
filter_stats["total"],
|
|
filter_stats["high"],
|
|
filter_stats["medium"],
|
|
filter_stats["low"],
|
|
)
|
|
|
|
# Step 3: Append to dataset
|
|
logger.info("Step 3: Appending to training dataset...")
|
|
append_result = self.dataset.append(trainable, week_tag)
|
|
logger.info(
|
|
"Dataset: +%d new examples (%d total)",
|
|
append_result.new_examples,
|
|
append_result.total_examples,
|
|
)
|
|
|
|
# Step 4: LoRA fine-tune
|
|
logger.info("Step 4: Triggering LoRA fine-tune (iteration=%d)...", iteration)
|
|
train_result = self.trainer.train(
|
|
dataset_path=self.dataset.dataset_path,
|
|
iteration=iteration,
|
|
)
|
|
logger.info(
|
|
"Train result: status=%s loss=%s duration=%.1fs",
|
|
train_result.status,
|
|
train_result.train_loss,
|
|
train_result.duration_seconds,
|
|
)
|
|
|
|
# Step 5 & 6: Log cycle
|
|
duration = (datetime.now(tz=UTC) - started).total_seconds()
|
|
metrics = CycleMetrics(
|
|
iteration=iteration,
|
|
week=week_tag,
|
|
ran_at=started.isoformat(),
|
|
trajectories_total=filter_stats["total"],
|
|
trajectories_high=filter_stats["high"],
|
|
trajectories_medium=filter_stats["medium"],
|
|
trajectories_low=filter_stats["low"],
|
|
trajectories_accepted=filter_stats["accepted"],
|
|
examples_added=append_result.new_examples,
|
|
dataset_total=append_result.total_examples,
|
|
train_status=train_result.status,
|
|
train_loss=train_result.train_loss,
|
|
train_duration_seconds=train_result.duration_seconds,
|
|
adapter_path=train_result.adapter_path,
|
|
model_name=train_result.model_name,
|
|
notes=train_result.message,
|
|
)
|
|
self.log.record(metrics)
|
|
|
|
result = RetrainResult(
|
|
iteration=iteration,
|
|
week=week_tag,
|
|
trajectories_exported=len(trajectories),
|
|
trajectories_accepted=filter_stats["accepted"],
|
|
examples_added=append_result.new_examples,
|
|
dataset_total=append_result.total_examples,
|
|
train_status=train_result.status,
|
|
adapter_path=train_result.adapter_path,
|
|
model_name=train_result.model_name,
|
|
train_loss=train_result.train_loss,
|
|
duration_seconds=duration,
|
|
notes=train_result.message,
|
|
)
|
|
|
|
logger.info(
|
|
"=== Cycle %d complete: status=%s examples_added=%d total=%.1fs ===",
|
|
iteration,
|
|
train_result.status,
|
|
append_result.new_examples,
|
|
duration,
|
|
)
|
|
|
|
return result
|
|
|
|
|
|
def _print_result(result: RetrainResult, as_json: bool = False) -> None:
|
|
"""Print cycle result to stdout."""
|
|
if as_json:
|
|
print(
|
|
json.dumps(
|
|
{
|
|
"iteration": result.iteration,
|
|
"week": result.week,
|
|
"trajectories_exported": result.trajectories_exported,
|
|
"trajectories_accepted": result.trajectories_accepted,
|
|
"examples_added": result.examples_added,
|
|
"dataset_total": result.dataset_total,
|
|
"train_status": result.train_status,
|
|
"adapter_path": result.adapter_path,
|
|
"model_name": result.model_name,
|
|
"train_loss": result.train_loss,
|
|
"duration_seconds": result.duration_seconds,
|
|
"notes": result.notes,
|
|
},
|
|
indent=2,
|
|
)
|
|
)
|
|
return
|
|
|
|
print(f"\n{'='*60}")
|
|
print(f" AutoLoRA Retrain — Cycle {result.iteration}")
|
|
print(f" Week: {result.week}")
|
|
print(f"{'='*60}")
|
|
print(f" Trajectories: {result.trajectories_exported} exported, {result.trajectories_accepted} accepted")
|
|
print(f" Dataset: +{result.examples_added} examples ({result.dataset_total} total)")
|
|
print(f" Fine-tune: {result.train_status}")
|
|
if result.train_loss is not None:
|
|
print(f" Train loss: {result.train_loss:.4f}")
|
|
if result.model_name:
|
|
print(f" New model: {result.model_name}")
|
|
if result.adapter_path:
|
|
print(f" Adapter: {result.adapter_path}")
|
|
print(f" Duration: {result.duration_seconds:.1f}s")
|
|
print(f" Notes: {result.notes}")
|
|
print(f"{'='*60}\n")
|
|
|
|
|
|
def main() -> int:
    """CLI entry point: parse arguments, run one retrain cycle, report it."""
    arg_parser = argparse.ArgumentParser(
        description="AutoLoRA continuous improvement loop — sovereignty engine for Timmy"
    )
    arg_parser.add_argument(
        "--weeks-ago",
        type=int,
        default=1,
        help="Which week to process: 0=current (partial), 1=last week (default)",
    )
    arg_parser.add_argument(
        "--base-model",
        default="hermes4-14b",
        help="Ollama base model name (default: hermes4-14b)",
    )
    arg_parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Export and filter trajectories but skip actual fine-tuning",
    )
    arg_parser.add_argument(
        "--json",
        action="store_true",
        dest="as_json",
        help="Output result as JSON",
    )
    opts = arg_parser.parse_args()

    cycle = RetrainOrchestrator(
        base_model=opts.base_model,
        dry_run=opts.dry_run,
    ).run(weeks_ago=opts.weeks_ago)
    _print_result(cycle, as_json=opts.as_json)

    # Exit 0 even on skipped/failed training — the loop must continue
    return 0
|
|
|
|
|
|
# Script entry: propagate main()'s exit code to the shell.
if __name__ == "__main__":
    sys.exit(main())
|