#!/usr/bin/env python3
"""LoRA fine-tuning launcher for Hermes 4 on Timmy trajectory data.

Wraps ``mlx_lm.lora`` with project-specific defaults and pre-flight checks.
Requires Apple Silicon (M-series) and the ``mlx-lm`` package.

Usage::

    # Minimal — uses defaults (expects data in ~/timmy-lora-training/)
    python scripts/lora_finetune.py

    # Custom model path and data
    python scripts/lora_finetune.py \\
        --model /path/to/hermes4-mlx \\
        --data ~/timmy-lora-training \\
        --iters 500 \\
        --adapter-path ~/timmy-lora-adapter

    # Dry run (print command, don't execute)
    python scripts/lora_finetune.py --dry-run

    # After training, test with the adapter
    python scripts/lora_finetune.py --test \\
        --prompt "List the open PRs on the Timmy Time Dashboard repo"

    # Fuse adapter into base model for Ollama import
    python scripts/lora_finetune.py --fuse \\
        --save-path ~/timmy-fused-model

Typical workflow::

    # 1. Export trajectories
    python scripts/export_trajectories.py --verbose

    # 2. Prepare training dir
    mkdir -p ~/timmy-lora-training
    cp ~/timmy-training-data.jsonl ~/timmy-lora-training/train.jsonl
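
    # train.jsonl holds one JSON object per line. A sketch of one record,
    # assuming export_trajectories.py emits mlx-lm's "chat" dataset layout
    # (an assumption; verify against your mlx-lm version's dataset docs):
    #   {"messages": [{"role": "user", "content": "..."},
    #                 {"role": "assistant", "content": "..."}]}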

    # 3. Fine-tune
    python scripts/lora_finetune.py --verbose

    # 4. Test
    python scripts/lora_finetune.py --test

    # 5. Fuse + import to Ollama
    python scripts/lora_finetune.py --fuse
    ollama create timmy-hermes4 -f Modelfile.timmy-hermes4

Epic: #1091 Project Bannerlord — AutoLoRA Sovereignty Loop (Step 4 of 7)
Refs: #1103
"""

from __future__ import annotations

import argparse
import importlib
import os
import platform
import shutil
import subprocess
import sys
from pathlib import Path

# ── Defaults ──────────────────────────────────────────────────────────────────

DEFAULT_DATA_DIR = Path.home() / "timmy-lora-training"
DEFAULT_ADAPTER_PATH = Path.home() / "timmy-lora-adapter"
DEFAULT_FUSED_PATH = Path.home() / "timmy-fused-model"

# mlx-lm model path — local HuggingFace checkout of Hermes 4 in MLX format.
# Set MLX_HERMES4_PATH env var or pass --model to override.
DEFAULT_MODEL_PATH_ENV = "MLX_HERMES4_PATH"

# Training hyperparameters (conservative for a 36 GB M3 Max)
DEFAULT_BATCH_SIZE = 1
DEFAULT_LORA_LAYERS = 16
DEFAULT_ITERS = 1000
DEFAULT_LEARNING_RATE = 1e-5

# Test prompt used after training
DEFAULT_TEST_PROMPT = (
    "List the open PRs on the Timmy Time Dashboard repo and triage them by priority."
)

# ── Pre-flight checks ─────────────────────────────────────────────────────────


def _check_apple_silicon() -> bool:
    """Return True if running on Apple Silicon."""
    return platform.system() == "Darwin" and platform.machine() == "arm64"


def _check_mlx_lm() -> bool:
    """Return True if mlx-lm is installed and mlx_lm.lora is runnable."""
    return shutil.which("mlx_lm.lora") is not None or _can_import("mlx_lm")


def _can_import(module: str) -> bool:
    try:
        importlib.import_module(module)
        return True
    except ImportError:
        return False


def _resolve_model_path(model_arg: str | None) -> str | None:
    """Resolve model path from arg or environment variable."""
    if model_arg:
        return model_arg
    env_path = os.environ.get(DEFAULT_MODEL_PATH_ENV)
    if env_path:
        return env_path
    return None


def _preflight(model_path: str | None, data_dir: Path, verbose: bool) -> list[str]:
    """Run pre-flight checks and return a list of warnings (empty = all OK)."""
    warnings: list[str] = []

    if not _check_apple_silicon():
        warnings.append(
            "Not running on Apple Silicon. mlx-lm requires an M-series Mac.\n"
            "  Alternative: use Unsloth on Google Colab / RunPod / Modal."
        )

    if not _check_mlx_lm():
        warnings.append("mlx-lm not found. Install with:\n  pip install mlx-lm")

    if model_path is None:
        warnings.append(
            f"No model path specified. Set {DEFAULT_MODEL_PATH_ENV} or pass --model.\n"
            "  Download Hermes 4 in MLX format from HuggingFace:\n"
            "    https://huggingface.co/collections/NousResearch/hermes-4-collection-68a7\n"
            "  or convert the GGUF:\n"
            "    mlx_lm.convert --hf-path NousResearch/Hermes-4-14B --mlx-path ~/hermes4-mlx"
        )
    elif not Path(model_path).exists():
        warnings.append(f"Model path does not exist: {model_path}")

    train_file = data_dir / "train.jsonl"
    if not train_file.exists():
        warnings.append(
            f"Training data not found: {train_file}\n"
            "  Generate it with:\n"
            "    python scripts/export_trajectories.py --verbose\n"
            f"    mkdir -p {data_dir}\n"
            f"    cp ~/timmy-training-data.jsonl {train_file}"
        )
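
    # Hedged addition beyond the original checks: mlx_lm.lora also loads a
    # validation split during training, conventionally valid.jsonl next to
    # train.jsonl. Whether a missing file is fatal varies by mlx-lm version,
    # so surface it here as a warning instead of failing mid-run later.
    valid_file = data_dir / "valid.jsonl"
    if train_file.exists() and not valid_file.exists():
        warnings.append(
            f"Validation data not found: {valid_file}\n"
            "  mlx_lm.lora typically expects a valid.jsonl alongside train.jsonl;\n"
            "  split off ~10% of train.jsonl into it before training."
        )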

    if verbose and not warnings:
        print("Pre-flight checks: all OK")

    return warnings


# ── Command builders ──────────────────────────────────────────────────────────


def _build_train_cmd(
    model_path: str,
    data_dir: Path,
    adapter_path: Path,
    batch_size: int,
    lora_layers: int,
    iters: int,
    learning_rate: float,
) -> list[str]:
    return [
        sys.executable, "-m", "mlx_lm.lora",
        "--model", model_path,
        "--train",
        "--data", str(data_dir),
        "--batch-size", str(batch_size),
        "--lora-layers", str(lora_layers),
        "--iters", str(iters),
        "--learning-rate", str(learning_rate),
        "--adapter-path", str(adapter_path),
    ]


def _build_test_cmd(
    model_path: str,
    adapter_path: Path,
    prompt: str,
) -> list[str]:
    return [
        sys.executable, "-m", "mlx_lm.generate",
        "--model", model_path,
        "--adapter-path", str(adapter_path),
        "--prompt", prompt,
        "--max-tokens", "512",
    ]


def _build_fuse_cmd(
    model_path: str,
    adapter_path: Path,
    save_path: Path,
) -> list[str]:
    return [
        sys.executable, "-m", "mlx_lm.fuse",
        "--model", model_path,
        "--adapter-path", str(adapter_path),
        "--save-path", str(save_path),
    ]


# ── Runner ────────────────────────────────────────────────────────────────────


def _run(cmd: list[str], dry_run: bool, verbose: bool) -> int:
    """Print and optionally execute a command."""
    print("\nCommand:")
    print("  " + " \\\n    ".join(cmd))
    if dry_run:
        print("\n(dry-run — not executing)")
        return 0
    print()
    result = subprocess.run(cmd)
    return result.returncode


# ── Main ──────────────────────────────────────────────────────────────────────


def main(argv: list[str] | None = None) -> int:
    parser = argparse.ArgumentParser(
        description="LoRA fine-tuning launcher for Hermes 4 (AutoLoRA Step 4)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )

    # Mode flags (mutually exclusive; training is the default when neither is set)
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument(
        "--test",
        action="store_true",
        help="Run inference test with trained adapter instead of training",
    )
    mode.add_argument(
        "--fuse",
        action="store_true",
        help="Fuse adapter into base model (for Ollama import)",
    )

    # Paths
    parser.add_argument(
        "--model",
        default=None,
        help=f"Path to local MLX model (or set {DEFAULT_MODEL_PATH_ENV} env var)",
    )
    parser.add_argument(
        "--data",
        type=Path,
        default=DEFAULT_DATA_DIR,
        help=f"Training data directory (default: {DEFAULT_DATA_DIR})",
    )
    parser.add_argument(
        "--adapter-path",
        type=Path,
        default=DEFAULT_ADAPTER_PATH,
        help=f"LoRA adapter output path (default: {DEFAULT_ADAPTER_PATH})",
    )
    parser.add_argument(
        "--save-path",
        type=Path,
        default=DEFAULT_FUSED_PATH,
        help=f"Fused model output path (default: {DEFAULT_FUSED_PATH})",
    )

    # Hyperparameters
    parser.add_argument(
        "--batch-size",
        type=int,
        default=DEFAULT_BATCH_SIZE,
        help=f"Training batch size (default: {DEFAULT_BATCH_SIZE}; keep at 1 if memory is tight)",
    )
    parser.add_argument(
        "--lora-layers",
        type=int,
        default=DEFAULT_LORA_LAYERS,
        help=f"Number of LoRA layers (default: {DEFAULT_LORA_LAYERS}; reduce if OOM)",
    )
    parser.add_argument(
        "--iters",
        type=int,
        default=DEFAULT_ITERS,
        help=f"Training iterations (default: {DEFAULT_ITERS})",
    )
    parser.add_argument(
        "--learning-rate",
        type=float,
        default=DEFAULT_LEARNING_RATE,
        help=f"Learning rate (default: {DEFAULT_LEARNING_RATE})",
    )

    # Misc
    parser.add_argument(
        "--prompt",
        default=DEFAULT_TEST_PROMPT,
        help="Prompt for --test mode",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Print command without executing",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Print extra progress information",
    )
    parser.add_argument(
        "--skip-preflight",
        action="store_true",
        help="Skip pre-flight checks (useful in CI)",
    )

    args = parser.parse_args(argv)
    model_path = _resolve_model_path(args.model)

    # ── Pre-flight ──────────────────────────────────────────────────────────
    if not args.skip_preflight:
        warnings = _preflight(model_path, args.data, args.verbose)
        if warnings:
            for w in warnings:
                print(f"WARNING: {w}\n")
            if not args.dry_run:
                print("Aborting due to pre-flight warnings. Use --dry-run to see commands anyway.")
                return 1

    if model_path is None:
        # Allow dry-run without a model for documentation purposes
        model_path = ""

    # ── Mode dispatch ────────────────────────────────────────────────────────
    if args.test:
        print(f"Testing fine-tuned model with adapter: {args.adapter_path}")
        cmd = _build_test_cmd(model_path, args.adapter_path, args.prompt)
        return _run(cmd, args.dry_run, args.verbose)

    if args.fuse:
        print(f"Fusing adapter {args.adapter_path} into base model → {args.save_path}")
        cmd = _build_fuse_cmd(model_path, args.adapter_path, args.save_path)
        rc = _run(cmd, args.dry_run, args.verbose)
        if rc == 0 and not args.dry_run:
            print(
                f"\nFused model saved to: {args.save_path}\n"
                "To import into Ollama:\n"
                "  ollama create timmy-hermes4 -f Modelfile.timmy-hermes4\n"
                "  (edit the Modelfile so FROM points at the fused model; see the\n"
                "  format note at the end of this file)"
            )
        return rc

    # Default: train
    print("Starting LoRA fine-tuning")
    print(f"  Model:         {model_path}")
    print(f"  Data:          {args.data}")
    print(f"  Adapter path:  {args.adapter_path}")
    print(f"  Iterations:    {args.iters}")
    print(f"  Batch size:    {args.batch_size}")
    print(f"  LoRA layers:   {args.lora_layers}")
    print(f"  Learning rate: {args.learning_rate}")
    print()
    print("Estimated time: 2-8 hours on M3 Max (depends on dataset size).")
    print("If you hit OOM, reduce --lora-layers to 8 (batch size already defaults to 1).")

    cmd = _build_train_cmd(
        model_path=model_path,
        data_dir=args.data,
        adapter_path=args.adapter_path,
        batch_size=args.batch_size,
        lora_layers=args.lora_layers,
        iters=args.iters,
        learning_rate=args.learning_rate,
    )
    rc = _run(cmd, args.dry_run, args.verbose)
    if rc == 0 and not args.dry_run:
        print(
            f"\nTraining complete! Adapter saved to: {args.adapter_path}\n"
            "Test with:\n"
            "  python scripts/lora_finetune.py --test\n"
            "Then fuse + import to Ollama:\n"
            "  python scripts/lora_finetune.py --fuse"
        )
    return rc


if __name__ == "__main__":
    sys.exit(main())
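
# ── Note: Ollama import format ────────────────────────────────────────────────
# Hedged note (not load-bearing for this script): mlx_lm.fuse writes an
# MLX/safetensors model directory by default, not a GGUF file. If your Ollama
# Modelfile's FROM line expects a .gguf path, check whether your installed
# mlx-lm's fuse command offers GGUF export (`python -m mlx_lm.fuse --help`);
# support varies by mlx-lm version and model architecture.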