#!/usr/bin/env bash
# scripts/fuse_and_load.sh
#
# AutoLoRA Step 5: Fuse LoRA adapter → convert to GGUF → import into Ollama
#
# Prerequisites:
#   - mlx_lm installed:        pip install mlx-lm
#   - llama.cpp cloned+built:  ~/llama.cpp (convert_hf_to_gguf.py + llama-quantize)
#   - Ollama running:          ollama serve (in another terminal)
#   - LoRA adapter at:         ~/timmy-lora-adapter
#   - Base model at:           $HERMES_MODEL_PATH (see below)
#
# Usage:
#   ./scripts/fuse_and_load.sh
#   HERMES_MODEL_PATH=/custom/path ./scripts/fuse_and_load.sh
#   QUANT=q4_k_m ./scripts/fuse_and_load.sh
#
# Environment variables:
#   HERMES_MODEL_PATH  Path to the Hermes 4 14B HF model dir (default below)
#   ADAPTER_PATH       Path to the LoRA adapter (default: ~/timmy-lora-adapter)
#   FUSED_DIR          Where to save the fused HF model (default: ~/timmy-fused-model)
#   GGUF_PATH          Where to save the GGUF file (default: ~/timmy-fused-model.Q5_K_M.gguf)
#   QUANT              GGUF quantisation (default: q5_k_m)
#   OLLAMA_MODEL       Name to register in Ollama (default: timmy)
#   MODELFILE          Path to the Modelfile (default: Modelfile.timmy in repo root)
#   SKIP_FUSE          Set to 1 to skip the fuse step (use existing fused model)
#   SKIP_CONVERT       Set to 1 to skip GGUF conversion (use existing GGUF)
#
# Epic: #1091 Project Bannerlord — AutoLoRA Sovereignty Loop (Step 5 of 7)
# Refs: #1104

set -euo pipefail

# ── Config ────────────────────────────────────────────────────────────────────
HERMES_MODEL_PATH="${HERMES_MODEL_PATH:-${HOME}/hermes4-14b-hf}"
ADAPTER_PATH="${ADAPTER_PATH:-${HOME}/timmy-lora-adapter}"
FUSED_DIR="${FUSED_DIR:-${HOME}/timmy-fused-model}"
QUANT="${QUANT:-q5_k_m}"
GGUF_FILENAME="timmy-fused-model.${QUANT^^}.gguf"
GGUF_PATH="${GGUF_PATH:-${HOME}/${GGUF_FILENAME}}"
OLLAMA_MODEL="${OLLAMA_MODEL:-timmy}"
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
MODELFILE="${MODELFILE:-${REPO_ROOT}/Modelfile.timmy}"

# ── Helpers ───────────────────────────────────────────────────────────────────
log()  { echo "[fuse_and_load] $*"; }
fail() { echo "[fuse_and_load] ERROR: $*" >&2; exit 1; }

require_cmd() {
  command -v "$1" >/dev/null 2>&1 || fail "'$1' not found. $2"
}

# ── Step 1: Fuse LoRA adapter into base model ─────────────────────────────────
if [[ "${SKIP_FUSE:-0}" == "1" ]]; then
  log "Skipping fuse step (SKIP_FUSE=1)"
else
  log "Step 1/3: Fusing LoRA adapter into base model"
  log "  Base model: ${HERMES_MODEL_PATH}"
  log "  Adapter:    ${ADAPTER_PATH}"
  log "  Output dir: ${FUSED_DIR}"

  require_cmd mlx_lm.fuse "Install with: pip install mlx-lm"
  [[ -d "${HERMES_MODEL_PATH}" ]] || fail "Base model directory not found: ${HERMES_MODEL_PATH}"
  [[ -d "${ADAPTER_PATH}" ]]      || fail "LoRA adapter directory not found: ${ADAPTER_PATH}"

  mlx_lm.fuse \
    --model "${HERMES_MODEL_PATH}" \
    --adapter-path "${ADAPTER_PATH}" \
    --save-path "${FUSED_DIR}"

  log "Fuse complete → ${FUSED_DIR}"
fi

# ── Step 2: Convert fused model to GGUF ───────────────────────────────────────
if [[ "${SKIP_CONVERT:-0}" == "1" ]]; then
  log "Skipping convert step (SKIP_CONVERT=1)"
else
  log "Step 2/3: Converting fused model to GGUF (${QUANT^^})"
  log "  Input:  ${FUSED_DIR}"
  log "  Output: ${GGUF_PATH}"

  LLAMACPP_CONVERT="${HOME}/llama.cpp/convert_hf_to_gguf.py"
  [[ -f "${LLAMACPP_CONVERT}" ]] || fail "llama.cpp convert script not found at ${LLAMACPP_CONVERT}. Clone it: git clone https://github.com/ggerganov/llama.cpp ~/llama.cpp"
  [[ -d "${FUSED_DIR}" ]] || fail "Fused model directory not found: ${FUSED_DIR}"

  case "${QUANT,,}" in
    f32|f16|bf16|q8_0)
      # convert_hf_to_gguf.py can emit these output types directly.
      python3 "${LLAMACPP_CONVERT}" \
        "${FUSED_DIR}" \
        --outtype "${QUANT,,}" \
        --outfile "${GGUF_PATH}"
      ;;
    *)
      # K-quants (q5_k_m, q4_k_m, …) are not valid --outtype values for
      # convert_hf_to_gguf.py. Convert to f16 first, then requantise with
      # llama-quantize from the llama.cpp build.
      F16_GGUF="${GGUF_PATH%.gguf}.f16.gguf"
      python3 "${LLAMACPP_CONVERT}" \
        "${FUSED_DIR}" \
        --outtype f16 \
        --outfile "${F16_GGUF}"

      LLAMA_QUANTIZE="${HOME}/llama.cpp/build/bin/llama-quantize"
      [[ -x "${LLAMA_QUANTIZE}" ]] || fail "llama-quantize not found at ${LLAMA_QUANTIZE}. Build llama.cpp first: cmake -B build && cmake --build build --config Release"
      "${LLAMA_QUANTIZE}" "${F16_GGUF}" "${GGUF_PATH}" "${QUANT^^}"
      rm -f "${F16_GGUF}"
      ;;
  esac

  log "Conversion complete → ${GGUF_PATH}"
fi

[[ -f "${GGUF_PATH}" ]] || fail "GGUF file not found at expected path: ${GGUF_PATH}"

# ── Step 3: Import into Ollama ────────────────────────────────────────────────
log "Step 3/3: Importing into Ollama as '${OLLAMA_MODEL}'"
log "  GGUF:      ${GGUF_PATH}"
log "  Modelfile: ${MODELFILE}"

require_cmd ollama "Install Ollama: https://ollama.com/download"
[[ -f "${MODELFILE}" ]] || fail "Modelfile not found: ${MODELFILE}"

# Patch the GGUF path into the Modelfile at runtime (sed on a copy, so the
# committed Modelfile is never modified).
TMP_MODELFILE="$(mktemp /tmp/Modelfile.timmy.XXXXXX)"
sed "s|^FROM .*|FROM ${GGUF_PATH}|" "${MODELFILE}" > "${TMP_MODELFILE}"

ollama create "${OLLAMA_MODEL}" -f "${TMP_MODELFILE}"
rm -f "${TMP_MODELFILE}"

log "Import complete. Verifying..."

# ── Verify ────────────────────────────────────────────────────────────────────
# 'ollama list' prints models as NAME:TAG, so anchor the match up to the colon
# to avoid false positives from similarly prefixed model names.
if ollama list | grep -q "^${OLLAMA_MODEL}:"; then
  log "✓ '${OLLAMA_MODEL}' is registered in Ollama"
else
  fail "'${OLLAMA_MODEL}' not found in 'ollama list' — import may have failed"
fi

echo ""
echo "=========================================="
echo "  Timmy model loaded successfully"
echo "  Model: ${OLLAMA_MODEL}"
echo "  GGUF:  ${GGUF_PATH}"
echo "=========================================="
echo ""
echo "Next steps:"
echo "  1. Test skills:    python scripts/test_timmy_skills.py"
echo "  2. Switch harness: hermes model ${OLLAMA_MODEL}"
echo "  3. File issues for any failing skills"