#!/usr/bin/env bash
# scripts/fuse_and_load.sh
#
# AutoLoRA Step 5: Fuse LoRA adapter → convert to GGUF → import into Ollama
#
# Prerequisites:
#   - mlx_lm installed:  pip install mlx-lm
#   - llama.cpp cloned:  ~/llama.cpp (with convert_hf_to_gguf.py)
#   - Ollama running:    ollama serve (in another terminal)
#   - LoRA adapter at:   ~/timmy-lora-adapter
#   - Base model at:     $HERMES_MODEL_PATH (see below)
#
# Usage:
#   ./scripts/fuse_and_load.sh
#   HERMES_MODEL_PATH=/custom/path ./scripts/fuse_and_load.sh
#   QUANT=q4_k_m ./scripts/fuse_and_load.sh
#
# Environment variables:
#   HERMES_MODEL_PATH  Path to the Hermes 4 14B HF model dir (default below)
#   ADAPTER_PATH       Path to LoRA adapter (default: ~/timmy-lora-adapter)
#   FUSED_DIR          Where to save the fused HF model (default: ~/timmy-fused-model)
#   GGUF_PATH          Where to save the GGUF file (default: ~/timmy-fused-model.Q5_K_M.gguf)
#   QUANT              GGUF quantisation (default: q5_k_m)
#   OLLAMA_MODEL       Name to register in Ollama (default: timmy)
#   MODELFILE          Path to Modelfile (default: Modelfile.timmy in repo root)
#   SKIP_FUSE          Set to 1 to skip fuse step (use existing fused model)
#   SKIP_CONVERT       Set to 1 to skip GGUF conversion (use existing GGUF)
#
# Epic: #1091 Project Bannerlord — AutoLoRA Sovereignty Loop (Step 5 of 7)
# Refs: #1104

# Abort on any error, on unset variables, and on failures anywhere in a pipeline.
set -euo pipefail
# ── Config ────────────────────────────────────────────────────────────────────
# Every setting is overridable from the environment; the :- defaults below are
# used only when the corresponding variable is unset or empty.

HERMES_MODEL_PATH="${HERMES_MODEL_PATH:-${HOME}/hermes4-14b-hf}"
ADAPTER_PATH="${ADAPTER_PATH:-${HOME}/timmy-lora-adapter}"
FUSED_DIR="${FUSED_DIR:-${HOME}/timmy-fused-model}"
QUANT="${QUANT:-q5_k_m}"
# ${QUANT^^} upper-cases the quant tag (q5_k_m → Q5_K_M) for the filename.
GGUF_FILENAME="timmy-fused-model.${QUANT^^}.gguf"
GGUF_PATH="${GGUF_PATH:-${HOME}/${GGUF_FILENAME}}"
OLLAMA_MODEL="${OLLAMA_MODEL:-timmy}"
# Repo root = parent of the directory containing this script; resolved to an
# absolute path so the default MODELFILE works from any cwd.
REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
MODELFILE="${MODELFILE:-${REPO_ROOT}/Modelfile.timmy}"
# ── Helpers ───────────────────────────────────────────────────────────────────

# Print a namespaced progress message to stdout.
# printf (not echo) so messages starting with '-' or containing backslashes
# are emitted verbatim.
log() { printf '[fuse_and_load] %s\n' "$*"; }

# Print a namespaced error to stderr and abort the script with status 1.
fail() { printf '[fuse_and_load] ERROR: %s\n' "$*" >&2; exit 1; }

# require_cmd CMD HINT — abort with HINT appended if CMD is not on PATH.
require_cmd() {
  command -v "$1" >/dev/null 2>&1 || fail "'$1' not found. $2"
}
# ── Step 1: Fuse LoRA adapter into base model ─────────────────────────────────
# Merges the LoRA weights into the base HF model with mlx_lm, producing a
# standalone fused model directory. Skippable via SKIP_FUSE=1 for re-runs.

if [[ "${SKIP_FUSE:-0}" == "1" ]]; then
  log "Skipping fuse step (SKIP_FUSE=1)"
else
  log "Step 1/3: Fusing LoRA adapter into base model"
  log " Base model: ${HERMES_MODEL_PATH}"
  log " Adapter: ${ADAPTER_PATH}"
  log " Output dir: ${FUSED_DIR}"

  require_cmd mlx_lm.fuse "Install with: pip install mlx-lm"

  # Fail fast with a clear message rather than letting mlx_lm.fuse error out.
  [[ -d "${HERMES_MODEL_PATH}" ]] || fail "Base model directory not found: ${HERMES_MODEL_PATH}"
  [[ -d "${ADAPTER_PATH}" ]] || fail "LoRA adapter directory not found: ${ADAPTER_PATH}"

  mlx_lm.fuse \
    --model "${HERMES_MODEL_PATH}" \
    --adapter-path "${ADAPTER_PATH}" \
    --save-path "${FUSED_DIR}"

  log "Fuse complete → ${FUSED_DIR}"
fi
# ── Step 2: Convert fused model to GGUF ──────────────────────────────────────
# Runs llama.cpp's HF→GGUF converter on the fused model. Skippable via
# SKIP_CONVERT=1; either way the GGUF must exist afterwards (checked below).

if [[ "${SKIP_CONVERT:-0}" == "1" ]]; then
  log "Skipping convert step (SKIP_CONVERT=1)"
else
  log "Step 2/3: Converting fused model to GGUF (${QUANT^^})"
  log " Input: ${FUSED_DIR}"
  log " Output: ${GGUF_PATH}"

  LLAMACPP_CONVERT="${HOME}/llama.cpp/convert_hf_to_gguf.py"
  if [[ ! -f "${LLAMACPP_CONVERT}" ]]; then
    # Hint printed separately: the old single message embedded a literal '\n'
    # that echo (without -e) printed verbatim instead of as a newline.
    echo "[fuse_and_load] Clone it with: git clone https://github.com/ggerganov/llama.cpp ~/llama.cpp" >&2
    fail "llama.cpp convert script not found at ${LLAMACPP_CONVERT}"
  fi
  [[ -d "${FUSED_DIR}" ]] || fail "Fused model directory not found: ${FUSED_DIR}"

  # NOTE(review): upstream convert_hf_to_gguf.py accepts only a small --outtype
  # set (f32/f16/bf16/q8_0/auto); k-quants such as q5_k_m normally need a
  # separate llama-quantize pass — confirm against the pinned llama.cpp rev.
  python3 "${LLAMACPP_CONVERT}" \
    "${FUSED_DIR}" \
    --outtype "${QUANT}" \
    --outfile "${GGUF_PATH}"

  log "Conversion complete → ${GGUF_PATH}"
fi

# Sanity gate for Step 3: holds whether conversion ran or was skipped.
[[ -f "${GGUF_PATH}" ]] || fail "GGUF file not found at expected path: ${GGUF_PATH}"
# ── Step 3: Import into Ollama ────────────────────────────────────────────────
# Registers the GGUF under ${OLLAMA_MODEL} by rewriting the Modelfile's FROM
# line to point at the freshly built GGUF, then running 'ollama create'.

log "Step 3/3: Importing into Ollama as '${OLLAMA_MODEL}'"
log " GGUF: ${GGUF_PATH}"
log " Modelfile: ${MODELFILE}"

require_cmd ollama "Install Ollama: https://ollama.com/download"

[[ -f "${MODELFILE}" ]] || fail "Modelfile not found: ${MODELFILE}"

# Patch the GGUF path into the Modelfile at runtime (sed on a copy).
TMP_MODELFILE="$(mktemp /tmp/Modelfile.timmy.XXXXXX)"
# Remove the temp copy on every exit path — previously it leaked whenever
# 'ollama create' (or sed) failed under set -e.
trap 'rm -f -- "${TMP_MODELFILE}"' EXIT
# '|' is the sed delimiter, so a GGUF_PATH containing '|' would break this;
# paths built above never do.
sed "s|^FROM .*|FROM ${GGUF_PATH}|" "${MODELFILE}" > "${TMP_MODELFILE}"

ollama create "${OLLAMA_MODEL}" -f "${TMP_MODELFILE}"

log "Import complete. Verifying..."
# ── Verify ────────────────────────────────────────────────────────────────────
# Anchor the match on the model name followed by ':' or whitespace: 'ollama
# list' shows models as 'name:tag', and a bare "^${OLLAMA_MODEL}" prefix match
# would wrongly accept e.g. 'timmy-old' when looking for 'timmy'.

if ollama list | grep -q "^${OLLAMA_MODEL}[:[:space:]]"; then
  log "✓ '${OLLAMA_MODEL}' is registered in Ollama"
else
  fail "'${OLLAMA_MODEL}' not found in 'ollama list' — import may have failed"
fi
# Final summary banner plus suggested follow-up actions. A single here-doc
# replaces the run of echo calls; the emitted text is unchanged.
cat <<EOF

==========================================
 Timmy model loaded successfully
 Model: ${OLLAMA_MODEL}
 GGUF: ${GGUF_PATH}
==========================================

Next steps:
 1. Test skills: python scripts/test_timmy_skills.py
 2. Switch harness: hermes model ${OLLAMA_MODEL}
 3. File issues for any failing skills
EOF