Files
timmy-config/scripts/pipeline_training_factory.sh

72 lines
2.0 KiB
Bash

#!/usr/bin/env bash
# pipeline_training_factory.sh — Prepare generated training data for model fine-tuning.
#
# Counts the JSONL training pairs found under TRAINING_DIR, checks whether a GPU
# is present, and merges the data into $HERMES_HOME/training_merged.jsonl.
# LoRA/QLoRA fine-tuning itself is not started here; it is left to the
# autolora pipeline.
#
# Usage:
#   ./scripts/pipeline_training_factory.sh --max-tokens 150000
#   ./scripts/pipeline_training_factory.sh --dry-run
#
# Options:
#   --max-tokens N   token budget shown in the log output (default: 150000)
#   --dry-run        report how many pairs were found, then exit without merging
#
# Environment (defaults in parentheses):
#   HERMES_HOME   ($HOME/.hermes)               directory the merged file is written to
#   TRAINING_DIR  ($HOME/.timmy/training-data)  searched for *.jsonl files
#   MODELS_DIR    ($HOME/.timmy/models)         created if it does not exist
#   MAX_TOKENS    (150000)                      same meaning as --max-tokens
#
# Strict mode: abort on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail
# Locations and budget: a value already present in the environment wins;
# otherwise the default after `:=` is assigned.
: "${HERMES_HOME:=$HOME/.hermes}"
: "${TRAINING_DIR:=$HOME/.timmy/training-data}"
: "${MODELS_DIR:=$HOME/.timmy/models}"
: "${MAX_TOKENS:=150000}"

# Run state; these always start from fixed values regardless of the environment.
TOKENS_USED=0
DRY_RUN=false
#######################################
# Parse command-line flags into the MAX_TOKENS / DRY_RUN globals.
#   --max-tokens N | --max-tokens=N   set the token budget
#   --dry-run                         enable dry-run mode
# Unrecognised arguments are ignored.
# Globals:   MAX_TOKENS (written), DRY_RUN (written)
# Arguments: the script's command-line arguments
# Exits:     2 if --max-tokens is given without a value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --max-tokens)
        # Without this guard a trailing `--max-tokens` reads an unset $2 and
        # dies with an opaque "unbound variable" error under `set -u`.
        if [[ $# -lt 2 ]]; then
          printf 'error: --max-tokens requires a value\n' >&2
          exit 2
        fi
        MAX_TOKENS="$2"
        shift 2
        ;;
      --max-tokens=*)
        MAX_TOKENS="${1#--max-tokens=}"
        shift
        ;;
      --dry-run)
        DRY_RUN=true
        shift
        ;;
      *)
        shift
        ;;
    esac
  done
}
parse_args "$@"
# Print a message on stdout, prefixed with the script tag and the current
# wall-clock time (HH:MM:SS). All arguments are joined into one message.
log() {
  printf '[training-factory %s] %s\n' "$(date '+%H:%M:%S')" "$*"
}
mkdir -p "$MODELS_DIR"

# Collect up to 20 JSONL training files as a newline-separated list.
# `|| true`: under `set -e -o pipefail` the assignment would otherwise abort the
# script silently when find fails (e.g. TRAINING_DIR does not exist) or is cut
# short by head closing the pipe. Falling through lets the shortage be logged.
DATA_FILES=$(find "$TRAINING_DIR" -name '*.jsonl' -type f 2>/dev/null | head -20) || true

# Each line of each file is counted as one training pair. The list is read line
# by line so that paths containing spaces stay intact.
TOTAL_PAIRS=0
FILE_COUNT=0
while IFS= read -r f; do
  # An empty list still yields one blank line from the here-string; skip it.
  if [[ -z "$f" ]]; then
    continue
  fi
  COUNT=$(wc -l < "$f" 2>/dev/null || echo 0)
  TOTAL_PAIRS=$((TOTAL_PAIRS + COUNT))
  FILE_COUNT=$((FILE_COUNT + 1))
done <<< "$DATA_FILES"
log "Found $TOTAL_PAIRS training pairs across $FILE_COUNT files"

# Too little data is not an error: report it and stop successfully.
if [[ "$TOTAL_PAIRS" -lt 10 ]]; then
  log "Insufficient training data ($TOTAL_PAIRS pairs < 10 minimum). Skipping."
  exit 0
fi
# Dry-run mode: report what a real run would work with, then stop successfully.
if [[ "$DRY_RUN" == true ]]; then
  log "DRY-RUN: Would train on $TOTAL_PAIRS pairs (budget: $MAX_TOKENS)"
  exit 0
fi
# Check for GPU
# NVIDIA: nvidia-smi must be installed and run successfully.
# macOS:  system_profiler must succeed and mention "Metal" in its display info.
GPU_AVAILABLE=false
if command -v nvidia-smi &>/dev/null && nvidia-smi &>/dev/null; then
  GPU_AVAILABLE=true
  log "GPU detected"
elif [[ "$(uname)" == "Darwin" ]] \
  && display_info=$(system_profiler SPDisplaysDataType 2>/dev/null) \
  && [[ "$display_info" == *Metal* ]]; then
  # The output is captured before matching: piping straight into `grep -q` lets
  # grep exit at the first hit, which can SIGPIPE system_profiler and, under
  # `pipefail`, turn a successful match into a failed pipeline.
  GPU_AVAILABLE=true
  log "Apple Metal GPU detected"
fi
# Build merged training file
MERGED="$HERMES_HOME/training_merged.jsonl"
# The redirect below fails if the target directory is missing, so create it.
mkdir -p "$HERMES_HOME"

# DATA_FILES is a newline-separated list. Rebuild it as an array so that paths
# containing spaces reach cat as single arguments.
MERGE_INPUTS=()
while IFS= read -r f; do
  if [[ -n "$f" ]]; then
    MERGE_INPUTS+=("$f")
  fi
done <<< "$DATA_FILES"

# cat with no operands would block reading stdin; refuse to merge nothing.
if [[ ${#MERGE_INPUTS[@]} -eq 0 ]]; then
  log "No training files to merge."
  exit 1
fi

# cat's own diagnostics are left visible so a failed merge can be diagnosed.
if ! cat -- "${MERGE_INPUTS[@]}" > "$MERGED"; then
  log "Failed to write merged training data to $MERGED"
  exit 1
fi

# Rough estimate: 60 tokens per training pair.
TOKENS_USED=$((TOTAL_PAIRS * 60))
log "Merged training data: $MERGED ($TOTAL_PAIRS pairs, ~${TOKENS_USED} tokens)"
# Log completion (actual training would be triggered by autolora pipeline)
log "Training data ready. Tokens used: $TOKENS_USED / $MAX_TOKENS"
log "Run autolora pipeline for actual fine-tuning."
exit 0