Per direction shift (the-nexus#542). Replaces the autolora repo (1,500 lines of custom pipeline code) with config files for existing tools: - axolotl.yaml: replaces train_modal.py (239 lines) - mlx-lora.yaml: replaces MLX training scripts - eval-tasks.yaml: replaces run_eval.py (300 lines) - Makefile: replaces run_vibes.py, compare.py, convert_to_mlx.py Data migrated as-is: - curated_dataset.jsonl (26 gold-standard conversations) - preference_pairs.jsonl (DPO pairs) - prompts_vibes.yaml, prompts_nexus_vibes.yaml - v0-baseline eval results (historical record) Thin glue kept: - build_curated.py (data authoring, not infrastructure) - ingest_trajectories.py (domain-specific quality filter) Dependencies: pip install axolotl mlx-lm lm-evaluation-harness
102 lines
4.6 KiB
Makefile
102 lines
4.6 KiB
Makefile
# AutoLoRA Training Pipeline
# Replaces: autolora repo (1,500 lines) with config + make targets
#
# Prerequisites:
#   pip install axolotl mlx-lm lm-evaluation-harness pyyaml
#   ollama CLI on PATH (invoked by `make vibes`)
#
# Targets:
#   make train-cloud   — QLoRA on cloud GPU via Axolotl
#   make train-local   — LoRA on Apple Silicon via MLX
#   make eval          — Standard benchmarks via lm-eval-harness
#   make eval-baseline — Same benchmarks against the baseline model
#   make vibes         — Hand-picked prompts through Ollama, human review
#   make ingest        — Pull heartbeat trajectories into training data
#   make curated       — Regenerate curated exemplar dataset
#   make convert       — Convert merged dataset to MLX format (train/valid split)
#   make help          — List targets with their descriptions
|
|
|
|
# User-tunable knobs. `?=` only assigns when the variable is not already set,
# so each of these can be overridden per invocation, e.g.
#   make vibes MODEL=other:tag

# Model tag used by `eval` and `vibes`.
MODEL      ?= timmy:v0.1-q4
# Model tag used by `eval-baseline`.
BASELINE   ?= hermes3:latest
# Base URL of the Ollama server that the eval targets talk to.
OLLAMA_URL ?= http://localhost:11434
# Directory that receives the vibes report.
OUTPUT     ?= output
|
|
|
|
# ── Training ──────────────────────────────────────────────────────────
|
|
|
|
# Cloud training: runs the Axolotl CLI against axolotl.yaml in the current
# directory. All training settings live in that config file.
train-cloud: ## QLoRA fine-tune on cloud GPU (Axolotl)
	axolotl train axolotl.yaml
|
|
|
|
# Local training: runs the mlx_lm.lora module with mlx-lora.yaml from the
# current directory as its config.
train-local: ## LoRA fine-tune on Apple Silicon (MLX)
	python -m mlx_lm.lora --config mlx-lora.yaml
|
|
|
|
# ── Evaluation ────────────────────────────────────────────────────────
|
|
|
|
# Benchmarks run by both eval targets (comma-separated lm-eval task names).
# Override per run: make eval EVAL_TASKS=hellaswag
EVAL_TASKS ?= hellaswag,truthfulqa_mc2,arc_challenge,winogrande

# run_lm_eval — benchmark one model served by Ollama with lm-eval-harness.
#   $(1)  model tag; also names the results directory under evals_archive/.
# Shared by `eval` and `eval-baseline` so both always run the same tasks with
# the same arguments.
# NOTE(review): base_url is the full /v1/completions endpoint because the
# lm-eval API that accepts `tokenized_requests` POSTs to base_url verbatim;
# the previous value stopped at /v1 — confirm against the installed lm-eval.
# NOTE(review): the default tasks are multiple-choice benchmarks scored from
# prompt log-probabilities — confirm the Ollama endpoint returns them.
define run_lm_eval
lm_eval --model local-completions \
    --model_args "model=$(1),base_url=$(OLLAMA_URL)/v1/completions,tokenized_requests=False" \
    --tasks $(EVAL_TASKS) \
    --output_path "evals_archive/$(1)/"
@echo "Results in evals_archive/$(1)/"
endef

eval: ## Run standard benchmarks against Ollama model
	$(call run_lm_eval,$(MODEL))

eval-baseline: ## Run same benchmarks against baseline for comparison
	$(call run_lm_eval,$(BASELINE))
|
|
|
|
# Python driver for the `vibes` target. It is stored in a multi-line variable
# and exported so the recipe can hand it to `python -c` without shell-escaping
# every quote.
#   argv[1]  model tag passed to `ollama run`
#   argv[2]  Markdown report to append to
# For every entry under `prompts` in data/prompts_vibes.yaml it appends the
# prompt, the expected behaviour, the model's reply and a blank score line.
# A timeout or a failed `ollama run` is written into the report for that
# prompt instead of aborting the remaining prompts.
define VIBES_PY
import subprocess
import sys

import yaml

model, report_path = sys.argv[1], sys.argv[2]

with open("data/prompts_vibes.yaml") as fh:
    prompts = yaml.safe_load(fh)["prompts"]

with open(report_path, "a") as report:
    for p in prompts:
        sys.stdout.write(f" [{p['id']}] {p['category']}...")
        sys.stdout.flush()
        report.write(f"## [{p['id']}] {p['category']}\n")
        report.write(f"PROMPT: {p['prompt']}\n")
        report.write(f"EXPECTED: {p['expected']}\n\n")
        report.write("RESPONSE:\n")
        try:
            result = subprocess.run(
                ["ollama", "run", model, p["prompt"]],
                capture_output=True, text=True, timeout=120,
            )
            reply = result.stdout
            if result.returncode != 0:
                reply += f"\n[ollama exited {result.returncode}: {result.stderr.strip()}]"
        except subprocess.TimeoutExpired:
            reply = "[no response: ollama run timed out after 120s]"
        report.write(reply)
        report.write("\n\nSCORE: ___/5\n\n---\n\n")
        print(" done")
endef
export VIBES_PY

# Writes $(OUTPUT)/vibes-$(MODEL).md: a date/model header followed by one
# section per prompt. The output directory is created first so the header
# redirect cannot fail on a fresh checkout.
vibes: ## Run vibes check — hand-picked prompts, human review
	@mkdir -p "$(OUTPUT)"
	@echo "=== Vibes Check: $(MODEL) ==="
	@echo "Date: $$(date '+%Y-%m-%d %H:%M')" > "$(OUTPUT)/vibes-$(MODEL).md"
	@echo "Model: $(MODEL)" >> "$(OUTPUT)/vibes-$(MODEL).md"
	@echo "" >> "$(OUTPUT)/vibes-$(MODEL).md"
	@python -c "$$VIBES_PY" "$(MODEL)" "$(OUTPUT)/vibes-$(MODEL).md"
	@echo "Output: $(OUTPUT)/vibes-$(MODEL).md — fill in scores manually."
|
|
|
|
# ── Data Pipeline ─────────────────────────────────────────────────────
|
|
|
|
# Runs ingest_trajectories.py with three paths: the trajectory directory under
# the user's home, the curated dataset, and the merged output file that the
# `convert` target reads.
# NOTE(review): what the script filters or merges is not visible from here.
ingest: ## Pull heartbeat trajectories into training data
	python ingest_trajectories.py \
		--trajectories ~/.nexus/trajectories/ \
		--curated data/curated_dataset.jsonl \
		--output data/merged_training_data.jsonl
	@echo "Merged dataset ready. Convert for MLX with: make convert"
|
|
|
|
# Runs build_curated.py with no arguments, then prints a confirmation.
# NOTE(review): the script's output path is not visible from this file.
curated: ## Regenerate curated exemplar dataset
	python build_curated.py
	@echo "Curated dataset regenerated."
|
|
|
|
# Python driver for the `convert` target, exported so the recipe can run it
# with `python -c`. It reads data/merged_training_data.jsonl, where each line
# is an object with a `conversations` list of {from, value} turns, and rewrites
# every conversation as {"messages": [{role, content}, ...]}.
# Role mapping: system→system, human→user, gpt→assistant, tool→user; any
# other or missing `from` value becomes `user`.
# The first 90% of conversations (at least one) go to train.jsonl and the
# rest to valid.jsonl. Blank lines in the input are skipped, and an input with
# no conversations is an error rather than a misleading "train: 1, valid: -1".
define CONVERT_PY
import json
import sys

SRC = "data/merged_training_data.jsonl"
OUT_DIR = "data/mlx_curated"
ROLE_MAP = {"system": "system", "human": "user", "gpt": "assistant", "tool": "user"}

with open(SRC) as fh:
    sessions = [json.loads(line) for line in fh if line.strip()]
if not sessions:
    sys.exit(f"{SRC} contains no conversations to convert")

converted = [
    {"messages": [
        {"role": ROLE_MAP.get(t.get("from", ""), "user"), "content": t.get("value", "")}
        for t in s.get("conversations", [])
    ]}
    for s in sessions
]

split = max(1, int(len(converted) * 0.9))
for name, chunk in (("train", converted[:split]), ("valid", converted[split:])):
    with open(f"{OUT_DIR}/{name}.jsonl", "w") as out:
        out.writelines(json.dumps(c) + "\n" for c in chunk)
print(f"train: {split}, valid: {len(converted) - split}")
endef
export CONVERT_PY

convert: ## Convert merged dataset to MLX format (train/valid split)
	@mkdir -p data/mlx_curated
	python -c "$$CONVERT_PY"
|
|
|
|
# ── Helpers ───────────────────────────────────────────────────────────
|
|
|
|
# These targets name commands, not files. Declaring them phony makes them run
# every time and keeps a stray file called e.g. `eval` from masking them.
.PHONY: train-cloud train-local \
        eval eval-baseline vibes \
        ingest curated convert \
        help
|
|
|
|
# Self-documenting help: prints every `target: ## description` line found in
# the parsed makefiles as an aligned, colourised "target  description" row.
# A single awk pass replaces the former grep|awk pipeline:
#   - `.*?` is not a lazy quantifier in POSIX ERE (adjacent duplication
#     symbols are undefined there), so plain `.*` is used instead;
#   - grep prefixes each match with the file name when MAKEFILE_LIST holds
#     more than one file, which would corrupt the target column.
help: ## Show this help
	@awk 'BEGIN {FS = ":.*## "} /^[a-zA-Z_-]+:.*## / {printf " \033[36m%-16s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
|