Per direction shift (the-nexus#542). Replaces the autolora repo (1,500 lines
of custom pipeline code) with config files for existing tools:

- axolotl.yaml: replaces train_modal.py (239 lines)
- mlx-lora.yaml: replaces MLX training scripts
- eval-tasks.yaml: replaces run_eval.py (300 lines)
- Makefile: replaces run_vibes.py, compare.py, convert_to_mlx.py

Data migrated as-is:

- curated_dataset.jsonl (26 gold-standard conversations)
- preference_pairs.jsonl (DPO pairs)
- prompts_vibes.yaml, prompts_nexus_vibes.yaml
- v0-baseline eval results (historical record)

Thin glue kept:

- build_curated.py (data authoring, not infrastructure)
- ingest_trajectories.py (domain-specific quality filter)

Dependencies: pip install axolotl mlx-lm lm-evaluation-harness
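
End-to-end workflow under the new setup (the two train commands are as
documented in the config header below; the lm_eval invocation is only a
sketch, with the actual task names to be taken from eval-tasks.yaml):

    pip install axolotl mlx-lm lm-evaluation-harness
    axolotl train training/axolotl.yaml            # cloud GPU
    mlx_lm.lora --config training/mlx-lora.yaml    # Apple Silicon
    lm_eval --model hf \
      --model_args pretrained=NousResearch/Hermes-4-14B,peft=./output/hermes4-14b-timmy \
      --tasks <tasks defined in eval-tasks.yaml>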

# AutoLoRA Training Config — Axolotl
# Replaces: autolora/train_modal.py (239 lines)
#
# Usage:
#   axolotl train training/axolotl.yaml          # cloud GPU
#   mlx_lm.lora --config training/mlx-lora.yaml  # Apple Silicon (see mlx-lora.yaml)
#
# Cost: A100-40GB ~$1.10/hr, training ~30-60 min = $0.55-1.10

base_model: NousResearch/Hermes-4-14B
model_type: AutoModelForCausalLM
trust_remote_code: true

# QLoRA — 4-bit quantized training
load_in_4bit: true
adapter: qlora
lora_r: 16
lora_alpha: 32
lora_dropout: 0.05
lora_target_linear: true
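# Effective LoRA scaling: lora_alpha / lora_r = 32 / 16 = 2.0 (the common alpha = 2r setup)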
# Explicit targets (same as autolora train_modal.py):
# lora_target_modules:
#   - q_proj
#   - k_proj
#   - v_proj
#   - o_proj
#   - gate_proj
#   - up_proj
#   - down_proj

# Dataset — ShareGPT format (same as autolora curated data)
datasets:
  - path: data/curated_dataset.jsonl
    type: sharegpt
    conversation: chatml  # Hermes uses ChatML
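# Illustrative record shape for data/curated_dataset.jsonl (not an actual row;
# this is the standard ShareGPT layout that type: sharegpt expects):
# {"conversations": [{"from": "human", "value": "..."}, {"from": "gpt", "value": "..."}]}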

val_set_size: 0.1

# Training params (matched to autolora/train_modal.py)
sequence_len: 2048
micro_batch_size: 1
gradient_accumulation_steps: 8
num_epochs: 3
learning_rate: 2e-4
lr_scheduler: cosine
warmup_ratio: 0.05
optimizer: paged_adamw_8bit
max_grad_norm: 0.3
bf16: true
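# Effective batch size: micro_batch_size * gradient_accumulation_steps = 1 * 8 = 8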

# Output
output_dir: ./output/hermes4-14b-timmy
save_strategy: epoch
save_total_limit: 2
logging_steps: 5

# DPO (when ready — uncomment and point to preference data)
# rl: dpo
# datasets:
#   - path: data/preference_pairs.jsonl
#     type: chatml.default
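# Illustrative record shape for data/preference_pairs.jsonl (assumption: the
# usual prompt/chosen/rejected fields that chatml-style DPO loaders consume):
# {"prompt": "...", "chosen": "...", "rejected": "..."}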