Files
timmy-config/training/configs/dpo_8b.yaml
2026-03-25 21:03:57 -04:00

22 lines
497 B
YAML

# MLX DPO training configuration for Hermes 4 (8B class).
# Tuned for execution on Apple Silicon (daily-driver capability).
# NOTE(review): the header names Hermes 4, but the checkpoint below is a
# Hermes-3 build -- confirm which one is intended.
model: 'mlx-community/Hermes-3-Llama-3.1-8B-4bit'
train: true
# Curated DPO preference-pair dataset.
data: 'autolora/data/dpo/'
# Where the trained adapter is written.
adapter_path: 'autolora/adapters/dpo-8b-adapter'
# Presumably a checkpoint interval in iterations -- confirm against the trainer.
save_every: 200
# DPO parameters
loss: 'dpo'
iters: 1000
batch_size: 2
lora_layers: 16
# Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
# resolve a bare `1e-5` as a string rather than a float.
learning_rate: 1.0e-5
# LoRA settings. `keys` must be nested under `lora_parameters`: at column 0
# it would parse as a separate top-level key and leave `lora_parameters` null.
# The entries are presumably the projection modules LoRA is applied to --
# confirm the expected naming against the trainer.
lora_parameters:
  keys: ['q_proj', 'v_proj']