Files
timmy-config/training/configs/dpo_32b.yaml
2026-03-25 19:35:58 -04:00

22 lines
447 B
YAML

# MLX DPO Training Configuration for Hermes 4 (32B Class)
# Optimized for Apple Silicon machines with 64 GB+ unified memory.
model: "NousResearch/Hermes-4-32B"
train: true

# Use the curated DPO preference pairs dataset (directory consumed by mlx-lm)
data: "data/"

# Output adapter configuration
adapter_path: "adapters/dpo_32b_adapter"
save_every: 200  # checkpoint the adapter every 200 iterations

# DPO parameters
loss: "dpo"
iters: 1000
batch_size: 1  # keep at 1 to stay inside the 64 GB memory budget
lora_layers: 16

# FIX: was `5e-6` — YAML 1.1 resolvers (e.g. PyYAML) require a decimal
# point in scientific notation, so `5e-6` loads as the STRING "5e-6".
# `5.0e-6` parses as the float 0.000005 on all parsers.
learning_rate: 5.0e-6

lora_parameters:
  # FIX: this line was previously unindented, which made `keys` a
  # top-level key and left `lora_parameters` as null — the LoRA target
  # modules were silently ignored. It must be nested under
  # `lora_parameters`.
  # NOTE(review): mlx-lm examples often use fully qualified module paths
  # (e.g. 'self_attn.q_proj') — confirm these names match the model.
  keys: ['q_proj', 'v_proj']