---
# MLX DPO Training Configuration for Hermes 4 (3B Class)
# Optimized for Apple Silicon execution with max reactivity.

# Base model identifier and run mode.
model: "NousResearch/Hermes-4-3B"
train: true

# Use the curated DPO preference pairs dataset
data: "data/"

# Output adapter configuration: destination path and checkpoint interval
# (save_every is presumably counted in training iterations -- confirm).
adapter_path: "adapters/dpo_3b_adapter"
save_every: 200

# DPO parameters
loss: "dpo"
iters: 1000
batch_size: 2
lora_layers: 16
# Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
# resolve a bare `1e-5` to the string "1e-5" rather than a float.
learning_rate: 1.0e-5
# `keys` must be nested under `lora_parameters`; at column 0 it would be a
# separate top-level key and `lora_parameters` would load as null.
lora_parameters:
  keys: ["q_proj", "v_proj"]