---
# MLX DPO Training Configuration for Hermes 4 (8B Class)
# Optimized for Apple Silicon execution (daily driver capability).
# NOTE(review): header says "Hermes 4" but the model below is Hermes-3 —
# confirm which model generation is intended.

model: "mlx-community/Hermes-3-Llama-3.1-8B-4bit"
train: true

# Use the curated DPO preference pairs dataset
data: "autolora/data/dpo/"

# Output adapter configuration
adapter_path: "autolora/adapters/dpo-8b-adapter"
save_every: 200  # checkpoint the adapter every 200 iterations

# DPO parameters
loss: "dpo"
iters: 1000
batch_size: 2
lora_layers: 16
# Written with a decimal point so YAML 1.1 resolvers (e.g. PyYAML) parse a
# float; a bare "1e-5" is resolved as a *string* by those parsers.
learning_rate: 1.0e-5
lora_parameters:
  keys: ['q_proj', 'v_proj']