# Patch: point the 8B DPO config at the MLX 4-bit Hermes 3 checkpoint and move
# the dataset and adapter paths under autolora/.
# NOTE(review): the config's header comment still reads "Hermes 4 (8B Class)"
# while `model` now names a Hermes 3 checkpoint - confirm whether that comment
# should be updated in a follow-up change.
diff --git a/training/configs/dpo_8b.yaml b/training/configs/dpo_8b.yaml
index fa97b8b0..1599c01d 100644
--- a/training/configs/dpo_8b.yaml
+++ b/training/configs/dpo_8b.yaml
@@ -1,14 +1,14 @@
 # MLX DPO Training Configuration for Hermes 4 (8B Class)
 # Optimized for Apple Silicon execution (daily driver capability).
 
-model: "NousResearch/Hermes-4-8B"
+model: "mlx-community/Hermes-3-Llama-3.1-8B-4bit"
 train: true
 
 # Use the curated DPO preference pairs dataset
-data: "data/"
+data: "autolora/data/dpo/"
 
 # Output adapter configuration
-adapter_path: "adapters/dpo_8b_adapter"
+adapter_path: "autolora/adapters/dpo-8b-adapter"
 save_every: 200
 
 # DPO parameters