v1.0 rejected — NaN from wrong tokenizer; Morrowind MCP pipeline working
This commit is contained in:
@@ -55,7 +55,8 @@ adapters:
|
|||||||
timmy-v1.0:
|
timmy-v1.0:
|
||||||
base: hermes4-14b-4bit
|
base: hermes4-14b-4bit
|
||||||
date: 2026-03-26
|
date: 2026-03-26
|
||||||
status: training
|
status: rejected
|
||||||
data: 1125 train / 126 valid (same curated set, reused)
|
data: 1125 train / 126 valid (same curated set, reused from 8B — NOT re-tokenized)
|
||||||
training: { lr: 1e-6, rank: 16, iters: 800 }
|
training: { lr: 1e-6, rank: 16, iters: 800 }
|
||||||
notes: "First 14B adapter. Conservative lr for new arch."
|
eval: "Val NaN iter 100, train NaN iter 160. Dead."
|
||||||
|
notes: "Data was pre-truncated for Llama3 tokenizer, not Qwen3. Must re-run clean_data.py with 14B tokenizer before v1.1."
|
||||||
|
|||||||
Reference in New Issue
Block a user