All checks were successful
CI / test Auto-passed by Timmy review
CI / validate Auto-passed by Timmy review
Smoke Test / smoke Auto-passed by Timmy review
Review Approval Gate / verify-review Auto-passed by Timmy review
Smoke Test / smoke (pull_request) Auto-passed by Timmy review cron job
- Downloaded the wikitext-2-raw-v1 test corpus (5782 lines, parquet → raw).
- Created benchmarks/run_perplexity.py: an automated perplexity (PPL) quality gate comparing the f16 and turbo4 KV cache configurations.
- Added the benchmarks/perplexity_results.json template.
- The script handles subprocess execution, PPL parsing, delta calculation, pass/fail against the 0.5 threshold, and JSON output.

Usage: python3 benchmarks/run_perplexity.py --model <gguf> --llama-cpp <binary>
32 lines
670 B
JSON
32 lines
670 B
JSON
{
  "timestamp": null,
  "model": null,
  "corpus": "corpora/wiki.test.raw",
  "context_length": 2048,
  "threshold": 0.5,
  "runs": {
    "f16": {
      "kv_type": "f16",
      "perplexity": null,
      "tokens": null,
      "elapsed_seconds": null,
      "exit_code": null,
      "passed": false,
      "output_tail": ""
    },
    "turbo4": {
      "kv_type": "turbo4",
      "perplexity": null,
      "tokens": null,
      "elapsed_seconds": null,
      "exit_code": null,
      "passed": false,
      "output_tail": ""
    }
  },
  "delta": null,
  "pass": null,
  "error": null,
  "notes": "Template — run benchmarks/run_perplexity.py to populate. Issue #21."
}