feat: add Apple Silicon DFlash benchmark planner (refs #152)
All checks were successful
Smoke Test / smoke (pull_request) Successful in 18s

This commit is contained in:
Alexander Whitestone
2026-04-21 22:00:22 -04:00
parent 492c1cdcfd
commit 636d294896
7 changed files with 385 additions and 6 deletions

View File

@@ -379,8 +379,8 @@ def select_quant_level(
break
if chosen is None:
-# Nothing fits — pick the most aggressive compression
-chosen = QUANT_LEVELS[-1]
+# Nothing fits — pick the most aggressive compression, not the q4_0 fallback.
+chosen = max(QUANT_LEVELS, key=lambda level: level.compression_ratio)
logger.warning(f"No quant level fits in {memory_pool_gb:.1f}GB. Using {chosen.name}.")
# Calculate final numbers