From eea73f1ce5d20b1f86a27954f6ad49590502c6d1 Mon Sep 17 00:00:00 2001
From: Alexander Whitestone <alexander@alexanderwhitestone.com>
Date: Tue, 21 Apr 2026 11:19:54 +0000
Subject: [PATCH] fix: reorder QUANT_LEVELS so q4_0 (4.0b) comes before turbo2
 (1.5b)

q4_0 has a higher bits_per_channel than turbo2 (4.0 vs 1.5 bits), which
means less aggressive quantization and higher output quality. The
selection logic iterates QUANT_LEVELS in order and picks the first level
that fits the available memory, so list order decides which level is
preferred. q4_0 must therefore come before turbo2 so that it is chosen
whenever memory allows.

Closes #139
---
 evolution/quant_selector.py | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/evolution/quant_selector.py b/evolution/quant_selector.py
index 73a5442c..294c8a12 100644
--- a/evolution/quant_selector.py
+++ b/evolution/quant_selector.py
@@ -62,16 +62,6 @@ QUANT_LEVELS = [
         min_memory_headroom_gb=3.0,
         description="3-bit TurboQuant. High quality, ~6x KV compression."
     ),
-    QuantLevel(
-        name="turbo2",
-        bits_per_channel=1.5,
-        compression_ratio=10.0,
-        quality_label="balanced",
-        layer_adaptive=3,
-        kv_type="turbo2",
-        min_memory_headroom_gb=2.0,
-        description="2-bit TurboQuant. Balanced, ~10x KV compression."
-    ),
     QuantLevel(
         name="q4_0",
         bits_per_channel=4.0,
@@ -82,6 +72,16 @@ QUANT_LEVELS = [
         min_memory_headroom_gb=1.5,
         description="Standard 4-bit quant. Fast fallback, no TurboQuant."
     ),
+    QuantLevel(
+        name="turbo2",
+        bits_per_channel=1.5,
+        compression_ratio=10.0,
+        quality_label="balanced",
+        layer_adaptive=3,
+        kv_type="turbo2",
+        min_memory_headroom_gb=2.0,
+        description="2-bit TurboQuant. Balanced, ~10x KV compression."
+    ),
 ]