From eea73f1ce5d20b1f86a27954f6ad49590502c6d1 Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Tue, 21 Apr 2026 11:19:54 +0000 Subject: [PATCH] fix: reorder QUANT_LEVELS so q4_0 (4.0b) comes before turbo2 (1.5b) q4_0 has higher bits_per_channel (4.0 vs 1.5), meaning higher quality. The selection logic iterates QUANT_LEVELS in order, picking the first level that fits available memory. q4_0 should be tried before turbo2 because it produces better quality output when memory allows. Closes #139 --- evolution/quant_selector.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/evolution/quant_selector.py b/evolution/quant_selector.py index 73a5442c..294c8a12 100644 --- a/evolution/quant_selector.py +++ b/evolution/quant_selector.py @@ -62,16 +62,6 @@ QUANT_LEVELS = [ min_memory_headroom_gb=3.0, description="3-bit TurboQuant. High quality, ~6x KV compression." ), - QuantLevel( - name="turbo2", - bits_per_channel=1.5, - compression_ratio=10.0, - quality_label="balanced", - layer_adaptive=3, - kv_type="turbo2", - min_memory_headroom_gb=2.0, - description="2-bit TurboQuant. Balanced, ~10x KV compression." - ), QuantLevel( name="q4_0", bits_per_channel=4.0, @@ -82,6 +72,16 @@ QUANT_LEVELS = [ min_memory_headroom_gb=1.5, description="Standard 4-bit quant. Fast fallback, no TurboQuant." ), + QuantLevel( + name="turbo2", + bits_per_channel=1.5, + compression_ratio=10.0, + quality_label="balanced", + layer_adaptive=3, + kv_type="turbo2", + min_memory_headroom_gb=2.0, + description="2-bit TurboQuant. Balanced, ~10x KV compression." + ), ]