diff --git a/evolution/quant_selector.py b/evolution/quant_selector.py index 73a5442c..294c8a12 100644 --- a/evolution/quant_selector.py +++ b/evolution/quant_selector.py @@ -62,16 +62,6 @@ QUANT_LEVELS = [ min_memory_headroom_gb=3.0, description="3-bit TurboQuant. High quality, ~6x KV compression." ), - QuantLevel( - name="turbo2", - bits_per_channel=1.5, - compression_ratio=10.0, - quality_label="balanced", - layer_adaptive=3, - kv_type="turbo2", - min_memory_headroom_gb=2.0, - description="2-bit TurboQuant. Balanced, ~10x KV compression." - ), QuantLevel( name="q4_0", bits_per_channel=4.0, @@ -82,6 +72,16 @@ QUANT_LEVELS = [ min_memory_headroom_gb=1.5, description="Standard 4-bit quant. Fast fallback, no TurboQuant." ), + QuantLevel( + name="turbo2", + bits_per_channel=1.5, + compression_ratio=10.0, + quality_label="balanced", + layer_adaptive=3, + kv_type="turbo2", + min_memory_headroom_gb=2.0, + description="2-bit TurboQuant. Balanced, ~10x KV compression." + ), ]