From 2811ef66717d82733387f1f47c570520cc260fff Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Tue, 21 Apr 2026 11:26:26 +0000 Subject: [PATCH] fix: add quality_score to QuantLevel for explicit quality ordering Closes #138 The test assumed bits_per_channel correlates with quality, but q4_0 (4.0 bits) has lower quality than turbo2 (1.5 bits). Added explicit quality_score field to make quality ordering unambiguous. --- evolution/quant_selector.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/evolution/quant_selector.py b/evolution/quant_selector.py index 73a5442c..6aed74c6 100644 --- a/evolution/quant_selector.py +++ b/evolution/quant_selector.py @@ -37,6 +37,7 @@ class QuantLevel: layer_adaptive: int # TURBO_LAYER_ADAPTIVE value (0-7) kv_type: str # -ctk/-ctv flag value min_memory_headroom_gb: float # Minimum free memory to recommend this level + quality_score: int = 0 # Explicit quality ordering (higher = better) description: str = "" @@ -50,6 +51,7 @@ QUANT_LEVELS = [ layer_adaptive=7, kv_type="turbo4", min_memory_headroom_gb=4.0, + quality_score=100, description="PolarQuant + QJL 4-bit. Best quality, ~4.2x KV compression." ), QuantLevel( @@ -60,6 +62,7 @@ QUANT_LEVELS = [ layer_adaptive=5, kv_type="turbo3", min_memory_headroom_gb=3.0, + quality_score=80, description="3-bit TurboQuant. High quality, ~6x KV compression." ), QuantLevel( @@ -70,6 +73,7 @@ QUANT_LEVELS = [ layer_adaptive=3, kv_type="turbo2", min_memory_headroom_gb=2.0, + quality_score=60, description="2-bit TurboQuant. Balanced, ~10x KV compression." ), QuantLevel( @@ -80,6 +84,7 @@ QUANT_LEVELS = [ layer_adaptive=0, kv_type="q4_0", min_memory_headroom_gb=1.5, + quality_score=40, description="Standard 4-bit quant. Fast fallback, no TurboQuant." ), ]