fix: add quality_score to QuantLevel for explicit quality ordering
Refs #139, closes #138. The test assumed that bits_per_channel correlates with quality, but q4_0 (4.0 bits) has lower quality than turbo2 (1.5 bits). Added an explicit quality_score field to make the quality ordering unambiguous.
This commit is contained in:
@@ -37,6 +37,7 @@ class QuantLevel:
|
||||
layer_adaptive: int # TURBO_LAYER_ADAPTIVE value (0-7)
|
||||
kv_type: str # -ctk/-ctv flag value
|
||||
min_memory_headroom_gb: float # Minimum free memory to recommend this level
|
||||
quality_score: int = 0 # Explicit quality ordering (higher = better)
|
||||
description: str = ""
|
||||
|
||||
|
||||
@@ -50,6 +51,7 @@ QUANT_LEVELS = [
|
||||
layer_adaptive=7,
|
||||
kv_type="turbo4",
|
||||
min_memory_headroom_gb=4.0,
|
||||
quality_score=100,
|
||||
description="PolarQuant + QJL 4-bit. Best quality, ~4.2x KV compression."
|
||||
),
|
||||
QuantLevel(
|
||||
@@ -60,6 +62,7 @@ QUANT_LEVELS = [
|
||||
layer_adaptive=5,
|
||||
kv_type="turbo3",
|
||||
min_memory_headroom_gb=3.0,
|
||||
quality_score=80,
|
||||
description="3-bit TurboQuant. High quality, ~6x KV compression."
|
||||
),
|
||||
QuantLevel(
|
||||
@@ -70,6 +73,7 @@ QUANT_LEVELS = [
|
||||
layer_adaptive=3,
|
||||
kv_type="turbo2",
|
||||
min_memory_headroom_gb=2.0,
|
||||
quality_score=60,
|
||||
description="2-bit TurboQuant. Balanced, ~10x KV compression."
|
||||
),
|
||||
QuantLevel(
|
||||
@@ -80,6 +84,7 @@ QUANT_LEVELS = [
|
||||
layer_adaptive=0,
|
||||
kv_type="q4_0",
|
||||
min_memory_headroom_gb=1.5,
|
||||
quality_score=40,
|
||||
description="Standard 4-bit quant. Fast fallback, no TurboQuant."
|
||||
),
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user