Compare commits

...

2 Commits

Author SHA1 Message Date
48dfa5d0f7 fix: update test to use quality_score for quality ordering
All checks were successful
Smoke Test / smoke (pull_request) Successful in 20s
Closes #139, closes #138

The test assumed bits_per_channel correlates with quality, but q4_0 (4.0 bits)
has lower quality than turbo2 (1.5 bits). Updated test to use explicit
quality_score field instead.
2026-04-21 11:21:45 +00:00
1c6c3defe9 fix: add quality_score to QuantLevel for explicit quality ordering
Refs #139, closes #138

The test assumed bits_per_channel correlates with quality, but q4_0 (4.0 bits)
has lower quality than turbo2 (1.5 bits). Added explicit quality_score field
to make quality ordering unambiguous.
2026-04-21 11:20:26 +00:00
2 changed files with 10 additions and 1 deletion

View File

@@ -37,6 +37,7 @@ class QuantLevel:
layer_adaptive: int # TURBO_LAYER_ADAPTIVE value (0-7) layer_adaptive: int # TURBO_LAYER_ADAPTIVE value (0-7)
kv_type: str # -ctk/-ctv flag value kv_type: str # -ctk/-ctv flag value
min_memory_headroom_gb: float # Minimum free memory to recommend this level min_memory_headroom_gb: float # Minimum free memory to recommend this level
quality_score: int = 0 # Explicit quality ordering (higher = better)
description: str = "" description: str = ""
@@ -50,6 +51,7 @@ QUANT_LEVELS = [
layer_adaptive=7, layer_adaptive=7,
kv_type="turbo4", kv_type="turbo4",
min_memory_headroom_gb=4.0, min_memory_headroom_gb=4.0,
quality_score=100,
description="PolarQuant + QJL 4-bit. Best quality, ~4.2x KV compression." description="PolarQuant + QJL 4-bit. Best quality, ~4.2x KV compression."
), ),
QuantLevel( QuantLevel(
@@ -60,6 +62,7 @@ QUANT_LEVELS = [
layer_adaptive=5, layer_adaptive=5,
kv_type="turbo3", kv_type="turbo3",
min_memory_headroom_gb=3.0, min_memory_headroom_gb=3.0,
quality_score=80,
description="3-bit TurboQuant. High quality, ~6x KV compression." description="3-bit TurboQuant. High quality, ~6x KV compression."
), ),
QuantLevel( QuantLevel(
@@ -70,6 +73,7 @@ QUANT_LEVELS = [
layer_adaptive=3, layer_adaptive=3,
kv_type="turbo2", kv_type="turbo2",
min_memory_headroom_gb=2.0, min_memory_headroom_gb=2.0,
quality_score=60,
description="2-bit TurboQuant. Balanced, ~10x KV compression." description="2-bit TurboQuant. Balanced, ~10x KV compression."
), ),
QuantLevel( QuantLevel(
@@ -80,6 +84,7 @@ QUANT_LEVELS = [
layer_adaptive=0, layer_adaptive=0,
kv_type="q4_0", kv_type="q4_0",
min_memory_headroom_gb=1.5, min_memory_headroom_gb=1.5,
quality_score=40,
description="Standard 4-bit quant. Fast fallback, no TurboQuant." description="Standard 4-bit quant. Fast fallback, no TurboQuant."
), ),
] ]

View File

@@ -22,7 +22,10 @@ class TestQuantLevels:
def test_levels_ordered_by_quality(self): def test_levels_ordered_by_quality(self):
"""Levels should be ordered from best quality to most aggressive.""" """Levels should be ordered from best quality to most aggressive."""
for i in range(len(QUANT_LEVELS) - 1): for i in range(len(QUANT_LEVELS) - 1):
- assert QUANT_LEVELS[i].bits_per_channel > QUANT_LEVELS[i + 1].bits_per_channel
+ # Use quality_score for explicit quality ordering
+ # (bits_per_channel doesn't always correlate with quality:
+ # q4_0 has 4.0 bits but lower quality than turbo2 with 1.5 bits)
+ assert QUANT_LEVELS[i].quality_score > QUANT_LEVELS[i + 1].quality_score
def test_all_levels_have_required_fields(self): def test_all_levels_have_required_fields(self):
for level in QUANT_LEVELS: for level in QUANT_LEVELS:
@@ -32,6 +35,7 @@ class TestQuantLevels:
assert level.quality_label assert level.quality_label
assert level.layer_adaptive >= 0 assert level.layer_adaptive >= 0
assert level.kv_type assert level.kv_type
assert level.quality_score > 0
class TestKVEstimate: class TestKVEstimate: