Compare commits
2 Commits
step35/55-
...
fix/139-qu
| Author | SHA1 | Date | |
|---|---|---|---|
| | 48dfa5d0f7 | | |
| | 1c6c3defe9 | | |
@@ -37,6 +37,7 @@ class QuantLevel:
|
|||||||
layer_adaptive: int # TURBO_LAYER_ADAPTIVE value (0-7)
|
layer_adaptive: int # TURBO_LAYER_ADAPTIVE value (0-7)
|
||||||
kv_type: str # -ctk/-ctv flag value
|
kv_type: str # -ctk/-ctv flag value
|
||||||
min_memory_headroom_gb: float # Minimum free memory to recommend this level
|
min_memory_headroom_gb: float # Minimum free memory to recommend this level
|
||||||
|
quality_score: int = 0 # Explicit quality ordering (higher = better)
|
||||||
description: str = ""
|
description: str = ""
|
||||||
|
|
||||||
|
|
||||||
@@ -50,6 +51,7 @@ QUANT_LEVELS = [
|
|||||||
layer_adaptive=7,
|
layer_adaptive=7,
|
||||||
kv_type="turbo4",
|
kv_type="turbo4",
|
||||||
min_memory_headroom_gb=4.0,
|
min_memory_headroom_gb=4.0,
|
||||||
|
quality_score=100,
|
||||||
description="PolarQuant + QJL 4-bit. Best quality, ~4.2x KV compression."
|
description="PolarQuant + QJL 4-bit. Best quality, ~4.2x KV compression."
|
||||||
),
|
),
|
||||||
QuantLevel(
|
QuantLevel(
|
||||||
@@ -60,6 +62,7 @@ QUANT_LEVELS = [
|
|||||||
layer_adaptive=5,
|
layer_adaptive=5,
|
||||||
kv_type="turbo3",
|
kv_type="turbo3",
|
||||||
min_memory_headroom_gb=3.0,
|
min_memory_headroom_gb=3.0,
|
||||||
|
quality_score=80,
|
||||||
description="3-bit TurboQuant. High quality, ~6x KV compression."
|
description="3-bit TurboQuant. High quality, ~6x KV compression."
|
||||||
),
|
),
|
||||||
QuantLevel(
|
QuantLevel(
|
||||||
@@ -70,6 +73,7 @@ QUANT_LEVELS = [
|
|||||||
layer_adaptive=3,
|
layer_adaptive=3,
|
||||||
kv_type="turbo2",
|
kv_type="turbo2",
|
||||||
min_memory_headroom_gb=2.0,
|
min_memory_headroom_gb=2.0,
|
||||||
|
quality_score=60,
|
||||||
description="2-bit TurboQuant. Balanced, ~10x KV compression."
|
description="2-bit TurboQuant. Balanced, ~10x KV compression."
|
||||||
),
|
),
|
||||||
QuantLevel(
|
QuantLevel(
|
||||||
@@ -80,6 +84,7 @@ QUANT_LEVELS = [
|
|||||||
layer_adaptive=0,
|
layer_adaptive=0,
|
||||||
kv_type="q4_0",
|
kv_type="q4_0",
|
||||||
min_memory_headroom_gb=1.5,
|
min_memory_headroom_gb=1.5,
|
||||||
|
quality_score=40,
|
||||||
description="Standard 4-bit quant. Fast fallback, no TurboQuant."
|
description="Standard 4-bit quant. Fast fallback, no TurboQuant."
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -22,7 +22,10 @@ class TestQuantLevels:
|
|||||||
def test_levels_ordered_by_quality(self):
|
def test_levels_ordered_by_quality(self):
|
||||||
"""Levels should be ordered from best quality to most aggressive."""
|
"""Levels should be ordered from best quality to most aggressive."""
|
||||||
for i in range(len(QUANT_LEVELS) - 1):
|
for i in range(len(QUANT_LEVELS) - 1):
|
||||||
assert QUANT_LEVELS[i].bits_per_channel > QUANT_LEVELS[i + 1].bits_per_channel
|
# Use quality_score for explicit quality ordering
|
||||||
|
# (bits_per_channel doesn't always correlate with quality:
|
||||||
|
# q4_0 has 4.0 bits but lower quality than turbo2 with 1.5 bits)
|
||||||
|
assert QUANT_LEVELS[i].quality_score > QUANT_LEVELS[i + 1].quality_score
|
||||||
|
|
||||||
def test_all_levels_have_required_fields(self):
|
def test_all_levels_have_required_fields(self):
|
||||||
for level in QUANT_LEVELS:
|
for level in QUANT_LEVELS:
|
||||||
@@ -32,6 +35,7 @@ class TestQuantLevels:
|
|||||||
assert level.quality_label
|
assert level.quality_label
|
||||||
assert level.layer_adaptive >= 0
|
assert level.layer_adaptive >= 0
|
||||||
assert level.kv_type
|
assert level.kv_type
|
||||||
|
assert level.quality_score > 0
|
||||||
|
|
||||||
|
|
||||||
class TestKVEstimate:
|
class TestKVEstimate:
|
||||||
|
|||||||
Reference in New Issue
Block a user