Compare commits

...

2 Commits

Author SHA1 Message Date
93a27078aa fix: quant selector quality-order assertion (#139)
All checks were successful
Smoke Test / smoke (pull_request) Successful in 15s
2026-04-21 11:25:20 +00:00
fc67bd7085 fix: quant selector quality-order assertion (#139)
All checks were successful
Smoke Test / smoke (pull_request) Successful in 14s
2026-04-21 11:25:16 +00:00
2 changed files with 20 additions and 12 deletions

View File

@@ -62,16 +62,6 @@ QUANT_LEVELS = [
min_memory_headroom_gb=3.0,
description="3-bit TurboQuant. High quality, ~6x KV compression."
),
QuantLevel(
name="turbo2",
bits_per_channel=1.5,
compression_ratio=10.0,
quality_label="balanced",
layer_adaptive=3,
kv_type="turbo2",
min_memory_headroom_gb=2.0,
description="2-bit TurboQuant. Balanced, ~10x KV compression."
),
QuantLevel(
name="q4_0",
bits_per_channel=4.0,
@@ -82,6 +72,16 @@ QUANT_LEVELS = [
min_memory_headroom_gb=1.5,
description="Standard 4-bit quant. Fast fallback, no TurboQuant."
),
QuantLevel(
name="turbo2",
bits_per_channel=1.5,
compression_ratio=10.0,
quality_label="balanced",
layer_adaptive=3,
kv_type="turbo2",
min_memory_headroom_gb=2.0,
description="2-bit TurboQuant. Balanced, ~10x KV compression."
),
]

View File

@@ -20,9 +20,17 @@ from evolution.quant_selector import (
class TestQuantLevels:
def test_levels_ordered_by_quality(self):
"""Levels should be ordered from best quality to most aggressive."""
"""Levels should be ordered from highest quality (most bits) to most aggressive.
The selection logic iterates QUANT_LEVELS in order and picks the first
level that fits in available memory. So higher bits_per_channel (better
quality) must come first, regardless of whether it's TurboQuant or standard.
"""
for i in range(len(QUANT_LEVELS) - 1):
assert QUANT_LEVELS[i].bits_per_channel > QUANT_LEVELS[i + 1].bits_per_channel
assert QUANT_LEVELS[i].bits_per_channel > QUANT_LEVELS[i + 1].bits_per_channel, (
f"{QUANT_LEVELS[i].name} ({QUANT_LEVELS[i].bits_per_channel}b) should come "
f"before {QUANT_LEVELS[i+1].name} ({QUANT_LEVELS[i+1].bits_per_channel}b)"
)
def test_all_levels_have_required_fields(self):
for level in QUANT_LEVELS: