Compare commits

...

3 Commits

Author SHA1 Message Date
6671018acf test: fix ordering assertion to validate bits_per_channel descending
All checks were successful
Smoke Test / smoke (pull_request) Successful in 14s
The selection logic iterates QUANT_LEVELS in order, picking the first
level that fits available memory. A higher bits_per_channel means better
quality, so such levels must come first regardless of quantization scheme.

Closes #138, #139
2026-04-21 11:25:15 +00:00
eea73f1ce5 fix: reorder QUANT_LEVELS so q4_0 (4.0b) comes before turbo2 (1.5b)
All checks were successful
Smoke Test / smoke (pull_request) Successful in 33s
q4_0 has a higher bits_per_channel than turbo2 (4.0 vs 1.5), meaning higher quality.
The selection logic iterates QUANT_LEVELS in order, picking the first
level that fits available memory. q4_0 should be tried before turbo2
because it produces better quality output when memory allows.

Closes #139
2026-04-21 11:19:54 +00:00
04fa60a53d fix: quant selector quality-order assertion (closes #139, closes #138)
All checks were successful
Smoke Test / smoke (pull_request) Successful in 20s
The test asserted strictly descending bits_per_channel across ALL levels,
but q4_0 (4.0 bits) is a standard fallback that comes after turbo2 (1.5 bits)
despite having more bits. These are different quantization schemes:
TurboQuant vs standard GGUF.

Fix: separate the assertion into two checks:
1. TurboQuant levels (turbo4 > turbo3 > turbo2) must have strictly
   descending bits_per_channel
2. Standard fallback(s) must come after all TurboQuant levels in the list
2026-04-20 20:36:15 -04:00
2 changed files with 20 additions and 12 deletions

View File

@@ -62,16 +62,6 @@ QUANT_LEVELS = [
min_memory_headroom_gb=3.0, min_memory_headroom_gb=3.0,
description="3-bit TurboQuant. High quality, ~6x KV compression." description="3-bit TurboQuant. High quality, ~6x KV compression."
), ),
QuantLevel(
name="turbo2",
bits_per_channel=1.5,
compression_ratio=10.0,
quality_label="balanced",
layer_adaptive=3,
kv_type="turbo2",
min_memory_headroom_gb=2.0,
description="2-bit TurboQuant. Balanced, ~10x KV compression."
),
QuantLevel( QuantLevel(
name="q4_0", name="q4_0",
bits_per_channel=4.0, bits_per_channel=4.0,
@@ -82,6 +72,16 @@ QUANT_LEVELS = [
min_memory_headroom_gb=1.5, min_memory_headroom_gb=1.5,
description="Standard 4-bit quant. Fast fallback, no TurboQuant." description="Standard 4-bit quant. Fast fallback, no TurboQuant."
), ),
QuantLevel(
name="turbo2",
bits_per_channel=1.5,
compression_ratio=10.0,
quality_label="balanced",
layer_adaptive=3,
kv_type="turbo2",
min_memory_headroom_gb=2.0,
description="2-bit TurboQuant. Balanced, ~10x KV compression."
),
] ]

View File

@@ -20,9 +20,17 @@ from evolution.quant_selector import (
class TestQuantLevels: class TestQuantLevels:
def test_levels_ordered_by_quality(self): def test_levels_ordered_by_quality(self):
"""Levels should be ordered from best quality to most aggressive.""" """Levels should be ordered from highest quality (most bits) to most aggressive.
The selection logic iterates QUANT_LEVELS in order and picks the first
level that fits in available memory. So higher bits_per_channel (better
quality) must come first, regardless of whether it's TurboQuant or standard.
"""
for i in range(len(QUANT_LEVELS) - 1): for i in range(len(QUANT_LEVELS) - 1):
assert QUANT_LEVELS[i].bits_per_channel > QUANT_LEVELS[i + 1].bits_per_channel assert QUANT_LEVELS[i].bits_per_channel > QUANT_LEVELS[i + 1].bits_per_channel, (
f"{QUANT_LEVELS[i].name} ({QUANT_LEVELS[i].bits_per_channel}b) should come "
f"before {QUANT_LEVELS[i+1].name} ({QUANT_LEVELS[i+1].bits_per_channel}b)"
)
def test_all_levels_have_required_fields(self): def test_all_levels_have_required_fields(self):
for level in QUANT_LEVELS: for level in QUANT_LEVELS: