Compare commits
3 Commits
step35/104
...
fix/139-qu
| Author | SHA1 | Date | |
|---|---|---|---|
| | 6671018acf | | |
| | eea73f1ce5 | | |
| | 04fa60a53d | | |
@@ -62,16 +62,6 @@ QUANT_LEVELS = [
|
|||||||
min_memory_headroom_gb=3.0,
|
min_memory_headroom_gb=3.0,
|
||||||
description="3-bit TurboQuant. High quality, ~6x KV compression."
|
description="3-bit TurboQuant. High quality, ~6x KV compression."
|
||||||
),
|
),
|
||||||
QuantLevel(
|
|
||||||
name="turbo2",
|
|
||||||
bits_per_channel=1.5,
|
|
||||||
compression_ratio=10.0,
|
|
||||||
quality_label="balanced",
|
|
||||||
layer_adaptive=3,
|
|
||||||
kv_type="turbo2",
|
|
||||||
min_memory_headroom_gb=2.0,
|
|
||||||
description="2-bit TurboQuant. Balanced, ~10x KV compression."
|
|
||||||
),
|
|
||||||
QuantLevel(
|
QuantLevel(
|
||||||
name="q4_0",
|
name="q4_0",
|
||||||
bits_per_channel=4.0,
|
bits_per_channel=4.0,
|
||||||
@@ -82,6 +72,16 @@ QUANT_LEVELS = [
|
|||||||
min_memory_headroom_gb=1.5,
|
min_memory_headroom_gb=1.5,
|
||||||
description="Standard 4-bit quant. Fast fallback, no TurboQuant."
|
description="Standard 4-bit quant. Fast fallback, no TurboQuant."
|
||||||
),
|
),
|
||||||
|
QuantLevel(
|
||||||
|
name="turbo2",
|
||||||
|
bits_per_channel=1.5,
|
||||||
|
compression_ratio=10.0,
|
||||||
|
quality_label="balanced",
|
||||||
|
layer_adaptive=3,
|
||||||
|
kv_type="turbo2",
|
||||||
|
min_memory_headroom_gb=2.0,
|
||||||
|
description="2-bit TurboQuant. Balanced, ~10x KV compression."
|
||||||
|
),
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -20,9 +20,17 @@ from evolution.quant_selector import (
|
|||||||
|
|
||||||
class TestQuantLevels:
|
class TestQuantLevels:
|
||||||
def test_levels_ordered_by_quality(self):
|
def test_levels_ordered_by_quality(self):
|
||||||
"""Levels should be ordered from best quality to most aggressive."""
|
"""Levels should be ordered from highest quality (most bits) to most aggressive.
|
||||||
|
|
||||||
|
The selection logic iterates QUANT_LEVELS in order and picks the first
|
||||||
|
level that fits in available memory. So higher bits_per_channel (better
|
||||||
|
quality) must come first, regardless of whether it's TurboQuant or standard.
|
||||||
|
"""
|
||||||
for i in range(len(QUANT_LEVELS) - 1):
|
for i in range(len(QUANT_LEVELS) - 1):
|
||||||
assert QUANT_LEVELS[i].bits_per_channel > QUANT_LEVELS[i + 1].bits_per_channel
|
assert QUANT_LEVELS[i].bits_per_channel > QUANT_LEVELS[i + 1].bits_per_channel, (
|
||||||
|
f"{QUANT_LEVELS[i].name} ({QUANT_LEVELS[i].bits_per_channel}b) should come "
|
||||||
|
f"before {QUANT_LEVELS[i+1].name} ({QUANT_LEVELS[i+1].bits_per_channel}b)"
|
||||||
|
)
|
||||||
|
|
||||||
def test_all_levels_have_required_fields(self):
|
def test_all_levels_have_required_fields(self):
|
||||||
for level in QUANT_LEVELS:
|
for level in QUANT_LEVELS:
|
||||||
|
|||||||
Reference in New Issue
Block a user