test: quant selector tests (#81)
All checks were successful
Smoke Test / smoke (pull_request) Successful in 12s
All checks were successful
Smoke Test / smoke (pull_request) Successful in 12s
This commit is contained in:
163
tests/test_quant_selector.py
Normal file
163
tests/test_quant_selector.py
Normal file
@@ -0,0 +1,163 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Tests for quant_selector.py"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import pytest
|
||||
from unittest.mock import patch, MagicMock
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
||||
from evolution.quant_selector import (
|
||||
QuantLevel,
|
||||
HardwareInfo,
|
||||
QUANT_LEVELS,
|
||||
detect_hardware,
|
||||
estimate_kv_cache_gb,
|
||||
estimate_model_memory_gb,
|
||||
select_quant_level,
|
||||
)
|
||||
|
||||
|
||||
class TestQuantLevels:
    """Checks on the entries of ``QUANT_LEVELS``."""

    def test_levels_ordered_by_quality(self):
        """Levels should be ordered from best quality to most aggressive."""
        # Walk adjacent pairs: each level must use strictly more bits per
        # channel than the one that follows it.
        for current, following in zip(QUANT_LEVELS, QUANT_LEVELS[1:]):
            assert current.bits_per_channel > following.bits_per_channel

    def test_all_levels_have_required_fields(self):
        """Every level exposes a usable value for each field checked here."""
        for entry in QUANT_LEVELS:
            assert entry.name
            assert entry.bits_per_channel > 0
            assert entry.compression_ratio > 1
            assert entry.quality_label
            assert entry.layer_adaptive >= 0
            assert entry.kv_type
|
||||
|
||||
|
||||
class TestKVEstimate:
    """Checks on ``estimate_kv_cache_gb`` for a fixed model shape."""

    # 48 layers, 8 heads, 128 dim -- the shape shared by every test below.
    _MODEL_SHAPE = (48, 8, 128)

    def test_basic_estimate(self):
        """A 32K context at 3.5 bits yields a positive estimate below 10."""
        estimate = estimate_kv_cache_gb(32768, *self._MODEL_SHAPE, 3.5)
        # Should be reasonable: strictly positive and under 10.
        assert 0 < estimate < 10

    def test_longer_context_larger(self):
        """A 128K context needs more KV cache than a 32K context."""
        short_ctx = estimate_kv_cache_gb(32768, *self._MODEL_SHAPE, 3.5)
        long_ctx = estimate_kv_cache_gb(131072, *self._MODEL_SHAPE, 3.5)
        assert long_ctx > short_ctx

    def test_higher_bits_larger(self):
        """At equal context, 4.0 bits costs more than 2.0 bits."""
        four_bit = estimate_kv_cache_gb(32768, *self._MODEL_SHAPE, 4.0)
        two_bit = estimate_kv_cache_gb(32768, *self._MODEL_SHAPE, 2.0)
        assert four_bit > two_bit
|
||||
|
||||
|
||||
class TestHardwareDetection:
    """Checks on hardware detection, both live and with a mocked file."""

    def test_detect_returns_info(self):
        """Live detection reports positive memory figures and a method name."""
        hw = detect_hardware()
        assert hw.total_memory_gb > 0
        assert hw.available_memory_gb > 0
        assert hw.detection_method

    def test_linux_detection(self):
        """Linux detection reads its memory total from mocked file contents.

        ``open`` is replaced with ``unittest.mock.mock_open`` so the fake
        handle behaves like a real text file for ``read()``, ``readline()``,
        ``readlines()`` and line iteration alike. A bare ``MagicMock`` with
        only ``read`` stubbed would silently yield nothing for the other
        access styles.
        """
        # Imported locally: the file-level import only brings in
        # ``patch`` and ``MagicMock``.
        from unittest.mock import mock_open

        meminfo_text = (
            "MemTotal: 32000000 kB\n"
            "MemAvailable: 24000000 kB\n"
        )
        with patch("evolution.quant_selector.platform.system", return_value="Linux"):
            # ``builtins.open`` always exists, so ``create=True`` is not needed.
            with patch("builtins.open", mock_open(read_data=meminfo_text)):
                hw = _detect_linux_fallback()
        # 32000000 kB is roughly 30 GB, comfortably above the threshold.
        assert hw.total_memory_gb > 20
|
||||
|
||||
|
||||
def _detect_linux_fallback():
    """Helper to test Linux detection with mocked /proc/meminfo.

    Calls the private ``_detect_linux`` function of
    ``evolution.quant_selector`` and returns its result unchanged. The module
    is imported at call time rather than at file import time.
    """
    import evolution.quant_selector as quant_selector

    return quant_selector._detect_linux()
|
||||
|
||||
|
||||
class TestSelection:
    """Checks on ``select_quant_level`` with ``detect_hardware`` patched out."""

    @staticmethod
    def _select_on(hw_fields, **select_args):
        """Call ``select_quant_level`` while detection returns a fixed machine.

        ``hw_fields`` are the keyword arguments used to build the
        ``HardwareInfo`` that the patched ``detect_hardware`` returns;
        ``select_args`` are forwarded to ``select_quant_level`` unchanged.
        """
        with patch("evolution.quant_selector.detect_hardware") as detect_stub:
            detect_stub.return_value = HardwareInfo(**hw_fields)
            return select_quant_level(**select_args)

    def test_selects_turbo4_for_large_memory(self):
        """With plenty of memory, should pick turbo4 (best quality)."""
        roomy_machine = dict(
            total_memory_gb=64,
            available_memory_gb=48,
            gpu_memory_gb=64,
            gpu_name="Test GPU",
            cpu_cores=16,
            detection_method="mock",
        )
        outcome = self._select_on(
            roomy_machine, model_size_gb=14.0, context_length=32768
        )
        assert outcome.level.name == "turbo4"
        assert outcome.headroom_gb > 0

    def test_selects_smaller_for_tight_memory(self):
        """With tight memory, should pick a smaller quant."""
        tight_machine = dict(
            total_memory_gb=16,
            available_memory_gb=12,
            gpu_memory_gb=16,
            gpu_name="Test GPU",
            cpu_cores=8,
            detection_method="mock",
        )
        outcome = self._select_on(
            tight_machine, model_size_gb=14.0, context_length=131072
        )
        # Should pick a smaller quant for 128K context on 16GB
        assert outcome.level.bits_per_channel <= 4.0

    def test_preferred_level(self):
        """User can force a specific level."""
        machine = dict(
            total_memory_gb=64,
            available_memory_gb=48,
            cpu_cores=16,
            detection_method="mock",
        )
        outcome = self._select_on(
            machine,
            model_size_gb=14.0,
            context_length=32768,
            preferred_level="turbo2",
        )
        assert outcome.level.name == "turbo2"

    def test_env_vars_populated(self):
        """The selection carries the expected env var and server flags."""
        machine = dict(
            total_memory_gb=64,
            available_memory_gb=48,
            cpu_cores=16,
            detection_method="mock",
        )
        outcome = self._select_on(
            machine, model_size_gb=14.0, context_length=32768
        )
        assert "TURBO_LAYER_ADAPTIVE" in outcome.env_vars
        assert "-ctk" in outcome.server_flags
        assert "-ctv" in outcome.server_flags

    def test_warnings_on_low_headroom(self):
        """A 16 GB model on an 18 GB machine produces at least one warning."""
        cramped_machine = dict(
            total_memory_gb=18,
            available_memory_gb=14,
            gpu_memory_gb=18,
            gpu_name="Test GPU",
            cpu_cores=8,
            detection_method="mock",
        )
        outcome = self._select_on(
            cramped_machine, model_size_gb=16.0, context_length=65536
        )
        assert len(outcome.warnings) > 0

    def test_reasoning_contains_key_info(self):
        """The reasoning text names the level and the chip or memory size."""
        apple_machine = dict(
            total_memory_gb=32,
            available_memory_gb=24,
            is_apple_silicon=True,
            chip_name="M4 Max",
            cpu_cores=16,
            detection_method="mock",
        )
        outcome = self._select_on(
            apple_machine, model_size_gb=14.0, context_length=32768
        )
        assert "turbo4" in outcome.reasoning
        # Either the chip name or the memory size is enough.
        assert any(marker in outcome.reasoning for marker in ("M4 Max", "32GB"))
|
||||
Reference in New Issue
Block a user