#!/usr/bin/env python3
"""Tests for quant_selector.py"""

import sys
import os
import pytest
from unittest.mock import patch, MagicMock, mock_open

# Use an absolute path so the repository root is found even when ``__file__``
# is relative (e.g. the file is executed directly from another directory).
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from evolution.quant_selector import (
    QuantLevel,
    HardwareInfo,
    QUANT_LEVELS,
    detect_hardware,
    estimate_kv_cache_gb,
    estimate_model_memory_gb,
    select_quant_level,
)


def _patch_hardware(**fields):
    """Return a patcher that makes ``detect_hardware`` yield a fixed ``HardwareInfo``.

    The ``HardwareInfo`` is built from *fields* with ``detection_method="mock"``.
    Use the returned object as a context manager around the code under test.
    """
    hardware = HardwareInfo(detection_method="mock", **fields)
    return patch("evolution.quant_selector.detect_hardware", return_value=hardware)


class TestQuantLevels:
    """Sanity checks on the ``QUANT_LEVELS`` table."""

    def test_levels_ordered_by_quality(self):
        """Levels should be ordered from best quality to most aggressive."""
        # Compare each level with its successor: bits must strictly decrease.
        for current, following in zip(QUANT_LEVELS, QUANT_LEVELS[1:]):
            assert current.bits_per_channel > following.bits_per_channel

    def test_all_levels_have_required_fields(self):
        """Every level must carry non-empty / positive descriptive fields."""
        for level in QUANT_LEVELS:
            assert level.name
            assert level.bits_per_channel > 0
            assert level.compression_ratio > 1
            assert level.quality_label
            assert level.layer_adaptive >= 0
            assert level.kv_type


class TestKVEstimate:
    """Checks on how ``estimate_kv_cache_gb`` responds to its inputs."""

    def test_basic_estimate(self):
        """A typical configuration yields a positive, bounded estimate."""
        # 48 layers, 8 heads, 128 dim, 32K context, 3.5 bits
        kv_gb = estimate_kv_cache_gb(32768, 48, 8, 128, 3.5)
        assert kv_gb > 0
        assert kv_gb < 10  # Should be reasonable

    def test_longer_context_larger(self):
        """A longer context must produce a larger estimate."""
        kv_32k = estimate_kv_cache_gb(32768, 48, 8, 128, 3.5)
        kv_128k = estimate_kv_cache_gb(131072, 48, 8, 128, 3.5)
        assert kv_128k > kv_32k

    def test_higher_bits_larger(self):
        """More bits per channel must produce a larger estimate."""
        kv_4b = estimate_kv_cache_gb(32768, 48, 8, 128, 4.0)
        kv_2b = estimate_kv_cache_gb(32768, 48, 8, 128, 2.0)
        assert kv_4b > kv_2b


class TestHardwareDetection:
    """Checks on hardware detection, both live and with a mocked Linux host."""

    def test_detect_returns_info(self):
        """Unmocked detection on the current host reports usable values."""
        hw = detect_hardware()
        assert hw.total_memory_gb > 0
        assert hw.available_memory_gb > 0
        assert hw.detection_method

    @patch("evolution.quant_selector.platform.system", return_value="Linux")
    def test_linux_detection(self, mock_system):
        """Linux detection reads memory totals from a mocked file."""
        meminfo = (
            "MemTotal: 32000000 kB\n"
            "MemAvailable: 24000000 kB\n"
        )
        # mock_open supports read(), readline(), readlines() and iteration, so
        # the test does not depend on how _detect_linux consumes the file.
        with patch("builtins.open", mock_open(read_data=meminfo)):
            hw = _detect_linux_fallback()
        assert hw.total_memory_gb > 20


def _detect_linux_fallback():
    """Helper to test Linux detection with mocked /proc/meminfo."""
    from evolution.quant_selector import _detect_linux
    return _detect_linux()


class TestSelection:
    """Checks on ``select_quant_level`` with ``detect_hardware`` mocked out."""

    def test_selects_turbo4_for_large_memory(self):
        """With plenty of memory, should pick turbo4 (best quality)."""
        with _patch_hardware(
            total_memory_gb=64,
            available_memory_gb=48,
            gpu_memory_gb=64,
            gpu_name="Test GPU",
            cpu_cores=16,
        ):
            sel = select_quant_level(model_size_gb=14.0, context_length=32768)
        assert sel.level.name == "turbo4"
        assert sel.headroom_gb > 0

    def test_selects_smaller_for_tight_memory(self):
        """With tight memory, should pick a smaller quant."""
        with _patch_hardware(
            total_memory_gb=16,
            available_memory_gb=12,
            gpu_memory_gb=16,
            gpu_name="Test GPU",
            cpu_cores=8,
        ):
            sel = select_quant_level(model_size_gb=14.0, context_length=131072)
        # Should pick a smaller quant for 128K context on 16GB
        # NOTE(review): this bound may hold for every entry in QUANT_LEVELS,
        # which would make the check vacuous -- confirm and tighten if so.
        assert sel.level.bits_per_channel <= 4.0

    def test_preferred_level(self):
        """User can force a specific level."""
        with _patch_hardware(
            total_memory_gb=64,
            available_memory_gb=48,
            cpu_cores=16,
        ):
            sel = select_quant_level(
                model_size_gb=14.0, context_length=32768,
                preferred_level="turbo2"
            )
        assert sel.level.name == "turbo2"

    def test_env_vars_populated(self):
        """The selection exposes the expected env var and server flags."""
        with _patch_hardware(
            total_memory_gb=64,
            available_memory_gb=48,
            cpu_cores=16,
        ):
            sel = select_quant_level(model_size_gb=14.0, context_length=32768)
        assert "TURBO_LAYER_ADAPTIVE" in sel.env_vars
        assert "-ctk" in sel.server_flags
        assert "-ctv" in sel.server_flags

    def test_warnings_on_low_headroom(self):
        """A model that nearly fills memory should produce at least one warning."""
        with _patch_hardware(
            total_memory_gb=18,
            available_memory_gb=14,
            gpu_memory_gb=18,
            gpu_name="Test GPU",
            cpu_cores=8,
        ):
            sel = select_quant_level(model_size_gb=16.0, context_length=65536)
        assert len(sel.warnings) > 0

    def test_reasoning_contains_key_info(self):
        """The reasoning text names the chosen level and the hardware."""
        with _patch_hardware(
            total_memory_gb=32,
            available_memory_gb=24,
            is_apple_silicon=True,
            chip_name="M4 Max",
            cpu_cores=16,
        ):
            sel = select_quant_level(model_size_gb=14.0, context_length=32768)
        assert "turbo4" in sel.reasoning
        assert "M4 Max" in sel.reasoning or "32GB" in sel.reasoning