#!/usr/bin/env python3
"""Tests for quant_selector.py"""

import os
import sys
from unittest.mock import MagicMock, mock_open, patch

import pytest

# Make the project root importable when this file is run from the tests
# directory.  abspath() guards against a relative ``__file__``, which would
# otherwise make the double dirname() collapse to an empty string.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from evolution.quant_selector import (  # noqa: E402
    QuantLevel,
    HardwareInfo,
    QUANT_LEVELS,
    detect_hardware,
    estimate_kv_cache_gb,
    estimate_model_memory_gb,
    select_quant_level,
)

# Names these tests treat as TurboQuant levels; every other entry in
# QUANT_LEVELS is considered a fallback (e.g. q4_0).
TURBO_QUANT_NAMES = frozenset({"turbo4", "turbo3", "turbo2"})


class TestQuantLevels:
    """Structural checks on the QUANT_LEVELS table."""

    def test_levels_ordered_by_quality(self):
        """TurboQuant levels should be ordered from best quality to most aggressive.

        The quality ordering invariant for TurboQuant levels is monotonically
        increasing compression_ratio (more aggressive = more compression).
        Non-TurboQuant fallbacks (e.g. q4_0) are placed after all TurboQuant
        levels and may have any compression ratio — they exist as safe
        defaults, not as part of the quality progression.
        """
        turbo_levels = [
            level for level in QUANT_LEVELS if level.name in TURBO_QUANT_NAMES
        ]
        # Compare each TurboQuant level with its immediate successor.
        for current, following in zip(turbo_levels, turbo_levels[1:]):
            assert current.compression_ratio <= following.compression_ratio, (
                f"TurboQuant {current.name} (compression={current.compression_ratio}x) "
                f"should have <= compression than {following.name} "
                f"(compression={following.compression_ratio}x)"
            )

    def test_fallback_quant_is_last(self):
        """Non-TurboQuant fallbacks (e.g. q4_0) should be at the end of the list."""
        found_fallback = False
        for level in QUANT_LEVELS:
            if level.name not in TURBO_QUANT_NAMES:
                found_fallback = True
            elif found_fallback:
                # A TurboQuant level was seen after at least one fallback.
                pytest.fail(
                    f"TurboQuant level '{level.name}' appears after a fallback level. "
                    f"All TurboQuant levels must precede fallbacks."
                )

    def test_all_levels_have_required_fields(self):
        """Every level must carry non-empty / positive core attributes."""
        for level in QUANT_LEVELS:
            assert level.name
            assert level.bits_per_channel > 0
            assert level.compression_ratio > 1
            assert level.quality_label
            assert level.layer_adaptive >= 0
            assert level.kv_type


class TestKVEstimate:
    """Sanity checks for estimate_kv_cache_gb."""

    def test_basic_estimate(self):
        """The estimate for a typical configuration is positive and bounded."""
        # 48 layers, 8 heads, 128 dim, 32K context, 3.5 bits
        kv_gb = estimate_kv_cache_gb(32768, 48, 8, 128, 3.5)
        assert kv_gb > 0
        assert kv_gb < 10  # Should be reasonable

    def test_longer_context_larger(self):
        """A longer context must yield a larger estimate."""
        kv_32k = estimate_kv_cache_gb(32768, 48, 8, 128, 3.5)
        kv_128k = estimate_kv_cache_gb(131072, 48, 8, 128, 3.5)
        assert kv_128k > kv_32k

    def test_higher_bits_larger(self):
        """More bits per channel must yield a larger estimate."""
        kv_4b = estimate_kv_cache_gb(32768, 48, 8, 128, 4.0)
        kv_2b = estimate_kv_cache_gb(32768, 48, 8, 128, 2.0)
        assert kv_4b > kv_2b


class TestHardwareDetection:
    """Checks for detect_hardware and the Linux detection path."""

    def test_detect_returns_info(self):
        """Detection on the current host reports memory and a method."""
        hw = detect_hardware()
        assert hw.total_memory_gb > 0
        assert hw.available_memory_gb > 0
        assert hw.detection_method

    @patch("evolution.quant_selector.platform.system", return_value="Linux")
    def test_linux_detection(self, mock_system):
        """Linux detection reads total memory from a faked /proc/meminfo."""
        meminfo = (
            "MemTotal: 32000000 kB\n"
            "MemAvailable: 24000000 kB\n"
        )
        # mock_open serves the data through read(), readline(), readlines()
        # and line iteration, so the test does not depend on which of those
        # the detector uses.  The patch is limited to the detection call.
        with patch("builtins.open", mock_open(read_data=meminfo)):
            hw = _detect_linux_fallback()
        assert hw.total_memory_gb > 20


def _detect_linux_fallback():
    """Helper to test Linux detection with mocked /proc/meminfo."""
    # Imported lazily: _detect_linux is a private name of the module.
    from evolution.quant_selector import _detect_linux

    return _detect_linux()


class TestSelection:
    """Checks for select_quant_level with detect_hardware mocked out."""

    def test_selects_turbo4_for_large_memory(self):
        """With plenty of memory, should pick turbo4 (best quality)."""
        hardware = HardwareInfo(
            total_memory_gb=64,
            available_memory_gb=48,
            gpu_memory_gb=64,
            gpu_name="Test GPU",
            cpu_cores=16,
            detection_method="mock",
        )
        with patch("evolution.quant_selector.detect_hardware", return_value=hardware):
            sel = select_quant_level(model_size_gb=14.0, context_length=32768)
            assert sel.level.name == "turbo4"
            assert sel.headroom_gb > 0

    def test_selects_smaller_for_tight_memory(self):
        """With tight memory, should pick a smaller quant."""
        hardware = HardwareInfo(
            total_memory_gb=16,
            available_memory_gb=12,
            gpu_memory_gb=16,
            gpu_name="Test GPU",
            cpu_cores=8,
            detection_method="mock",
        )
        with patch("evolution.quant_selector.detect_hardware", return_value=hardware):
            sel = select_quant_level(model_size_gb=14.0, context_length=131072)
            # Should pick a smaller quant for 128K context on 16GB
            assert sel.level.bits_per_channel <= 4.0

    def test_preferred_level(self):
        """User can force a specific level."""
        hardware = HardwareInfo(
            total_memory_gb=64,
            available_memory_gb=48,
            cpu_cores=16,
            detection_method="mock",
        )
        with patch("evolution.quant_selector.detect_hardware", return_value=hardware):
            sel = select_quant_level(
                model_size_gb=14.0,
                context_length=32768,
                preferred_level="turbo2",
            )
            assert sel.level.name == "turbo2"

    def test_env_vars_populated(self):
        """The selection exposes the expected env var and server flags."""
        hardware = HardwareInfo(
            total_memory_gb=64,
            available_memory_gb=48,
            cpu_cores=16,
            detection_method="mock",
        )
        with patch("evolution.quant_selector.detect_hardware", return_value=hardware):
            sel = select_quant_level(model_size_gb=14.0, context_length=32768)
            assert "TURBO_LAYER_ADAPTIVE" in sel.env_vars
            assert "-ctk" in sel.server_flags
            assert "-ctv" in sel.server_flags

    def test_warnings_on_low_headroom(self):
        """A model that nearly fills memory should produce warnings."""
        hardware = HardwareInfo(
            total_memory_gb=18,
            available_memory_gb=14,
            gpu_memory_gb=18,
            gpu_name="Test GPU",
            cpu_cores=8,
            detection_method="mock",
        )
        with patch("evolution.quant_selector.detect_hardware", return_value=hardware):
            sel = select_quant_level(model_size_gb=16.0, context_length=65536)
            assert len(sel.warnings) > 0

    def test_reasoning_contains_key_info(self):
        """The reasoning text names the chosen level and the hardware."""
        hardware = HardwareInfo(
            total_memory_gb=32,
            available_memory_gb=24,
            is_apple_silicon=True,
            chip_name="M4 Max",
            cpu_cores=16,
            detection_method="mock",
        )
        with patch("evolution.quant_selector.detect_hardware", return_value=hardware):
            sel = select_quant_level(model_size_gb=14.0, context_length=32768)
            assert "turbo4" in sel.reasoning
            assert "M4 Max" in sel.reasoning or "32GB" in sel.reasoning