#!/usr/bin/env python3
"""Tests for quant_selector.py"""

import os
import sys
from unittest.mock import MagicMock, mock_open, patch

import pytest

# Make the repository root importable so ``evolution`` resolves when this
# file is run directly from the tests directory.
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))

from evolution.quant_selector import (  # noqa: E402  (needs sys.path tweak above)
    QuantLevel,
    HardwareInfo,
    QUANT_LEVELS,
    detect_hardware,
    estimate_kv_cache_gb,
    estimate_model_memory_gb,
    select_quant_level,
)

# Dotted path of the hardware probe that the selection tests replace.
_DETECT_HARDWARE = "evolution.quant_selector.detect_hardware"


def _patched_hardware(**fields):
    """Return a patcher that makes ``detect_hardware`` report ``fields``.

    Args:
        **fields: Keyword arguments forwarded to ``HardwareInfo``;
            ``detection_method`` is always set to ``"mock"``.

    Returns:
        The ``unittest.mock.patch`` object, usable as a context manager.
    """
    return patch(
        _DETECT_HARDWARE,
        return_value=HardwareInfo(detection_method="mock", **fields),
    )


class TestQuantLevels:
    """Sanity checks on the static ``QUANT_LEVELS`` table."""

    def test_levels_keep_turboquant_quality_order_with_q4_fallback_last(self):
        """TurboQuant levels should lead, with q4_0 reserved as the non-Turbo fallback."""
        names = [level.name for level in QUANT_LEVELS]
        assert names[:3] == ["turbo4", "turbo3", "turbo2"]
        assert names[-1] == "q4_0"

    def test_all_levels_have_required_fields(self):
        """Every level must carry non-empty / positive descriptive fields."""
        for level in QUANT_LEVELS:
            assert level.name
            assert level.bits_per_channel > 0
            assert level.compression_ratio > 1
            assert level.quality_label
            assert level.layer_adaptive >= 0
            assert level.kv_type


class TestKVEstimate:
    """Monotonicity and plausibility checks for ``estimate_kv_cache_gb``."""

    def test_basic_estimate(self):
        """A 32K-context estimate is positive and below 10 GB."""
        # 48 layers, 8 heads, 128 dim, 32K context, 3.5 bits
        kv_gb = estimate_kv_cache_gb(32768, 48, 8, 128, 3.5)
        assert kv_gb > 0
        assert kv_gb < 10  # Should be reasonable

    def test_longer_context_larger(self):
        """Quadrupling the context length must increase the estimate."""
        kv_32k = estimate_kv_cache_gb(32768, 48, 8, 128, 3.5)
        kv_128k = estimate_kv_cache_gb(131072, 48, 8, 128, 3.5)
        assert kv_128k > kv_32k

    def test_higher_bits_larger(self):
        """More bits per channel must increase the estimate."""
        kv_4b = estimate_kv_cache_gb(32768, 48, 8, 128, 4.0)
        kv_2b = estimate_kv_cache_gb(32768, 48, 8, 128, 2.0)
        assert kv_4b > kv_2b


class TestHardwareDetection:
    """Checks for the hardware probing entry points."""

    def test_detect_returns_info(self):
        """Unmocked detection reports positive memory and a method name."""
        hw = detect_hardware()
        assert hw.total_memory_gb > 0
        assert hw.available_memory_gb > 0
        assert hw.detection_method

    @patch("evolution.quant_selector.platform.system", return_value="Linux")
    def test_linux_detection(self, mock_system):
        """Linux detection reports the memory found in a fake /proc/meminfo."""
        meminfo = (
            "MemTotal: 32000000 kB\n"
            "MemAvailable: 24000000 kB\n"
        )
        # mock_open serves the same text through read(), readline(),
        # readlines() and iteration, so the test does not depend on which
        # of those the implementation happens to use.
        with patch("builtins.open", mock_open(read_data=meminfo)):
            hw = _detect_linux_fallback()
        assert hw.total_memory_gb > 20


def _detect_linux_fallback():
    """Helper to test Linux detection with mocked /proc/meminfo."""
    # Imported lazily: ``_detect_linux`` is private and only needed here.
    from evolution.quant_selector import _detect_linux

    return _detect_linux()


class TestSelection:
    """Behavior of ``select_quant_level`` under mocked hardware."""

    def test_selects_turbo4_for_large_memory(self):
        """With plenty of memory, should pick turbo4 (best quality)."""
        with _patched_hardware(
            total_memory_gb=64,
            available_memory_gb=48,
            gpu_memory_gb=64,
            gpu_name="Test GPU",
            cpu_cores=16,
        ):
            sel = select_quant_level(model_size_gb=14.0, context_length=32768)
        assert sel.level.name == "turbo4"
        assert sel.headroom_gb > 0

    def test_selects_smaller_for_tight_memory(self):
        """With tight memory, should pick a smaller quant."""
        with _patched_hardware(
            total_memory_gb=16,
            available_memory_gb=12,
            gpu_memory_gb=16,
            gpu_name="Test GPU",
            cpu_cores=8,
        ):
            sel = select_quant_level(model_size_gb=14.0, context_length=131072)
        # Should pick a smaller quant for 128K context on 16GB
        assert sel.level.bits_per_channel <= 4.0

    def test_preferred_level(self):
        """User can force a specific level."""
        with _patched_hardware(
            total_memory_gb=64,
            available_memory_gb=48,
            cpu_cores=16,
        ):
            sel = select_quant_level(
                model_size_gb=14.0,
                context_length=32768,
                preferred_level="turbo2",
            )
        assert sel.level.name == "turbo2"

    def test_env_vars_populated(self):
        """The selection exposes the adaptive env var and both KV-type flags."""
        with _patched_hardware(
            total_memory_gb=64,
            available_memory_gb=48,
            cpu_cores=16,
        ):
            sel = select_quant_level(model_size_gb=14.0, context_length=32768)
        assert "TURBO_LAYER_ADAPTIVE" in sel.env_vars
        assert "-ctk" in sel.server_flags
        assert "-ctv" in sel.server_flags

    def test_warnings_on_low_headroom(self):
        """A 16 GB model on an 18 GB machine must produce at least one warning."""
        with _patched_hardware(
            total_memory_gb=18,
            available_memory_gb=14,
            gpu_memory_gb=18,
            gpu_name="Test GPU",
            cpu_cores=8,
        ):
            sel = select_quant_level(model_size_gb=16.0, context_length=65536)
        assert len(sel.warnings) > 0

    def test_falls_back_to_turbo2_when_nothing_fits(self):
        """When the model cannot fit at all, the selector returns turbo2."""
        with _patched_hardware(
            total_memory_gb=8,
            available_memory_gb=6,
            gpu_memory_gb=8,
            gpu_name="Tiny GPU",
            cpu_cores=4,
        ):
            sel = select_quant_level(model_size_gb=16.0, context_length=131072)
        assert sel.level.name == "turbo2"

    def test_reasoning_contains_key_info(self):
        """The reasoning text names the chosen level and the hardware."""
        with _patched_hardware(
            total_memory_gb=32,
            available_memory_gb=24,
            is_apple_silicon=True,
            chip_name="M4 Max",
            cpu_cores=16,
        ):
            sel = select_quant_level(model_size_gb=14.0, context_length=32768)
        assert "turbo4" in sel.reasoning
        assert "M4 Max" in sel.reasoning or "32GB" in sel.reasoning