# tests/test_quant_selector.py

#!/usr/bin/env python3
"""Tests for quant_selector.py"""

import sys
import os
import pytest
from unittest.mock import patch, MagicMock

sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
from evolution.quant_selector import (
    QuantLevel,
    HardwareInfo,
    QUANT_LEVELS,
    detect_hardware,
    estimate_kv_cache_gb,
    estimate_model_memory_gb,
    select_quant_level,
)


class TestQuantLevels:
    """Sanity checks on the contents and ordering of QUANT_LEVELS."""

    def test_levels_keep_turboquant_quality_order_with_q4_fallback_last(self):
        """The three turbo levels open the table and q4_0 closes it."""
        ordered_names = [entry.name for entry in QUANT_LEVELS]

        leading_three = ordered_names[:3]
        assert leading_three == ["turbo4", "turbo3", "turbo2"]

        final_name = ordered_names[-1]
        assert final_name == "q4_0"

    def test_all_levels_have_required_fields(self):
        """Every entry carries a truthy name/label/kv_type and sane numbers."""
        for entry in QUANT_LEVELS:
            # Identification.
            assert entry.name
            # Numeric characteristics: positive bit width, ratio above 1.
            assert entry.bits_per_channel > 0
            assert entry.compression_ratio > 1
            # Descriptive and configuration fields.
            assert entry.quality_label
            assert entry.layer_adaptive >= 0
            assert entry.kv_type


class TestKVEstimate:
    """Monotonicity and plausibility checks for estimate_kv_cache_gb."""

    # Model geometry shared by every case: 48 layers, 8 heads, 128 dim.
    _GEOMETRY = (48, 8, 128)

    def test_basic_estimate(self):
        """A 32K context at 3.5 bits gives a positive size below 10 GB."""
        size_gb = estimate_kv_cache_gb(32768, *self._GEOMETRY, 3.5)
        assert size_gb > 0
        assert size_gb < 10  # Should be reasonable

    def test_longer_context_larger(self):
        """Growing the context from 32K to 128K must grow the estimate."""
        short_ctx_gb = estimate_kv_cache_gb(32768, *self._GEOMETRY, 3.5)
        long_ctx_gb = estimate_kv_cache_gb(131072, *self._GEOMETRY, 3.5)
        assert long_ctx_gb > short_ctx_gb

    def test_higher_bits_larger(self):
        """At a fixed context, 4.0 bits must cost more than 2.0 bits."""
        wide_gb = estimate_kv_cache_gb(32768, *self._GEOMETRY, 4.0)
        narrow_gb = estimate_kv_cache_gb(32768, *self._GEOMETRY, 2.0)
        assert wide_gb > narrow_gb


class TestHardwareDetection:
    """Checks for hardware detection, unpatched and with file reads mocked."""

    def test_detect_returns_info(self):
        """An unpatched detect_hardware() reports positive memory and a method."""
        info = detect_hardware()
        assert info.total_memory_gb > 0
        assert info.available_memory_gb > 0
        assert info.detection_method

    @patch("evolution.quant_selector.platform.system", return_value="Linux")
    @patch("builtins.open", create=True)
    def test_linux_detection(self, mock_open, mock_system):
        """The Linux detector reports more than 20 GB from fake meminfo text."""
        meminfo_text = (
            "MemTotal:       32000000 kB\n"
            "MemAvailable:   24000000 kB\n"
        )
        # The object yielded by `with open(...) as f` returns the fake text
        # from f.read().
        file_handle = mock_open.return_value.__enter__()
        file_handle.read.return_value = meminfo_text

        info = _detect_linux_fallback()
        assert info.total_memory_gb > 20


def _detect_linux_fallback():
    """Run the private Linux detector (for use with a mocked /proc/meminfo)."""
    # Imported lazily, at call time rather than at module import.
    from evolution.quant_selector import _detect_linux as linux_probe

    detected = linux_probe()
    return detected


class TestSelection:
    """Checks of select_quant_level with detect_hardware replaced by a mock."""

    @staticmethod
    def _fake_hardware(**fields):
        """Build a patch making detect_hardware return HardwareInfo(**fields).

        Every fake machine is tagged with detection_method="mock".
        """
        return patch(
            "evolution.quant_selector.detect_hardware",
            return_value=HardwareInfo(detection_method="mock", **fields),
        )

    def test_selects_turbo4_for_large_memory(self):
        """With plenty of memory, should pick turbo4 (best quality)."""
        roomy = dict(
            total_memory_gb=64,
            available_memory_gb=48,
            gpu_memory_gb=64,
            gpu_name="Test GPU",
            cpu_cores=16,
        )
        with self._fake_hardware(**roomy):
            choice = select_quant_level(model_size_gb=14.0, context_length=32768)
            assert choice.level.name == "turbo4"
            assert choice.headroom_gb > 0

    def test_selects_smaller_for_tight_memory(self):
        """With tight memory, should pick a smaller quant."""
        cramped = dict(
            total_memory_gb=16,
            available_memory_gb=12,
            gpu_memory_gb=16,
            gpu_name="Test GPU",
            cpu_cores=8,
        )
        with self._fake_hardware(**cramped):
            choice = select_quant_level(model_size_gb=14.0, context_length=131072)
            # Should pick a smaller quant for 128K context on 16GB
            assert choice.level.bits_per_channel <= 4.0

    def test_preferred_level(self):
        """User can force a specific level."""
        machine = dict(total_memory_gb=64, available_memory_gb=48, cpu_cores=16)
        with self._fake_hardware(**machine):
            choice = select_quant_level(
                model_size_gb=14.0,
                context_length=32768,
                preferred_level="turbo2",
            )
            assert choice.level.name == "turbo2"

    def test_env_vars_populated(self):
        """The selection exposes the layer-adaptive env var and -ctk/-ctv flags."""
        machine = dict(total_memory_gb=64, available_memory_gb=48, cpu_cores=16)
        with self._fake_hardware(**machine):
            choice = select_quant_level(model_size_gb=14.0, context_length=32768)
            assert "TURBO_LAYER_ADAPTIVE" in choice.env_vars
            assert "-ctk" in choice.server_flags
            assert "-ctv" in choice.server_flags

    def test_warnings_on_low_headroom(self):
        """A 16 GB model at 64K context on an 18 GB machine yields a warning."""
        snug = dict(
            total_memory_gb=18,
            available_memory_gb=14,
            gpu_memory_gb=18,
            gpu_name="Test GPU",
            cpu_cores=8,
        )
        with self._fake_hardware(**snug):
            choice = select_quant_level(model_size_gb=16.0, context_length=65536)
            assert len(choice.warnings) > 0

    def test_falls_back_to_turbo2_when_nothing_fits(self):
        """A 16 GB model at 128K context on an 8 GB machine selects turbo2."""
        tiny = dict(
            total_memory_gb=8,
            available_memory_gb=6,
            gpu_memory_gb=8,
            gpu_name="Tiny GPU",
            cpu_cores=4,
        )
        with self._fake_hardware(**tiny):
            choice = select_quant_level(model_size_gb=16.0, context_length=131072)
            assert choice.level.name == "turbo2"

    def test_reasoning_contains_key_info(self):
        """The reasoning text names the chosen level and the chip or memory size."""
        apple = dict(
            total_memory_gb=32,
            available_memory_gb=24,
            is_apple_silicon=True,
            chip_name="M4 Max",
            cpu_cores=16,
        )
        with self._fake_hardware(**apple):
            choice = select_quant_level(model_size_gb=14.0, context_length=32768)
            assert "turbo4" in choice.reasoning
            assert "M4 Max" in choice.reasoning or "32GB" in choice.reasoning
test: quant selector tests (#81) 2026-04-15 15:04:41 +00:00			`#!/usr/bin/env python3`
			`"""Tests for quant_selector.py"""`

			`import sys`
			`import os`
			`import pytest`
			`from unittest.mock import patch, MagicMock`

			`sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))`
			`from evolution.quant_selector import (`
			`QuantLevel,`
			`HardwareInfo,`
			`QUANT_LEVELS,`
			`detect_hardware,`
			`estimate_kv_cache_gb,`
			`estimate_model_memory_gb,`
			`select_quant_level,`
			`)`


			`class TestQuantLevels:`
feat: add Apple Silicon DFlash benchmark planner (refs #152) 2026-04-21 22:00:22 -04:00			`def test_levels_keep_turboquant_quality_order_with_q4_fallback_last(self):`
			`"""TurboQuant levels should lead, with q4_0 reserved as the non-Turbo fallback."""`
			`names = [level.name for level in QUANT_LEVELS]`
			`assert names[:3] == ["turbo4", "turbo3", "turbo2"]`
			`assert names[-1] == "q4_0"`
test: quant selector tests (#81) 2026-04-15 15:04:41 +00:00
			`def test_all_levels_have_required_fields(self):`
			`for level in QUANT_LEVELS:`
			`assert level.name`
			`assert level.bits_per_channel > 0`
			`assert level.compression_ratio > 1`
			`assert level.quality_label`
			`assert level.layer_adaptive >= 0`
			`assert level.kv_type`


			`class TestKVEstimate:`
			`def test_basic_estimate(self):`
			`# 48 layers, 8 heads, 128 dim, 32K context, 3.5 bits`
			`kv_gb = estimate_kv_cache_gb(32768, 48, 8, 128, 3.5)`
			`assert kv_gb > 0`
			`assert kv_gb < 10 # Should be reasonable`

			`def test_longer_context_larger(self):`
			`kv_32k = estimate_kv_cache_gb(32768, 48, 8, 128, 3.5)`
			`kv_128k = estimate_kv_cache_gb(131072, 48, 8, 128, 3.5)`
			`assert kv_128k > kv_32k`

			`def test_higher_bits_larger(self):`
			`kv_4b = estimate_kv_cache_gb(32768, 48, 8, 128, 4.0)`
			`kv_2b = estimate_kv_cache_gb(32768, 48, 8, 128, 2.0)`
			`assert kv_4b > kv_2b`


			`class TestHardwareDetection:`
			`def test_detect_returns_info(self):`
			`hw = detect_hardware()`
			`assert hw.total_memory_gb > 0`
			`assert hw.available_memory_gb > 0`
			`assert hw.detection_method`

			`@patch("evolution.quant_selector.platform.system", return_value="Linux")`
			`@patch("builtins.open", create=True)`
			`def test_linux_detection(self, mock_open, mock_system):`
			`mock_open.return_value.__enter__().read.return_value = (`
			`"MemTotal: 32000000 kB\n"`
			`"MemAvailable: 24000000 kB\n"`
			`)`
			`hw = _detect_linux_fallback()`
			`assert hw.total_memory_gb > 20`


			`def _detect_linux_fallback():`
			`"""Helper to test Linux detection with mocked /proc/meminfo."""`
			`from evolution.quant_selector import _detect_linux`
			`return _detect_linux()`


			`class TestSelection:`
			`def test_selects_turbo4_for_large_memory(self):`
			`"""With plenty of memory, should pick turbo4 (best quality)."""`
			`with patch("evolution.quant_selector.detect_hardware") as mock_hw:`
			`mock_hw.return_value = HardwareInfo(`
			`total_memory_gb=64,`
			`available_memory_gb=48,`
			`gpu_memory_gb=64,`
			`gpu_name="Test GPU",`
			`cpu_cores=16,`
			`detection_method="mock",`
			`)`
			`sel = select_quant_level(model_size_gb=14.0, context_length=32768)`
			`assert sel.level.name == "turbo4"`
			`assert sel.headroom_gb > 0`

			`def test_selects_smaller_for_tight_memory(self):`
			`"""With tight memory, should pick a smaller quant."""`
			`with patch("evolution.quant_selector.detect_hardware") as mock_hw:`
			`mock_hw.return_value = HardwareInfo(`
			`total_memory_gb=16,`
			`available_memory_gb=12,`
			`gpu_memory_gb=16,`
			`gpu_name="Test GPU",`
			`cpu_cores=8,`
			`detection_method="mock",`
			`)`
			`sel = select_quant_level(model_size_gb=14.0, context_length=131072)`
			`# Should pick a smaller quant for 128K context on 16GB`
			`assert sel.level.bits_per_channel <= 4.0`

			`def test_preferred_level(self):`
			`"""User can force a specific level."""`
			`with patch("evolution.quant_selector.detect_hardware") as mock_hw:`
			`mock_hw.return_value = HardwareInfo(`
			`total_memory_gb=64,`
			`available_memory_gb=48,`
			`cpu_cores=16,`
			`detection_method="mock",`
			`)`
			`sel = select_quant_level(`
			`model_size_gb=14.0, context_length=32768,`
			`preferred_level="turbo2"`
			`)`
			`assert sel.level.name == "turbo2"`

			`def test_env_vars_populated(self):`
			`with patch("evolution.quant_selector.detect_hardware") as mock_hw:`
			`mock_hw.return_value = HardwareInfo(`
			`total_memory_gb=64,`
			`available_memory_gb=48,`
			`cpu_cores=16,`
			`detection_method="mock",`
			`)`
			`sel = select_quant_level(model_size_gb=14.0, context_length=32768)`
			`assert "TURBO_LAYER_ADAPTIVE" in sel.env_vars`
			`assert "-ctk" in sel.server_flags`
			`assert "-ctv" in sel.server_flags`

			`def test_warnings_on_low_headroom(self):`
			`with patch("evolution.quant_selector.detect_hardware") as mock_hw:`
			`mock_hw.return_value = HardwareInfo(`
			`total_memory_gb=18,`
			`available_memory_gb=14,`
			`gpu_memory_gb=18,`
			`gpu_name="Test GPU",`
			`cpu_cores=8,`
			`detection_method="mock",`
			`)`
			`sel = select_quant_level(model_size_gb=16.0, context_length=65536)`
			`assert len(sel.warnings) > 0`

feat: add Apple Silicon DFlash benchmark planner (refs #152) 2026-04-21 22:00:22 -04:00			`def test_falls_back_to_turbo2_when_nothing_fits(self):`
			`with patch("evolution.quant_selector.detect_hardware") as mock_hw:`
			`mock_hw.return_value = HardwareInfo(`
			`total_memory_gb=8,`
			`available_memory_gb=6,`
			`gpu_memory_gb=8,`
			`gpu_name="Tiny GPU",`
			`cpu_cores=4,`
			`detection_method="mock",`
			`)`
			`sel = select_quant_level(model_size_gb=16.0, context_length=131072)`
			`assert sel.level.name == "turbo2"`

test: quant selector tests (#81) 2026-04-15 15:04:41 +00:00			`def test_reasoning_contains_key_info(self):`
			`with patch("evolution.quant_selector.detect_hardware") as mock_hw:`
			`mock_hw.return_value = HardwareInfo(`
			`total_memory_gb=32,`
			`available_memory_gb=24,`
			`is_apple_silicon=True,`
			`chip_name="M4 Max",`
			`cpu_cores=16,`
			`detection_method="mock",`
			`)`
			`sel = select_quant_level(model_size_gb=14.0, context_length=32768)`
			`assert "turbo4" in sel.reasoning`
			`assert "M4 Max" in sel.reasoning or "32GB" in sel.reasoning`