# tests/test_quant_selector.py

#!/usr/bin/env python3
"""Tests for quant_selector.py"""

import sys
import os
import pytest
from unittest.mock import patch, MagicMock

sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
from evolution.quant_selector import (
    QuantLevel,
    HardwareInfo,
    QUANT_LEVELS,
    detect_hardware,
    estimate_kv_cache_gb,
    estimate_model_memory_gb,
    select_quant_level,
)


class TestQuantLevels:
    """Structural invariants of the ``QUANT_LEVELS`` table."""

    # Names treated as TurboQuant levels; any other entry counts as a fallback.
    TURBO_QUANT_NAMES = frozenset({"turbo4", "turbo3", "turbo2"})

    def test_levels_ordered_by_quality(self):
        """TurboQuant levels should be ordered from best quality to most aggressive.

        The quality ordering invariant for TurboQuant levels is monotonically
        increasing compression_ratio (more aggressive = more compression).
        Non-TurboQuant fallbacks (e.g. q4_0) are placed after all TurboQuant
        levels and may have any compression ratio — they exist as safe defaults,
        not as part of the quality progression.
        """
        turbo_levels = [
            level for level in QUANT_LEVELS if level.name in self.TURBO_QUANT_NAMES
        ]
        # Without this guard the loop below would pass vacuously if the level
        # names in QUANT_LEVELS no longer matched TURBO_QUANT_NAMES.
        assert turbo_levels, (
            "No TurboQuant levels found in QUANT_LEVELS; "
            "expected at least one of "
            f"{sorted(self.TURBO_QUANT_NAMES)}"
        )
        # Compare each level with its immediate successor.
        for current, following in zip(turbo_levels, turbo_levels[1:]):
            assert current.compression_ratio <= following.compression_ratio, (
                f"TurboQuant {current.name} (compression={current.compression_ratio}x) "
                f"should have <= compression than {following.name} "
                f"(compression={following.compression_ratio}x)"
            )

    def test_fallback_quant_is_last(self):
        """Non-TurboQuant fallbacks (e.g. q4_0) should be at the end of the list."""
        # An empty table would make the ordering check below meaningless.
        assert QUANT_LEVELS, "QUANT_LEVELS is empty"
        found_fallback = False
        for level in QUANT_LEVELS:
            if level.name not in self.TURBO_QUANT_NAMES:
                found_fallback = True
            elif found_fallback:
                # A TurboQuant entry was seen after at least one fallback entry.
                pytest.fail(
                    f"TurboQuant level '{level.name}' appears after a fallback level. "
                    f"All TurboQuant levels must precede fallbacks."
                )

    def test_all_levels_have_required_fields(self):
        """Every level must carry non-empty / positive values for its core fields."""
        # Guard against a vacuous pass on an empty table.
        assert QUANT_LEVELS, "QUANT_LEVELS is empty"
        for level in QUANT_LEVELS:
            assert level.name
            assert level.bits_per_channel > 0
            assert level.compression_ratio > 1
            assert level.quality_label
            assert level.layer_adaptive >= 0
            assert level.kv_type


class TestKVEstimate:
    """Sanity checks on the values returned by ``estimate_kv_cache_gb``."""

    def test_basic_estimate(self):
        """A typical configuration yields a positive estimate below 10."""
        # 48 layers, 8 heads, 128 dim, 32K context, 3.5 bits
        estimate = estimate_kv_cache_gb(32768, 48, 8, 128, 3.5)
        # Positive, and within a reasonable upper bound.
        assert 0 < estimate < 10

    def test_longer_context_larger(self):
        """Growing only the first argument (32768 -> 131072) grows the estimate."""
        short_ctx, long_ctx = (
            estimate_kv_cache_gb(tokens, 48, 8, 128, 3.5)
            for tokens in (32768, 131072)
        )
        assert long_ctx > short_ctx

    def test_higher_bits_larger(self):
        """Growing only the last argument (2.0 -> 4.0) grows the estimate."""
        four_bit, two_bit = (
            estimate_kv_cache_gb(32768, 48, 8, 128, bits)
            for bits in (4.0, 2.0)
        )
        assert four_bit > two_bit


class TestHardwareDetection:
    """Checks for the hardware probing helpers in ``evolution.quant_selector``."""

    def test_detect_returns_info(self):
        """Detection on the current host reports memory figures and a method."""
        hw = detect_hardware()
        assert hw.total_memory_gb > 0
        assert hw.available_memory_gb > 0
        assert hw.detection_method

    @patch("evolution.quant_selector.platform.system", return_value="Linux")
    def test_linux_detection(self, mock_system):
        """The Linux probe reports more than 20 GB for the stubbed meminfo text."""
        # Imported here so the test does not rely on the module-level import list.
        from unittest.mock import mock_open

        meminfo = (
            "MemTotal:       32000000 kB\n"
            "MemAvailable:   24000000 kB\n"
        )
        # mock_open serves the same text through read(), readline(),
        # readlines() and line iteration, so the stub works whichever way
        # the probe consumes the file.
        with patch("builtins.open", mock_open(read_data=meminfo)):
            hw = _detect_linux_fallback()
        assert hw.total_memory_gb > 20


def _detect_linux_fallback():
    """Call the private ``_detect_linux`` probe and return whatever it returns.

    Used by the Linux detection test, which stubs ``open`` before calling this.
    The probe is imported inside the function rather than at module level.
    """
    from evolution.quant_selector import _detect_linux as linux_probe

    detected = linux_probe()
    return detected


class TestSelection:
    """Checks of ``select_quant_level`` with ``detect_hardware`` stubbed out.

    Each test builds a ``HardwareInfo`` stub, patches
    ``evolution.quant_selector.detect_hardware`` to return it, and inspects
    the resulting selection.
    """

    def test_selects_turbo4_for_large_memory(self):
        """With plenty of memory, should pick turbo4 (best quality)."""
        roomy_host = HardwareInfo(
            total_memory_gb=64,
            available_memory_gb=48,
            gpu_memory_gb=64,
            gpu_name="Test GPU",
            cpu_cores=16,
            detection_method="mock",
        )
        with patch("evolution.quant_selector.detect_hardware", return_value=roomy_host):
            choice = select_quant_level(model_size_gb=14.0, context_length=32768)
            assert choice.level.name == "turbo4"
            assert choice.headroom_gb > 0

    def test_selects_smaller_for_tight_memory(self):
        """With tight memory, should pick a smaller quant."""
        cramped_host = HardwareInfo(
            total_memory_gb=16,
            available_memory_gb=12,
            gpu_memory_gb=16,
            gpu_name="Test GPU",
            cpu_cores=8,
            detection_method="mock",
        )
        with patch("evolution.quant_selector.detect_hardware", return_value=cramped_host):
            choice = select_quant_level(model_size_gb=14.0, context_length=131072)
            # Should pick a smaller quant for 128K context on 16GB
            assert choice.level.bits_per_channel <= 4.0

    def test_preferred_level(self):
        """User can force a specific level."""
        stub_host = HardwareInfo(
            total_memory_gb=64,
            available_memory_gb=48,
            cpu_cores=16,
            detection_method="mock",
        )
        with patch("evolution.quant_selector.detect_hardware", return_value=stub_host):
            choice = select_quant_level(
                model_size_gb=14.0,
                context_length=32768,
                preferred_level="turbo2",
            )
            assert choice.level.name == "turbo2"

    def test_env_vars_populated(self):
        """The selection exposes the expected env var and server flag entries."""
        stub_host = HardwareInfo(
            total_memory_gb=64,
            available_memory_gb=48,
            cpu_cores=16,
            detection_method="mock",
        )
        with patch("evolution.quant_selector.detect_hardware", return_value=stub_host):
            choice = select_quant_level(model_size_gb=14.0, context_length=32768)
            assert "TURBO_LAYER_ADAPTIVE" in choice.env_vars
            # Both flags must be present in server_flags.
            for flag in ("-ctk", "-ctv"):
                assert flag in choice.server_flags

    def test_warnings_on_low_headroom(self):
        """A 16 GB model on an 18 GB host should produce at least one warning."""
        tight_host = HardwareInfo(
            total_memory_gb=18,
            available_memory_gb=14,
            gpu_memory_gb=18,
            gpu_name="Test GPU",
            cpu_cores=8,
            detection_method="mock",
        )
        with patch("evolution.quant_selector.detect_hardware", return_value=tight_host):
            choice = select_quant_level(model_size_gb=16.0, context_length=65536)
            assert len(choice.warnings) >= 1

    def test_reasoning_contains_key_info(self):
        """The reasoning text names the chosen level and the chip or memory size."""
        apple_host = HardwareInfo(
            total_memory_gb=32,
            available_memory_gb=24,
            is_apple_silicon=True,
            chip_name="M4 Max",
            cpu_cores=16,
            detection_method="mock",
        )
        with patch("evolution.quant_selector.detect_hardware", return_value=apple_host):
            choice = select_quant_level(model_size_gb=14.0, context_length=32768)
            assert "turbo4" in choice.reasoning
            # Either the chip name or the total memory figure is acceptable.
            assert any(marker in choice.reasoning for marker in ("M4 Max", "32GB"))
test: quant selector tests (#81) 2026-04-15 15:04:41 +00:00			`#!/usr/bin/env python3`
			`"""Tests for quant_selector.py"""`

			`import sys`
			`import os`
			`import pytest`
			`from unittest.mock import patch, MagicMock`

			`sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))`
			`from evolution.quant_selector import (`
			`QuantLevel,`
			`HardwareInfo,`
			`QUANT_LEVELS,`
			`detect_hardware,`
			`estimate_kv_cache_gb,`
			`estimate_model_memory_gb,`
			`select_quant_level,`
			`)`


			`class TestQuantLevels:`
			`def test_levels_ordered_by_quality(self):`
fix(tests): quant_selector quality-order assertion matches design intent (#138, #139) The test `test_levels_ordered_by_quality` asserted strictly descending `bits_per_channel`, but `q4_0` (4.0 bits) is a non-TurboQuant fallback placed last regardless of bit width. The design invariant is: - TurboQuant levels (turbo4→turbo2): ordered by compression_ratio ascending (more aggressive = more compression) - Fallback levels (q4_0): placed after all TurboQuant levels as safe defaults, not part of the quality progression Changes: - `test_levels_ordered_by_quality`: Now validates compression_ratio ordering for TurboQuant levels only, not across fallbacks - `test_fallback_quant_is_last`: New test ensuring non-TurboQuant fallbacks always appear after TurboQuant levels Closes #138 Closes #139 (duplicate) 2026-04-21 07:25:52 -04:00			`"""TurboQuant levels should be ordered from best quality to most aggressive.`

			`The quality ordering invariant for TurboQuant levels is monotonically`
			`increasing compression_ratio (more aggressive = more compression).`
			`Non-TurboQuant fallbacks (e.g. q4_0) are placed after all TurboQuant`
			`levels and may have any compression ratio — they exist as safe defaults,`
			`not as part of the quality progression.`
			`"""`
			`turbo_quant_names = {"turbo4", "turbo3", "turbo2"}`
			`turbo_levels = [l for l in QUANT_LEVELS if l.name in turbo_quant_names]`
			`for i in range(len(turbo_levels) - 1):`
			`assert turbo_levels[i].compression_ratio <= turbo_levels[i + 1].compression_ratio, (`
			`f"TurboQuant {turbo_levels[i].name} (compression={turbo_levels[i].compression_ratio}x) "`
			`f"should have <= compression than {turbo_levels[i+1].name} "`
			`f"(compression={turbo_levels[i+1].compression_ratio}x)"`
			`)`

			`def test_fallback_quant_is_last(self):`
			`"""Non-TurboQuant fallbacks (e.g. q4_0) should be at the end of the list."""`
			`turbo_quant_names = {"turbo4", "turbo3", "turbo2"}`
			`found_fallback = False`
			`for level in QUANT_LEVELS:`
			`if level.name not in turbo_quant_names:`
			`found_fallback = True`
			`elif found_fallback:`
			`pytest.fail(`
			`f"TurboQuant level '{level.name}' appears after a fallback level. "`
			`f"All TurboQuant levels must precede fallbacks."`
			`)`
test: quant selector tests (#81) 2026-04-15 15:04:41 +00:00
			`def test_all_levels_have_required_fields(self):`
			`for level in QUANT_LEVELS:`
			`assert level.name`
			`assert level.bits_per_channel > 0`
			`assert level.compression_ratio > 1`
			`assert level.quality_label`
			`assert level.layer_adaptive >= 0`
			`assert level.kv_type`


			`class TestKVEstimate:`
			`def test_basic_estimate(self):`
			`# 48 layers, 8 heads, 128 dim, 32K context, 3.5 bits`
			`kv_gb = estimate_kv_cache_gb(32768, 48, 8, 128, 3.5)`
			`assert kv_gb > 0`
			`assert kv_gb < 10 # Should be reasonable`

			`def test_longer_context_larger(self):`
			`kv_32k = estimate_kv_cache_gb(32768, 48, 8, 128, 3.5)`
			`kv_128k = estimate_kv_cache_gb(131072, 48, 8, 128, 3.5)`
			`assert kv_128k > kv_32k`

			`def test_higher_bits_larger(self):`
			`kv_4b = estimate_kv_cache_gb(32768, 48, 8, 128, 4.0)`
			`kv_2b = estimate_kv_cache_gb(32768, 48, 8, 128, 2.0)`
			`assert kv_4b > kv_2b`


			`class TestHardwareDetection:`
			`def test_detect_returns_info(self):`
			`hw = detect_hardware()`
			`assert hw.total_memory_gb > 0`
			`assert hw.available_memory_gb > 0`
			`assert hw.detection_method`

			`@patch("evolution.quant_selector.platform.system", return_value="Linux")`
			`@patch("builtins.open", create=True)`
			`def test_linux_detection(self, mock_open, mock_system):`
			`mock_open.return_value.__enter__().read.return_value = (`
			`"MemTotal: 32000000 kB\n"`
			`"MemAvailable: 24000000 kB\n"`
			`)`
			`hw = _detect_linux_fallback()`
			`assert hw.total_memory_gb > 20`


			`def _detect_linux_fallback():`
			`"""Helper to test Linux detection with mocked /proc/meminfo."""`
			`from evolution.quant_selector import _detect_linux`
			`return _detect_linux()`


			`class TestSelection:`
			`def test_selects_turbo4_for_large_memory(self):`
			`"""With plenty of memory, should pick turbo4 (best quality)."""`
			`with patch("evolution.quant_selector.detect_hardware") as mock_hw:`
			`mock_hw.return_value = HardwareInfo(`
			`total_memory_gb=64,`
			`available_memory_gb=48,`
			`gpu_memory_gb=64,`
			`gpu_name="Test GPU",`
			`cpu_cores=16,`
			`detection_method="mock",`
			`)`
			`sel = select_quant_level(model_size_gb=14.0, context_length=32768)`
			`assert sel.level.name == "turbo4"`
			`assert sel.headroom_gb > 0`

			`def test_selects_smaller_for_tight_memory(self):`
			`"""With tight memory, should pick a smaller quant."""`
			`with patch("evolution.quant_selector.detect_hardware") as mock_hw:`
			`mock_hw.return_value = HardwareInfo(`
			`total_memory_gb=16,`
			`available_memory_gb=12,`
			`gpu_memory_gb=16,`
			`gpu_name="Test GPU",`
			`cpu_cores=8,`
			`detection_method="mock",`
			`)`
			`sel = select_quant_level(model_size_gb=14.0, context_length=131072)`
			`# Should pick a smaller quant for 128K context on 16GB`
			`assert sel.level.bits_per_channel <= 4.0`

			`def test_preferred_level(self):`
			`"""User can force a specific level."""`
			`with patch("evolution.quant_selector.detect_hardware") as mock_hw:`
			`mock_hw.return_value = HardwareInfo(`
			`total_memory_gb=64,`
			`available_memory_gb=48,`
			`cpu_cores=16,`
			`detection_method="mock",`
			`)`
			`sel = select_quant_level(`
			`model_size_gb=14.0, context_length=32768,`
			`preferred_level="turbo2"`
			`)`
			`assert sel.level.name == "turbo2"`

			`def test_env_vars_populated(self):`
			`with patch("evolution.quant_selector.detect_hardware") as mock_hw:`
			`mock_hw.return_value = HardwareInfo(`
			`total_memory_gb=64,`
			`available_memory_gb=48,`
			`cpu_cores=16,`
			`detection_method="mock",`
			`)`
			`sel = select_quant_level(model_size_gb=14.0, context_length=32768)`
			`assert "TURBO_LAYER_ADAPTIVE" in sel.env_vars`
			`assert "-ctk" in sel.server_flags`
			`assert "-ctv" in sel.server_flags`

			`def test_warnings_on_low_headroom(self):`
			`with patch("evolution.quant_selector.detect_hardware") as mock_hw:`
			`mock_hw.return_value = HardwareInfo(`
			`total_memory_gb=18,`
			`available_memory_gb=14,`
			`gpu_memory_gb=18,`
			`gpu_name="Test GPU",`
			`cpu_cores=8,`
			`detection_method="mock",`
			`)`
			`sel = select_quant_level(model_size_gb=16.0, context_length=65536)`
			`assert len(sel.warnings) > 0`

			`def test_reasoning_contains_key_info(self):`
			`with patch("evolution.quant_selector.detect_hardware") as mock_hw:`
			`mock_hw.return_value = HardwareInfo(`
			`total_memory_gb=32,`
			`available_memory_gb=24,`
			`is_apple_silicon=True,`
			`chip_name="M4 Max",`
			`cpu_cores=16,`
			`detection_method="mock",`
			`)`
			`sel = select_quant_level(model_size_gb=14.0, context_length=32768)`
			`assert "turbo4" in sel.reasoning`
			`assert "M4 Max" in sel.reasoning or "32GB" in sel.reasoning`