#!/usr/bin/env python3
"""Tests for quant_selector.py"""

import sys
import os
import pytest
from unittest.mock import patch, MagicMock

sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
from evolution.quant_selector import (
    QuantLevel,
    HardwareInfo,
    QUANT_LEVELS,
    detect_hardware,
    estimate_kv_cache_gb,
    estimate_model_memory_gb,
    select_quant_level,
)


class TestQuantLevels:
    """Sanity checks on the contents and ordering of QUANT_LEVELS."""

    def test_levels_keep_turboquant_quality_order_with_q4_fallback_last(self):
        """The table opens with turbo4/turbo3/turbo2 and closes with q4_0."""
        ordered_names = [entry.name for entry in QUANT_LEVELS]

        # The three TurboQuant levels must come first, in this exact order.
        turbo_prefix = ordered_names[:3]
        assert turbo_prefix == ["turbo4", "turbo3", "turbo2"]

        # q4_0 is expected in the very last slot of the table.
        final_name = ordered_names[-1]
        assert final_name == "q4_0"

    def test_all_levels_have_required_fields(self):
        """Every entry has truthy labels and numeric fields within bounds."""
        for entry in QUANT_LEVELS:
            # Label-like fields are only checked for truthiness (non-empty).
            assert entry.name
            # Bit width must be strictly positive.
            assert entry.bits_per_channel > 0
            # A ratio of exactly 1 (or less) is rejected.
            assert entry.compression_ratio > 1
            assert entry.quality_label
            # Zero is an accepted value here; only negatives are rejected.
            assert entry.layer_adaptive >= 0
            assert entry.kv_type


class TestKVEstimate:
    """Bounds and monotonicity checks for estimate_kv_cache_gb."""

    # Positional arguments shared by every call in this class; the test data
    # treats them as 48 layers, 8 heads and a head dimension of 128.
    _GEOMETRY = (48, 8, 128)

    def _estimate(self, context_length, bits):
        """Call estimate_kv_cache_gb with the shared geometry filled in."""
        return estimate_kv_cache_gb(context_length, *self._GEOMETRY, bits)

    def test_basic_estimate(self):
        """A 32K context at 3.5 bits yields a positive size below 10."""
        size_gb = self._estimate(32768, 3.5)
        assert size_gb > 0
        assert size_gb < 10  # Should be reasonable

    def test_longer_context_larger(self):
        """Quadrupling the context length must increase the estimate."""
        short_ctx_gb = self._estimate(32768, 3.5)
        long_ctx_gb = self._estimate(131072, 3.5)
        assert long_ctx_gb > short_ctx_gb

    def test_higher_bits_larger(self):
        """At a fixed context, 4.0 bits must cost more than 2.0 bits."""
        four_bit_gb = self._estimate(32768, 4.0)
        two_bit_gb = self._estimate(32768, 2.0)
        assert four_bit_gb > two_bit_gb


class TestHardwareDetection:
    """Tests for hardware probing: the public entry point and the Linux path."""

    def test_detect_returns_info(self):
        """detect_hardware() on the real host reports usable values.

        This runs unmocked, so it depends on the machine executing the tests.
        """
        hw = detect_hardware()
        assert hw.total_memory_gb > 0
        assert hw.available_memory_gb > 0
        assert hw.detection_method

    @patch("evolution.quant_selector.platform.system", return_value="Linux")
    def test_linux_detection(self, mock_system):
        """The Linux detector reads total memory from a faked meminfo file.

        ``mock_open`` is used instead of a hand-configured ``MagicMock`` so
        the fake file serves the same text whether the detector calls
        ``read()``, ``readline()``, ``readlines()`` or iterates the handle.
        The previous setup only answered ``__enter__().read()``.
        """
        # Imported locally so this test does not rely on the module-level
        # import list providing the helper.
        from unittest.mock import mock_open

        meminfo_text = (
            "MemTotal:       32000000 kB\n"
            "MemAvailable:   24000000 kB\n"
        )
        # Every open() call made while detecting returns the fake meminfo.
        with patch("builtins.open", mock_open(read_data=meminfo_text)):
            hw = _detect_linux_fallback()

        assert hw.total_memory_gb > 20


def _detect_linux_fallback():
    """Run the private Linux detector and return whatever it produces.

    The private function is imported at call time rather than at module
    import, so the tests that use this helper (which mock the meminfo file)
    are the only place that depends on its existence.
    """
    from evolution.quant_selector import _detect_linux as linux_probe

    return linux_probe()


class TestSelection:
    """Checks of select_quant_level with detect_hardware() replaced by a mock."""

    @staticmethod
    def _with_hardware(**fields):
        """Return a patch making detect_hardware() yield HardwareInfo(**fields)."""
        return patch(
            "evolution.quant_selector.detect_hardware",
            return_value=HardwareInfo(**fields),
        )

    def test_selects_turbo4_for_large_memory(self):
        """With plenty of memory, should pick turbo4 (best quality)."""
        roomy_machine = dict(
            total_memory_gb=64,
            available_memory_gb=48,
            gpu_memory_gb=64,
            gpu_name="Test GPU",
            cpu_cores=16,
            detection_method="mock",
        )
        with self._with_hardware(**roomy_machine):
            sel = select_quant_level(model_size_gb=14.0, context_length=32768)
            assert sel.level.name == "turbo4"
            assert sel.headroom_gb > 0

    def test_selects_smaller_for_tight_memory(self):
        """With tight memory, should pick a smaller quant."""
        tight_machine = dict(
            total_memory_gb=16,
            available_memory_gb=12,
            gpu_memory_gb=16,
            gpu_name="Test GPU",
            cpu_cores=8,
            detection_method="mock",
        )
        with self._with_hardware(**tight_machine):
            sel = select_quant_level(model_size_gb=14.0, context_length=131072)
            # Should pick a smaller quant for 128K context on 16GB.
            # NOTE(review): a 4.0 upper bound may also admit the largest
            # level - confirm against the QUANT_LEVELS definitions.
            assert sel.level.bits_per_channel <= 4.0

    def test_preferred_level(self):
        """User can force a specific level."""
        with self._with_hardware(
            total_memory_gb=64,
            available_memory_gb=48,
            cpu_cores=16,
            detection_method="mock",
        ):
            sel = select_quant_level(
                model_size_gb=14.0,
                context_length=32768,
                preferred_level="turbo2",
            )
            assert sel.level.name == "turbo2"

    def test_env_vars_populated(self):
        """The selection exposes the adaptive env var and both cache flags."""
        with self._with_hardware(
            total_memory_gb=64,
            available_memory_gb=48,
            cpu_cores=16,
            detection_method="mock",
        ):
            sel = select_quant_level(model_size_gb=14.0, context_length=32768)
            assert "TURBO_LAYER_ADAPTIVE" in sel.env_vars
            assert "-ctk" in sel.server_flags
            assert "-ctv" in sel.server_flags

    def test_warnings_on_low_headroom(self):
        """A 16GB model on an 18GB machine must produce at least one warning."""
        cramped_machine = dict(
            total_memory_gb=18,
            available_memory_gb=14,
            gpu_memory_gb=18,
            gpu_name="Test GPU",
            cpu_cores=8,
            detection_method="mock",
        )
        with self._with_hardware(**cramped_machine):
            sel = select_quant_level(model_size_gb=16.0, context_length=65536)
            assert len(sel.warnings) > 0

    def test_falls_back_to_turbo2_when_nothing_fits(self):
        """When the model exceeds the machine's memory, turbo2 is selected."""
        tiny_machine = dict(
            total_memory_gb=8,
            available_memory_gb=6,
            gpu_memory_gb=8,
            gpu_name="Tiny GPU",
            cpu_cores=4,
            detection_method="mock",
        )
        with self._with_hardware(**tiny_machine):
            sel = select_quant_level(model_size_gb=16.0, context_length=131072)
            assert sel.level.name == "turbo2"

    def test_reasoning_contains_key_info(self):
        """The reasoning text names the level and the chip or memory size."""
        apple_machine = dict(
            total_memory_gb=32,
            available_memory_gb=24,
            is_apple_silicon=True,
            chip_name="M4 Max",
            cpu_cores=16,
            detection_method="mock",
        )
        with self._with_hardware(**apple_machine):
            sel = select_quant_level(model_size_gb=14.0, context_length=32768)
            assert "turbo4" in sel.reasoning
            # Either hardware identifier is accepted in the explanation.
            assert any(
                marker in sel.reasoning for marker in ("M4 Max", "32GB")
            )