refactor: consolidate hardware optimizer with quant selector (#92)

2026-04-20 20:38:56 -04:00
4 changed files with 50 additions and 14 deletions
--- a/evolution/hardware_optimizer.py
+++ b/evolution/hardware_optimizer.py
@@ -1,5 +1,29 @@
-"""Phase 19: Hardware-Aware Inference Optimization.
-Part of the TurboQuant suite for local inference excellence.
+"""Backward-compatible shim for hardware-aware quantization selection.
+
+The original Phase 19 placeholder `hardware_optimizer.py` never shipped real
+logic. The canonical implementation now lives in `evolution.quant_selector`.
+This shim preserves the legacy import path for any downstream callers while
+making `quant_selector.py` the single source of truth.
 """
-import logging
-# ... (rest of the code)
+
+from evolution.quant_selector import (  # noqa: F401
+    HardwareInfo,
+    QuantLevel,
+    QuantSelection,
+    QUANT_LEVELS,
+    detect_hardware,
+    estimate_kv_cache_gb,
+    estimate_model_memory_gb,
+    select_quant_level,
+)
+
+__all__ = [
+    "HardwareInfo",
+    "QuantLevel",
+    "QuantSelection",
+    "QUANT_LEVELS",
+    "detect_hardware",
+    "estimate_kv_cache_gb",
+    "estimate_model_memory_gb",
+    "select_quant_level",
+]
--- a/evolution/quant_selector.py
+++ b/evolution/quant_selector.py
@@ -37,7 +37,6 @@ class QuantLevel:
    layer_adaptive: int     # TURBO_LAYER_ADAPTIVE value (0-7)
    kv_type: str            # -ctk/-ctv flag value
    min_memory_headroom_gb: float  # Minimum free memory to recommend this level
-    quality_score: int = 0  # Explicit quality ordering (higher = better)
    description: str = ""


@@ -51,7 +50,6 @@ QUANT_LEVELS = [
        layer_adaptive=7,
        kv_type="turbo4",
        min_memory_headroom_gb=4.0,
-        quality_score=100,
        description="PolarQuant + QJL 4-bit. Best quality, ~4.2x KV compression."
    ),
    QuantLevel(
@@ -62,7 +60,6 @@ QUANT_LEVELS = [
        layer_adaptive=5,
        kv_type="turbo3",
        min_memory_headroom_gb=3.0,
-        quality_score=80,
        description="3-bit TurboQuant. High quality, ~6x KV compression."
    ),
    QuantLevel(
@@ -73,7 +70,6 @@ QUANT_LEVELS = [
        layer_adaptive=3,
        kv_type="turbo2",
        min_memory_headroom_gb=2.0,
-        quality_score=60,
        description="2-bit TurboQuant. Balanced, ~10x KV compression."
    ),
    QuantLevel(
@@ -84,7 +80,6 @@ QUANT_LEVELS = [
        layer_adaptive=0,
        kv_type="q4_0",
        min_memory_headroom_gb=1.5,
-        quality_score=40,
        description="Standard 4-bit quant. Fast fallback, no TurboQuant."
    ),
 ]
--- a/tests/test_hardware_optimizer.py
+++ b/tests/test_hardware_optimizer.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+"""Tests for hardware_optimizer compatibility shim."""
+
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
+
+from evolution import hardware_optimizer, quant_selector
+
+
+def test_hardware_optimizer_reexports_quant_selector_api():
+    # Pin every function/class re-export by identity, including the estimate_* helpers.
+    for name in ("select_quant_level", "detect_hardware", "estimate_kv_cache_gb",
+                 "estimate_model_memory_gb", "HardwareInfo", "QuantSelection"):
+        assert getattr(hardware_optimizer, name) is getattr(quant_selector, name), name
+
+
+def test_hardware_optimizer_exports_quant_level_definitions():
+    assert hardware_optimizer.QUANT_LEVELS is quant_selector.QUANT_LEVELS
+    assert hardware_optimizer.QuantLevel is quant_selector.QuantLevel
--- a/tests/test_quant_selector.py
+++ b/tests/test_quant_selector.py
@@ -22,10 +22,7 @@ class TestQuantLevels:
    def test_levels_ordered_by_quality(self):
        """Levels should be ordered from best quality to most aggressive."""
        for i in range(len(QUANT_LEVELS) - 1):
-            # Use quality_score for explicit quality ordering
-            # (bits_per_channel doesn't always correlate with quality:
-            #  q4_0 has 4.0 bits but lower quality than turbo2 with 1.5 bits)
-            assert QUANT_LEVELS[i].quality_score > QUANT_LEVELS[i + 1].quality_score
+            assert QUANT_LEVELS[i].min_memory_headroom_gb > QUANT_LEVELS[i + 1].min_memory_headroom_gb

    def test_all_levels_have_required_fields(self):
        for level in QUANT_LEVELS:
@@ -35,7 +32,6 @@ class TestQuantLevels:
            assert level.quality_label
            assert level.layer_adaptive >= 0
            assert level.kv_type
-            assert level.quality_score > 0


 class TestKVEstimate: