Compare commits

..

1 Commit

Author SHA1 Message Date
Alexander Whitestone
a537511652 refactor: consolidate hardware optimizer with quant selector (#92)
All checks were successful
Smoke Test / smoke (pull_request) Successful in 17s
2026-04-20 20:38:56 -04:00
4 changed files with 50 additions and 14 deletions

View File

@@ -1,5 +1,29 @@
"""Phase 19: Hardware-Aware Inference Optimization.
Part of the TurboQuant suite for local inference excellence.
"""Backward-compatible shim for hardware-aware quantization selection.
The original Phase 19 placeholder `hardware_optimizer.py` never shipped real
logic. The canonical implementation now lives in `evolution.quant_selector`.
This shim preserves the legacy import path for any downstream callers while
making `quant_selector.py` the single source of truth.
"""
import logging
# ... (rest of the code)
from evolution.quant_selector import ( # noqa: F401
HardwareInfo,
QuantLevel,
QuantSelection,
QUANT_LEVELS,
detect_hardware,
estimate_kv_cache_gb,
estimate_model_memory_gb,
select_quant_level,
)
# Public API of this compatibility shim. Every entry is one of the names
# imported from evolution.quant_selector above, so the legacy
# `hardware_optimizer` import path exposes those same objects.
__all__ = [
"HardwareInfo",
"QuantLevel",
"QuantSelection",
"QUANT_LEVELS",
"detect_hardware",
"estimate_kv_cache_gb",
"estimate_model_memory_gb",
"select_quant_level",
]

View File

@@ -37,7 +37,6 @@ class QuantLevel:
layer_adaptive: int # TURBO_LAYER_ADAPTIVE value (0-7)
kv_type: str # -ctk/-ctv flag value
min_memory_headroom_gb: float # Minimum free memory to recommend this level
quality_score: int = 0 # Explicit quality ordering (higher = better)
description: str = ""
@@ -51,7 +50,6 @@ QUANT_LEVELS = [
layer_adaptive=7,
kv_type="turbo4",
min_memory_headroom_gb=4.0,
quality_score=100,
description="PolarQuant + QJL 4-bit. Best quality, ~4.2x KV compression."
),
QuantLevel(
@@ -62,7 +60,6 @@ QUANT_LEVELS = [
layer_adaptive=5,
kv_type="turbo3",
min_memory_headroom_gb=3.0,
quality_score=80,
description="3-bit TurboQuant. High quality, ~6x KV compression."
),
QuantLevel(
@@ -73,7 +70,6 @@ QUANT_LEVELS = [
layer_adaptive=3,
kv_type="turbo2",
min_memory_headroom_gb=2.0,
quality_score=60,
description="2-bit TurboQuant. Balanced, ~10x KV compression."
),
QuantLevel(
@@ -84,7 +80,6 @@ QUANT_LEVELS = [
layer_adaptive=0,
kv_type="q4_0",
min_memory_headroom_gb=1.5,
quality_score=40,
description="Standard 4-bit quant. Fast fallback, no TurboQuant."
),
]

View File

@@ -0,0 +1,21 @@
#!/usr/bin/env python3
"""Tests for hardware_optimizer compatibility shim."""
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
from evolution import hardware_optimizer, quant_selector
def test_hardware_optimizer_reexports_quant_selector_api():
    """Check that the shim re-exports quant_selector's objects, not copies.

    Every name listed in ``hardware_optimizer.__all__`` must be the very same
    object (``is``) as the attribute of that name on ``quant_selector``. The
    four names the test has always guarded are additionally required to be
    present in ``__all__`` so they cannot silently drop out of the public API.
    """
    exported = list(hardware_optimizer.__all__)

    # Names this test has always covered; they must stay exported.
    required = {
        "select_quant_level",
        "detect_hardware",
        "HardwareInfo",
        "QuantSelection",
    }
    missing = required - set(exported)
    assert not missing, f"shim no longer exports: {sorted(missing)}"

    # Walk the whole export list so newly added re-exports (for example the
    # memory estimators) are identity-checked without editing this test.
    for name in exported:
        shim_obj = getattr(hardware_optimizer, name)
        canonical_obj = getattr(quant_selector, name)
        assert shim_obj is canonical_obj, name
def test_hardware_optimizer_exports_quant_level_definitions():
    """The shim must expose the same level table and level class objects."""
    shim, canonical = hardware_optimizer, quant_selector
    # Identity (not equality): the shim may not carry its own copies.
    for attr in ("QUANT_LEVELS", "QuantLevel"):
        assert getattr(shim, attr) is getattr(canonical, attr)

View File

@@ -22,10 +22,7 @@ class TestQuantLevels:
def test_levels_ordered_by_quality(self):
"""Levels should be ordered from best quality to most aggressive."""
# Compare every level with the one that follows it in QUANT_LEVELS.
for i in range(len(QUANT_LEVELS) - 1):
# Use quality_score for explicit quality ordering
# (bits_per_channel doesn't always correlate with quality:
# q4_0 has 4.0 bits but lower quality than turbo2 with 1.5 bits)
assert QUANT_LEVELS[i].quality_score > QUANT_LEVELS[i + 1].quality_score
# NOTE(review): per the comment above, q4_0 (4.0 bits) ranks below turbo2
# (1.5 bits), so a strictly decreasing bits_per_channel check looks like it
# would fail for that adjacent pair -- confirm against the actual
# bits_per_channel values in QUANT_LEVELS before relying on this assert.
assert QUANT_LEVELS[i].bits_per_channel > QUANT_LEVELS[i + 1].bits_per_channel
def test_all_levels_have_required_fields(self):
for level in QUANT_LEVELS:
@@ -35,7 +32,6 @@ class TestQuantLevels:
assert level.quality_label
assert level.layer_adaptive >= 0
assert level.kv_type
assert level.quality_score > 0
class TestKVEstimate: