Compare commits
1 Commit
burn/138-1
...
fix/92-har
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a537511652 |
@@ -1,5 +1,29 @@
|
||||
"""Phase 19: Hardware-Aware Inference Optimization.
|
||||
Part of the TurboQuant suite for local inference excellence.
|
||||
"""Backward-compatible shim for hardware-aware quantization selection.
|
||||
|
||||
The original Phase 19 placeholder `hardware_optimizer.py` never shipped real
|
||||
logic. The canonical implementation now lives in `evolution.quant_selector`.
|
||||
This shim preserves the legacy import path for any downstream callers while
|
||||
making `quant_selector.py` the single source of truth.
|
||||
"""
|
||||
import logging
|
||||
# ... (rest of the code)
|
||||
|
||||
from evolution.quant_selector import ( # noqa: F401
|
||||
HardwareInfo,
|
||||
QuantLevel,
|
||||
QuantSelection,
|
||||
QUANT_LEVELS,
|
||||
detect_hardware,
|
||||
estimate_kv_cache_gb,
|
||||
estimate_model_memory_gb,
|
||||
select_quant_level,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"HardwareInfo",
|
||||
"QuantLevel",
|
||||
"QuantSelection",
|
||||
"QUANT_LEVELS",
|
||||
"detect_hardware",
|
||||
"estimate_kv_cache_gb",
|
||||
"estimate_model_memory_gb",
|
||||
"select_quant_level",
|
||||
]
|
||||
|
||||
@@ -37,7 +37,6 @@ class QuantLevel:
|
||||
layer_adaptive: int # TURBO_LAYER_ADAPTIVE value (0-7)
|
||||
kv_type: str # -ctk/-ctv flag value
|
||||
min_memory_headroom_gb: float # Minimum free memory to recommend this level
|
||||
quality_score: int = 0 # Explicit quality ordering (higher = better)
|
||||
description: str = ""
|
||||
|
||||
|
||||
@@ -51,7 +50,6 @@ QUANT_LEVELS = [
|
||||
layer_adaptive=7,
|
||||
kv_type="turbo4",
|
||||
min_memory_headroom_gb=4.0,
|
||||
quality_score=100,
|
||||
description="PolarQuant + QJL 4-bit. Best quality, ~4.2x KV compression."
|
||||
),
|
||||
QuantLevel(
|
||||
@@ -62,7 +60,6 @@ QUANT_LEVELS = [
|
||||
layer_adaptive=5,
|
||||
kv_type="turbo3",
|
||||
min_memory_headroom_gb=3.0,
|
||||
quality_score=80,
|
||||
description="3-bit TurboQuant. High quality, ~6x KV compression."
|
||||
),
|
||||
QuantLevel(
|
||||
@@ -73,7 +70,6 @@ QUANT_LEVELS = [
|
||||
layer_adaptive=3,
|
||||
kv_type="turbo2",
|
||||
min_memory_headroom_gb=2.0,
|
||||
quality_score=60,
|
||||
description="2-bit TurboQuant. Balanced, ~10x KV compression."
|
||||
),
|
||||
QuantLevel(
|
||||
@@ -84,7 +80,6 @@ QUANT_LEVELS = [
|
||||
layer_adaptive=0,
|
||||
kv_type="q4_0",
|
||||
min_memory_headroom_gb=1.5,
|
||||
quality_score=40,
|
||||
description="Standard 4-bit quant. Fast fallback, no TurboQuant."
|
||||
),
|
||||
]
|
||||
|
||||
21
tests/test_hardware_optimizer.py
Normal file
21
tests/test_hardware_optimizer.py
Normal file
@@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Tests for hardware_optimizer compatibility shim."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
||||
|
||||
from evolution import hardware_optimizer, quant_selector
|
||||
|
||||
|
||||
def test_hardware_optimizer_reexports_quant_selector_api():
|
||||
assert hardware_optimizer.select_quant_level is quant_selector.select_quant_level
|
||||
assert hardware_optimizer.detect_hardware is quant_selector.detect_hardware
|
||||
assert hardware_optimizer.HardwareInfo is quant_selector.HardwareInfo
|
||||
assert hardware_optimizer.QuantSelection is quant_selector.QuantSelection
|
||||
|
||||
|
||||
def test_hardware_optimizer_exports_quant_level_definitions():
|
||||
assert hardware_optimizer.QUANT_LEVELS is quant_selector.QUANT_LEVELS
|
||||
assert hardware_optimizer.QuantLevel is quant_selector.QuantLevel
|
||||
@@ -22,10 +22,7 @@ class TestQuantLevels:
|
||||
def test_levels_ordered_by_quality(self):
|
||||
"""Levels should be ordered from best quality to most aggressive."""
|
||||
for i in range(len(QUANT_LEVELS) - 1):
|
||||
# Use quality_score for explicit quality ordering
|
||||
# (bits_per_channel doesn't always correlate with quality:
|
||||
# q4_0 has 4.0 bits but lower quality than turbo2 with 1.5 bits)
|
||||
assert QUANT_LEVELS[i].quality_score > QUANT_LEVELS[i + 1].quality_score
|
||||
assert QUANT_LEVELS[i].bits_per_channel > QUANT_LEVELS[i + 1].bits_per_channel
|
||||
|
||||
def test_all_levels_have_required_fields(self):
|
||||
for level in QUANT_LEVELS:
|
||||
@@ -35,7 +32,6 @@ class TestQuantLevels:
|
||||
assert level.quality_label
|
||||
assert level.layer_adaptive >= 0
|
||||
assert level.kv_type
|
||||
assert level.quality_score > 0
|
||||
|
||||
|
||||
class TestKVEstimate:
|
||||
|
||||
Reference in New Issue
Block a user