feat: add Apple Silicon DFlash benchmark planner (refs #152)
All checks were successful
Smoke Test / smoke (pull_request) Successful in 18s
All checks were successful
Smoke Test / smoke (pull_request) Successful in 18s
This commit is contained in:
58
tests/test_dflash_apple_silicon.py
Normal file
58
tests/test_dflash_apple_silicon.py
Normal file
@@ -0,0 +1,58 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Tests for Apple Silicon DFlash benchmark planning helpers (issue #152)."""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from unittest.mock import patch
|
||||
|
||||
# Put the parent of this file's directory at the front of sys.path;
# presumably this is the repo root, so the `benchmarks` package imported
# below resolves when the tests are run directly — confirm against repo layout.
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
|
||||
|
||||
from benchmarks.dflash_apple_silicon import ( # noqa: E402
|
||||
build_mlx_benchmark_command,
|
||||
detect_total_memory_gb,
|
||||
render_report_template,
|
||||
select_pair,
|
||||
)
|
||||
|
||||
|
||||
class TestPairSelection:
    """Checks which model pair ``select_pair`` returns for a memory budget."""

    def test_prefers_qwen35_9b_on_36gb_mac(self):
        """A 36 GB machine should get the Qwen3.5 9B base/draft pair."""
        chosen = select_pair(total_memory_gb=36)

        assert chosen.slug == "qwen35-9b"
        assert chosen.base_model == "Qwen/Qwen3.5-9B"
        assert chosen.draft_model == "z-lab/Qwen3.5-9B-DFlash"

    def test_falls_back_to_4b_when_memory_is_tight(self):
        """A 20 GB machine should get the smaller Qwen3.5 4B pair."""
        chosen = select_pair(total_memory_gb=20)

        assert chosen.slug == "qwen35-4b"
        assert chosen.base_model == "Qwen/Qwen3.5-4B"
|
||||
|
||||
|
||||
class TestCommandGeneration:
    """Checks the command string built by ``build_mlx_benchmark_command``."""

    def test_builds_upstream_mlx_benchmark_command(self):
        """The command for the 36 GB pair must contain every expected flag."""
        chosen = select_pair(total_memory_gb=36)
        command = build_mlx_benchmark_command(
            chosen,
            dataset="gsm8k",
            max_samples=64,
        )

        # Checked in order so the first missing fragment is the one reported.
        expected_fragments = (
            "python -m dflash.benchmark --backend mlx",
            "--model Qwen/Qwen3.5-9B",
            "--draft-model z-lab/Qwen3.5-9B-DFlash",
            "--dataset gsm8k",
            "--max-samples 64",
            "--draft-sliding-window-size 4096",
        )
        for fragment in expected_fragments:
            assert fragment in command
|
||||
|
||||
|
||||
class TestReportTemplate:
    """Checks the text produced by ``render_report_template``."""

    def test_report_template_mentions_baseline_and_verdict(self):
        """The report must name the machine, model, baseline and verdict prompt."""
        chosen = select_pair(total_memory_gb=36)
        report = render_report_template(machine_label="M3 Max 36GB", pair=chosen)

        # Checked in order so the first missing phrase is the one reported.
        required_phrases = (
            "DFlash Apple Silicon Benchmark Report",
            "M3 Max 36GB",
            "Qwen/Qwen3.5-9B",
            "plain MLX or llama.cpp speculative decoding",
            "Worth operationalizing locally?",
        )
        for phrase in required_phrases:
            assert phrase in report
|
||||
|
||||
|
||||
class TestMemoryDetection:
    """Checks ``detect_total_memory_gb`` with platform and subprocess mocked."""

    # Decorators apply bottom-up: the check_output mock is the first mock
    # argument and the platform.system mock is the second.
    @patch("benchmarks.dflash_apple_silicon.platform.system", return_value="Darwin")
    @patch("benchmarks.dflash_apple_silicon.subprocess.check_output", return_value=b"38654705664\n")
    def test_detect_total_memory_gb_on_macos(self, _mock_sysctl, _mock_system):
        """A mocked byte count of 38654705664 (36 * 1024**3) must read as 36.0."""
        detected_gb = detect_total_memory_gb()

        assert detected_gb == 36.0
|
||||
Reference in New Issue
Block a user