tests/test_dflash_apple_silicon.py

#!/usr/bin/env python3
"""Tests for Apple Silicon DFlash benchmark planning helpers (issue #152)."""

import os
import sys
from unittest.mock import patch

sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))

from benchmarks.dflash_apple_silicon import (  # noqa: E402
    build_mlx_benchmark_command,
    detect_total_memory_gb,
    render_report_template,
    select_pair,
)


class TestPairSelection:
    """Checks which model pair ``select_pair`` returns for a given memory size."""

    def test_prefers_qwen35_9b_on_36gb_mac(self):
        """A 36 GB budget yields the 9B base model and its DFlash draft model."""
        chosen = select_pair(total_memory_gb=36)
        observed = (chosen.slug, chosen.base_model, chosen.draft_model)
        assert observed == (
            "qwen35-9b",
            "Qwen/Qwen3.5-9B",
            "z-lab/Qwen3.5-9B-DFlash",
        )

    def test_falls_back_to_4b_when_memory_is_tight(self):
        """A 20 GB budget yields the 4B pair instead of the 9B one."""
        chosen = select_pair(total_memory_gb=20)
        observed = (chosen.slug, chosen.base_model)
        assert observed == ("qwen35-4b", "Qwen/Qwen3.5-4B")


class TestCommandGeneration:
    """Checks the output of ``build_mlx_benchmark_command``."""

    def test_builds_upstream_mlx_benchmark_command(self):
        """The command built for the 36 GB pair contains every expected fragment."""
        command = build_mlx_benchmark_command(
            select_pair(total_memory_gb=36),
            dataset="gsm8k",
            max_samples=64,
        )
        expected_fragments = (
            "python -m dflash.benchmark --backend mlx",
            "--model Qwen/Qwen3.5-9B",
            "--draft-model z-lab/Qwen3.5-9B-DFlash",
            "--dataset gsm8k",
            "--max-samples 64",
            "--draft-sliding-window-size 4096",
        )
        # Checked in the same order as listed; the first missing fragment fails the test.
        for fragment in expected_fragments:
            assert fragment in command


class TestReportTemplate:
    """Checks the text returned by ``render_report_template``."""

    def test_report_template_mentions_baseline_and_verdict(self):
        """The rendered report includes the title, machine, model, baseline and verdict text."""
        report = render_report_template(
            machine_label="M3 Max 36GB",
            pair=select_pair(total_memory_gb=36),
        )
        required_snippets = (
            "DFlash Apple Silicon Benchmark Report",
            "M3 Max 36GB",
            "Qwen/Qwen3.5-9B",
            "plain MLX or llama.cpp speculative decoding",
            "Worth operationalizing locally?",
        )
        # Checked in the same order as listed; the first missing snippet fails the test.
        for snippet in required_snippets:
            assert snippet in report


class TestMemoryDetection:
    """Checks ``detect_total_memory_gb`` with the platform and subprocess calls patched."""

    # Decorators apply bottom-up, so the ``check_output`` mock is injected first
    # and the ``platform.system`` mock second.
    @patch("benchmarks.dflash_apple_silicon.platform.system", return_value="Darwin")
    @patch(
        "benchmarks.dflash_apple_silicon.subprocess.check_output",
        return_value=b"38654705664\n",
    )
    def test_detect_total_memory_gb_on_macos(self, _mock_sysctl, _mock_system):
        """With "Darwin" reported and 38654705664 (36 * 1024**3) as output, the result is 36.0."""
        detected_gb = detect_total_memory_gb()
        assert detected_gb == 36.0
feat: add Apple Silicon DFlash benchmark planner (refs #152) 2026-04-21 22:00:22 -04:00			`#!/usr/bin/env python3`
			`"""Tests for Apple Silicon DFlash benchmark planning helpers (issue #152)."""`

			`import os`
			`import sys`
			`from unittest.mock import patch`

			`sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))`

			`from benchmarks.dflash_apple_silicon import ( # noqa: E402`
			`build_mlx_benchmark_command,`
			`detect_total_memory_gb,`
			`render_report_template,`
			`select_pair,`
			`)`


			`class TestPairSelection:`
			`def test_prefers_qwen35_9b_on_36gb_mac(self):`
			`pair = select_pair(total_memory_gb=36)`
			`assert pair.slug == "qwen35-9b"`
			`assert pair.base_model == "Qwen/Qwen3.5-9B"`
			`assert pair.draft_model == "z-lab/Qwen3.5-9B-DFlash"`

			`def test_falls_back_to_4b_when_memory_is_tight(self):`
			`pair = select_pair(total_memory_gb=20)`
			`assert pair.slug == "qwen35-4b"`
			`assert pair.base_model == "Qwen/Qwen3.5-4B"`


			`class TestCommandGeneration:`
			`def test_builds_upstream_mlx_benchmark_command(self):`
			`pair = select_pair(total_memory_gb=36)`
			`command = build_mlx_benchmark_command(pair, dataset="gsm8k", max_samples=64)`
			`assert "python -m dflash.benchmark --backend mlx" in command`
			`assert "--model Qwen/Qwen3.5-9B" in command`
			`assert "--draft-model z-lab/Qwen3.5-9B-DFlash" in command`
			`assert "--dataset gsm8k" in command`
			`assert "--max-samples 64" in command`
			`assert "--draft-sliding-window-size 4096" in command`


			`class TestReportTemplate:`
			`def test_report_template_mentions_baseline_and_verdict(self):`
			`pair = select_pair(total_memory_gb=36)`
			`report = render_report_template(machine_label="M3 Max 36GB", pair=pair)`
			`assert "DFlash Apple Silicon Benchmark Report" in report`
			`assert "M3 Max 36GB" in report`
			`assert "Qwen/Qwen3.5-9B" in report`
			`assert "plain MLX or llama.cpp speculative decoding" in report`
			`assert "Worth operationalizing locally?" in report`


			`class TestMemoryDetection:`
			`@patch("benchmarks.dflash_apple_silicon.platform.system", return_value="Darwin")`
			`@patch("benchmarks.dflash_apple_silicon.subprocess.check_output", return_value=b"38654705664\n")`
			`def test_detect_total_memory_gb_on_macos(self, _mock_sysctl, _mock_system):`
			`assert detect_total_memory_gb() == 36.0`