turboquant/benchmarks/test_bonsai_benchmark.py

#!/usr/bin/env python3
"""Tests for benchmarks/bonsai_benchmark.py — 8 tests."""

import json
import os
import sys
import tempfile

sys.path.insert(0, os.path.dirname(__file__) or ".")
import importlib.util
spec = importlib.util.spec_from_file_location(
    "bb", os.path.join(os.path.dirname(__file__) or ".", "bonsai_benchmark.py"))
mod = importlib.util.module_from_spec(spec)
spec.loader.exec_module(mod)

check_gsm8k_answer = mod.check_gsm8k_answer
find_models = mod.find_models
generate_report = mod.generate_report


def test_gsm8k_answer_correct():
    """Correct answer should be detected."""
    assert check_gsm8k_answer("The answer is 18.", ["18", "$18", "eighteen"])
    print("PASS: test_gsm8k_answer_correct")


def test_gsm8k_answer_case_insensitive():
    """Answer check should be case insensitive."""
    assert check_gsm8k_answer("The answer is EIGHTEEN.", ["18", "eighteen"])
    print("PASS: test_gsm8k_answer_case_insensitive")


def test_gsm8k_answer_wrong():
    """Wrong answer should return False."""
    assert not check_gsm8k_answer("The answer is 42.", ["18", "$18", "eighteen"])
    print("PASS: test_gsm8k_answer_wrong")


def test_gsm8k_answer_partial():
    """Partial match should work."""
    assert check_gsm8k_answer("She makes $18 per day.", ["18", "$18"])
    print("PASS: test_gsm8k_answer_partial")


def test_find_models_empty():
    """Empty directory should return empty lists."""
    with tempfile.TemporaryDirectory() as tmpdir:
        models = find_models(tmpdir)
        assert models["Q1_0"] == []
        assert models["Q4_0"] == []
    print("PASS: test_find_models_empty")


def test_find_models_with_files():
    """Should find models by quantization type."""
    with tempfile.TemporaryDirectory() as tmpdir:
        # Create test files
        q1_file = os.path.join(tmpdir, "Bonsai-8B-Q1_0.gguf")
        q4_file = os.path.join(tmpdir, "Bonsai-8B-Q4_0.gguf")
        other_file = os.path.join(tmpdir, "other.txt")

        for f in [q1_file, q4_file, other_file]:
            with open(f, "w") as fh:
                fh.write("")

        models = find_models(tmpdir)
        assert len(models["Q1_0"]) == 1
        assert len(models["Q4_0"]) == 1
        assert q1_file in models["Q1_0"]
        assert q4_file in models["Q4_0"]
    print("PASS: test_find_models_with_files")


def test_find_models_nested():
    """Should find models in subdirectories."""
    with tempfile.TemporaryDirectory() as tmpdir:
        subdir = os.path.join(tmpdir, "models")
        os.makedirs(subdir)

        q1_file = os.path.join(subdir, "Bonsai-1.7B-Q1_0.gguf")
        with open(q1_file, "w") as f:
            f.write("")

        models = find_models(tmpdir)
        assert len(models["Q1_0"]) == 1
        assert q1_file in models["Q1_0"]
    print("PASS: test_find_models_nested")


def test_generate_report():
    """Report generation should produce markdown."""
    with tempfile.TemporaryDirectory() as tmpdir:
        results = [{
            "model_name": "Bonsai-8B",
            "quant_type": "Q1_0",
            "model_path": "/test/Bonsai-8B-Q1_0.gguf",
            "file_size_mb": 1024.5,
            "memory_used_mb": 2048.0,
            "gsm8k_score": 0.6,
            "gsm8k_correct": 3,
            "gsm8k_total": 5,
            "tool_calling_rate": 0.8,
            "tool_calls_correct": 8,
            "tool_calls_total": 10,
            "avg_tokens_per_sec": 15.2,
            "gsm8k_results": [],
            "tool_results": []
        }]

        output_file = os.path.join(tmpdir, "report.md")
        report = generate_report(results, output_file)

        assert os.path.exists(output_file)
        assert "Bonsai-8B" in report
        assert "Q1_0" in report
        assert "GSM8K" in report
        assert "60.0%" in report
    print("PASS: test_generate_report")


def run_all():
    test_gsm8k_answer_correct()
    test_gsm8k_answer_case_insensitive()
    test_gsm8k_answer_wrong()
    test_gsm8k_answer_partial()
    test_find_models_empty()
    test_find_models_with_files()
    test_find_models_nested()
    test_generate_report()
    print("\nAll 8 tests passed!")


if __name__ == "__main__":
    run_all()