All checks were successful
Smoke Test / smoke (pull_request) Successful in 8s
- profiles/allegro-cpu-presets.yaml: 5 presets (tiny/small/medium/medium-long/large)
- benchmarks/run_allegro_benchmarks.py: --dry-run, --all, --preset, --markdown
- benchmarks/allegro-2026-04-14.md: analysis & expected results
- tests/test_allegro_benchmarks.py: 19 smoke tests (preset validation, runner)

Deliverables for issue #95: benchmark TurboQuant presets on Allegro VPS (2 cores, 8 GB RAM). Runner integrates with existing llama-server backend. Presets tuned to ~6 GB usable memory budget; large preset needs swap.

Closes #95
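For orientation, the smoke tests require every preset in profiles/allegro-cpu-presets.yaml to carry the fields name, model, model_path, kv_type, estimated_ram_gb, fits_6gb_budget, estimated_tok_per_sec, and description. A single entry, expressed as the Python dict that yaml.safe_load() would produce, might look roughly like this; the field names come from the tests, but every value below is an illustrative placeholder, not taken from the actual profile:

# Illustrative only: one element of the top-level "presets" list after
# yaml.safe_load(). Field names match what the smoke tests check; all
# values are made-up placeholders, not the real configuration.
example_preset = {
    "name": "small",
    "model": "example-7b-q4",                    # hypothetical model label
    "model_path": "models/example-7b-q4.gguf",   # hypothetical path
    "kv_type": "q8_0",                           # hypothetical KV-cache type
    "estimated_ram_gb": 5.2,
    "fits_6gb_budget": True,
    "estimated_tok_per_sec": 4.0,
    "description": "Placeholder entry showing the required fields.",
}
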
212 lines
8.9 KiB
Python
#!/usr/bin/env python3
"""
Smoke tests for Allegro VPS benchmark infrastructure — Issue #95

Validates the preset configuration and runner entry points without
actually contacting a llama-server (no network needed).
"""

import sys
import os
import json
import pytest
from pathlib import Path

# Add repo root to sys.path
REPO_ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO_ROOT))


# ─── Test fixtures ────────────────────────────────────────────────────────────
PROFILE_PATH = REPO_ROOT / "profiles" / "allegro-cpu-presets.yaml"
BENCHMARK_RUNNER = REPO_ROOT / "benchmarks" / "run_allegro_benchmarks.py"


# ─── Preset configuration validation ─────────────────────────────────────────
class TestAllegroPresets:
    """Validate allegro-cpu-presets.yaml structure and values."""

    def test_profile_file_exists(self):
        assert PROFILE_PATH.exists(), f"Profile not found: {PROFILE_PATH}"

    def test_profile_loads_as_yaml(self):
        import yaml
        with open(PROFILE_PATH) as f:
            data = yaml.safe_load(f)
        assert "presets" in data, "Profile must have a 'presets' key"
        assert isinstance(data["presets"], list), "presets must be a list"
        assert len(data["presets"]) > 0, "presets list cannot be empty"

    def test_each_preset_has_required_fields(self):
        import yaml
        with open(PROFILE_PATH) as f:
            data = yaml.safe_load(f)

        required = {"name", "model", "model_path", "kv_type",
                    "estimated_ram_gb", "fits_6gb_budget",
                    "estimated_tok_per_sec", "description"}

        for p in data["presets"]:
            missing = required - set(p.keys())
            assert not missing, f"Preset '{p.get('name', '?')}' missing fields: {missing}"

    def test_ram_estimates_are_positive(self):
        import yaml
        with open(PROFILE_PATH) as f:
            data = yaml.safe_load(f)

        for p in data["presets"]:
            ram = p["estimated_ram_gb"]
            assert ram > 0, f"{p['name']}: estimated_ram_gb must be positive"

    def test_ram_estimates_reasonable_for_8gb_vps(self):
        """No single preset should exceed the total 8 GB RAM (even with swap)."""
        import yaml
        with open(PROFILE_PATH) as f:
            data = yaml.safe_load(f)

        for p in data["presets"]:
            ram = p["estimated_ram_gb"]
            assert ram < 10, (
                f"{p['name']}: estimated_ram_gb={ram} GB seems too high "
                f"for an 8 GB VPS even with swap"
            )

    def test_kv_type_is_string(self):
        import yaml
        with open(PROFILE_PATH) as f:
            data = yaml.safe_load(f)
        for p in data["presets"]:
            assert isinstance(p["kv_type"], str)
            assert len(p["kv_type"]) > 0

    def test_fits_6gb_budget_is_boolean(self):
        import yaml
        with open(PROFILE_PATH) as f:
            data = yaml.safe_load(f)
        for p in data["presets"]:
            assert isinstance(p["fits_6gb_budget"], bool)

    def test_preset_names_are_unique(self):
        import yaml
        with open(PROFILE_PATH) as f:
            data = yaml.safe_load(f)
        names = [p["name"] for p in data["presets"]]
        assert len(names) == len(set(names)), "Duplicate preset names found"

    def test_expected_preset_names_present(self):
        """Sanity check: the documented 5 presets should exist."""
        import yaml
        with open(PROFILE_PATH) as f:
            data = yaml.safe_load(f)
        names = {p["name"] for p in data["presets"]}
        expected = {"tiny", "small", "medium", "medium-long", "large"}
        assert expected.issubset(names), f"Missing presets: {expected - names}"


# ─── Benchmark runner import sanity ───────────────────────────────────────────
class TestAllegroRunner:
    """Verify run_allegro_benchmarks.py can be imported and exposes the expected API."""

    def test_runner_file_exists(self):
        assert BENCHMARK_RUNNER.exists(), f"Runner not found: {BENCHMARK_RUNNER}"

    def test_runner_is_executable_shebang(self):
        """First line should be a Python shebang."""
        with open(BENCHMARK_RUNNER) as f:
            first = f.readline().strip()
        assert first.startswith("#!"), "Missing shebang"
        assert "python" in first.lower(), "Shebang does not reference python"

    def test_runner_imports_main(self):
        """The runner script should define main() for subprocess invocation."""
        import importlib.util
        spec = importlib.util.spec_from_file_location(
            "run_allegro_benchmarks", BENCHMARK_RUNNER
        )
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)  # type: ignore[attr-defined]
        assert hasattr(mod, "main"), "runner must define a main() function"

    def test_runner_dry_run_invocation(self):
        """Subprocess dry-run should exit 0 and print OK."""
        import subprocess
        env = os.environ.copy()
        # Ensure we use the same python as the test runner
        result = subprocess.run(
            [sys.executable, str(BENCHMARK_RUNNER), "--dry-run"],
            capture_output=True,
            text=True,
            env=env,
            timeout=30,
        )
        assert result.returncode == 0, (
            f"dry-run failed (code {result.returncode})\nSTDERR: {result.stderr}"
        )
        assert "OK" in result.stdout, "dry-run did not print 'OK'"


# ─── Markdown report validation ────────────────────────────────────────────────
class TestAllegroMarkdownReport:
    """Validate the Allegro markdown report exists and has expected sections."""

    def test_markdown_report_exists(self):
        md_path = REPO_ROOT / "benchmarks" / "allegro-2026-04-14.md"
        assert md_path.exists(), f"Markdown report not found: {md_path}"

    def test_markdown_contains_presets_table(self):
        md_path = REPO_ROOT / "benchmarks" / "allegro-2026-04-14.md"
        content = md_path.read_text()
        assert "| Preset" in content, "Missing presets table header"
        assert "| tiny" in content, "Missing 'tiny' preset row"
        assert "| medium" in content, "Missing 'medium' preset row"

    def test_markdown_contains_hardware_spec(self):
        md_path = REPO_ROOT / "benchmarks" / "allegro-2026-04-14.md"
        content = md_path.read_text()
        assert "2 vCPU" in content or "2 cores" in content, "Should mention the Allegro VPS core count"
        assert "8 GB" in content, "Should mention the Allegro VPS RAM"

    def test_markdown_contains_recommendation(self):
        md_path = REPO_ROOT / "benchmarks" / "allegro-2026-04-14.md"
        content = md_path.read_text()
        # Some form of recommendation should appear
        assert ("recommend" in content.lower() or
                "Recommended" in content or
                "best quality" in content.lower()), "Should include a preset recommendation"


# ─── Integration helpers test ─────────────────────────────────────────────────
class TestAllegroHelpers:
    """Lightweight unit tests for helper functions loaded from the runner."""

    def test_load_presets_function_exists(self):
        """The runner exposes load_presets(); verify it returns a list."""
        import importlib.util
        spec = importlib.util.spec_from_file_location(
            "run_allegro_benchmarks", BENCHMARK_RUNNER
        )
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)  # type: ignore[attr-defined]
        presets = mod.load_presets()
        assert isinstance(presets, list)
        assert len(presets) >= 5, f"Expected at least 5 presets, got {len(presets)}"

    def test_get_preset_by_name_roundtrip(self):
        import importlib.util
        spec = importlib.util.spec_from_file_location(
            "run_allegro_benchmarks", BENCHMARK_RUNNER
        )
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        for expected in ("tiny", "small", "medium"):
            p = mod.get_preset_by_name(expected)
            assert p is not None, f"get_preset_by_name('{expected}') returned None"
            assert p["name"] == expected

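
# TestAllegroRunner and TestAllegroHelpers above each rebuild the runner module
# by hand via importlib. A shared helper along the lines of this sketch could
# consolidate that duplication; it is not used by the tests as written and is
# included only as a suggestion. It relies solely on BENCHMARK_RUNNER defined
# in the fixtures near the top of the file.
def _load_runner_module():
    """Load benchmarks/run_allegro_benchmarks.py from its file path."""
    import importlib.util
    spec = importlib.util.spec_from_file_location(
        "run_allegro_benchmarks", BENCHMARK_RUNNER
    )
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod
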

# ─── Entry point ───────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Allow running as `python tests/test_allegro_benchmarks.py` for quick smoke.
    pytest.main([__file__, "-v"])
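
With a standard pytest install, the same suite runs as pytest tests/test_allegro_benchmarks.py -v; the __main__ guard above simply wraps that invocation so the file can also be executed directly. Only test_runner_dry_run_invocation spawns a subprocess, and, as the module docstring notes, none of the tests contact a llama-server.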