Files
turboquant/tests/test_allegro_benchmarks.py
step35 cb2f7b0aa7
All checks were successful
Smoke Test / smoke (pull_request) Successful in 8s
feat: add Allegro VPS benchmark infrastructure — presets, runner, tests
- profiles/allegro-cpu-presets.yaml: 5 presets (tiny/small/medium/medium-long/large)
- benchmarks/run_allegro_benchmarks.py: --dry-run, --all, --preset, --markdown
- benchmarks/allegro-2026-04-14.md: analysis & expected results
- tests/test_allegro_benchmarks.py: 19 smoke tests (preset validation, runner)

Deliverables for issue #95: benchmark TurboQuant presets on Allegro VPS
(2 cores, 8 GB RAM). Runner integrates with existing llama-server backend.
Presets tuned to ~6 GB usable memory budget; large preset needs swap.

Closes #95
2026-04-26 06:52:53 -04:00

212 lines
8.9 KiB
Python

#!/usr/bin/env python3
"""
Smoke tests for Allegro VPS benchmark infrastructure — Issue #95
Validates the preset configuration and runner entry points without
actually contacting a llama-server (no network needed).
"""
import sys
import os
import json
import pytest
from pathlib import Path
# Put the repository root (the parent of this file's directory) at the front
# of sys.path.
REPO_ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, os.fspath(REPO_ROOT))

# ─── Test fixtures ────────────────────────────────────────────────────────────
# Paths of the artifacts exercised by the tests below, relative to REPO_ROOT.
PROFILE_PATH = REPO_ROOT.joinpath("profiles", "allegro-cpu-presets.yaml")
BENCHMARK_RUNNER = REPO_ROOT.joinpath("benchmarks", "run_allegro_benchmarks.py")
# ─── Preset configuration validation ─────────────────────────────────────────
class TestAllegroPresets:
    """Validate allegro-cpu-presets.yaml structure and values.

    All tests read the profile through ``_load_profile`` so the file is
    always decoded as UTF-8 instead of the platform's default encoding.
    """

    # Keys every entry of the ``presets`` list must define.
    _REQUIRED_FIELDS = (
        "name", "model", "model_path", "kv_type",
        "estimated_ram_gb", "fits_6gb_budget",
        "estimated_tok_per_sec", "description",
    )

    # Exclusive upper bound (in GB) accepted for ``estimated_ram_gb``. It is
    # deliberately above the VPS's 8 GB of RAM to leave room for swap.
    _RAM_CEILING_GB = 10

    @staticmethod
    def _load_profile():
        """Parse the profile file and return the top-level YAML document."""
        # Imported lazily, as the rest of this module does, so that merely
        # importing the test module does not require the yaml package.
        import yaml

        with open(PROFILE_PATH, encoding="utf-8") as f:
            return yaml.safe_load(f)

    @classmethod
    def _load_presets(cls):
        """Return the value of the ``presets`` key of the parsed profile."""
        return cls._load_profile()["presets"]

    def test_profile_file_exists(self):
        assert PROFILE_PATH.exists(), f"Profile not found: {PROFILE_PATH}"

    def test_profile_loads_as_yaml(self):
        data = self._load_profile()
        assert "presets" in data, "Profile must have a 'presets' key"
        assert isinstance(data["presets"], list), "presets must be a list"
        assert len(data["presets"]) > 0, "presets list cannot be empty"

    def test_each_preset_has_required_fields(self):
        for p in self._load_presets():
            missing = {field for field in self._REQUIRED_FIELDS if field not in p}
            assert not missing, f"Preset '{p.get('name','?')}' missing fields: {missing}"

    def test_ram_estimates_are_positive(self):
        for p in self._load_presets():
            ram = p["estimated_ram_gb"]
            assert ram > 0, f"{p['name']}: estimated_ram_gb must be positive"

    def test_ram_estimates_reasonable_for_8gb_vps(self):
        """No preset may estimate 10 GB or more (8 GB of RAM plus swap headroom)."""
        for p in self._load_presets():
            ram = p["estimated_ram_gb"]
            assert ram < self._RAM_CEILING_GB, (
                f"{p['name']}: estimated_ram_gb={ram} GB seems too high "
                f"for an 8 GB VPS even with swap"
            )

    def test_kv_type_is_string(self):
        for p in self._load_presets():
            assert isinstance(p["kv_type"], str)
            assert len(p["kv_type"]) > 0

    def test_fits_6gb_budget_is_boolean(self):
        for p in self._load_presets():
            assert isinstance(p["fits_6gb_budget"], bool)

    def test_preset_names_are_unique(self):
        names = [p["name"] for p in self._load_presets()]
        assert len(names) == len(set(names)), "Duplicate preset names found"

    def test_expected_preset_names_present(self):
        """Sanity check: the documented 5 presets should exist."""
        names = {p["name"] for p in self._load_presets()}
        expected = {"tiny", "small", "medium", "medium-long", "large"}
        assert expected.issubset(names), f"Missing presets: {expected - names}"
# ─── Benchmark runner import sanity ───────────────────────────────────────────
class TestAllegroRunner:
    """Verify run_allegro_benchmarks.py can be imported and exposes the expected API."""

    def test_runner_file_exists(self):
        assert BENCHMARK_RUNNER.exists(), f"Runner not found: {BENCHMARK_RUNNER}"

    def test_runner_is_executable_shebang(self):
        """First line should be a Python shebang."""
        # Explicit encoding so the read does not depend on the platform locale.
        with open(BENCHMARK_RUNNER, encoding="utf-8") as f:
            first = f.readline().strip()
        assert first.startswith("#!"), "Missing shebang"
        assert "python" in first.lower(), "Shebang does not reference python"

    def test_runner_imports_main(self):
        """The runner script should define main() for subprocess invocation."""
        import importlib.util

        spec = importlib.util.spec_from_file_location(
            "run_allegro_benchmarks", BENCHMARK_RUNNER
        )
        # spec_from_file_location() can return None (and a spec can lack a
        # loader); fail with a readable message instead of an AttributeError.
        assert spec is not None and spec.loader is not None, (
            f"Cannot build an import spec for {BENCHMARK_RUNNER}"
        )
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        assert hasattr(mod, "main"), "runner must define a main() function"

    def test_runner_dry_run_invocation(self):
        """Subprocess dry-run should exit 0 and print OK."""
        import subprocess

        # sys.executable ensures we use the same python as the test runner.
        result = subprocess.run(
            [sys.executable, str(BENCHMARK_RUNNER), "--dry-run"],
            capture_output=True,
            text=True,
            env=os.environ.copy(),
            timeout=30,
        )
        # Single braces: the message must interpolate the actual exit code and
        # stderr rather than print the literal placeholder text.
        assert result.returncode == 0, (
            f"dry-run failed (code {result.returncode})\nSTDERR: {result.stderr}"
        )
        assert "OK" in result.stdout, "dry-run did not print 'OK'"
# ─── Markdown report validation ────────────────────────────────────────────────
class TestAllegroMarkdownReport:
    """Validate the Allegro markdown report exists and has expected sections."""

    @staticmethod
    def _report_path():
        """Return the path of the markdown report under the repo root."""
        return REPO_ROOT / "benchmarks" / "allegro-2026-04-14.md"

    @classmethod
    def _read_report(cls):
        """Return the report text, decoded as UTF-8.

        The encoding is explicit so the result does not depend on the
        platform's default locale.
        """
        return cls._report_path().read_text(encoding="utf-8")

    def test_markdown_report_exists(self):
        md_path = self._report_path()
        assert md_path.exists(), f"Markdown report not found: {md_path}"

    def test_markdown_contains_presets_table(self):
        content = self._read_report()
        assert "| Preset" in content, "Missing presets table header"
        assert "| tiny" in content, "Missing 'tiny' preset row"
        assert "| medium" in content, "Missing 'medium' preset row"

    def test_markdown_contains_hardware_spec(self):
        content = self._read_report()
        assert "2 vCPU" in content or "2 cores" in content, "Should mention the Allegro VPS core count"
        assert "8 GB" in content, "Should mention the Allegro VPS RAM"

    def test_markdown_contains_recommendation(self):
        # Some form of recommendation should appear. The check is
        # case-insensitive, so "Recommended" is covered by "recommend".
        lowered = self._read_report().lower()
        assert ("recommend" in lowered or
                "best quality" in lowered), "Should include a preset recommendation"
# ─── Integration helpers test ─────────────────────────────────────────────────
class TestAllegroHelpers:
    """Lightweight unit tests for helper functions loaded from the runner."""

    @staticmethod
    def _load_runner():
        """Execute the runner script as a module and return the module object."""
        import importlib.util

        spec = importlib.util.spec_from_file_location(
            "run_allegro_benchmarks", BENCHMARK_RUNNER
        )
        # spec_from_file_location() can return None (and a spec can lack a
        # loader); fail with a readable message instead of an AttributeError.
        assert spec is not None and spec.loader is not None, (
            f"Cannot build an import spec for {BENCHMARK_RUNNER}"
        )
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        return mod

    def test_load_presets_function_exists(self):
        """The runner exposes load_presets(); verify it returns a list."""
        presets = self._load_runner().load_presets()
        assert isinstance(presets, list)
        # Single braces so the real count is interpolated into the message.
        assert len(presets) >= 5, f"Expected at least 5 presets, got {len(presets)}"

    def test_get_preset_by_name_roundtrip(self):
        """get_preset_by_name() returns the preset whose name was requested."""
        mod = self._load_runner()
        for expected in ("tiny", "small", "medium"):
            p = mod.get_preset_by_name(expected)
            assert p is not None, f"get_preset_by_name('{expected}') returned None"
            assert p["name"] == expected
# ─── Entry point ───────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Allow running as `python tests/test_allegro_benchmarks.py` for quick smoke.
    # Propagate pytest's exit code so a failing run is visible to the shell/CI
    # instead of always exiting with status 0.
    raise SystemExit(pytest.main([__file__, "-v"]))