All checks were successful
Smoke Test / smoke (pull_request) Successful in 8s
- profiles/allegro-cpu-presets.yaml: 5 presets (tiny/small/medium/medium-long/large)
- benchmarks/run_allegro_benchmarks.py: --dry-run, --all, --preset, --markdown
- benchmarks/allegro-2026-04-14.md: analysis & expected results
- tests/test_allegro_benchmarks.py: 19 smoke tests (preset validation, runner)

Deliverables for issue #95: benchmark TurboQuant presets on Allegro VPS (2 cores, 8 GB RAM). Runner integrates with existing llama-server backend. Presets tuned to ~6 GB usable memory budget; large preset needs swap.

Closes #95
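For orientation, the smoke tests require every preset in profiles/allegro-cpu-presets.yaml to carry the fields name, model, model_path, kv_type, estimated_ram_gb, fits_6gb_budget, estimated_tok_per_sec, and description. A single entry, expressed as the Python dict that yaml.safe_load() would produce, might look roughly like this; the field names come from the tests, but every value below is an illustrative placeholder, not taken from the actual profile:

# Illustrative only: one element of the top-level "presets" list after
# yaml.safe_load(). Field names match what the smoke tests check; all
# values are made-up placeholders, not the real configuration.
example_preset = {
    "name": "small",
    "model": "example-7b-q4",                    # hypothetical model label
    "model_path": "models/example-7b-q4.gguf",   # hypothetical path
    "kv_type": "q8_0",                           # hypothetical KV-cache type
    "estimated_ram_gb": 5.2,
    "fits_6gb_budget": True,
    "estimated_tok_per_sec": 4.0,
    "description": "Placeholder entry showing the required fields.",
}
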
212 lines
8.9 KiB
Python
#!/usr/bin/env python3
"""
Smoke tests for Allegro VPS benchmark infrastructure — Issue #95

Validates the preset configuration and runner entry points without
actually contacting a llama-server (no network needed).
"""

import sys
import os
import json
import pytest
from pathlib import Path

# Add repo root to sys.path
REPO_ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(REPO_ROOT))


# ─── Test fixtures ────────────────────────────────────────────────────────────
PROFILE_PATH = REPO_ROOT / "profiles" / "allegro-cpu-presets.yaml"
BENCHMARK_RUNNER = REPO_ROOT / "benchmarks" / "run_allegro_benchmarks.py"


# ─── Preset configuration validation ─────────────────────────────────────────
class TestAllegroPresets:
    """Validate allegro-cpu-presets.yaml structure and values."""

    def test_profile_file_exists(self):
        assert PROFILE_PATH.exists(), f"Profile not found: {PROFILE_PATH}"

    def test_profile_loads_as_yaml(self):
        import yaml
        with open(PROFILE_PATH) as f:
            data = yaml.safe_load(f)
        assert "presets" in data, "Profile must have a 'presets' key"
        assert isinstance(data["presets"], list), "presets must be a list"
        assert len(data["presets"]) > 0, "presets list cannot be empty"

    def test_each_preset_has_required_fields(self):
        import yaml
        with open(PROFILE_PATH) as f:
            data = yaml.safe_load(f)

        required = {"name", "model", "model_path", "kv_type",
                    "estimated_ram_gb", "fits_6gb_budget",
                    "estimated_tok_per_sec", "description"}

        for p in data["presets"]:
            missing = required - set(p.keys())
            assert not missing, f"Preset '{p.get('name', '?')}' missing fields: {missing}"

    def test_ram_estimates_are_positive(self):
        import yaml
        with open(PROFILE_PATH) as f:
            data = yaml.safe_load(f)

        for p in data["presets"]:
            ram = p["estimated_ram_gb"]
            assert ram > 0, f"{p['name']}: estimated_ram_gb must be positive"

    def test_ram_estimates_reasonable_for_8gb_vps(self):
        """No single preset should exceed the total 8 GB RAM (even with swap)."""
        import yaml
        with open(PROFILE_PATH) as f:
            data = yaml.safe_load(f)

        for p in data["presets"]:
            ram = p["estimated_ram_gb"]
            assert ram < 10, (
                f"{p['name']}: estimated_ram_gb={ram} GB seems too high "
                f"for an 8 GB VPS even with swap"
            )

    def test_kv_type_is_string(self):
        import yaml
        with open(PROFILE_PATH) as f:
            data = yaml.safe_load(f)
        for p in data["presets"]:
            assert isinstance(p["kv_type"], str)
            assert len(p["kv_type"]) > 0

    def test_fits_6gb_budget_is_boolean(self):
        import yaml
        with open(PROFILE_PATH) as f:
            data = yaml.safe_load(f)
        for p in data["presets"]:
            assert isinstance(p["fits_6gb_budget"], bool)

    def test_preset_names_are_unique(self):
        import yaml
        with open(PROFILE_PATH) as f:
            data = yaml.safe_load(f)
        names = [p["name"] for p in data["presets"]]
        assert len(names) == len(set(names)), "Duplicate preset names found"

    def test_expected_preset_names_present(self):
        """Sanity check: the documented 5 presets should exist."""
        import yaml
        with open(PROFILE_PATH) as f:
            data = yaml.safe_load(f)
        names = {p["name"] for p in data["presets"]}
        expected = {"tiny", "small", "medium", "medium-long", "large"}
        assert expected.issubset(names), f"Missing presets: {expected - names}"


# ─── Benchmark runner import sanity ───────────────────────────────────────────
class TestAllegroRunner:
    """Verify run_allegro_benchmarks.py can be imported and exposes the expected API."""

    def test_runner_file_exists(self):
        assert BENCHMARK_RUNNER.exists(), f"Runner not found: {BENCHMARK_RUNNER}"

    def test_runner_is_executable_shebang(self):
        """First line should be a Python shebang."""
        with open(BENCHMARK_RUNNER) as f:
            first = f.readline().strip()
        assert first.startswith("#!"), "Missing shebang"
        assert "python" in first.lower(), "Shebang does not reference python"

    def test_runner_imports_main(self):
        """The runner script should define main() for subprocess invocation."""
        import importlib.util
        spec = importlib.util.spec_from_file_location(
            "run_allegro_benchmarks", BENCHMARK_RUNNER
        )
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)  # type: ignore[attr-defined]
        assert hasattr(mod, "main"), "runner must define a main() function"

    def test_runner_dry_run_invocation(self):
        """Subprocess dry-run should exit 0 and print OK."""
        import subprocess
        env = os.environ.copy()
        # Ensure we use the same python as the test runner
        result = subprocess.run(
            [sys.executable, str(BENCHMARK_RUNNER), "--dry-run"],
            capture_output=True,
            text=True,
            env=env,
            timeout=30,
        )
        assert result.returncode == 0, (
            f"dry-run failed (code {result.returncode})\nSTDERR: {result.stderr}"
        )
        assert "OK" in result.stdout, "dry-run did not print 'OK'"


# ─── Markdown report validation ────────────────────────────────────────────────
class TestAllegroMarkdownReport:
    """Validate the Allegro markdown report exists and has expected sections."""

    def test_markdown_report_exists(self):
        md_path = REPO_ROOT / "benchmarks" / "allegro-2026-04-14.md"
        assert md_path.exists(), f"Markdown report not found: {md_path}"

    def test_markdown_contains_presets_table(self):
        md_path = REPO_ROOT / "benchmarks" / "allegro-2026-04-14.md"
        content = md_path.read_text()
        assert "| Preset" in content, "Missing presets table header"
        assert "| tiny" in content, "Missing 'tiny' preset row"
        assert "| medium" in content, "Missing 'medium' preset row"

    def test_markdown_contains_hardware_spec(self):
        md_path = REPO_ROOT / "benchmarks" / "allegro-2026-04-14.md"
        content = md_path.read_text()
        assert "2 vCPU" in content or "2 cores" in content, "Should mention the Allegro VPS core count"
        assert "8 GB" in content, "Should mention the Allegro VPS RAM"

    def test_markdown_contains_recommendation(self):
        md_path = REPO_ROOT / "benchmarks" / "allegro-2026-04-14.md"
        content = md_path.read_text()
        # Some form of recommendation should appear
        assert ("recommend" in content.lower() or
                "Recommended" in content or
                "best quality" in content.lower()), "Should include a preset recommendation"


# ─── Integration helpers test ─────────────────────────────────────────────────
class TestAllegroHelpers:
    """Lightweight unit tests for helper functions loaded from the runner."""

    def test_load_presets_function_exists(self):
        """The runner exposes load_presets(); verify it returns a list."""
        import importlib.util
        spec = importlib.util.spec_from_file_location(
            "run_allegro_benchmarks", BENCHMARK_RUNNER
        )
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)  # type: ignore[attr-defined]
        presets = mod.load_presets()
        assert isinstance(presets, list)
        assert len(presets) >= 5, f"Expected at least 5 presets, got {len(presets)}"

    def test_get_preset_by_name_roundtrip(self):
        import importlib.util
        spec = importlib.util.spec_from_file_location(
            "run_allegro_benchmarks", BENCHMARK_RUNNER
        )
        mod = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(mod)
        for expected in ("tiny", "small", "medium"):
            p = mod.get_preset_by_name(expected)
            assert p is not None, f"get_preset_by_name('{expected}') returned None"
            assert p["name"] == expected

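
# TestAllegroRunner and TestAllegroHelpers above each rebuild the runner module
# by hand via importlib. A shared helper along the lines of this sketch could
# consolidate that duplication; it is not used by the tests as written and is
# included only as a suggestion. It relies solely on BENCHMARK_RUNNER defined
# in the fixtures near the top of the file.
def _load_runner_module():
    """Load benchmarks/run_allegro_benchmarks.py from its file path."""
    import importlib.util
    spec = importlib.util.spec_from_file_location(
        "run_allegro_benchmarks", BENCHMARK_RUNNER
    )
    mod = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mod)
    return mod
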

# ─── Entry point ───────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Allow running as `python tests/test_allegro_benchmarks.py` for quick smoke.
    pytest.main([__file__, "-v"])
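
With a standard pytest install, the same suite runs as pytest tests/test_allegro_benchmarks.py -v; the __main__ guard above simply wraps that invocation so the file can also be executed directly. Only test_runner_dry_run_invocation spawns a subprocess, and, as the module docstring notes, none of the tests contact a llama-server.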