"""Tests for TurboQuant test matrix (Issue #11).""" import json import re from unittest.mock import patch, MagicMock import pytest import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).parent.parent / "benchmarks")) from test_matrix import ( evaluate_quality, evaluate_performance, report_to_markdown, TEST_PROMPTS, PPL_DELTA_MAX, TOKS_BASELINE_RATIO, TTFT_BASELINE_RATIO, ) class TestEvaluateQuality: def test_pattern_match(self): result = evaluate_quality("The first law of thermodynamics states...", r"(?i)(first law|energy)") assert result["matched"] is True def test_pattern_no_match(self): result = evaluate_quality("Hello world", r"(?i)(thermodynamics|entropy)") assert result["matched"] is False def test_substance_check(self): result = evaluate_quality("Short", r".*") assert result["has_substance"] is False def test_substance_pass(self): result = evaluate_quality("A" * 100, r".*") assert result["has_substance"] is True def test_response_length(self): result = evaluate_quality("Hello world", r".*") assert result["response_length"] == 11 class TestEvaluatePerformance: def test_tok_per_sec_pass(self): result = {"tok_per_sec": 100, "ttft": 0.5, "peak_mem_mb": 1000} baseline = {"tok_per_sec": 100, "ttft": 0.5} perf = evaluate_performance(result, baseline) assert perf["tok_per_sec_pass"] is True def test_tok_per_sec_fail(self): result = {"tok_per_sec": 50, "ttft": 0.5, "peak_mem_mb": 1000} baseline = {"tok_per_sec": 100, "ttft": 0.5} perf = evaluate_performance(result, baseline) assert perf["tok_per_sec_pass"] is False def test_ttft_pass(self): result = {"tok_per_sec": 100, "ttft": 0.5, "peak_mem_mb": 1000} baseline = {"tok_per_sec": 100, "ttft": 0.5} perf = evaluate_performance(result, baseline) assert perf["ttft_pass"] is True def test_ttft_fail(self): result = {"tok_per_sec": 100, "ttft": 1.0, "peak_mem_mb": 1000} baseline = {"tok_per_sec": 100, "ttft": 0.5} perf = evaluate_performance(result, baseline) assert perf["ttft_pass"] is False def test_memory_pass(self): result = {"tok_per_sec": 100, "ttft": 0.5, "peak_mem_mb": 10000} baseline = {"tok_per_sec": 100, "ttft": 0.5} perf = evaluate_performance(result, baseline) assert perf["peak_mem_pass"] is True class TestTestPrompts: def test_has_10_prompts(self): assert len(TEST_PROMPTS) == 10 def test_all_have_patterns(self): for p in TEST_PROMPTS: assert "pass_pattern" in p # Verify pattern compiles re.compile(p["pass_pattern"]) def test_all_have_categories(self): categories = {p["category"] for p in TEST_PROMPTS} assert len(categories) >= 4 # At least 4 different categories class TestReportMarkdown: def test_has_summary(self): report = { "generated_at": "2026-04-14T00:00:00", "model": "test-model", "backend": "ollama", "kv_type": "fp16", "total_prompts": 10, "passed": 9, "failed": 1, "pass_rate": 0.9, "quality_pass_rate": 0.95, "results": [ {"prompt_id": 1, "name": "Test", "category": "factual", "quality": {"matched": True}, "performance": {"tok_per_sec": 50}, "pass": True} ], } md = report_to_markdown(report) assert "Test Matrix Report" in md assert "9" in md # passed assert "GO" in md # 90% pass rate def test_nogo_on_low_pass_rate(self): report = { "generated_at": "2026-04-14", "model": "x", "backend": "x", "kv_type": "x", "total_prompts": 10, "passed": 5, "failed": 5, "pass_rate": 0.5, "quality_pass_rate": 0.5, "results": [], } md = report_to_markdown(report) assert "NO-GO" in md