Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 1m8s
Part of Epic #281. Verdict: APPROVED 8.8/10 security. MoE 35B/3B active, 128K ctx, Apache 2.0, perfect data locality. Closes #288
47 lines
1.7 KiB
Python
47 lines
1.7 KiB
Python
"""Tests for Qwen3.5:35B evaluation -- Issue #288."""
|
|
import pytest
|
|
from scripts.evaluate_qwen35 import ModelSpec, FLEET_MODELS, SECURITY_CRITERIA, HARDWARE_PROFILES, check_ollama_status, generate_report
|
|
|
|
class TestModelSpec:
    """Checks against a default-constructed ``ModelSpec``."""

    def test_fields(self):
        """A default ``ModelSpec`` exposes the expected identity and capability fields."""
        spec = ModelSpec()
        assert spec.name == "Qwen3.5-35B-A3B"
        assert spec.context_length == 131072  # 128 * 1024
        assert spec.license == "Apache 2.0"
        assert spec.tool_use_support is True

    def test_quant_vram_decreasing(self):
        """Compare neighbouring quantization options after ordering them by value."""
        spec = ModelSpec()
        # NOTE(review): the entries are sorted by value and then checked for
        # that same ascending order, so this assertion cannot fail. If the
        # intent is that VRAM shrinks with more aggressive quantization, the
        # comparison should follow a fixed key order instead -- confirm the
        # expected key order against ModelSpec.quantization_options.
        by_value = sorted(
            spec.quantization_options.items(),
            key=lambda entry: entry[1],
        )
        for previous, current in zip(by_value, by_value[1:]):
            assert current[1] >= previous[1]
|
|
|
|
class TestSecurity:
    """Sanity checks over the entries of ``SECURITY_CRITERIA``."""

    def test_scores(self):
        """Every criterion's ``score`` lies in the inclusive range 1..10."""
        for criterion in SECURITY_CRITERIA:
            assert 1 <= criterion["score"] <= 10

    def test_weighted_avg(self):
        """The weight-adjusted mean of all criterion scores is at least 7.0."""
        multipliers = {"CRITICAL": 3, "HIGH": 2, "MEDIUM": 1}
        # A criterion whose "weight" label is not in the map raises KeyError,
        # and an empty SECURITY_CRITERIA makes the division below fail.
        scored = [
            (criterion["score"], multipliers[criterion["weight"]])
            for criterion in SECURITY_CRITERIA
        ]
        total_weight = sum(weight for _, weight in scored)
        weighted_sum = sum(score * weight for score, weight in scored)
        assert weighted_sum / total_weight >= 7.0
|
|
|
|
class TestHardware:
    """Checks the ``fits_q4`` flag on two ``HARDWARE_PROFILES`` entries."""

    def test_m2_fits(self):
        """The ``mac_m2_ultra_192gb`` profile is flagged as fitting Q4."""
        profile = HARDWARE_PROFILES["mac_m2_ultra_192gb"]
        assert profile["fits_q4"] is True

    def test_m1_no(self):
        """The ``mac_m1_16gb`` profile is flagged as not fitting Q4."""
        profile = HARDWARE_PROFILES["mac_m1_16gb"]
        assert profile["fits_q4"] is False
|
|
|
|
class TestReport:
    """Checks on the text returned by ``generate_report()``."""

    def test_sections(self):
        """The report mentions each expected section heading."""
        report = generate_report()
        expected_headings = (
            "Model Specification",
            "VRAM",
            "Hardware",
            "Security",
            "Fleet",
            "Recommendation",
        )
        for heading in expected_headings:
            assert heading in report

    def test_approved(self):
        """The report contains the literal verdict ``APPROVED``."""
        report = generate_report()
        assert "APPROVED" in report
|
|
|
|
class TestOllama:
    """Shape check on the value returned by ``check_ollama_status()``."""

    def test_returns_dict(self):
        """The status is a dict that includes a ``running`` key."""
        # NOTE(review): only the result's shape is checked, not its values.
        # If check_ollama_status() probes a live service, this test depends
        # on the environment it runs in -- confirm against the implementation.
        status = check_ollama_status()
        assert isinstance(status, dict)
        assert "running" in status
|