Some checks failed
Forge CI / smoke-and-build (pull_request) Failing after 48s
Part of Epic #281 -- Vitalik's Secure LLM Architecture.

Evaluation of Qwen3.5-35B-A3B (MoE, 35B total / 3B active) for local deployment as the privacy-sensitive inference tier.

- scripts/evaluate_qwen35.py: specs, VRAM, hardware matrix, security scoring (Vitalik framework, 8.8/10), fleet comparison, integration
- tests/test_evaluate_qwen35.py: 9 tests

Verdict: APPROVED. Perfect data locality, 128K context, Apache 2.0, MoE speed advantage, tool use supported, eliminates Privacy Filter.

Closes #288
64 lines
1.9 KiB
Python
64 lines
1.9 KiB
Python
"""Tests for Qwen3.5:35B evaluation -- Issue #288."""
|
|
|
|
import json
|
|
import pytest
|
|
from scripts.evaluate_qwen35 import (
|
|
ModelSpec, FLEET_MODELS, SECURITY_CRITERIA, HARDWARE_PROFILES,
|
|
check_ollama_status, generate_report,
|
|
)
|
|
|
|
|
|
class TestModelSpec:
    """Checks on a default-constructed ``ModelSpec``."""

    def test_spec_fields(self):
        """A default ``ModelSpec`` carries the expected attribute values."""
        spec = ModelSpec()
        assert spec.name == "Qwen3.5-35B-A3B"
        assert spec.total_params == "35B"
        assert spec.active_params == "3B per token"
        assert spec.context_length == 131072
        assert spec.license == "Apache 2.0"
        assert spec.tool_use_support is True

    def test_quantization_decreasing_vram(self):
        """The sorted values of ``quantization_options`` never decrease."""
        spec = ModelSpec()
        # NOTE(review): the values are sorted immediately before being
        # compared, so for any totally ordered values this holds by
        # construction. If the intent is to verify the declared order of
        # the quantization options, that is not what is checked here --
        # confirm against scripts/evaluate_qwen35.py.
        ordered = sorted(spec.quantization_options.values())
        for smaller, larger in zip(ordered, ordered[1:]):
            assert larger >= smaller
|
|
|
|
|
|
class TestSecurity:
    """Sanity checks on the entries of ``SECURITY_CRITERIA``."""

    def test_scores_in_range(self):
        """Each criterion has a score in 1..10 and a recognised weight label."""
        accepted_weights = ("CRITICAL", "HIGH", "MEDIUM")
        for criterion in SECURITY_CRITERIA:
            score = criterion["score"]
            assert 1 <= score <= 10
            assert criterion["weight"] in accepted_weights

    def test_weighted_average(self):
        """The weighted mean score (3/2/1 per weight label) is at least 7.0."""
        multipliers = {"CRITICAL": 3, "HIGH": 2, "MEDIUM": 1}
        weight_total = 0
        weighted_score = 0
        for criterion in SECURITY_CRITERIA:
            factor = multipliers[criterion["weight"]]
            weight_total += factor
            weighted_score += criterion["score"] * factor
        assert weighted_score / weight_total >= 7.0
|
|
|
|
|
|
class TestHardware:
    """Checks the ``fits_q4`` flag of two entries in ``HARDWARE_PROFILES``."""

    def test_m2_ultra_fits(self):
        """The ``mac_m2_ultra_192gb`` profile is flagged as fitting Q4."""
        profile = HARDWARE_PROFILES["mac_m2_ultra_192gb"]
        assert profile["fits_q4"] is True

    def test_m1_doesnt_fit(self):
        """The ``mac_m1_16gb`` profile is flagged as not fitting Q4."""
        profile = HARDWARE_PROFILES["mac_m1_16gb"]
        assert profile["fits_q4"] is False
|
|
|
|
|
|
class TestReport:
    """Checks on the text returned by ``generate_report()``."""

    def test_has_all_sections(self):
        """Every expected section title appears somewhere in the report."""
        report = generate_report()
        expected_sections = (
            "Model Specification",
            "VRAM",
            "Hardware",
            "Security",
            "Fleet",
            "Recommendation",
        )
        for section in expected_sections:
            assert section in report, f"Missing: {section}"

    def test_verdict_approved(self):
        """The report contains the string ``APPROVED``."""
        report = generate_report()
        assert "APPROVED" in report
|
|
|
|
|
|
class TestOllama:
    """Shape check on the result of ``check_ollama_status()``."""

    def test_returns_dict(self):
        """The status is a ``dict`` that includes a ``"running"`` key."""
        # NOTE(review): presumably this probes a local Ollama service; only
        # the result's shape is asserted, not the value of "running" --
        # confirm the call is safe when no service is available (e.g. CI).
        status = check_ollama_status()
        assert isinstance(status, dict)
        assert "running" in status
|