feat: evaluate Qwen3.5:35B as local model option (#288)

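Part of Epic #281. Verdict: APPROVED, with a security score of 8.8/10. The model is a Mixture-of-Experts design (35B parameters, 3B active), offers a 128K-token context window, is licensed under Apache 2.0, and provides perfect data locality. Closes #288.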
2026-04-13 21:32:21 -04:00
parent c84e2279ea
commit 42e04ba03a
2 changed files with 155 additions and 0 deletions
--- a/tests/test_evaluate_qwen35.py
+++ b/tests/test_evaluate_qwen35.py
@@ -0,0 +1,46 @@
+"""Tests for Qwen3.5:35B evaluation -- Issue #288."""
+import pytest
+from scripts.evaluate_qwen35 import ModelSpec, FLEET_MODELS, SECURITY_CRITERIA, HARDWARE_PROFILES, check_ollama_status, generate_report
+
+class TestModelSpec:
+    """Checks on a default-constructed ``ModelSpec``."""
+
+    def test_fields(self):
+        """The default spec exposes the expected name, context, license and tool flag."""
+        spec = ModelSpec()
+        expected = {
+            "name": "Qwen3.5-35B-A3B",
+            "context_length": 131072,
+            "license": "Apache 2.0",
+        }
+        for attr, value in expected.items():
+            assert getattr(spec, attr) == value
+        assert spec.tool_use_support is True
+
+    def test_quant_vram_decreasing(self):
+        """Sort the quantization entries by value and check the order is non-decreasing."""
+        # NOTE(review): the entries are sorted by value right before the
+        # ordering check, so this assertion cannot fail for ordinary
+        # comparable values. If the intent is to verify that VRAM shrinks
+        # with more aggressive quantization, the expected key order needs
+        # to be stated explicitly -- confirm against ModelSpec.
+        spec = ModelSpec()
+        by_value = sorted(spec.quantization_options.items(), key=lambda kv: kv[1])
+        ordered = [value for _, value in by_value]
+        for earlier, later in zip(ordered, ordered[1:]):
+            assert later >= earlier
+
+class TestSecurity:
+    """Sanity checks on the entries of ``SECURITY_CRITERIA``."""
+
+    def test_scores(self):
+        """Every criterion's ``score`` lies in the inclusive range 1..10."""
+        for criterion in SECURITY_CRITERIA:
+            score = criterion["score"]
+            assert 1 <= score <= 10
+
+    def test_weighted_avg(self):
+        """The weighted mean of the scores is at least 7.0."""
+        # Each criterion's "weight" label maps to a numeric multiplier.
+        multipliers = {"CRITICAL": 3, "HIGH": 2, "MEDIUM": 1}
+        total_weight = 0
+        weighted_score = 0
+        for criterion in SECURITY_CRITERIA:
+            factor = multipliers[criterion["weight"]]
+            total_weight += factor
+            weighted_score += criterion["score"] * factor
+        assert weighted_score / total_weight >= 7.0
+
+class TestHardware:
+    """Checks the ``fits_q4`` flag stored for two entries of ``HARDWARE_PROFILES``."""
+
+    def test_m2_fits(self):
+        """The ``mac_m2_ultra_192gb`` profile has ``fits_q4`` set to True."""
+        profile = HARDWARE_PROFILES["mac_m2_ultra_192gb"]
+        assert profile["fits_q4"] is True
+
+    def test_m1_no(self):
+        """The ``mac_m1_16gb`` profile has ``fits_q4`` set to False."""
+        profile = HARDWARE_PROFILES["mac_m1_16gb"]
+        assert profile["fits_q4"] is False
+
+class TestReport:
+    """Checks on the output of ``generate_report()``."""
+
+    def test_sections(self):
+        """Each expected section title appears in the generated report."""
+        report = generate_report()
+        required = (
+            "Model Specification",
+            "VRAM",
+            "Hardware",
+            "Security",
+            "Fleet",
+            "Recommendation",
+        )
+        missing = [title for title in required if title not in report]
+        assert not missing
+
+    def test_approved(self):
+        """The generated report contains the word ``APPROVED``."""
+        report = generate_report()
+        assert "APPROVED" in report
+
+class TestOllama:
+    """Shape check on the value returned by ``check_ollama_status()``."""
+
+    def test_returns_dict(self):
+        """The status is a dict that contains a ``running`` key."""
+        # NOTE(review): check_ollama_status() is called without any stub;
+        # whether it needs a reachable Ollama service is not visible from
+        # this file -- confirm it degrades gracefully in CI.
+        status = check_ollama_status()
+        assert isinstance(status, dict)
+        assert "running" in status