95 lines
3.1 KiB
Python
95 lines
3.1 KiB
Python
"""
|
|
Tests for scripts/provenance_validate.py and scripts/provenance_dashboard.py.
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import tempfile
|
|
import unittest
|
|
from pathlib import Path
|
|
|
|
import sys
|
|
sys.path.insert(0, str(Path(__file__).parent.parent / "scripts"))
|
|
from provenance_validate import validate_file, validate_all
|
|
|
|
|
|
class TestValidateFile(unittest.TestCase):
    """Exercise ``validate_file`` against small temporary JSONL fixtures."""

    def _write_jsonl(self, entries):
        """Write *entries* to a temporary ``.jsonl`` file, one JSON object per line.

        The file is registered with ``addCleanup`` so it is removed after the
        test finishes, even when an assertion fails or ``validate_file``
        raises before the test body completes.

        Returns:
            The filesystem path of the written file.
        """
        with tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False) as f:
            # Register removal immediately so a failed write cannot leak the file.
            self.addCleanup(os.unlink, f.name)
            f.writelines(json.dumps(e) + '\n' for e in entries)
        return f.name

    def test_all_valid(self):
        """Entries with session id, model and timestamp all count as covered."""
        entries = [
            {"prompt": "x", "response": "y", "source_session_id": "s1", "model": "mimo", "timestamp": "2026-01-01"},
            {"prompt": "a", "response": "b", "source_session_id": "s2", "model": "mimo", "timestamp": "2026-01-02"},
        ]
        result = validate_file(self._write_jsonl(entries))
        self.assertEqual(result["with_provenance"], 2)
        self.assertEqual(result["coverage_pct"], 100.0)

    def test_none_valid(self):
        """Entries holding only prompt/response yield zero coverage."""
        entries = [
            {"prompt": "x", "response": "y"},
            {"prompt": "a", "response": "b"},
        ]
        result = validate_file(self._write_jsonl(entries))
        self.assertEqual(result["with_provenance"], 0)
        self.assertEqual(result["coverage_pct"], 0.0)

    def test_partial(self):
        """One covered entry out of two is reported as 50% coverage."""
        entries = [
            {"prompt": "x", "response": "y", "source_session_id": "s1", "model": "m1", "timestamp": "2026-01-01"},
            {"prompt": "a", "response": "b"},  # missing provenance
        ]
        result = validate_file(self._write_jsonl(entries))
        self.assertEqual(result["with_provenance"], 1)
        self.assertEqual(result["coverage_pct"], 50.0)

    def test_missing_model_only(self):
        """An entry lacking only ``model`` is counted under ``missing_by_field``."""
        entries = [
            {"prompt": "x", "response": "y", "source_session_id": "s1", "timestamp": "2026-01-01"},
        ]
        result = validate_file(self._write_jsonl(entries))
        self.assertEqual(result["missing_by_field"]["model"], 1)
|
|
|
|
|
class TestValidateAll(unittest.TestCase):
    """Exercise the ``passes_threshold`` flag returned by ``validate_all``."""

    def _write_jsonl(self, entries):
        """Write *entries* to a temporary ``.jsonl`` file, one JSON object per line.

        The file is registered with ``addCleanup`` so it is removed after the
        test finishes, even when ``validate_all`` raises or an assertion fails.

        Returns:
            The filesystem path of the written file.
        """
        with tempfile.NamedTemporaryFile(mode='w', suffix='.jsonl', delete=False) as f:
            # Register removal immediately so a failed write cannot leak the file.
            self.addCleanup(os.unlink, f.name)
            f.writelines(json.dumps(e) + '\n' for e in entries)
        return f.name

    def test_threshold_pass(self):
        """A single fully-attributed entry passes a threshold of 50."""
        entries = [
            {"prompt": "x", "response": "y", "source_session_id": "s1", "model": "m1", "timestamp": "2026-01-01"},
        ]
        path = self._write_jsonl(entries)

        result = validate_all([path], threshold=50)
        self.assertTrue(result["passes_threshold"])

    def test_threshold_fail(self):
        """A single entry without provenance fails a threshold of 50."""
        entries = [{"prompt": "x", "response": "y"}]  # no provenance
        path = self._write_jsonl(entries)

        result = validate_all([path], threshold=50)
        self.assertFalse(result["passes_threshold"])
|
|
|
|
|
|
if __name__ == "__main__":
    # Run the test cases in this module only when executed as a script,
    # never on import.
    unittest.main()
|