SOUL.md compliance: 'Every response I generate should be logged locally with the inputs that produced it, the sources I consulted, and the confidence assessment I made.' scripts/audit_trail.py: AuditTrail class with log_response(), query(), get_stats(), and get_by_session(); content-addressed IDs; JSONL storage; local-only (never sent anywhere). The user can ask 'why did you say X?', which is answered by searching the trail for matching entries. tests/test_audit_trail.py: 7 tests passing.
89 lines
3.2 KiB
Python
89 lines
3.2 KiB
Python
"""Tests for audit trail — SOUL.md compliance."""
|
|
import json
|
|
import tempfile
|
|
from pathlib import Path
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
|
|
class TestAuditTrail:
    """Tests for ``AuditTrail`` logging, querying, stats, and entry IDs.

    Tests that write entries pass pytest's ``tmp_path`` fixture as
    ``audit_dir`` so each test works against its own directory.
    ``scripts.audit_trail`` is imported inside each test, matching the
    original layout, so an import failure is reported per test.
    """

    def test_log_and_query(self, tmp_path):
        """A logged response is returned by a query matching its text."""
        from scripts.audit_trail import AuditTrail

        trail = AuditTrail(audit_dir=tmp_path)

        trail.log_response(
            input_text="What is Python?",
            sources=["web_search:Python is a programming language"],
            confidence=0.9,
            output_text="Python is a programming language.",
            model="test-model",
        )

        results = trail.query("Python")
        assert len(results) == 1
        assert results[0].confidence == 0.9
        assert "Python" in results[0].output_text

    def test_query_no_match(self, tmp_path):
        """A query term absent from the logged entry yields no results."""
        from scripts.audit_trail import AuditTrail

        trail = AuditTrail(audit_dir=tmp_path)

        trail.log_response(
            input_text="What is Rust?",
            sources=[],
            confidence=0.8,
            output_text="Rust is a systems language.",
        )

        results = trail.query("Python")
        assert len(results) == 0

    def test_confidence_filter(self, tmp_path):
        """``min_confidence`` excludes entries logged below the threshold."""
        from scripts.audit_trail import AuditTrail

        trail = AuditTrail(audit_dir=tmp_path)

        trail.log_response(input_text="test", sources=[], confidence=0.3, output_text="low conf")
        trail.log_response(input_text="test", sources=[], confidence=0.95, output_text="high conf")

        high_only = trail.query("test", min_confidence=0.5)
        assert len(high_only) == 1
        assert high_only[0].confidence == 0.95

    def test_stats(self, tmp_path):
        """``get_stats`` reports the entry count and the mean confidence."""
        from scripts.audit_trail import AuditTrail

        trail = AuditTrail(audit_dir=tmp_path)

        trail.log_response(input_text="a", sources=[], confidence=0.8, output_text="b")
        trail.log_response(input_text="c", sources=[], confidence=0.6, output_text="d")

        stats = trail.get_stats()
        assert stats["total"] == 2
        # The average is a computed float; compare with a tolerance so the
        # test does not depend on how the implementation sums or rounds.
        assert stats["avg_confidence"] == pytest.approx(0.7)

    def test_session_filter(self, tmp_path):
        """``get_by_session`` returns only entries logged under that session."""
        from scripts.audit_trail import AuditTrail

        trail = AuditTrail(audit_dir=tmp_path)

        trail.log_response(input_text="a", sources=[], confidence=0.9, output_text="b", session_id="s1")
        trail.log_response(input_text="c", sources=[], confidence=0.9, output_text="d", session_id="s2")

        s1_results = trail.get_by_session("s1")
        assert len(s1_results) == 1

    def test_empty_trail(self, tmp_path):
        """A trail with no logged entries returns no results and a zero total."""
        from scripts.audit_trail import AuditTrail

        trail = AuditTrail(audit_dir=tmp_path)

        assert trail.query("anything") == []
        assert trail.get_stats()["total"] == 0

    def test_content_addressed_id(self):
        """``generate_id`` is deterministic and sensitive to its input text."""
        from scripts.audit_trail import AuditEntry

        id1 = AuditEntry.generate_id("input", "output", "2026-01-01")
        id2 = AuditEntry.generate_id("input", "output", "2026-01-01")
        id3 = AuditEntry.generate_id("different", "output", "2026-01-01")

        assert id1 == id2  # same content = same ID
        assert id1 != id3  # different content = different ID
|