Files
timmy-home/tests/test_audit_trail.py
Alexander Whitestone a39f4fb1ab
Some checks failed
Agent PR Gate / gate (pull_request) Failing after 21s
Self-Healing Smoke / self-healing-smoke (pull_request) Failing after 18s
Smoke Test / smoke (pull_request) Failing after 18s
Agent PR Gate / report (pull_request) Has been cancelled
feat: audit trail - local logging of inputs, sources, confidence (#794)
SOUL.md compliance: 'Every response I generate should be logged locally
with the inputs that produced it, the sources I consulted, and the
confidence assessment I made.'

scripts/audit_trail.py:
  AuditTrail class with log_response(), query(), get_stats(), get_by_session()
  Content-addressed IDs, JSONL storage, local-only (never sent anywhere)
  User can query: 'why did you say X?' -> search trail for matching entries

tests/test_audit_trail.py: 7 tests passing
2026-04-17 01:34:15 -04:00

89 lines
3.2 KiB
Python

"""Tests for audit trail — SOUL.md compliance."""
import json
import tempfile
from pathlib import Path
from unittest.mock import patch
import pytest
class TestAuditTrail:
    """Behavioural tests for ``scripts.audit_trail``.

    Every test that needs storage builds its own ``AuditTrail`` on pytest's
    per-test ``tmp_path`` directory, so entries logged by one test are never
    visible to another.
    """

    @staticmethod
    def _make_trail(audit_dir):
        """Return a fresh ``AuditTrail`` that keeps its data under *audit_dir*.

        The import is done here rather than at module level, so a missing or
        broken ``scripts.audit_trail`` fails the individual tests instead of
        aborting collection of the whole file.
        """
        from scripts.audit_trail import AuditTrail

        return AuditTrail(audit_dir=audit_dir)

    def test_log_and_query(self, tmp_path):
        """A logged response is returned by a query that matches its text."""
        trail = self._make_trail(tmp_path)
        trail.log_response(
            input_text="What is Python?",
            sources=["web_search:Python is a programming language"],
            confidence=0.9,
            output_text="Python is a programming language.",
            model="test-model",
        )

        results = trail.query("Python")

        assert len(results) == 1
        # Exact comparison is intentional: the value is the literal that was
        # logged, not the result of any arithmetic.
        assert results[0].confidence == 0.9
        assert "Python" in results[0].output_text

    def test_query_no_match(self, tmp_path):
        """A query whose term appears in no entry yields an empty list."""
        trail = self._make_trail(tmp_path)
        trail.log_response(
            input_text="What is Rust?",
            sources=[],
            confidence=0.8,
            output_text="Rust is a systems language.",
        )

        assert trail.query("Python") == []

    def test_confidence_filter(self, tmp_path):
        """``min_confidence`` drops entries logged below the threshold."""
        trail = self._make_trail(tmp_path)
        trail.log_response(
            input_text="test", sources=[], confidence=0.3, output_text="low conf"
        )
        trail.log_response(
            input_text="test", sources=[], confidence=0.95, output_text="high conf"
        )

        high_only = trail.query("test", min_confidence=0.5)

        assert len(high_only) == 1
        assert high_only[0].confidence == 0.95

    def test_stats(self, tmp_path):
        """``get_stats`` reports the entry count and the mean confidence."""
        trail = self._make_trail(tmp_path)
        trail.log_response(input_text="a", sources=[], confidence=0.8, output_text="b")
        trail.log_response(input_text="c", sources=[], confidence=0.6, output_text="d")

        stats = trail.get_stats()

        assert stats["total"] == 2
        # The average is a computed float; compare with a tolerance so the
        # test does not depend on how the implementation sums or rounds.
        assert stats["avg_confidence"] == pytest.approx(0.7)

    def test_session_filter(self, tmp_path):
        """``get_by_session`` returns only entries logged under that session."""
        trail = self._make_trail(tmp_path)
        trail.log_response(
            input_text="a", sources=[], confidence=0.9, output_text="b", session_id="s1"
        )
        trail.log_response(
            input_text="c", sources=[], confidence=0.9, output_text="d", session_id="s2"
        )

        s1_results = trail.get_by_session("s1")

        assert len(s1_results) == 1

    def test_empty_trail(self, tmp_path):
        """A trail with nothing logged answers queries and stats without error."""
        trail = self._make_trail(tmp_path)

        assert trail.query("anything") == []
        assert trail.get_stats()["total"] == 0

    def test_content_addressed_id(self):
        """``generate_id`` is deterministic and sensitive to its input text."""
        from scripts.audit_trail import AuditEntry

        id1 = AuditEntry.generate_id("input", "output", "2026-01-01")
        id2 = AuditEntry.generate_id("input", "output", "2026-01-01")
        id3 = AuditEntry.generate_id("different", "output", "2026-01-01")

        assert id1 == id2  # same content = same ID
        assert id1 != id3  # different content = different ID