#!/usr/bin/env python3
# audit_trail.py - Local logging of inputs, sources, and confidence.
# Implements SOUL.md "What Honesty Requires" - The Audit Trail.
# Logs are stored locally. Never sent anywhere. The user owns them.
# Part of #794

import json
import hashlib
import os
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
from dataclasses import dataclass, field, asdict

AUDIT_DIR = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "audit-trail"

# Truncation limits applied when writing entries, so one oversized
# request/response cannot bloat the local log.
_MAX_INPUT_LEN = 1000
_MAX_OUTPUT_LEN = 2000
_MAX_SOURCE_LEN = 200
_MAX_SOURCES = 10


@dataclass
class AuditEntry:
    """One logged request/response pair with its provenance and confidence."""

    id: str                 # short content hash, see generate_id()
    ts: str                 # ISO-8601 UTC timestamp of when the entry was logged
    input_text: str         # user input, truncated to _MAX_INPUT_LEN
    sources: List[str]      # source identifiers, each truncated, at most _MAX_SOURCES
    confidence: float       # rounded to 3 decimal places on write
    output_text: str        # model output, truncated to _MAX_OUTPUT_LEN
    model: str
    provider: str = ""
    session_id: str = ""
    source_types: List[str] = field(default_factory=list)

    @staticmethod
    def generate_id(input_text: str, output_text: str, ts: str) -> str:
        """Deterministic 16-hex-char id derived from timestamp, input and output."""
        content = f"{ts}:{input_text}:{output_text}"
        return hashlib.sha256(content.encode()).hexdigest()[:16]

    @classmethod
    def from_record(cls, data: Dict[str, Any]) -> "AuditEntry":
        """Rebuild an entry from a parsed JSONL record, tolerating missing keys.

        A missing or null field falls back to a type-appropriate default
        ("" for strings, [] for lists, 0.0 for confidence) so old or
        partially written records still load as well-typed entries.
        """
        # Per-field defaults for the non-string fields; everything else is str.
        defaults: Dict[str, Any] = {"sources": [], "source_types": [], "confidence": 0.0}
        kwargs: Dict[str, Any] = {}
        for name in cls.__dataclass_fields__:
            value = data.get(name)
            if value is None:
                value = defaults.get(name, "")
            kwargs[name] = value
        return cls(**kwargs)


class AuditTrail:
    """Append-only local JSONL audit log (one JSON object per line).

    Entries are written to ``<audit_dir>/trail.jsonl``. Nothing is ever
    sent off the machine; the user owns the files.
    """

    def __init__(self, audit_dir: Optional[Path] = None):
        """Create (if needed) the audit directory and point at the log file."""
        self.audit_dir = audit_dir or AUDIT_DIR
        self.audit_dir.mkdir(parents=True, exist_ok=True)
        self._log_file = self.audit_dir / "trail.jsonl"

    def log_response(self, input_text, sources, confidence, output_text,
                     model="", provider="", session_id="", source_types=None):
        """Append one entry to the log and return the AuditEntry written.

        Long fields are truncated (see module-level limits) and confidence
        is rounded to 3 decimal places before being persisted.
        """
        ts = datetime.now(timezone.utc).isoformat()
        entry = AuditEntry(
            id=AuditEntry.generate_id(input_text, output_text, ts),
            ts=ts,
            input_text=input_text[:_MAX_INPUT_LEN],
            sources=[s[:_MAX_SOURCE_LEN] for s in sources[:_MAX_SOURCES]],
            confidence=round(confidence, 3),
            output_text=output_text[:_MAX_OUTPUT_LEN],
            model=model,
            provider=provider,
            session_id=session_id,
            source_types=source_types or [],
        )
        with open(self._log_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(asdict(entry)) + "\n")
        return entry

    def _iter_records(self):
        """Yield each parsed JSON record, skipping blank and corrupt lines.

        Yields nothing when the log file does not exist yet. Corrupt lines
        are silently skipped by design: a damaged record should never make
        the rest of the trail unreadable.
        """
        if not self._log_file.exists():
            return
        with open(self._log_file, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    yield json.loads(line)
                except json.JSONDecodeError:
                    continue

    def query(self, search_text, limit=10, min_confidence=0.0):
        """Case-insensitive substring search over input, output and sources.

        Returns up to ``limit`` AuditEntry matches with confidence at or
        above ``min_confidence``, in log (chronological) order.
        """
        results: List[AuditEntry] = []
        needle = search_text.lower()
        for data in self._iter_records():
            if data.get("confidence", 0) < min_confidence:
                continue
            haystack = " ".join([
                data.get("input_text", ""),
                data.get("output_text", ""),
                " ".join(data.get("sources", [])),
            ]).lower()
            if needle in haystack:
                results.append(AuditEntry.from_record(data))
                if len(results) >= limit:
                    break
        return results

    def get_stats(self):
        """Aggregate the whole trail: entry count, mean confidence, source-type counts."""
        total = 0
        confidence_sum = 0.0
        source_types: Dict[str, int] = {}
        for data in self._iter_records():
            total += 1
            confidence_sum += data.get("confidence", 0)
            for st in data.get("source_types", []):
                source_types[st] = source_types.get(st, 0) + 1
        return {
            "total": total,
            # max(total, 1) avoids ZeroDivisionError on an empty trail.
            "avg_confidence": round(confidence_sum / max(total, 1), 3),
            "sources_breakdown": source_types,
        }

    def get_by_session(self, session_id, limit=50):
        """Return up to ``limit`` entries logged under ``session_id``, in log order."""
        results: List[AuditEntry] = []
        for data in self._iter_records():
            if data.get("session_id") == session_id:
                results.append(AuditEntry.from_record(data))
                if len(results) >= limit:
                    break
        return results


# Lazily created process-wide default trail, shared by the module-level helpers.
_default_trail: Optional[AuditTrail] = None


def get_trail() -> AuditTrail:
    """Return the process-wide default AuditTrail, creating it on first use."""
    global _default_trail
    if _default_trail is None:
        _default_trail = AuditTrail()
    return _default_trail


def log_response(**kwargs):
    """Module-level convenience wrapper for AuditTrail.log_response on the default trail."""
    return get_trail().log_response(**kwargs)


def query(search_text, **kwargs):
    """Module-level convenience wrapper for AuditTrail.query on the default trail."""
    return get_trail().query(search_text, **kwargs)