#!/usr/bin/env python3
# audit_trail.py - Local logging of inputs, sources, and confidence.
# Implements SOUL.md "What Honesty Requires" - The Audit Trail.
# Logs are stored locally. Never sent anywhere. The user owns them.
# Part of #794

import json
import hashlib
import os
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional
from dataclasses import dataclass, field, asdict

AUDIT_DIR = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes")) / "audit-trail"

# Truncation limits applied when writing entries, so one oversized
# request/response cannot bloat the local log.
_MAX_INPUT_LEN = 1000
_MAX_OUTPUT_LEN = 2000
_MAX_SOURCE_LEN = 200
_MAX_SOURCES = 10


@dataclass
class AuditEntry:
    """One logged request/response pair with its provenance and confidence."""

    id: str                 # short content hash, see generate_id()
    ts: str                 # ISO-8601 UTC timestamp of when the entry was logged
    input_text: str         # user input, truncated to _MAX_INPUT_LEN
    sources: List[str]      # source identifiers, each truncated, at most _MAX_SOURCES
    confidence: float       # rounded to 3 decimal places on write
    output_text: str        # model output, truncated to _MAX_OUTPUT_LEN
    model: str
    provider: str = ""
    session_id: str = ""
    source_types: List[str] = field(default_factory=list)

    @staticmethod
    def generate_id(input_text: str, output_text: str, ts: str) -> str:
        """Deterministic 16-hex-char id derived from timestamp, input and output."""
        content = f"{ts}:{input_text}:{output_text}"
        return hashlib.sha256(content.encode()).hexdigest()[:16]

    @classmethod
    def from_record(cls, data: Dict[str, Any]) -> "AuditEntry":
        """Rebuild an entry from a parsed JSONL record, tolerating missing keys.

        A missing or null field falls back to a type-appropriate default
        ("" for strings, [] for lists, 0.0 for confidence) so old or
        partially written records still load as well-typed entries.
        """
        # Per-field defaults for the non-string fields; everything else is str.
        defaults: Dict[str, Any] = {"sources": [], "source_types": [], "confidence": 0.0}
        kwargs: Dict[str, Any] = {}
        for name in cls.__dataclass_fields__:
            value = data.get(name)
            if value is None:
                value = defaults.get(name, "")
            kwargs[name] = value
        return cls(**kwargs)


class AuditTrail:
    """Append-only local JSONL audit log (one JSON object per line).

    Entries are written to ``<audit_dir>/trail.jsonl``. Nothing is ever
    sent off the machine; the user owns the files.
    """

    def __init__(self, audit_dir: Optional[Path] = None):
        """Create (if needed) the audit directory and point at the log file."""
        self.audit_dir = audit_dir or AUDIT_DIR
        self.audit_dir.mkdir(parents=True, exist_ok=True)
        self._log_file = self.audit_dir / "trail.jsonl"

    def log_response(self, input_text, sources, confidence, output_text,
                     model="", provider="", session_id="", source_types=None):
        """Append one entry to the log and return the AuditEntry written.

        Long fields are truncated (see module-level limits) and confidence
        is rounded to 3 decimal places before being persisted.
        """
        ts = datetime.now(timezone.utc).isoformat()
        entry = AuditEntry(
            id=AuditEntry.generate_id(input_text, output_text, ts),
            ts=ts,
            input_text=input_text[:_MAX_INPUT_LEN],
            sources=[s[:_MAX_SOURCE_LEN] for s in sources[:_MAX_SOURCES]],
            confidence=round(confidence, 3),
            output_text=output_text[:_MAX_OUTPUT_LEN],
            model=model,
            provider=provider,
            session_id=session_id,
            source_types=source_types or [],
        )
        with open(self._log_file, "a", encoding="utf-8") as f:
            f.write(json.dumps(asdict(entry)) + "\n")
        return entry

    def _iter_records(self):
        """Yield each parsed JSON record, skipping blank and corrupt lines.

        Yields nothing when the log file does not exist yet. Corrupt lines
        are silently skipped by design: a damaged record should never make
        the rest of the trail unreadable.
        """
        if not self._log_file.exists():
            return
        with open(self._log_file, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    yield json.loads(line)
                except json.JSONDecodeError:
                    continue

    def query(self, search_text, limit=10, min_confidence=0.0):
        """Case-insensitive substring search over input, output and sources.

        Returns up to ``limit`` AuditEntry matches with confidence at or
        above ``min_confidence``, in log (chronological) order.
        """
        results: List[AuditEntry] = []
        needle = search_text.lower()
        for data in self._iter_records():
            if data.get("confidence", 0) < min_confidence:
                continue
            haystack = " ".join([
                data.get("input_text", ""),
                data.get("output_text", ""),
                " ".join(data.get("sources", [])),
            ]).lower()
            if needle in haystack:
                results.append(AuditEntry.from_record(data))
                if len(results) >= limit:
                    break
        return results

    def get_stats(self):
        """Aggregate the whole trail: entry count, mean confidence, source-type counts."""
        total = 0
        confidence_sum = 0.0
        source_types: Dict[str, int] = {}
        for data in self._iter_records():
            total += 1
            confidence_sum += data.get("confidence", 0)
            for st in data.get("source_types", []):
                source_types[st] = source_types.get(st, 0) + 1
        return {
            "total": total,
            # max(total, 1) avoids ZeroDivisionError on an empty trail.
            "avg_confidence": round(confidence_sum / max(total, 1), 3),
            "sources_breakdown": source_types,
        }

    def get_by_session(self, session_id, limit=50):
        """Return up to ``limit`` entries logged under ``session_id``, in log order."""
        results: List[AuditEntry] = []
        for data in self._iter_records():
            if data.get("session_id") == session_id:
                results.append(AuditEntry.from_record(data))
                if len(results) >= limit:
                    break
        return results


# Lazily created process-wide default trail, shared by the module-level helpers.
_default_trail: Optional[AuditTrail] = None


def get_trail() -> AuditTrail:
    """Return the process-wide default AuditTrail, creating it on first use."""
    global _default_trail
    if _default_trail is None:
        _default_trail = AuditTrail()
    return _default_trail


def log_response(**kwargs):
    """Module-level convenience wrapper for AuditTrail.log_response on the default trail."""
    return get_trail().log_response(**kwargs)


def query(search_text, **kwargs):
    """Module-level convenience wrapper for AuditTrail.query on the default trail."""
    return get_trail().query(search_text, **kwargs)