#!/usr/bin/env python3
# source_distinction.py - I think vs I know annotation system.
# SOUL.md: "Every claim I make comes from one of two places: a verified source
# I can point to, or my own pattern-matching."
# Part of #793

import re
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Optional


class SourceType(Enum):
    """Origin of a claim; the renderers in this file branch on it to pick an annotation."""
    # Rendered with a "[verified]" / "[verified: <ref>]" marker.
    VERIFIED = "verified"
    # Rendered with a hedging phrase (e.g. "I think") placed before the text.
    INFERRED = "inferred"
    # Rendered with a "[you told me]" marker.
    STATED = "stated"
    # No dedicated branch in the renderers; handled by their fallback case.
    UNKNOWN = "unknown"


@dataclass
class Claim:
    """A single statement together with where it came from."""
    # The statement itself, as it should appear in output.
    text: str
    # Origin category; decides which annotation the renderers attach.
    source_type: SourceType
    # Reference shown for VERIFIED claims; an empty string means "no reference".
    source_ref: str = ""
    # Confidence score. The factory helpers in this file use 0.95 / 0.6 / 1.0;
    # no range is enforced here -- presumably 0.0-1.0, confirm with callers.
    confidence: float = 0.0
    # Phrase placed before the text of INFERRED claims (e.g. "I think").
    hedging: str = ""


@dataclass
class AnnotatedResponse:
    """A raw response paired with the source-tagged claims extracted from it."""

    raw_text: str
    claims: List[Claim] = field(default_factory=list)

    @staticmethod
    def _render_claim(item):
        """Return one claim's text decorated according to its source type."""
        kind = item.source_type
        if kind == SourceType.VERIFIED:
            if item.source_ref:
                tag = "[verified: " + item.source_ref + "]"
            else:
                tag = "[verified]"
            return item.text + " " + tag
        if kind == SourceType.INFERRED:
            # An empty hedging phrase falls back to the default wording.
            lead_in = item.hedging if item.hedging else "I think"
            return lead_in + " " + item.text
        if kind == SourceType.STATED:
            return item.text + " [you told me]"
        # Anything else (including UNKNOWN) is flagged as uncertain.
        return "I am not certain, but " + item.text

    def _count_of(self, kind):
        """Return how many claims carry the given source type."""
        return len([item for item in self.claims if item.source_type == kind])

    def render(self):
        """Return the annotated claims joined by single spaces.

        When there are no claims, ``raw_text`` is returned unchanged;
        otherwise ``raw_text`` is not used.
        """
        if self.claims:
            return " ".join([self._render_claim(item) for item in self.claims])
        return self.raw_text

    @property
    def verified_count(self):
        """Number of claims whose source type is VERIFIED."""
        return self._count_of(SourceType.VERIFIED)

    @property
    def inferred_count(self):
        """Number of claims whose source type is INFERRED."""
        return self._count_of(SourceType.INFERRED)


def verified(text, source, confidence=0.95):
    """Build a VERIFIED claim for ``text`` that cites ``source`` as its reference."""
    claim = Claim(
        text=text,
        source_type=SourceType.VERIFIED,
        source_ref=source,
        confidence=confidence,
    )
    return claim

def inferred(text, hedging="I think", confidence=0.6):
    """Build an INFERRED claim for ``text`` carrying the given hedging phrase."""
    claim = Claim(
        text=text,
        source_type=SourceType.INFERRED,
        confidence=confidence,
        hedging=hedging,
    )
    return claim

def stated(text):
    """Build a STATED claim for ``text`` with confidence fixed at 1.0."""
    claim = Claim(
        text=text,
        source_type=SourceType.STATED,
        confidence=1.0,
    )
    return claim


def annotate_response(raw_text, claims):
    """Wrap ``raw_text`` and its ``claims`` in an AnnotatedResponse."""
    response = AnnotatedResponse(raw_text=raw_text, claims=claims)
    return response


def format_for_display(response):
    """Return a multi-line, one-claim-per-line listing of ``response``.

    Each claim is prefixed with a marker for its source type:
    ``=`` verified (with the reference in parentheses when present),
    ``~`` inferred (hedging phrase followed by the text),
    ``>`` stated, and ``?`` for anything else. When there is at least one
    claim, a blank line and a ``[V verified, I inferred, T total]`` summary
    are appended.

    Args:
        response: object exposing ``claims``, ``verified_count`` and
            ``inferred_count`` (an ``AnnotatedResponse``).

    Returns:
        The lines joined with newlines; an empty string when there are no
        claims.
    """
    lines = []
    for claim in response.claims:
        kind = claim.source_type
        if kind == SourceType.VERIFIED:
            ref = f" ({claim.source_ref})" if claim.source_ref else ""
            lines.append(f"  = {claim.text}{ref}")
        elif kind == SourceType.INFERRED:
            # Claim.hedging defaults to ""; fall back to the same default
            # phrase AnnotatedResponse.render() uses so the line never comes
            # out as "  ~  text" with a doubled space and no hedge.
            hedge = claim.hedging or "I think"
            lines.append(f"  ~ {hedge} {claim.text}")
        elif kind == SourceType.STATED:
            lines.append(f"  > {claim.text}")
        else:
            lines.append(f"  ? {claim.text}")
    if response.claims:
        total = len(response.claims)
        lines.append("")
        lines.append(
            f"  [{response.verified_count} verified, "
            f"{response.inferred_count} inferred, {total} total]"
        )
    return "\n".join(lines)


# Phrases that signal the speaker is inferring rather than citing a source.
_HEDGING_PHRASES = (
    "i think", "i believe", "probably", "likely", "might",
    "it seems", "perhaps", "i am not sure", "i guess",
    "my understanding is", "i suspect",
)

# One compiled pattern per phrase. Word boundaries stop a phrase from matching
# inside a longer word ("might" in "almighty"), and ``\s+`` between the words
# lets a phrase split across a line break or doubled space still match.
_HEDGING_PATTERNS = tuple(
    re.compile(
        r"\b" + r"\s+".join(re.escape(word) for word in phrase.split()) + r"\b"
    )
    for phrase in _HEDGING_PHRASES
)


def source_distinction_check(text):
    """Report how much hedging language ``text`` contains.

    Matching is case-insensitive and on whole words/phrases. Each hedging
    phrase is counted at most once, so ``hedging_count`` is the number of
    distinct phrases present, not the number of occurrences.

    Args:
        text: the text to inspect; ``None`` is treated as empty text.

    Returns:
        A dict with three keys:
        ``has_hedging`` (at least one phrase found),
        ``hedging_count`` (number of distinct phrases found), and
        ``likely_inferred`` (more than two distinct phrases found).
    """
    text_lower = (text or "").lower()
    hedging_count = sum(
        1 for pattern in _HEDGING_PATTERNS if pattern.search(text_lower)
    )
    return {
        "has_hedging": hedging_count > 0,
        "hedging_count": hedging_count,
        "likely_inferred": hedging_count > 2,
    }