#!/usr/bin/env python3
# source_distinction.py - I think vs I know annotation system.
# SOUL.md: "Every claim I make comes from one of two places: a verified source
# I can point to, or my own pattern-matching."
# Part of #793

import re
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Optional

# Hedge used for an inferred claim that carries no hedging phrase of its own.
_DEFAULT_HEDGE = "I think"

# Phrases (lower-case) that source_distinction_check treats as hedging.
_HEDGING_PHRASES = (
    "i think",
    "i believe",
    "probably",
    "likely",
    "might",
    "it seems",
    "perhaps",
    "i am not sure",
    "i guess",
    "my understanding is",
    "i suspect",
)

# Compiled once at import time. The \b anchors keep a phrase from matching
# inside a longer word (e.g. "might" inside "almighty").
_HEDGING_PATTERNS = tuple(
    re.compile(r"\b" + re.escape(phrase) + r"\b") for phrase in _HEDGING_PHRASES
)


class SourceType(Enum):
    """Origin of a claim; selects how the claim is rendered."""

    VERIFIED = "verified"  # rendered with a "[verified...]" tag
    INFERRED = "inferred"  # rendered behind a hedging phrase
    STATED = "stated"      # rendered with a "[you told me]" tag
    UNKNOWN = "unknown"    # rendered behind "I am not certain, but"


@dataclass
class Claim:
    """A single statement together with where it came from."""

    text: str
    source_type: SourceType
    source_ref: str = ""      # shown only for VERIFIED claims
    confidence: float = 0.0   # stored only; not validated or used when rendering
    hedging: str = ""         # used only for INFERRED claims; "" means "I think"


def _render_claim(claim: Claim) -> str:
    """Return the inline (single-sentence) rendering of one claim."""
    if claim.source_type == SourceType.VERIFIED:
        tag = f"[verified: {claim.source_ref}]" if claim.source_ref else "[verified]"
        return f"{claim.text} {tag}"
    if claim.source_type == SourceType.INFERRED:
        return f"{claim.hedging or _DEFAULT_HEDGE} {claim.text}"
    if claim.source_type == SourceType.STATED:
        return f"{claim.text} [you told me]"
    # UNKNOWN, or any source type added later without its own rendering.
    return f"I am not certain, but {claim.text}"


def _display_line(claim: Claim) -> str:
    """Return the one-line, marker-prefixed rendering of one claim."""
    if claim.source_type == SourceType.VERIFIED:
        ref = f" ({claim.source_ref})" if claim.source_ref else ""
        return f" = {claim.text}{ref}"
    if claim.source_type == SourceType.INFERRED:
        # Same fallback as the inline rendering, so a Claim built without a
        # hedge does not come out as " ~  text" with a missing phrase.
        return f" ~ {claim.hedging or _DEFAULT_HEDGE} {claim.text}"
    if claim.source_type == SourceType.STATED:
        return f" > {claim.text}"
    return f" ? {claim.text}"


@dataclass
class AnnotatedResponse:
    """A response text plus the source-annotated claims extracted from it."""

    raw_text: str
    claims: List[Claim] = field(default_factory=list)

    def render(self) -> str:
        """Return the response as one string with every claim annotated inline.

        When there are no claims the raw text is returned unchanged; otherwise
        the output is built from the claims alone, joined by single spaces,
        and ``raw_text`` is not used.
        """
        if not self.claims:
            return self.raw_text
        return " ".join(_render_claim(claim) for claim in self.claims)

    @property
    def verified_count(self) -> int:
        """Number of claims whose source type is VERIFIED."""
        return sum(1 for c in self.claims if c.source_type == SourceType.VERIFIED)

    @property
    def inferred_count(self) -> int:
        """Number of claims whose source type is INFERRED."""
        return sum(1 for c in self.claims if c.source_type == SourceType.INFERRED)


def verified(text: str, source: str, confidence: float = 0.95) -> Claim:
    """Build a VERIFIED claim that points at *source*."""
    return Claim(
        text=text,
        source_type=SourceType.VERIFIED,
        source_ref=source,
        confidence=confidence,
    )


def inferred(text: str, hedging: str = "I think", confidence: float = 0.6) -> Claim:
    """Build an INFERRED claim introduced by the *hedging* phrase."""
    return Claim(
        text=text,
        source_type=SourceType.INFERRED,
        confidence=confidence,
        hedging=hedging,
    )


def stated(text: str) -> Claim:
    """Build a STATED claim; its confidence is fixed at 1.0."""
    return Claim(text=text, source_type=SourceType.STATED, confidence=1.0)


def annotate_response(raw_text: str, claims: List[Claim]) -> AnnotatedResponse:
    """Bundle *raw_text* and *claims* into an AnnotatedResponse."""
    return AnnotatedResponse(raw_text=raw_text, claims=claims)


def format_for_display(response: AnnotatedResponse) -> str:
    """Return a multi-line listing of the claims followed by a summary line.

    Each claim gets one line with a marker: ``=`` verified, ``~`` inferred,
    ``>`` stated, ``?`` anything else. A blank line and a
    ``[N verified, N inferred, N total]`` summary follow. A response without
    claims yields an empty string.
    """
    if not response.claims:
        return ""
    lines = [_display_line(claim) for claim in response.claims]
    summary = (
        f" [{response.verified_count} verified, "
        f"{response.inferred_count} inferred, "
        f"{len(response.claims)} total]"
    )
    lines.extend(["", summary])
    return "\n".join(lines)


def source_distinction_check(text: str) -> dict:
    """Report how much hedging language *text* contains.

    Matching is case-insensitive and on whole words/phrases, so "might" does
    not match inside "almighty". Each known phrase counts at most once, no
    matter how often it occurs.

    Returns a dict with:
        ``has_hedging``     - True when at least one phrase is present.
        ``hedging_count``   - number of distinct phrases present.
        ``likely_inferred`` - True when more than two distinct phrases are present.
    """
    lowered = text.lower()
    hedging_count = sum(
        1 for pattern in _HEDGING_PATTERNS if pattern.search(lowered)
    )
    return {
        "has_hedging": hedging_count > 0,
        "hedging_count": hedging_count,
        "likely_inferred": hedging_count > 2,
    }