129 lines
3.7 KiB
Python
129 lines
3.7 KiB
Python
"""
|
|
Source Distinction Module — Verified vs Inferred Claims

SOUL.md compliance: "I tell the truth. When I do not know something, I say so.
I do not fabricate confidence."

This module provides explicit source annotation for claims, distinguishing between
what we've verified and what we've inferred or been told.
|
|
"""
|
|
|
|
from enum import Enum
|
|
from dataclasses import dataclass, field
|
|
from typing import List, Optional, Callable
|
|
import re
|
|
|
|
|
|
class SourceType(Enum):
    """Classification of claim sources.

    Each member's value is its lowercase name, so a member can be looked up
    from its string form, e.g. ``SourceType("verified")``.
    """

    VERIFIED = "verified"  # Directly confirmed by primary source
    INFERRED = "inferred"  # Derived from evidence, not directly stated
    STATED = "stated"  # Reported by another source, not independently verified
    UNKNOWN = "unknown"  # Source unclear or missing
|
|
|
|
|
|
# Hedging patterns that indicate uncertainty.
# Each entry is a regex fragment delimited by word boundaries (\b), so it only
# matches whole words/phrases (e.g. "likely" does not match inside "unlikely").
HEDGING_PATTERNS = [
    r"\bi think\b",
    r"\bi believe\b",
    r"\bprobably\b",
    r"\bmaybe\b",
    r"\bperhaps\b",
    r"\bseems?\b",
    r"\bappears?\b",
    r"\bmight\b",
    r"\bcould be\b",
    r"\bsort of\b",
    r"\bkind of\b",
    r"\bi guess\b",
    r"\bnot sure\b",
    r"\bpossibly\b",
    r"\blikely\b",
]

# All patterns joined into one case-insensitive alternation, compiled once at
# import time so callers do not recompile on every check.
_HEDGING_RE = re.compile("|".join(HEDGING_PATTERNS), re.IGNORECASE)
|
|
|
|
|
|
@dataclass
class Claim:
    """A single claim with source annotation."""

    # The claim itself, rendered verbatim.
    text: str
    # Where the claim's support comes from; UNKNOWN unless specified.
    source: SourceType = SourceType.UNKNOWN
    # Optional reference; rendered in parentheses after the text when non-empty.
    citation: Optional[str] = None
    # Stored as given; not validated and not used by render().
    # NOTE(review): presumably a 0.0-1.0 scale — confirm against callers.
    confidence: float = 1.0

    def render(self) -> str:
        """Render claim with source indicator.

        Returns:
            ``"<prefix> <text>"``, with ``" (<citation>)"`` appended when
            ``citation`` is truthy. The prefix comes from ``_source_prefix``.
        """
        rendered = f"{_source_prefix(self.source)} {self.text}"
        if self.citation:
            rendered = f"{rendered} ({self.citation})"
        return rendered
|
|
|
|
|
|
@dataclass
class AnnotatedResponse:
    """A response with explicitly annotated claims."""

    # Claims in insertion order; each instance gets its own list.
    claims: List[Claim] = field(default_factory=list)
    # Optional lead-in text rendered before the claims.
    summary: Optional[str] = None

    def add(self, claim: Claim) -> "AnnotatedResponse":
        """Add a claim, return self for chaining."""
        self.claims.append(claim)
        return self

    def render(self) -> str:
        """Render all claims with source indicators.

        Returns:
            One rendered claim per line, joined with newlines. When
            ``summary`` is truthy it comes first, followed by a blank line.
            With no summary and no claims the result is an empty string.
        """
        lines = [self.summary, ""] if self.summary else []
        lines.extend(claim.render() for claim in self.claims)
        return "\n".join(lines)
|
|
|
|
|
|
# Display prefix for each source type; built once rather than on every call.
_SOURCE_PREFIXES = {
    SourceType.VERIFIED: "✓",
    SourceType.INFERRED: "~",
    SourceType.STATED: "◇",
    SourceType.UNKNOWN: "?",
}


def _source_prefix(source: SourceType) -> str:
    """Map source type to display prefix.

    Raises:
        KeyError: if ``source`` is not one of the mapped ``SourceType`` members.
    """
    return _SOURCE_PREFIXES[source]
|
|
|
|
|
|
def verified(text: str, citation: Optional[str] = None) -> Claim:
    """Create a verified claim.

    The claim is tagged ``SourceType.VERIFIED`` with confidence fixed at 1.0.
    """
    return Claim(
        text=text,
        source=SourceType.VERIFIED,
        citation=citation,
        confidence=1.0,
    )
|
|
|
|
|
|
def inferred(text: str, citation: Optional[str] = None, confidence: float = 0.7) -> Claim:
    """Create an inferred claim.

    The claim is tagged ``SourceType.INFERRED``. ``confidence`` defaults to
    0.7 and is passed through unchanged (no validation is performed).
    """
    return Claim(
        text=text,
        source=SourceType.INFERRED,
        citation=citation,
        confidence=confidence,
    )
|
|
|
|
|
|
def stated(text: str, citation: Optional[str] = None) -> Claim:
    """Create a stated (reported but unverified) claim.

    The claim is tagged ``SourceType.STATED`` with confidence fixed at 0.5.
    """
    return Claim(
        text=text,
        source=SourceType.STATED,
        citation=citation,
        confidence=0.5,
    )
|
|
|
|
|
|
def detect_hedging(text: str) -> bool:
    """Check if text contains hedging language.

    Returns True when any pattern in the module-level ``_HEDGING_RE`` matches
    anywhere in ``text``, and False otherwise (including for an empty string).
    """
    return _HEDGING_RE.search(text) is not None
|
|
|
|
|
|
def classify_claim(text: str, has_primary_source: bool = False) -> SourceType:
    """
    Classify a claim's source type based on content and context.

    Rules are applied in this order; the first match wins:

    1. If text contains hedging language → STATED
    2. If primary source confirmed → VERIFIED
    3. Otherwise → INFERRED

    Because hedging is checked first, a hedged claim is classified STATED
    even when ``has_primary_source`` is True.
    """
    if detect_hedging(text):
        return SourceType.STATED
    if has_primary_source:
        return SourceType.VERIFIED
    return SourceType.INFERRED
|