Files
timmy-home/scripts/source_distinction.py

102 lines
3.4 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
# source_distinction.py - I think vs I know annotation system.
# SOUL.md: "Every claim I make comes from one of two places: a verified source
# I can point to, or my own pattern-matching."
# Part of #793
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import List, Optional
class SourceType(Enum):
    """Provenance category attached to a claim."""

    # Backed by a source that can be pointed to.
    VERIFIED = "verified"
    # Produced by pattern-matching rather than a citable source.
    INFERRED = "inferred"
    # Supplied by the other party in the conversation.
    STATED = "stated"
    # None of the categories above applies.
    UNKNOWN = "unknown"
@dataclass
class Claim:
    """A single statement together with a record of where it came from."""

    # The statement itself.
    text: str
    # Provenance category of the statement.
    source_type: SourceType
    # Pointer to the backing source; empty string when none is given.
    source_ref: str = ""
    # Confidence score; presumably in [0, 1] -- not validated here.
    confidence: float = 0.0
    # Lead-in phrase for inferred claims (e.g. "I think"); empty means unset.
    hedging: str = ""
@dataclass
class AnnotatedResponse:
    """A response's raw text paired with the source-annotated claims it makes."""

    # The response text as originally written.
    raw_text: str
    # Claims attached to the response, kept in the order they should appear.
    claims: List[Claim] = field(default_factory=list)

    @staticmethod
    def _annotate(claim):
        """Return *claim*'s text decorated according to its source type."""
        kind = claim.source_type
        if kind == SourceType.VERIFIED:
            # Cite the source when one was recorded, otherwise use a bare tag.
            if claim.source_ref:
                tag = "[verified: " + claim.source_ref + "]"
            else:
                tag = "[verified]"
            return claim.text + " " + tag
        if kind == SourceType.INFERRED:
            # An unset hedge falls back to the default lead-in.
            lead_in = claim.hedging or "I think"
            return lead_in + " " + claim.text
        if kind == SourceType.STATED:
            return claim.text + " [you told me]"
        # Any other source type is presented as uncertain.
        return "I am not certain, but " + claim.text

    def render(self):
        """Return the response as one string with every claim annotated.

        With no claims the raw text is returned unchanged. Otherwise the raw
        text is not used: the result is the annotated claims joined by single
        spaces, in list order.
        """
        if not self.claims:
            return self.raw_text
        return " ".join(self._annotate(claim) for claim in self.claims)

    @property
    def verified_count(self):
        """Number of claims whose source type is VERIFIED."""
        return len([c for c in self.claims if c.source_type == SourceType.VERIFIED])

    @property
    def inferred_count(self):
        """Number of claims whose source type is INFERRED."""
        return len([c for c in self.claims if c.source_type == SourceType.INFERRED])
def verified(text, source, confidence=0.95):
    """Build a VERIFIED claim for *text*, citing *source* as its reference."""
    return Claim(
        text=text,
        source_type=SourceType.VERIFIED,
        source_ref=source,
        confidence=confidence,
    )
def inferred(text, hedging="I think", confidence=0.6):
    """Build an INFERRED claim for *text* that is introduced by *hedging*."""
    return Claim(
        text=text,
        source_type=SourceType.INFERRED,
        confidence=confidence,
        hedging=hedging,
    )
def stated(text):
    """Build a STATED claim for *text*; its confidence is fixed at 1.0."""
    return Claim(
        text=text,
        source_type=SourceType.STATED,
        confidence=1.0,
    )
def annotate_response(raw_text, claims):
    """Wrap *raw_text* and its *claims* in an AnnotatedResponse.

    The *claims* object is stored as given; it is not copied.
    """
    response = AnnotatedResponse(raw_text=raw_text, claims=claims)
    return response
def format_for_display(response):
    """Return a line-per-claim listing of *response* with a summary footer.

    Each claim becomes one line whose marker shows its source type:
    ``=`` verified (with the source reference in parentheses when present),
    ``~`` inferred (prefixed by its hedge), ``>`` stated, and ``?`` for any
    other source type. When there is at least one claim, a blank line and a
    ``[N verified, N inferred, N total]`` footer follow. With no claims the
    result is the empty string.

    Args:
        response: Object exposing ``claims``, ``verified_count`` and
            ``inferred_count`` (e.g. an AnnotatedResponse).

    Returns:
        The listing as a single newline-joined string.
    """
    lines = []
    for claim in response.claims:
        if claim.source_type == SourceType.VERIFIED:
            ref = " (" + claim.source_ref + ")" if claim.source_ref else ""
            lines.append(" = " + claim.text + ref)
        elif claim.source_type == SourceType.INFERRED:
            # A Claim built directly may carry the default empty hedge; fall
            # back to "I think" exactly as AnnotatedResponse.render does, so
            # the line never reads " ~  text" with a missing lead-in.
            hedge = claim.hedging or "I think"
            lines.append(" ~ " + hedge + " " + claim.text)
        elif claim.source_type == SourceType.STATED:
            lines.append(" > " + claim.text)
        else:
            lines.append(" ? " + claim.text)
    if response.claims:
        v = response.verified_count
        i = response.inferred_count
        t = len(response.claims)
        # Blank separator line between the claims and the summary footer.
        lines.append("")
        lines.append(f" [{v} verified, {i} inferred, {t} total]")
    return "\n".join(lines)
def source_distinction_check(text):
    """Scan *text* for hedging phrases and summarise what was found.

    Matching is case-insensitive and anchored on word boundaries, so a phrase
    is only counted when it appears as whole words: "might" does not match
    inside "almighty", nor "i guess" inside "Kawasaki guesses".

    Args:
        text: The string to inspect.

    Returns:
        A dict with three keys:
        ``has_hedging`` -- True when at least one phrase was found;
        ``hedging_count`` -- number of *distinct* phrases found (repeats of
        the same phrase count once);
        ``likely_inferred`` -- True when more than two distinct phrases
        were found.
    """
    hedging_words = ["i think", "i believe", "probably", "likely", "might",
                     "it seems", "perhaps", "i am not sure", "i guess",
                     "my understanding is", "i suspect"]
    text_lower = text.lower()
    # \b on both sides prevents hits inside longer words; re.escape keeps
    # each phrase literal.
    hedging_count = sum(
        1
        for phrase in hedging_words
        if re.search(r"\b" + re.escape(phrase) + r"\b", text_lower)
    )
    return {"has_hedging": hedging_count > 0, "hedging_count": hedging_count,
            "likely_inferred": hedging_count > 2}