scripts/source_distinction.py

#!/usr/bin/env python3
# source_distinction.py - I think vs I know annotation system.
# SOUL.md: "Every claim I make comes from one of two places: a verified source
# I can point to, or my own pattern-matching."
# Part of #793

from dataclasses import dataclass, field
from enum import Enum
from typing import List, Optional


class SourceType(Enum):
    """Provenance category attached to a claim.

    The member values are the lowercase string forms of the member names.
    """

    # Backed by a source that can be pointed to (may carry a source_ref).
    VERIFIED = "verified"
    # Produced by pattern-matching; displayed with a hedging phrase.
    INFERRED = "inferred"
    # Something the user said; rendered with a "[you told me]" tag.
    STATED = "stated"
    # No known provenance; rendered with an "I am not certain" lead-in.
    UNKNOWN = "unknown"


@dataclass
class Claim:
    """One statement together with the metadata describing where it came from.

    Only ``text`` and ``source_type`` are required; the remaining fields
    default to empty / zero values. No validation is performed on any field.
    """

    # The statement itself.
    text: str
    # Provenance category of the statement.
    source_type: SourceType
    # Reference for the backing source; empty string means "none given".
    source_ref: str = ""
    # Confidence score; defaults to 0.0 when not supplied.
    confidence: float = 0.0
    # Hedging phrase placed before the text of inferred claims when displayed.
    hedging: str = ""


@dataclass
class AnnotatedResponse:
    """A response's raw text paired with the source-tagged claims it makes."""

    # The unannotated response text; returned by render() when there are no claims.
    raw_text: str
    # Claims in display order; each instance gets its own fresh list by default.
    claims: List[Claim] = field(default_factory=list)

    def render(self):
        """Return the response as a single string with provenance markers.

        With no claims the raw text is returned unchanged. Otherwise every
        claim is decorated according to its source type and the decorated
        pieces are joined with single spaces (``raw_text`` is not used):

        * VERIFIED -> ``<text> [verified: <ref>]`` or ``<text> [verified]``
        * INFERRED -> ``<hedging> <text>`` (hedging falls back to "I think")
        * STATED   -> ``<text> [you told me]``
        * other    -> ``I am not certain, but <text>``
        """
        if not self.claims:
            return self.raw_text

        def _decorate(item):
            # Turn one claim into its annotated sentence.
            kind = item.source_type
            if kind == SourceType.VERIFIED:
                if item.source_ref:
                    tag = "[verified: " + item.source_ref + "]"
                else:
                    tag = "[verified]"
                return item.text + " " + tag
            if kind == SourceType.INFERRED:
                lead_in = item.hedging or "I think"
                return lead_in + " " + item.text
            if kind == SourceType.STATED:
                return item.text + " [you told me]"
            return "I am not certain, but " + item.text

        return " ".join([_decorate(item) for item in self.claims])

    @property
    def verified_count(self):
        """Number of claims whose source type is VERIFIED."""
        matching = [
            item for item in self.claims
            if item.source_type == SourceType.VERIFIED
        ]
        return len(matching)

    @property
    def inferred_count(self):
        """Number of claims whose source type is INFERRED."""
        matching = [
            item for item in self.claims
            if item.source_type == SourceType.INFERRED
        ]
        return len(matching)


def verified(text, source, confidence=0.95):
    """Build a VERIFIED claim for ``text`` that cites ``source``.

    ``source`` is stored as the claim's ``source_ref``; ``confidence``
    defaults to 0.95.
    """
    return Claim(
        text=text,
        source_type=SourceType.VERIFIED,
        source_ref=source,
        confidence=confidence,
    )

def inferred(text, hedging="I think", confidence=0.6):
    """Build an INFERRED claim for ``text``.

    ``hedging`` is the phrase shown before the text (default "I think");
    ``confidence`` defaults to 0.6. No source reference is set.
    """
    return Claim(
        text=text,
        source_type=SourceType.INFERRED,
        hedging=hedging,
        confidence=confidence,
    )

def stated(text):
    """Build a STATED claim for ``text`` with confidence fixed at 1.0."""
    return Claim(
        text=text,
        source_type=SourceType.STATED,
        confidence=1.0,
    )


def annotate_response(raw_text, claims):
    """Wrap ``raw_text`` and ``claims`` in an AnnotatedResponse.

    ``claims`` is handed to the response as-is (it is not copied).
    """
    response = AnnotatedResponse(raw_text, claims)
    return response


def format_for_display(response):
    """Format a response's claims as a marker-prefixed list plus a summary.

    Each claim becomes one line, indented two spaces and led by a marker:

    * ``=`` verified, followed by ``(<source_ref>)`` when a reference is set
    * ``~`` inferred, shown as ``<hedging> <text>``
    * ``>`` stated
    * ``?`` any other source type

    When there is at least one claim, a blank line and a
    ``[<v> verified, <i> inferred, <t> total]`` summary line are appended.

    Args:
        response: Object exposing ``claims``, ``verified_count`` and
            ``inferred_count`` (e.g. an AnnotatedResponse).

    Returns:
        The lines joined with newlines; an empty string when there are no
        claims.
    """
    lines = []
    for claim in response.claims:
        if claim.source_type == SourceType.VERIFIED:
            ref = " (" + claim.source_ref + ")" if claim.source_ref else ""
            lines.append("  = " + claim.text + ref)
        elif claim.source_type == SourceType.INFERRED:
            # Claim.hedging defaults to "", which used to produce
            # "  ~  <text>" with no hedge at all. Fall back to "I think",
            # the same default AnnotatedResponse.render() applies.
            hedge = claim.hedging or "I think"
            lines.append("  ~ " + hedge + " " + claim.text)
        elif claim.source_type == SourceType.STATED:
            lines.append("  > " + claim.text)
        else:
            lines.append("  ? " + claim.text)
    if response.claims:
        v = response.verified_count
        i = response.inferred_count
        t = len(response.claims)
        lines.append("")
        lines.append(f"  [{v} verified, {i} inferred, {t} total]")
    return "\n".join(lines)


def source_distinction_check(text):
    """Detect hedging language in ``text``.

    Matching is case-insensitive and works on whole words/phrases, so a
    hedging word embedded in a longer word (e.g. "might" in "almighty") is
    not counted. Words of a multi-word phrase may be separated by any
    whitespace, including line breaks.

    Args:
        text: The string to inspect.

    Returns:
        A dict with:
          * ``has_hedging``: True when at least one hedging phrase is present.
          * ``hedging_count``: number of *distinct* hedging phrases present
            (a phrase that occurs several times is counted once).
          * ``likely_inferred``: True when more than two distinct phrases
            are present.
    """
    import re  # local import keeps this function self-contained

    hedging_words = (
        "i think", "i believe", "probably", "likely", "might",
        "it seems", "perhaps", "i am not sure", "i guess",
        "my understanding is", "i suspect",
        # Listed explicitly: it used to be caught as a substring match on
        # "likely", which whole-word matching no longer does.
        "unlikely",
    )
    text_lower = text.lower()

    def _present(phrase):
        # Whole-phrase match with flexible whitespace between its words.
        body = r"\s+".join(re.escape(word) for word in phrase.split())
        return re.search(r"\b" + body + r"\b", text_lower) is not None

    hedging_count = sum(1 for phrase in hedging_words if _present(phrase))
    return {
        "has_hedging": hedging_count > 0,
        "hedging_count": hedging_count,
        "likely_inferred": hedging_count > 2,
    }
feat: source distinction - I think vs I know (#793)

SOUL.md compliance: 'Every claim I make comes from one of two places: a verified source I can point to, or my own pattern-matching.'

scripts/source_distinction.py:
- SourceType enum: VERIFIED, INFERRED, STATED, UNKNOWN
- Claim dataclass with source_type, source_ref, confidence, hedging
- AnnotatedResponse with render() and format_for_display()
- Helper functions: verified(), inferred(), stated()
- source_distinction_check() - hedging word detection

Tests: 9 passing

2026-04-17 01:44:06 -04:00
			`#!/usr/bin/env python3`
			`# source_distinction.py - I think vs I know annotation system.`
			`# SOUL.md: "Every claim I make comes from one of two places: a verified source`
			`# I can point to, or my own pattern-matching."`
			`# Part of #793`

			`from dataclasses import dataclass, field`
			`from enum import Enum`
			`from typing import List, Optional`


			`class SourceType(Enum):`
			`VERIFIED = "verified"`
			`INFERRED = "inferred"`
			`STATED = "stated"`
			`UNKNOWN = "unknown"`


			`@dataclass`
			`class Claim:`
			`text: str`
			`source_type: SourceType`
			`source_ref: str = ""`
			`confidence: float = 0.0`
			`hedging: str = ""`


			`@dataclass`
			`class AnnotatedResponse:`
			`raw_text: str`
			`claims: List[Claim] = field(default_factory=list)`

			`def render(self):`
			`if not self.claims:`
			`return self.raw_text`
			`parts = []`
			`for claim in self.claims:`
			`if claim.source_type == SourceType.VERIFIED:`
			`prefix = "[verified: " + claim.source_ref + "]" if claim.source_ref else "[verified]"`
			`parts.append(claim.text + " " + prefix)`
			`elif claim.source_type == SourceType.INFERRED:`
			`hedge = claim.hedging or "I think"`
			`parts.append(hedge + " " + claim.text)`
			`elif claim.source_type == SourceType.STATED:`
			`parts.append(claim.text + " [you told me]")`
			`else:`
			`parts.append("I am not certain, but " + claim.text)`
			`return " ".join(parts)`

			`@property`
			`def verified_count(self):`
			`return sum(1 for c in self.claims if c.source_type == SourceType.VERIFIED)`

			`@property`
			`def inferred_count(self):`
			`return sum(1 for c in self.claims if c.source_type == SourceType.INFERRED)`


			`def verified(text, source, confidence=0.95):`
			`return Claim(text=text, source_type=SourceType.VERIFIED, source_ref=source, confidence=confidence)`

			`def inferred(text, hedging="I think", confidence=0.6):`
			`return Claim(text=text, source_type=SourceType.INFERRED, confidence=confidence, hedging=hedging)`

			`def stated(text):`
			`return Claim(text=text, source_type=SourceType.STATED, confidence=1.0)`


			`def annotate_response(raw_text, claims):`
			`return AnnotatedResponse(raw_text=raw_text, claims=claims)`


			`def format_for_display(response):`
			`lines = []`
			`for claim in response.claims:`
			`if claim.source_type == SourceType.VERIFIED:`
			`ref = " (" + claim.source_ref + ")" if claim.source_ref else ""`
			`lines.append(" = " + claim.text + ref)`
			`elif claim.source_type == SourceType.INFERRED:`
			`lines.append(" ~ " + claim.hedging + " " + claim.text)`
			`elif claim.source_type == SourceType.STATED:`
			`lines.append(" > " + claim.text)`
			`else:`
			`lines.append(" ? " + claim.text)`
			`if response.claims:`
			`v = response.verified_count`
			`i = response.inferred_count`
			`t = len(response.claims)`
			`lines.append("")`
			`lines.append(" [" + str(v) + " verified, " + str(i) + " inferred, " + str(t) + " total]")`
			`return "\n".join(lines)`


			`def source_distinction_check(text):`
			`hedging_words = ["i think", "i believe", "probably", "likely", "might",`
			`"it seems", "perhaps", "i am not sure", "i guess",`
			`"my understanding is", "i suspect"]`
			`text_lower = text.lower()`
			`hedging_count = sum(1 for h in hedging_words if h in text_lower)`
			`return {"has_hedging": hedging_count > 0, "hedging_count": hedging_count,`
			`"likely_inferred": hedging_count > 2}`