# Timmy-time-dashboard/src/timmy/confidence.py

"""Confidence estimation for Timmy's responses.

Implements SOUL.md requirement: "When I am uncertain, I must say so in
proportion to my uncertainty."

This module provides heuristics to estimate confidence based on linguistic
signals in the response text. It measures uncertainty without modifying
the response content.
"""

import re

# Phrases that signal hedging or doubt. estimate_confidence() subtracts 0.1
# from its score for each entry that occurs in the response. Entries are kept
# lowercase because they are compared against lowercased text.
HEDGING_WORDS = [
    "i think", "maybe", "perhaps", "not sure", "might", "could be",
    "possibly", "i believe", "approximately", "roughly", "probably",
    "likely", "seems", "appears", "suggests", "i guess", "i suppose",
    "sort of", "kind of", "somewhat", "fairly", "relatively",
    "i'm not certain", "i am not certain", "uncertain", "unclear",
]

# Phrases that signal confidence. estimate_confidence() adds 0.1 to its score
# for each entry that occurs in the response. Entries are kept lowercase
# because they are compared against lowercased text.
CERTAINTY_WORDS = [
    "i know", "definitely", "certainly", "the answer is", "specifically",
    "exactly", "absolutely", "without doubt", "i am certain", "i'm certain",
    "it is true that", "fact is", "in fact", "indeed", "undoubtedly",
    "clearly", "obviously", "conclusively",
]

# Very low confidence indicators (direct admissions of ignorance).
#
# The patterns are searched against lowercased text, so every alternative is
# written in lowercase. The leading \b stops the pronoun "i" from matching the
# tail of another word (e.g. "wifi can't tell"), and ['\u2019] accepts both the
# straight and the typographic apostrophe.
LOW_CONFIDENCE_PATTERNS = [
    r"\bi\s+(?:don['\u2019]t|do\s+not)\s+know",
    # "i am ..." and "i'm ..." are alternatives of each other; the contraction
    # must not require an extra leading "i ".
    r"\b(?:i\s+am|i['\u2019]m)\s+(?:not\s+sure|unsure)",
    r"\bi\s+have\s+no\s+(?:idea|clue)",
    r"\bi\s+cannot\s+(?:say|tell|answer)",
    r"\bi\s+can['\u2019]t\s+(?:say|tell|answer)",
]


def _count_phrases(text, phrases):
    """Return how many distinct entries of *phrases* occur in *text* as whole words."""
    hits = 0
    for phrase in phrases:
        # Lookarounds rather than \b so that phrases starting or ending with a
        # non-word character would still be anchored correctly. Runs of
        # whitespace between the words of a phrase are treated as one space.
        body = r"\s+".join(re.escape(word) for word in phrase.split())
        if re.search(r"(?<!\w)" + body + r"(?!\w)", text):
            hits += 1
    return hits


def estimate_confidence(text: str) -> float:
    """Estimate confidence level of a response based on linguistic signals.

    The score starts at a neutral 0.5 and is adjusted as follows:

    * a direct admission of ignorance (any ``LOW_CONFIDENCE_PATTERNS`` match)
      resets the base score to 0.15;
    * each distinct ``HEDGING_WORDS`` entry present subtracts 0.1;
    * each distinct ``CERTAINTY_WORDS`` entry present adds 0.1;
    * a response of five words or fewer whose score is still above 0.3
      gains 0.1;
    * a question mark anywhere in the response subtracts 0.15.

    Phrases are matched case-insensitively and as whole words, so "in fact"
    is not detected inside "main factor" and "might" is not detected inside
    "mighty". A phrase counts once no matter how often it is repeated.

    Args:
        text: The response text to analyze.

    Returns:
        A float between 0.0 (very uncertain) and 1.0 (very confident).
        Empty or whitespace-only input yields 0.0.
    """
    if not text or not text.strip():
        return 0.0

    # Lowercase for case-insensitive matching, and fold the typographic
    # apostrophe so that "I\u2019m" is treated the same as "I'm".
    normalized = text.lower().strip().replace("\u2019", "'")

    confidence = 0.5  # Neutral starting point

    # A direct admission of not knowing overrides the neutral baseline.
    if any(re.search(pattern, normalized) for pattern in LOW_CONFIDENCE_PATTERNS):
        confidence = 0.15

    confidence -= _count_phrases(normalized, HEDGING_WORDS) * 0.1
    confidence += _count_phrases(normalized, CERTAINTY_WORDS) * 0.1

    # Short factual answers get a small boost, unless already judged uncertain.
    if len(text.split()) <= 5 and confidence > 0.3:
        confidence += 0.1

    # Questions in the response indicate uncertainty.
    if "?" in text:
        confidence -= 0.15

    # Clamp to the documented range.
    return max(0.0, min(1.0, confidence))