# Timmy-time-dashboard/tests/timmy/test_confidence.py

"""Tests for confidence estimation in src/timmy/confidence.py."""

from timmy.confidence import (
    CERTAINTY_WORDS,
    HEDGING_WORDS,
    estimate_confidence,
)


class TestEstimateConfidence:
    """Behavioural tests for ``estimate_confidence`` and its word lists.

    Most assertions pin relative behaviour (hedging lowers the score,
    certainty raises it) or coarse bounds rather than exact values.

    Comparisons between a plain sentence and a hedged/certain variant use
    sentences longer than six words wherever a length difference could help
    the test pass: this suite treats six-word inputs as too long for the
    short-response boost, so keeping both sides long stops that boost from
    being applied to only one of them.
    NOTE(review): the exact word-count threshold lives in ``timmy.confidence``
    and is not visible from this file - confirm it is below the lengths used.
    """

    def test_empty_string_returns_zero(self):
        """Empty string should return 0.0 confidence."""
        assert estimate_confidence("") == 0.0

    def test_whitespace_only_returns_zero(self):
        """Whitespace-only string should return 0.0 confidence."""
        assert estimate_confidence("   ") == 0.0

    def test_normal_factual_response(self):
        """Factual response should have at least moderate confidence."""
        result = estimate_confidence("Paris is the capital of France.")
        # Six words: too long for the short-response boost according to this
        # suite, so the score is expected at or above the 0.5 base.
        assert 0.5 <= result <= 1.0

    def test_i_dont_know_gives_very_low_confidence(self):
        """Direct admission of not knowing should give very low confidence."""
        result = estimate_confidence("I don't know the answer to that.")
        assert result <= 0.2

    def test_i_am_not_sure_gives_very_low_confidence(self):
        """Uncertainty admission should give very low confidence."""
        result = estimate_confidence("I am not sure about this.")
        assert result <= 0.2

    def test_hedging_words_reduce_confidence(self):
        """Hedging words should reduce confidence below base."""
        # Both sentences are long so a short-response boost on the base alone
        # cannot make this pass without a real hedging penalty.
        base = estimate_confidence("This is the answer to the riddle you gave me.")
        hedged = estimate_confidence(
            "I think this is the answer to the riddle you gave me."
        )
        assert hedged < base

    def test_maybe_reduces_confidence(self):
        """The word 'maybe' should reduce confidence."""
        # Long sentences on both sides: see the class docstring.
        base = estimate_confidence(
            "It will rain tomorrow afternoon across the whole region."
        )
        hedged = estimate_confidence(
            "Maybe it will rain tomorrow afternoon across the whole region."
        )
        assert hedged < base

    def test_perhaps_reduces_confidence(self):
        """The word 'perhaps' should reduce confidence."""
        # Long sentences on both sides: see the class docstring.
        base = estimate_confidence("The solution is correct for the inputs we tested.")
        hedged = estimate_confidence(
            "Perhaps the solution is correct for the inputs we tested."
        )
        assert hedged < base

    def test_certainty_words_increase_confidence(self):
        """Certainty words should increase confidence above base."""
        # Use longer sentence to avoid short-response boost confounding
        base = estimate_confidence("This is a longer sentence with more words.")
        certain = estimate_confidence(
            "I definitely know this is a longer sentence with more words."
        )
        assert certain > base

    def test_definitely_increases_confidence(self):
        """The word 'definitely' should increase confidence."""
        # NOTE(review): both inputs are short (3 and 4 words). If the
        # short-response boost reached only the shorter base it would work
        # against this assertion, not for it - confirm against the threshold.
        base = estimate_confidence("This will work.")
        certain = estimate_confidence("This will definitely work.")
        assert certain > base

    def test_question_reduces_confidence(self):
        """Questions in response should reduce confidence."""
        # Same words on both sides; only the trailing punctuation differs.
        base = estimate_confidence("The value is 10.")
        questioning = estimate_confidence("The value is 10?")
        assert questioning < base

    def test_multiple_hedging_words_compound(self):
        """Multiple hedging words should compound to lower confidence."""
        text = "I think maybe this could be the answer, but I'm not sure."
        result = estimate_confidence(text)
        assert result < 0.4

    def test_output_always_in_valid_range(self):
        """Output should always be clamped to [0.0, 1.0]."""
        # Test with text that has many hedging words
        heavily_hedged = (
            "I think maybe perhaps possibly I believe this might could be approximately right."
        )
        result = estimate_confidence(heavily_hedged)
        assert 0.0 <= result <= 1.0

        # Test with text that has many certainty words
        heavily_certain = (
            "I know definitely certainly absolutely without doubt the answer is specifically exactly correct."
        )
        result = estimate_confidence(heavily_certain)
        assert 0.0 <= result <= 1.0

    def test_hedging_words_list_populated(self):
        """HEDGING_WORDS list should contain expected hedging phrases."""
        assert "i think" in HEDGING_WORDS
        assert "maybe" in HEDGING_WORDS
        assert "perhaps" in HEDGING_WORDS
        assert "not sure" in HEDGING_WORDS
        assert "possibly" in HEDGING_WORDS

    def test_certainty_words_list_populated(self):
        """CERTAINTY_WORDS list should contain expected certainty phrases."""
        assert "i know" in CERTAINTY_WORDS
        assert "definitely" in CERTAINTY_WORDS
        assert "certainly" in CERTAINTY_WORDS
        assert "the answer is" in CERTAINTY_WORDS

    def test_certainty_and_hedging_cancel(self):
        """Mix of certainty and hedging should balance out near base."""
        text = "I definitely think this is correct."
        result = estimate_confidence(text)
        # Should be near base (0.5) but hedging slightly stronger
        assert 0.3 <= result <= 0.7

    def test_i_have_no_idea_gives_very_low_confidence(self):
        """I have no idea should give very low confidence."""
        result = estimate_confidence("I have no idea what you're talking about.")
        assert result <= 0.2

    def test_short_response_gets_boost(self):
        """Very short factual responses should get confidence boost."""
        short = estimate_confidence("42")
        # Short factual should be higher due to boost
        assert short > 0.5