"""Tests for confidence estimation in src/timmy/confidence.py.""" from timmy.confidence import ( CERTAINTY_WORDS, HEDGING_WORDS, estimate_confidence, ) class TestEstimateConfidence: """Test cases for estimate_confidence function.""" def test_empty_string_returns_zero(self): """Empty string should return 0.0 confidence.""" assert estimate_confidence("") == 0.0 def test_whitespace_only_returns_zero(self): """Whitespace-only string should return 0.0 confidence.""" assert estimate_confidence(" ") == 0.0 def test_normal_factual_response(self): """Factual response should have at least moderate confidence.""" result = estimate_confidence("Paris is the capital of France.") assert 0.5 <= result <= 1.0 # 6 words doesn't get short-response boost, should be at base assert result >= 0.5 def test_i_dont_know_gives_very_low_confidence(self): """Direct admission of not knowing should give very low confidence.""" result = estimate_confidence("I don't know the answer to that.") assert result <= 0.2 def test_i_am_not_sure_gives_very_low_confidence(self): """Uncertainty admission should give very low confidence.""" result = estimate_confidence("I am not sure about this.") assert result <= 0.2 def test_hedging_words_reduce_confidence(self): """Hedging words should reduce confidence below base.""" base = estimate_confidence("This is the answer.") hedged = estimate_confidence("I think this is the answer.") assert hedged < base def test_maybe_reduces_confidence(self): """The word 'maybe' should reduce confidence.""" base = estimate_confidence("It will rain tomorrow.") hedged = estimate_confidence("Maybe it will rain tomorrow.") assert hedged < base def test_perhaps_reduces_confidence(self): """The word 'perhaps' should reduce confidence.""" base = estimate_confidence("The solution is correct.") hedged = estimate_confidence("Perhaps the solution is correct.") assert hedged < base def test_certainty_words_increase_confidence(self): """Certainty words should increase confidence above base.""" # Use longer sentence to avoid short-response boost confounding base = estimate_confidence("This is a longer sentence with more words.") certain = estimate_confidence( "I definitely know this is a longer sentence with more words." ) assert certain > base def test_definitely_increases_confidence(self): """The word 'definitely' should increase confidence.""" base = estimate_confidence("This will work.") certain = estimate_confidence("This will definitely work.") assert certain > base def test_question_reduces_confidence(self): """Questions in response should reduce confidence.""" base = estimate_confidence("The value is 10.") questioning = estimate_confidence("The value is 10?") assert questioning < base def test_multiple_hedging_words_compound(self): """Multiple hedging words should compound to lower confidence.""" text = "I think maybe this could be the answer, but I'm not sure." result = estimate_confidence(text) assert result < 0.4 def test_output_always_in_valid_range(self): """Output should always be clamped to [0.0, 1.0].""" # Test with text that has many hedging words heavily_hedged = ( "I think maybe perhaps possibly I believe this might could be approximately right." ) result = estimate_confidence(heavily_hedged) assert 0.0 <= result <= 1.0 # Test with text that has many certainty words heavily_certain = "I know definitely certainly absolutely without doubt the answer is specifically exactly correct." result = estimate_confidence(heavily_certain) assert 0.0 <= result <= 1.0 def test_hedging_words_list_populated(self): """HEDGING_WORDS list should contain expected hedging phrases.""" assert "i think" in HEDGING_WORDS assert "maybe" in HEDGING_WORDS assert "perhaps" in HEDGING_WORDS assert "not sure" in HEDGING_WORDS assert "possibly" in HEDGING_WORDS def test_certainty_words_list_populated(self): """CERTAINTY_WORDS list should contain expected certainty phrases.""" assert "i know" in CERTAINTY_WORDS assert "definitely" in CERTAINTY_WORDS assert "certainly" in CERTAINTY_WORDS assert "the answer is" in CERTAINTY_WORDS def test_certainty_and_hedging_cancel(self): """Mix of certainty and hedging should balance out near base.""" text = "I definitely think this is correct." result = estimate_confidence(text) # Should be near base (0.5) but hedging slightly stronger assert 0.3 <= result <= 0.7 def test_i_have_no_idea_gives_very_low_confidence(self): """I have no idea should give very low confidence.""" result = estimate_confidence("I have no idea what you're talking about.") assert result <= 0.2 def test_short_response_gets_boost(self): """Very short factual responses should get confidence boost.""" short = estimate_confidence("42") # Short factual should be higher due to boost assert short > 0.5