Files
Timmy-time-dashboard/tests/timmy/test_confidence.py
hermes f15ad3375a
All checks were successful
Tests / lint (push) Successful in 13s
Tests / test (push) Successful in 1m2s
[loop-cycle-47] feat: add confidence signaling module (#143) (#161)
2026-03-15 11:20:30 -04:00

129 lines
5.4 KiB
Python

"""Tests for confidence estimation in src/timmy/confidence.py."""
from timmy.confidence import (
CERTAINTY_WORDS,
HEDGING_WORDS,
estimate_confidence,
)
class TestEstimateConfidence:
    """Behavioural checks for ``estimate_confidence`` and its phrase lists.

    Each test feeds one or two short replies to ``estimate_confidence`` and
    asserts either an absolute bound on the score or a relative ordering
    between a plain sentence and a modified variant of it.
    """

    def test_empty_string_returns_zero(self):
        """An empty reply scores exactly 0.0."""
        score = estimate_confidence("")
        assert score == 0.0

    def test_whitespace_only_returns_zero(self):
        """A reply consisting only of whitespace scores exactly 0.0."""
        score = estimate_confidence(" ")
        assert score == 0.0

    def test_normal_factual_response(self):
        """A plain factual statement scores somewhere in [0.5, 1.0]."""
        score = estimate_confidence("Paris is the capital of France.")
        assert 0.5 <= score <= 1.0
        # Per the original author's note, a six-word reply is not expected to
        # receive the short-response boost, so the score should not fall
        # below the base value.
        assert score >= 0.5

    def test_i_dont_know_gives_very_low_confidence(self):
        """Saying "I don't know" caps the score at 0.2."""
        assert estimate_confidence("I don't know the answer to that.") <= 0.2

    def test_i_am_not_sure_gives_very_low_confidence(self):
        """Saying "I am not sure" caps the score at 0.2."""
        assert estimate_confidence("I am not sure about this.") <= 0.2

    def test_hedging_words_reduce_confidence(self):
        """Prefixing "I think" scores lower than the bare sentence."""
        plain_score = estimate_confidence("This is the answer.")
        hedged_score = estimate_confidence("I think this is the answer.")
        assert hedged_score < plain_score

    def test_maybe_reduces_confidence(self):
        """Prefixing "Maybe" scores lower than the bare sentence."""
        plain_score = estimate_confidence("It will rain tomorrow.")
        hedged_score = estimate_confidence("Maybe it will rain tomorrow.")
        assert hedged_score < plain_score

    def test_perhaps_reduces_confidence(self):
        """Prefixing "Perhaps" scores lower than the bare sentence."""
        plain_score = estimate_confidence("The solution is correct.")
        hedged_score = estimate_confidence("Perhaps the solution is correct.")
        assert hedged_score < plain_score

    def test_certainty_words_increase_confidence(self):
        """Adding "I definitely know" scores higher than the bare sentence."""
        # Both sentences are deliberately long so that a short-response boost
        # cannot be what separates the two scores.
        plain_score = estimate_confidence(
            "This is a longer sentence with more words."
        )
        assured_score = estimate_confidence(
            "I definitely know this is a longer sentence with more words."
        )
        assert assured_score > plain_score

    def test_definitely_increases_confidence(self):
        """Inserting "definitely" scores higher than the bare sentence."""
        plain_score = estimate_confidence("This will work.")
        assured_score = estimate_confidence("This will definitely work.")
        assert assured_score > plain_score

    def test_question_reduces_confidence(self):
        """Ending with a question mark scores lower than ending with a period."""
        statement_score = estimate_confidence("The value is 10.")
        question_score = estimate_confidence("The value is 10?")
        assert question_score < statement_score

    def test_multiple_hedging_words_compound(self):
        """A reply stacked with several hedges scores below 0.4."""
        reply = "I think maybe this could be the answer, but I'm not sure."
        assert estimate_confidence(reply) < 0.4

    def test_output_always_in_valid_range(self):
        """Extreme inputs still produce a score inside [0.0, 1.0]."""
        extreme_replies = (
            # Saturated with hedging phrases.
            "I think maybe perhaps possibly I believe "
            "this might could be approximately right.",
            # Saturated with certainty phrases.
            "I know definitely certainly absolutely without doubt "
            "the answer is specifically exactly correct.",
        )
        for reply in extreme_replies:
            score = estimate_confidence(reply)
            assert 0.0 <= score <= 1.0

    def test_hedging_words_list_populated(self):
        """HEDGING_WORDS contains the core hedging phrases."""
        for phrase in ("i think", "maybe", "perhaps", "not sure", "possibly"):
            assert phrase in HEDGING_WORDS

    def test_certainty_words_list_populated(self):
        """CERTAINTY_WORDS contains the core certainty phrases."""
        for phrase in ("i know", "definitely", "certainly", "the answer is"):
            assert phrase in CERTAINTY_WORDS

    def test_certainty_and_hedging_cancel(self):
        """A reply mixing certainty and hedging lands within [0.3, 0.7]."""
        score = estimate_confidence("I definitely think this is correct.")
        # Expected to sit near the 0.5 base; the band leaves room for the
        # hedge to weigh slightly more than the certainty word.
        assert 0.3 <= score <= 0.7

    def test_i_have_no_idea_gives_very_low_confidence(self):
        """Saying "I have no idea" caps the score at 0.2."""
        score = estimate_confidence("I have no idea what you're talking about.")
        assert score <= 0.2

    def test_short_response_gets_boost(self):
        """A very short factual reply scores above 0.5."""
        # The terse answer is expected to benefit from the short-response boost.
        assert estimate_confidence("42") > 0.5