From c1af9e3905db323cdfeadd149dd07b4a7f1e1e37 Mon Sep 17 00:00:00 2001 From: hermes Date: Wed, 18 Mar 2026 22:01:51 -0400 Subject: [PATCH] =?UTF-8?q?[loop-cycle-154]=20refactor:=20extract=20=5Fann?= =?UTF-8?q?otate=5Fconfidence=20helper=20=E2=80=94=20DRY=203x=20duplicatio?= =?UTF-8?q?n=20(#369)=20(#376)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/timmy/session.py | 35 +++++++++++++++++---------- tests/timmy/test_session.py | 47 +++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 13 deletions(-) diff --git a/src/timmy/session.py b/src/timmy/session.py index 45ff9e3..56be853 100644 --- a/src/timmy/session.py +++ b/src/timmy/session.py @@ -19,6 +19,23 @@ from timmy.session_logger import get_session_logger logger = logging.getLogger(__name__) +# --------------------------------------------------------------------------- +# Confidence annotation (SOUL.md: visible uncertainty) +# --------------------------------------------------------------------------- + +_CONFIDENCE_THRESHOLD = 0.7 + + +def _annotate_confidence(text: str, confidence: float | None) -> str: + """Append a confidence tag when below threshold. + + SOUL.md: "When I am uncertain, I must say so in proportion to my uncertainty." + """ + if confidence is not None and confidence < _CONFIDENCE_THRESHOLD: + return text + f"\n\n[confidence: {confidence:.0%}]" + return text + + # Default session ID for the dashboard (stable across requests) _DEFAULT_SESSION_ID = "dashboard" @@ -113,9 +130,7 @@ async def chat(message: str, session_id: str | None = None) -> str: confidence = estimate_confidence(response_text) logger.debug("Response confidence: %.2f", confidence) - # Make confidence visible to user when below threshold (SOUL.md requirement) - if confidence is not None and confidence < 0.7: - response_text += f"\n\n[confidence: {confidence:.0%}]" + response_text = _annotate_confidence(response_text, confidence) # Record Timmy response after getting it session_logger.record_message("timmy", response_text, confidence=confidence) @@ -159,11 +174,8 @@ async def chat_with_tools(message: str, session_id: str | None = None): confidence = estimate_confidence(response_text) if response_text else None logger.debug("Response confidence: %.2f", confidence) - # Make confidence visible to user when below threshold (SOUL.md requirement) - if confidence is not None and confidence < 0.7: - response_text += f"\n\n[confidence: {confidence:.0%}]" - # Update the run_output content to reflect the modified response - run_output.content = response_text + response_text = _annotate_confidence(response_text, confidence) + run_output.content = response_text session_logger.record_message("timmy", response_text, confidence=confidence) session_logger.flush() @@ -205,11 +217,8 @@ async def continue_chat(run_output, session_id: str | None = None): confidence = estimate_confidence(response_text) if response_text else None logger.debug("Response confidence: %.2f", confidence) - # Make confidence visible to user when below threshold (SOUL.md requirement) - if confidence is not None and confidence < 0.7: - response_text += f"\n\n[confidence: {confidence:.0%}]" - # Update the result content to reflect the modified response - result.content = response_text + response_text = _annotate_confidence(response_text, confidence) + result.content = response_text session_logger.record_message("timmy", response_text, confidence=confidence) session_logger.flush() diff --git a/tests/timmy/test_session.py b/tests/timmy/test_session.py index a20a624..db5b45f 100644 --- a/tests/timmy/test_session.py +++ b/tests/timmy/test_session.py @@ -19,6 +19,53 @@ def _reset_session_singleton(): mod._agent = None +# --------------------------------------------------------------------------- +# _annotate_confidence() helper +# --------------------------------------------------------------------------- + + +class TestAnnotateConfidence: + """Unit tests for the DRY confidence annotation helper.""" + + def test_below_threshold_adds_tag(self): + from timmy.session import _annotate_confidence + + result = _annotate_confidence("Hello world", 0.55) + assert "[confidence: 55%]" in result + + def test_above_threshold_no_tag(self): + from timmy.session import _annotate_confidence + + result = _annotate_confidence("Hello world", 0.85) + assert "[confidence:" not in result + assert result == "Hello world" + + def test_at_threshold_no_tag(self): + from timmy.session import _annotate_confidence + + result = _annotate_confidence("Hello world", 0.7) + assert "[confidence:" not in result + + def test_none_confidence_no_tag(self): + from timmy.session import _annotate_confidence + + result = _annotate_confidence("Hello world", None) + assert "[confidence:" not in result + assert result == "Hello world" + + def test_zero_confidence_adds_tag(self): + from timmy.session import _annotate_confidence + + result = _annotate_confidence("Hello world", 0.0) + assert "[confidence: 0%]" in result + + def test_preserves_original_text(self): + from timmy.session import _annotate_confidence + + result = _annotate_confidence("Original text here", 0.3) + assert result.startswith("Original text here") + + # --------------------------------------------------------------------------- # chat() # ---------------------------------------------------------------------------