forked from Rockachopa/Timmy-time-dashboard
This commit is contained in:
@@ -19,6 +19,23 @@ from timmy.session_logger import get_session_logger
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Confidence annotation (SOUL.md: visible uncertainty)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_CONFIDENCE_THRESHOLD = 0.7
|
||||
|
||||
|
||||
def _annotate_confidence(text: str, confidence: float | None) -> str:
|
||||
"""Append a confidence tag when below threshold.
|
||||
|
||||
SOUL.md: "When I am uncertain, I must say so in proportion to my uncertainty."
|
||||
"""
|
||||
if confidence is not None and confidence < _CONFIDENCE_THRESHOLD:
|
||||
return text + f"\n\n[confidence: {confidence:.0%}]"
|
||||
return text
|
||||
|
||||
|
||||
# Default session ID for the dashboard (stable across requests)
|
||||
_DEFAULT_SESSION_ID = "dashboard"
|
||||
|
||||
@@ -113,9 +130,7 @@ async def chat(message: str, session_id: str | None = None) -> str:
|
||||
confidence = estimate_confidence(response_text)
|
||||
logger.debug("Response confidence: %.2f", confidence)
|
||||
|
||||
# Make confidence visible to user when below threshold (SOUL.md requirement)
|
||||
if confidence is not None and confidence < 0.7:
|
||||
response_text += f"\n\n[confidence: {confidence:.0%}]"
|
||||
response_text = _annotate_confidence(response_text, confidence)
|
||||
|
||||
# Record Timmy response after getting it
|
||||
session_logger.record_message("timmy", response_text, confidence=confidence)
|
||||
@@ -159,11 +174,8 @@ async def chat_with_tools(message: str, session_id: str | None = None):
|
||||
confidence = estimate_confidence(response_text) if response_text else None
|
||||
logger.debug("Response confidence: %.2f", confidence)
|
||||
|
||||
# Make confidence visible to user when below threshold (SOUL.md requirement)
|
||||
if confidence is not None and confidence < 0.7:
|
||||
response_text += f"\n\n[confidence: {confidence:.0%}]"
|
||||
# Update the run_output content to reflect the modified response
|
||||
run_output.content = response_text
|
||||
response_text = _annotate_confidence(response_text, confidence)
|
||||
run_output.content = response_text
|
||||
|
||||
session_logger.record_message("timmy", response_text, confidence=confidence)
|
||||
session_logger.flush()
|
||||
@@ -205,11 +217,8 @@ async def continue_chat(run_output, session_id: str | None = None):
|
||||
confidence = estimate_confidence(response_text) if response_text else None
|
||||
logger.debug("Response confidence: %.2f", confidence)
|
||||
|
||||
# Make confidence visible to user when below threshold (SOUL.md requirement)
|
||||
if confidence is not None and confidence < 0.7:
|
||||
response_text += f"\n\n[confidence: {confidence:.0%}]"
|
||||
# Update the result content to reflect the modified response
|
||||
result.content = response_text
|
||||
response_text = _annotate_confidence(response_text, confidence)
|
||||
result.content = response_text
|
||||
|
||||
session_logger.record_message("timmy", response_text, confidence=confidence)
|
||||
session_logger.flush()
|
||||
|
||||
@@ -19,6 +19,53 @@ def _reset_session_singleton():
|
||||
mod._agent = None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _annotate_confidence() helper
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestAnnotateConfidence:
    """Behavioural checks for the shared ``_annotate_confidence`` helper."""

    def test_below_threshold_adds_tag(self):
        """A confidence of 0.55 yields a ``55%`` tag in the output."""
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Hello world", 0.55)
        assert "[confidence: 55%]" in annotated

    def test_above_threshold_no_tag(self):
        """A confidence of 0.85 leaves the text exactly as given."""
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Hello world", 0.85)
        assert "[confidence:" not in annotated
        assert annotated == "Hello world"

    def test_at_threshold_no_tag(self):
        """A confidence of exactly 0.7 does not add a tag."""
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Hello world", 0.7)
        assert "[confidence:" not in annotated

    def test_none_confidence_no_tag(self):
        """A ``None`` confidence leaves the text exactly as given."""
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Hello world", None)
        assert "[confidence:" not in annotated
        assert annotated == "Hello world"

    def test_zero_confidence_adds_tag(self):
        """A confidence of 0.0 yields a ``0%`` tag in the output."""
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Hello world", 0.0)
        assert "[confidence: 0%]" in annotated

    def test_preserves_original_text(self):
        """The annotated output still begins with the original text."""
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Original text here", 0.3)
        assert annotated.startswith("Original text here")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# chat()
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
Reference in New Issue
Block a user