"""Tests for timmy.session — persistent chat session with response sanitization."""

from unittest.mock import AsyncMock, MagicMock, patch

import pytest

# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture(autouse=True)
def _reset_session_singleton():
    """Null out ``timmy.session._agent`` before and after every test."""
    import timmy.session as session_module

    session_module._agent = None
    yield
    session_module._agent = None


# ---------------------------------------------------------------------------
# _annotate_confidence() helper
# ---------------------------------------------------------------------------


class TestAnnotateConfidence:
    """Behavioural checks for the ``_annotate_confidence`` helper."""

    def test_below_threshold_adds_tag(self):
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Hello world", 0.55)

        assert "[confidence: 55%]" in annotated

    def test_above_threshold_no_tag(self):
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Hello world", 0.85)

        assert "[confidence:" not in annotated
        assert annotated == "Hello world"

    def test_at_threshold_no_tag(self):
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Hello world", 0.7)

        assert "[confidence:" not in annotated

    def test_none_confidence_no_tag(self):
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Hello world", None)

        assert "[confidence:" not in annotated
        assert annotated == "Hello world"

    def test_zero_confidence_adds_tag(self):
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Hello world", 0.0)

        assert "[confidence: 0%]" in annotated

    def test_preserves_original_text(self):
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Original text here", 0.3)

        assert annotated.startswith("Original text here")


# ---------------------------------------------------------------------------
# chat()
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_chat_returns_string():
    """chat() hands back a plain ``str`` that contains the agent's reply."""
    agent = MagicMock()
    agent.arun = AsyncMock(return_value=MagicMock(content="Hello, sir."))

    with patch("timmy.session._get_agent", return_value=agent):
        from timmy.session import chat

        reply = await chat("Hi Timmy")

    assert isinstance(reply, str)
    assert "Hello, sir." in reply


@pytest.mark.asyncio
async def test_chat_passes_session_id():
    """An explicit session_id is forwarded to agent.arun() as a keyword."""
    agent = MagicMock()
    agent.arun = AsyncMock(return_value=MagicMock(content="OK."))

    with patch("timmy.session._get_agent", return_value=agent):
        from timmy.session import chat

        await chat("test", session_id="my-session")

    arun_kwargs = agent.arun.call_args.kwargs
    assert arun_kwargs["session_id"] == "my-session"


@pytest.mark.asyncio
async def test_chat_uses_default_session_id():
    """Without a session_id, agent.arun() receives 'dashboard'."""
    agent = MagicMock()
    agent.arun = AsyncMock(return_value=MagicMock(content="OK."))

    with patch("timmy.session._get_agent", return_value=agent):
        from timmy.session import chat

        await chat("test")

    arun_kwargs = agent.arun.call_args.kwargs
    assert arun_kwargs["session_id"] == "dashboard"


@pytest.mark.asyncio
async def test_chat_singleton_agent_reused():
    """Two consecutive chat() calls build the agent through the factory once."""
    agent = MagicMock()
    agent.arun = AsyncMock(return_value=MagicMock(content="OK."))

    with patch("timmy.agent.create_timmy", return_value=agent) as factory:
        from timmy.session import chat

        await chat("first message")
        await chat("second message")

    # Factory called only once (singleton)
    factory.assert_called_once()


@pytest.mark.asyncio
async def test_chat_extracts_user_name():
    """A 'my name is ...' message results in the name being stored in memory."""
    agent = MagicMock()
    agent.arun = AsyncMock(return_value=MagicMock(content="Nice to meet you!"))
    memory = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=agent),
        patch("timmy.memory_system.memory_system", memory),
    ):
        from timmy.session import chat

        await chat("my name is Alex")

    memory.update_user_fact.assert_called_once_with("Name", "Alex")


@pytest.mark.asyncio
async def test_chat_graceful_degradation_on_memory_failure():
    """chat() still answers when the conversation manager raises."""
    agent = MagicMock()
    agent.arun = AsyncMock(return_value=MagicMock(content="I'm operational."))

    with (
        patch("timmy.session._get_agent", return_value=agent),
        patch("timmy.conversation.conversation_manager") as conversation_manager,
    ):
        # Make name extraction blow up before chat() gets a chance to call it.
        conversation_manager.extract_user_name.side_effect = Exception("memory broken")
        from timmy.session import chat

        reply = await chat("test message")

    assert "operational" in reply


# ---------------------------------------------------------------------------
# _clean_response()
# ---------------------------------------------------------------------------


def test_clean_response_strips_json_tool_calls():
    """Embedded JSON tool-call blobs ('parameters' key) are stripped out."""
    from timmy.session import _clean_response

    raw = 'Here is the answer. {"name": "python", "parameters": {"code": "0.15 * 3847.23", "variable_to_return": "result"}} The result is 577.'

    cleaned = _clean_response(raw)

    assert '{"name"' not in cleaned
    assert '"parameters"' not in cleaned
    assert "The result is 577." in cleaned


def test_clean_response_strips_arguments_format():
    """Tool-call blobs that use the 'arguments' key (OpenAI format) are stripped too."""
    from timmy.session import _clean_response

    raw = 'Here is the result. {"name": "shell", "arguments": {"args": ["mkdir", "-p", "/tmp/test"]}} The directory was created.'

    cleaned = _clean_response(raw)

    assert '{"name"' not in cleaned
    assert '"arguments"' not in cleaned
    assert "The directory was created." in cleaned


def test_clean_response_strips_function_calls():
    """Text that looks like a function call is removed from the response."""
    from timmy.session import _clean_response

    raw = 'I will search for that. memory_search(query="recall number") Found nothing.'

    cleaned = _clean_response(raw)

    assert "memory_search(" not in cleaned
    assert "Found nothing." in cleaned


def test_clean_response_strips_chain_of_thought():
    """Chain-of-thought narration is removed while the actual answer survives."""
    from timmy.session import _clean_response

    # NOTE(review): this literal is a single physical line as seen here, while the
    # test's intent speaks of narration *lines* — confirm whether the fixture is
    # meant to contain newlines between its sentences.
    raw = """Since there's no direct answer in my vault or hot memory, I'll use memory_search. Using memory_search(query="what is special"), I found a context. Here's a possible response: 77 is special because it's a prime number."""

    cleaned = _clean_response(raw)

    assert "Since there's no" not in cleaned
    assert "Here's a possible" not in cleaned
    assert "77 is special" in cleaned


def test_clean_response_preserves_normal_text():
    """Artifact-free text comes back exactly as it went in."""
    from timmy.session import _clean_response

    plain = "The number 77 is the sum of the first seven primes: 2+3+5+7+11+13+17."

    assert _clean_response(plain) == plain


def test_clean_response_handles_empty_string():
    """An empty string comes back as an empty string."""
    from timmy.session import _clean_response

    assert _clean_response("") == ""


def test_clean_response_handles_none():
    """``None`` comes back as ``None``."""
    from timmy.session import _clean_response

    assert _clean_response(None) is None


# ---------------------------------------------------------------------------
# reset_session()
# ---------------------------------------------------------------------------


def test_reset_session_clears_context():
    """reset_session() asks the conversation manager to clear that session's context."""
    with patch("timmy.conversation.conversation_manager") as conversation_manager:
        from timmy.session import reset_session

        reset_session("test-session")

    conversation_manager.clear_context.assert_called_once_with("test-session")


# ---------------------------------------------------------------------------
# Confidence estimation integration
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_chat_passes_confidence_to_record_message():
    """chat() estimates confidence and hands it to record_message."""
    agent = MagicMock()
    agent.arun = AsyncMock(return_value=MagicMock(content="This is a confident answer."))
    session_log = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=agent),
        patch("timmy.session.estimate_confidence", return_value=0.85) as estimator,
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import chat

        await chat("test message")

    estimator.assert_called_once_with("This is a confident answer.")

    # Check that record_message was called with confidence
    recorded = session_log.record_message.call_args_list
    assert len(recorded) >= 2  # user message + timmy response

    # Last call should be timmy response with confidence
    last_kwargs = recorded[-1].kwargs
    assert last_kwargs.get("confidence") == 0.85
@pytest.mark.asyncio
async def test_chat_with_tools_passes_confidence_to_record_message():
    """chat_with_tools() estimates confidence and hands it to record_message."""
    agent = MagicMock()
    agent.arun = AsyncMock(return_value=MagicMock(content="Tool response here."))
    session_log = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=agent),
        patch("timmy.session.estimate_confidence", return_value=0.72) as estimator,
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import chat_with_tools

        await chat_with_tools("test message")

    estimator.assert_called_once_with("Tool response here.")

    recorded = session_log.record_message.call_args_list
    assert len(recorded) >= 2

    last_kwargs = recorded[-1].kwargs
    assert last_kwargs.get("confidence") == 0.72


@pytest.mark.asyncio
async def test_continue_chat_passes_confidence_to_record_message():
    """continue_chat() estimates confidence and hands it to record_message."""
    agent = MagicMock()
    agent.acontinue_run = AsyncMock(return_value=MagicMock(content="Continued response."))
    paused_run = MagicMock()
    session_log = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=agent),
        patch("timmy.session.estimate_confidence", return_value=0.91) as estimator,
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import continue_chat

        await continue_chat(paused_run)

    estimator.assert_called_once_with("Continued response.")

    recorded = session_log.record_message.call_args_list
    assert len(recorded) >= 1  # should have timmy response

    last_kwargs = recorded[-1].kwargs
    assert last_kwargs.get("confidence") == 0.91


# ---------------------------------------------------------------------------
# Low confidence visibility (SOUL.md requirement)
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_chat_shows_confidence_when_low():
    """chat() appends a confidence indicator when confidence is below 0.7."""
    agent = MagicMock()
    agent.arun = AsyncMock(return_value=MagicMock(content="I think maybe this might work."))
    session_log = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=agent),
        patch("timmy.session.estimate_confidence", return_value=0.45),
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import chat

        reply = await chat("test message")

    assert "[confidence: 45%]" in reply


@pytest.mark.asyncio
async def test_chat_hides_confidence_when_high():
    """chat() leaves the reply untouched when confidence is 0.7 or higher."""
    agent = MagicMock()
    agent.arun = AsyncMock(return_value=MagicMock(content="This is definitely correct."))
    session_log = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=agent),
        patch("timmy.session.estimate_confidence", return_value=0.85),
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import chat

        reply = await chat("test message")

    assert "[confidence:" not in reply
    assert reply == "This is definitely correct."
@pytest.mark.asyncio
async def test_chat_with_tools_shows_confidence_when_low():
    """chat_with_tools() puts a confidence indicator into ``.content`` when confidence is low."""
    agent = MagicMock()
    agent.arun = AsyncMock(return_value=MagicMock(content="I'm not sure about this."))
    session_log = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=agent),
        patch("timmy.session.estimate_confidence", return_value=0.55),
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import chat_with_tools

        run_output = await chat_with_tools("test message")

    assert "[confidence: 55%]" in run_output.content


@pytest.mark.asyncio
async def test_chat_with_tools_hides_confidence_when_high():
    """chat_with_tools() leaves ``.content`` untouched when confidence is high."""
    agent = MagicMock()
    agent.arun = AsyncMock(return_value=MagicMock(content="The answer is definitely 42."))
    session_log = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=agent),
        patch("timmy.session.estimate_confidence", return_value=0.92),
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import chat_with_tools

        run_output = await chat_with_tools("test message")

    assert "[confidence:" not in run_output.content
    assert run_output.content == "The answer is definitely 42."
@pytest.mark.asyncio
async def test_continue_chat_shows_confidence_when_low():
    """continue_chat() puts a confidence indicator into ``.content`` when confidence is low."""
    continued = MagicMock(content="Perhaps this is the right approach.")
    agent = MagicMock()
    agent.acontinue_run = AsyncMock(return_value=continued)
    paused_run = MagicMock()
    session_log = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=agent),
        patch("timmy.session.estimate_confidence", return_value=0.35),
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import continue_chat

        run_output = await continue_chat(paused_run)

    assert "[confidence: 35%]" in run_output.content


@pytest.mark.asyncio
async def test_continue_chat_hides_confidence_when_high():
    """continue_chat() leaves ``.content`` untouched when confidence is high."""
    continued = MagicMock(content="This is absolutely the correct solution.")
    agent = MagicMock()
    agent.acontinue_run = AsyncMock(return_value=continued)
    paused_run = MagicMock()
    session_log = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=agent),
        patch("timmy.session.estimate_confidence", return_value=0.88),
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import continue_chat

        run_output = await continue_chat(paused_run)

    assert "[confidence:" not in run_output.content
    assert run_output.content == "This is absolutely the correct solution."