"""Tests for audit trail logging in timmy.session. Issue #144: Add response audit trail logging. SOUL.md requires: "Every response I generate should be logged locally with the inputs that produced it, the sources I consulted, and the confidence assessment I made." """ from unittest.mock import AsyncMock, MagicMock, patch import pytest @pytest.fixture(autouse=True) def _reset_session_singleton(): """Reset the module-level singleton between tests.""" import timmy.session as mod mod._agent = None yield mod._agent = None # ----------------------------------------------------------------------------- # chat() audit trail tests # ----------------------------------------------------------------------------- @pytest.mark.asyncio async def test_chat_records_user_message_before_agent_call(): """User message should be recorded before sending to agent.""" mock_agent = MagicMock() mock_agent.arun = AsyncMock(return_value=MagicMock(content="Hello, sir.")) mock_session_logger = MagicMock() with ( patch("timmy.session._get_agent", return_value=mock_agent), patch("timmy.session.get_session_logger", return_value=mock_session_logger), ): from timmy.session import chat await chat("Hi Timmy") # Verify user message was recorded before agent call mock_session_logger.record_message.assert_any_call("user", "Hi Timmy") @pytest.mark.asyncio async def test_chat_records_timmy_response_after_agent_call(): """Timmy response should be recorded after getting it from agent.""" mock_agent = MagicMock() mock_agent.arun = AsyncMock(return_value=MagicMock(content="Hello, sir.")) mock_session_logger = MagicMock() with ( patch("timmy.session._get_agent", return_value=mock_agent), patch("timmy.session.get_session_logger", return_value=mock_session_logger), patch("timmy.session.estimate_confidence", return_value=0.75), ): from timmy.session import chat await chat("Hi Timmy") # Verify Timmy response was recorded after agent call with confidence mock_session_logger.record_message.assert_any_call("timmy", "Hello, sir.", confidence=0.75) @pytest.mark.asyncio async def test_chat_records_error_on_exception(): """Errors should be recorded when exceptions occur.""" mock_agent = MagicMock() mock_agent.arun = AsyncMock(side_effect=Exception("LLM failure")) mock_session_logger = MagicMock() with ( patch("timmy.session._get_agent", return_value=mock_agent), patch("timmy.session.get_session_logger", return_value=mock_session_logger), ): from timmy.session import chat await chat("Hi Timmy") # Verify error was recorded mock_session_logger.record_error.assert_called_once() call_args = mock_session_logger.record_error.call_args assert "LLM failure" in call_args[0][0] assert call_args[1].get("context") == "chat" @pytest.mark.asyncio async def test_chat_records_error_on_connection_error(): """Connection errors should be recorded with context.""" import httpx mock_agent = MagicMock() mock_agent.arun = AsyncMock(side_effect=httpx.ConnectError("Connection refused")) mock_session_logger = MagicMock() with ( patch("timmy.session._get_agent", return_value=mock_agent), patch("timmy.session.get_session_logger", return_value=mock_session_logger), ): from timmy.session import chat await chat("Hi Timmy") # Verify error was recorded mock_session_logger.record_error.assert_called_once() call_args = mock_session_logger.record_error.call_args assert "Connection refused" in call_args[0][0] assert call_args[1].get("context") == "chat" @pytest.mark.asyncio async def test_chat_calls_flush_at_end(): """flush() should be called at the end of successful chat.""" mock_agent = MagicMock() mock_agent.arun = AsyncMock(return_value=MagicMock(content="Hello, sir.")) mock_session_logger = MagicMock() with ( patch("timmy.session._get_agent", return_value=mock_agent), patch("timmy.session.get_session_logger", return_value=mock_session_logger), ): from timmy.session import chat await chat("Hi Timmy") # Verify flush was called mock_session_logger.flush.assert_called_once() @pytest.mark.asyncio async def test_chat_calls_flush_on_error(): """flush() should be called even when errors occur.""" mock_agent = MagicMock() mock_agent.arun = AsyncMock(side_effect=Exception("LLM failure")) mock_session_logger = MagicMock() with ( patch("timmy.session._get_agent", return_value=mock_agent), patch("timmy.session.get_session_logger", return_value=mock_session_logger), ): from timmy.session import chat await chat("Hi Timmy") # Verify flush was called even on error mock_session_logger.flush.assert_called_once() # ----------------------------------------------------------------------------- # chat_with_tools() audit trail tests # ----------------------------------------------------------------------------- @pytest.mark.asyncio async def test_chat_with_tools_records_user_message(): """User message should be recorded before sending to agent.""" mock_agent = MagicMock() mock_agent.arun = AsyncMock(return_value=MagicMock(content="Tool result here")) mock_session_logger = MagicMock() with ( patch("timmy.session._get_agent", return_value=mock_agent), patch("timmy.session.get_session_logger", return_value=mock_session_logger), ): from timmy.session import chat_with_tools await chat_with_tools("Use a tool") # Verify user message was recorded mock_session_logger.record_message.assert_any_call("user", "Use a tool") @pytest.mark.asyncio async def test_chat_with_tools_records_timmy_response(): """Timmy response should be recorded after getting it from agent.""" mock_agent = MagicMock() mock_agent.arun = AsyncMock(return_value=MagicMock(content="Tool result here")) mock_session_logger = MagicMock() with ( patch("timmy.session._get_agent", return_value=mock_agent), patch("timmy.session.get_session_logger", return_value=mock_session_logger), patch("timmy.session.estimate_confidence", return_value=0.75), ): from timmy.session import chat_with_tools await chat_with_tools("Use a tool") # Verify Timmy response was recorded with confidence mock_session_logger.record_message.assert_any_call("timmy", "Tool result here", confidence=0.75) @pytest.mark.asyncio async def test_chat_with_tools_records_error(): """Errors should be recorded when exceptions occur.""" mock_agent = MagicMock() mock_agent.arun = AsyncMock(side_effect=Exception("Tool failure")) mock_session_logger = MagicMock() with ( patch("timmy.session._get_agent", return_value=mock_agent), patch("timmy.session.get_session_logger", return_value=mock_session_logger), ): from timmy.session import chat_with_tools await chat_with_tools("Use a tool") # Verify error was recorded mock_session_logger.record_error.assert_called_once() call_args = mock_session_logger.record_error.call_args assert "Tool failure" in call_args[0][0] assert call_args[1].get("context") == "chat_with_tools" @pytest.mark.asyncio async def test_chat_with_tools_calls_flush(): """flush() should be called at the end of chat_with_tools.""" mock_agent = MagicMock() mock_agent.arun = AsyncMock(return_value=MagicMock(content="Tool result")) mock_session_logger = MagicMock() with ( patch("timmy.session._get_agent", return_value=mock_agent), patch("timmy.session.get_session_logger", return_value=mock_session_logger), ): from timmy.session import chat_with_tools await chat_with_tools("Use a tool") # Verify flush was called mock_session_logger.flush.assert_called_once() # ----------------------------------------------------------------------------- # continue_chat() audit trail tests # ----------------------------------------------------------------------------- @pytest.mark.asyncio async def test_continue_chat_records_timmy_response(): """Timmy response should be recorded after continuing chat.""" mock_agent = MagicMock() mock_agent.acontinue_run = AsyncMock(return_value=MagicMock(content="Continued result")) mock_session_logger = MagicMock() mock_run_output = MagicMock() with ( patch("timmy.session._get_agent", return_value=mock_agent), patch("timmy.session.get_session_logger", return_value=mock_session_logger), patch("timmy.session.estimate_confidence", return_value=0.75), ): from timmy.session import continue_chat await continue_chat(mock_run_output) # Verify Timmy response was recorded with confidence mock_session_logger.record_message.assert_called_once_with( "timmy", "Continued result", confidence=0.75 ) @pytest.mark.asyncio async def test_continue_chat_records_error(): """Errors should be recorded when exceptions occur in continue_chat.""" mock_agent = MagicMock() mock_agent.acontinue_run = AsyncMock(side_effect=Exception("Continue failed")) mock_session_logger = MagicMock() mock_run_output = MagicMock() with ( patch("timmy.session._get_agent", return_value=mock_agent), patch("timmy.session.get_session_logger", return_value=mock_session_logger), ): from timmy.session import continue_chat await continue_chat(mock_run_output) # Verify error was recorded mock_session_logger.record_error.assert_called_once() call_args = mock_session_logger.record_error.call_args assert "Continue failed" in call_args[0][0] assert call_args[1].get("context") == "continue_chat" @pytest.mark.asyncio async def test_continue_chat_calls_flush(): """flush() should be called at the end of continue_chat.""" mock_agent = MagicMock() mock_agent.acontinue_run = AsyncMock(return_value=MagicMock(content="Continued")) mock_session_logger = MagicMock() mock_run_output = MagicMock() with ( patch("timmy.session._get_agent", return_value=mock_agent), patch("timmy.session.get_session_logger", return_value=mock_session_logger), ): from timmy.session import continue_chat await continue_chat(mock_run_output) # Verify flush was called mock_session_logger.flush.assert_called_once()