diff --git a/src/timmy/session.py b/src/timmy/session.py index cfd1dce..e58bfe8 100644 --- a/src/timmy/session.py +++ b/src/timmy/session.py @@ -13,6 +13,8 @@ import re import httpx +from timmy.session_logger import get_session_logger + logger = logging.getLogger(__name__) # Default session ID for the dashboard (stable across requests) @@ -77,6 +79,10 @@ async def chat(message: str, session_id: str | None = None) -> str: """ sid = session_id or _DEFAULT_SESSION_ID agent = _get_agent() + session_logger = get_session_logger() + + # Record user message before sending to agent + session_logger.record_message("user", message) # Pre-processing: extract user facts _extract_facts(message) @@ -87,14 +93,24 @@ async def chat(message: str, session_id: str | None = None) -> str: response_text = run.content if hasattr(run, "content") else str(run) except (httpx.ConnectError, httpx.ReadError, ConnectionError) as exc: logger.error("Ollama disconnected: %s", exc) + session_logger.record_error(str(exc), context="chat") + session_logger.flush() return "Ollama appears to be disconnected. Check that ollama serve is running." except Exception as exc: logger.error("Session: agent.arun() failed: %s", exc) + session_logger.record_error(str(exc), context="chat") + session_logger.flush() return "I'm having trouble reaching my language model right now. Please try again shortly." # Post-processing: clean up any leaked tool calls or chain-of-thought response_text = _clean_response(response_text) + # Record Timmy response after getting it + session_logger.record_message("timmy", response_text) + + # Flush session logs to disk + session_logger.flush() + return response_text @@ -112,17 +128,31 @@ async def chat_with_tools(message: str, session_id: str | None = None): """ sid = session_id or _DEFAULT_SESSION_ID agent = _get_agent() + session_logger = get_session_logger() + + # Record user message before sending to agent + session_logger.record_message("user", message) + _extract_facts(message) try: - return await agent.arun(message, stream=False, session_id=sid) + run_output = await agent.arun(message, stream=False, session_id=sid) + # Record Timmy response after getting it + response_text = run_output.content if hasattr(run_output, "content") and run_output.content else "" + session_logger.record_message("timmy", response_text) + session_logger.flush() + return run_output except (httpx.ConnectError, httpx.ReadError, ConnectionError) as exc: logger.error("Ollama disconnected: %s", exc) + session_logger.record_error(str(exc), context="chat_with_tools") + session_logger.flush() return _ErrorRunOutput( "Ollama appears to be disconnected. Check that ollama serve is running." ) except Exception as exc: logger.error("Session: agent.arun() failed: %s", exc) + session_logger.record_error(str(exc), context="chat_with_tools") + session_logger.flush() # Return a duck-typed object that callers can handle uniformly return _ErrorRunOutput( "I'm having trouble reaching my language model right now. Please try again shortly." @@ -140,16 +170,26 @@ async def continue_chat(run_output, session_id: str | None = None): """ sid = session_id or _DEFAULT_SESSION_ID agent = _get_agent() + session_logger = get_session_logger() try: - return await agent.acontinue_run(run_response=run_output, stream=False, session_id=sid) + result = await agent.acontinue_run(run_response=run_output, stream=False, session_id=sid) + # Record Timmy response after getting it + response_text = result.content if hasattr(result, "content") and result.content else "" + session_logger.record_message("timmy", response_text) + session_logger.flush() + return result except (httpx.ConnectError, httpx.ReadError, ConnectionError) as exc: logger.error("Ollama disconnected: %s", exc) + session_logger.record_error(str(exc), context="continue_chat") + session_logger.flush() return _ErrorRunOutput( "Ollama appears to be disconnected. Check that ollama serve is running." ) except Exception as exc: logger.error("Session: agent.acontinue_run() failed: %s", exc) + session_logger.record_error(str(exc), context="continue_chat") + session_logger.flush() return _ErrorRunOutput(f"Error continuing run: {exc}") diff --git a/tests/timmy/test_audit_trail.py b/tests/timmy/test_audit_trail.py new file mode 100644 index 0000000..5cfb506 --- /dev/null +++ b/tests/timmy/test_audit_trail.py @@ -0,0 +1,300 @@ +"""Tests for audit trail logging in timmy.session. + +Issue #144: Add response audit trail logging. +SOUL.md requires: "Every response I generate should be logged locally with +the inputs that produced it, the sources I consulted, and the confidence +assessment I made." +""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + + +@pytest.fixture(autouse=True) +def _reset_session_singleton(): + """Reset the module-level singleton between tests.""" + import timmy.session as mod + + mod._agent = None + yield + mod._agent = None + + +# ----------------------------------------------------------------------------- +# chat() audit trail tests +# ----------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_chat_records_user_message_before_agent_call(): + """User message should be recorded before sending to agent.""" + mock_agent = MagicMock() + mock_agent.arun = AsyncMock(return_value=MagicMock(content="Hello, sir.")) + mock_session_logger = MagicMock() + + with ( + patch("timmy.session._get_agent", return_value=mock_agent), + patch("timmy.session.get_session_logger", return_value=mock_session_logger), + ): + from timmy.session import chat + + await chat("Hi Timmy") + + # Verify user message was recorded before agent call + mock_session_logger.record_message.assert_any_call("user", "Hi Timmy") + + +@pytest.mark.asyncio +async def test_chat_records_timmy_response_after_agent_call(): + """Timmy response should be recorded after getting it from agent.""" + mock_agent = MagicMock() + mock_agent.arun = AsyncMock(return_value=MagicMock(content="Hello, sir.")) + mock_session_logger = MagicMock() + + with ( + patch("timmy.session._get_agent", return_value=mock_agent), + patch("timmy.session.get_session_logger", return_value=mock_session_logger), + ): + from timmy.session import chat + + await chat("Hi Timmy") + + # Verify Timmy response was recorded after agent call + mock_session_logger.record_message.assert_any_call("timmy", "Hello, sir.") + + +@pytest.mark.asyncio +async def test_chat_records_error_on_exception(): + """Errors should be recorded when exceptions occur.""" + mock_agent = MagicMock() + mock_agent.arun = AsyncMock(side_effect=Exception("LLM failure")) + mock_session_logger = MagicMock() + + with ( + patch("timmy.session._get_agent", return_value=mock_agent), + patch("timmy.session.get_session_logger", return_value=mock_session_logger), + ): + from timmy.session import chat + + result = await chat("Hi Timmy") + + # Verify error was recorded + mock_session_logger.record_error.assert_called_once() + call_args = mock_session_logger.record_error.call_args + assert "LLM failure" in call_args[0][0] + assert call_args[1].get("context") == "chat" + + +@pytest.mark.asyncio +async def test_chat_records_error_on_connection_error(): + """Connection errors should be recorded with context.""" + import httpx + + mock_agent = MagicMock() + mock_agent.arun = AsyncMock(side_effect=httpx.ConnectError("Connection refused")) + mock_session_logger = MagicMock() + + with ( + patch("timmy.session._get_agent", return_value=mock_agent), + patch("timmy.session.get_session_logger", return_value=mock_session_logger), + ): + from timmy.session import chat + + result = await chat("Hi Timmy") + + # Verify error was recorded + mock_session_logger.record_error.assert_called_once() + call_args = mock_session_logger.record_error.call_args + assert "Connection refused" in call_args[0][0] + assert call_args[1].get("context") == "chat" + + +@pytest.mark.asyncio +async def test_chat_calls_flush_at_end(): + """flush() should be called at the end of successful chat.""" + mock_agent = MagicMock() + mock_agent.arun = AsyncMock(return_value=MagicMock(content="Hello, sir.")) + mock_session_logger = MagicMock() + + with ( + patch("timmy.session._get_agent", return_value=mock_agent), + patch("timmy.session.get_session_logger", return_value=mock_session_logger), + ): + from timmy.session import chat + + await chat("Hi Timmy") + + # Verify flush was called + mock_session_logger.flush.assert_called_once() + + +@pytest.mark.asyncio +async def test_chat_calls_flush_on_error(): + """flush() should be called even when errors occur.""" + mock_agent = MagicMock() + mock_agent.arun = AsyncMock(side_effect=Exception("LLM failure")) + mock_session_logger = MagicMock() + + with ( + patch("timmy.session._get_agent", return_value=mock_agent), + patch("timmy.session.get_session_logger", return_value=mock_session_logger), + ): + from timmy.session import chat + + await chat("Hi Timmy") + + # Verify flush was called even on error + mock_session_logger.flush.assert_called_once() + + +# ----------------------------------------------------------------------------- +# chat_with_tools() audit trail tests +# ----------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_chat_with_tools_records_user_message(): + """User message should be recorded before sending to agent.""" + mock_agent = MagicMock() + mock_agent.arun = AsyncMock(return_value=MagicMock(content="Tool result here")) + mock_session_logger = MagicMock() + + with ( + patch("timmy.session._get_agent", return_value=mock_agent), + patch("timmy.session.get_session_logger", return_value=mock_session_logger), + ): + from timmy.session import chat_with_tools + + await chat_with_tools("Use a tool") + + # Verify user message was recorded + mock_session_logger.record_message.assert_any_call("user", "Use a tool") + + +@pytest.mark.asyncio +async def test_chat_with_tools_records_timmy_response(): + """Timmy response should be recorded after getting it from agent.""" + mock_agent = MagicMock() + mock_agent.arun = AsyncMock(return_value=MagicMock(content="Tool result here")) + mock_session_logger = MagicMock() + + with ( + patch("timmy.session._get_agent", return_value=mock_agent), + patch("timmy.session.get_session_logger", return_value=mock_session_logger), + ): + from timmy.session import chat_with_tools + + await chat_with_tools("Use a tool") + + # Verify Timmy response was recorded + mock_session_logger.record_message.assert_any_call("timmy", "Tool result here") + + +@pytest.mark.asyncio +async def test_chat_with_tools_records_error(): + """Errors should be recorded when exceptions occur.""" + mock_agent = MagicMock() + mock_agent.arun = AsyncMock(side_effect=Exception("Tool failure")) + mock_session_logger = MagicMock() + + with ( + patch("timmy.session._get_agent", return_value=mock_agent), + patch("timmy.session.get_session_logger", return_value=mock_session_logger), + ): + from timmy.session import chat_with_tools + + result = await chat_with_tools("Use a tool") + + # Verify error was recorded + mock_session_logger.record_error.assert_called_once() + call_args = mock_session_logger.record_error.call_args + assert "Tool failure" in call_args[0][0] + assert call_args[1].get("context") == "chat_with_tools" + + +@pytest.mark.asyncio +async def test_chat_with_tools_calls_flush(): + """flush() should be called at the end of chat_with_tools.""" + mock_agent = MagicMock() + mock_agent.arun = AsyncMock(return_value=MagicMock(content="Tool result")) + mock_session_logger = MagicMock() + + with ( + patch("timmy.session._get_agent", return_value=mock_agent), + patch("timmy.session.get_session_logger", return_value=mock_session_logger), + ): + from timmy.session import chat_with_tools + + await chat_with_tools("Use a tool") + + # Verify flush was called + mock_session_logger.flush.assert_called_once() + + +# ----------------------------------------------------------------------------- +# continue_chat() audit trail tests +# ----------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_continue_chat_records_timmy_response(): + """Timmy response should be recorded after continuing chat.""" + mock_agent = MagicMock() + mock_agent.acontinue_run = AsyncMock(return_value=MagicMock(content="Continued result")) + mock_session_logger = MagicMock() + mock_run_output = MagicMock() + + with ( + patch("timmy.session._get_agent", return_value=mock_agent), + patch("timmy.session.get_session_logger", return_value=mock_session_logger), + ): + from timmy.session import continue_chat + + await continue_chat(mock_run_output) + + # Verify Timmy response was recorded + mock_session_logger.record_message.assert_called_once_with("timmy", "Continued result") + + +@pytest.mark.asyncio +async def test_continue_chat_records_error(): + """Errors should be recorded when exceptions occur in continue_chat.""" + mock_agent = MagicMock() + mock_agent.acontinue_run = AsyncMock(side_effect=Exception("Continue failed")) + mock_session_logger = MagicMock() + mock_run_output = MagicMock() + + with ( + patch("timmy.session._get_agent", return_value=mock_agent), + patch("timmy.session.get_session_logger", return_value=mock_session_logger), + ): + from timmy.session import continue_chat + + result = await continue_chat(mock_run_output) + + # Verify error was recorded + mock_session_logger.record_error.assert_called_once() + call_args = mock_session_logger.record_error.call_args + assert "Continue failed" in call_args[0][0] + assert call_args[1].get("context") == "continue_chat" + + +@pytest.mark.asyncio +async def test_continue_chat_calls_flush(): + """flush() should be called at the end of continue_chat.""" + mock_agent = MagicMock() + mock_agent.acontinue_run = AsyncMock(return_value=MagicMock(content="Continued")) + mock_session_logger = MagicMock() + mock_run_output = MagicMock() + + with ( + patch("timmy.session._get_agent", return_value=mock_agent), + patch("timmy.session.get_session_logger", return_value=mock_session_logger), + ): + from timmy.session import continue_chat + + await continue_chat(mock_run_output) + + # Verify flush was called + mock_session_logger.flush.assert_called_once()