forked from Rockachopa/Timmy-time-dashboard
306 lines
10 KiB
Python
306 lines
10 KiB
Python
"""Tests for audit trail logging in timmy.session.
|
|
|
|
Issue #144: Add response audit trail logging.
|
|
SOUL.md requires: "Every response I generate should be logged locally with
|
|
the inputs that produced it, the sources I consulted, and the confidence
|
|
assessment I made."
|
|
"""
|
|
|
|
from unittest.mock import AsyncMock, MagicMock, patch
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def _reset_session_singleton():
|
|
"""Reset the module-level singleton between tests."""
|
|
import timmy.session as mod
|
|
|
|
mod._agent = None
|
|
yield
|
|
mod._agent = None
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# chat() audit trail tests
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_chat_records_user_message_before_agent_call():
|
|
"""User message should be recorded before sending to agent."""
|
|
mock_agent = MagicMock()
|
|
mock_agent.arun = AsyncMock(return_value=MagicMock(content="Hello, sir."))
|
|
mock_session_logger = MagicMock()
|
|
|
|
with (
|
|
patch("timmy.session._get_agent", return_value=mock_agent),
|
|
patch("timmy.session.get_session_logger", return_value=mock_session_logger),
|
|
):
|
|
from timmy.session import chat
|
|
|
|
await chat("Hi Timmy")
|
|
|
|
# Verify user message was recorded before agent call
|
|
mock_session_logger.record_message.assert_any_call("user", "Hi Timmy")
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_chat_records_timmy_response_after_agent_call():
|
|
"""Timmy response should be recorded after getting it from agent."""
|
|
mock_agent = MagicMock()
|
|
mock_agent.arun = AsyncMock(return_value=MagicMock(content="Hello, sir."))
|
|
mock_session_logger = MagicMock()
|
|
|
|
with (
|
|
patch("timmy.session._get_agent", return_value=mock_agent),
|
|
patch("timmy.session.get_session_logger", return_value=mock_session_logger),
|
|
patch("timmy.session.estimate_confidence", return_value=0.75),
|
|
):
|
|
from timmy.session import chat
|
|
|
|
await chat("Hi Timmy")
|
|
|
|
# Verify Timmy response was recorded after agent call with confidence
|
|
mock_session_logger.record_message.assert_any_call("timmy", "Hello, sir.", confidence=0.75)
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_chat_records_error_on_exception():
|
|
"""Errors should be recorded when exceptions occur."""
|
|
mock_agent = MagicMock()
|
|
mock_agent.arun = AsyncMock(side_effect=Exception("LLM failure"))
|
|
mock_session_logger = MagicMock()
|
|
|
|
with (
|
|
patch("timmy.session._get_agent", return_value=mock_agent),
|
|
patch("timmy.session.get_session_logger", return_value=mock_session_logger),
|
|
):
|
|
from timmy.session import chat
|
|
|
|
await chat("Hi Timmy")
|
|
|
|
# Verify error was recorded
|
|
mock_session_logger.record_error.assert_called_once()
|
|
call_args = mock_session_logger.record_error.call_args
|
|
assert "LLM failure" in call_args[0][0]
|
|
assert call_args[1].get("context") == "chat"
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_chat_records_error_on_connection_error():
|
|
"""Connection errors should be recorded with context."""
|
|
import httpx
|
|
|
|
mock_agent = MagicMock()
|
|
mock_agent.arun = AsyncMock(side_effect=httpx.ConnectError("Connection refused"))
|
|
mock_session_logger = MagicMock()
|
|
|
|
with (
|
|
patch("timmy.session._get_agent", return_value=mock_agent),
|
|
patch("timmy.session.get_session_logger", return_value=mock_session_logger),
|
|
):
|
|
from timmy.session import chat
|
|
|
|
await chat("Hi Timmy")
|
|
|
|
# Verify error was recorded
|
|
mock_session_logger.record_error.assert_called_once()
|
|
call_args = mock_session_logger.record_error.call_args
|
|
assert "Connection refused" in call_args[0][0]
|
|
assert call_args[1].get("context") == "chat"
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_chat_calls_flush_at_end():
|
|
"""flush() should be called at the end of successful chat."""
|
|
mock_agent = MagicMock()
|
|
mock_agent.arun = AsyncMock(return_value=MagicMock(content="Hello, sir."))
|
|
mock_session_logger = MagicMock()
|
|
|
|
with (
|
|
patch("timmy.session._get_agent", return_value=mock_agent),
|
|
patch("timmy.session.get_session_logger", return_value=mock_session_logger),
|
|
):
|
|
from timmy.session import chat
|
|
|
|
await chat("Hi Timmy")
|
|
|
|
# Verify flush was called
|
|
mock_session_logger.flush.assert_called_once()
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_chat_calls_flush_on_error():
|
|
"""flush() should be called even when errors occur."""
|
|
mock_agent = MagicMock()
|
|
mock_agent.arun = AsyncMock(side_effect=Exception("LLM failure"))
|
|
mock_session_logger = MagicMock()
|
|
|
|
with (
|
|
patch("timmy.session._get_agent", return_value=mock_agent),
|
|
patch("timmy.session.get_session_logger", return_value=mock_session_logger),
|
|
):
|
|
from timmy.session import chat
|
|
|
|
await chat("Hi Timmy")
|
|
|
|
# Verify flush was called even on error
|
|
mock_session_logger.flush.assert_called_once()
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# chat_with_tools() audit trail tests
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_chat_with_tools_records_user_message():
|
|
"""User message should be recorded before sending to agent."""
|
|
mock_agent = MagicMock()
|
|
mock_agent.arun = AsyncMock(return_value=MagicMock(content="Tool result here"))
|
|
mock_session_logger = MagicMock()
|
|
|
|
with (
|
|
patch("timmy.session._get_agent", return_value=mock_agent),
|
|
patch("timmy.session.get_session_logger", return_value=mock_session_logger),
|
|
):
|
|
from timmy.session import chat_with_tools
|
|
|
|
await chat_with_tools("Use a tool")
|
|
|
|
# Verify user message was recorded
|
|
mock_session_logger.record_message.assert_any_call("user", "Use a tool")
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_chat_with_tools_records_timmy_response():
|
|
"""Timmy response should be recorded after getting it from agent."""
|
|
mock_agent = MagicMock()
|
|
mock_agent.arun = AsyncMock(return_value=MagicMock(content="Tool result here"))
|
|
mock_session_logger = MagicMock()
|
|
|
|
with (
|
|
patch("timmy.session._get_agent", return_value=mock_agent),
|
|
patch("timmy.session.get_session_logger", return_value=mock_session_logger),
|
|
patch("timmy.session.estimate_confidence", return_value=0.75),
|
|
):
|
|
from timmy.session import chat_with_tools
|
|
|
|
await chat_with_tools("Use a tool")
|
|
|
|
# Verify Timmy response was recorded with confidence
|
|
mock_session_logger.record_message.assert_any_call("timmy", "Tool result here", confidence=0.75)
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_chat_with_tools_records_error():
|
|
"""Errors should be recorded when exceptions occur."""
|
|
mock_agent = MagicMock()
|
|
mock_agent.arun = AsyncMock(side_effect=Exception("Tool failure"))
|
|
mock_session_logger = MagicMock()
|
|
|
|
with (
|
|
patch("timmy.session._get_agent", return_value=mock_agent),
|
|
patch("timmy.session.get_session_logger", return_value=mock_session_logger),
|
|
):
|
|
from timmy.session import chat_with_tools
|
|
|
|
await chat_with_tools("Use a tool")
|
|
|
|
# Verify error was recorded
|
|
mock_session_logger.record_error.assert_called_once()
|
|
call_args = mock_session_logger.record_error.call_args
|
|
assert "Tool failure" in call_args[0][0]
|
|
assert call_args[1].get("context") == "chat_with_tools"
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_chat_with_tools_calls_flush():
|
|
"""flush() should be called at the end of chat_with_tools."""
|
|
mock_agent = MagicMock()
|
|
mock_agent.arun = AsyncMock(return_value=MagicMock(content="Tool result"))
|
|
mock_session_logger = MagicMock()
|
|
|
|
with (
|
|
patch("timmy.session._get_agent", return_value=mock_agent),
|
|
patch("timmy.session.get_session_logger", return_value=mock_session_logger),
|
|
):
|
|
from timmy.session import chat_with_tools
|
|
|
|
await chat_with_tools("Use a tool")
|
|
|
|
# Verify flush was called
|
|
mock_session_logger.flush.assert_called_once()
|
|
|
|
|
|
# -----------------------------------------------------------------------------
|
|
# continue_chat() audit trail tests
|
|
# -----------------------------------------------------------------------------
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_continue_chat_records_timmy_response():
|
|
"""Timmy response should be recorded after continuing chat."""
|
|
mock_agent = MagicMock()
|
|
mock_agent.acontinue_run = AsyncMock(return_value=MagicMock(content="Continued result"))
|
|
mock_session_logger = MagicMock()
|
|
mock_run_output = MagicMock()
|
|
|
|
with (
|
|
patch("timmy.session._get_agent", return_value=mock_agent),
|
|
patch("timmy.session.get_session_logger", return_value=mock_session_logger),
|
|
patch("timmy.session.estimate_confidence", return_value=0.75),
|
|
):
|
|
from timmy.session import continue_chat
|
|
|
|
await continue_chat(mock_run_output)
|
|
|
|
# Verify Timmy response was recorded with confidence
|
|
mock_session_logger.record_message.assert_called_once_with(
|
|
"timmy", "Continued result", confidence=0.75
|
|
)
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_continue_chat_records_error():
|
|
"""Errors should be recorded when exceptions occur in continue_chat."""
|
|
mock_agent = MagicMock()
|
|
mock_agent.acontinue_run = AsyncMock(side_effect=Exception("Continue failed"))
|
|
mock_session_logger = MagicMock()
|
|
mock_run_output = MagicMock()
|
|
|
|
with (
|
|
patch("timmy.session._get_agent", return_value=mock_agent),
|
|
patch("timmy.session.get_session_logger", return_value=mock_session_logger),
|
|
):
|
|
from timmy.session import continue_chat
|
|
|
|
await continue_chat(mock_run_output)
|
|
|
|
# Verify error was recorded
|
|
mock_session_logger.record_error.assert_called_once()
|
|
call_args = mock_session_logger.record_error.call_args
|
|
assert "Continue failed" in call_args[0][0]
|
|
assert call_args[1].get("context") == "continue_chat"
|
|
|
|
|
|
@pytest.mark.asyncio
|
|
async def test_continue_chat_calls_flush():
|
|
"""flush() should be called at the end of continue_chat."""
|
|
mock_agent = MagicMock()
|
|
mock_agent.acontinue_run = AsyncMock(return_value=MagicMock(content="Continued"))
|
|
mock_session_logger = MagicMock()
|
|
mock_run_output = MagicMock()
|
|
|
|
with (
|
|
patch("timmy.session._get_agent", return_value=mock_agent),
|
|
patch("timmy.session.get_session_logger", return_value=mock_session_logger),
|
|
):
|
|
from timmy.session import continue_chat
|
|
|
|
await continue_chat(mock_run_output)
|
|
|
|
# Verify flush was called
|
|
mock_session_logger.flush.assert_called_once()
|