# Timmy-time-dashboard/tests/timmy/test_session.py

"""Tests for timmy.session — persistent chat session with response sanitization."""

from unittest.mock import AsyncMock, MagicMock, patch

import pytest

# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------


@pytest.fixture(autouse=True)
def _reset_session_singleton():
    """Clear ``timmy.session._agent`` before and after every test.

    Declared with ``autouse=True`` so it wraps each test in this module
    without being requested explicitly.
    """
    import timmy.session as session_module

    # Start the test without a cached agent.
    session_module._agent = None
    yield
    # Drop whatever agent the test may have cached.
    session_module._agent = None


# ---------------------------------------------------------------------------
# _annotate_confidence() helper
# ---------------------------------------------------------------------------


class TestAnnotateConfidence:
    """Checks for ``_annotate_confidence`` across scores around the 0.7 cut-off."""

    def test_below_threshold_adds_tag(self):
        """A score of 0.55 produces a ``[confidence: 55%]`` tag."""
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Hello world", 0.55)
        assert "[confidence: 55%]" in annotated

    def test_above_threshold_no_tag(self):
        """A score of 0.85 leaves the text exactly as it was passed in."""
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Hello world", 0.85)
        assert "[confidence:" not in annotated
        assert annotated == "Hello world"

    def test_at_threshold_no_tag(self):
        """A score of exactly 0.7 does not get a tag."""
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Hello world", 0.7)
        assert "[confidence:" not in annotated

    def test_none_confidence_no_tag(self):
        """A ``None`` score leaves the text exactly as it was passed in."""
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Hello world", None)
        assert "[confidence:" not in annotated
        assert annotated == "Hello world"

    def test_zero_confidence_adds_tag(self):
        """A score of 0.0 still yields a tag, rendered as ``0%``."""
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Hello world", 0.0)
        assert "[confidence: 0%]" in annotated

    def test_preserves_original_text(self):
        """With a low score the output still begins with the original text."""
        from timmy.session import _annotate_confidence

        annotated = _annotate_confidence("Original text here", 0.3)
        assert annotated.startswith("Original text here")


# ---------------------------------------------------------------------------
# chat()
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_chat_returns_string():
    """The value returned by chat() is a ``str`` containing the agent's reply."""
    agent_reply = MagicMock(content="Hello, sir.")
    fake_agent = MagicMock()
    fake_agent.arun = AsyncMock(return_value=agent_reply)

    with patch("timmy.session._get_agent", return_value=fake_agent):
        from timmy.session import chat

        answer = await chat("Hi Timmy")

    assert isinstance(answer, str)
    assert "Hello, sir." in answer


@pytest.mark.asyncio
async def test_chat_passes_session_id():
    """An explicit session_id given to chat() reaches agent.arun() as a keyword."""
    agent_reply = MagicMock(content="OK.")
    fake_agent = MagicMock()
    fake_agent.arun = AsyncMock(return_value=agent_reply)

    with patch("timmy.session._get_agent", return_value=fake_agent):
        from timmy.session import chat

        await chat("test", session_id="my-session")

    # call_args is an (args, kwargs) pair; index 1 is the keyword dict.
    arun_kwargs = fake_agent.arun.call_args[1]
    assert arun_kwargs["session_id"] == "my-session"


@pytest.mark.asyncio
async def test_chat_uses_default_session_id():
    """Without a session_id, chat() calls agent.arun() with ``'dashboard'``."""
    agent_reply = MagicMock(content="OK.")
    fake_agent = MagicMock()
    fake_agent.arun = AsyncMock(return_value=agent_reply)

    with patch("timmy.session._get_agent", return_value=fake_agent):
        from timmy.session import chat

        await chat("test")

    # call_args is an (args, kwargs) pair; index 1 is the keyword dict.
    arun_kwargs = fake_agent.arun.call_args[1]
    assert arun_kwargs["session_id"] == "dashboard"


@pytest.mark.asyncio
async def test_chat_singleton_agent_reused():
    """Two consecutive chat() calls trigger the agent factory only once."""
    agent_reply = MagicMock(content="OK.")
    fake_agent = MagicMock()
    fake_agent.arun = AsyncMock(return_value=agent_reply)

    with patch("timmy.agent.create_timmy", return_value=fake_agent) as factory:
        from timmy.session import chat

        for text in ("first message", "second message"):
            await chat(text)

    # A second factory call would mean the agent was rebuilt instead of reused.
    factory.assert_called_once()


@pytest.mark.asyncio
async def test_chat_extracts_user_name():
    """A "my name is ..." message makes chat() store the name as a user fact."""
    agent_reply = MagicMock(content="Nice to meet you!")
    fake_agent = MagicMock()
    fake_agent.arun = AsyncMock(return_value=agent_reply)

    fake_memory = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=fake_agent),
        patch("timmy.memory_system.memory_system", fake_memory),
    ):
        from timmy.session import chat

        await chat("my name is Alex")

    fake_memory.update_user_fact.assert_called_once_with("Name", "Alex")


@pytest.mark.asyncio
async def test_chat_graceful_degradation_on_memory_failure():
    """chat() still returns the agent's reply when name extraction raises."""
    agent_reply = MagicMock(content="I'm operational.")
    fake_agent = MagicMock()
    fake_agent.arun = AsyncMock(return_value=agent_reply)

    with (
        patch("timmy.session._get_agent", return_value=fake_agent),
        patch("timmy.conversation.conversation_manager") as broken_manager,
    ):
        # Make the patched conversation manager fail on name extraction.
        broken_manager.configure_mock(
            **{"extract_user_name.side_effect": Exception("memory broken")}
        )

        from timmy.session import chat

        answer = await chat("test message")

    assert "operational" in answer


# ---------------------------------------------------------------------------
# _clean_response()
# ---------------------------------------------------------------------------


def test_clean_response_strips_json_tool_calls():
    """A JSON tool call using the "parameters" key is stripped from the text."""
    from timmy.session import _clean_response

    raw_text = (
        "Here is the answer. "
        '{"name": "python", "parameters": {"code": "0.15 * 3847.23", "variable_to_return": "result"}}'
        " The result is 577."
    )
    cleaned = _clean_response(raw_text)

    for artifact in ('{"name"', '"parameters"'):
        assert artifact not in cleaned
    assert "The result is 577." in cleaned


def test_clean_response_strips_arguments_format():
    """A JSON tool call using the "arguments" key (OpenAI format) is stripped too."""
    from timmy.session import _clean_response

    raw_text = (
        "Here is the result. "
        '{"name": "shell", "arguments": {"args": ["mkdir", "-p", "/tmp/test"]}}'
        " The directory was created."
    )
    cleaned = _clean_response(raw_text)

    for artifact in ('{"name"', '"arguments"'):
        assert artifact not in cleaned
    assert "The directory was created." in cleaned


def test_clean_response_strips_function_calls():
    """Text that looks like a function call is stripped; later prose is kept."""
    from timmy.session import _clean_response

    raw_text = (
        "I will search for that. "
        'memory_search(query="recall number")'
        " Found nothing."
    )
    cleaned = _clean_response(raw_text)

    assert "memory_search(" not in cleaned
    assert "Found nothing." in cleaned


def test_clean_response_strips_chain_of_thought():
    """Narration lines about the model's own reasoning are dropped; the answer stays."""
    from timmy.session import _clean_response

    narrated_lines = (
        "Since there's no direct answer in my vault or hot memory, I'll use memory_search.",
        'Using memory_search(query="what is special"), I found a context.',
        "Here's a possible response:",
        "77 is special because it's a prime number.",
    )
    cleaned = _clean_response("\n".join(narrated_lines))

    for narration in ("Since there's no", "Here's a possible"):
        assert narration not in cleaned
    assert "77 is special" in cleaned


def test_clean_response_preserves_normal_text():
    """Text with no tool artifacts comes back unchanged."""
    from timmy.session import _clean_response

    plain_text = "The number 77 is the sum of the first seven primes: 2+3+5+7+11+13+17."
    cleaned = _clean_response(plain_text)
    assert cleaned == plain_text


def test_clean_response_handles_empty_string():
    """An empty string comes back as an empty string."""
    from timmy.session import _clean_response

    cleaned = _clean_response("")
    assert cleaned == ""


def test_clean_response_handles_none():
    """A ``None`` input comes back as ``None``."""
    from timmy.session import _clean_response

    cleaned = _clean_response(None)
    assert cleaned is None


# ---------------------------------------------------------------------------
# reset_session()
# ---------------------------------------------------------------------------


def test_reset_session_clears_context():
    """reset_session() calls clear_context() on the conversation manager with the session id."""
    session_id = "test-session"

    with patch("timmy.conversation.conversation_manager") as fake_manager:
        from timmy.session import reset_session

        reset_session(session_id)

    fake_manager.clear_context.assert_called_once_with(session_id)


# ---------------------------------------------------------------------------
# Confidence estimation integration
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_chat_passes_confidence_to_record_message():
    """chat() scores the reply and passes that score to record_message()."""
    reply_text = "This is a confident answer."
    fake_agent = MagicMock()
    fake_agent.arun = AsyncMock(return_value=MagicMock(content=reply_text))
    session_log = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=fake_agent),
        patch("timmy.session.estimate_confidence", return_value=0.85) as estimator,
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import chat

        await chat("test message")

    estimator.assert_called_once_with(reply_text)
    recorded = session_log.record_message.call_args_list
    # Expect at least the user message followed by Timmy's response.
    assert len(recorded) >= 2
    # The final recorded message is Timmy's response and carries the score.
    final_kwargs = recorded[-1][1]
    assert final_kwargs.get("confidence") == 0.85


@pytest.mark.asyncio
async def test_chat_with_tools_passes_confidence_to_record_message():
    """chat_with_tools() scores the reply and passes that score to record_message()."""
    reply_text = "Tool response here."
    fake_agent = MagicMock()
    fake_agent.arun = AsyncMock(return_value=MagicMock(content=reply_text))
    session_log = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=fake_agent),
        patch("timmy.session.estimate_confidence", return_value=0.72) as estimator,
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import chat_with_tools

        await chat_with_tools("test message")

    estimator.assert_called_once_with(reply_text)
    recorded = session_log.record_message.call_args_list
    assert len(recorded) >= 2
    # The final recorded message carries the estimated score.
    final_kwargs = recorded[-1][1]
    assert final_kwargs.get("confidence") == 0.72


@pytest.mark.asyncio
async def test_continue_chat_passes_confidence_to_record_message():
    """continue_chat() scores the reply and passes that score to record_message()."""
    reply_text = "Continued response."
    fake_agent = MagicMock()
    fake_agent.acontinue_run = AsyncMock(return_value=MagicMock(content=reply_text))
    session_log = MagicMock()

    paused_run = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=fake_agent),
        patch("timmy.session.estimate_confidence", return_value=0.91) as estimator,
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import continue_chat

        await continue_chat(paused_run)

    estimator.assert_called_once_with(reply_text)
    recorded = session_log.record_message.call_args_list
    # At minimum Timmy's response must have been recorded.
    assert len(recorded) >= 1
    final_kwargs = recorded[-1][1]
    assert final_kwargs.get("confidence") == 0.91


# ---------------------------------------------------------------------------
# Low confidence visibility (SOUL.md requirement)
# ---------------------------------------------------------------------------


@pytest.mark.asyncio
async def test_chat_shows_confidence_when_low():
    """With an estimated confidence of 0.45, chat() includes ``[confidence: 45%]``."""
    agent_reply = MagicMock(content="I think maybe this might work.")
    fake_agent = MagicMock()
    fake_agent.arun = AsyncMock(return_value=agent_reply)
    session_log = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=fake_agent),
        patch("timmy.session.estimate_confidence", return_value=0.45),
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import chat

        answer = await chat("test message")

    assert "[confidence: 45%]" in answer


@pytest.mark.asyncio
async def test_chat_hides_confidence_when_high():
    """With an estimated confidence of 0.85, chat() returns the reply with no tag."""
    reply_text = "This is definitely correct."
    fake_agent = MagicMock()
    fake_agent.arun = AsyncMock(return_value=MagicMock(content=reply_text))
    session_log = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=fake_agent),
        patch("timmy.session.estimate_confidence", return_value=0.85),
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import chat

        answer = await chat("test message")

    assert "[confidence:" not in answer
    assert answer == "This is definitely correct."


@pytest.mark.asyncio
async def test_chat_with_tools_shows_confidence_when_low():
    """With an estimated confidence of 0.55, the result's content includes ``[confidence: 55%]``."""
    agent_reply = MagicMock(content="I'm not sure about this.")
    fake_agent = MagicMock()
    fake_agent.arun = AsyncMock(return_value=agent_reply)
    session_log = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=fake_agent),
        patch("timmy.session.estimate_confidence", return_value=0.55),
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import chat_with_tools

        run_result = await chat_with_tools("test message")

    assert "[confidence: 55%]" in run_result.content


@pytest.mark.asyncio
async def test_chat_with_tools_hides_confidence_when_high():
    """With an estimated confidence of 0.92, the result's content has no tag."""
    reply_text = "The answer is definitely 42."
    fake_agent = MagicMock()
    fake_agent.arun = AsyncMock(return_value=MagicMock(content=reply_text))
    session_log = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=fake_agent),
        patch("timmy.session.estimate_confidence", return_value=0.92),
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import chat_with_tools

        run_result = await chat_with_tools("test message")

    assert "[confidence:" not in run_result.content
    assert run_result.content == "The answer is definitely 42."


@pytest.mark.asyncio
async def test_continue_chat_shows_confidence_when_low():
    """With an estimated confidence of 0.35, the result's content includes ``[confidence: 35%]``."""
    continued_reply = MagicMock(content="Perhaps this is the right approach.")
    fake_agent = MagicMock()
    fake_agent.acontinue_run = AsyncMock(return_value=continued_reply)
    session_log = MagicMock()

    paused_run = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=fake_agent),
        patch("timmy.session.estimate_confidence", return_value=0.35),
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import continue_chat

        run_result = await continue_chat(paused_run)

    assert "[confidence: 35%]" in run_result.content


@pytest.mark.asyncio
async def test_continue_chat_hides_confidence_when_high():
    """With an estimated confidence of 0.88, the result's content has no tag."""
    continued_reply = MagicMock(content="This is absolutely the correct solution.")
    fake_agent = MagicMock()
    fake_agent.acontinue_run = AsyncMock(return_value=continued_reply)
    session_log = MagicMock()

    paused_run = MagicMock()

    with (
        patch("timmy.session._get_agent", return_value=fake_agent),
        patch("timmy.session.estimate_confidence", return_value=0.88),
        patch("timmy.session.get_session_logger", return_value=session_log),
    ):
        from timmy.session import continue_chat

        run_result = await continue_chat(paused_run)

    assert "[confidence:" not in run_result.content
    assert run_result.content == "This is absolutely the correct solution."