1
0
This repository has been archived on 2026-03-24. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Timmy-time-dashboard/tests/timmy/test_agent_retry.py

167 lines
5.4 KiB
Python

"""Tests for agent retry logic on transient errors."""
from unittest.mock import MagicMock, call, patch

import httpx
import pytest

from timmy.agents.base import SubAgent
@pytest.fixture
def sub_agent():
    """Build a ``SubAgent`` whose underlying ``agent`` attribute is a mock.

    Returns:
        A ``SubAgent`` constructed with fixed test settings, with ``.agent``
        replaced by a ``MagicMock`` so each test can script ``agent.run``.
    """
    settings = {
        "agent_id": "test-agent",
        "name": "TestAgent",
        "role": "test",
        "system_prompt": "You are a test agent.",
        "model": "llama3.2",
    }
    instance = SubAgent(**settings)
    # Swap in a mock so tests control what agent.run returns or raises.
    instance.agent = MagicMock()
    return instance
@pytest.mark.asyncio
async def test_run_retries_on_transient_error(sub_agent):
    """Verify ``run`` returns the reply once a transient failure clears.

    The mocked ``agent.run`` raises a generic ``Exception`` on its first two
    calls and returns a response object on the third. The test asserts that
    ``sub_agent.run`` yields that response's ``content`` and that the patched
    ``asyncio.sleep`` was called with 1 and then 2, in that order.
    """
    # Arrange: two failures followed by one successful response.
    success = MagicMock()
    success.content = "Success after retries"
    sub_agent.agent.run.side_effect = [
        Exception("Ollama 500 error: XML parse error"),
        Exception("Ollama 500 error: XML parse error"),
        success,
    ]

    # Act: replace the sleep reached through timmy.agents.base with a mock so
    # the requested delays are recorded.
    with patch("timmy.agents.base.asyncio.sleep") as mock_sleep:
        result = await sub_agent.run("test message")

    # Assert
    assert result == "Success after retries"
    assert sub_agent.agent.run.call_count == 3  # 2 failures + 1 success
    # Compare the full call list: a pair of assert_any_call checks would also
    # accept the delays in the wrong order.
    assert mock_sleep.call_args_list == [call(1), call(2)]
@pytest.mark.asyncio
async def test_run_exhausts_retries(sub_agent):
    """Verify ``run`` re-raises when every attempt fails.

    The mocked ``agent.run`` always raises. The test asserts that the
    exception propagates out of ``sub_agent.run``, that ``agent.run`` was
    called three times, and that the patched ``asyncio.sleep`` was called
    with 1 and then 2, in that order.
    """
    # Arrange: every call to the underlying agent fails.
    sub_agent.agent.run.side_effect = Exception("Ollama 500 error: XML parse error")

    # Act
    with patch("timmy.agents.base.asyncio.sleep") as mock_sleep:
        with pytest.raises(Exception, match="Ollama 500 error: XML parse error"):
            await sub_agent.run("test message")

    # Assert. These checks sit outside the pytest.raises block on purpose:
    # statements placed after the raising call inside it would never run.
    assert sub_agent.agent.run.call_count == 3
    # Compare the full call list so the order of the delays is verified too.
    assert mock_sleep.call_args_list == [call(1), call(2)]
@pytest.mark.asyncio
async def test_run_retries_on_connection_error(sub_agent):
    """ConnectError is transient (GPU contention) — retries with backoff (#70).

    The mocked ``agent.run`` always raises ``httpx.ConnectError``. The test
    asserts that the error propagates after three calls and that the patched
    ``asyncio.sleep`` was called with 2 and then 4, in that order.
    """
    sub_agent.agent.run.side_effect = httpx.ConnectError("Connection refused")

    with patch("timmy.agents.base.asyncio.sleep") as mock_sleep:
        with pytest.raises(httpx.ConnectError, match="Connection refused"):
            await sub_agent.run("test message")

    # Checked outside pytest.raises so the assertions actually execute.
    assert sub_agent.agent.run.call_count == 3
    # Contention backoff: 2**1=2, then 2**2=4. The full call list is compared
    # so that a reversed sequence fails the test.
    assert mock_sleep.call_args_list == [call(2), call(4)]
@pytest.mark.asyncio
async def test_run_retries_on_read_error(sub_agent):
    """ReadError is transient (GPU contention) — retries with backoff (#70).

    The mocked ``agent.run`` always raises ``httpx.ReadError``. The test
    asserts that the error propagates after three calls and that the patched
    ``asyncio.sleep`` was called with 2 and then 4, in that order.
    """
    sub_agent.agent.run.side_effect = httpx.ReadError("Server disconnected")

    with patch("timmy.agents.base.asyncio.sleep") as mock_sleep:
        with pytest.raises(httpx.ReadError, match="Server disconnected"):
            await sub_agent.run("test message")

    # Checked outside pytest.raises so the assertions actually execute.
    assert sub_agent.agent.run.call_count == 3
    # Contention backoff: 2**1=2, then 2**2=4. The full call list is compared
    # so that a reversed sequence fails the test.
    assert mock_sleep.call_args_list == [call(2), call(4)]
@pytest.mark.asyncio
async def test_run_recovers_from_contention(sub_agent):
    """Simulate GPU contention: ReadError then success on retry (#70).

    The stand-in for ``agent.run`` raises ``httpx.ReadError`` on its first
    call and returns a response object afterwards. The test asserts the
    returned content, the number of calls, and a single sleep of 2.
    """
    attempts = []

    def flaky_run(*_args, **_kwargs):
        # Record every invocation; only the very first one fails.
        attempts.append(len(attempts) + 1)
        if len(attempts) == 1:
            raise httpx.ReadError("Server disconnected")
        reply = MagicMock()
        reply.content = "Recovered after contention"
        return reply

    sub_agent.agent.run = flaky_run

    with patch("timmy.agents.base.asyncio.sleep") as mock_sleep:
        outcome = await sub_agent.run("test message")

    assert outcome == "Recovered after contention"
    assert len(attempts) == 2
    mock_sleep.assert_called_once_with(2)  # 2**1 contention backoff
@pytest.mark.asyncio
async def test_run_logs_retry_attempts(sub_agent, caplog):
    """Verify a retry is reported through a warning-level log record.

    The stand-in for ``agent.run`` fails once and then succeeds. The test
    asserts the returned content, the number of calls, the retry text captured
    by ``caplog``, and a single sleep of 1.
    """
    import logging

    attempts = []

    def fail_once(*_args, **_kwargs):
        # Record every invocation; only the very first one fails.
        attempts.append(len(attempts) + 1)
        if len(attempts) == 1:
            raise Exception("Transient error")
        reply = MagicMock()
        reply.content = "Success"
        return reply

    sub_agent.agent.run = fail_once

    # Capture WARNING-and-above records while the agent runs, with sleep
    # replaced by a mock so the requested delay is recorded.
    with caplog.at_level(logging.WARNING):
        with patch("timmy.agents.base.asyncio.sleep") as mock_sleep:
            outcome = await sub_agent.run("test message")

    assert outcome == "Success"
    assert len(attempts) == 2  # 1 failure + 1 success
    captured = caplog.text
    assert "Agent run failed on attempt 1/3" in captured
    assert "Retrying..." in captured
    # A single retry means a single sleep of 1.
    mock_sleep.assert_called_once_with(1)