forked from Rockachopa/Timmy-time-dashboard
167 lines
5.4 KiB
Python
167 lines
5.4 KiB
Python
"""Tests for agent retry logic on transient errors."""
|
|
|
|
from unittest.mock import MagicMock, patch
|
|
|
|
import httpx
|
|
import pytest
|
|
|
|
from timmy.agents.base import SubAgent
|
|
|
|
|
|
@pytest.fixture
def sub_agent():
    """Provide a SubAgent whose ``agent`` attribute is a MagicMock.

    The SubAgent is constructed with fixed test values; its ``agent``
    attribute is then swapped for a MagicMock so each test can decide what
    ``agent.run`` does.
    """
    settings = {
        "agent_id": "test-agent",
        "name": "TestAgent",
        "role": "test",
        "system_prompt": "You are a test agent.",
        "model": "llama3.2",
    }
    stubbed = SubAgent(**settings)
    # Replace the underlying agent so that agent.run is fully controllable.
    stubbed.agent = MagicMock()
    return stubbed
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_run_retries_on_transient_error(sub_agent):
    """Two failing ``agent.run`` calls followed by a success return the success content.

    Also verifies that ``asyncio.sleep`` (patched in ``timmy.agents.base``) is
    called with 1 and then 2, in that order, between the attempts.
    """
    # Arrange: the side_effect sequence raises twice, then returns the reply.
    # The mock tracks the number of calls, so no hand-written counter is needed.
    reply = MagicMock()
    reply.content = "Success after retries"
    sub_agent.agent.run.side_effect = [
        Exception("Ollama 500 error: XML parse error"),
        Exception("Ollama 500 error: XML parse error"),
        reply,
    ]

    # Act: sleep is patched so the test does not really wait between attempts.
    with patch("timmy.agents.base.asyncio.sleep") as mock_sleep:
        result = await sub_agent.run("test message")

    # Assert
    assert result == "Success after retries"
    assert sub_agent.agent.run.call_count == 3  # 2 failures + 1 success
    # Verify exponential backoff *in order*: attempt 1 = 1s, attempt 2 = 2s.
    # (assert_any_call would also accept the delays in the wrong order.)
    assert [c.args for c in mock_sleep.call_args_list] == [(1,), (2,)]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_run_exhausts_retries(sub_agent):
    """When every ``agent.run`` call fails, the exception is raised after 3 attempts.

    Also verifies that ``asyncio.sleep`` (patched in ``timmy.agents.base``) is
    called with 1 and then 2, in that order, between the attempts.
    """
    # Arrange: always fail
    sub_agent.agent.run.side_effect = Exception("Ollama 500 error: XML parse error")

    # Act & Assert: sleep is patched so the test does not really wait.
    with patch("timmy.agents.base.asyncio.sleep") as mock_sleep:
        with pytest.raises(Exception, match="Ollama 500 error: XML parse error"):
            await sub_agent.run("test message")

    # Should have been called 3 times (max retries)
    assert sub_agent.agent.run.call_count == 3
    # Verify exponential backoff *in order*: attempt 1 = 1s, attempt 2 = 2s.
    # (assert_any_call would also accept the delays in the wrong order.)
    assert [c.args for c in mock_sleep.call_args_list] == [(1,), (2,)]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_run_retries_on_connection_error(sub_agent):
    """ConnectError is transient (GPU contention) — retries with backoff (#70).

    ``agent.run`` always raises ``httpx.ConnectError``; the test expects three
    attempts, the error to propagate, and sleeps of 2 then 4 in that order.
    """
    sub_agent.agent.run.side_effect = httpx.ConnectError("Connection refused")

    # sleep is patched so the test does not really wait between attempts.
    with patch("timmy.agents.base.asyncio.sleep") as mock_sleep:
        with pytest.raises(httpx.ConnectError, match="Connection refused"):
            await sub_agent.run("test message")

    # Should have retried all 3 attempts
    assert sub_agent.agent.run.call_count == 3
    # Contention backoff, checked in order: 2**1=2, then 2**2=4.
    # (assert_any_call would also accept the delays in the wrong order.)
    assert [c.args for c in mock_sleep.call_args_list] == [(2,), (4,)]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_run_retries_on_read_error(sub_agent):
    """ReadError is transient (GPU contention) — retries with backoff (#70).

    ``agent.run`` always raises ``httpx.ReadError``; the test expects three
    attempts, the error to propagate, and sleeps of 2 then 4 in that order.
    """
    sub_agent.agent.run.side_effect = httpx.ReadError("Server disconnected")

    # sleep is patched so the test does not really wait between attempts.
    with patch("timmy.agents.base.asyncio.sleep") as mock_sleep:
        with pytest.raises(httpx.ReadError, match="Server disconnected"):
            await sub_agent.run("test message")

    # Should have retried all 3 attempts
    assert sub_agent.agent.run.call_count == 3
    # Contention backoff, checked in order: 2**1=2, then 2**2=4.
    # (assert_any_call would also accept the delays in the wrong order.)
    assert [c.args for c in mock_sleep.call_args_list] == [(2,), (4,)]
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_run_recovers_from_contention(sub_agent):
    """Simulate GPU contention: one ReadError, then a successful retry (#70)."""
    attempts = []

    def flaky_run(*args, **kwargs):
        # Record every invocation; only the very first one fails.
        attempts.append(None)
        if len(attempts) == 1:
            raise httpx.ReadError("Server disconnected")
        return MagicMock(content="Recovered after contention")

    sub_agent.agent.run = flaky_run

    # sleep is patched so the test does not really wait before the retry.
    with patch("timmy.agents.base.asyncio.sleep") as mock_sleep:
        outcome = await sub_agent.run("test message")

    assert outcome == "Recovered after contention"
    assert len(attempts) == 2
    mock_sleep.assert_called_once_with(2)  # 2**1 contention backoff
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_run_logs_retry_attempts(sub_agent, caplog):
    """One failing ``agent.run`` call followed by a success logs a retry warning.

    Checks the returned content, the number of attempts, the captured log
    text, and that the patched ``asyncio.sleep`` was called exactly once with 1.
    """
    import logging

    # Arrange: fail on the first invocation only.
    attempts = []

    def flaky_run(*args, **kwargs):
        attempts.append(None)
        if len(attempts) == 1:
            raise Exception("Transient error")
        return MagicMock(content="Success")

    sub_agent.agent.run = flaky_run

    # Act: capture WARNING-level records and patch sleep while the agent runs.
    with caplog.at_level(logging.WARNING), patch(
        "timmy.agents.base.asyncio.sleep"
    ) as mock_sleep:
        outcome = await sub_agent.run("test message")

    # Assert
    assert outcome == "Success"
    assert len(attempts) == 2  # 1 failure + 1 success
    captured = caplog.text
    assert "Agent run failed on attempt 1/3" in captured
    assert "Retrying..." in captured
    # A single sleep of 1s is expected after the first failed attempt.
    mock_sleep.assert_called_once_with(1)
|