[loop-cycle-60] fix: retry with backoff on Ollama GPU contention (#70) (#238)

2026-03-15 14:28:47 -04:00
parent 7f656fcf22
commit f9911c002c
6 changed files with 178 additions and 51 deletions
--- a/src/timmy/agent.py
+++ b/src/timmy/agent.py
@@ -340,15 +340,47 @@ class TimmyWithMemory:
        self.initial_context = self.memory.get_system_context()

    def chat(self, message: str) -> str:
-        """Simple chat interface that tracks in memory."""
+        """Simple chat interface that tracks in memory.
+
+        Retries on transient Ollama errors (GPU contention, timeouts)
+        with exponential backoff (#70).
+        """
+        import time
+
        # Check for user facts to extract
        self._extract_and_store_facts(message)

-        # Run agent
-        result = self.agent.run(message, stream=False)
-        response_text = result.content if hasattr(result, "content") else str(result)
-
-        return response_text
+        # Retry with backoff — GPU contention causes ReadError/ReadTimeout
+        max_retries = 3
+        for attempt in range(1, max_retries + 1):
+            try:
+                result = self.agent.run(message, stream=False)
+                return result.content if hasattr(result, "content") else str(result)
+            except (
+                httpx.ConnectError,
+                httpx.ReadError,
+                httpx.ReadTimeout,
+                httpx.ConnectTimeout,
+                ConnectionError,
+                TimeoutError,
+            ) as exc:
+                if attempt < max_retries:
+                    wait = min(2**attempt, 16)
+                    logger.warning(
+                        "Ollama contention on attempt %d/%d: %s. Waiting %ds before retry...",
+                        attempt,
+                        max_retries,
+                        type(exc).__name__,
+                        wait,
+                    )
+                    time.sleep(wait)
+                else:
+                    logger.error(
+                        "Ollama unreachable after %d attempts: %s",
+                        max_retries,
+                        exc,
+                    )
+                    raise

    def _extract_and_store_facts(self, message: str) -> None:
        """Extract user facts from message and store in memory."""
--- a/src/timmy/agents/base.py
+++ b/src/timmy/agents/base.py
@@ -122,20 +122,53 @@ class BaseAgent(ABC):
    async def run(self, message: str) -> str:
        """Run the agent with a message.

+        Retries on transient failures (connection errors, timeouts) with
+        exponential backoff.  GPU contention from concurrent Ollama
+        requests causes ReadError / ReadTimeout — these are transient
+        and should be retried, not raised immediately (#70).
+
        Returns:
            Agent response
        """
        max_retries = 3
        last_exception = None
+        # Transient errors that indicate Ollama contention or temporary
+        # unavailability — these deserve a retry with backoff.
+        _transient = (
+            httpx.ConnectError,
+            httpx.ReadError,
+            httpx.ReadTimeout,
+            httpx.ConnectTimeout,
+            ConnectionError,
+            TimeoutError,
+        )

        for attempt in range(1, max_retries + 1):
            try:
                result = self.agent.run(message, stream=False)
                response = result.content if hasattr(result, "content") else str(result)
                break  # Success, exit the retry loop
-            except (httpx.ConnectError, httpx.ReadError, ConnectionError) as exc:
-                logger.error("Ollama disconnected: %s", exc)
-                raise
+            except _transient as exc:
+                last_exception = exc
+                if attempt < max_retries:
+                    # Contention backoff — longer waits because the GPU
+                    # needs time to finish the other request.
+                    wait = min(2**attempt, 16)
+                    logger.warning(
+                        "Ollama contention on attempt %d/%d: %s. Waiting %ds before retry...",
+                        attempt,
+                        max_retries,
+                        type(exc).__name__,
+                        wait,
+                    )
+                    await asyncio.sleep(wait)
+                else:
+                    logger.error(
+                        "Ollama unreachable after %d attempts: %s",
+                        max_retries,
+                        exc,
+                    )
+                    raise  # bare re-raise; "from exc" would make it its own __cause__
            except Exception as exc:
                last_exception = exc
                if attempt < max_retries:
--- a/src/timmy/gematria.py
+++ b/src/timmy/gematria.py
@@ -41,10 +41,32 @@ _SUMERIAN: dict[str, int] = {c: v * 6 for c, v in _SIMPLE.items()}
 # Aleph=1..Tet=9, Yod=10..Tsade=90, Qoph=100..Tav=400
 # Standard mapping for the 22 Hebrew letters extended to 26 Latin chars
 _HEBREW: dict[str, int] = {
-    "A": 1, "B": 2, "C": 3, "D": 4, "E": 5, "F": 6, "G": 7, "H": 8, "I": 9,
-    "J": 10, "K": 20, "L": 30, "M": 40, "N": 50, "O": 60, "P": 70, "Q": 80,
-    "R": 90, "S": 100, "T": 200, "U": 300, "V": 400, "W": 500, "X": 600,
-    "Y": 700, "Z": 800,
+    "A": 1,
+    "B": 2,
+    "C": 3,
+    "D": 4,
+    "E": 5,
+    "F": 6,
+    "G": 7,
+    "H": 8,
+    "I": 9,
+    "J": 10,
+    "K": 20,
+    "L": 30,
+    "M": 40,
+    "N": 50,
+    "O": 60,
+    "P": 70,
+    "Q": 80,
+    "R": 90,
+    "S": 100,
+    "T": 200,
+    "U": 300,
+    "V": 400,
+    "W": 500,
+    "X": 600,
+    "Y": 700,
+    "Z": 800,
 }

 CIPHERS: dict[str, dict[str, int]] = {
@@ -317,9 +339,9 @@ def _format_number_analysis(n: int) -> str:
    lines.append(f"  Digit sum: {info['digit_sum']}")

    if info.get("master_number"):
-        lines.append(f"  ★ Master Number")
+        lines.append("  ★ Master Number")
    if info.get("angel_number"):
-        lines.append(f"  ★ Angel Number (repeating digits)")
+        lines.append("  ★ Angel Number (repeating digits)")
    if info.get("significance"):
        lines.append(f"\n  Significance: {info['significance']}")

--- a/tests/timmy/test_agent_retry.py
+++ b/tests/timmy/test_agent_retry.py
@@ -74,31 +74,61 @@ async def test_run_exhausts_retries(sub_agent):


@pytest.mark.asyncio
-async def test_run_no_retry_on_connection_error(sub_agent):
-    """Mock agent.run to raise httpx.ConnectError. Verify it raises immediately without retry."""
-    # Arrange: raise ConnectError
+async def test_run_retries_on_connection_error(sub_agent):
+    """ConnectError is transient (GPU contention) — retries with backoff (#70)."""
    sub_agent.agent.run.side_effect = httpx.ConnectError("Connection refused")

-    # Act & Assert
-    with pytest.raises(httpx.ConnectError, match="Connection refused"):
-        await sub_agent.run("test message")
+    with patch("timmy.agents.base.asyncio.sleep") as mock_sleep:
+        with pytest.raises(httpx.ConnectError, match="Connection refused"):
+            await sub_agent.run("test message")

-    # Should have been called only once (no retry)
-    assert sub_agent.agent.run.call_count == 1
+    # Should have retried all 3 attempts
+    assert sub_agent.agent.run.call_count == 3
+    # Contention backoff: 2**1=2, 2**2=4
+    assert mock_sleep.call_count == 2
+    mock_sleep.assert_any_call(2)
+    mock_sleep.assert_any_call(4)


@pytest.mark.asyncio
-async def test_run_no_retry_on_read_error(sub_agent):
-    """Mock agent.run to raise httpx.ReadError. Verify it raises immediately without retry."""
-    # Arrange: raise ReadError
+async def test_run_retries_on_read_error(sub_agent):
+    """ReadError is transient (GPU contention) — retries with backoff (#70)."""
    sub_agent.agent.run.side_effect = httpx.ReadError("Server disconnected")

-    # Act & Assert
-    with pytest.raises(httpx.ReadError, match="Server disconnected"):
-        await sub_agent.run("test message")
+    with patch("timmy.agents.base.asyncio.sleep") as mock_sleep:
+        with pytest.raises(httpx.ReadError, match="Server disconnected"):
+            await sub_agent.run("test message")

-    # Should have been called only once (no retry)
-    assert sub_agent.agent.run.call_count == 1
+    # Should have retried all 3 attempts
+    assert sub_agent.agent.run.call_count == 3
+    # Contention backoff: 2**1=2, 2**2=4
+    assert mock_sleep.call_count == 2
+    mock_sleep.assert_any_call(2)
+    mock_sleep.assert_any_call(4)
+
+
+@pytest.mark.asyncio
+async def test_run_recovers_from_contention(sub_agent):
+    """Simulate GPU contention: ReadError then success on retry (#70)."""
+    call_count = 0
+
+    def mock_run(*args, **kwargs):
+        nonlocal call_count
+        call_count += 1
+        if call_count == 1:
+            raise httpx.ReadError("Server disconnected")
+        result = MagicMock()
+        result.content = "Recovered after contention"
+        return result
+
+    sub_agent.agent.run = mock_run
+
+    with patch("timmy.agents.base.asyncio.sleep") as mock_sleep:
+        result = await sub_agent.run("test message")
+
+    assert result == "Recovered after contention"
+    assert call_count == 2
+    mock_sleep.assert_called_once_with(2)  # 2**1 contention backoff


@pytest.mark.asyncio
--- a/tests/timmy/test_gematria.py
+++ b/tests/timmy/test_gematria.py
@@ -20,7 +20,6 @@ from timmy.gematria import (
    reduce_number,
 )

-
 # ── Core cipher computation ──────────────────────────────────────────────────


--- a/tests/timmy/test_ollama_disconnect.py
+++ b/tests/timmy/test_ollama_disconnect.py
@@ -1,8 +1,8 @@
 """Test Ollama disconnection handling.

 Verifies that:
-1. BaseAgent.run() logs 'Ollama disconnected' when agent.run() raises connection errors
-2. BaseAgent.run() re-raises the error (not silently swallowed)
+1. BaseAgent.run() retries on transient errors (contention/disconnect) with backoff (#70)
+2. BaseAgent.run() re-raises the error after retries are exhausted
 3. session.chat() returns disconnect-specific message on connection errors
 4. session.chat_with_tools() returns _ErrorRunOutput with disconnect message on connection errors
 """
@@ -18,14 +18,15 @@ import pytest
 class TestBaseAgentDisconnect:
    """Test BaseAgent.run() disconnection handling."""

-    def test_base_agent_logs_on_connect_error(self, caplog):
-        """BaseAgent.run() logs 'Ollama disconnected' on httpx.ConnectError."""
-        caplog.set_level(logging.ERROR)
+    def test_base_agent_retries_and_logs_on_connect_error(self, caplog):
+        """BaseAgent.run() retries on ConnectError with backoff, then logs 'Ollama unreachable' (#70)."""
+        caplog.set_level(logging.WARNING)
        importlib.import_module("timmy.agents.base")

        with (
            patch("timmy.agents.base.Ollama") as mock_ollama,
            patch("timmy.agents.base.Agent") as mock_agent_class,
+            patch("timmy.agents.base.asyncio.sleep"),
        ):
            mock_ollama.return_value = MagicMock()
            mock_agent = MagicMock()
@@ -51,18 +52,24 @@ class TestBaseAgentDisconnect:

                asyncio.run(agent.run("test message"))

-            assert any("Ollama disconnected" in record.message for record in caplog.records), (
-                f"Expected 'Ollama disconnected' in logs, got: {[r.message for r in caplog.records]}"
+            # Should have retried 3 times total
+            assert mock_agent.run.call_count == 3
+            assert any("Ollama contention" in record.message for record in caplog.records), (
+                f"Expected 'Ollama contention' in logs, got: {[r.message for r in caplog.records]}"
+            )
+            assert any("Ollama unreachable" in record.message for record in caplog.records), (
+                f"Expected 'Ollama unreachable' in logs, got: {[r.message for r in caplog.records]}"
            )

-    def test_base_agent_logs_on_read_error(self, caplog):
-        """BaseAgent.run() logs 'Ollama disconnected' on httpx.ReadError."""
-        caplog.set_level(logging.ERROR)
+    def test_base_agent_retries_and_logs_on_read_error(self, caplog):
+        """BaseAgent.run() retries on ReadError with backoff, then logs 'Ollama unreachable' (#70)."""
+        caplog.set_level(logging.WARNING)
        importlib.import_module("timmy.agents.base")

        with (
            patch("timmy.agents.base.Ollama") as mock_ollama,
            patch("timmy.agents.base.Agent") as mock_agent_class,
+            patch("timmy.agents.base.asyncio.sleep"),
        ):
            mock_ollama.return_value = MagicMock()
            mock_agent = MagicMock()
@@ -88,18 +95,20 @@ class TestBaseAgentDisconnect:

                asyncio.run(agent.run("test message"))

-            assert any("Ollama disconnected" in record.message for record in caplog.records), (
-                f"Expected 'Ollama disconnected' in logs, got: {[r.message for r in caplog.records]}"
+            assert mock_agent.run.call_count == 3
+            assert any("Ollama contention" in record.message for record in caplog.records), (
+                f"Expected 'Ollama contention' in logs, got: {[r.message for r in caplog.records]}"
            )

-    def test_base_agent_logs_on_connection_error(self, caplog):
-        """BaseAgent.run() logs 'Ollama disconnected' on ConnectionError."""
-        caplog.set_level(logging.ERROR)
+    def test_base_agent_retries_and_logs_on_connection_error(self, caplog):
+        """BaseAgent.run() retries on ConnectionError with backoff (#70)."""
+        caplog.set_level(logging.WARNING)
        importlib.import_module("timmy.agents.base")

        with (
            patch("timmy.agents.base.Ollama") as mock_ollama,
            patch("timmy.agents.base.Agent") as mock_agent_class,
+            patch("timmy.agents.base.asyncio.sleep"),
        ):
            mock_ollama.return_value = MagicMock()
            mock_agent = MagicMock()
@@ -125,17 +134,19 @@ class TestBaseAgentDisconnect:

                asyncio.run(agent.run("test message"))

-            assert any("Ollama disconnected" in record.message for record in caplog.records), (
-                f"Expected 'Ollama disconnected' in logs, got: {[r.message for r in caplog.records]}"
+            assert mock_agent.run.call_count == 3
+            assert any("Ollama unreachable" in record.message for record in caplog.records), (
+                f"Expected 'Ollama unreachable' in logs, got: {[r.message for r in caplog.records]}"
            )

-    def test_base_agent_re_raises_connection_error(self):
-        """BaseAgent.run() re-raises the connection error (not silently swallowed)."""
+    def test_base_agent_re_raises_connection_error_after_retries(self):
+        """BaseAgent.run() re-raises the connection error after exhausting retries (#70)."""
        importlib.import_module("timmy.agents.base")

        with (
            patch("timmy.agents.base.Ollama") as mock_ollama,
            patch("timmy.agents.base.Agent") as mock_agent_class,
+            patch("timmy.agents.base.asyncio.sleep"),
        ):
            mock_ollama.return_value = MagicMock()
            mock_agent = MagicMock()