Merge pull request '[loop-cycle-5] fix: warmup model on cold load (#82)' (#95) from fix/warmup-cold-model into main

This commit is contained in:
2026-03-14 18:26:48 -04:00

View File

@@ -16,6 +16,7 @@ Handoff Protocol maintains continuity across sessions.
import logging
from typing import TYPE_CHECKING, Union
import httpx
from agno.agent import Agent
from agno.db.sqlite import SqliteDb
from agno.models.ollama import Ollama
@@ -144,6 +145,32 @@ def _model_supports_tools(model_name: str) -> bool:
return True
def _warmup_model(model_name: str) -> bool:
    """Send a tiny generation request so Ollama loads *model_name* into memory.

    A cold model load can take 30-40 seconds, during which the first real
    request can fail with a 'Server disconnected' error. Priming the model
    here — with a generous 60s timeout — avoids that failure mode.

    Args:
        model_name: Name of the Ollama model to warm up

    Returns:
        True if warmup succeeded, False otherwise (does not raise)
    """
    # Minimal possible generation: one token, trivial prompt.
    payload = {"model": model_name, "prompt": "hi", "options": {"num_predict": 1}}
    try:
        reply = httpx.post(
            f"{settings.ollama_url}/api/generate",
            json=payload,
            timeout=60.0,
        )
        reply.raise_for_status()
    except Exception as exc:
        # Best-effort by design: a warmup failure is logged, never fatal.
        logger.warning("Model warmup failed: %s — first request may disconnect", exc)
        return False
    logger.info("Model %s warmed up successfully", model_name)
    return True
def _resolve_backend(requested: str | None) -> str:
"""Return the backend name to use, resolving 'auto' and explicit overrides.
@@ -279,7 +306,7 @@ def create_timmy(
logger.warning("Failed to load memory context: %s", exc)
full_prompt = base_prompt
return Agent(
agent = Agent(
name="Agent",
model=Ollama(id=model_name, host=settings.ollama_url, timeout=300),
db=SqliteDb(db_file=db_file),
@@ -291,6 +318,8 @@ def create_timmy(
tool_call_limit=settings.max_agent_steps if use_tools else None,
telemetry=settings.telemetry_enabled,
)
_warmup_model(model_name)
return agent
class TimmyWithMemory: