forked from Rockachopa/Timmy-time-dashboard
Merge pull request '[loop-cycle-5] fix: warmup model on cold load (#82)' (#95) from fix/warmup-cold-model into main
This commit is contained in:
@@ -16,6 +16,7 @@ Handoff Protocol maintains continuity across sessions.
|
|||||||
import logging
|
import logging
|
||||||
from typing import TYPE_CHECKING, Union
|
from typing import TYPE_CHECKING, Union
|
||||||
|
|
||||||
|
import httpx
|
||||||
from agno.agent import Agent
|
from agno.agent import Agent
|
||||||
from agno.db.sqlite import SqliteDb
|
from agno.db.sqlite import SqliteDb
|
||||||
from agno.models.ollama import Ollama
|
from agno.models.ollama import Ollama
|
||||||
@@ -144,6 +145,32 @@ def _model_supports_tools(model_name: str) -> bool:
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def _warmup_model(model_name: str) -> bool:
    """Warm up an Ollama model by sending a minimal generation request.

    This prevents 'Server disconnected' errors on first request after cold model load.
    Cold loads can take 30-40s, so we use a 60s timeout.

    Args:
        model_name: Name of the Ollama model to warm up

    Returns:
        True if warmup succeeded, False otherwise (does not raise)
    """
    # A single predicted token is the cheapest request that still forces the
    # model into memory.
    payload = {"model": model_name, "prompt": "hi", "options": {"num_predict": 1}}
    try:
        resp = httpx.post(
            f"{settings.ollama_url}/api/generate",
            json=payload,
            timeout=60.0,
        )
        resp.raise_for_status()
    except Exception as exc:  # deliberately broad: warmup is best-effort only
        logger.warning("Model warmup failed: %s — first request may disconnect", exc)
        return False
    logger.info("Model %s warmed up successfully", model_name)
    return True
|
||||||
|
|
||||||
|
|
||||||
def _resolve_backend(requested: str | None) -> str:
|
def _resolve_backend(requested: str | None) -> str:
|
||||||
"""Return the backend name to use, resolving 'auto' and explicit overrides.
|
"""Return the backend name to use, resolving 'auto' and explicit overrides.
|
||||||
|
|
||||||
@@ -279,7 +306,7 @@ def create_timmy(
|
|||||||
logger.warning("Failed to load memory context: %s", exc)
|
logger.warning("Failed to load memory context: %s", exc)
|
||||||
full_prompt = base_prompt
|
full_prompt = base_prompt
|
||||||
|
|
||||||
return Agent(
|
agent = Agent(
|
||||||
name="Agent",
|
name="Agent",
|
||||||
model=Ollama(id=model_name, host=settings.ollama_url, timeout=300),
|
model=Ollama(id=model_name, host=settings.ollama_url, timeout=300),
|
||||||
db=SqliteDb(db_file=db_file),
|
db=SqliteDb(db_file=db_file),
|
||||||
@@ -291,6 +318,8 @@ def create_timmy(
|
|||||||
tool_call_limit=settings.max_agent_steps if use_tools else None,
|
tool_call_limit=settings.max_agent_steps if use_tools else None,
|
||||||
telemetry=settings.telemetry_enabled,
|
telemetry=settings.telemetry_enabled,
|
||||||
)
|
)
|
||||||
|
_warmup_model(model_name)
|
||||||
|
return agent
|
||||||
|
|
||||||
|
|
||||||
class TimmyWithMemory:
|
class TimmyWithMemory:
|
||||||
|
|||||||
Reference in New Issue
Block a user