Merge pull request '[loop-cycle-5] fix: warmup model on cold load (#82)' (#95) from fix/warmup-cold-model into main

This commit is contained in:
2026-03-14 18:26:48 -04:00

View File

@@ -16,6 +16,7 @@ Handoff Protocol maintains continuity across sessions.
import logging
from typing import TYPE_CHECKING, Union
import httpx
from agno.agent import Agent
from agno.db.sqlite import SqliteDb
from agno.models.ollama import Ollama
@@ -144,6 +145,32 @@ def _model_supports_tools(model_name: str) -> bool:
return True
def _warmup_model(model_name: str) -> bool:
    """Send a tiny generation request so Ollama loads *model_name* into memory.

    A cold model load can take 30-40 seconds, during which the first real
    request can fail with a 'Server disconnected' error. Priming the model
    here — with a generous 60s timeout — avoids that failure mode.

    Args:
        model_name: Name of the Ollama model to warm up

    Returns:
        True if warmup succeeded, False otherwise (does not raise)
    """
    # Minimal possible generation: one token, trivial prompt.
    payload = {"model": model_name, "prompt": "hi", "options": {"num_predict": 1}}
    try:
        reply = httpx.post(
            f"{settings.ollama_url}/api/generate",
            json=payload,
            timeout=60.0,
        )
        reply.raise_for_status()
    except Exception as exc:
        # Best-effort by design: a warmup failure is logged, never fatal.
        logger.warning("Model warmup failed: %s — first request may disconnect", exc)
        return False
    logger.info("Model %s warmed up successfully", model_name)
    return True
def _resolve_backend(requested: str | None) -> str:
"""Return the backend name to use, resolving 'auto' and explicit overrides.
@@ -279,7 +306,7 @@ def create_timmy(
logger.warning("Failed to load memory context: %s", exc)
full_prompt = base_prompt
return Agent(
agent = Agent(
name="Agent",
model=Ollama(id=model_name, host=settings.ollama_url, timeout=300),
db=SqliteDb(db_file=db_file),
@@ -291,6 +318,8 @@ def create_timmy(
tool_call_limit=settings.max_agent_steps if use_tools else None,
telemetry=settings.telemetry_enabled,
)
_warmup_model(model_name)
return agent
class TimmyWithMemory: