forked from Rockachopa/Timmy-time-dashboard
Merge pull request '[loop-cycle-5] fix: warmup model on cold load (#82)' (#95) from fix/warmup-cold-model into main
This commit is contained in:
@@ -16,6 +16,7 @@ Handoff Protocol maintains continuity across sessions.
|
||||
import logging
|
||||
from typing import TYPE_CHECKING, Union
|
||||
|
||||
import httpx
|
||||
from agno.agent import Agent
|
||||
from agno.db.sqlite import SqliteDb
|
||||
from agno.models.ollama import Ollama
|
||||
@@ -144,6 +145,32 @@ def _model_supports_tools(model_name: str) -> bool:
|
||||
return True
|
||||
|
||||
|
||||
def _warmup_model(model_name: str) -> bool:
    """Prime an Ollama model by issuing a minimal generation request.

    A cold model load can take 30-40s, and the first request after such a
    load may otherwise fail with 'Server disconnected'. The request here
    therefore uses a 60s timeout.

    Args:
        model_name: Name of the Ollama model to warm up.

    Returns:
        True when the warmup request completed with a success status,
        False on any failure. Failures are logged and never raised.
    """
    try:
        endpoint = f"{settings.ollama_url}/api/generate"
        # Tiny prompt with num_predict=1 to keep the warmup request minimal.
        payload = {"model": model_name, "prompt": "hi", "options": {"num_predict": 1}}
        reply = httpx.post(endpoint, json=payload, timeout=60.0)
        reply.raise_for_status()
        logger.info("Model %s warmed up successfully", model_name)
    except Exception as err:
        # Best-effort: a failed warmup must not prevent the caller from continuing.
        logger.warning("Model warmup failed: %s — first request may disconnect", err)
        return False
    return True
|
||||
|
||||
|
||||
def _resolve_backend(requested: str | None) -> str:
|
||||
"""Return the backend name to use, resolving 'auto' and explicit overrides.
|
||||
|
||||
@@ -279,7 +306,7 @@ def create_timmy(
|
||||
logger.warning("Failed to load memory context: %s", exc)
|
||||
full_prompt = base_prompt
|
||||
|
||||
return Agent(
|
||||
agent = Agent(
|
||||
name="Agent",
|
||||
model=Ollama(id=model_name, host=settings.ollama_url, timeout=300),
|
||||
db=SqliteDb(db_file=db_file),
|
||||
@@ -291,6 +318,8 @@ def create_timmy(
|
||||
tool_call_limit=settings.max_agent_steps if use_tools else None,
|
||||
telemetry=settings.telemetry_enabled,
|
||||
)
|
||||
_warmup_model(model_name)
|
||||
return agent
|
||||
|
||||
|
||||
class TimmyWithMemory:
|
||||
|
||||
Reference in New Issue
Block a user