forked from Rockachopa/Timmy-time-dashboard
Merge pull request '[loop-cycle-5] fix: warmup model on cold load (#82)' (#95) from fix/warmup-cold-model into main
This commit is contained in:
@@ -16,6 +16,7 @@ Handoff Protocol maintains continuity across sessions.
|
|||||||
import logging
|
import logging
|
||||||
from typing import TYPE_CHECKING, Union
|
from typing import TYPE_CHECKING, Union
|
||||||
|
|
||||||
|
import httpx
|
||||||
from agno.agent import Agent
|
from agno.agent import Agent
|
||||||
from agno.db.sqlite import SqliteDb
|
from agno.db.sqlite import SqliteDb
|
||||||
from agno.models.ollama import Ollama
|
from agno.models.ollama import Ollama
|
||||||
@@ -144,6 +145,32 @@ def _model_supports_tools(model_name: str) -> bool:
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
def _warmup_model(model_name: str) -> bool:
    """Warm up an Ollama model by sending a minimal generation request.

    This prevents 'Server disconnected' errors on first request after cold model load.
    Cold loads can take 30-40s, so we use a 60s timeout.

    Args:
        model_name: Name of the Ollama model to warm up

    Returns:
        True if warmup succeeded, False otherwise (does not raise)
    """
    # A single predicted token is the cheapest request that still forces the
    # model into memory.
    payload = {"model": model_name, "prompt": "hi", "options": {"num_predict": 1}}
    try:
        resp = httpx.post(
            f"{settings.ollama_url}/api/generate",
            json=payload,
            timeout=60.0,
        )
        resp.raise_for_status()
    except Exception as exc:  # deliberately broad: warmup is best-effort only
        logger.warning("Model warmup failed: %s — first request may disconnect", exc)
        return False
    logger.info("Model %s warmed up successfully", model_name)
    return True
|
||||||
|
|
||||||
|
|
||||||
def _resolve_backend(requested: str | None) -> str:
|
def _resolve_backend(requested: str | None) -> str:
|
||||||
"""Return the backend name to use, resolving 'auto' and explicit overrides.
|
"""Return the backend name to use, resolving 'auto' and explicit overrides.
|
||||||
|
|
||||||
@@ -279,7 +306,7 @@ def create_timmy(
|
|||||||
logger.warning("Failed to load memory context: %s", exc)
|
logger.warning("Failed to load memory context: %s", exc)
|
||||||
full_prompt = base_prompt
|
full_prompt = base_prompt
|
||||||
|
|
||||||
return Agent(
|
agent = Agent(
|
||||||
name="Agent",
|
name="Agent",
|
||||||
model=Ollama(id=model_name, host=settings.ollama_url, timeout=300),
|
model=Ollama(id=model_name, host=settings.ollama_url, timeout=300),
|
||||||
db=SqliteDb(db_file=db_file),
|
db=SqliteDb(db_file=db_file),
|
||||||
@@ -291,6 +318,8 @@ def create_timmy(
|
|||||||
tool_call_limit=settings.max_agent_steps if use_tools else None,
|
tool_call_limit=settings.max_agent_steps if use_tools else None,
|
||||||
telemetry=settings.telemetry_enabled,
|
telemetry=settings.telemetry_enabled,
|
||||||
)
|
)
|
||||||
|
_warmup_model(model_name)
|
||||||
|
return agent
|
||||||
|
|
||||||
|
|
||||||
class TimmyWithMemory:
|
class TimmyWithMemory:
|
||||||
|
|||||||
Reference in New Issue
Block a user