diff --git a/src/timmy/agent.py b/src/timmy/agent.py
index 091f2355..2cac1fbb 100644
--- a/src/timmy/agent.py
+++ b/src/timmy/agent.py
@@ -16,6 +16,7 @@ Handoff Protocol maintains continuity across sessions.
 
 import logging
 from typing import TYPE_CHECKING, Union
+import httpx
 from agno.agent import Agent
 from agno.db.sqlite import SqliteDb
 from agno.models.ollama import Ollama
@@ -144,6 +145,32 @@ def _model_supports_tools(model_name: str) -> bool:
     return True
 
 
+def _warmup_model(model_name: str) -> bool:
+    """Warm up an Ollama model by sending a minimal generation request.
+
+    This prevents 'Server disconnected' errors on first request after cold model load.
+    Cold loads can take 30-40s, so we use a 60s timeout.
+
+    Args:
+        model_name: Name of the Ollama model to warm up
+
+    Returns:
+        True if warmup succeeded, False otherwise (does not raise)
+    """
+    try:
+        response = httpx.post(
+            f"{settings.ollama_url}/api/generate",
+            json={"model": model_name, "prompt": "hi", "options": {"num_predict": 1}},
+            timeout=60.0,
+        )
+        response.raise_for_status()
+        logger.info("Model %s warmed up successfully", model_name)
+        return True
+    except Exception as exc:
+        logger.warning("Model warmup failed: %s — first request may disconnect", exc)
+        return False
+
+
 def _resolve_backend(requested: str | None) -> str:
     """Return the backend name to use, resolving 'auto' and explicit overrides.
 
@@ -279,7 +306,7 @@ def create_timmy(
         logger.warning("Failed to load memory context: %s", exc)
         full_prompt = base_prompt
 
-    return Agent(
+    agent = Agent(
         name="Agent",
         model=Ollama(id=model_name, host=settings.ollama_url, timeout=300),
         db=SqliteDb(db_file=db_file),
@@ -291,6 +318,8 @@ def create_timmy(
         tool_call_limit=settings.max_agent_steps if use_tools else None,
         telemetry=settings.telemetry_enabled,
     )
+    _warmup_model(model_name)
+    return agent
 
 
 class TimmyWithMemory: