diff --git a/src/timmy/thinking.py b/src/timmy/thinking.py
index 50f983b..0b0850d 100644
--- a/src/timmy/thinking.py
+++ b/src/timmy/thinking.py
@@ -1124,21 +1124,37 @@ class ThinkingEngine:
             lines.append(f"- [{thought.seed_type}] {snippet}")
         return "\n".join(lines)
 
+    _thinking_agent = None  # cached agent — avoids per-call resource leaks (#525)
+
     async def _call_agent(self, prompt: str) -> str:
         """Call Timmy's agent to generate a thought.
 
-        Creates a lightweight agent with skip_mcp=True to avoid the cancel-scope
+        Reuses a cached agent with skip_mcp=True to avoid the cancel-scope
         errors that occur when MCP stdio transports are spawned inside asyncio
-        background tasks (#72). The thinking engine doesn't need Gitea or
-        filesystem tools — it only needs the LLM.
+        background tasks (#72) and to prevent per-call resource leaks (httpx
+        clients, SQLite connections, model warmups) that caused the thinking
+        loop to die every ~10 min (#525).
+
+        Individual calls are capped at 120 s so a hung Ollama never blocks
+        the scheduler indefinitely.
 
         Strips ``<think>`` tags from reasoning models (qwen3, etc.) so that
         downstream parsers (fact distillation, issue filing) receive clean
         text.
         """
-        from timmy.agent import create_timmy
+        import asyncio
+
+        if self._thinking_agent is None:
+            from timmy.agent import create_timmy
+
+            self._thinking_agent = create_timmy(skip_mcp=True)
+
+        try:
+            async with asyncio.timeout(120):
+                run = await self._thinking_agent.arun(prompt, stream=False)
+        except TimeoutError:
+            logger.warning("Thinking LLM call timed out after 120 s")
+            return ""
 
-        agent = create_timmy(skip_mcp=True)
-        run = await agent.arun(prompt, stream=False)
         raw = run.content if hasattr(run, "content") else str(run)
         return _THINK_TAG_RE.sub("", raw) if raw else raw