This repository has been archived on 2026-03-24. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
Timmy-time-dashboard/src/timmy_serve/app.py
Timmy Time 3c3aca57f1 [loop-cycle-535] perf: cache Timmy agent at startup (#471) (#476)
## What
Cache the Timmy agent instance at app startup (in lifespan) instead of creating a new one per `/serve/chat` request.

## Changes
- `src/timmy_serve/app.py`: Create agent in lifespan, store in `app.state.timmy`
- `tests/timmy/test_timmy_serve_app.py`: Updated tests for lifespan-based caching, added `test_agent_cached_at_startup`

2085 unit tests pass. 2102 pre-push tests pass. 78.5% coverage.

Closes #471

Co-authored-by: Timmy <timmy@timmytime.ai>
Reviewed-on: http://localhost:3000/rockachopa/Timmy-time-dashboard/pulls/476
Co-authored-by: Timmy Time <timmy@Alexanderwhitestone.ai>
Co-committed-by: Timmy Time <timmy@Alexanderwhitestone.ai>
2026-03-19 15:28:57 -04:00

126 lines
3.7 KiB
Python

"""Timmy Serve — FastAPI app for Timmy's API.
Endpoints:
POST /serve/chat — Chat with Timmy
GET /serve/status — Service status
GET /health — Health check
"""
from __future__ import annotations
import logging
import time
from collections import defaultdict
from contextlib import asynccontextmanager
from fastapi import FastAPI, HTTPException, Request
from pydantic import BaseModel
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.responses import JSONResponse
from config import settings
from timmy.agent import create_timmy
logger = logging.getLogger(__name__)
class ChatRequest(BaseModel):
    """Request body for ``POST /serve/chat``."""

    # Text that is handed to the agent.
    message: str
    # Client-supplied streaming flag; off unless the caller sets it.
    stream: bool = False
class ChatResponse(BaseModel):
    """Response body for ``POST /serve/chat``."""

    # Text produced for the submitted message.
    response: str
class StatusResponse(BaseModel):
    """Response body for ``GET /serve/status``."""

    # Service state string reported to the caller.
    status: str
    # Name of the configured model backend.
    backend: str
class RateLimitMiddleware(BaseHTTPMiddleware):
    """Simple in-memory rate limiting middleware.

    Only ``POST /serve/chat`` is limited; every other request passes straight
    through.  Each client (keyed by ``request.client.host``) may make at most
    ``limit`` requests in any sliding ``window`` seconds.  Requests over the
    limit get a 429 JSON response and are not recorded.

    State lives in this process only, so it is neither shared between workers
    nor preserved across restarts.

    Args:
        app: The ASGI application being wrapped.
        limit: Maximum number of accepted requests per client per window.
        window: Length of the sliding window, in seconds.
    """

    def __init__(self, app, limit: int = 10, window: int = 60):
        super().__init__(app)
        self.limit = limit
        self.window = window
        # client key -> timestamps of accepted requests inside the window
        self.requests: dict[str, list[float]] = defaultdict(list)
        # Earliest time at which idle clients are swept again.
        self._next_sweep = 0.0

    def _sweep_idle_clients(self, now: float) -> None:
        """Forget every client that has no request inside the current window.

        Without this, each distinct client address would leave an entry in
        ``self.requests`` forever and the mapping would grow without bound.
        """
        idle = [
            client
            for client, stamps in self.requests.items()
            if all(now - t >= self.window for t in stamps)
        ]
        for client in idle:
            del self.requests[client]
        self._next_sweep = now + self.window

    async def dispatch(self, request: Request, call_next):
        """Reject over-limit chat requests; forward everything else."""
        # Only rate limit chat endpoint
        if request.url.path == "/serve/chat" and request.method == "POST":
            client_ip = request.client.host if request.client else "unknown"
            now = time.time()

            # Sweep at most once per window so the cost stays amortized.
            if now >= self._next_sweep:
                self._sweep_idle_clients(now)

            # ``.get`` avoids creating an empty defaultdict entry as a side
            # effect of merely looking a client up.
            recent = [
                t for t in self.requests.get(client_ip, ()) if now - t < self.window
            ]

            if len(recent) >= self.limit:
                self.requests[client_ip] = recent
                logger.warning("Rate limit exceeded for %s", client_ip)
                return JSONResponse(
                    status_code=429,
                    content={"error": "Rate limit exceeded. Try again later."},
                )

            recent.append(now)
            self.requests[client_ip] = recent

        return await call_next(request)
def create_timmy_serve_app() -> FastAPI:
    """Create the Timmy Serve FastAPI application.

    The returned app exposes:

    * ``POST /serve/chat`` -- run a message through the cached agent.
    * ``GET /serve/status`` -- report the configured model backend.
    * ``GET /health`` -- static health check.

    The agent is created once in the lifespan hook and stored on
    ``app.state.timmy``.  Interactive docs are only mounted when
    ``settings.debug`` is truthy.

    Returns:
        A configured ``FastAPI`` instance with rate limiting installed.
    """
    import threading  # local import keeps the file's import block untouched

    # The agent's ``run`` is a synchronous call.  The chat handler below is a
    # plain ``def`` so FastAPI executes it in its threadpool instead of
    # blocking the event loop (which would also stall /health and
    # /serve/status for the duration of a chat).  Because a single agent
    # instance is shared by all requests, this lock keeps agent calls
    # strictly one-at-a-time, exactly as they were when the handler blocked
    # the loop.
    agent_lock = threading.Lock()

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        logger.info("Timmy Serve starting")
        app.state.timmy = create_timmy()
        logger.info("Timmy agent cached in app state")
        yield
        logger.info("Timmy Serve shutting down")

    app = FastAPI(
        title="Timmy Serve — Sovereign AI API",
        version="1.0.0",
        lifespan=lifespan,
        docs_url="/docs" if settings.debug else None,
        redoc_url="/redoc" if settings.debug else None,
    )

    # Add rate limiting middleware (10 requests per minute)
    app.add_middleware(RateLimitMiddleware, limit=10, window=60)

    @app.get("/serve/status", response_model=StatusResponse)
    async def serve_status():
        """Get service status."""
        return StatusResponse(
            status="active",
            backend=settings.timmy_model_backend,
        )

    @app.post("/serve/chat", response_model=ChatResponse)
    def serve_chat(request: Request, body: ChatRequest):
        """Process a chat request.

        Any failure (including a missing cached agent) is logged with its
        traceback and surfaced to the client as HTTP 500.
        """
        try:
            timmy = request.app.state.timmy
            with agent_lock:
                # ``body.stream`` is accepted but streaming is not wired up;
                # the agent is always run non-streaming.
                result = timmy.run(body.message, stream=False)
            response_text = result.content if hasattr(result, "content") else str(result)
            return ChatResponse(response=response_text)
        except Exception as exc:
            # ``exception`` logs at ERROR level and keeps the traceback.
            logger.exception("Chat processing error: %s", exc)
            raise HTTPException(status_code=500, detail=f"Processing error: {exc}") from exc

    @app.get("/health")
    async def health():
        """Health check endpoint."""
        return {"status": "healthy", "service": "timmy-serve"}

    return app
# Default app instance for uvicorn.
# Built once, at import time, by calling the factory; the agent itself is not
# created here but in the app's lifespan hook when the server starts.
# NOTE(review): presumably served as ``timmy_serve.app:timmy_serve_app`` --
# confirm against the deployment command.
timmy_serve_app = create_timmy_serve_app()