forked from Rockachopa/Timmy-time-dashboard
## What

Cache the Timmy agent instance at app startup (in lifespan) instead of creating a new one per `/serve/chat` request.

## Changes

- `src/timmy_serve/app.py`: Create agent in lifespan, store in `app.state.timmy`
- `tests/timmy/test_timmy_serve_app.py`: Updated tests for lifespan-based caching, added `test_agent_cached_at_startup`

2085 unit tests pass. 2102 pre-push tests pass. 78.5% coverage.

Closes #471

Co-authored-by: Timmy <timmy@timmytime.ai>
Reviewed-on: http://localhost:3000/rockachopa/Timmy-time-dashboard/pulls/476
Co-authored-by: Timmy Time <timmy@Alexanderwhitestone.ai>
Co-committed-by: Timmy Time <timmy@Alexanderwhitestone.ai>
126 lines
3.7 KiB
Python
126 lines
3.7 KiB
Python
"""Timmy Serve — FastAPI app for Timmy's API.

Endpoints:
    POST /serve/chat — Chat with Timmy
    GET /serve/status — Service status
    GET /health — Health check
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import logging
|
|
import time
|
|
from collections import defaultdict
|
|
from contextlib import asynccontextmanager
|
|
|
|
from fastapi import FastAPI, HTTPException, Request
|
|
from pydantic import BaseModel
|
|
from starlette.middleware.base import BaseHTTPMiddleware
|
|
from starlette.responses import JSONResponse
|
|
|
|
from config import settings
|
|
from timmy.agent import create_timmy
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ChatRequest(BaseModel):
    """Request body for ``POST /serve/chat``."""

    # Text of the user's message.
    message: str
    # Optional flag supplied by the client; False when omitted.
    stream: bool = False
|
|
|
|
|
|
class ChatResponse(BaseModel):
    """Response body returned by ``POST /serve/chat``."""

    # Reply text produced for the request.
    response: str
|
|
|
|
|
|
class StatusResponse(BaseModel):
    """Response body returned by ``GET /serve/status``."""

    # Service state string reported to the caller.
    status: str
    # Name of the configured model backend.
    backend: str
|
|
|
|
|
|
class RateLimitMiddleware(BaseHTTPMiddleware):
    """Simple in-memory rate limiting middleware.

    Throttles ``POST /serve/chat`` per client IP with a sliding window: once a
    client has ``limit`` requests recorded within the last ``window`` seconds,
    further requests get a 429 response until older ones age out. Every other
    request is passed straight through. All state is held in this object.
    """

    def __init__(self, app, limit: int = 10, window: int = 60):
        """Wrap *app* with the rate limiter.

        Args:
            app: The application being wrapped.
            limit: Maximum number of chat requests counted per client within
                ``window`` before requests are rejected.
            window: Length of the sliding window, in seconds.
        """
        super().__init__(app)
        self.limit = limit
        self.window = window
        # client IP -> time.time() stamps of the requests still inside the window
        self.requests: dict[str, list[float]] = defaultdict(list)
        # time.time() of the last idle-client sweep; 0.0 makes the first chat
        # request perform a (trivially cheap) initial sweep.
        self._last_sweep = 0.0

    def _sweep_idle_clients(self, now: float) -> None:
        """Forget clients whose recorded requests have all left the window.

        Without this, every distinct client IP ever seen would keep an entry
        in ``self.requests`` forever, because an entry is otherwise only
        pruned when that same IP sends another request.
        """
        idle = [
            ip
            for ip, stamps in self.requests.items()
            if all(now - t >= self.window for t in stamps)
        ]
        for ip in idle:
            del self.requests[ip]

    async def dispatch(self, request: Request, call_next):
        """Apply the rate limit to chat requests and forward everything else.

        Args:
            request: The incoming request.
            call_next: Callable that forwards the request down the stack.

        Returns:
            A 429 ``JSONResponse`` when the client is over its limit,
            otherwise whatever ``call_next`` returns.
        """
        # Only rate limit chat endpoint
        if request.url.path == "/serve/chat" and request.method == "POST":
            client_ip = request.client.host if request.client else "unknown"
            now = time.time()

            # Sweep at most once per window so the cost stays amortised.
            if now - self._last_sweep >= self.window:
                self._sweep_idle_clients(now)
                self._last_sweep = now

            # Clean up old requests
            recent = [t for t in self.requests[client_ip] if now - t < self.window]
            self.requests[client_ip] = recent

            if len(recent) >= self.limit:
                logger.warning("Rate limit exceeded for %s", client_ip)
                return JSONResponse(
                    status_code=429,
                    content={"error": "Rate limit exceeded. Try again later."},
                )

            # Rejected requests are not recorded; only accepted ones count.
            recent.append(now)

        return await call_next(request)
|
|
|
|
|
|
def create_timmy_serve_app() -> FastAPI:
    """Create the Timmy Serve FastAPI application.

    Returns:
        A FastAPI app with the rate limiting middleware installed and the
        ``/serve/status``, ``/serve/chat`` and ``/health`` routes registered.
        The Timmy agent is created once in the app's lifespan and stored on
        ``app.state.timmy``.
    """

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        """Create the agent at startup and cache it on ``app.state``."""
        logger.info("Timmy Serve starting")
        app.state.timmy = create_timmy()
        logger.info("Timmy agent cached in app state")
        yield
        logger.info("Timmy Serve shutting down")

    app = FastAPI(
        title="Timmy Serve — Sovereign AI API",
        version="1.0.0",
        lifespan=lifespan,
        # Interactive docs are only exposed when debug is enabled.
        docs_url="/docs" if settings.debug else None,
        redoc_url="/redoc" if settings.debug else None,
    )

    # Add rate limiting middleware (10 requests per minute)
    app.add_middleware(RateLimitMiddleware, limit=10, window=60)

    @app.get("/serve/status", response_model=StatusResponse)
    async def serve_status():
        """Get service status."""
        return StatusResponse(
            status="active",
            backend=settings.timmy_model_backend,
        )

    @app.post("/serve/chat", response_model=ChatResponse)
    async def serve_chat(request: Request, body: ChatRequest):
        """Process a chat request with the cached agent.

        Raises:
            HTTPException: 500 if anything fails while producing the reply.
        """
        try:
            timmy = request.app.state.timmy
            # ``body.stream`` is not forwarded; the agent is always run with
            # stream=False.
            # NOTE(review): ``run`` is called synchronously inside an async
            # handler; if it blocks for long it stalls the event loop. Confirm
            # the agent is safe to call from a worker thread before offloading.
            result = timmy.run(body.message, stream=False)
            # Fall back to str() for results that carry no ``content`` attribute.
            response_text = result.content if hasattr(result, "content") else str(result)

            return ChatResponse(response=response_text)

        except Exception as exc:
            # logger.exception records the traceback along with the message.
            logger.exception("Chat processing error: %s", exc)
            raise HTTPException(status_code=500, detail=f"Processing error: {exc}") from exc

    @app.get("/health")
    async def health():
        """Health check endpoint."""
        return {"status": "healthy", "service": "timmy-serve"}

    return app
|
|
|
|
|
|
# Default app instance for uvicorn, built once when this module is imported.
timmy_serve_app = create_timmy_serve_app()
|