Compare commits

...

3 Commits

Author SHA1 Message Date
kimi
f3fc86de07 fix: remove dead airllm provider type from cascade router
Remove the airllm provider block from providers.yaml, the airllm
availability check from cascade.py, and associated tests. Renumber
remaining provider priorities (OpenAI → 2, Anthropic → 3).

Fixes #459

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 15:46:32 -04:00
dbc2fd5b0f [loop-cycle-536] fix: validate_startup checks CORS wildcard in production (#472) (#478) 2026-03-19 15:29:26 -04:00
3c3aca57f1 [loop-cycle-535] perf: cache Timmy agent at startup (#471) (#476)
## What
Cache the Timmy agent instance at app startup (in lifespan) instead of creating a new one per `/serve/chat` request.

## Changes
- `src/timmy_serve/app.py`: Create agent in lifespan, store in `app.state.timmy`
- `tests/timmy/test_timmy_serve_app.py`: Updated tests for lifespan-based caching, added `test_agent_cached_at_startup`

2085 unit tests pass. 2102 pre-push tests pass. 78.5% coverage.

Closes #471

Co-authored-by: Timmy <timmy@timmytime.ai>
Reviewed-on: http://localhost:3000/rockachopa/Timmy-time-dashboard/pulls/476
Co-authored-by: Timmy Time <timmy@Alexanderwhitestone.ai>
Co-committed-by: Timmy Time <timmy@Alexanderwhitestone.ai>
2026-03-19 15:28:57 -04:00
7 changed files with 82 additions and 67 deletions

View File

@@ -54,25 +54,11 @@ providers:
context_window: 2048 context_window: 2048
capabilities: [text, vision, streaming] capabilities: [text, vision, streaming]
# Secondary: Local AirLLM (if installed) # Secondary: OpenAI (if API key available)
- name: airllm-local
type: airllm
enabled: false # Enable if pip install airllm
priority: 2
models:
- name: 70b
default: true
capabilities: [text, tools, json, streaming]
- name: 8b
capabilities: [text, tools, json, streaming]
- name: 405b
capabilities: [text, tools, json, streaming]
# Tertiary: OpenAI (if API key available)
- name: openai-backup - name: openai-backup
type: openai type: openai
enabled: false # Enable by setting OPENAI_API_KEY enabled: false # Enable by setting OPENAI_API_KEY
priority: 3 priority: 2
api_key: "${OPENAI_API_KEY}" # Loaded from environment api_key: "${OPENAI_API_KEY}" # Loaded from environment
base_url: null # Use default OpenAI endpoint base_url: null # Use default OpenAI endpoint
models: models:
@@ -84,11 +70,11 @@ providers:
context_window: 128000 context_window: 128000
capabilities: [text, vision, tools, json, streaming] capabilities: [text, vision, tools, json, streaming]
# Quaternary: Anthropic (if API key available) # Tertiary: Anthropic (if API key available)
- name: anthropic-backup - name: anthropic-backup
type: anthropic type: anthropic
enabled: false # Enable by setting ANTHROPIC_API_KEY enabled: false # Enable by setting ANTHROPIC_API_KEY
priority: 4 priority: 3
api_key: "${ANTHROPIC_API_KEY}" api_key: "${ANTHROPIC_API_KEY}"
models: models:
- name: claude-3-haiku-20240307 - name: claude-3-haiku-20240307

View File

@@ -469,8 +469,19 @@ def validate_startup(*, force: bool = False) -> None:
", ".join(_missing), ", ".join(_missing),
) )
sys.exit(1) sys.exit(1)
if "*" in settings.cors_origins:
_startup_logger.error(
"PRODUCTION SECURITY ERROR: CORS wildcard '*' is not allowed "
"in production. Set CORS_ORIGINS to explicit origins."
)
sys.exit(1)
_startup_logger.info("Production mode: security secrets validated ✓") _startup_logger.info("Production mode: security secrets validated ✓")
else: else:
if "*" in settings.cors_origins:
_startup_logger.warning(
"SEC: CORS_ORIGINS contains wildcard '*'"
"restrict to explicit origins before deploying to production."
)
if not settings.l402_hmac_secret: if not settings.l402_hmac_secret:
_startup_logger.warning( _startup_logger.warning(
"SEC: L402_HMAC_SECRET is not set — " "SEC: L402_HMAC_SECRET is not set — "

View File

@@ -100,7 +100,7 @@ class Provider:
"""LLM provider configuration and state.""" """LLM provider configuration and state."""
name: str name: str
type: str # ollama, openai, anthropic, airllm type: str # ollama, openai, anthropic
enabled: bool enabled: bool
priority: int priority: int
url: str | None = None url: str | None = None
@@ -308,15 +308,6 @@ class CascadeRouter:
logger.debug("Ollama provider check error: %s", exc) logger.debug("Ollama provider check error: %s", exc)
return False return False
elif provider.type == "airllm":
# Check if airllm is installed
try:
import importlib.util
return importlib.util.find_spec("airllm") is not None
except (ImportError, ModuleNotFoundError):
return False
elif provider.type in ("openai", "anthropic", "grok"): elif provider.type in ("openai", "anthropic", "grok"):
# Check if API key is set # Check if API key is set
return provider.api_key is not None and provider.api_key != "" return provider.api_key is not None and provider.api_key != ""

View File

@@ -75,6 +75,8 @@ def create_timmy_serve_app() -> FastAPI:
@asynccontextmanager @asynccontextmanager
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):
logger.info("Timmy Serve starting") logger.info("Timmy Serve starting")
app.state.timmy = create_timmy()
logger.info("Timmy agent cached in app state")
yield yield
logger.info("Timmy Serve shutting down") logger.info("Timmy Serve shutting down")
@@ -101,7 +103,7 @@ def create_timmy_serve_app() -> FastAPI:
async def serve_chat(request: Request, body: ChatRequest): async def serve_chat(request: Request, body: ChatRequest):
"""Process a chat request.""" """Process a chat request."""
try: try:
timmy = create_timmy() timmy = request.app.state.timmy
result = timmy.run(body.message, stream=False) result = timmy.run(body.message, stream=False)
response_text = result.content if hasattr(result, "content") else str(result) response_text = result.content if hasattr(result, "content") else str(result)

View File

@@ -2,7 +2,7 @@
import time import time
from pathlib import Path from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch from unittest.mock import AsyncMock, patch
import pytest import pytest
import yaml import yaml
@@ -489,34 +489,6 @@ class TestProviderAvailabilityCheck:
assert router._check_provider_available(provider) is False assert router._check_provider_available(provider) is False
def test_check_airllm_installed(self):
"""Test AirLLM when installed."""
router = CascadeRouter(config_path=Path("/nonexistent"))
provider = Provider(
name="airllm",
type="airllm",
enabled=True,
priority=1,
)
with patch("importlib.util.find_spec", return_value=MagicMock()):
assert router._check_provider_available(provider) is True
def test_check_airllm_not_installed(self):
"""Test AirLLM when not installed."""
router = CascadeRouter(config_path=Path("/nonexistent"))
provider = Provider(
name="airllm",
type="airllm",
enabled=True,
priority=1,
)
with patch("importlib.util.find_spec", return_value=None):
assert router._check_provider_available(provider) is False
class TestCascadeRouterReload: class TestCascadeRouterReload:
"""Test hot-reload of providers.yaml.""" """Test hot-reload of providers.yaml."""

View File

@@ -49,6 +49,34 @@ class TestConfigLazyValidation:
# Should not raise # Should not raise
validate_startup(force=True) validate_startup(force=True)
def test_validate_startup_exits_on_cors_wildcard_in_production(self):
"""validate_startup() should exit in production when CORS has wildcard."""
from config import settings, validate_startup
with (
patch.object(settings, "timmy_env", "production"),
patch.object(settings, "l402_hmac_secret", "test-secret-hex-value-32"),
patch.object(settings, "l402_macaroon_secret", "test-macaroon-hex-value-32"),
patch.object(settings, "cors_origins", ["*"]),
pytest.raises(SystemExit),
):
validate_startup(force=True)
def test_validate_startup_warns_cors_wildcard_in_dev(self):
"""validate_startup() should warn in dev when CORS has wildcard."""
from config import settings, validate_startup
with (
patch.object(settings, "timmy_env", "development"),
patch.object(settings, "cors_origins", ["*"]),
patch("config._startup_logger") as mock_logger,
):
validate_startup(force=True)
mock_logger.warning.assert_any_call(
"SEC: CORS_ORIGINS contains wildcard '*'"
"restrict to explicit origins before deploying to production."
)
def test_validate_startup_skips_in_test_mode(self): def test_validate_startup_skips_in_test_mode(self):
"""validate_startup() should be a no-op in test mode.""" """validate_startup() should be a no-op in test mode."""
from config import validate_startup from config import validate_startup

View File

@@ -8,11 +8,14 @@ from fastapi.testclient import TestClient
@pytest.fixture @pytest.fixture
def serve_client(): def serve_client():
"""Create a TestClient for the timmy-serve app.""" """Create a TestClient for the timmy-serve app with mocked Timmy agent."""
from timmy_serve.app import create_timmy_serve_app with patch("timmy_serve.app.create_timmy") as mock_create:
mock_create.return_value = MagicMock()
from timmy_serve.app import create_timmy_serve_app
app = create_timmy_serve_app() app = create_timmy_serve_app()
return TestClient(app) with TestClient(app) as client:
yield client
class TestHealthEndpoint: class TestHealthEndpoint:
@@ -34,18 +37,40 @@ class TestServeStatus:
class TestServeChatEndpoint: class TestServeChatEndpoint:
@patch("timmy_serve.app.create_timmy") @patch("timmy_serve.app.create_timmy")
def test_chat_returns_response(self, mock_create, serve_client): def test_chat_returns_response(self, mock_create):
mock_agent = MagicMock() mock_agent = MagicMock()
mock_result = MagicMock() mock_result = MagicMock()
mock_result.content = "I am Timmy." mock_result.content = "I am Timmy."
mock_agent.run.return_value = mock_result mock_agent.run.return_value = mock_result
mock_create.return_value = mock_agent mock_create.return_value = mock_agent
resp = serve_client.post( from timmy_serve.app import create_timmy_serve_app
"/serve/chat",
json={"message": "Who are you?"}, app = create_timmy_serve_app()
) with TestClient(app) as client:
resp = client.post(
"/serve/chat",
json={"message": "Who are you?"},
)
assert resp.status_code == 200 assert resp.status_code == 200
data = resp.json() data = resp.json()
assert data["response"] == "I am Timmy." assert data["response"] == "I am Timmy."
mock_agent.run.assert_called_once_with("Who are you?", stream=False) mock_agent.run.assert_called_once_with("Who are you?", stream=False)
@patch("timmy_serve.app.create_timmy")
def test_agent_cached_at_startup(self, mock_create):
"""Verify create_timmy is called once at startup, not per request."""
mock_agent = MagicMock()
mock_result = MagicMock()
mock_result.content = "reply"
mock_agent.run.return_value = mock_result
mock_create.return_value = mock_agent
from timmy_serve.app import create_timmy_serve_app
app = create_timmy_serve_app()
with TestClient(app) as client:
# Two requests — create_timmy should only be called once (at startup)
client.post("/serve/chat", json={"message": "hello"})
client.post("/serve/chat", json={"message": "world"})
mock_create.assert_called_once()