Compare commits

...

8 Commits

Author SHA1 Message Date
kimi
f3fc86de07 fix: remove dead airllm provider type from cascade router
Remove the airllm provider block from providers.yaml, the airllm
availability check from cascade.py, and associated tests. Renumber
remaining provider priorities (OpenAI → 2, Anthropic → 3).

Fixes #459

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 15:46:32 -04:00
dbc2fd5b0f [loop-cycle-536] fix: validate_startup checks CORS wildcard in production (#472) (#478) 2026-03-19 15:29:26 -04:00
3c3aca57f1 [loop-cycle-535] perf: cache Timmy agent at startup (#471) (#476)
## What
Cache the Timmy agent instance at app startup (in lifespan) instead of creating a new one per `/serve/chat` request.

## Changes
- `src/timmy_serve/app.py`: Create agent in lifespan, store in `app.state.timmy`
- `tests/timmy/test_timmy_serve_app.py`: Updated tests for lifespan-based caching, added `test_agent_cached_at_startup`

2085 unit tests pass. 2102 pre-push tests pass. 78.5% coverage.

Closes #471

Co-authored-by: Timmy <timmy@timmytime.ai>
Reviewed-on: http://localhost:3000/rockachopa/Timmy-time-dashboard/pulls/476
Co-authored-by: Timmy Time <timmy@Alexanderwhitestone.ai>
Co-committed-by: Timmy Time <timmy@Alexanderwhitestone.ai>
2026-03-19 15:28:57 -04:00
0ae00af3f8 fix: remove AirLLM config settings from config.py (#475)
Co-authored-by: Kimi Agent <kimi@timmy.local>
Co-committed-by: Kimi Agent <kimi@timmy.local>
2026-03-19 15:24:43 -04:00
3df526f6ef [loop-cycle-2] feat: hot-reload providers.yaml without restart (#458) (#470) 2026-03-19 15:11:40 -04:00
50aaf60db2 [loop-cycle-2] fix: strip CORS wildcards in production (#462) (#469) 2026-03-19 15:05:27 -04:00
a751be3038 fix: default CORS origins to localhost instead of wildcard (#467)
Co-authored-by: Kimi Agent <kimi@timmy.local>
Co-committed-by: Kimi Agent <kimi@timmy.local>
2026-03-19 14:57:36 -04:00
92594ea588 [loop-cycle] feat: implement source distinction in system prompts (#463) (#464) 2026-03-19 14:49:31 -04:00
11 changed files with 368 additions and 81 deletions

View File

@@ -54,25 +54,11 @@ providers:
context_window: 2048 context_window: 2048
capabilities: [text, vision, streaming] capabilities: [text, vision, streaming]
# Secondary: Local AirLLM (if installed) # Secondary: OpenAI (if API key available)
- name: airllm-local
type: airllm
enabled: false # Enable if pip install airllm
priority: 2
models:
- name: 70b
default: true
capabilities: [text, tools, json, streaming]
- name: 8b
capabilities: [text, tools, json, streaming]
- name: 405b
capabilities: [text, tools, json, streaming]
# Tertiary: OpenAI (if API key available)
- name: openai-backup - name: openai-backup
type: openai type: openai
enabled: false # Enable by setting OPENAI_API_KEY enabled: false # Enable by setting OPENAI_API_KEY
priority: 3 priority: 2
api_key: "${OPENAI_API_KEY}" # Loaded from environment api_key: "${OPENAI_API_KEY}" # Loaded from environment
base_url: null # Use default OpenAI endpoint base_url: null # Use default OpenAI endpoint
models: models:
@@ -84,11 +70,11 @@ providers:
context_window: 128000 context_window: 128000
capabilities: [text, vision, tools, json, streaming] capabilities: [text, vision, tools, json, streaming]
# Quaternary: Anthropic (if API key available) # Tertiary: Anthropic (if API key available)
- name: anthropic-backup - name: anthropic-backup
type: anthropic type: anthropic
enabled: false # Enable by setting ANTHROPIC_API_KEY enabled: false # Enable by setting ANTHROPIC_API_KEY
priority: 4 priority: 3
api_key: "${ANTHROPIC_API_KEY}" api_key: "${ANTHROPIC_API_KEY}"
models: models:
- name: claude-3-haiku-20240307 - name: claude-3-haiku-20240307

View File

@@ -64,17 +64,10 @@ class Settings(BaseSettings):
# Seconds to wait for user confirmation before auto-rejecting. # Seconds to wait for user confirmation before auto-rejecting.
discord_confirm_timeout: int = 120 discord_confirm_timeout: int = 120
# ── AirLLM / backend selection ─────────────────────────────────────────── # ── Backend selection ────────────────────────────────────────────────────
# "ollama" — always use Ollama (default, safe everywhere) # "ollama" — always use Ollama (default, safe everywhere)
# "airllm" — always use AirLLM (requires pip install ".[bigbrain]") # "auto" — pick best available local backend, fall back to Ollama
# "auto" — use AirLLM on Apple Silicon if airllm is installed, timmy_model_backend: Literal["ollama", "grok", "claude", "auto"] = "ollama"
# fall back to Ollama otherwise
timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "ollama"
# AirLLM model size when backend is airllm or auto.
# Larger = smarter, but needs more RAM / disk.
# 8b ~16 GB | 70b ~140 GB | 405b ~810 GB
airllm_model_size: Literal["8b", "70b", "405b"] = "70b"
# ── Grok (xAI) — opt-in premium cloud backend ──────────────────────── # ── Grok (xAI) — opt-in premium cloud backend ────────────────────────
# Grok is a premium augmentation layer — local-first ethos preserved. # Grok is a premium augmentation layer — local-first ethos preserved.
@@ -138,7 +131,12 @@ class Settings(BaseSettings):
# CORS allowed origins for the web chat interface (Gitea Pages, etc.) # CORS allowed origins for the web chat interface (Gitea Pages, etc.)
# Set CORS_ORIGINS as a comma-separated list, e.g. "http://localhost:3000,https://example.com" # Set CORS_ORIGINS as a comma-separated list, e.g. "http://localhost:3000,https://example.com"
cors_origins: list[str] = ["*"] cors_origins: list[str] = [
"http://localhost:3000",
"http://localhost:8000",
"http://127.0.0.1:3000",
"http://127.0.0.1:8000",
]
# Trusted hosts for the Host header check (TrustedHostMiddleware). # Trusted hosts for the Host header check (TrustedHostMiddleware).
# Set TRUSTED_HOSTS as a comma-separated list. Wildcards supported (e.g. "*.ts.net"). # Set TRUSTED_HOSTS as a comma-separated list. Wildcards supported (e.g. "*.ts.net").
@@ -471,8 +469,19 @@ def validate_startup(*, force: bool = False) -> None:
", ".join(_missing), ", ".join(_missing),
) )
sys.exit(1) sys.exit(1)
if "*" in settings.cors_origins:
_startup_logger.error(
"PRODUCTION SECURITY ERROR: CORS wildcard '*' is not allowed "
"in production. Set CORS_ORIGINS to explicit origins."
)
sys.exit(1)
_startup_logger.info("Production mode: security secrets validated ✓") _startup_logger.info("Production mode: security secrets validated ✓")
else: else:
if "*" in settings.cors_origins:
_startup_logger.warning(
"SEC: CORS_ORIGINS contains wildcard '*'"
"restrict to explicit origins before deploying to production."
)
if not settings.l402_hmac_secret: if not settings.l402_hmac_secret:
_startup_logger.warning( _startup_logger.warning(
"SEC: L402_HMAC_SECRET is not set — " "SEC: L402_HMAC_SECRET is not set — "

View File

@@ -484,15 +484,14 @@ app = FastAPI(
def _get_cors_origins() -> list[str]: def _get_cors_origins() -> list[str]:
"""Get CORS origins from settings, with sensible defaults.""" """Get CORS origins from settings, rejecting wildcards in production."""
origins = settings.cors_origins origins = settings.cors_origins
if settings.debug and origins == ["*"]: if "*" in origins and not settings.debug:
return [ logger.warning(
"http://localhost:3000", "Wildcard '*' in CORS_ORIGINS stripped in production — "
"http://localhost:8000", "set explicit origins via CORS_ORIGINS env var"
"http://127.0.0.1:3000", )
"http://127.0.0.1:8000", origins = [o for o in origins if o != "*"]
]
return origins return origins

View File

@@ -183,6 +183,22 @@ async def run_health_check(
} }
@router.post("/reload")
async def reload_config(
cascade: Annotated[CascadeRouter, Depends(get_cascade_router)],
) -> dict[str, Any]:
"""Hot-reload providers.yaml without restart.
Preserves circuit breaker state and metrics for existing providers.
"""
try:
result = cascade.reload_config()
return {"status": "ok", **result}
except Exception as exc:
logger.error("Config reload failed: %s", exc)
raise HTTPException(status_code=500, detail=f"Reload failed: {exc}") from exc
@router.get("/config") @router.get("/config")
async def get_config( async def get_config(
cascade: Annotated[CascadeRouter, Depends(get_cascade_router)], cascade: Annotated[CascadeRouter, Depends(get_cascade_router)],

View File

@@ -100,7 +100,7 @@ class Provider:
"""LLM provider configuration and state.""" """LLM provider configuration and state."""
name: str name: str
type: str # ollama, openai, anthropic, airllm type: str # ollama, openai, anthropic
enabled: bool enabled: bool
priority: int priority: int
url: str | None = None url: str | None = None
@@ -308,15 +308,6 @@ class CascadeRouter:
logger.debug("Ollama provider check error: %s", exc) logger.debug("Ollama provider check error: %s", exc)
return False return False
elif provider.type == "airllm":
# Check if airllm is installed
try:
import importlib.util
return importlib.util.find_spec("airllm") is not None
except (ImportError, ModuleNotFoundError):
return False
elif provider.type in ("openai", "anthropic", "grok"): elif provider.type in ("openai", "anthropic", "grok"):
# Check if API key is set # Check if API key is set
return provider.api_key is not None and provider.api_key != "" return provider.api_key is not None and provider.api_key != ""
@@ -815,6 +806,66 @@ class CascadeRouter:
provider.status = ProviderStatus.HEALTHY provider.status = ProviderStatus.HEALTHY
logger.info("Circuit breaker CLOSED for %s", provider.name) logger.info("Circuit breaker CLOSED for %s", provider.name)
def reload_config(self) -> dict:
"""Hot-reload providers.yaml, preserving runtime state.
Re-reads the config file, rebuilds the provider list, and
preserves circuit breaker state and metrics for providers
that still exist after reload.
Returns:
Summary dict with added/removed/preserved counts.
"""
# Snapshot current runtime state keyed by provider name
old_state: dict[
str, tuple[ProviderMetrics, CircuitState, float | None, int, ProviderStatus]
] = {}
for p in self.providers:
old_state[p.name] = (
p.metrics,
p.circuit_state,
p.circuit_opened_at,
p.half_open_calls,
p.status,
)
old_names = set(old_state.keys())
# Reload from disk
self.providers = []
self._load_config()
# Restore preserved state
new_names = {p.name for p in self.providers}
preserved = 0
for p in self.providers:
if p.name in old_state:
metrics, circuit, opened_at, half_open, status = old_state[p.name]
p.metrics = metrics
p.circuit_state = circuit
p.circuit_opened_at = opened_at
p.half_open_calls = half_open
p.status = status
preserved += 1
added = new_names - old_names
removed = old_names - new_names
logger.info(
"Config reloaded: %d providers (%d preserved, %d added, %d removed)",
len(self.providers),
preserved,
len(added),
len(removed),
)
return {
"total_providers": len(self.providers),
"preserved": preserved,
"added": sorted(added),
"removed": sorted(removed),
}
def get_metrics(self) -> dict: def get_metrics(self) -> dict:
"""Get metrics for all providers.""" """Get metrics for all providers."""
return { return {

View File

@@ -220,7 +220,7 @@ def create_timmy(
print_response(message, stream). print_response(message, stream).
""" """
resolved = _resolve_backend(backend) resolved = _resolve_backend(backend)
size = model_size or settings.airllm_model_size size = model_size or "70b"
if resolved == "claude": if resolved == "claude":
from timmy.backends import ClaudeBackend from timmy.backends import ClaudeBackend
@@ -300,7 +300,11 @@ def create_timmy(
max_context = 2000 if not use_tools else 8000 max_context = 2000 if not use_tools else 8000
if len(memory_context) > max_context: if len(memory_context) > max_context:
memory_context = memory_context[:max_context] + "\n... [truncated]" memory_context = memory_context[:max_context] + "\n... [truncated]"
full_prompt = f"{base_prompt}\n\n## Memory Context\n\n{memory_context}" full_prompt = (
f"{base_prompt}\n\n"
f"## GROUNDED CONTEXT (verified sources — cite when using)\n\n"
f"{memory_context}"
)
else: else:
full_prompt = base_prompt full_prompt = base_prompt
except Exception as exc: except Exception as exc:

View File

@@ -23,6 +23,9 @@ Rules:
- Remember what the user tells you during the conversation. - Remember what the user tells you during the conversation.
- If you don't know something, say so honestly — never fabricate facts. - If you don't know something, say so honestly — never fabricate facts.
- If a request is ambiguous, ask a brief clarifying question before guessing. - If a request is ambiguous, ask a brief clarifying question before guessing.
- SOURCE DISTINCTION: When answering from memory or retrieved context, cite it.
When answering from your own training, use hedging: "I think", "I believe".
The user must be able to tell grounded claims from pattern-matching.
- Use the user's name if you know it. - Use the user's name if you know it.
- When you state a fact, commit to it. - When you state a fact, commit to it.
- NEVER attempt arithmetic in your head. If asked to compute anything, respond: - NEVER attempt arithmetic in your head. If asked to compute anything, respond:
@@ -78,6 +81,18 @@ HONESTY:
- Never fabricate tool output. Call the tool and wait. - Never fabricate tool output. Call the tool and wait.
- If a tool errors, report the exact error. - If a tool errors, report the exact error.
SOURCE DISTINCTION (SOUL requirement — non-negotiable):
- Every claim you make comes from one of two places: a verified source you
can point to, or your own pattern-matching. The user must be able to tell
which is which.
- When your response uses information from GROUNDED CONTEXT (memory, retrieved
documents, tool output), cite it: "From memory:", "According to [source]:".
- When you are generating from your training data alone, signal it naturally:
"I think", "My understanding is", "I believe" — never false certainty.
- If the user asks a factual question and you have no grounded source, say so:
"I don't have a verified source for this — from my training I think..."
- Prefer "I don't know" over a confident-sounding guess. Refusal over fabrication.
MEMORY (three tiers): MEMORY (three tiers):
- Tier 1: MEMORY.md (hot, always loaded) - Tier 1: MEMORY.md (hot, always loaded)
- Tier 2: memory/ vault (structured, append-only, date-stamped) - Tier 2: memory/ vault (structured, append-only, date-stamped)

View File

@@ -75,6 +75,8 @@ def create_timmy_serve_app() -> FastAPI:
@asynccontextmanager @asynccontextmanager
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):
logger.info("Timmy Serve starting") logger.info("Timmy Serve starting")
app.state.timmy = create_timmy()
logger.info("Timmy agent cached in app state")
yield yield
logger.info("Timmy Serve shutting down") logger.info("Timmy Serve shutting down")
@@ -101,7 +103,7 @@ def create_timmy_serve_app() -> FastAPI:
async def serve_chat(request: Request, body: ChatRequest): async def serve_chat(request: Request, body: ChatRequest):
"""Process a chat request.""" """Process a chat request."""
try: try:
timmy = create_timmy() timmy = request.app.state.timmy
result = timmy.run(body.message, stream=False) result = timmy.run(body.message, stream=False)
response_text = result.content if hasattr(result, "content") else str(result) response_text = result.content if hasattr(result, "content") else str(result)

View File

@@ -2,7 +2,7 @@
import time import time
from pathlib import Path from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch from unittest.mock import AsyncMock, patch
import pytest import pytest
import yaml import yaml
@@ -489,30 +489,182 @@ class TestProviderAvailabilityCheck:
assert router._check_provider_available(provider) is False assert router._check_provider_available(provider) is False
def test_check_airllm_installed(self):
"""Test AirLLM when installed."""
router = CascadeRouter(config_path=Path("/nonexistent"))
provider = Provider( class TestCascadeRouterReload:
name="airllm", """Test hot-reload of providers.yaml."""
type="airllm",
enabled=True, def test_reload_preserves_metrics(self, tmp_path):
priority=1, """Test that reload preserves metrics for existing providers."""
config = {
"providers": [
{
"name": "test-openai",
"type": "openai",
"enabled": True,
"priority": 1,
"api_key": "sk-test",
}
],
}
config_path = tmp_path / "providers.yaml"
config_path.write_text(yaml.dump(config))
router = CascadeRouter(config_path=config_path)
assert len(router.providers) == 1
# Simulate some traffic
router._record_success(router.providers[0], 150.0)
router._record_success(router.providers[0], 250.0)
assert router.providers[0].metrics.total_requests == 2
# Reload
result = router.reload_config()
assert result["total_providers"] == 1
assert result["preserved"] == 1
assert result["added"] == []
assert result["removed"] == []
# Metrics survived
assert router.providers[0].metrics.total_requests == 2
assert router.providers[0].metrics.total_latency_ms == 400.0
def test_reload_preserves_circuit_breaker(self, tmp_path):
"""Test that reload preserves circuit breaker state."""
config = {
"cascade": {"circuit_breaker": {"failure_threshold": 2}},
"providers": [
{
"name": "test-openai",
"type": "openai",
"enabled": True,
"priority": 1,
"api_key": "sk-test",
}
],
}
config_path = tmp_path / "providers.yaml"
config_path.write_text(yaml.dump(config))
router = CascadeRouter(config_path=config_path)
# Open circuit breaker
for _ in range(2):
router._record_failure(router.providers[0])
assert router.providers[0].circuit_state == CircuitState.OPEN
# Reload
router.reload_config()
# Circuit breaker state preserved
assert router.providers[0].circuit_state == CircuitState.OPEN
assert router.providers[0].status == ProviderStatus.UNHEALTHY
def test_reload_detects_added_provider(self, tmp_path):
"""Test that reload detects newly added providers."""
config = {
"providers": [
{
"name": "openai-1",
"type": "openai",
"enabled": True,
"priority": 1,
"api_key": "sk-test",
}
],
}
config_path = tmp_path / "providers.yaml"
config_path.write_text(yaml.dump(config))
router = CascadeRouter(config_path=config_path)
assert len(router.providers) == 1
# Add a second provider to config
config["providers"].append(
{
"name": "anthropic-1",
"type": "anthropic",
"enabled": True,
"priority": 2,
"api_key": "sk-ant-test",
}
) )
config_path.write_text(yaml.dump(config))
with patch("importlib.util.find_spec", return_value=MagicMock()): result = router.reload_config()
assert router._check_provider_available(provider) is True
def test_check_airllm_not_installed(self): assert result["total_providers"] == 2
"""Test AirLLM when not installed.""" assert result["preserved"] == 1
router = CascadeRouter(config_path=Path("/nonexistent")) assert result["added"] == ["anthropic-1"]
assert result["removed"] == []
provider = Provider( def test_reload_detects_removed_provider(self, tmp_path):
name="airllm", """Test that reload detects removed providers."""
type="airllm", config = {
enabled=True, "providers": [
priority=1, {
) "name": "openai-1",
"type": "openai",
"enabled": True,
"priority": 1,
"api_key": "sk-test",
},
{
"name": "anthropic-1",
"type": "anthropic",
"enabled": True,
"priority": 2,
"api_key": "sk-ant-test",
},
],
}
config_path = tmp_path / "providers.yaml"
config_path.write_text(yaml.dump(config))
with patch("importlib.util.find_spec", return_value=None): router = CascadeRouter(config_path=config_path)
assert router._check_provider_available(provider) is False assert len(router.providers) == 2
# Remove anthropic
config["providers"] = [config["providers"][0]]
config_path.write_text(yaml.dump(config))
result = router.reload_config()
assert result["total_providers"] == 1
assert result["preserved"] == 1
assert result["removed"] == ["anthropic-1"]
def test_reload_re_sorts_by_priority(self, tmp_path):
"""Test that providers are re-sorted by priority after reload."""
config = {
"providers": [
{
"name": "low-priority",
"type": "openai",
"enabled": True,
"priority": 10,
"api_key": "sk-test",
},
{
"name": "high-priority",
"type": "openai",
"enabled": True,
"priority": 1,
"api_key": "sk-test2",
},
],
}
config_path = tmp_path / "providers.yaml"
config_path.write_text(yaml.dump(config))
router = CascadeRouter(config_path=config_path)
assert router.providers[0].name == "high-priority"
# Swap priorities
config["providers"][0]["priority"] = 1
config["providers"][1]["priority"] = 10
config_path.write_text(yaml.dump(config))
router.reload_config()
assert router.providers[0].name == "low-priority"
assert router.providers[1].name == "high-priority"

View File

@@ -49,6 +49,34 @@ class TestConfigLazyValidation:
# Should not raise # Should not raise
validate_startup(force=True) validate_startup(force=True)
def test_validate_startup_exits_on_cors_wildcard_in_production(self):
"""validate_startup() should exit in production when CORS has wildcard."""
from config import settings, validate_startup
with (
patch.object(settings, "timmy_env", "production"),
patch.object(settings, "l402_hmac_secret", "test-secret-hex-value-32"),
patch.object(settings, "l402_macaroon_secret", "test-macaroon-hex-value-32"),
patch.object(settings, "cors_origins", ["*"]),
pytest.raises(SystemExit),
):
validate_startup(force=True)
def test_validate_startup_warns_cors_wildcard_in_dev(self):
"""validate_startup() should warn in dev when CORS has wildcard."""
from config import settings, validate_startup
with (
patch.object(settings, "timmy_env", "development"),
patch.object(settings, "cors_origins", ["*"]),
patch("config._startup_logger") as mock_logger,
):
validate_startup(force=True)
mock_logger.warning.assert_any_call(
"SEC: CORS_ORIGINS contains wildcard '*'"
"restrict to explicit origins before deploying to production."
)
def test_validate_startup_skips_in_test_mode(self): def test_validate_startup_skips_in_test_mode(self):
"""validate_startup() should be a no-op in test mode.""" """validate_startup() should be a no-op in test mode."""
from config import validate_startup from config import validate_startup

View File

@@ -8,11 +8,14 @@ from fastapi.testclient import TestClient
@pytest.fixture @pytest.fixture
def serve_client(): def serve_client():
"""Create a TestClient for the timmy-serve app.""" """Create a TestClient for the timmy-serve app with mocked Timmy agent."""
from timmy_serve.app import create_timmy_serve_app with patch("timmy_serve.app.create_timmy") as mock_create:
mock_create.return_value = MagicMock()
from timmy_serve.app import create_timmy_serve_app
app = create_timmy_serve_app() app = create_timmy_serve_app()
return TestClient(app) with TestClient(app) as client:
yield client
class TestHealthEndpoint: class TestHealthEndpoint:
@@ -34,18 +37,40 @@ class TestServeStatus:
class TestServeChatEndpoint: class TestServeChatEndpoint:
@patch("timmy_serve.app.create_timmy") @patch("timmy_serve.app.create_timmy")
def test_chat_returns_response(self, mock_create, serve_client): def test_chat_returns_response(self, mock_create):
mock_agent = MagicMock() mock_agent = MagicMock()
mock_result = MagicMock() mock_result = MagicMock()
mock_result.content = "I am Timmy." mock_result.content = "I am Timmy."
mock_agent.run.return_value = mock_result mock_agent.run.return_value = mock_result
mock_create.return_value = mock_agent mock_create.return_value = mock_agent
resp = serve_client.post( from timmy_serve.app import create_timmy_serve_app
"/serve/chat",
json={"message": "Who are you?"}, app = create_timmy_serve_app()
) with TestClient(app) as client:
resp = client.post(
"/serve/chat",
json={"message": "Who are you?"},
)
assert resp.status_code == 200 assert resp.status_code == 200
data = resp.json() data = resp.json()
assert data["response"] == "I am Timmy." assert data["response"] == "I am Timmy."
mock_agent.run.assert_called_once_with("Who are you?", stream=False) mock_agent.run.assert_called_once_with("Who are you?", stream=False)
@patch("timmy_serve.app.create_timmy")
def test_agent_cached_at_startup(self, mock_create):
"""Verify create_timmy is called once at startup, not per request."""
mock_agent = MagicMock()
mock_result = MagicMock()
mock_result.content = "reply"
mock_agent.run.return_value = mock_result
mock_create.return_value = mock_agent
from timmy_serve.app import create_timmy_serve_app
app = create_timmy_serve_app()
with TestClient(app) as client:
# Two requests — create_timmy should only be called once (at startup)
client.post("/serve/chat", json={"message": "hello"})
client.post("/serve/chat", json={"message": "world"})
mock_create.assert_called_once()