Merge branch 'main' into kimi/issue-490

fix: use settings.ollama_url instead of hardcoded fallback in cascade router (#491 )
Co-authored-by: Kimi Agent <kimi@timmy.local> Co-committed-by: Kimi Agent <kimi@timmy.local>
2026-03-19 16:06:57 -04:00 · 2026-03-19 16:02:20 -04:00 · 2026-03-19 15:57:22 -04:00 · 2026-03-19 15:52:29 -04:00 · 2026-03-19 15:44:10 -04:00 · 2026-03-19 15:29:26 -04:00
9 changed files with 89 additions and 76 deletions
--- a/config/providers.yaml
+++ b/config/providers.yaml
@@ -54,19 +54,6 @@ providers:
        context_window: 2048
        capabilities: [text, vision, streaming]
  # Secondary: Local AirLLM (if installed)
  - name: airllm-local
    type: airllm
    enabled: false  # Enable if pip install airllm
    priority: 2
    models:
      - name: 70b
        default: true
        capabilities: [text, tools, json, streaming]
      - name: 8b
        capabilities: [text, tools, json, streaming]
      - name: 405b
        capabilities: [text, tools, json, streaming]
  # Tertiary: OpenAI (if API key available)
  - name: openai-backup
--- a/src/config.py
+++ b/src/config.py
@@ -64,17 +64,10 @@ class Settings(BaseSettings):
    # Seconds to wait for user confirmation before auto-rejecting.
    discord_confirm_timeout: int = 120
-    # ── AirLLM / backend selection ───────────────────────────────────────────
+    # ── Backend selection ────────────────────────────────────────────────────
    # "ollama"  — always use Ollama (default, safe everywhere)
-    # "airllm"  — always use AirLLM (requires pip install ".[bigbrain]")
+    # "auto"    — pick best available local backend, fall back to Ollama
-    # "auto"    — use AirLLM on Apple Silicon if airllm is installed,
+    timmy_model_backend: Literal["ollama", "grok", "claude", "auto"] = "ollama"
    #             fall back to Ollama otherwise
    timmy_model_backend: Literal["ollama", "airllm", "grok", "claude", "auto"] = "ollama"
    # AirLLM model size when backend is airllm or auto.
    # Larger = smarter, but needs more RAM / disk.
    # 8b  ~16 GB  |  70b  ~140 GB  |  405b  ~810 GB
    airllm_model_size: Literal["8b", "70b", "405b"] = "70b"
    # ── Grok (xAI) — opt-in premium cloud backend ────────────────────────
    # Grok is a premium augmentation layer — local-first ethos preserved.
@@ -476,8 +469,19 @@ def validate_startup(*, force: bool = False) -> None:
                ", ".join(_missing),
            )
            sys.exit(1)
        if "*" in settings.cors_origins:
            _startup_logger.error(
                "PRODUCTION SECURITY ERROR: CORS wildcard '*' is not allowed "
                "in production. Set CORS_ORIGINS to explicit origins."
            )
            sys.exit(1)
        _startup_logger.info("Production mode: security secrets validated ✓")
    else:
        if "*" in settings.cors_origins:
            _startup_logger.warning(
                "SEC: CORS_ORIGINS contains wildcard '*' — "
                "restrict to explicit origins before deploying to production."
            )
        if not settings.l402_hmac_secret:
            _startup_logger.warning(
                "SEC: L402_HMAC_SECRET is not set — "
--- a/src/dashboard/middleware/csrf.py
+++ b/src/dashboard/middleware/csrf.py
@@ -100,7 +100,7 @@ class CSRFMiddleware(BaseHTTPMiddleware):
            ...
    Usage:
-        app.add_middleware(CSRFMiddleware, secret="your-secret-key")
+        app.add_middleware(CSRFMiddleware, secret=settings.csrf_secret)
    Attributes:
        secret: Secret key for token signing (optional, for future use).
--- a/src/infrastructure/router/cascade.py
+++ b/src/infrastructure/router/cascade.py
@@ -18,6 +18,8 @@ from enum import Enum
 from pathlib import Path
 from typing import Any
 from config import settings
 try:
    import yaml
 except ImportError:
@@ -100,7 +102,7 @@ class Provider:
    """LLM provider configuration and state."""
    name: str
-    type: str  # ollama, openai, anthropic, airllm
+    type: str  # ollama, openai, anthropic, grok
    enabled: bool
    priority: int
    url: str | None = None
@@ -301,22 +303,13 @@ class CascadeRouter:
                # Can't check without requests, assume available
                return True
            try:
-                url = provider.url or "http://localhost:11434"
+                url = provider.url or settings.ollama_url
                response = requests.get(f"{url}/api/tags", timeout=5)
                return response.status_code == 200
            except Exception as exc:
                logger.debug("Ollama provider check error: %s", exc)
                return False
        elif provider.type == "airllm":
            # Check if airllm is installed
            try:
                import importlib.util
                return importlib.util.find_spec("airllm") is not None
            except (ImportError, ModuleNotFoundError):
                return False
        elif provider.type in ("openai", "anthropic", "grok"):
            # Check if API key is set
            return provider.api_key is not None and provider.api_key != ""
@@ -826,7 +819,9 @@ class CascadeRouter:
            Summary dict with added/removed/preserved counts.
        """
        # Snapshot current runtime state keyed by provider name
-        old_state: dict[str, tuple[ProviderMetrics, CircuitState, float | None, int, ProviderStatus]] = {}
+        old_state: dict[
            str, tuple[ProviderMetrics, CircuitState, float | None, int, ProviderStatus]
        ] = {}
        for p in self.providers:
            old_state[p.name] = (
                p.metrics,
--- a/src/timmy/agent.py
+++ b/src/timmy/agent.py
@@ -220,7 +220,7 @@ def create_timmy(
    print_response(message, stream).
    """
    resolved = _resolve_backend(backend)
-    size = model_size or settings.airllm_model_size
+    size = model_size or "70b"
    if resolved == "claude":
        from timmy.backends import ClaudeBackend
--- a/src/timmy_serve/app.py
+++ b/src/timmy_serve/app.py
@@ -75,6 +75,8 @@ def create_timmy_serve_app() -> FastAPI:
    @asynccontextmanager
    async def lifespan(app: FastAPI):
        logger.info("Timmy Serve starting")
        app.state.timmy = create_timmy()
        logger.info("Timmy agent cached in app state")
        yield
        logger.info("Timmy Serve shutting down")
@@ -101,7 +103,7 @@ def create_timmy_serve_app() -> FastAPI:
    async def serve_chat(request: Request, body: ChatRequest):
        """Process a chat request."""
        try:
-            timmy = create_timmy()
+            timmy = request.app.state.timmy
            result = timmy.run(body.message, stream=False)
            response_text = result.content if hasattr(result, "content") else str(result)
--- a/tests/infrastructure/test_router_cascade.py
+++ b/tests/infrastructure/test_router_cascade.py
@@ -2,7 +2,7 @@
 import time
 from pathlib import Path
-from unittest.mock import AsyncMock, MagicMock, patch
+from unittest.mock import AsyncMock, patch
 import pytest
 import yaml
@@ -489,34 +489,6 @@ class TestProviderAvailabilityCheck:
        assert router._check_provider_available(provider) is False
    def test_check_airllm_installed(self):
        """An ``airllm`` provider is reported available when its module spec is found."""
        cascade = CascadeRouter(config_path=Path("/nonexistent"))
        airllm_provider = Provider(name="airllm", type="airllm", enabled=True, priority=1)

        # A non-None spec from find_spec stands in for an installed package.
        with patch("importlib.util.find_spec", return_value=MagicMock()):
            is_available = cascade._check_provider_available(airllm_provider)

        assert is_available is True
    def test_check_airllm_not_installed(self):
        """An ``airllm`` provider is reported unavailable when no module spec is found."""
        cascade = CascadeRouter(config_path=Path("/nonexistent"))
        airllm_provider = Provider(name="airllm", type="airllm", enabled=True, priority=1)

        # find_spec returning None stands in for a missing package.
        with patch("importlib.util.find_spec", return_value=None):
            is_available = cascade._check_provider_available(airllm_provider)

        assert is_available is False
 class TestCascadeRouterReload:
    """Test hot-reload of providers.yaml."""
--- a/tests/test_lazy_init.py
+++ b/tests/test_lazy_init.py
@@ -49,6 +49,34 @@ class TestConfigLazyValidation:
            # Should not raise
            validate_startup(force=True)
    def test_validate_startup_exits_on_cors_wildcard_in_production(self):
        """Production startup must raise ``SystemExit`` when CORS origins contain ``*``."""
        from config import settings, validate_startup

        # NOTE(review): the two secrets are presumably set so that a missing-secret
        # check cannot be the cause of the exit — confirm against validate_startup.
        with (
            patch.object(settings, "timmy_env", "production"),
            patch.object(settings, "l402_hmac_secret", "test-secret-hex-value-32"),
            patch.object(settings, "l402_macaroon_secret", "test-macaroon-hex-value-32"),
            patch.object(settings, "cors_origins", ["*"]),
        ):
            with pytest.raises(SystemExit):
                validate_startup(force=True)
    def test_validate_startup_warns_cors_wildcard_in_dev(self):
        """Development startup logs a warning when CORS origins contain ``*``."""
        from config import settings, validate_startup

        expected_warning = (
            "SEC: CORS_ORIGINS contains wildcard '*' — "
            "restrict to explicit origins before deploying to production."
        )

        with (
            patch.object(settings, "timmy_env", "development"),
            patch.object(settings, "cors_origins", ["*"]),
            patch("config._startup_logger") as startup_log,
        ):
            validate_startup(force=True)
            # Other warnings may be logged too, so only require this one to be present.
            startup_log.warning.assert_any_call(expected_warning)
    def test_validate_startup_skips_in_test_mode(self):
        """validate_startup() should be a no-op in test mode."""
        from config import validate_startup
--- a/tests/timmy/test_timmy_serve_app.py
+++ b/tests/timmy/test_timmy_serve_app.py
@@ -8,11 +8,14 @@ from fastapi.testclient import TestClient
@pytest.fixture
 def serve_client():
-    """Create a TestClient for the timmy-serve app."""
+    """Create a TestClient for the timmy-serve app with mocked Timmy agent."""
-    from timmy_serve.app import create_timmy_serve_app
+    with patch("timmy_serve.app.create_timmy") as mock_create:
        mock_create.return_value = MagicMock()
        from timmy_serve.app import create_timmy_serve_app
-    app = create_timmy_serve_app()
+        app = create_timmy_serve_app()
-    return TestClient(app)
+        with TestClient(app) as client:
            yield client
 class TestHealthEndpoint:
@@ -34,18 +37,40 @@ class TestServeStatus:
 class TestServeChatEndpoint:
    @patch("timmy_serve.app.create_timmy")
-    def test_chat_returns_response(self, mock_create, serve_client):
+    def test_chat_returns_response(self, mock_create):
        mock_agent = MagicMock()
        mock_result = MagicMock()
        mock_result.content = "I am Timmy."
        mock_agent.run.return_value = mock_result
        mock_create.return_value = mock_agent
-        resp = serve_client.post(
+        from timmy_serve.app import create_timmy_serve_app
-            "/serve/chat",
+
-            json={"message": "Who are you?"},
+        app = create_timmy_serve_app()
-        )
+        with TestClient(app) as client:
            resp = client.post(
                "/serve/chat",
                json={"message": "Who are you?"},
            )
        assert resp.status_code == 200
        data = resp.json()
        assert data["response"] == "I am Timmy."
        mock_agent.run.assert_called_once_with("Who are you?", stream=False)
    @patch("timmy_serve.app.create_timmy")
    def test_agent_cached_at_startup(self, mock_create):
        """The agent factory is invoked exactly once even when several chats are served."""
        canned_reply = MagicMock()
        canned_reply.content = "reply"
        cached_agent = MagicMock()
        cached_agent.run.return_value = canned_reply
        mock_create.return_value = cached_agent

        from timmy_serve.app import create_timmy_serve_app

        serve_app = create_timmy_serve_app()
        with TestClient(serve_app) as client:
            # Two requests — the factory must still have been called only once.
            for message in ("hello", "world"):
                client.post("/serve/chat", json={"message": message})

        mock_create.assert_called_once()
Author	SHA1	Message	Date
Timmy Time	4d8272f64e	Merge branch 'main' into kimi/issue-490	2026-03-19 16:06:57 -04:00
Kimi Agent	d70e4f810a	fix: use settings.ollama_url instead of hardcoded fallback in cascade router (#491 ) Co-authored-by: Kimi Agent <kimi@timmy.local> Co-committed-by: Kimi Agent <kimi@timmy.local>	2026-03-19 16:02:20 -04:00
kimi	0b81a01c90	fix: use settings.ollama_url instead of hardcoded fallback in cascade router Fixes #490	2026-03-19 15:57:22 -04:00
Kimi Agent	7f20742fcf	fix: replace hardcoded secret placeholder in CSRF middleware docstring (#488 ) Co-authored-by: Kimi Agent <kimi@timmy.local> Co-committed-by: Kimi Agent <kimi@timmy.local>	2026-03-19 15:52:29 -04:00
Timmy Time	15eb7c3b45	[loop-cycle-538] refactor: remove dead airllm provider from cascade router (#459 ) (#481 )	2026-03-19 15:44:10 -04:00
Timmy Time	dbc2fd5b0f	[loop-cycle-536] fix: validate_startup checks CORS wildcard in production (#472 ) (#478 )	2026-03-19 15:29:26 -04:00
Timmy Time	3c3aca57f1	[loop-cycle-535] perf: cache Timmy agent at startup (#471 ) (#476 ) ## What Cache the Timmy agent instance at app startup (in lifespan) instead of creating a new one per `/serve/chat` request. ## Changes - `src/timmy_serve/app.py`: Create agent in lifespan, store in `app.state.timmy` - `tests/timmy/test_timmy_serve_app.py`: Updated tests for lifespan-based caching, added `test_agent_cached_at_startup` 2085 unit tests pass. 2102 pre-push tests pass. 78.5% coverage. Closes #471 Co-authored-by: Timmy <timmy@timmytime.ai> Reviewed-on: http://localhost:3000/rockachopa/Timmy-time-dashboard/pulls/476 Co-authored-by: Timmy Time <timmy@Alexanderwhitestone.ai> Co-committed-by: Timmy Time <timmy@Alexanderwhitestone.ai>	2026-03-19 15:28:57 -04:00
Kimi Agent	0ae00af3f8	fix: remove AirLLM config settings from config.py (#475 ) Co-authored-by: Kimi Agent <kimi@timmy.local> Co-committed-by: Kimi Agent <kimi@timmy.local>	2026-03-19 15:24:43 -04:00