feat: add in-browser local model support for iPhone via WebLLM

Enable Timmy to run directly on iPhone by loading a small LLM into
the browser via WebGPU (Safari 26+ / iOS 26+). No server connection
required — fully sovereign, fully offline.

New files:
- static/local_llm.js: WebLLM wrapper with model catalogue, WebGPU
  detection, streaming chat, and progress callbacks
- templates/mobile_local.html: Mobile-optimized UI with model
  selector, download progress, LOCAL/SERVER badge, and chat
- tests/dashboard/test_local_models.py: 31 tests covering routes,
  config, template UX, JS asset, and XSS prevention (a flavor of
  these checks is sketched below)
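
For flavor, a minimal sketch of the kind of checks that test file
makes. The real tests are not shown in this diff, so the app import
path and test names below are assumptions:

    # Hypothetical sketch in the spirit of tests/dashboard/test_local_models.py;
    # the `dashboard.app` import path and the assertions are assumptions.
    from fastapi.testclient import TestClient

    from dashboard.app import app  # assumed FastAPI app entry point

    client = TestClient(app)

    def test_local_page_renders():
        resp = client.get("/mobile/local")
        assert resp.status_code == 200
        assert "LOCAL" in resp.text  # the LOCAL/SERVER badge

    def test_local_models_config_keys():
        body = client.get("/mobile/local-models").json()
        assert set(body) >= {"enabled", "default_model", "fallback_to_server"}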

Changes:
- config.py: browser_model_enabled, browser_model_id,
  browser_model_fallback settings (sketched after this list)
- routes/mobile.py: /mobile/local page, /mobile/local-models API
- base.html: LOCAL AI nav link
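
A minimal sketch of how those settings could look in config.py,
assuming a pydantic-settings BaseSettings class; the field names come
from this commit, while the base class, defaults, and model ID string
are assumptions:

    # Sketch only: field names are from the commit message; everything
    # else (base class, defaults, model ID) is an assumption.
    from pydantic_settings import BaseSettings

    class Settings(BaseSettings):
        # Server-side settings already referenced by the mobile routes
        ollama_url: str = "http://localhost:11434"
        ollama_model: str = "llama3.2"

        # In-browser (WebLLM) model settings added by this commit
        browser_model_enabled: bool = True
        browser_model_id: str = "SmolLM2-360M-Instruct-q4f16_1-MLC"
        browser_model_fallback: bool = True  # fall back to server Ollama

    settings = Settings()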

Supported models: SmolLM2-360M (~200MB), Qwen2.5-0.5B (~350MB),
SmolLM2-1.7B (~1GB), Llama-3.2-1B (~700MB). Falls back to
server-side Ollama when the local model is unavailable.

https://claude.ai/code/session_01Cqkvr4sZbED7T3iDu1rwSD
Author: Claude
Date: 2026-02-27 00:03:05 +00:00
parent 528c86298a
commit 3b7fcc5ebc

6 changed files with 1117 additions and 1 deletions

routes/mobile.py

@@ -3,6 +3,9 @@
 Provides a simplified, mobile-first view of the dashboard that
 prioritizes the chat interface and essential status information.
 Designed for quick access from a phone's home screen.
+
+The /mobile/local endpoint loads a small LLM directly into the
+browser via WebLLM so Timmy can run on an iPhone with no server.
 """
 from pathlib import Path
@@ -11,6 +14,8 @@ from fastapi import APIRouter, Request
 from fastapi.responses import HTMLResponse
 from fastapi.templating import Jinja2Templates
+
+from config import settings

 router = APIRouter(tags=["mobile"])
 templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templates"))
@@ -26,11 +31,44 @@ async def mobile_dashboard(request: Request):
     return templates.TemplateResponse(request, "index.html")


+@router.get("/mobile/local", response_class=HTMLResponse)
+async def mobile_local_dashboard(request: Request):
+    """Mobile dashboard with in-browser local model inference.
+
+    Loads a small LLM (via WebLLM / WebGPU) directly into Safari
+    so Timmy works on an iPhone without any server connection.
+    Falls back to server-side Ollama when the local model is
+    unavailable or the user prefers it.
+    """
+    return templates.TemplateResponse(
+        request,
+        "mobile_local.html",
+        {
+            "browser_model_enabled": settings.browser_model_enabled,
+            "browser_model_id": settings.browser_model_id,
+            "browser_model_fallback": settings.browser_model_fallback,
+            "server_model": settings.ollama_model,
+            "page_title": "Timmy — Local AI",
+        },
+    )
+
+
+@router.get("/mobile/local-models")
+async def local_models_config():
+    """Return browser model configuration for the JS client."""
+    return {
+        "enabled": settings.browser_model_enabled,
+        "default_model": settings.browser_model_id,
+        "fallback_to_server": settings.browser_model_fallback,
+        "server_model": settings.ollama_model,
+        "server_url": settings.ollama_url,
+    }
+
+
 @router.get("/mobile/status")
 async def mobile_status():
     """Lightweight status endpoint optimized for mobile polling."""
     from dashboard.routes.health import check_ollama
     from config import settings

     ollama_ok = await check_ollama()
     return {
@@ -38,4 +76,6 @@ async def mobile_status():
         "model": settings.ollama_model,
         "agent": "timmy",
         "ready": True,
+        "browser_model_enabled": settings.browser_model_enabled,
+        "browser_model_id": settings.browser_model_id,
     }
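
With the routes above in place, a client can discover the browser-model
configuration and poll status. A quick usage sketch with httpx; the
base URL is an assumption:

    # Usage sketch against the endpoints above (base URL assumed).
    import httpx

    BASE = "http://localhost:8000"

    config = httpx.get(f"{BASE}/mobile/local-models").json()
    if config["enabled"]:
        print("Browser model:", config["default_model"])
    elif config["fallback_to_server"]:
        print("Falling back to server model:", config["server_model"])

    status = httpx.get(f"{BASE}/mobile/status").json()
    print("Ready:", status["ready"], "| server model:", status["model"])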