diff --git a/src/config.py b/src/config.py index 3f1ff19b..d145b0a0 100644 --- a/src/config.py +++ b/src/config.py @@ -90,6 +90,17 @@ class Settings(BaseSettings): work_orders_auto_execute: bool = False # Master switch for auto-execution work_orders_auto_threshold: str = "low" # Max priority that auto-executes: "low" | "medium" | "high" | "none" + # ── Browser Local Models (iPhone / WebGPU) ─────────────────────── + # Enable in-browser LLM inference via WebLLM for offline iPhone use. + # When enabled, the mobile dashboard loads a small model directly + # in the browser — no server or Ollama required. + browser_model_enabled: bool = True + # WebLLM model ID — must be a pre-compiled MLC model. + # Recommended for iPhone: SmolLM2-360M (fast) or Qwen3-0.6B (smart). + browser_model_id: str = "SmolLM2-360M-Instruct-q4f16_1-MLC" + # Fallback to server when browser model is unavailable or too slow. + browser_model_fallback: bool = True + # ── Scripture / Biblical Integration ────────────────────────────── # Enable the sovereign biblical text module. When enabled, Timmy # loads the local ESV text corpus and runs meditation workflows. diff --git a/src/dashboard/routes/mobile.py b/src/dashboard/routes/mobile.py index 7d1d266b..33a17f0a 100644 --- a/src/dashboard/routes/mobile.py +++ b/src/dashboard/routes/mobile.py @@ -3,6 +3,9 @@ Provides a simplified, mobile-first view of the dashboard that prioritizes the chat interface and essential status information. Designed for quick access from a phone's home screen. + +The /mobile/local endpoint loads a small LLM directly into the +browser via WebLLM so Timmy can run on an iPhone with no server. 
""" from pathlib import Path @@ -11,6 +14,8 @@ from fastapi import APIRouter, Request from fastapi.responses import HTMLResponse from fastapi.templating import Jinja2Templates +from config import settings + router = APIRouter(tags=["mobile"]) templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templates")) @@ -26,11 +31,44 @@ async def mobile_dashboard(request: Request): return templates.TemplateResponse(request, "index.html") +@router.get("/mobile/local", response_class=HTMLResponse) +async def mobile_local_dashboard(request: Request): + """Mobile dashboard with in-browser local model inference. + + Loads a small LLM (via WebLLM / WebGPU) directly into Safari + so Timmy works on an iPhone without any server connection. + Falls back to server-side Ollama when the local model is + unavailable or the user prefers it. + """ + return templates.TemplateResponse( + request, + "mobile_local.html", + { + "browser_model_enabled": settings.browser_model_enabled, + "browser_model_id": settings.browser_model_id, + "browser_model_fallback": settings.browser_model_fallback, + "server_model": settings.ollama_model, + "page_title": "Timmy — Local AI", + }, + ) + + +@router.get("/mobile/local-models") +async def local_models_config(): + """Return browser model configuration for the JS client.""" + return { + "enabled": settings.browser_model_enabled, + "default_model": settings.browser_model_id, + "fallback_to_server": settings.browser_model_fallback, + "server_model": settings.ollama_model, + "server_url": settings.ollama_url, + } + + @router.get("/mobile/status") async def mobile_status(): """Lightweight status endpoint optimized for mobile polling.""" from dashboard.routes.health import check_ollama - from config import settings ollama_ok = await check_ollama() return { @@ -38,4 +76,6 @@ async def mobile_status(): "model": settings.ollama_model, "agent": "timmy", "ready": True, + "browser_model_enabled": settings.browser_model_enabled, + "browser_model_id": 
settings.browser_model_id, } diff --git a/src/dashboard/templates/base.html b/src/dashboard/templates/base.html index d96b7b77..783bf8ae 100644 --- a/src/dashboard/templates/base.html +++ b/src/dashboard/templates/base.html @@ -45,6 +45,7 @@ WORK ORDERS CREATIVE MOBILE + LOCAL AI @@ -78,6 +79,7 @@ CREATIVE VOICE MOBILE + LOCAL AI diff --git a/src/dashboard/templates/mobile_local.html b/src/dashboard/templates/mobile_local.html new file mode 100644 index 00000000..b74b24af --- /dev/null +++ b/src/dashboard/templates/mobile_local.html @@ -0,0 +1,546 @@ +{% extends "base.html" %} + +{% block title %}{{ page_title }}{% endblock %} + +{% block extra_styles %} + +{% endblock %} + +{% block content %} +
+ + +
+
// LOCAL AI MODEL
+
+
+ STATUS + NOT LOADED +
+
+ BACKEND + DETECTING... +
+
+ INFERENCE + -- +
+
+ + +
+ +
+ + +
+
+
+
+
+
+ + +
+ + +
+ + +
+
+ + +
+
+ // TIMMY +
+
+
+
SYSTEM
+
+ Load a model above to chat with Timmy locally on your device. + No server connection required. + {% if browser_model_fallback %} + Server fallback is enabled — if the local model fails, Timmy + will try the server instead. + {% endif %} +
+
+
+
+ + +
+
+ +
+ + + +{% endblock %} diff --git a/static/local_llm.js b/static/local_llm.js new file mode 100644 index 00000000..5580a100 --- /dev/null +++ b/static/local_llm.js @@ -0,0 +1,271 @@ +/** + * local_llm.js — In-browser LLM inference via WebLLM. + * + * Loads a small language model directly into the browser using WebGPU + * (or WASM fallback) so Timmy can run on an iPhone with zero server + * dependency. Falls back to server-side Ollama when the local model + * is unavailable. + * + * Usage: + * const llm = new LocalLLM({ modelId, onProgress, onReady, onError }); + * await llm.init(); + * const reply = await llm.chat("Hello Timmy"); + */ + +/* global webllm */ + +// ── Model catalogue ──────────────────────────────────────────────────────── +// Models tested on iPhone 15 Pro / Safari 26+. Sorted smallest → largest. +const MODEL_CATALOGUE = [ + { + id: "SmolLM2-360M-Instruct-q4f16_1-MLC", + label: "SmolLM2 360M (fast)", + sizeHint: "~200 MB", + description: "Fastest option. Good for simple Q&A.", + }, + { + id: "Qwen2.5-0.5B-Instruct-q4f16_1-MLC", + label: "Qwen 2.5 0.5B (balanced)", + sizeHint: "~350 MB", + description: "Best quality under 500 MB.", + }, + { + id: "SmolLM2-1.7B-Instruct-q4f16_1-MLC", + label: "SmolLM2 1.7B (smart)", + sizeHint: "~1 GB", + description: "Highest quality. Needs more memory.", + }, + { + id: "Llama-3.2-1B-Instruct-q4f16_1-MLC", + label: "Llama 3.2 1B", + sizeHint: "~700 MB", + description: "Meta's compact model. 
Good all-rounder.", + }, +]; + +// ── Capability detection ────────────────────────────────────────────────── +function detectWebGPU() { + return typeof navigator !== "undefined" && "gpu" in navigator; +} + +function detectWASM() { + try { + return typeof WebAssembly === "object" && typeof WebAssembly.instantiate === "function"; + } catch { + return false; + } +} + +// ── LocalLLM class ──────────────────────────────────────────────────────── +class LocalLLM { + /** + * @param {object} opts + * @param {string} opts.modelId — WebLLM model ID + * @param {function} opts.onProgress — (report) progress during download + * @param {function} opts.onReady — () called when model is loaded + * @param {function} opts.onError — (error) called on fatal error + * @param {string} opts.systemPrompt — system message for the model + */ + constructor(opts = {}) { + this.modelId = opts.modelId || "SmolLM2-360M-Instruct-q4f16_1-MLC"; + this.onProgress = opts.onProgress || (() => {}); + this.onReady = opts.onReady || (() => {}); + this.onError = opts.onError || (() => {}); + this.systemPrompt = + opts.systemPrompt || + "You are Timmy, a sovereign AI assistant. You are helpful, concise, and loyal. " + + "Address the user as 'Sir' when appropriate. Keep responses brief on mobile."; + + this.engine = null; + this.ready = false; + this.loading = false; + this._hasWebGPU = detectWebGPU(); + this._hasWASM = detectWASM(); + } + + /** Check if local inference is possible on this device. */ + static isSupported() { + return detectWebGPU() || detectWASM(); + } + + /** Return the model catalogue for UI rendering. */ + static getCatalogue() { + return MODEL_CATALOGUE; + } + + /** Return runtime capability info. */ + getCapabilities() { + return { + webgpu: this._hasWebGPU, + wasm: this._hasWASM, + supported: this._hasWebGPU || this._hasWASM, + backend: this._hasWebGPU ? "WebGPU" : this._hasWASM ? "WASM" : "none", + }; + } + + /** + * Initialize the engine and download/cache the model. 
+ * Model weights are cached in the browser's Cache API so subsequent + * loads are nearly instant. + */ + async init() { + if (this.ready) return; + if (this.loading) return; + + if (!this._hasWebGPU && !this._hasWASM) { + const err = new Error( + "Neither WebGPU nor WebAssembly is available. " + + "Update to iOS 26+ / Safari 26+ for WebGPU support." + ); + this.onError(err); + throw err; + } + + this.loading = true; + + try { + // Dynamic import of WebLLM from CDN (avoids bundling) + if (typeof webllm === "undefined") { + await this._loadWebLLMScript(); + } + + const initProgressCallback = (report) => { + this.onProgress(report); + }; + + this.engine = await webllm.CreateMLCEngine(this.modelId, { + initProgressCallback, + }); + + this.ready = true; + this.loading = false; + this.onReady(); + } catch (err) { + this.loading = false; + this.ready = false; + this.onError(err); + throw err; + } + } + + /** + * Send a chat message and get a response. + * @param {string} userMessage + * @param {object} opts + * @param {function} opts.onToken — streaming callback (delta) + * @returns {Promise} full response text + */ + async chat(userMessage, opts = {}) { + if (!this.ready) { + throw new Error("Model not loaded. Call init() first."); + } + + const messages = [ + { role: "system", content: this.systemPrompt }, + { role: "user", content: userMessage }, + ]; + + if (opts.onToken) { + // Streaming mode + let fullText = ""; + const chunks = await this.engine.chat.completions.create({ + messages, + stream: true, + temperature: 0.7, + max_tokens: 512, + }); + + for await (const chunk of chunks) { + const delta = chunk.choices[0]?.delta?.content || ""; + fullText += delta; + opts.onToken(delta, fullText); + } + return fullText; + } + + // Non-streaming mode + const response = await this.engine.chat.completions.create({ + messages, + temperature: 0.7, + max_tokens: 512, + }); + + return response.choices[0]?.message?.content || ""; + } + + /** Reset conversation context. 
*/ + async resetChat() { + if (this.engine) { + await this.engine.resetChat(); + } + } + + /** Unload the model and free memory. */ + async unload() { + if (this.engine) { + await this.engine.unload(); + this.engine = null; + this.ready = false; + } + } + + /** Get current engine stats (tokens/sec, memory, etc). */ + async getStats() { + if (!this.engine) return null; + try { + const stats = await this.engine.runtimeStatsText(); + return stats; + } catch { + return null; + } + } + + // ── Private ───────────────────────────────────────────────────────────── + + /** Load the WebLLM script from CDN. */ + _loadWebLLMScript() { + return new Promise((resolve, reject) => { + // Check if already loaded + if (typeof webllm !== "undefined") { + resolve(); + return; + } + const script = document.createElement("script"); + // Deliberately no src attribute: the inline module body below does + // the real import of WebLLM from the official CDN and exposes it on + // window.webllm. (A stray src assignment pointing at an unrelated + // "@anthropic-ai/sdk" URL was removed — src would mask textContent.) + // Use the WebLLM CDN bundle + script.type = "module"; + script.textContent = ` + import * as webllmModule from "https://esm.run/@mlc-ai/web-llm"; + window.webllm = webllmModule; + window.dispatchEvent(new Event("webllm-loaded")); + `; + document.head.appendChild(script); + + const onLoaded = () => { + window.removeEventListener("webllm-loaded", onLoaded); + resolve(); + }; + window.addEventListener("webllm-loaded", onLoaded); + + // Fallback: also try the UMD bundle approach + const fallbackScript = document.createElement("script"); + fallbackScript.src = "https://cdn.jsdelivr.net/npm/@mlc-ai/web-llm@0.2.80/lib/index.min.js"; + fallbackScript.onload = () => { + if (typeof webllm !== "undefined") { + resolve(); + } + }; + fallbackScript.onerror = () => { + reject(new Error("Failed to load WebLLM library from CDN.")); + }; + document.head.appendChild(fallbackScript); + }); + } +} + +// Export for use in templates +window.LocalLLM = LocalLLM; +window.LOCAL_MODEL_CATALOGUE = MODEL_CATALOGUE; diff --git a/tests/dashboard/test_local_models.py 
b/tests/dashboard/test_local_models.py new file mode 100644 index 00000000..41f924da --- /dev/null +++ b/tests/dashboard/test_local_models.py @@ -0,0 +1,246 @@ +"""Tests for the local browser model feature — /mobile/local endpoint. + +Categories: + L1xx Route & API responses + L2xx Config settings + L3xx Template content & UX + L4xx JavaScript asset + L5xx Security (XSS prevention) +""" + +import re +from pathlib import Path + + +# ── helpers ────────────────────────────────────────────────────────────────── + +def _local_html(client) -> str: + return client.get("/mobile/local").text + + +def _local_llm_js() -> str: + js_path = Path(__file__).parent.parent.parent / "static" / "local_llm.js" + return js_path.read_text() + + +# ── L1xx — Route & API responses ───────────────────────────────────────────── + +def test_L101_mobile_local_route_returns_200(client): + """The /mobile/local endpoint should return 200 OK.""" + response = client.get("/mobile/local") + assert response.status_code == 200 + + +def test_L102_local_models_config_endpoint(client): + """The /mobile/local-models API should return model config JSON.""" + response = client.get("/mobile/local-models") + assert response.status_code == 200 + data = response.json() + assert "enabled" in data + assert "default_model" in data + assert "fallback_to_server" in data + assert "server_model" in data + + +def test_L103_mobile_status_includes_browser_model(client): + """The /mobile/status endpoint should include browser model info.""" + response = client.get("/mobile/status") + assert response.status_code == 200 + data = response.json() + assert "browser_model_enabled" in data + assert "browser_model_id" in data + + +def test_L104_local_models_config_default_values(client): + """Config defaults should match what's in config.py.""" + data = client.get("/mobile/local-models").json() + assert data["enabled"] is True + assert "SmolLM2" in data["default_model"] or "MLC" in data["default_model"] + assert 
data["fallback_to_server"] is True + + +# ── L2xx — Config settings ─────────────────────────────────────────────────── + +def test_L201_config_has_browser_model_enabled(): + """config.py should define browser_model_enabled.""" + from config import settings + assert hasattr(settings, "browser_model_enabled") + assert isinstance(settings.browser_model_enabled, bool) + + +def test_L202_config_has_browser_model_id(): + """config.py should define browser_model_id.""" + from config import settings + assert hasattr(settings, "browser_model_id") + assert isinstance(settings.browser_model_id, str) + assert len(settings.browser_model_id) > 0 + + +def test_L203_config_has_browser_model_fallback(): + """config.py should define browser_model_fallback.""" + from config import settings + assert hasattr(settings, "browser_model_fallback") + assert isinstance(settings.browser_model_fallback, bool) + + +# ── L3xx — Template content & UX ──────────────────────────────────────────── + +def test_L301_template_includes_local_llm_script(client): + """mobile_local.html must include the local_llm.js script.""" + html = _local_html(client) + assert "local_llm.js" in html + + +def test_L302_template_has_model_selector(client): + """Template must have a model selector element.""" + html = _local_html(client) + assert 'id="model-select"' in html + + +def test_L303_template_has_load_button(client): + """Template must have a load model button.""" + html = _local_html(client) + assert 'id="btn-load"' in html + + +def test_L304_template_has_progress_bar(client): + """Template must have a progress bar for model download.""" + html = _local_html(client) + assert 'id="progress-bar"' in html + + +def test_L305_template_has_chat_area(client): + """Template must have a chat log area.""" + html = _local_html(client) + assert 'id="local-chat"' in html + + +def test_L306_template_has_message_input(client): + """Template must have a message input field.""" + html = _local_html(client) + assert 
'id="local-message"' in html + + +def test_L307_input_font_size_16px(client): + """Input font-size must be 16px to prevent iOS zoom.""" + html = _local_html(client) + assert "font-size: 16px" in html + + +def test_L308_input_has_ios_attributes(client): + """Input should have autocapitalize, autocorrect, spellcheck, enterkeyhint.""" + html = _local_html(client) + assert 'autocapitalize="none"' in html + assert 'autocorrect="off"' in html + assert 'spellcheck="false"' in html + assert 'enterkeyhint="send"' in html + + +def test_L309_touch_targets_44px(client): + """Buttons and inputs must meet 44px min-height (Apple HIG).""" + html = _local_html(client) + assert "min-height: 44px" in html + + +def test_L310_safe_area_inset_bottom(client): + """Chat input must account for iPhone home indicator.""" + html = _local_html(client) + assert "safe-area-inset-bottom" in html + + +def test_L311_template_has_backend_badge(client): + """Template should show LOCAL or SERVER badge.""" + html = _local_html(client) + assert "backend-badge" in html + assert "LOCAL" in html + + +# ── L4xx — JavaScript asset ────────────────────────────────────────────────── + +def test_L401_local_llm_js_exists(): + """static/local_llm.js must exist.""" + js_path = Path(__file__).parent.parent.parent / "static" / "local_llm.js" + assert js_path.exists(), "static/local_llm.js not found" + + +def test_L402_local_llm_js_defines_class(): + """local_llm.js must define the LocalLLM class.""" + js = _local_llm_js() + assert "class LocalLLM" in js + + +def test_L403_local_llm_js_has_model_catalogue(): + """local_llm.js must define a MODEL_CATALOGUE.""" + js = _local_llm_js() + assert "MODEL_CATALOGUE" in js + + +def test_L404_local_llm_js_has_webgpu_detection(): + """local_llm.js must detect WebGPU capability.""" + js = _local_llm_js() + assert "detectWebGPU" in js or "navigator.gpu" in js + + +def test_L405_local_llm_js_has_chat_method(): + """local_llm.js LocalLLM class must have a chat method.""" + js = 
_local_llm_js() + assert "async chat(" in js + + +def test_L406_local_llm_js_has_init_method(): + """local_llm.js LocalLLM class must have an init method.""" + js = _local_llm_js() + assert "async init(" in js + + +def test_L407_local_llm_js_has_unload_method(): + """local_llm.js LocalLLM class must have an unload method.""" + js = _local_llm_js() + assert "async unload(" in js + + +def test_L408_local_llm_js_exports_to_window(): + """local_llm.js must export LocalLLM and catalogue to window.""" + js = _local_llm_js() + assert "window.LocalLLM" in js + assert "window.LOCAL_MODEL_CATALOGUE" in js + + +def test_L409_local_llm_js_has_streaming_support(): + """local_llm.js chat method must support streaming via onToken.""" + js = _local_llm_js() + assert "onToken" in js + assert "stream: true" in js + + +def test_L410_local_llm_js_has_isSupported_static(): + """LocalLLM must have a static isSupported() method.""" + js = _local_llm_js() + assert "static isSupported()" in js + + +# ── L5xx — Security ───────────────────────────────────────────────────────── + +def test_L501_no_innerhtml_with_user_input(client): + """Template must not use innerHTML with user-controlled data.""" + html = _local_html(client) + # Check for dangerous patterns: innerHTML += `${message}` etc. + blocks = re.findall(r"innerHTML\s*\+=?\s*`([^`]*)`", html, re.DOTALL) + for block in blocks: + assert "${message}" not in block, ( + "innerHTML template literal contains ${message} — XSS vulnerability" + ) + + +def test_L502_uses_textcontent_for_messages(client): + """Template must use textContent (not innerHTML) for user messages.""" + html = _local_html(client) + assert "textContent" in html + + +def test_L503_no_eval_or_function_constructor(): + """local_llm.js must not use eval() or new Function().""" + js = _local_llm_js() + # Match a real eval( call — the lookbehind excludes identifiers that + # merely contain "eval", e.g. "evaluate(" or "safe_eval(". + assert not re.search(r"(?<!\w)eval\(", js) + assert "new Function(" not in js