Merge pull request #60 from AlexanderWhitestone/claude/local-models-iphone-EwXtC

This commit is contained in:
Alexander Whitestone
2026-02-26 19:24:32 -05:00
committed by GitHub
6 changed files with 1117 additions and 1 deletions

View File

@@ -90,6 +90,17 @@ class Settings(BaseSettings):
work_orders_auto_execute: bool = False # Master switch for auto-execution
work_orders_auto_threshold: str = "low" # Max priority that auto-executes: "low" | "medium" | "high" | "none"
# ── Browser Local Models (iPhone / WebGPU) ───────────────────────
# Enable in-browser LLM inference via WebLLM for offline iPhone use.
# When enabled, the mobile dashboard loads a small model directly
# in the browser — no server or Ollama required.
browser_model_enabled: bool = True
# WebLLM model ID — must be a pre-compiled MLC model.
# Recommended for iPhone: SmolLM2-360M (fast) or Qwen3-0.6B (smart).
browser_model_id: str = "SmolLM2-360M-Instruct-q4f16_1-MLC"
# Fallback to server when browser model is unavailable or too slow.
browser_model_fallback: bool = True
# ── Scripture / Biblical Integration ──────────────────────────────
# Enable the sovereign biblical text module. When enabled, Timmy
# loads the local ESV text corpus and runs meditation workflows.

View File

@@ -3,6 +3,9 @@
Provides a simplified, mobile-first view of the dashboard that
prioritizes the chat interface and essential status information.
Designed for quick access from a phone's home screen.
The /mobile/local endpoint loads a small LLM directly into the
browser via WebLLM so Timmy can run on an iPhone with no server.
"""
from pathlib import Path
@@ -11,6 +14,8 @@ from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from config import settings
router = APIRouter(tags=["mobile"])
templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templates"))
@@ -26,11 +31,44 @@ async def mobile_dashboard(request: Request):
return templates.TemplateResponse(request, "index.html")
@router.get("/mobile/local", response_class=HTMLResponse)
async def mobile_local_dashboard(request: Request):
    """Mobile dashboard with in-browser local model inference.

    Renders ``mobile_local.html``, which loads a small LLM (WebLLM /
    WebGPU) straight into Safari so Timmy works on an iPhone without
    any server connection, falling back to server-side Ollama when
    the local model is unavailable or the user prefers it.
    """
    # Collect the template context up front so the render call stays flat.
    context = {
        "browser_model_enabled": settings.browser_model_enabled,
        "browser_model_id": settings.browser_model_id,
        "browser_model_fallback": settings.browser_model_fallback,
        "server_model": settings.ollama_model,
        "page_title": "Timmy — Local AI",
    }
    return templates.TemplateResponse(request, "mobile_local.html", context)
@router.get("/mobile/local-models")
async def local_models_config():
    """Return browser model configuration for the JS client."""
    # Built key-by-key so each config source line is individually greppable.
    config = {}
    config["enabled"] = settings.browser_model_enabled
    config["default_model"] = settings.browser_model_id
    config["fallback_to_server"] = settings.browser_model_fallback
    config["server_model"] = settings.ollama_model
    config["server_url"] = settings.ollama_url
    return config
@router.get("/mobile/status")
async def mobile_status():
"""Lightweight status endpoint optimized for mobile polling."""
from dashboard.routes.health import check_ollama
from config import settings
ollama_ok = await check_ollama()
return {
@@ -38,4 +76,6 @@ async def mobile_status():
"model": settings.ollama_model,
"agent": "timmy",
"ready": True,
"browser_model_enabled": settings.browser_model_enabled,
"browser_model_id": settings.browser_model_id,
}

View File

@@ -45,6 +45,7 @@
<a href="/work-orders/queue" class="mc-test-link">WORK ORDERS</a>
<a href="/creative/ui" class="mc-test-link">CREATIVE</a>
<a href="/mobile" class="mc-test-link" title="Mobile-optimized view">MOBILE</a>
<a href="/mobile/local" class="mc-test-link" title="Local AI on iPhone">LOCAL AI</a>
<button id="enable-notifications" class="mc-test-link" style="background:none;cursor:pointer;" title="Enable notifications">&#x1F514;</button>
<span class="mc-time" id="clock"></span>
</div>
@@ -78,6 +79,7 @@
<a href="/creative/ui" class="mc-mobile-link">CREATIVE</a>
<a href="/voice/button" class="mc-mobile-link">VOICE</a>
<a href="/mobile" class="mc-mobile-link">MOBILE</a>
<a href="/mobile/local" class="mc-mobile-link">LOCAL AI</a>
<div class="mc-mobile-menu-footer">
<button id="enable-notifications-mobile" class="mc-mobile-link" style="background:none;border:none;cursor:pointer;width:100%;text-align:left;font:inherit;color:inherit;padding:inherit;">&#x1F514; NOTIFICATIONS</button>
</div>

View File

@@ -0,0 +1,546 @@
{% extends "base.html" %}
{% block title %}{{ page_title }}{% endblock %}
{% block extra_styles %}
<style>
.local-wrap {
display: flex;
flex-direction: column;
gap: 12px;
padding-bottom: 20px;
max-width: 600px;
margin: 0 auto;
}
/* ── Model status panel ────────────────────────────────────── */
.model-status {
padding: 14px;
display: flex;
flex-direction: column;
gap: 10px;
}
.model-status-row {
display: flex;
justify-content: space-between;
align-items: center;
font-size: 11px;
letter-spacing: 0.08em;
}
.model-status-label { color: var(--text-dim); }
.model-status-value { color: var(--text-bright); font-weight: 600; }
.model-status-value.ready { color: #4ade80; }
.model-status-value.loading { color: #facc15; }
.model-status-value.error { color: #f87171; }
.model-status-value.offline { color: var(--text-dim); }
/* ── Progress bar ──────────────────────────────────────────── */
.progress-wrap {
display: none;
flex-direction: column;
gap: 6px;
padding: 0 14px 14px;
}
.progress-wrap.active { display: flex; }
.progress-bar-outer {
height: 6px;
background: rgba(8, 4, 18, 0.75);
border-radius: 3px;
overflow: hidden;
}
.progress-bar-inner {
height: 100%;
width: 0%;
background: linear-gradient(90deg, var(--border-glow), #a78bfa);
border-radius: 3px;
transition: width 0.3s;
}
.progress-text {
font-size: 10px;
color: var(--text-dim);
letter-spacing: 0.06em;
min-height: 14px;
}
/* ── Model selector ────────────────────────────────────────── */
.model-select-wrap {
padding: 0 14px 14px;
}
.model-select {
width: 100%;
background: rgba(8, 4, 18, 0.75);
border: 1px solid var(--border);
border-radius: var(--radius-md);
color: var(--text-bright);
font-family: var(--font);
font-size: 13px;
padding: 10px 12px;
min-height: 44px;
appearance: none;
-webkit-appearance: none;
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' fill='%237c7c8a' viewBox='0 0 16 16'%3E%3Cpath d='M8 11L3 6h10z'/%3E%3C/svg%3E");
background-repeat: no-repeat;
background-position: right 12px center;
touch-action: manipulation;
}
.model-select:focus {
outline: none;
border-color: var(--border-glow);
}
/* ── Action buttons ────────────────────────────────────────── */
.model-actions {
display: flex;
gap: 8px;
padding: 0 14px 14px;
}
.model-btn {
flex: 1;
display: flex;
align-items: center;
justify-content: center;
gap: 6px;
min-height: 44px;
border-radius: var(--radius-md);
font-family: var(--font);
font-size: 12px;
font-weight: 700;
letter-spacing: 0.08em;
border: 1px solid var(--border);
background: rgba(24, 10, 45, 0.6);
color: var(--text-bright);
cursor: pointer;
transition: transform 0.1s, border-color 0.2s;
touch-action: manipulation;
-webkit-tap-highlight-color: transparent;
}
.model-btn:active { transform: scale(0.96); }
.model-btn.primary {
border-color: var(--border-glow);
background: rgba(124, 58, 237, 0.2);
}
.model-btn:disabled {
opacity: 0.4;
cursor: not-allowed;
}
/* ── Chat area ─────────────────────────────────────────────── */
.local-chat-wrap {
flex: 1;
display: flex;
flex-direction: column;
min-height: 0;
}
.local-chat-log {
flex: 1;
overflow-y: auto;
-webkit-overflow-scrolling: touch;
padding: 14px;
max-height: 400px;
min-height: 200px;
}
.local-chat-input {
display: flex;
gap: 8px;
padding: 10px 14px;
padding-bottom: max(10px, env(safe-area-inset-bottom));
background: rgba(24, 10, 45, 0.9);
border-top: 1px solid var(--border);
}
.local-chat-input input {
flex: 1;
background: rgba(8, 4, 18, 0.75);
border: 1px solid var(--border);
border-radius: var(--radius-md);
color: var(--text-bright);
font-family: var(--font);
font-size: 16px;
padding: 10px 12px;
min-height: 44px;
}
.local-chat-input input:focus {
outline: none;
border-color: var(--border-glow);
box-shadow: 0 0 0 1px var(--border-glow), 0 0 8px rgba(124, 58, 237, 0.2);
}
.local-chat-input input::placeholder { color: var(--text-dim); }
.local-chat-input button {
background: var(--border-glow);
border: none;
border-radius: var(--radius-md);
color: var(--text-bright);
font-family: var(--font);
font-size: 12px;
font-weight: 700;
padding: 0 16px;
min-height: 44px;
min-width: 64px;
letter-spacing: 0.1em;
transition: background 0.15s, transform 0.1s;
touch-action: manipulation;
}
.local-chat-input button:active { transform: scale(0.96); }
.local-chat-input button:disabled { opacity: 0.4; }
/* ── Chat messages ─────────────────────────────────────────── */
.local-msg { margin-bottom: 12px; }
.local-msg .meta {
font-size: 10px;
letter-spacing: 0.1em;
margin-bottom: 3px;
}
.local-msg.user .meta { color: var(--orange); }
.local-msg.timmy .meta { color: var(--purple); }
.local-msg.system .meta { color: var(--text-dim); }
.local-msg .bubble {
background: rgba(24, 10, 45, 0.8);
border: 1px solid var(--border);
border-radius: var(--radius-md);
padding: 10px 12px;
font-size: 13px;
line-height: 1.6;
color: var(--text);
word-break: break-word;
}
.local-msg.timmy .bubble { border-left: 3px solid var(--purple); }
.local-msg.user .bubble { border-color: var(--border-glow); }
.local-msg.system .bubble {
border-color: transparent;
background: rgba(8, 4, 18, 0.5);
font-size: 11px;
color: var(--text-dim);
}
/* ── Backend badge ─────────────────────────────────────────── */
.backend-badge {
display: inline-block;
font-size: 9px;
letter-spacing: 0.1em;
padding: 2px 6px;
border-radius: 3px;
vertical-align: middle;
margin-left: 6px;
}
.backend-badge.local {
background: rgba(74, 222, 128, 0.15);
color: #4ade80;
border: 1px solid rgba(74, 222, 128, 0.3);
}
.backend-badge.server {
background: rgba(250, 204, 21, 0.15);
color: #facc15;
border: 1px solid rgba(250, 204, 21, 0.3);
}
/* ── Stats panel ───────────────────────────────────────────── */
.model-stats {
padding: 0 14px 14px;
font-size: 10px;
color: var(--text-dim);
letter-spacing: 0.06em;
display: none;
}
.model-stats.visible { display: block; }
</style>
{% endblock %}
{% block content %}
<div class="local-wrap">
<!-- Model Status Panel -->
<div class="card mc-panel">
<div class="card-header mc-panel-header">// LOCAL AI MODEL</div>
<div class="model-status">
<div class="model-status-row">
<span class="model-status-label">STATUS</span>
<span class="model-status-value offline" id="model-state">NOT LOADED</span>
</div>
<div class="model-status-row">
<span class="model-status-label">BACKEND</span>
<span class="model-status-value" id="model-backend">DETECTING...</span>
</div>
<div class="model-status-row">
<span class="model-status-label">INFERENCE</span>
<span class="model-status-value" id="inference-mode">--</span>
</div>
</div>
<!-- Model selector -->
<div class="model-select-wrap">
<select class="model-select" id="model-select" aria-label="Select model"></select>
</div>
<!-- Progress bar -->
<div class="progress-wrap" id="progress-wrap">
<div class="progress-bar-outer">
<div class="progress-bar-inner" id="progress-bar"></div>
</div>
<div class="progress-text" id="progress-text"></div>
</div>
<!-- Actions -->
<div class="model-actions">
<button class="model-btn primary" id="btn-load" onclick="loadModel()">LOAD MODEL</button>
<button class="model-btn" id="btn-unload" onclick="unloadModel()" disabled>UNLOAD</button>
</div>
<!-- Stats -->
<div class="model-stats" id="model-stats"></div>
</div>
<!-- Chat -->
<div class="card mc-panel local-chat-wrap">
<div class="card-header mc-panel-header">
// TIMMY <span class="backend-badge local" id="chat-backend-badge" style="display:none">LOCAL</span>
</div>
<div class="local-chat-log" id="local-chat">
<div class="local-msg system">
<div class="meta">SYSTEM</div>
<div class="bubble">
Load a model above to chat with Timmy locally on your device.
No server connection required.
{% if browser_model_fallback %}
Server fallback is enabled — if the local model fails, Timmy
will try the server instead.
{% endif %}
</div>
</div>
</div>
<form onsubmit="sendLocalMessage(event)" class="local-chat-input">
<input type="text"
id="local-message"
placeholder="Message Timmy..."
required
autocomplete="off"
autocapitalize="none"
autocorrect="off"
spellcheck="false"
enterkeyhint="send" />
<button type="submit" id="btn-send" disabled>SEND</button>
</form>
</div>
</div>
<script src="/static/local_llm.js"></script>
<script>
// ── State ──────────────────────────────────────────────────────────────────
let llm = null;
const serverFallback = {{ browser_model_fallback | tojson }};
const defaultModelId = {{ browser_model_id | tojson }};
// ── DOM refs ───────────────────────────────────────────────────────────────
const elState = document.getElementById('model-state');
const elBackend = document.getElementById('model-backend');
const elInference = document.getElementById('inference-mode');
const elSelect = document.getElementById('model-select');
const elProgress = document.getElementById('progress-wrap');
const elBar = document.getElementById('progress-bar');
const elProgressTx = document.getElementById('progress-text');
const elBtnLoad = document.getElementById('btn-load');
const elBtnUnload = document.getElementById('btn-unload');
const elBtnSend = document.getElementById('btn-send');
const elChat = document.getElementById('local-chat');
const elInput = document.getElementById('local-message');
const elBadge = document.getElementById('chat-backend-badge');
const elStats = document.getElementById('model-stats');
// ── Populate model selector ────────────────────────────────────────────────
(function populateModels() {
const catalogue = window.LOCAL_MODEL_CATALOGUE || [];
catalogue.forEach(function(m) {
const opt = document.createElement('option');
opt.value = m.id;
opt.textContent = m.label + ' (' + m.sizeHint + ')';
if (m.id === defaultModelId) opt.selected = true;
elSelect.appendChild(opt);
});
})();
// ── Detect capabilities ────────────────────────────────────────────────────
(function detectCaps() {
const supported = LocalLLM.isSupported();
const hasGPU = typeof navigator !== 'undefined' && 'gpu' in navigator;
elBackend.textContent = hasGPU ? 'WebGPU' : supported ? 'WASM' : 'UNSUPPORTED';
if (!supported) {
elBackend.classList.add('error');
elBtnLoad.disabled = true;
addSystemMessage('Your browser does not support WebGPU or WebAssembly. Update to iOS 26+ / Safari 26+ for local AI.');
}
})();
// ── Load model ─────────────────────────────────────────────────────────────
// Create a LocalLLM instance for the selected model, wire its lifecycle
// callbacks into the status UI, and kick off the (possibly long) download.
async function loadModel() {
  // Switching models: drop the current engine before loading a new one.
  if (llm && llm.ready) {
    await unloadModel();
  }
  const modelId = elSelect.value;
  // Lock all controls while the download/compile runs.
  elBtnLoad.disabled = true;
  elBtnUnload.disabled = true;
  elBtnSend.disabled = true;
  elProgress.classList.add('active');
  setState('loading', 'DOWNLOADING...');
  llm = new LocalLLM({
    modelId: modelId,
    onProgress: function(report) {
      // WebLLM progress reports carry an optional 0..1 fraction plus text.
      if (report.progress !== undefined) {
        const pct = Math.round(report.progress * 100);
        elBar.style.width = pct + '%';
        elProgressTx.textContent = report.text || (pct + '%');
      } else if (report.text) {
        elProgressTx.textContent = report.text;
      }
    },
    onReady: function() {
      // Model is resident: unlock the UI and flag on-device inference.
      setState('ready', 'READY');
      elProgress.classList.remove('active');
      elBtnLoad.disabled = false;
      elBtnUnload.disabled = false;
      elBtnSend.disabled = false;
      elBadge.style.display = '';
      elBadge.className = 'backend-badge local';
      elBadge.textContent = 'LOCAL';
      elInference.textContent = 'ON-DEVICE';
      elInput.focus();
      addSystemMessage('Model loaded. Timmy is running locally on your device — fully offline, fully sovereign.');
      updateStats();
    },
    onError: function(err) {
      setState('error', 'FAILED');
      elProgress.classList.remove('active');
      elBtnLoad.disabled = false;
      addSystemMessage('Failed to load model: ' + err.message);
      // With fallback enabled the chat stays usable — routed to the server.
      if (serverFallback) {
        addSystemMessage('Server fallback enabled. Chat will use the server instead.');
        elBtnSend.disabled = false;
        elBadge.style.display = '';
        elBadge.className = 'backend-badge server';
        elBadge.textContent = 'SERVER';
        elInference.textContent = 'SERVER';
      }
    },
  });
  try {
    await llm.init();
  } catch (e) {
    // Error handled by onError callback
  }
}
// ── Unload model ───────────────────────────────────────────────────────────
// Release the loaded engine (if any) and reset every status indicator
// back to the "nothing loaded" baseline.
async function unloadModel() {
  if (llm !== null) {
    await llm.unload();
    llm = null;
  }
  setState('offline', 'NOT LOADED');
  elBadge.style.display = 'none';
  elInference.textContent = '--';
  elBtnUnload.disabled = true;
  elBtnSend.disabled = true;
  elStats.classList.remove('visible');
}
// ── Send message ───────────────────────────────────────────────────────────
// Handle the chat form submit: prefer on-device inference, fall back to
// the server endpoint when the local model is absent or fails mid-reply.
async function sendLocalMessage(event) {
  event.preventDefault();
  const message = elInput.value.trim();
  if (!message) return;
  addMessage('user', 'YOU', message);
  elInput.value = '';
  elBtnSend.disabled = true;
  // Try local model first
  if (llm && llm.ready) {
    try {
      // Create an empty bubble up front and stream tokens into it.
      const replyBubble = addMessage('timmy', 'TIMMY (LOCAL)', '');
      let fullText = '';
      await llm.chat(message, {
        onToken: function(delta, accumulated) {
          fullText = accumulated;
          replyBubble.textContent = fullText;
          elChat.scrollTop = elChat.scrollHeight;
        }
      });
      // NOTE(review): if streaming yielded no text this re-runs the whole
      // inference non-streaming — presumably a belt-and-braces fallback,
      // but it doubles the cost in that case; confirm it's intentional.
      if (!fullText) {
        replyBubble.textContent = await llm.chat(message);
      }
      elBtnSend.disabled = false;
      updateStats();
      return;
    } catch (err) {
      addSystemMessage('Local inference failed: ' + err.message);
      if (!serverFallback) {
        elBtnSend.disabled = false;
        return;
      }
      // Fallback allowed: fall through to the server path below.
    }
  }
  // Server fallback
  if (serverFallback) {
    try {
      const response = await fetch('/agents/timmy/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
        body: 'message=' + encodeURIComponent(message)
      });
      // The endpoint returns an HTML fragment; extract only the text of
      // Timmy's node so no raw markup is ever injected into the log.
      const html = await response.text();
      const parser = new DOMParser();
      const doc = parser.parseFromString(html, 'text/html');
      const timmyResponse = doc.querySelector('.chat-message.timmy, .msg-body');
      const text = timmyResponse ? timmyResponse.textContent.trim() : 'Response received.';
      addMessage('timmy', 'TIMMY (SERVER)', text);
    } catch (e) {
      addMessage('timmy', 'TIMMY', 'Sorry, both local and server inference failed. Check your connection.');
    }
  } else {
    addMessage('system', 'SYSTEM', 'Load a model to start chatting.');
  }
  elBtnSend.disabled = false;
}
// ── Helpers ────────────────────────────────────────────────────────────────
// Update the status readout: swap the modifier class wholesale so a
// stale state color never lingers on the element.
function setState(cls, text) {
  elState.textContent = text;
  elState.className = ['model-status-value', cls].join(' ');
}
// Append one chat message and return its bubble element so callers can
// stream text into it. Uses textContent only — never innerHTML — so
// user-supplied text cannot inject markup.
function addMessage(type, label, text) {
  const wrapper = document.createElement('div');
  wrapper.className = 'local-msg ' + type;

  const metaEl = document.createElement('div');
  metaEl.className = 'meta';
  metaEl.textContent = label;
  wrapper.appendChild(metaEl);

  const bubbleEl = document.createElement('div');
  bubbleEl.className = 'bubble';
  bubbleEl.textContent = text;
  wrapper.appendChild(bubbleEl);

  elChat.appendChild(wrapper);
  elChat.scrollTop = elChat.scrollHeight;
  return bubbleEl;
}
// Convenience wrapper: system notices all share one fixed label.
function addSystemMessage(text) {
  addMessage('system', 'SYSTEM', text);
}
// Refresh the stats panel from the engine. Stats are best-effort: a
// missing engine or a failing stats call simply leaves the panel as-is.
async function updateStats() {
  if (llm === null) return;
  let statsText = null;
  try {
    statsText = await llm.getStats();
  } catch (e) {
    return; // stats are optional
  }
  if (statsText) {
    elStats.textContent = statsText;
    elStats.classList.add('visible');
  }
}
</script>
{% endblock %}

271
static/local_llm.js Normal file
View File

@@ -0,0 +1,271 @@
/**
* local_llm.js — In-browser LLM inference via WebLLM.
*
* Loads a small language model directly into the browser using WebGPU
* (or WASM fallback) so Timmy can run on an iPhone with zero server
* dependency. Falls back to server-side Ollama when the local model
* is unavailable.
*
* Usage:
* const llm = new LocalLLM({ modelId, onProgress, onReady, onError });
* await llm.init();
* const reply = await llm.chat("Hello Timmy");
*/
/* global webllm */
// ── Model catalogue ────────────────────────────────────────────────────────
// Models tested on iPhone 15 Pro / Safari 26+. Sorted smallest → largest.
const MODEL_CATALOGUE = [
{
id: "SmolLM2-360M-Instruct-q4f16_1-MLC",
label: "SmolLM2 360M (fast)",
sizeHint: "~200 MB",
description: "Fastest option. Good for simple Q&A.",
},
{
id: "Qwen2.5-0.5B-Instruct-q4f16_1-MLC",
label: "Qwen 2.5 0.5B (balanced)",
sizeHint: "~350 MB",
description: "Best quality under 500 MB.",
},
{
id: "SmolLM2-1.7B-Instruct-q4f16_1-MLC",
label: "SmolLM2 1.7B (smart)",
sizeHint: "~1 GB",
description: "Highest quality. Needs more memory.",
},
{
id: "Llama-3.2-1B-Instruct-q4f16_1-MLC",
label: "Llama 3.2 1B",
sizeHint: "~700 MB",
description: "Meta's compact model. Good all-rounder.",
},
];
// ── Capability detection ──────────────────────────────────────────────────
// WebGPU is exposed as navigator.gpu; guard for non-browser contexts
// where `navigator` itself is undefined.
function detectWebGPU() {
  if (typeof navigator === "undefined") return false;
  return "gpu" in navigator;
}
// WebAssembly support check; the try/catch guards locked-down contexts
// where touching the WebAssembly global can throw.
function detectWASM() {
  try {
    const hasObject = typeof WebAssembly === "object";
    return hasObject && typeof WebAssembly.instantiate === "function";
  } catch {
    return false;
  }
}
// ── LocalLLM class ────────────────────────────────────────────────────────
class LocalLLM {
  /**
   * In-browser LLM wrapper around WebLLM (MLC).
   *
   * @param {object} opts
   * @param {string} opts.modelId — WebLLM model ID
   * @param {function} opts.onProgress — (report) progress during download
   * @param {function} opts.onReady — () called when model is loaded
   * @param {function} opts.onError — (error) called on fatal error
   * @param {string} opts.systemPrompt — system message for the model
   */
  constructor(opts = {}) {
    this.modelId = opts.modelId || "SmolLM2-360M-Instruct-q4f16_1-MLC";
    this.onProgress = opts.onProgress || (() => {});
    this.onReady = opts.onReady || (() => {});
    this.onError = opts.onError || (() => {});
    this.systemPrompt =
      opts.systemPrompt ||
      "You are Timmy, a sovereign AI assistant. You are helpful, concise, and loyal. " +
      "Address the user as 'Sir' when appropriate. Keep responses brief on mobile.";
    this.engine = null;    // MLCEngine instance once loaded
    this.ready = false;    // true after init() succeeds
    this.loading = false;  // guards concurrent init() calls
    this._hasWebGPU = detectWebGPU();
    this._hasWASM = detectWASM();
  }

  /** Check if local inference is possible on this device. */
  static isSupported() {
    return detectWebGPU() || detectWASM();
  }

  /** Return the model catalogue for UI rendering. */
  static getCatalogue() {
    return MODEL_CATALOGUE;
  }

  /** Return runtime capability info (backend: "WebGPU" | "WASM" | "none"). */
  getCapabilities() {
    return {
      webgpu: this._hasWebGPU,
      wasm: this._hasWASM,
      supported: this._hasWebGPU || this._hasWASM,
      backend: this._hasWebGPU ? "WebGPU" : this._hasWASM ? "WASM" : "none",
    };
  }

  /**
   * Initialize the engine and download/cache the model.
   * Model weights are cached in the browser's Cache API so subsequent
   * loads are nearly instant.
   * @throws {Error} when no supported backend exists or engine creation fails
   */
  async init() {
    if (this.ready) return;
    if (this.loading) return;
    if (!this._hasWebGPU && !this._hasWASM) {
      const err = new Error(
        "Neither WebGPU nor WebAssembly is available. " +
        "Update to iOS 26+ / Safari 26+ for WebGPU support."
      );
      this.onError(err);
      throw err;
    }
    this.loading = true;
    try {
      // Lazy-load WebLLM from CDN (avoids bundling it with the app).
      if (typeof webllm === "undefined") {
        await this._loadWebLLMScript();
      }
      const initProgressCallback = (report) => {
        this.onProgress(report);
      };
      this.engine = await webllm.CreateMLCEngine(this.modelId, {
        initProgressCallback,
      });
      this.ready = true;
      this.loading = false;
      this.onReady();
    } catch (err) {
      this.loading = false;
      this.ready = false;
      this.onError(err);
      throw err;
    }
  }

  /**
   * Send a chat message and get a response.
   * @param {string} userMessage
   * @param {object} opts
   * @param {function} opts.onToken — streaming callback (delta, fullText)
   * @returns {Promise<string>} full response text
   */
  async chat(userMessage, opts = {}) {
    if (!this.ready) {
      throw new Error("Model not loaded. Call init() first.");
    }
    const messages = [
      { role: "system", content: this.systemPrompt },
      { role: "user", content: userMessage },
    ];
    if (opts.onToken) {
      // Streaming mode
      let fullText = "";
      const chunks = await this.engine.chat.completions.create({
        messages,
        stream: true,
        temperature: 0.7,
        max_tokens: 512,
      });
      for await (const chunk of chunks) {
        const delta = chunk.choices[0]?.delta?.content || "";
        fullText += delta;
        opts.onToken(delta, fullText);
      }
      return fullText;
    }
    // Non-streaming mode
    const response = await this.engine.chat.completions.create({
      messages,
      temperature: 0.7,
      max_tokens: 512,
    });
    return response.choices[0]?.message?.content || "";
  }

  /** Reset conversation context. */
  async resetChat() {
    if (this.engine) {
      await this.engine.resetChat();
    }
  }

  /** Unload the model and free memory. */
  async unload() {
    if (this.engine) {
      await this.engine.unload();
      this.engine = null;
      this.ready = false;
    }
  }

  /** Get current engine stats (tokens/sec, memory, etc). Returns null on failure. */
  async getStats() {
    if (!this.engine) return null;
    try {
      const stats = await this.engine.runtimeStatsText();
      return stats;
    } catch {
      return null;
    }
  }

  // ── Private ─────────────────────────────────────────────────────────────

  /**
   * Load the WebLLM library from CDN and publish it as window.webllm.
   *
   * BUG FIX: the previous version assigned `script.src` to the Anthropic
   * SDK URL (wrong library entirely) via a nonsensical self-comparison.
   * Worse, a <script> element with a `src` attribute ignores its inline
   * text, so the ES-module import of @mlc-ai/web-llm below never ran.
   * The element must stay src-less for the inline module to execute.
   */
  _loadWebLLMScript() {
    return new Promise((resolve, reject) => {
      // Check if already loaded
      if (typeof webllm !== "undefined") {
        resolve();
        return;
      }
      // Primary path: inline ES module that imports WebLLM from the CDN
      // and signals completion via a custom event.
      const script = document.createElement("script");
      script.type = "module";
      script.textContent = `
import * as webllmModule from "https://esm.run/@mlc-ai/web-llm";
window.webllm = webllmModule;
window.dispatchEvent(new Event("webllm-loaded"));
`;
      document.head.appendChild(script);
      const onLoaded = () => {
        window.removeEventListener("webllm-loaded", onLoaded);
        resolve();
      };
      window.addEventListener("webllm-loaded", onLoaded);
      // Fallback: also try the UMD bundle approach
      const fallbackScript = document.createElement("script");
      fallbackScript.src = "https://cdn.jsdelivr.net/npm/@mlc-ai/web-llm@0.2.80/lib/index.min.js";
      fallbackScript.onload = () => {
        if (typeof webllm !== "undefined") {
          resolve();
        }
      };
      fallbackScript.onerror = () => {
        reject(new Error("Failed to load WebLLM library from CDN."));
      };
      document.head.appendChild(fallbackScript);
    });
  }
}
// Export for use in templates
window.LocalLLM = LocalLLM;
window.LOCAL_MODEL_CATALOGUE = MODEL_CATALOGUE;

View File

@@ -0,0 +1,246 @@
"""Tests for the local browser model feature — /mobile/local endpoint.
Categories:
L1xx Route & API responses
L2xx Config settings
L3xx Template content & UX
L4xx JavaScript asset
L5xx Security (XSS prevention)
"""
import re
from pathlib import Path
# ── helpers ──────────────────────────────────────────────────────────────────
def _local_html(client) -> str:
    """Fetch and return the rendered /mobile/local page body."""
    response = client.get("/mobile/local")
    return response.text
def _local_llm_js() -> str:
    """Read the local_llm.js asset from the project's static directory."""
    project_root = Path(__file__).parent.parent.parent
    return (project_root / "static" / "local_llm.js").read_text()
# ── L1xx — Route & API responses ─────────────────────────────────────────────
def test_L101_mobile_local_route_returns_200(client):
"""The /mobile/local endpoint should return 200 OK."""
response = client.get("/mobile/local")
assert response.status_code == 200
def test_L102_local_models_config_endpoint(client):
"""The /mobile/local-models API should return model config JSON."""
response = client.get("/mobile/local-models")
assert response.status_code == 200
data = response.json()
assert "enabled" in data
assert "default_model" in data
assert "fallback_to_server" in data
assert "server_model" in data
def test_L103_mobile_status_includes_browser_model(client):
"""The /mobile/status endpoint should include browser model info."""
response = client.get("/mobile/status")
assert response.status_code == 200
data = response.json()
assert "browser_model_enabled" in data
assert "browser_model_id" in data
def test_L104_local_models_config_default_values(client):
"""Config defaults should match what's in config.py."""
data = client.get("/mobile/local-models").json()
assert data["enabled"] is True
assert "SmolLM2" in data["default_model"] or "MLC" in data["default_model"]
assert data["fallback_to_server"] is True
# ── L2xx — Config settings ───────────────────────────────────────────────────
def test_L201_config_has_browser_model_enabled():
"""config.py should define browser_model_enabled."""
from config import settings
assert hasattr(settings, "browser_model_enabled")
assert isinstance(settings.browser_model_enabled, bool)
def test_L202_config_has_browser_model_id():
"""config.py should define browser_model_id."""
from config import settings
assert hasattr(settings, "browser_model_id")
assert isinstance(settings.browser_model_id, str)
assert len(settings.browser_model_id) > 0
def test_L203_config_has_browser_model_fallback():
"""config.py should define browser_model_fallback."""
from config import settings
assert hasattr(settings, "browser_model_fallback")
assert isinstance(settings.browser_model_fallback, bool)
# ── L3xx — Template content & UX ────────────────────────────────────────────
def test_L301_template_includes_local_llm_script(client):
"""mobile_local.html must include the local_llm.js script."""
html = _local_html(client)
assert "local_llm.js" in html
def test_L302_template_has_model_selector(client):
"""Template must have a model selector element."""
html = _local_html(client)
assert 'id="model-select"' in html
def test_L303_template_has_load_button(client):
"""Template must have a load model button."""
html = _local_html(client)
assert 'id="btn-load"' in html
def test_L304_template_has_progress_bar(client):
"""Template must have a progress bar for model download."""
html = _local_html(client)
assert 'id="progress-bar"' in html
def test_L305_template_has_chat_area(client):
"""Template must have a chat log area."""
html = _local_html(client)
assert 'id="local-chat"' in html
def test_L306_template_has_message_input(client):
"""Template must have a message input field."""
html = _local_html(client)
assert 'id="local-message"' in html
def test_L307_input_font_size_16px(client):
"""Input font-size must be 16px to prevent iOS zoom."""
html = _local_html(client)
assert "font-size: 16px" in html
def test_L308_input_has_ios_attributes(client):
"""Input should have autocapitalize, autocorrect, spellcheck, enterkeyhint."""
html = _local_html(client)
assert 'autocapitalize="none"' in html
assert 'autocorrect="off"' in html
assert 'spellcheck="false"' in html
assert 'enterkeyhint="send"' in html
def test_L309_touch_targets_44px(client):
"""Buttons and inputs must meet 44px min-height (Apple HIG)."""
html = _local_html(client)
assert "min-height: 44px" in html
def test_L310_safe_area_inset_bottom(client):
"""Chat input must account for iPhone home indicator."""
html = _local_html(client)
assert "safe-area-inset-bottom" in html
def test_L311_template_has_backend_badge(client):
"""Template should show LOCAL or SERVER badge."""
html = _local_html(client)
assert "backend-badge" in html
assert "LOCAL" in html
# ── L4xx — JavaScript asset ──────────────────────────────────────────────────
def test_L401_local_llm_js_exists():
"""static/local_llm.js must exist."""
js_path = Path(__file__).parent.parent.parent / "static" / "local_llm.js"
assert js_path.exists(), "static/local_llm.js not found"
def test_L402_local_llm_js_defines_class():
"""local_llm.js must define the LocalLLM class."""
js = _local_llm_js()
assert "class LocalLLM" in js
def test_L403_local_llm_js_has_model_catalogue():
"""local_llm.js must define a MODEL_CATALOGUE."""
js = _local_llm_js()
assert "MODEL_CATALOGUE" in js
def test_L404_local_llm_js_has_webgpu_detection():
"""local_llm.js must detect WebGPU capability."""
js = _local_llm_js()
assert "detectWebGPU" in js or "navigator.gpu" in js
def test_L405_local_llm_js_has_chat_method():
"""local_llm.js LocalLLM class must have a chat method."""
js = _local_llm_js()
assert "async chat(" in js
def test_L406_local_llm_js_has_init_method():
"""local_llm.js LocalLLM class must have an init method."""
js = _local_llm_js()
assert "async init(" in js
def test_L407_local_llm_js_has_unload_method():
"""local_llm.js LocalLLM class must have an unload method."""
js = _local_llm_js()
assert "async unload(" in js
def test_L408_local_llm_js_exports_to_window():
"""local_llm.js must export LocalLLM and catalogue to window."""
js = _local_llm_js()
assert "window.LocalLLM" in js
assert "window.LOCAL_MODEL_CATALOGUE" in js
def test_L409_local_llm_js_has_streaming_support():
"""local_llm.js chat method must support streaming via onToken."""
js = _local_llm_js()
assert "onToken" in js
assert "stream: true" in js
def test_L410_local_llm_js_has_isSupported_static():
"""LocalLLM must have a static isSupported() method."""
js = _local_llm_js()
assert "static isSupported()" in js
# ── L5xx — Security ─────────────────────────────────────────────────────────
def test_L501_no_innerhtml_with_user_input(client):
    """Template must not use innerHTML with user-controlled data."""
    html = _local_html(client)
    # Dangerous pattern: innerHTML (+)= `...${message}...` would inject
    # raw user text into the DOM.
    pattern = re.compile(r"innerHTML\s*\+=?\s*`([^`]*)`", re.DOTALL)
    for literal in pattern.findall(html):
        assert "${message}" not in literal, (
            "innerHTML template literal contains ${message} — XSS vulnerability"
        )
def test_L502_uses_textcontent_for_messages(client):
"""Template must use textContent (not innerHTML) for user messages."""
html = _local_html(client)
assert "textContent" in html
def test_L503_no_eval_or_function_constructor():
    """local_llm.js must not use eval() or new Function().

    The previous check (``"eval(" not in js or "evaluate" in js``) was
    vacuously true whenever the word "evaluate" appeared anywhere in the
    file, so it never actually forbade ``eval(``. Use a word-boundary
    regex instead: match ``eval(`` only when not preceded by an
    identifier character, so "evaluate(" and "my_eval(" don't count
    while bare ``eval(`` and ``window.eval(`` do.
    """
    js = _local_llm_js()
    assert not re.search(r"(?<![A-Za-z0-9_])eval\(", js), (
        "standalone eval( found in local_llm.js"
    )
    assert "new Function(" not in js