Merge pull request #60 from AlexanderWhitestone/claude/local-models-iphone-EwXtC

This commit is contained in:
Alexander Whitestone
2026-02-26 19:24:32 -05:00
committed by GitHub
6 changed files with 1117 additions and 1 deletions

View File

@@ -90,6 +90,17 @@ class Settings(BaseSettings):
work_orders_auto_execute: bool = False # Master switch for auto-execution
work_orders_auto_threshold: str = "low" # Max priority that auto-executes: "low" | "medium" | "high" | "none"
# ── Browser Local Models (iPhone / WebGPU) ───────────────────────
# Enable in-browser LLM inference via WebLLM for offline iPhone use.
# When enabled, the mobile dashboard loads a small model directly
# in the browser — no server or Ollama required.
browser_model_enabled: bool = True
# WebLLM model ID — must be a pre-compiled MLC model.
# Recommended for iPhone: SmolLM2-360M (fast) or Qwen3-0.6B (smart).
browser_model_id: str = "SmolLM2-360M-Instruct-q4f16_1-MLC"
# Fallback to server when browser model is unavailable or too slow.
browser_model_fallback: bool = True
# ── Scripture / Biblical Integration ──────────────────────────────
# Enable the sovereign biblical text module. When enabled, Timmy
# loads the local ESV text corpus and runs meditation workflows.

View File

@@ -3,6 +3,9 @@
Provides a simplified, mobile-first view of the dashboard that
prioritizes the chat interface and essential status information.
Designed for quick access from a phone's home screen.
The /mobile/local endpoint loads a small LLM directly into the
browser via WebLLM so Timmy can run on an iPhone with no server.
"""
from pathlib import Path
@@ -11,6 +14,8 @@ from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates
from config import settings
router = APIRouter(tags=["mobile"])
templates = Jinja2Templates(directory=str(Path(__file__).parent.parent / "templates"))
@@ -26,11 +31,44 @@ async def mobile_dashboard(request: Request):
return templates.TemplateResponse(request, "index.html")
@router.get("/mobile/local", response_class=HTMLResponse)
async def mobile_local_dashboard(request: Request):
    """Mobile dashboard with in-browser local model inference.

    Renders ``mobile_local.html``, which loads a small LLM (WebLLM /
    WebGPU) straight into Safari so Timmy works on an iPhone without
    any server connection, falling back to server-side Ollama when
    the local model is unavailable or the user prefers it.
    """
    # Collect the template context up front so the render call stays flat.
    context = {
        "browser_model_enabled": settings.browser_model_enabled,
        "browser_model_id": settings.browser_model_id,
        "browser_model_fallback": settings.browser_model_fallback,
        "server_model": settings.ollama_model,
        "page_title": "Timmy — Local AI",
    }
    return templates.TemplateResponse(request, "mobile_local.html", context)
@router.get("/mobile/local-models")
async def local_models_config():
    """Return browser model configuration for the JS client."""
    # Built key-by-key so each config source line is individually greppable.
    config = {}
    config["enabled"] = settings.browser_model_enabled
    config["default_model"] = settings.browser_model_id
    config["fallback_to_server"] = settings.browser_model_fallback
    config["server_model"] = settings.ollama_model
    config["server_url"] = settings.ollama_url
    return config
@router.get("/mobile/status")
async def mobile_status():
"""Lightweight status endpoint optimized for mobile polling."""
from dashboard.routes.health import check_ollama
from config import settings
ollama_ok = await check_ollama()
return {
@@ -38,4 +76,6 @@ async def mobile_status():
"model": settings.ollama_model,
"agent": "timmy",
"ready": True,
"browser_model_enabled": settings.browser_model_enabled,
"browser_model_id": settings.browser_model_id,
}

View File

@@ -45,6 +45,7 @@
<a href="/work-orders/queue" class="mc-test-link">WORK ORDERS</a>
<a href="/creative/ui" class="mc-test-link">CREATIVE</a>
<a href="/mobile" class="mc-test-link" title="Mobile-optimized view">MOBILE</a>
<a href="/mobile/local" class="mc-test-link" title="Local AI on iPhone">LOCAL AI</a>
<button id="enable-notifications" class="mc-test-link" style="background:none;cursor:pointer;" title="Enable notifications">&#x1F514;</button>
<span class="mc-time" id="clock"></span>
</div>
@@ -78,6 +79,7 @@
<a href="/creative/ui" class="mc-mobile-link">CREATIVE</a>
<a href="/voice/button" class="mc-mobile-link">VOICE</a>
<a href="/mobile" class="mc-mobile-link">MOBILE</a>
<a href="/mobile/local" class="mc-mobile-link">LOCAL AI</a>
<div class="mc-mobile-menu-footer">
<button id="enable-notifications-mobile" class="mc-mobile-link" style="background:none;border:none;cursor:pointer;width:100%;text-align:left;font:inherit;color:inherit;padding:inherit;">&#x1F514; NOTIFICATIONS</button>
</div>

View File

@@ -0,0 +1,546 @@
{% extends "base.html" %}
{% block title %}{{ page_title }}{% endblock %}
{% block extra_styles %}
<style>
.local-wrap {
display: flex;
flex-direction: column;
gap: 12px;
padding-bottom: 20px;
max-width: 600px;
margin: 0 auto;
}
/* ── Model status panel ────────────────────────────────────── */
.model-status {
padding: 14px;
display: flex;
flex-direction: column;
gap: 10px;
}
.model-status-row {
display: flex;
justify-content: space-between;
align-items: center;
font-size: 11px;
letter-spacing: 0.08em;
}
.model-status-label { color: var(--text-dim); }
.model-status-value { color: var(--text-bright); font-weight: 600; }
.model-status-value.ready { color: #4ade80; }
.model-status-value.loading { color: #facc15; }
.model-status-value.error { color: #f87171; }
.model-status-value.offline { color: var(--text-dim); }
/* ── Progress bar ──────────────────────────────────────────── */
.progress-wrap {
display: none;
flex-direction: column;
gap: 6px;
padding: 0 14px 14px;
}
.progress-wrap.active { display: flex; }
.progress-bar-outer {
height: 6px;
background: rgba(8, 4, 18, 0.75);
border-radius: 3px;
overflow: hidden;
}
.progress-bar-inner {
height: 100%;
width: 0%;
background: linear-gradient(90deg, var(--border-glow), #a78bfa);
border-radius: 3px;
transition: width 0.3s;
}
.progress-text {
font-size: 10px;
color: var(--text-dim);
letter-spacing: 0.06em;
min-height: 14px;
}
/* ── Model selector ────────────────────────────────────────── */
.model-select-wrap {
padding: 0 14px 14px;
}
.model-select {
width: 100%;
background: rgba(8, 4, 18, 0.75);
border: 1px solid var(--border);
border-radius: var(--radius-md);
color: var(--text-bright);
font-family: var(--font);
font-size: 13px;
padding: 10px 12px;
min-height: 44px;
appearance: none;
-webkit-appearance: none;
background-image: url("data:image/svg+xml,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' fill='%237c7c8a' viewBox='0 0 16 16'%3E%3Cpath d='M8 11L3 6h10z'/%3E%3C/svg%3E");
background-repeat: no-repeat;
background-position: right 12px center;
touch-action: manipulation;
}
.model-select:focus {
outline: none;
border-color: var(--border-glow);
}
/* ── Action buttons ────────────────────────────────────────── */
.model-actions {
display: flex;
gap: 8px;
padding: 0 14px 14px;
}
.model-btn {
flex: 1;
display: flex;
align-items: center;
justify-content: center;
gap: 6px;
min-height: 44px;
border-radius: var(--radius-md);
font-family: var(--font);
font-size: 12px;
font-weight: 700;
letter-spacing: 0.08em;
border: 1px solid var(--border);
background: rgba(24, 10, 45, 0.6);
color: var(--text-bright);
cursor: pointer;
transition: transform 0.1s, border-color 0.2s;
touch-action: manipulation;
-webkit-tap-highlight-color: transparent;
}
.model-btn:active { transform: scale(0.96); }
.model-btn.primary {
border-color: var(--border-glow);
background: rgba(124, 58, 237, 0.2);
}
.model-btn:disabled {
opacity: 0.4;
cursor: not-allowed;
}
/* ── Chat area ─────────────────────────────────────────────── */
.local-chat-wrap {
flex: 1;
display: flex;
flex-direction: column;
min-height: 0;
}
.local-chat-log {
flex: 1;
overflow-y: auto;
-webkit-overflow-scrolling: touch;
padding: 14px;
max-height: 400px;
min-height: 200px;
}
.local-chat-input {
display: flex;
gap: 8px;
padding: 10px 14px;
padding-bottom: max(10px, env(safe-area-inset-bottom));
background: rgba(24, 10, 45, 0.9);
border-top: 1px solid var(--border);
}
.local-chat-input input {
flex: 1;
background: rgba(8, 4, 18, 0.75);
border: 1px solid var(--border);
border-radius: var(--radius-md);
color: var(--text-bright);
font-family: var(--font);
font-size: 16px;
padding: 10px 12px;
min-height: 44px;
}
.local-chat-input input:focus {
outline: none;
border-color: var(--border-glow);
box-shadow: 0 0 0 1px var(--border-glow), 0 0 8px rgba(124, 58, 237, 0.2);
}
.local-chat-input input::placeholder { color: var(--text-dim); }
.local-chat-input button {
background: var(--border-glow);
border: none;
border-radius: var(--radius-md);
color: var(--text-bright);
font-family: var(--font);
font-size: 12px;
font-weight: 700;
padding: 0 16px;
min-height: 44px;
min-width: 64px;
letter-spacing: 0.1em;
transition: background 0.15s, transform 0.1s;
touch-action: manipulation;
}
.local-chat-input button:active { transform: scale(0.96); }
.local-chat-input button:disabled { opacity: 0.4; }
/* ── Chat messages ─────────────────────────────────────────── */
.local-msg { margin-bottom: 12px; }
.local-msg .meta {
font-size: 10px;
letter-spacing: 0.1em;
margin-bottom: 3px;
}
.local-msg.user .meta { color: var(--orange); }
.local-msg.timmy .meta { color: var(--purple); }
.local-msg.system .meta { color: var(--text-dim); }
.local-msg .bubble {
background: rgba(24, 10, 45, 0.8);
border: 1px solid var(--border);
border-radius: var(--radius-md);
padding: 10px 12px;
font-size: 13px;
line-height: 1.6;
color: var(--text);
word-break: break-word;
}
.local-msg.timmy .bubble { border-left: 3px solid var(--purple); }
.local-msg.user .bubble { border-color: var(--border-glow); }
.local-msg.system .bubble {
border-color: transparent;
background: rgba(8, 4, 18, 0.5);
font-size: 11px;
color: var(--text-dim);
}
/* ── Backend badge ─────────────────────────────────────────── */
.backend-badge {
display: inline-block;
font-size: 9px;
letter-spacing: 0.1em;
padding: 2px 6px;
border-radius: 3px;
vertical-align: middle;
margin-left: 6px;
}
.backend-badge.local {
background: rgba(74, 222, 128, 0.15);
color: #4ade80;
border: 1px solid rgba(74, 222, 128, 0.3);
}
.backend-badge.server {
background: rgba(250, 204, 21, 0.15);
color: #facc15;
border: 1px solid rgba(250, 204, 21, 0.3);
}
/* ── Stats panel ───────────────────────────────────────────── */
.model-stats {
padding: 0 14px 14px;
font-size: 10px;
color: var(--text-dim);
letter-spacing: 0.06em;
display: none;
}
.model-stats.visible { display: block; }
</style>
{% endblock %}
{% block content %}
<div class="local-wrap">
<!-- Model Status Panel -->
<div class="card mc-panel">
<div class="card-header mc-panel-header">// LOCAL AI MODEL</div>
<div class="model-status">
<div class="model-status-row">
<span class="model-status-label">STATUS</span>
<span class="model-status-value offline" id="model-state">NOT LOADED</span>
</div>
<div class="model-status-row">
<span class="model-status-label">BACKEND</span>
<span class="model-status-value" id="model-backend">DETECTING...</span>
</div>
<div class="model-status-row">
<span class="model-status-label">INFERENCE</span>
<span class="model-status-value" id="inference-mode">--</span>
</div>
</div>
<!-- Model selector -->
<div class="model-select-wrap">
<select class="model-select" id="model-select" aria-label="Select model"></select>
</div>
<!-- Progress bar -->
<div class="progress-wrap" id="progress-wrap">
<div class="progress-bar-outer">
<div class="progress-bar-inner" id="progress-bar"></div>
</div>
<div class="progress-text" id="progress-text"></div>
</div>
<!-- Actions -->
<div class="model-actions">
<button class="model-btn primary" id="btn-load" onclick="loadModel()">LOAD MODEL</button>
<button class="model-btn" id="btn-unload" onclick="unloadModel()" disabled>UNLOAD</button>
</div>
<!-- Stats -->
<div class="model-stats" id="model-stats"></div>
</div>
<!-- Chat -->
<div class="card mc-panel local-chat-wrap">
<div class="card-header mc-panel-header">
// TIMMY <span class="backend-badge local" id="chat-backend-badge" style="display:none">LOCAL</span>
</div>
<div class="local-chat-log" id="local-chat">
<div class="local-msg system">
<div class="meta">SYSTEM</div>
<div class="bubble">
Load a model above to chat with Timmy locally on your device.
No server connection required.
{% if browser_model_fallback %}
Server fallback is enabled — if the local model fails, Timmy
will try the server instead.
{% endif %}
</div>
</div>
</div>
<form onsubmit="sendLocalMessage(event)" class="local-chat-input">
<input type="text"
id="local-message"
placeholder="Message Timmy..."
required
autocomplete="off"
autocapitalize="none"
autocorrect="off"
spellcheck="false"
enterkeyhint="send" />
<button type="submit" id="btn-send" disabled>SEND</button>
</form>
</div>
</div>
<script src="/static/local_llm.js"></script>
<script>
// ── State ──────────────────────────────────────────────────────────────────
let llm = null;
const serverFallback = {{ browser_model_fallback | tojson }};
const defaultModelId = {{ browser_model_id | tojson }};
// ── DOM refs ───────────────────────────────────────────────────────────────
const elState = document.getElementById('model-state');
const elBackend = document.getElementById('model-backend');
const elInference = document.getElementById('inference-mode');
const elSelect = document.getElementById('model-select');
const elProgress = document.getElementById('progress-wrap');
const elBar = document.getElementById('progress-bar');
const elProgressTx = document.getElementById('progress-text');
const elBtnLoad = document.getElementById('btn-load');
const elBtnUnload = document.getElementById('btn-unload');
const elBtnSend = document.getElementById('btn-send');
const elChat = document.getElementById('local-chat');
const elInput = document.getElementById('local-message');
const elBadge = document.getElementById('chat-backend-badge');
const elStats = document.getElementById('model-stats');
// ── Populate model selector ────────────────────────────────────────────────
(function populateModels() {
const catalogue = window.LOCAL_MODEL_CATALOGUE || [];
catalogue.forEach(function(m) {
const opt = document.createElement('option');
opt.value = m.id;
opt.textContent = m.label + ' (' + m.sizeHint + ')';
if (m.id === defaultModelId) opt.selected = true;
elSelect.appendChild(opt);
});
})();
// ── Detect capabilities ────────────────────────────────────────────────────
(function detectCaps() {
const supported = LocalLLM.isSupported();
const hasGPU = typeof navigator !== 'undefined' && 'gpu' in navigator;
elBackend.textContent = hasGPU ? 'WebGPU' : supported ? 'WASM' : 'UNSUPPORTED';
if (!supported) {
elBackend.classList.add('error');
elBtnLoad.disabled = true;
addSystemMessage('Your browser does not support WebGPU or WebAssembly. Update to iOS 26+ / Safari 26+ for local AI.');
}
})();
// ── Load model ─────────────────────────────────────────────────────────────
// Create a LocalLLM instance for the selected model, wire its lifecycle
// callbacks into the status UI, and kick off the (possibly long) download.
async function loadModel() {
  // Switching models: drop the current engine before loading a new one.
  if (llm && llm.ready) {
    await unloadModel();
  }
  const modelId = elSelect.value;
  // Lock all controls while the download/compile runs.
  elBtnLoad.disabled = true;
  elBtnUnload.disabled = true;
  elBtnSend.disabled = true;
  elProgress.classList.add('active');
  setState('loading', 'DOWNLOADING...');
  llm = new LocalLLM({
    modelId: modelId,
    onProgress: function(report) {
      // WebLLM progress reports carry an optional 0..1 fraction plus text.
      if (report.progress !== undefined) {
        const pct = Math.round(report.progress * 100);
        elBar.style.width = pct + '%';
        elProgressTx.textContent = report.text || (pct + '%');
      } else if (report.text) {
        elProgressTx.textContent = report.text;
      }
    },
    onReady: function() {
      // Model is resident: unlock the UI and flag on-device inference.
      setState('ready', 'READY');
      elProgress.classList.remove('active');
      elBtnLoad.disabled = false;
      elBtnUnload.disabled = false;
      elBtnSend.disabled = false;
      elBadge.style.display = '';
      elBadge.className = 'backend-badge local';
      elBadge.textContent = 'LOCAL';
      elInference.textContent = 'ON-DEVICE';
      elInput.focus();
      addSystemMessage('Model loaded. Timmy is running locally on your device — fully offline, fully sovereign.');
      updateStats();
    },
    onError: function(err) {
      setState('error', 'FAILED');
      elProgress.classList.remove('active');
      elBtnLoad.disabled = false;
      addSystemMessage('Failed to load model: ' + err.message);
      // With fallback enabled the chat stays usable — routed to the server.
      if (serverFallback) {
        addSystemMessage('Server fallback enabled. Chat will use the server instead.');
        elBtnSend.disabled = false;
        elBadge.style.display = '';
        elBadge.className = 'backend-badge server';
        elBadge.textContent = 'SERVER';
        elInference.textContent = 'SERVER';
      }
    },
  });
  try {
    await llm.init();
  } catch (e) {
    // Error handled by onError callback
  }
}
// ── Unload model ───────────────────────────────────────────────────────────
// Release the loaded engine (if any) and reset every status indicator
// back to the "nothing loaded" baseline.
async function unloadModel() {
  if (llm !== null) {
    await llm.unload();
    llm = null;
  }
  setState('offline', 'NOT LOADED');
  elBadge.style.display = 'none';
  elInference.textContent = '--';
  elBtnUnload.disabled = true;
  elBtnSend.disabled = true;
  elStats.classList.remove('visible');
}
// ── Send message ───────────────────────────────────────────────────────────
// Handle the chat form submit: prefer on-device inference, fall back to
// the server endpoint when the local model is absent or fails mid-reply.
async function sendLocalMessage(event) {
  event.preventDefault();
  const message = elInput.value.trim();
  if (!message) return;
  addMessage('user', 'YOU', message);
  elInput.value = '';
  elBtnSend.disabled = true;
  // Try local model first
  if (llm && llm.ready) {
    try {
      // Create an empty bubble up front and stream tokens into it.
      const replyBubble = addMessage('timmy', 'TIMMY (LOCAL)', '');
      let fullText = '';
      await llm.chat(message, {
        onToken: function(delta, accumulated) {
          fullText = accumulated;
          replyBubble.textContent = fullText;
          elChat.scrollTop = elChat.scrollHeight;
        }
      });
      // NOTE(review): if streaming yielded no text this re-runs the whole
      // inference non-streaming — presumably a belt-and-braces fallback,
      // but it doubles the cost in that case; confirm it's intentional.
      if (!fullText) {
        replyBubble.textContent = await llm.chat(message);
      }
      elBtnSend.disabled = false;
      updateStats();
      return;
    } catch (err) {
      addSystemMessage('Local inference failed: ' + err.message);
      if (!serverFallback) {
        elBtnSend.disabled = false;
        return;
      }
      // Fallback allowed: fall through to the server path below.
    }
  }
  // Server fallback
  if (serverFallback) {
    try {
      const response = await fetch('/agents/timmy/chat', {
        method: 'POST',
        headers: { 'Content-Type': 'application/x-www-form-urlencoded' },
        body: 'message=' + encodeURIComponent(message)
      });
      // The endpoint returns an HTML fragment; extract only the text of
      // Timmy's node so no raw markup is ever injected into the log.
      const html = await response.text();
      const parser = new DOMParser();
      const doc = parser.parseFromString(html, 'text/html');
      const timmyResponse = doc.querySelector('.chat-message.timmy, .msg-body');
      const text = timmyResponse ? timmyResponse.textContent.trim() : 'Response received.';
      addMessage('timmy', 'TIMMY (SERVER)', text);
    } catch (e) {
      addMessage('timmy', 'TIMMY', 'Sorry, both local and server inference failed. Check your connection.');
    }
  } else {
    addMessage('system', 'SYSTEM', 'Load a model to start chatting.');
  }
  elBtnSend.disabled = false;
}
// ── Helpers ────────────────────────────────────────────────────────────────
// Update the status readout: swap the modifier class wholesale so a
// stale state color never lingers on the element.
function setState(cls, text) {
  elState.textContent = text;
  elState.className = ['model-status-value', cls].join(' ');
}
// Append one chat message and return its bubble element so callers can
// stream text into it. Uses textContent only — never innerHTML — so
// user-supplied text cannot inject markup.
function addMessage(type, label, text) {
  const wrapper = document.createElement('div');
  wrapper.className = 'local-msg ' + type;

  const metaEl = document.createElement('div');
  metaEl.className = 'meta';
  metaEl.textContent = label;
  wrapper.appendChild(metaEl);

  const bubbleEl = document.createElement('div');
  bubbleEl.className = 'bubble';
  bubbleEl.textContent = text;
  wrapper.appendChild(bubbleEl);

  elChat.appendChild(wrapper);
  elChat.scrollTop = elChat.scrollHeight;
  return bubbleEl;
}
// Convenience wrapper: system notices all share one fixed label.
function addSystemMessage(text) {
  addMessage('system', 'SYSTEM', text);
}
// Refresh the stats panel from the engine. Stats are best-effort: a
// missing engine or a failing stats call simply leaves the panel as-is.
async function updateStats() {
  if (llm === null) return;
  let statsText = null;
  try {
    statsText = await llm.getStats();
  } catch (e) {
    return; // stats are optional
  }
  if (statsText) {
    elStats.textContent = statsText;
    elStats.classList.add('visible');
  }
}
</script>
{% endblock %}

271
static/local_llm.js Normal file
View File

@@ -0,0 +1,271 @@
/**
* local_llm.js — In-browser LLM inference via WebLLM.
*
* Loads a small language model directly into the browser using WebGPU
* (or WASM fallback) so Timmy can run on an iPhone with zero server
* dependency. Falls back to server-side Ollama when the local model
* is unavailable.
*
* Usage:
* const llm = new LocalLLM({ modelId, onProgress, onReady, onError });
* await llm.init();
* const reply = await llm.chat("Hello Timmy");
*/
/* global webllm */
// ── Model catalogue ────────────────────────────────────────────────────────
// Models tested on iPhone 15 Pro / Safari 26+. Sorted smallest → largest.
const MODEL_CATALOGUE = [
{
id: "SmolLM2-360M-Instruct-q4f16_1-MLC",
label: "SmolLM2 360M (fast)",
sizeHint: "~200 MB",
description: "Fastest option. Good for simple Q&A.",
},
{
id: "Qwen2.5-0.5B-Instruct-q4f16_1-MLC",
label: "Qwen 2.5 0.5B (balanced)",
sizeHint: "~350 MB",
description: "Best quality under 500 MB.",
},
{
id: "SmolLM2-1.7B-Instruct-q4f16_1-MLC",
label: "SmolLM2 1.7B (smart)",
sizeHint: "~1 GB",
description: "Highest quality. Needs more memory.",
},
{
id: "Llama-3.2-1B-Instruct-q4f16_1-MLC",
label: "Llama 3.2 1B",
sizeHint: "~700 MB",
description: "Meta's compact model. Good all-rounder.",
},
];
// ── Capability detection ──────────────────────────────────────────────────
// WebGPU is exposed as navigator.gpu; guard for non-browser contexts
// where `navigator` itself is undefined.
function detectWebGPU() {
  if (typeof navigator === "undefined") return false;
  return "gpu" in navigator;
}
// WebAssembly support check; the try/catch guards locked-down contexts
// where touching the WebAssembly global can throw.
function detectWASM() {
  try {
    const hasObject = typeof WebAssembly === "object";
    return hasObject && typeof WebAssembly.instantiate === "function";
  } catch {
    return false;
  }
}
// ── LocalLLM class ────────────────────────────────────────────────────────
class LocalLLM {
  /**
   * In-browser LLM wrapper around WebLLM (MLC).
   *
   * @param {object} opts
   * @param {string} opts.modelId — WebLLM model ID
   * @param {function} opts.onProgress — (report) progress during download
   * @param {function} opts.onReady — () called when model is loaded
   * @param {function} opts.onError — (error) called on fatal error
   * @param {string} opts.systemPrompt — system message for the model
   */
  constructor(opts = {}) {
    this.modelId = opts.modelId || "SmolLM2-360M-Instruct-q4f16_1-MLC";
    this.onProgress = opts.onProgress || (() => {});
    this.onReady = opts.onReady || (() => {});
    this.onError = opts.onError || (() => {});
    this.systemPrompt =
      opts.systemPrompt ||
      "You are Timmy, a sovereign AI assistant. You are helpful, concise, and loyal. " +
      "Address the user as 'Sir' when appropriate. Keep responses brief on mobile.";
    this.engine = null;    // MLCEngine instance once loaded
    this.ready = false;    // true after init() succeeds
    this.loading = false;  // guards concurrent init() calls
    this._hasWebGPU = detectWebGPU();
    this._hasWASM = detectWASM();
  }

  /** Check if local inference is possible on this device. */
  static isSupported() {
    return detectWebGPU() || detectWASM();
  }

  /** Return the model catalogue for UI rendering. */
  static getCatalogue() {
    return MODEL_CATALOGUE;
  }

  /** Return runtime capability info (backend: "WebGPU" | "WASM" | "none"). */
  getCapabilities() {
    return {
      webgpu: this._hasWebGPU,
      wasm: this._hasWASM,
      supported: this._hasWebGPU || this._hasWASM,
      backend: this._hasWebGPU ? "WebGPU" : this._hasWASM ? "WASM" : "none",
    };
  }

  /**
   * Initialize the engine and download/cache the model.
   * Model weights are cached in the browser's Cache API so subsequent
   * loads are nearly instant.
   * @throws {Error} when no supported backend exists or engine creation fails
   */
  async init() {
    if (this.ready) return;
    if (this.loading) return;
    if (!this._hasWebGPU && !this._hasWASM) {
      const err = new Error(
        "Neither WebGPU nor WebAssembly is available. " +
        "Update to iOS 26+ / Safari 26+ for WebGPU support."
      );
      this.onError(err);
      throw err;
    }
    this.loading = true;
    try {
      // Lazy-load WebLLM from CDN (avoids bundling it with the app).
      if (typeof webllm === "undefined") {
        await this._loadWebLLMScript();
      }
      const initProgressCallback = (report) => {
        this.onProgress(report);
      };
      this.engine = await webllm.CreateMLCEngine(this.modelId, {
        initProgressCallback,
      });
      this.ready = true;
      this.loading = false;
      this.onReady();
    } catch (err) {
      this.loading = false;
      this.ready = false;
      this.onError(err);
      throw err;
    }
  }

  /**
   * Send a chat message and get a response.
   * @param {string} userMessage
   * @param {object} opts
   * @param {function} opts.onToken — streaming callback (delta, fullText)
   * @returns {Promise<string>} full response text
   */
  async chat(userMessage, opts = {}) {
    if (!this.ready) {
      throw new Error("Model not loaded. Call init() first.");
    }
    const messages = [
      { role: "system", content: this.systemPrompt },
      { role: "user", content: userMessage },
    ];
    if (opts.onToken) {
      // Streaming mode
      let fullText = "";
      const chunks = await this.engine.chat.completions.create({
        messages,
        stream: true,
        temperature: 0.7,
        max_tokens: 512,
      });
      for await (const chunk of chunks) {
        const delta = chunk.choices[0]?.delta?.content || "";
        fullText += delta;
        opts.onToken(delta, fullText);
      }
      return fullText;
    }
    // Non-streaming mode
    const response = await this.engine.chat.completions.create({
      messages,
      temperature: 0.7,
      max_tokens: 512,
    });
    return response.choices[0]?.message?.content || "";
  }

  /** Reset conversation context. */
  async resetChat() {
    if (this.engine) {
      await this.engine.resetChat();
    }
  }

  /** Unload the model and free memory. */
  async unload() {
    if (this.engine) {
      await this.engine.unload();
      this.engine = null;
      this.ready = false;
    }
  }

  /** Get current engine stats (tokens/sec, memory, etc). Returns null on failure. */
  async getStats() {
    if (!this.engine) return null;
    try {
      const stats = await this.engine.runtimeStatsText();
      return stats;
    } catch {
      return null;
    }
  }

  // ── Private ─────────────────────────────────────────────────────────────

  /**
   * Load the WebLLM library from CDN and publish it as window.webllm.
   *
   * BUG FIX: the previous version assigned `script.src` to the Anthropic
   * SDK URL (wrong library entirely) via a nonsensical self-comparison.
   * Worse, a <script> element with a `src` attribute ignores its inline
   * text, so the ES-module import of @mlc-ai/web-llm below never ran.
   * The element must stay src-less for the inline module to execute.
   */
  _loadWebLLMScript() {
    return new Promise((resolve, reject) => {
      // Check if already loaded
      if (typeof webllm !== "undefined") {
        resolve();
        return;
      }
      // Primary path: inline ES module that imports WebLLM from the CDN
      // and signals completion via a custom event.
      const script = document.createElement("script");
      script.type = "module";
      script.textContent = `
import * as webllmModule from "https://esm.run/@mlc-ai/web-llm";
window.webllm = webllmModule;
window.dispatchEvent(new Event("webllm-loaded"));
`;
      document.head.appendChild(script);
      const onLoaded = () => {
        window.removeEventListener("webllm-loaded", onLoaded);
        resolve();
      };
      window.addEventListener("webllm-loaded", onLoaded);
      // Fallback: also try the UMD bundle approach
      const fallbackScript = document.createElement("script");
      fallbackScript.src = "https://cdn.jsdelivr.net/npm/@mlc-ai/web-llm@0.2.80/lib/index.min.js";
      fallbackScript.onload = () => {
        if (typeof webllm !== "undefined") {
          resolve();
        }
      };
      fallbackScript.onerror = () => {
        reject(new Error("Failed to load WebLLM library from CDN."));
      };
      document.head.appendChild(fallbackScript);
    });
  }
}
// Export for use in templates
window.LocalLLM = LocalLLM;
window.LOCAL_MODEL_CATALOGUE = MODEL_CATALOGUE;

View File

@@ -0,0 +1,246 @@
"""Tests for the local browser model feature — /mobile/local endpoint.
Categories:
L1xx Route & API responses
L2xx Config settings
L3xx Template content & UX
L4xx JavaScript asset
L5xx Security (XSS prevention)
"""
import re
from pathlib import Path
# ── helpers ──────────────────────────────────────────────────────────────────
def _local_html(client) -> str:
    """Fetch and return the rendered /mobile/local page body."""
    response = client.get("/mobile/local")
    return response.text
def _local_llm_js() -> str:
    """Read the local_llm.js asset from the project's static directory."""
    project_root = Path(__file__).parent.parent.parent
    return (project_root / "static" / "local_llm.js").read_text()
# ── L1xx — Route & API responses ─────────────────────────────────────────────
def test_L101_mobile_local_route_returns_200(client):
"""The /mobile/local endpoint should return 200 OK."""
response = client.get("/mobile/local")
assert response.status_code == 200
def test_L102_local_models_config_endpoint(client):
"""The /mobile/local-models API should return model config JSON."""
response = client.get("/mobile/local-models")
assert response.status_code == 200
data = response.json()
assert "enabled" in data
assert "default_model" in data
assert "fallback_to_server" in data
assert "server_model" in data
def test_L103_mobile_status_includes_browser_model(client):
"""The /mobile/status endpoint should include browser model info."""
response = client.get("/mobile/status")
assert response.status_code == 200
data = response.json()
assert "browser_model_enabled" in data
assert "browser_model_id" in data
def test_L104_local_models_config_default_values(client):
"""Config defaults should match what's in config.py."""
data = client.get("/mobile/local-models").json()
assert data["enabled"] is True
assert "SmolLM2" in data["default_model"] or "MLC" in data["default_model"]
assert data["fallback_to_server"] is True
# ── L2xx — Config settings ───────────────────────────────────────────────────
def test_L201_config_has_browser_model_enabled():
"""config.py should define browser_model_enabled."""
from config import settings
assert hasattr(settings, "browser_model_enabled")
assert isinstance(settings.browser_model_enabled, bool)
def test_L202_config_has_browser_model_id():
"""config.py should define browser_model_id."""
from config import settings
assert hasattr(settings, "browser_model_id")
assert isinstance(settings.browser_model_id, str)
assert len(settings.browser_model_id) > 0
def test_L203_config_has_browser_model_fallback():
"""config.py should define browser_model_fallback."""
from config import settings
assert hasattr(settings, "browser_model_fallback")
assert isinstance(settings.browser_model_fallback, bool)
# ── L3xx — Template content & UX ────────────────────────────────────────────
def test_L301_template_includes_local_llm_script(client):
"""mobile_local.html must include the local_llm.js script."""
html = _local_html(client)
assert "local_llm.js" in html
def test_L302_template_has_model_selector(client):
"""Template must have a model selector element."""
html = _local_html(client)
assert 'id="model-select"' in html
def test_L303_template_has_load_button(client):
"""Template must have a load model button."""
html = _local_html(client)
assert 'id="btn-load"' in html
def test_L304_template_has_progress_bar(client):
"""Template must have a progress bar for model download."""
html = _local_html(client)
assert 'id="progress-bar"' in html
def test_L305_template_has_chat_area(client):
"""Template must have a chat log area."""
html = _local_html(client)
assert 'id="local-chat"' in html
def test_L306_template_has_message_input(client):
"""Template must have a message input field."""
html = _local_html(client)
assert 'id="local-message"' in html
def test_L307_input_font_size_16px(client):
"""Input font-size must be 16px to prevent iOS zoom."""
html = _local_html(client)
assert "font-size: 16px" in html
def test_L308_input_has_ios_attributes(client):
"""Input should have autocapitalize, autocorrect, spellcheck, enterkeyhint."""
html = _local_html(client)
assert 'autocapitalize="none"' in html
assert 'autocorrect="off"' in html
assert 'spellcheck="false"' in html
assert 'enterkeyhint="send"' in html
def test_L309_touch_targets_44px(client):
"""Buttons and inputs must meet 44px min-height (Apple HIG)."""
html = _local_html(client)
assert "min-height: 44px" in html
def test_L310_safe_area_inset_bottom(client):
"""Chat input must account for iPhone home indicator."""
html = _local_html(client)
assert "safe-area-inset-bottom" in html
def test_L311_template_has_backend_badge(client):
"""Template should show LOCAL or SERVER badge."""
html = _local_html(client)
assert "backend-badge" in html
assert "LOCAL" in html
# ── L4xx — JavaScript asset ──────────────────────────────────────────────────
def test_L401_local_llm_js_exists():
"""static/local_llm.js must exist."""
js_path = Path(__file__).parent.parent.parent / "static" / "local_llm.js"
assert js_path.exists(), "static/local_llm.js not found"
def test_L402_local_llm_js_defines_class():
"""local_llm.js must define the LocalLLM class."""
js = _local_llm_js()
assert "class LocalLLM" in js
def test_L403_local_llm_js_has_model_catalogue():
"""local_llm.js must define a MODEL_CATALOGUE."""
js = _local_llm_js()
assert "MODEL_CATALOGUE" in js
def test_L404_local_llm_js_has_webgpu_detection():
"""local_llm.js must detect WebGPU capability."""
js = _local_llm_js()
assert "detectWebGPU" in js or "navigator.gpu" in js
def test_L405_local_llm_js_has_chat_method():
"""local_llm.js LocalLLM class must have a chat method."""
js = _local_llm_js()
assert "async chat(" in js
def test_L406_local_llm_js_has_init_method():
"""local_llm.js LocalLLM class must have an init method."""
js = _local_llm_js()
assert "async init(" in js
def test_L407_local_llm_js_has_unload_method():
"""local_llm.js LocalLLM class must have an unload method."""
js = _local_llm_js()
assert "async unload(" in js
def test_L408_local_llm_js_exports_to_window():
"""local_llm.js must export LocalLLM and catalogue to window."""
js = _local_llm_js()
assert "window.LocalLLM" in js
assert "window.LOCAL_MODEL_CATALOGUE" in js
def test_L409_local_llm_js_has_streaming_support():
"""local_llm.js chat method must support streaming via onToken."""
js = _local_llm_js()
assert "onToken" in js
assert "stream: true" in js
def test_L410_local_llm_js_has_isSupported_static():
"""LocalLLM must have a static isSupported() method."""
js = _local_llm_js()
assert "static isSupported()" in js
# ── L5xx — Security ─────────────────────────────────────────────────────────
def test_L501_no_innerhtml_with_user_input(client):
    """Template must not use innerHTML with user-controlled data."""
    html = _local_html(client)
    # Dangerous pattern: innerHTML (+)= `...${message}...` would inject
    # raw user text into the DOM.
    pattern = re.compile(r"innerHTML\s*\+=?\s*`([^`]*)`", re.DOTALL)
    for literal in pattern.findall(html):
        assert "${message}" not in literal, (
            "innerHTML template literal contains ${message} — XSS vulnerability"
        )
def test_L502_uses_textcontent_for_messages(client):
"""Template must use textContent (not innerHTML) for user messages."""
html = _local_html(client)
assert "textContent" in html
def test_L503_no_eval_or_function_constructor():
    """local_llm.js must not use eval() or new Function().

    The previous check (``"eval(" not in js or "evaluate" in js``) was
    vacuously true whenever the word "evaluate" appeared anywhere in the
    file, so it never actually forbade ``eval(``. Use a word-boundary
    regex instead: match ``eval(`` only when not preceded by an
    identifier character, so "evaluate(" and "my_eval(" don't count
    while bare ``eval(`` and ``window.eval(`` do.
    """
    js = _local_llm_js()
    assert not re.search(r"(?<![A-Za-z0-9_])eval\(", js), (
        "standalone eval( found in local_llm.js"
    )
    assert "new Function(" not in js