<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width,initial-scale=1">
  <title>TurboQuant WASM Inference</title>
  <style>
    /* Page chrome: dark theme, monospace, single centered column. */
    * { box-sizing: border-box; }
    body {
      font-family: monospace;
      max-width: 800px;
      margin: 0 auto;
      padding: 20px;
      background: #0d1117;
      color: #c9d1d9;
    }
    h1 { color: #58a6ff; }
    h2 {
      color: #8b949e;
      border-bottom: 1px solid #21262d;
      padding-bottom: 8px;
    }

    /* Boxed section used for every step and for the output area. */
    .card {
      background: #161b22;
      border: 1px solid #30363d;
      border-radius: 6px;
      padding: 16px;
      margin: 12px 0;
    }

    /* Controls. :disabled is declared after :hover so it wins on disabled buttons. */
    button {
      background: #238636;
      color: #fff;
      border: none;
      padding: 8px 16px;
      border-radius: 6px;
      cursor: pointer;
      font-family: monospace;
      margin: 4px;
    }
    button:hover { background: #2ea043; }
    button:disabled {
      background: #21262d;
      color: #484f58;
      cursor: not-allowed;
    }
    input,
    textarea,
    select {
      background: #0d1117;
      color: #c9d1d9;
      border: 1px solid #30363d;
      border-radius: 6px;
      padding: 8px;
      font-family: monospace;
      width: 100%;
    }

    /* Generated text keeps its own line breaks. */
    #output {
      min-height: 100px;
      white-space: pre-wrap;
    }

    /* Three equal metric tiles. */
    #metrics {
      display: grid;
      grid-template-columns: 1fr 1fr 1fr;
      gap: 8px;
    }
    .metric { text-align: center; }
    .metric .val {
      font-size: 24px;
      color: #58a6ff;
    }
    .metric .label {
      font-size: 12px;
      color: #8b949e;
    }

    /* Status badge; the script swaps in .ok / .err / .pending next to .status. */
    .status {
      padding: 4px 8px;
      border-radius: 4px;
      font-size: 12px;
    }
    .ok { background: #238636; }
    .err { background: #da3633; }
    .pending { background: #d29922; }
  </style>
</head>
<body>
<h1>TurboQuant WASM Inference</h1>
<p>Run quantized models directly in the browser via WebAssembly. No server. No data leaves your machine.</p>

<!-- Step 1: the button calls initModule(); the script rewrites the badge's class and text. -->
<div class="card">
  <h2>1. Initialize</h2>
  <button type="button" id="btnInit" onclick="initModule()">Initialize WASM Module</button>
  <!-- role="status" makes the asynchronous badge change audible to screen readers. -->
  <span id="initStatus" class="status" role="status">not started</span>
</div>

<!-- Step 2: controls start disabled; the script enables them once the worker reports 'ready'. -->
<div class="card">
  <h2>2. Load Model</h2>
  <label for="modelFile">Model file (.bin, .gguf, .tq)</label>
  <input type="file" id="modelFile" accept=".bin,.gguf,.tq" disabled>
  <button type="button" id="btnLoad" onclick="loadModel()" disabled>Load Model</button>
  <span id="loadStatus" class="status" role="status">waiting</span>
</div>

<!-- Step 3: every control here is enabled by enableInference() after a successful load. -->
<div class="card">
  <h2>3. Inference</h2>
  <label for="prompt">Prompt</label>
  <textarea id="prompt" rows="3" placeholder="Enter prompt..." disabled>Hello, I am</textarea>
  <div style="display:flex;gap:8px;margin:8px 0">
    <!-- Compact row: controls are named with aria-label so the flex layout is unchanged. -->
    <select id="temp" aria-label="Sampling temperature" disabled>
      <option value="0">Greedy (temp=0)</option>
      <option value="0.2">Focused (0.2)</option>
      <option value="0.7" selected>Balanced (0.7)</option>
      <option value="1.0">Creative (1.0)</option>
    </select>
    <input type="number" id="maxTok" value="64" min="1" max="512" style="width:80px" aria-label="Maximum tokens to generate" disabled>
    <button type="button" id="btnGen" onclick="doGenerate()" disabled>Generate</button>
    <button type="button" id="btnBench" onclick="doBenchmark()" disabled>Benchmark</button>
  </div>
  <!-- Must stay empty in markup: #output is white-space:pre-wrap and the script appends text to it. -->
  <div id="output" class="card" aria-live="polite"></div>
</div>

<!-- Metric tiles; the script fills #mLoad, #mLat and #mMem. -->
<div class="card">
  <h2>Metrics</h2>
  <div id="metrics">
    <div class="metric">
      <div class="val" id="mLoad">--</div>
      <div class="label">Load (ms)</div>
    </div>
    <div class="metric">
      <!-- The script writes the worker's tokensPerSec here, so this is throughput, not latency. -->
      <div class="val" id="mLat">--</div>
      <div class="label">Throughput (tok/s)</div>
    </div>
    <div class="metric">
      <div class="val" id="mMem">--</div>
      <div class="label">Memory (MB)</div>
    </div>
  </div>
</div>

<!-- Placeholder text; the script replaces the whole <pre> content when a benchmark result arrives. -->
<div class="card">
  <h2>Viability Assessment</h2>
  <pre id="assessment" aria-live="polite">
Waiting for benchmark results...

Target models:
  Falcon-H1-Tiny-90M: ~45MB quantized (Q2_K)
  Bonsai-1.7B: ~400MB quantized (Q4_0)

Browser limits:
  Chrome: 4GB WASM memory
  Firefox: 2GB WASM memory
  Safari: 1GB WASM memory (may fail on 1.7B)
</pre>
</div>

<script>
// Handle to the inference Web Worker; stays null until initModule() creates it.
let worker = null;
// NOTE(review): no function in this script reads or writes loadStart; the load
// time shown in the UI comes from the worker's 'loaded' message instead.
let loadStart = 0;

/**
 * Start the inference worker and ask it to initialize.
 *
 * Marks the init badge as pending, spawns 'inference-worker.js', routes its
 * messages to handleMsg and posts the {cmd: 'init'} command. The badge turns
 * green when handleMsg receives the worker's 'ready' event.
 */
function initModule() {
  const badge = document.getElementById('initStatus');
  badge.className = 'status pending';
  badge.textContent = 'loading...';

  // A second click would otherwise orphan the previous worker (and whatever it
  // has loaded) while it keeps running in the background.
  if (worker) {
    worker.terminate();
  }

  worker = new Worker('inference-worker.js');
  worker.onmessage = handleMsg;
  // Script-load failures and uncaught worker exceptions never arrive as
  // messages; without this handler the badge would stay on "loading..." forever.
  worker.onerror = (err) => {
    if (badge.className === 'status pending') {
      badge.className = 'status err';
      badge.textContent = 'FAILED';
    }
    document.getElementById('output').textContent =
      'ERROR: ' + (err && err.message ? err.message : 'inference worker failed');
  };
  worker.postMessage({cmd: 'init'});
}

/**
 * Dispatch a message from the inference worker to the matching UI update.
 *
 * Handled `e.data.event` values:
 *   'ready'     - init badge goes green; file picker and Load button are enabled.
 *   'loaded'    - load badge and Load metric are updated from `ok` / `ms`;
 *                 on success the inference controls are enabled.
 *   'generated' - `text` is appended to the output, `tokensPerSec` is shown,
 *                 and the page's JS heap size is shown where the browser exposes it.
 *   'benchmark' - the assessment panel is replaced with a summary built from
 *                 `runs` and `msPerToken`.
 *   'error'     - `msg` replaces the output; any badge still pending is marked failed.
 * Any other event is ignored.
 *
 * @param {MessageEvent} e - message event whose `data` carries `event` plus payload fields.
 */
function handleMsg(e) {
  const m = e.data;
  const byId = (id) => document.getElementById(id);
  // Missing or non-finite numbers render as the same "--" placeholder the page
  // starts with, instead of throwing on `.toFixed` of undefined.
  const fixed = (value, digits) => (Number.isFinite(value) ? value.toFixed(digits) : '--');

  switch (m.event) {
    case 'ready': {
      const badge = byId('initStatus');
      badge.className = 'status ok';
      badge.textContent = 'ready';
      byId('modelFile').disabled = false;
      byId('btnLoad').disabled = false;
      break;
    }

    case 'loaded': {
      const ms = fixed(m.ms, 0);
      const badge = byId('loadStatus');
      badge.className = m.ok ? 'status ok' : 'status err';
      badge.textContent = m.ok ? 'loaded (' + ms + 'ms)' : 'FAILED';
      byId('mLoad').textContent = ms;
      if (m.ok) enableInference();
      break;
    }

    case 'generated': {
      byId('output').textContent += m.text;
      byId('mLat').textContent = m.tokensPerSec;
      // performance.memory is non-standard, hence the feature test. It reports
      // this page's JS heap.
      if (performance.memory) {
        byId('mMem').textContent = (performance.memory.usedJSHeapSize / 1e6).toFixed(0);
      }
      break;
    }

    case 'benchmark': {
      const perToken = m.msPerToken;
      const ms = fixed(perToken, 1);
      // Only derive tok/s from a positive duration; 1000 / 0 would print "Infinity".
      const tps = perToken > 0 ? fixed(1000 / perToken, 1) : '--';
      const viable = perToken < 100;
      byId('assessment').textContent =
        'BENCHMARK RESULTS (' + m.runs + ' runs)\n' +
        ' Latency: ' + ms + ' ms/token (' + tps + ' tok/s)\n' +
        ' Load time: ' + byId('mLoad').textContent + ' ms\n' +
        ' Memory: ' + byId('mMem').textContent + ' MB\n\n' +
        'VIABILITY: ' + (viable ? 'VIABLE' : 'BORDERLINE — consider smaller model') + '\n' +
        (viable ? 'Ready for the-door service worker integration.' : 'Try Falcon-H1-Tiny-90M for faster inference.');
      break;
    }

    case 'error': {
      byId('output').textContent = 'ERROR: ' + m.msg;
      // If the failure happened mid-init or mid-load, do not leave that badge
      // stuck in its pending state.
      for (const id of ['initStatus', 'loadStatus']) {
        const badge = byId(id);
        if (badge.className === 'status pending') {
          badge.className = 'status err';
          badge.textContent = 'FAILED';
        }
      }
      break;
    }

    default:
      break;
  }
}

/**
 * Read the selected model file and hand its bytes to the worker.
 *
 * Does nothing when no file is selected. While the file is being read the load
 * badge shows the file size; the final state is set by handleMsg when the
 * worker answers with a 'loaded' event.
 */
function loadModel() {
  const file = document.getElementById('modelFile').files[0];
  if (!file) return;

  const badge = document.getElementById('loadStatus');
  badge.className = 'status pending';
  badge.textContent = 'reading ' + (file.size / 1e6).toFixed(1) + 'MB...';

  const reader = new FileReader();
  // The ArrayBuffer is listed as a transferable so it is moved to the worker
  // instead of being copied.
  reader.onload = () => worker.postMessage({cmd: 'load', data: reader.result}, [reader.result]);
  // Without this, an unreadable file would leave the badge on "reading ..." forever.
  reader.onerror = () => {
    badge.className = 'status err';
    badge.textContent = 'FAILED';
    document.getElementById('output').textContent =
      'ERROR: ' + (reader.error && reader.error.message ? reader.error.message : 'could not read model file');
  };
  reader.readAsArrayBuffer(file);
}

/**
 * Enable the prompt box, sampling selector, token limit and both action
 * buttons. Called by handleMsg once the worker reports a successful load.
 */
function enableInference() {
  for (const id of ['prompt', 'temp', 'maxTok', 'btnGen', 'btnBench']) {
    document.getElementById(id).disabled = false;
  }
}

/**
 * Clear the output area and send a 'generate' command to the worker using the
 * current prompt, token limit and temperature from the form.
 */
function doGenerate() {
  document.getElementById('output').textContent = '';

  // The min/max attributes on a number input are not enforced when the value is
  // read from script, and an empty field parses to NaN, so validate here.
  // Fallbacks mirror the markup defaults (value 64, range 1..512).
  const maxTokEl = document.getElementById('maxTok');
  const lower = parseInt(maxTokEl.min, 10) || 1;
  const upper = parseInt(maxTokEl.max, 10) || 512;
  let maxTokens = parseInt(maxTokEl.value, 10);
  if (!Number.isFinite(maxTokens)) {
    maxTokens = parseInt(maxTokEl.defaultValue, 10) || 64;
  }
  maxTokens = Math.min(upper, Math.max(lower, maxTokens));

  worker.postMessage({
    cmd: 'generate',
    prompt: document.getElementById('prompt').value,
    maxTokens: maxTokens,
    temperature: parseFloat(document.getElementById('temp').value),
  });
}

/**
 * Ask the worker to run a 100-run benchmark. The result comes back as a
 * 'benchmark' event and is rendered by handleMsg.
 */
function doBenchmark() {
  worker.postMessage({cmd: 'benchmark', runs: 100});
}
</script>
</body>
</html>