Implements M3 Sovereignty Layer features from issue #15: - edge-intelligence.js: Local-first LLM inference pipeline - Tries WebLLM (SmolLM2-360M via WebGPU) first for near-zero latency - Falls back to Transformers.js (LaMini-Flan-T5-77M, CPU/WASM) - Falls back to Ollama backend; never blocks on missing services - Lazy activation via HUD button so models only load on user demand - nostr-identity.js: Silent Nostr signing without extension popup - Generates a keypair on first visit, persists to localStorage - Signs NIP-01 events locally (no window.nostr / extension needed) - Supports importKey() for existing identities and rotateKey() - Optional delegation to NIP-07 extension via useExtension(true) - app.js: Integrates both modules - Chat pipeline: edge model → Ollama → local fallback responses - Animated "thinking" indicator while inference runs - Nostr npub displayed in HUD on init - index.html + style.css: Edge AI status badge + Nostr identity badge in the HUD, with loading/ready/fallback states Fixes #15 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
146 lines
4.9 KiB
JavaScript
// ═══════════════════════════════════════════
// EDGE INTELLIGENCE — Browser-side LLM
// ═══════════════════════════════════════════
// Inference priority:
// 1. WebLLM (WebGPU) — SmolLM2-360M, near-zero latency once loaded
// 2. Transformers.js (WASM/CPU) — LaMini-Flan-T5-77M, broader device support
// 3. Signals caller to use Ollama backend
// All modes are local-first; no server round-trip for simple queries.

// MLC prebuilt model id used by WebLLM (q0f16 quantization).
const WEBLLM_MODEL = 'SmolLM2-360M-Instruct-q0f16-MLC';
// Hugging Face hub id for the Transformers.js CPU/WASM fallback model.
const TRANSFORMERS_MODEL = 'Xenova/LaMini-Flan-T5-77M';

// System prompt shared by every local completion.
const SYSTEM_PROMPT =
  'You are Timmy, a sovereign lizard-wizard AI in The Nexus. ' +
  'Be concise, witty, and helpful. Keep replies under 2 sentences.';

// ─── State ─────────────────────────────────
let _engine = null;   // WebLLM engine object, or Transformers.js pipeline function
let _mode = 'uninitialized'; // 'webllm' | 'transformers' | 'ollama' | 'uninitialized'
let _statusCb = null; // HUD status callback registered by init(); null before init
let _isLoading = false; // true while a model load is in flight
let _isReady = false;   // true once a webllm/transformers model is usable
|
// ─── Public API ────────────────────────────

/**
 * Begin loading the best available edge model.
 * No-op when a load is already in flight or a model is ready.
 * Never throws: if every local backend fails, signals the Ollama fallback
 * through the status callback instead.
 * @param {(state: string, text: string) => void} statusCallback
 */
async function init(statusCallback) {
  if (_isLoading || _isReady) return;
  _isLoading = true;
  _statusCb = statusCallback ?? (() => {});

  _setStatus('loading', 'Probing edge capabilities…');

  // Probe in priority order; short-circuits as soon as one backend loads.
  const loaded = (await _tryWebLLM()) || (await _tryTransformers());
  _isLoading = false;

  if (loaded) {
    _isReady = true;
    return;
  }

  // Both failed — signal Ollama fallback
  _mode = 'ollama';
  _setStatus('fallback', 'Edge AI: Ollama backend');
}
|
/**
 * Run inference on a user message.
 * Returns a string if handled locally, null if caller should use Ollama.
 * Inference errors are logged and reported as null (Ollama fallback),
 * never thrown to the caller.
 * @param {string} userMessage
 * @returns {Promise<string|null>}
 */
async function query(userMessage) {
  if (!_isReady) return null;

  try {
    switch (_mode) {
      case 'webllm': {
        if (!_engine) break;
        const completion = await _engine.chat.completions.create({
          messages: [
            { role: 'system', content: SYSTEM_PROMPT },
            { role: 'user', content: userMessage },
          ],
          max_tokens: 120,
          temperature: 0.7,
          stream: false,
        });
        return completion.choices[0].message.content.trim();
      }
      case 'transformers': {
        if (!_engine) break;
        const outputs = await _engine(userMessage, {
          max_new_tokens: 80,
          do_sample: true,
          temperature: 0.7,
        });
        const raw = outputs[0]?.generated_text ?? '';
        // Strip the input echo that some seq2seq models return
        const stripped = raw.replace(userMessage, '').trim();
        return stripped || raw.trim();
      }
    }
  } catch (err) {
    console.warn('[EdgeAI] Inference error:', err);
  }

  return null; // caller should fall back to Ollama
}
|
/** True once a model is loaded and ready. */
const isReady = () => _isReady;

/** Current inference mode string. */
const getMode = () => _mode;
|
// ─── Private helpers ───────────────────────

/**
 * Attempt to boot WebLLM over WebGPU.
 * Bails out immediately when navigator.gpu is absent; any load failure is
 * logged and reported as false so the caller can try the next backend.
 * @returns {Promise<boolean>} true when the engine is ready
 */
async function _tryWebLLM() {
  if (!navigator.gpu) return false;
  try {
    _setStatus('loading', 'Initializing WebLLM (WebGPU)…');
    const webllm = await import('https://esm.run/@mlc-ai/web-llm');
    const onProgress = (p) => {
      const pct = Math.round((p.progress ?? 0) * 100);
      _setStatus('loading', `Loading SmolLM2: ${pct}%`);
    };
    _engine = await webllm.CreateMLCEngine(WEBLLM_MODEL, {
      initProgressCallback: onProgress,
    });
    _mode = 'webllm';
    _setStatus('ready', 'Edge AI: WebGPU ⚡');
    return true;
  } catch (err) {
    console.warn('[EdgeAI] WebLLM unavailable:', err.message ?? err);
    return false;
  }
}
|
/**
 * Attempt to boot Transformers.js (WASM/CPU).
 * Pulls the library from the jsDelivr CDN and the model from the
 * Hugging Face hub on first use. Any failure is logged and reported as
 * false so the caller can fall back to Ollama.
 * @returns {Promise<boolean>} true when the pipeline is ready
 */
async function _tryTransformers() {
  try {
    _setStatus('loading', 'Initializing edge model (CPU)…');
    const { pipeline, env } = await import(
      'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2'
    );
    // Use local cache; allow remote model hub
    env.allowLocalModels = false;
    env.allowRemoteModels = true;

    _engine = await pipeline('text2text-generation', TRANSFORMERS_MODEL, {
      progress_callback: (info) => {
        // BUGFIX: transformers.js emits per-file download progress with
        // status 'progress' (carrying a 0-100 `progress` field), not
        // 'downloading' — the old check never matched, so the HUD never
        // showed a download percentage.
        if (info.status === 'progress') {
          const pct = Math.round(info.progress ?? 0);
          _setStatus('loading', `Downloading model: ${pct}%`);
        }
      },
    });
    _mode = 'transformers';
    _setStatus('ready', 'Edge AI: CPU ◈');
    return true;
  } catch (err) {
    console.warn('[EdgeAI] Transformers.js unavailable:', err.message ?? err);
    return false;
  }
}
|
/**
 * Forward a status update to the HUD callback registered by init().
 * BUGFIX: _statusCb starts as null, so any status emitted before init()
 * runs would throw a TypeError; optional-call makes pre-init emissions
 * a safe no-op.
 * @param {string} state - e.g. 'loading' | 'ready' | 'fallback'
 * @param {string} text - human-readable status line
 */
function _setStatus(state, text) {
  _statusCb?.(state, text);
}
|
// Public module surface: lazy model bootstrap, local inference, and
// readiness/mode accessors for the HUD.
export const EdgeIntelligence = { init, query, isReady, getMode };