Files
the-nexus/edge-intelligence.js
Alexander Whitestone c1e4a848a8 feat: edge intelligence — browser model + silent Nostr signing
Implements M3 Sovereignty Layer features from issue #15:

- edge-intelligence.js: Local-first LLM inference pipeline
  - Tries WebLLM (SmolLM2-360M via WebGPU) first for near-zero latency
  - Falls back to Transformers.js (LaMini-Flan-T5-77M, CPU/WASM)
  - Falls back to Ollama backend; never blocks on missing services
  - Lazy activation via HUD button so models only load on user demand

- nostr-identity.js: Silent Nostr signing without extension popup
  - Generates a keypair on first visit, persists to localStorage
  - Signs NIP-01 events locally (no window.nostr / extension needed)
  - Supports importKey() for existing identities and rotateKey()
  - Optional delegation to NIP-07 extension via useExtension(true)

- app.js: Integrates both modules
  - Chat pipeline: edge model → Ollama → local fallback responses
  - Animated "thinking" indicator while inference runs
  - Nostr npub displayed in HUD on init

- index.html + style.css: Edge AI status badge + Nostr identity badge
  in the HUD, with loading/ready/fallback states

Fixes #15

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-23 18:39:08 -04:00

146 lines
4.9 KiB
JavaScript

// ═══════════════════════════════════════════
// EDGE INTELLIGENCE — Browser-side LLM
// ═══════════════════════════════════════════
// Inference priority:
// 1. WebLLM (WebGPU) — SmolLM2-360M, near-zero latency once loaded
// 2. Transformers.js (WASM/CPU) — LaMini-Flan-T5-77M, broader device support
// 3. Signals caller to use Ollama backend
// All modes are local-first; no server round-trip for simple queries.
// Model identifiers for the two browser-side inference tiers.
const WEBLLM_MODEL = 'SmolLM2-360M-Instruct-q0f16-MLC';
const TRANSFORMERS_MODEL = 'Xenova/LaMini-Flan-T5-77M';
// Persona prompt prepended to every WebLLM chat completion
// (the Transformers.js path receives the raw user message only).
const SYSTEM_PROMPT =
'You are Timmy, a sovereign lizard-wizard AI in The Nexus. ' +
'Be concise, witty, and helpful. Keep replies under 2 sentences.';
// ─── State ─────────────────────────────────
let _engine = null; // WebLLM engine object OR Transformers.js pipeline function, per _mode
let _mode = 'uninitialized'; // 'webllm' | 'transformers' | 'ollama' | 'uninitialized'
let _statusCb = null; // (state, text) => void HUD status sink; assigned in init()
let _isLoading = false; // true while a model load is in flight (guards re-entry)
let _isReady = false; // true once a local model loaded; stays false in 'ollama' mode
// ─── Public API ────────────────────────────
/**
 * Begin loading the best available edge model.
 * Idempotent: calls made while a load is in flight, or after a model is
 * ready, are no-ops. Probes WebLLM first, then Transformers.js; if both
 * fail, flips to 'ollama' mode so the caller uses the backend instead.
 * @param {(state: string, text: string) => void} statusCallback - HUD status sink
 */
async function init(statusCallback) {
  if (_isLoading || _isReady) return;
  _isLoading = true;
  _statusCb = statusCallback ?? (() => {});
  _setStatus('loading', 'Probing edge capabilities…');
  // Short-circuit: only probe Transformers.js if WebLLM did not load.
  const loaded = (await _tryWebLLM()) || (await _tryTransformers());
  _isLoading = false;
  if (loaded) {
    _isReady = true;
    return;
  }
  // Neither browser backend worked — signal Ollama fallback.
  _mode = 'ollama';
  _setStatus('fallback', 'Edge AI: Ollama backend');
}
/**
 * Run inference on a user message.
 * Returns a string if handled locally, null if caller should use Ollama.
 * Any inference error is logged and converted into a null return so the
 * caller's Ollama fallback always remains available.
 * @param {string} userMessage
 * @returns {Promise<string|null>}
 */
async function query(userMessage) {
  if (!_isReady) return null;
  try {
    switch (_mode) {
      case 'webllm': {
        if (!_engine) break;
        const completion = await _engine.chat.completions.create({
          messages: [
            { role: 'system', content: SYSTEM_PROMPT },
            { role: 'user', content: userMessage },
          ],
          max_tokens: 120,
          temperature: 0.7,
          stream: false,
        });
        return completion.choices[0].message.content.trim();
      }
      case 'transformers': {
        if (!_engine) break;
        const outputs = await _engine(userMessage, {
          max_new_tokens: 80,
          do_sample: true,
          temperature: 0.7,
        });
        const raw = outputs[0]?.generated_text ?? '';
        // Some seq2seq models echo the prompt; strip it when present,
        // but never return an empty string if stripping removes everything.
        const stripped = raw.replace(userMessage, '').trim();
        return stripped || raw.trim();
      }
    }
  } catch (err) {
    console.warn('[EdgeAI] Inference error:', err);
  }
  return null; // caller should fall back to Ollama
}
/**
 * Report whether a local edge model has finished loading.
 * Note: remains false in 'ollama' fallback mode.
 * @returns {boolean}
 */
function isReady() {
  return _isReady;
}
/**
 * Current inference mode string.
 * @returns {'webllm'|'transformers'|'ollama'|'uninitialized'}
 */
function getMode() {
  return _mode;
}
// ─── Private helpers ───────────────────────
// Attempt to bring up the WebLLM (WebGPU) engine.
// Resolves true on success; false when WebGPU is absent or the engine
// fails to load (import error, model fetch error, etc.).
async function _tryWebLLM() {
  // WebGPU is a hard requirement for this tier.
  if (!navigator.gpu) return false;
  try {
    _setStatus('loading', 'Initializing WebLLM (WebGPU)…');
    const webllm = await import('https://esm.run/@mlc-ai/web-llm');
    const onProgress = (p) => {
      const pct = Math.round((p.progress ?? 0) * 100);
      _setStatus('loading', `Loading SmolLM2: ${pct}%`);
    };
    _engine = await webllm.CreateMLCEngine(WEBLLM_MODEL, {
      initProgressCallback: onProgress,
    });
    _mode = 'webllm';
    _setStatus('ready', 'Edge AI: WebGPU ⚡');
    return true;
  } catch (err) {
    console.warn('[EdgeAI] WebLLM unavailable:', err.message ?? err);
    return false;
  }
}
// Attempt to bring up the Transformers.js (WASM/CPU) pipeline.
// Resolves true on success; false on any import/download failure.
async function _tryTransformers() {
  try {
    _setStatus('loading', 'Initializing edge model (CPU)…');
    const tf = await import(
      'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2'
    );
    // Fetch weights from the remote model hub; do not probe for
    // locally-hosted model files.
    tf.env.allowLocalModels = false;
    tf.env.allowRemoteModels = true;
    const reportDownload = (info) => {
      if (info.status === 'downloading') {
        const pct = info.total ? Math.round((info.loaded / info.total) * 100) : 0;
        _setStatus('loading', `Downloading model: ${pct}%`);
      }
    };
    _engine = await tf.pipeline('text2text-generation', TRANSFORMERS_MODEL, {
      progress_callback: reportDownload,
    });
    _mode = 'transformers';
    _setStatus('ready', 'Edge AI: CPU ◈');
    return true;
  } catch (err) {
    console.warn('[EdgeAI] Transformers.js unavailable:', err.message ?? err);
    return false;
  }
}
/**
 * Relay a status update to the HUD callback registered via init().
 * @param {string} state - one of 'loading' | 'ready' | 'fallback'
 * @param {string} text - human-readable badge text
 */
function _setStatus(state, text) {
  _statusCb(state, text);
}
// Public module surface consumed by app.js.
export const EdgeIntelligence = { init, query, isReady, getMode };