// Change summary (from commit message):
//   1. ui.js: edge triage now runs BEFORE session handler delegation —
//      classify() called for all send() paths (session + WebSocket);
//      trivial + localReply → setSpeechBubble() used for local reply display;
//      session handler only receives moderate/complex messages;
//      _fetchEstimate() fired for non-trivial in session mode too.
//   2. edge-worker.js: quantization footprint documented (~87MB int8, cached).
/**
 * edge-worker.js — Web Worker entry point for browser-side AI triage.
 *
 * Runs in a dedicated Web Worker thread via new Worker(url, {type:'module'}).
 * Receives messages: { id, type: 'classify'|'sentiment', text }
 * Replies with:      { id, result }
 *
 * Models: Xenova/mobilebert-uncased-mnli (zero-shot classification)
 *         Xenova/distilbert-base-uncased-finetuned-sst-2-english (sentiment)
 *
 * Lifecycle events (no id):
 *   { type: 'ready' }           — both models loaded and warm
 *   { type: 'error', message }  — fatal model-load failure
 *
 * Model caching:
 *   @xenova/transformers v2 caches model weights in the browser's Cache API
 *   (via fetch() → opaque cache). After the first load (~87 MB combined),
 *   subsequent page loads serve models from the cache without network
 *   round-trips. We configure useBrowserCache: true (the default) and disable
 *   the filesystem backend so only the browser cache is used. The existing
 *   service worker at sw.js uses a cache-first strategy that extends coverage
 *   to these assets.
 */

import { pipeline, env } from '@xenova/transformers';
|
|
|
|
// ── Transformers.js caching config ───────────────────────────────────────────
|
|
// Use browser Cache API for model weights (default behaviour, made explicit).
|
|
// Disable Node.js filesystem path so it falls back to browser cache only.
|
|
env.useBrowserCache = true; // cache model weights via browser Cache API
|
|
env.allowLocalModels = false; // no filesystem — browser-only environment
|
|
|
|
// Classification labels → complexity tier mapping
|
|
// trivial — handled locally, no server call, no sats
|
|
// moderate — real request but may be free-tier; show cost preview before send
|
|
// complex — substantive work; always priced; show cost preview before send
|
|
const TRIVIAL_LABELS = ['greeting', 'small-talk'];
|
|
const MODERATE_LABELS = ['simple-question'];
|
|
const COMPLEX_LABELS = ['technical-task', 'creative-work', 'complex-question', 'code-request'];
|
|
const ALL_LABELS = [...TRIVIAL_LABELS, ...MODERATE_LABELS, ...COMPLEX_LABELS];
|
|
|
|
const TRIVIAL_THRESHOLD = 0.55; // minimum score to call trivial "trivial"
|
|
const MODERATE_THRESHOLD = 0.40; // below this → upgrade to complex (model is uncertain)
|
|
|
|
const LOCAL_REPLIES = [
|
|
"Greetings, traveller! Ask me something arcane and I shall conjure wisdom from the ether.",
|
|
"Ah, a visitor! I sense curious energies about you. What wisdom do you seek?",
|
|
"*adjusts hat* Hello there! The crystal ball is warm and ready.",
|
|
"Well met! Timmy Tower is open for business. What shall we conjure today?",
|
|
"Hail! The generosity pool glimmers. What brings you to my tower?",
|
|
];
|
|
|
|
function _randomReply() {
|
|
return LOCAL_REPLIES[Math.floor(Math.random() * LOCAL_REPLIES.length)];
|
|
}
|
|
|
|
let _classifier = null;
|
|
let _sentimentPipe = null;
|
|
|
|
// ── Fast greeting heuristic ───────────────────────────────────────────────────
|
|
function _isGreeting(text) {
|
|
return /^(hi|hey|hello|howdy|greetings|yo|sup|hiya|what'?s up)[!?.,]?\s*$/i.test(text.trim());
|
|
}
|
|
|
|
// ── Model loading ─────────────────────────────────────────────────────────────
|
|
// Both models use int8 quantization (quantized:true) to keep first-load small:
|
|
// Xenova/mobilebert-uncased-mnli ~22 MB (zero-shot classification)
|
|
// Xenova/distilbert-base-uncased-*-sst-2 ~65 MB (sentiment analysis)
|
|
// Combined ~87 MB on first load; subsequent loads served from the browser Cache API.
|
|
async function _loadModels() {
|
|
[_classifier, _sentimentPipe] = await Promise.all([
|
|
pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli', { quantized: true }),
|
|
pipeline('sentiment-analysis', 'Xenova/distilbert-base-uncased-finetuned-sst-2-english', { quantized: true }),
|
|
]);
|
|
}
|
|
|
|
// ── Handlers ──────────────────────────────────────────────────────────────────
|
|
|
|
/**
|
|
* _classify — returns the complexity tier of the input text.
|
|
*
|
|
* Returns:
|
|
* {
|
|
* complexity: 'trivial' | 'moderate' | 'complex',
|
|
* score: number, // top-label confidence
|
|
* reason: string, // winning label name or heuristic name
|
|
* localReply?: string, // only present when complexity === 'trivial'
|
|
* }
|
|
*
|
|
* Complexity tiers:
|
|
* trivial — greeting/small-talk with high confidence; answered locally, 0 sats
|
|
* moderate — simple question or uncertain classification; show cost preview, route to server
|
|
* complex — technical / creative / code work; always priced, show cost preview
|
|
*/
|
|
async function _classify(text) {
|
|
const trimmed = text.trim();
|
|
|
|
// Fast path: single-word/short greetings are trivially local
|
|
if (_isGreeting(trimmed)) {
|
|
return { complexity: 'trivial', score: 0.99, reason: 'greeting-heuristic', localReply: _randomReply() };
|
|
}
|
|
|
|
// If model hasn't loaded yet, default to moderate (show estimate, let server decide)
|
|
if (!_classifier) {
|
|
return { complexity: 'moderate', score: 0, reason: 'model-unavailable' };
|
|
}
|
|
|
|
try {
|
|
const result = await _classifier(trimmed, ALL_LABELS, { multi_label: false });
|
|
const topLabel = result.labels[0];
|
|
const topScore = result.scores[0];
|
|
|
|
if (TRIVIAL_LABELS.includes(topLabel) && topScore >= TRIVIAL_THRESHOLD) {
|
|
// High-confidence trivial: small-talk or greeting → answer locally
|
|
return { complexity: 'trivial', score: topScore, reason: topLabel, localReply: _randomReply() };
|
|
}
|
|
|
|
if (COMPLEX_LABELS.includes(topLabel) || topScore < MODERATE_THRESHOLD) {
|
|
// Explicitly complex label, or model is uncertain (score too low to trust) → complex
|
|
return { complexity: 'complex', score: topScore, reason: topLabel };
|
|
}
|
|
|
|
// Middle ground: simple-question or trivial label with moderate confidence → moderate
|
|
return { complexity: 'moderate', score: topScore, reason: topLabel };
|
|
|
|
} catch (err) {
|
|
return { complexity: 'moderate', score: 0, reason: 'classify-error', error: String(err) };
|
|
}
|
|
}
|
|
|
|
async function _sentiment(text) {
|
|
if (!_sentimentPipe) return { label: 'NEUTRAL', score: 0.5 };
|
|
|
|
try {
|
|
const [result] = await _sentimentPipe(text.trim());
|
|
const { label, score } = result;
|
|
if (Math.abs(score - 0.5) < 0.15) return { label: 'NEUTRAL', score };
|
|
return { label: label.toUpperCase(), score };
|
|
} catch {
|
|
return { label: 'NEUTRAL', score: 0.5 };
|
|
}
|
|
}
|
|
|
|
// ── Message dispatch ──────────────────────────────────────────────────────────
|
|
|
|
self.addEventListener('message', async ({ data }) => {
|
|
const { id, type, text } = data ?? {};
|
|
|
|
if (type === 'classify') {
|
|
const result = await _classify(text ?? '');
|
|
self.postMessage({ id, result });
|
|
} else if (type === 'sentiment') {
|
|
const result = await _sentiment(text ?? '');
|
|
self.postMessage({ id, result });
|
|
}
|
|
});
|
|
|
|
// ── Boot: load models, then signal ready ─────────────────────────────────────
|
|
_loadModels()
|
|
.then(() => { self.postMessage({ type: 'ready' }); })
|
|
.catch(err => { self.postMessage({ type: 'error', message: String(err) }); });
|