From dabadb4298312ca5efd147327630f808bf2d9c69 Mon Sep 17 00:00:00 2001 From: Replit Agent Date: Thu, 19 Mar 2026 19:10:46 +0000 Subject: [PATCH] task-28 fix5: session triage, speech-bubble local badge, footprint docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. ui.js: edge triage now runs BEFORE session handler delegation - classify() called for all send() paths (session + WebSocket) - trivial + localReply → setSpeechBubble() used for local reply display - session handler receives every message not answered locally (moderate/complex, plus trivial ones without a localReply) - _fetchEstimate() fired for non-trivial in session mode too 2. edge-worker.js: quantization footprint documented (~87MB int8, cached) --- the-matrix/js/edge-worker.js | 4 ++++ the-matrix/js/ui.js | 29 ++++++++++++++++------------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/the-matrix/js/edge-worker.js b/the-matrix/js/edge-worker.js index e05ce2c..ed69085 100644 --- a/the-matrix/js/edge-worker.js +++ b/the-matrix/js/edge-worker.js @@ -62,6 +62,10 @@ function _isGreeting(text) { } // ── Model loading ───────────────────────────────────────────────────────────── +// Both models use int8 quantization (quantized:true) to keep first-load small: +// Xenova/mobilebert-uncased-mnli ~22 MB (zero-shot classification) +// Xenova/distilbert-base-uncased-*-sst-2 ~65 MB (sentiment analysis) +// Combined ~87 MB on first load; subsequent loads served from the browser Cache API. 
async function _loadModels() { [_classifier, _sentimentPipe] = await Promise.all([ pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli', { quantized: true }), diff --git a/the-matrix/js/ui.js b/the-matrix/js/ui.js index 4795eed..b646057 100644 --- a/the-matrix/js/ui.js +++ b/the-matrix/js/ui.js @@ -1,6 +1,6 @@ import { sendVisitorMessage } from './websocket.js'; import { classify } from './edge-worker-client.js'; -import { setMood } from './agents.js'; +import { setMood, setSpeechBubble } from './agents.js'; import { getOrRefreshToken } from './nostr-identity.js'; const $fps = document.getElementById('fps'); @@ -153,29 +153,32 @@ function initInputBar() { $input.value = ''; _hideCostPreview(); - if (_sessionSendHandler) { - _sessionSendHandler(text); - return; - } - - // ── Edge triage — classify text in the Web Worker ───────────────────────── - // Worker returns { complexity: 'trivial'|'moderate'|'complex', score, reason, localReply? } + // ── Edge triage — runs in BOTH session mode and WebSocket mode ───────────── + // Worker returns { complexity:'trivial'|'moderate'|'complex', score, reason, localReply? 
} const cls = await classify(text); if (cls.complexity === 'trivial' && cls.localReply) { - // Greeting / small-talk → answer locally, 0 sats, no network call + // Greeting / small-talk → answer locally, 0 sats, no network call in any mode appendSystemMessage(`you: ${text}`); - appendSystemMessage(`Timmy [local]: ${cls.localReply}`); + setSpeechBubble(`${cls.localReply} ⚡ local`); _showCostPreview('answered locally ⚡ 0 sats', '#44dd88'); setTimeout(_hideCostPreview, 3000); return; } + // Non-trivial: delegate to session handler (if active) or WebSocket + if (_sessionSendHandler) { + // moderate/complex — fire estimate async for cost preview, then hand off + if (cls.complexity === 'moderate' || cls.complexity === 'complex') { + _fetchEstimate(text); + } + _sessionSendHandler(text); + return; + } + // moderate or complex — fetch cost estimate (driven by complexity outcome), - // then route to server. The preview is already shown via debounce on input, - // but we refresh it now with the actual classification context. + // then route to server via WebSocket. if (cls.complexity === 'moderate' || cls.complexity === 'complex') { - // Fire estimate fetch; don't await — let it update the badge async while WS is in flight _fetchEstimate(text); }