task-28 fix5: session triage, speech-bubble local badge, footprint docs

1. ui.js: edge triage now runs BEFORE session handler delegation
   - classify() is called for all send() paths (session + WebSocket)
   - trivial + localReply → setSpeechBubble() is used to display the local reply
   - the session handler receives only messages that were not answered locally
     (moderate/complex, or trivial without a localReply)
   - _fetchEstimate() is now also fired for moderate/complex messages in session mode
2. edge-worker.js: quantization footprint documented (~87 MB combined, int8, cached after first load)
2026-03-19 19:10:46 +00:00
parent 8897371815
commit dabadb4298
2 changed files with 20 additions and 13 deletions
--- a/the-matrix/js/edge-worker.js
+++ b/the-matrix/js/edge-worker.js
@@ -62,6 +62,10 @@ function _isGreeting(text) {
 }

 // ── Model loading ─────────────────────────────────────────────────────────────
+// Both models use int8 quantization (quantized:true) to keep first-load small:
+//   Xenova/mobilebert-uncased-mnli         ~22 MB  (zero-shot classification)
+//   Xenova/distilbert-base-uncased-*-sst-2 ~65 MB  (sentiment analysis)
+// Combined ~87 MB on first load; subsequent loads served from the browser Cache API.
 async function _loadModels() {
  [_classifier, _sentimentPipe] = await Promise.all([
    pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli', { quantized: true }),
--- a/the-matrix/js/ui.js
+++ b/the-matrix/js/ui.js
@@ -1,6 +1,6 @@
 import { sendVisitorMessage } from './websocket.js';
 import { classify } from './edge-worker-client.js';
-import { setMood } from './agents.js';
+import { setMood, setSpeechBubble } from './agents.js';
 import { getOrRefreshToken } from './nostr-identity.js';

 const $fps        = document.getElementById('fps');
@@ -153,29 +153,32 @@ function initInputBar() {
    $input.value = '';
    _hideCostPreview();

-    if (_sessionSendHandler) {
-      _sessionSendHandler(text);
-      return;
-    }
-
-    // ── Edge triage — classify text in the Web Worker ─────────────────────────
-    // Worker returns { complexity: 'trivial'|'moderate'|'complex', score, reason, localReply? }
+    // ── Edge triage — runs in BOTH session mode and WebSocket mode ─────────────
+    // Worker returns { complexity:'trivial'|'moderate'|'complex', score, reason, localReply? }
    const cls = await classify(text);

    if (cls.complexity === 'trivial' && cls.localReply) {
-      // Greeting / small-talk → answer locally, 0 sats, no network call
+      // Greeting / small-talk → answer locally, 0 sats, no network call in any mode
      appendSystemMessage(`you: ${text}`);
-      appendSystemMessage(`Timmy [local]: ${cls.localReply}`);
+      setSpeechBubble(`${cls.localReply} ⚡ local`);
      _showCostPreview('answered locally ⚡ 0 sats', '#44dd88');
      setTimeout(_hideCostPreview, 3000);
      return;
    }

+    // Not answered locally: delegate to session handler (if active) or WebSocket
+    if (_sessionSendHandler) {
+      // moderate/complex — fire estimate async for cost preview, then hand off
+      if (cls.complexity === 'moderate' || cls.complexity === 'complex') {
+        _fetchEstimate(text);
+      }
+      _sessionSendHandler(text);
+      return;
+    }
+
    // moderate or complex — fetch cost estimate (driven by complexity outcome),
-    // then route to server. The preview is already shown via debounce on input,
-    // but we refresh it now with the actual classification context.
+    // then route to server via WebSocket.
    if (cls.complexity === 'moderate' || cls.complexity === 'complex') {
-      // Fire estimate fetch; don't await — let it update the badge async while WS is in flight
      _fetchEstimate(text);
    }