From dabadb4298312ca5efd147327630f808bf2d9c69 Mon Sep 17 00:00:00 2001
From: Replit Agent <replit@timmy.local>
Date: Thu, 19 Mar 2026 19:10:46 +0000
Subject: [PATCH] task-28 fix5: session triage, speech-bubble local badge,
 footprint docs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

1. ui.js: edge triage now runs BEFORE session handler delegation
   - classify() called for all send() paths (session + WebSocket)
   - trivial + localReply → setSpeechBubble() used for local reply display
   - session handler only receives messages that were not answered locally
     (moderate/complex, or trivial without a localReply)
   - _fetchEstimate() fired for non-trivial in session mode too

2. edge-worker.js: quantization footprint documented (~87MB int8, cached)
---
 the-matrix/js/edge-worker.js |  4 ++++
 the-matrix/js/ui.js          | 29 ++++++++++++++++-------------
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/the-matrix/js/edge-worker.js b/the-matrix/js/edge-worker.js
index e05ce2c..ed69085 100644
--- a/the-matrix/js/edge-worker.js
+++ b/the-matrix/js/edge-worker.js
@@ -62,6 +62,10 @@ function _isGreeting(text) {
 }
 
 // ── Model loading ─────────────────────────────────────────────────────────────
+// Both models use int8 quantization (quantized:true) to keep first-load small:
+//   Xenova/mobilebert-uncased-mnli         ~22 MB  (zero-shot classification)
+//   Xenova/distilbert-base-uncased-*-sst-2 ~65 MB  (sentiment analysis)
+// Combined ~87 MB on first load; subsequent loads served from the browser Cache API.
 async function _loadModels() {
   [_classifier, _sentimentPipe] = await Promise.all([
     pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli', { quantized: true }),
diff --git a/the-matrix/js/ui.js b/the-matrix/js/ui.js
index 4795eed..b646057 100644
--- a/the-matrix/js/ui.js
+++ b/the-matrix/js/ui.js
@@ -1,6 +1,6 @@
 import { sendVisitorMessage } from './websocket.js';
 import { classify } from './edge-worker-client.js';
-import { setMood } from './agents.js';
+import { setMood, setSpeechBubble } from './agents.js';
 import { getOrRefreshToken } from './nostr-identity.js';
 
 const $fps        = document.getElementById('fps');
@@ -153,29 +153,32 @@ function initInputBar() {
     $input.value = '';
     _hideCostPreview();
 
-    if (_sessionSendHandler) {
-      _sessionSendHandler(text);
-      return;
-    }
-
-    // ── Edge triage — classify text in the Web Worker ─────────────────────────
-    // Worker returns { complexity: 'trivial'|'moderate'|'complex', score, reason, localReply? }
+    // ── Edge triage — runs in BOTH session mode and WebSocket mode ─────────────
+    // Worker returns { complexity:'trivial'|'moderate'|'complex', score, reason, localReply? }
     const cls = await classify(text);
 
     if (cls.complexity === 'trivial' && cls.localReply) {
-      // Greeting / small-talk → answer locally, 0 sats, no network call
+      // Greeting / small-talk → answer locally, 0 sats, no network call in any mode
       appendSystemMessage(`you: ${text}`);
-      appendSystemMessage(`Timmy [local]: ${cls.localReply}`);
+      setSpeechBubble(`${cls.localReply} ⚡ local`);
       _showCostPreview('answered locally ⚡ 0 sats', '#44dd88');
       setTimeout(_hideCostPreview, 3000);
       return;
     }
 
+    // Not answered locally (incl. trivial without a localReply): delegate to session handler (if active) or WebSocket
+    if (_sessionSendHandler) {
+      // moderate/complex — fire estimate async for cost preview, then hand off
+      if (cls.complexity === 'moderate' || cls.complexity === 'complex') {
+        _fetchEstimate(text);
+      }
+      _sessionSendHandler(text);
+      return;
+    }
+
     // moderate or complex — fetch cost estimate (driven by complexity outcome),
-    // then route to server. The preview is already shown via debounce on input,
-    // but we refresh it now with the actual classification context.
+    // then route to server via WebSocket.
     if (cls.complexity === 'moderate' || cls.complexity === 'complex') {
-      // Fire estimate fetch; don't await — let it update the badge async while WS is in flight
       _fetchEstimate(text);
     }