task-28 fix5: session triage, speech-bubble local badge, footprint docs

1. ui.js: edge triage now runs BEFORE session handler delegation
   - classify() called for all send() paths (session + WebSocket)
   - trivial + localReply → setSpeechBubble() used for local reply display
   - session handler only receives messages not answered locally (moderate/complex, or trivial without a localReply)
   - _fetchEstimate() fired for non-trivial in session mode too

2. edge-worker.js: quantization footprint documented (~87MB int8, cached)
This commit is contained in:
Replit Agent
2026-03-19 19:10:46 +00:00
parent 8897371815
commit dabadb4298
2 changed files with 20 additions and 13 deletions

View File

@@ -62,6 +62,10 @@ function _isGreeting(text) {
}
// ── Model loading ─────────────────────────────────────────────────────────────
// Both models use int8 quantization (quantized:true) to keep first-load small:
// Xenova/mobilebert-uncased-mnli ~22 MB (zero-shot classification)
// Xenova/distilbert-base-uncased-*-sst-2 ~65 MB (sentiment analysis)
// Combined ~87 MB on first load; subsequent loads served from the browser Cache API.
async function _loadModels() {
[_classifier, _sentimentPipe] = await Promise.all([
pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli', { quantized: true }),

View File

@@ -1,6 +1,6 @@
import { sendVisitorMessage } from './websocket.js';
import { classify } from './edge-worker-client.js';
import { setMood } from './agents.js';
import { setMood, setSpeechBubble } from './agents.js';
import { getOrRefreshToken } from './nostr-identity.js';
const $fps = document.getElementById('fps');
@@ -153,29 +153,32 @@ function initInputBar() {
$input.value = '';
_hideCostPreview();
if (_sessionSendHandler) {
_sessionSendHandler(text);
return;
}
// ── Edge triage — classify text in the Web Worker ─────────────────────────
// Worker returns { complexity: 'trivial'|'moderate'|'complex', score, reason, localReply? }
// ── Edge triage — runs in BOTH session mode and WebSocket mode ─────────────
// Worker returns { complexity:'trivial'|'moderate'|'complex', score, reason, localReply? }
const cls = await classify(text);
if (cls.complexity === 'trivial' && cls.localReply) {
// Greeting / small-talk → answer locally, 0 sats, no network call
// Greeting / small-talk → answer locally, 0 sats, no network call in any mode
appendSystemMessage(`you: ${text}`);
appendSystemMessage(`Timmy [local]: ${cls.localReply}`);
setSpeechBubble(`${cls.localReply} ⚡ local`);
_showCostPreview('answered locally ⚡ 0 sats', '#44dd88');
setTimeout(_hideCostPreview, 3000);
return;
}
// Not answered locally (moderate/complex, or trivial without a localReply): delegate to session handler (if active) or WebSocket
if (_sessionSendHandler) {
// moderate/complex — fire estimate async for cost preview, then hand off
if (cls.complexity === 'moderate' || cls.complexity === 'complex') {
_fetchEstimate(text);
}
_sessionSendHandler(text);
return;
}
// moderate or complex — fetch cost estimate (driven by complexity outcome),
// then route to server. The preview is already shown via debounce on input,
// but we refresh it now with the actual classification context.
// then route to server via WebSocket.
if (cls.complexity === 'moderate' || cls.complexity === 'complex') {
// Fire estimate fetch; don't await — let it update the badge async while WS is in flight
_fetchEstimate(text);
}