task-28 fix5: session triage, speech-bubble local badge, footprint docs
1. ui.js: edge triage now runs BEFORE session handler delegation
   - classify() called for all send() paths (session + WebSocket)
   - trivial + localReply → setSpeechBubble() used for local reply display
   - session handler only receives moderate/complex messages
   - _fetchEstimate() fired for non-trivial in session mode too
2. edge-worker.js: quantization footprint documented (~87MB int8, cached)
This commit is contained in:
@@ -62,6 +62,10 @@ function _isGreeting(text) {
}

// ── Model loading ─────────────────────────────────────────────────────────────
// Both models use int8 quantization (quantized:true) to keep first-load small:
//   Xenova/mobilebert-uncased-mnli ~22 MB (zero-shot classification)
//   Xenova/distilbert-base-uncased-*-sst-2 ~65 MB (sentiment analysis)
// Combined ~87 MB on first load; subsequent loads served from the browser Cache API.
async function _loadModels() {
  [_classifier, _sentimentPipe] = await Promise.all([
    pipeline('zero-shot-classification', 'Xenova/mobilebert-uncased-mnli', { quantized: true }),
||||
@@ -1,6 +1,6 @@
import { sendVisitorMessage } from './websocket.js';
import { classify } from './edge-worker-client.js';
import { setMood } from './agents.js';
import { setMood, setSpeechBubble } from './agents.js';
import { getOrRefreshToken } from './nostr-identity.js';

const $fps = document.getElementById('fps');

@@ -153,29 +153,32 @@ function initInputBar() {
  $input.value = '';
  _hideCostPreview();

  if (_sessionSendHandler) {
    _sessionSendHandler(text);
    return;
  }

  // ── Edge triage — classify text in the Web Worker ─────────────────────────
  // Worker returns { complexity: 'trivial'|'moderate'|'complex', score, reason, localReply? }
  // ── Edge triage — runs in BOTH session mode and WebSocket mode ─────────────
  // Worker returns { complexity:'trivial'|'moderate'|'complex', score, reason, localReply? }
  const cls = await classify(text);

  if (cls.complexity === 'trivial' && cls.localReply) {
    // Greeting / small-talk → answer locally, 0 sats, no network call
    // Greeting / small-talk → answer locally, 0 sats, no network call in any mode
    appendSystemMessage(`you: ${text}`);
    appendSystemMessage(`Timmy [local]: ${cls.localReply}`);
    setSpeechBubble(`${cls.localReply} ⚡ local`);
    _showCostPreview('answered locally ⚡ 0 sats', '#44dd88');
    setTimeout(_hideCostPreview, 3000);
    return;
  }

  // Non-trivial: delegate to session handler (if active) or WebSocket
  if (_sessionSendHandler) {
    // moderate/complex — fire estimate async for cost preview, then hand off
    if (cls.complexity === 'moderate' || cls.complexity === 'complex') {
      _fetchEstimate(text);
    }
    _sessionSendHandler(text);
    return;
  }

  // moderate or complex — fetch cost estimate (driven by complexity outcome),
  // then route to server. The preview is already shown via debounce on input,
  // but we refresh it now with the actual classification context.
  // then route to server via WebSocket.
  if (cls.complexity === 'moderate' || cls.complexity === 'complex') {
    // Fire estimate fetch; don't await — let it update the badge async while WS is in flight
    _fetchEstimate(text);
  }

||||
Reference in New Issue
Block a user