Compare commits

..

1 Commit

6 changed files with 352 additions and 499 deletions

View File

@@ -1,142 +0,0 @@
"""Local-only counselor augmentation helpers for the-door."""
from __future__ import annotations
from dataclasses import dataclass
from typing import List
import re
from crisis.detect import detect_crisis
@dataclass(frozen=True)
class SignalGuide:
    """Static advisory content tied to one family of crisis-signal regexes."""
    label: str            # human-readable signal name surfaced to the operator
    patterns: List[str]   # regex alternatives matched against lowercased user text
    talking_point: str    # suggested framing for the counselor's next reply
    deescalation: str     # concrete de-escalation technique for this signal
    follow_up: str        # ready-to-use follow-up question for the counselor
@dataclass
class CounselorAugmentation:
    """Advisory payload shown to a human counselor; never an automated action."""
    risk_level: str                      # detect_crisis level: NONE/LOW/MEDIUM/HIGH/CRITICAL
    risk_score: int                      # 0-100 composite score derived from level + signals
    signals: List[str]                   # matched SignalGuide labels
    suggested_talking_points: List[str]  # framing suggestions for the counselor
    deescalation_techniques: List[str]   # concrete de-escalation steps
    follow_up_prompt: str                # quotes the user and proposes a follow-up question
    operator_notice: str                 # standing disclaimer for the operator
    local_only: bool = True              # guidance is computed locally, never sent out
    advisory_only: bool = True           # must not be treated as a decision
# Ordered list of signal families checked against every user message.
# Patterns are matched with re.search on lowercased text, so they are written
# in lowercase and rely on \b word boundaries rather than anchors.
SIGNAL_GUIDES: List[SignalGuide] = [
    SignalGuide(
        label="Explicit self-harm intent",
        patterns=[
            r"\bkill\s*(my)?self\b",
            r"\bend\s*my\s*life\b",
            r"\bwrote\s+a\s+suicide\s*(?:note|letter)\b",
            r"\bgoing\s+to\s+(?:kill\s+myself|die)\b",
        ],
        talking_point="Ask directly whether they are safe right now and keep the next question concrete.",
        deescalation="Move to immediate safety: ask about means, people nearby, and whether they can call or text 988 now.",
        follow_up="You said you're ready to die. Are you alone right now, and can you tell me what is within reach?",
    ),
    SignalGuide(
        label="Hopelessness / collapse",
        patterns=[
            r"\bhopeless\b",
            r"\bcan'?t\s+go\s+on\b",
            r"\bno\s+future\b",
            r"\bnothing\s+left\b",
        ],
        talking_point="Reflect the hopelessness plainly before offering options. Avoid arguing with the feeling.",
        deescalation="Narrow the time horizon: focus on the next ten minutes, one breath, one call, one person.",
        follow_up="You said things feel hopeless. What feels most dangerous about the next hour?",
    ),
    SignalGuide(
        label="Isolation / burden",
        patterns=[
            r"\bnobody\s+cares\b",
            r"\bbetter\s+off\s+without\s+me\b",
            r"\balone\b",
            r"\bburden\b",
        ],
        talking_point="Counter isolation with immediacy: name one real person or service they can contact now.",
        deescalation="Invite a tiny reconnection step: text one safe person, unlock the door, move closer to others, or stay in the chat.",
        follow_up="You said you feel alone. Who is the safest real person we could bring into this moment with you?",
    ),
    SignalGuide(
        label="Overwhelm / panic",
        patterns=[
            r"\bdesperate\b",
            r"\boverwhelm(?:ed|ing)\b",
            r"\btrapped\b",
            r"\bpanic\b",
        ],
        talking_point="Offer one regulating action at a time instead of a list. Slow the pace of the chat.",
        deescalation="Ground in the room: feet on the floor, name five visible objects, one sip of water, one slow exhale.",
        follow_up="You said this feels overwhelming. What is the smallest thing in the room you can touch right now?",
    ),
]
class CounselorAugmentationEngine:
    """Derives local-only, advisory counselor guidance from a user message."""

    # Baseline 0-100 risk score contributed by each crisis-detection level.
    BASE_SCORES = {
        "NONE": 5,
        "LOW": 25,
        "MEDIUM": 55,
        "HIGH": 75,
        "CRITICAL": 95,
    }

    def _matched_guides(self, text: str) -> List[SignalGuide]:
        """Return every SignalGuide with at least one regex hit in the lowercased text."""
        haystack = text.lower()
        return [
            guide
            for guide in SIGNAL_GUIDES
            if any(re.search(pattern, haystack) for pattern in guide.patterns)
        ]

    def build_augmented_guidance(self, text: str, assistant_text: str = "") -> CounselorAugmentation:
        """Build a CounselorAugmentation for *text*.

        Combines the crisis detector's level with matched signal guides; a
        CRITICAL detection always surfaces the explicit-intent signal and
        pins the score at 95+. *assistant_text* lets the engine notice when
        the assistant has not yet mentioned 988 at HIGH/CRITICAL risk.
        """
        detection = detect_crisis(text)
        matched = self._matched_guides(text)
        level = detection.level

        signals = [guide.label for guide in matched]
        if level == "CRITICAL" and "Explicit self-harm intent" not in signals:
            # Detector outranks pattern matching: force the explicit-intent signal.
            signals.insert(0, "Explicit self-harm intent")

        # Base score by level, plus up to +10 for corroborating signals.
        score = self.BASE_SCORES.get(level, 5) + min(len(signals) * 5, 10)
        if level == "CRITICAL":
            score = max(score, 95)

        talking_points = [guide.talking_point for guide in matched]
        if not talking_points:
            talking_points = [
                "Keep the response advisory and grounded in immediate safety, not abstract reassurance."
            ]

        techniques = [guide.deescalation for guide in matched]
        if not techniques:
            techniques = [
                "Use short sentences, slow the conversation, and invite one concrete grounding step."
            ]

        # Quote up to 120 chars of the user's own words in the follow-up prompt.
        quote = text.strip().replace("\n", " ")[:120]
        if matched:
            follow_up = matched[0].follow_up
        else:
            follow_up = "What feels most dangerous or heavy for you right now?"
        follow_up_prompt = f'You said "{quote}". Consider following up with: {follow_up}'

        if assistant_text and "988" not in assistant_text and level in {"HIGH", "CRITICAL"}:
            talking_points.append("Surface 988 or Crisis Text Line explicitly if the assistant has not already done so.")

        return CounselorAugmentation(
            risk_level=level,
            risk_score=min(score, 100),
            signals=signals,
            suggested_talking_points=talking_points,
            deescalation_techniques=techniques,
            follow_up_prompt=follow_up_prompt,
            operator_notice="Local-only advisory. This never replaces human judgment.",
        )
def build_augmented_guidance(text: str, assistant_text: str = "") -> CounselorAugmentation:
    """Module-level convenience wrapper around CounselorAugmentationEngine."""
    engine = CounselorAugmentationEngine()
    return engine.build_augmented_guidance(text, assistant_text=assistant_text)

View File

@@ -241,105 +241,6 @@ html, body {
opacity: 0.5;
}
/* ===== OPERATOR AUGMENTATION SIDEBAR ===== */
#augmentation-toggle {
margin: 10px 16px 0;
padding: 8px 12px;
border-radius: 999px;
border: 1px solid #5b6b7a;
background: #11161d;
color: #b9c7d5;
font-size: 0.9rem;
cursor: pointer;
}
#augmentation-toggle.active {
border-color: #b388ff;
color: #e2d4ff;
background: #1a1324;
}
#augmentation-sidebar {
position: fixed;
top: 90px;
right: 16px;
width: 320px;
max-height: calc(100vh - 120px);
overflow-y: auto;
background: #11161d;
border: 1px solid #30363d;
border-left: 3px solid #b388ff;
border-radius: 8px;
padding: 14px;
box-shadow: 0 12px 32px rgba(0,0,0,0.35);
display: none;
z-index: 70;
}
#augmentation-sidebar.visible {
display: block;
}
#augmentation-sidebar .augmentation-heading {
color: #d2b8ff;
font-size: 0.78rem;
letter-spacing: 0.08em;
margin-bottom: 10px;
}
#augmentation-risk-score {
color: #fff;
font-size: 1rem;
font-weight: 700;
margin-bottom: 10px;
}
#augmentation-sidebar .augmentation-section {
margin-top: 10px;
}
#augmentation-sidebar .augmentation-section h3 {
color: #c9d1d9;
font-size: 0.78rem;
margin: 0 0 6px;
text-transform: uppercase;
letter-spacing: 0.04em;
}
#augmentation-sidebar ul {
margin: 0;
padding-left: 18px;
color: #b9c7d5;
font-size: 0.9rem;
line-height: 1.45;
}
#augmentation-follow-up,
#augmentation-notice {
color: #b9c7d5;
font-size: 0.9rem;
line-height: 1.45;
margin: 0;
}
#augmentation-notice {
color: #8b949e;
margin-top: 12px;
border-top: 1px solid #21262d;
padding-top: 10px;
}
@media (max-width: 980px) {
#augmentation-sidebar {
left: 16px;
right: 16px;
width: auto;
top: auto;
bottom: 82px;
max-height: 40vh;
}
}
/* ===== CHAT AREA ===== */
#chat-area {
flex: 1;
@@ -748,29 +649,6 @@ html, body {
</div>
</div>
<button id="augmentation-toggle" type="button" aria-pressed="false" aria-controls="augmentation-sidebar">Operator assist: off</button>
<aside id="augmentation-sidebar" aria-live="polite" aria-label="Local operator augmentation sidebar">
<div class="augmentation-heading">LOCAL OPERATOR AUGMENTATION</div>
<div id="augmentation-risk-score">Risk score: —</div>
<div class="augmentation-section">
<h3>Signals</h3>
<ul id="augmentation-signals"><li>No signals yet.</li></ul>
</div>
<div class="augmentation-section">
<h3>Talking points</h3>
<ul id="augmentation-talking-points"><li>Enable operator assist to surface local advisory guidance.</li></ul>
</div>
<div class="augmentation-section">
<h3>De-escalation</h3>
<ul id="augmentation-techniques"><li>Suggestions stay local and never replace human judgment.</li></ul>
</div>
<div class="augmentation-section">
<h3>Follow-up</h3>
<p id="augmentation-follow-up">No follow-up prompt yet.</p>
</div>
<p id="augmentation-notice">Local-only advisory. Never replaces human judgment.</p>
</aside>
<!-- Chat messages -->
<div id="chat-area" role="log" aria-label="Chat messages" aria-live="polite" tabindex="0">
<!-- Messages inserted here -->
@@ -802,7 +680,7 @@ html, body {
<!-- Footer -->
<footer id="footer">
<a href="/about.html" aria-label="About The Door">about</a>
<a href="/about" aria-label="About The Door">about</a>
<button id="safety-plan-btn" aria-label="Open My Safety Plan">my safety plan</button>
<button id="clear-chat-btn" aria-label="Clear chat history">clear chat</button>
</footer>
@@ -928,14 +806,6 @@ Sovereignty and service always.`;
var sendBtn = document.getElementById('send-btn');
var typingIndicator = document.getElementById('typing-indicator');
var crisisPanel = document.getElementById('crisis-panel');
var augmentationToggle = document.getElementById('augmentation-toggle');
var augmentationSidebar = document.getElementById('augmentation-sidebar');
var augmentationRiskScore = document.getElementById('augmentation-risk-score');
var augmentationSignals = document.getElementById('augmentation-signals');
var augmentationTalkingPoints = document.getElementById('augmentation-talking-points');
var augmentationTechniques = document.getElementById('augmentation-techniques');
var augmentationFollowUp = document.getElementById('augmentation-follow-up');
var augmentationNotice = document.getElementById('augmentation-notice');
var crisisOverlay = document.getElementById('crisis-overlay');
var overlayDismissBtn = document.getElementById('overlay-dismiss-btn');
var overlayCallLink = document.querySelector('.overlay-call');
@@ -956,8 +826,6 @@ Sovereignty and service always.`;
var isStreaming = false;
var overlayTimer = null;
var crisisPanelShown = false;
var lastUserMessage = '';
var augmentationEnabled = false;
// ===== SERVICE WORKER =====
if ('serviceWorker' in navigator) {
@@ -1115,142 +983,6 @@ Sovereignty and service always.`;
}
function escapeHtml(text) {
  // Escape the five HTML-special characters so untrusted text is inert
  // when inserted via innerHTML. null/undefined/falsy input yields ''.
  var map = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  return String(text || '').replace(/[&<>"']/g, function (ch) { return map[ch]; });
}
// Client-side mirror of the Python SIGNAL_GUIDES table: each guide pairs
// case-insensitive signal regexes with canned counselor guidance. Kept local
// to the browser; nothing here is sent to a server.
var AUGMENTATION_SIGNAL_GUIDES = [
  {
    label: 'Explicit self-harm intent',
    patterns: [/kill\s*(my)?self/i, /end\s*my\s*life/i, /suicide\s*(note|letter)/i, /going\s+to\s+(kill\s+myself|die)/i],
    talkingPoint: 'Ask directly whether they are safe right now and keep the next question concrete.',
    technique: 'Move to immediate safety: ask about means, people nearby, and whether 988 can be called or texted now.',
    followUp: 'You said you might die tonight. Are you alone right now, and what is within reach?'
  },
  {
    label: 'Hopelessness / collapse',
    patterns: [/hopeless/i, /can'?t\s+go\s+on/i, /no\s+future/i, /nothing\s+left/i],
    talkingPoint: 'Reflect the hopelessness plainly before offering options. Avoid arguing with the feeling.',
    technique: 'Narrow the time horizon to the next ten minutes and one immediate action.',
    followUp: 'You said things feel hopeless. What feels most dangerous about the next hour?'
  },
  {
    label: 'Isolation / burden',
    patterns: [/nobody\s+cares/i, /better\s+off\s+without\s+me/i, /\balone\b/i, /\bburden\b/i],
    talkingPoint: 'Counter isolation with one real contact point: a person, 988, or Crisis Text Line.',
    technique: 'Invite a tiny reconnection step: text one safe person, unlock the door, or stay in the chat.',
    followUp: 'You said you feel alone. Who is the safest real person we could bring into this moment with you?'
  },
  {
    label: 'Overwhelm / panic',
    patterns: [/desperate/i, /overwhelm(?:ed|ing)/i, /trapped/i, /panic/i],
    talkingPoint: 'Offer one regulating step at a time instead of a long list.',
    technique: 'Ground in the room: feet on the floor, name five visible objects, one sip of water, one slow exhale.',
    followUp: 'You said this feels overwhelming. What is the smallest thing in the room you can touch right now?'
  }
];
function deriveAugmentationSignals(userText) {
  // Collect every guide with at least one pattern hit against the lowercased text.
  var lowered = (userText || '').toLowerCase();
  var matched = [];
  AUGMENTATION_SIGNAL_GUIDES.forEach(function (guide) {
    var hit = guide.patterns.some(function (re) { return re.test(lowered); });
    if (hit) {
      matched.push(guide);
    }
  });
  return matched;
}
function buildAugmentationState(userText, assistantText) {
  // Build the local-only advisory payload (mirrors the Python engine).
  // Inputs: raw user message and the assistant's latest reply (may be '').
  var text = userText || '';
  var guides = deriveAugmentationSignals(text);
  var level = getCrisisLevel(userText);
  var signals = guides.map(function(guide) { return guide.label; });
  var explicitIntent = signals.indexOf('Explicit self-harm intent') !== -1;
  // Crisis level 2 maps to CRITICAL, 1 to HIGH; any matched guide alone is LOW.
  var riskLevel = explicitIntent ? 'CRITICAL' : (level === 2 ? 'CRITICAL' : level === 1 ? 'HIGH' : (guides.length ? 'LOW' : 'NONE'));
  var riskScore = riskLevel === 'CRITICAL' ? 95 : riskLevel === 'HIGH' ? 75 : riskLevel === 'LOW' ? 25 : 5;
  // Corroborating signals add up to +10, clamped to 100.
  riskScore = Math.min(100, riskScore + Math.min(guides.length * 5, 10));
  if (riskLevel === 'CRITICAL' && signals.indexOf('Explicit self-harm intent') === -1) {
    // Detector outranks pattern matching: force the explicit-intent signal.
    signals.unshift('Explicit self-harm intent');
    riskScore = Math.max(riskScore, 95);
  }
  var talkingPoints = guides.map(function(guide) { return guide.talkingPoint; });
  var techniques = guides.map(function(guide) { return guide.technique; });
  if (!talkingPoints.length) {
    talkingPoints = ['Keep the response advisory, local-only, and focused on immediate safety rather than abstract reassurance.'];
  }
  if (!techniques.length) {
    techniques = ['Slow the pace. Use short sentences. Invite one concrete grounding step.'];
  }
  if ((assistantText || '').indexOf('988') === -1 && (riskLevel === 'HIGH' || riskLevel === 'CRITICAL')) {
    talkingPoints.push('Surface 988 or Crisis Text Line explicitly if the assistant has not already done so.');
  }
  // FIX: trim before quoting so the prompt matches the Python engine (which
  // calls text.strip()); previously leading/trailing whitespace leaked into
  // the quoted text of the follow-up prompt.
  var quoted = text.trim().replace(/\s+/g, ' ').slice(0, 120);
  var followUp = guides.length ? guides[0].followUp : 'What feels heaviest or most dangerous for you right now?';
  return {
    riskLevel: riskLevel,
    riskScore: riskScore,
    signals: signals,
    talkingPoints: talkingPoints,
    techniques: techniques,
    followUpPrompt: 'You said "' + quoted + '". Consider following up with: ' + followUp,
    operatorNotice: 'Local-only advisory. Never replaces human judgment.',
    localOnly: true,
    advisoryOnly: true
  };
}
// Paint an augmentation state object into the sidebar DOM and reveal it.
// All list items pass through escapeHtml before innerHTML insertion; the
// follow-up and notice use textContent and need no escaping.
function renderAugmentationSidebar(state) {
  if (!augmentationSidebar) return;  // sidebar markup absent — nothing to render
  augmentationRiskScore.textContent = 'Risk score: ' + state.riskScore + ' / 100 (' + state.riskLevel + ')';
  augmentationSignals.innerHTML = state.signals.length
    ? state.signals.map(function(signal) { return '<li>' + escapeHtml(signal) + '</li>'; }).join('')
    : '<li>No crisis signals detected.</li>';
  augmentationTalkingPoints.innerHTML = state.talkingPoints.map(function(item) { return '<li>' + escapeHtml(item) + '</li>'; }).join('');
  augmentationTechniques.innerHTML = state.techniques.map(function(item) { return '<li>' + escapeHtml(item) + '</li>'; }).join('');
  augmentationFollowUp.textContent = state.followUpPrompt;
  augmentationNotice.textContent = state.operatorNotice;
  augmentationSidebar.classList.add('visible');
}
function updateAugmentationState(userText, assistantText) {
  // No-op while the operator has the assist sidebar toggled off.
  if (!augmentationEnabled) {
    return;
  }
  var state = buildAugmentationState(userText, assistantText);
  renderAugmentationSidebar(state);
}
// Flip the operator-assist feature on/off: persists the preference, syncs the
// toggle button's label/ARIA state, and shows or hides the sidebar. When
// turning on, re-renders guidance from the most recent user/assistant pair.
function setOperatorAugmentationEnabled(enabled) {
  augmentationEnabled = !!enabled;
  // localStorage can throw (private browsing / blocked storage) — best effort.
  try { localStorage.setItem('door_operator_augmentation_enabled', augmentationEnabled ? '1' : '0'); } catch (e) {}
  if (!augmentationToggle) return;  // toggle markup absent — nothing to sync
  augmentationToggle.setAttribute('aria-pressed', augmentationEnabled ? 'true' : 'false');
  augmentationToggle.classList.toggle('active', augmentationEnabled);
  augmentationToggle.textContent = augmentationEnabled ? 'Operator assist: on' : 'Operator assist: off';
  if (!augmentationEnabled && augmentationSidebar) {
    augmentationSidebar.classList.remove('visible');
    return;
  }
  if (augmentationEnabled && lastUserMessage) {
    // Find the latest assistant reply so the 988 check reflects current state.
    var lastAssistant = '';
    for (var i = messages.length - 1; i >= 0; i--) {
      if (messages[i].role === 'assistant') { lastAssistant = messages[i].content; break; }
    }
    updateAugmentationState(lastUserMessage, lastAssistant);
  }
}
function loadOperatorAugmentationPreference() {
  // localStorage may throw (private browsing); treat that as "disabled".
  var stored = null;
  try {
    stored = localStorage.getItem('door_operator_augmentation_enabled');
  } catch (e) {
    return false;
  }
  return stored === '1';
}
// ===== OVERLAY =====
// Focus trap: cycle through focusable elements within the crisis overlay
@@ -1583,10 +1315,9 @@ Sovereignty and service always.`;
addMessage('user', text);
messages.push({ role: 'user', content: text });
lastUserMessage = text;
var lastUserMessage = text;
checkCrisis(text);
updateAugmentationState(text, '');
msgInput.value = '';
msgInput.style.height = 'auto';
@@ -1675,7 +1406,6 @@ Sovereignty and service always.`;
messages.push({ role: 'assistant', content: fullText });
saveMessages();
checkCrisis(fullText);
updateAugmentationState(lastUserMessage || '', fullText);
}
isStreaming = false;
sendBtn.disabled = msgInput.value.trim().length === 0;
@@ -1702,11 +1432,6 @@ Sovereignty and service always.`;
});
sendBtn.addEventListener('click', sendMessage);
if (augmentationToggle) {
augmentationToggle.addEventListener('click', function() {
setOperatorAugmentationEnabled(!augmentationEnabled);
});
}
// ===== WELCOME MESSAGE =====
function init() {
@@ -1726,7 +1451,6 @@ Sovereignty and service always.`;
window.history.replaceState({}, document.title, window.location.pathname);
}
setOperatorAugmentationEnabled(loadOperatorAugmentationPreference());
msgInput.focus();
}

View File

@@ -1,33 +0,0 @@
from augmentation import CounselorAugmentationEngine
def test_explicit_intent_forces_critical_sidebar_guidance():
    """An explicit intent statement plus a note must yield CRITICAL advisory output."""
    guidance = CounselorAugmentationEngine().build_augmented_guidance(
        "I want to kill myself tonight. I already wrote a note.",
        assistant_text="I'm here with you."
    )
    assert guidance.risk_level == "CRITICAL"
    assert guidance.risk_score >= 90
    assert guidance.local_only is True
    assert guidance.advisory_only is True
    assert "Explicit self-harm intent" in guidance.signals
    assert guidance.suggested_talking_points
    assert guidance.deescalation_techniques
    assert "You said" in guidance.follow_up_prompt
    assert "never replaces human judgment" in guidance.operator_notice.lower()
def test_hopelessness_signal_produces_follow_up_and_talking_points():
    """Hopelessness language alone should still produce elevated, populated guidance."""
    guidance = CounselorAugmentationEngine().build_augmented_guidance(
        "I feel so hopeless about my life and I can't go on.",
        assistant_text=""
    )
    assert guidance.risk_level in {"HIGH", "CRITICAL"}
    assert guidance.signals
    assert guidance.suggested_talking_points
    assert guidance.deescalation_techniques
    assert guidance.follow_up_prompt

View File

@@ -1,20 +0,0 @@
from pathlib import Path
def test_operator_augmentation_ui_hooks_exist():
    """index.html must retain the operator-assist DOM ids and JS hook functions."""
    markup = Path('index.html').read_text()
    required_snippets = [
        'id="augmentation-toggle"',
        'id="augmentation-sidebar"',
        'id="augmentation-risk-score"',
        'id="augmentation-signals"',
        'id="augmentation-follow-up"',
        'door_operator_augmentation_enabled',
        'function buildAugmentationState(',
        'function renderAugmentationSidebar(',
        'function updateAugmentationState(',
        'function setOperatorAugmentationEnabled(',
        'function loadOperatorAugmentationPreference(',
        'getCrisisLevel(userText)',
        "updateAugmentationState(text, '')",
        "updateAugmentationState(lastUserMessage || '', fullText)",
    ]
    for snippet in required_snippets:
        assert snippet in markup

View File

@@ -1,26 +0,0 @@
from pathlib import Path
from playwright.sync_api import sync_playwright
def test_operator_augmentation_walkthrough_marks_explicit_intent_critical():
    """Browser walkthrough: enabling operator assist and sending an explicit
    self-harm message must render CRITICAL guidance in the sidebar.

    Requires Playwright with Chromium installed; loads index.html from disk.
    """
    url = Path('index.html').resolve().as_uri()
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        page = browser.new_page()
        page.goto(url, wait_until='load')
        # Enable the operator-assist sidebar before sending the message.
        page.click('#augmentation-toggle')
        page.fill('#msg-input', 'I want to kill myself tonight. I already wrote a note.')
        page.click('#send-btn')
        # Give the client-side augmentation pipeline time to render.
        page.wait_for_timeout(300)
        risk = page.locator('#augmentation-risk-score').inner_text()
        signals = page.locator('#augmentation-signals').inner_text()
        follow_up = page.locator('#augmentation-follow-up').inner_text()
        browser.close()
    assert 'CRITICAL' in risk
    assert 'Explicit self-harm intent' in signals
    assert 'You said "I want to kill myself tonight. I already wrote a note."' in follow_up

350
voice_analysis.py Normal file
View File

@@ -0,0 +1,350 @@
"""
voice_analysis.py — Voice message distress analysis via paralinguistic features.
Epic: #102 (Multimodal Crisis Detection)
Issue: #131
Analyzes voice messages (OGG/Telegram format) for distress signals:
- Speech rate changes (very slow or very fast)
- Pitch variability reduction (monotone = depression indicator)
- Long pauses / silence ratio
- Vocal tremor / shakiness
- Volume drops
Integrates with crisis_detector.py text-based detection for multimodal coverage.
"""
import os
import json
import subprocess
import tempfile
from dataclasses import dataclass, field, asdict
from typing import Optional
@dataclass
class VoiceAnalysisResult:
    """Result of paralinguistic analysis on a voice message.

    Fields default to ""/0.0/[] so a result stays valid even when optional
    backends (whisper, librosa, soundfile) are unavailable and a feature
    could not be measured.
    """
    transcript: str = ""
    speech_rate: float = 0.0  # words per minute
    pitch_mean: float = 0.0  # Hz, average fundamental frequency
    pitch_variability: float = 0.0  # std dev of pitch (low = monotone)
    silence_ratio: float = 0.0  # 0-1, fraction of audio that is silence
    tremor_score: float = 0.0  # 0-1, vocal shakiness estimate
    volume_drop_score: float = 0.0  # 0-1, sudden volume decreases
    distress_score: float = 0.0  # 0-1, composite distress indicator
    signals_detected: list = field(default_factory=list)  # human-readable flags from _compute_distress_score

    def to_dict(self) -> dict:
        """Serialize all fields to a plain, JSON-friendly dict."""
        return asdict(self)
# === THRESHOLDS ===
# All thresholds below feed _compute_distress_score; a value of 0 from a
# feature extractor means "not measured" and skips that feature entirely.
# Speech rate: normal is ~120-150 WPM
# Very slow (<80) or very fast (>200) are distress indicators
SPEECH_RATE_SLOW = 80
SPEECH_RATE_FAST = 200
SPEECH_RATE_NORMAL_LOW = 100
SPEECH_RATE_NORMAL_HIGH = 170
# Pitch variability: normal conversation has std dev ~30-50 Hz
# Monotone (<15 Hz) is a depression indicator
PITCH_VARIABILITY_LOW = 15.0  # Hz — monotone threshold
PITCH_VARIABILITY_NORMAL = 30.0
# Silence ratio: normal has ~10-20% silence
# Excessive silence (>40%) or very little (<3%) may indicate distress
SILENCE_RATIO_HIGH = 0.4
SILENCE_RATIO_LOW = 0.03
# Composite thresholds: score >= DISTRESS_MEDIUM is "high", >= DISTRESS_LOW
# is "medium", any positive score below that is "low".
DISTRESS_LOW = 0.3
DISTRESS_MEDIUM = 0.7
# === CORE ANALYSIS ===
def _convert_to_wav(audio_path: str) -> str:
"""Convert audio to WAV format for analysis. Returns path to temp WAV file."""
wav_path = tempfile.mktemp(suffix='.wav')
try:
subprocess.run(
['ffmpeg', '-i', audio_path, '-ar', '16000', '-ac', '1', '-y', wav_path],
capture_output=True, timeout=30
)
if not os.path.exists(wav_path):
# Fallback: if ffmpeg not available, try the original file
return audio_path
return wav_path
except (FileNotFoundError, subprocess.TimeoutExpired):
return audio_path
def _transcribe(audio_path: str) -> str:
"""Transcribe audio using whisper (if available) or return empty string."""
try:
import whisper
model = whisper.load_model("base")
result = model.transcribe(audio_path)
return result.get("text", "").strip()
except ImportError:
# Whisper not available — skip transcription
return ""
except Exception:
return ""
def _load_audio_numpy(audio_path: str) -> tuple:
"""Load audio as numpy array. Returns (samples, sample_rate) or (None, None)."""
try:
import librosa
samples, sr = librosa.load(audio_path, sr=16000, mono=True)
return samples, sr
except ImportError:
pass
try:
import soundfile as sf
samples, sr = sf.read(audio_path)
if len(samples.shape) > 1:
samples = samples.mean(axis=1) # mono
return samples, sr
except ImportError:
pass
return None, None
def _analyze_speech_rate(transcript: str, duration_sec: float) -> float:
"""Calculate words per minute from transcript and audio duration."""
if not transcript or duration_sec <= 0:
return 0.0
words = len(transcript.split())
minutes = duration_sec / 60.0
return words / minutes if minutes > 0 else 0.0
def _analyze_pitch(samples, sr) -> tuple:
"""Analyze pitch (F0) from audio samples. Returns (mean_hz, variability_hz)."""
try:
import librosa
f0, voiced_flag, _ = librosa.pyin(
samples, fmin=librosa.note_to_hz('C2'),
fmax=librosa.note_to_hz('C7'), sr=sr
)
import numpy as np
f0_clean = f0[~np.isnan(f0)]
if len(f0_clean) == 0:
return 0.0, 0.0
return float(np.mean(f0_clean)), float(np.std(f0_clean))
except (ImportError, Exception):
return 0.0, 0.0
def _analyze_silence(samples, sr, threshold_db: float = -40.0) -> float:
"""Calculate ratio of silence in audio (0-1)."""
try:
import librosa
import numpy as np
rms = librosa.feature.rms(y=samples)[0]
rms_db = librosa.amplitude_to_db(rms, ref=np.max)
silence_frames = np.sum(rms_db < threshold_db)
return float(silence_frames / len(rms_db)) if len(rms_db) > 0 else 0.0
except (ImportError, Exception):
return 0.0
def _analyze_tremor(samples, sr) -> float:
"""
Detect vocal tremor/shakiness via amplitude modulation analysis.
Tremor manifests as periodic amplitude fluctuations (3-12 Hz range).
Returns 0-1 score where 1 = strong tremor detected.
"""
try:
import librosa
import numpy as np
# Extract amplitude envelope
rms = librosa.feature.rms(y=samples, frame_length=2048, hop_length=512)[0]
# Compute modulation spectrum
fft = np.abs(np.fft.rfft(rms))
freqs = np.fft.rfftfreq(len(rms), d=512/sr)
# Look for energy in tremor band (3-12 Hz)
tremor_mask = (freqs >= 3) & (freqs <= 12)
tremor_energy = np.sum(fft[tremor_mask])
total_energy = np.sum(fft[1:]) # skip DC
if total_energy == 0:
return 0.0
ratio = tremor_energy / total_energy
return float(min(1.0, ratio * 5)) # normalize — typical tremor is 0.1-0.3 of total
except (ImportError, Exception):
return 0.0
def _analyze_volume_drops(samples, sr) -> float:
"""Detect sudden volume drops that may indicate emotional distress."""
try:
import librosa
import numpy as np
rms = librosa.feature.rms(y=samples, frame_length=2048, hop_length=512)[0]
if len(rms) < 2:
return 0.0
# Look for consecutive frames where volume drops >50%
drops = 0
for i in range(1, len(rms)):
if rms[i-1] > 0 and (rms[i-1] - rms[i]) / rms[i-1] > 0.5:
drops += 1
return float(min(1.0, drops / (len(rms) * 0.1)))
except (ImportError, Exception):
return 0.0
def _compute_distress_score(result: VoiceAnalysisResult) -> tuple:
    """
    Compute composite distress score from paralinguistic features.
    Returns (score, signals_detected).

    Each feature contributes only when it was actually measured (value > 0),
    and the final score is renormalized by the sum of participating weights,
    so unavailable features (missing backends) do not dilute the result.
    """
    signals = []   # human-readable flags, e.g. "monotone_voice (variability=12.0 Hz)"
    score = 0.0    # weighted sum of per-feature severities
    weights = 0    # total weight of the features that produced a measurement
    # Speech rate (0.2 weight)
    if result.speech_rate > 0:
        if result.speech_rate < SPEECH_RATE_SLOW:
            signals.append(f"very_slow_speech ({result.speech_rate:.0f} WPM)")
            score += 0.8 * 0.2
        elif result.speech_rate > SPEECH_RATE_FAST:
            signals.append(f"very_fast_speech ({result.speech_rate:.0f} WPM)")
            score += 0.6 * 0.2
        elif result.speech_rate < SPEECH_RATE_NORMAL_LOW:
            # Mildly slow speech contributes a small score without a flag.
            score += 0.3 * 0.2
        # NOTE(review): SPEECH_RATE_NORMAL_HIGH is never consulted, so
        # fast-but-not-extreme speech (170-200 WPM) adds nothing — confirm intended.
        weights += 0.2
    # Pitch variability (0.25 weight — monotone is strong depression indicator)
    if result.pitch_variability > 0:
        if result.pitch_variability < PITCH_VARIABILITY_LOW:
            signals.append(f"monotone_voice (variability={result.pitch_variability:.1f} Hz)")
            score += 0.9 * 0.25
        elif result.pitch_variability < PITCH_VARIABILITY_NORMAL:
            signals.append(f"reduced_pitch_variability ({result.pitch_variability:.1f} Hz)")
            score += 0.5 * 0.25
        weights += 0.25
    # Silence ratio (0.2 weight)
    if result.silence_ratio > 0:
        if result.silence_ratio > SILENCE_RATIO_HIGH:
            signals.append(f"excessive_silence ({result.silence_ratio:.0%})")
            score += 0.7 * 0.2
        elif result.silence_ratio < SILENCE_RATIO_LOW:
            signals.append(f"minimal_pauses ({result.silence_ratio:.0%})")
            score += 0.3 * 0.2
        weights += 0.2
    # Tremor (0.2 weight) — score scales with the raw tremor estimate;
    # only strong tremor (>0.5) is surfaced as a named signal.
    if result.tremor_score > 0:
        if result.tremor_score > 0.5:
            signals.append(f"vocal_tremor (score={result.tremor_score:.2f})")
        score += result.tremor_score * 0.2
        weights += 0.2
    # Volume drops (0.15 weight) — same pattern: continuous score, flagged >0.4.
    if result.volume_drop_score > 0:
        if result.volume_drop_score > 0.4:
            signals.append(f"volume_drops (score={result.volume_drop_score:.2f})")
        score += result.volume_drop_score * 0.15
        weights += 0.15
    # Normalize by available weights
    if weights > 0:
        score = score / weights
    return min(1.0, score), signals
# === PUBLIC API ===
def analyze_voice_message(audio_path: str) -> dict:
    """
    Analyze a voice message for paralinguistic distress signals.
    Args:
        audio_path: Path to audio file (OGG, WAV, MP3, etc.)
    Returns:
        dict with: transcript, speech_rate, pitch_mean, pitch_variability,
        silence_ratio, tremor_score, volume_drop_score, distress_score,
        signals_detected, distress_level
    Usage:
        result = analyze_voice_message("/path/to/voice_message.ogg")
        if result["distress_level"] in ("medium", "high"):
            # Escalate — combine with text crisis detection
            escalate_crisis(result)

    FIX: removed an unused `import numpy as np` and wrapped the analysis in
    try/finally so the temp WAV from _convert_to_wav is deleted even when a
    feature extractor raises (previously the temp file leaked on error).
    """
    result = VoiceAnalysisResult()
    # Convert to WAV for analysis (may return audio_path unchanged on failure)
    wav_path = _convert_to_wav(audio_path)
    try:
        # Transcribe (best-effort; '' when whisper is unavailable)
        result.transcript = _transcribe(wav_path)
        # Load audio for feature extraction
        samples, sr = _load_audio_numpy(wav_path)
        if samples is not None and sr is not None:
            duration = len(samples) / sr
            # Speech rate from transcript + duration
            result.speech_rate = _analyze_speech_rate(result.transcript, duration)
            # Pitch analysis
            result.pitch_mean, result.pitch_variability = _analyze_pitch(samples, sr)
            # Silence ratio
            result.silence_ratio = _analyze_silence(samples, sr)
            # Tremor detection
            result.tremor_score = _analyze_tremor(samples, sr)
            # Volume drops
            result.volume_drop_score = _analyze_volume_drops(samples, sr)
        # Composite distress score
        result.distress_score, result.signals_detected = _compute_distress_score(result)
    finally:
        # Clean up temp file even if an analysis step raised
        if wav_path != audio_path and os.path.exists(wav_path):
            os.unlink(wav_path)
    # Classify distress level
    if result.distress_score >= DISTRESS_MEDIUM:
        distress_level = "high"
    elif result.distress_score >= DISTRESS_LOW:
        distress_level = "medium"
    elif result.distress_score > 0:
        distress_level = "low"
    else:
        distress_level = "none"
    output = result.to_dict()
    output["distress_level"] = distress_level
    return output
def get_audio_duration(audio_path: str) -> float:
    """Get audio duration in seconds.

    Tries librosa, then soundfile; returns 0.0 when neither backend is
    available or the file cannot be read.

    FIX: collapsed the redundant `except (ImportError, Exception)` tuples —
    ImportError is already an Exception subclass, so the tuples were pure
    lint noise.
    """
    try:
        import librosa
        return float(librosa.get_duration(path=audio_path))
    except Exception:
        pass  # librosa missing or file unreadable — try soundfile
    try:
        import soundfile as sf
        return float(sf.info(audio_path).duration)
    except Exception:
        return 0.0