Compare commits

...

1 Commits

Author SHA1 Message Date
kimi
cd62c61cd6 refactor: break up _record_utterance() into focused helpers
All checks were successful
Tests / lint (pull_request) Successful in 4s
Tests / test (pull_request) Successful in 1m10s
Extract _capture_audio_blocks() and _finalize_utterance() from the 73-line
_record_utterance() method, and promote _rms() to a module-level function.

Fixes #570

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 21:39:35 -04:00
2 changed files with 86 additions and 35 deletions

View File

@@ -78,6 +78,11 @@ DEFAULT_MAX_UTTERANCE = 30.0 # safety cap — don't record forever
DEFAULT_SESSION_ID = "voice"
def _rms(block: np.ndarray) -> float:
"""Compute root-mean-square energy of an audio block."""
return float(np.sqrt(np.mean(block.astype(np.float32) ** 2)))
@dataclass
class VoiceConfig:
"""Configuration for the voice loop."""
@@ -161,13 +166,6 @@ class VoiceLoop:
min_blocks = int(self.config.min_utterance / 0.1)
max_blocks = int(self.config.max_utterance / 0.1)
audio_chunks: list[np.ndarray] = []
silent_count = 0
recording = False
def _rms(block: np.ndarray) -> float:
return float(np.sqrt(np.mean(block.astype(np.float32) ** 2)))
sys.stdout.write("\n 🎤 Listening... (speak now)\n")
sys.stdout.flush()
@@ -177,42 +175,70 @@ class VoiceLoop:
dtype="float32",
blocksize=block_size,
) as stream:
while self._running:
block, overflowed = stream.read(block_size)
if overflowed:
logger.debug("Audio buffer overflowed")
chunks = self._capture_audio_blocks(stream, block_size, silence_blocks, max_blocks)
rms = _rms(block)
return self._finalize_utterance(chunks, min_blocks, sr)
if not recording:
if rms > self.config.silence_threshold:
recording = True
silent_count = 0
audio_chunks.append(block.copy())
sys.stdout.write(" 📢 Recording...\r")
sys.stdout.flush()
else:
def _capture_audio_blocks(
self,
stream,
block_size: int,
silence_blocks: int,
max_blocks: int,
) -> list[np.ndarray]:
"""Read audio blocks from *stream* until silence or safety cap.
Returns the list of captured audio blocks (may be empty if no
speech was detected).
"""
audio_chunks: list[np.ndarray] = []
silent_count = 0
recording = False
while self._running:
block, overflowed = stream.read(block_size)
if overflowed:
logger.debug("Audio buffer overflowed")
rms = _rms(block)
if not recording:
if rms > self.config.silence_threshold:
recording = True
silent_count = 0
audio_chunks.append(block.copy())
sys.stdout.write(" 📢 Recording...\r")
sys.stdout.flush()
else:
audio_chunks.append(block.copy())
if rms < self.config.silence_threshold:
silent_count += 1
else:
silent_count = 0
if rms < self.config.silence_threshold:
silent_count += 1
else:
silent_count = 0
# End of utterance
if silent_count >= silence_blocks:
break
if silent_count >= silence_blocks:
break
# Safety cap
if len(audio_chunks) >= max_blocks:
logger.info("Max utterance length reached, stopping.")
break
if len(audio_chunks) >= max_blocks:
logger.info("Max utterance length reached, stopping.")
break
if not audio_chunks or len(audio_chunks) < min_blocks:
return audio_chunks
@staticmethod
def _finalize_utterance(
chunks: list[np.ndarray], min_blocks: int, sample_rate: int
) -> np.ndarray | None:
"""Concatenate captured chunks and report duration.
Returns None if the utterance is too short (below *min_blocks*).
"""
if not chunks or len(chunks) < min_blocks:
return None
audio = np.concatenate(audio_chunks, axis=0).flatten()
duration = len(audio) / sr
audio = np.concatenate(chunks, axis=0).flatten()
duration = len(audio) / sample_rate
sys.stdout.write(f" ✂️ Captured {duration:.1f}s of audio\n")
sys.stdout.flush()
return audio

View File

@@ -15,7 +15,7 @@ except ImportError:
np = None
try:
from timmy.voice_loop import VoiceConfig, VoiceLoop, _strip_markdown
from timmy.voice_loop import VoiceConfig, VoiceLoop, _rms, _strip_markdown
except ImportError:
pass # pytestmark will skip all tests anyway
@@ -336,3 +336,28 @@ class TestSpeakSetsFlag:
# After speak
assert loop._speaking is False
class TestRms:
def test_rms_of_silence(self):
block = np.zeros(1600, dtype=np.float32)
assert _rms(block) == 0.0
def test_rms_of_signal(self):
block = np.ones(1600, dtype=np.float32) * 0.5
assert abs(_rms(block) - 0.5) < 1e-5
class TestFinalizeUtterance:
def test_returns_none_for_empty(self):
assert VoiceLoop._finalize_utterance([], min_blocks=5, sample_rate=16000) is None
def test_returns_none_below_min(self):
chunks = [np.zeros(1600, dtype=np.float32) for _ in range(3)]
assert VoiceLoop._finalize_utterance(chunks, min_blocks=5, sample_rate=16000) is None
def test_concatenates_chunks(self):
chunks = [np.ones(1600, dtype=np.float32) for _ in range(5)]
result = VoiceLoop._finalize_utterance(chunks, min_blocks=3, sample_rate=16000)
assert result is not None
assert len(result) == 8000