refactor: break up _record_utterance() into focused helpers

Extract _capture_audio_blocks() and _finalize_utterance() from the 73-line _record_utterance() method, and promote _rms() to a module-level function.

Fixes #570

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-19 21:39:35 -04:00
2 changed files with 86 additions and 35 deletions
--- a/src/timmy/voice_loop.py
+++ b/src/timmy/voice_loop.py
@@ -78,6 +78,11 @@ DEFAULT_MAX_UTTERANCE = 30.0  # safety cap — don't record forever
 DEFAULT_SESSION_ID = "voice"


+def _rms(block: np.ndarray) -> float:
+    """Compute root-mean-square energy of an audio block."""
+    return float(np.sqrt(np.mean(block.astype(np.float32) ** 2)))
+
+
@dataclass
 class VoiceConfig:
    """Configuration for the voice loop."""
@@ -161,13 +166,6 @@ class VoiceLoop:
        min_blocks = int(self.config.min_utterance / 0.1)
        max_blocks = int(self.config.max_utterance / 0.1)

-        audio_chunks: list[np.ndarray] = []
-        silent_count = 0
-        recording = False
-
-        def _rms(block: np.ndarray) -> float:
-            return float(np.sqrt(np.mean(block.astype(np.float32) ** 2)))
-
        sys.stdout.write("\n  🎤 Listening... (speak now)\n")
        sys.stdout.flush()

@@ -177,42 +175,70 @@ class VoiceLoop:
            dtype="float32",
            blocksize=block_size,
        ) as stream:
-            while self._running:
-                block, overflowed = stream.read(block_size)
-                if overflowed:
-                    logger.debug("Audio buffer overflowed")
+            chunks = self._capture_audio_blocks(stream, block_size, silence_blocks, max_blocks)

-                rms = _rms(block)
+        return self._finalize_utterance(chunks, min_blocks, sr)

-                if not recording:
-                    if rms > self.config.silence_threshold:
-                        recording = True
-                        silent_count = 0
-                        audio_chunks.append(block.copy())
-                        sys.stdout.write("  📢 Recording...\r")
-                        sys.stdout.flush()
-                else:
+    def _capture_audio_blocks(
+        self,
+        stream,
+        block_size: int,
+        silence_blocks: int,
+        max_blocks: int,
+    ) -> list[np.ndarray]:
+        """Read audio blocks from *stream* until silence or safety cap.
+
+        Returns the list of captured audio blocks (may be empty if no
+        speech was detected).
+        """
+        audio_chunks: list[np.ndarray] = []
+        silent_count = 0
+        recording = False
+
+        while self._running:
+            block, overflowed = stream.read(block_size)
+            if overflowed:
+                logger.debug("Audio buffer overflowed")
+
+            rms = _rms(block)
+
+            if not recording:
+                if rms > self.config.silence_threshold:
+                    recording = True
+                    silent_count = 0
                    audio_chunks.append(block.copy())
+                    sys.stdout.write("  📢 Recording...\r")
+                    sys.stdout.flush()
+            else:
+                audio_chunks.append(block.copy())

-                    if rms < self.config.silence_threshold:
-                        silent_count += 1
-                    else:
-                        silent_count = 0
+                if rms < self.config.silence_threshold:
+                    silent_count += 1
+                else:
+                    silent_count = 0

-                    # End of utterance
-                    if silent_count >= silence_blocks:
-                        break
+                if silent_count >= silence_blocks:
+                    break

-                    # Safety cap
-                    if len(audio_chunks) >= max_blocks:
-                        logger.info("Max utterance length reached, stopping.")
-                        break
+                if len(audio_chunks) >= max_blocks:
+                    logger.info("Max utterance length reached, stopping.")
+                    break

-        if not audio_chunks or len(audio_chunks) < min_blocks:
+        return audio_chunks
+
+    @staticmethod
+    def _finalize_utterance(
+        chunks: list[np.ndarray], min_blocks: int, sample_rate: int
+    ) -> np.ndarray | None:
+        """Concatenate captured chunks and report duration.
+
+        Returns None if the utterance is too short (below *min_blocks*).
+        """
+        if not chunks or len(chunks) < min_blocks:
            return None

-        audio = np.concatenate(audio_chunks, axis=0).flatten()
-        duration = len(audio) / sr
+        audio = np.concatenate(chunks, axis=0).flatten()
+        duration = len(audio) / sample_rate
        sys.stdout.write(f"  ✂️  Captured {duration:.1f}s of audio\n")
        sys.stdout.flush()
        return audio
--- a/tests/timmy/test_voice_loop.py
+++ b/tests/timmy/test_voice_loop.py
@@ -15,7 +15,7 @@ except ImportError:
    np = None

 try:
-    from timmy.voice_loop import VoiceConfig, VoiceLoop, _strip_markdown
+    from timmy.voice_loop import VoiceConfig, VoiceLoop, _rms, _strip_markdown
 except ImportError:
    pass  # pytestmark will skip all tests anyway

@@ -336,3 +336,28 @@ class TestSpeakSetsFlag:

        # After speak
        assert loop._speaking is False
+
+
+class TestRms:
+    def test_rms_of_silence(self):
+        block = np.zeros(1600, dtype=np.float32)
+        assert _rms(block) == 0.0
+
+    def test_rms_of_signal(self):
+        block = np.ones(1600, dtype=np.float32) * 0.5
+        assert abs(_rms(block) - 0.5) < 1e-5
+
+
+class TestFinalizeUtterance:
+    def test_returns_none_for_empty(self):
+        assert VoiceLoop._finalize_utterance([], min_blocks=5, sample_rate=16000) is None
+
+    def test_returns_none_below_min(self):
+        chunks = [np.zeros(1600, dtype=np.float32) for _ in range(3)]
+        assert VoiceLoop._finalize_utterance(chunks, min_blocks=5, sample_rate=16000) is None
+
+    def test_concatenates_chunks(self):
+        chunks = [np.ones(1600, dtype=np.float32) for _ in range(5)]
+        result = VoiceLoop._finalize_utterance(chunks, min_blocks=3, sample_rate=16000)
+        assert result is not None
+        assert len(result) == 8000