diff --git a/src/timmy/voice_loop.py b/src/timmy/voice_loop.py
index 6058e42..5b31979 100644
--- a/src/timmy/voice_loop.py
+++ b/src/timmy/voice_loop.py
@@ -78,6 +78,11 @@ DEFAULT_MAX_UTTERANCE = 30.0  # safety cap — don't record forever
 DEFAULT_SESSION_ID = "voice"
 
 
+def _rms(block: np.ndarray) -> float:
+    """Return the root-mean-square level of *block*, computed after casting to float32."""
+    return float(np.sqrt(np.mean(block.astype(np.float32) ** 2)))
+
+
 @dataclass
 class VoiceConfig:
     """Configuration for the voice loop."""
@@ -161,13 +166,6 @@ class VoiceLoop:
         min_blocks = int(self.config.min_utterance / 0.1)
         max_blocks = int(self.config.max_utterance / 0.1)
 
-        audio_chunks: list[np.ndarray] = []
-        silent_count = 0
-        recording = False
-
-        def _rms(block: np.ndarray) -> float:
-            return float(np.sqrt(np.mean(block.astype(np.float32) ** 2)))
-
         sys.stdout.write("\n  🎤 Listening... (speak now)\n")
         sys.stdout.flush()
 
@@ -177,42 +175,70 @@ class VoiceLoop:
             dtype="float32",
             blocksize=block_size,
         ) as stream:
-            while self._running:
-                block, overflowed = stream.read(block_size)
-                if overflowed:
-                    logger.debug("Audio buffer overflowed")
+            chunks = self._capture_audio_blocks(stream, block_size, silence_blocks, max_blocks)
 
-                rms = _rms(block)
+        return self._finalize_utterance(chunks, min_blocks, sr)
 
-                if not recording:
-                    if rms > self.config.silence_threshold:
-                        recording = True
-                        silent_count = 0
-                        audio_chunks.append(block.copy())
-                        sys.stdout.write("  📢 Recording...\r")
-                        sys.stdout.flush()
-                else:
+    def _capture_audio_blocks(
+        self,
+        stream,
+        block_size: int,
+        silence_blocks: int,
+        max_blocks: int,
+    ) -> list[np.ndarray]:
+        """Read blocks from *stream* until sustained silence, the length cap, or stop.
+
+        Capture starts at the first block louder than the silence threshold.
+        Returns the recorded blocks (empty if no block ever exceeded it).
+        """
+        audio_chunks: list[np.ndarray] = []
+        silent_count = 0
+        recording = False
+
+        while self._running:  # re-checked before every read
+            block, overflowed = stream.read(block_size)
+            if overflowed:
+                logger.debug("Audio buffer overflowed")
+
+            rms = _rms(block)
+
+            if not recording:
+                if rms > self.config.silence_threshold:  # first loud block starts capture
+                    recording = True
+                    silent_count = 0
                     audio_chunks.append(block.copy())
+                    sys.stdout.write("  📢 Recording...\r")
+                    sys.stdout.flush()
+            else:
+                audio_chunks.append(block.copy())  # quiet blocks are kept while recording
 
-                    if rms < self.config.silence_threshold:
-                        silent_count += 1
-                    else:
-                        silent_count = 0
+                if rms < self.config.silence_threshold:
+                    silent_count += 1
+                else:
+                    silent_count = 0  # only consecutive quiet blocks count
 
-                    # End of utterance
-                    if silent_count >= silence_blocks:
-                        break
+                if silent_count >= silence_blocks:  # end of utterance
+                    break
 
-                    # Safety cap
-                    if len(audio_chunks) >= max_blocks:
-                        logger.info("Max utterance length reached, stopping.")
-                        break
+                if len(audio_chunks) >= max_blocks:  # safety cap
+                    logger.info("Max utterance length reached, stopping.")
+                    break
 
-        if not audio_chunks or len(audio_chunks) < min_blocks:
+        return audio_chunks
+
+    @staticmethod
+    def _finalize_utterance(
+        chunks: list[np.ndarray], min_blocks: int, sample_rate: int
+    ) -> np.ndarray | None:
+        """Join captured chunks into one flat array and print its duration to stdout.
+
+        Returns None when *chunks* is empty or has fewer than *min_blocks* items.
+        """
+        if not chunks or len(chunks) < min_blocks:
             return None
 
-        audio = np.concatenate(audio_chunks, axis=0).flatten()
-        duration = len(audio) / sr
+        audio = np.concatenate(chunks, axis=0).flatten()
+        duration = len(audio) / sample_rate  # seconds; exact only for mono input (verify caller)
         sys.stdout.write(f"  ✂️  Captured {duration:.1f}s of audio\n")
         sys.stdout.flush()
         return audio
diff --git a/tests/timmy/test_voice_loop.py b/tests/timmy/test_voice_loop.py
index 809bd15..e5d930c 100644
--- a/tests/timmy/test_voice_loop.py
+++ b/tests/timmy/test_voice_loop.py
@@ -15,7 +15,7 @@ except ImportError:
     np = None
 
 try:
-    from timmy.voice_loop import VoiceConfig, VoiceLoop, _strip_markdown
+    from timmy.voice_loop import VoiceConfig, VoiceLoop, _rms, _strip_markdown
 except ImportError:
     pass  # pytestmark will skip all tests anyway
 
@@ -336,3 +336,28 @@ class TestSpeakSetsFlag:
 
         # After speak
         assert loop._speaking is False
+
+
+class TestRms:  # exercises the module-level _rms helper
+    def test_rms_of_silence(self):
+        block = np.zeros(1600, dtype=np.float32)
+        assert _rms(block) == 0.0  # zeros square/mean/sqrt to exactly 0.0
+
+    def test_rms_of_signal(self):
+        block = np.ones(1600, dtype=np.float32) * 0.5
+        assert abs(_rms(block) - 0.5) < 1e-5  # RMS of a constant equals its magnitude
+
+
+class TestFinalizeUtterance:  # calls the staticmethod directly; no VoiceLoop instance needed
+    def test_returns_none_for_empty(self):
+        assert VoiceLoop._finalize_utterance([], min_blocks=5, sample_rate=16000) is None
+
+    def test_returns_none_below_min(self):
+        chunks = [np.zeros(1600, dtype=np.float32) for _ in range(3)]  # 3 chunks < min_blocks=5
+        assert VoiceLoop._finalize_utterance(chunks, min_blocks=5, sample_rate=16000) is None
+
+    def test_concatenates_chunks(self):
+        chunks = [np.ones(1600, dtype=np.float32) for _ in range(5)]
+        result = VoiceLoop._finalize_utterance(chunks, min_blocks=3, sample_rate=16000)
+        assert result is not None
+        assert len(result) == 8000  # 5 chunks x 1600 samples