From 9f244ffc704f4ee3a64fdb51e41ccfdc368c49e5 Mon Sep 17 00:00:00 2001
From: Kimi Agent <kimi@timmy.local>
Date: Thu, 19 Mar 2026 21:37:32 -0400
Subject: [PATCH] refactor: break up _record_utterance() into focused helpers
 (#572)

Co-authored-by: Kimi Agent <kimi@timmy.local>
Co-committed-by: Kimi Agent <kimi@timmy.local>
---
 src/timmy/voice_loop.py        | 93 +++++++++++++++++++++-------------
 tests/timmy/test_voice_loop.py | 27 +++++++++-
 2 files changed, 85 insertions(+), 35 deletions(-)

diff --git a/src/timmy/voice_loop.py b/src/timmy/voice_loop.py
index 6058e42a..e2fcfb25 100644
--- a/src/timmy/voice_loop.py
+++ b/src/timmy/voice_loop.py
@@ -78,6 +78,11 @@ DEFAULT_MAX_UTTERANCE = 30.0  # safety cap — don't record forever
 DEFAULT_SESSION_ID = "voice"
 
 
+def _rms(block: np.ndarray) -> float:
+    """Return the root-mean-square (RMS) level of *block*, computed in float32."""
+    return float(np.sqrt(np.mean(block.astype(np.float32) ** 2)))
+
+
 @dataclass
 class VoiceConfig:
     """Configuration for the voice loop."""
@@ -161,13 +166,6 @@ class VoiceLoop:
         min_blocks = int(self.config.min_utterance / 0.1)
         max_blocks = int(self.config.max_utterance / 0.1)
 
-        audio_chunks: list[np.ndarray] = []
-        silent_count = 0
-        recording = False
-
-        def _rms(block: np.ndarray) -> float:
-            return float(np.sqrt(np.mean(block.astype(np.float32) ** 2)))
-
         sys.stdout.write("\n  🎤 Listening... (speak now)\n")
         sys.stdout.flush()
 
@@ -177,42 +175,69 @@ class VoiceLoop:
             dtype="float32",
             blocksize=block_size,
         ) as stream:
-            while self._running:
-                block, overflowed = stream.read(block_size)
-                if overflowed:
-                    logger.debug("Audio buffer overflowed")
+            chunks = self._capture_audio_blocks(stream, block_size, silence_blocks, max_blocks)
 
-                rms = _rms(block)
+        return self._finalize_utterance(chunks, min_blocks, sr)
 
-                if not recording:
-                    if rms > self.config.silence_threshold:
-                        recording = True
-                        silent_count = 0
-                        audio_chunks.append(block.copy())
-                        sys.stdout.write("  📢 Recording...\r")
-                        sys.stdout.flush()
+    def _capture_audio_blocks(
+        self,
+        stream,
+        block_size: int,
+        silence_blocks: int,
+        max_blocks: int,
+    ) -> list[np.ndarray]:
+        """Capture blocks from speech onset until silence, max length, or stop.
+
+        Pre-speech blocks are dropped, so the returned list may be empty.
+        """
+        chunks: list[np.ndarray] = []
+        silent_count = 0
+        recording = False
+
+        while self._running:
+            block, overflowed = stream.read(block_size)
+            if overflowed:
+                logger.debug("Audio buffer overflowed")
+
+            rms = _rms(block)
+
+            if not recording:
+                if rms > self.config.silence_threshold:  # speech onset
+                    recording = True
+                    silent_count = 0
+                    chunks.append(block.copy())
+                    sys.stdout.write("  📢 Recording...\r")
+                    sys.stdout.flush()
+            else:
+                chunks.append(block.copy())
+
+                if rms < self.config.silence_threshold:
+                    silent_count += 1
                 else:
-                    audio_chunks.append(block.copy())
+                    silent_count = 0  # sound resumed; restart the silence run
 
-                    if rms < self.config.silence_threshold:
-                        silent_count += 1
-                    else:
-                        silent_count = 0
+                if silent_count >= silence_blocks:  # end of utterance
+                    break
 
-                    # End of utterance
-                    if silent_count >= silence_blocks:
-                        break
+                if len(chunks) >= max_blocks:  # safety cap
+                    logger.info("Max utterance length reached, stopping.")
+                    break
 
-                    # Safety cap
-                    if len(audio_chunks) >= max_blocks:
-                        logger.info("Max utterance length reached, stopping.")
-                        break
+        return chunks
 
-        if not audio_chunks or len(audio_chunks) < min_blocks:
+    @staticmethod
+    def _finalize_utterance(
+        chunks: list[np.ndarray], min_blocks: int, sample_rate: int
+    ) -> np.ndarray | None:
+        """Join recorded chunks into one flat array and print its duration.
+
+        Returns ``None`` when *chunks* is empty or has fewer than *min_blocks* items.
+        """
+        if not chunks or len(chunks) < min_blocks:
             return None
 
-        audio = np.concatenate(audio_chunks, axis=0).flatten()
-        duration = len(audio) / sr
+        audio = np.concatenate(chunks, axis=0).flatten()  # one flat 1-D array
+        duration = len(audio) / sample_rate  # seconds, as printed below
         sys.stdout.write(f"  ✂️  Captured {duration:.1f}s of audio\n")
         sys.stdout.flush()
         return audio
diff --git a/tests/timmy/test_voice_loop.py b/tests/timmy/test_voice_loop.py
index 809bd151..74d56e59 100644
--- a/tests/timmy/test_voice_loop.py
+++ b/tests/timmy/test_voice_loop.py
@@ -15,7 +15,7 @@ except ImportError:
     np = None
 
 try:
-    from timmy.voice_loop import VoiceConfig, VoiceLoop, _strip_markdown
+    from timmy.voice_loop import VoiceConfig, VoiceLoop, _rms, _strip_markdown
 except ImportError:
     pass  # pytestmark will skip all tests anyway
 
@@ -147,6 +147,31 @@ class TestStripMarkdown:
         assert "*" not in result
 
 
+class TestRms:
+    def test_silent_block(self):
+        block = np.zeros(1600, dtype=np.float32)  # all-zero samples
+        assert _rms(block) == pytest.approx(0.0, abs=1e-7)
+
+    def test_loud_block(self):
+        block = np.ones(1600, dtype=np.float32)  # constant 1.0 samples, so RMS is 1.0
+        assert _rms(block) == pytest.approx(1.0, abs=1e-5)
+
+
+class TestFinalizeUtterance:
+    def test_returns_none_for_empty(self):
+        assert VoiceLoop._finalize_utterance([], min_blocks=5, sample_rate=16000) is None
+
+    def test_returns_none_for_too_short(self):
+        chunks = [np.zeros(1600, dtype=np.float32) for _ in range(3)]  # 3 chunks < min_blocks=5
+        assert VoiceLoop._finalize_utterance(chunks, min_blocks=5, sample_rate=16000) is None
+
+    def test_returns_audio_for_sufficient_chunks(self):
+        chunks = [np.ones(1600, dtype=np.float32) for _ in range(6)]  # 6 chunks >= min_blocks=5
+        result = VoiceLoop._finalize_utterance(chunks, min_blocks=5, sample_rate=16000)
+        assert result is not None
+        assert len(result) == 6 * 1600  # chunks are concatenated end to end
+
+
 class TestThink:
     def test_think_returns_response(self):
         loop = VoiceLoop()