diff --git a/gateway/run.py b/gateway/run.py index 3f47dafbc..43b7c079a 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -2248,7 +2248,8 @@ class GatewayRunner: ) # Auto voice reply: send TTS audio before the text response - if self._should_send_voice_reply(event, response, agent_messages): + _already_sent = bool(agent_result.get("already_sent")) + if self._should_send_voice_reply(event, response, agent_messages, already_sent=_already_sent): await self._send_voice_reply(event, response) # If streaming already delivered the response, return None so @@ -3054,6 +3055,7 @@ class GatewayRunner: event: MessageEvent, response: str, agent_messages: list, + already_sent: bool = False, ) -> bool: """Decide whether the runner should send a TTS voice reply. @@ -3062,8 +3064,9 @@ class GatewayRunner: - response is empty or an error - agent already called text_to_speech tool (dedup) - voice input and base adapter auto-TTS already handled it (skip_double) - Exception: Discord voice channel — base play_tts is a no-op there, - so the runner must handle VC playback. + UNLESS streaming already consumed the response (already_sent=True), + in which case the base adapter won't have text for auto-TTS so the + runner must handle it. """ if not response or response.startswith("Error:"): return False @@ -3093,7 +3096,10 @@ class GatewayRunner: # Dedup: base adapter auto-TTS already handles voice input # (play_tts plays in VC when connected, so runner can skip). - if is_voice_input: + # When streaming already delivered the text (already_sent=True), + # the base adapter will receive None and can't run auto-TTS, + # so the runner must take over. + if is_voice_input and not already_sent: return False return True diff --git a/tests/gateway/test_voice_command.py b/tests/gateway/test_voice_command.py index e04fde767..3d0040d95 100644 --- a/tests/gateway/test_voice_command.py +++ b/tests/gateway/test_voice_command.py @@ -2467,7 +2467,8 @@ class TestVoiceTTSPlayback: runner.adapters = {} return runner - def _call_should_reply(self, runner, voice_mode, msg_type, response="Hello", agent_msgs=None): + def _call_should_reply(self, runner, voice_mode, msg_type, response="Hello", + agent_msgs=None, already_sent=False): from gateway.platforms.base import MessageType, MessageEvent, SessionSource from gateway.config import Platform runner._voice_mode["ch1"] = voice_mode @@ -2476,28 +2477,32 @@ class TestVoiceTTSPlayback: user_id="1", user_name="test", chat_type="channel", ) event = MessageEvent(source=source, text="test", message_type=msg_type) - return runner._should_send_voice_reply(event, response, agent_msgs or []) + return runner._should_send_voice_reply( + event, response, agent_msgs or [], already_sent=already_sent, + ) + + # -- Streaming OFF (existing behavior, must not change) -- def test_voice_input_runner_skips(self): - """Voice input: runner skips — base adapter handles via play_tts.""" + """Streaming OFF + voice input: runner skips — base adapter handles.""" from gateway.platforms.base import MessageType runner = self._make_runner() - assert self._call_should_reply(runner, "all", MessageType.VOICE) is False + assert self._call_should_reply(runner, "all", MessageType.VOICE, already_sent=False) is False def test_text_input_voice_all_runner_fires(self): - """Text input + voice_mode=all: runner generates TTS.""" + """Streaming OFF + text input + voice_mode=all: runner generates TTS.""" from gateway.platforms.base import MessageType runner = self._make_runner() - assert self._call_should_reply(runner, "all", MessageType.TEXT) is True + assert self._call_should_reply(runner, "all", MessageType.TEXT, already_sent=False) is True def test_text_input_voice_off_no_tts(self): - """Text input + voice_mode=off: no TTS.""" + """Streaming OFF + text input + voice_mode=off: no TTS.""" from gateway.platforms.base import MessageType runner = self._make_runner() assert self._call_should_reply(runner, "off", MessageType.TEXT) is False def test_text_input_voice_only_no_tts(self): - """Text input + voice_mode=voice_only: no TTS for text.""" + """Streaming OFF + text input + voice_mode=voice_only: no TTS for text.""" from gateway.platforms.base import MessageType runner = self._make_runner() assert self._call_should_reply(runner, "voice_only", MessageType.TEXT) is False @@ -2523,6 +2528,43 @@ class TestVoiceTTSPlayback: ]}] assert self._call_should_reply(runner, "all", MessageType.TEXT, agent_msgs=agent_msgs) is False + # -- Streaming ON (already_sent=True) -- + + def test_streaming_on_voice_input_runner_fires(self): + """Streaming ON + voice input: runner handles TTS (base adapter has no text).""" + from gateway.platforms.base import MessageType + runner = self._make_runner() + assert self._call_should_reply(runner, "all", MessageType.VOICE, already_sent=True) is True + + def test_streaming_on_text_input_runner_fires(self): + """Streaming ON + text input: runner handles TTS (same as before).""" + from gateway.platforms.base import MessageType + runner = self._make_runner() + assert self._call_should_reply(runner, "all", MessageType.TEXT, already_sent=True) is True + + def test_streaming_on_voice_off_no_tts(self): + """Streaming ON + voice_mode=off: no TTS regardless of streaming.""" + from gateway.platforms.base import MessageType + runner = self._make_runner() + assert self._call_should_reply(runner, "off", MessageType.VOICE, already_sent=True) is False + + def test_streaming_on_empty_response_no_tts(self): + """Streaming ON + empty response: no TTS.""" + from gateway.platforms.base import MessageType + runner = self._make_runner() + assert self._call_should_reply(runner, "all", MessageType.VOICE, response="", already_sent=True) is False + + def test_streaming_on_agent_tts_dedup(self): + """Streaming ON + agent called TTS: runner skips (dedup still works).""" + from gateway.platforms.base import MessageType + runner = self._make_runner() + agent_msgs = [{"role": "assistant", "tool_calls": [ + {"id": "1", "type": "function", "function": {"name": "text_to_speech", "arguments": "{}"}} + ]}] + assert self._call_should_reply( + runner, "all", MessageType.VOICE, agent_msgs=agent_msgs, already_sent=True, + ) is False + class TestUDPKeepalive: """UDP keepalive prevents Discord from dropping the voice session."""