fix(voice): enable TTS voice reply when streaming is active (#2322)

When streaming is enabled, the base adapter receives None from
_handle_message (already_sent=True) and cannot run auto-TTS for
voice input. The runner was unconditionally skipping voice input
TTS assuming the base adapter would handle it.

Now the runner takes over TTS responsibility when streaming has
already delivered the text response, so voice channel playback
works with both streaming on and off.

Streaming off behavior is unchanged (default already_sent=False
preserves the original code path exactly).

Co-authored-by: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com>
This commit is contained in:
Teknium
2026-03-21 08:08:37 -07:00
committed by GitHub
parent 06f4df52f1
commit 28bb0e770f
2 changed files with 60 additions and 12 deletions

View File

@@ -2248,7 +2248,8 @@ class GatewayRunner:
)
# Auto voice reply: send TTS audio before the text response
if self._should_send_voice_reply(event, response, agent_messages):
_already_sent = bool(agent_result.get("already_sent"))
if self._should_send_voice_reply(event, response, agent_messages, already_sent=_already_sent):
await self._send_voice_reply(event, response)
# If streaming already delivered the response, return None so
@@ -3054,6 +3055,7 @@ class GatewayRunner:
event: MessageEvent,
response: str,
agent_messages: list,
already_sent: bool = False,
) -> bool:
"""Decide whether the runner should send a TTS voice reply.
@@ -3062,8 +3064,9 @@ class GatewayRunner:
- response is empty or an error
- agent already called text_to_speech tool (dedup)
- voice input and base adapter auto-TTS already handled it (skip_double)
Exception: Discord voice channel — base play_tts is a no-op there,
so the runner must handle VC playback.
UNLESS streaming already consumed the response (already_sent=True),
in which case the base adapter won't have text for auto-TTS so the
runner must handle it.
"""
if not response or response.startswith("Error:"):
return False
@@ -3093,7 +3096,10 @@ class GatewayRunner:
# Dedup: base adapter auto-TTS already handles voice input
# (play_tts plays in VC when connected, so runner can skip).
if is_voice_input:
# When streaming already delivered the text (already_sent=True),
# the base adapter will receive None and can't run auto-TTS,
# so the runner must take over.
if is_voice_input and not already_sent:
return False
return True

View File

@@ -2467,7 +2467,8 @@ class TestVoiceTTSPlayback:
runner.adapters = {}
return runner
def _call_should_reply(self, runner, voice_mode, msg_type, response="Hello", agent_msgs=None):
def _call_should_reply(self, runner, voice_mode, msg_type, response="Hello",
agent_msgs=None, already_sent=False):
from gateway.platforms.base import MessageType, MessageEvent, SessionSource
from gateway.config import Platform
runner._voice_mode["ch1"] = voice_mode
@@ -2476,28 +2477,32 @@ class TestVoiceTTSPlayback:
user_id="1", user_name="test", chat_type="channel",
)
event = MessageEvent(source=source, text="test", message_type=msg_type)
return runner._should_send_voice_reply(event, response, agent_msgs or [])
return runner._should_send_voice_reply(
event, response, agent_msgs or [], already_sent=already_sent,
)
# -- Streaming OFF (existing behavior, must not change) --
def test_voice_input_runner_skips(self):
"""Voice input: runner skips — base adapter handles via play_tts."""
"""Streaming OFF + voice input: runner skips — base adapter handles."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
assert self._call_should_reply(runner, "all", MessageType.VOICE) is False
assert self._call_should_reply(runner, "all", MessageType.VOICE, already_sent=False) is False
def test_text_input_voice_all_runner_fires(self):
"""Text input + voice_mode=all: runner generates TTS."""
"""Streaming OFF + text input + voice_mode=all: runner generates TTS."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
assert self._call_should_reply(runner, "all", MessageType.TEXT) is True
assert self._call_should_reply(runner, "all", MessageType.TEXT, already_sent=False) is True
def test_text_input_voice_off_no_tts(self):
"""Text input + voice_mode=off: no TTS."""
"""Streaming OFF + text input + voice_mode=off: no TTS."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
assert self._call_should_reply(runner, "off", MessageType.TEXT) is False
def test_text_input_voice_only_no_tts(self):
"""Text input + voice_mode=voice_only: no TTS for text."""
"""Streaming OFF + text input + voice_mode=voice_only: no TTS for text."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
assert self._call_should_reply(runner, "voice_only", MessageType.TEXT) is False
@@ -2523,6 +2528,43 @@ class TestVoiceTTSPlayback:
]}]
assert self._call_should_reply(runner, "all", MessageType.TEXT, agent_msgs=agent_msgs) is False
# -- Streaming ON (already_sent=True) --
def test_streaming_on_voice_input_runner_fires(self):
"""Streaming ON + voice input: runner handles TTS (base adapter has no text)."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
assert self._call_should_reply(runner, "all", MessageType.VOICE, already_sent=True) is True
def test_streaming_on_text_input_runner_fires(self):
"""Streaming ON + text input: runner handles TTS (same as before)."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
assert self._call_should_reply(runner, "all", MessageType.TEXT, already_sent=True) is True
def test_streaming_on_voice_off_no_tts(self):
"""Streaming ON + voice_mode=off: no TTS regardless of streaming."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
assert self._call_should_reply(runner, "off", MessageType.VOICE, already_sent=True) is False
def test_streaming_on_empty_response_no_tts(self):
"""Streaming ON + empty response: no TTS."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
assert self._call_should_reply(runner, "all", MessageType.VOICE, response="", already_sent=True) is False
def test_streaming_on_agent_tts_dedup(self):
"""Streaming ON + agent called TTS: runner skips (dedup still works)."""
from gateway.platforms.base import MessageType
runner = self._make_runner()
agent_msgs = [{"role": "assistant", "tool_calls": [
{"id": "1", "type": "function", "function": {"name": "text_to_speech", "arguments": "{}"}}
]}]
assert self._call_should_reply(
runner, "all", MessageType.VOICE, agent_msgs=agent_msgs, already_sent=True,
) is False
class TestUDPKeepalive:
"""UDP keepalive prevents Discord from dropping the voice session."""