From cc974904f8a6ff9e07bc364b400d1de69c9dcb06 Mon Sep 17 00:00:00 2001 From: 0xbyt4 <35742124+0xbyt4@users.noreply.github.com> Date: Wed, 11 Mar 2026 02:13:43 +0300 Subject: [PATCH] =?UTF-8?q?feat:=20Discord=20voice=20channel=20support=20?= =?UTF-8?q?=E2=80=94=20bot=20joins=20VC=20and=20speaks=20replies?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - /voice channel: bot joins user's voice channel, speaks TTS replies - /voice leave: disconnect from voice channel - Auto-disconnect after 5 min inactivity - _get_guild_id() helper extracts guild from raw_message - Load opus codec for voice playback - discord.py[voice] in pyproject.toml (pulls PyNaCl + davey) --- gateway/platforms/discord.py | 124 ++++++++++++++++++++++++++++++++++- gateway/run.py | 89 +++++++++++++++++++++++-- pyproject.toml | 2 +- 3 files changed, 209 insertions(+), 6 deletions(-) diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 717fc921..a7fd45f6 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -82,17 +82,35 @@ class DiscordAdapter(BasePlatformAdapter): # Discord message limits MAX_MESSAGE_LENGTH = 2000 + # Auto-disconnect from voice channel after this many seconds of inactivity + VOICE_TIMEOUT = 300 + def __init__(self, config: PlatformConfig): super().__init__(config, Platform.DISCORD) self._client: Optional[commands.Bot] = None self._ready_event = asyncio.Event() self._allowed_user_ids: set = set() # For button approval authorization + # Voice channel state (per-guild) + self._voice_clients: Dict[int, Any] = {} # guild_id -> VoiceClient + self._voice_text_channels: Dict[int, int] = {} # guild_id -> text_channel_id + self._voice_timeout_tasks: Dict[int, asyncio.Task] = {} # guild_id -> timeout task async def connect(self) -> bool: """Connect to Discord and start receiving events.""" if not DISCORD_AVAILABLE: logger.error("[%s] discord.py not installed. 
# ------------------------------------------------------------------
# Voice channel methods (join / leave / play)
# ------------------------------------------------------------------

async def join_voice_channel(self, channel) -> bool:
    """Join a Discord voice channel, or move the guild's connection to it.

    Args:
        channel: Voice channel object; ``channel.guild.id`` keys the
            per-guild state and ``channel.connect()`` opens the connection.

    Returns:
        False when the client is not initialised or discord.py is not
        available, True once the bot is connected to ``channel``.
        Errors raised by ``move_to()`` / ``connect()`` propagate to the caller.
    """
    if not self._client or not DISCORD_AVAILABLE:
        return False
    guild_id = channel.guild.id

    # Already connected in this guild? Reuse (or move) that connection.
    existing = self._voice_clients.get(guild_id)
    if existing and existing.is_connected():
        if existing.channel.id != channel.id:
            await existing.move_to(channel)
        self._reset_voice_timeout(guild_id)
        return True

    vc = await channel.connect()
    self._voice_clients[guild_id] = vc
    self._reset_voice_timeout(guild_id)
    return True


async def leave_voice_channel(self, guild_id: int) -> None:
    """Disconnect from the voice channel in a guild and drop its state.

    The timeout task and the linked text channel are cleaned up even when
    ``disconnect()`` raises; the exception still propagates afterwards.
    """
    vc = self._voice_clients.pop(guild_id, None)
    try:
        if vc and vc.is_connected():
            await vc.disconnect()
    finally:
        task = self._voice_timeout_tasks.pop(guild_id, None)
        # The inactivity handler calls this method from inside the very
        # task stored here. Cancelling the running task would abort the
        # caller at its next await, so only foreign tasks are cancelled.
        if task is not None and task is not asyncio.current_task():
            task.cancel()
        self._voice_text_channels.pop(guild_id, None)


async def play_in_voice_channel(self, guild_id: int, audio_path: str) -> bool:
    """Play an audio file in the guild's connected voice channel.

    Waits for any playback already in progress, then plays ``audio_path``
    and returns once the ``after`` callback has fired.

    Returns:
        False when the bot is not connected in ``guild_id`` (including when
        the connection went away while waiting), True after playback ended.
    """
    vc = self._voice_clients.get(guild_id)
    if not vc or not vc.is_connected():
        return False

    # Wait for current playback to finish
    while vc.is_playing():
        await asyncio.sleep(0.1)
    # The connection may have been closed while we were waiting.
    if not vc.is_connected():
        return False

    done = asyncio.Event()
    loop = asyncio.get_running_loop()

    def _after(error):
        if error:
            logger.error("Voice playback error: %s", error)
        # The callback may run outside the event-loop thread, so the event
        # is set through the loop instead of directly.
        loop.call_soon_threadsafe(done.set)

    source = discord.FFmpegPCMAudio(audio_path)
    source = discord.PCMVolumeTransformer(source, volume=1.0)
    # Restart the inactivity timer before playing so a nearly expired timer
    # does not disconnect the bot in the middle of a reply.
    self._reset_voice_timeout(guild_id)
    vc.play(source, after=_after)
    await done.wait()
    self._reset_voice_timeout(guild_id)
    return True


async def get_user_voice_channel(self, guild_id: int, user_id: str):
    """Return the voice channel the user is currently in, or None.

    None is also returned when the client is missing, the guild or member
    cannot be resolved, or ``user_id`` is not a numeric id.
    """
    if not self._client:
        return None
    guild = self._client.get_guild(guild_id)
    if not guild:
        return None
    try:
        member_id = int(user_id)
    except (TypeError, ValueError):
        return None
    member = guild.get_member(member_id)
    if not member or not member.voice:
        return None
    return member.voice.channel


def _reset_voice_timeout(self, guild_id: int) -> None:
    """Reset the auto-disconnect inactivity timer for a guild."""
    task = self._voice_timeout_tasks.pop(guild_id, None)
    if task:
        task.cancel()
    self._voice_timeout_tasks[guild_id] = asyncio.ensure_future(
        self._voice_timeout_handler(guild_id)
    )


async def _voice_timeout_handler(self, guild_id: int) -> None:
    """Auto-disconnect after VOICE_TIMEOUT seconds of inactivity.

    After leaving, a notice is posted to the text channel linked to the
    guild's voice session, when one is recorded.
    """
    try:
        await asyncio.sleep(self.VOICE_TIMEOUT)
    except asyncio.CancelledError:
        return
    # Read the linked channel first: leave_voice_channel() clears it.
    text_ch_id = self._voice_text_channels.get(guild_id)
    # Unregister this task before leaving. leave_voice_channel() cancels
    # whatever timeout task is registered; if that were this task, the
    # notice below would be aborted by CancelledError and never sent.
    if self._voice_timeout_tasks.get(guild_id) is asyncio.current_task():
        self._voice_timeout_tasks.pop(guild_id, None)
    await self.leave_voice_channel(guild_id)
    if text_ch_id and self._client:
        ch = self._client.get_channel(text_ch_id)
        if ch:
            try:
                await ch.send("Left voice channel (inactivity timeout).")
            except Exception as exc:
                # Best effort: the bot has already left; only the notice failed.
                logger.debug("Could not send voice timeout notice: %s", exc)


def is_in_voice_channel(self, guild_id: int) -> bool:
    """Check if the bot is connected to a voice channel in this guild."""
    vc = self._voice_clients.get(guild_id)
    return vc is not None and vc.is_connected()
@staticmethod
def _get_guild_id(event: MessageEvent) -> Optional[int]:
    """Extract the Discord guild id from the event's raw message object.

    A slash-command interaction exposes ``guild_id`` directly, while a
    regular message exposes it through ``guild.id``.

    Returns:
        The guild id as an int, or None when the event has no raw message,
        the raw message carries no guild, or the id is not numeric.
    """
    raw = getattr(event, "raw_message", None)
    if raw is None:
        return None
    # Slash command interaction
    candidate = getattr(raw, "guild_id", None)
    if not candidate:
        # Regular message
        guild = getattr(raw, "guild", None)
        candidate = getattr(guild, "id", None) if guild else None
    if not candidate:
        return None
    try:
        return int(candidate)
    except (TypeError, ValueError):
        # This helper runs for every platform's events; a raw message with
        # a non-numeric guild_id is simply "not a Discord guild".
        return None
async def _handle_voice_channel_join(self, event: MessageEvent) -> str:
    """Join the voice channel the requesting user is currently in.

    On success the text channel of ``event`` is linked to the guild's voice
    session on the adapter and that chat is switched to voice mode "all".

    Returns:
        A user-facing status message describing the outcome.
    """
    adapter = self.adapters.get(event.source.platform)
    if not hasattr(adapter, "join_voice_channel"):
        return "Voice channels are not supported on this platform."

    guild_id = self._get_guild_id(event)
    if not guild_id:
        return "This command only works in a Discord server."

    voice_channel = await adapter.get_user_voice_channel(
        guild_id, event.source.user_id
    )
    if not voice_channel:
        return "You need to be in a voice channel first."

    try:
        success = await adapter.join_voice_channel(voice_channel)
    except Exception as e:
        logger.warning("Failed to join voice channel: %s", e)
        return f"Failed to join voice channel: {e}"

    if not success:
        return "Failed to join voice channel. Check bot permissions (Connect + Speak)."

    adapter._voice_text_channels[guild_id] = int(event.source.chat_id)
    self._voice_mode[event.source.chat_id] = "all"
    self._save_voice_modes()
    return (
        f"Joined voice channel **{voice_channel.name}**.\n"
        "I'll speak my replies here. Use /voice leave to disconnect."
    )


async def _handle_voice_channel_leave(self, event: MessageEvent) -> str:
    """Leave the guild's voice channel and undo the forced voice mode.

    Clears the voice mode of the chat that issued the command and of the
    text channel that was linked when the bot joined, so the linked chat
    does not stay in mode "all" after the bot has left.

    Returns:
        A user-facing status message describing the outcome.
    """
    adapter = self.adapters.get(event.source.platform)
    guild_id = self._get_guild_id(event)

    if (
        not guild_id
        or not hasattr(adapter, "leave_voice_channel")
        or not hasattr(adapter, "is_in_voice_channel")
        or not adapter.is_in_voice_channel(guild_id)
    ):
        return "Not in a voice channel."

    # Capture the linked text channel before leaving; the adapter may drop
    # the mapping as part of disconnecting.
    linked_chat = getattr(adapter, "_voice_text_channels", {}).get(guild_id)

    await adapter.leave_voice_channel(guild_id)

    self._voice_mode.pop(event.source.chat_id, None)
    if linked_chat is not None:
        # The link is stored as an int while voice modes are keyed by the
        # event's chat_id (presumably a str), so both forms are cleared.
        self._voice_mode.pop(linked_chat, None)
        self._voice_mode.pop(str(linked_chat), None)
    self._save_voice_modes()
    return "Left voice channel."
+ async def _send_voice_reply(self, event: MessageEvent, text: str) -> None: """Generate TTS audio and send as a voice message before the text reply.""" try: @@ -2178,7 +2252,15 @@ class GatewayRunner: return adapter = self.adapters.get(event.source.platform) - if adapter and hasattr(adapter, "send_voice"): + + # If connected to a voice channel, play there instead of sending a file + guild_id = self._get_guild_id(event) + if (guild_id + and hasattr(adapter, "play_in_voice_channel") + and hasattr(adapter, "is_in_voice_channel") + and adapter.is_in_voice_channel(guild_id)): + await adapter.play_in_voice_channel(guild_id, actual_path) + elif adapter and hasattr(adapter, "send_voice"): send_kwargs: Dict[str, Any] = { "chat_id": event.source.chat_id, "audio_path": actual_path, @@ -2186,7 +2268,6 @@ class GatewayRunner: } if event.source.thread_id: send_kwargs["metadata"] = {"thread_id": event.source.thread_id} - # Only pass metadata if the adapter accepts it import inspect sig = inspect.signature(adapter.send_voice) if "metadata" not in sig.parameters: @@ -2198,7 +2279,7 @@ class GatewayRunner: except OSError: pass except Exception as e: - logger.warning("Auto voice reply failed: %s", e) + logger.warning("Auto voice reply failed: %s", e, exc_info=True) async def _handle_rollback_command(self, event: MessageEvent) -> str: """Handle /rollback command — list or restore filesystem checkpoints.""" diff --git a/pyproject.toml b/pyproject.toml index eb005ab9..fa248cd0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ dependencies = [ modal = ["swe-rex[modal]>=1.4.0"] daytona = ["daytona>=0.148.0"] dev = ["pytest", "pytest-asyncio", "pytest-xdist", "mcp>=1.2.0"] -messaging = ["python-telegram-bot>=20.0", "discord.py>=2.0", "aiohttp>=3.9.0", "slack-bolt>=1.18.0", "slack-sdk>=3.27.0"] +messaging = ["python-telegram-bot>=20.0", "discord.py[voice]>=2.0", "aiohttp>=3.9.0", "slack-bolt>=1.18.0", "slack-sdk>=3.27.0"] cron = ["croniter"] slack = ["slack-bolt>=1.18.0", 
"slack-sdk>=3.27.0"] cli = ["simple-term-menu"]