feat: Discord voice channel support — bot joins VC and speaks replies

- /voice channel: bot joins user's voice channel, speaks TTS replies
- /voice leave: disconnect from voice channel
- Auto-disconnect after 5 min inactivity
- _get_guild_id() helper extracts guild from raw_message
- Load opus codec for voice playback
- discord.py[voice] in pyproject.toml (pulls PyNaCl + davey)
This commit is contained in:
0xbyt4
2026-03-11 02:13:43 +03:00
parent cbe4c23efa
commit cc974904f8
3 changed files with 209 additions and 6 deletions

View File

@@ -82,17 +82,35 @@ class DiscordAdapter(BasePlatformAdapter):
# Discord message limits # Discord message limits
MAX_MESSAGE_LENGTH = 2000 MAX_MESSAGE_LENGTH = 2000
# Auto-disconnect from voice channel after this many seconds of inactivity
VOICE_TIMEOUT = 300
def __init__(self, config: PlatformConfig): def __init__(self, config: PlatformConfig):
super().__init__(config, Platform.DISCORD) super().__init__(config, Platform.DISCORD)
self._client: Optional[commands.Bot] = None self._client: Optional[commands.Bot] = None
self._ready_event = asyncio.Event() self._ready_event = asyncio.Event()
self._allowed_user_ids: set = set() # For button approval authorization self._allowed_user_ids: set = set() # For button approval authorization
# Voice channel state (per-guild)
self._voice_clients: Dict[int, Any] = {} # guild_id -> VoiceClient
self._voice_text_channels: Dict[int, int] = {} # guild_id -> text_channel_id
self._voice_timeout_tasks: Dict[int, asyncio.Task] = {} # guild_id -> timeout task
async def connect(self) -> bool: async def connect(self) -> bool:
"""Connect to Discord and start receiving events.""" """Connect to Discord and start receiving events."""
if not DISCORD_AVAILABLE: if not DISCORD_AVAILABLE:
logger.error("[%s] discord.py not installed. Run: pip install discord.py", self.name) logger.error("[%s] discord.py not installed. Run: pip install discord.py", self.name)
return False return False
# Load opus codec for voice channel support
if not discord.opus.is_loaded():
try:
discord.opus.load_opus("/opt/homebrew/lib/libopus.dylib")
except Exception:
# Try common Linux path as fallback
try:
discord.opus.load_opus("libopus.so.0")
except Exception:
logger.warning("Opus codec not found — voice channel playback disabled")
if not self.config.token: if not self.config.token:
logger.error("[%s] No bot token configured", self.name) logger.error("[%s] No bot token configured", self.name)
@@ -361,6 +379,108 @@ class DiscordAdapter(BasePlatformAdapter):
logger.error("[%s] Failed to send audio, falling back to base adapter: %s", self.name, e, exc_info=True) logger.error("[%s] Failed to send audio, falling back to base adapter: %s", self.name, e, exc_info=True)
return await super().send_voice(chat_id, audio_path, caption, reply_to, metadata=metadata) return await super().send_voice(chat_id, audio_path, caption, reply_to, metadata=metadata)
# ------------------------------------------------------------------
# Voice channel methods (join / leave / play)
# ------------------------------------------------------------------
async def join_voice_channel(self, channel) -> bool:
    """Connect to (or move to) the given voice channel.

    Args:
        channel: Voice channel object; its ``guild.id`` and ``id`` are read,
            and ``connect()`` is awaited when no live client exists.

    Returns:
        False when the adapter has no client or discord.py is unavailable,
        True otherwise. Errors from ``connect()``/``move_to()`` propagate.
    """
    if not self._client or not DISCORD_AVAILABLE:
        return False

    gid = channel.guild.id
    current = self._voice_clients.get(gid)

    if current and current.is_connected():
        # Reuse the live connection; only hop channels when the target differs.
        if current.channel.id != channel.id:
            await current.move_to(channel)
    else:
        # No usable connection for this guild yet: open one and remember it.
        self._voice_clients[gid] = await channel.connect()

    # Every successful join counts as activity for the inactivity timer.
    self._reset_voice_timeout(gid)
    return True
async def leave_voice_channel(self, guild_id: int) -> None:
    """Disconnect from the voice channel in a guild and drop its state.

    Removes the guild's voice client, inactivity-timeout task and bound
    text channel. The bookkeeping is cleared even if ``disconnect()``
    raises (the exception still propagates to the caller).

    Args:
        guild_id: Guild whose voice connection should be closed.
    """
    vc = self._voice_clients.pop(guild_id, None)
    try:
        if vc and vc.is_connected():
            await vc.disconnect()
    finally:
        task = self._voice_timeout_tasks.pop(guild_id, None)
        # When the inactivity handler itself calls us, the stored task is the
        # task currently running. Cancelling it would raise CancelledError at
        # the caller's next await and abort whatever it still has to do.
        if task is not None and task is not asyncio.current_task():
            task.cancel()
        self._voice_text_channels.pop(guild_id, None)
async def play_in_voice_channel(self, guild_id: int, audio_path: str) -> bool:
    """Play an audio file in the guild's connected voice channel.

    Waits for any playback already in progress, then plays ``audio_path``
    through FFmpeg and blocks until the player reports completion.

    Args:
        guild_id: Guild whose voice client should play the audio.
        audio_path: Path of the audio file handed to FFmpeg.

    Returns:
        True when playback finished without a reported error; False when
        there is no connected voice client, the connection dropped while
        waiting for earlier playback, or the player reported an error.
    """
    vc = self._voice_clients.get(guild_id)
    if not vc or not vc.is_connected():
        return False

    # Wait for current playback to finish, but give up if the connection
    # goes away in the meantime instead of polling a dead client.
    while vc.is_playing():
        if not vc.is_connected():
            return False
        await asyncio.sleep(0.1)

    finished = asyncio.Event()
    # We are inside a coroutine, so the running loop is the right one to
    # hand results back to (get_event_loop() is deprecated for this use).
    loop = asyncio.get_running_loop()
    playback_errors = []

    def _after(error):
        # Invoked by the player once the source is exhausted or fails; it is
        # not running on the event loop, hence call_soon_threadsafe.
        if error:
            logger.error("Voice playback error: %s", error)
            playback_errors.append(error)
        loop.call_soon_threadsafe(finished.set)

    source = discord.FFmpegPCMAudio(audio_path)
    source = discord.PCMVolumeTransformer(source, volume=1.0)
    try:
        vc.play(source, after=_after)
    except Exception:
        # play() refused the source; release the FFmpeg process right away.
        source.cleanup()
        raise

    await finished.wait()
    self._reset_voice_timeout(guild_id)
    return not playback_errors
async def get_user_voice_channel(self, guild_id: int, user_id: str):
    """Return the voice channel the user is currently in, or None.

    Args:
        guild_id: Guild to look the member up in.
        user_id: User id; converted with ``int()`` before the lookup.

    Returns:
        ``member.voice.channel`` for the member, or None when the client is
        missing, ``user_id`` is not an integer, the guild or member is not
        found, or the member has no voice state.
    """
    if not self._client:
        return None

    # A missing or non-numeric id cannot match any member; report "not in a
    # voice channel" instead of raising into the command handler.
    try:
        member_id = int(user_id)
    except (TypeError, ValueError):
        return None

    guild = self._client.get_guild(guild_id)
    if not guild:
        return None

    member = guild.get_member(member_id)
    if not member or not member.voice:
        return None
    return member.voice.channel
def _reset_voice_timeout(self, guild_id: int) -> None:
    """Restart the inactivity countdown for a guild.

    Cancels the guild's pending timeout task, if one is stored, and
    schedules a new ``_voice_timeout_handler`` run in its place.
    """
    pending = self._voice_timeout_tasks.pop(guild_id, None)
    if pending:
        pending.cancel()

    replacement = asyncio.ensure_future(self._voice_timeout_handler(guild_id))
    self._voice_timeout_tasks[guild_id] = replacement
async def _voice_timeout_handler(self, guild_id: int) -> None:
    """Auto-disconnect after VOICE_TIMEOUT seconds of inactivity.

    Sleeps for ``VOICE_TIMEOUT`` seconds; if not cancelled in the meantime,
    leaves the guild's voice channel and posts a notice in the text channel
    recorded for that guild (when there is one).

    Args:
        guild_id: Guild whose voice connection is being supervised.
    """
    try:
        await asyncio.sleep(self.VOICE_TIMEOUT)
    except asyncio.CancelledError:
        # The timer was reset or the bot left on request; nothing to do.
        return

    # This coroutine runs as the task stored in _voice_timeout_tasks.
    # leave_voice_channel() cancels whatever task is stored there, which
    # would cancel *us* and make the notice below die with CancelledError.
    # Deregister ourselves first so there is nothing left to cancel.
    if self._voice_timeout_tasks.get(guild_id) is asyncio.current_task():
        self._voice_timeout_tasks.pop(guild_id, None)

    # Read the bound text channel before leaving: leaving clears it.
    text_ch_id = self._voice_text_channels.get(guild_id)
    await self.leave_voice_channel(guild_id)

    if not (text_ch_id and self._client):
        return
    ch = self._client.get_channel(text_ch_id)
    if not ch:
        return
    try:
        await ch.send("Left voice channel (inactivity timeout).")
    except Exception as exc:
        # Best-effort notice: the disconnect already happened, so a failed
        # send must not raise, but it should leave a trace.
        logger.debug("Could not send voice timeout notice: %s", exc)
def is_in_voice_channel(self, guild_id: int) -> bool:
    """Tell whether a live voice connection is stored for this guild."""
    client = self._voice_clients.get(guild_id)
    if client is None:
        return False
    return client.is_connected()
async def send_image_file( async def send_image_file(
self, self,
chat_id: str, chat_id: str,
@@ -685,8 +805,10 @@ class DiscordAdapter(BasePlatformAdapter):
await self._run_simple_slash(interaction, "/reload-mcp") await self._run_simple_slash(interaction, "/reload-mcp")
@tree.command(name="voice", description="Toggle voice reply mode") @tree.command(name="voice", description="Toggle voice reply mode")
@discord.app_commands.describe(mode="Voice mode: on, off, tts, or status") @discord.app_commands.describe(mode="Voice mode: on, off, tts, channel, leave, or status")
@discord.app_commands.choices(mode=[ @discord.app_commands.choices(mode=[
discord.app_commands.Choice(name="channel — join your voice channel", value="channel"),
discord.app_commands.Choice(name="leave — leave voice channel", value="leave"),
discord.app_commands.Choice(name="on — voice reply to voice messages", value="on"), discord.app_commands.Choice(name="on — voice reply to voice messages", value="on"),
discord.app_commands.Choice(name="tts — voice reply to all messages", value="tts"), discord.app_commands.Choice(name="tts — voice reply to all messages", value="tts"),
discord.app_commands.Choice(name="off — text only", value="off"), discord.app_commands.Choice(name="off — text only", value="off"),

View File

@@ -2105,8 +2105,22 @@ class GatewayRunner:
f"Cron jobs and cross-platform messages will be delivered here." f"Cron jobs and cross-platform messages will be delivered here."
) )
@staticmethod
def _get_guild_id(event: MessageEvent) -> Optional[int]:
"""Extract Discord guild_id from the raw message object."""
raw = getattr(event, "raw_message", None)
if raw is None:
return None
# Slash command interaction
if hasattr(raw, "guild_id") and raw.guild_id:
return int(raw.guild_id)
# Regular message
if hasattr(raw, "guild") and raw.guild:
return raw.guild.id
return None
async def _handle_voice_command(self, event: MessageEvent) -> str: async def _handle_voice_command(self, event: MessageEvent) -> str:
"""Handle /voice [on|off|tts|status] command.""" """Handle /voice [on|off|tts|channel|leave|status] command."""
args = event.get_command_args().strip().lower() args = event.get_command_args().strip().lower()
chat_id = event.source.chat_id chat_id = event.source.chat_id
@@ -2129,6 +2143,10 @@ class GatewayRunner:
"Auto-TTS enabled.\n" "Auto-TTS enabled.\n"
"All replies will include a voice message." "All replies will include a voice message."
) )
elif args in ("channel", "join"):
return await self._handle_voice_channel_join(event)
elif args == "leave":
return await self._handle_voice_channel_leave(event)
elif args == "status": elif args == "status":
mode = self._voice_mode.get(chat_id, "off") mode = self._voice_mode.get(chat_id, "off")
labels = { labels = {
@@ -2136,6 +2154,14 @@ class GatewayRunner:
"voice_only": "On (voice reply to voice messages)", "voice_only": "On (voice reply to voice messages)",
"all": "TTS (voice reply to all messages)", "all": "TTS (voice reply to all messages)",
} }
# Append voice channel info if connected
adapter = self.adapters.get(event.source.platform)
guild_id = self._get_guild_id(event)
if guild_id and hasattr(adapter, "is_in_voice_channel"):
if adapter.is_in_voice_channel(guild_id):
vc = adapter._voice_clients.get(guild_id)
ch_name = vc.channel.name if vc and vc.channel else "unknown"
return f"Voice mode: {labels.get(mode, mode)}\nVoice channel: {ch_name}"
return f"Voice mode: {labels.get(mode, mode)}" return f"Voice mode: {labels.get(mode, mode)}"
else: else:
# Toggle: off → on, on/all → off # Toggle: off → on, on/all → off
@@ -2149,6 +2175,54 @@ class GatewayRunner:
self._save_voice_modes() self._save_voice_modes()
return "Voice mode disabled." return "Voice mode disabled."
async def _handle_voice_channel_join(self, event: MessageEvent) -> str:
    """Make the bot join the voice channel the command author is in.

    On success the issuing chat is recorded as the guild's text channel on
    the adapter, the chat's voice mode is set to "all", and the modes are
    saved.

    Returns:
        The user-facing status message for the command.
    """
    origin = event.source
    adapter = self.adapters.get(origin.platform)
    if not hasattr(adapter, "join_voice_channel"):
        return "Voice channels are not supported on this platform."

    guild_id = self._get_guild_id(event)
    if not guild_id:
        return "This command only works in a Discord server."

    target = await adapter.get_user_voice_channel(guild_id, origin.user_id)
    if not target:
        return "You need to be in a voice channel first."

    try:
        joined = await adapter.join_voice_channel(target)
    except Exception as exc:
        logger.warning("Failed to join voice channel: %s", exc)
        return f"Failed to join voice channel: {exc}"

    if not joined:
        return "Failed to join voice channel. Check bot permissions (Connect + Speak)."

    # Remember where the command came from and switch that chat to speak
    # every reply.
    adapter._voice_text_channels[guild_id] = int(origin.chat_id)
    self._voice_mode[origin.chat_id] = "all"
    self._save_voice_modes()
    return (
        f"Joined voice channel **{target.name}**.\n"
        "I'll speak my replies here. Use /voice leave to disconnect."
    )
async def _handle_voice_channel_leave(self, event: MessageEvent) -> str:
    """Leave the Discord voice channel and clear the related voice mode.

    Clears the voice mode of the chat that issued the command and of the
    text channel recorded for the guild on the adapter, then saves the
    modes.

    Returns:
        "Left voice channel." after disconnecting, or "Not in a voice
        channel." when there is no guild, no capable adapter, or no active
        voice connection.
    """
    adapter = self.adapters.get(event.source.platform)
    guild_id = self._get_guild_id(event)
    if not guild_id or not hasattr(adapter, "leave_voice_channel"):
        return "Not in a voice channel."
    if not hasattr(adapter, "is_in_voice_channel") or not adapter.is_in_voice_channel(guild_id):
        return "Not in a voice channel."

    # Look up the text channel recorded for this guild *before* leaving,
    # because leaving clears that mapping. /voice leave may be typed in a
    # different channel than the one that was switched to "all" on join.
    bound_channel_id = getattr(adapter, "_voice_text_channels", {}).get(guild_id)

    await adapter.leave_voice_channel(guild_id)

    stale_chats = {event.source.chat_id}
    if bound_channel_id is not None:
        # The adapter keeps the id as an int; modes may be keyed by either
        # form, so clear both spellings.
        stale_chats.update((bound_channel_id, str(bound_channel_id)))
    for chat in stale_chats:
        self._voice_mode.pop(chat, None)
    self._save_voice_modes()
    return "Left voice channel."
async def _send_voice_reply(self, event: MessageEvent, text: str) -> None: async def _send_voice_reply(self, event: MessageEvent, text: str) -> None:
"""Generate TTS audio and send as a voice message before the text reply.""" """Generate TTS audio and send as a voice message before the text reply."""
try: try:
@@ -2178,7 +2252,15 @@ class GatewayRunner:
return return
adapter = self.adapters.get(event.source.platform) adapter = self.adapters.get(event.source.platform)
if adapter and hasattr(adapter, "send_voice"):
# If connected to a voice channel, play there instead of sending a file
guild_id = self._get_guild_id(event)
if (guild_id
and hasattr(adapter, "play_in_voice_channel")
and hasattr(adapter, "is_in_voice_channel")
and adapter.is_in_voice_channel(guild_id)):
await adapter.play_in_voice_channel(guild_id, actual_path)
elif adapter and hasattr(adapter, "send_voice"):
send_kwargs: Dict[str, Any] = { send_kwargs: Dict[str, Any] = {
"chat_id": event.source.chat_id, "chat_id": event.source.chat_id,
"audio_path": actual_path, "audio_path": actual_path,
@@ -2186,7 +2268,6 @@ class GatewayRunner:
} }
if event.source.thread_id: if event.source.thread_id:
send_kwargs["metadata"] = {"thread_id": event.source.thread_id} send_kwargs["metadata"] = {"thread_id": event.source.thread_id}
# Only pass metadata if the adapter accepts it
import inspect import inspect
sig = inspect.signature(adapter.send_voice) sig = inspect.signature(adapter.send_voice)
if "metadata" not in sig.parameters: if "metadata" not in sig.parameters:
@@ -2198,7 +2279,7 @@ class GatewayRunner:
except OSError: except OSError:
pass pass
except Exception as e: except Exception as e:
logger.warning("Auto voice reply failed: %s", e) logger.warning("Auto voice reply failed: %s", e, exc_info=True)
async def _handle_rollback_command(self, event: MessageEvent) -> str: async def _handle_rollback_command(self, event: MessageEvent) -> str:
"""Handle /rollback command — list or restore filesystem checkpoints.""" """Handle /rollback command — list or restore filesystem checkpoints."""

View File

@@ -43,7 +43,7 @@ dependencies = [
modal = ["swe-rex[modal]>=1.4.0"] modal = ["swe-rex[modal]>=1.4.0"]
daytona = ["daytona>=0.148.0"] daytona = ["daytona>=0.148.0"]
dev = ["pytest", "pytest-asyncio", "pytest-xdist", "mcp>=1.2.0"] dev = ["pytest", "pytest-asyncio", "pytest-xdist", "mcp>=1.2.0"]
messaging = ["python-telegram-bot>=20.0", "discord.py>=2.0", "aiohttp>=3.9.0", "slack-bolt>=1.18.0", "slack-sdk>=3.27.0"] messaging = ["python-telegram-bot>=20.0", "discord.py[voice]>=2.0", "aiohttp>=3.9.0", "slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
cron = ["croniter"] cron = ["croniter"]
slack = ["slack-bolt>=1.18.0", "slack-sdk>=3.27.0"] slack = ["slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
cli = ["simple-term-menu"] cli = ["simple-term-menu"]