feat: Discord voice channel support — bot joins VC and speaks replies
- /voice channel: bot joins the user's voice channel and speaks TTS replies
- /voice leave: disconnect from the voice channel
- Auto-disconnect after 5 minutes of inactivity
- _get_guild_id() helper extracts the guild ID from raw_message
- Load the Opus codec for voice playback
- discord.py[voice] in pyproject.toml (pulls in PyNaCl + davey)
This commit is contained in:
@@ -82,17 +82,35 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
# Discord message limits
|
||||
MAX_MESSAGE_LENGTH = 2000
|
||||
|
||||
# Auto-disconnect from voice channel after this many seconds of inactivity
|
||||
VOICE_TIMEOUT = 300
|
||||
|
||||
def __init__(self, config: PlatformConfig):
    """Initialise the adapter's client handle, ready flag and voice bookkeeping.

    Args:
        config: Platform configuration, forwarded to the base adapter
            together with ``Platform.DISCORD``.
    """
    super().__init__(config, Platform.DISCORD)

    # No bot client exists yet; the ready event starts unset.
    self._client: Optional[commands.Bot] = None
    self._ready_event = asyncio.Event()

    # For button approval authorization
    self._allowed_user_ids: set = set()

    # Voice channel state; every mapping below is keyed by guild_id.
    self._voice_clients: Dict[int, Any] = {}  # guild_id -> VoiceClient
    self._voice_text_channels: Dict[int, int] = {}  # guild_id -> text_channel_id
    self._voice_timeout_tasks: Dict[int, asyncio.Task] = {}  # guild_id -> timeout task
|
||||
|
||||
async def connect(self) -> bool:
|
||||
"""Connect to Discord and start receiving events."""
|
||||
if not DISCORD_AVAILABLE:
|
||||
logger.error("[%s] discord.py not installed. Run: pip install discord.py", self.name)
|
||||
return False
|
||||
|
||||
# Load opus codec for voice channel support
|
||||
if not discord.opus.is_loaded():
|
||||
try:
|
||||
discord.opus.load_opus("/opt/homebrew/lib/libopus.dylib")
|
||||
except Exception:
|
||||
# Try common Linux path as fallback
|
||||
try:
|
||||
discord.opus.load_opus("libopus.so.0")
|
||||
except Exception:
|
||||
logger.warning("Opus codec not found — voice channel playback disabled")
|
||||
|
||||
if not self.config.token:
|
||||
logger.error("[%s] No bot token configured", self.name)
|
||||
@@ -361,6 +379,108 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
logger.error("[%s] Failed to send audio, falling back to base adapter: %s", self.name, e, exc_info=True)
|
||||
return await super().send_voice(chat_id, audio_path, caption, reply_to, metadata=metadata)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Voice channel methods (join / leave / play)
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def join_voice_channel(self, channel) -> bool:
    """Connect to *channel*, or move there when already connected in its guild.

    Args:
        channel: Voice channel to join; ``channel.guild.id`` keys the
            per-guild voice state.

    Returns:
        False when there is no client or discord.py is unavailable,
        otherwise True once the bot is in the requested channel. Errors
        raised by the connect/move calls are not caught here.
    """
    if not (self._client and DISCORD_AVAILABLE):
        return False

    gid = channel.guild.id
    current = self._voice_clients.get(gid)

    if not (current and current.is_connected()):
        # No live connection in this guild: open one and remember it.
        self._voice_clients[gid] = await channel.connect()
    elif current.channel.id != channel.id:
        # Connected elsewhere in the same guild, so hop to the new channel.
        await current.move_to(channel)

    # Joining (or re-requesting the same channel) counts as activity.
    self._reset_voice_timeout(gid)
    return True
|
||||
|
||||
async def leave_voice_channel(self, guild_id: int) -> None:
    """Disconnect from the voice channel in a guild and drop its state.

    Removes the guild's voice client, inactivity timer and text-channel
    mapping. Safe to call when nothing is connected.

    Args:
        guild_id: Guild whose voice connection should be closed.
    """
    vc = self._voice_clients.pop(guild_id, None)
    try:
        if vc and vc.is_connected():
            await vc.disconnect()
    finally:
        # Always clear the bookkeeping, even if disconnect() raised, so a
        # failed disconnect does not leave a stale timer or mapping behind.
        task = self._voice_timeout_tasks.pop(guild_id, None)
        # Never cancel the task we are running in: when the inactivity
        # timer itself triggers the leave, cancelling it would abort
        # whatever that task awaits next (its goodbye message).
        if task is not None and task is not asyncio.current_task():
            task.cancel()
        self._voice_text_channels.pop(guild_id, None)
|
||||
|
||||
async def play_in_voice_channel(self, guild_id: int, audio_path: str) -> bool:
    """Play an audio file in the connected voice channel.

    Waits for any audio that is already playing, then plays *audio_path*
    and blocks until the player signals completion.

    Args:
        guild_id: Guild whose voice connection should be used.
        audio_path: Path handed to ``discord.FFmpegPCMAudio``.

    Returns:
        True when playback finished without a reported error. False when
        the bot is not connected in this guild (before or after waiting
        for earlier audio) or when the player reported an error.
    """
    vc = self._voice_clients.get(guild_id)
    if not vc or not vc.is_connected():
        return False

    # Wait for current playback to finish
    while vc.is_playing():
        await asyncio.sleep(0.1)

    # The connection may have been dropped while we were waiting; bail out
    # before spawning an FFmpeg source that could never be played.
    if not vc.is_connected():
        return False

    done = asyncio.Event()
    loop = asyncio.get_running_loop()
    errors = []

    def _after(error):
        # Called by the player once the stream ends; hop back onto the
        # event loop thread-safely before touching the asyncio.Event.
        if error:
            logger.error("Voice playback error: %s", error)
            errors.append(error)
        loop.call_soon_threadsafe(done.set)

    source = discord.FFmpegPCMAudio(audio_path)
    source = discord.PCMVolumeTransformer(source, volume=1.0)
    try:
        vc.play(source, after=_after)
    except Exception:
        # play() refused the source, so nothing will ever clean it up for us.
        source.cleanup()
        raise

    await done.wait()
    self._reset_voice_timeout(guild_id)
    return not errors
|
||||
|
||||
async def get_user_voice_channel(self, guild_id: int, user_id: str):
    """Return the voice channel the user is currently in, or None.

    Args:
        guild_id: Guild to look the member up in.
        user_id: The user's id; it is converted with ``int()`` before the
            member lookup.

    Returns:
        The member's current voice channel, or None when there is no
        client, the id is missing or not numeric, the guild or member is
        not found, or the member has no voice state.
    """
    if not self._client:
        return None

    # A missing or malformed id cannot match any member; treat it as
    # "not in voice" rather than letting int() raise into the caller.
    try:
        member_id = int(user_id)
    except (TypeError, ValueError):
        return None

    guild = self._client.get_guild(guild_id)
    if not guild:
        return None

    member = guild.get_member(member_id)
    if not member or not member.voice:
        return None
    return member.voice.channel
|
||||
|
||||
def _reset_voice_timeout(self, guild_id: int) -> None:
    """Restart the auto-disconnect inactivity countdown for a guild.

    Cancels the countdown task currently registered for *guild_id*, if
    any, and schedules a fresh ``_voice_timeout_handler`` in its place.
    """
    previous = self._voice_timeout_tasks.pop(guild_id, None)
    if previous:
        previous.cancel()

    countdown = self._voice_timeout_handler(guild_id)
    self._voice_timeout_tasks[guild_id] = asyncio.ensure_future(countdown)
|
||||
|
||||
async def _voice_timeout_handler(self, guild_id: int) -> None:
    """Auto-disconnect after VOICE_TIMEOUT seconds of inactivity.

    Sleeps for ``VOICE_TIMEOUT`` seconds; if not cancelled in the
    meantime, leaves the guild's voice channel and posts a short notice
    to the text channel recorded for that guild (best effort).

    Args:
        guild_id: Guild whose voice connection is being timed.
    """
    try:
        await asyncio.sleep(self.VOICE_TIMEOUT)
    except asyncio.CancelledError:
        # The countdown was cancelled; nothing to disconnect.
        return

    # leave_voice_channel() may cancel the timeout task registered for this
    # guild. If that task is the one running right now, the cancellation
    # would land on the send() below and the notice would never go out, so
    # take ourselves out of the registry first.
    if self._voice_timeout_tasks.get(guild_id) is asyncio.current_task():
        self._voice_timeout_tasks.pop(guild_id, None)

    # Read the text channel before leaving; it is looked up per guild and
    # may no longer be available afterwards.
    text_ch_id = self._voice_text_channels.get(guild_id)
    await self.leave_voice_channel(guild_id)

    if not (text_ch_id and self._client):
        return
    ch = self._client.get_channel(text_ch_id)
    if not ch:
        return
    try:
        await ch.send("Left voice channel (inactivity timeout).")
    except Exception as exc:
        # The notice is best effort, but leave a trace instead of hiding it.
        logger.debug("Could not post voice timeout notice: %s", exc)
|
||||
|
||||
def is_in_voice_channel(self, guild_id: int) -> bool:
    """Report whether a connected voice client is recorded for this guild."""
    voice_client = self._voice_clients.get(guild_id)
    if voice_client is None:
        return False
    return voice_client.is_connected()
|
||||
|
||||
async def send_image_file(
|
||||
self,
|
||||
chat_id: str,
|
||||
@@ -685,8 +805,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
||||
await self._run_simple_slash(interaction, "/reload-mcp")
|
||||
|
||||
@tree.command(name="voice", description="Toggle voice reply mode")
|
||||
@discord.app_commands.describe(mode="Voice mode: on, off, tts, or status")
|
||||
@discord.app_commands.describe(mode="Voice mode: on, off, tts, channel, leave, or status")
|
||||
@discord.app_commands.choices(mode=[
|
||||
discord.app_commands.Choice(name="channel — join your voice channel", value="channel"),
|
||||
discord.app_commands.Choice(name="leave — leave voice channel", value="leave"),
|
||||
discord.app_commands.Choice(name="on — voice reply to voice messages", value="on"),
|
||||
discord.app_commands.Choice(name="tts — voice reply to all messages", value="tts"),
|
||||
discord.app_commands.Choice(name="off — text only", value="off"),
|
||||
|
||||
@@ -2105,8 +2105,22 @@ class GatewayRunner:
|
||||
f"Cron jobs and cross-platform messages will be delivered here."
|
||||
)
|
||||
|
||||
@staticmethod
def _get_guild_id(event: MessageEvent) -> Optional[int]:
    """Pull the Discord guild id out of an event's raw message, if any.

    Returns:
        ``int(raw.guild_id)`` when the raw object has a truthy ``guild_id``,
        else ``raw.guild.id`` when it has a truthy ``guild``, else None
        (including when the event carries no raw message).
    """
    raw = getattr(event, "raw_message", None)
    if raw is None:
        return None

    # Slash command interaction: the id is exposed directly.
    direct_id = getattr(raw, "guild_id", None)
    if direct_id:
        return int(direct_id)

    # Regular message: go through the guild object.
    guild = getattr(raw, "guild", None)
    return guild.id if guild else None
|
||||
|
||||
async def _handle_voice_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /voice [on|off|tts|status] command."""
|
||||
"""Handle /voice [on|off|tts|channel|leave|status] command."""
|
||||
args = event.get_command_args().strip().lower()
|
||||
chat_id = event.source.chat_id
|
||||
|
||||
@@ -2129,6 +2143,10 @@ class GatewayRunner:
|
||||
"Auto-TTS enabled.\n"
|
||||
"All replies will include a voice message."
|
||||
)
|
||||
elif args in ("channel", "join"):
|
||||
return await self._handle_voice_channel_join(event)
|
||||
elif args == "leave":
|
||||
return await self._handle_voice_channel_leave(event)
|
||||
elif args == "status":
|
||||
mode = self._voice_mode.get(chat_id, "off")
|
||||
labels = {
|
||||
@@ -2136,6 +2154,14 @@ class GatewayRunner:
|
||||
"voice_only": "On (voice reply to voice messages)",
|
||||
"all": "TTS (voice reply to all messages)",
|
||||
}
|
||||
# Append voice channel info if connected
|
||||
adapter = self.adapters.get(event.source.platform)
|
||||
guild_id = self._get_guild_id(event)
|
||||
if guild_id and hasattr(adapter, "is_in_voice_channel"):
|
||||
if adapter.is_in_voice_channel(guild_id):
|
||||
vc = adapter._voice_clients.get(guild_id)
|
||||
ch_name = vc.channel.name if vc and vc.channel else "unknown"
|
||||
return f"Voice mode: {labels.get(mode, mode)}\nVoice channel: {ch_name}"
|
||||
return f"Voice mode: {labels.get(mode, mode)}"
|
||||
else:
|
||||
# Toggle: off → on, on/all → off
|
||||
@@ -2149,6 +2175,54 @@ class GatewayRunner:
|
||||
self._save_voice_modes()
|
||||
return "Voice mode disabled."
|
||||
|
||||
async def _handle_voice_channel_join(self, event: MessageEvent) -> str:
    """Join the voice channel the requesting user is in and enable spoken replies.

    On success the adapter's guild -> text-channel mapping is pointed at
    the requesting chat, that chat's voice mode is set to ``"all"`` and
    the voice modes are saved.

    Returns:
        A user-facing status message for every outcome.
    """
    source = event.source
    adapter = self.adapters.get(source.platform)
    if not hasattr(adapter, "join_voice_channel"):
        return "Voice channels are not supported on this platform."

    guild_id = self._get_guild_id(event)
    if not guild_id:
        return "This command only works in a Discord server."

    voice_channel = await adapter.get_user_voice_channel(guild_id, source.user_id)
    if not voice_channel:
        return "You need to be in a voice channel first."

    try:
        joined = await adapter.join_voice_channel(voice_channel)
    except Exception as e:
        logger.warning("Failed to join voice channel: %s", e)
        return f"Failed to join voice channel: {e}"

    if not joined:
        return "Failed to join voice channel. Check bot permissions (Connect + Speak)."

    # Remember which text channel asked for the join, then switch that
    # chat to speak every reply.
    adapter._voice_text_channels[guild_id] = int(source.chat_id)
    self._voice_mode[source.chat_id] = "all"
    self._save_voice_modes()
    return (
        f"Joined voice channel **{voice_channel.name}**.\n"
        f"I'll speak my replies here. Use /voice leave to disconnect."
    )
|
||||
|
||||
async def _handle_voice_channel_leave(self, event: MessageEvent) -> str:
    """Leave the Discord voice channel and turn spoken replies back off.

    Clears the stored voice mode both for the chat that issued the
    command and for the text channel recorded for the guild's voice
    session (they differ when ``/voice leave`` is run from another
    channel), then saves the voice modes.

    Returns:
        ``"Left voice channel."`` on success, ``"Not in a voice channel."``
        when there is no guild, no voice-capable adapter, or no active
        connection.
    """
    adapter = self.adapters.get(event.source.platform)
    guild_id = self._get_guild_id(event)

    voice_capable = hasattr(adapter, "leave_voice_channel") and hasattr(
        adapter, "is_in_voice_channel"
    )
    if not guild_id or not voice_capable or not adapter.is_in_voice_channel(guild_id):
        return "Not in a voice channel."

    # Look this up before leaving, in case the adapter drops the mapping
    # as part of disconnecting.
    session_chat = getattr(adapter, "_voice_text_channels", {}).get(guild_id)

    await adapter.leave_voice_channel(guild_id)

    self._voice_mode.pop(event.source.chat_id, None)
    if session_chat is not None:
        # The mapping stores the chat id as an int; voice modes appear to be
        # keyed by the string form of the chat id, so convert back.
        self._voice_mode.pop(str(session_chat), None)
    self._save_voice_modes()
    return "Left voice channel."
|
||||
|
||||
async def _send_voice_reply(self, event: MessageEvent, text: str) -> None:
|
||||
"""Generate TTS audio and send as a voice message before the text reply."""
|
||||
try:
|
||||
@@ -2178,7 +2252,15 @@ class GatewayRunner:
|
||||
return
|
||||
|
||||
adapter = self.adapters.get(event.source.platform)
|
||||
if adapter and hasattr(adapter, "send_voice"):
|
||||
|
||||
# If connected to a voice channel, play there instead of sending a file
|
||||
guild_id = self._get_guild_id(event)
|
||||
if (guild_id
|
||||
and hasattr(adapter, "play_in_voice_channel")
|
||||
and hasattr(adapter, "is_in_voice_channel")
|
||||
and adapter.is_in_voice_channel(guild_id)):
|
||||
await adapter.play_in_voice_channel(guild_id, actual_path)
|
||||
elif adapter and hasattr(adapter, "send_voice"):
|
||||
send_kwargs: Dict[str, Any] = {
|
||||
"chat_id": event.source.chat_id,
|
||||
"audio_path": actual_path,
|
||||
@@ -2186,7 +2268,6 @@ class GatewayRunner:
|
||||
}
|
||||
if event.source.thread_id:
|
||||
send_kwargs["metadata"] = {"thread_id": event.source.thread_id}
|
||||
# Only pass metadata if the adapter accepts it
|
||||
import inspect
|
||||
sig = inspect.signature(adapter.send_voice)
|
||||
if "metadata" not in sig.parameters:
|
||||
@@ -2198,7 +2279,7 @@ class GatewayRunner:
|
||||
except OSError:
|
||||
pass
|
||||
except Exception as e:
|
||||
logger.warning("Auto voice reply failed: %s", e)
|
||||
logger.warning("Auto voice reply failed: %s", e, exc_info=True)
|
||||
|
||||
async def _handle_rollback_command(self, event: MessageEvent) -> str:
|
||||
"""Handle /rollback command — list or restore filesystem checkpoints."""
|
||||
|
||||
@@ -43,7 +43,7 @@ dependencies = [
|
||||
modal = ["swe-rex[modal]>=1.4.0"]
|
||||
daytona = ["daytona>=0.148.0"]
|
||||
dev = ["pytest", "pytest-asyncio", "pytest-xdist", "mcp>=1.2.0"]
|
||||
messaging = ["python-telegram-bot>=20.0", "discord.py>=2.0", "aiohttp>=3.9.0", "slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
|
||||
messaging = ["python-telegram-bot>=20.0", "discord.py[voice]>=2.0", "aiohttp>=3.9.0", "slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
|
||||
cron = ["croniter"]
|
||||
slack = ["slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
|
||||
cli = ["simple-term-menu"]
|
||||
|
||||
Reference in New Issue
Block a user