feat: Discord voice channel support — bot joins VC and speaks replies
- /voice channel: the bot joins the user's voice channel and speaks its TTS replies there
- /voice leave: disconnect from the voice channel
- Auto-disconnect after 5 minutes of inactivity
- _get_guild_id() helper extracts the guild ID from raw_message
- Load the opus codec for voice playback
- discord.py[voice] in pyproject.toml (pulls in PyNaCl + davey)
This commit is contained in:
@@ -82,17 +82,35 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||||||
# Discord message limits
|
# Discord message limits
|
||||||
MAX_MESSAGE_LENGTH = 2000
|
MAX_MESSAGE_LENGTH = 2000
|
||||||
|
|
||||||
|
# Auto-disconnect from voice channel after this many seconds of inactivity
|
||||||
|
VOICE_TIMEOUT = 300
|
||||||
|
|
||||||
def __init__(self, config: PlatformConfig):
    """Initialise the Discord adapter with no client and empty voice state.

    Args:
        config: Platform configuration forwarded to the base adapter.
    """
    super().__init__(config, Platform.DISCORD)

    # No Discord client exists yet at construction time.
    self._client: Optional[commands.Bot] = None
    self._ready_event = asyncio.Event()
    self._allowed_user_ids: set = set()  # For button approval authorization

    # Voice channel state; every mapping below is keyed by guild_id.
    self._voice_clients: Dict[int, Any] = {}  # guild_id -> VoiceClient
    self._voice_text_channels: Dict[int, int] = {}  # guild_id -> text_channel_id
    self._voice_timeout_tasks: Dict[int, asyncio.Task] = {}  # guild_id -> timeout task
|
||||||
|
|
||||||
async def connect(self) -> bool:
|
async def connect(self) -> bool:
|
||||||
"""Connect to Discord and start receiving events."""
|
"""Connect to Discord and start receiving events."""
|
||||||
if not DISCORD_AVAILABLE:
|
if not DISCORD_AVAILABLE:
|
||||||
logger.error("[%s] discord.py not installed. Run: pip install discord.py", self.name)
|
logger.error("[%s] discord.py not installed. Run: pip install discord.py", self.name)
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# Load opus codec for voice channel support
|
||||||
|
if not discord.opus.is_loaded():
|
||||||
|
try:
|
||||||
|
discord.opus.load_opus("/opt/homebrew/lib/libopus.dylib")
|
||||||
|
except Exception:
|
||||||
|
# Try common Linux path as fallback
|
||||||
|
try:
|
||||||
|
discord.opus.load_opus("libopus.so.0")
|
||||||
|
except Exception:
|
||||||
|
logger.warning("Opus codec not found — voice channel playback disabled")
|
||||||
|
|
||||||
if not self.config.token:
|
if not self.config.token:
|
||||||
logger.error("[%s] No bot token configured", self.name)
|
logger.error("[%s] No bot token configured", self.name)
|
||||||
@@ -361,6 +379,108 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||||||
logger.error("[%s] Failed to send audio, falling back to base adapter: %s", self.name, e, exc_info=True)
|
logger.error("[%s] Failed to send audio, falling back to base adapter: %s", self.name, e, exc_info=True)
|
||||||
return await super().send_voice(chat_id, audio_path, caption, reply_to, metadata=metadata)
|
return await super().send_voice(chat_id, audio_path, caption, reply_to, metadata=metadata)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Voice channel methods (join / leave / play)
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def join_voice_channel(self, channel) -> bool:
    """Put the bot into *channel*, reusing the guild's live connection if any.

    Returns:
        False when there is no client or discord.py is unavailable;
        True once the bot is connected to *channel*.

    The inactivity timer for the guild is restarted on every successful
    call. Exceptions raised by ``move_to`` / ``connect`` propagate to the
    caller.
    """
    if not (self._client and DISCORD_AVAILABLE):
        return False

    guild_id = channel.guild.id
    current = self._voice_clients.get(guild_id)

    if current and current.is_connected():
        # A live connection exists in this guild: only move when the
        # requested channel differs from the one we are already in.
        if current.channel.id != channel.id:
            await current.move_to(channel)
    else:
        # No usable connection yet: open one and remember it.
        self._voice_clients[guild_id] = await channel.connect()

    self._reset_voice_timeout(guild_id)
    return True
|
||||||
|
|
||||||
|
async def leave_voice_channel(self, guild_id: int) -> None:
    """Disconnect from the voice channel in a guild and drop its state.

    Removes the guild's voice client, inactivity-timer task and text
    channel mapping. The bookkeeping is cleared even if ``disconnect()``
    raises; the exception still propagates to the caller.

    Args:
        guild_id: Guild whose voice connection should be closed.
    """
    vc = self._voice_clients.pop(guild_id, None)
    try:
        if vc and vc.is_connected():
            await vc.disconnect()
    finally:
        task = self._voice_timeout_tasks.pop(guild_id, None)
        # The inactivity handler calls this method from inside the timer
        # task itself. Cancelling the currently running task would abort
        # the caller at its next await (e.g. before it can post its
        # "left the channel" notice), so only cancel *other* tasks.
        if task and task is not asyncio.current_task():
            task.cancel()
        self._voice_text_channels.pop(guild_id, None)
|
||||||
|
|
||||||
|
async def play_in_voice_channel(self, guild_id: int, audio_path: str) -> bool:
    """Play an audio file in the guild's connected voice channel.

    Waits for any playback already in progress, then plays *audio_path*
    and blocks until the player reports completion.

    Args:
        guild_id: Guild whose voice connection should be used.
        audio_path: Path of the audio file handed to FFmpeg.

    Returns:
        False if the bot is not connected in this guild or the player
        reported an error; True when playback finished cleanly.
    """
    vc = self._voice_clients.get(guild_id)
    if not vc or not vc.is_connected():
        return False

    # Wait for current playback to finish
    while vc.is_playing():
        await asyncio.sleep(0.1)

    finished = asyncio.Event()
    # We are inside a coroutine, so a running loop is guaranteed.
    loop = asyncio.get_running_loop()
    failures = []

    def _after(error):
        # The original code hands the event over with call_soon_threadsafe,
        # i.e. this callback is expected to run outside the event loop.
        if error:
            logger.error("Voice playback error: %s", error)
            failures.append(error)
        loop.call_soon_threadsafe(finished.set)

    source = discord.FFmpegPCMAudio(audio_path)
    source = discord.PCMVolumeTransformer(source, volume=1.0)

    # Playback counts as activity: restart the inactivity timer now so it
    # cannot fire and disconnect the bot in the middle of this clip.
    self._reset_voice_timeout(guild_id)
    vc.play(source, after=_after)
    await finished.wait()

    self._reset_voice_timeout(guild_id)
    return not failures
|
||||||
|
|
||||||
|
async def get_user_voice_channel(self, guild_id: int, user_id: str):
    """Look up the voice channel a guild member is currently sitting in.

    Args:
        guild_id: Guild to search.
        user_id: Member ID; converted with ``int()`` before the lookup.

    Returns:
        The member's current voice channel, or None when there is no
        client, the guild or member cannot be resolved, or the member has
        no voice state.
    """
    client = self._client
    guild = client.get_guild(guild_id) if client else None
    member = guild.get_member(int(user_id)) if guild else None
    voice_state = member.voice if member else None
    return voice_state.channel if voice_state else None
|
||||||
|
|
||||||
|
def _reset_voice_timeout(self, guild_id: int) -> None:
    """Restart the guild's auto-disconnect inactivity countdown.

    Any timer task already registered for the guild is cancelled and a
    fresh ``_voice_timeout_handler`` task takes its place.
    """
    previous = self._voice_timeout_tasks.pop(guild_id, None)
    if previous:
        previous.cancel()

    timer = asyncio.ensure_future(self._voice_timeout_handler(guild_id))
    self._voice_timeout_tasks[guild_id] = timer
|
||||||
|
|
||||||
|
async def _voice_timeout_handler(self, guild_id: int) -> None:
    """Auto-disconnect after VOICE_TIMEOUT seconds of inactivity.

    Sleeps for ``VOICE_TIMEOUT`` seconds; if not cancelled in the
    meantime, leaves the guild's voice channel and posts a short notice
    in the text channel recorded for that guild (when one is known).

    Args:
        guild_id: Guild whose voice connection this timer guards.
    """
    try:
        await asyncio.sleep(self.VOICE_TIMEOUT)
    except asyncio.CancelledError:
        # Timer was reset or the bot left explicitly; nothing to do.
        return

    # Deregister this task before leaving. leave_voice_channel() cancels
    # whatever timer task is registered for the guild; if that is the task
    # running this coroutine, the cancellation would hit the next await and
    # the notice below would never be sent.
    if self._voice_timeout_tasks.get(guild_id) is asyncio.current_task():
        self._voice_timeout_tasks.pop(guild_id, None)

    # Read the text channel first: leaving clears the mapping.
    text_ch_id = self._voice_text_channels.get(guild_id)
    try:
        await self.leave_voice_channel(guild_id)
    except Exception:
        # This runs as a background task with no caller to report to.
        logger.warning(
            "Voice inactivity disconnect failed for guild %s", guild_id, exc_info=True
        )
        return

    if not (text_ch_id and self._client):
        return
    ch = self._client.get_channel(text_ch_id)
    if not ch:
        return
    try:
        await ch.send("Left voice channel (inactivity timeout).")
    except Exception as exc:
        # Best effort: the disconnect already happened, so only record it.
        logger.debug("Could not post voice timeout notice: %s", exc)
|
||||||
|
|
||||||
|
def is_in_voice_channel(self, guild_id: int) -> bool:
    """Report whether the bot holds a live voice connection in the guild.

    Returns False when no voice client is stored for *guild_id*; otherwise
    returns the client's own ``is_connected()`` result.
    """
    voice_client = self._voice_clients.get(guild_id)
    if voice_client is None:
        return False
    return voice_client.is_connected()
|
||||||
|
|
||||||
async def send_image_file(
|
async def send_image_file(
|
||||||
self,
|
self,
|
||||||
chat_id: str,
|
chat_id: str,
|
||||||
@@ -685,8 +805,10 @@ class DiscordAdapter(BasePlatformAdapter):
|
|||||||
await self._run_simple_slash(interaction, "/reload-mcp")
|
await self._run_simple_slash(interaction, "/reload-mcp")
|
||||||
|
|
||||||
@tree.command(name="voice", description="Toggle voice reply mode")
|
@tree.command(name="voice", description="Toggle voice reply mode")
|
||||||
@discord.app_commands.describe(mode="Voice mode: on, off, tts, or status")
|
@discord.app_commands.describe(mode="Voice mode: on, off, tts, channel, leave, or status")
|
||||||
@discord.app_commands.choices(mode=[
|
@discord.app_commands.choices(mode=[
|
||||||
|
discord.app_commands.Choice(name="channel — join your voice channel", value="channel"),
|
||||||
|
discord.app_commands.Choice(name="leave — leave voice channel", value="leave"),
|
||||||
discord.app_commands.Choice(name="on — voice reply to voice messages", value="on"),
|
discord.app_commands.Choice(name="on — voice reply to voice messages", value="on"),
|
||||||
discord.app_commands.Choice(name="tts — voice reply to all messages", value="tts"),
|
discord.app_commands.Choice(name="tts — voice reply to all messages", value="tts"),
|
||||||
discord.app_commands.Choice(name="off — text only", value="off"),
|
discord.app_commands.Choice(name="off — text only", value="off"),
|
||||||
|
|||||||
@@ -2105,8 +2105,22 @@ class GatewayRunner:
|
|||||||
f"Cron jobs and cross-platform messages will be delivered here."
|
f"Cron jobs and cross-platform messages will be delivered here."
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@staticmethod
def _get_guild_id(event: MessageEvent) -> Optional[int]:
    """Pull the Discord guild ID out of an event's raw message object.

    Prefers a truthy ``guild_id`` attribute on the raw object (converted
    with ``int()``), then falls back to ``guild.id``. Returns None when
    the event has no raw message or neither attribute yields a guild.
    """
    raw = getattr(event, "raw_message", None)
    if raw is None:
        return None

    # Slash command interaction
    interaction_guild_id = getattr(raw, "guild_id", None)
    if interaction_guild_id:
        return int(interaction_guild_id)

    # Regular message
    guild = getattr(raw, "guild", None)
    return guild.id if guild else None
|
||||||
|
|
||||||
async def _handle_voice_command(self, event: MessageEvent) -> str:
|
async def _handle_voice_command(self, event: MessageEvent) -> str:
|
||||||
"""Handle /voice [on|off|tts|status] command."""
|
"""Handle /voice [on|off|tts|channel|leave|status] command."""
|
||||||
args = event.get_command_args().strip().lower()
|
args = event.get_command_args().strip().lower()
|
||||||
chat_id = event.source.chat_id
|
chat_id = event.source.chat_id
|
||||||
|
|
||||||
@@ -2129,6 +2143,10 @@ class GatewayRunner:
|
|||||||
"Auto-TTS enabled.\n"
|
"Auto-TTS enabled.\n"
|
||||||
"All replies will include a voice message."
|
"All replies will include a voice message."
|
||||||
)
|
)
|
||||||
|
elif args in ("channel", "join"):
|
||||||
|
return await self._handle_voice_channel_join(event)
|
||||||
|
elif args == "leave":
|
||||||
|
return await self._handle_voice_channel_leave(event)
|
||||||
elif args == "status":
|
elif args == "status":
|
||||||
mode = self._voice_mode.get(chat_id, "off")
|
mode = self._voice_mode.get(chat_id, "off")
|
||||||
labels = {
|
labels = {
|
||||||
@@ -2136,6 +2154,14 @@ class GatewayRunner:
|
|||||||
"voice_only": "On (voice reply to voice messages)",
|
"voice_only": "On (voice reply to voice messages)",
|
||||||
"all": "TTS (voice reply to all messages)",
|
"all": "TTS (voice reply to all messages)",
|
||||||
}
|
}
|
||||||
|
# Append voice channel info if connected
|
||||||
|
adapter = self.adapters.get(event.source.platform)
|
||||||
|
guild_id = self._get_guild_id(event)
|
||||||
|
if guild_id and hasattr(adapter, "is_in_voice_channel"):
|
||||||
|
if adapter.is_in_voice_channel(guild_id):
|
||||||
|
vc = adapter._voice_clients.get(guild_id)
|
||||||
|
ch_name = vc.channel.name if vc and vc.channel else "unknown"
|
||||||
|
return f"Voice mode: {labels.get(mode, mode)}\nVoice channel: {ch_name}"
|
||||||
return f"Voice mode: {labels.get(mode, mode)}"
|
return f"Voice mode: {labels.get(mode, mode)}"
|
||||||
else:
|
else:
|
||||||
# Toggle: off → on, on/all → off
|
# Toggle: off → on, on/all → off
|
||||||
@@ -2149,6 +2175,54 @@ class GatewayRunner:
|
|||||||
self._save_voice_modes()
|
self._save_voice_modes()
|
||||||
return "Voice mode disabled."
|
return "Voice mode disabled."
|
||||||
|
|
||||||
|
async def _handle_voice_channel_join(self, event: MessageEvent) -> str:
    """Join the voice channel the requesting user is in and enable TTS.

    On success the invoking chat is recorded on the adapter as the guild's
    text channel, the chat's voice mode is set to ``"all"`` and the voice
    modes are saved.

    Returns:
        A user-facing status message for every outcome (unsupported
        platform, not in a server, user not in voice, join failure,
        success).
    """
    source = event.source
    adapter = self.adapters.get(source.platform)
    if not hasattr(adapter, "join_voice_channel"):
        return "Voice channels are not supported on this platform."

    guild_id = self._get_guild_id(event)
    if not guild_id:
        return "This command only works in a Discord server."

    voice_channel = await adapter.get_user_voice_channel(guild_id, source.user_id)
    if not voice_channel:
        return "You need to be in a voice channel first."

    try:
        joined = await adapter.join_voice_channel(voice_channel)
    except Exception as e:
        logger.warning("Failed to join voice channel: %s", e)
        return f"Failed to join voice channel: {e}"

    if not joined:
        return "Failed to join voice channel. Check bot permissions (Connect + Speak)."

    # Remember where the join was requested so the inactivity notice can be
    # posted there, and make every reply in this chat spoken.
    chat_id = source.chat_id
    adapter._voice_text_channels[guild_id] = int(chat_id)
    self._voice_mode[chat_id] = "all"
    self._save_voice_modes()
    return (
        f"Joined voice channel **{voice_channel.name}**.\n"
        "I'll speak my replies here. Use /voice leave to disconnect."
    )
|
||||||
|
|
||||||
|
async def _handle_voice_channel_leave(self, event: MessageEvent) -> str:
    """Leave the Discord voice channel and switch spoken replies back off.

    Clears the voice mode of the chat that issued this command and of the
    chat that originally requested the join (they can differ when
    ``/voice leave`` is typed in another text channel), then saves the
    voice modes.

    Returns:
        ``"Left voice channel."`` on success, or
        ``"Not in a voice channel."`` when there is no guild, the adapter
        lacks voice support, or the bot is not connected in the guild.
    """
    adapter = self.adapters.get(event.source.platform)
    guild_id = self._get_guild_id(event)

    supports_voice = hasattr(adapter, "leave_voice_channel") and hasattr(
        adapter, "is_in_voice_channel"
    )
    if not guild_id or not supports_voice or not adapter.is_in_voice_channel(guild_id):
        return "Not in a voice channel."

    # Capture the chat that requested the join before leaving: the adapter
    # drops this mapping as part of leave_voice_channel().
    joined_from = getattr(adapter, "_voice_text_channels", {}).get(guild_id)

    await adapter.leave_voice_channel(guild_id)

    self._voice_mode.pop(event.source.chat_id, None)
    if joined_from is not None:
        # The join handler stored int(chat_id); voice modes are keyed by
        # the chat_id as it appears on the event, so convert back.
        self._voice_mode.pop(str(joined_from), None)
    self._save_voice_modes()
    return "Left voice channel."
|
||||||
|
|
||||||
async def _send_voice_reply(self, event: MessageEvent, text: str) -> None:
|
async def _send_voice_reply(self, event: MessageEvent, text: str) -> None:
|
||||||
"""Generate TTS audio and send as a voice message before the text reply."""
|
"""Generate TTS audio and send as a voice message before the text reply."""
|
||||||
try:
|
try:
|
||||||
@@ -2178,7 +2252,15 @@ class GatewayRunner:
|
|||||||
return
|
return
|
||||||
|
|
||||||
adapter = self.adapters.get(event.source.platform)
|
adapter = self.adapters.get(event.source.platform)
|
||||||
if adapter and hasattr(adapter, "send_voice"):
|
|
||||||
|
# If connected to a voice channel, play there instead of sending a file
|
||||||
|
guild_id = self._get_guild_id(event)
|
||||||
|
if (guild_id
|
||||||
|
and hasattr(adapter, "play_in_voice_channel")
|
||||||
|
and hasattr(adapter, "is_in_voice_channel")
|
||||||
|
and adapter.is_in_voice_channel(guild_id)):
|
||||||
|
await adapter.play_in_voice_channel(guild_id, actual_path)
|
||||||
|
elif adapter and hasattr(adapter, "send_voice"):
|
||||||
send_kwargs: Dict[str, Any] = {
|
send_kwargs: Dict[str, Any] = {
|
||||||
"chat_id": event.source.chat_id,
|
"chat_id": event.source.chat_id,
|
||||||
"audio_path": actual_path,
|
"audio_path": actual_path,
|
||||||
@@ -2186,7 +2268,6 @@ class GatewayRunner:
|
|||||||
}
|
}
|
||||||
if event.source.thread_id:
|
if event.source.thread_id:
|
||||||
send_kwargs["metadata"] = {"thread_id": event.source.thread_id}
|
send_kwargs["metadata"] = {"thread_id": event.source.thread_id}
|
||||||
# Only pass metadata if the adapter accepts it
|
|
||||||
import inspect
|
import inspect
|
||||||
sig = inspect.signature(adapter.send_voice)
|
sig = inspect.signature(adapter.send_voice)
|
||||||
if "metadata" not in sig.parameters:
|
if "metadata" not in sig.parameters:
|
||||||
@@ -2198,7 +2279,7 @@ class GatewayRunner:
|
|||||||
except OSError:
|
except OSError:
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning("Auto voice reply failed: %s", e)
|
logger.warning("Auto voice reply failed: %s", e, exc_info=True)
|
||||||
|
|
||||||
async def _handle_rollback_command(self, event: MessageEvent) -> str:
|
async def _handle_rollback_command(self, event: MessageEvent) -> str:
|
||||||
"""Handle /rollback command — list or restore filesystem checkpoints."""
|
"""Handle /rollback command — list or restore filesystem checkpoints."""
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ dependencies = [
|
|||||||
modal = ["swe-rex[modal]>=1.4.0"]
|
modal = ["swe-rex[modal]>=1.4.0"]
|
||||||
daytona = ["daytona>=0.148.0"]
|
daytona = ["daytona>=0.148.0"]
|
||||||
dev = ["pytest", "pytest-asyncio", "pytest-xdist", "mcp>=1.2.0"]
|
dev = ["pytest", "pytest-asyncio", "pytest-xdist", "mcp>=1.2.0"]
|
||||||
messaging = ["python-telegram-bot>=20.0", "discord.py>=2.0", "aiohttp>=3.9.0", "slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
|
messaging = ["python-telegram-bot>=20.0", "discord.py[voice]>=2.0", "aiohttp>=3.9.0", "slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
|
||||||
cron = ["croniter"]
|
cron = ["croniter"]
|
||||||
slack = ["slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
|
slack = ["slack-bolt>=1.18.0", "slack-sdk>=3.27.0"]
|
||||||
cli = ["simple-term-menu"]
|
cli = ["simple-term-menu"]
|
||||||
|
|||||||
Reference in New Issue
Block a user