# Imports this change adds to gateway/platforms/base.py.
import re
from typing import Dict, List, Optional, Any, Callable, Awaitable, Tuple


async def send_image(
    self,
    chat_id: str,
    image_url: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
) -> "SendResult":
    """
    Send an image natively via the platform API.

    Override in subclasses to send images as proper attachments instead of
    plain-text URLs. This default implementation has no native support: it
    sends the URL (preceded by the caption on its own line, if one is given)
    as an ordinary text message through ``self.send``.

    Args:
        chat_id: Target chat identifier, passed through to ``send``.
        image_url: URL of the image to deliver.
        caption: Optional text placed on the line above the URL.
        reply_to: Optional message ID to reply to, passed through to ``send``.

    Returns:
        The result of the underlying ``self.send`` call.
    """
    # Fallback: send URL as text (subclasses override for native images)
    text = f"{caption}\n{image_url}" if caption else image_url
    return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)


@staticmethod
def extract_images(content: str) -> Tuple[List[Tuple[str, str]], str]:
    """
    Extract image URLs from markdown and HTML image tags in a response.

    Finds patterns like:
    - ![alt text](https://example.com/image.png)
    - <img src="https://example.com/image.png">
    - <img src="https://example.com/image.png"></img>
    - <img src="https://example.com/image.png"/>

    Markdown links are only treated as images when the URL contains a known
    image extension or image-hosting domain; other ``![..](..)`` links are
    left untouched in the text. HTML ``<img>`` tags are always extracted.

    Args:
        content: The response text to scan.

    Returns:
        Tuple of (list of (url, alt_text) pairs, cleaned content with the
        extracted image tags removed). Markdown images come first, then HTML
        images, each group in order of appearance. HTML images have an empty
        alt text. When no image is found the content is returned unchanged.
    """
    # Substrings that mark a URL as an image: file extensions plus the CDN
    # hosts used by the image generation backends.
    image_markers = (
        '.png', '.jpg', '.jpeg', '.gif', '.webp',
        'fal.media', 'fal-cdn', 'replicate.delivery',
    )

    # Markdown images: ![alt](url)
    md_pattern = re.compile(r'!\[([^\]]*)\]\((https?://[^\s\)]+)\)')
    # HTML img tags: <img src="url">, <img src="url"></img> or <img src="url"/>
    html_pattern = re.compile(
        r'<img\s+src=["\']?(https?://[^\s"\'<>]+)["\']?\s*/?>\s*(?:</img>)?'
    )

    def looks_like_image(url: str) -> bool:
        lowered = url.lower()
        return any(marker in lowered for marker in image_markers)

    images: List[Tuple[str, str]] = [
        (match.group(2), match.group(1))
        for match in md_pattern.finditer(content)
        if looks_like_image(match.group(2))
    ]
    images.extend((match.group(1), "") for match in html_pattern.finditer(content))

    if not images:
        return images, content

    def drop_extracted(match: "re.Match") -> str:
        # Remove only the markdown links that were actually extracted; a
        # link that failed the image filter is not sent as an attachment,
        # so deleting it here would silently lose it.
        return '' if looks_like_image(match.group(2)) else match.group(0)

    cleaned = md_pattern.sub(drop_extracted, content)
    cleaned = html_pattern.sub('', cleaned)
    # Clean up blank lines left behind where image tags were removed
    cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()

    return images, cleaned
@@ -231,23 +294,41 @@ class BasePlatformAdapter(ABC): # Send response if any if response: - result = await self.send( - chat_id=event.source.chat_id, - content=response, - reply_to=event.message_id - ) + # Extract image URLs and send them as native platform attachments + images, text_content = self.extract_images(response) - # Log send failures (don't raise - user already saw tool progress) - if not result.success: - print(f"[{self.name}] Failed to send response: {result.error}") - # Try sending without markdown as fallback - fallback_result = await self.send( + # Send the text portion first (if any remains after extracting images) + if text_content: + result = await self.send( chat_id=event.source.chat_id, - content=f"(Response formatting failed, plain text:)\n\n{response[:3500]}", + content=text_content, reply_to=event.message_id ) - if not fallback_result.success: - print(f"[{self.name}] Fallback send also failed: {fallback_result.error}") + + # Log send failures (don't raise - user already saw tool progress) + if not result.success: + print(f"[{self.name}] Failed to send response: {result.error}") + # Try sending without markdown as fallback + fallback_result = await self.send( + chat_id=event.source.chat_id, + content=f"(Response formatting failed, plain text:)\n\n{text_content[:3500]}", + reply_to=event.message_id + ) + if not fallback_result.success: + print(f"[{self.name}] Fallback send also failed: {fallback_result.error}") + + # Send extracted images as native attachments + for image_url, alt_text in images: + try: + img_result = await self.send_image( + chat_id=event.source.chat_id, + image_url=image_url, + caption=alt_text if alt_text else None, + ) + if not img_result.success: + print(f"[{self.name}] Failed to send image: {img_result.error}") + except Exception as img_err: + print(f"[{self.name}] Error sending image: {img_err}") # Check if there's a pending message that was queued during our processing if session_key in self._pending_messages: diff 
async def send_image(
    self,
    chat_id: str,
    image_url: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
) -> SendResult:
    """
    Send an image natively as a Discord file attachment.

    The image is downloaded from ``image_url`` and uploaded as a file so
    Discord renders it inline. If the download or upload fails for any
    reason, the call falls back to the base implementation, which sends the
    URL as a text message.

    Args:
        chat_id: Discord channel ID (converted with ``int``).
        image_url: URL the image is downloaded from.
        caption: Optional message text sent with the attachment; truncated
            to 2000 characters (Discord's message content limit).
        reply_to: Only forwarded to the URL fallback; the native attachment
            is sent as a plain (non-reply) message.

    Returns:
        ``SendResult`` with the new message ID on success, a failure result
        when not connected or the channel is missing, or whatever the URL
        fallback returns.
    """
    if not self._client:
        return SendResult(success=False, error="Not connected")

    try:
        import io

        import aiohttp

        channel = self._client.get_channel(int(chat_id))
        if not channel:
            channel = await self._client.fetch_channel(int(chat_id))
        if not channel:
            return SendResult(success=False, error=f"Channel {chat_id} not found")

        # Download the image first; the HTTP session is released before the
        # upload so it is not held open for the duration of the Discord call.
        async with aiohttp.ClientSession() as session:
            async with session.get(image_url, timeout=aiohttp.ClientTimeout(total=30)) as resp:
                if resp.status != 200:
                    raise Exception(f"Failed to download image: HTTP {resp.status}")

                content_type = resp.headers.get("content-type", "image/png").lower()
                # A text body (e.g. an HTML error page served with 200) is
                # not an image; uploading it as image.png shows a broken file.
                if content_type.startswith("text/"):
                    raise Exception(f"URL did not return an image (content-type: {content_type})")

                image_data = await resp.read()

        # Pick the file extension from the content type (default: png)
        ext = "png"
        for token, candidate in (("jpeg", "jpg"), ("jpg", "jpg"), ("gif", "gif"), ("webp", "webp")):
            if token in content_type:
                ext = candidate
                break

        # Send as a Discord file attachment
        # (Discord renders attachments inline, unlike plain URLs)
        file = discord.File(io.BytesIO(image_data), filename=f"image.{ext}")
        msg = await channel.send(
            content=caption[:2000] if caption else None,  # Discord content limit
            file=file,
        )
        return SendResult(success=True, message_id=str(msg.id))

    except ImportError:
        print(f"[{self.name}] aiohttp not installed, falling back to URL. Run: pip install aiohttp")
        return await super().send_image(chat_id, image_url, caption, reply_to)
    except Exception as e:
        # Best effort: any download/upload problem degrades to a text link
        print(f"[{self.name}] Failed to send image attachment, falling back to URL: {e}")
        return await super().send_image(chat_id, image_url, caption, reply_to)
async def send_image(
    self,
    chat_id: str,
    image_url: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
) -> SendResult:
    """
    Send an image natively as a Telegram photo.

    The URL is handed to ``send_photo`` directly. If that call (or any of
    the argument conversions) raises, the error is logged and the base
    implementation is used instead, which sends the URL as a text message.

    Args:
        chat_id: Telegram chat ID (converted with ``int``).
        image_url: URL of the photo.
        caption: Optional caption, cut to Telegram's 1024-character limit.
        reply_to: Optional ID of the message to reply to.

    Returns:
        ``SendResult`` with the sent message ID on success, a failure result
        when not connected, or whatever the URL fallback returns.
    """
    if not self._bot:
        return SendResult(success=False, error="Not connected")

    try:
        # Conversions stay inside the try so a bad ID or caption also
        # triggers the text-link fallback rather than propagating.
        photo_args = {
            "chat_id": int(chat_id),
            "photo": image_url,  # Telegram can send photos directly from URLs
            "caption": caption[:1024] if caption else None,  # Telegram caption limit
            "reply_to_message_id": int(reply_to) if reply_to else None,
        }
        sent = await self._bot.send_photo(**photo_args)
        return SendResult(success=True, message_id=str(sent.message_id))
    except Exception as e:
        print(f"[{self.name}] Failed to send photo, falling back to URL: {e}")
        # Fallback: send as text link
        return await super().send_image(chat_id, image_url, caption, reply_to)
Returns a single upscaled image URL that can be displayed using tags.", + "description": "Generate high-quality images from text prompts using FLUX 2 Pro model with automatic 2x upscaling. Creates detailed, artistic images that are automatically upscaled for hi-rez results. Returns a single upscaled image URL. Display it using markdown: ![description](URL)", "parameters": { "type": "object", "properties": {