Merge PR #292: feat(whatsapp): native media attachments for images, videos and documents
Authored by satelerd. Adds native WhatsApp media sending for images, videos, and documents via MEDIA: tags. Also includes conflict resolution with edit_message feature, Telegram hint fix (only advertise supported media types), and import cleanup.
This commit is contained in:
@@ -90,11 +90,21 @@ SKILLS_GUIDANCE = (
|
||||
PLATFORM_HINTS = {
|
||||
"whatsapp": (
|
||||
"You are on a text messaging communication platform, WhatsApp. "
|
||||
"Please do not use markdown as it does not render."
|
||||
"Please do not use markdown as it does not render. "
|
||||
"You can send media files natively: to deliver a file to the user, "
|
||||
"include MEDIA:/absolute/path/to/file in your response. The file "
|
||||
"will be sent as a native WhatsApp attachment — images (.jpg, .png, "
|
||||
".webp) appear as photos, videos (.mp4, .mov) play inline, and other "
|
||||
"files arrive as downloadable documents. You can also include image "
|
||||
"URLs in markdown format  and they will be sent as photos."
|
||||
),
|
||||
"telegram": (
|
||||
"You are on a text messaging communication platform, Telegram. "
|
||||
"Please do not use markdown as it does not render."
|
||||
"Please do not use markdown as it does not render. "
|
||||
"You can send media files natively: to deliver a file to the user, "
|
||||
"include MEDIA:/absolute/path/to/file in your response. Audio "
|
||||
"(.ogg) sends as voice bubbles. You can also include image URLs "
|
||||
"in markdown format  and they will be sent as native photos."
|
||||
),
|
||||
"discord": (
|
||||
"You are in a Discord server or group chat communicating with your user."
|
||||
|
||||
@@ -526,7 +526,63 @@ class BasePlatformAdapter(ABC):
|
||||
if caption:
|
||||
text = f"{caption}\n{text}"
|
||||
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
||||
|
||||
|
||||
async def send_video(
    self,
    chat_id: str,
    video_path: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
) -> SendResult:
    """
    Deliver a video to a chat; the base implementation degrades to text.

    Adapters that can upload video through their platform API are expected
    to override this method. The default here performs no upload: it sends
    a text message naming ``video_path`` via ``self.send``, with
    ``caption`` (when truthy) placed on its own line above it.

    Args:
        chat_id: Destination chat identifier, passed through to ``send``.
        video_path: Path of the video; only echoed in the message text.
        caption: Optional text placed on the line above the path.
        reply_to: Optional message id forwarded to ``send``.

    Returns:
        Whatever ``self.send`` returns for the fallback text message.
    """
    path_line = f"🎬 Video: {video_path}"
    content = f"{caption}\n{path_line}" if caption else path_line
    return await self.send(chat_id=chat_id, content=content, reply_to=reply_to)
|
||||
|
||||
async def send_document(
    self,
    chat_id: str,
    file_path: str,
    caption: Optional[str] = None,
    file_name: Optional[str] = None,
    reply_to: Optional[str] = None,
) -> SendResult:
    """
    Deliver a file to a chat; the base implementation degrades to text.

    Adapters that support downloadable attachments are expected to
    override this method. The default here performs no upload: it sends a
    text message naming ``file_path`` via ``self.send``, with ``caption``
    (when truthy) placed on its own line above it.

    Args:
        chat_id: Destination chat identifier, passed through to ``send``.
        file_path: Path of the file; only echoed in the message text.
        caption: Optional text placed on the line above the path.
        file_name: Accepted so overrides can use a display name; the
            text fallback does not use it.
        reply_to: Optional message id forwarded to ``send``.

    Returns:
        Whatever ``self.send`` returns for the fallback text message.
    """
    path_line = f"📎 File: {file_path}"
    content = f"{caption}\n{path_line}" if caption else path_line
    return await self.send(chat_id=chat_id, content=content, reply_to=reply_to)
|
||||
|
||||
async def send_image_file(
    self,
    chat_id: str,
    image_path: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
) -> SendResult:
    """
    Deliver a local image file to a chat; the base implementation degrades to text.

    This is the local-path counterpart of ``send_image()``, which takes a
    URL. Adapters that can attach photos natively are expected to override
    it. The default here performs no upload: it sends a text message
    naming ``image_path`` via ``self.send``, with ``caption`` (when
    truthy) placed on its own line above it.

    Args:
        chat_id: Destination chat identifier, passed through to ``send``.
        image_path: Path of the image; only echoed in the message text.
        caption: Optional text placed on the line above the path.
        reply_to: Optional message id forwarded to ``send``.

    Returns:
        Whatever ``self.send`` returns for the fallback text message.
    """
    path_line = f"🖼️ Image: {image_path}"
    content = f"{caption}\n{path_line}" if caption else path_line
    return await self.send(chat_id=chat_id, content=content, reply_to=reply_to)
|
||||
|
||||
@staticmethod
|
||||
def extract_media(content: str) -> Tuple[List[Tuple[str, bool]], str]:
|
||||
"""
|
||||
@@ -693,19 +749,41 @@ class BasePlatformAdapter(ABC):
|
||||
except Exception as img_err:
|
||||
print(f"[{self.name}] Error sending image: {img_err}")
|
||||
|
||||
# Send extracted audio/voice files as native attachments
|
||||
for audio_path, is_voice in media_files:
|
||||
# Send extracted media files — route by file type
|
||||
_AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'}
|
||||
_VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.3gp'}
|
||||
_IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}
|
||||
|
||||
for media_path, is_voice in media_files:
|
||||
if human_delay > 0:
|
||||
await asyncio.sleep(human_delay)
|
||||
try:
|
||||
voice_result = await self.send_voice(
|
||||
chat_id=event.source.chat_id,
|
||||
audio_path=audio_path,
|
||||
)
|
||||
if not voice_result.success:
|
||||
print(f"[{self.name}] Failed to send voice: {voice_result.error}")
|
||||
except Exception as voice_err:
|
||||
print(f"[{self.name}] Error sending voice: {voice_err}")
|
||||
ext = Path(media_path).suffix.lower()
|
||||
if ext in _AUDIO_EXTS:
|
||||
media_result = await self.send_voice(
|
||||
chat_id=event.source.chat_id,
|
||||
audio_path=media_path,
|
||||
)
|
||||
elif ext in _VIDEO_EXTS:
|
||||
media_result = await self.send_video(
|
||||
chat_id=event.source.chat_id,
|
||||
video_path=media_path,
|
||||
)
|
||||
elif ext in _IMAGE_EXTS:
|
||||
media_result = await self.send_image_file(
|
||||
chat_id=event.source.chat_id,
|
||||
image_path=media_path,
|
||||
)
|
||||
else:
|
||||
media_result = await self.send_document(
|
||||
chat_id=event.source.chat_id,
|
||||
file_path=media_path,
|
||||
)
|
||||
|
||||
if not media_result.success:
|
||||
print(f"[{self.name}] Failed to send media ({ext}): {media_result.error}")
|
||||
except Exception as media_err:
|
||||
print(f"[{self.name}] Error sending media: {media_err}")
|
||||
|
||||
# Check if there's a pending message that was queued during our processing
|
||||
if session_key in self._pending_messages:
|
||||
|
||||
@@ -381,6 +381,101 @@ class WhatsAppAdapter(BasePlatformAdapter):
|
||||
except Exception as e:
|
||||
return SendResult(success=False, error=str(e))
|
||||
|
||||
async def _send_media_to_bridge(
    self,
    chat_id: str,
    file_path: str,
    media_type: str,
    caption: Optional[str] = None,
    file_name: Optional[str] = None,
) -> SendResult:
    """Send a local media file through the bridge's ``/send-media`` endpoint.

    The file itself is not uploaded over HTTP; only its path is posted and
    the bridge reads the file from disk.

    Args:
        chat_id: WhatsApp chat to deliver to (sent as ``chatId``).
        file_path: Path of the file to send. Must name an existing
            regular file.
        media_type: Value sent as ``mediaType`` (callers in this class use
            ``"image"``, ``"video"`` and ``"document"``).
        caption: Optional caption; omitted from the request when falsy.
        file_name: Optional display name; omitted from the request when
            falsy.

    Returns:
        ``SendResult(success=True, ...)`` carrying the bridge's
        ``messageId`` on HTTP 200; otherwise ``SendResult(success=False)``
        with the bridge's response text or the exception message. This
        method never raises.
    """
    if not self._running:
        return SendResult(success=False, error="Not connected")
    try:
        import aiohttp

        # isfile() rather than exists(): a directory would pass exists()
        # and only fail later inside the bridge with an opaque 500.
        if not os.path.isfile(file_path):
            return SendResult(success=False, error=f"File not found: {file_path}")

        payload: Dict[str, Any] = {
            "chatId": chat_id,
            # The bridge is a separate HTTP process and may not share our
            # working directory, so hand it an absolute path. This is a
            # no-op for paths that are already absolute.
            "filePath": os.path.abspath(file_path),
            "mediaType": media_type,
        }
        if caption:
            payload["caption"] = caption
        if file_name:
            payload["fileName"] = file_name

        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"http://localhost:{self._bridge_port}/send-media",
                json=payload,
                # Allow up to two minutes in total for the request.
                timeout=aiohttp.ClientTimeout(total=120),
            ) as resp:
                if resp.status == 200:
                    data = await resp.json()
                    return SendResult(
                        success=True,
                        message_id=data.get("messageId"),
                        raw_response=data,
                    )
                else:
                    error = await resp.text()
                    return SendResult(success=False, error=error)

    except Exception as e:
        # Best-effort send: report any failure via SendResult instead of raising.
        return SendResult(success=False, error=str(e))
|
||||
|
||||
async def send_image(
    self,
    chat_id: str,
    image_url: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
) -> SendResult:
    """Fetch a remote image into the local cache and send it as a native photo.

    The URL is downloaded with ``cache_image_from_url`` and the cached
    file is handed to ``_send_media_to_bridge`` as an ``"image"``. If
    either step raises, the base-class ``send_image`` is used instead.
    ``reply_to`` is only used on that fallback path.
    """
    try:
        cached_path = await cache_image_from_url(image_url)
        outcome = await self._send_media_to_bridge(chat_id, cached_path, "image", caption)
    except Exception:
        # Any failure while caching or sending falls back to the parent behaviour.
        return await super().send_image(chat_id, image_url, caption, reply_to)
    return outcome
|
||||
|
||||
async def send_image_file(
    self,
    chat_id: str,
    image_path: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
) -> SendResult:
    """Send an image that already exists on disk as a native photo.

    Delegates to ``_send_media_to_bridge`` with media type ``"image"``.
    ``reply_to`` is accepted for signature compatibility with the base
    class but is not forwarded to the bridge.
    """
    outcome = await self._send_media_to_bridge(
        chat_id=chat_id,
        file_path=image_path,
        media_type="image",
        caption=caption,
    )
    return outcome
|
||||
|
||||
async def send_video(
    self,
    chat_id: str,
    video_path: str,
    caption: Optional[str] = None,
    reply_to: Optional[str] = None,
) -> SendResult:
    """Send a local video file as a native, inline-playable WhatsApp video.

    Delegates to ``_send_media_to_bridge`` with media type ``"video"``.
    ``reply_to`` is accepted for signature compatibility with the base
    class but is not forwarded to the bridge.
    """
    outcome = await self._send_media_to_bridge(
        chat_id=chat_id,
        file_path=video_path,
        media_type="video",
        caption=caption,
    )
    return outcome
|
||||
|
||||
async def send_document(
    self,
    chat_id: str,
    file_path: str,
    caption: Optional[str] = None,
    file_name: Optional[str] = None,
    reply_to: Optional[str] = None,
) -> SendResult:
    """Send a local file as a downloadable WhatsApp document.

    Delegates to ``_send_media_to_bridge`` with media type ``"document"``.
    When ``file_name`` is falsy, the base name of ``file_path`` is used as
    the display name. ``reply_to`` is accepted for signature compatibility
    with the base class but is not forwarded to the bridge.
    """
    display_name = file_name if file_name else os.path.basename(file_path)
    return await self._send_media_to_bridge(
        chat_id=chat_id,
        file_path=file_path,
        media_type="document",
        caption=caption,
        file_name=display_name,
    )
|
||||
|
||||
async def send_typing(self, chat_id: str) -> None:
|
||||
"""Send typing indicator via bridge."""
|
||||
if not self._running:
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
* GET /messages - Long-poll for new incoming messages
|
||||
* POST /send - Send a message { chatId, message, replyTo? }
|
||||
* POST /edit - Edit a sent message { chatId, messageId, message }
|
||||
* POST /send-media - Send media natively { chatId, filePath, mediaType?, caption?, fileName? }
|
||||
* POST /typing - Send typing indicator { chatId }
|
||||
* GET /chat/:id - Get chat info
|
||||
* GET /health - Health check
|
||||
@@ -22,7 +23,7 @@ import express from 'express';
|
||||
import { Boom } from '@hapi/boom';
|
||||
import pino from 'pino';
|
||||
import path from 'path';
|
||||
import { mkdirSync } from 'fs';
|
||||
import { mkdirSync, readFileSync, existsSync } from 'fs';
|
||||
import qrcode from 'qrcode-terminal';
|
||||
|
||||
// Parse CLI args
|
||||
@@ -238,6 +239,76 @@ app.post('/edit', async (req, res) => {
|
||||
}
|
||||
});
|
||||
|
||||
// MIME type map and media type inference for /send-media
|
||||
/**
 * Lower-case file extension (without the dot) -> MIME type sent with the
 * media payload. Extensions missing from this table fall back to a
 * per-media-type default chosen by the /send-media handler.
 */
const MIME_MAP = {
  // Images
  jpg: 'image/jpeg',
  jpeg: 'image/jpeg',
  png: 'image/png',
  webp: 'image/webp',
  gif: 'image/gif',

  // Videos
  mp4: 'video/mp4',
  mov: 'video/quicktime',
  avi: 'video/x-msvideo',
  mkv: 'video/x-matroska',
  '3gp': 'video/3gpp',

  // Documents
  pdf: 'application/pdf',
  doc: 'application/msword',
  docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
  xlsx: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
};
|
||||
|
||||
/**
 * Classify a file by its extension.
 *
 * @param {string} ext Extension without the leading dot; the comparison is
 *   exact, so the caller is responsible for lower-casing it.
 * @returns {'image'|'video'|'audio'|'document'} The media family, with
 *   'document' as the catch-all for anything unrecognised.
 */
function inferMediaType(ext) {
  const families = [
    ['image', ['jpg', 'jpeg', 'png', 'webp', 'gif']],
    ['video', ['mp4', 'mov', 'avi', 'mkv', '3gp']],
    ['audio', ['ogg', 'opus', 'mp3', 'wav', 'm4a']],
  ];

  for (const [family, extensions] of families) {
    if (extensions.includes(ext)) {
      return family;
    }
  }
  return 'document';
}
|
||||
|
||||
// Send media (image, video, audio, document) natively.
//
// Request body: { chatId, filePath, mediaType?, caption?, fileName? }
//   - filePath is read from this process's local disk.
//   - mediaType, when omitted, is inferred from the file extension.
// Responses:
//   200 { success: true, messageId }  message handed to WhatsApp
//   400 { error }                     chatId or filePath missing
//   404 { error }                     filePath does not exist
//   500 { error }                     reading or sending threw
//   503 { error }                     socket is not connected
app.post('/send-media', async (req, res) => {
  if (!sock || connectionState !== 'connected') {
    return res.status(503).json({ error: 'Not connected to WhatsApp' });
  }

  const { chatId, filePath, mediaType, caption, fileName } = req.body;
  if (!chatId || !filePath) {
    return res.status(400).json({ error: 'chatId and filePath are required' });
  }

  try {
    if (!existsSync(filePath)) {
      return res.status(404).json({ error: `File not found: ${filePath}` });
    }

    const buffer = readFileSync(filePath);
    const ext = filePath.toLowerCase().split('.').pop();
    const type = mediaType || inferMediaType(ext);

    // Only trust a table hit that is a real string: a plain-object lookup
    // such as MIME_MAP['constructor'] would otherwise return an inherited
    // non-string value.
    const mapped = typeof MIME_MAP[ext] === 'string' ? MIME_MAP[ext] : undefined;
    // A caller-supplied mediaType can disagree with the extension (e.g.
    // mediaType 'image' for a .pdf); never label a payload with a MIME type
    // from a different media family.
    const mimeWithin = (family, fallback) =>
      (mapped && mapped.startsWith(`${family}/`) ? mapped : fallback);

    let msgPayload;

    switch (type) {
      case 'image':
        msgPayload = { image: buffer, caption: caption || undefined, mimetype: mimeWithin('image', 'image/jpeg') };
        break;
      case 'video':
        msgPayload = { video: buffer, caption: caption || undefined, mimetype: mimeWithin('video', 'video/mp4') };
        break;
      case 'audio': {
        // Opus-in-Ogg is sent as a push-to-talk voice note; other formats
        // are sent as regular audio with a container-appropriate MIME type
        // (previously .wav and .m4a were mislabelled as audio/mpeg).
        const isOpus = ext === 'ogg' || ext === 'opus';
        const plainAudioMime = { mp3: 'audio/mpeg', wav: 'audio/wav', m4a: 'audio/mp4' };
        const audioMime = isOpus
          ? 'audio/ogg; codecs=opus'
          : (typeof plainAudioMime[ext] === 'string' ? plainAudioMime[ext] : 'audio/mpeg');
        msgPayload = { audio: buffer, mimetype: audioMime, ptt: isOpus };
        break;
      }
      case 'document':
      default:
        // Unknown mediaType values are delivered as a generic document.
        msgPayload = {
          document: buffer,
          fileName: fileName || path.basename(filePath),
          caption: caption || undefined,
          mimetype: mapped || 'application/octet-stream',
        };
        break;
    }

    const sent = await sock.sendMessage(chatId, msgPayload);
    res.json({ success: true, messageId: sent?.key?.id });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});
|
||||
|
||||
// Typing indicator
|
||||
app.post('/typing', async (req, res) => {
|
||||
if (!sock || connectionState !== 'connected') {
|
||||
|
||||
Reference in New Issue
Block a user