fix: complete send_message MEDIA delivery salvage
- prevent raw MEDIA tag leakage outside the gateway pipeline - make extract_media handle quoted/backticked paths and optional whitespace - send Telegram media natively with explicit error/warning handling - add regression tests for Telegram media dispatch and MEDIA parsing
This commit is contained in:
@@ -618,16 +618,22 @@ class BasePlatformAdapter(ABC):
|
||||
has_voice_tag = "[[audio_as_voice]]" in content
|
||||
cleaned = cleaned.replace("[[audio_as_voice]]", "")
|
||||
|
||||
# Extract MEDIA:<path> tags
|
||||
media_pattern = r'MEDIA:\s*(\S+)'
|
||||
for match in re.finditer(media_pattern, content):
|
||||
path = match.group(1).strip().rstrip('`"\',)}')
|
||||
# Extract MEDIA:<path> tags, allowing optional whitespace after the colon
|
||||
# and quoted/backticked paths for LLM-formatted outputs.
|
||||
media_pattern = re.compile(
|
||||
r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|\S+)[`"']?'''
|
||||
)
|
||||
for match in media_pattern.finditer(content):
|
||||
path = match.group("path").strip()
|
||||
if len(path) >= 2 and path[0] == path[-1] and path[0] in "`\"'":
|
||||
path = path[1:-1].strip()
|
||||
path = path.lstrip("`\"'").rstrip("`\"',.;:)}]")
|
||||
if path:
|
||||
media.append((path, has_voice_tag))
|
||||
|
||||
# Remove MEDIA tags from content (including surrounding backticks/quotes)
|
||||
|
||||
# Remove MEDIA tags from content (including surrounding quote/backtick wrappers)
|
||||
if media:
|
||||
cleaned = re.sub(r'[`"\']*MEDIA:\s*\S+[`"\']*', '', cleaned)
|
||||
cleaned = media_pattern.sub('', cleaned)
|
||||
cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()
|
||||
|
||||
return media, cleaned
|
||||
|
||||
@@ -258,6 +258,29 @@ class TestExtractMedia:
|
||||
_, cleaned = BasePlatformAdapter.extract_media(content)
|
||||
assert "\n\n\n" not in cleaned
|
||||
|
||||
def test_media_tag_allows_optional_whitespace_after_colon(self):
|
||||
content = "MEDIA: /path/to/audio.ogg"
|
||||
media, cleaned = BasePlatformAdapter.extract_media(content)
|
||||
assert media == [("/path/to/audio.ogg", False)]
|
||||
assert cleaned == ""
|
||||
|
||||
def test_media_tag_strips_wrapping_quotes_and_backticks(self):
|
||||
content = "MEDIA: `/path/to/file.png`\nMEDIA:\"/path/to/file2.png\"\nMEDIA:'/path/to/file3.png'"
|
||||
media, cleaned = BasePlatformAdapter.extract_media(content)
|
||||
assert media == [
|
||||
("/path/to/file.png", False),
|
||||
("/path/to/file2.png", False),
|
||||
("/path/to/file3.png", False),
|
||||
]
|
||||
assert cleaned == ""
|
||||
|
||||
def test_media_tag_supports_quoted_paths_with_spaces(self):
|
||||
content = "Here\nMEDIA: '/tmp/my image.png'\nAfter"
|
||||
media, cleaned = BasePlatformAdapter.extract_media(content)
|
||||
assert media == [("/tmp/my image.png", False)]
|
||||
assert "Here" in cleaned
|
||||
assert "After" in cleaned
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# truncate_message
|
||||
|
||||
@@ -2,11 +2,13 @@
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import AsyncMock, patch
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
|
||||
from gateway.config import Platform
|
||||
from tools.send_message_tool import send_message_tool
|
||||
from tools.send_message_tool import _send_telegram, send_message_tool
|
||||
|
||||
|
||||
def _run_async_immediately(coro):
|
||||
@@ -14,13 +16,18 @@ def _run_async_immediately(coro):
|
||||
|
||||
|
||||
def _make_config():
|
||||
telegram_cfg = SimpleNamespace(enabled=True, token="fake-token", extra={})
|
||||
telegram_cfg = SimpleNamespace(enabled=True, token="***", extra={})
|
||||
return SimpleNamespace(
|
||||
platforms={Platform.TELEGRAM: telegram_cfg},
|
||||
get_home_channel=lambda _platform: None,
|
||||
), telegram_cfg
|
||||
|
||||
|
||||
def _install_telegram_mock(monkeypatch, bot):
|
||||
telegram_mod = SimpleNamespace(Bot=lambda token: bot)
|
||||
monkeypatch.setitem(sys.modules, "telegram", telegram_mod)
|
||||
|
||||
|
||||
class TestSendMessageTool:
|
||||
def test_sends_to_explicit_telegram_topic_target(self):
|
||||
config, telegram_cfg = _make_config()
|
||||
@@ -41,7 +48,14 @@ class TestSendMessageTool:
|
||||
)
|
||||
|
||||
assert result["success"] is True
|
||||
send_mock.assert_awaited_once_with(Platform.TELEGRAM, telegram_cfg, "-1001", "hello", thread_id="17585")
|
||||
send_mock.assert_awaited_once_with(
|
||||
Platform.TELEGRAM,
|
||||
telegram_cfg,
|
||||
"-1001",
|
||||
"hello",
|
||||
thread_id="17585",
|
||||
media_files=[],
|
||||
)
|
||||
mirror_mock.assert_called_once_with("telegram", "-1001", "hello", source_label="cli", thread_id="17585")
|
||||
|
||||
def test_resolved_telegram_topic_name_preserves_thread_id(self):
|
||||
@@ -64,4 +78,154 @@ class TestSendMessageTool:
|
||||
)
|
||||
|
||||
assert result["success"] is True
|
||||
send_mock.assert_awaited_once_with(Platform.TELEGRAM, telegram_cfg, "-1001", "hello", thread_id="17585")
|
||||
send_mock.assert_awaited_once_with(
|
||||
Platform.TELEGRAM,
|
||||
telegram_cfg,
|
||||
"-1001",
|
||||
"hello",
|
||||
thread_id="17585",
|
||||
media_files=[],
|
||||
)
|
||||
|
||||
def test_media_only_message_uses_placeholder_for_mirroring(self):
|
||||
config, telegram_cfg = _make_config()
|
||||
|
||||
with patch("gateway.config.load_gateway_config", return_value=config), \
|
||||
patch("tools.interrupt.is_interrupted", return_value=False), \
|
||||
patch("model_tools._run_async", side_effect=_run_async_immediately), \
|
||||
patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \
|
||||
patch("gateway.mirror.mirror_to_session", return_value=True) as mirror_mock:
|
||||
result = json.loads(
|
||||
send_message_tool(
|
||||
{
|
||||
"action": "send",
|
||||
"target": "telegram:-1001",
|
||||
"message": "MEDIA:/tmp/example.ogg",
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
assert result["success"] is True
|
||||
send_mock.assert_awaited_once_with(
|
||||
Platform.TELEGRAM,
|
||||
telegram_cfg,
|
||||
"-1001",
|
||||
"",
|
||||
thread_id=None,
|
||||
media_files=[("/tmp/example.ogg", False)],
|
||||
)
|
||||
mirror_mock.assert_called_once_with(
|
||||
"telegram",
|
||||
"-1001",
|
||||
"[Sent audio attachment]",
|
||||
source_label="cli",
|
||||
thread_id=None,
|
||||
)
|
||||
|
||||
|
||||
class TestSendTelegramMediaDelivery:
|
||||
def test_sends_text_then_photo_for_media_tag(self, tmp_path, monkeypatch):
|
||||
image_path = tmp_path / "photo.png"
|
||||
image_path.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 32)
|
||||
|
||||
bot = MagicMock()
|
||||
bot.send_message = AsyncMock(return_value=SimpleNamespace(message_id=1))
|
||||
bot.send_photo = AsyncMock(return_value=SimpleNamespace(message_id=2))
|
||||
bot.send_video = AsyncMock()
|
||||
bot.send_voice = AsyncMock()
|
||||
bot.send_audio = AsyncMock()
|
||||
bot.send_document = AsyncMock()
|
||||
_install_telegram_mock(monkeypatch, bot)
|
||||
|
||||
result = asyncio.run(
|
||||
_send_telegram(
|
||||
"token",
|
||||
"12345",
|
||||
"Hello there",
|
||||
media_files=[(str(image_path), False)],
|
||||
)
|
||||
)
|
||||
|
||||
assert result["success"] is True
|
||||
assert result["message_id"] == "2"
|
||||
bot.send_message.assert_awaited_once()
|
||||
bot.send_photo.assert_awaited_once()
|
||||
sent_text = bot.send_message.await_args.kwargs["text"]
|
||||
assert "MEDIA:" not in sent_text
|
||||
assert sent_text == "Hello there"
|
||||
|
||||
def test_sends_voice_for_ogg_with_voice_directive(self, tmp_path, monkeypatch):
|
||||
voice_path = tmp_path / "voice.ogg"
|
||||
voice_path.write_bytes(b"OggS" + b"\x00" * 32)
|
||||
|
||||
bot = MagicMock()
|
||||
bot.send_message = AsyncMock()
|
||||
bot.send_photo = AsyncMock()
|
||||
bot.send_video = AsyncMock()
|
||||
bot.send_voice = AsyncMock(return_value=SimpleNamespace(message_id=7))
|
||||
bot.send_audio = AsyncMock()
|
||||
bot.send_document = AsyncMock()
|
||||
_install_telegram_mock(monkeypatch, bot)
|
||||
|
||||
result = asyncio.run(
|
||||
_send_telegram(
|
||||
"token",
|
||||
"12345",
|
||||
"",
|
||||
media_files=[(str(voice_path), True)],
|
||||
)
|
||||
)
|
||||
|
||||
assert result["success"] is True
|
||||
bot.send_voice.assert_awaited_once()
|
||||
bot.send_audio.assert_not_awaited()
|
||||
bot.send_message.assert_not_awaited()
|
||||
|
||||
def test_sends_audio_for_mp3(self, tmp_path, monkeypatch):
|
||||
audio_path = tmp_path / "clip.mp3"
|
||||
audio_path.write_bytes(b"ID3" + b"\x00" * 32)
|
||||
|
||||
bot = MagicMock()
|
||||
bot.send_message = AsyncMock()
|
||||
bot.send_photo = AsyncMock()
|
||||
bot.send_video = AsyncMock()
|
||||
bot.send_voice = AsyncMock()
|
||||
bot.send_audio = AsyncMock(return_value=SimpleNamespace(message_id=8))
|
||||
bot.send_document = AsyncMock()
|
||||
_install_telegram_mock(monkeypatch, bot)
|
||||
|
||||
result = asyncio.run(
|
||||
_send_telegram(
|
||||
"token",
|
||||
"12345",
|
||||
"",
|
||||
media_files=[(str(audio_path), False)],
|
||||
)
|
||||
)
|
||||
|
||||
assert result["success"] is True
|
||||
bot.send_audio.assert_awaited_once()
|
||||
bot.send_voice.assert_not_awaited()
|
||||
|
||||
def test_missing_media_returns_error_without_leaking_raw_tag(self, monkeypatch):
|
||||
bot = MagicMock()
|
||||
bot.send_message = AsyncMock()
|
||||
bot.send_photo = AsyncMock()
|
||||
bot.send_video = AsyncMock()
|
||||
bot.send_voice = AsyncMock()
|
||||
bot.send_audio = AsyncMock()
|
||||
bot.send_document = AsyncMock()
|
||||
_install_telegram_mock(monkeypatch, bot)
|
||||
|
||||
result = asyncio.run(
|
||||
_send_telegram(
|
||||
"token",
|
||||
"12345",
|
||||
"",
|
||||
media_files=[("/tmp/does-not-exist.png", False)],
|
||||
)
|
||||
)
|
||||
|
||||
assert "error" in result
|
||||
assert "No deliverable text or media remained" in result["error"]
|
||||
bot.send_message.assert_not_awaited()
|
||||
|
||||
@@ -14,11 +14,10 @@ import time
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_TELEGRAM_TOPIC_TARGET_RE = re.compile(r"^\s*(-?\d+)(?::(\d+))?\s*$")
|
||||
|
||||
_IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}
|
||||
_VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.3gp'}
|
||||
_AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'}
|
||||
_VOICE_EXTS = {'.ogg', '.opus'}
|
||||
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
|
||||
_VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".3gp"}
|
||||
_AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a"}
|
||||
_VOICE_EXTS = {".ogg", ".opus"}
|
||||
|
||||
|
||||
SEND_MESSAGE_SCHEMA = {
|
||||
@@ -135,6 +134,11 @@ def _handle_send(args):
|
||||
if not pconfig or not pconfig.enabled:
|
||||
return json.dumps({"error": f"Platform '{platform_name}' is not configured. Set up credentials in ~/.hermes/gateway.json or environment variables."})
|
||||
|
||||
from gateway.platforms.base import BasePlatformAdapter
|
||||
|
||||
media_files, cleaned_message = BasePlatformAdapter.extract_media(message)
|
||||
mirror_text = cleaned_message.strip() or _describe_media_for_mirror(media_files)
|
||||
|
||||
used_home_channel = False
|
||||
if not chat_id:
|
||||
home = config.get_home_channel(platform)
|
||||
@@ -150,16 +154,25 @@ def _handle_send(args):
|
||||
|
||||
try:
|
||||
from model_tools import _run_async
|
||||
result = _run_async(_send_to_platform(platform, pconfig, chat_id, message, thread_id=thread_id))
|
||||
result = _run_async(
|
||||
_send_to_platform(
|
||||
platform,
|
||||
pconfig,
|
||||
chat_id,
|
||||
cleaned_message,
|
||||
thread_id=thread_id,
|
||||
media_files=media_files,
|
||||
)
|
||||
)
|
||||
if used_home_channel and isinstance(result, dict) and result.get("success"):
|
||||
result["note"] = f"Sent to {platform_name} home channel (chat_id: {chat_id})"
|
||||
|
||||
# Mirror the sent message into the target's gateway session
|
||||
if isinstance(result, dict) and result.get("success"):
|
||||
if isinstance(result, dict) and result.get("success") and mirror_text:
|
||||
try:
|
||||
from gateway.mirror import mirror_to_session
|
||||
source_label = os.getenv("HERMES_SESSION_PLATFORM", "cli")
|
||||
if mirror_to_session(platform_name, chat_id, message, source_label=source_label, thread_id=thread_id):
|
||||
if mirror_to_session(platform_name, chat_id, mirror_text, source_label=source_label, thread_id=thread_id):
|
||||
result["mirrored"] = True
|
||||
except Exception:
|
||||
pass
|
||||
@@ -180,48 +193,97 @@ def _parse_target_ref(platform_name: str, target_ref: str):
|
||||
return None, None, False
|
||||
|
||||
|
||||
async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None):
|
||||
def _describe_media_for_mirror(media_files):
|
||||
"""Return a human-readable mirror summary when a message only contains media."""
|
||||
if not media_files:
|
||||
return ""
|
||||
if len(media_files) == 1:
|
||||
media_path, is_voice = media_files[0]
|
||||
ext = os.path.splitext(media_path)[1].lower()
|
||||
if is_voice and ext in _VOICE_EXTS:
|
||||
return "[Sent voice message]"
|
||||
if ext in _IMAGE_EXTS:
|
||||
return "[Sent image attachment]"
|
||||
if ext in _VIDEO_EXTS:
|
||||
return "[Sent video attachment]"
|
||||
if ext in _AUDIO_EXTS:
|
||||
return "[Sent audio attachment]"
|
||||
return "[Sent document attachment]"
|
||||
return f"[Sent {len(media_files)} media attachments]"
|
||||
|
||||
|
||||
async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None, media_files=None):
|
||||
"""Route a message to the appropriate platform sender."""
|
||||
from gateway.config import Platform
|
||||
|
||||
media_files = media_files or []
|
||||
if platform == Platform.TELEGRAM:
|
||||
return await _send_telegram(pconfig.token, chat_id, message, thread_id=thread_id)
|
||||
elif platform == Platform.DISCORD:
|
||||
return await _send_discord(pconfig.token, chat_id, message)
|
||||
return await _send_telegram(
|
||||
pconfig.token,
|
||||
chat_id,
|
||||
message,
|
||||
media_files=media_files,
|
||||
thread_id=thread_id,
|
||||
)
|
||||
if media_files and not message.strip():
|
||||
return {
|
||||
"error": (
|
||||
f"send_message MEDIA delivery is currently only supported for telegram; "
|
||||
f"target {platform.value} had only media attachments"
|
||||
)
|
||||
}
|
||||
warning = None
|
||||
if media_files:
|
||||
warning = (
|
||||
f"MEDIA attachments were omitted for {platform.value}; "
|
||||
"native send_message media delivery is currently only supported for telegram"
|
||||
)
|
||||
|
||||
if platform == Platform.DISCORD:
|
||||
result = await _send_discord(pconfig.token, chat_id, message)
|
||||
elif platform == Platform.SLACK:
|
||||
return await _send_slack(pconfig.token, chat_id, message)
|
||||
result = await _send_slack(pconfig.token, chat_id, message)
|
||||
elif platform == Platform.SIGNAL:
|
||||
return await _send_signal(pconfig.extra, chat_id, message)
|
||||
result = await _send_signal(pconfig.extra, chat_id, message)
|
||||
elif platform == Platform.EMAIL:
|
||||
return await _send_email(pconfig.extra, chat_id, message)
|
||||
return {"error": f"Direct sending not yet implemented for {platform.value}"}
|
||||
result = await _send_email(pconfig.extra, chat_id, message)
|
||||
else:
|
||||
result = {"error": f"Direct sending not yet implemented for {platform.value}"}
|
||||
|
||||
if warning and isinstance(result, dict) and result.get("success"):
|
||||
warnings = list(result.get("warnings", []))
|
||||
warnings.append(warning)
|
||||
result["warnings"] = warnings
|
||||
return result
|
||||
|
||||
|
||||
async def _send_telegram(token, chat_id, message, thread_id=None):
|
||||
async def _send_telegram(token, chat_id, message, media_files=None, thread_id=None):
|
||||
"""Send via Telegram Bot API (one-shot, no polling needed)."""
|
||||
try:
|
||||
from telegram import Bot
|
||||
from gateway.platforms.base import BasePlatformAdapter
|
||||
|
||||
bot = Bot(token=token)
|
||||
int_chat_id = int(chat_id)
|
||||
media_files = media_files or []
|
||||
thread_kwargs = {}
|
||||
if thread_id is not None:
|
||||
thread_kwargs["message_thread_id"] = int(thread_id)
|
||||
|
||||
# Extract MEDIA:<path> tags and send files natively
|
||||
media_files, cleaned = BasePlatformAdapter.extract_media(message)
|
||||
|
||||
last_msg = None
|
||||
# Send text portion if any remains
|
||||
if cleaned.strip():
|
||||
warnings = []
|
||||
|
||||
if message.strip():
|
||||
last_msg = await bot.send_message(
|
||||
chat_id=int_chat_id, text=cleaned, **thread_kwargs
|
||||
chat_id=int_chat_id, text=message, **thread_kwargs
|
||||
)
|
||||
|
||||
# Send extracted media files
|
||||
for media_path, is_voice in media_files:
|
||||
if not os.path.exists(media_path):
|
||||
logger.warning("Media file not found, skipping: %s", media_path)
|
||||
warning = f"Media file not found, skipping: {media_path}"
|
||||
logger.warning(warning)
|
||||
warnings.append(warning)
|
||||
continue
|
||||
|
||||
ext = os.path.splitext(media_path)[1].lower()
|
||||
try:
|
||||
with open(media_path, "rb") as f:
|
||||
@@ -246,15 +308,25 @@ async def _send_telegram(token, chat_id, message, thread_id=None):
|
||||
chat_id=int_chat_id, document=f, **thread_kwargs
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error("Failed to send media %s: %s", media_path, e)
|
||||
warning = f"Failed to send media {media_path}: {e}"
|
||||
logger.error(warning)
|
||||
warnings.append(warning)
|
||||
|
||||
# If no text and no media sent, send cleaned text as fallback
|
||||
if last_msg is None:
|
||||
last_msg = await bot.send_message(
|
||||
chat_id=int_chat_id, text=cleaned if cleaned.strip() else message, **thread_kwargs
|
||||
)
|
||||
error = "No deliverable text or media remained after processing MEDIA tags"
|
||||
if warnings:
|
||||
return {"error": error, "warnings": warnings}
|
||||
return {"error": error}
|
||||
|
||||
return {"success": True, "platform": "telegram", "chat_id": chat_id, "message_id": str(last_msg.message_id)}
|
||||
result = {
|
||||
"success": True,
|
||||
"platform": "telegram",
|
||||
"chat_id": chat_id,
|
||||
"message_id": str(last_msg.message_id),
|
||||
}
|
||||
if warnings:
|
||||
result["warnings"] = warnings
|
||||
return result
|
||||
except ImportError:
|
||||
return {"error": "python-telegram-bot not installed. Run: pip install python-telegram-bot"}
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user