fix: complete send_message MEDIA delivery salvage

- prevent raw MEDIA tag leakage outside the gateway pipeline
- make extract_media handle quoted/backticked paths and optional whitespace
- send Telegram media natively with explicit error/warning handling
- add regression tests for Telegram media dispatch and MEDIA parsing
This commit is contained in:
teknium1
2026-03-14 04:01:46 -07:00
parent 50d6659392
commit 5c9a84219d
4 changed files with 309 additions and 44 deletions

View File

@@ -618,16 +618,22 @@ class BasePlatformAdapter(ABC):
has_voice_tag = "[[audio_as_voice]]" in content
cleaned = cleaned.replace("[[audio_as_voice]]", "")
# Extract MEDIA:<path> tags
media_pattern = r'MEDIA:\s*(\S+)'
for match in re.finditer(media_pattern, content):
path = match.group(1).strip().rstrip('`"\',)}')
# Extract MEDIA:<path> tags, allowing optional whitespace after the colon
# and quoted/backticked paths for LLM-formatted outputs.
media_pattern = re.compile(
r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|\S+)[`"']?'''
)
for match in media_pattern.finditer(content):
path = match.group("path").strip()
if len(path) >= 2 and path[0] == path[-1] and path[0] in "`\"'":
path = path[1:-1].strip()
path = path.lstrip("`\"'").rstrip("`\"',.;:)}]")
if path:
media.append((path, has_voice_tag))
# Remove MEDIA tags from content (including surrounding backticks/quotes)
# Remove MEDIA tags from content (including surrounding quote/backtick wrappers)
if media:
cleaned = re.sub(r'[`"\']*MEDIA:\s*\S+[`"\']*', '', cleaned)
cleaned = media_pattern.sub('', cleaned)
cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()
return media, cleaned

View File

@@ -258,6 +258,29 @@ class TestExtractMedia:
_, cleaned = BasePlatformAdapter.extract_media(content)
assert "\n\n\n" not in cleaned
def test_media_tag_allows_optional_whitespace_after_colon(self):
content = "MEDIA: /path/to/audio.ogg"
media, cleaned = BasePlatformAdapter.extract_media(content)
assert media == [("/path/to/audio.ogg", False)]
assert cleaned == ""
def test_media_tag_strips_wrapping_quotes_and_backticks(self):
content = "MEDIA: `/path/to/file.png`\nMEDIA:\"/path/to/file2.png\"\nMEDIA:'/path/to/file3.png'"
media, cleaned = BasePlatformAdapter.extract_media(content)
assert media == [
("/path/to/file.png", False),
("/path/to/file2.png", False),
("/path/to/file3.png", False),
]
assert cleaned == ""
def test_media_tag_supports_quoted_paths_with_spaces(self):
content = "Here\nMEDIA: '/tmp/my image.png'\nAfter"
media, cleaned = BasePlatformAdapter.extract_media(content)
assert media == [("/tmp/my image.png", False)]
assert "Here" in cleaned
assert "After" in cleaned
# ---------------------------------------------------------------------------
# truncate_message

View File

@@ -2,11 +2,13 @@
import asyncio
import json
import sys
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import AsyncMock, patch
from unittest.mock import AsyncMock, MagicMock, patch
from gateway.config import Platform
from tools.send_message_tool import send_message_tool
from tools.send_message_tool import _send_telegram, send_message_tool
def _run_async_immediately(coro):
@@ -14,13 +16,18 @@ def _run_async_immediately(coro):
def _make_config():
telegram_cfg = SimpleNamespace(enabled=True, token="fake-token", extra={})
telegram_cfg = SimpleNamespace(enabled=True, token="***", extra={})
return SimpleNamespace(
platforms={Platform.TELEGRAM: telegram_cfg},
get_home_channel=lambda _platform: None,
), telegram_cfg
def _install_telegram_mock(monkeypatch, bot):
telegram_mod = SimpleNamespace(Bot=lambda token: bot)
monkeypatch.setitem(sys.modules, "telegram", telegram_mod)
class TestSendMessageTool:
def test_sends_to_explicit_telegram_topic_target(self):
config, telegram_cfg = _make_config()
@@ -41,7 +48,14 @@ class TestSendMessageTool:
)
assert result["success"] is True
send_mock.assert_awaited_once_with(Platform.TELEGRAM, telegram_cfg, "-1001", "hello", thread_id="17585")
send_mock.assert_awaited_once_with(
Platform.TELEGRAM,
telegram_cfg,
"-1001",
"hello",
thread_id="17585",
media_files=[],
)
mirror_mock.assert_called_once_with("telegram", "-1001", "hello", source_label="cli", thread_id="17585")
def test_resolved_telegram_topic_name_preserves_thread_id(self):
@@ -64,4 +78,154 @@ class TestSendMessageTool:
)
assert result["success"] is True
send_mock.assert_awaited_once_with(Platform.TELEGRAM, telegram_cfg, "-1001", "hello", thread_id="17585")
send_mock.assert_awaited_once_with(
Platform.TELEGRAM,
telegram_cfg,
"-1001",
"hello",
thread_id="17585",
media_files=[],
)
def test_media_only_message_uses_placeholder_for_mirroring(self):
config, telegram_cfg = _make_config()
with patch("gateway.config.load_gateway_config", return_value=config), \
patch("tools.interrupt.is_interrupted", return_value=False), \
patch("model_tools._run_async", side_effect=_run_async_immediately), \
patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \
patch("gateway.mirror.mirror_to_session", return_value=True) as mirror_mock:
result = json.loads(
send_message_tool(
{
"action": "send",
"target": "telegram:-1001",
"message": "MEDIA:/tmp/example.ogg",
}
)
)
assert result["success"] is True
send_mock.assert_awaited_once_with(
Platform.TELEGRAM,
telegram_cfg,
"-1001",
"",
thread_id=None,
media_files=[("/tmp/example.ogg", False)],
)
mirror_mock.assert_called_once_with(
"telegram",
"-1001",
"[Sent audio attachment]",
source_label="cli",
thread_id=None,
)
class TestSendTelegramMediaDelivery:
def test_sends_text_then_photo_for_media_tag(self, tmp_path, monkeypatch):
image_path = tmp_path / "photo.png"
image_path.write_bytes(b"\x89PNG\r\n\x1a\n" + b"\x00" * 32)
bot = MagicMock()
bot.send_message = AsyncMock(return_value=SimpleNamespace(message_id=1))
bot.send_photo = AsyncMock(return_value=SimpleNamespace(message_id=2))
bot.send_video = AsyncMock()
bot.send_voice = AsyncMock()
bot.send_audio = AsyncMock()
bot.send_document = AsyncMock()
_install_telegram_mock(monkeypatch, bot)
result = asyncio.run(
_send_telegram(
"token",
"12345",
"Hello there",
media_files=[(str(image_path), False)],
)
)
assert result["success"] is True
assert result["message_id"] == "2"
bot.send_message.assert_awaited_once()
bot.send_photo.assert_awaited_once()
sent_text = bot.send_message.await_args.kwargs["text"]
assert "MEDIA:" not in sent_text
assert sent_text == "Hello there"
def test_sends_voice_for_ogg_with_voice_directive(self, tmp_path, monkeypatch):
voice_path = tmp_path / "voice.ogg"
voice_path.write_bytes(b"OggS" + b"\x00" * 32)
bot = MagicMock()
bot.send_message = AsyncMock()
bot.send_photo = AsyncMock()
bot.send_video = AsyncMock()
bot.send_voice = AsyncMock(return_value=SimpleNamespace(message_id=7))
bot.send_audio = AsyncMock()
bot.send_document = AsyncMock()
_install_telegram_mock(monkeypatch, bot)
result = asyncio.run(
_send_telegram(
"token",
"12345",
"",
media_files=[(str(voice_path), True)],
)
)
assert result["success"] is True
bot.send_voice.assert_awaited_once()
bot.send_audio.assert_not_awaited()
bot.send_message.assert_not_awaited()
def test_sends_audio_for_mp3(self, tmp_path, monkeypatch):
audio_path = tmp_path / "clip.mp3"
audio_path.write_bytes(b"ID3" + b"\x00" * 32)
bot = MagicMock()
bot.send_message = AsyncMock()
bot.send_photo = AsyncMock()
bot.send_video = AsyncMock()
bot.send_voice = AsyncMock()
bot.send_audio = AsyncMock(return_value=SimpleNamespace(message_id=8))
bot.send_document = AsyncMock()
_install_telegram_mock(monkeypatch, bot)
result = asyncio.run(
_send_telegram(
"token",
"12345",
"",
media_files=[(str(audio_path), False)],
)
)
assert result["success"] is True
bot.send_audio.assert_awaited_once()
bot.send_voice.assert_not_awaited()
def test_missing_media_returns_error_without_leaking_raw_tag(self, monkeypatch):
bot = MagicMock()
bot.send_message = AsyncMock()
bot.send_photo = AsyncMock()
bot.send_video = AsyncMock()
bot.send_voice = AsyncMock()
bot.send_audio = AsyncMock()
bot.send_document = AsyncMock()
_install_telegram_mock(monkeypatch, bot)
result = asyncio.run(
_send_telegram(
"token",
"12345",
"",
media_files=[("/tmp/does-not-exist.png", False)],
)
)
assert "error" in result
assert "No deliverable text or media remained" in result["error"]
bot.send_message.assert_not_awaited()

View File

@@ -14,11 +14,10 @@ import time
logger = logging.getLogger(__name__)
_TELEGRAM_TOPIC_TARGET_RE = re.compile(r"^\s*(-?\d+)(?::(\d+))?\s*$")
_IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}
_VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.3gp'}
_AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'}
_VOICE_EXTS = {'.ogg', '.opus'}
_IMAGE_EXTS = {".jpg", ".jpeg", ".png", ".webp", ".gif"}
_VIDEO_EXTS = {".mp4", ".mov", ".avi", ".mkv", ".3gp"}
_AUDIO_EXTS = {".ogg", ".opus", ".mp3", ".wav", ".m4a"}
_VOICE_EXTS = {".ogg", ".opus"}
SEND_MESSAGE_SCHEMA = {
@@ -135,6 +134,11 @@ def _handle_send(args):
if not pconfig or not pconfig.enabled:
return json.dumps({"error": f"Platform '{platform_name}' is not configured. Set up credentials in ~/.hermes/gateway.json or environment variables."})
from gateway.platforms.base import BasePlatformAdapter
media_files, cleaned_message = BasePlatformAdapter.extract_media(message)
mirror_text = cleaned_message.strip() or _describe_media_for_mirror(media_files)
used_home_channel = False
if not chat_id:
home = config.get_home_channel(platform)
@@ -150,16 +154,25 @@ def _handle_send(args):
try:
from model_tools import _run_async
result = _run_async(_send_to_platform(platform, pconfig, chat_id, message, thread_id=thread_id))
result = _run_async(
_send_to_platform(
platform,
pconfig,
chat_id,
cleaned_message,
thread_id=thread_id,
media_files=media_files,
)
)
if used_home_channel and isinstance(result, dict) and result.get("success"):
result["note"] = f"Sent to {platform_name} home channel (chat_id: {chat_id})"
# Mirror the sent message into the target's gateway session
if isinstance(result, dict) and result.get("success"):
if isinstance(result, dict) and result.get("success") and mirror_text:
try:
from gateway.mirror import mirror_to_session
source_label = os.getenv("HERMES_SESSION_PLATFORM", "cli")
if mirror_to_session(platform_name, chat_id, message, source_label=source_label, thread_id=thread_id):
if mirror_to_session(platform_name, chat_id, mirror_text, source_label=source_label, thread_id=thread_id):
result["mirrored"] = True
except Exception:
pass
@@ -180,48 +193,97 @@ def _parse_target_ref(platform_name: str, target_ref: str):
return None, None, False
async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None):
def _describe_media_for_mirror(media_files):
"""Return a human-readable mirror summary when a message only contains media."""
if not media_files:
return ""
if len(media_files) == 1:
media_path, is_voice = media_files[0]
ext = os.path.splitext(media_path)[1].lower()
if is_voice and ext in _VOICE_EXTS:
return "[Sent voice message]"
if ext in _IMAGE_EXTS:
return "[Sent image attachment]"
if ext in _VIDEO_EXTS:
return "[Sent video attachment]"
if ext in _AUDIO_EXTS:
return "[Sent audio attachment]"
return "[Sent document attachment]"
return f"[Sent {len(media_files)} media attachments]"
async def _send_to_platform(platform, pconfig, chat_id, message, thread_id=None, media_files=None):
"""Route a message to the appropriate platform sender."""
from gateway.config import Platform
media_files = media_files or []
if platform == Platform.TELEGRAM:
return await _send_telegram(pconfig.token, chat_id, message, thread_id=thread_id)
elif platform == Platform.DISCORD:
return await _send_discord(pconfig.token, chat_id, message)
return await _send_telegram(
pconfig.token,
chat_id,
message,
media_files=media_files,
thread_id=thread_id,
)
if media_files and not message.strip():
return {
"error": (
f"send_message MEDIA delivery is currently only supported for telegram; "
f"target {platform.value} had only media attachments"
)
}
warning = None
if media_files:
warning = (
f"MEDIA attachments were omitted for {platform.value}; "
"native send_message media delivery is currently only supported for telegram"
)
if platform == Platform.DISCORD:
result = await _send_discord(pconfig.token, chat_id, message)
elif platform == Platform.SLACK:
return await _send_slack(pconfig.token, chat_id, message)
result = await _send_slack(pconfig.token, chat_id, message)
elif platform == Platform.SIGNAL:
return await _send_signal(pconfig.extra, chat_id, message)
result = await _send_signal(pconfig.extra, chat_id, message)
elif platform == Platform.EMAIL:
return await _send_email(pconfig.extra, chat_id, message)
return {"error": f"Direct sending not yet implemented for {platform.value}"}
result = await _send_email(pconfig.extra, chat_id, message)
else:
result = {"error": f"Direct sending not yet implemented for {platform.value}"}
if warning and isinstance(result, dict) and result.get("success"):
warnings = list(result.get("warnings", []))
warnings.append(warning)
result["warnings"] = warnings
return result
async def _send_telegram(token, chat_id, message, thread_id=None):
async def _send_telegram(token, chat_id, message, media_files=None, thread_id=None):
"""Send via Telegram Bot API (one-shot, no polling needed)."""
try:
from telegram import Bot
from gateway.platforms.base import BasePlatformAdapter
bot = Bot(token=token)
int_chat_id = int(chat_id)
media_files = media_files or []
thread_kwargs = {}
if thread_id is not None:
thread_kwargs["message_thread_id"] = int(thread_id)
# Extract MEDIA:<path> tags and send files natively
media_files, cleaned = BasePlatformAdapter.extract_media(message)
last_msg = None
# Send text portion if any remains
if cleaned.strip():
warnings = []
if message.strip():
last_msg = await bot.send_message(
chat_id=int_chat_id, text=cleaned, **thread_kwargs
chat_id=int_chat_id, text=message, **thread_kwargs
)
# Send extracted media files
for media_path, is_voice in media_files:
if not os.path.exists(media_path):
logger.warning("Media file not found, skipping: %s", media_path)
warning = f"Media file not found, skipping: {media_path}"
logger.warning(warning)
warnings.append(warning)
continue
ext = os.path.splitext(media_path)[1].lower()
try:
with open(media_path, "rb") as f:
@@ -246,15 +308,25 @@ async def _send_telegram(token, chat_id, message, thread_id=None):
chat_id=int_chat_id, document=f, **thread_kwargs
)
except Exception as e:
logger.error("Failed to send media %s: %s", media_path, e)
warning = f"Failed to send media {media_path}: {e}"
logger.error(warning)
warnings.append(warning)
# If no text and no media sent, send cleaned text as fallback
if last_msg is None:
last_msg = await bot.send_message(
chat_id=int_chat_id, text=cleaned if cleaned.strip() else message, **thread_kwargs
)
error = "No deliverable text or media remained after processing MEDIA tags"
if warnings:
return {"error": error, "warnings": warnings}
return {"error": error}
return {"success": True, "platform": "telegram", "chat_id": chat_id, "message_id": str(last_msg.message_id)}
result = {
"success": True,
"platform": "telegram",
"chat_id": chat_id,
"message_id": str(last_msg.message_id),
}
if warnings:
result["warnings"] = warnings
return result
except ImportError:
return {"error": "python-telegram-bot not installed. Run: pip install python-telegram-bot"}
except Exception as e: