diff --git a/gateway/platforms/matrix.py b/gateway/platforms/matrix.py
index 3d6a90502..d8593a353 100644
--- a/gateway/platforms/matrix.py
+++ b/gateway/platforms/matrix.py
@@ -17,6 +17,8 @@ Environment variables:
 from __future__ import annotations
 
 import asyncio
+import io
+import json
 import logging
 import mimetypes
 import os
@@ -512,8 +514,11 @@ class MatrixAdapter(BasePlatformAdapter):
         reply_to: Optional[str] = None,
         metadata: Optional[Dict[str, Any]] = None,
     ) -> SendResult:
-        """Upload an audio file as a voice message."""
-        return await self._send_local_file(chat_id, audio_path, "m.audio", caption, reply_to, metadata=metadata)
+        """Upload an audio file as a voice message (MSC3245 native voice)."""
+        return await self._send_local_file(
+            chat_id, audio_path, "m.audio", caption, reply_to,
+            metadata=metadata, is_voice=True
+        )
 
     async def send_video(
         self,
@@ -546,13 +551,16 @@ class MatrixAdapter(BasePlatformAdapter):
         caption: Optional[str] = None,
         reply_to: Optional[str] = None,
         metadata: Optional[Dict[str, Any]] = None,
+        is_voice: bool = False,
     ) -> SendResult:
         """Upload bytes to Matrix and send as a media message."""
         import nio
 
         # Upload to homeserver.
-        resp = await self._client.upload(
-            data,
+        # nio expects a DataProvider (callable) or file-like object, not raw bytes.
+        # nio.upload() returns a tuple (UploadResponse|UploadError, Optional[Dict])
+        resp, maybe_encryption_info = await self._client.upload(
+            io.BytesIO(data),
             content_type=content_type,
             filename=filename,
         )
@@ -574,6 +582,10 @@ class MatrixAdapter(BasePlatformAdapter):
             },
         }
 
+        # Add MSC3245 voice flag for native voice messages.
+        if is_voice:
+            msg_content["org.matrix.msc3245.voice"] = {}
+
         if reply_to:
             msg_content["m.relates_to"] = {
                 "m.in_reply_to": {"event_id": reply_to}
@@ -601,6 +613,7 @@ class MatrixAdapter(BasePlatformAdapter):
         reply_to: Optional[str] = None,
         file_name: Optional[str] = None,
         metadata: Optional[Dict[str, Any]] = None,
+        is_voice: bool = False,
     ) -> SendResult:
         """Read a local file and upload it."""
         p = Path(file_path)
@@ -613,7 +626,7 @@ class MatrixAdapter(BasePlatformAdapter):
         ct = mimetypes.guess_type(fname)[0] or "application/octet-stream"
         data = p.read_bytes()
 
-        return await self._upload_and_send(room_id, data, fname, ct, msgtype, caption, reply_to, metadata)
+        return await self._upload_and_send(room_id, data, fname, ct, msgtype, caption, reply_to, metadata, is_voice=is_voice)
 
     # ------------------------------------------------------------------
     # Sync loop
@@ -808,11 +821,19 @@ class MatrixAdapter(BasePlatformAdapter):
         event_mimetype = (content_info.get("info") or {}).get("mimetype", "")
         media_type = "application/octet-stream"
         msg_type = MessageType.DOCUMENT
+        is_voice_message = False
+
         if isinstance(event, nio.RoomMessageImage):
             msg_type = MessageType.PHOTO
             media_type = event_mimetype or "image/png"
         elif isinstance(event, nio.RoomMessageAudio):
-            msg_type = MessageType.AUDIO
+            # Check for MSC3245 voice flag: org.matrix.msc3245.voice: {}
+            source_content = (getattr(event, "source", None) or {}).get("content") or {}
+            if source_content.get("org.matrix.msc3245.voice") is not None:
+                is_voice_message = True
+                msg_type = MessageType.VOICE
+            else:
+                msg_type = MessageType.AUDIO
             media_type = event_mimetype or "audio/ogg"
         elif isinstance(event, nio.RoomMessageVideo):
             msg_type = MessageType.VIDEO
@@ -850,6 +871,31 @@ class MatrixAdapter(BasePlatformAdapter):
         if relates_to.get("rel_type") == "m.thread":
             thread_id = relates_to.get("event_id")
 
+        # For voice messages, cache audio locally for transcription tools.
+        # Use the authenticated nio client to download (Matrix requires auth for media).
+        media_urls = [http_url] if http_url else None
+        media_types = [media_type] if http_url else None
+
+        if is_voice_message and url and url.startswith("mxc://"):
+            try:
+                import nio
+                from gateway.platforms.base import cache_audio_from_bytes
+
+                resp = await self._client.download(mxc=url)
+                if isinstance(resp, nio.MemoryDownloadResponse):
+                    # Derive extension from the MIME subtype (parameters stripped); default to .ogg
+                    ext = ".ogg"
+                    if media_type and "/" in media_type:
+                        subtype = media_type.split("/", 1)[1].split(";", 1)[0].strip()
+                        ext = f".{subtype}" if subtype else ".ogg"
+                    local_path = cache_audio_from_bytes(resp.body, ext)
+                    media_urls = [local_path]
+                    logger.debug("Matrix: cached voice message to %s", local_path)
+                else:
+                    logger.warning("Matrix: failed to download voice: %s", getattr(resp, "message", resp))
+            except Exception as e:
+                logger.warning("Matrix: failed to cache voice message, using HTTP URL: %s", e)
+
         source = self.build_source(
             chat_id=room.room_id,
             chat_type=chat_type,
diff --git a/tests/gateway/test_matrix_voice.py b/tests/gateway/test_matrix_voice.py
new file mode 100644
index 000000000..79f0947f6
--- /dev/null
+++ b/tests/gateway/test_matrix_voice.py
@@ -0,0 +1,340 @@
+"""Tests for Matrix voice message support (MSC3245)."""
+import io
+
+import pytest
+from unittest.mock import AsyncMock, MagicMock
+
+nio = pytest.importorskip("nio", reason="matrix-nio not installed")
+
+from gateway.platforms.base import MessageType
+
+
+# ---------------------------------------------------------------------------
+# Adapter helpers
+# ---------------------------------------------------------------------------
+
+def _make_adapter():
+    """Create a MatrixAdapter with mocked config."""
+    from gateway.platforms.matrix import MatrixAdapter
+    from gateway.config import PlatformConfig
+
+    config = PlatformConfig(
+        enabled=True,
+        token="***",
+        extra={
+            "homeserver": "https://matrix.example.org",
+            "user_id": "@bot:example.org",
+        },
+    )
+    adapter = MatrixAdapter(config)
+    return adapter
+
+
+def _make_room(room_id: str = "!test:example.org", member_count: int = 2):
+    """Create a mock Matrix room."""
+    room = MagicMock()
+    room.room_id = room_id
+    room.member_count = member_count
+    return room
+
+
+def _make_audio_event(
+    event_id: str = "$audio_event",
+    sender: str = "@alice:example.org",
+    body: str = "Voice message",
+    url: str = "mxc://example.org/abc123",
+    is_voice: bool = False,
+    mimetype: str = "audio/ogg",
+    timestamp: float = 9999999999000,  # ms
+):
+    """
+    Create a mock RoomMessageAudio event that passes isinstance checks.
+
+    Args:
+        is_voice: If True, adds org.matrix.msc3245.voice field to content
+    """
+    import nio
+
+    # Build the source dict that nio events expose via .source
+    content = {
+        "msgtype": "m.audio",
+        "body": body,
+        "url": url,
+        "info": {
+            "mimetype": mimetype,
+        },
+    }
+
+    if is_voice:
+        content["org.matrix.msc3245.voice"] = {}
+
+    # Build a RoomMessageAudio-like mock.
+    # MagicMock(spec=...) reports the spec class via __class__, so isinstance() passes.
+    event = MagicMock(spec=nio.RoomMessageAudio)
+    event.event_id = event_id
+    event.sender = sender
+    event.body = body
+    event.url = url
+    event.server_timestamp = timestamp
+    event.source = {
+        "type": "m.room.message",
+        "content": content,
+    }
+    # For MIME type extraction - needs to be a dict
+    event.content = content
+
+    return event
+
+
+def _make_download_response(body: bytes = b"fake audio data"):
+    """Create a mock nio.MemoryDownloadResponse."""
+    import nio
+    resp = MagicMock()
+    resp.body = body
+    resp.__class__ = nio.MemoryDownloadResponse
+    return resp
+
+
+# ---------------------------------------------------------------------------
+# Tests: MSC3245 Voice Detection (RED -> GREEN)
+# ---------------------------------------------------------------------------
+
+class TestMatrixVoiceMessageDetection:
+    """Test that MSC3245 voice messages are detected and tagged correctly."""
+
+    def setup_method(self):
+        self.adapter = _make_adapter()
+        self.adapter._user_id = "@bot:example.org"
+        self.adapter._startup_ts = 0.0
+        self.adapter._dm_rooms = {}
+        self.adapter._message_handler = AsyncMock()
+        # Mock _mxc_to_http to return a fake HTTP URL
+        self.adapter._mxc_to_http = lambda url: f"https://matrix.example.org/_matrix/media/v3/download/{url[6:]}"
+        # Mock client for authenticated download
+        self.adapter._client = MagicMock()
+        self.adapter._client.download = AsyncMock(return_value=_make_download_response())
+
+    @pytest.mark.asyncio
+    async def test_voice_message_has_type_voice(self):
+        """Voice messages (with MSC3245 field) should be MessageType.VOICE."""
+        room = _make_room()
+        event = _make_audio_event(is_voice=True)
+
+        # Capture the MessageEvent passed to handle_message
+        captured_event = None
+
+        async def capture(msg_event):
+            nonlocal captured_event
+            captured_event = msg_event
+
+        self.adapter.handle_message = capture
+
+        await self.adapter._on_room_message_media(room, event)
+
+        assert captured_event is not None, "No event was captured"
+        assert captured_event.message_type == MessageType.VOICE, \
+            f"Expected MessageType.VOICE, got {captured_event.message_type}"
+
+    @pytest.mark.asyncio
+    async def test_voice_message_has_local_path(self):
+        """Voice messages should have a local cached path in media_urls."""
+        room = _make_room()
+        event = _make_audio_event(is_voice=True)
+
+        captured_event = None
+
+        async def capture(msg_event):
+            nonlocal captured_event
+            captured_event = msg_event
+
+        self.adapter.handle_message = capture
+
+        await self.adapter._on_room_message_media(room, event)
+
+        assert captured_event is not None
+        assert captured_event.media_urls is not None
+        assert len(captured_event.media_urls) > 0
+        # Should be a local path, not an HTTP URL
+        assert not captured_event.media_urls[0].startswith("http"), \
+            f"media_urls should contain local path, got {captured_event.media_urls[0]}"
+        self.adapter._client.download.assert_awaited_once_with(mxc=event.url)
+        assert captured_event.media_types == ["audio/ogg"]
+
+    @pytest.mark.asyncio
+    async def test_audio_without_msc3245_stays_audio_type(self):
+        """Regular audio uploads (no MSC3245 field) should remain MessageType.AUDIO."""
+        room = _make_room()
+        event = _make_audio_event(is_voice=False)  # NOT a voice message
+
+        captured_event = None
+
+        async def capture(msg_event):
+            nonlocal captured_event
+            captured_event = msg_event
+
+        self.adapter.handle_message = capture
+
+        await self.adapter._on_room_message_media(room, event)
+
+        assert captured_event is not None
+        assert captured_event.message_type == MessageType.AUDIO, \
+            f"Expected MessageType.AUDIO for non-voice, got {captured_event.message_type}"
+
+    @pytest.mark.asyncio
+    async def test_regular_audio_has_http_url(self):
+        """Regular audio uploads should keep HTTP URL (not cached locally)."""
+        room = _make_room()
+        event = _make_audio_event(is_voice=False)
+
+        captured_event = None
+
+        async def capture(msg_event):
+            nonlocal captured_event
+            captured_event = msg_event
+
+        self.adapter.handle_message = capture
+
+        await self.adapter._on_room_message_media(room, event)
+
+        assert captured_event is not None
+        assert captured_event.media_urls is not None
+        # Should be HTTP URL, not local path
+        assert captured_event.media_urls[0].startswith("http"), \
+            f"Non-voice audio should have HTTP URL, got {captured_event.media_urls[0]}"
+        self.adapter._client.download.assert_not_awaited()
+        assert captured_event.media_types == ["audio/ogg"]
+
+
+class TestMatrixVoiceCacheFallback:
+    """Test graceful fallback when voice caching fails."""
+
+    def setup_method(self):
+        self.adapter = _make_adapter()
+        self.adapter._user_id = "@bot:example.org"
+        self.adapter._startup_ts = 0.0
+        self.adapter._dm_rooms = {}
+        self.adapter._message_handler = AsyncMock()
+        self.adapter._mxc_to_http = lambda url: f"https://matrix.example.org/_matrix/media/v3/download/{url[6:]}"
+        self.adapter._client = MagicMock()
+
+    @pytest.mark.asyncio
+    async def test_voice_cache_failure_falls_back_to_http_url(self):
+        """If caching fails, voice message should still be delivered with HTTP URL."""
+        room = _make_room()
+        event = _make_audio_event(is_voice=True)
+
+        # Make download fail
+        import nio
+        error_resp = MagicMock()
+        error_resp.__class__ = nio.DownloadError
+        self.adapter._client.download = AsyncMock(return_value=error_resp)
+
+        captured_event = None
+
+        async def capture(msg_event):
+            nonlocal captured_event
+            captured_event = msg_event
+
+        self.adapter.handle_message = capture
+
+        await self.adapter._on_room_message_media(room, event)
+
+        assert captured_event is not None
+        assert captured_event.media_urls is not None
+        # Should fall back to HTTP URL
+        assert captured_event.media_urls[0].startswith("http"), \
+            f"Should fall back to HTTP URL on cache failure, got {captured_event.media_urls[0]}"
+
+    @pytest.mark.asyncio
+    async def test_voice_cache_exception_falls_back_to_http_url(self):
+        """Unexpected download exceptions should also fall back to HTTP URL."""
+        room = _make_room()
+        event = _make_audio_event(is_voice=True)
+
+        self.adapter._client.download = AsyncMock(side_effect=RuntimeError("boom"))
+
+        captured_event = None
+
+        async def capture(msg_event):
+            nonlocal captured_event
+            captured_event = msg_event
+
+        self.adapter.handle_message = capture
+
+        await self.adapter._on_room_message_media(room, event)
+
+        assert captured_event is not None
+        assert captured_event.media_urls is not None
+        assert captured_event.media_urls[0].startswith("http"), \
+            f"Should fall back to HTTP URL on exception, got {captured_event.media_urls[0]}"
+
+
+# ---------------------------------------------------------------------------
+# Tests: send_voice includes MSC3245 field (RED -> GREEN)
+# ---------------------------------------------------------------------------
+
+class TestMatrixSendVoiceMSC3245:
+    """Test that send_voice includes MSC3245 field for native voice rendering."""
+
+    def setup_method(self):
+        self.adapter = _make_adapter()
+        self.adapter._user_id = "@bot:example.org"
+        # Mock client with successful upload
+        self.adapter._client = MagicMock()
+        self.upload_call = None
+
+        async def mock_upload(*args, **kwargs):
+            self.upload_call = (args, kwargs)
+            import nio
+            resp = MagicMock()
+            resp.content_uri = "mxc://example.org/uploaded"
+            resp.__class__ = nio.UploadResponse
+            return resp, None
+
+        self.adapter._client.upload = mock_upload
+
+    @pytest.mark.asyncio
+    async def test_send_voice_includes_msc3245_field(self):
+        """send_voice should include org.matrix.msc3245.voice in message content."""
+        import tempfile
+        import os
+
+        # Create a temp audio file
+        with tempfile.NamedTemporaryFile(suffix=".ogg", delete=False) as f:
+            f.write(b"fake audio data")
+            temp_path = f.name
+
+        try:
+            # Capture the message content sent to room_send
+            sent_content = None
+
+            async def mock_room_send(room_id, event_type, content):
+                nonlocal sent_content
+                sent_content = content
+                resp = MagicMock()
+                resp.event_id = "$sent_event"
+                import nio
+                resp.__class__ = nio.RoomSendResponse
+                return resp
+
+            self.adapter._client.room_send = mock_room_send
+
+            await self.adapter.send_voice(
+                chat_id="!room:example.org",
+                audio_path=temp_path,
+                caption="Test voice",
+            )
+
+            assert sent_content is not None, "No message was sent"
+            assert "org.matrix.msc3245.voice" in sent_content, \
+                f"MSC3245 voice field missing from content: {sent_content.keys()}"
+            assert sent_content["msgtype"] == "m.audio"
+            assert sent_content["info"]["mimetype"] == "audio/ogg"
+            assert self.upload_call is not None, "Expected upload() to be called"
+            args, kwargs = self.upload_call
+            assert isinstance(args[0], io.BytesIO)
+            assert kwargs["content_type"] == "audio/ogg"
+            assert kwargs["filename"].endswith(".ogg")
+
+        finally:
+            os.unlink(temp_path)