From 1c0183ec71b10c3fcf2bf502aeb6ed11f9fa630b Mon Sep 17 00:00:00 2001 From: WAXLYY Date: Mon, 6 Apr 2026 23:27:54 +0300 Subject: [PATCH] fix(gateway): sanitize media URLs in base platform logs --- gateway/platforms/base.py | 65 ++++++++++++++++++++++++++--- tests/gateway/test_platform_base.py | 26 ++++++++++++ 2 files changed, 86 insertions(+), 5 deletions(-) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 5261aceea..0ba00d890 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -12,6 +12,7 @@ import random import re import uuid from abc import ABC, abstractmethod +from urllib.parse import urlsplit logger = logging.getLogger(__name__) from dataclasses import dataclass, field @@ -36,6 +37,43 @@ GATEWAY_SECRET_CAPTURE_UNSUPPORTED_MESSAGE = ( ) +def _safe_url_for_log(url: str, max_len: int = 80) -> str: + """Return a URL string safe for logs (no query/fragment/userinfo).""" + if max_len <= 0: + return "" + + if url is None: + return "" + + raw = str(url) + if not raw: + return "" + + try: + parsed = urlsplit(raw) + except Exception: + return raw[:max_len] + + if parsed.scheme and parsed.netloc: + # Strip potential embedded credentials (user:pass@host). + netloc = parsed.netloc.rsplit("@", 1)[-1] + base = f"{parsed.scheme}://{netloc}" + path = parsed.path or "" + if path and path != "/": + basename = path.rsplit("/", 1)[-1] + safe = f"{base}/.../{basename}" if basename else f"{base}/..." + else: + safe = base + else: + safe = raw + + if len(safe) <= max_len: + return safe + if max_len <= 3: + return "." * max_len + return f"{safe[:max_len - 3]}..." 
class TestSafeUrlForLog:
    """Checks for the log-sanitizing URL helper ``_safe_url_for_log``."""

    def test_strips_query_fragment_and_userinfo(self):
        """Credentials, query string and fragment must not appear in the result."""
        signed_url = "".join(
            [
                "https://user:pass@example.com/private/path/image.png",
                "?X-Amz-Signature=supersecret&token=abc#frag",
            ]
        )
        sanitized = _safe_url_for_log(signed_url)
        assert sanitized == "https://example.com/.../image.png"
        for leaked in ("supersecret", "token=abc", "user:pass@"):
            assert leaked not in sanitized

    def test_truncates_long_values(self):
        """Results longer than ``max_len`` are cut and end with an ellipsis."""
        oversized = f"https://example.com/{'a' * 300}"
        shortened = _safe_url_for_log(oversized, max_len=40)
        assert len(shortened) == 40
        assert shortened.endswith("...")

    def test_handles_small_and_non_positive_max_len(self):
        """Tiny limits yield only dots; a zero limit yields an empty string."""
        target = "https://example.com/very/long/path/file.png?token=secret"
        expected_by_limit = {3: "...", 2: "..", 0: ""}
        for limit, expected in expected_by_limit.items():
            assert _safe_url_for_log(target, max_len=limit) == expected