Files
hermes-agent/gateway/platforms/telegram.py
Teknium 46176c8029 refactor: centralize slash command registry (#1603)
* refactor: centralize slash command registry

Replace 7+ scattered command definition sites with a single
CommandDef registry in hermes_cli/commands.py. All downstream
consumers now derive from this registry:

- CLI process_command() resolves aliases via resolve_command()
- Gateway _known_commands uses GATEWAY_KNOWN_COMMANDS frozenset
- Gateway help text generated by gateway_help_lines()
- Telegram BotCommands generated by telegram_bot_commands()
- Slack subcommand map generated by slack_subcommand_map()

Adding a command or alias is now a one-line change to
COMMAND_REGISTRY instead of touching 6+ files.

Bugfixes included:
- Telegram now registers /rollback, /background (were missing)
- Slack now has /voice, /update, /reload-mcp (were missing)
- Gateway duplicate 'reasoning' dispatch (dead code) removed
- Gateway help text can no longer drift from CLI help

Backwards-compatible: COMMANDS and COMMANDS_BY_CATEGORY dicts are
rebuilt from the registry, so existing imports work unchanged.

* docs: update developer docs for centralized command registry

Update AGENTS.md with full 'Slash Command Registry' and 'Adding a
Slash Command' sections covering CommandDef fields, registry helpers,
and the one-line alias workflow.

Also update:
- CONTRIBUTING.md: commands.py description
- website/docs/reference/slash-commands.md: reference central registry
- docs/plans/centralize-command-registry.md: mark COMPLETED
- plans/checkpoint-rollback.md: reference new pattern
- hermes-agent-dev skill: architecture table

* chore: remove stale plan docs
2026-03-16 23:21:03 -07:00

1196 lines
49 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Telegram platform adapter.
Uses python-telegram-bot library for:
- Receiving messages from users/groups
- Sending responses back
- Handling media and commands
"""
import asyncio
import logging
import os
import re
from typing import Dict, List, Optional, Any
logger = logging.getLogger(__name__)
try:
from telegram import Update, Bot, Message
from telegram.ext import (
Application,
CommandHandler,
MessageHandler as TelegramMessageHandler,
ContextTypes,
filters,
)
from telegram.constants import ParseMode, ChatType
TELEGRAM_AVAILABLE = True
except ImportError:
TELEGRAM_AVAILABLE = False
Update = Any
Bot = Any
Message = Any
Application = Any
CommandHandler = Any
TelegramMessageHandler = Any
filters = None
ParseMode = None
ChatType = None
# Mock ContextTypes so type annotations using ContextTypes.DEFAULT_TYPE
# don't crash during class definition when the library isn't installed.
class _MockContextTypes:
DEFAULT_TYPE = Any
ContextTypes = _MockContextTypes
import sys
from pathlib import Path as _Path
sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import (
BasePlatformAdapter,
MessageEvent,
MessageType,
SendResult,
cache_image_from_bytes,
cache_audio_from_bytes,
cache_document_from_bytes,
SUPPORTED_DOCUMENT_TYPES,
)
def check_telegram_requirements() -> bool:
"""Check if Telegram dependencies are available."""
return TELEGRAM_AVAILABLE
# Matches every character that MarkdownV2 requires to be backslash-escaped
# when it appears outside a code span or fenced code block.
_MDV2_ESCAPE_RE = re.compile(r'([_*\[\]()~`>#\+\-=|{}.!\\])')
def _escape_mdv2(text: str) -> str:
"""Escape Telegram MarkdownV2 special characters with a preceding backslash."""
return _MDV2_ESCAPE_RE.sub(r'\\\1', text)
def _strip_mdv2(text: str) -> str:
"""Strip MarkdownV2 escape backslashes to produce clean plain text.
Also removes MarkdownV2 bold markers (*text* -> text) so the fallback
doesn't show stray asterisks from header/bold conversion.
"""
# Remove escape backslashes before special characters
cleaned = re.sub(r'\\([_*\[\]()~`>#\+\-=|{}.!\\])', r'\1', text)
# Remove MarkdownV2 bold markers that format_message converted from **bold**
cleaned = re.sub(r'\*([^*]+)\*', r'\1', cleaned)
# Remove MarkdownV2 italic markers that format_message converted from *italic*
# Use word boundary (\b) to avoid breaking snake_case like my_variable_name
cleaned = re.sub(r'(?<!\w)_([^_]+)_(?!\w)', r'\1', cleaned)
return cleaned
class TelegramAdapter(BasePlatformAdapter):
"""
Telegram bot adapter.
Handles:
- Receiving messages from users and groups
- Sending responses with Telegram markdown
- Forum topics (thread_id support)
- Media messages
"""
# Telegram message limits
MAX_MESSAGE_LENGTH = 4096
MEDIA_GROUP_WAIT_SECONDS = 0.8
def __init__(self, config: PlatformConfig):
super().__init__(config, Platform.TELEGRAM)
self._app: Optional[Application] = None
self._bot: Optional[Bot] = None
# Buffer rapid/album photo updates so Telegram image bursts are handled
# as a single MessageEvent instead of self-interrupting multiple turns.
self._media_batch_delay_seconds = float(os.getenv("HERMES_TELEGRAM_MEDIA_BATCH_DELAY_SECONDS", "0.8"))
self._pending_photo_batches: Dict[str, MessageEvent] = {}
self._pending_photo_batch_tasks: Dict[str, asyncio.Task] = {}
self._media_group_events: Dict[str, MessageEvent] = {}
self._media_group_tasks: Dict[str, asyncio.Task] = {}
self._token_lock_identity: Optional[str] = None
self._polling_error_task: Optional[asyncio.Task] = None
@staticmethod
def _looks_like_polling_conflict(error: Exception) -> bool:
text = str(error).lower()
return (
error.__class__.__name__.lower() == "conflict"
or "terminated by other getupdates request" in text
or "another bot instance is running" in text
)
async def _handle_polling_conflict(self, error: Exception) -> None:
if self.has_fatal_error and self.fatal_error_code == "telegram_polling_conflict":
return
message = (
"Another Telegram bot poller is already using this token. "
"Hermes stopped Telegram polling to avoid endless retry spam. "
"Make sure only one gateway instance is running for this bot token."
)
logger.error("[%s] %s Original error: %s", self.name, message, error)
self._set_fatal_error("telegram_polling_conflict", message, retryable=False)
try:
if self._app and self._app.updater:
await self._app.updater.stop()
except Exception as stop_error:
logger.warning("[%s] Failed stopping Telegram polling after conflict: %s", self.name, stop_error, exc_info=True)
await self._notify_fatal_error()
async def connect(self) -> bool:
"""Connect to Telegram and start polling for updates."""
if not TELEGRAM_AVAILABLE:
logger.error(
"[%s] python-telegram-bot not installed. Run: pip install python-telegram-bot",
self.name,
)
return False
if not self.config.token:
logger.error("[%s] No bot token configured", self.name)
return False
try:
from gateway.status import acquire_scoped_lock
self._token_lock_identity = self.config.token
acquired, existing = acquire_scoped_lock(
"telegram-bot-token",
self._token_lock_identity,
metadata={"platform": self.platform.value},
)
if not acquired:
owner_pid = existing.get("pid") if isinstance(existing, dict) else None
message = (
"Another local Hermes gateway is already using this Telegram bot token"
+ (f" (PID {owner_pid})." if owner_pid else ".")
+ " Stop the other gateway before starting a second Telegram poller."
)
logger.error("[%s] %s", self.name, message)
self._set_fatal_error("telegram_token_lock", message, retryable=False)
return False
# Build the application
self._app = Application.builder().token(self.config.token).build()
self._bot = self._app.bot
# Register handlers
self._app.add_handler(TelegramMessageHandler(
filters.TEXT & ~filters.COMMAND,
self._handle_text_message
))
self._app.add_handler(TelegramMessageHandler(
filters.COMMAND,
self._handle_command
))
self._app.add_handler(TelegramMessageHandler(
filters.LOCATION | getattr(filters, "VENUE", filters.LOCATION),
self._handle_location_message
))
self._app.add_handler(TelegramMessageHandler(
filters.PHOTO | filters.VIDEO | filters.AUDIO | filters.VOICE | filters.Document.ALL | filters.Sticker.ALL,
self._handle_media_message
))
# Start polling — retry initialize() for transient TLS resets
try:
from telegram.error import NetworkError, TimedOut
except ImportError:
NetworkError = TimedOut = OSError # type: ignore[misc,assignment]
_max_connect = 3
for _attempt in range(_max_connect):
try:
await self._app.initialize()
break
except (NetworkError, TimedOut, OSError) as init_err:
if _attempt < _max_connect - 1:
wait = 2 ** _attempt
logger.warning(
"[%s] Connect attempt %d/%d failed: %s — retrying in %ds",
self.name, _attempt + 1, _max_connect, init_err, wait,
)
await asyncio.sleep(wait)
else:
raise
await self._app.start()
loop = asyncio.get_running_loop()
def _polling_error_callback(error: Exception) -> None:
if not self._looks_like_polling_conflict(error):
logger.error("[%s] Telegram polling error: %s", self.name, error, exc_info=True)
return
if self._polling_error_task and not self._polling_error_task.done():
return
self._polling_error_task = loop.create_task(self._handle_polling_conflict(error))
await self._app.updater.start_polling(
allowed_updates=Update.ALL_TYPES,
drop_pending_updates=True,
error_callback=_polling_error_callback,
)
# Register bot commands so Telegram shows a hint menu when users type /
# List is derived from the central COMMAND_REGISTRY — adding a new
# gateway command there automatically adds it to the Telegram menu.
try:
from telegram import BotCommand
from hermes_cli.commands import telegram_bot_commands
await self._bot.set_my_commands([
BotCommand(name, desc) for name, desc in telegram_bot_commands()
])
except Exception as e:
logger.warning(
"[%s] Could not register Telegram command menu: %s",
self.name,
e,
exc_info=True,
)
self._mark_connected()
logger.info("[%s] Connected and polling for Telegram updates", self.name)
return True
except Exception as e:
if self._token_lock_identity:
try:
from gateway.status import release_scoped_lock
release_scoped_lock("telegram-bot-token", self._token_lock_identity)
except Exception:
pass
message = f"Telegram startup failed: {e}"
self._set_fatal_error("telegram_connect_error", message, retryable=True)
logger.error("[%s] Failed to connect to Telegram: %s", self.name, e, exc_info=True)
return False
async def disconnect(self) -> None:
"""Stop polling, cancel pending album flushes, and disconnect."""
pending_media_group_tasks = list(self._media_group_tasks.values())
for task in pending_media_group_tasks:
task.cancel()
if pending_media_group_tasks:
await asyncio.gather(*pending_media_group_tasks, return_exceptions=True)
self._media_group_tasks.clear()
self._media_group_events.clear()
if self._app:
try:
# Only stop the updater if it's running
if self._app.updater and self._app.updater.running:
await self._app.updater.stop()
if self._app.running:
await self._app.stop()
await self._app.shutdown()
except Exception as e:
logger.warning("[%s] Error during Telegram disconnect: %s", self.name, e, exc_info=True)
if self._token_lock_identity:
try:
from gateway.status import release_scoped_lock
release_scoped_lock("telegram-bot-token", self._token_lock_identity)
except Exception as e:
logger.warning("[%s] Error releasing Telegram token lock: %s", self.name, e, exc_info=True)
for task in self._pending_photo_batch_tasks.values():
if task and not task.done():
task.cancel()
self._pending_photo_batch_tasks.clear()
self._pending_photo_batches.clear()
self._mark_disconnected()
self._app = None
self._bot = None
self._token_lock_identity = None
logger.info("[%s] Disconnected from Telegram", self.name)
async def send(
self,
chat_id: str,
content: str,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None
) -> SendResult:
"""Send a message to a Telegram chat."""
if not self._bot:
return SendResult(success=False, error="Not connected")
try:
# Format and split message if needed
formatted = self.format_message(content)
chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
if len(chunks) > 1:
# truncate_message appends a raw " (1/2)" suffix. Escape the
# MarkdownV2-special parentheses so Telegram doesn't reject the
# chunk and fall back to plain text.
chunks = [
re.sub(r" \((\d+)/(\d+)\)$", r" \\(\1/\2\\)", chunk)
for chunk in chunks
]
message_ids = []
thread_id = metadata.get("thread_id") if metadata else None
try:
from telegram.error import NetworkError as _NetErr
except ImportError:
_NetErr = OSError # type: ignore[misc,assignment]
for i, chunk in enumerate(chunks):
msg = None
for _send_attempt in range(3):
try:
# Try Markdown first, fall back to plain text if it fails
try:
msg = await self._bot.send_message(
chat_id=int(chat_id),
text=chunk,
parse_mode=ParseMode.MARKDOWN_V2,
reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
message_thread_id=int(thread_id) if thread_id else None,
)
except Exception as md_error:
# Markdown parsing failed, try plain text
if "parse" in str(md_error).lower() or "markdown" in str(md_error).lower():
logger.warning("[%s] MarkdownV2 parse failed, falling back to plain text: %s", self.name, md_error)
plain_chunk = _strip_mdv2(chunk)
msg = await self._bot.send_message(
chat_id=int(chat_id),
text=plain_chunk,
parse_mode=None,
reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
message_thread_id=int(thread_id) if thread_id else None,
)
else:
raise
break # success
except _NetErr as send_err:
if _send_attempt < 2:
wait = 2 ** _send_attempt
logger.warning("[%s] Network error on send (attempt %d/3), retrying in %ds: %s",
self.name, _send_attempt + 1, wait, send_err)
await asyncio.sleep(wait)
else:
raise
message_ids.append(str(msg.message_id))
return SendResult(
success=True,
message_id=message_ids[0] if message_ids else None,
raw_response={"message_ids": message_ids}
)
except Exception as e:
logger.error("[%s] Failed to send Telegram message: %s", self.name, e, exc_info=True)
return SendResult(success=False, error=str(e))
async def edit_message(
self,
chat_id: str,
message_id: str,
content: str,
) -> SendResult:
"""Edit a previously sent Telegram message."""
if not self._bot:
return SendResult(success=False, error="Not connected")
try:
formatted = self.format_message(content)
try:
await self._bot.edit_message_text(
chat_id=int(chat_id),
message_id=int(message_id),
text=formatted,
parse_mode=ParseMode.MARKDOWN_V2,
)
except Exception:
# Fallback: retry without markdown formatting
await self._bot.edit_message_text(
chat_id=int(chat_id),
message_id=int(message_id),
text=content,
)
return SendResult(success=True, message_id=message_id)
except Exception as e:
logger.error(
"[%s] Failed to edit Telegram message %s: %s",
self.name,
message_id,
e,
exc_info=True,
)
return SendResult(success=False, error=str(e))
async def send_voice(
self,
chat_id: str,
audio_path: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
**kwargs,
) -> SendResult:
"""Send audio as a native Telegram voice message or audio file."""
if not self._bot:
return SendResult(success=False, error="Not connected")
try:
import os
if not os.path.exists(audio_path):
return SendResult(success=False, error=f"Audio file not found: {audio_path}")
with open(audio_path, "rb") as audio_file:
# .ogg files -> send as voice (round playable bubble)
if audio_path.endswith(".ogg") or audio_path.endswith(".opus"):
_voice_thread = metadata.get("thread_id") if metadata else None
msg = await self._bot.send_voice(
chat_id=int(chat_id),
voice=audio_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_voice_thread) if _voice_thread else None,
)
else:
# .mp3 and others -> send as audio file
_audio_thread = metadata.get("thread_id") if metadata else None
msg = await self._bot.send_audio(
chat_id=int(chat_id),
audio=audio_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_audio_thread) if _audio_thread else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
logger.error(
"[%s] Failed to send Telegram voice/audio, falling back to base adapter: %s",
self.name,
e,
exc_info=True,
)
return await super().send_voice(chat_id, audio_path, caption, reply_to)
async def send_image_file(
self,
chat_id: str,
image_path: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
**kwargs,
) -> SendResult:
"""Send a local image file natively as a Telegram photo."""
if not self._bot:
return SendResult(success=False, error="Not connected")
try:
import os
if not os.path.exists(image_path):
return SendResult(success=False, error=f"Image file not found: {image_path}")
with open(image_path, "rb") as image_file:
msg = await self._bot.send_photo(
chat_id=int(chat_id),
photo=image_file,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
logger.error(
"[%s] Failed to send Telegram local image, falling back to base adapter: %s",
self.name,
e,
exc_info=True,
)
return await super().send_image_file(chat_id, image_path, caption, reply_to)
async def send_document(
self,
chat_id: str,
file_path: str,
caption: Optional[str] = None,
file_name: Optional[str] = None,
reply_to: Optional[str] = None,
**kwargs,
) -> SendResult:
"""Send a document/file natively as a Telegram file attachment."""
if not self._bot:
return SendResult(success=False, error="Not connected")
try:
if not os.path.exists(file_path):
return SendResult(success=False, error=f"File not found: {file_path}")
display_name = file_name or os.path.basename(file_path)
with open(file_path, "rb") as f:
msg = await self._bot.send_document(
chat_id=int(chat_id),
document=f,
filename=display_name,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
print(f"[{self.name}] Failed to send document: {e}")
return await super().send_document(chat_id, file_path, caption, file_name, reply_to)
async def send_video(
self,
chat_id: str,
video_path: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
**kwargs,
) -> SendResult:
"""Send a video natively as a Telegram video message."""
if not self._bot:
return SendResult(success=False, error="Not connected")
try:
if not os.path.exists(video_path):
return SendResult(success=False, error=f"Video file not found: {video_path}")
with open(video_path, "rb") as f:
msg = await self._bot.send_video(
chat_id=int(chat_id),
video=f,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
print(f"[{self.name}] Failed to send video: {e}")
return await super().send_video(chat_id, video_path, caption, reply_to)
async def send_image(
self,
chat_id: str,
image_url: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
"""Send an image natively as a Telegram photo.
Tries URL-based send first (fast, works for <5MB images).
Falls back to downloading and uploading as file (supports up to 10MB).
"""
if not self._bot:
return SendResult(success=False, error="Not connected")
try:
# Telegram can send photos directly from URLs (up to ~5MB)
_photo_thread = metadata.get("thread_id") if metadata else None
msg = await self._bot.send_photo(
chat_id=int(chat_id),
photo=image_url,
caption=caption[:1024] if caption else None, # Telegram caption limit
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_photo_thread) if _photo_thread else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
logger.warning(
"[%s] URL-based send_photo failed, trying file upload: %s",
self.name,
e,
exc_info=True,
)
# Fallback: download and upload as file (supports up to 10MB)
try:
import httpx
async with httpx.AsyncClient(timeout=30.0) as client:
resp = await client.get(image_url)
resp.raise_for_status()
image_data = resp.content
msg = await self._bot.send_photo(
chat_id=int(chat_id),
photo=image_data,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e2:
logger.error(
"[%s] File upload send_photo also failed: %s",
self.name,
e2,
exc_info=True,
)
# Final fallback: send URL as text
return await super().send_image(chat_id, image_url, caption, reply_to)
async def send_animation(
self,
chat_id: str,
animation_url: str,
caption: Optional[str] = None,
reply_to: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> SendResult:
"""Send an animated GIF natively as a Telegram animation (auto-plays inline)."""
if not self._bot:
return SendResult(success=False, error="Not connected")
try:
_anim_thread = metadata.get("thread_id") if metadata else None
msg = await self._bot.send_animation(
chat_id=int(chat_id),
animation=animation_url,
caption=caption[:1024] if caption else None,
reply_to_message_id=int(reply_to) if reply_to else None,
message_thread_id=int(_anim_thread) if _anim_thread else None,
)
return SendResult(success=True, message_id=str(msg.message_id))
except Exception as e:
logger.error(
"[%s] Failed to send Telegram animation, falling back to photo: %s",
self.name,
e,
exc_info=True,
)
# Fallback: try as a regular photo
return await self.send_image(chat_id, animation_url, caption, reply_to)
async def send_typing(self, chat_id: str, metadata: Optional[Dict[str, Any]] = None) -> None:
"""Send typing indicator."""
if self._bot:
try:
_typing_thread = metadata.get("thread_id") if metadata else None
await self._bot.send_chat_action(
chat_id=int(chat_id),
action="typing",
message_thread_id=int(_typing_thread) if _typing_thread else None,
)
except Exception as e:
# Typing failures are non-fatal; log at debug level only.
logger.debug(
"[%s] Failed to send Telegram typing indicator: %s",
self.name,
e,
exc_info=True,
)
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
"""Get information about a Telegram chat."""
if not self._bot:
return {"name": "Unknown", "type": "dm"}
try:
chat = await self._bot.get_chat(int(chat_id))
chat_type = "dm"
if chat.type == ChatType.GROUP:
chat_type = "group"
elif chat.type == ChatType.SUPERGROUP:
chat_type = "group"
if chat.is_forum:
chat_type = "forum"
elif chat.type == ChatType.CHANNEL:
chat_type = "channel"
return {
"name": chat.title or chat.full_name or str(chat_id),
"type": chat_type,
"username": chat.username,
"is_forum": getattr(chat, "is_forum", False),
}
except Exception as e:
logger.error(
"[%s] Failed to get Telegram chat info for %s: %s",
self.name,
chat_id,
e,
exc_info=True,
)
return {"name": str(chat_id), "type": "dm", "error": str(e)}
def format_message(self, content: str) -> str:
"""
Convert standard markdown to Telegram MarkdownV2 format.
Protected regions (code blocks, inline code) are extracted first so
their contents are never modified. Standard markdown constructs
(headers, bold, italic, links) are translated to MarkdownV2 syntax,
and all remaining special characters are escaped.
"""
if not content:
return content
placeholders: dict = {}
counter = [0]
def _ph(value: str) -> str:
"""Stash *value* behind a placeholder token that survives escaping."""
key = f"\x00PH{counter[0]}\x00"
counter[0] += 1
placeholders[key] = value
return key
text = content
# 1) Protect fenced code blocks (``` ... ```)
text = re.sub(
r'(```(?:[^\n]*\n)?[\s\S]*?```)',
lambda m: _ph(m.group(0)),
text,
)
# 2) Protect inline code (`...`)
text = re.sub(r'(`[^`]+`)', lambda m: _ph(m.group(0)), text)
# 3) Convert markdown links escape the display text; inside the URL
# only ')' and '\' need escaping per the MarkdownV2 spec.
def _convert_link(m):
display = _escape_mdv2(m.group(1))
url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
return _ph(f'[{display}]({url})')
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
# 4) Convert markdown headers (## Title) → bold *Title*
def _convert_header(m):
inner = m.group(1).strip()
# Strip redundant bold markers that may appear inside a header
inner = re.sub(r'\*\*(.+?)\*\*', r'\1', inner)
return _ph(f'*{_escape_mdv2(inner)}*')
text = re.sub(
r'^#{1,6}\s+(.+)$', _convert_header, text, flags=re.MULTILINE
)
# 5) Convert bold: **text** → *text* (MarkdownV2 bold)
text = re.sub(
r'\*\*(.+?)\*\*',
lambda m: _ph(f'*{_escape_mdv2(m.group(1))}*'),
text,
)
# 6) Convert italic: *text* (single asterisk) → _text_ (MarkdownV2 italic)
# [^*\n]+ prevents matching across newlines (which would corrupt
# bullet lists using * markers and multi-line content).
text = re.sub(
r'\*([^*\n]+)\*',
lambda m: _ph(f'_{_escape_mdv2(m.group(1))}_'),
text,
)
# 7) Escape remaining special characters in plain text
text = _escape_mdv2(text)
# 8) Restore placeholders in reverse insertion order so that
# nested references (a placeholder inside another) resolve correctly.
for key in reversed(list(placeholders.keys())):
text = text.replace(key, placeholders[key])
return text
async def _handle_text_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
"""Handle incoming text messages."""
if not update.message or not update.message.text:
return
event = self._build_message_event(update.message, MessageType.TEXT)
await self.handle_message(event)
async def _handle_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
"""Handle incoming command messages."""
if not update.message or not update.message.text:
return
event = self._build_message_event(update.message, MessageType.COMMAND)
await self.handle_message(event)
async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
"""Handle incoming location/venue pin messages."""
if not update.message:
return
msg = update.message
venue = getattr(msg, "venue", None)
location = getattr(venue, "location", None) if venue else getattr(msg, "location", None)
if not location:
return
lat = getattr(location, "latitude", None)
lon = getattr(location, "longitude", None)
if lat is None or lon is None:
return
# Build a text message with coordinates and context
parts = ["[The user shared a location pin.]"]
if venue:
title = getattr(venue, "title", None)
address = getattr(venue, "address", None)
if title:
parts.append(f"Venue: {title}")
if address:
parts.append(f"Address: {address}")
parts.append(f"latitude: {lat}")
parts.append(f"longitude: {lon}")
parts.append(f"Map: https://www.google.com/maps/search/?api=1&query={lat},{lon}")
parts.append("Ask what they'd like to find nearby (restaurants, cafes, etc.) and any preferences.")
event = self._build_message_event(msg, MessageType.LOCATION)
event.text = "\n".join(parts)
await self.handle_message(event)
def _photo_batch_key(self, event: MessageEvent, msg: Message) -> str:
"""Return a batching key for Telegram photos/albums."""
from gateway.session import build_session_key
session_key = build_session_key(
event.source,
group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True),
)
media_group_id = getattr(msg, "media_group_id", None)
if media_group_id:
return f"{session_key}:album:{media_group_id}"
return f"{session_key}:photo-burst"
async def _flush_photo_batch(self, batch_key: str) -> None:
"""Send a buffered photo burst/album as a single MessageEvent."""
current_task = asyncio.current_task()
try:
await asyncio.sleep(self._media_batch_delay_seconds)
event = self._pending_photo_batches.pop(batch_key, None)
if not event:
return
logger.info("[Telegram] Flushing photo batch %s with %d image(s)", batch_key, len(event.media_urls))
await self.handle_message(event)
finally:
if self._pending_photo_batch_tasks.get(batch_key) is current_task:
self._pending_photo_batch_tasks.pop(batch_key, None)
def _enqueue_photo_event(self, batch_key: str, event: MessageEvent) -> None:
"""Merge photo events into a pending batch and schedule flush."""
existing = self._pending_photo_batches.get(batch_key)
if existing is None:
self._pending_photo_batches[batch_key] = event
else:
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
if event.text:
if not existing.text:
existing.text = event.text
elif event.text not in existing.text:
existing.text = f"{existing.text}\n\n{event.text}".strip()
prior_task = self._pending_photo_batch_tasks.get(batch_key)
if prior_task and not prior_task.done():
prior_task.cancel()
self._pending_photo_batch_tasks[batch_key] = asyncio.create_task(self._flush_photo_batch(batch_key))
async def _handle_media_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
"""Handle incoming media messages, downloading images to local cache."""
if not update.message:
return
msg = update.message
# Determine media type
if msg.sticker:
msg_type = MessageType.STICKER
elif msg.photo:
msg_type = MessageType.PHOTO
elif msg.video:
msg_type = MessageType.VIDEO
elif msg.audio:
msg_type = MessageType.AUDIO
elif msg.voice:
msg_type = MessageType.VOICE
elif msg.document:
msg_type = MessageType.DOCUMENT
else:
msg_type = MessageType.DOCUMENT
event = self._build_message_event(msg, msg_type)
# Add caption as text
if msg.caption:
event.text = msg.caption
# Handle stickers: describe via vision tool with caching
if msg.sticker:
await self._handle_sticker(msg, event)
await self.handle_message(event)
return
# Download photo to local image cache so the vision tool can access it
# even after Telegram's ephemeral file URLs expire (~1 hour).
if msg.photo:
try:
# msg.photo is a list of PhotoSize sorted by size; take the largest
photo = msg.photo[-1]
file_obj = await photo.get_file()
# Download the image bytes directly into memory
image_bytes = await file_obj.download_as_bytearray()
# Determine extension from the file path if available
ext = ".jpg"
if file_obj.file_path:
for candidate in [".png", ".webp", ".gif", ".jpeg", ".jpg"]:
if file_obj.file_path.lower().endswith(candidate):
ext = candidate
break
# Save to local cache (for vision tool access)
cached_path = cache_image_from_bytes(bytes(image_bytes), ext=ext)
event.media_urls = [cached_path]
event.media_types = [f"image/{ext.lstrip('.')}" ]
logger.info("[Telegram] Cached user photo at %s", cached_path)
media_group_id = getattr(msg, "media_group_id", None)
if media_group_id:
await self._queue_media_group_event(str(media_group_id), event)
else:
batch_key = self._photo_batch_key(event, msg)
self._enqueue_photo_event(batch_key, event)
return
except Exception as e:
logger.warning("[Telegram] Failed to cache photo: %s", e, exc_info=True)
# Download voice/audio messages to cache for STT transcription
if msg.voice:
try:
file_obj = await msg.voice.get_file()
audio_bytes = await file_obj.download_as_bytearray()
cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".ogg")
event.media_urls = [cached_path]
event.media_types = ["audio/ogg"]
logger.info("[Telegram] Cached user voice at %s", cached_path)
except Exception as e:
logger.warning("[Telegram] Failed to cache voice: %s", e, exc_info=True)
elif msg.audio:
try:
file_obj = await msg.audio.get_file()
audio_bytes = await file_obj.download_as_bytearray()
cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".mp3")
event.media_urls = [cached_path]
event.media_types = ["audio/mp3"]
logger.info("[Telegram] Cached user audio at %s", cached_path)
except Exception as e:
logger.warning("[Telegram] Failed to cache audio: %s", e, exc_info=True)
# Download document files to cache for agent processing
elif msg.document:
doc = msg.document
try:
# Determine file extension
ext = ""
original_filename = doc.file_name or ""
if original_filename:
_, ext = os.path.splitext(original_filename)
ext = ext.lower()
# If no extension from filename, reverse-lookup from MIME type
if not ext and doc.mime_type:
mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
ext = mime_to_ext.get(doc.mime_type, "")
# Check if supported
if ext not in SUPPORTED_DOCUMENT_TYPES:
supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))
event.text = (
f"Unsupported document type '{ext or 'unknown'}'. "
f"Supported types: {supported_list}"
)
logger.info("[Telegram] Unsupported document type: %s", ext or "unknown")
await self.handle_message(event)
return
# Check file size (Telegram Bot API limit: 20 MB)
MAX_DOC_BYTES = 20 * 1024 * 1024
if not doc.file_size or doc.file_size > MAX_DOC_BYTES:
event.text = (
"The document is too large or its size could not be verified. "
"Maximum: 20 MB."
)
logger.info("[Telegram] Document too large: %s bytes", doc.file_size)
await self.handle_message(event)
return
# Download and cache
file_obj = await doc.get_file()
doc_bytes = await file_obj.download_as_bytearray()
raw_bytes = bytes(doc_bytes)
cached_path = cache_document_from_bytes(raw_bytes, original_filename or f"document{ext}")
mime_type = SUPPORTED_DOCUMENT_TYPES[ext]
event.media_urls = [cached_path]
event.media_types = [mime_type]
logger.info("[Telegram] Cached user document at %s", cached_path)
# For text files, inject content into event.text (capped at 100 KB)
MAX_TEXT_INJECT_BYTES = 100 * 1024
if ext in (".md", ".txt") and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
try:
text_content = raw_bytes.decode("utf-8")
display_name = original_filename or f"document{ext}"
display_name = re.sub(r'[^\w.\- ]', '_', display_name)
injection = f"[Content of {display_name}]:\n{text_content}"
if event.text:
event.text = f"{injection}\n\n{event.text}"
else:
event.text = injection
except UnicodeDecodeError:
logger.warning(
"[Telegram] Could not decode text file as UTF-8, skipping content injection",
exc_info=True,
)
except Exception as e:
logger.warning("[Telegram] Failed to cache document: %s", e, exc_info=True)
media_group_id = getattr(msg, "media_group_id", None)
if media_group_id:
await self._queue_media_group_event(str(media_group_id), event)
return
await self.handle_message(event)
async def _queue_media_group_event(self, media_group_id: str, event: MessageEvent) -> None:
"""Buffer Telegram media-group items so albums arrive as one logical event.
Telegram delivers albums as multiple updates with a shared media_group_id.
If we forward each item immediately, the gateway thinks the second image is a
new user message and interrupts the first. We debounce briefly and merge the
attachments into a single MessageEvent.
"""
existing = self._media_group_events.get(media_group_id)
if existing is None:
self._media_group_events[media_group_id] = event
else:
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
if event.text:
if existing.text:
if event.text not in existing.text.split("\n\n"):
existing.text = f"{existing.text}\n\n{event.text}"
else:
existing.text = event.text
prior_task = self._media_group_tasks.get(media_group_id)
if prior_task:
prior_task.cancel()
self._media_group_tasks[media_group_id] = asyncio.create_task(
self._flush_media_group_event(media_group_id)
)
async def _flush_media_group_event(self, media_group_id: str) -> None:
try:
await asyncio.sleep(self.MEDIA_GROUP_WAIT_SECONDS)
event = self._media_group_events.pop(media_group_id, None)
if event is not None:
await self.handle_message(event)
except asyncio.CancelledError:
return
finally:
self._media_group_tasks.pop(media_group_id, None)
async def _handle_sticker(self, msg: Message, event: "MessageEvent") -> None:
"""
Describe a Telegram sticker via vision analysis, with caching.
For static stickers (WEBP), we download, analyze with vision, and cache
the description by file_unique_id. For animated/video stickers, we inject
a placeholder noting the emoji.
"""
from gateway.sticker_cache import (
get_cached_description,
cache_sticker_description,
build_sticker_injection,
build_animated_sticker_injection,
STICKER_VISION_PROMPT,
)
sticker = msg.sticker
emoji = sticker.emoji or ""
set_name = sticker.set_name or ""
# Animated and video stickers can't be analyzed as static images
if sticker.is_animated or sticker.is_video:
event.text = build_animated_sticker_injection(emoji)
return
# Check the cache first
cached = get_cached_description(sticker.file_unique_id)
if cached:
event.text = build_sticker_injection(
cached["description"], cached.get("emoji", emoji), cached.get("set_name", set_name)
)
logger.info("[Telegram] Sticker cache hit: %s", sticker.file_unique_id)
return
# Cache miss -- download and analyze
try:
file_obj = await sticker.get_file()
image_bytes = await file_obj.download_as_bytearray()
cached_path = cache_image_from_bytes(bytes(image_bytes), ext=".webp")
logger.info("[Telegram] Analyzing sticker at %s", cached_path)
from tools.vision_tools import vision_analyze_tool
import json as _json
result_json = await vision_analyze_tool(
image_url=cached_path,
user_prompt=STICKER_VISION_PROMPT,
)
result = _json.loads(result_json)
if result.get("success"):
description = result.get("analysis", "a sticker")
cache_sticker_description(sticker.file_unique_id, description, emoji, set_name)
event.text = build_sticker_injection(description, emoji, set_name)
else:
# Vision failed -- use emoji as fallback
event.text = build_sticker_injection(
f"a sticker with emoji {emoji}" if emoji else "a sticker",
emoji, set_name,
)
except Exception as e:
logger.warning("[Telegram] Sticker analysis error: %s", e, exc_info=True)
event.text = build_sticker_injection(
f"a sticker with emoji {emoji}" if emoji else "a sticker",
emoji, set_name,
)
def _build_message_event(self, message: Message, msg_type: MessageType) -> MessageEvent:
"""Build a MessageEvent from a Telegram message."""
chat = message.chat
user = message.from_user
# Determine chat type
chat_type = "dm"
if chat.type in (ChatType.GROUP, ChatType.SUPERGROUP):
chat_type = "group"
elif chat.type == ChatType.CHANNEL:
chat_type = "channel"
# Build source
source = self.build_source(
chat_id=str(chat.id),
chat_name=chat.title or (chat.full_name if hasattr(chat, "full_name") else None),
chat_type=chat_type,
user_id=str(user.id) if user else None,
user_name=user.full_name if user else None,
thread_id=str(message.message_thread_id) if message.message_thread_id else None,
)
return MessageEvent(
text=message.text or "",
message_type=msg_type,
source=source,
raw_message=message,
message_id=str(message.message_id),
timestamp=message.date,
)