2026-02-02 19:01:51 -08:00
|
|
|
|
"""
|
|
|
|
|
|
Telegram platform adapter.
|
|
|
|
|
|
|
|
|
|
|
|
Uses python-telegram-bot library for:
|
|
|
|
|
|
- Receiving messages from users/groups
|
|
|
|
|
|
- Sending responses back
|
|
|
|
|
|
- Handling media and commands
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
import asyncio
|
2026-03-07 01:23:18 -08:00
|
|
|
|
import logging
|
feat(telegram): add document file processing for PDF, text, and Office files
Download, cache, and enrich document files sent via Telegram. Supports
.pdf, .md, .txt, .docx, .xlsx, .pptx with size validation, unsupported
type rejection, text content injection for .md/.txt, and hourly cache
cleanup.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 11:44:57 -05:00
|
|
|
|
import os
|
2026-02-22 02:16:11 -08:00
|
|
|
|
import re
|
2026-02-02 19:01:51 -08:00
|
|
|
|
from typing import Dict, List, Optional, Any
|
|
|
|
|
|
|
2026-03-07 01:23:18 -08:00
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
|
try:
|
|
|
|
|
|
from telegram import Update, Bot, Message
|
|
|
|
|
|
from telegram.ext import (
|
|
|
|
|
|
Application,
|
|
|
|
|
|
CommandHandler,
|
|
|
|
|
|
MessageHandler as TelegramMessageHandler,
|
|
|
|
|
|
ContextTypes,
|
|
|
|
|
|
filters,
|
|
|
|
|
|
)
|
|
|
|
|
|
from telegram.constants import ParseMode, ChatType
|
|
|
|
|
|
TELEGRAM_AVAILABLE = True
|
|
|
|
|
|
except ImportError:
|
|
|
|
|
|
TELEGRAM_AVAILABLE = False
|
|
|
|
|
|
Update = Any
|
|
|
|
|
|
Bot = Any
|
|
|
|
|
|
Message = Any
|
|
|
|
|
|
Application = Any
|
2026-03-02 22:03:36 -08:00
|
|
|
|
CommandHandler = Any
|
|
|
|
|
|
TelegramMessageHandler = Any
|
|
|
|
|
|
filters = None
|
|
|
|
|
|
ParseMode = None
|
|
|
|
|
|
ChatType = None
|
|
|
|
|
|
|
|
|
|
|
|
# Mock ContextTypes so type annotations using ContextTypes.DEFAULT_TYPE
|
|
|
|
|
|
# don't crash during class definition when the library isn't installed.
|
|
|
|
|
|
class _MockContextTypes:
|
|
|
|
|
|
DEFAULT_TYPE = Any
|
|
|
|
|
|
ContextTypes = _MockContextTypes
|
2026-02-02 19:01:51 -08:00
|
|
|
|
|
|
|
|
|
|
import sys
|
2026-02-21 04:17:27 -08:00
|
|
|
|
from pathlib import Path as _Path
|
|
|
|
|
|
sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
|
2026-02-02 19:01:51 -08:00
|
|
|
|
|
|
|
|
|
|
from gateway.config import Platform, PlatformConfig
|
|
|
|
|
|
from gateway.platforms.base import (
|
|
|
|
|
|
BasePlatformAdapter,
|
|
|
|
|
|
MessageEvent,
|
|
|
|
|
|
MessageType,
|
|
|
|
|
|
SendResult,
|
2026-02-15 16:10:50 -08:00
|
|
|
|
cache_image_from_bytes,
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
|
cache_audio_from_bytes,
|
feat(telegram): add document file processing for PDF, text, and Office files
Download, cache, and enrich document files sent via Telegram. Supports
.pdf, .md, .txt, .docx, .xlsx, .pptx with size validation, unsupported
type rejection, text content injection for .md/.txt, and hourly cache
cleanup.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 11:44:57 -05:00
|
|
|
|
cache_document_from_bytes,
|
|
|
|
|
|
SUPPORTED_DOCUMENT_TYPES,
|
2026-02-02 19:01:51 -08:00
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def check_telegram_requirements() -> bool:
|
|
|
|
|
|
"""Check if Telegram dependencies are available."""
|
|
|
|
|
|
return TELEGRAM_AVAILABLE
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-22 02:16:11 -08:00
|
|
|
|
# Matches every character that MarkdownV2 requires to be backslash-escaped
|
|
|
|
|
|
# when it appears outside a code span or fenced code block.
|
|
|
|
|
|
_MDV2_ESCAPE_RE = re.compile(r'([_*\[\]()~`>#\+\-=|{}.!\\])')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _escape_mdv2(text: str) -> str:
|
|
|
|
|
|
"""Escape Telegram MarkdownV2 special characters with a preceding backslash."""
|
|
|
|
|
|
return _MDV2_ESCAPE_RE.sub(r'\\\1', text)
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-03-07 01:23:18 -08:00
|
|
|
|
def _strip_mdv2(text: str) -> str:
|
|
|
|
|
|
"""Strip MarkdownV2 escape backslashes to produce clean plain text.
|
|
|
|
|
|
|
|
|
|
|
|
Also removes MarkdownV2 bold markers (*text* -> text) so the fallback
|
|
|
|
|
|
doesn't show stray asterisks from header/bold conversion.
|
|
|
|
|
|
"""
|
|
|
|
|
|
# Remove escape backslashes before special characters
|
|
|
|
|
|
cleaned = re.sub(r'\\([_*\[\]()~`>#\+\-=|{}.!\\])', r'\1', text)
|
|
|
|
|
|
# Remove MarkdownV2 bold markers that format_message converted from **bold**
|
|
|
|
|
|
cleaned = re.sub(r'\*([^*]+)\*', r'\1', cleaned)
|
2026-03-07 18:55:25 +03:00
|
|
|
|
# Remove MarkdownV2 italic markers that format_message converted from *italic*
|
|
|
|
|
|
# Use word boundary (\b) to avoid breaking snake_case like my_variable_name
|
|
|
|
|
|
cleaned = re.sub(r'(?<!\w)_([^_]+)_(?!\w)', r'\1', cleaned)
|
2026-03-07 01:23:18 -08:00
|
|
|
|
return cleaned
|
|
|
|
|
|
|
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
|
class TelegramAdapter(BasePlatformAdapter):
|
|
|
|
|
|
"""
|
|
|
|
|
|
Telegram bot adapter.
|
|
|
|
|
|
|
|
|
|
|
|
Handles:
|
|
|
|
|
|
- Receiving messages from users and groups
|
|
|
|
|
|
- Sending responses with Telegram markdown
|
|
|
|
|
|
- Forum topics (thread_id support)
|
|
|
|
|
|
- Media messages
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
# Telegram message limits
|
|
|
|
|
|
MAX_MESSAGE_LENGTH = 4096
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, config: PlatformConfig):
|
|
|
|
|
|
super().__init__(config, Platform.TELEGRAM)
|
|
|
|
|
|
self._app: Optional[Application] = None
|
|
|
|
|
|
self._bot: Optional[Bot] = None
|
|
|
|
|
|
|
|
|
|
|
|
async def connect(self) -> bool:
|
|
|
|
|
|
"""Connect to Telegram and start polling for updates."""
|
|
|
|
|
|
if not TELEGRAM_AVAILABLE:
|
|
|
|
|
|
print(f"[{self.name}] python-telegram-bot not installed. Run: pip install python-telegram-bot")
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
if not self.config.token:
|
|
|
|
|
|
print(f"[{self.name}] No bot token configured")
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
# Build the application
|
|
|
|
|
|
self._app = Application.builder().token(self.config.token).build()
|
|
|
|
|
|
self._bot = self._app.bot
|
|
|
|
|
|
|
|
|
|
|
|
# Register handlers
|
|
|
|
|
|
self._app.add_handler(TelegramMessageHandler(
|
|
|
|
|
|
filters.TEXT & ~filters.COMMAND,
|
|
|
|
|
|
self._handle_text_message
|
|
|
|
|
|
))
|
|
|
|
|
|
self._app.add_handler(TelegramMessageHandler(
|
|
|
|
|
|
filters.COMMAND,
|
|
|
|
|
|
self._handle_command
|
|
|
|
|
|
))
|
feat: find-nearby skill and Telegram location support
Adds a 'find-nearby' skill for discovering nearby places using
OpenStreetMap (Overpass + Nominatim). No API keys needed. Works with:
- Coordinates (from Telegram location pins)
- Addresses, cities, zip codes, landmarks (auto-geocoded)
- Multiple place types (restaurant, cafe, bar, pharmacy, etc.)
Returns names, distances, cuisine, hours, addresses, and Google Maps
links (pin + directions). 184-line stdlib-only script.
Also adds Telegram location message handling:
- New MessageType.LOCATION in gateway base
- Telegram adapter handles LOCATION and VENUE messages
- Injects lat/lon coordinates into conversation context
- Prompts agent to ask what the user wants nearby
Inspired by PR #422 (reimplemented with simpler script and broader
skill scope — addresses/cities/zips, not just Telegram coordinates).
2026-03-09 05:31:10 -07:00
|
|
|
|
self._app.add_handler(TelegramMessageHandler(
|
|
|
|
|
|
filters.LOCATION | getattr(filters, "VENUE", filters.LOCATION),
|
|
|
|
|
|
self._handle_location_message
|
|
|
|
|
|
))
|
2026-02-02 19:01:51 -08:00
|
|
|
|
self._app.add_handler(TelegramMessageHandler(
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
|
filters.PHOTO | filters.VIDEO | filters.AUDIO | filters.VOICE | filters.Document.ALL | filters.Sticker.ALL,
|
2026-02-02 19:01:51 -08:00
|
|
|
|
self._handle_media_message
|
|
|
|
|
|
))
|
|
|
|
|
|
|
|
|
|
|
|
# Start polling in background
|
|
|
|
|
|
await self._app.initialize()
|
|
|
|
|
|
await self._app.start()
|
|
|
|
|
|
await self._app.updater.start_polling(allowed_updates=Update.ALL_TYPES)
|
|
|
|
|
|
|
2026-02-19 14:31:53 -08:00
|
|
|
|
# Register bot commands so Telegram shows a hint menu when users type /
|
|
|
|
|
|
try:
|
|
|
|
|
|
from telegram import BotCommand
|
|
|
|
|
|
await self._bot.set_my_commands([
|
|
|
|
|
|
BotCommand("new", "Start a new conversation"),
|
|
|
|
|
|
BotCommand("reset", "Reset conversation history"),
|
|
|
|
|
|
BotCommand("model", "Show or change the model"),
|
|
|
|
|
|
BotCommand("personality", "Set a personality"),
|
|
|
|
|
|
BotCommand("retry", "Retry your last message"),
|
|
|
|
|
|
BotCommand("undo", "Remove the last exchange"),
|
|
|
|
|
|
BotCommand("status", "Show session info"),
|
|
|
|
|
|
BotCommand("stop", "Stop the running agent"),
|
2026-02-23 15:01:22 -08:00
|
|
|
|
BotCommand("sethome", "Set this chat as the home channel"),
|
feat: register title, resume, and other missing commands with platform menus
Add /title, /resume, /compress, /provider, /usage to Telegram's
set_my_commands so they appear in the / autocomplete menu.
Add /title, /resume, /compress, /provider, /usage, /help as Discord
slash commands so they appear in Discord's native command picker.
These commands were functional via text but not registered with the
platform-native command menus, so users couldn't discover them.
2026-03-08 17:11:49 -07:00
|
|
|
|
BotCommand("compress", "Compress conversation context"),
|
|
|
|
|
|
BotCommand("title", "Set or show the session title"),
|
|
|
|
|
|
BotCommand("resume", "Resume a previously-named session"),
|
|
|
|
|
|
BotCommand("usage", "Show token usage for this session"),
|
|
|
|
|
|
BotCommand("provider", "Show available providers"),
|
2026-03-08 17:13:45 -07:00
|
|
|
|
BotCommand("insights", "Show usage insights and analytics"),
|
|
|
|
|
|
BotCommand("update", "Update Hermes to the latest version"),
|
|
|
|
|
|
BotCommand("reload_mcp", "Reload MCP servers from config"),
|
2026-02-19 14:31:53 -08:00
|
|
|
|
BotCommand("help", "Show available commands"),
|
|
|
|
|
|
])
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"[{self.name}] Could not register command menu: {e}")
|
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
|
self._running = True
|
|
|
|
|
|
print(f"[{self.name}] Connected and polling for updates")
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"[{self.name}] Failed to connect: {e}")
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
async def disconnect(self) -> None:
|
|
|
|
|
|
"""Stop polling and disconnect."""
|
|
|
|
|
|
if self._app:
|
|
|
|
|
|
try:
|
|
|
|
|
|
await self._app.updater.stop()
|
|
|
|
|
|
await self._app.stop()
|
|
|
|
|
|
await self._app.shutdown()
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"[{self.name}] Error during disconnect: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
self._running = False
|
|
|
|
|
|
self._app = None
|
|
|
|
|
|
self._bot = None
|
|
|
|
|
|
print(f"[{self.name}] Disconnected")
|
|
|
|
|
|
|
|
|
|
|
|
async def send(
|
|
|
|
|
|
self,
|
|
|
|
|
|
chat_id: str,
|
|
|
|
|
|
content: str,
|
|
|
|
|
|
reply_to: Optional[str] = None,
|
|
|
|
|
|
metadata: Optional[Dict[str, Any]] = None
|
|
|
|
|
|
) -> SendResult:
|
|
|
|
|
|
"""Send a message to a Telegram chat."""
|
|
|
|
|
|
if not self._bot:
|
|
|
|
|
|
return SendResult(success=False, error="Not connected")
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
# Format and split message if needed
|
|
|
|
|
|
formatted = self.format_message(content)
|
|
|
|
|
|
chunks = self.truncate_message(formatted, self.MAX_MESSAGE_LENGTH)
|
|
|
|
|
|
|
|
|
|
|
|
message_ids = []
|
|
|
|
|
|
thread_id = metadata.get("thread_id") if metadata else None
|
|
|
|
|
|
|
|
|
|
|
|
for i, chunk in enumerate(chunks):
|
2026-02-03 15:42:54 -08:00
|
|
|
|
# Try Markdown first, fall back to plain text if it fails
|
|
|
|
|
|
try:
|
|
|
|
|
|
msg = await self._bot.send_message(
|
|
|
|
|
|
chat_id=int(chat_id),
|
|
|
|
|
|
text=chunk,
|
2026-02-22 02:16:11 -08:00
|
|
|
|
parse_mode=ParseMode.MARKDOWN_V2,
|
2026-02-03 15:42:54 -08:00
|
|
|
|
reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
|
|
|
|
|
|
message_thread_id=int(thread_id) if thread_id else None,
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as md_error:
|
|
|
|
|
|
# Markdown parsing failed, try plain text
|
|
|
|
|
|
if "parse" in str(md_error).lower() or "markdown" in str(md_error).lower():
|
2026-03-07 01:23:18 -08:00
|
|
|
|
logger.warning("[%s] MarkdownV2 parse failed, falling back to plain text: %s", self.name, md_error)
|
|
|
|
|
|
# Strip MDV2 escape backslashes so the user doesn't
|
|
|
|
|
|
# see raw backslashes littered through the message.
|
|
|
|
|
|
plain_chunk = _strip_mdv2(chunk)
|
2026-02-03 15:42:54 -08:00
|
|
|
|
msg = await self._bot.send_message(
|
|
|
|
|
|
chat_id=int(chat_id),
|
2026-03-07 01:23:18 -08:00
|
|
|
|
text=plain_chunk,
|
2026-02-03 15:42:54 -08:00
|
|
|
|
parse_mode=None, # Plain text
|
|
|
|
|
|
reply_to_message_id=int(reply_to) if reply_to and i == 0 else None,
|
|
|
|
|
|
message_thread_id=int(thread_id) if thread_id else None,
|
|
|
|
|
|
)
|
|
|
|
|
|
else:
|
|
|
|
|
|
raise # Re-raise if not a parse error
|
2026-02-02 19:01:51 -08:00
|
|
|
|
message_ids.append(str(msg.message_id))
|
|
|
|
|
|
|
|
|
|
|
|
return SendResult(
|
|
|
|
|
|
success=True,
|
|
|
|
|
|
message_id=message_ids[0] if message_ids else None,
|
|
|
|
|
|
raw_response={"message_ids": message_ids}
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
return SendResult(success=False, error=str(e))
|
2026-03-05 03:47:51 -08:00
|
|
|
|
|
|
|
|
|
|
async def edit_message(
|
|
|
|
|
|
self,
|
|
|
|
|
|
chat_id: str,
|
|
|
|
|
|
message_id: str,
|
|
|
|
|
|
content: str,
|
|
|
|
|
|
) -> SendResult:
|
|
|
|
|
|
"""Edit a previously sent Telegram message."""
|
|
|
|
|
|
if not self._bot:
|
|
|
|
|
|
return SendResult(success=False, error="Not connected")
|
|
|
|
|
|
try:
|
|
|
|
|
|
formatted = self.format_message(content)
|
|
|
|
|
|
try:
|
|
|
|
|
|
await self._bot.edit_message_text(
|
|
|
|
|
|
chat_id=int(chat_id),
|
|
|
|
|
|
message_id=int(message_id),
|
|
|
|
|
|
text=formatted,
|
|
|
|
|
|
parse_mode=ParseMode.MARKDOWN_V2,
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
# Fallback: retry without markdown formatting
|
|
|
|
|
|
await self._bot.edit_message_text(
|
|
|
|
|
|
chat_id=int(chat_id),
|
|
|
|
|
|
message_id=int(message_id),
|
|
|
|
|
|
text=content,
|
|
|
|
|
|
)
|
|
|
|
|
|
return SendResult(success=True, message_id=message_id)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
return SendResult(success=False, error=str(e))
|
|
|
|
|
|
|
2026-02-12 10:05:08 -08:00
|
|
|
|
async def send_voice(
|
|
|
|
|
|
self,
|
|
|
|
|
|
chat_id: str,
|
|
|
|
|
|
audio_path: str,
|
|
|
|
|
|
caption: Optional[str] = None,
|
|
|
|
|
|
reply_to: Optional[str] = None,
|
2026-03-10 06:26:16 -07:00
|
|
|
|
metadata: Optional[Dict[str, Any]] = None,
|
2026-02-12 10:05:08 -08:00
|
|
|
|
) -> SendResult:
|
|
|
|
|
|
"""Send audio as a native Telegram voice message or audio file."""
|
|
|
|
|
|
if not self._bot:
|
|
|
|
|
|
return SendResult(success=False, error="Not connected")
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
import os
|
|
|
|
|
|
if not os.path.exists(audio_path):
|
|
|
|
|
|
return SendResult(success=False, error=f"Audio file not found: {audio_path}")
|
|
|
|
|
|
|
|
|
|
|
|
with open(audio_path, "rb") as audio_file:
|
|
|
|
|
|
# .ogg files -> send as voice (round playable bubble)
|
|
|
|
|
|
if audio_path.endswith(".ogg") or audio_path.endswith(".opus"):
|
2026-03-10 06:26:16 -07:00
|
|
|
|
_voice_thread = metadata.get("thread_id") if metadata else None
|
2026-02-12 10:05:08 -08:00
|
|
|
|
msg = await self._bot.send_voice(
|
|
|
|
|
|
chat_id=int(chat_id),
|
|
|
|
|
|
voice=audio_file,
|
|
|
|
|
|
caption=caption[:1024] if caption else None,
|
|
|
|
|
|
reply_to_message_id=int(reply_to) if reply_to else None,
|
2026-03-10 06:26:16 -07:00
|
|
|
|
message_thread_id=int(_voice_thread) if _voice_thread else None,
|
2026-02-12 10:05:08 -08:00
|
|
|
|
)
|
|
|
|
|
|
else:
|
|
|
|
|
|
# .mp3 and others -> send as audio file
|
2026-03-10 06:26:16 -07:00
|
|
|
|
_audio_thread = metadata.get("thread_id") if metadata else None
|
2026-02-12 10:05:08 -08:00
|
|
|
|
msg = await self._bot.send_audio(
|
|
|
|
|
|
chat_id=int(chat_id),
|
|
|
|
|
|
audio=audio_file,
|
|
|
|
|
|
caption=caption[:1024] if caption else None,
|
|
|
|
|
|
reply_to_message_id=int(reply_to) if reply_to else None,
|
2026-03-10 06:26:16 -07:00
|
|
|
|
message_thread_id=int(_audio_thread) if _audio_thread else None,
|
2026-02-12 10:05:08 -08:00
|
|
|
|
)
|
|
|
|
|
|
return SendResult(success=True, message_id=str(msg.message_id))
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"[{self.name}] Failed to send voice/audio: {e}")
|
|
|
|
|
|
return await super().send_voice(chat_id, audio_path, caption, reply_to)
|
|
|
|
|
|
|
2026-03-07 22:57:05 -08:00
|
|
|
|
async def send_image_file(
|
|
|
|
|
|
self,
|
|
|
|
|
|
chat_id: str,
|
|
|
|
|
|
image_path: str,
|
|
|
|
|
|
caption: Optional[str] = None,
|
|
|
|
|
|
reply_to: Optional[str] = None,
|
|
|
|
|
|
) -> SendResult:
|
|
|
|
|
|
"""Send a local image file natively as a Telegram photo."""
|
|
|
|
|
|
if not self._bot:
|
|
|
|
|
|
return SendResult(success=False, error="Not connected")
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
import os
|
|
|
|
|
|
if not os.path.exists(image_path):
|
|
|
|
|
|
return SendResult(success=False, error=f"Image file not found: {image_path}")
|
|
|
|
|
|
|
|
|
|
|
|
with open(image_path, "rb") as image_file:
|
|
|
|
|
|
msg = await self._bot.send_photo(
|
|
|
|
|
|
chat_id=int(chat_id),
|
|
|
|
|
|
photo=image_file,
|
|
|
|
|
|
caption=caption[:1024] if caption else None,
|
|
|
|
|
|
reply_to_message_id=int(reply_to) if reply_to else None,
|
|
|
|
|
|
)
|
|
|
|
|
|
return SendResult(success=True, message_id=str(msg.message_id))
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"[{self.name}] Failed to send local image: {e}")
|
|
|
|
|
|
return await super().send_image_file(chat_id, image_path, caption, reply_to)
|
|
|
|
|
|
|
2026-02-10 21:02:40 -08:00
|
|
|
|
async def send_image(
|
|
|
|
|
|
self,
|
|
|
|
|
|
chat_id: str,
|
|
|
|
|
|
image_url: str,
|
|
|
|
|
|
caption: Optional[str] = None,
|
|
|
|
|
|
reply_to: Optional[str] = None,
|
2026-03-10 06:26:16 -07:00
|
|
|
|
metadata: Optional[Dict[str, Any]] = None,
|
2026-02-10 21:02:40 -08:00
|
|
|
|
) -> SendResult:
|
2026-03-07 21:29:45 -08:00
|
|
|
|
"""Send an image natively as a Telegram photo.
|
|
|
|
|
|
|
|
|
|
|
|
Tries URL-based send first (fast, works for <5MB images).
|
|
|
|
|
|
Falls back to downloading and uploading as file (supports up to 10MB).
|
|
|
|
|
|
"""
|
2026-02-10 21:02:40 -08:00
|
|
|
|
if not self._bot:
|
|
|
|
|
|
return SendResult(success=False, error="Not connected")
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
2026-03-07 21:29:45 -08:00
|
|
|
|
# Telegram can send photos directly from URLs (up to ~5MB)
|
2026-03-10 06:26:16 -07:00
|
|
|
|
_photo_thread = metadata.get("thread_id") if metadata else None
|
2026-02-10 21:02:40 -08:00
|
|
|
|
msg = await self._bot.send_photo(
|
|
|
|
|
|
chat_id=int(chat_id),
|
|
|
|
|
|
photo=image_url,
|
|
|
|
|
|
caption=caption[:1024] if caption else None, # Telegram caption limit
|
|
|
|
|
|
reply_to_message_id=int(reply_to) if reply_to else None,
|
2026-03-10 06:26:16 -07:00
|
|
|
|
message_thread_id=int(_photo_thread) if _photo_thread else None,
|
2026-02-10 21:02:40 -08:00
|
|
|
|
)
|
|
|
|
|
|
return SendResult(success=True, message_id=str(msg.message_id))
|
|
|
|
|
|
except Exception as e:
|
2026-03-07 21:29:45 -08:00
|
|
|
|
logger.warning("[%s] URL-based send_photo failed (%s), trying file upload", self.name, e)
|
|
|
|
|
|
# Fallback: download and upload as file (supports up to 10MB)
|
|
|
|
|
|
try:
|
|
|
|
|
|
import httpx
|
|
|
|
|
|
async with httpx.AsyncClient(timeout=30.0) as client:
|
|
|
|
|
|
resp = await client.get(image_url)
|
|
|
|
|
|
resp.raise_for_status()
|
|
|
|
|
|
image_data = resp.content
|
|
|
|
|
|
|
|
|
|
|
|
msg = await self._bot.send_photo(
|
|
|
|
|
|
chat_id=int(chat_id),
|
|
|
|
|
|
photo=image_data,
|
|
|
|
|
|
caption=caption[:1024] if caption else None,
|
|
|
|
|
|
reply_to_message_id=int(reply_to) if reply_to else None,
|
|
|
|
|
|
)
|
|
|
|
|
|
return SendResult(success=True, message_id=str(msg.message_id))
|
|
|
|
|
|
except Exception as e2:
|
|
|
|
|
|
logger.error("[%s] File upload send_photo also failed: %s", self.name, e2)
|
|
|
|
|
|
# Final fallback: send URL as text
|
|
|
|
|
|
return await super().send_image(chat_id, image_url, caption, reply_to)
|
2026-02-10 21:02:40 -08:00
|
|
|
|
|
2026-02-28 11:25:44 -08:00
|
|
|
|
async def send_animation(
|
|
|
|
|
|
self,
|
|
|
|
|
|
chat_id: str,
|
|
|
|
|
|
animation_url: str,
|
|
|
|
|
|
caption: Optional[str] = None,
|
|
|
|
|
|
reply_to: Optional[str] = None,
|
2026-03-10 06:26:16 -07:00
|
|
|
|
metadata: Optional[Dict[str, Any]] = None,
|
2026-02-28 11:25:44 -08:00
|
|
|
|
) -> SendResult:
|
|
|
|
|
|
"""Send an animated GIF natively as a Telegram animation (auto-plays inline)."""
|
|
|
|
|
|
if not self._bot:
|
|
|
|
|
|
return SendResult(success=False, error="Not connected")
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
2026-03-10 06:26:16 -07:00
|
|
|
|
_anim_thread = metadata.get("thread_id") if metadata else None
|
2026-02-28 11:25:44 -08:00
|
|
|
|
msg = await self._bot.send_animation(
|
|
|
|
|
|
chat_id=int(chat_id),
|
|
|
|
|
|
animation=animation_url,
|
|
|
|
|
|
caption=caption[:1024] if caption else None,
|
|
|
|
|
|
reply_to_message_id=int(reply_to) if reply_to else None,
|
2026-03-10 06:26:16 -07:00
|
|
|
|
message_thread_id=int(_anim_thread) if _anim_thread else None,
|
2026-02-28 11:25:44 -08:00
|
|
|
|
)
|
|
|
|
|
|
return SendResult(success=True, message_id=str(msg.message_id))
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"[{self.name}] Failed to send animation, falling back to photo: {e}")
|
|
|
|
|
|
# Fallback: try as a regular photo
|
|
|
|
|
|
return await self.send_image(chat_id, animation_url, caption, reply_to)
|
|
|
|
|
|
|
2026-03-10 06:26:16 -07:00
|
|
|
|
async def send_typing(self, chat_id: str, metadata: Optional[Dict[str, Any]] = None) -> None:
|
2026-02-02 19:01:51 -08:00
|
|
|
|
"""Send typing indicator."""
|
|
|
|
|
|
if self._bot:
|
|
|
|
|
|
try:
|
2026-03-10 06:26:16 -07:00
|
|
|
|
_typing_thread = metadata.get("thread_id") if metadata else None
|
2026-02-02 19:01:51 -08:00
|
|
|
|
await self._bot.send_chat_action(
|
|
|
|
|
|
chat_id=int(chat_id),
|
2026-03-10 06:26:16 -07:00
|
|
|
|
action="typing",
|
|
|
|
|
|
message_thread_id=int(_typing_thread) if _typing_thread else None,
|
2026-02-02 19:01:51 -08:00
|
|
|
|
)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass # Ignore typing indicator failures
|
|
|
|
|
|
|
|
|
|
|
|
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
|
|
|
|
|
|
"""Get information about a Telegram chat."""
|
|
|
|
|
|
if not self._bot:
|
|
|
|
|
|
return {"name": "Unknown", "type": "dm"}
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
chat = await self._bot.get_chat(int(chat_id))
|
|
|
|
|
|
|
|
|
|
|
|
chat_type = "dm"
|
|
|
|
|
|
if chat.type == ChatType.GROUP:
|
|
|
|
|
|
chat_type = "group"
|
|
|
|
|
|
elif chat.type == ChatType.SUPERGROUP:
|
|
|
|
|
|
chat_type = "group"
|
|
|
|
|
|
if chat.is_forum:
|
|
|
|
|
|
chat_type = "forum"
|
|
|
|
|
|
elif chat.type == ChatType.CHANNEL:
|
|
|
|
|
|
chat_type = "channel"
|
|
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
|
"name": chat.title or chat.full_name or str(chat_id),
|
|
|
|
|
|
"type": chat_type,
|
|
|
|
|
|
"username": chat.username,
|
|
|
|
|
|
"is_forum": getattr(chat, "is_forum", False),
|
|
|
|
|
|
}
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
return {"name": str(chat_id), "type": "dm", "error": str(e)}
|
|
|
|
|
|
|
|
|
|
|
|
def format_message(self, content: str) -> str:
|
|
|
|
|
|
"""
|
2026-02-22 02:16:11 -08:00
|
|
|
|
Convert standard markdown to Telegram MarkdownV2 format.
|
|
|
|
|
|
|
|
|
|
|
|
Protected regions (code blocks, inline code) are extracted first so
|
|
|
|
|
|
their contents are never modified. Standard markdown constructs
|
|
|
|
|
|
(headers, bold, italic, links) are translated to MarkdownV2 syntax,
|
|
|
|
|
|
and all remaining special characters are escaped.
|
2026-02-02 19:01:51 -08:00
|
|
|
|
"""
|
2026-02-22 02:16:11 -08:00
|
|
|
|
if not content:
|
|
|
|
|
|
return content
|
|
|
|
|
|
|
|
|
|
|
|
placeholders: dict = {}
|
|
|
|
|
|
counter = [0]
|
|
|
|
|
|
|
|
|
|
|
|
def _ph(value: str) -> str:
|
|
|
|
|
|
"""Stash *value* behind a placeholder token that survives escaping."""
|
|
|
|
|
|
key = f"\x00PH{counter[0]}\x00"
|
|
|
|
|
|
counter[0] += 1
|
|
|
|
|
|
placeholders[key] = value
|
|
|
|
|
|
return key
|
|
|
|
|
|
|
|
|
|
|
|
text = content
|
|
|
|
|
|
|
|
|
|
|
|
# 1) Protect fenced code blocks (``` ... ```)
|
|
|
|
|
|
text = re.sub(
|
|
|
|
|
|
r'(```(?:[^\n]*\n)?[\s\S]*?```)',
|
|
|
|
|
|
lambda m: _ph(m.group(0)),
|
|
|
|
|
|
text,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# 2) Protect inline code (`...`)
|
|
|
|
|
|
text = re.sub(r'(`[^`]+`)', lambda m: _ph(m.group(0)), text)
|
|
|
|
|
|
|
|
|
|
|
|
# 3) Convert markdown links – escape the display text; inside the URL
|
|
|
|
|
|
# only ')' and '\' need escaping per the MarkdownV2 spec.
|
|
|
|
|
|
def _convert_link(m):
|
|
|
|
|
|
display = _escape_mdv2(m.group(1))
|
|
|
|
|
|
url = m.group(2).replace('\\', '\\\\').replace(')', '\\)')
|
|
|
|
|
|
return _ph(f'[{display}]({url})')
|
|
|
|
|
|
|
|
|
|
|
|
text = re.sub(r'\[([^\]]+)\]\(([^)]+)\)', _convert_link, text)
|
|
|
|
|
|
|
|
|
|
|
|
# 4) Convert markdown headers (## Title) → bold *Title*
|
|
|
|
|
|
def _convert_header(m):
|
|
|
|
|
|
inner = m.group(1).strip()
|
|
|
|
|
|
# Strip redundant bold markers that may appear inside a header
|
|
|
|
|
|
inner = re.sub(r'\*\*(.+?)\*\*', r'\1', inner)
|
|
|
|
|
|
return _ph(f'*{_escape_mdv2(inner)}*')
|
|
|
|
|
|
|
|
|
|
|
|
text = re.sub(
|
|
|
|
|
|
r'^#{1,6}\s+(.+)$', _convert_header, text, flags=re.MULTILINE
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# 5) Convert bold: **text** → *text* (MarkdownV2 bold)
|
|
|
|
|
|
text = re.sub(
|
|
|
|
|
|
r'\*\*(.+?)\*\*',
|
|
|
|
|
|
lambda m: _ph(f'*{_escape_mdv2(m.group(1))}*'),
|
|
|
|
|
|
text,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# 6) Convert italic: *text* (single asterisk) → _text_ (MarkdownV2 italic)
|
2026-02-28 22:01:48 +03:00
|
|
|
|
# [^*\n]+ prevents matching across newlines (which would corrupt
|
|
|
|
|
|
# bullet lists using * markers and multi-line content).
|
2026-02-22 02:16:11 -08:00
|
|
|
|
text = re.sub(
|
2026-02-28 22:01:48 +03:00
|
|
|
|
r'\*([^*\n]+)\*',
|
2026-02-22 02:16:11 -08:00
|
|
|
|
lambda m: _ph(f'_{_escape_mdv2(m.group(1))}_'),
|
|
|
|
|
|
text,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# 7) Escape remaining special characters in plain text
|
|
|
|
|
|
text = _escape_mdv2(text)
|
|
|
|
|
|
|
|
|
|
|
|
# 8) Restore placeholders in reverse insertion order so that
|
|
|
|
|
|
# nested references (a placeholder inside another) resolve correctly.
|
|
|
|
|
|
for key in reversed(list(placeholders.keys())):
|
|
|
|
|
|
text = text.replace(key, placeholders[key])
|
|
|
|
|
|
|
|
|
|
|
|
return text
|
2026-02-02 19:01:51 -08:00
|
|
|
|
|
|
|
|
|
|
async def _handle_text_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
|
|
|
|
|
"""Handle incoming text messages."""
|
|
|
|
|
|
if not update.message or not update.message.text:
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
event = self._build_message_event(update.message, MessageType.TEXT)
|
|
|
|
|
|
await self.handle_message(event)
|
|
|
|
|
|
|
|
|
|
|
|
async def _handle_command(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
|
|
|
|
|
"""Handle incoming command messages."""
|
|
|
|
|
|
if not update.message or not update.message.text:
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
event = self._build_message_event(update.message, MessageType.COMMAND)
|
|
|
|
|
|
await self.handle_message(event)
|
|
|
|
|
|
|
feat: find-nearby skill and Telegram location support
Adds a 'find-nearby' skill for discovering nearby places using
OpenStreetMap (Overpass + Nominatim). No API keys needed. Works with:
- Coordinates (from Telegram location pins)
- Addresses, cities, zip codes, landmarks (auto-geocoded)
- Multiple place types (restaurant, cafe, bar, pharmacy, etc.)
Returns names, distances, cuisine, hours, addresses, and Google Maps
links (pin + directions). 184-line stdlib-only script.
Also adds Telegram location message handling:
- New MessageType.LOCATION in gateway base
- Telegram adapter handles LOCATION and VENUE messages
- Injects lat/lon coordinates into conversation context
- Prompts agent to ask what the user wants nearby
Inspired by PR #422 (reimplemented with simpler script and broader
skill scope — addresses/cities/zips, not just Telegram coordinates).
2026-03-09 05:31:10 -07:00
|
|
|
|
async def _handle_location_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
|
|
|
|
|
"""Handle incoming location/venue pin messages."""
|
|
|
|
|
|
if not update.message:
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
msg = update.message
|
|
|
|
|
|
venue = getattr(msg, "venue", None)
|
|
|
|
|
|
location = getattr(venue, "location", None) if venue else getattr(msg, "location", None)
|
|
|
|
|
|
|
|
|
|
|
|
if not location:
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
lat = getattr(location, "latitude", None)
|
|
|
|
|
|
lon = getattr(location, "longitude", None)
|
|
|
|
|
|
if lat is None or lon is None:
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
# Build a text message with coordinates and context
|
|
|
|
|
|
parts = ["[The user shared a location pin.]"]
|
|
|
|
|
|
if venue:
|
|
|
|
|
|
title = getattr(venue, "title", None)
|
|
|
|
|
|
address = getattr(venue, "address", None)
|
|
|
|
|
|
if title:
|
|
|
|
|
|
parts.append(f"Venue: {title}")
|
|
|
|
|
|
if address:
|
|
|
|
|
|
parts.append(f"Address: {address}")
|
|
|
|
|
|
parts.append(f"latitude: {lat}")
|
|
|
|
|
|
parts.append(f"longitude: {lon}")
|
|
|
|
|
|
parts.append(f"Map: https://www.google.com/maps/search/?api=1&query={lat},{lon}")
|
|
|
|
|
|
parts.append("Ask what they'd like to find nearby (restaurants, cafes, etc.) and any preferences.")
|
|
|
|
|
|
|
|
|
|
|
|
event = self._build_message_event(msg, MessageType.LOCATION)
|
|
|
|
|
|
event.text = "\n".join(parts)
|
|
|
|
|
|
await self.handle_message(event)
|
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
|
async def _handle_media_message(self, update: Update, context: ContextTypes.DEFAULT_TYPE) -> None:
|
2026-02-15 16:10:50 -08:00
|
|
|
|
"""Handle incoming media messages, downloading images to local cache."""
|
2026-02-02 19:01:51 -08:00
|
|
|
|
if not update.message:
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
msg = update.message
|
|
|
|
|
|
|
|
|
|
|
|
# Determine media type
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
|
if msg.sticker:
|
|
|
|
|
|
msg_type = MessageType.STICKER
|
|
|
|
|
|
elif msg.photo:
|
2026-02-02 19:01:51 -08:00
|
|
|
|
msg_type = MessageType.PHOTO
|
|
|
|
|
|
elif msg.video:
|
|
|
|
|
|
msg_type = MessageType.VIDEO
|
|
|
|
|
|
elif msg.audio:
|
|
|
|
|
|
msg_type = MessageType.AUDIO
|
|
|
|
|
|
elif msg.voice:
|
|
|
|
|
|
msg_type = MessageType.VOICE
|
feat(telegram): add document file processing for PDF, text, and Office files
Download, cache, and enrich document files sent via Telegram. Supports
.pdf, .md, .txt, .docx, .xlsx, .pptx with size validation, unsupported
type rejection, text content injection for .md/.txt, and hourly cache
cleanup.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 11:44:57 -05:00
|
|
|
|
elif msg.document:
|
|
|
|
|
|
msg_type = MessageType.DOCUMENT
|
2026-02-02 19:01:51 -08:00
|
|
|
|
else:
|
|
|
|
|
|
msg_type = MessageType.DOCUMENT
|
|
|
|
|
|
|
|
|
|
|
|
event = self._build_message_event(msg, msg_type)
|
|
|
|
|
|
|
|
|
|
|
|
# Add caption as text
|
|
|
|
|
|
if msg.caption:
|
|
|
|
|
|
event.text = msg.caption
|
|
|
|
|
|
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
|
# Handle stickers: describe via vision tool with caching
|
|
|
|
|
|
if msg.sticker:
|
|
|
|
|
|
await self._handle_sticker(msg, event)
|
|
|
|
|
|
await self.handle_message(event)
|
|
|
|
|
|
return
|
|
|
|
|
|
|
2026-02-15 16:10:50 -08:00
|
|
|
|
# Download photo to local image cache so the vision tool can access it
|
|
|
|
|
|
# even after Telegram's ephemeral file URLs expire (~1 hour).
|
|
|
|
|
|
if msg.photo:
|
|
|
|
|
|
try:
|
|
|
|
|
|
# msg.photo is a list of PhotoSize sorted by size; take the largest
|
|
|
|
|
|
photo = msg.photo[-1]
|
|
|
|
|
|
file_obj = await photo.get_file()
|
|
|
|
|
|
# Download the image bytes directly into memory
|
|
|
|
|
|
image_bytes = await file_obj.download_as_bytearray()
|
|
|
|
|
|
# Determine extension from the file path if available
|
|
|
|
|
|
ext = ".jpg"
|
|
|
|
|
|
if file_obj.file_path:
|
|
|
|
|
|
for candidate in [".png", ".webp", ".gif", ".jpeg", ".jpg"]:
|
|
|
|
|
|
if file_obj.file_path.lower().endswith(candidate):
|
|
|
|
|
|
ext = candidate
|
|
|
|
|
|
break
|
|
|
|
|
|
# Save to cache and populate media_urls with the local path
|
|
|
|
|
|
cached_path = cache_image_from_bytes(bytes(image_bytes), ext=ext)
|
|
|
|
|
|
event.media_urls = [cached_path]
|
|
|
|
|
|
event.media_types = [f"image/{ext.lstrip('.')}"]
|
|
|
|
|
|
print(f"[Telegram] Cached user photo: {cached_path}", flush=True)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"[Telegram] Failed to cache photo: {e}", flush=True)
|
|
|
|
|
|
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
|
# Download voice/audio messages to cache for STT transcription
|
|
|
|
|
|
if msg.voice:
|
|
|
|
|
|
try:
|
|
|
|
|
|
file_obj = await msg.voice.get_file()
|
|
|
|
|
|
audio_bytes = await file_obj.download_as_bytearray()
|
|
|
|
|
|
cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".ogg")
|
|
|
|
|
|
event.media_urls = [cached_path]
|
|
|
|
|
|
event.media_types = ["audio/ogg"]
|
|
|
|
|
|
print(f"[Telegram] Cached user voice: {cached_path}", flush=True)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"[Telegram] Failed to cache voice: {e}", flush=True)
|
|
|
|
|
|
elif msg.audio:
|
|
|
|
|
|
try:
|
|
|
|
|
|
file_obj = await msg.audio.get_file()
|
|
|
|
|
|
audio_bytes = await file_obj.download_as_bytearray()
|
|
|
|
|
|
cached_path = cache_audio_from_bytes(bytes(audio_bytes), ext=".mp3")
|
|
|
|
|
|
event.media_urls = [cached_path]
|
|
|
|
|
|
event.media_types = ["audio/mp3"]
|
|
|
|
|
|
print(f"[Telegram] Cached user audio: {cached_path}", flush=True)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"[Telegram] Failed to cache audio: {e}", flush=True)
|
feat(telegram): add document file processing for PDF, text, and Office files
Download, cache, and enrich document files sent via Telegram. Supports
.pdf, .md, .txt, .docx, .xlsx, .pptx with size validation, unsupported
type rejection, text content injection for .md/.txt, and hourly cache
cleanup.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 11:44:57 -05:00
|
|
|
|
|
|
|
|
|
|
# Download document files to cache for agent processing
|
|
|
|
|
|
elif msg.document:
|
|
|
|
|
|
doc = msg.document
|
|
|
|
|
|
try:
|
|
|
|
|
|
# Determine file extension
|
|
|
|
|
|
ext = ""
|
|
|
|
|
|
original_filename = doc.file_name or ""
|
|
|
|
|
|
if original_filename:
|
|
|
|
|
|
_, ext = os.path.splitext(original_filename)
|
|
|
|
|
|
ext = ext.lower()
|
|
|
|
|
|
|
|
|
|
|
|
# If no extension from filename, reverse-lookup from MIME type
|
|
|
|
|
|
if not ext and doc.mime_type:
|
|
|
|
|
|
mime_to_ext = {v: k for k, v in SUPPORTED_DOCUMENT_TYPES.items()}
|
|
|
|
|
|
ext = mime_to_ext.get(doc.mime_type, "")
|
|
|
|
|
|
|
|
|
|
|
|
# Check if supported
|
|
|
|
|
|
if ext not in SUPPORTED_DOCUMENT_TYPES:
|
|
|
|
|
|
supported_list = ", ".join(sorted(SUPPORTED_DOCUMENT_TYPES.keys()))
|
|
|
|
|
|
event.text = (
|
|
|
|
|
|
f"Unsupported document type '{ext or 'unknown'}'. "
|
|
|
|
|
|
f"Supported types: {supported_list}"
|
|
|
|
|
|
)
|
|
|
|
|
|
print(f"[Telegram] Unsupported document type: {ext or 'unknown'}", flush=True)
|
|
|
|
|
|
await self.handle_message(event)
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
# Check file size (Telegram Bot API limit: 20 MB)
|
2026-02-27 11:53:46 -05:00
|
|
|
|
MAX_DOC_BYTES = 20 * 1024 * 1024
|
|
|
|
|
|
if not doc.file_size or doc.file_size > MAX_DOC_BYTES:
|
feat(telegram): add document file processing for PDF, text, and Office files
Download, cache, and enrich document files sent via Telegram. Supports
.pdf, .md, .txt, .docx, .xlsx, .pptx with size validation, unsupported
type rejection, text content injection for .md/.txt, and hourly cache
cleanup.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 11:44:57 -05:00
|
|
|
|
event.text = (
|
2026-02-27 11:53:46 -05:00
|
|
|
|
"The document is too large or its size could not be verified. "
|
|
|
|
|
|
"Maximum: 20 MB."
|
feat(telegram): add document file processing for PDF, text, and Office files
Download, cache, and enrich document files sent via Telegram. Supports
.pdf, .md, .txt, .docx, .xlsx, .pptx with size validation, unsupported
type rejection, text content injection for .md/.txt, and hourly cache
cleanup.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 11:44:57 -05:00
|
|
|
|
)
|
|
|
|
|
|
print(f"[Telegram] Document too large: {doc.file_size} bytes", flush=True)
|
|
|
|
|
|
await self.handle_message(event)
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
# Download and cache
|
|
|
|
|
|
file_obj = await doc.get_file()
|
|
|
|
|
|
doc_bytes = await file_obj.download_as_bytearray()
|
|
|
|
|
|
raw_bytes = bytes(doc_bytes)
|
|
|
|
|
|
cached_path = cache_document_from_bytes(raw_bytes, original_filename or f"document{ext}")
|
|
|
|
|
|
mime_type = SUPPORTED_DOCUMENT_TYPES[ext]
|
|
|
|
|
|
event.media_urls = [cached_path]
|
|
|
|
|
|
event.media_types = [mime_type]
|
|
|
|
|
|
print(f"[Telegram] Cached user document: {cached_path}", flush=True)
|
|
|
|
|
|
|
2026-02-27 11:53:46 -05:00
|
|
|
|
# For text files, inject content into event.text (capped at 100 KB)
|
|
|
|
|
|
MAX_TEXT_INJECT_BYTES = 100 * 1024
|
|
|
|
|
|
if ext in (".md", ".txt") and len(raw_bytes) <= MAX_TEXT_INJECT_BYTES:
|
feat(telegram): add document file processing for PDF, text, and Office files
Download, cache, and enrich document files sent via Telegram. Supports
.pdf, .md, .txt, .docx, .xlsx, .pptx with size validation, unsupported
type rejection, text content injection for .md/.txt, and hourly cache
cleanup.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 11:44:57 -05:00
|
|
|
|
try:
|
|
|
|
|
|
text_content = raw_bytes.decode("utf-8")
|
|
|
|
|
|
display_name = original_filename or f"document{ext}"
|
2026-02-27 23:04:32 -08:00
|
|
|
|
display_name = re.sub(r'[^\w.\- ]', '_', display_name)
|
feat(telegram): add document file processing for PDF, text, and Office files
Download, cache, and enrich document files sent via Telegram. Supports
.pdf, .md, .txt, .docx, .xlsx, .pptx with size validation, unsupported
type rejection, text content injection for .md/.txt, and hourly cache
cleanup.
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 11:44:57 -05:00
|
|
|
|
injection = f"[Content of {display_name}]:\n{text_content}"
|
|
|
|
|
|
if event.text:
|
|
|
|
|
|
event.text = f"{injection}\n\n{event.text}"
|
|
|
|
|
|
else:
|
|
|
|
|
|
event.text = injection
|
|
|
|
|
|
except UnicodeDecodeError:
|
|
|
|
|
|
print(f"[Telegram] Could not decode text file as UTF-8, skipping content injection", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"[Telegram] Failed to cache document: {e}", flush=True)
|
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
|
await self.handle_message(event)
|
|
|
|
|
|
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
|
async def _handle_sticker(self, msg: Message, event: "MessageEvent") -> None:
|
|
|
|
|
|
"""
|
|
|
|
|
|
Describe a Telegram sticker via vision analysis, with caching.
|
|
|
|
|
|
|
|
|
|
|
|
For static stickers (WEBP), we download, analyze with vision, and cache
|
|
|
|
|
|
the description by file_unique_id. For animated/video stickers, we inject
|
|
|
|
|
|
a placeholder noting the emoji.
|
|
|
|
|
|
"""
|
|
|
|
|
|
from gateway.sticker_cache import (
|
|
|
|
|
|
get_cached_description,
|
|
|
|
|
|
cache_sticker_description,
|
|
|
|
|
|
build_sticker_injection,
|
|
|
|
|
|
build_animated_sticker_injection,
|
|
|
|
|
|
STICKER_VISION_PROMPT,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
sticker = msg.sticker
|
|
|
|
|
|
emoji = sticker.emoji or ""
|
|
|
|
|
|
set_name = sticker.set_name or ""
|
|
|
|
|
|
|
|
|
|
|
|
# Animated and video stickers can't be analyzed as static images
|
|
|
|
|
|
if sticker.is_animated or sticker.is_video:
|
|
|
|
|
|
event.text = build_animated_sticker_injection(emoji)
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
# Check the cache first
|
|
|
|
|
|
cached = get_cached_description(sticker.file_unique_id)
|
|
|
|
|
|
if cached:
|
|
|
|
|
|
event.text = build_sticker_injection(
|
|
|
|
|
|
cached["description"], cached.get("emoji", emoji), cached.get("set_name", set_name)
|
|
|
|
|
|
)
|
|
|
|
|
|
print(f"[Telegram] Sticker cache hit: {sticker.file_unique_id}", flush=True)
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
# Cache miss -- download and analyze
|
|
|
|
|
|
try:
|
|
|
|
|
|
file_obj = await sticker.get_file()
|
|
|
|
|
|
image_bytes = await file_obj.download_as_bytearray()
|
|
|
|
|
|
cached_path = cache_image_from_bytes(bytes(image_bytes), ext=".webp")
|
|
|
|
|
|
print(f"[Telegram] Analyzing sticker: {cached_path}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
from tools.vision_tools import vision_analyze_tool
|
|
|
|
|
|
import json as _json
|
|
|
|
|
|
|
|
|
|
|
|
result_json = await vision_analyze_tool(
|
|
|
|
|
|
image_url=cached_path,
|
|
|
|
|
|
user_prompt=STICKER_VISION_PROMPT,
|
|
|
|
|
|
)
|
|
|
|
|
|
result = _json.loads(result_json)
|
|
|
|
|
|
|
|
|
|
|
|
if result.get("success"):
|
|
|
|
|
|
description = result.get("analysis", "a sticker")
|
|
|
|
|
|
cache_sticker_description(sticker.file_unique_id, description, emoji, set_name)
|
|
|
|
|
|
event.text = build_sticker_injection(description, emoji, set_name)
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Vision failed -- use emoji as fallback
|
|
|
|
|
|
event.text = build_sticker_injection(
|
|
|
|
|
|
f"a sticker with emoji {emoji}" if emoji else "a sticker",
|
|
|
|
|
|
emoji, set_name,
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
print(f"[Telegram] Sticker analysis error: {e}", flush=True)
|
|
|
|
|
|
event.text = build_sticker_injection(
|
|
|
|
|
|
f"a sticker with emoji {emoji}" if emoji else "a sticker",
|
|
|
|
|
|
emoji, set_name,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
|
def _build_message_event(self, message: Message, msg_type: MessageType) -> MessageEvent:
|
|
|
|
|
|
"""Build a MessageEvent from a Telegram message."""
|
|
|
|
|
|
chat = message.chat
|
|
|
|
|
|
user = message.from_user
|
|
|
|
|
|
|
|
|
|
|
|
# Determine chat type
|
|
|
|
|
|
chat_type = "dm"
|
|
|
|
|
|
if chat.type in (ChatType.GROUP, ChatType.SUPERGROUP):
|
|
|
|
|
|
chat_type = "group"
|
|
|
|
|
|
elif chat.type == ChatType.CHANNEL:
|
|
|
|
|
|
chat_type = "channel"
|
|
|
|
|
|
|
|
|
|
|
|
# Build source
|
|
|
|
|
|
source = self.build_source(
|
|
|
|
|
|
chat_id=str(chat.id),
|
|
|
|
|
|
chat_name=chat.title or (chat.full_name if hasattr(chat, "full_name") else None),
|
|
|
|
|
|
chat_type=chat_type,
|
|
|
|
|
|
user_id=str(user.id) if user else None,
|
|
|
|
|
|
user_name=user.full_name if user else None,
|
|
|
|
|
|
thread_id=str(message.message_thread_id) if message.message_thread_id else None,
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
return MessageEvent(
|
|
|
|
|
|
text=message.text or "",
|
|
|
|
|
|
message_type=msg_type,
|
|
|
|
|
|
source=source,
|
|
|
|
|
|
raw_message=message,
|
|
|
|
|
|
message_id=str(message.message_id),
|
|
|
|
|
|
timestamp=message.date,
|
|
|
|
|
|
)
|