The MEDIA routing in _process_message_background passes metadata=_thread_metadata to send_video, send_document, and send_image_file — but none accepted it, causing TypeError silently caught by the except handler. Files just failed to send. Fix: add **kwargs to all four base class media methods and their Telegram overrides.
987 lines
36 KiB
Python
987 lines
36 KiB
Python
"""
|
|
Base platform adapter interface.
|
|
|
|
All platform adapters (Telegram, Discord, WhatsApp) inherit from this
|
|
and implement the required methods.
|
|
"""
|
|
|
|
import asyncio
|
|
import logging
|
|
import os
|
|
import re
|
|
import uuid
|
|
from abc import ABC, abstractmethod
|
|
|
|
logger = logging.getLogger(__name__)
|
|
from dataclasses import dataclass, field
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional, Any, Callable, Awaitable, Tuple
|
|
from enum import Enum
|
|
|
|
import sys
|
|
from pathlib import Path as _Path
|
|
sys.path.insert(0, str(_Path(__file__).resolve().parents[2]))
|
|
|
|
from gateway.config import Platform, PlatformConfig
|
|
from gateway.session import SessionSource, build_session_key
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Image cache utilities
|
|
#
|
|
# When users send images on messaging platforms, we download them to a local
|
|
# cache directory so they can be analyzed by the vision tool (which accepts
|
|
# local file paths). This avoids issues with ephemeral platform URLs
|
|
# (e.g. Telegram file URLs expire after ~1 hour).
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Default location: ~/.hermes/image_cache/
|
|
IMAGE_CACHE_DIR = Path(os.path.expanduser("~/.hermes/image_cache"))
|
|
|
|
|
|
def get_image_cache_dir() -> Path:
|
|
"""Return the image cache directory, creating it if it doesn't exist."""
|
|
IMAGE_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
|
return IMAGE_CACHE_DIR
|
|
|
|
|
|
def cache_image_from_bytes(data: bytes, ext: str = ".jpg") -> str:
|
|
"""
|
|
Save raw image bytes to the cache and return the absolute file path.
|
|
|
|
Args:
|
|
data: Raw image bytes.
|
|
ext: File extension including the dot (e.g. ".jpg", ".png").
|
|
|
|
Returns:
|
|
Absolute path to the cached image file as a string.
|
|
"""
|
|
cache_dir = get_image_cache_dir()
|
|
filename = f"img_{uuid.uuid4().hex[:12]}{ext}"
|
|
filepath = cache_dir / filename
|
|
filepath.write_bytes(data)
|
|
return str(filepath)
|
|
|
|
|
|
async def cache_image_from_url(url: str, ext: str = ".jpg") -> str:
|
|
"""
|
|
Download an image from a URL and save it to the local cache.
|
|
|
|
Uses httpx for async download with a reasonable timeout.
|
|
|
|
Args:
|
|
url: The HTTP/HTTPS URL to download from.
|
|
ext: File extension including the dot (e.g. ".jpg", ".png").
|
|
|
|
Returns:
|
|
Absolute path to the cached image file as a string.
|
|
"""
|
|
import httpx
|
|
|
|
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
|
response = await client.get(
|
|
url,
|
|
headers={
|
|
"User-Agent": "Mozilla/5.0 (compatible; HermesAgent/1.0)",
|
|
"Accept": "image/*,*/*;q=0.8",
|
|
},
|
|
)
|
|
response.raise_for_status()
|
|
return cache_image_from_bytes(response.content, ext)
|
|
|
|
|
|
def cleanup_image_cache(max_age_hours: int = 24) -> int:
|
|
"""
|
|
Delete cached images older than *max_age_hours*.
|
|
|
|
Returns the number of files removed.
|
|
"""
|
|
import time
|
|
|
|
cache_dir = get_image_cache_dir()
|
|
cutoff = time.time() - (max_age_hours * 3600)
|
|
removed = 0
|
|
for f in cache_dir.iterdir():
|
|
if f.is_file() and f.stat().st_mtime < cutoff:
|
|
try:
|
|
f.unlink()
|
|
removed += 1
|
|
except OSError:
|
|
pass
|
|
return removed
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Audio cache utilities
|
|
#
|
|
# Same pattern as image cache -- voice messages from platforms are downloaded
|
|
# here so the STT tool (OpenAI Whisper) can transcribe them from local files.
|
|
# ---------------------------------------------------------------------------
|
|
|
|
AUDIO_CACHE_DIR = Path(os.path.expanduser("~/.hermes/audio_cache"))
|
|
|
|
|
|
def get_audio_cache_dir() -> Path:
|
|
"""Return the audio cache directory, creating it if it doesn't exist."""
|
|
AUDIO_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
|
return AUDIO_CACHE_DIR
|
|
|
|
|
|
def cache_audio_from_bytes(data: bytes, ext: str = ".ogg") -> str:
|
|
"""
|
|
Save raw audio bytes to the cache and return the absolute file path.
|
|
|
|
Args:
|
|
data: Raw audio bytes.
|
|
ext: File extension including the dot (e.g. ".ogg", ".mp3").
|
|
|
|
Returns:
|
|
Absolute path to the cached audio file as a string.
|
|
"""
|
|
cache_dir = get_audio_cache_dir()
|
|
filename = f"audio_{uuid.uuid4().hex[:12]}{ext}"
|
|
filepath = cache_dir / filename
|
|
filepath.write_bytes(data)
|
|
return str(filepath)
|
|
|
|
|
|
async def cache_audio_from_url(url: str, ext: str = ".ogg") -> str:
|
|
"""
|
|
Download an audio file from a URL and save it to the local cache.
|
|
|
|
Args:
|
|
url: The HTTP/HTTPS URL to download from.
|
|
ext: File extension including the dot (e.g. ".ogg", ".mp3").
|
|
|
|
Returns:
|
|
Absolute path to the cached audio file as a string.
|
|
"""
|
|
import httpx
|
|
|
|
async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client:
|
|
response = await client.get(
|
|
url,
|
|
headers={
|
|
"User-Agent": "Mozilla/5.0 (compatible; HermesAgent/1.0)",
|
|
"Accept": "audio/*,*/*;q=0.8",
|
|
},
|
|
)
|
|
response.raise_for_status()
|
|
return cache_audio_from_bytes(response.content, ext)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Document cache utilities
|
|
#
|
|
# Same pattern as image/audio cache -- documents from platforms are downloaded
|
|
# here so the agent can reference them by local file path.
|
|
# ---------------------------------------------------------------------------
|
|
|
|
DOCUMENT_CACHE_DIR = Path(os.path.expanduser("~/.hermes/document_cache"))
|
|
|
|
SUPPORTED_DOCUMENT_TYPES = {
|
|
".pdf": "application/pdf",
|
|
".md": "text/markdown",
|
|
".txt": "text/plain",
|
|
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
|
}
|
|
|
|
|
|
def get_document_cache_dir() -> Path:
|
|
"""Return the document cache directory, creating it if it doesn't exist."""
|
|
DOCUMENT_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
|
return DOCUMENT_CACHE_DIR
|
|
|
|
|
|
def cache_document_from_bytes(data: bytes, filename: str) -> str:
|
|
"""
|
|
Save raw document bytes to the cache and return the absolute file path.
|
|
|
|
The cached filename preserves the original human-readable name with a
|
|
unique prefix: ``doc_{uuid12}_{original_filename}``.
|
|
|
|
Args:
|
|
data: Raw document bytes.
|
|
filename: Original filename (e.g. "report.pdf").
|
|
|
|
Returns:
|
|
Absolute path to the cached document file as a string.
|
|
|
|
Raises:
|
|
ValueError: If the sanitized path escapes the cache directory.
|
|
"""
|
|
cache_dir = get_document_cache_dir()
|
|
# Sanitize: strip directory components, null bytes, and control characters
|
|
safe_name = Path(filename).name if filename else "document"
|
|
safe_name = safe_name.replace("\x00", "").strip()
|
|
if not safe_name or safe_name in (".", ".."):
|
|
safe_name = "document"
|
|
cached_name = f"doc_{uuid.uuid4().hex[:12]}_{safe_name}"
|
|
filepath = cache_dir / cached_name
|
|
# Final safety check: ensure path stays inside cache dir
|
|
if not filepath.resolve().is_relative_to(cache_dir.resolve()):
|
|
raise ValueError(f"Path traversal rejected: {filename!r}")
|
|
filepath.write_bytes(data)
|
|
return str(filepath)
|
|
|
|
|
|
def cleanup_document_cache(max_age_hours: int = 24) -> int:
|
|
"""
|
|
Delete cached documents older than *max_age_hours*.
|
|
|
|
Returns the number of files removed.
|
|
"""
|
|
import time
|
|
|
|
cache_dir = get_document_cache_dir()
|
|
cutoff = time.time() - (max_age_hours * 3600)
|
|
removed = 0
|
|
for f in cache_dir.iterdir():
|
|
if f.is_file() and f.stat().st_mtime < cutoff:
|
|
try:
|
|
f.unlink()
|
|
removed += 1
|
|
except OSError:
|
|
pass
|
|
return removed
|
|
|
|
|
|
class MessageType(Enum):
|
|
"""Types of incoming messages."""
|
|
TEXT = "text"
|
|
LOCATION = "location"
|
|
PHOTO = "photo"
|
|
VIDEO = "video"
|
|
AUDIO = "audio"
|
|
VOICE = "voice"
|
|
DOCUMENT = "document"
|
|
STICKER = "sticker"
|
|
COMMAND = "command" # /command style
|
|
|
|
|
|
@dataclass
|
|
class MessageEvent:
|
|
"""
|
|
Incoming message from a platform.
|
|
|
|
Normalized representation that all adapters produce.
|
|
"""
|
|
# Message content
|
|
text: str
|
|
message_type: MessageType = MessageType.TEXT
|
|
|
|
# Source information
|
|
source: SessionSource = None
|
|
|
|
# Original platform data
|
|
raw_message: Any = None
|
|
message_id: Optional[str] = None
|
|
|
|
# Media attachments
|
|
media_urls: List[str] = field(default_factory=list)
|
|
media_types: List[str] = field(default_factory=list)
|
|
|
|
# Reply context
|
|
reply_to_message_id: Optional[str] = None
|
|
|
|
# Timestamps
|
|
timestamp: datetime = field(default_factory=datetime.now)
|
|
|
|
def is_command(self) -> bool:
|
|
"""Check if this is a command message (e.g., /new, /reset)."""
|
|
return self.text.startswith("/")
|
|
|
|
def get_command(self) -> Optional[str]:
|
|
"""Extract command name if this is a command message."""
|
|
if not self.is_command():
|
|
return None
|
|
# Split on space and get first word, strip the /
|
|
parts = self.text.split(maxsplit=1)
|
|
return parts[0][1:].lower() if parts else None
|
|
|
|
def get_command_args(self) -> str:
|
|
"""Get the arguments after a command."""
|
|
if not self.is_command():
|
|
return self.text
|
|
parts = self.text.split(maxsplit=1)
|
|
return parts[1] if len(parts) > 1 else ""
|
|
|
|
|
|
@dataclass
|
|
class SendResult:
|
|
"""Result of sending a message."""
|
|
success: bool
|
|
message_id: Optional[str] = None
|
|
error: Optional[str] = None
|
|
raw_response: Any = None
|
|
|
|
|
|
# Type for message handlers
|
|
MessageHandler = Callable[[MessageEvent], Awaitable[Optional[str]]]
|
|
|
|
|
|
class BasePlatformAdapter(ABC):
|
|
"""
|
|
Base class for platform adapters.
|
|
|
|
Subclasses implement platform-specific logic for:
|
|
- Connecting and authenticating
|
|
- Receiving messages
|
|
- Sending messages/responses
|
|
- Handling media
|
|
"""
|
|
|
|
def __init__(self, config: PlatformConfig, platform: Platform):
|
|
self.config = config
|
|
self.platform = platform
|
|
self._message_handler: Optional[MessageHandler] = None
|
|
self._running = False
|
|
|
|
# Track active message handlers per session for interrupt support
|
|
# Key: session_key (e.g., chat_id), Value: (event, asyncio.Event for interrupt)
|
|
self._active_sessions: Dict[str, asyncio.Event] = {}
|
|
self._pending_messages: Dict[str, MessageEvent] = {}
|
|
|
|
@property
|
|
def name(self) -> str:
|
|
"""Human-readable name for this adapter."""
|
|
return self.platform.value.title()
|
|
|
|
@property
|
|
def is_connected(self) -> bool:
|
|
"""Check if adapter is currently connected."""
|
|
return self._running
|
|
|
|
def set_message_handler(self, handler: MessageHandler) -> None:
|
|
"""
|
|
Set the handler for incoming messages.
|
|
|
|
The handler receives a MessageEvent and should return
|
|
an optional response string.
|
|
"""
|
|
self._message_handler = handler
|
|
|
|
@abstractmethod
|
|
async def connect(self) -> bool:
|
|
"""
|
|
Connect to the platform and start receiving messages.
|
|
|
|
Returns True if connection was successful.
|
|
"""
|
|
pass
|
|
|
|
@abstractmethod
|
|
async def disconnect(self) -> None:
|
|
"""Disconnect from the platform."""
|
|
pass
|
|
|
|
@abstractmethod
|
|
async def send(
|
|
self,
|
|
chat_id: str,
|
|
content: str,
|
|
reply_to: Optional[str] = None,
|
|
metadata: Optional[Dict[str, Any]] = None
|
|
) -> SendResult:
|
|
"""
|
|
Send a message to a chat.
|
|
|
|
Args:
|
|
chat_id: The chat/channel ID to send to
|
|
content: Message content (may be markdown)
|
|
reply_to: Optional message ID to reply to
|
|
metadata: Additional platform-specific options
|
|
|
|
Returns:
|
|
SendResult with success status and message ID
|
|
"""
|
|
pass
|
|
|
|
async def edit_message(
|
|
self,
|
|
chat_id: str,
|
|
message_id: str,
|
|
content: str,
|
|
) -> SendResult:
|
|
"""
|
|
Edit a previously sent message. Optional — platforms that don't
|
|
support editing return success=False and callers fall back to
|
|
sending a new message.
|
|
"""
|
|
return SendResult(success=False, error="Not supported")
|
|
|
|
async def send_typing(self, chat_id: str, metadata=None) -> None:
|
|
"""
|
|
Send a typing indicator.
|
|
|
|
Override in subclasses if the platform supports it.
|
|
metadata: optional dict with platform-specific context (e.g. thread_id for Slack).
|
|
"""
|
|
pass
|
|
|
|
async def send_image(
|
|
self,
|
|
chat_id: str,
|
|
image_url: str,
|
|
caption: Optional[str] = None,
|
|
reply_to: Optional[str] = None,
|
|
) -> SendResult:
|
|
"""
|
|
Send an image natively via the platform API.
|
|
|
|
Override in subclasses to send images as proper attachments
|
|
instead of plain-text URLs. Default falls back to sending the
|
|
URL as a text message.
|
|
"""
|
|
# Fallback: send URL as text (subclasses override for native images)
|
|
text = f"{caption}\n{image_url}" if caption else image_url
|
|
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
|
|
|
async def send_animation(
|
|
self,
|
|
chat_id: str,
|
|
animation_url: str,
|
|
caption: Optional[str] = None,
|
|
reply_to: Optional[str] = None,
|
|
) -> SendResult:
|
|
"""
|
|
Send an animated GIF natively via the platform API.
|
|
|
|
Override in subclasses to send GIFs as proper animations
|
|
(e.g., Telegram send_animation) so they auto-play inline.
|
|
Default falls back to send_image.
|
|
"""
|
|
return await self.send_image(chat_id=chat_id, image_url=animation_url, caption=caption, reply_to=reply_to)
|
|
|
|
@staticmethod
|
|
def _is_animation_url(url: str) -> bool:
|
|
"""Check if a URL points to an animated GIF (vs a static image)."""
|
|
lower = url.lower().split('?')[0] # Strip query params
|
|
return lower.endswith('.gif')
|
|
|
|
@staticmethod
|
|
def extract_images(content: str) -> Tuple[List[Tuple[str, str]], str]:
|
|
"""
|
|
Extract image URLs from markdown and HTML image tags in a response.
|
|
|
|
Finds patterns like:
|
|
- 
|
|
- <img src="https://example.com/image.png">
|
|
- <img src="https://example.com/image.png"></img>
|
|
|
|
Args:
|
|
content: The response text to scan.
|
|
|
|
Returns:
|
|
Tuple of (list of (url, alt_text) pairs, cleaned content with image tags removed).
|
|
"""
|
|
images = []
|
|
cleaned = content
|
|
|
|
# Match markdown images: 
|
|
md_pattern = r'!\[([^\]]*)\]\((https?://[^\s\)]+)\)'
|
|
for match in re.finditer(md_pattern, content):
|
|
alt_text = match.group(1)
|
|
url = match.group(2)
|
|
# Only extract URLs that look like actual images
|
|
if any(url.lower().endswith(ext) or ext in url.lower() for ext in
|
|
['.png', '.jpg', '.jpeg', '.gif', '.webp', 'fal.media', 'fal-cdn', 'replicate.delivery']):
|
|
images.append((url, alt_text))
|
|
|
|
# Match HTML img tags: <img src="url"> or <img src="url"></img> or <img src="url"/>
|
|
html_pattern = r'<img\s+src=["\']?(https?://[^\s"\'<>]+)["\']?\s*/?>\s*(?:</img>)?'
|
|
for match in re.finditer(html_pattern, content):
|
|
url = match.group(1)
|
|
images.append((url, ""))
|
|
|
|
# Remove only the matched image tags from content (not all markdown images)
|
|
if images:
|
|
extracted_urls = {url for url, _ in images}
|
|
def _remove_if_extracted(match):
|
|
url = match.group(2) if match.lastindex >= 2 else match.group(1)
|
|
return '' if url in extracted_urls else match.group(0)
|
|
cleaned = re.sub(md_pattern, _remove_if_extracted, cleaned)
|
|
cleaned = re.sub(html_pattern, _remove_if_extracted, cleaned)
|
|
# Clean up leftover blank lines
|
|
cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()
|
|
|
|
return images, cleaned
|
|
|
|
async def send_voice(
|
|
self,
|
|
chat_id: str,
|
|
audio_path: str,
|
|
caption: Optional[str] = None,
|
|
reply_to: Optional[str] = None,
|
|
**kwargs,
|
|
) -> SendResult:
|
|
"""
|
|
Send an audio file as a native voice message via the platform API.
|
|
|
|
Override in subclasses to send audio as voice bubbles (Telegram)
|
|
or file attachments (Discord). Default falls back to sending the
|
|
file path as text.
|
|
"""
|
|
text = f"🔊 Audio: {audio_path}"
|
|
if caption:
|
|
text = f"{caption}\n{text}"
|
|
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
|
|
|
async def send_video(
|
|
self,
|
|
chat_id: str,
|
|
video_path: str,
|
|
caption: Optional[str] = None,
|
|
reply_to: Optional[str] = None,
|
|
**kwargs,
|
|
) -> SendResult:
|
|
"""
|
|
Send a video natively via the platform API.
|
|
|
|
Override in subclasses to send videos as inline playable media.
|
|
Default falls back to sending the file path as text.
|
|
"""
|
|
text = f"🎬 Video: {video_path}"
|
|
if caption:
|
|
text = f"{caption}\n{text}"
|
|
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
|
|
|
async def send_document(
|
|
self,
|
|
chat_id: str,
|
|
file_path: str,
|
|
caption: Optional[str] = None,
|
|
file_name: Optional[str] = None,
|
|
reply_to: Optional[str] = None,
|
|
**kwargs,
|
|
) -> SendResult:
|
|
"""
|
|
Send a document/file natively via the platform API.
|
|
|
|
Override in subclasses to send files as downloadable attachments.
|
|
Default falls back to sending the file path as text.
|
|
"""
|
|
text = f"📎 File: {file_path}"
|
|
if caption:
|
|
text = f"{caption}\n{text}"
|
|
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
|
|
|
async def send_image_file(
|
|
self,
|
|
chat_id: str,
|
|
image_path: str,
|
|
caption: Optional[str] = None,
|
|
reply_to: Optional[str] = None,
|
|
**kwargs,
|
|
) -> SendResult:
|
|
"""
|
|
Send a local image file natively via the platform API.
|
|
|
|
Unlike send_image() which takes a URL, this takes a local file path.
|
|
Override in subclasses for native photo attachments.
|
|
Default falls back to sending the file path as text.
|
|
"""
|
|
text = f"🖼️ Image: {image_path}"
|
|
if caption:
|
|
text = f"{caption}\n{text}"
|
|
return await self.send(chat_id=chat_id, content=text, reply_to=reply_to)
|
|
|
|
@staticmethod
|
|
def extract_media(content: str) -> Tuple[List[Tuple[str, bool]], str]:
|
|
"""
|
|
Extract MEDIA:<path> tags and [[audio_as_voice]] directives from response text.
|
|
|
|
The TTS tool returns responses like:
|
|
[[audio_as_voice]]
|
|
MEDIA:/path/to/audio.ogg
|
|
|
|
Args:
|
|
content: The response text to scan.
|
|
|
|
Returns:
|
|
Tuple of (list of (path, is_voice) pairs, cleaned content with tags removed).
|
|
"""
|
|
media = []
|
|
cleaned = content
|
|
|
|
# Check for [[audio_as_voice]] directive
|
|
has_voice_tag = "[[audio_as_voice]]" in content
|
|
cleaned = cleaned.replace("[[audio_as_voice]]", "")
|
|
|
|
# Extract MEDIA:<path> tags (path may contain spaces)
|
|
media_pattern = r'MEDIA:(\S+)'
|
|
for match in re.finditer(media_pattern, content):
|
|
path = match.group(1).strip()
|
|
if path:
|
|
media.append((path, has_voice_tag))
|
|
|
|
# Remove MEDIA tags from content
|
|
if media:
|
|
cleaned = re.sub(media_pattern, '', cleaned)
|
|
cleaned = re.sub(r'\n{3,}', '\n\n', cleaned).strip()
|
|
|
|
return media, cleaned
|
|
|
|
async def _keep_typing(self, chat_id: str, interval: float = 2.0, metadata=None) -> None:
|
|
"""
|
|
Continuously send typing indicator until cancelled.
|
|
|
|
Telegram/Discord typing status expires after ~5 seconds, so we refresh every 2
|
|
to recover quickly after progress messages interrupt it.
|
|
"""
|
|
try:
|
|
while True:
|
|
await self.send_typing(chat_id, metadata=metadata)
|
|
await asyncio.sleep(interval)
|
|
except asyncio.CancelledError:
|
|
pass # Normal cancellation when handler completes
|
|
|
|
async def handle_message(self, event: MessageEvent) -> None:
|
|
"""
|
|
Process an incoming message.
|
|
|
|
This method returns quickly by spawning background tasks.
|
|
This allows new messages to be processed even while an agent is running,
|
|
enabling interruption support.
|
|
"""
|
|
if not self._message_handler:
|
|
return
|
|
|
|
session_key = build_session_key(event.source)
|
|
|
|
# Check if there's already an active handler for this session
|
|
if session_key in self._active_sessions:
|
|
# Store this as a pending message - it will interrupt the running agent
|
|
print(f"[{self.name}] ⚡ New message while session {session_key} is active - triggering interrupt")
|
|
self._pending_messages[session_key] = event
|
|
# Signal the interrupt (the processing task checks this)
|
|
self._active_sessions[session_key].set()
|
|
return # Don't process now - will be handled after current task finishes
|
|
|
|
# Spawn background task to process this message
|
|
asyncio.create_task(self._process_message_background(event, session_key))
|
|
|
|
@staticmethod
|
|
def _get_human_delay() -> float:
|
|
"""
|
|
Return a random delay in seconds for human-like response pacing.
|
|
|
|
Reads from env vars:
|
|
HERMES_HUMAN_DELAY_MODE: "off" (default) | "natural" | "custom"
|
|
HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode)
|
|
HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode)
|
|
"""
|
|
import random
|
|
|
|
mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower()
|
|
if mode == "off":
|
|
return 0.0
|
|
min_ms = int(os.getenv("HERMES_HUMAN_DELAY_MIN_MS", "800"))
|
|
max_ms = int(os.getenv("HERMES_HUMAN_DELAY_MAX_MS", "2500"))
|
|
if mode == "natural":
|
|
min_ms, max_ms = 800, 2500
|
|
return random.uniform(min_ms / 1000.0, max_ms / 1000.0)
|
|
|
|
async def _process_message_background(self, event: MessageEvent, session_key: str) -> None:
|
|
"""Background task that actually processes the message."""
|
|
# Create interrupt event for this session
|
|
interrupt_event = asyncio.Event()
|
|
self._active_sessions[session_key] = interrupt_event
|
|
|
|
# Start continuous typing indicator (refreshes every 2 seconds)
|
|
_thread_metadata = {"thread_id": event.source.thread_id} if event.source.thread_id else None
|
|
typing_task = asyncio.create_task(self._keep_typing(event.source.chat_id, metadata=_thread_metadata))
|
|
|
|
try:
|
|
# Call the handler (this can take a while with tool calls)
|
|
response = await self._message_handler(event)
|
|
|
|
# Send response if any
|
|
if not response:
|
|
logger.warning("[%s] Handler returned empty/None response for %s", self.name, event.source.chat_id)
|
|
if response:
|
|
# Extract MEDIA:<path> tags (from TTS tool) before other processing
|
|
media_files, response = self.extract_media(response)
|
|
|
|
# Extract image URLs and send them as native platform attachments
|
|
images, text_content = self.extract_images(response)
|
|
if images:
|
|
logger.info("[%s] extract_images found %d image(s) in response (%d chars)", self.name, len(images), len(response))
|
|
|
|
# Send the text portion first (if any remains after extractions)
|
|
if text_content:
|
|
logger.info("[%s] Sending response (%d chars) to %s", self.name, len(text_content), event.source.chat_id)
|
|
result = await self.send(
|
|
chat_id=event.source.chat_id,
|
|
content=text_content,
|
|
reply_to=event.message_id,
|
|
metadata=_thread_metadata,
|
|
)
|
|
|
|
# Log send failures (don't raise - user already saw tool progress)
|
|
if not result.success:
|
|
print(f"[{self.name}] Failed to send response: {result.error}")
|
|
# Try sending without markdown as fallback
|
|
fallback_result = await self.send(
|
|
chat_id=event.source.chat_id,
|
|
content=f"(Response formatting failed, plain text:)\n\n{text_content[:3500]}",
|
|
reply_to=event.message_id,
|
|
metadata=_thread_metadata,
|
|
)
|
|
if not fallback_result.success:
|
|
print(f"[{self.name}] Fallback send also failed: {fallback_result.error}")
|
|
|
|
# Human-like pacing delay between text and media
|
|
human_delay = self._get_human_delay()
|
|
|
|
# Send extracted images as native attachments
|
|
if images:
|
|
logger.info("[%s] Extracted %d image(s) to send as attachments", self.name, len(images))
|
|
for image_url, alt_text in images:
|
|
if human_delay > 0:
|
|
await asyncio.sleep(human_delay)
|
|
try:
|
|
logger.info("[%s] Sending image: %s (alt=%s)", self.name, image_url[:80], alt_text[:30] if alt_text else "")
|
|
# Route animated GIFs through send_animation for proper playback
|
|
if self._is_animation_url(image_url):
|
|
img_result = await self.send_animation(
|
|
chat_id=event.source.chat_id,
|
|
animation_url=image_url,
|
|
caption=alt_text if alt_text else None,
|
|
metadata=_thread_metadata,
|
|
)
|
|
else:
|
|
img_result = await self.send_image(
|
|
chat_id=event.source.chat_id,
|
|
image_url=image_url,
|
|
caption=alt_text if alt_text else None,
|
|
metadata=_thread_metadata,
|
|
)
|
|
if not img_result.success:
|
|
logger.error("[%s] Failed to send image: %s", self.name, img_result.error)
|
|
except Exception as img_err:
|
|
logger.error("[%s] Error sending image: %s", self.name, img_err, exc_info=True)
|
|
|
|
# Send extracted media files — route by file type
|
|
_AUDIO_EXTS = {'.ogg', '.opus', '.mp3', '.wav', '.m4a'}
|
|
_VIDEO_EXTS = {'.mp4', '.mov', '.avi', '.mkv', '.3gp'}
|
|
_IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.webp', '.gif'}
|
|
|
|
for media_path, is_voice in media_files:
|
|
if human_delay > 0:
|
|
await asyncio.sleep(human_delay)
|
|
try:
|
|
ext = Path(media_path).suffix.lower()
|
|
if ext in _AUDIO_EXTS:
|
|
media_result = await self.send_voice(
|
|
chat_id=event.source.chat_id,
|
|
audio_path=media_path,
|
|
metadata=_thread_metadata,
|
|
)
|
|
elif ext in _VIDEO_EXTS:
|
|
media_result = await self.send_video(
|
|
chat_id=event.source.chat_id,
|
|
video_path=media_path,
|
|
metadata=_thread_metadata,
|
|
)
|
|
elif ext in _IMAGE_EXTS:
|
|
media_result = await self.send_image_file(
|
|
chat_id=event.source.chat_id,
|
|
image_path=media_path,
|
|
metadata=_thread_metadata,
|
|
)
|
|
else:
|
|
media_result = await self.send_document(
|
|
chat_id=event.source.chat_id,
|
|
file_path=media_path,
|
|
metadata=_thread_metadata,
|
|
)
|
|
|
|
if not media_result.success:
|
|
print(f"[{self.name}] Failed to send media ({ext}): {media_result.error}")
|
|
except Exception as media_err:
|
|
print(f"[{self.name}] Error sending media: {media_err}")
|
|
|
|
# Check if there's a pending message that was queued during our processing
|
|
if session_key in self._pending_messages:
|
|
pending_event = self._pending_messages.pop(session_key)
|
|
print(f"[{self.name}] 📨 Processing queued message from interrupt")
|
|
# Clean up current session before processing pending
|
|
if session_key in self._active_sessions:
|
|
del self._active_sessions[session_key]
|
|
typing_task.cancel()
|
|
try:
|
|
await typing_task
|
|
except asyncio.CancelledError:
|
|
pass
|
|
# Process pending message in new background task
|
|
await self._process_message_background(pending_event, session_key)
|
|
return # Already cleaned up
|
|
|
|
except Exception as e:
|
|
print(f"[{self.name}] Error handling message: {e}")
|
|
import traceback
|
|
traceback.print_exc()
|
|
finally:
|
|
# Stop typing indicator
|
|
typing_task.cancel()
|
|
try:
|
|
await typing_task
|
|
except asyncio.CancelledError:
|
|
pass
|
|
# Clean up session tracking
|
|
if session_key in self._active_sessions:
|
|
del self._active_sessions[session_key]
|
|
|
|
def has_pending_interrupt(self, session_key: str) -> bool:
|
|
"""Check if there's a pending interrupt for a session."""
|
|
return session_key in self._active_sessions and self._active_sessions[session_key].is_set()
|
|
|
|
def get_pending_message(self, session_key: str) -> Optional[MessageEvent]:
|
|
"""Get and clear any pending message for a session."""
|
|
return self._pending_messages.pop(session_key, None)
|
|
|
|
def build_source(
|
|
self,
|
|
chat_id: str,
|
|
chat_name: Optional[str] = None,
|
|
chat_type: str = "dm",
|
|
user_id: Optional[str] = None,
|
|
user_name: Optional[str] = None,
|
|
thread_id: Optional[str] = None,
|
|
chat_topic: Optional[str] = None,
|
|
user_id_alt: Optional[str] = None,
|
|
chat_id_alt: Optional[str] = None,
|
|
) -> SessionSource:
|
|
"""Helper to build a SessionSource for this platform."""
|
|
# Normalize empty topic to None
|
|
if chat_topic is not None and not chat_topic.strip():
|
|
chat_topic = None
|
|
return SessionSource(
|
|
platform=self.platform,
|
|
chat_id=str(chat_id),
|
|
chat_name=chat_name,
|
|
chat_type=chat_type,
|
|
user_id=str(user_id) if user_id else None,
|
|
user_name=user_name,
|
|
thread_id=str(thread_id) if thread_id else None,
|
|
chat_topic=chat_topic.strip() if chat_topic else None,
|
|
user_id_alt=user_id_alt,
|
|
chat_id_alt=chat_id_alt,
|
|
)
|
|
|
|
@abstractmethod
|
|
async def get_chat_info(self, chat_id: str) -> Dict[str, Any]:
|
|
"""
|
|
Get information about a chat/channel.
|
|
|
|
Returns dict with at least:
|
|
- name: Chat name
|
|
- type: "dm", "group", "channel"
|
|
"""
|
|
pass
|
|
|
|
def format_message(self, content: str) -> str:
|
|
"""
|
|
Format a message for this platform.
|
|
|
|
Override in subclasses to handle platform-specific formatting
|
|
(e.g., Telegram MarkdownV2, Discord markdown).
|
|
|
|
Default implementation returns content as-is.
|
|
"""
|
|
return content
|
|
|
|
def truncate_message(self, content: str, max_length: int = 4096) -> List[str]:
|
|
"""
|
|
Split a long message into chunks, preserving code block boundaries.
|
|
|
|
When a split falls inside a triple-backtick code block, the fence is
|
|
closed at the end of the current chunk and reopened (with the original
|
|
language tag) at the start of the next chunk. Multi-chunk responses
|
|
receive indicators like ``(1/3)``.
|
|
|
|
Args:
|
|
content: The full message content
|
|
max_length: Maximum length per chunk (platform-specific)
|
|
|
|
Returns:
|
|
List of message chunks
|
|
"""
|
|
if len(content) <= max_length:
|
|
return [content]
|
|
|
|
INDICATOR_RESERVE = 10 # room for " (XX/XX)"
|
|
FENCE_CLOSE = "\n```"
|
|
|
|
chunks: List[str] = []
|
|
remaining = content
|
|
# When the previous chunk ended mid-code-block, this holds the
|
|
# language tag (possibly "") so we can reopen the fence.
|
|
carry_lang: Optional[str] = None
|
|
|
|
while remaining:
|
|
# If we're continuing a code block from the previous chunk,
|
|
# prepend a new opening fence with the same language tag.
|
|
prefix = f"```{carry_lang}\n" if carry_lang is not None else ""
|
|
|
|
# How much body text we can fit after accounting for the prefix,
|
|
# a potential closing fence, and the chunk indicator.
|
|
headroom = max_length - INDICATOR_RESERVE - len(prefix) - len(FENCE_CLOSE)
|
|
if headroom < 1:
|
|
headroom = max_length // 2
|
|
|
|
# Everything remaining fits in one final chunk
|
|
if len(prefix) + len(remaining) <= max_length - INDICATOR_RESERVE:
|
|
chunks.append(prefix + remaining)
|
|
break
|
|
|
|
# Find a natural split point (prefer newlines, then spaces)
|
|
region = remaining[:headroom]
|
|
split_at = region.rfind("\n")
|
|
if split_at < headroom // 2:
|
|
split_at = region.rfind(" ")
|
|
if split_at < 1:
|
|
split_at = headroom
|
|
|
|
chunk_body = remaining[:split_at]
|
|
remaining = remaining[split_at:].lstrip()
|
|
|
|
full_chunk = prefix + chunk_body
|
|
|
|
# Walk only the chunk_body (not the prefix we prepended) to
|
|
# determine whether we end inside an open code block.
|
|
in_code = carry_lang is not None
|
|
lang = carry_lang or ""
|
|
for line in chunk_body.split("\n"):
|
|
stripped = line.strip()
|
|
if stripped.startswith("```"):
|
|
if in_code:
|
|
in_code = False
|
|
lang = ""
|
|
else:
|
|
in_code = True
|
|
tag = stripped[3:].strip()
|
|
lang = tag.split()[0] if tag else ""
|
|
|
|
if in_code:
|
|
# Close the orphaned fence so the chunk is valid on its own
|
|
full_chunk += FENCE_CLOSE
|
|
carry_lang = lang
|
|
else:
|
|
carry_lang = None
|
|
|
|
chunks.append(full_chunk)
|
|
|
|
# Append chunk indicators when the response spans multiple messages
|
|
if len(chunks) > 1:
|
|
total = len(chunks)
|
|
chunks = [
|
|
f"{chunk} ({i + 1}/{total})" for i, chunk in enumerate(chunks)
|
|
]
|
|
|
|
return chunks
|