Major feature additions inspired by OpenClaw/ClawdBot integration analysis:

Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)

Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description

Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling

Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads

DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending

Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)

Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications

Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings

Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style

Also: updated config migration to prompt for optional keys interactively, bumped config version, updated README, AGENTS.md, .env.example, cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
112 lines
3.0 KiB
Python
112 lines
3.0 KiB
Python
"""
|
|
Sticker description cache for Telegram.

When users send stickers, we describe them via the vision tool and cache
the descriptions keyed by file_unique_id so we don't re-analyze the same
sticker image on every send. Descriptions are concise (1-2 sentences).

Cache location: ~/.hermes/sticker_cache.json
|
|
"""
|
|
|
|
import json
|
|
import os
|
|
import time
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
|
|
|
|
# On-disk location of the JSON sticker cache, inside the user's ~/.hermes dir.
CACHE_PATH = Path(os.path.expanduser("~")) / ".hermes" / "sticker_cache.json"
|
|
|
|
# Prompt handed to the vision tool when describing a sticker. Deliberately
# short so each description request costs as few tokens as possible.
STICKER_VISION_PROMPT = (
    "Describe this sticker in 1-2 sentences. "
    "Focus on what it depicts -- character, action, emotion. "
    "Be concise and objective."
)
|
|
|
|
|
|
def _load_cache() -> dict:
    """Load the sticker cache from disk.

    Returns:
        The mapping stored in the cache file, or an empty dict when the
        file is missing, unreadable, not valid UTF-8, not valid JSON, or
        does not hold a JSON object at the top level.
    """
    try:
        data = json.loads(CACHE_PATH.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file (no separate exists()
        # check needed). ValueError covers both json.JSONDecodeError and
        # UnicodeDecodeError, which are subclasses of it.
        return {}
    # Callers use dict operations (.get / item assignment) on the result, so
    # a syntactically valid but non-object payload is treated as empty.
    return data if isinstance(data, dict) else {}
|
|
|
|
|
|
def _save_cache(cache: dict) -> None:
    """Save the sticker cache to disk.

    The JSON is written to a temporary file next to the cache and then
    swapped into place with ``os.replace``, so an interrupted write does not
    leave a truncated cache file behind.

    Args:
        cache: The full cache mapping to persist.

    Raises:
        OSError: If the directory or file cannot be written.
        TypeError: If ``cache`` contains values that are not JSON-serializable.
    """
    # Serialize first so a serialization error never touches the filesystem.
    payload = json.dumps(cache, indent=2, ensure_ascii=False)

    CACHE_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Same directory as the target so the final rename stays on one filesystem.
    tmp_path = CACHE_PATH.with_name(f"{CACHE_PATH.name}.{os.getpid()}.tmp")
    try:
        tmp_path.write_text(payload, encoding="utf-8")
        os.replace(tmp_path, CACHE_PATH)
    except OSError:
        # Best-effort cleanup of the partial temp file; the original error
        # is what the caller needs to see, so it is re-raised unchanged.
        try:
            tmp_path.unlink()
        except OSError:
            pass
        raise
|
|
|
|
|
|
def get_cached_description(file_unique_id: str) -> Optional[dict]:
    """
    Fetch the cached entry for a sticker, if one exists.

    The cache is re-read from disk on every call.

    Args:
        file_unique_id: Key the sticker was cached under.

    Returns:
        dict with keys {description, emoji, set_name, cached_at} or None
        when nothing is cached for this id.
    """
    return _load_cache().get(file_unique_id)
|
|
|
|
|
|
def cache_sticker_description(
    file_unique_id: str,
    description: str,
    emoji: str = "",
    set_name: str = "",
) -> None:
    """
    Record a sticker description in the on-disk cache.

    Loads the current cache, inserts (or overwrites) the entry for
    ``file_unique_id`` stamped with the current time, and writes the whole
    cache back.

    Args:
        file_unique_id: Telegram's stable sticker identifier.
        description: Vision-generated description text.
        emoji: Associated emoji (e.g. "😀").
        set_name: Sticker set name if available.
    """
    entries = _load_cache()
    record = dict(
        description=description,
        emoji=emoji,
        set_name=set_name,
        cached_at=time.time(),
    )
    entries[file_unique_id] = record
    _save_cache(entries)
|
|
|
|
|
|
def build_sticker_injection(
    description: str,
    emoji: str = "",
    set_name: str = "",
) -> str:
    """
    Format a sticker description as warm-style injection text.

    The emoji (and the set name, but only when an emoji is also present) is
    placed right after "sticker"; with no emoji, no extra context is added.

    Returns a string like:
        [The user sent a sticker 😀 from "MyPack"~ It shows: "A cat waving" (=^.w.^=)]
    """
    if not emoji:
        # Without an emoji no context is shown, even if set_name is given.
        origin = ""
    elif set_name:
        origin = f' {emoji} from "{set_name}"'
    else:
        origin = f" {emoji}"

    return f'[The user sent a sticker{origin}~ It shows: "{description}" (=^.w.^=)]'
|
|
|
|
|
|
def build_animated_sticker_injection(emoji: str = "") -> str:
    """
    Format the injection text used for animated/video stickers.

    These stickers are not analyzed, so the text only reports that one was
    sent and, when an emoji is supplied, repeats that emoji as a hint.
    """
    if not emoji:
        return "[The user sent an animated sticker~ I can't see animated ones yet]"

    opening = f"[The user sent an animated sticker {emoji}~ "
    hint = f"I can't see animated ones yet, but the emoji suggests: {emoji}]"
    return opening + hint
|