2026-02-02 19:01:51 -08:00
|
|
|
"""
|
|
|
|
|
Gateway runner - entry point for messaging platform integrations.
|
|
|
|
|
|
|
|
|
|
This module provides:
|
|
|
|
|
- start_gateway(): Start all configured platform adapters
|
|
|
|
|
- GatewayRunner: Main class managing the gateway lifecycle
|
|
|
|
|
|
|
|
|
|
Usage:
|
|
|
|
|
# Start the gateway
|
|
|
|
|
python -m gateway.run
|
|
|
|
|
|
|
|
|
|
# Or from CLI
|
|
|
|
|
python cli.py --gateway
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
import asyncio
|
2026-02-21 03:11:11 -08:00
|
|
|
import logging
|
2026-02-02 19:01:51 -08:00
|
|
|
import os
|
2026-02-14 16:08:14 -08:00
|
|
|
import re
|
2026-02-02 19:01:51 -08:00
|
|
|
import sys
|
|
|
|
|
import signal
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
from typing import Dict, Optional, Any, List
|
|
|
|
|
|
|
|
|
|
# Prepend the parent of this file's directory to the import path.
_package_parent = Path(__file__).parent.parent
sys.path.insert(0, str(_package_parent))

# Environment files: the per-user ~/.hermes/.env is read first (when it
# exists), then the default load_dotenv() lookup runs as a fallback.
from dotenv import load_dotenv

_env_path = Path.home().joinpath('.hermes', '.env')
if _env_path.exists():
    load_dotenv(_env_path)
load_dotenv()

# Flags set for the gateway process:
#   HERMES_QUIET    - quiet mode: suppress debug output and use cwd directly
#                     (no temp dirs)
#   HERMES_EXEC_ASK - interactive exec approval for dangerous commands on
#                     messaging platforms
os.environ.update({
    "HERMES_QUIET": "1",
    "HERMES_EXEC_ASK": "1",
})

# Terminal working directory for messaging platforms: MESSAGING_CWD when it is
# set to a non-empty value, otherwise the user's home directory. The CLI is
# separate and uses the directory where `hermes` is run.
messaging_cwd = os.environ.get("MESSAGING_CWD") or str(Path.home())
os.environ["TERMINAL_CWD"] = messaging_cwd
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
from gateway.config import (
|
|
|
|
|
Platform,
|
|
|
|
|
GatewayConfig,
|
|
|
|
|
load_gateway_config,
|
|
|
|
|
)
|
|
|
|
|
from gateway.session import (
|
|
|
|
|
SessionStore,
|
|
|
|
|
SessionSource,
|
|
|
|
|
SessionContext,
|
|
|
|
|
build_session_context,
|
|
|
|
|
build_session_context_prompt,
|
|
|
|
|
)
|
|
|
|
|
from gateway.delivery import DeliveryRouter, DeliveryTarget
|
2026-02-15 16:10:50 -08:00
|
|
|
from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageType
|
2026-02-02 19:01:51 -08:00
|
|
|
|
2026-02-21 03:11:11 -08:00
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
|
|
|
|
|
class GatewayRunner:
|
|
|
|
|
"""
|
|
|
|
|
Main gateway controller.
|
|
|
|
|
|
|
|
|
|
Manages the lifecycle of all platform adapters and routes
|
|
|
|
|
messages to/from the agent.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
def __init__(self, config: Optional[GatewayConfig] = None):
    """
    Build the runner's collaborators and per-session bookkeeping.

    Args:
        config: Gateway configuration to use. When omitted (or falsy),
            the result of load_gateway_config() is used instead.
    """
    self.config = config if config else load_gateway_config()
    self.adapters: Dict[Platform, BasePlatformAdapter] = {}

    # The session store is handed a callback into the process registry so
    # it can ask whether a session still has active processes
    # (reset protection).
    from tools.process_registry import process_registry

    def _session_has_active_processes(key):
        return process_registry.has_active_for_session(key)

    self.session_store = SessionStore(
        self.config.sessions_dir,
        self.config,
        has_active_processes_fn=_session_has_active_processes,
    )
    self.delivery_router = DeliveryRouter(self.config)

    # Lifecycle state.
    self._running = False
    self._shutdown_event = asyncio.Event()

    # Interrupt support: session_key -> running AIAgent instance, plus the
    # messages queued for a session while an interrupt is in progress.
    self._running_agents: Dict[str, Any] = {}
    self._pending_messages: Dict[str, str] = {}

    # Pending exec approvals:
    # session_key -> {"command": str, "pattern_key": str}
    self._pending_approvals: Dict[str, Dict[str, str]] = {}

    # Store backing code-based DM pairing (user authorization).
    from gateway.pairing import PairingStore
    self.pairing_store = PairingStore()

    # Registry for the event hook system.
    from gateway.hooks import HookRegistry
    self.hooks = HookRegistry()
|
2026-02-02 19:01:51 -08:00
|
|
|
|
|
|
|
|
async def start(self) -> bool:
    """
    Bring the gateway up and connect every enabled platform adapter.

    Event hooks are loaded first, then background processes are recovered
    from the checkpoint on a best-effort basis, then each enabled platform
    is connected in turn. Once at least one adapter is up, the delivery
    router is pointed at the adapters and the "gateway:startup" hook is
    emitted.

    Returns True when at least one adapter connected, False otherwise.
    """
    logger.info("Starting Hermes Gateway...")
    logger.info("Session storage: %s", self.config.sessions_dir)

    # Load event hooks before any adapter connects.
    self.hooks.discover_and_load()

    # Crash recovery is best-effort: a failure here (including a failed
    # import) is logged as a warning and startup continues.
    try:
        from tools.process_registry import process_registry
        restored = process_registry.recover_from_checkpoint()
        if restored:
            logger.info("Recovered %s background process(es) from previous run", restored)
    except Exception as exc:
        logger.warning("Process checkpoint recovery: %s", exc)

    online = 0
    for platform, platform_config in self.config.platforms.items():
        if not platform_config.enabled:
            continue

        adapter = self._create_adapter(platform, platform_config)
        if not adapter:
            logger.warning("No adapter available for %s", platform.value)
            continue

        # Incoming messages from this adapter are routed to the runner.
        adapter.set_message_handler(self._handle_message)

        logger.info("Connecting to %s...", platform.value)
        try:
            is_up = await adapter.connect()
            if not is_up:
                logger.warning("✗ %s failed to connect", platform.value)
                continue
            self.adapters[platform] = adapter
            online += 1
            logger.info("✓ %s connected", platform.value)
        except Exception as exc:
            # One failing platform must not prevent the others from starting.
            logger.error("✗ %s error: %s", platform.value, exc)

    if not online:
        logger.warning("No platforms connected. Check your configuration.")
        return False

    # The delivery router works off the same adapter mapping.
    self.delivery_router.adapters = self.adapters

    self._running = True

    # Report loaded hooks, then announce startup to them.
    loaded = len(self.hooks.loaded_hooks)
    if loaded > 0:
        logger.info("%s hook(s) loaded", loaded)
    startup_payload = {
        "platforms": [p.value for p in self.adapters],
    }
    await self.hooks.emit("gateway:startup", startup_payload)

    logger.info("Gateway running with %s platform(s)", online)
    logger.info("Press Ctrl+C to stop")

    return True
|
|
|
|
|
|
|
|
|
|
async def stop(self) -> None:
    """
    Stop the gateway and disconnect all adapters.

    Each adapter is disconnected in turn; a failure on one adapter is
    logged and does not stop the remaining adapters from being
    disconnected. Afterwards the adapter mapping is cleared and the
    shutdown event is set, which releases wait_for_shutdown().
    """
    logger.info("Stopping gateway...")
    self._running = False

    # Iterate over a snapshot: disconnect() is awaited inside the loop, so
    # other tasks may run and change self.adapters in the meantime.
    # Iterating the live dict would then raise RuntimeError, aborting
    # shutdown before the shutdown event below is ever set.
    for platform, adapter in list(self.adapters.items()):
        try:
            await adapter.disconnect()
            logger.info("✓ %s disconnected", platform.value)
        except Exception as e:
            logger.error("✗ %s disconnect error: %s", platform.value, e)

    self.adapters.clear()
    self._shutdown_event.set()
    logger.info("Gateway stopped")
|
2026-02-02 19:01:51 -08:00
|
|
|
|
|
|
|
|
async def wait_for_shutdown(self) -> None:
    """Suspend the caller until the shutdown event has been set."""
    shutdown_signal = self._shutdown_event
    await shutdown_signal.wait()
|
|
|
|
|
|
|
|
|
|
def _create_adapter(
|
|
|
|
|
self,
|
|
|
|
|
platform: Platform,
|
|
|
|
|
config: Any
|
|
|
|
|
) -> Optional[BasePlatformAdapter]:
|
|
|
|
|
"""Create the appropriate adapter for a platform."""
|
|
|
|
|
if platform == Platform.TELEGRAM:
|
|
|
|
|
from gateway.platforms.telegram import TelegramAdapter, check_telegram_requirements
|
|
|
|
|
if not check_telegram_requirements():
|
2026-02-21 03:11:11 -08:00
|
|
|
logger.warning("Telegram: python-telegram-bot not installed")
|
2026-02-02 19:01:51 -08:00
|
|
|
return None
|
|
|
|
|
return TelegramAdapter(config)
|
|
|
|
|
|
|
|
|
|
elif platform == Platform.DISCORD:
|
|
|
|
|
from gateway.platforms.discord import DiscordAdapter, check_discord_requirements
|
|
|
|
|
if not check_discord_requirements():
|
2026-02-21 03:11:11 -08:00
|
|
|
logger.warning("Discord: discord.py not installed")
|
2026-02-02 19:01:51 -08:00
|
|
|
return None
|
|
|
|
|
return DiscordAdapter(config)
|
|
|
|
|
|
|
|
|
|
elif platform == Platform.WHATSAPP:
|
|
|
|
|
from gateway.platforms.whatsapp import WhatsAppAdapter, check_whatsapp_requirements
|
|
|
|
|
if not check_whatsapp_requirements():
|
2026-02-21 03:11:11 -08:00
|
|
|
logger.warning("WhatsApp: Node.js not installed or bridge not configured")
|
2026-02-02 19:01:51 -08:00
|
|
|
return None
|
|
|
|
|
return WhatsAppAdapter(config)
|
|
|
|
|
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
elif platform == Platform.SLACK:
|
|
|
|
|
from gateway.platforms.slack import SlackAdapter, check_slack_requirements
|
|
|
|
|
if not check_slack_requirements():
|
2026-02-21 03:11:11 -08:00
|
|
|
logger.warning("Slack: slack-bolt not installed. Run: pip install 'hermes-agent[slack]'")
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
return None
|
|
|
|
|
return SlackAdapter(config)
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
return None
|
|
|
|
|
|
2026-02-03 10:46:23 -08:00
|
|
|
def _is_user_authorized(self, source: SessionSource) -> bool:
    """
    Decide whether the sender described by ``source`` may use the bot.

    Evaluation order:
    1. A source without a user_id is rejected (identity can't be verified).
    2. A user approved in the DM pairing store is accepted.
    3. If neither the platform allowlist env var (TELEGRAM_ALLOWED_USERS,
       etc.) nor GATEWAY_ALLOWED_USERS is set, everyone is accepted
       (open access, backward compatible).
    4. Otherwise the user_id must appear in one of those comma-separated
       allowlists.
    """
    sender_id = source.user_id
    if not sender_id:
        return False  # Can't verify unknown users

    # Name of the env var that holds each platform's allowlist
    allowlist_env_names = {
        Platform.TELEGRAM: "TELEGRAM_ALLOWED_USERS",
        Platform.DISCORD: "DISCORD_ALLOWED_USERS",
        Platform.WHATSAPP: "WHATSAPP_ALLOWED_USERS",
        Platform.SLACK: "SLACK_ALLOWED_USERS",
    }
    # Unmapped platforms look up "", which is never set, so this yields None
    env_name = allowlist_env_names.get(source.platform, "")
    platform_allowlist = os.getenv(env_name)
    global_allowlist = os.getenv("GATEWAY_ALLOWED_USERS", "")

    # Pairing approvals are honoured whether or not allowlists are configured
    platform_name = source.platform.value if source.platform else ""
    if self.pairing_store.is_approved(platform_name, sender_id):
        return True

    # Keep only the allowlists that are actually set to something
    configured = [raw for raw in (platform_allowlist, global_allowlist) if raw]
    if not configured:
        # No allowlists configured: allow all (backward compatible)
        return True

    # Merge every configured allowlist into one set of trimmed IDs
    allowed_ids = set()
    for raw in configured:
        allowed_ids.update(entry.strip() for entry in raw.split(","))

    return sender_id in allowed_ids
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
async def _handle_message(self, event: MessageEvent) -> Optional[str]:
|
|
|
|
|
"""
|
|
|
|
|
Handle an incoming message from any platform.
|
|
|
|
|
|
|
|
|
|
This is the core message processing pipeline:
|
2026-02-03 10:46:23 -08:00
|
|
|
1. Check user authorization
|
|
|
|
|
2. Check for commands (/new, /reset, etc.)
|
2026-02-03 16:15:49 -08:00
|
|
|
3. Check for running agent and interrupt if needed
|
|
|
|
|
4. Get or create session
|
|
|
|
|
5. Build context for agent
|
|
|
|
|
6. Run agent conversation
|
|
|
|
|
7. Return response
|
2026-02-02 19:01:51 -08:00
|
|
|
"""
|
|
|
|
|
source = event.source
|
|
|
|
|
|
2026-02-03 10:46:23 -08:00
|
|
|
# Check if user is authorized
|
|
|
|
|
if not self._is_user_authorized(source):
|
2026-02-21 03:11:11 -08:00
|
|
|
logger.warning("Unauthorized user: %s (%s) on %s", source.user_id, source.user_name, source.platform.value)
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
# In DMs: offer pairing code. In groups: silently ignore.
|
|
|
|
|
if source.chat_type == "dm":
|
|
|
|
|
platform_name = source.platform.value if source.platform else "unknown"
|
|
|
|
|
code = self.pairing_store.generate_code(
|
|
|
|
|
platform_name, source.user_id, source.user_name or ""
|
|
|
|
|
)
|
|
|
|
|
if code:
|
|
|
|
|
adapter = self.adapters.get(source.platform)
|
|
|
|
|
if adapter:
|
|
|
|
|
await adapter.send(
|
|
|
|
|
source.chat_id,
|
|
|
|
|
f"Hi~ I don't recognize you yet!\n\n"
|
|
|
|
|
f"Here's your pairing code: `{code}`\n\n"
|
|
|
|
|
f"Ask the bot owner to run:\n"
|
|
|
|
|
f"`hermes pairing approve {platform_name} {code}`"
|
|
|
|
|
)
|
|
|
|
|
else:
|
|
|
|
|
adapter = self.adapters.get(source.platform)
|
|
|
|
|
if adapter:
|
|
|
|
|
await adapter.send(
|
|
|
|
|
source.chat_id,
|
|
|
|
|
"Too many pairing requests right now~ "
|
|
|
|
|
"Please try again later!"
|
|
|
|
|
)
|
|
|
|
|
return None
|
2026-02-03 10:46:23 -08:00
|
|
|
|
2026-02-03 16:15:49 -08:00
|
|
|
# Check for commands
|
2026-02-02 19:01:51 -08:00
|
|
|
command = event.get_command()
|
|
|
|
|
if command in ["new", "reset"]:
|
|
|
|
|
return await self._handle_reset_command(event)
|
|
|
|
|
|
2026-02-19 14:31:53 -08:00
|
|
|
if command == "help":
|
|
|
|
|
return await self._handle_help_command(event)
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
if command == "status":
|
|
|
|
|
return await self._handle_status_command(event)
|
|
|
|
|
|
2026-02-03 16:15:49 -08:00
|
|
|
if command == "stop":
|
|
|
|
|
return await self._handle_stop_command(event)
|
|
|
|
|
|
2026-02-19 14:31:53 -08:00
|
|
|
if command == "model":
|
|
|
|
|
return await self._handle_model_command(event)
|
|
|
|
|
|
|
|
|
|
if command == "personality":
|
|
|
|
|
return await self._handle_personality_command(event)
|
|
|
|
|
|
|
|
|
|
if command == "retry":
|
|
|
|
|
return await self._handle_retry_command(event)
|
|
|
|
|
|
|
|
|
|
if command == "undo":
|
|
|
|
|
return await self._handle_undo_command(event)
|
|
|
|
|
|
2026-02-12 10:05:08 -08:00
|
|
|
# Check for pending exec approval responses
|
|
|
|
|
session_key_preview = f"agent:main:{source.platform.value}:{source.chat_type}:{source.chat_id}" if source.chat_type != "dm" else f"agent:main:{source.platform.value}:dm"
|
|
|
|
|
if session_key_preview in self._pending_approvals:
|
|
|
|
|
user_text = event.text.strip().lower()
|
|
|
|
|
if user_text in ("yes", "y", "approve", "ok", "go", "do it"):
|
|
|
|
|
approval = self._pending_approvals.pop(session_key_preview)
|
|
|
|
|
cmd = approval["command"]
|
|
|
|
|
pattern_key = approval.get("pattern_key", "")
|
2026-02-21 03:11:11 -08:00
|
|
|
logger.info("User approved dangerous command: %s...", cmd[:60])
|
2026-02-12 10:05:08 -08:00
|
|
|
# Approve for session and re-run via terminal_tool with force=True
|
|
|
|
|
from tools.terminal_tool import terminal_tool, _session_approved_patterns
|
|
|
|
|
_session_approved_patterns.add(pattern_key)
|
|
|
|
|
result = terminal_tool(command=cmd, force=True)
|
|
|
|
|
return f"✅ Command approved and executed.\n\n```\n{result[:3500]}\n```"
|
|
|
|
|
elif user_text in ("no", "n", "deny", "cancel", "nope"):
|
|
|
|
|
self._pending_approvals.pop(session_key_preview)
|
|
|
|
|
return "❌ Command denied."
|
|
|
|
|
# If it's not clearly an approval/denial, fall through to normal processing
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
# Get or create session
|
|
|
|
|
session_entry = self.session_store.get_or_create_session(source)
|
2026-02-03 16:15:49 -08:00
|
|
|
session_key = session_entry.session_key
|
|
|
|
|
|
|
|
|
|
# Check if there's already a running agent for this session
|
|
|
|
|
if session_key in self._running_agents:
|
|
|
|
|
running_agent = self._running_agents[session_key]
|
2026-02-21 03:11:11 -08:00
|
|
|
logger.debug("Interrupting running agent for session %s...", session_key[:20])
|
2026-02-03 16:15:49 -08:00
|
|
|
running_agent.interrupt(event.text)
|
|
|
|
|
# Store the new message to be processed after current agent finishes
|
|
|
|
|
self._pending_messages[session_key] = event.text
|
|
|
|
|
return None # Don't respond yet - let the interrupt handle it
|
2026-02-02 19:01:51 -08:00
|
|
|
|
|
|
|
|
# Build session context
|
|
|
|
|
context = build_session_context(source, self.config, session_entry)
|
|
|
|
|
|
|
|
|
|
# Set environment variables for tools
|
|
|
|
|
self._set_session_env(context)
|
|
|
|
|
|
|
|
|
|
# Build the context prompt to inject
|
|
|
|
|
context_prompt = build_session_context_prompt(context)
|
|
|
|
|
|
|
|
|
|
# Load conversation history from transcript
|
|
|
|
|
history = self.session_store.load_transcript(session_entry.session_id)
|
|
|
|
|
|
2026-02-15 16:10:50 -08:00
|
|
|
# -----------------------------------------------------------------
|
|
|
|
|
# Auto-analyze images sent by the user
|
|
|
|
|
#
|
|
|
|
|
# If the user attached image(s), we run the vision tool eagerly so
|
|
|
|
|
# the conversation model always receives a text description. The
|
|
|
|
|
# local file path is also included so the model can re-examine the
|
|
|
|
|
# image later with a more targeted question via vision_analyze.
|
|
|
|
|
#
|
|
|
|
|
# We filter to image paths only (by media_type) so that non-image
|
|
|
|
|
# attachments (documents, audio, etc.) are not sent to the vision
|
|
|
|
|
# tool even when they appear in the same message.
|
|
|
|
|
# -----------------------------------------------------------------
|
|
|
|
|
message_text = event.text or ""
|
|
|
|
|
if event.media_urls:
|
|
|
|
|
image_paths = []
|
|
|
|
|
for i, path in enumerate(event.media_urls):
|
|
|
|
|
# Check media_types if available; otherwise infer from message type
|
|
|
|
|
mtype = event.media_types[i] if i < len(event.media_types) else ""
|
|
|
|
|
is_image = (
|
|
|
|
|
mtype.startswith("image/")
|
|
|
|
|
or event.message_type == MessageType.PHOTO
|
|
|
|
|
)
|
|
|
|
|
if is_image:
|
|
|
|
|
image_paths.append(path)
|
|
|
|
|
if image_paths:
|
|
|
|
|
message_text = await self._enrich_message_with_vision(
|
|
|
|
|
message_text, image_paths
|
|
|
|
|
)
|
|
|
|
|
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
# -----------------------------------------------------------------
|
|
|
|
|
# Auto-transcribe voice/audio messages sent by the user
|
|
|
|
|
# -----------------------------------------------------------------
|
|
|
|
|
if event.media_urls:
|
|
|
|
|
audio_paths = []
|
|
|
|
|
for i, path in enumerate(event.media_urls):
|
|
|
|
|
mtype = event.media_types[i] if i < len(event.media_types) else ""
|
|
|
|
|
is_audio = (
|
|
|
|
|
mtype.startswith("audio/")
|
|
|
|
|
or event.message_type in (MessageType.VOICE, MessageType.AUDIO)
|
|
|
|
|
)
|
|
|
|
|
if is_audio:
|
|
|
|
|
audio_paths.append(path)
|
|
|
|
|
if audio_paths:
|
|
|
|
|
message_text = await self._enrich_message_with_transcription(
|
|
|
|
|
message_text, audio_paths
|
|
|
|
|
)
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
try:
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
# Emit agent:start hook
|
|
|
|
|
hook_ctx = {
|
|
|
|
|
"platform": source.platform.value if source.platform else "",
|
|
|
|
|
"user_id": source.user_id,
|
|
|
|
|
"session_id": session_entry.session_id,
|
|
|
|
|
"message": message_text[:500],
|
|
|
|
|
}
|
|
|
|
|
await self.hooks.emit("agent:start", hook_ctx)
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
# Run the agent
|
2026-02-16 00:53:17 -08:00
|
|
|
agent_result = await self._run_agent(
|
2026-02-15 16:10:50 -08:00
|
|
|
message=message_text,
|
2026-02-02 19:01:51 -08:00
|
|
|
context_prompt=context_prompt,
|
|
|
|
|
history=history,
|
|
|
|
|
source=source,
|
2026-02-03 16:15:49 -08:00
|
|
|
session_id=session_entry.session_id,
|
|
|
|
|
session_key=session_key
|
2026-02-02 19:01:51 -08:00
|
|
|
)
|
|
|
|
|
|
2026-02-16 00:53:17 -08:00
|
|
|
response = agent_result.get("final_response", "")
|
|
|
|
|
agent_messages = agent_result.get("messages", [])
|
|
|
|
|
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
# Emit agent:end hook
|
|
|
|
|
await self.hooks.emit("agent:end", {
|
|
|
|
|
**hook_ctx,
|
|
|
|
|
"response": (response or "")[:500],
|
|
|
|
|
})
|
|
|
|
|
|
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
|
|
|
# Check for pending process watchers (check_interval on background processes)
|
|
|
|
|
try:
|
|
|
|
|
from tools.process_registry import process_registry
|
|
|
|
|
while process_registry.pending_watchers:
|
|
|
|
|
watcher = process_registry.pending_watchers.pop(0)
|
|
|
|
|
asyncio.create_task(self._run_process_watcher(watcher))
|
|
|
|
|
except Exception as e:
|
2026-02-21 03:11:11 -08:00
|
|
|
logger.error("Process watcher setup error: %s", e)
|
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
|
|
|
|
2026-02-12 10:05:08 -08:00
|
|
|
# Check if the agent encountered a dangerous command needing approval
|
|
|
|
|
# The terminal tool stores the last pending approval globally
|
|
|
|
|
try:
|
|
|
|
|
from tools.terminal_tool import _last_pending_approval
|
|
|
|
|
if _last_pending_approval:
|
|
|
|
|
self._pending_approvals[session_key] = _last_pending_approval.copy()
|
|
|
|
|
# Clear the global so it doesn't leak to other sessions
|
|
|
|
|
_last_pending_approval.clear()
|
2026-02-21 03:32:11 -08:00
|
|
|
except Exception as e:
|
|
|
|
|
logger.debug("Failed to check pending approvals: %s", e)
|
2026-02-12 10:05:08 -08:00
|
|
|
|
2026-02-16 00:53:17 -08:00
|
|
|
# Save the full conversation to the transcript, including tool calls.
|
|
|
|
|
# This preserves the complete agent loop (tool_calls, tool results,
|
|
|
|
|
# intermediate reasoning) so sessions can be resumed with full context
|
|
|
|
|
# and transcripts are useful for debugging and training data.
|
|
|
|
|
ts = datetime.now().isoformat()
|
|
|
|
|
|
|
|
|
|
# If this is a fresh session (no history), write the full tool
|
|
|
|
|
# definitions as the first entry so the transcript is self-describing
|
|
|
|
|
# -- the same list of dicts sent as tools=[...] in the API request.
|
|
|
|
|
if not history:
|
2026-02-16 00:55:18 -08:00
|
|
|
tool_defs = agent_result.get("tools", [])
|
2026-02-16 00:53:17 -08:00
|
|
|
self.session_store.append_to_transcript(
|
|
|
|
|
session_entry.session_id,
|
|
|
|
|
{
|
|
|
|
|
"role": "session_meta",
|
|
|
|
|
"tools": tool_defs or [],
|
|
|
|
|
"model": os.getenv("HERMES_MODEL", ""),
|
|
|
|
|
"platform": source.platform.value if source.platform else "",
|
|
|
|
|
"timestamp": ts,
|
|
|
|
|
}
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# Find only the NEW messages from this turn (skip history we loaded)
|
|
|
|
|
history_len = len(history)
|
|
|
|
|
new_messages = agent_messages[history_len:] if len(agent_messages) > history_len else agent_messages
|
|
|
|
|
|
|
|
|
|
# If no new messages found (edge case), fall back to simple user/assistant
|
|
|
|
|
if not new_messages:
|
|
|
|
|
self.session_store.append_to_transcript(
|
|
|
|
|
session_entry.session_id,
|
|
|
|
|
{"role": "user", "content": message_text, "timestamp": ts}
|
|
|
|
|
)
|
|
|
|
|
if response:
|
|
|
|
|
self.session_store.append_to_transcript(
|
|
|
|
|
session_entry.session_id,
|
|
|
|
|
{"role": "assistant", "content": response, "timestamp": ts}
|
|
|
|
|
)
|
|
|
|
|
else:
|
|
|
|
|
for msg in new_messages:
|
|
|
|
|
# Skip system messages (they're rebuilt each run)
|
|
|
|
|
if msg.get("role") == "system":
|
|
|
|
|
continue
|
|
|
|
|
# Add timestamp to each message for debugging
|
|
|
|
|
entry = {**msg, "timestamp": ts}
|
|
|
|
|
self.session_store.append_to_transcript(
|
|
|
|
|
session_entry.session_id, entry
|
|
|
|
|
)
|
2026-02-02 19:01:51 -08:00
|
|
|
|
|
|
|
|
# Update session
|
|
|
|
|
self.session_store.update_session(session_entry.session_key)
|
|
|
|
|
|
|
|
|
|
return response
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
2026-02-21 03:11:11 -08:00
|
|
|
logger.error("Agent error: %s", e)
|
2026-02-02 19:01:51 -08:00
|
|
|
return f"Sorry, I encountered an error: {str(e)}"
|
|
|
|
|
finally:
|
|
|
|
|
# Clear session env
|
|
|
|
|
self._clear_session_env()
|
|
|
|
|
|
|
|
|
|
async def _handle_reset_command(self, event: MessageEvent) -> str:
    """
    Handle the /new and /reset commands.

    Rebuilds the session key from the event source, asks the session store
    to reset that key, emits the ``session:reset`` hook, and returns the
    confirmation text for the user.

    Args:
        event: The incoming command event; only ``event.source`` is read.

    Returns:
        A confirmation message whose wording depends on whether the store
        returned a truthy entry for the reset key.
    """
    origin = event.source

    # Key layout: "agent:main:<platform>:dm" for direct messages, otherwise
    # "agent:main:<platform>:<chat_type>:<chat_id>".
    key_prefix = f"agent:main:{origin.platform.value}:"
    if origin.chat_type == "dm":
        key_suffix = "dm"
    else:
        key_suffix = f"{origin.chat_type}:{origin.chat_id}"
    session_key = key_prefix + key_suffix

    replacement = self.session_store.reset_session(session_key)

    # The hook fires regardless of what the store returned.
    hook_payload = {
        "platform": origin.platform.value if origin.platform else "",
        "user_id": origin.user_id,
        "session_key": session_key,
    }
    await self.hooks.emit("session:reset", hook_payload)

    if not replacement:
        # A falsy result is treated as "no existing session": create one.
        self.session_store.get_or_create_session(origin, force_new=True)
        return "✨ New session started!"

    return "✨ Session reset! I've started fresh with no memory of our previous conversation."
|
|
|
|
|
|
|
|
|
|
async def _handle_status_command(self, event: MessageEvent) -> str:
    """
    Handle the /status command.

    Args:
        event: The incoming command event; only ``event.source`` is read.

    Returns:
        A Markdown-formatted, newline-joined summary: the first 12
        characters of the session id, creation and last-activity times,
        the token total, whether the session key is present in
        ``self._running_agents``, and the values of the connected
        platforms.
    """
    entry = self.session_store.get_or_create_session(event.source)
    platform_names = [platform.value for platform in self.adapters]

    # A session counts as "running" when its key is registered in the
    # running-agents mapping.
    agent_state = "Yes ⚡" if entry.session_key in self._running_agents else "No"

    created = entry.created_at.strftime('%Y-%m-%d %H:%M')
    last_activity = entry.updated_at.strftime('%Y-%m-%d %H:%M')

    report = (
        "📊 **Hermes Gateway Status**",
        "",
        f"**Session ID:** `{entry.session_id[:12]}...`",
        f"**Created:** {created}",
        f"**Last Activity:** {last_activity}",
        f"**Tokens:** {entry.total_tokens:,}",
        f"**Agent Running:** {agent_state}",
        "",
        f"**Connected Platforms:** {', '.join(platform_names)}",
    )
    return "\n".join(report)
|
|
|
|
|
|
2026-02-03 16:15:49 -08:00
|
|
|
async def _handle_stop_command(self, event: MessageEvent) -> str:
    """
    Handle the /stop command by interrupting the session's running agent.

    Args:
        event: The incoming command event; only ``event.source`` is read.

    Returns:
        A notice that the task is being stopped, or a message saying there
        is nothing to stop when no agent is registered for the session.
    """
    entry = self.session_store.get_or_create_session(event.source)
    key = entry.session_key

    # Guard clause: nothing registered for this session key.
    if key not in self._running_agents:
        return "No active task to stop."

    self._running_agents[key].interrupt()
    return "⚡ Stopping the current task... The agent will finish its current step and respond."
|
|
|
|
|
|
2026-02-19 14:31:53 -08:00
|
|
|
async def _handle_help_command(self, event: MessageEvent) -> str:
    """
    Handle the /help command.

    Args:
        event: The incoming command event (not inspected).

    Returns:
        A static Markdown listing of the supported slash commands, one per
        line, with no trailing newline.
    """
    commands = (
        ("/new", "Start a new conversation"),
        ("/reset", "Reset conversation history"),
        ("/status", "Show session info"),
        ("/stop", "Interrupt the running agent"),
        ("/model [name]", "Show or change the model"),
        ("/personality [name]", "Set a personality"),
        ("/retry", "Retry your last message"),
        ("/undo", "Remove the last exchange"),
        ("/help", "Show this message"),
    )
    # Title, a blank separator line, then one "`command` — description" row each.
    rows = ["📖 **Hermes Commands**", ""]
    rows.extend(f"`{usage}` — {summary}" for usage, summary in commands)
    return "\n".join(rows)
|
|
|
|
|
|
|
|
|
|
async def _handle_model_command(self, event: MessageEvent) -> str:
    """
    Handle the /model command: report the current model or switch to another.

    The model name lives in the ``HERMES_MODEL`` environment variable, so a
    change applies to the whole process, not only to the calling session.

    Args:
        event: The incoming command event; its command arguments are the
            requested model name (may be empty).

    Returns:
        The current model plus usage hint when no argument is given,
        otherwise a confirmation of the change.
    """
    requested = event.get_command_args().strip()

    if requested:
        os.environ["HERMES_MODEL"] = requested
        return f"🤖 Model changed to `{requested}`\n_(takes effect on next message)_"

    # No argument: show the active model, or the default when unset.
    active = os.environ.get("HERMES_MODEL", "anthropic/claude-opus-4.6")
    return f"🤖 **Current model:** `{active}`\n\nTo change: `/model provider/model-name`"
|
|
|
|
|
|
|
|
|
|
async def _handle_personality_command(self, event: MessageEvent) -> str:
    """
    Handle the /personality command: list the configured personalities or
    activate one.

    Personalities are read from ``agent.personalities`` in
    ``~/.hermes/config.yaml`` on every call. With no argument the
    configured names are listed with a short preview of each prompt; with
    an argument the matching prompt is stored in the ``HERMES_PERSONALITY``
    environment variable.

    Name matching is case-insensitive. The argument is lower-cased, so
    configured names are compared lower-cased as well; an exact match is
    preferred when several names differ only by case.

    Args:
        event: The incoming command event; its command arguments are the
            requested personality name (may be empty).

    Returns:
        The reply text for the user.
    """
    requested = event.get_command_args().strip().lower()

    def _as_text(value) -> str:
        # YAML values may be None or non-strings; os.environ and the
        # preview slice both need plain text.
        return "" if value is None else str(value)

    personalities = {}
    try:
        import yaml
        config_path = Path.home() / '.hermes' / 'config.yaml'
        if config_path.exists():
            with open(config_path, 'r', encoding='utf-8') as f:
                config = yaml.safe_load(f) or {}
            loaded = (config.get("agent") or {}).get("personalities") or {}
            # Anything other than a mapping cannot be listed or looked up.
            if isinstance(loaded, dict):
                personalities = loaded
    except Exception as e:
        # Best effort: an unreadable config behaves like an empty one,
        # but leave a trace so the cause can be diagnosed.
        logger.warning("Failed to load personalities from config: %s", e)
        personalities = {}

    if not personalities:
        return "No personalities configured in `~/.hermes/config.yaml`"

    if not requested:
        lines = ["🎭 **Available Personalities**\n"]
        for name, prompt in personalities.items():
            text = _as_text(prompt)
            preview = text[:50] + "..." if len(text) > 50 else text
            lines.append(f"• `{name}` — {preview}")
        lines.append("\nUsage: `/personality <name>`")
        return "\n".join(lines)

    # Prefer an exact key; otherwise fall back to a case-insensitive match
    # so names configured with capital letters remain selectable.
    if requested in personalities:
        chosen = requested
    else:
        chosen = next(
            (name for name in personalities if str(name).lower() == requested),
            None,
        )

    if chosen is not None:
        os.environ["HERMES_PERSONALITY"] = _as_text(personalities[chosen])
        return f"🎭 Personality set to **{chosen}**\n_(takes effect on next message)_"

    available = ", ".join(f"`{n}`" for n in personalities)
    return f"Unknown personality: `{requested}`\n\nAvailable: {available}"
|
|
|
|
|
|
|
|
|
|
async def _handle_retry_command(self, event: MessageEvent) -> Optional[str]:
    """
    Handle the /retry command by re-sending the last user message.

    The transcript is scanned from the end for the most recent entry with
    role ``"user"``. The history before that entry is assigned to
    ``session_entry.conversation_history`` and the message is replayed
    through ``self._handle_message`` as a synthetic text event.

    Args:
        event: The incoming command event; ``event.source`` and
            ``event.raw_message`` are reused for the replayed event.

    Returns:
        An explanatory message when there is no non-empty user message to
        retry, otherwise ``None`` because the reply is produced by the
        normal message flow.
    """
    source = event.source
    session_entry = self.session_store.get_or_create_session(source)
    history = self.session_store.load_transcript(session_entry.session_id)

    # Index of the most recent user message, or None if there is none.
    last_user_idx = next(
        (i for i in reversed(range(len(history))) if history[i].get("role") == "user"),
        None,
    )
    last_user_msg = None
    if last_user_idx is not None:
        last_user_msg = history[last_user_idx].get("content", "")

    # Covers both "no user message found" and "user message was empty".
    if not last_user_msg:
        return "No previous message to retry."

    # Drop the last user message and everything after it.
    # NOTE(review): this only sets an attribute on the entry; confirm that
    # _handle_message reads it rather than reloading the stored transcript.
    session_entry.conversation_history = history[:last_user_idx]

    # Replay the old text as a fresh text event from the same source.
    retry_event = MessageEvent(
        text=last_user_msg,
        message_type=MessageType.TEXT,
        source=source,
        raw_message=event.raw_message,
    )

    await self._handle_message(retry_event)
    return None  # Response sent through normal flow
|
|
|
|
|
|
|
|
|
|
async def _handle_undo_command(self, event: MessageEvent) -> str:
    """
    Handle the /undo command by dropping the last user/assistant exchange.

    The transcript is scanned from the end for the most recent entry with
    role ``"user"``; that entry and everything after it are excluded from
    the history assigned to ``session_entry.conversation_history``.

    Args:
        event: The incoming command event; only ``event.source`` is read.

    Returns:
        A summary with the number of removed entries and a preview (at
        most 40 characters plus an ellipsis) of the removed user message,
        or a notice when the transcript holds no user message.
    """
    source = event.source
    session_entry = self.session_store.get_or_create_session(source)
    history = self.session_store.load_transcript(session_entry.session_id)

    # Index of the most recent user message, or None if there is none.
    last_user_idx = next(
        (i for i in reversed(range(len(history))) if history[i].get("role") == "user"),
        None,
    )

    if last_user_idx is None:
        return "Nothing to undo."

    # The stored content may be None or a non-string value; coerce it so
    # the preview below cannot raise on len() or string concatenation.
    raw_content = history[last_user_idx].get("content", "")
    if isinstance(raw_content, str):
        removed_msg = raw_content
    elif raw_content is None:
        removed_msg = ""
    else:
        removed_msg = str(raw_content)

    removed_count = len(history) - last_user_idx
    session_entry.conversation_history = history[:last_user_idx]

    preview = removed_msg[:40] + "..." if len(removed_msg) > 40 else removed_msg
    return f"↩️ Undid {removed_count} message(s).\nRemoved: \"{preview}\""
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
def _set_session_env(self, context: SessionContext) -> None:
    """
    Publish the current session's origin through environment variables.

    Sets ``HERMES_SESSION_PLATFORM`` and ``HERMES_SESSION_CHAT_ID`` from
    ``context.source``. ``HERMES_SESSION_CHAT_NAME`` is set when the source
    has a chat name and removed otherwise, so a name left behind by an
    earlier session is never attributed to this one.

    Args:
        context: Session context whose ``source`` supplies the platform,
            chat id and optional chat name.
    """
    origin = context.source
    os.environ["HERMES_SESSION_PLATFORM"] = origin.platform.value
    os.environ["HERMES_SESSION_CHAT_ID"] = origin.chat_id
    if origin.chat_name:
        os.environ["HERMES_SESSION_CHAT_NAME"] = origin.chat_name
    else:
        # No name for this chat: make sure a stale value does not linger.
        os.environ.pop("HERMES_SESSION_CHAT_NAME", None)
|
|
|
|
|
|
|
|
|
|
def _clear_session_env(self) -> None:
    """
    Remove the session environment variables.

    Deletes ``HERMES_SESSION_PLATFORM``, ``HERMES_SESSION_CHAT_ID`` and
    ``HERMES_SESSION_CHAT_NAME`` from ``os.environ``. Variables that are
    not set are ignored, so the call is safe to repeat.
    """
    for var in (
        "HERMES_SESSION_PLATFORM",
        "HERMES_SESSION_CHAT_ID",
        "HERMES_SESSION_CHAT_NAME",
    ):
        # pop() with a default removes the key without a separate
        # membership check.
        os.environ.pop(var, None)
|
|
|
|
|
|
2026-02-15 16:10:50 -08:00
|
|
|
async def _enrich_message_with_vision(
    self,
    user_text: str,
    image_paths: List[str],
) -> str:
    """
    Auto-analyze user-attached images with the vision tool and prepend
    the descriptions to the message text.

    Each image is analyzed with a general-purpose prompt. The resulting
    description *and* the local cache path are injected so the model can:
      1. Immediately understand what the user sent (no extra tool call).
      2. Re-examine the image with vision_analyze if it needs more detail.

    Analysis is best-effort per image: an unsuccessful result or any
    exception is replaced by a note that still includes the image path.

    Args:
        user_text: The user's original caption / message text.
        image_paths: List of local file paths to cached images.

    Returns:
        The enriched message string with vision descriptions prepended,
        or ``user_text`` unchanged when there are no images.
    """
    # Nothing attached: return before importing the vision tool, so
    # text-only calls do not depend on it being importable.
    if not image_paths:
        return user_text

    from tools.vision_tools import vision_analyze_tool
    import json as _json

    analysis_prompt = (
        "Describe everything visible in this image in thorough detail. "
        "Include any text, code, data, objects, people, layout, colors, "
        "and any other notable visual information."
    )

    enriched_parts = []
    for path in image_paths:
        try:
            logger.debug("Auto-analyzing user image: %s", path)
            result_json = await vision_analyze_tool(
                image_url=path,
                user_prompt=analysis_prompt,
            )
            result = _json.loads(result_json)
            if result.get("success"):
                description = result.get("analysis", "")
                enriched_parts.append(
                    f"[The user sent an image~ Here's what I can see:\n{description}]\n"
                    f"[If you need a closer look, use vision_analyze with "
                    f"image_url: {path} ~]"
                )
            else:
                enriched_parts.append(
                    "[The user sent an image but I couldn't quite see it "
                    "this time (>_<) You can try looking at it yourself "
                    f"with vision_analyze using image_url: {path}]"
                )
        except Exception as e:
            # One failing image must not block the rest of the message.
            logger.error("Vision auto-analysis error: %s", e)
            enriched_parts.append(
                "[The user sent an image but something went wrong when I "
                "tried to look at it~ You can try examining it yourself "
                f"with vision_analyze using image_url: {path}]"
            )

    # Combine: vision descriptions first, then the user's original text
    if enriched_parts:
        prefix = "\n\n".join(enriched_parts)
        if user_text:
            return f"{prefix}\n\n{user_text}"
        return prefix
    return user_text
|
|
|
|
|
|
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks
Major feature additions inspired by OpenClaw/ClawdBot integration analysis:
Voice Message Transcription (STT):
- Auto-transcribe voice/audio messages via OpenAI Whisper API
- Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp
- Inject transcript as text so all models can understand voice input
- Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe)
Telegram Sticker Understanding:
- Describe static stickers via vision tool with JSON-backed cache
- Cache keyed by file_unique_id avoids redundant API calls
- Animated/video stickers get emoji-based fallback description
Discord Rich UX:
- Native slash commands (/ask, /reset, /status, /stop) via app_commands
- Button-based exec approvals (Allow Once / Always Allow / Deny)
- ExecApprovalView with user authorization and timeout handling
Slack Integration:
- Full SlackAdapter using slack-bolt with Socket Mode
- DMs, channel messages (mention-gated), /hermes slash command
- File attachment handling with bot-token-authenticated downloads
DM Pairing System:
- Code-based user authorization as alternative to static allowlists
- 8-char codes from unambiguous alphabet, 1-hour expiry
- Rate limiting, lockout after failed attempts, chmod 0600 on data
- CLI: hermes pairing list/approve/revoke/clear-pending
Event Hook System:
- File-based hook discovery from ~/.hermes/hooks/
- HOOK.yaml + handler.py per hook, sync/async handler support
- Events: gateway:startup, session:start/reset, agent:start/step/end
- Wildcard matching (command:* catches all command events)
Cross-Channel Messaging:
- send_message agent tool for delivering to any connected platform
- Enables cron job delivery and cross-platform notifications
Human-Like Response Pacing:
- Configurable delays between message chunks (off/natural/custom)
- HERMES_HUMAN_DELAY_MODE env var with min/max ms settings
Warm Injection Message Style:
- Retrofitted image vision messages with friendly kawaii-consistent tone
- All new injection messages (STT, stickers, errors) use warm style
Also: updated config migration to prompt for optional keys interactively,
bumped config version, updated README, AGENTS.md, .env.example,
cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
|
|
|
async def _enrich_message_with_transcription(
    self,
    user_text: str,
    audio_paths: List[str],
) -> str:
    """
    Prepend a bracketed note for every audio attachment to the user's text.

    Each path is handed to ``transcribe_audio`` on a worker thread. A
    successful result contributes the transcript; a failed result or an
    unexpected exception contributes a short explanatory note instead, so
    the caller always gets one note per audio file.

    Args:
        user_text: The user's original caption / message text (may be empty).
        audio_paths: Local file paths of the audio files to transcribe.

    Returns:
        The notes joined by blank lines, followed by ``user_text`` when it
        is non-empty. When ``audio_paths`` is empty, ``user_text`` is
        returned unchanged.
    """
    from tools.transcription_tools import transcribe_audio
    import asyncio

    api_key_markers = ("OPENAI_API_KEY", "HERMES_OPENAI_API_KEY")

    notes = []
    for audio_path in audio_paths:
        try:
            logger.debug("Transcribing user voice: %s", audio_path)
            # transcribe_audio is a blocking call; keep it off the event loop.
            outcome = await asyncio.to_thread(transcribe_audio, audio_path)
            if outcome["success"]:
                spoken = outcome["transcript"]
                note = (
                    f'[The user sent a voice message~ '
                    f'Here\'s what they said: "{spoken}"]'
                )
            else:
                reason = outcome.get("error", "unknown error")
                if any(marker in reason for marker in api_key_markers):
                    # The error text mentions the API key variable, so tell
                    # the agent the key is missing rather than echoing it.
                    note = (
                        "[The user sent a voice message but I can't listen "
                        "to it right now~ HERMES_OPENAI_API_KEY isn't set up yet "
                        "(';w;') Let them know!]"
                    )
                else:
                    note = (
                        "[The user sent a voice message but I had trouble "
                        f"transcribing it~ ({reason})]"
                    )
        except Exception as e:
            # Best effort: one broken attachment must not drop the message.
            logger.error("Transcription error: %s", e)
            note = (
                "[The user sent a voice message but something went wrong "
                "when I tried to listen to it~ Let them know!]"
            )
        notes.append(note)

    if not notes:
        return user_text

    prefix = "\n\n".join(notes)
    return f"{prefix}\n\n{user_text}" if user_text else prefix
|
|
|
|
|
|
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
|
|
|
async def _run_process_watcher(self, watcher: dict) -> None:
    """
    Periodically check a background process and push updates to the user.

    Runs as an asyncio task. Every ``check_interval`` seconds the session is
    looked up in the process registry; nothing is sent when the output
    buffer has not grown. The loop ends when the registry no longer returns
    the session, or after the final update once the process has exited.

    Args:
        watcher: Watcher descriptor. ``"session_id"`` and
            ``"check_interval"`` are required; ``"platform"`` and
            ``"chat_id"`` are optional and select where updates are
            delivered (no delivery happens when either is missing or no
            matching adapter is connected).
    """
    from tools.process_registry import process_registry

    session_id = watcher["session_id"]
    interval = watcher["check_interval"]
    platform_name = watcher.get("platform", "")
    chat_id = watcher.get("chat_id", "")

    async def deliver(message_text: str) -> None:
        """Best-effort send of one update to the originating platform."""
        # Resolve the adapter at delivery time so adapters that connect or
        # disconnect while the watcher is running are taken into account.
        adapter = next(
            (a for p, a in self.adapters.items() if p.value == platform_name),
            None,
        )
        if not (adapter and chat_id):
            return
        try:
            await adapter.send(chat_id, message_text)
        except Exception as e:
            # A failed delivery must not kill the watcher task.
            logger.error("Watcher delivery error: %s", e)

    logger.debug("Process watcher started: %s (every %ss)", session_id, interval)

    last_output_len = 0
    while True:
        await asyncio.sleep(interval)

        session = process_registry.get(session_id)
        if session is None:
            break

        # NOTE(review): change detection compares buffer lengths only. If
        # output_buffer is capped (rolling buffer), its length presumably
        # stops growing at the cap and later output would go unreported --
        # confirm against the process registry implementation.
        current_output_len = len(session.output_buffer)
        has_new_output = current_output_len > last_output_len
        last_output_len = current_output_len

        if session.exited:
            # Process finished -- deliver final update
            tail = session.output_buffer[-1000:] if session.output_buffer else ""
            await deliver(
                f"[Background process {session_id} finished with exit code {session.exit_code}~ "
                f"Here's the final output:\n{tail}]"
            )
            break

        if has_new_output:
            # New output available -- deliver status update
            tail = session.output_buffer[-500:] if session.output_buffer else ""
            await deliver(
                f"[Background process {session_id} is still running~ "
                f"New output:\n{tail}]"
            )

    logger.debug("Process watcher ended: %s", session_id)
|
Add background process management with process tool, wait, PTY, and stdin support
New process registry and tool for managing long-running background processes
across all terminal backends (local, Docker, Singularity, Modal, SSH).
Process Registry (tools/process_registry.py):
- ProcessSession tracking with rolling 200KB output buffer
- spawn_local() with optional PTY via ptyprocess for interactive CLIs
- spawn_via_env() for non-local backends (runs inside sandbox, never on host)
- Background reader threads per process (Popen stdout or PTY)
- wait() with timeout clamping, interrupt support, and transparent limit reporting
- JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery
- Module-level singleton shared across agent loop, gateway, and RL
Process Tool (model_tools.py):
- 7 actions: list, poll, log, wait, kill, write, submit
- Paired with terminal in all toolsets (CLI, messaging, RL)
- Timeout clamping with transparent notes in response
Terminal Tool Updates (tools/terminal_tool.py):
- Replaced nohup background mode with registry spawn (returns session_id)
- Added workdir parameter for per-command working directory
- Added check_interval parameter for gateway auto-check watchers
- Added pty parameter for interactive CLI tools (Codex, Claude Code)
- Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs
- Cleanup thread now respects active background processes (won't reap sandbox)
Gateway Integration (gateway/run.py, session.py, config.py):
- Session reset protection: sessions with active processes exempt from reset
- Default idle timeout increased from 2 hours to 24 hours
- from_dict fallback aligned to match (was 120, now 1440)
- session_key env var propagated to process registry for session mapping
- Crash recovery on gateway startup via checkpoint probe
- check_interval watcher: asyncio task polls process, delivers updates to platform
RL Safety (environments/):
- tool_context.py cleanup() kills background processes on episode end
- hermes_base_env.py warns when enabled_toolsets is None (loads all tools)
- Process tool safe in RL via wait() blocking the agent loop
Also:
- Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all])
- Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP
- Updated AGENTS.md with process management docs and project structure
- Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
async def _run_agent(
|
|
|
|
|
self,
|
|
|
|
|
message: str,
|
|
|
|
|
context_prompt: str,
|
|
|
|
|
history: List[Dict[str, Any]],
|
|
|
|
|
source: SessionSource,
|
2026-02-03 16:15:49 -08:00
|
|
|
session_id: str,
|
|
|
|
|
session_key: str = None
|
2026-02-16 00:53:17 -08:00
|
|
|
) -> Dict[str, Any]:
|
2026-02-02 19:01:51 -08:00
|
|
|
"""
|
|
|
|
|
Run the agent with the given message and context.
|
|
|
|
|
|
2026-02-16 00:53:17 -08:00
|
|
|
Returns the full result dict from run_conversation, including:
|
|
|
|
|
- "final_response": str (the text to send back)
|
|
|
|
|
- "messages": list (full conversation including tool calls)
|
|
|
|
|
- "api_calls": int
|
|
|
|
|
- "completed": bool
|
|
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
This is run in a thread pool to not block the event loop.
|
2026-02-03 16:15:49 -08:00
|
|
|
Supports interruption via new messages.
|
2026-02-02 19:01:51 -08:00
|
|
|
"""
|
|
|
|
|
from run_agent import AIAgent
|
2026-02-03 14:54:43 -08:00
|
|
|
import queue
|
2026-02-02 19:01:51 -08:00
|
|
|
|
2026-02-17 23:39:24 -08:00
|
|
|
# Determine toolset based on platform.
|
|
|
|
|
# Check config.yaml for per-platform overrides, fallback to hardcoded defaults.
|
|
|
|
|
default_toolset_map = {
|
2026-02-02 19:01:51 -08:00
|
|
|
Platform.LOCAL: "hermes-cli",
|
|
|
|
|
Platform.TELEGRAM: "hermes-telegram",
|
|
|
|
|
Platform.DISCORD: "hermes-discord",
|
|
|
|
|
Platform.WHATSAPP: "hermes-whatsapp",
|
2026-02-17 23:39:24 -08:00
|
|
|
Platform.SLACK: "hermes-slack",
|
2026-02-02 19:01:51 -08:00
|
|
|
}
|
2026-02-17 23:39:24 -08:00
|
|
|
|
|
|
|
|
# Try to load platform_toolsets from config
|
|
|
|
|
platform_toolsets_config = {}
|
|
|
|
|
try:
|
|
|
|
|
config_path = Path.home() / '.hermes' / 'config.yaml'
|
|
|
|
|
if config_path.exists():
|
|
|
|
|
import yaml
|
|
|
|
|
with open(config_path, 'r') as f:
|
|
|
|
|
user_config = yaml.safe_load(f) or {}
|
|
|
|
|
platform_toolsets_config = user_config.get("platform_toolsets", {})
|
2026-02-21 03:32:11 -08:00
|
|
|
except Exception as e:
|
|
|
|
|
logger.debug("Could not load platform_toolsets config: %s", e)
|
2026-02-17 23:39:24 -08:00
|
|
|
|
|
|
|
|
# Map platform enum to config key
|
|
|
|
|
platform_config_key = {
|
|
|
|
|
Platform.LOCAL: "cli",
|
|
|
|
|
Platform.TELEGRAM: "telegram",
|
|
|
|
|
Platform.DISCORD: "discord",
|
|
|
|
|
Platform.WHATSAPP: "whatsapp",
|
|
|
|
|
Platform.SLACK: "slack",
|
|
|
|
|
}.get(source.platform, "telegram")
|
|
|
|
|
|
|
|
|
|
# Use config override if present (list of toolsets), otherwise hardcoded default
|
|
|
|
|
config_toolsets = platform_toolsets_config.get(platform_config_key)
|
|
|
|
|
if config_toolsets and isinstance(config_toolsets, list):
|
|
|
|
|
enabled_toolsets = config_toolsets
|
|
|
|
|
else:
|
|
|
|
|
default_toolset = default_toolset_map.get(source.platform, "hermes-telegram")
|
|
|
|
|
enabled_toolsets = [default_toolset]
|
2026-02-02 19:01:51 -08:00
|
|
|
|
2026-02-03 14:54:43 -08:00
|
|
|
# Check if tool progress notifications are enabled
|
|
|
|
|
tool_progress_enabled = os.getenv("HERMES_TOOL_PROGRESS", "").lower() in ("1", "true", "yes")
|
|
|
|
|
progress_mode = os.getenv("HERMES_TOOL_PROGRESS_MODE", "new") # "all" or "new" (only new tools)
|
|
|
|
|
|
|
|
|
|
# Queue for progress messages (thread-safe)
|
|
|
|
|
progress_queue = queue.Queue() if tool_progress_enabled else None
|
|
|
|
|
last_tool = [None] # Mutable container for tracking in closure
|
|
|
|
|
|
|
|
|
|
def progress_callback(tool_name: str, preview: str = None):
|
|
|
|
|
"""Callback invoked by agent when a tool is called."""
|
|
|
|
|
if not progress_queue:
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
# "new" mode: only report when tool changes
|
|
|
|
|
if progress_mode == "new" and tool_name == last_tool[0]:
|
|
|
|
|
return
|
|
|
|
|
last_tool[0] = tool_name
|
|
|
|
|
|
2026-02-12 10:05:08 -08:00
|
|
|
# Build progress message with primary argument preview
|
2026-02-03 14:54:43 -08:00
|
|
|
tool_emojis = {
|
|
|
|
|
"terminal": "💻",
|
2026-02-17 17:11:31 -08:00
|
|
|
"process": "⚙️",
|
2026-02-03 14:54:43 -08:00
|
|
|
"web_search": "🔍",
|
|
|
|
|
"web_extract": "📄",
|
|
|
|
|
"read_file": "📖",
|
|
|
|
|
"write_file": "✍️",
|
2026-02-12 10:05:08 -08:00
|
|
|
"patch": "🔧",
|
|
|
|
|
"search": "🔎",
|
2026-02-03 14:54:43 -08:00
|
|
|
"list_directory": "📂",
|
|
|
|
|
"image_generate": "🎨",
|
2026-02-12 10:05:08 -08:00
|
|
|
"text_to_speech": "🔊",
|
2026-02-03 14:54:43 -08:00
|
|
|
"browser_navigate": "🌐",
|
|
|
|
|
"browser_click": "👆",
|
2026-02-12 10:05:08 -08:00
|
|
|
"browser_type": "⌨️",
|
|
|
|
|
"browser_snapshot": "📸",
|
2026-02-17 17:11:31 -08:00
|
|
|
"browser_scroll": "📜",
|
|
|
|
|
"browser_back": "◀️",
|
|
|
|
|
"browser_press": "⌨️",
|
|
|
|
|
"browser_close": "🚪",
|
|
|
|
|
"browser_get_images": "🖼️",
|
|
|
|
|
"browser_vision": "👁️",
|
2026-02-03 14:54:43 -08:00
|
|
|
"moa_query": "🧠",
|
2026-02-12 10:05:08 -08:00
|
|
|
"mixture_of_agents": "🧠",
|
|
|
|
|
"vision_analyze": "👁️",
|
|
|
|
|
"skill_view": "📚",
|
|
|
|
|
"skills_list": "📋",
|
2026-02-17 17:11:31 -08:00
|
|
|
"todo": "📋",
|
2026-02-19 00:57:31 -08:00
|
|
|
"memory": "🧠",
|
|
|
|
|
"session_search": "🔍",
|
2026-02-17 17:11:31 -08:00
|
|
|
"send_message": "📨",
|
|
|
|
|
"schedule_cronjob": "⏰",
|
|
|
|
|
"list_cronjobs": "⏰",
|
|
|
|
|
"remove_cronjob": "⏰",
|
2026-02-03 14:54:43 -08:00
|
|
|
}
|
|
|
|
|
emoji = tool_emojis.get(tool_name, "⚙️")
|
|
|
|
|
|
2026-02-12 10:05:08 -08:00
|
|
|
if preview:
|
|
|
|
|
# Truncate preview to keep messages clean
|
|
|
|
|
if len(preview) > 40:
|
|
|
|
|
preview = preview[:37] + "..."
|
|
|
|
|
msg = f"{emoji} {tool_name}... \"{preview}\""
|
2026-02-03 14:54:43 -08:00
|
|
|
else:
|
|
|
|
|
msg = f"{emoji} {tool_name}..."
|
|
|
|
|
|
|
|
|
|
progress_queue.put(msg)
|
|
|
|
|
|
|
|
|
|
# Background task to send progress messages
|
|
|
|
|
async def send_progress_messages():
    """Relay queued tool-progress lines to the originating chat until cancelled.

    Polls ``progress_queue`` without blocking the event loop, sends each
    message through the adapter registered for ``source.platform`` and then
    re-sends the typing indicator. Returns immediately when there is no
    queue or no adapter for the platform.

    On cancellation, anything still queued is flushed on a best-effort
    basis before the coroutine returns normally.
    """
    if not progress_queue:
        return

    adapter = self.adapters.get(source.platform)
    if not adapter:
        return

    try:
        while True:
            # The back-off sleep happens *after* the inner try statement.
            # An exception raised inside an ``except`` clause is not seen by
            # sibling clauses of the same ``try``, so sleeping inside the
            # handlers would let a cancellation skip the drain step below.
            delay = 0
            try:
                # Non-blocking poll; raises queue.Empty when nothing is waiting
                msg = progress_queue.get_nowait()
                await adapter.send(chat_id=source.chat_id, content=msg)
                # Restore typing indicator after sending progress message
                await asyncio.sleep(0.3)
                await adapter.send_typing(source.chat_id)
            except queue.Empty:
                delay = 0.3  # Check again soon
            except asyncio.CancelledError:
                # Listed before ``except Exception`` so cancellation is never
                # treated as a send failure, even on interpreters where
                # CancelledError derives from Exception.
                raise
            except Exception as e:
                logger.error("Progress message error: %s", e)
                delay = 1
            if delay:
                await asyncio.sleep(delay)
    except asyncio.CancelledError:
        # Drain remaining messages
        while not progress_queue.empty():
            try:
                msg = progress_queue.get_nowait()
                await adapter.send(chat_id=source.chat_id, content=msg)
            except Exception as e:
                # Best effort only: give up on the first failure
                logger.debug("Stopped draining progress messages: %s", e)
                break
        return
|
|
|
|
|
|
2026-02-03 16:15:49 -08:00
|
|
|
# We need to share the agent instance for interrupt support
|
|
|
|
|
agent_holder = [None] # Mutable container for the agent instance
|
|
|
|
|
result_holder = [None] # Mutable container for the result
|
2026-02-16 00:53:17 -08:00
|
|
|
tools_holder = [None] # Mutable container for the tool definitions
|
2026-02-03 16:15:49 -08:00
|
|
|
|
2026-02-02 19:01:51 -08:00
|
|
|
def run_sync():
    """Create an agent, run one conversation turn, and package the outcome.

    This function blocks until the agent has finished, so it is meant to be
    run off the event loop. It publishes the agent, its tool definitions and
    the raw result through ``agent_holder``, ``tools_holder`` and
    ``result_holder`` so the surrounding coroutine can observe them.

    Returns:
        A dict with the keys ``final_response``, ``messages``, ``api_calls``
        and ``tools``. When the agent produced no final response, the
        ``final_response`` value is the agent's error text (prefixed with a
        warning sign) or ``"(No response generated)"``.
    """
    # Pass session_key to process registry via env var so background
    # processes can be mapped back to this gateway session.
    # NOTE(review): os.environ is process-wide; if several sessions run this
    # function concurrently they would overwrite each other's value - confirm
    # that this is acceptable.
    os.environ["HERMES_SESSION_KEY"] = session_key or ""

    # Read from env var or use default (same as CLI)
    max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "60"))

    # Map platform enum to the platform hint key the agent understands.
    # Platform.LOCAL ("local") maps to "cli"; others pass through as-is.
    platform_key = "cli" if source.platform == Platform.LOCAL else source.platform.value

    agent = AIAgent(
        model=os.getenv("HERMES_MODEL", "anthropic/claude-opus-4.6"),
        max_iterations=max_iterations,
        quiet_mode=True,
        enabled_toolsets=enabled_toolsets,
        ephemeral_system_prompt=context_prompt,
        session_id=session_id,
        tool_progress_callback=progress_callback if tool_progress_enabled else None,
        platform=platform_key,  # Tells the agent which interface to format for
    )

    # Store agent reference for interrupt support
    agent_holder[0] = agent
    # Capture the full tool definitions for transcript logging
    tools_holder[0] = agent.tools if hasattr(agent, 'tools') else None

    # Convert history to agent format.
    # Two cases:
    #   1. Normal path (from transcript): simple {role, content, timestamp} dicts
    #      - Strip timestamps, keep role+content
    #   2. Interrupt path (from agent result["messages"]): full agent messages
    #      that may include tool_calls, tool_call_id, reasoning, etc.
    #      - These must be passed through intact so the API sees valid
    #        assistant->tool sequences (dropping tool_calls causes 500 errors)
    agent_history = []
    for msg in history:
        role = msg.get("role")
        # Skip entries without a role, transcript-only metadata
        # ("session_meta"), and system messages -- the agent rebuilds its
        # own system prompt.
        if not role or role in ("session_meta", "system"):
            continue

        if "tool_calls" in msg or "tool_call_id" in msg or role == "tool":
            # Rich agent message: keep every field except the timestamp
            agent_history.append({k: v for k, v in msg.items() if k != "timestamp"})
            continue

        # Simple text message - just need role and content
        content = msg.get("content")
        if content:
            agent_history.append({"role": role, "content": content})

    result = agent.run_conversation(message, conversation_history=agent_history)
    result_holder[0] = result

    tools = tools_holder[0] or []

    # Return final response, or a message if something went wrong
    final_response = result.get("final_response")
    if not final_response:
        error_msg = f"⚠️ {result['error']}" if result.get("error") else "(No response generated)"
        return {
            "final_response": error_msg,
            "messages": result.get("messages", []),
            "api_calls": result.get("api_calls", 0),
            "tools": tools,
        }

    # Scan tool results for MEDIA:<path> tags that need to be delivered
    # as native audio/file attachments. The TTS tool embeds MEDIA: tags
    # in its JSON response, but the model's final text reply usually
    # doesn't include them. We collect unique tags from tool results and
    # append any that aren't already present in the final response, so the
    # adapter's extract_media() can find and deliver the files exactly once.
    if "MEDIA:" not in final_response:
        media_tags = []
        has_voice_directive = False
        for msg in result.get("messages", []):
            if msg.get("role") not in ("tool", "function"):
                continue
            content = msg.get("content")
            # The key may be present with None or a non-string payload;
            # only plain strings can be searched for MEDIA: tags.
            if not isinstance(content, str) or "MEDIA:" not in content:
                continue
            for match in re.finditer(r'MEDIA:(\S+)', content):
                # Trim JSON punctuation that the \S+ match may have swallowed
                path = match.group(1).strip().rstrip('",}')
                if path:
                    media_tags.append(f"MEDIA:{path}")
            if "[[audio_as_voice]]" in content:
                has_voice_directive = True

        if media_tags:
            # Deduplicate while preserving order
            unique_tags = list(dict.fromkeys(media_tags))
            if has_voice_directive:
                unique_tags.insert(0, "[[audio_as_voice]]")
            final_response = final_response + "\n" + "\n".join(unique_tags)

    return {
        "final_response": final_response,
        "messages": result.get("messages", []),
        "api_calls": result.get("api_calls", 0),
        "tools": tools,
    }
|
2026-02-02 19:01:51 -08:00
|
|
|
|
2026-02-03 14:54:43 -08:00
|
|
|
# Start progress message sender if enabled
|
|
|
|
|
progress_task = None
|
|
|
|
|
if tool_progress_enabled:
|
|
|
|
|
progress_task = asyncio.create_task(send_progress_messages())
|
|
|
|
|
|
2026-02-03 16:15:49 -08:00
|
|
|
# Track this agent as running for this session (for interrupt support)
|
|
|
|
|
# We do this in a callback after the agent is created
|
|
|
|
|
async def track_agent():
    """Register the agent under ``session_key`` once it has been created.

    Polls ``agent_holder`` every 50 ms until it holds an agent, then stores
    that agent in ``self._running_agents`` (only when a session key is set).
    """
    while True:
        created_agent = agent_holder[0]
        if created_agent is not None:
            break
        # Agent not constructed yet; look again shortly
        await asyncio.sleep(0.05)

    if session_key:
        self._running_agents[session_key] = created_agent
|
|
|
|
|
|
|
|
|
|
tracking_task = asyncio.create_task(track_agent())
|
|
|
|
|
|
2026-02-03 20:10:15 -08:00
|
|
|
# Monitor for interrupts from the adapter (new messages arriving)
|
|
|
|
|
async def monitor_for_interrupt():
    """Watch the adapter for a pending interrupt and signal the running agent.

    Checks every 200 ms whether the adapter reports a pending interrupt for
    this chat. Once one is reported and an agent exists, the pending
    message text (or ``None``) is passed to ``agent.interrupt`` and the
    monitor stops. Returns immediately when no adapter is registered for
    the platform.
    """
    adapter = self.adapters.get(source.platform)
    if not adapter:
        return

    chat_id = source.chat_id
    while True:
        await asyncio.sleep(0.2)  # Check every 200ms

        # Check if adapter has a pending interrupt for this session
        can_report = hasattr(adapter, 'has_pending_interrupt')
        if not (can_report and adapter.has_pending_interrupt(chat_id)):
            continue

        running_agent = agent_holder[0]
        if not running_agent:
            # Nothing to interrupt yet; look again on the next tick
            continue

        queued_event = adapter.get_pending_message(chat_id)
        interrupt_text = queued_event.text if queued_event else None
        logger.debug("Interrupt detected from adapter, signaling agent...")
        running_agent.interrupt(interrupt_text)
        return
|
|
|
|
|
|
|
|
|
|
interrupt_monitor = asyncio.create_task(monitor_for_interrupt())
|
|
|
|
|
|
2026-02-03 14:54:43 -08:00
|
|
|
try:
|
|
|
|
|
# Run in thread pool to not block
|
|
|
|
|
loop = asyncio.get_event_loop()
|
|
|
|
|
response = await loop.run_in_executor(None, run_sync)
|
2026-02-03 16:15:49 -08:00
|
|
|
|
|
|
|
|
# Check if we were interrupted and have a pending message
|
|
|
|
|
result = result_holder[0]
|
2026-02-03 20:10:15 -08:00
|
|
|
adapter = self.adapters.get(source.platform)
|
|
|
|
|
|
|
|
|
|
# Get pending message from adapter if interrupted
|
|
|
|
|
pending = None
|
|
|
|
|
if result and result.get("interrupted") and adapter:
|
|
|
|
|
pending_event = adapter.get_pending_message(source.chat_id)
|
|
|
|
|
if pending_event:
|
|
|
|
|
pending = pending_event.text
|
|
|
|
|
elif result.get("interrupt_message"):
|
|
|
|
|
pending = result.get("interrupt_message")
|
|
|
|
|
|
|
|
|
|
if pending:
|
2026-02-21 03:11:11 -08:00
|
|
|
logger.debug("Processing interrupted message: '%s...'", pending[:40])
|
2026-02-11 00:05:30 +00:00
|
|
|
|
|
|
|
|
# Clear the adapter's interrupt event so the next _run_agent call
|
|
|
|
|
# doesn't immediately re-trigger the interrupt before the new agent
|
|
|
|
|
# even makes its first API call (this was causing an infinite loop).
|
|
|
|
|
if adapter and hasattr(adapter, '_active_sessions') and source.chat_id in adapter._active_sessions:
|
|
|
|
|
adapter._active_sessions[source.chat_id].clear()
|
|
|
|
|
|
2026-02-10 16:34:27 -08:00
|
|
|
# Don't send the interrupted response to the user — it's just noise
|
|
|
|
|
# like "Operation interrupted." They already know they sent a new
|
|
|
|
|
# message, so go straight to processing it.
|
2026-02-03 20:10:15 -08:00
|
|
|
|
|
|
|
|
# Now process the pending message with updated history
|
|
|
|
|
updated_history = result.get("messages", history)
|
|
|
|
|
return await self._run_agent(
|
|
|
|
|
message=pending,
|
|
|
|
|
context_prompt=context_prompt,
|
|
|
|
|
history=updated_history,
|
|
|
|
|
source=source,
|
|
|
|
|
session_id=session_id,
|
|
|
|
|
session_key=session_key
|
|
|
|
|
)
|
2026-02-03 14:54:43 -08:00
|
|
|
finally:
|
2026-02-03 20:10:15 -08:00
|
|
|
# Stop progress sender and interrupt monitor
|
2026-02-03 14:54:43 -08:00
|
|
|
if progress_task:
|
|
|
|
|
progress_task.cancel()
|
2026-02-03 20:10:15 -08:00
|
|
|
interrupt_monitor.cancel()
|
2026-02-03 16:15:49 -08:00
|
|
|
|
|
|
|
|
# Clean up tracking
|
|
|
|
|
tracking_task.cancel()
|
|
|
|
|
if session_key and session_key in self._running_agents:
|
|
|
|
|
del self._running_agents[session_key]
|
2026-02-03 20:10:15 -08:00
|
|
|
|
|
|
|
|
# Wait for cancelled tasks
|
|
|
|
|
for task in [progress_task, interrupt_monitor, tracking_task]:
|
|
|
|
|
if task:
|
|
|
|
|
try:
|
|
|
|
|
await task
|
|
|
|
|
except asyncio.CancelledError:
|
|
|
|
|
pass
|
2026-02-02 19:01:51 -08:00
|
|
|
|
|
|
|
|
return response
|
|
|
|
|
|
|
|
|
|
|
2026-02-10 16:01:00 -08:00
|
|
|
async def start_gateway(config: Optional[GatewayConfig] = None) -> bool:
    """
    Start the gateway and run until interrupted.

    This is the main entry point for running the gateway.

    Args:
        config: Gateway configuration, passed unchanged to ``GatewayRunner``.

    Returns:
        True if the gateway ran successfully, False if it failed to start.
        A False return causes a non-zero exit code so systemd can auto-restart.
    """
    runner = GatewayRunner(config)

    # The event loop only keeps weak references to tasks, so hold a strong
    # reference to each stop task until it has finished.
    stop_tasks = set()

    # Set up signal handlers
    def signal_handler():
        task = asyncio.create_task(runner.stop())
        stop_tasks.add(task)
        task.add_done_callback(stop_tasks.discard)

    # We are inside a coroutine, so the running loop is the one to use
    loop = asyncio.get_running_loop()
    for sig in (signal.SIGINT, signal.SIGTERM):
        try:
            loop.add_signal_handler(sig, signal_handler)
        except NotImplementedError:
            # Windows doesn't support add_signal_handler
            pass

    # Start the gateway
    success = await runner.start()
    if not success:
        return False

    # Wait for shutdown
    await runner.wait_for_shutdown()
    return True
|
2026-02-02 19:01:51 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """CLI entry point for the gateway.

    Parses ``--config`` / ``--verbose``, optionally loads a JSON gateway
    config, runs the gateway to completion, and exits with status 1 when
    the gateway reports that it did not start successfully.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Hermes Gateway - Multi-platform messaging")
    parser.add_argument("--config", "-c", help="Path to gateway config file")
    # NOTE(review): --verbose is accepted but never read in this function -
    # confirm whether it is meant to change the log level.
    parser.add_argument("--verbose", "-v", action="store_true", help="Verbose output")

    args = parser.parse_args()

    config = None
    if args.config:
        import json

        # Decode explicitly as UTF-8 instead of relying on the locale default
        with open(args.config, encoding="utf-8") as f:
            data = json.load(f)
        config = GatewayConfig.from_dict(data)

    # Run the gateway - exit with code 1 if no platforms connected,
    # so systemd Restart=on-failure will retry on transient errors (e.g. DNS)
    success = asyncio.run(start_gateway(config))
    if not success:
        sys.exit(1)
|
2026-02-02 19:01:51 -08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run the CLI entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()
|