Files
hermes-agent/model_tools.py

312 lines
12 KiB
Python
Raw Normal View History

2025-08-09 09:52:25 -07:00
#!/usr/bin/env python3
"""
Model Tools Module
2026-02-21 20:22:33 -08:00
Thin orchestration layer over the tool registry. Each tool file in tools/
self-registers its schema, handler, and metadata via tools.registry.register().
This module triggers discovery (by importing all tool modules), then provides
the public API that run_agent.py, cli.py, batch_runner.py, and the RL
environments consume.
Public API (signatures preserved from the original 2,400-line version):
get_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode) -> list
handle_function_call(function_name, function_args, task_id, user_task) -> str
TOOL_TO_TOOLSET_MAP: dict (for batch_runner.py)
TOOLSET_REQUIREMENTS: dict (for cli.py, doctor.py)
get_all_tool_names() -> list
get_toolset_for_tool(name) -> str
get_available_toolsets() -> dict
check_toolset_requirements() -> dict
check_tool_availability(quiet) -> tuple
2025-08-09 09:52:25 -07:00
"""
import json
import asyncio
import os
2026-02-21 20:22:33 -08:00
import logging
from typing import Dict, Any, List, Optional, Tuple
2025-08-09 09:52:25 -07:00
2026-02-21 20:22:33 -08:00
from tools.registry import registry
2026-02-20 23:23:32 -08:00
from toolsets import resolve_toolset, validate_toolset
2025-08-09 09:52:25 -07:00
2026-02-21 20:22:33 -08:00
logger = logging.getLogger(__name__)
# =============================================================================
2026-02-21 20:22:33 -08:00
# Async Bridging (single source of truth -- used by registry.dispatch too)
# =============================================================================
def _run_async(coro):
"""Run an async coroutine from a sync context.
If the current thread already has a running event loop (e.g., inside
the gateway's async stack or Atropos's event loop), we spin up a
disposable thread so asyncio.run() can create its own loop without
conflicting.
This is the single source of truth for sync->async bridging in tool
handlers. The RL paths (agent_loop.py, tool_context.py) also provide
outer thread-pool wrapping as defense-in-depth, but each handler is
self-protecting via this function.
"""
try:
loop = asyncio.get_running_loop()
except RuntimeError:
loop = None
if loop and loop.is_running():
import concurrent.futures
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
future = pool.submit(asyncio.run, coro)
return future.result(timeout=300)
return asyncio.run(coro)
# =============================================================================
2026-02-21 20:22:33 -08:00
# Tool Discovery (importing each module triggers its registry.register calls)
# =============================================================================
2026-02-21 20:22:33 -08:00
def _discover_tools():
"""Import all tool modules to trigger their registry.register() calls.
Wrapped in a function so import errors in optional tools (e.g., fal_client
not installed) don't prevent the rest from loading.
"""
_modules = [
"tools.web_tools",
"tools.terminal_tool",
"tools.file_tools",
"tools.vision_tools",
"tools.mixture_of_agents_tool",
"tools.image_generation_tool",
"tools.skills_tool",
"tools.skill_manager_tool",
"tools.browser_tool",
"tools.cronjob_tools",
"tools.rl_training_tool",
"tools.tts_tool",
"tools.todo_tool",
"tools.memory_tool",
"tools.session_search_tool",
"tools.clarify_tool",
"tools.code_execution_tool",
"tools.delegate_tool",
"tools.process_registry",
"tools.send_message_tool",
"tools.honcho_tools",
]
2026-02-21 20:22:33 -08:00
import importlib
for mod_name in _modules:
try:
importlib.import_module(mod_name)
except Exception as e:
logger.debug("Could not import %s: %s", mod_name, e)
2026-02-21 20:22:33 -08:00
_discover_tools()
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks Major feature additions inspired by OpenClaw/ClawdBot integration analysis: Voice Message Transcription (STT): - Auto-transcribe voice/audio messages via OpenAI Whisper API - Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp - Inject transcript as text so all models can understand voice input - Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe) Telegram Sticker Understanding: - Describe static stickers via vision tool with JSON-backed cache - Cache keyed by file_unique_id avoids redundant API calls - Animated/video stickers get emoji-based fallback description Discord Rich UX: - Native slash commands (/ask, /reset, /status, /stop) via app_commands - Button-based exec approvals (Allow Once / Always Allow / Deny) - ExecApprovalView with user authorization and timeout handling Slack Integration: - Full SlackAdapter using slack-bolt with Socket Mode - DMs, channel messages (mention-gated), /hermes slash command - File attachment handling with bot-token-authenticated downloads DM Pairing System: - Code-based user authorization as alternative to static allowlists - 8-char codes from unambiguous alphabet, 1-hour expiry - Rate limiting, lockout after failed attempts, chmod 0600 on data - CLI: hermes pairing list/approve/revoke/clear-pending Event Hook System: - File-based hook discovery from ~/.hermes/hooks/ - HOOK.yaml + handler.py per hook, sync/async handler support - Events: gateway:startup, session:start/reset, agent:start/step/end - Wildcard matching (command:* catches all command events) Cross-Channel Messaging: - send_message agent tool for delivering to any connected platform - Enables cron job delivery and cross-platform notifications Human-Like Response Pacing: - Configurable delays between message chunks (off/natural/custom) - HERMES_HUMAN_DELAY_MODE env var with min/max ms settings Warm Injection Message Style: - Retrofitted image vision messages with friendly kawaii-consistent tone - All new injection messages (STT, stickers, errors) use warm style Also: updated config migration to prompt for optional keys interactively, bumped config version, updated README, AGENTS.md, .env.example, cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
2026-02-21 20:22:33 -08:00
# =============================================================================
# Backward-compat constants (built once after discovery)
# =============================================================================
Add background process management with process tool, wait, PTY, and stdin support New process registry and tool for managing long-running background processes across all terminal backends (local, Docker, Singularity, Modal, SSH). Process Registry (tools/process_registry.py): - ProcessSession tracking with rolling 200KB output buffer - spawn_local() with optional PTY via ptyprocess for interactive CLIs - spawn_via_env() for non-local backends (runs inside sandbox, never on host) - Background reader threads per process (Popen stdout or PTY) - wait() with timeout clamping, interrupt support, and transparent limit reporting - JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery - Module-level singleton shared across agent loop, gateway, and RL Process Tool (model_tools.py): - 7 actions: list, poll, log, wait, kill, write, submit - Paired with terminal in all toolsets (CLI, messaging, RL) - Timeout clamping with transparent notes in response Terminal Tool Updates (tools/terminal_tool.py): - Replaced nohup background mode with registry spawn (returns session_id) - Added workdir parameter for per-command working directory - Added check_interval parameter for gateway auto-check watchers - Added pty parameter for interactive CLI tools (Codex, Claude Code) - Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs - Cleanup thread now respects active background processes (won't reap sandbox) Gateway Integration (gateway/run.py, session.py, config.py): - Session reset protection: sessions with active processes exempt from reset - Default idle timeout increased from 2 hours to 24 hours - from_dict fallback aligned to match (was 120, now 1440) - session_key env var propagated to process registry for session mapping - Crash recovery on gateway startup via checkpoint probe - check_interval watcher: asyncio task polls process, delivers updates to platform RL Safety (environments/): - tool_context.py cleanup() kills background processes on episode end - hermes_base_env.py warns when enabled_toolsets is None (loads all tools) - Process tool safe in RL via wait() blocking the agent loop Also: - Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all]) - Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP - Updated AGENTS.md with process management docs and project structure - Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
2026-02-21 20:22:33 -08:00
TOOL_TO_TOOLSET_MAP: Dict[str, str] = registry.get_tool_to_toolset_map()
Add background process management with process tool, wait, PTY, and stdin support New process registry and tool for managing long-running background processes across all terminal backends (local, Docker, Singularity, Modal, SSH). Process Registry (tools/process_registry.py): - ProcessSession tracking with rolling 200KB output buffer - spawn_local() with optional PTY via ptyprocess for interactive CLIs - spawn_via_env() for non-local backends (runs inside sandbox, never on host) - Background reader threads per process (Popen stdout or PTY) - wait() with timeout clamping, interrupt support, and transparent limit reporting - JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery - Module-level singleton shared across agent loop, gateway, and RL Process Tool (model_tools.py): - 7 actions: list, poll, log, wait, kill, write, submit - Paired with terminal in all toolsets (CLI, messaging, RL) - Timeout clamping with transparent notes in response Terminal Tool Updates (tools/terminal_tool.py): - Replaced nohup background mode with registry spawn (returns session_id) - Added workdir parameter for per-command working directory - Added check_interval parameter for gateway auto-check watchers - Added pty parameter for interactive CLI tools (Codex, Claude Code) - Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs - Cleanup thread now respects active background processes (won't reap sandbox) Gateway Integration (gateway/run.py, session.py, config.py): - Session reset protection: sessions with active processes exempt from reset - Default idle timeout increased from 2 hours to 24 hours - from_dict fallback aligned to match (was 120, now 1440) - session_key env var propagated to process registry for session mapping - Crash recovery on gateway startup via checkpoint probe - check_interval watcher: asyncio task polls process, delivers updates to platform RL Safety (environments/): - tool_context.py cleanup() kills background processes on episode end - hermes_base_env.py warns when enabled_toolsets is None (loads all tools) - Process tool safe in RL via wait() blocking the agent loop Also: - Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all]) - Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP - Updated AGENTS.md with process management docs and project structure - Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
2026-02-21 20:22:33 -08:00
TOOLSET_REQUIREMENTS: Dict[str, dict] = registry.get_toolset_requirements()
2025-11-17 01:14:31 -05:00
2026-02-21 20:22:33 -08:00
# Resolved tool names from the last get_tool_definitions() call.
# Used by code_execution_tool to know which tools are available in this session.
_last_resolved_tool_names: List[str] = []
2025-11-17 01:14:31 -05:00
2025-08-09 09:52:25 -07:00
2026-02-21 20:22:33 -08:00
# =============================================================================
# Legacy toolset name mapping (old _tools-suffixed names -> tool name lists)
# =============================================================================
2025-08-09 09:52:25 -07:00
2026-02-21 20:22:33 -08:00
_LEGACY_TOOLSET_MAP = {
"web_tools": ["web_search", "web_extract"],
"terminal_tools": ["terminal"],
"vision_tools": ["vision_analyze"],
"moa_tools": ["mixture_of_agents"],
"image_tools": ["image_generate"],
"skills_tools": ["skills_list", "skill_view", "skill_manage"],
"browser_tools": [
"browser_navigate", "browser_snapshot", "browser_click",
"browser_type", "browser_scroll", "browser_back",
"browser_press", "browser_close", "browser_get_images",
"browser_vision"
],
"cronjob_tools": ["schedule_cronjob", "list_cronjobs", "remove_cronjob"],
"rl_tools": [
"rl_list_environments", "rl_select_environment",
"rl_get_current_config", "rl_edit_config",
"rl_start_training", "rl_check_status",
"rl_stop_training", "rl_get_results",
"rl_list_runs", "rl_test_inference"
],
"file_tools": ["read_file", "write_file", "patch", "search_files"],
"tts_tools": ["text_to_speech"],
}
2026-02-21 20:22:33 -08:00
# =============================================================================
# get_tool_definitions (the main schema provider)
# =============================================================================
2025-08-09 09:52:25 -07:00
def get_tool_definitions(
enabled_toolsets: List[str] = None,
disabled_toolsets: List[str] = None,
quiet_mode: bool = False,
2025-08-09 09:52:25 -07:00
) -> List[Dict[str, Any]]:
"""
Get tool definitions for model API calls with toolset-based filtering.
2026-02-21 20:22:33 -08:00
All tools must be part of a toolset to be accessible.
2025-08-09 09:52:25 -07:00
Args:
2026-02-21 20:22:33 -08:00
enabled_toolsets: Only include tools from these toolsets.
disabled_toolsets: Exclude tools from these toolsets (if enabled_toolsets is None).
quiet_mode: Suppress status prints.
2025-08-09 09:52:25 -07:00
Returns:
2026-02-21 20:22:33 -08:00
Filtered list of OpenAI-format tool definitions.
2025-08-09 09:52:25 -07:00
"""
2026-02-21 20:22:33 -08:00
# Determine which tool names the caller wants
tools_to_include: set = set()
2025-11-17 01:14:31 -05:00
2025-08-09 09:52:25 -07:00
if enabled_toolsets:
for toolset_name in enabled_toolsets:
if validate_toolset(toolset_name):
2026-02-21 20:22:33 -08:00
resolved = resolve_toolset(toolset_name)
tools_to_include.update(resolved)
if not quiet_mode:
print(f"✅ Enabled toolset '{toolset_name}': {', '.join(resolved) if resolved else 'no tools'}")
elif toolset_name in _LEGACY_TOOLSET_MAP:
legacy_tools = _LEGACY_TOOLSET_MAP[toolset_name]
tools_to_include.update(legacy_tools)
if not quiet_mode:
2026-02-21 20:22:33 -08:00
print(f"✅ Enabled legacy toolset '{toolset_name}': {', '.join(legacy_tools)}")
2025-08-09 09:52:25 -07:00
else:
2026-02-21 20:22:33 -08:00
if not quiet_mode:
print(f"⚠️ Unknown toolset: {toolset_name}")
2025-08-09 09:52:25 -07:00
elif disabled_toolsets:
from toolsets import get_all_toolsets
2026-02-21 20:22:33 -08:00
for ts_name in get_all_toolsets():
tools_to_include.update(resolve_toolset(ts_name))
for toolset_name in disabled_toolsets:
if validate_toolset(toolset_name):
2026-02-21 20:22:33 -08:00
resolved = resolve_toolset(toolset_name)
tools_to_include.difference_update(resolved)
if not quiet_mode:
print(f"🚫 Disabled toolset '{toolset_name}': {', '.join(resolved) if resolved else 'no tools'}")
elif toolset_name in _LEGACY_TOOLSET_MAP:
legacy_tools = _LEGACY_TOOLSET_MAP[toolset_name]
tools_to_include.difference_update(legacy_tools)
if not quiet_mode:
2026-02-21 20:22:33 -08:00
print(f"🚫 Disabled legacy toolset '{toolset_name}': {', '.join(legacy_tools)}")
else:
2026-02-21 20:22:33 -08:00
if not quiet_mode:
print(f"⚠️ Unknown toolset: {toolset_name}")
else:
from toolsets import get_all_toolsets
2026-02-21 20:22:33 -08:00
for ts_name in get_all_toolsets():
tools_to_include.update(resolve_toolset(ts_name))
# Ask the registry for schemas (only returns tools whose check_fn passes)
filtered_tools = registry.get_definitions(tools_to_include, quiet=quiet_mode)
if not quiet_mode:
if filtered_tools:
tool_names = [t["function"]["name"] for t in filtered_tools]
print(f"🛠️ Final tool selection ({len(filtered_tools)} tools): {', '.join(tool_names)}")
else:
print("🛠️ No tools selected (all filtered out or unavailable)")
2026-02-21 20:22:33 -08:00
global _last_resolved_tool_names
_last_resolved_tool_names = [t["function"]["name"] for t in filtered_tools]
2026-02-21 20:22:33 -08:00
return filtered_tools
2026-02-21 20:22:33 -08:00
# =============================================================================
# handle_function_call (the main dispatcher)
# =============================================================================
2026-02-21 20:22:33 -08:00
# Tools whose execution is intercepted by the agent loop (run_agent.py)
# because they need agent-level state (TodoStore, MemoryStore, etc.).
# The registry still holds their schemas; dispatch just returns a stub error
# so if something slips through, the LLM sees a sensible message.
_AGENT_LOOP_TOOLS = {"todo", "memory", "session_search", "delegate_task"}
2026-02-03 23:41:26 -08:00
2026-02-21 20:22:33 -08:00
def handle_function_call(
function_name: str,
function_args: Dict[str, Any],
task_id: Optional[str] = None,
2026-02-21 20:22:33 -08:00
user_task: Optional[str] = None,
) -> str:
2025-08-09 09:52:25 -07:00
"""
2026-02-21 20:22:33 -08:00
Main function call dispatcher that routes calls to the tool registry.
2025-11-03 17:42:23 -05:00
2025-08-09 09:52:25 -07:00
Args:
2026-02-21 20:22:33 -08:00
function_name: Name of the function to call.
function_args: Arguments for the function.
task_id: Unique identifier for terminal/browser session isolation.
user_task: The user's original task (for browser_snapshot context).
2025-11-03 17:42:23 -05:00
2025-08-09 09:52:25 -07:00
Returns:
2026-02-21 20:22:33 -08:00
Function result as a JSON string.
2025-08-09 09:52:25 -07:00
"""
try:
2026-02-21 20:22:33 -08:00
if function_name in _AGENT_LOOP_TOOLS:
return json.dumps({"error": f"{function_name} must be handled by the agent loop"})
Add background process management with process tool, wait, PTY, and stdin support New process registry and tool for managing long-running background processes across all terminal backends (local, Docker, Singularity, Modal, SSH). Process Registry (tools/process_registry.py): - ProcessSession tracking with rolling 200KB output buffer - spawn_local() with optional PTY via ptyprocess for interactive CLIs - spawn_via_env() for non-local backends (runs inside sandbox, never on host) - Background reader threads per process (Popen stdout or PTY) - wait() with timeout clamping, interrupt support, and transparent limit reporting - JSON checkpoint to ~/.hermes/processes.json for gateway crash recovery - Module-level singleton shared across agent loop, gateway, and RL Process Tool (model_tools.py): - 7 actions: list, poll, log, wait, kill, write, submit - Paired with terminal in all toolsets (CLI, messaging, RL) - Timeout clamping with transparent notes in response Terminal Tool Updates (tools/terminal_tool.py): - Replaced nohup background mode with registry spawn (returns session_id) - Added workdir parameter for per-command working directory - Added check_interval parameter for gateway auto-check watchers - Added pty parameter for interactive CLI tools (Codex, Claude Code) - Updated TERMINAL_TOOL_DESCRIPTION with full background workflow docs - Cleanup thread now respects active background processes (won't reap sandbox) Gateway Integration (gateway/run.py, session.py, config.py): - Session reset protection: sessions with active processes exempt from reset - Default idle timeout increased from 2 hours to 24 hours - from_dict fallback aligned to match (was 120, now 1440) - session_key env var propagated to process registry for session mapping - Crash recovery on gateway startup via checkpoint probe - check_interval watcher: asyncio task polls process, delivers updates to platform RL Safety (environments/): - tool_context.py cleanup() kills background processes on episode end - hermes_base_env.py warns when enabled_toolsets is None (loads all tools) - Process tool safe in RL via wait() blocking the agent loop Also: - Added ptyprocess as optional dependency (in pyproject.toml [pty] extra + [all]) - Fixed pre-existing bug: rl_test_inference missing from TOOL_TO_TOOLSET_MAP - Updated AGENTS.md with process management docs and project structure - Updated README.md terminal section with process management overview
2026-02-17 02:51:31 -08:00
2026-02-21 20:22:33 -08:00
if function_name == "execute_code":
return registry.dispatch(
function_name, function_args,
task_id=task_id,
enabled_tools=_last_resolved_tool_names,
)
2026-02-21 20:22:33 -08:00
return registry.dispatch(
function_name, function_args,
task_id=task_id,
user_task=user_task,
)
2026-02-21 20:22:33 -08:00
except Exception as e:
error_msg = f"Error executing {function_name}: {str(e)}"
logger.error(error_msg)
return json.dumps({"error": error_msg}, ensure_ascii=False)
Add messaging platform enhancements: STT, stickers, Discord UX, Slack, pairing, hooks Major feature additions inspired by OpenClaw/ClawdBot integration analysis: Voice Message Transcription (STT): - Auto-transcribe voice/audio messages via OpenAI Whisper API - Download voice to ~/.hermes/audio_cache/ on Telegram/Discord/WhatsApp - Inject transcript as text so all models can understand voice input - Configurable model (whisper-1, gpt-4o-mini-transcribe, gpt-4o-transcribe) Telegram Sticker Understanding: - Describe static stickers via vision tool with JSON-backed cache - Cache keyed by file_unique_id avoids redundant API calls - Animated/video stickers get emoji-based fallback description Discord Rich UX: - Native slash commands (/ask, /reset, /status, /stop) via app_commands - Button-based exec approvals (Allow Once / Always Allow / Deny) - ExecApprovalView with user authorization and timeout handling Slack Integration: - Full SlackAdapter using slack-bolt with Socket Mode - DMs, channel messages (mention-gated), /hermes slash command - File attachment handling with bot-token-authenticated downloads DM Pairing System: - Code-based user authorization as alternative to static allowlists - 8-char codes from unambiguous alphabet, 1-hour expiry - Rate limiting, lockout after failed attempts, chmod 0600 on data - CLI: hermes pairing list/approve/revoke/clear-pending Event Hook System: - File-based hook discovery from ~/.hermes/hooks/ - HOOK.yaml + handler.py per hook, sync/async handler support - Events: gateway:startup, session:start/reset, agent:start/step/end - Wildcard matching (command:* catches all command events) Cross-Channel Messaging: - send_message agent tool for delivering to any connected platform - Enables cron job delivery and cross-platform notifications Human-Like Response Pacing: - Configurable delays between message chunks (off/natural/custom) - HERMES_HUMAN_DELAY_MODE env var with min/max ms settings Warm Injection Message Style: - Retrofitted image vision messages with friendly kawaii-consistent tone - All new injection messages (STT, stickers, errors) use warm style Also: updated config migration to prompt for optional keys interactively, bumped config version, updated README, AGENTS.md, .env.example, cli-config.yaml.example, install scripts, pyproject.toml, and toolsets.
2026-02-15 21:38:59 -08:00
2026-02-21 20:22:33 -08:00
# =============================================================================
# Backward-compat wrapper functions
# =============================================================================
2026-02-21 20:22:33 -08:00
def get_all_tool_names() -> List[str]:
"""Return all registered tool names."""
return registry.get_all_tool_names()
2026-02-21 20:22:33 -08:00
def get_toolset_for_tool(tool_name: str) -> Optional[str]:
"""Return the toolset a tool belongs to."""
return registry.get_toolset_for_tool(tool_name)
def get_available_toolsets() -> Dict[str, dict]:
"""Return toolset availability info for UI display."""
return registry.get_available_toolsets()
2025-08-09 09:52:25 -07:00
def check_toolset_requirements() -> Dict[str, bool]:
2026-02-21 20:22:33 -08:00
"""Return {toolset: available_bool} for every registered toolset."""
return registry.check_toolset_requirements()
2025-11-17 01:14:31 -05:00
2025-08-09 09:52:25 -07:00
2026-02-21 20:22:33 -08:00
def check_tool_availability(quiet: bool = False) -> Tuple[List[str], List[dict]]:
"""Return (available_toolsets, unavailable_info)."""
return registry.check_tool_availability(quiet=quiet)