From 20c6573e0aa46eff767c2b688ea71b38474f54f3 Mon Sep 17 00:00:00 2001 From: teknium1 <teknium1@gmail.com> Date: Sun, 8 Mar 2026 17:38:05 -0700 Subject: [PATCH 1/4] docs: comprehensive AGENTS.md audit and corrections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Major fixes: - Default model: claude-sonnet-4.6 → claude-opus-4.6 - max_iterations default: 60 → 90 (also fixed in config.py OPTIONAL_ENV_VARS description) - chat() signature: chat(user_message, task_id) → chat(message) - Agent loop: _run_agent_loop() doesn't exist, loop is in run_conversation() - Removed async/await references (agent is entirely synchronous) - KawaiiSpinner location: run_agent.py → agent/display.py - NOUS_API_KEY removed (not used by any tool), replaced with VOICE_TOOLS_OPENAI_KEY - OPENAI_API_KEY for Whisper → VOICE_TOOLS_OPENAI_KEY - check_for_missing_config() → check_config_version() + get_missing_env_vars() - Adding tools: '2 files' → '3 files' (tool + model_tools.py + toolsets.py) - Venv path: venv/ → .venv/ - Trajectory output path: trajectories/*.jsonl → trajectory_samples.jsonl - process_command() location clarified (HermesCLI in cli.py, not commands.py) - REQUIRED_ENV_VARS noted as intentionally empty - _config_version noted as currently at version 5 New content: - Project structure: added 40+ missing files across agent/, hermes_cli/, tools/, gateway/ - Full gateway/ directory listing with all modules and platforms/ - Added honcho_integration/, scripts/, tests/ directories - Added hermes_constants.py, hermes_time.py, trajectory_compressor.py, utils.py - CLI commands table: added 25+ missing commands (model, login, logout, whatsapp, skills subsystem, tools, insights, gateway start/stop/restart/status/uninstall, sessions export/delete/prune/stats, config path/env-path/show) - Gateway slash commands section with all 20+ commands - Platform toolsets: added hermes-cli, hermes-slack, hermes-homeassistant, hermes-gateway - Gateway: added Home Assistant as supported 
platform --- AGENTS.md | 221 +++++++++++++++++++++++++++++++++++-------- hermes_cli/config.py | 2 +- 2 files changed, 180 insertions(+), 43 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index a7318fd33..d076ac5ea 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -8,7 +8,7 @@ Hermes Agent is an AI agent harness with tool-calling capabilities, interactive **IMPORTANT**: Always use the virtual environment if it exists: ```bash -source venv/bin/activate # Before running any Python commands +source .venv/bin/activate # Before running any Python commands ``` ## Project Structure @@ -21,20 +21,32 @@ hermes-agent/ │ ├── prompt_caching.py # Anthropic prompt caching │ ├── prompt_builder.py # System prompt assembly (identity, skills index, context files) │ ├── display.py # KawaiiSpinner, tool preview formatting -│ └── trajectory.py # Trajectory saving helpers +│ ├── trajectory.py # Trajectory saving helpers +│ ├── skill_commands.py # Skill slash command scanning + invocation (shared CLI/gateway) +│ ├── auxiliary_client.py # Auxiliary LLM client (vision, summarization) +│ ├── insights.py # Usage analytics and session statistics +│ └── redact.py # Sensitive data redaction ├── hermes_cli/ # CLI implementation -│ ├── main.py # Entry point, command dispatcher +│ ├── main.py # Entry point, command dispatcher (all `hermes` subcommands) │ ├── banner.py # Welcome banner, ASCII art, skills summary -│ ├── commands.py # Slash command definitions + autocomplete +│ ├── commands.py # Slash command definitions + SlashCommandCompleter │ ├── callbacks.py # Interactive prompt callbacks (clarify, sudo, approval) │ ├── setup.py # Interactive setup wizard -│ ├── config.py # Config management & migration +│ ├── config.py # Config management, DEFAULT_CONFIG, migration │ ├── status.py # Status display │ ├── doctor.py # Diagnostics -│ ├── gateway.py # Gateway management +│ ├── gateway.py # Gateway management (start/stop/install) │ ├── uninstall.py # Uninstaller │ ├── cron.py # Cron job management -│ └── 
skills_hub.py # Skills Hub CLI + /skills slash command +│ ├── skills_hub.py # Skills Hub CLI + /skills slash command +│ ├── tools_config.py # `hermes tools` command — per-platform tool toggling +│ ├── pairing.py # DM pairing management CLI +│ ├── auth.py # Provider OAuth authentication +│ ├── models.py # Model selection and listing +│ ├── runtime_provider.py # Runtime provider resolution +│ ├── clipboard.py # Clipboard image paste support +│ ├── colors.py # Terminal color utilities +│ └── codex_models.py # Codex/Responses API model definitions ├── tools/ # Tool implementations │ ├── registry.py # Central tool registry (schemas, handlers, dispatch) │ ├── approval.py # Dangerous command detection + per-session approval @@ -47,22 +59,73 @@ hermes-agent/ │ │ ├── modal.py # Modal cloud execution │ │ └── daytona.py # Daytona cloud sandboxes │ ├── terminal_tool.py # Terminal orchestration (sudo, lifecycle, factory) -│ ├── todo_tool.py # Planning & task management │ ├── process_registry.py # Background process management -│ └── ... 
# Other tool files +│ ├── todo_tool.py # Planning & task management +│ ├── memory_tool.py # Persistent memory read/write +│ ├── skills_tool.py # Agent-facing skill list/view (progressive disclosure) +│ ├── skill_manager_tool.py # Skill CRUD operations +│ ├── session_search_tool.py # FTS5 session search +│ ├── file_tools.py # File read/write/search/patch tools +│ ├── file_operations.py # File operations helpers +│ ├── web_tools.py # Firecrawl search/extract +│ ├── browser_tool.py # Browserbase browser automation +│ ├── vision_tools.py # Image analysis via auxiliary LLM +│ ├── image_generation_tool.py # FLUX image generation via fal.ai +│ ├── tts_tool.py # Text-to-speech +│ ├── transcription_tools.py # Whisper voice transcription +│ ├── code_execution_tool.py # execute_code sandbox +│ ├── delegate_tool.py # Subagent delegation +│ ├── clarify_tool.py # User clarification prompts +│ ├── send_message_tool.py # Cross-platform message sending +│ ├── cronjob_tools.py # Scheduled task management +│ ├── mcp_tool.py # MCP (Model Context Protocol) client +│ ├── mixture_of_agents_tool.py # Mixture-of-Agents orchestration +│ ├── homeassistant_tool.py # Home Assistant integration +│ ├── honcho_tools.py # Honcho context management +│ ├── rl_training_tool.py # RL training environment tools +│ ├── openrouter_client.py # OpenRouter API helpers +│ ├── patch_parser.py # V4A patch format parser +│ ├── fuzzy_match.py # Multi-strategy fuzzy string matching +│ ├── interrupt.py # Agent interrupt handling +│ ├── debug_helpers.py # Debug/diagnostic helpers +│ ├── skills_guard.py # Security scanner (regex + LLM audit) +│ ├── skills_hub.py # Source adapters for skills marketplace +│ └── skills_sync.py # Skill synchronization ├── gateway/ # Messaging platform adapters -│ ├── platforms/ # Platform-specific adapters (telegram, discord, slack, whatsapp) -│ └── ... 
+│ ├── run.py # Main gateway loop, slash commands, message dispatch +│ ├── session.py # SessionStore — conversation persistence +│ ├── config.py # Gateway-specific config helpers +│ ├── delivery.py # Message delivery (origin, telegram, discord, etc.) +│ ├── hooks.py # Event hook system +│ ├── pairing.py # DM pairing system (code generation, verification) +│ ├── mirror.py # Message mirroring +│ ├── status.py # Gateway status reporting +│ ├── sticker_cache.py # Telegram sticker description cache +│ ├── channel_directory.py # Channel/chat directory management +│ └── platforms/ # Platform-specific adapters +│ ├── base.py # BasePlatform ABC +│ ├── telegram.py # Telegram bot adapter +│ ├── discord.py # Discord bot adapter +│ ├── slack.py # Slack bot adapter (Socket Mode) +│ ├── whatsapp.py # WhatsApp adapter +│ └── homeassistant.py # Home Assistant adapter ├── cron/ # Scheduler implementation ├── environments/ # RL training environments (Atropos integration) +├── honcho_integration/ # Honcho client & session management ├── skills/ # Bundled skill sources ├── optional-skills/ # Official optional skills (not activated by default) +├── scripts/ # Install scripts, utilities +├── tests/ # Full pytest suite (~2300+ tests) ├── cli.py # Interactive CLI orchestrator (HermesCLI class) ├── hermes_state.py # SessionDB — SQLite session store (schema, titles, FTS5 search) +├── hermes_constants.py # OpenRouter URL constants +├── hermes_time.py # Timezone-aware timestamp utilities ├── run_agent.py # AIAgent class (core conversation loop) ├── model_tools.py # Tool orchestration (thin layer over tools/registry.py) -├── toolsets.py # Tool groupings +├── toolsets.py # Tool groupings and platform toolset definitions ├── toolset_distributions.py # Probability-based tool selection +├── trajectory_compressor.py # Trajectory post-processing +├── utils.py # Shared utilities └── batch_runner.py # Parallel batch processing ``` @@ -99,33 +162,55 @@ The main agent is implemented in `run_agent.py`: 
class AIAgent: def __init__( self, - model: str = "anthropic/claude-sonnet-4.6", + base_url: str = None, api_key: str = None, - base_url: str = "https://openrouter.ai/api/v1", - max_iterations: int = 60, # Max tool-calling loops + provider: str = None, # Provider identifier (routing hints) + api_mode: str = None, # "chat_completions" or "codex_responses" + model: str = "anthropic/claude-opus-4.6", # OpenRouter format + max_iterations: int = 90, # Max tool-calling loops + tool_delay: float = 1.0, enabled_toolsets: list = None, disabled_toolsets: list = None, + save_trajectories: bool = False, verbose_logging: bool = False, quiet_mode: bool = False, # Suppress progress output + session_id: str = None, tool_progress_callback: callable = None, # Called on each tool use + clarify_callback: callable = None, + step_callback: callable = None, + max_tokens: int = None, + reasoning_config: dict = None, + platform: str = None, # Platform identifier (cli, telegram, etc.) + skip_context_files: bool = False, + skip_memory: bool = False, + session_db = None, + iteration_budget: "IterationBudget" = None, + # ... plus OpenRouter provider routing params ): # Initialize OpenAI client, load tools based on toolsets ... - def chat(self, user_message: str, task_id: str = None) -> str: - # Main entry point - runs the agent loop + def chat(self, message: str) -> str: + # Simple interface — returns just the final response string + ... + + def run_conversation( + self, user_message: str, system_message: str = None, + conversation_history: list = None, task_id: str = None + ) -> dict: + # Full interface — returns dict with final_response + message history ... ``` ### Agent Loop -The core loop in `_run_agent_loop()`: +The core loop is inside `run_conversation()` (there is no separate `_run_agent_loop()` method): ``` 1. Add user message to conversation 2. Call LLM with tools 3. 
If LLM returns tool calls: - - Execute each tool + - Execute each tool (synchronously) - Add tool results to conversation - Go to step 2 4. If LLM returns text response: @@ -133,7 +218,7 @@ The core loop in `_run_agent_loop()`: ``` ```python -while turns < max_turns: +while api_call_count < self.max_iterations and self.iteration_budget.remaining > 0: response = client.chat.completions.create( model=model, messages=messages, @@ -142,13 +227,15 @@ while turns < max_turns: if response.tool_calls: for tool_call in response.tool_calls: - result = await execute_tool(tool_call) + result = handle_function_call(tool_call.name, tool_call.args, task_id) messages.append(tool_result_message(result)) - turns += 1 + api_call_count += 1 else: return response.content ``` +Note: The agent is **entirely synchronous** — no async/await anywhere. + ### Conversation Management Messages are stored as a list of dicts following OpenAI format: @@ -177,7 +264,7 @@ For models that support chain-of-thought reasoning: The interactive CLI uses: - **Rich** - For the welcome banner and styled panels - **prompt_toolkit** - For fixed input area with history, `patch_stdout`, slash command autocomplete, and floating completion menus -- **KawaiiSpinner** (in run_agent.py) - Animated kawaii faces during API calls; clean `┊` activity feed for tool execution results +- **KawaiiSpinner** (in agent/display.py) - Animated kawaii faces during API calls; clean `┊` activity feed for tool execution results Key components: - `HermesCLI` class - Main CLI controller with commands and conversation loop @@ -213,8 +300,8 @@ Implementation (`agent/skill_commands.py`, shared between CLI and gateway): ### Adding CLI Commands -1. Add to `COMMANDS` dict with description -2. Add handler in `process_command()` method +1. Add to `COMMANDS` dict in `hermes_cli/commands.py` +2. Add handler in `process_command()` method (in `HermesCLI` class, `cli.py`) 3. 
For persistent settings, use `save_config_value()` to update config --- @@ -227,16 +314,23 @@ The unified `hermes` command provides all functionality: |---------|-------------| | `hermes` | Interactive chat (default) | | `hermes chat -q "..."` | Single query mode | +| `hermes chat -m <model>` | Chat with a specific model | +| `hermes chat --provider <provider>` | Chat with a specific provider | | `hermes -c` / `hermes --continue` | Resume the most recent session | | `hermes -c "my project"` | Resume a session by name (latest in lineage) | | `hermes --resume ` | Resume a specific session by ID or title | | `hermes -w` / `hermes --worktree` | Start in isolated git worktree (for parallel agents) | +| `hermes model` | Interactive provider and model selection | +| `hermes login <provider>` | OAuth login to inference providers (nous, openai-codex) | +| `hermes logout <provider>` | Clear authentication credentials | | `hermes setup` | Configure API keys and settings | -| `hermes config` | View current configuration | +| `hermes config` / `hermes config show` | View current configuration | | `hermes config edit` | Open config in editor | | `hermes config set KEY VAL` | Set a specific value | | `hermes config check` | Check for missing config | | `hermes config migrate` | Prompt for missing config interactively | +| `hermes config path` | Show config file path | +| `hermes config env-path` | Show .env file path | | `hermes status` | Show configuration status | | `hermes doctor` | Diagnose issues | | `hermes update` | Update to latest (checks for new config) | @@ -244,10 +338,25 @@ The unified `hermes` command provides all functionality: | `hermes gateway` | Start gateway (messaging + cron scheduler) | | `hermes gateway setup` | Configure messaging platforms interactively | | `hermes gateway install` | Install gateway as system service | +| `hermes gateway start/stop/restart` | Manage gateway service | +| `hermes gateway status` | Check gateway service status | +| `hermes gateway uninstall` | Remove gateway 
service | +| `hermes whatsapp` | WhatsApp setup and QR pairing wizard | +| `hermes tools` | Interactive tool configuration per platform | +| `hermes skills browse/search` | Browse and search skills marketplace | +| `hermes skills install/uninstall` | Install or remove skills | +| `hermes skills list` | List installed skills | +| `hermes skills audit` | Security audit installed skills | +| `hermes skills tap add/remove/list` | Manage custom skill sources | | `hermes sessions list` | List past sessions (title, preview, last active) | | `hermes sessions rename ` | Rename/title a session | +| `hermes sessions export <id>` | Export a session | +| `hermes sessions delete <id>` | Delete a session | +| `hermes sessions prune` | Remove old sessions | +| `hermes sessions stats` | Session statistics | | `hermes cron list` | View scheduled jobs | | `hermes cron status` | Check if cron scheduler is running | +| `hermes insights` | Usage analytics and session statistics | | `hermes version` | Show version info | | `hermes pairing list/approve/revoke` | Manage DM pairing codes | @@ -255,7 +364,7 @@ The unified `hermes` command provides all functionality: ## Messaging Gateway -The gateway connects Hermes to Telegram, Discord, Slack, and WhatsApp. +The gateway connects Hermes to Telegram, Discord, Slack, WhatsApp, and Home Assistant. ### Setup @@ -281,7 +390,7 @@ DISCORD_BOT_TOKEN=MTIz... 
# From Developer Portal DISCORD_ALLOWED_USERS=123456789012345678 # Comma-separated user IDs # Agent Behavior -HERMES_MAX_ITERATIONS=60 # Max tool-calling iterations +HERMES_MAX_ITERATIONS=90 # Max tool-calling iterations (default: 90) MESSAGING_CWD=/home/myuser # Terminal working directory for messaging # Tool progress is configured in config.yaml (display.tool_progress: off|new|all|verbose) @@ -347,16 +456,43 @@ Modes: - `new`: Only when switching to a different tool (less spam) - `all`: Every single tool call +### Gateway Slash Commands + +The gateway supports these slash commands in messaging chats: +- `/new` - Start a new conversation +- `/reset` - Reset conversation history +- `/retry` - Retry last message +- `/undo` - Remove the last exchange +- `/compress` - Compress conversation context +- `/stop` - Interrupt the running agent +- `/model` - Show/change model +- `/provider` - Show available providers and auth status +- `/personality` - Set a personality +- `/title` - Set or show session title +- `/resume` - Resume a previously-named session +- `/usage` - Show token usage for this session +- `/insights` - Show usage analytics +- `/sethome` - Set this chat as the home channel +- `/reload-mcp` - Reload MCP servers from config +- `/update` - Update Hermes Agent to latest version +- `/help` - Show command list +- `/status` - Show session info +- Plus dynamic `/skill-name` commands (loaded from agent/skill_commands.py) + ### Typing Indicator The gateway keeps the "typing..." indicator active throughout processing, refreshing every 4 seconds. This lets users know the bot is working even during long tool-calling sequences. 
### Platform Toolsets: -Each platform has a dedicated toolset in `toolsets.py`: +Each platform has a dedicated toolset in `toolsets.py` (all share the same `_HERMES_CORE_TOOLS` list): +- `hermes-cli`: CLI-specific toolset - `hermes-telegram`: Full tools including terminal (with safety checks) - `hermes-discord`: Full tools including terminal - `hermes-whatsapp`: Full tools including terminal +- `hermes-slack`: Full tools including terminal +- `hermes-homeassistant`: Home Assistant integration tools +- `hermes-gateway`: Meta-toolset including all platform toolsets --- @@ -393,7 +529,7 @@ DEFAULT_CONFIG = { #### For .env variables (API keys/secrets): -1. Add to `REQUIRED_ENV_VARS` or `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` +1. Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` (note: `REQUIRED_ENV_VARS` exists but is intentionally empty — provider setup is handled by the setup wizard) 2. Include metadata for the migration system: ```python @@ -405,6 +541,7 @@ OPTIONAL_ENV_VARS = { "url": "https://where-to-get-it.com/", "tools": ["tools_it_enables"], # What tools need this "password": True, # Mask input + "category": "tool", # One of: provider, tool, messaging, setting }, } ``` @@ -417,11 +554,12 @@ OPTIONAL_ENV_VARS = { ### Config Version Migration -The system uses `_config_version` to detect outdated configs: +The system uses `_config_version` (currently at version 5) to detect outdated configs: -1. `check_for_missing_config()` compares user config to `DEFAULT_CONFIG` -2. `migrate_config()` interactively prompts for missing values -3. Called automatically by `hermes update` and optionally by `hermes setup` +1. `check_config_version()` compares user config version to `DEFAULT_CONFIG` version +2. `get_missing_env_vars()` identifies missing environment variables +3. `migrate_config()` interactively prompts for missing values and handles version-specific migrations (e.g., v3→4: tool progress, v4→5: timezone) +4. 
Called automatically by `hermes update` and optionally by `hermes setup` --- @@ -433,7 +571,7 @@ API keys are loaded from `~/.hermes/.env`: - `FIRECRAWL_API_URL` - Self-hosted Firecrawl endpoint (optional) - `BROWSERBASE_API_KEY` / `BROWSERBASE_PROJECT_ID` - Browser automation - `FAL_KEY` - Image generation (FLUX model) -- `NOUS_API_KEY` - Vision and Mixture-of-Agents tools +- `VOICE_TOOLS_OPENAI_KEY` - Voice transcription (Whisper STT) and OpenAI TTS Terminal tool configuration (in `~/.hermes/config.yaml`): - `terminal.backend` - Backend: local, docker, singularity, modal, daytona, or ssh @@ -446,10 +584,9 @@ Terminal tool configuration (in `~/.hermes/config.yaml`): - SSH: `TERMINAL_SSH_HOST`, `TERMINAL_SSH_USER`, `TERMINAL_SSH_KEY` in .env Agent behavior (in `~/.hermes/.env`): -- `HERMES_MAX_ITERATIONS` - Max tool-calling iterations (default: 60) +- `HERMES_MAX_ITERATIONS` - Max tool-calling iterations (default: 90) - `MESSAGING_CWD` - Working directory for messaging platforms (default: ~) - `display.tool_progress` in config.yaml - Tool progress: `off`, `new`, `all`, `verbose` -- `OPENAI_API_KEY` - Voice transcription (Whisper STT) - `SLACK_BOT_TOKEN` / `SLACK_APP_TOKEN` - Slack integration (Socket Mode) - `SLACK_ALLOWED_USERS` - Comma-separated Slack user IDs - `HERMES_HUMAN_DELAY_MODE` - Response pacing: off/natural/custom @@ -519,7 +656,7 @@ Files: `tools/process_registry.py` (registry + handler), `tools/terminal_tool.py ## Adding New Tools -Adding a tool requires changes in **2 files** (the tool file and `toolsets.py`): +Adding a tool requires changes in **3 files** (the tool file, `model_tools.py`, and `toolsets.py`): 1. **Create `tools/your_tool.py`** with handler, schema, check function, and registry call: @@ -564,11 +701,11 @@ registry.register( ) ``` -2. **Add to `toolsets.py`**: Add `"example_tool"` to `_HERMES_CORE_TOOLS` if it should be in all platform toolsets, or create a new toolset entry. +2. 
**Add discovery import** in `model_tools.py`'s `_discover_tools()` list: `"tools.example_tool"`. -3. **Add discovery import** in `model_tools.py`'s `_discover_tools()` list: `"tools.example_tool"`. +3. **Add to `toolsets.py`**: Add `"example_tool"` to `_HERMES_CORE_TOOLS` if it should be in all platform toolsets, or create a new toolset entry. -That's it. The registry handles schema collection, dispatch, availability checking, and error wrapping automatically. No edits to `TOOLSET_REQUIREMENTS`, `handle_function_call()`, `get_all_tool_names()`, or any other data structure. +That's it. The registry handles schema collection, dispatch, availability checking, and error wrapping automatically. No edits to `handle_function_call()`, `get_all_tool_names()`, or any other data structure. **Optional:** Add to `OPTIONAL_ENV_VARS` in `hermes_cli/config.py` for the setup wizard, and to `toolset_distributions.py` for batch processing. @@ -608,7 +745,7 @@ Tool calls use `<tool_call>` XML tags, responses use `<tool_response>` tags, rea ```python agent = AIAgent(save_trajectories=True) agent.chat("Do something") -# Saves to trajectories/*.jsonl in ShareGPT format +# Saves to trajectory_samples.jsonl (or failed_trajectories.jsonl) in ShareGPT format ``` --- diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 0e6f51c1a..52f617f17 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -422,7 +422,7 @@ OPTIONAL_ENV_VARS = { "category": "setting", }, "HERMES_MAX_ITERATIONS": { - "description": "Maximum tool-calling iterations per conversation (default: 60)", + "description": "Maximum tool-calling iterations per conversation (default: 90)", "prompt": "Max iterations", "url": None, "password": False, From 4f0402ed3a516645120f93463ec7bf688db44d3a Mon Sep 17 00:00:00 2001 From: teknium1 <teknium1@gmail.com> Date: Sun, 8 Mar 2026 17:45:38 -0700 Subject: [PATCH 2/4] chore: remove all NOUS_API_KEY references MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit NOUS_API_KEY is unused — vision tools use OPENROUTER_API_KEY or Nous Portal OAuth (auth.json), and MoA tools use OPENROUTER_API_KEY. Removed from: - hermes_cli/config.py: api_keys allowlist for config set routing - .env.example: example env file entry and comment - tests/hermes_cli/test_set_config_value.py: parametrize test data - tests/integration/test_web_tools.py: updated comments and log messages to reference 'auxiliary LLM provider' instead of NOUS_API_KEY No HECATE references found in codebase (already cleaned up). --- .env.example | 4 ---- hermes_cli/config.py | 2 +- tests/hermes_cli/test_set_config_value.py | 1 - tests/integration/test_web_tools.py | 8 ++++---- 4 files changed, 5 insertions(+), 10 deletions(-) diff --git a/.env.example b/.env.example index c4c684cde..3cbc375b4 100644 --- a/.env.example +++ b/.env.example @@ -53,10 +53,6 @@ MINIMAX_CN_API_KEY= # Get at: https://firecrawl.dev/ FIRECRAWL_API_KEY= -# Nous Research API Key - Vision analysis and multi-model reasoning -# Get at: https://inference-api.nousresearch.com/ -NOUS_API_KEY= - # FAL.ai API Key - Image generation # Get at: https://fal.ai/ FAL_KEY= diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 52f617f17..e955eae80 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -969,7 +969,7 @@ def set_config_value(key: str, value: str): 'FAL_KEY', 'TELEGRAM_BOT_TOKEN', 'DISCORD_BOT_TOKEN', 'TERMINAL_SSH_HOST', 'TERMINAL_SSH_USER', 'TERMINAL_SSH_KEY', 'SUDO_PASSWORD', 'SLACK_BOT_TOKEN', 'SLACK_APP_TOKEN', - 'GITHUB_TOKEN', 'HONCHO_API_KEY', 'NOUS_API_KEY', 'WANDB_API_KEY', + 'GITHUB_TOKEN', 'HONCHO_API_KEY', 'WANDB_API_KEY', 'TINKER_API_KEY', ] diff --git a/tests/hermes_cli/test_set_config_value.py b/tests/hermes_cli/test_set_config_value.py index 35e885b57..52a9d1a6c 100644 --- a/tests/hermes_cli/test_set_config_value.py +++ b/tests/hermes_cli/test_set_config_value.py @@ -38,7 +38,6 @@ class TestExplicitAllowlist: "OPENROUTER_API_KEY", 
"OPENAI_API_KEY", "ANTHROPIC_API_KEY", - "NOUS_API_KEY", "WANDB_API_KEY", "TINKER_API_KEY", "HONCHO_API_KEY", diff --git a/tests/integration/test_web_tools.py b/tests/integration/test_web_tools.py index 971d98f2c..cd3de453a 100644 --- a/tests/integration/test_web_tools.py +++ b/tests/integration/test_web_tools.py @@ -12,7 +12,7 @@ Usage: Requirements: - FIRECRAWL_API_KEY environment variable must be set - - NOUS_API_KEY environment variable (optional, for LLM tests) + - An auxiliary LLM provider (OPENROUTER_API_KEY or Nous Portal auth) (optional, for LLM tests) """ import pytest @@ -128,12 +128,12 @@ class WebToolsTester: else: self.log_result("Firecrawl API Key", "passed", "Found") - # Check Nous API key (optional) + # Check auxiliary LLM provider (optional) if not check_auxiliary_model(): - self.log_result("Nous API Key", "skipped", "NOUS_API_KEY not set (LLM tests will be skipped)") + self.log_result("Auxiliary LLM", "skipped", "No auxiliary LLM provider available (LLM tests will be skipped)") self.test_llm = False else: - self.log_result("Nous API Key", "passed", "Found") + self.log_result("Auxiliary LLM", "passed", "Found") # Check debug mode debug_info = get_debug_session_info() From 31b84213e4c715a5668016611e502b7f2c10b6bb Mon Sep 17 00:00:00 2001 From: teknium1 <teknium1@gmail.com> Date: Sun, 8 Mar 2026 19:37:34 -0700 Subject: [PATCH 3/4] docs: add Guides & Tutorials section, restructure sidebar MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New documentation pages (1,823 lines): - getting-started/learning-path.md: 3-tier learning path table (beginner/intermediate/advanced) + use-case-based navigation - guides/tips.md: Tips & Best Practices quick-wins collection covering prompting, CLI power user tips, context files, memory, performance/cost, messaging, and security - guides/daily-briefing-bot.md: End-to-end tutorial building an automated daily news briefing with cron + web search + messaging - 
guides/team-telegram-assistant.md: Full walkthrough setting up a team Telegram bot with BotFather, gateway, DM pairing, and production deployment - guides/python-library.md: Guide to using AIAgent as a Python library — basic usage, multi-turn conversations, toolset config, trajectories, custom prompts, and integration examples (FastAPI, Discord bot, CI/CD) - reference/faq.md: Centralized FAQ (8 questions) + troubleshooting guide (6 categories, 18 specific issues) with problem/cause/solution format Sidebar restructure: - Added 'Guides & Tutorials' as new top-level section - Reorganized flat Features list (17 items) into 5 subcategories: Core Features, Automation, Web & Media, Integrations, Advanced - Added FAQ to Reference section - Updated index.md quick links table Docusaurus build verified clean. --- website/docs/getting-started/learning-path.md | 150 ++++++ website/docs/guides/_category_.json | 6 + website/docs/guides/daily-briefing-bot.md | 263 +++++++++++ website/docs/guides/python-library.md | 340 ++++++++++++++ .../docs/guides/team-telegram-assistant.md | 429 +++++++++++++++++ website/docs/guides/tips.md | 211 +++++++++ website/docs/index.md | 4 +- website/docs/reference/faq.md | 430 ++++++++++++++++++ website/sidebars.ts | 47 +- 9 files changed, 1874 insertions(+), 6 deletions(-) create mode 100644 website/docs/getting-started/learning-path.md create mode 100644 website/docs/guides/_category_.json create mode 100644 website/docs/guides/daily-briefing-bot.md create mode 100644 website/docs/guides/python-library.md create mode 100644 website/docs/guides/team-telegram-assistant.md create mode 100644 website/docs/guides/tips.md create mode 100644 website/docs/reference/faq.md diff --git a/website/docs/getting-started/learning-path.md b/website/docs/getting-started/learning-path.md new file mode 100644 index 000000000..2c08f077e --- /dev/null +++ b/website/docs/getting-started/learning-path.md @@ -0,0 +1,150 @@ +--- +sidebar_position: 3 +title: 'Learning Path' 
+description: 'Choose your learning path through the Hermes Agent documentation based on your experience level and goals.' +--- + +# Learning Path + +Hermes Agent can do a lot — CLI assistant, Telegram/Discord bot, task automation, RL training, and more. This page helps you figure out where to start and what to read based on your experience level and what you're trying to accomplish. + +:::tip Start Here +If you haven't installed Hermes Agent yet, begin with the [Installation guide](/docs/getting-started/installation) and then run through the [Quickstart](/docs/getting-started/quickstart). Everything below assumes you have a working installation. +::: + +## How to Use This Page + +- **Know your level?** Jump to the [experience-level table](#by-experience-level) and follow the reading order for your tier. +- **Have a specific goal?** Skip to [By Use Case](#by-use-case) and find the scenario that matches. +- **Just browsing?** Check the [Key Features](#key-features-at-a-glance) table for a quick overview of everything Hermes Agent can do. 
+ +## By Experience Level + +| Level | Goal | Recommended Reading | Time Estimate | +|---|---|---|---| +| **Beginner** | Get up and running, have basic conversations, use built-in tools | [Installation](/docs/getting-started/installation) → [Quickstart](/docs/getting-started/quickstart) → [CLI Usage](/docs/user-guide/cli) → [Configuration](/docs/user-guide/configuration) | ~1 hour | +| **Intermediate** | Set up messaging bots, use advanced features like memory, cron jobs, and skills | [Sessions](/docs/user-guide/sessions) → [Messaging](/docs/user-guide/messaging) → [Tools](/docs/user-guide/features/tools) → [Skills](/docs/user-guide/features/skills) → [Memory](/docs/user-guide/features/memory) → [Cron](/docs/user-guide/features/cron) | ~2–3 hours | +| **Advanced** | Build custom tools, create skills, train models with RL, contribute to the project | [Architecture](/docs/developer-guide/architecture) → [Adding Tools](/docs/developer-guide/adding-tools) → [Creating Skills](/docs/developer-guide/creating-skills) → [RL Training](/docs/user-guide/features/rl-training) → [Contributing](/docs/developer-guide/contributing) | ~4–6 hours | + +## By Use Case + +Pick the scenario that matches what you want to do. Each one links you to the relevant docs in the order you should read them. + +### "I want a CLI coding assistant" + +Use Hermes Agent as an interactive terminal assistant for writing, reviewing, and running code. + +1. [Installation](/docs/getting-started/installation) +2. [Quickstart](/docs/getting-started/quickstart) +3. [CLI Usage](/docs/user-guide/cli) +4. [Code Execution](/docs/user-guide/features/code-execution) +5. [Context Files](/docs/user-guide/features/context-files) +6. [Tips & Tricks](/docs/guides/tips) + +:::tip +Pass files directly into your conversation with context files. Hermes Agent can read, edit, and run code in your projects. +::: + +### "I want a Telegram/Discord bot" + +Deploy Hermes Agent as a bot on your favorite messaging platform. + +1. 
[Installation](/docs/getting-started/installation) +2. [Configuration](/docs/user-guide/configuration) +3. [Messaging Overview](/docs/user-guide/messaging) +4. [Telegram Setup](/docs/user-guide/messaging/telegram) +5. [Discord Setup](/docs/user-guide/messaging/discord) +6. [Security](/docs/user-guide/security) + +For full project examples, see: +- [Daily Briefing Bot](/docs/guides/daily-briefing-bot) +- [Team Telegram Assistant](/docs/guides/team-telegram-assistant) + +### "I want to automate tasks" + +Schedule recurring tasks, run batch jobs, or chain agent actions together. + +1. [Quickstart](/docs/getting-started/quickstart) +2. [Cron Scheduling](/docs/user-guide/features/cron) +3. [Batch Processing](/docs/user-guide/features/batch-processing) +4. [Delegation](/docs/user-guide/features/delegation) +5. [Hooks](/docs/user-guide/features/hooks) + +:::tip +Cron jobs let Hermes Agent run tasks on a schedule — daily summaries, periodic checks, automated reports — without you being present. +::: + +### "I want to build custom tools/skills" + +Extend Hermes Agent with your own tools and reusable skill packages. + +1. [Tools Overview](/docs/user-guide/features/tools) +2. [Skills Overview](/docs/user-guide/features/skills) +3. [MCP (Model Context Protocol)](/docs/user-guide/features/mcp) +4. [Architecture](/docs/developer-guide/architecture) +5. [Adding Tools](/docs/developer-guide/adding-tools) +6. [Creating Skills](/docs/developer-guide/creating-skills) + +:::tip +Tools are individual functions the agent can call. Skills are bundles of tools, prompts, and configuration packaged together. Start with tools, graduate to skills. +::: + +### "I want to train models" + +Use reinforcement learning to fine-tune model behavior with Hermes Agent's built-in RL training pipeline. + +1. [Quickstart](/docs/getting-started/quickstart) +2. [Configuration](/docs/user-guide/configuration) +3. [RL Training](/docs/user-guide/features/rl-training) +4. 
[Provider Routing](/docs/user-guide/features/provider-routing) +5. [Architecture](/docs/developer-guide/architecture) + +:::tip +RL training works best when you already understand the basics of how Hermes Agent handles conversations and tool calls. Run through the Beginner path first if you're new. +::: + +### "I want to use it as a Python library" + +Integrate Hermes Agent into your own Python applications programmatically. + +1. [Installation](/docs/getting-started/installation) +2. [Quickstart](/docs/getting-started/quickstart) +3. [Python Library Guide](/docs/guides/python-library) +4. [Architecture](/docs/developer-guide/architecture) +5. [Tools](/docs/user-guide/features/tools) +6. [Sessions](/docs/user-guide/sessions) + +## Key Features at a Glance + +Not sure what's available? Here's a quick directory of major features: + +| Feature | What It Does | Link | +|---|---|---| +| **Tools** | Built-in tools the agent can call (file I/O, search, shell, etc.) | [Tools](/docs/user-guide/features/tools) | +| **Skills** | Installable plugin packages that add new capabilities | [Skills](/docs/user-guide/features/skills) | +| **Memory** | Persistent memory across sessions | [Memory](/docs/user-guide/features/memory) | +| **Context Files** | Feed files and directories into conversations | [Context Files](/docs/user-guide/features/context-files) | +| **MCP** | Connect to external tool servers via Model Context Protocol | [MCP](/docs/user-guide/features/mcp) | +| **Cron** | Schedule recurring agent tasks | [Cron](/docs/user-guide/features/cron) | +| **Delegation** | Spawn sub-agents for parallel work | [Delegation](/docs/user-guide/features/delegation) | +| **Code Execution** | Run code in sandboxed environments | [Code Execution](/docs/user-guide/features/code-execution) | +| **Browser** | Web browsing and scraping | [Browser](/docs/user-guide/features/browser) | +| **Hooks** | Event-driven callbacks and middleware | [Hooks](/docs/user-guide/features/hooks) | +| **Batch 
Processing** | Process multiple inputs in bulk | [Batch Processing](/docs/user-guide/features/batch-processing) | +| **RL Training** | Fine-tune models with reinforcement learning | [RL Training](/docs/user-guide/features/rl-training) | +| **Provider Routing** | Route requests across multiple LLM providers | [Provider Routing](/docs/user-guide/features/provider-routing) | + +## What to Read Next + +Based on where you are right now: + +- **Just finished installing?** → Head to the [Quickstart](/docs/getting-started/quickstart) to run your first conversation. +- **Completed the Quickstart?** → Read [CLI Usage](/docs/user-guide/cli) and [Configuration](/docs/user-guide/configuration) to customize your setup. +- **Comfortable with the basics?** → Explore [Tools](/docs/user-guide/features/tools), [Skills](/docs/user-guide/features/skills), and [Memory](/docs/user-guide/features/memory) to unlock the full power of the agent. +- **Setting up for a team?** → Read [Security](/docs/user-guide/security) and [Sessions](/docs/user-guide/sessions) to understand access control and conversation management. +- **Ready to build?** → Jump into the [Developer Guide](/docs/developer-guide/architecture) to understand the internals and start contributing. +- **Want practical examples?** → Check out the [Guides](/docs/guides/tips) section for real-world projects and tips. + +:::tip +You don't need to read everything. Pick the path that matches your goal, follow the links in order, and you'll be productive quickly. You can always come back to this page to find your next step. 
+::: diff --git a/website/docs/guides/_category_.json b/website/docs/guides/_category_.json new file mode 100644 index 000000000..6d1d2f0b9 --- /dev/null +++ b/website/docs/guides/_category_.json @@ -0,0 +1,6 @@ +{ + "label": "Guides & Tutorials", + "position": 2, + "collapsible": true, + "collapsed": false +} diff --git a/website/docs/guides/daily-briefing-bot.md b/website/docs/guides/daily-briefing-bot.md new file mode 100644 index 000000000..b6c97e4e8 --- /dev/null +++ b/website/docs/guides/daily-briefing-bot.md @@ -0,0 +1,263 @@ +--- +sidebar_position: 2 +title: "Tutorial: Daily Briefing Bot" +description: "Build an automated daily briefing bot that researches topics, summarizes findings, and delivers them to Telegram or Discord every morning" +--- + +# Tutorial: Build a Daily Briefing Bot + +In this tutorial, you'll build a personal briefing bot that wakes up every morning, researches topics you care about, summarizes the findings, and delivers a concise briefing straight to your Telegram or Discord. + +By the end, you'll have a fully automated workflow combining **web search**, **cron scheduling**, **delegation**, and **messaging delivery** — no code required. + +## What We're Building + +Here's the flow: + +1. **8:00 AM** — The cron scheduler triggers your job +2. **Hermes spins up** a fresh agent session with your prompt +3. **Web search** pulls the latest news on your topics +4. **Summarization** distills it into a clean briefing format +5. **Delivery** sends the briefing to your Telegram or Discord + +The whole thing runs hands-free. You just read your briefing with your morning coffee. 
+ +## Prerequisites + +Before starting, make sure you have: + +- **Hermes Agent installed** — see the [Installation guide](/docs/getting-started/installation) +- **Gateway running** — the gateway daemon handles cron execution: + ```bash + hermes gateway install # Install as system service (recommended) + # or + hermes gateway # Run in foreground + ``` +- **Firecrawl API key** — set `FIRECRAWL_API_KEY` in your environment for web search +- **Messaging configured** (optional but recommended) — [Telegram](/docs/user-guide/messaging/telegram) or Discord set up with a home channel + +:::tip No messaging? No problem +You can still follow this tutorial using `deliver: "local"`. Briefings will be saved to `~/.hermes/cron/output/` and you can read them anytime. +::: + +## Step 1: Test the Workflow Manually + +Before automating anything, let's make sure the briefing works. Start a chat session: + +```bash +hermes +``` + +Then enter this prompt: + +``` +Search for the latest news about AI agents and open source LLMs. +Summarize the top 3 stories in a concise briefing format with links. +``` + +Hermes will search the web, read through results, and produce something like: + +``` +☀️ Your AI Briefing — March 8, 2026 + +1. Qwen 3 Released with 235B Parameters + Alibaba's latest open-weight model matches GPT-4.5 on several + benchmarks while remaining fully open source. + → https://qwenlm.github.io/blog/qwen3/ + +2. LangChain Launches Agent Protocol Standard + A new open standard for agent-to-agent communication gains + adoption from 15 major frameworks in its first week. + → https://blog.langchain.dev/agent-protocol/ + +3. EU AI Act Enforcement Begins for General-Purpose Models + The first compliance deadlines hit, with open source models + receiving exemptions under the 10M parameter threshold. + → https://artificialintelligenceact.eu/updates/ + +--- +3 stories • Sources searched: 8 • Generated by Hermes Agent +``` + +If this works, you're ready to automate it. 
+ +:::tip Iterate on the format +Try different prompts until you get output you love. Add instructions like "use emoji headers" or "keep each summary under 2 sentences." Whatever you settle on goes into the cron job. +::: + +## Step 2: Create the Cron Job + +Now let's schedule this to run automatically every morning. You can do this in two ways. + +### Option A: Natural Language (in chat) + +Just tell Hermes what you want: + +``` +Every morning at 8am, search the web for the latest news about AI agents +and open source LLMs. Summarize the top 3 stories in a concise briefing +with links. Use a friendly, professional tone. Deliver to telegram. +``` + +Hermes will create the cron job for you using the `schedule_cronjob` tool. + +### Option B: CLI Slash Command + +Use the `/cron` command for more control: + +``` +/cron add "0 8 * * *" "Search the web for the latest news about AI agents and open source LLMs. Find at least 5 recent articles from the past 24 hours. Summarize the top 3 most important stories in a concise daily briefing format. For each story include: a clear headline, a 2-sentence summary, and the source URL. Use a friendly, professional tone. Format with emoji bullet points and end with a total story count." +``` + +### The Golden Rule: Self-Contained Prompts + +:::warning Critical concept +Cron jobs run in a **completely fresh session** — no memory of your previous conversations, no context about what you "set up earlier." Your prompt must contain **everything** the agent needs to do the job. +::: + +**Bad prompt:** +``` +Do my usual morning briefing. +``` + +**Good prompt:** +``` +Search the web for the latest news about AI agents and open source LLMs. +Find at least 5 recent articles from the past 24 hours. Summarize the +top 3 most important stories in a concise daily briefing format. For each +story include: a clear headline, a 2-sentence summary, and the source URL. +Use a friendly, professional tone. Format with emoji bullet points. 
+``` + +The good prompt is specific about **what to search**, **how many articles**, **what format**, and **what tone**. It's everything the agent needs in one shot. + +## Step 3: Customize the Briefing + +Once the basic briefing works, you can get creative. + +### Multi-Topic Briefings + +Cover several areas in one briefing: + +``` +/cron add "0 8 * * *" "Create a morning briefing covering three topics. For each topic, search the web for recent news from the past 24 hours and summarize the top 2 stories with links. + +Topics: +1. AI and machine learning — focus on open source models and agent frameworks +2. Cryptocurrency — focus on Bitcoin, Ethereum, and regulatory news +3. Space exploration — focus on SpaceX, NASA, and commercial space + +Format as a clean briefing with section headers and emoji. End with today's date and a motivational quote." +``` + +### Using Delegation for Parallel Research + +For faster briefings, tell Hermes to delegate each topic to a sub-agent: + +``` +/cron add "0 8 * * *" "Create a morning briefing by delegating research to sub-agents. Delegate three parallel tasks: + +1. Delegate: Search for the top 2 AI/ML news stories from the past 24 hours with links +2. Delegate: Search for the top 2 cryptocurrency news stories from the past 24 hours with links +3. Delegate: Search for the top 2 space exploration news stories from the past 24 hours with links + +Collect all results and combine them into a single clean briefing with section headers, emoji formatting, and source links. Add today's date as a header." +``` + +Each sub-agent searches independently and in parallel, then the main agent combines everything into one polished briefing. See the [Delegation docs](/docs/user-guide/features/delegation) for more on how this works. + +### Weekday-Only Schedule + +Don't need briefings on weekends? Use a cron expression that targets Monday–Friday: + +``` +/cron add "0 8 * * 1-5" "Search for the latest AI and tech news..." 
+``` + +### Twice-Daily Briefings + +Get a morning overview and an evening recap: + +``` +/cron add "0 8 * * *" "Morning briefing: search for AI news from the past 12 hours..." +/cron add "0 18 * * *" "Evening recap: search for AI news from the past 12 hours..." +``` + +### Adding Personal Context with Memory + +If you have [memory](/docs/user-guide/features/memory) enabled, you can store preferences that persist across sessions. But remember — cron jobs run in fresh sessions without conversational memory. To add personal context, bake it directly into the prompt: + +``` +/cron add "0 8 * * *" "You are creating a briefing for a senior ML engineer who cares about: PyTorch ecosystem, transformer architectures, open-weight models, and AI regulation in the EU. Skip stories about product launches or funding rounds unless they involve open source. + +Search for the latest news on these topics. Summarize the top 3 stories with links. Be concise and technical — this reader doesn't need basic explanations." +``` + +:::tip Tailor the persona +Including details about who the briefing is *for* dramatically improves relevance. Tell the agent your role, interests, and what to skip. +::: + +## Step 4: Manage Your Jobs + +### List All Scheduled Jobs + +In chat: +``` +/cron list +``` + +Or from the terminal: +```bash +hermes cron list +``` + +You'll see output like: + +``` +ID | Name | Schedule | Next Run | Deliver +------------|-------------------|-------------|--------------------|-------- +a1b2c3d4 | Morning Briefing | 0 8 * * * | 2026-03-09 08:00 | telegram +e5f6g7h8 | Evening Recap | 0 18 * * * | 2026-03-08 18:00 | telegram +``` + +### Remove a Job + +In chat: +``` +/cron remove a1b2c3d4 +``` + +Or ask conversationally: +``` +Remove my morning briefing cron job. +``` + +Hermes will use `list_cronjobs` to find it and `remove_cronjob` to delete it. 
+ +### Check Gateway Status + +Make sure the scheduler is actually running: + +```bash +hermes cron status +``` + +If the gateway isn't running, your jobs won't execute. Install it as a system service for reliability: + +```bash +hermes gateway install +``` + +## Going Further + +You've built a working daily briefing bot. Here are some directions to explore next: + +- **[Scheduled Tasks (Cron)](/docs/user-guide/features/cron)** — Full reference for schedule formats, repeat limits, and delivery options +- **[Delegation](/docs/user-guide/features/delegation)** — Deep dive into parallel sub-agent workflows +- **[Messaging Platforms](/docs/user-guide/messaging)** — Set up Telegram, Discord, or other delivery targets +- **[Memory](/docs/user-guide/features/memory)** — Persistent context across sessions +- **[Tips & Best Practices](/docs/guides/tips)** — More prompt engineering advice + +:::tip What else can you schedule? +The briefing bot pattern works for anything: competitor monitoring, GitHub repo summaries, weather forecasts, portfolio tracking, server health checks, or even a daily joke. If you can describe it in a prompt, you can schedule it. +::: diff --git a/website/docs/guides/python-library.md b/website/docs/guides/python-library.md new file mode 100644 index 000000000..5f75f9a0e --- /dev/null +++ b/website/docs/guides/python-library.md @@ -0,0 +1,340 @@ +--- +sidebar_position: 4 +title: "Using Hermes as a Python Library" +description: "Embed AIAgent in your own Python scripts, web apps, or automation pipelines — no CLI required" +--- + +# Using Hermes as a Python Library + +Hermes isn't just a CLI tool. You can import `AIAgent` directly and use it programmatically in your own Python scripts, web applications, or automation pipelines. This guide shows you how. 
+ +--- + +## Installation + +Install Hermes directly from the repository: + +```bash +pip install git+https://github.com/NousResearch/hermes-agent.git +``` + +Or with [uv](https://docs.astral.sh/uv/): + +```bash +uv pip install git+https://github.com/NousResearch/hermes-agent.git +``` + +You can also pin it in your `requirements.txt`: + +```text +hermes-agent @ git+https://github.com/NousResearch/hermes-agent.git +``` + +:::tip +The same environment variables used by the CLI are required when using Hermes as a library. At minimum, set `OPENROUTER_API_KEY` (or `OPENAI_API_KEY` / `ANTHROPIC_API_KEY` if using direct provider access). +::: + +--- + +## Basic Usage + +The simplest way to use Hermes is the `chat()` method — pass a message, get a string back: + +```python +from run_agent import AIAgent + +agent = AIAgent( + model="anthropic/claude-sonnet-4", + quiet_mode=True, +) +response = agent.chat("What is the capital of France?") +print(response) +``` + +`chat()` handles the full conversation loop internally — tool calls, retries, everything — and returns just the final text response. + +:::warning +Always set `quiet_mode=True` when embedding Hermes in your own code. Without it, the agent prints CLI spinners, progress indicators, and other terminal output that will clutter your application's output. +::: + +--- + +## Full Conversation Control + +For more control over the conversation, use `run_conversation()` directly. 
It returns a dictionary with the full response, message history, and metadata: + +```python +agent = AIAgent( + model="anthropic/claude-sonnet-4", + quiet_mode=True, +) + +result = agent.run_conversation( + user_message="Search for recent Python 3.13 features", + task_id="my-task-1", +) + +print(result["final_response"]) +print(f"Messages exchanged: {len(result['messages'])}") +``` + +The returned dictionary contains: +- **`final_response`** — The agent's final text reply +- **`messages`** — The complete message history (system, user, assistant, tool calls) +- **`task_id`** — The task identifier used for VM isolation + +You can also pass a custom system message that overrides the ephemeral system prompt for that call: + +```python +result = agent.run_conversation( + user_message="Explain quicksort", + system_message="You are a computer science tutor. Use simple analogies.", +) +``` + +--- + +## Configuring Tools + +Control which toolsets the agent has access to using `enabled_toolsets` or `disabled_toolsets`: + +```python +# Only enable web tools (browsing, search) +agent = AIAgent( + model="anthropic/claude-sonnet-4", + enabled_toolsets=["web"], + quiet_mode=True, +) + +# Enable everything except terminal access +agent = AIAgent( + model="anthropic/claude-sonnet-4", + disabled_toolsets=["terminal"], + quiet_mode=True, +) +``` + +:::tip +Use `enabled_toolsets` when you want a minimal, locked-down agent (e.g., only web search for a research bot). Use `disabled_toolsets` when you want most capabilities but need to restrict specific ones (e.g., no terminal access in a shared environment). 
+::: + +--- + +## Multi-turn Conversations + +Maintain conversation state across multiple turns by passing the message history back in: + +```python +agent = AIAgent( + model="anthropic/claude-sonnet-4", + quiet_mode=True, +) + +# First turn +result1 = agent.run_conversation("My name is Alice") +history = result1["messages"] + +# Second turn — agent remembers the context +result2 = agent.run_conversation( + "What's my name?", + conversation_history=history, +) +print(result2["final_response"]) # "Your name is Alice." +``` + +The `conversation_history` parameter accepts the `messages` list from a previous result. The agent copies it internally, so your original list is never mutated. + +--- + +## Saving Trajectories + +Enable trajectory saving to capture conversations in ShareGPT format — useful for generating training data or debugging: + +```python +agent = AIAgent( + model="anthropic/claude-sonnet-4", + save_trajectories=True, + quiet_mode=True, +) + +agent.chat("Write a Python function to sort a list") +# Saves to trajectory_samples.jsonl in ShareGPT format +``` + +Each conversation is appended as a single JSONL line, making it easy to collect datasets from automated runs. + +--- + +## Custom System Prompts + +Use `ephemeral_system_prompt` to set a custom system prompt that guides the agent's behavior but is **not** saved to trajectory files (keeping your training data clean): + +```python +agent = AIAgent( + model="anthropic/claude-sonnet-4", + ephemeral_system_prompt="You are a SQL expert. Only answer database questions.", + quiet_mode=True, +) + +response = agent.chat("How do I write a JOIN query?") +print(response) +``` + +This is ideal for building specialized agents — a code reviewer, a documentation writer, a SQL assistant — all using the same underlying tooling. + +--- + +## Batch Processing + +For running many prompts in parallel, Hermes includes `batch_runner.py`. 
It manages concurrent `AIAgent` instances with proper resource isolation: + +```bash +python batch_runner.py --input prompts.jsonl --output results.jsonl +``` + +Each prompt gets its own `task_id` and isolated environment. If you need custom batch logic, you can build your own using `AIAgent` directly: + +```python +import concurrent.futures +from run_agent import AIAgent + +prompts = [ + "Explain recursion", + "What is a hash table?", + "How does garbage collection work?", +] + +def process_prompt(prompt): + # Create a fresh agent per task for thread safety + agent = AIAgent( + model="anthropic/claude-sonnet-4", + quiet_mode=True, + skip_memory=True, + ) + return agent.chat(prompt) + +with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor: + results = list(executor.map(process_prompt, prompts)) + +for prompt, result in zip(prompts, results): + print(f"Q: {prompt}\nA: {result}\n") +``` + +:::warning +Always create a **new `AIAgent` instance per thread or task**. The agent maintains internal state (conversation history, tool sessions, iteration counters) that is not thread-safe to share. 
+::: + +--- + +## Integration Examples + +### FastAPI Endpoint + +```python +from fastapi import FastAPI +from pydantic import BaseModel +from run_agent import AIAgent + +app = FastAPI() + +class ChatRequest(BaseModel): + message: str + model: str = "anthropic/claude-sonnet-4" + +@app.post("/chat") +async def chat(request: ChatRequest): + agent = AIAgent( + model=request.model, + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + ) + response = agent.chat(request.message) + return {"response": response} +``` + +### Discord Bot + +```python +import discord +from run_agent import AIAgent + +client = discord.Client(intents=discord.Intents.default()) + +@client.event +async def on_message(message): + if message.author == client.user: + return + if message.content.startswith("!hermes "): + query = message.content[8:] + agent = AIAgent( + model="anthropic/claude-sonnet-4", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + platform="discord", + ) + response = agent.chat(query) + await message.channel.send(response[:2000]) + +client.run("YOUR_DISCORD_TOKEN") +``` + +### CI/CD Pipeline Step + +```python +#!/usr/bin/env python3 +"""CI step: auto-review a PR diff.""" +import subprocess +from run_agent import AIAgent + +diff = subprocess.check_output(["git", "diff", "main...HEAD"]).decode() + +agent = AIAgent( + model="anthropic/claude-sonnet-4", + quiet_mode=True, + skip_context_files=True, + skip_memory=True, + disabled_toolsets=["terminal", "browser"], +) + +review = agent.chat( + f"Review this PR diff for bugs, security issues, and style problems:\n\n{diff}" +) +print(review) +``` + +--- + +## Key Constructor Parameters + +| Parameter | Type | Default | Description | +|-----------|------|---------|-------------| +| `model` | `str` | `"anthropic/claude-opus-4.6"` | Model in OpenRouter format | +| `quiet_mode` | `bool` | `False` | Suppress CLI output | +| `enabled_toolsets` | `List[str]` | `None` | Whitelist specific toolsets | +| 
`disabled_toolsets` | `List[str]` | `None` | Blacklist specific toolsets | +| `save_trajectories` | `bool` | `False` | Save conversations to JSONL | +| `ephemeral_system_prompt` | `str` | `None` | Custom system prompt (not saved to trajectories) | +| `max_iterations` | `int` | `90` | Max tool-calling iterations per conversation | +| `skip_context_files` | `bool` | `False` | Skip loading AGENTS.md files | +| `skip_memory` | `bool` | `False` | Disable persistent memory read/write | +| `api_key` | `str` | `None` | API key (falls back to env vars) | +| `base_url` | `str` | `None` | Custom API endpoint URL | +| `platform` | `str` | `None` | Platform hint (`"discord"`, `"telegram"`, etc.) | + +--- + +## Important Notes + +:::tip +- Set **`skip_context_files=True`** if you don't want `AGENTS.md` files from the working directory loaded into the system prompt. +- Set **`skip_memory=True`** to prevent the agent from reading or writing persistent memory — recommended for stateless API endpoints. +- The `platform` parameter (e.g., `"discord"`, `"telegram"`) injects platform-specific formatting hints so the agent adapts its output style. +::: + +:::warning +- **Thread safety**: Create one `AIAgent` per thread or task. Never share an instance across concurrent calls. +- **Resource cleanup**: The agent automatically cleans up resources (terminal sessions, browser instances) when a conversation ends. If you're running in a long-lived process, ensure each conversation completes normally. +- **Iteration limits**: The default `max_iterations=90` is generous. For simple Q&A use cases, consider lowering it (e.g., `max_iterations=10`) to prevent runaway tool-calling loops and control costs. 
+::: diff --git a/website/docs/guides/team-telegram-assistant.md b/website/docs/guides/team-telegram-assistant.md new file mode 100644 index 000000000..7ab30c335 --- /dev/null +++ b/website/docs/guides/team-telegram-assistant.md @@ -0,0 +1,429 @@ +--- +sidebar_position: 3 +title: "Tutorial: Team Telegram Assistant" +description: "Step-by-step guide to setting up a Telegram bot that your whole team can use for code help, research, system admin, and more" +--- + +# Set Up a Team Telegram Assistant + +This tutorial walks you through setting up a Telegram bot powered by Hermes Agent that multiple team members can use. By the end, your team will have a shared AI assistant they can message for help with code, research, system administration, and anything else — secured with per-user authorization. + +## What We're Building + +A Telegram bot that: + +- **Any authorized team member** can DM for help — code reviews, research, shell commands, debugging +- **Runs on your server** with full tool access — terminal, file editing, web search, code execution +- **Per-user sessions** — each person gets their own conversation context +- **Secure by default** — only approved users can interact, with two authorization methods +- **Scheduled tasks** — daily standups, health checks, and reminders delivered to a team channel + +--- + +## Prerequisites + +Before starting, make sure you have: + +- **Hermes Agent installed** on a server or VPS (not your laptop — the bot needs to stay running). Follow the [installation guide](/docs/getting-started/installation) if you haven't yet. +- **A Telegram account** for yourself (the bot owner) +- **An LLM provider configured** — at minimum, an API key for OpenAI, Anthropic, or another supported provider in `~/.hermes/.env` + +:::tip +A $5/month VPS is plenty for running the gateway. Hermes itself is lightweight — the LLM API calls are what cost money, and those happen remotely. 
+::: + +--- + +## Step 1: Create a Telegram Bot + +Every Telegram bot starts with **@BotFather** — Telegram's official bot for creating bots. + +1. **Open Telegram** and search for `@BotFather`, or go to [t.me/BotFather](https://t.me/BotFather) + +2. **Send `/newbot`** — BotFather will ask you two things: + - **Display name** — what users see (e.g., `Team Hermes Assistant`) + - **Username** — must end in `bot` (e.g., `myteam_hermes_bot`) + +3. **Copy the bot token** — BotFather replies with something like: + ``` + Use this token to access the HTTP API: + 7123456789:AAH1bGciOiJSUzI1NiIsInR5cCI6Ikp... + ``` + Save this token — you'll need it in the next step. + +4. **Set a description** (optional but recommended): + ``` + /setdescription + ``` + Choose your bot, then enter something like: + ``` + Team AI assistant powered by Hermes Agent. DM me for help with code, research, debugging, and more. + ``` + +5. **Set bot commands** (optional — gives users a command menu): + ``` + /setcommands + ``` + Choose your bot, then paste: + ``` + new - Start a fresh conversation + model - Show or change the AI model + status - Show session info + help - Show available commands + stop - Stop the current task + ``` + +:::warning +Keep your bot token secret. Anyone with the token can control the bot. If it leaks, use `/revoke` in BotFather to generate a new one. +::: + +--- + +## Step 2: Configure the Gateway + +You have two options: the interactive setup wizard (recommended) or manual configuration. + +### Option A: Interactive Setup (Recommended) + +```bash +hermes gateway setup +``` + +This walks you through everything with arrow-key selection. Pick **Telegram**, paste your bot token, and enter your user ID when prompted. + +### Option B: Manual Configuration + +Add these lines to `~/.hermes/.env`: + +```bash +# Telegram bot token from BotFather +TELEGRAM_BOT_TOKEN=7123456789:AAH1bGciOiJSUzI1NiIsInR5cCI6Ikp... 
+ +# Your Telegram user ID (numeric) +TELEGRAM_ALLOWED_USERS=123456789 +``` + +### Finding Your User ID + +Your Telegram user ID is a numeric value (not your username). To find it: + +1. Message [@userinfobot](https://t.me/userinfobot) on Telegram +2. It instantly replies with your numeric user ID +3. Copy that number into `TELEGRAM_ALLOWED_USERS` + +:::info +Telegram user IDs are permanent numbers like `123456789`. They're different from your `@username`, which can change. Always use the numeric ID for allowlists. +::: + +--- + +## Step 3: Start the Gateway + +### Quick Test + +Run the gateway in the foreground first to make sure everything works: + +```bash +hermes gateway +``` + +You should see output like: + +``` +[Gateway] Starting Hermes Gateway... +[Gateway] Telegram adapter connected +[Gateway] Cron scheduler started (tick every 60s) +``` + +Open Telegram, find your bot, and send it a message. If it replies, you're in business. Press `Ctrl+C` to stop. + +### Production: Install as a Service + +For a persistent deployment that survives reboots: + +```bash +hermes gateway install +``` + +This creates a **systemd** service (Linux) or **launchd** service (macOS) that runs automatically. + +```bash +# Linux — manage the service +hermes gateway start +hermes gateway stop +hermes gateway status + +# View live logs +journalctl --user -u hermes-gateway -f + +# Keep running after SSH logout +sudo loginctl enable-linger $USER +``` + +```bash +# macOS — manage the service +launchctl start ai.hermes.gateway +launchctl stop ai.hermes.gateway +tail -f ~/.hermes/logs/gateway.log +``` + +### Verify It's Running + +```bash +hermes gateway status +``` + +Then send a test message to your bot on Telegram. You should get a response within a few seconds. + +--- + +## Step 4: Set Up Team Access + +Now let's give your teammates access. There are two approaches. 
+ +### Approach A: Static Allowlist + +Collect each team member's Telegram user ID (have them message [@userinfobot](https://t.me/userinfobot)) and add them as a comma-separated list: + +```bash +# In ~/.hermes/.env +TELEGRAM_ALLOWED_USERS=123456789,987654321,555555555 +``` + +Restart the gateway after changes: + +```bash +hermes gateway stop && hermes gateway start +``` + +### Approach B: DM Pairing (Recommended for Teams) + +DM pairing is more flexible — you don't need to collect user IDs upfront. Here's how it works: + +1. **Teammate DMs the bot** — since they're not on the allowlist, the bot replies with a one-time pairing code: + ``` + 🔐 Pairing code: XKGH5N7P + Send this code to the bot owner for approval. + ``` + +2. **Teammate sends you the code** (via any channel — Slack, email, in person) + +3. **You approve it** on the server: + ```bash + hermes pairing approve telegram XKGH5N7P + ``` + +4. **They're in** — the bot immediately starts responding to their messages + +**Managing paired users:** + +```bash +# See all pending and approved users +hermes pairing list + +# Revoke someone's access +hermes pairing revoke telegram 987654321 + +# Clear expired pending codes +hermes pairing clear-pending +``` + +:::tip +DM pairing is ideal for teams because you don't need to restart the gateway when adding new users. Approvals take effect immediately. 
+::: + +### Security Considerations + +- **Never set `GATEWAY_ALLOW_ALL_USERS=true`** on a bot with terminal access — anyone who finds your bot could run commands on your server +- Pairing codes expire after **1 hour** and use cryptographic randomness +- Rate limiting prevents brute-force attacks: 1 request per user per 10 minutes, max 3 pending codes per platform +- After 5 failed approval attempts, the platform enters a 1-hour lockout +- All pairing data is stored with `chmod 0600` permissions + +--- + +## Step 5: Configure the Bot + +### Set a Home Channel + +A **home channel** is where the bot delivers cron job results and proactive messages. Without one, scheduled tasks have nowhere to send output. + +**Option 1:** Use the `/sethome` command in any Telegram group or chat where the bot is a member. + +**Option 2:** Set it manually in `~/.hermes/.env`: + +```bash +TELEGRAM_HOME_CHANNEL=-1001234567890 +TELEGRAM_HOME_CHANNEL_NAME="Team Updates" +``` + +To find a channel ID, add [@userinfobot](https://t.me/userinfobot) to the group — it will report the group's chat ID. + +### Configure Tool Progress Display + +Control how much detail the bot shows when using tools. In `~/.hermes/config.yaml`: + +```yaml +display: + tool_progress: new # off | new | all | verbose +``` + +| Mode | What You See | +|------|-------------| +| `off` | Clean responses only — no tool activity | +| `new` | Brief status for each new tool call (recommended for messaging) | +| `all` | Every tool call with details | +| `verbose` | Full tool output including command results | + +Users can also change this per-session with the `/verbose` command in chat. + +### Set Up a Personality with SOUL.md + +Customize how the bot communicates by creating `~/.hermes/SOUL.md`: + +```markdown +# Soul +You are a helpful team assistant. Be concise and technical. +Use code blocks for any code. Skip pleasantries — the team +values directness. When debugging, always ask for error logs +before guessing at solutions. 
+``` + +### Add Project Context + +If your team works on specific projects, create context files so the bot knows your stack: + +```markdown +<!-- ~/.hermes/AGENTS.md --> +# Team Context +- We use Python 3.12 with FastAPI and SQLAlchemy +- Frontend is React with TypeScript +- CI/CD runs on GitHub Actions +- Production deploys to AWS ECS +- Always suggest writing tests for new code +``` + +:::info +Context files are injected into every session's system prompt. Keep them concise — every character counts against your token budget. +::: + +--- + +## Step 6: Set Up Scheduled Tasks + +With the gateway running, you can schedule recurring tasks that deliver results to your team channel. + +### Daily Standup Summary + +Message the bot on Telegram: + +``` +Every weekday at 9am, check the GitHub repository at +github.com/myorg/myproject for: +1. Pull requests opened/merged in the last 24 hours +2. Issues created or closed +3. Any CI/CD failures on the main branch +Format as a brief standup-style summary. +``` + +The agent creates a cron job automatically and delivers results to the chat where you asked (or the home channel). + +### Server Health Check + +``` +Every 6 hours, check disk usage with 'df -h', memory with 'free -h', +and Docker container status with 'docker ps'. Report anything unusual — +partitions above 80%, containers that have restarted, or high memory usage. +``` + +### Managing Scheduled Tasks + +```bash +# From the CLI +hermes cron list # View all scheduled jobs +hermes cron status # Check if scheduler is running + +# From Telegram chat +/cron list # View jobs +/cron remove <job_id> # Remove a job +``` + +:::warning +Cron job prompts run in completely fresh sessions with no memory of prior conversations. Make sure each prompt contains **all** the context the agent needs — file paths, URLs, server addresses, and clear instructions. 
+::: + +--- + +## Production Tips + +### Use Docker for Safety + +On a shared team bot, use Docker as the terminal backend so agent commands run in a container instead of on your host: + +```bash +# In ~/.hermes/.env +TERMINAL_BACKEND=docker +TERMINAL_DOCKER_IMAGE=nikolaik/python-nodejs:python3.11-nodejs20 +``` + +Or in `~/.hermes/config.yaml`: + +```yaml +terminal: + backend: docker + container_cpu: 1 + container_memory: 5120 + container_persistent: true +``` + +This way, even if someone asks the bot to run something destructive, your host system is protected. + +### Monitor the Gateway + +```bash +# Check if the gateway is running +hermes gateway status + +# Watch live logs (Linux) +journalctl --user -u hermes-gateway -f + +# Watch live logs (macOS) +tail -f ~/.hermes/logs/gateway.log +``` + +### Keep Hermes Updated + +From Telegram, send `/update` to the bot — it will pull the latest version and restart. Or from the server: + +```bash +hermes update +hermes gateway stop && hermes gateway start +``` + +### Log Locations + +| What | Location | +|------|----------| +| Gateway logs | `journalctl --user -u hermes-gateway` (Linux) or `~/.hermes/logs/gateway.log` (macOS) | +| Cron job output | `~/.hermes/cron/output/{job_id}/{timestamp}.md` | +| Cron job definitions | `~/.hermes/cron/jobs.json` | +| Pairing data | `~/.hermes/pairing/` | +| Session history | `~/.hermes/sessions/` | + +--- + +## Going Further + +You've got a working team Telegram assistant. 
Here are some next steps: + +- **[Security Guide](/user-guide/security)** — deep dive into authorization, container isolation, and command approval +- **[Messaging Gateway](/user-guide/messaging)** — full reference for gateway architecture, session management, and chat commands +- **[Telegram Setup](/user-guide/messaging/telegram)** — platform-specific details including voice messages and TTS +- **[Scheduled Tasks](/user-guide/features/cron)** — advanced cron scheduling with delivery options and cron expressions +- **[Context Files](/user-guide/features/context-files)** — AGENTS.md, SOUL.md, and .cursorrules for project knowledge +- **[Personality](/user-guide/features/personality)** — built-in personality presets and custom persona definitions +- **Add more platforms** — the same gateway can simultaneously run [Discord](/user-guide/messaging/discord), [Slack](/user-guide/messaging/slack), and [WhatsApp](/user-guide/messaging/whatsapp) + +--- + +*Questions or issues? Open an issue on GitHub — contributions are welcome.* diff --git a/website/docs/guides/tips.md b/website/docs/guides/tips.md new file mode 100644 index 000000000..af4b8fce4 --- /dev/null +++ b/website/docs/guides/tips.md @@ -0,0 +1,211 @@ +--- +sidebar_position: 1 +title: "Tips & Best Practices" +description: "Practical advice to get the most out of Hermes Agent — prompt tips, CLI shortcuts, context files, memory, cost optimization, and security" +--- + +# Tips & Best Practices + +A quick-wins collection of practical tips that make you immediately more effective with Hermes Agent. Each section targets a different aspect — scan the headers and jump to what's relevant. + +--- + +## Getting the Best Results + +### Be Specific About What You Want + +Vague prompts produce vague results. Instead of "fix the code," say "fix the TypeError in `api/handlers.py` on line 47 — the `process_request()` function receives `None` from `parse_body()`." The more context you give, the fewer iterations you need. 
+ +### Provide Context Up Front + +Front-load your request with the relevant details: file paths, error messages, expected behavior. One well-crafted message beats three rounds of clarification. Paste error tracebacks directly — the agent can parse them. + +### Use Context Files for Recurring Instructions + +If you find yourself repeating the same instructions ("use tabs not spaces," "we use pytest," "the API is at `/api/v2`"), put them in an `AGENTS.md` file. The agent reads it automatically every session — zero effort after setup. + +### Let the Agent Use Its Tools + +Don't try to hand-hold every step. Say "find and fix the failing test" rather than "open `tests/test_foo.py`, look at line 42, then..." The agent has file search, terminal access, and code execution — let it explore and iterate. + +### Use Skills for Complex Workflows + +Before writing a long prompt explaining how to do something, check if there's already a skill for it. Type `/skills` to browse available skills, or just invoke one directly like `/axolotl` or `/github-pr-workflow`. + +## CLI Power User Tips + +### Multi-Line Input + +Press **Alt+Enter** (or **Ctrl+J**) to insert a newline without sending. This lets you compose multi-line prompts, paste code blocks, or structure complex requests before hitting Enter to send. + +### Paste Detection + +The CLI auto-detects multi-line pastes. Just paste a code block or error traceback directly — it won't send each line as a separate message. The paste is buffered and sent as one message. + +### Interrupt and Redirect + +Press **Ctrl+C** once to interrupt the agent mid-response. You can then type a new message to redirect it. Double-press Ctrl+C within 2 seconds to force exit. This is invaluable when the agent starts going down the wrong path. + +### Resume Sessions with `-c` + +Forgot something from your last session? Run `hermes -c` to resume exactly where you left off, with full conversation history restored. 
You can also resume by title: `hermes -r "my research project"`. + +### Clipboard Image Paste + +Press **Ctrl+V** to paste an image from your clipboard directly into the chat. The agent uses vision to analyze screenshots, diagrams, error popups, or UI mockups — no need to save to a file first. + +### Slash Command Autocomplete + +Type `/` and press **Tab** to see all available commands. This includes built-in commands (`/compress`, `/model`, `/title`) and every installed skill. You don't need to memorize anything — Tab completion has you covered. + +:::tip +Use `/verbose` to cycle through tool output display modes: **off → new → all → verbose**. The "all" mode is great for watching what the agent does; "off" is cleanest for simple Q&A. +::: + +## Context Files + +### AGENTS.md: Your Project's Brain + +Create an `AGENTS.md` in your project root with architecture decisions, coding conventions, and project-specific instructions. This is automatically injected into every session, so the agent always knows your project's rules. + +```markdown +# Project Context +- This is a FastAPI backend with SQLAlchemy ORM +- Always use async/await for database operations +- Tests go in tests/ and use pytest-asyncio +- Never commit .env files +``` + +### SOUL.md: Customize Personality + +Want the agent to be more concise? More technical? Place a `SOUL.md` in your project root or `~/.hermes/SOUL.md` for global personality customization. This shapes the agent's tone and communication style. + +```markdown +# Soul +You are a senior backend engineer. Be terse and direct. +Skip explanations unless asked. Prefer one-liners over verbose solutions. +Always consider error handling and edge cases. +``` + +### .cursorrules Compatibility + +Already have a `.cursorrules` or `.cursor/rules/*.mdc` file? Hermes reads those too. No need to duplicate your coding conventions — they're loaded automatically from the working directory. 
+ +### Hierarchical Discovery + +Hermes walks the directory tree and discovers **all** `AGENTS.md` files at every level. In a monorepo, put project-wide conventions at the root and team-specific ones in subdirectories — they're all concatenated together with path headers. + +:::tip +Keep context files focused and concise. Every character counts against your token budget since they're injected into every single message. +::: + +## Memory & Skills + +### Memory vs. Skills: What Goes Where + +**Memory** is for facts: your environment, preferences, project locations, and things the agent has learned about you. **Skills** are for procedures: multi-step workflows, tool-specific instructions, and reusable recipes. Use memory for "what," skills for "how." + +### When to Create Skills + +If you find a task that takes 5+ steps and you'll do it again, ask the agent to create a skill for it. Say "save what you just did as a skill called `deploy-staging`." Next time, just type `/deploy-staging` and the agent loads the full procedure. + +### Managing Memory Capacity + +Memory is intentionally bounded (~2,200 chars for MEMORY.md, ~1,375 chars for USER.md). When it fills up, the agent consolidates entries. You can help by saying "clean up your memory" or "replace the old Python 3.9 note — we're on 3.12 now." + +### Let the Agent Remember + +After a productive session, say "remember this for next time" and the agent will save the key takeaways. You can also be specific: "save to memory that our CI uses GitHub Actions with the `deploy.yml` workflow." + +:::warning +Memory is a frozen snapshot — changes made during a session don't appear in the system prompt until the next session starts. The agent writes to disk immediately, but the prompt cache isn't invalidated mid-session. +::: + +## Performance & Cost + +### Don't Break the Prompt Cache + +Most LLM providers cache the system prompt prefix. 
If you keep your system prompt stable (same context files, same memory), subsequent messages in a session get **cache hits** that are significantly cheaper. Avoid changing the model or system prompt mid-session. + +### Use /compress Before Hitting Limits + +Long sessions accumulate tokens. When you notice responses slowing down or getting truncated, run `/compress`. This summarizes the conversation history, preserving key context while dramatically reducing token count. Use `/usage` to check where you stand. + +### Delegate for Parallel Work + +Need to research three topics at once? Ask the agent to use `delegate_task` with parallel subtasks. Each subagent runs independently with its own context, and only the final summaries come back — massively reducing your main conversation's token usage. + +### Use execute_code for Batch Operations + +Instead of running terminal commands one at a time, ask the agent to write a script that does everything at once. "Write a Python script to rename all `.jpeg` files to `.jpg` and run it" is cheaper and faster than renaming files individually. + +### Choose the Right Model + +Use `/model` to switch models mid-session. Use a frontier model (Claude Sonnet/Opus, GPT-4o) for complex reasoning and architecture decisions. Switch to a faster model for simple tasks like formatting, renaming, or boilerplate generation. + +:::tip +Run `/usage` periodically to see your token consumption. Run `/insights` for a broader view of usage patterns over the last 30 days. +::: + +## Messaging Tips + +### Set a Home Channel + +Use `/sethome` in your preferred Telegram or Discord chat to designate it as the home channel. Cron job results and scheduled task outputs are delivered here. Without it, the agent has nowhere to send proactive messages. + +### Use /title to Organize Sessions + +Name your sessions with `/title auth-refactor` or `/title research-llm-quantization`. 
Named sessions are easy to find with `hermes sessions list` and resume with `hermes -r "auth-refactor"`. Unnamed sessions pile up and become impossible to distinguish. + +### DM Pairing for Team Access + +Instead of manually collecting user IDs for allowlists, enable DM pairing. When a teammate DMs the bot, they get a one-time pairing code. You approve it with `hermes pairing approve telegram XKGH5N7P` — simple and secure. + +### Tool Progress Display Modes + +Use `/verbose` to control how much tool activity you see. In messaging platforms, less is usually more — keep it on "new" to see just new tool calls. In the CLI, "all" gives you a satisfying live view of everything the agent does. + +:::tip +On messaging platforms, sessions auto-reset after idle time (default: 120 min) or daily at 4 AM. Adjust per-platform in `~/.hermes/gateway.json` if you need longer sessions. +::: + +## Security + +### Use Docker for Untrusted Code + +When working with untrusted repositories or running unfamiliar code, use Docker or Daytona as your terminal backend. Set `TERMINAL_BACKEND=docker` in your `.env`. Destructive commands inside a container can't harm your host system. + +```bash +# In your .env: +TERMINAL_BACKEND=docker +TERMINAL_DOCKER_IMAGE=hermes-sandbox:latest +``` + +### Review Before Choosing "Always" + +When the agent triggers a dangerous command approval (`rm -rf`, `DROP TABLE`, etc.), you get four options: **once**, **session**, **always**, **deny**. Think carefully before choosing "always" — it permanently allowlists that pattern. Start with "session" until you're comfortable. + +### Command Approval Is Your Safety Net + +Hermes checks every command against a curated list of dangerous patterns before execution. This includes recursive deletes, SQL drops, piping curl to shell, and more. Don't disable this in production — it exists for good reasons. 
+ +:::warning +When running in a container backend (Docker, Singularity, Modal, Daytona), dangerous command checks are **skipped** because the container is the security boundary. Make sure your container images are properly locked down. +::: + +### Use Allowlists for Messaging Bots + +Never set `GATEWAY_ALLOW_ALL_USERS=true` on a bot with terminal access. Always use platform-specific allowlists (`TELEGRAM_ALLOWED_USERS`, `DISCORD_ALLOWED_USERS`) or DM pairing to control who can interact with your agent. + +```bash +# Recommended: explicit allowlists per platform +TELEGRAM_ALLOWED_USERS=123456789,987654321 +DISCORD_ALLOWED_USERS=123456789012345678 + +# Or use cross-platform allowlist +GATEWAY_ALLOWED_USERS=123456789,987654321 +``` + +--- + +*Have a tip that should be on this page? Open an issue or PR — community contributions are welcome.* diff --git a/website/docs/index.md b/website/docs/index.md index e905bd638..a4ea0a8e3 100644 --- a/website/docs/index.md +++ b/website/docs/index.md @@ -25,6 +25,7 @@ It's not a coding copilot tethered to an IDE or a chatbot wrapper around a singl |---|---| | 🚀 **[Installation](/docs/getting-started/installation)** | Install in 60 seconds on Linux, macOS, or WSL2 | | 📖 **[Quickstart Tutorial](/docs/getting-started/quickstart)** | Your first conversation and key features to try | +| 🗺️ **[Learning Path](/docs/getting-started/learning-path)** | Find the right docs for your experience level | | ⚙️ **[Configuration](/docs/user-guide/configuration)** | Config file, providers, models, and options | | 💬 **[Messaging Gateway](/docs/user-guide/messaging)** | Set up Telegram, Discord, Slack, or WhatsApp | | 🔧 **[Tools & Toolsets](/docs/user-guide/features/tools)** | 40+ built-in tools and how to configure them | @@ -33,8 +34,9 @@ It's not a coding copilot tethered to an IDE or a chatbot wrapper around a singl | 🔌 **[MCP Integration](/docs/user-guide/features/mcp)** | Connect to any MCP server for extended capabilities | | 📄 **[Context 
Files](/docs/user-guide/features/context-files)** | Project context files that shape every conversation | | 🔒 **[Security](/docs/user-guide/security)** | Command approval, authorization, container isolation | +| 💡 **[Tips & Best Practices](/docs/guides/tips)** | Quick wins to get the most out of Hermes | | 🏗️ **[Architecture](/docs/developer-guide/architecture)** | How it works under the hood | -| 🤝 **[Contributing](/docs/developer-guide/contributing)** | Development setup and PR process | +| ❓ **[FAQ & Troubleshooting](/docs/reference/faq)** | Common questions and solutions | ## Key Features diff --git a/website/docs/reference/faq.md b/website/docs/reference/faq.md new file mode 100644 index 000000000..a477c5333 --- /dev/null +++ b/website/docs/reference/faq.md @@ -0,0 +1,430 @@ +--- +sidebar_position: 3 +title: "FAQ & Troubleshooting" +description: "Frequently asked questions and solutions to common issues with Hermes Agent" +--- + +# FAQ & Troubleshooting + +Quick answers and fixes for the most common questions and issues. + +--- + +## Frequently Asked Questions + +### What LLM providers work with Hermes? + +Hermes Agent works with any OpenAI-compatible API. Supported providers include: + +- **[OpenRouter](https://openrouter.ai/)** — access hundreds of models through one API key (recommended for flexibility) +- **Nous Portal** — Nous Research's own inference endpoint +- **OpenAI** — GPT-4o, o1, o3, etc. +- **Anthropic** — Claude models (via OpenRouter or compatible proxy) +- **Google** — Gemini models (via OpenRouter or compatible proxy) +- **z.ai / ZhipuAI** — GLM models +- **Kimi / Moonshot AI** — Kimi models +- **MiniMax** — global and China endpoints +- **Local models** — via [Ollama](https://ollama.com/), [vLLM](https://docs.vllm.ai/), [llama.cpp](https://github.com/ggerganov/llama.cpp), [SGLang](https://github.com/sgl-project/sglang), or any OpenAI-compatible server + +Set your provider with `hermes setup` or by editing `~/.hermes/.env`. 
See the [Environment Variables](./environment-variables.md) reference for all provider keys. + +### Does it work on Windows? + +**Not natively.** Hermes Agent requires a Unix-like environment. On Windows, install [WSL2](https://learn.microsoft.com/en-us/windows/wsl/install) and run Hermes from inside it. The standard install command works perfectly in WSL2: + +```bash +curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash +``` + +### Is my data sent anywhere? + +API calls go **only to the LLM provider you configure** (e.g., OpenRouter, your local Ollama instance). Hermes Agent does not collect telemetry, usage data, or analytics. Your conversations, memory, and skills are stored locally in `~/.hermes/`. + +### Can I use it offline / with local models? + +Yes. Point Hermes at any local OpenAI-compatible server: + +```bash +hermes config set OPENAI_BASE_URL http://localhost:11434/v1 # Ollama +hermes config set OPENAI_API_KEY ollama # Any non-empty value +hermes config set HERMES_MODEL llama3.1 +``` + +This works with Ollama, vLLM, llama.cpp server, SGLang, LocalAI, and others. See the [Configuration guide](../user-guide/configuration.md) for details. + +### How much does it cost? + +Hermes Agent itself is **free and open-source** (MIT license). You pay only for the LLM API usage from your chosen provider. Local models are completely free to run. + +### Can multiple people use one instance? + +Yes. The [messaging gateway](../user-guide/messaging/index.md) lets multiple users interact with the same Hermes Agent instance via Telegram, Discord, Slack, WhatsApp, or Home Assistant. Access is controlled through allowlists (specific user IDs) and DM pairing (unknown users who DM the bot receive a one-time code that the bot owner approves). + +### What's the difference between memory and skills? + +- **Memory** stores **facts** — things the agent knows about you, your projects, and preferences. Memories are retrieved automatically based on relevance.
+- **Skills** store **procedures** — step-by-step instructions for how to do things. Skills are recalled when the agent encounters a similar task. + +Both persist across sessions. See [Memory](../user-guide/features/memory.md) and [Skills](../user-guide/features/skills.md) for details. + +### Can I use it in my own Python project? + +Yes. Import the `AIAgent` class and use Hermes programmatically: + +```python +from hermes.agent import AIAgent + +agent = AIAgent(model="openrouter/nous/hermes-3-llama-3.1-70b") +response = agent.chat("Explain quantum computing briefly") +``` + +See the [Python Library guide](../guides/python-library.md) for full API usage. + +--- + +## Troubleshooting + +### Installation Issues + +#### `hermes: command not found` after installation + +**Cause:** Your shell hasn't reloaded the updated PATH. + +**Solution:** +```bash +# Reload your shell profile +source ~/.bashrc # bash +source ~/.zshrc # zsh + +# Or start a new terminal session +``` + +If it still doesn't work, verify the install location: +```bash +which hermes +ls ~/.local/bin/hermes +``` + +:::tip +The installer adds `~/.local/bin` to your PATH. If you use a non-standard shell config, add `export PATH="$HOME/.local/bin:$PATH"` manually. +::: + +#### Python version too old + +**Cause:** Hermes requires Python 3.11 or newer. + +**Solution:** +```bash +python3 --version # Check current version + +# Install a newer Python +sudo apt install python3.12 # Ubuntu/Debian +brew install python@3.12 # macOS +``` + +The installer handles this automatically — if you see this error during manual installation, upgrade Python first. + +#### `uv: command not found` + +**Cause:** The `uv` package manager isn't installed or not in PATH. + +**Solution:** +```bash +curl -LsSf https://astral.sh/uv/install.sh | sh +source ~/.bashrc +``` + +#### Permission denied errors during install + +**Cause:** Insufficient permissions to write to the install directory.
+ +**Solution:** +```bash +# Don't use sudo with the installer — it installs to ~/.local/bin +# If you previously installed with sudo, clean up: +sudo rm /usr/local/bin/hermes +# Then re-run the standard installer +curl -fsSL https://raw.githubusercontent.com/NousResearch/hermes-agent/main/scripts/install.sh | bash +``` + +--- + +### Provider & Model Issues + +#### API key not working + +**Cause:** Key is missing, expired, incorrectly set, or for the wrong provider. + +**Solution:** +```bash +# Check which keys are set +hermes config get OPENROUTER_API_KEY + +# Re-run interactive setup +hermes setup + +# Or set directly +hermes config set OPENROUTER_API_KEY sk-or-v1-xxxxxxxxxxxx +``` + +:::warning +Make sure the key matches the provider. An OpenAI key won't work with OpenRouter and vice versa. Check `~/.hermes/.env` for conflicting entries. +::: + +#### Model not available / model not found + +**Cause:** The model identifier is incorrect or not available on your provider. + +**Solution:** +```bash +# List available models for your provider +hermes models + +# Set a valid model +hermes config set HERMES_MODEL openrouter/nous/hermes-3-llama-3.1-70b + +# Or specify per-session +hermes chat --model openrouter/meta-llama/llama-3.1-70b-instruct +``` + +#### Rate limiting (429 errors) + +**Cause:** You've exceeded your provider's rate limits. + +**Solution:** Wait a moment and retry. For sustained usage, consider: +- Upgrading your provider plan +- Switching to a different model or provider +- Using `hermes chat --provider <alternative>` to route to a different backend + +#### Context length exceeded + +**Cause:** The conversation has grown too long for the model's context window. 
+ +**Solution:** +```bash +# Compress the current session +/compress + +# Or start a fresh session +hermes chat + +# Use a model with a larger context window +hermes chat --model openrouter/google/gemini-2.0-flash-001 +``` + +--- + +### Terminal Issues + +#### Command blocked as dangerous + +**Cause:** Hermes detected a potentially destructive command (e.g., `rm -rf`, `DROP TABLE`). This is a safety feature. + +**Solution:** When prompted, review the command and type `y` to approve it. You can also: +- Ask the agent to use a safer alternative +- See the full list of dangerous patterns in the [Security docs](../user-guide/security.md) + +:::tip +This is working as intended — Hermes never silently runs destructive commands. The approval prompt shows you exactly what will execute. +::: + +#### `sudo` not working via messaging gateway + +**Cause:** The messaging gateway runs without an interactive terminal, so `sudo` cannot prompt for a password. + +**Solution:** +- Avoid `sudo` in messaging — ask the agent to find alternatives +- If you must use `sudo`, configure passwordless sudo for specific commands in `/etc/sudoers` +- Or switch to the terminal interface for administrative tasks: `hermes chat` + +#### Docker backend not connecting + +**Cause:** Docker daemon isn't running or the user lacks permissions. + +**Solution:** +```bash +# Check Docker is running +docker info + +# Add your user to the docker group +sudo usermod -aG docker $USER +newgrp docker + +# Verify +docker run hello-world +``` + +--- + +### Messaging Issues + +#### Bot not responding to messages + +**Cause:** The bot isn't running, isn't authorized, or your user isn't in the allowlist. + +**Solution:** +```bash +# Check if the gateway is running +hermes gateway status + +# Start the gateway +hermes gateway start + +# Check logs for errors +hermes gateway logs +``` + +#### Messages not delivering + +**Cause:** Network issues, bot token expired, or platform webhook misconfiguration. 
+ +**Solution:** +- Verify your bot token is valid with `hermes setup` +- Check gateway logs: `hermes gateway logs` +- For webhook-based platforms (Slack, WhatsApp), ensure your server is publicly accessible + +#### Allowlist confusion — who can talk to the bot? + +**Cause:** Authorization mode determines who gets access. + +**Solution:** + +| Mode | How it works | +|------|-------------| +| **Allowlist** | Only user IDs listed in config can interact | +| **DM pairing** | Unknown users who DM the bot get a one-time pairing code; access is granted once the owner approves it with `hermes pairing approve` | +| **Open** | Anyone can interact (not recommended for production) | + +Configure in `~/.hermes/config.yaml` under your gateway's settings. See the [Messaging docs](../user-guide/messaging/index.md). + +#### Gateway won't start + +**Cause:** Missing dependencies, port conflicts, or misconfigured tokens. + +**Solution:** +```bash +# Install messaging dependencies +pip install hermes-agent[telegram] # or [discord], [slack], [whatsapp] + +# Check for port conflicts +lsof -i :8080 + +# Verify configuration +hermes config show +``` + +--- + +### Performance Issues + +#### Slow responses + +**Cause:** Large model, distant API server, or heavy system prompt with many tools. + +**Solution:** +- Try a faster/smaller model: `hermes chat --model openrouter/meta-llama/llama-3.1-8b-instruct` +- Reduce active toolsets: `hermes chat -t "terminal"` +- Check your network latency to the provider +- For local models, ensure you have enough GPU VRAM + +#### High token usage + +**Cause:** Long conversations, verbose system prompts, or many tool calls accumulating context. + +**Solution:** +```bash +# Compress the conversation to reduce tokens +/compress + +# Check session token count +/stats +``` + +:::tip +Use `/compress` regularly during long sessions. It summarizes the conversation history and reduces token usage significantly while preserving context.
+::: + +#### Session getting too long + +**Cause:** Extended conversations accumulate messages and tool outputs, approaching context limits. + +**Solution:** +```bash +# Compress current session (preserves key context) +/compress + +# Start a new session with a reference to the old one +hermes chat + +# Resume a specific session later if needed +hermes chat --continue +``` + +--- + +### MCP Issues + +#### MCP server not connecting + +**Cause:** Server binary not found, wrong command path, or missing runtime. + +**Solution:** +```bash +# Ensure MCP dependencies are installed +pip install hermes-agent[mcp] + +# For npm-based servers, ensure Node.js is available +node --version +npx --version + +# Test the server manually +npx -y @modelcontextprotocol/server-filesystem /tmp +``` + +Verify your `~/.hermes/config.yaml` MCP configuration: +```yaml +mcp_servers: + filesystem: + command: "npx" + args: ["-y", "@modelcontextprotocol/server-filesystem", "/home/user/docs"] +``` + +#### Tools not showing up from MCP server + +**Cause:** Server started but tool discovery failed, or tools are filtered out. + +**Solution:** +- Check gateway/agent logs for MCP connection errors +- Ensure the server responds to the `tools/list` RPC method +- Restart the agent — MCP tools are discovered at startup + +```bash +# Verify MCP servers are configured +hermes config show | grep -A 5 mcp_servers + +# Restart hermes to re-discover tools +hermes chat +``` + +#### MCP timeout errors + +**Cause:** The MCP server is taking too long to respond, or it crashed during execution. + +**Solution:** +- Increase the timeout in your MCP server config if supported +- Check if the MCP server process is still running +- For remote HTTP MCP servers, check network connectivity + +:::warning +If an MCP server crashes mid-request, Hermes will report a timeout. Check the server's own logs (not just Hermes logs) to diagnose the root cause. +::: + +--- + +## Still Stuck? + +If your issue isn't covered here: + +1. 
**Search existing issues:** [GitHub Issues](https://github.com/NousResearch/hermes-agent/issues) +2. **Ask the community:** [Nous Research Discord](https://discord.gg/nousresearch) +3. **File a bug report:** Include your OS, Python version (`python3 --version`), Hermes version (`hermes --version`), and the full error message diff --git a/website/sidebars.ts b/website/sidebars.ts index 919647f14..6d767bb1e 100644 --- a/website/sidebars.ts +++ b/website/sidebars.ts @@ -10,6 +10,18 @@ const sidebars: SidebarsConfig = { 'getting-started/quickstart', 'getting-started/installation', 'getting-started/updating', + 'getting-started/learning-path', + ], + }, + { + type: 'category', + label: 'Guides & Tutorials', + collapsed: false, + items: [ + 'guides/tips', + 'guides/daily-briefing-bot', + 'guides/team-telegram-assistant', + 'guides/python-library', ], }, { @@ -35,24 +47,48 @@ const sidebars: SidebarsConfig = { }, { type: 'category', - label: 'Features', + label: 'Core Features', items: [ 'user-guide/features/tools', 'user-guide/features/skills', 'user-guide/features/memory', 'user-guide/features/context-files', 'user-guide/features/personality', - 'user-guide/features/mcp', + ], + }, + { + type: 'category', + label: 'Automation', + items: [ 'user-guide/features/cron', - 'user-guide/features/hooks', 'user-guide/features/delegation', 'user-guide/features/code-execution', + 'user-guide/features/hooks', + ], + }, + { + type: 'category', + label: 'Web & Media', + items: [ 'user-guide/features/browser', - 'user-guide/features/image-generation', 'user-guide/features/vision', + 'user-guide/features/image-generation', 'user-guide/features/tts', - 'user-guide/features/provider-routing', + ], + }, + { + type: 'category', + label: 'Integrations', + items: [ + 'user-guide/features/mcp', 'user-guide/features/honcho', + 'user-guide/features/provider-routing', + ], + }, + { + type: 'category', + label: 'Advanced', + items: [ 'user-guide/features/batch-processing', 
'user-guide/features/rl-training', ], @@ -76,6 +112,7 @@ const sidebars: SidebarsConfig = { items: [ 'reference/cli-commands', 'reference/environment-variables', + 'reference/faq', ], }, ], From a9c35f917538cb519671bdd494a3d9cc555021f8 Mon Sep 17 00:00:00 2001 From: teknium1 <teknium1@gmail.com> Date: Sun, 8 Mar 2026 19:51:17 -0700 Subject: [PATCH 4/4] docs: comprehensive rewrite of all messaging platform setup guides MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All four platform guides rewritten from thin ~60-line summaries to comprehensive step-by-step setup guides with current (2025-2026) info: telegram.md (74 → 196 lines): - Full BotFather walkthrough with customization commands - Privacy mode section with critical group chat gotcha - Multiple user ID discovery methods - Voice message setup (Whisper STT + TTS bubbles + ffmpeg) - Group chat usage patterns and admin mode - Recent Bot API features (privacy policy requirement, streaming) - Troubleshooting table (6 issues) discord.md (57 → 260 lines): - Complete Developer Portal walkthrough (application, bot, intents) - Detailed Privileged Gateway Intents section with warning about Message Content Intent being #1 failure cause - Invite URL generation via Installation tab (new 2024) and manual - Permission integer calculation (274878286912 recommended) - Developer Mode user ID discovery - Bot behavior documentation (DMs, channels, no-prefix) - Troubleshooting table (6 issues) slack.md (57 → 214 lines): - Warning about classic Slack apps deprecated since March 2025 - Full scope tables (required + optional) with purposes - Socket Mode setup with App-Level Token (xapp-) - Event Subscriptions configuration - User ID discovery via profile - Two-token architecture explained (xoxb- + xapp-) - Troubleshooting table whatsapp.md (77 → 193 lines): - Clarified whatsapp-web.js (not Business API) with ban risk warnings - Linux Chromium dependencies (Debian + Fedora) - Setup wizard QR 
code scanning workflow - Session persistence with LocalAuth - Second phone number options with cost table - WhatsApp Web protocol update warnings - Troubleshooting table (7 issues) Docusaurus build verified clean. --- website/docs/user-guide/messaging/discord.md | 247 ++++++++++++++++-- website/docs/user-guide/messaging/slack.md | 207 +++++++++++++-- website/docs/user-guide/messaging/telegram.md | 162 ++++++++++-- website/docs/user-guide/messaging/whatsapp.md | 180 ++++++++++--- 4 files changed, 697 insertions(+), 99 deletions(-) diff --git a/website/docs/user-guide/messaging/discord.md b/website/docs/user-guide/messaging/discord.md index 328196ce9..26d1d530b 100644 --- a/website/docs/user-guide/messaging/discord.md +++ b/website/docs/user-guide/messaging/discord.md @@ -6,52 +6,255 @@ description: "Set up Hermes Agent as a Discord bot" # Discord Setup -Connect Hermes Agent to Discord to chat with it in DMs or server channels. +Hermes Agent integrates with Discord as a bot, letting you chat with your AI assistant through direct messages or server channels. The bot receives your messages, processes them through the Hermes Agent pipeline (including tool use, memory, and reasoning), and responds in real time. It supports text, voice messages, file attachments, and slash commands. -## Setup Steps +This guide walks you through the full setup process — from creating your bot on Discord's Developer Portal to sending your first message. -1. **Create a bot:** Go to the [Discord Developer Portal](https://discord.com/developers/applications) -2. **Enable intents:** Bot → Privileged Gateway Intents → enable **Message Content Intent** -3. **Get your user ID:** Enable Developer Mode in Discord settings, right-click your name → Copy ID -4. **Invite to your server:** OAuth2 → URL Generator → scopes: `bot`, `applications.commands` → permissions: Send Messages, Read Message History, Attach Files -5. 
**Configure:** Run `hermes gateway setup` and select Discord, or add to `~/.hermes/.env` manually: +## Step 1: Create a Discord Application -```bash -DISCORD_BOT_TOKEN=MTIz... -DISCORD_ALLOWED_USERS=YOUR_USER_ID +1. Go to the [Discord Developer Portal](https://discord.com/developers/applications) and sign in with your Discord account. +2. Click **New Application** in the top-right corner. +3. Enter a name for your application (e.g., "Hermes Agent") and accept the Developer Terms of Service. +4. Click **Create**. + +You'll land on the **General Information** page. Note the **Application ID** — you'll need it later to build the invite URL. + +## Step 2: Create the Bot + +1. In the left sidebar, click **Bot**. +2. Discord automatically creates a bot user for your application. You'll see the bot's username, which you can customize. +3. Under **Authorization Flow**: + - Set **Public Bot** to **OFF** — this prevents other people from inviting your bot to their servers. + - Leave **Require OAuth2 Code Grant** set to **OFF**. + +:::tip +You can set a custom avatar and banner for your bot on this page. This is what users will see in Discord. +::: + +## Step 3: Enable Privileged Gateway Intents + +This is the most critical step in the entire setup. Without the correct intents enabled, your bot will connect to Discord but **will not be able to read message content** in server channels. + +On the **Bot** page, scroll down to **Privileged Gateway Intents**. You'll see three toggles: + +| Intent | Purpose | Required? | +|--------|---------|-----------| +| **Presence Intent** | See user online/offline status | Optional | +| **Server Members Intent** | Access the member list | Optional | +| **Message Content Intent** | Read the text content of messages | **Required** | + +**Enable Message Content Intent** by toggling it **ON**. Without this, your bot still receives message events from server channels, but the message text is empty — the bot literally cannot see what you typed. (Discord exempts DMs and messages that @mention the bot from this requirement.)
+ +:::warning[This is the #1 reason Discord bots don't work] +If your bot is online but never responds to messages, the **Message Content Intent** is almost certainly disabled. Go back to the [Developer Portal](https://discord.com/developers/applications), select your application → Bot → Privileged Gateway Intents, and make sure **Message Content Intent** is toggled ON. Click **Save Changes**. +::: + +**Regarding server count:** +- If your bot is in **fewer than 100 servers**, you can simply toggle intents on and off freely. +- If your bot is in **100 or more servers**, Discord requires you to submit a verification application to use privileged intents. For personal use, this is not a concern. + +Click **Save Changes** at the bottom of the page. + +## Step 4: Get the Bot Token + +The bot token is the credential Hermes Agent uses to log in as your bot. Still on the **Bot** page: + +1. Under the **Token** section, click **Reset Token**. +2. If you have two-factor authentication enabled on your Discord account, enter your 2FA code. +3. Discord will display your new token. **Copy it immediately.** + +:::warning[Token shown only once] +The token is only displayed once. If you lose it, you'll need to reset it and generate a new one. Never share your token publicly or commit it to Git — anyone with this token has full control of your bot. +::: + +Store the token somewhere safe (a password manager, for example). You'll need it in Step 8. + +## Step 5: Generate the Invite URL + +You need an OAuth2 URL to invite the bot to your server. There are two ways to do this: + +### Option A: Using the Installation Tab (Recommended) + +1. In the left sidebar, click **Installation**. +2. Under **Installation Contexts**, enable **Guild Install**. +3. For **Install Link**, select **Discord Provided Link**. +4. Under **Default Install Settings** for Guild Install: + - **Scopes**: select `bot` and `applications.commands` + - **Permissions**: select the permissions listed below. 
+ +### Option B: Manual URL + +You can construct the invite URL directly using this format: + +``` +https://discord.com/oauth2/authorize?client_id=YOUR_APP_ID&scope=bot+applications.commands&permissions=274878286912 ``` -6. **Start the gateway:** +Replace `YOUR_APP_ID` with the Application ID from Step 1. + +### Required Permissions + +These are the minimum permissions your bot needs: + +- **View Channels** — see the channels it has access to +- **Send Messages** — respond to your messages +- **Embed Links** — format rich responses +- **Attach Files** — send images, audio, and file outputs +- **Read Message History** — maintain conversation context + +### Recommended Additional Permissions + +- **Send Messages in Threads** — respond in thread conversations +- **Add Reactions** — react to messages for acknowledgment + +### Permission Integers + +| Level | Permissions Integer | What's Included | +|-------|-------------------|-----------------| +| Minimal | `117760` | View Channels, Send Messages, Embed Links, Attach Files, Read Message History | +| Recommended | `274878286912` | All of the above plus Send Messages in Threads, Add Reactions, Use External Emojis | + +## Step 6: Invite to Your Server + +1. Open the invite URL in your browser (from the Installation tab or the manual URL you constructed). +2. In the **Add to Server** dropdown, select your server. +3. Click **Continue**, then **Authorize**. +4. Complete the CAPTCHA if prompted. + +:::info +You need the **Manage Server** permission on the Discord server to invite a bot. If you don't see your server in the dropdown, ask a server admin to use the invite link instead. +::: + +After authorizing, the bot will appear in your server's member list (it will show as offline until you start the Hermes gateway). + +## Step 7: Find Your Discord User ID + +Hermes Agent uses your Discord User ID to control who can interact with the bot. To find it: + +1. Open Discord (desktop or web app). +2.
Go to **Settings** → **Advanced** → toggle **Developer Mode** to **ON**. +3. Close settings. +4. Right-click your own username (in a message, the member list, or your profile) → **Copy User ID**. + +Your User ID is a long number like `284102345871466496`. + +:::tip +Developer Mode also lets you copy **Channel IDs** and **Server IDs** the same way — right-click the channel or server name and select Copy ID. You'll need a Channel ID if you want to set a home channel manually. +::: + +## Step 8: Configure Hermes Agent + +### Option A: Interactive Setup (Recommended) + +Run the guided setup command: + +```bash +hermes gateway setup +``` + +Select **Discord** when prompted, then paste your bot token and user ID when asked. + +### Option B: Manual Configuration + +Add the following to your `~/.hermes/.env` file: + +```bash +# Required +DISCORD_BOT_TOKEN=your-bot-token-from-developer-portal +DISCORD_ALLOWED_USERS=284102345871466496 + +# Multiple allowed users (comma-separated) +# DISCORD_ALLOWED_USERS=284102345871466496,198765432109876543 +``` + +### Start the Gateway + +Once configured, start the Discord gateway: ```bash hermes gateway ``` -## Optional: Home Channel +The bot should come online in Discord within a few seconds. Send it a message — either a DM or in a channel it can see — to test. -Set a default channel for cron job delivery: +:::tip +You can run `hermes gateway` in the background or as a systemd service for persistent operation. See the deployment docs for details. +::: + +## Home Channel + +You can designate a "home channel" where the bot sends proactive messages (such as cron job output, reminders, and notifications). There are two ways to set it: + +### Using the Slash Command + +Type `/sethome` in any Discord channel where the bot is present. That channel becomes the home channel. 
+ +### Manual Configuration + +Add these to your `~/.hermes/.env`: ```bash DISCORD_HOME_CHANNEL=123456789012345678 DISCORD_HOME_CHANNEL_NAME="#bot-updates" ``` -Or use `/sethome` in any Discord channel. +Replace the ID with the actual channel ID (right-click → Copy Channel ID with Developer Mode on). -## Required Bot Permissions +## Bot Behavior -When generating the invite URL, make sure to include: - -- **Send Messages** — bot needs to reply -- **Read Message History** — for context -- **Attach Files** — for audio, images, and file outputs +- **Server channels**: The bot responds to all messages from allowed users in channels it can access. It does **not** require a mention or prefix — any message from an allowed user is treated as a prompt. +- **Direct messages**: DMs always work, even without the Message Content Intent enabled (Discord exempts DMs from this requirement). However, you should still enable the intent for server channel support. +- **Conversations**: Each channel or DM maintains its own conversation context. ## Voice Messages -Voice messages on Discord are automatically transcribed (requires `VOICE_TOOLS_OPENAI_KEY`). TTS audio is sent as MP3 file attachments. +Hermes Agent supports Discord voice messages: + +- **Incoming voice messages** are automatically transcribed using Whisper (requires `VOICE_TOOLS_OPENAI_KEY` to be set in your environment). +- **Text-to-speech**: When TTS is enabled, the bot can send spoken responses as MP3 file attachments. + +## Troubleshooting + +### Bot is online but not responding to messages + +**Cause**: Message Content Intent is disabled. + +**Fix**: Go to [Developer Portal](https://discord.com/developers/applications) → your app → Bot → Privileged Gateway Intents → enable **Message Content Intent** → Save Changes. Restart the gateway. + +### "Disallowed Intents" error on startup + +**Cause**: Your code requests intents that aren't enabled in the Developer Portal. 
+ +**Fix**: Enable all three Privileged Gateway Intents (Presence, Server Members, Message Content) in the Bot settings, then restart. + +### Bot can't see messages in a specific channel + +**Cause**: The bot's role doesn't have permission to view that channel. + +**Fix**: In Discord, go to the channel's settings → Permissions → add the bot's role with **View Channel** and **Read Message History** enabled. + +### 403 Forbidden errors + +**Cause**: The bot is missing required permissions. + +**Fix**: Re-invite the bot with the correct permissions using the URL from Step 5, or manually adjust the bot's role permissions in Server Settings → Roles. + +### Bot is offline + +**Cause**: The Hermes gateway isn't running, or the token is incorrect. + +**Fix**: Check that `hermes gateway` is running. Verify `DISCORD_BOT_TOKEN` in your `.env` file. If you recently reset the token, update it. + +### "User not allowed" / Bot ignores you + +**Cause**: Your User ID isn't in `DISCORD_ALLOWED_USERS`. + +**Fix**: Add your User ID to `DISCORD_ALLOWED_USERS` in `~/.hermes/.env` and restart the gateway. ## Security :::warning -Always set `DISCORD_ALLOWED_USERS` to restrict who can use the bot. Without it, the gateway denies all users by default. +Always set `DISCORD_ALLOWED_USERS` to restrict who can interact with the bot. Without it, the gateway denies all users by default as a safety measure. Only add User IDs of people you trust — authorized users have full access to the agent's capabilities, including tool use and system access. ::: + +For more information on securing your Hermes Agent deployment, see the [Security Guide](../security.md). 
diff --git a/website/docs/user-guide/messaging/slack.md b/website/docs/user-guide/messaging/slack.md index b008d56b3..52dde5f6a 100644 --- a/website/docs/user-guide/messaging/slack.md +++ b/website/docs/user-guide/messaging/slack.md @@ -1,57 +1,214 @@ --- sidebar_position: 4 title: "Slack" -description: "Set up Hermes Agent as a Slack bot" +description: "Set up Hermes Agent as a Slack bot using Socket Mode" --- # Slack Setup -Connect Hermes Agent to Slack using Socket Mode for real-time communication. +Connect Hermes Agent to Slack as a bot using Socket Mode. Socket Mode uses WebSockets instead of +public HTTP endpoints, so your Hermes instance doesn't need to be publicly accessible — it works +behind firewalls, on your laptop, or on a private server. -## Setup Steps +:::warning Classic Slack Apps Deprecated +Classic Slack apps (using RTM API) were **fully deprecated in March 2025**. Hermes uses the modern +Bolt SDK with Socket Mode. If you have an old classic app, you must create a new one following +the steps below. +::: -1. **Create an app:** Go to [Slack API](https://api.slack.com/apps), create a new app -2. **Enable Socket Mode:** In app settings → Socket Mode → Enable -3. **Get tokens:** - - Bot Token (`xoxb-...`): OAuth & Permissions → Install to Workspace - - App Token (`xapp-...`): Basic Information → App-Level Tokens → Generate (with `connections:write` scope) -4. **Configure:** Run `hermes gateway setup` and select Slack, or add to `~/.hermes/.env` manually: +## Overview + +| Component | Value | +|-----------|-------| +| **Library** | `@slack/bolt` (Socket Mode) | +| **Connection** | WebSocket — no public URL required | +| **Auth tokens needed** | Bot Token (`xoxb-`) + App-Level Token (`xapp-`) | +| **User identification** | Slack Member IDs (e.g., `U01ABC2DEF3`) | + +--- + +## Step 1: Create a Slack App + +1. Go to [https://api.slack.com/apps](https://api.slack.com/apps) +2. Click **Create New App** +3. Choose **From scratch** +4. 
Enter an app name (e.g., "Hermes Agent") and select your workspace +5. Click **Create App** + +You'll land on the app's **Basic Information** page. + +--- + +## Step 2: Configure Bot Token Scopes + +Navigate to **Features → OAuth & Permissions** in the sidebar. Scroll to **Scopes → Bot Token Scopes** and add the following: + +| Scope | Purpose | +|-------|---------| +| `chat:write` | Send messages as the bot | +| `app_mentions:read` | Respond when @mentioned in channels | +| `channels:history` | Read messages in public channels the bot is in | +| `channels:read` | List and get info about public channels | +| `im:history` | Read direct message history | +| `im:read` | View basic DM info | +| `im:write` | Open and manage DMs | +| `users:read` | Look up user information | + +**Optional scopes:** + +| Scope | Purpose | +|-------|---------| +| `groups:history` | Read messages in private channels the bot is invited to | +| `files:write` | Upload files (audio, images) | + +--- + +## Step 3: Enable Socket Mode + +Socket Mode lets the bot connect via WebSocket instead of requiring a public URL. + +1. In the sidebar, go to **Settings → Socket Mode** +2. Toggle **Enable Socket Mode** to ON +3. You'll be prompted to create an **App-Level Token**: + - Name it something like `hermes-socket` (the name doesn't matter) + - Add the **`connections:write`** scope + - Click **Generate** +4. **Copy the token** — it starts with `xapp-`. This is your `SLACK_APP_TOKEN` + +:::tip +You can always find or regenerate app-level tokens under **Settings → Basic Information → App-Level Tokens**. +::: + +--- + +## Step 4: Subscribe to Events + +1. In the sidebar, go to **Features → Event Subscriptions** +2. Toggle **Enable Events** to ON +3. 
Expand **Subscribe to bot events** and add: + +| Event | Purpose | +|-------|---------| +| `app_mention` | Bot responds when @mentioned in any channel | +| `message.im` | Bot responds to direct messages | + +**Optional event:** + +| Event | Purpose | +|-------|---------| +| `message.channels` | Bot sees all messages in public channels it's added to | + +4. Click **Save Changes** at the bottom of the page + +--- + +## Step 5: Install App to Workspace + +1. In the sidebar, go to **Settings → Install App** +2. Click **Install to Workspace** +3. Review the permissions and click **Allow** +4. After authorization, you'll see a **Bot User OAuth Token** starting with `xoxb-` +5. **Copy this token** — this is your `SLACK_BOT_TOKEN` + +:::tip +If you change scopes later, you'll need to **reinstall the app** for the new scopes to take effect. +The Install App page will show a banner prompting you to do so. +::: + +--- + +## Step 6: Find User IDs for the Allowlist + +Hermes uses Slack **Member IDs** (not usernames or display names) for the allowlist. + +To find a Member ID: + +1. In Slack, click on the user's name or avatar +2. Click **View full profile** +3. Click the **⋮** (more) button +4. Select **Copy member ID** + +Member IDs look like `U01ABC2DEF3`. You need your own Member ID at minimum. + +--- + +## Step 7: Configure Hermes + +Add the following to your `~/.hermes/.env` file: ```bash -SLACK_BOT_TOKEN=xoxb-... -SLACK_APP_TOKEN=xapp-... -SLACK_ALLOWED_USERS=U01234ABCDE # Comma-separated Slack user IDs +# Required +SLACK_BOT_TOKEN=xoxb-your-bot-token-here +SLACK_APP_TOKEN=xapp-your-app-level-token-here +SLACK_ALLOWED_USERS=U01ABC2DEF3 # Comma-separated Member IDs + +# Optional +SLACK_HOME_CHANNEL=C01234567890 # Default channel for cron/scheduled messages ``` -5. 
**Start the gateway:** +Or run the interactive setup: ```bash -hermes gateway +hermes gateway setup # Select Slack when prompted ``` -## Optional: Home Channel +Then start the gateway: -Set a default channel for cron job delivery: +```bash +hermes gateway # Foreground +hermes gateway install # Install as a system service +``` + +--- + +## Home Channel + +Set `SLACK_HOME_CHANNEL` to a channel ID where Hermes will deliver scheduled messages, +cron job results, and other proactive notifications. To find a channel ID: + +1. Right-click the channel name in Slack +2. Click **View channel details** +3. Scroll to the bottom — the Channel ID is shown there ```bash SLACK_HOME_CHANNEL=C01234567890 ``` -## Required Bot Scopes +Make sure the bot has been **invited to the channel** (`/invite @Hermes Agent`). -Make sure your Slack app has these OAuth scopes: - -- `chat:write` — Send messages -- `channels:history` — Read channel messages -- `im:history` — Read DM messages -- `files:write` — Upload files (audio, images) +--- ## Voice Messages -Voice messages on Slack are automatically transcribed (requires `VOICE_TOOLS_OPENAI_KEY`). TTS audio is sent as file attachments. 
+Hermes supports voice on Slack: + +- **Incoming:** Voice/audio messages are automatically transcribed using Whisper (requires `VOICE_TOOLS_OPENAI_KEY`) +- **Outgoing:** TTS responses are sent as audio file attachments + +--- + +## Troubleshooting + +| Problem | Solution | +|---------|----------| +| Bot doesn't respond to DMs | Verify `message.im` is in your event subscriptions and the app is reinstalled | +| Bot doesn't respond to @mentions | Verify `app_mention` is in your event subscriptions | +| "not_authed" or "invalid_auth" errors | Regenerate your Bot Token and App Token, update `.env` | +| Bot responds but can't post in a channel | Invite the bot to the channel with `/invite @Hermes Agent` | +| "missing_scope" error | Add the required scope in OAuth & Permissions, then **reinstall** the app | +| Socket disconnects frequently | Check your network; Bolt auto-reconnects but unstable connections cause lag | + +--- ## Security :::warning -Always set `SLACK_ALLOWED_USERS` to restrict who can use the bot. Without it, the gateway denies all users by default. +**Always set `SLACK_ALLOWED_USERS`** with the Member IDs of authorized users. Without this setting, +the gateway will **deny all messages** by default as a safety measure. Never share your bot tokens — +treat them like passwords. 
::: + +- Tokens should be stored in `~/.hermes/.env` (file permissions `600`) +- Rotate tokens periodically via the Slack app settings +- Audit who has access to your Hermes config directory +- Socket Mode means no public endpoint is exposed — one less attack surface diff --git a/website/docs/user-guide/messaging/telegram.md b/website/docs/user-guide/messaging/telegram.md index b802ba86b..123b81397 100644 --- a/website/docs/user-guide/messaging/telegram.md +++ b/website/docs/user-guide/messaging/telegram.md @@ -1,51 +1,144 @@ --- -sidebar_position: 2 +sidebar_position: 1 title: "Telegram" description: "Set up Hermes Agent as a Telegram bot" --- # Telegram Setup -Connect Hermes Agent to Telegram so you can chat from your phone, send voice memos, and receive scheduled task results. +Hermes Agent integrates with Telegram as a full-featured conversational bot. Once connected, you can chat with your agent from any device, send voice memos that get auto-transcribed, receive scheduled task results, and use the agent in group chats. The integration is built on [python-telegram-bot](https://python-telegram-bot.org/) and supports text, voice, images, and file attachments. -## Setup Steps +## Step 1: Create a Bot via BotFather -1. **Create a bot:** Message [@BotFather](https://t.me/BotFather) on Telegram, use `/newbot` -2. **Get your user ID:** Message [@userinfobot](https://t.me/userinfobot) — it replies with your numeric ID -3. **Configure:** Run `hermes gateway setup` and select Telegram, or add to `~/.hermes/.env` manually: +Every Telegram bot requires an API token issued by [@BotFather](https://t.me/BotFather), Telegram's official bot management tool. -```bash -TELEGRAM_BOT_TOKEN=123456:ABC-DEF... -TELEGRAM_ALLOWED_USERS=YOUR_USER_ID # Comma-separated for multiple users +1. Open Telegram and search for **@BotFather**, or visit [t.me/BotFather](https://t.me/BotFather) +2. Send `/newbot` +3. Choose a **display name** (e.g., "Hermes Agent") — this can be anything +4. 
Choose a **username** — this must be unique and end in `bot` (e.g., `my_hermes_bot`) +5. BotFather replies with your **API token**. It looks like this: + +``` +123456789:ABCdefGHIjklMNOpqrSTUvwxYZ ``` -4. **Start the gateway:** +:::warning +Keep your bot token secret. Anyone with this token can control your bot. If it leaks, revoke it immediately via `/revoke` in BotFather. +::: + +## Step 2: Customize Your Bot (Optional) + +These BotFather commands improve the user experience. Message @BotFather and use: + +| Command | Purpose | +|---------|---------| +| `/setdescription` | The "What can this bot do?" text shown before a user starts chatting | +| `/setabouttext` | Short text on the bot's profile page | +| `/setuserpic` | Upload an avatar for your bot | +| `/setcommands` | Define the command menu (the `/` button in chat) | +| `/setprivacy` | Control whether the bot sees all group messages (see Step 3) | + +:::tip +For `/setcommands`, a useful starting set: + +``` +help - Show help information +new - Start a new conversation +sethome - Set this chat as the home channel +``` +::: + +## Step 3: Privacy Mode (Critical for Groups) + +Telegram bots have a **privacy mode** that is **enabled by default**. This is the single most common source of confusion when using bots in groups. + +**With privacy mode ON**, your bot can only see: +- Messages that start with a `/` command +- Replies directly to the bot's own messages +- Service messages (member joins/leaves, pinned messages, etc.) +- Messages in channels where the bot is an admin + +**With privacy mode OFF**, the bot receives every message in the group. + +### How to disable privacy mode + +1. Message **@BotFather** +2. Send `/mybots` +3. Select your bot +4. Go to **Bot Settings → Group Privacy → Turn off** + +:::warning +**You must remove and re-add the bot to any group** after changing the privacy setting. 
Telegram caches the privacy state when a bot joins a group, and it will not update until the bot is removed and re-added. +::: + +:::tip +An alternative to disabling privacy mode: promote the bot to **group admin**. Admin bots always receive all messages regardless of the privacy setting, and this avoids needing to toggle the global privacy mode. +::: + +## Step 4: Find Your User ID + +Hermes Agent uses numeric Telegram user IDs to control access. Your user ID is **not** your username — it's a number like `123456789`. + +**Method 1 (recommended):** Message [@userinfobot](https://t.me/userinfobot) — it instantly replies with your user ID. + +**Method 2:** Message [@get_id_bot](https://t.me/get_id_bot) — another reliable option. + +Save this number; you'll need it for the next step. + +## Step 5: Configure Hermes + +### Option A: Interactive Setup (Recommended) + +```bash +hermes gateway setup +``` + +Select **Telegram** when prompted. The wizard asks for your bot token and allowed user IDs, then writes the configuration for you. + +### Option B: Manual Configuration + +Add the following to `~/.hermes/.env`: + +```bash +TELEGRAM_BOT_TOKEN=123456789:ABCdefGHIjklMNOpqrSTUvwxYZ +TELEGRAM_ALLOWED_USERS=123456789 # Comma-separated for multiple users +``` + +### Start the Gateway ```bash hermes gateway ``` -## Optional: Home Channel +The bot should come online within seconds. Send it a message on Telegram to verify. -Set a home channel for cron job delivery: +## Home Channel + +Use the `/sethome` command in any Telegram chat (DM or group) to designate it as the **home channel**. Scheduled tasks (cron jobs) deliver their results to this channel. + +You can also set it manually in `~/.hermes/.env`: ```bash TELEGRAM_HOME_CHANNEL=-1001234567890 TELEGRAM_HOME_CHANNEL_NAME="My Notes" ``` -Or use the `/sethome` command in any Telegram chat to set it dynamically. +:::tip +Group chat IDs are negative numbers (e.g., `-1001234567890`). 
Your personal DM chat ID is the same as your user ID. +::: ## Voice Messages -Voice messages sent on Telegram are automatically transcribed using OpenAI's Whisper API and injected as text into the conversation. Requires `VOICE_TOOLS_OPENAI_KEY` in `~/.hermes/.env`. +### Incoming Voice (Speech-to-Text) -### Voice Bubbles (TTS) +Voice messages you send on Telegram are automatically transcribed using OpenAI's Whisper API and injected as text into the conversation. This requires `VOICE_TOOLS_OPENAI_KEY` in `~/.hermes/.env`. -When the agent generates audio via text-to-speech, it's delivered as native Telegram voice bubbles (the round, inline-playable kind). +### Outgoing Voice (Text-to-Speech) + +When the agent generates audio via TTS, it's delivered as native Telegram **voice bubbles** — the round, inline-playable kind. - **OpenAI and ElevenLabs** produce Opus natively — no extra setup needed -- **Edge TTS** (the default free provider) outputs MP3 and needs **ffmpeg** to convert to Opus: +- **Edge TTS** (the default free provider) outputs MP3 and requires **ffmpeg** to convert to Opus: ```bash # Ubuntu/Debian @@ -55,7 +148,34 @@ sudo apt install ffmpeg brew install ffmpeg ``` -Without ffmpeg, Edge TTS audio is sent as a regular audio file (still playable, but rectangular player instead of voice bubble). +Without ffmpeg, Edge TTS audio is sent as a regular audio file (still playable, but uses the rectangular player instead of a voice bubble). + +Configure the TTS provider in your `config.yaml` under the `tts.provider` key. 
+ +## Group Chat Usage + +Hermes Agent works in Telegram group chats with a few considerations: + +- **Privacy mode** determines what messages the bot can see (see [Step 3](#step-3-privacy-mode-critical-for-groups)) +- When privacy mode is on, **@mention the bot** (e.g., `@my_hermes_bot what's the weather?`) or **reply to its messages** to interact +- When privacy mode is off (or bot is admin), the bot sees all messages and can participate naturally +- `TELEGRAM_ALLOWED_USERS` still applies — only authorized users can trigger the bot, even in groups + +## Recent Bot API Features (2024–2025) + +- **Privacy policy:** Telegram now requires bots to have a privacy policy. Set one via BotFather with `/setprivacy_policy`, or Telegram may auto-generate a placeholder. This is particularly important if your bot is public-facing. +- **Message streaming:** Bot API 9.x added support for streaming long responses, which can improve perceived latency for lengthy agent replies. + +## Troubleshooting + +| Problem | Solution | +|---------|----------| +| Bot not responding at all | Verify `TELEGRAM_BOT_TOKEN` is correct. Check `hermes gateway` logs for errors. | +| Bot responds with "unauthorized" | Your user ID is not in `TELEGRAM_ALLOWED_USERS`. Double-check with @userinfobot. | +| Bot ignores group messages | Privacy mode is likely on. Disable it (Step 3) or make the bot a group admin. **Remember to remove and re-add the bot after changing privacy.** | +| Voice messages not transcribed | Check that `VOICE_TOOLS_OPENAI_KEY` is set and valid in `~/.hermes/.env`. | +| Voice replies are files, not bubbles | Install `ffmpeg` (needed for Edge TTS Opus conversion). | +| Bot token revoked/invalid | Generate a new token via `/revoke` then `/newbot` or `/token` in BotFather. Update your `.env` file. | ## Exec Approval @@ -68,7 +188,9 @@ Reply "yes"/"y" to approve or "no"/"n" to deny. ## Security :::warning -Always set `TELEGRAM_ALLOWED_USERS` to restrict who can use the bot. 
Without it, the gateway denies all users by default. +Always set `TELEGRAM_ALLOWED_USERS` to restrict who can interact with your bot. Without it, the gateway denies all users by default as a safety measure. ::: -You can also use [DM pairing](/user-guide/messaging#dm-pairing-alternative-to-allowlists) for a more dynamic approach. +Never share your bot token publicly. If compromised, revoke it immediately via BotFather's `/revoke` command. + +For more details, see the [Security documentation](/user-guide/security). You can also use [DM pairing](/user-guide/messaging#dm-pairing-alternative-to-allowlists) for a more dynamic approach to user authorization. diff --git a/website/docs/user-guide/messaging/whatsapp.md b/website/docs/user-guide/messaging/whatsapp.md index 4879db1cc..22285eb63 100644 --- a/website/docs/user-guide/messaging/whatsapp.md +++ b/website/docs/user-guide/messaging/whatsapp.md @@ -6,16 +6,57 @@ description: "Set up Hermes Agent as a WhatsApp bot via the built-in Baileys bri # WhatsApp Setup -WhatsApp doesn't have a simple bot API like Telegram or Discord. Hermes includes a built-in bridge using [Baileys](https://github.com/WhiskeySockets/Baileys) that connects via WhatsApp Web. +Hermes connects to WhatsApp through a built-in bridge using [whatsapp-web.js](https://github.com/pedroslopez/whatsapp-web.js) +(Baileys-based). This works by emulating a WhatsApp Web session — **not** through the official +WhatsApp Business API. No Meta developer account or Business verification is required. + +:::warning Unofficial API — Ban Risk +WhatsApp does **not** officially support third-party bots outside the Business API. Using +whatsapp-web.js carries a small risk of account restrictions. 
To minimize risk: +- **Use a dedicated phone number** for the bot (not your personal number) +- **Don't send bulk/spam messages** — keep usage conversational +- **Don't automate outbound messaging** to people who haven't messaged first +::: + +:::warning WhatsApp Web Protocol Updates +WhatsApp periodically updates their Web protocol, which can temporarily break compatibility +with whatsapp-web.js. When this happens, Hermes will update the bridge dependency. If the +bot stops working after a WhatsApp update, pull the latest Hermes version and re-pair. +::: ## Two Modes | Mode | How it works | Best for | |------|-------------|----------| -| **Separate bot number** (recommended) | Dedicate a phone number to the bot. People message that number directly. | Clean UX, multiple users | -| **Personal self-chat** | Use your own WhatsApp. You message yourself to talk to the agent. | Quick setup, single user | +| **Separate bot number** (recommended) | Dedicate a phone number to the bot. People message that number directly. | Clean UX, multiple users, lower ban risk | +| **Personal self-chat** | Use your own WhatsApp. You message yourself to talk to the agent. 
| Quick setup, single user, testing | -## Setup +--- + +## Prerequisites + +- **Node.js v18+** and **npm** — the WhatsApp bridge runs as a Node.js process +- **A phone with WhatsApp** installed (for scanning the QR code) + +**On Linux headless servers**, you also need Chromium/Puppeteer dependencies: + +```bash +# Debian / Ubuntu +sudo apt-get install -y \ + libnss3 libatk1.0-0 libatk-bridge2.0-0 libcups2 libdrm2 \ + libxkbcommon0 libxcomposite1 libxdamage1 libxrandr2 libgbm1 \ + libpango-1.0-0 libcairo2 libasound2 libxshmfence1 + +# Fedora / RHEL +sudo dnf install -y \ + nss atk at-spi2-atk cups-libs libdrm libxkbcommon \ + libXcomposite libXdamage libXrandr mesa-libgbm \ + pango cairo alsa-lib +``` + +--- + +## Step 1: Run the Setup Wizard ```bash hermes whatsapp @@ -23,55 +64,130 @@ hermes whatsapp The wizard will: -1. Ask which mode you want -2. For **bot mode**: guide you through getting a second number -3. Configure the allowlist -4. Install bridge dependencies (Node.js required) -5. Display a QR code — scan from WhatsApp → Settings → Linked Devices → Link a Device -6. Exit once paired +1. Ask which mode you want (**bot** or **self-chat**) +2. Install bridge dependencies if needed +3. Display a **QR code** in your terminal +4. Wait for you to scan it -## Getting a Second Number (Bot Mode) +**To scan the QR code:** + +1. Open WhatsApp on your phone +2. Go to **Settings → Linked Devices** +3. Tap **Link a Device** +4. Point your camera at the terminal QR code + +Once paired, the wizard confirms the connection and exits. Your session is saved automatically. + +:::tip +If the QR code looks garbled, make sure your terminal is at least 60 columns wide and supports +Unicode. You can also try a different terminal emulator. +::: + +--- + +## Step 2: Getting a Second Phone Number (Bot Mode) + +For bot mode, you need a phone number that isn't already registered with WhatsApp. 
Three options: | Option | Cost | Notes | |--------|------|-------| -| WhatsApp Business app + dual-SIM | Free (if you have dual-SIM) | Install alongside personal WhatsApp, no second phone needed | -| Google Voice | Free (US only) | voice.google.com, verify WhatsApp via the Google Voice app | -| Prepaid SIM | $3-10/month | Any carrier; verify once, phone can go in a drawer on WiFi | +| **Google Voice** | Free | US only. Get a number at [voice.google.com](https://voice.google.com). Verify WhatsApp via SMS through the Google Voice app. | +| **Prepaid SIM** | $5–15 one-time | Any carrier. Activate, verify WhatsApp, then the SIM can sit in a drawer. Number must stay active (make a call every 90 days). | +| **VoIP services** | Free–$5/month | TextNow, TextFree, or similar. Some VoIP numbers are blocked by WhatsApp — try a few if the first doesn't work. | -## Starting the Gateway +After getting the number: + +1. Install WhatsApp on a phone (or use WhatsApp Business app with dual-SIM) +2. Register the new number with WhatsApp +3. Run `hermes whatsapp` and scan the QR code from that WhatsApp account + +--- + +## Step 3: Configure Hermes + +Add the following to your `~/.hermes/.env` file: ```bash -hermes gateway # Foreground -hermes gateway install # Or install as a system service +# Required +WHATSAPP_ENABLED=true +WHATSAPP_MODE=bot # "bot" or "self-chat" +WHATSAPP_ALLOWED_USERS=15551234567 # Comma-separated phone numbers (with country code, no +) + +# Optional +WHATSAPP_HOME_CONTACT=15551234567 # Default contact for proactive/scheduled messages +``` + +Then start the gateway: + +```bash +hermes gateway # Foreground +hermes gateway install # Install as a system service ``` The gateway starts the WhatsApp bridge automatically using the saved session. -## Environment Variables +--- + +## Session Persistence + +The whatsapp-web.js `LocalAuth` strategy saves your session to the `.wwebjs_auth` folder inside +your Hermes data directory (`~/.hermes/`). 
This means: + +- **Sessions survive restarts** — you don't need to re-scan the QR code every time +- The session data includes encryption keys and device credentials +- **Do not share or commit the `.wwebjs_auth` folder** — it grants full access to the WhatsApp account + +--- + +## Re-pairing + +If the session breaks (phone reset, WhatsApp update, manually unlinked), you'll see connection +errors in the gateway logs. To fix it: ```bash -WHATSAPP_ENABLED=true -WHATSAPP_MODE=bot # "bot" or "self-chat" -WHATSAPP_ALLOWED_USERS=15551234567 # Comma-separated phone numbers with country code +hermes whatsapp ``` -## Important Notes +This generates a fresh QR code. Scan it again and the session is re-established. The gateway +handles **temporary** disconnections (network blips, phone going offline briefly) automatically +with reconnection logic. -- Agent responses are prefixed with "⚕ **Hermes Agent**" for easy identification -- WhatsApp Web sessions can disconnect if WhatsApp updates their protocol -- The gateway reconnects automatically -- If you see persistent failures, re-pair with `hermes whatsapp` - -:::info Re-pairing -If WhatsApp Web sessions disconnect (protocol updates, phone reset), re-pair with `hermes whatsapp`. The gateway handles temporary disconnections automatically. -::: +--- ## Voice Messages -Voice messages sent on WhatsApp are automatically transcribed (requires `VOICE_TOOLS_OPENAI_KEY`). TTS audio is sent as MP3 file attachments. +Hermes supports voice on WhatsApp: + +- **Incoming:** Voice messages (`.ogg` opus) are automatically transcribed using Whisper (requires `VOICE_TOOLS_OPENAI_KEY`) +- **Outgoing:** TTS responses are sent as MP3 audio file attachments +- Agent responses are prefixed with "⚕ **Hermes Agent**" for easy identification + +--- + +## Troubleshooting + +| Problem | Solution | +|---------|----------| +| **QR code not scanning** | Ensure terminal is wide enough (60+ columns). Try a different terminal. 
Make sure you're scanning from the correct WhatsApp account (bot number, not personal). | +| **QR code expires** | QR codes refresh every ~20 seconds. If it times out, restart `hermes whatsapp`. | +| **Session not persisting** | Check that `~/.hermes/.wwebjs_auth/` exists and is writable. On Docker, mount this as a volume. | +| **Logged out unexpectedly** | WhatsApp unlinks devices after ~14 days of phone inactivity. Keep the phone on and connected to WiFi. Re-pair with `hermes whatsapp`. | +| **"Execution context was destroyed"** | Chromium crashed. Install the Puppeteer dependencies listed in Prerequisites. On low-RAM servers, add swap space. | +| **Bot stops working after WhatsApp update** | Update Hermes to get the latest bridge version, then re-pair. | +| **Messages not being received** | Verify `WHATSAPP_ALLOWED_USERS` includes the sender's number (with country code, no `+` or spaces). | + +--- ## Security :::warning -Always set `WHATSAPP_ALLOWED_USERS` with phone numbers (including country code) to restrict who can use the bot. +**Always set `WHATSAPP_ALLOWED_USERS`** with phone numbers (including country code, without the `+`) +of authorized users. Without this setting, the gateway will **deny all incoming messages** as a +safety measure. ::: + +- The `.wwebjs_auth` folder contains full session credentials — protect it like a password +- Set file permissions: `chmod 700 ~/.hermes/.wwebjs_auth` +- Use a **dedicated phone number** for the bot to isolate risk from your personal account +- If you suspect compromise, unlink the device from WhatsApp → Settings → Linked Devices +- Phone numbers in logs are partially redacted, but review your log retention policy