From 9c0f3462581ffa5fa875d03dd54cdadcd281ae3a Mon Sep 17 00:00:00 2001 From: Test Date: Wed, 18 Mar 2026 03:01:41 -0700 Subject: [PATCH 1/3] fix: direct user message on STT failure + hermes-agent-setup skill When a user sends a voice message and STT isn't configured, the gateway now sends a clear message directly to the user explaining how to set up voice transcription, rather than relying on the agent to relay an injected context note (which often gets misinterpreted). Also adds a hermes-agent-setup bundled skill covering STT/TTS setup, tool configuration, dependency installation, and troubleshooting. --- gateway/run.py | 27 ++++ skills/dogfood/hermes-agent-setup/SKILL.md | 154 +++++++++++++++++++++ 2 files changed, 181 insertions(+) create mode 100644 skills/dogfood/hermes-agent-setup/SKILL.md diff --git a/gateway/run.py b/gateway/run.py index ea9f2a283..668977ef6 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1870,6 +1870,33 @@ class GatewayRunner: message_text = await self._enrich_message_with_transcription( message_text, audio_paths ) + # If STT failed, send a direct message to the user so they + # know voice isn't configured — don't rely on the agent to + # relay the error clearly. + _stt_fail_markers = ( + "No STT provider", + "STT is disabled", + "can't listen", + "VOICE_TOOLS_OPENAI_KEY", + ) + if any(m in message_text for m in _stt_fail_markers): + _stt_adapter = self.adapters.get(source.platform) + _stt_meta = {"thread_id": source.thread_id} if source.thread_id else None + if _stt_adapter: + try: + await _stt_adapter.send( + source.chat_id, + "🎤 I received your voice message but can't transcribe it — " + "no speech-to-text provider is configured.\n\n" + "To enable voice: install faster-whisper " + "(`pip install faster-whisper` in the Hermes venv) " + "and set `stt.enabled: true` in config.yaml, " + "then /restart the gateway.\n\n" + "For full setup instructions, type: `/skill hermes-agent-setup`", + metadata=_stt_meta, + ) + except Exception: + pass # ----------------------------------------------------------------- # Enrich document messages with context notes for the agent diff --git a/skills/dogfood/hermes-agent-setup/SKILL.md b/skills/dogfood/hermes-agent-setup/SKILL.md new file mode 100644 index 000000000..275c06866 --- /dev/null +++ b/skills/dogfood/hermes-agent-setup/SKILL.md @@ -0,0 +1,154 @@ +--- +name: hermes-agent-setup +description: Help users configure Hermes Agent — enable tools, set up voice/STT/TTS, install dependencies, and troubleshoot. Use when someone asks to enable features, configure voice, or when the system detects missing config. +version: 1.0.0 +author: Hermes Agent +tags: [setup, configuration, tools, stt, tts, voice, hermes] +--- + +# Hermes Agent Setup & Configuration + +Use this skill when a user asks to enable features, configure voice messages, set up tools, or troubleshoot configuration. + +## Key Paths + +- Config: `~/.hermes/config.yaml` +- API keys: `~/.hermes/.env` +- Skills: `~/.hermes/skills/` +- Hermes install: `~/.hermes/hermes-agent/` + +## Voice Messages (STT) + +Voice messages from Telegram/Discord/WhatsApp/Slack/Signal are auto-transcribed when an STT provider is available. + +### Provider priority (auto-detected): +1. **Local faster-whisper** — free, no API key, runs on CPU/GPU +2. **Groq Whisper** — free tier, needs GROQ_API_KEY +3. **OpenAI Whisper** — paid, needs VOICE_TOOLS_OPENAI_KEY + +### Setup local STT (recommended): + +```bash +cd ~/.hermes/hermes-agent +source .venv/bin/activate # or: source venv/bin/activate +pip install faster-whisper +``` + +Add to config.yaml under the `stt:` section: +```yaml +stt: + enabled: true + provider: local + local: + model: base # Options: tiny, base, small, medium, large-v3 +``` + +Model downloads automatically on first use (~150 MB for base). + +### Setup Groq STT (free cloud): + +1. Get free key from https://console.groq.com +2. Add GROQ_API_KEY to the env file +3. Set provider to groq in config.yaml stt section + +### Verify STT: + +After config changes, restart the gateway (send /restart in chat, or restart `hermes gateway run`). Then send a voice message. + +## Voice Replies (TTS) + +Hermes can reply with voice when users send voice messages. + +### TTS providers (set API key in env file): + +| Provider | Env var | Free? | +|----------|---------|-------| +| ElevenLabs | ELEVENLABS_API_KEY | Free tier | +| OpenAI | VOICE_TOOLS_OPENAI_KEY | Paid | +| Kokoro (local) | None needed | Free | +| Fish Audio | FISH_AUDIO_API_KEY | Free tier | + +### Voice commands (in any chat): +- `/voice on` — voice reply to voice messages only +- `/voice tts` — voice reply to all messages +- `/voice off` — text only (default) + +## Enabling/Disabling Tools + +### Interactive tool config (requires terminal): + +```bash +cd ~/.hermes/hermes-agent +source .venv/bin/activate +python -m hermes_cli.main tools +``` + +This opens a curses UI to enable/disable toolsets per platform. + +### After changing tools: + +Use `/reset` in the chat to start a fresh session with the new toolset. Tool changes do NOT take effect mid-conversation (this preserves prompt caching). + +### Common toolsets: + +| Toolset | What it provides | +|---------|-----------------| +| terminal | Shell command execution | +| file | File read/write/search/patch | +| web | Web search and extraction | +| browser | Browser automation | +| image_gen | AI image generation | +| mcp | MCP server connections | +| voice | Text-to-speech | +| cronjob | Scheduled tasks | + +## Installing Dependencies + +Some tools need extra packages: + +```bash +cd ~/.hermes/hermes-agent && source .venv/bin/activate + +pip install faster-whisper # Local STT +pip install browserbase # Browser automation +pip install mcp # MCP servers +``` + +## Setup Wizard + +For first-time setup or full reconfiguration: + +```bash +cd ~/.hermes/hermes-agent +source .venv/bin/activate +python -m hermes_cli.main setup +``` + +## Gateway Commands + +| Command | What it does | +|---------|-------------| +| /reset or /new | Fresh session (picks up new tool config) | +| /help | Show all commands | +| /model [name] | Show or change model | +| /compact | Compress conversation to save context | +| /voice [mode] | Configure voice replies | +| /reasoning [effort] | Set reasoning level | +| /sethome | Set home channel for cron/notifications | +| /restart | Restart the gateway (picks up config changes) | + +## Troubleshooting + +### Voice messages not working +1. Check stt.enabled is true in config.yaml +2. Check a provider is available (faster-whisper installed, or API key set) +3. Restart gateway after config changes + +### Tool not available +1. Check if the toolset is enabled for your platform (run `hermes tools`) +2. Some tools need env vars — check the env file +3. Use /reset after enabling tools + +### Changes not taking effect +- Gateway: /reset for tool changes, /restart for config changes +- CLI: start a new session From 764825bbffde01624469ecd3a62d39d789bcb330 Mon Sep 17 00:00:00 2001 From: Test Date: Wed, 18 Mar 2026 03:05:17 -0700 Subject: [PATCH 2/3] feat: expand hermes-agent-setup skill + tell agent about it in STT notes Skill now covers full CLI usage (hermes setup, hermes skills, hermes tools, hermes config, session management, etc.), config file reference, and expanded gateway commands. Agent context notes for STT failure now mention the hermes-agent-setup skill is available to help users configure Hermes features. --- gateway/run.py | 13 +- skills/dogfood/hermes-agent-setup/SKILL.md | 188 ++++++++++++++++++--- 2 files changed, 177 insertions(+), 24 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 668977ef6..022753727 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3966,7 +3966,11 @@ class GatewayRunner: The enriched message string with transcriptions prepended. """ if not getattr(self.config, "stt_enabled", True): - disabled_note = "[The user sent voice message(s), but transcription is disabled in config.]" + disabled_note = ( + "[The user sent voice message(s), but transcription is disabled in config. " + "You have a skill called hermes-agent-setup that can help users configure " + "Hermes features including voice, tools, and more.]" + ) if user_text: return f"{disabled_note}\n\n{user_text}" return disabled_note @@ -3995,8 +3999,11 @@ class GatewayRunner: ): enriched_parts.append( "[The user sent a voice message but I can't listen " - "to it right now~ No STT provider is configured " - "(';w;') Let them know!]" + "to it right now — no STT provider is configured. " + "A direct message has already been sent to the user " + "with setup instructions. You have a skill called " + "hermes-agent-setup that can help users configure " + "Hermes features including voice, tools, and more.]" ) else: enriched_parts.append( diff --git a/skills/dogfood/hermes-agent-setup/SKILL.md b/skills/dogfood/hermes-agent-setup/SKILL.md index 275c06866..7b7b1c215 100644 --- a/skills/dogfood/hermes-agent-setup/SKILL.md +++ b/skills/dogfood/hermes-agent-setup/SKILL.md @@ -1,14 +1,14 @@ --- name: hermes-agent-setup -description: Help users configure Hermes Agent — enable tools, set up voice/STT/TTS, install dependencies, and troubleshoot. Use when someone asks to enable features, configure voice, or when the system detects missing config. -version: 1.0.0 +description: Help users configure Hermes Agent — CLI usage, setup wizard, model/provider selection, tools, skills, voice/STT/TTS, gateway, and troubleshooting. Use when someone asks to enable features, configure settings, or needs help with Hermes itself. +version: 1.1.0 author: Hermes Agent -tags: [setup, configuration, tools, stt, tts, voice, hermes] +tags: [setup, configuration, tools, stt, tts, voice, hermes, cli, skills] --- # Hermes Agent Setup & Configuration -Use this skill when a user asks to enable features, configure voice messages, set up tools, or troubleshoot configuration. +Use this skill when a user asks about configuring Hermes, enabling features, setting up voice, managing tools/skills, or troubleshooting. ## Key Paths @@ -16,6 +16,116 @@ Use this skill when a user asks to enable features, configure voice messages, se - API keys: `~/.hermes/.env` - Skills: `~/.hermes/skills/` - Hermes install: `~/.hermes/hermes-agent/` +- Venv: `~/.hermes/hermes-agent/.venv/` (or `venv/`) + +## CLI Overview + +Hermes is used via the `hermes` command (or `python -m hermes_cli.main` from the repo). + +### Core commands: + +``` +hermes Interactive chat (default) +hermes chat -q "question" Single query, then exit +hermes chat -m MODEL Chat with a specific model +hermes -c Resume most recent session +hermes -c "project name" Resume session by name +hermes --resume SESSION_ID Resume by exact ID +hermes -w Isolated git worktree mode +hermes -s skill1,skill2 Preload skills for the session +hermes --yolo Skip dangerous command approval +``` + +### Configuration & setup: + +``` +hermes setup Interactive setup wizard (provider, API keys, model) +hermes model Interactive model/provider selection +hermes config View current configuration +hermes config edit Open config.yaml in $EDITOR +hermes config set KEY VALUE Set a config value directly +hermes login Authenticate with a provider +hermes logout Clear stored auth +hermes doctor Check configuration and dependencies +``` + +### Tools & skills: + +``` +hermes tools Interactive tool enable/disable per platform +hermes skills list List installed skills +hermes skills search QUERY Search the skills hub +hermes skills install NAME Install a skill from the hub +hermes skills config Enable/disable skills per platform +``` + +### Gateway (messaging platforms): + +``` +hermes gateway run Start the messaging gateway +hermes gateway install Install gateway as background service +hermes gateway status Check gateway status +``` + +### Session management: + +``` +hermes sessions list List past sessions +hermes sessions browse Interactive session picker +hermes sessions rename ID TITLE Rename a session +hermes sessions export ID Export session as markdown +hermes sessions prune Clean up old sessions +``` + +### Other: + +``` +hermes status Show status of all components +hermes cron list List cron jobs +hermes insights Usage analytics +hermes update Update to latest version +hermes pairing Manage DM authorization codes +``` + +## Setup Wizard (`hermes setup`) + +The interactive setup wizard walks through: +1. **Provider selection** — OpenRouter, Anthropic, OpenAI, Google, DeepSeek, and many more +2. **API key entry** — stores securely in the env file +3. **Model selection** — picks from available models for the chosen provider +4. **Basic settings** — reasoning effort, tool preferences + +Run it from terminal: +```bash +cd ~/.hermes/hermes-agent +source .venv/bin/activate +python -m hermes_cli.main setup +``` + +To change just the model/provider later: `hermes model` + +## Skills Configuration (`hermes skills`) + +Skills are reusable instruction sets that extend what Hermes can do. + +### Managing skills: + +```bash +hermes skills list # Show installed skills +hermes skills search "docker" # Search the hub +hermes skills install NAME # Install from hub +hermes skills config # Enable/disable per platform +``` + +### Per-platform skill control: + +`hermes skills config` opens an interactive UI where you can enable or disable specific skills for each platform (cli, telegram, discord, etc.). Disabled skills won't appear in the agent's available skills list for that platform. + +### Loading skills in a session: + +- CLI: `hermes -s skill-name` or `hermes -s skill1,skill2` +- Chat: `/skill skill-name` +- Gateway: type `/skill skill-name` in any chat ## Voice Messages (STT) @@ -73,9 +183,9 @@ Hermes can reply with voice when users send voice messages. - `/voice tts` — voice reply to all messages - `/voice off` — text only (default) -## Enabling/Disabling Tools +## Enabling/Disabling Tools (`hermes tools`) -### Interactive tool config (requires terminal): +### Interactive tool config: ```bash cd ~/.hermes/hermes-agent @@ -83,11 +193,11 @@ source .venv/bin/activate python -m hermes_cli.main tools ``` -This opens a curses UI to enable/disable toolsets per platform. +This opens a curses UI to enable/disable toolsets per platform (cli, telegram, discord, slack, etc.). ### After changing tools: -Use `/reset` in the chat to start a fresh session with the new toolset. Tool changes do NOT take effect mid-conversation (this preserves prompt caching). +Use `/reset` in the chat to start a fresh session with the new toolset. Tool changes do NOT take effect mid-conversation (this preserves prompt caching and avoids cost spikes). ### Common toolsets: @@ -96,10 +206,10 @@ Use `/reset` in the chat to start a fresh session with the new toolset. Tool cha | terminal | Shell command execution | | file | File read/write/search/patch | | web | Web search and extraction | -| browser | Browser automation | +| browser | Browser automation (needs Browserbase) | | image_gen | AI image generation | | mcp | MCP server connections | -| voice | Text-to-speech | +| voice | Text-to-speech output | | cronjob | Scheduled tasks | ## Installing Dependencies @@ -109,22 +219,43 @@ Some tools need extra packages: ```bash cd ~/.hermes/hermes-agent && source .venv/bin/activate -pip install faster-whisper # Local STT +pip install faster-whisper # Local STT (voice transcription) pip install browserbase # Browser automation -pip install mcp # MCP servers +pip install mcp # MCP server connections ``` -## Setup Wizard +## Config File Reference -For first-time setup or full reconfiguration: +The main config file is `~/.hermes/config.yaml`. Key sections: -```bash -cd ~/.hermes/hermes-agent -source .venv/bin/activate -python -m hermes_cli.main setup +```yaml +# Model and provider +model: + default: anthropic/claude-opus-4.6 + provider: openrouter + +# Agent behavior +agent: + max_turns: 90 + reasoning_effort: high # xhigh, high, medium, low, minimal, none + +# Voice +stt: + enabled: true + provider: local # local, groq, openai +tts: + provider: elevenlabs # elevenlabs, openai, kokoro, fish + +# Display +display: + skin: default # default, ares, mono, slate + tool_progress: full # full, compact, off + background_process_notifications: all # all, result, error, off ``` -## Gateway Commands +Edit with `hermes config edit` or `hermes config set KEY VALUE`. + +## Gateway Commands (Messaging Platforms) | Command | What it does | |---------|-------------| @@ -136,19 +267,34 @@ python -m hermes_cli.main setup | /reasoning [effort] | Set reasoning level | | /sethome | Set home channel for cron/notifications | | /restart | Restart the gateway (picks up config changes) | +| /status | Show session info | +| /retry | Retry last message | +| /undo | Remove last exchange | +| /personality [name] | Set agent personality | +| /skill [name] | Load a skill | ## Troubleshooting ### Voice messages not working 1. Check stt.enabled is true in config.yaml 2. Check a provider is available (faster-whisper installed, or API key set) -3. Restart gateway after config changes +3. Restart gateway after config changes (/restart) ### Tool not available -1. Check if the toolset is enabled for your platform (run `hermes tools`) +1. Run `hermes tools` to check if the toolset is enabled for your platform 2. Some tools need env vars — check the env file 3. Use /reset after enabling tools +### Model/provider issues +1. Run `hermes doctor` to check configuration +2. Run `hermes login` to re-authenticate +3. Check the env file has the right API key + ### Changes not taking effect - Gateway: /reset for tool changes, /restart for config changes - CLI: start a new session + +### Skills not showing up +1. Check `hermes skills list` shows the skill +2. Check `hermes skills config` has it enabled for your platform +3. Load explicitly with `/skill name` or `hermes -s name` From 190c07975d7bdb896bd9106ca25c6cf2668c0e1d Mon Sep 17 00:00:00 2001 From: Test Date: Wed, 18 Mar 2026 03:17:23 -0700 Subject: [PATCH 3/3] fix: check skill availability before hinting at hermes-agent-setup Only mention the hermes-agent-setup skill in STT failure notes (both the direct user message and the agent context note) when the skill is actually installed. Uses _find_skill() from skill_manager_tool. Also confirmed: STT is the only user-facing failure case where the setup skill hint helps. Vision failures are transient API issues, runtime transcription errors indicate a configured-but-broken provider, and platform startup warnings are server logs. --- gateway/run.py | 48 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 022753727..62d16e680 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -432,6 +432,16 @@ class GatewayRunner: for session_key in list(managers.keys()): self._shutdown_gateway_honcho(session_key) + # -- Setup skill availability ---------------------------------------- + + def _has_setup_skill(self) -> bool: + """Check if the hermes-agent-setup skill is installed.""" + try: + from tools.skill_manager_tool import _find_skill + return _find_skill("hermes-agent-setup") is not None + except Exception: + return False + # -- Voice mode persistence ------------------------------------------ _VOICE_MODE_PATH = _hermes_home / "gateway_voice_mode.json" @@ -1884,15 +1894,19 @@ class GatewayRunner: _stt_meta = {"thread_id": source.thread_id} if source.thread_id else None if _stt_adapter: try: - await _stt_adapter.send( - source.chat_id, + _stt_msg = ( "🎤 I received your voice message but can't transcribe it — " "no speech-to-text provider is configured.\n\n" "To enable voice: install faster-whisper " "(`pip install faster-whisper` in the Hermes venv) " "and set `stt.enabled: true` in config.yaml, " - "then /restart the gateway.\n\n" - "For full setup instructions, type: `/skill hermes-agent-setup`", + "then /restart the gateway." + ) + # Point to setup skill if it's installed + if self._has_setup_skill(): + _stt_msg += "\n\nFor full setup instructions, type: `/skill hermes-agent-setup`" + await _stt_adapter.send( + source.chat_id, _stt_msg, metadata=_stt_meta, ) except Exception: @@ -3966,11 +3980,13 @@ class GatewayRunner: The enriched message string with transcriptions prepended. """ if not getattr(self.config, "stt_enabled", True): - disabled_note = ( - "[The user sent voice message(s), but transcription is disabled in config. " - "You have a skill called hermes-agent-setup that can help users configure " - "Hermes features including voice, tools, and more.]" - ) + disabled_note = "[The user sent voice message(s), but transcription is disabled in config." + if self._has_setup_skill(): + disabled_note += ( + " You have a skill called hermes-agent-setup that can help " + "users configure Hermes features including voice, tools, and more." + ) + disabled_note += "]" if user_text: return f"{disabled_note}\n\n{user_text}" return disabled_note @@ -3997,14 +4013,20 @@ class GatewayRunner: "No STT provider" in error or error.startswith("Neither VOICE_TOOLS_OPENAI_KEY nor OPENAI_API_KEY is set") ): - enriched_parts.append( + _no_stt_note = ( "[The user sent a voice message but I can't listen " "to it right now — no STT provider is configured. " "A direct message has already been sent to the user " - "with setup instructions. You have a skill called " - "hermes-agent-setup that can help users configure " - "Hermes features including voice, tools, and more.]" + "with setup instructions." ) + if self._has_setup_skill(): + _no_stt_note += ( + " You have a skill called hermes-agent-setup " + "that can help users configure Hermes features " + "including voice, tools, and more." + ) + _no_stt_note += "]" + enriched_parts.append(_no_stt_note) else: enriched_parts.append( "[The user sent a voice message but I had trouble "