diff --git a/docs/messaging.md b/docs/messaging.md index d45509d08..10474a483 100644 --- a/docs/messaging.md +++ b/docs/messaging.md @@ -34,12 +34,12 @@ python cli.py --gateway # Runs in foreground, useful for debugging │ Hermes Gateway │ ├─────────────────────────────────────────────────────────────────┤ │ │ -│ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ │ -│ │ Telegram │ │ Discord │ │ WhatsApp │ │ -│ │ Adapter │ │ Adapter │ │ Adapter │ │ -│ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ │ -│ │ │ │ │ -│ └─────────────────┼─────────────────┘ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ Telegram │ │ Discord │ │ WhatsApp │ │ Slack │ │ +│ │ Adapter │ │ Adapter │ │ Adapter │ │ Adapter │ │ +│ └────┬─────┘ └────┬─────┘ └────┬─────┘ └────┬─────┘ │ +│ │ │ │ │ │ +│ └─────────────┼────────────┼─────────────┘ │ │ │ │ │ ┌────────▼────────┐ │ │ │ Session Store │ │ @@ -134,29 +134,39 @@ pip install discord.py>=2.0 ### WhatsApp -WhatsApp integration is more complex due to the lack of a simple bot API. +WhatsApp uses a built-in bridge powered by [Baileys](https://github.com/WhiskeySockets/Baileys) that connects via WhatsApp Web. The agent links to your WhatsApp account and responds to incoming messages. -**Options:** -1. **WhatsApp Business API** (requires Meta verification) -2. **whatsapp-web.js** via Node.js bridge (for personal accounts) +**Setup:** -**Bridge Setup:** -1. Install Node.js -2. Set up the bridge script (see `scripts/whatsapp-bridge/` for reference) -3. Configure in gateway: - ```json - { - "platforms": { - "whatsapp": { - "enabled": true, - "extra": { - "bridge_script": "/path/to/bridge.js", - "bridge_port": 3000 - } - } - } - } - ``` +```bash +hermes whatsapp +``` + +This will: +- Enable WhatsApp in your `.env` +- Ask for your phone number (for the allowlist) +- Install bridge dependencies (Node.js required) +- Display a QR code — scan it with your phone (WhatsApp → Settings → Linked Devices → Link a Device) +- Exit automatically once paired + +Then start the gateway: + +```bash +hermes gateway +``` + +The gateway starts the WhatsApp bridge automatically using the saved session credentials in `~/.hermes/whatsapp/session/`. + +**Environment variables:** + +```bash +WHATSAPP_ENABLED=true +WHATSAPP_ALLOWED_USERS=15551234567 # Comma-separated phone numbers with country code +``` + +Agent responses are prefixed with "⚕ **Hermes Agent**" so you can distinguish them from your own messages when messaging yourself. + +> **Re-pairing:** If WhatsApp Web sessions disconnect (protocol updates, phone reset), re-pair with `hermes whatsapp`. ## Configuration @@ -187,8 +197,17 @@ DISCORD_ALLOWED_USERS=123456789012345678 # Security: restrict to these user DISCORD_HOME_CHANNEL=123456789012345678 DISCORD_HOME_CHANNEL_NAME="#bot-updates" -# WhatsApp - requires Node.js bridge setup +# Slack - get from Slack API (api.slack.com/apps) +SLACK_BOT_TOKEN=xoxb-your-slack-bot-token +SLACK_APP_TOKEN=xapp-your-slack-app-token # Required for Socket Mode +SLACK_ALLOWED_USERS=U01234ABCDE # Security: restrict to these user IDs + +# Optional: Default channel for cron job delivery +# SLACK_HOME_CHANNEL=C01234567890 + +# WhatsApp - pair via: hermes whatsapp WHATSAPP_ENABLED=true +WHATSAPP_ALLOWED_USERS=15551234567 # Phone numbers with country code # ============================================================================= # AGENT SETTINGS @@ -272,6 +291,7 @@ Each platform has its own toolset for security: | Telegram | `hermes-telegram` | Full tools including terminal | | Discord | `hermes-discord` | Full tools including terminal | | WhatsApp | `hermes-whatsapp` | Full tools including terminal | +| Slack | `hermes-slack` | Full tools including terminal | ## User Experience Features diff --git a/run_agent.py b/run_agent.py index 67121d20f..1cf3808e1 100644 --- a/run_agent.py +++ b/run_agent.py @@ -596,7 +596,7 @@ class AIAgent: if not self._session_db: return try: - start_idx = (len(conversation_history) if conversation_history else 0) + 1 + start_idx = len(conversation_history) if conversation_history else 0 for msg in messages[start_idx:]: role = msg.get("role", "unknown") content = msg.get("content") @@ -943,8 +943,6 @@ class AIAgent: if not content: return content content = convert_scratchpad_to_think(content) - # Strip extra newlines before/after think blocks - import re content = re.sub(r'\n+()', r'\n\1', content) content = re.sub(r'()\n+', r'\1\n', content) return content.strip() @@ -1305,7 +1303,8 @@ class AIAgent: "[System: The session is being compressed. " "Please save anything worth remembering to your memories.]" ) - flush_msg = {"role": "user", "content": flush_content} + _sentinel = f"__flush_{id(self)}_{time.monotonic()}" + flush_msg = {"role": "user", "content": flush_content, "_flush_sentinel": _sentinel} messages.append(flush_msg) try: @@ -1367,10 +1366,13 @@ class AIAgent: except Exception as e: logger.debug("Memory flush API call failed: %s", e) finally: - # Strip flush artifacts: remove everything from the flush message onward - while messages and messages[-1] is not flush_msg and len(messages) > 0: + # Strip flush artifacts: remove everything from the flush message onward. + # Use sentinel marker instead of identity check for robustness. + while messages and messages[-1].get("_flush_sentinel") != _sentinel: messages.pop() - if messages and messages[-1] is flush_msg: + if not messages: + break + if messages and messages[-1].get("_flush_sentinel") == _sentinel: messages.pop() def _compress_context(self, messages: list, system_message: str, *, approx_tokens: int = None) -> tuple: @@ -1565,12 +1567,19 @@ class AIAgent: try: function_result = handle_function_call(function_name, function_args, effective_task_id) _spinner_result = function_result + except Exception as tool_error: + function_result = f"Error executing tool '{function_name}': {tool_error}" + logger.error("handle_function_call raised for %s: %s", function_name, tool_error) finally: tool_duration = time.time() - tool_start_time cute_msg = _get_cute_tool_message_impl(function_name, function_args, tool_duration, result=_spinner_result) spinner.stop(cute_msg) else: - function_result = handle_function_call(function_name, function_args, effective_task_id) + try: + function_result = handle_function_call(function_name, function_args, effective_task_id) + except Exception as tool_error: + function_result = f"Error executing tool '{function_name}': {tool_error}" + logger.error("handle_function_call raised for %s: %s", function_name, tool_error) tool_duration = time.time() - tool_start_time result_preview = function_result[:200] if len(function_result) > 200 else function_result @@ -1877,7 +1886,7 @@ class AIAgent: retry_count = 0 max_retries = 6 # Increased to allow longer backoff periods - while retry_count <= max_retries: + while retry_count < max_retries: try: api_kwargs = self._build_api_kwargs(api_messages) @@ -1971,6 +1980,7 @@ class AIAgent: if self._interrupt_requested: print(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.") self._persist_session(messages, conversation_history) + self.clear_interrupt() return { "final_response": "Operation interrupted.", "messages": messages, @@ -2073,6 +2083,7 @@ class AIAgent: if self._interrupt_requested: print(f"{self.log_prefix}⚡ Interrupt detected during error handling, aborting retries.") self._persist_session(messages, conversation_history) + self.clear_interrupt() return { "final_response": "Operation interrupted.", "messages": messages, @@ -2160,6 +2171,7 @@ class AIAgent: if self._interrupt_requested: print(f"{self.log_prefix}⚡ Interrupt detected during retry wait, aborting.") self._persist_session(messages, conversation_history) + self.clear_interrupt() return { "final_response": "Operation interrupted.", "messages": messages,