Files
hermes-agent/tests/gateway/test_runner_fatal_adapter.py

97 lines
3.0 KiB
Python
Raw Permalink Normal View History

from unittest.mock import AsyncMock
fix(gateway): harden Telegram polling conflict handling - detect Telegram getUpdates conflicts and stop polling cleanly instead of retry-spamming forever - add a machine-local token-scoped lock so different HERMES_HOME profiles on the same host can't poll the same bot token at once - persist gateway runtime health/fatal adapter state and surface it in ● hermes-gateway.service - Hermes Agent Gateway - Messaging Platform Integration Loaded: loaded (/home/teknium/.config/systemd/user/hermes-gateway.service; enabled; preset: enabled) Active: active (running) since Sat 2026-03-14 09:25:35 PDT; 2h 45min ago Invocation: 8879379b25994201b98381f4bd80c2af Main PID: 1147926 (python) Tasks: 16 (limit: 76757) Memory: 151.4M (peak: 168.1M) CPU: 47.883s CGroup: /user.slice/user-1000.slice/user@1000.service/app.slice/hermes-gateway.service ├─1147926 /home/teknium/.hermes/hermes-agent/venv/bin/python -m hermes_cli.main gateway run --replace └─1147966 node /home/teknium/.hermes/hermes-agent/scripts/whatsapp-bridge/bridge.js --port 3000 --session /home/teknium/.hermes/whatsapp/session --mode self-chat Mar 14 09:27:03 teknium-dev python[1147926]: 🔄 Retrying API call (2/3)... Mar 14 09:27:04 teknium-dev python[1147926]: [409B blob data] Mar 14 09:27:04 teknium-dev python[1147926]: Content: '' Mar 14 09:27:04 teknium-dev python[1147926]: ❌ Max retries (3) for empty content exceeded. Mar 14 09:27:07 teknium-dev python[1147926]: [1K blob data] Mar 14 09:27:07 teknium-dev python[1147926]: Content: '' Mar 14 09:27:07 teknium-dev python[1147926]: 🔄 Retrying API call (1/3)... Mar 14 09:27:12 teknium-dev python[1147926]: [1.7K blob data] Mar 14 09:27:12 teknium-dev python[1147926]: Content: '' Mar 14 09:27:12 teknium-dev python[1147926]: 🔄 Retrying API call (2/3)... 
⚠ Installed gateway service definition is outdated Run: hermes gateway restart # auto-refreshes the unit ✓ Gateway service is running ✓ Systemd linger is enabled (service survives logout) - cleanly exit non-retryable startup conflicts without triggering service restart loops Tests: - gateway status runtime-state helpers - Telegram token-lock and polling-conflict behavior - GatewayRunner clean exit on non-retryable startup conflict - CLI runtime health summary
2026-03-14 12:11:23 -07:00
import pytest
from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.platforms.base import BasePlatformAdapter
from gateway.run import GatewayRunner
class _FatalAdapter(BasePlatformAdapter):
    """Telegram adapter stub whose ``connect`` always fails non-retryably.

    Used to exercise the runner's handling of a startup conflict:
    ``connect`` records a ``telegram_token_lock`` fatal error with
    ``retryable=False`` and reports failure.
    """

    def __init__(self):
        platform_config = PlatformConfig(enabled=True, token="token")
        super().__init__(platform_config, Platform.TELEGRAM)

    async def connect(self) -> bool:
        """Record the token-lock fatal error and return ``False``."""
        error_code = "telegram_token_lock"
        error_message = (
            "Another local Hermes gateway is already using this Telegram bot token."
        )
        self._set_fatal_error(error_code, error_message, retryable=False)
        return False

    async def disconnect(self) -> None:
        """Delegate to the base class's ``_mark_disconnected`` helper."""
        self._mark_disconnected()

    async def send(self, chat_id, content, reply_to=None, metadata=None):
        """Sending is not supported by this stub; always raises."""
        raise NotImplementedError

    async def get_chat_info(self, chat_id):
        """Return a minimal mapping that echoes ``chat_id`` under ``"id"``."""
        return {"id": chat_id}
class _RuntimeRetryableAdapter(BasePlatformAdapter):
    """WhatsApp adapter stub that connects successfully.

    The adapter itself never fails; tests set a fatal error on it
    explicitly after construction.
    """

    def __init__(self):
        platform_config = PlatformConfig(enabled=True, token="token")
        super().__init__(platform_config, Platform.WHATSAPP)

    async def connect(self) -> bool:
        """Report a successful connection."""
        return True

    async def disconnect(self) -> None:
        """Delegate to the base class's ``_mark_disconnected`` helper."""
        self._mark_disconnected()

    async def send(self, chat_id, content, reply_to=None, metadata=None):
        """Sending is not supported by this stub; always raises."""
        raise NotImplementedError

    async def get_chat_info(self, chat_id):
        """Return a minimal mapping that echoes ``chat_id`` under ``"id"``."""
        return {"id": chat_id}
fix(gateway): harden Telegram polling conflict handling - detect Telegram getUpdates conflicts and stop polling cleanly instead of retry-spamming forever - add a machine-local token-scoped lock so different HERMES_HOME profiles on the same host can't poll the same bot token at once - persist gateway runtime health/fatal adapter state and surface it in ● hermes-gateway.service - Hermes Agent Gateway - Messaging Platform Integration Loaded: loaded (/home/teknium/.config/systemd/user/hermes-gateway.service; enabled; preset: enabled) Active: active (running) since Sat 2026-03-14 09:25:35 PDT; 2h 45min ago Invocation: 8879379b25994201b98381f4bd80c2af Main PID: 1147926 (python) Tasks: 16 (limit: 76757) Memory: 151.4M (peak: 168.1M) CPU: 47.883s CGroup: /user.slice/user-1000.slice/user@1000.service/app.slice/hermes-gateway.service ├─1147926 /home/teknium/.hermes/hermes-agent/venv/bin/python -m hermes_cli.main gateway run --replace └─1147966 node /home/teknium/.hermes/hermes-agent/scripts/whatsapp-bridge/bridge.js --port 3000 --session /home/teknium/.hermes/whatsapp/session --mode self-chat Mar 14 09:27:03 teknium-dev python[1147926]: 🔄 Retrying API call (2/3)... Mar 14 09:27:04 teknium-dev python[1147926]: [409B blob data] Mar 14 09:27:04 teknium-dev python[1147926]: Content: '' Mar 14 09:27:04 teknium-dev python[1147926]: ❌ Max retries (3) for empty content exceeded. Mar 14 09:27:07 teknium-dev python[1147926]: [1K blob data] Mar 14 09:27:07 teknium-dev python[1147926]: Content: '' Mar 14 09:27:07 teknium-dev python[1147926]: 🔄 Retrying API call (1/3)... Mar 14 09:27:12 teknium-dev python[1147926]: [1.7K blob data] Mar 14 09:27:12 teknium-dev python[1147926]: Content: '' Mar 14 09:27:12 teknium-dev python[1147926]: 🔄 Retrying API call (2/3)... 
⚠ Installed gateway service definition is outdated Run: hermes gateway restart # auto-refreshes the unit ✓ Gateway service is running ✓ Systemd linger is enabled (service survives logout) - cleanly exit non-retryable startup conflicts without triggering service restart loops Tests: - gateway status runtime-state helpers - Telegram token-lock and polling-conflict behavior - GatewayRunner clean exit on non-retryable startup conflict - CLI runtime health summary
2026-03-14 12:11:23 -07:00
@pytest.mark.asyncio
async def test_runner_requests_clean_exit_for_nonretryable_startup_conflict(monkeypatch, tmp_path):
    """A non-retryable fatal error during startup requests a clean exit.

    The runner's adapter factory is patched to hand back ``_FatalAdapter``,
    whose ``connect`` fails with a non-retryable token-lock error.
    ``start()`` is still expected to return ``True``, with
    ``should_exit_cleanly`` set and the adapter's message in ``exit_reason``.
    """
    telegram_config = PlatformConfig(enabled=True, token="token")
    gateway_config = GatewayConfig(
        platforms={Platform.TELEGRAM: telegram_config},
        sessions_dir=tmp_path / "sessions",
    )
    runner = GatewayRunner(gateway_config)

    def _make_fatal_adapter(platform, platform_config):
        # Ignore the requested platform/config: always return the failing stub.
        return _FatalAdapter()

    monkeypatch.setattr(runner, "_create_adapter", _make_fatal_adapter)

    started = await runner.start()

    assert started is True
    assert runner.should_exit_cleanly is True
    assert "already using this Telegram bot token" in runner.exit_reason
@pytest.mark.asyncio
async def test_runner_queues_retryable_runtime_fatal_for_reconnection(monkeypatch, tmp_path):
    """A retryable runtime fatal error queues the platform and stops the runner.

    After ``_handle_adapter_fatal_error`` runs for an adapter carrying a
    retryable fatal error, the test expects ``runner.stop`` to have been
    awaited exactly once, ``_exit_with_failure`` to be set, and the platform
    to be recorded in ``_failed_platforms`` with zero attempts so far.
    """
    whatsapp_config = PlatformConfig(enabled=True, token="token")
    gateway_config = GatewayConfig(
        platforms={Platform.WHATSAPP: whatsapp_config},
        sessions_dir=tmp_path / "sessions",
    )
    runner = GatewayRunner(gateway_config)

    # Simulate an adapter that was connected and then hit a retryable failure.
    adapter = _RuntimeRetryableAdapter()
    error_code = "whatsapp_bridge_exited"
    error_message = "WhatsApp bridge process exited unexpectedly (code 1)."
    adapter._set_fatal_error(error_code, error_message, retryable=True)

    connected = {Platform.WHATSAPP: adapter}
    runner.adapters = connected
    runner.delivery_router.adapters = runner.adapters
    # Replace stop() so the call can be observed without a real shutdown.
    runner.stop = AsyncMock()

    await runner._handle_adapter_fatal_error(adapter)

    # Should shut down with failure — systemd Restart=on-failure will restart
    runner.stop.assert_awaited_once()
    assert runner._exit_with_failure is True

    failed_platforms = runner._failed_platforms
    assert Platform.WHATSAPP in failed_platforms
    assert failed_platforms[Platform.WHATSAPP]["attempts"] == 0