Files
hermes-agent/tests/gateway/test_webhook_integration.py

340 lines
12 KiB
Python
Raw Normal View History

feat(gateway): add webhook platform adapter for external event triggers Add a generic webhook platform adapter that receives HTTP POSTs from external services (GitHub, GitLab, JIRA, Stripe, etc.), validates HMAC signatures, transforms payloads into agent prompts, and routes responses back to the source or to another platform. Features: - Configurable routes with per-route HMAC secrets, event filters, prompt templates with dot-notation payload access, skill loading, and pluggable delivery (github_comment, telegram, discord, log) - HMAC signature validation (GitHub SHA-256, GitLab token, generic) - Rate limiting (30 req/min per route, configurable) - Idempotency cache (1hr TTL, prevents duplicate runs on retries) - Body size limits (1MB default, checked before reading payload) - Setup wizard integration with security warnings and docs links - 33 tests (29 unit + 4 integration), all passing Security: - HMAC secret required per route (startup validation) - Setup wizard warns about internet exposure for webhook/SMS platforms - Sandboxing (Docker/VM) recommended in docs for public-facing deployments Files changed: - gateway/config.py — Platform.WEBHOOK enum + env var overrides - gateway/platforms/webhook.py — WebhookAdapter (~420 lines) - gateway/run.py — factory wiring + auth bypass for webhook events - hermes_cli/config.py — WEBHOOK_* env var definitions - hermes_cli/setup.py — webhook section in setup_gateway() - tests/gateway/test_webhook_adapter.py — 29 unit tests - tests/gateway/test_webhook_integration.py — 4 integration tests - website/docs/user-guide/messaging/webhooks.md — full user docs - website/docs/reference/environment-variables.md — WEBHOOK_* vars - website/sidebars.ts — nav entry
2026-03-20 06:33:36 -07:00
"""Integration tests for the generic webhook platform adapter.
These tests exercise end-to-end flows through the webhook adapter:
1. GitHub PR webhook → agent MessageEvent created
2. Skills config injects skill content into the prompt
3. Cross-platform delivery routes to a mock Telegram adapter
4. GitHub comment delivery invokes ``gh`` CLI (mocked subprocess)
"""
import asyncio
import hashlib
import hmac
import json
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from aiohttp import web
from aiohttp.test_utils import TestClient, TestServer
from gateway.config import (
GatewayConfig,
HomeChannel,
Platform,
PlatformConfig,
)
from gateway.platforms.base import MessageEvent, MessageType, SendResult
from gateway.platforms.webhook import WebhookAdapter, _INSECURE_NO_AUTH
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _make_adapter(routes, **extra_kw) -> WebhookAdapter:
    """Build an enabled WebhookAdapter configured with *routes*.

    The adapter's ``extra`` config starts from host ``0.0.0.0``, port ``0``
    and the supplied routes; any ``extra_kw`` entries are merged on top and
    override those defaults when the keys collide.
    """
    settings = {"host": "0.0.0.0", "port": 0, "routes": routes, **extra_kw}
    return WebhookAdapter(PlatformConfig(enabled=True, extra=settings))
def _create_app(adapter: WebhookAdapter) -> web.Application:
    """Return an aiohttp Application wired to *adapter*'s request handlers.

    Registers ``GET /health`` and ``POST /webhooks/{route_name}`` so tests
    can drive the adapter through a real HTTP round trip.
    """
    application = web.Application()
    router = application.router
    router.add_get("/health", adapter._handle_health)
    router.add_post("/webhooks/{route_name}", adapter._handle_webhook)
    return application
def _github_signature(body: bytes, secret: str) -> str:
    """Return the X-Hub-Signature-256 header value for *body*.

    The value is ``sha256=`` followed by the hex HMAC-SHA256 digest of
    *body*, keyed with the encoded *secret*.
    """
    key = secret.encode()
    mac = hmac.new(key, body, hashlib.sha256)
    return f"sha256={mac.hexdigest()}"
# Trimmed-down GitHub ``pull_request`` event body shared by the tests below.
# Only the fields the route prompt / deliver_extra templates reference
# (number, sender.login, pull_request.*, repository.full_name) plus a few
# neighbouring ones are kept.
GITHUB_PR_PAYLOAD = {
    "action": "opened",
    "number": 42,
    "pull_request": {
        "title": "Add webhook adapter",
        "body": "This PR adds a generic webhook platform adapter.",
        "html_url": "https://github.com/org/repo/pull/42",
        "user": {"login": "contributor"},
        "head": {"ref": "feature/webhooks"},
        "base": {"ref": "main"},
    },
    "repository": {
        "full_name": "org/repo",
        "html_url": "https://github.com/org/repo",
    },
    "sender": {"login": "contributor"},
}
# ===================================================================
# Test 1: GitHub PR webhook triggers agent
# ===================================================================
class TestGitHubPRWebhook:
    """End-to-end flow for a signed GitHub ``pull_request`` delivery."""

    @pytest.mark.asyncio
    async def test_github_pr_webhook_triggers_agent(self):
        """POST with a realistic GitHub PR payload should:

        1. Return 202 Accepted
        2. Call handle_message with a MessageEvent
        3. The event text contains the rendered prompt
        4. The event source has chat_type 'webhook'
        """
        secret = "gh-webhook-test-secret"
        routes = {
            "github-pr": {
                "secret": secret,
                "events": ["pull_request"],
                "prompt": (
                    "Review PR #{number} by {sender.login}: "
                    "{pull_request.title}\n\n{pull_request.body}"
                ),
                "deliver": "log",
            }
        }
        adapter = _make_adapter(routes)

        captured_events: list[MessageEvent] = []
        # Signalled by the capture hook so the test can wait for the
        # background dispatch deterministically instead of sleeping for a
        # fixed interval (which is timing-dependent on slow runners).
        event_received = asyncio.Event()

        async def _capture(event: MessageEvent):
            captured_events.append(event)
            event_received.set()

        adapter.handle_message = _capture

        app = _create_app(adapter)
        body = json.dumps(GITHUB_PR_PAYLOAD).encode()
        # Sign the exact bytes that are sent so the HMAC check matches.
        sig = _github_signature(body, secret)

        async with TestClient(TestServer(app)) as cli:
            resp = await cli.post(
                "/webhooks/github-pr",
                data=body,
                headers={
                    "Content-Type": "application/json",
                    "X-GitHub-Event": "pull_request",
                    "X-Hub-Signature-256": sig,
                    "X-GitHub-Delivery": "gh-delivery-001",
                },
            )
            assert resp.status == 202
            data = await resp.json()
            assert data["status"] == "accepted"
            assert data["route"] == "github-pr"
            assert data["event"] == "pull_request"
            assert data["delivery_id"] == "gh-delivery-001"

            # handle_message is dispatched from a background task; wait for
            # it to run, bounded so a missing call fails instead of hanging.
            await asyncio.wait_for(event_received.wait(), timeout=2.0)

        assert len(captured_events) == 1
        event = captured_events[0]
        assert "Review PR #42 by contributor" in event.text
        assert "Add webhook adapter" in event.text
        assert event.source.chat_type == "webhook"
        assert event.source.platform == Platform.WEBHOOK
        assert "github-pr" in event.source.chat_id
        assert event.message_id == "gh-delivery-001"
# ===================================================================
# Test 2: Skills injected into prompt
# ===================================================================
class TestSkillsInjection:
    """A route with ``skills`` should produce a skill-built prompt."""

    @pytest.mark.asyncio
    async def test_skills_injected_into_prompt(self):
        """When a route has skills: [code-review], the adapter should
        call build_skill_invocation_message() and use its output as the
        prompt instead of the raw template render."""
        routes = {
            "pr-review": {
                "secret": _INSECURE_NO_AUTH,
                "events": ["pull_request"],
                "prompt": "Review this PR: {pull_request.title}",
                "skills": ["code-review"],
            }
        }
        adapter = _make_adapter(routes)

        captured_events: list[MessageEvent] = []
        # Signalled by the capture hook so the test waits for the
        # background dispatch deterministically instead of sleeping for a
        # fixed interval (which is timing-dependent on slow runners).
        event_received = asyncio.Event()

        async def _capture(event: MessageEvent):
            captured_events.append(event)
            event_received.set()

        adapter.handle_message = _capture

        skill_content = (
            "You are a code reviewer. Review the following:\n"
            "Review this PR: Add webhook adapter"
        )

        # The imports are lazy (inside the handler), so patch the source module
        with patch(
            "agent.skill_commands.build_skill_invocation_message",
            return_value=skill_content,
        ) as mock_build, patch(
            "agent.skill_commands.get_skill_commands",
            return_value={"/code-review": {"name": "code-review"}},
        ):
            app = _create_app(adapter)
            async with TestClient(TestServer(app)) as cli:
                resp = await cli.post(
                    "/webhooks/pr-review",
                    json=GITHUB_PR_PAYLOAD,
                    headers={
                        "X-GitHub-Event": "pull_request",
                        "X-GitHub-Delivery": "skill-test-001",
                    },
                )
                assert resp.status == 202

                # Wait while the patches are still active so the handler's
                # lazy imports resolve to the mocks; bounded so a missing
                # handle_message call fails instead of hanging.
                await asyncio.wait_for(event_received.wait(), timeout=2.0)

        assert len(captured_events) == 1
        event = captured_events[0]
        # The prompt should be the skill content, not the raw template
        assert "You are a code reviewer" in event.text
        mock_build.assert_called_once()
# ===================================================================
# Test 3: Cross-platform delivery (webhook → Telegram)
# ===================================================================
class TestCrossPlatformDelivery:
    """Webhook responses can be forwarded to another platform adapter."""

    @pytest.mark.asyncio
    async def test_cross_platform_delivery(self):
        """With deliver='telegram', send() hands the response to the
        Telegram adapter registered on gateway_runner.adapters."""
        adapter = _make_adapter(
            {
                "alerts": {
                    "secret": _INSECURE_NO_AUTH,
                    "prompt": "Alert: {message}",
                    "deliver": "telegram",
                    "deliver_extra": {"chat_id": "12345"},
                }
            }
        )
        adapter.handle_message = AsyncMock()

        # Fake gateway runner exposing a single mocked Telegram adapter.
        telegram = AsyncMock()
        telegram.send = AsyncMock(return_value=SendResult(success=True))
        runner = MagicMock()
        runner.adapters = {Platform.TELEGRAM: telegram}
        runner.config = GatewayConfig(
            platforms={
                Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake"),
            }
        )
        adapter.gateway_runner = runner

        # The POST is what populates the adapter's per-delivery routing info.
        async with TestClient(TestServer(_create_app(adapter))) as client:
            response = await client.post(
                "/webhooks/alerts",
                json={"message": "Server is on fire!"},
                headers={"X-GitHub-Delivery": "alert-001"},
            )
            assert response.status == 202

        delivery_key = "webhook:alerts:alert-001"
        assert delivery_key in adapter._delivery_info

        # Act as the agent delivering its final answer.
        reply = "I've acknowledged the alert."
        outcome = await adapter.send(delivery_key, reply)
        assert outcome.success is True
        telegram.send.assert_awaited_once_with("12345", reply, metadata=None)

        # Delivery info is retained after send() so interim status messages
        # don't strand the final response (TTL-based cleanup happens on POST).
        assert delivery_key in adapter._delivery_info
feat(gateway): add webhook platform adapter for external event triggers Add a generic webhook platform adapter that receives HTTP POSTs from external services (GitHub, GitLab, JIRA, Stripe, etc.), validates HMAC signatures, transforms payloads into agent prompts, and routes responses back to the source or to another platform. Features: - Configurable routes with per-route HMAC secrets, event filters, prompt templates with dot-notation payload access, skill loading, and pluggable delivery (github_comment, telegram, discord, log) - HMAC signature validation (GitHub SHA-256, GitLab token, generic) - Rate limiting (30 req/min per route, configurable) - Idempotency cache (1hr TTL, prevents duplicate runs on retries) - Body size limits (1MB default, checked before reading payload) - Setup wizard integration with security warnings and docs links - 33 tests (29 unit + 4 integration), all passing Security: - HMAC secret required per route (startup validation) - Setup wizard warns about internet exposure for webhook/SMS platforms - Sandboxing (Docker/VM) recommended in docs for public-facing deployments Files changed: - gateway/config.py — Platform.WEBHOOK enum + env var overrides - gateway/platforms/webhook.py — WebhookAdapter (~420 lines) - gateway/run.py — factory wiring + auth bypass for webhook events - hermes_cli/config.py — WEBHOOK_* env var definitions - hermes_cli/setup.py — webhook section in setup_gateway() - tests/gateway/test_webhook_adapter.py — 29 unit tests - tests/gateway/test_webhook_integration.py — 4 integration tests - website/docs/user-guide/messaging/webhooks.md — full user docs - website/docs/reference/environment-variables.md — WEBHOOK_* vars - website/sidebars.ts — nav entry
2026-03-20 06:33:36 -07:00
# ===================================================================
# Test 4: GitHub comment delivery via gh CLI
# ===================================================================
class TestGitHubCommentDelivery:
    """Webhook responses can be posted back as a GitHub PR comment."""

    @pytest.mark.asyncio
    async def test_github_comment_delivery(self):
        """With deliver='github_comment', send() shells out to
        ``gh pr comment`` through subprocess.run (mocked here)."""
        adapter = _make_adapter(
            {
                "pr-bot": {
                    "secret": _INSECURE_NO_AUTH,
                    "prompt": "Review: {pull_request.title}",
                    "deliver": "github_comment",
                    "deliver_extra": {
                        "repo": "{repository.full_name}",
                        "pr_number": "{number}",
                    },
                }
            }
        )
        adapter.handle_message = AsyncMock()

        # The POST is what populates the adapter's per-delivery routing info.
        async with TestClient(TestServer(_create_app(adapter))) as client:
            response = await client.post(
                "/webhooks/pr-bot",
                json=GITHUB_PR_PAYLOAD,
                headers={
                    "X-GitHub-Event": "pull_request",
                    "X-GitHub-Delivery": "gh-comment-001",
                },
            )
            assert response.status == 202

        delivery_key = "webhook:pr-bot:gh-comment-001"
        assert delivery_key in adapter._delivery_info

        # The deliver_extra templates must have been filled from the payload.
        rendered_extra = adapter._delivery_info[delivery_key]["deliver_extra"]
        assert rendered_extra["repo"] == "org/repo"
        assert rendered_extra["pr_number"] == "42"

        # Stand-in for a successful ``gh`` process result.
        completed = MagicMock(returncode=0, stdout="Comment posted", stderr="")
        comment_text = "LGTM! The code looks great."
        expected_command = [
            "gh", "pr", "comment", "42",
            "--repo", "org/repo",
            "--body", comment_text,
        ]

        with patch(
            "gateway.platforms.webhook.subprocess.run",
            return_value=completed,
        ) as run_mock:
            outcome = await adapter.send(delivery_key, comment_text)

        assert outcome.success is True
        run_mock.assert_called_once_with(
            expected_command,
            capture_output=True,
            text=True,
            timeout=30,
        )

        # Delivery info is retained after send() so interim status messages
        # don't strand the final response (TTL-based cleanup happens on POST).
        assert delivery_key in adapter._delivery_info