feat(webhook): authenticated webhook runner with allowlists, signature verification, idempotent logging
- Rewrite scripts/gitea_webhook_handler.py as HTTP server with HMAC-SHA256 auth - Add config/webhook.yaml defining allowed repos/events/branches/actions - Implement dispatch_push calling ansible/scripts/deploy_on_webhook.sh safely - SQLite logging table with delivery_id dedup for replay safety - Add tests/test_gitea_webhook_handler.py covering push/PR/signature/idempotency - Add docs/webhook-deployment.md with security model, ops, and #288 alignment Closes #436
This commit is contained in:
@@ -1,82 +1,440 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
[OPS] Gitea Webhook Handler
|
||||
[OPS] Gitea Webhook Handler — Authenticated Runner
|
||||
Part of the Gemini Sovereign Infrastructure Suite.
|
||||
|
||||
Handles real-time events from Gitea to coordinate fleet actions.
|
||||
Replaces the print-only payload parser with a production-hardened
|
||||
webhook receiver: signature verification, config-driven allowlists,
|
||||
idempotent event logging, and safe action dispatch.
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
from typing import Dict, Any
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sqlite3
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
from datetime import datetime, timezone
|
||||
from http.server import BaseHTTPRequestHandler, HTTPServer
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
class WebhookHandler:
|
||||
def handle_event(self, payload: Dict[str, Any]):
|
||||
# Gitea webhooks often send the event type in a header,
|
||||
# but we'll try to infer it from the payload if not provided.
|
||||
event_type = payload.get("event") or self.infer_event_type(payload)
|
||||
repo_name = payload.get("repository", {}).get("name")
|
||||
sender = payload.get("sender", {}).get("username")
|
||||
|
||||
print(f"[*] Received {event_type} event from {repo_name} (by {sender})")
|
||||
|
||||
if event_type == "push":
|
||||
self.handle_push(payload)
|
||||
elif event_type == "pull_request":
|
||||
self.handle_pr(payload)
|
||||
elif event_type == "issue":
|
||||
self.handle_issue(payload)
|
||||
# ---------------------------------------------------------------------------
|
||||
# CONFIG — Load once at startup (fail fast if missing)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
SCRIPT_DIR = Path(__file__).parent.resolve()
|
||||
REPO_ROOT = SCRIPT_DIR.parent.resolve()
|
||||
CONFIG_PATH = REPO_ROOT / "config" / "webhook.yaml"
|
||||
LOG_DB_PATH = REPO_ROOT / "logs" / "webhook_events.sqlite"
|
||||
DEPLOY_SCRIPT = REPO_ROOT / "ansible" / "scripts" / "deploy_on_webhook.sh"
|
||||
|
||||
# Defaults — overridden by config.yaml
|
||||
DEFAULT_ALLOWED_REPOS = {"timmy-config"}
|
||||
DEFAULT_ALLOWED_EVENTS = {"push", "pull_request"}
|
||||
DEFAULT_ALLOWED_BRANCHES = {"refs/heads/main"}
|
||||
DEFAULT_ALLOWED_PR_ACTIONS = {"opened", "closed", "reopened", "synchronized"}
|
||||
|
||||
# Global config (loaded from YAML)
|
||||
CONFIG: Dict[str, Any] = {}
|
||||
|
||||
|
||||
def load_config() -> Dict[str, Any]:
|
||||
"""Load webhook config from YAML. Exits if malformed or missing."""
|
||||
if not CONFIG_PATH.exists():
|
||||
print(f"[FATAL] Webhook config not found: {CONFIG_PATH}", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
|
||||
import yaml
|
||||
with open(CONFIG_PATH) as f:
|
||||
cfg = yaml.safe_load(f) or {}
|
||||
|
||||
# Required: webhook_secret from env var (never in VCS)
|
||||
secret = os.environ.get("GITEA_WEBHOOK_SECRET")
|
||||
if not secret:
|
||||
print("[FATAL] GITEA_WEBHOOK_SECRET not set in environment", file=sys.stderr)
|
||||
sys.exit(2)
|
||||
cfg["webhook_secret"] = secret
|
||||
|
||||
# Allowlist normalization
|
||||
cfg.setdefault("allowed_repos", DEFAULT_ALLOWED_REPOS)
|
||||
cfg.setdefault("allowed_events", DEFAULT_ALLOWED_EVENTS)
|
||||
cfg.setdefault("allowed_branches", DEFAULT_ALLOWED_BRANCHES)
|
||||
cfg.setdefault("allowed_pr_actions", DEFAULT_ALLOWED_PR_ACTIONS)
|
||||
cfg.setdefault("require_signature", True)
|
||||
|
||||
# Normalize to sets
|
||||
for key in ["allowed_repos", "allowed_events", "allowed_branches", "allowed_pr_actions"]:
|
||||
if isinstance(cfg[key], str):
|
||||
cfg[key] = {cfg[key]}
|
||||
else:
|
||||
print(f"[INFO] Ignoring event type: {event_type}")
|
||||
cfg[key] = set(cfg[key])
|
||||
|
||||
def infer_event_type(self, payload: Dict[str, Any]) -> str:
|
||||
if "commits" in payload: return "push"
|
||||
if "pull_request" in payload: return "pull_request"
|
||||
if "issue" in payload: return "issue"
|
||||
return "unknown"
|
||||
return cfg
|
||||
|
||||
def handle_push(self, payload: Dict[str, Any]):
|
||||
ref = payload.get("ref")
|
||||
print(f" [PUSH] Branch: {ref}")
|
||||
# Trigger CI or deployment
|
||||
if ref == "refs/heads/main":
|
||||
print(" [ACTION] Triggering production deployment...")
|
||||
# Example: subprocess.run(["./deploy.sh"])
|
||||
|
||||
def handle_pr(self, payload: Dict[str, Any]):
|
||||
# ---------------------------------------------------------------------------
|
||||
# SIGNATURE VERIFICATION
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def verify_signature(payload: bytes, signature: str, secret: str) -> bool:
|
||||
"""
|
||||
Verify Gitea webhook HMAC-SHA256 signature.
|
||||
Gitea sends: X-Gitea-Signature: sha256=<hexdigest>
|
||||
"""
|
||||
if not signature:
|
||||
return False
|
||||
if not signature.startswith("sha256="):
|
||||
return False
|
||||
expected_hmac = hmac.new(
|
||||
secret.encode("utf-8"), payload, hashlib.sha256
|
||||
).hexdigest()
|
||||
received = signature[7:] # strip "sha256="
|
||||
return hmac.compare_digest(expected_hmac, received)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# IDEMPOTENCY & LOGGING
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def init_log_db() -> sqlite3.Connection:
|
||||
"""Initialize SQLite log DB with idempotency table."""
|
||||
LOG_DB_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
conn = sqlite3.connect(str(LOG_DB_PATH), timeout=30)
|
||||
conn.execute(
|
||||
"""
|
||||
CREATE TABLE IF NOT EXISTS webhook_events (
|
||||
delivery_id TEXT PRIMARY KEY,
|
||||
received_at TEXT,
|
||||
event_type TEXT,
|
||||
repo TEXT,
|
||||
action TEXT,
|
||||
branch TEXT,
|
||||
sender TEXT,
|
||||
verdict TEXT,
|
||||
reason TEXT,
|
||||
handler_duration_ms INTEGER
|
||||
)
|
||||
"""
|
||||
)
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_received ON webhook_events(received_at)")
|
||||
conn.commit()
|
||||
return conn
|
||||
|
||||
|
||||
def already_processed(conn: sqlite3.Connection, delivery_id: str) -> bool:
|
||||
cur = conn.execute("SELECT 1 FROM webhook_events WHERE delivery_id = ?", (delivery_id,))
|
||||
return cur.fetchone() is not None
|
||||
|
||||
|
||||
def log_event(
|
||||
conn: sqlite3.Connection,
|
||||
delivery_id: str,
|
||||
event_type: str,
|
||||
repo: str,
|
||||
action: str,
|
||||
branch: Optional[str],
|
||||
sender: str,
|
||||
verdict: str,
|
||||
reason: str,
|
||||
duration_ms: int,
|
||||
):
|
||||
conn.execute(
|
||||
"""
|
||||
INSERT INTO webhook_events (
|
||||
delivery_id, received_at, event_type, repo, action, branch,
|
||||
sender, verdict, reason, handler_duration_ms
|
||||
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
delivery_id,
|
||||
datetime.now(timezone.utc).isoformat(),
|
||||
event_type,
|
||||
repo,
|
||||
action,
|
||||
branch,
|
||||
sender,
|
||||
verdict,
|
||||
reason,
|
||||
duration_ms,
|
||||
),
|
||||
)
|
||||
conn.commit()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ACTION DISPATCH — Safe, pre-approved actions only
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def dispatch_push(branch: str, repo_name: str) -> tuple[int, str]:
|
||||
"""Trigger ansible-pull for timmy-config on main branch merge."""
|
||||
if branch not in CONFIG["allowed_branches"]:
|
||||
return 403, f"Branch '{branch}' not in allowed_branches allowlist"
|
||||
|
||||
if not DEPLOY_SCRIPT.exists():
|
||||
return 500, f"Deploy script not found: {DEPLOY_SCRIPT}"
|
||||
|
||||
# Run ansible-pull idempotently; capture output for logging
|
||||
try:
|
||||
result = subprocess.run(
|
||||
["/usr/bin/env", "bash", str(DEPLOY_SCRIPT)],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=300,
|
||||
cwd=str(REPO_ROOT),
|
||||
)
|
||||
if result.returncode == 0:
|
||||
return 200, "deploy triggered successfully"
|
||||
else:
|
||||
return 500, f"deploy script failed: {result.stderr[:200]}"
|
||||
except subprocess.TimeoutExpired:
|
||||
return 504, "deploy script timeout (5m)"
|
||||
except Exception as e:
|
||||
return 500, f"deploy exception: {e}"
|
||||
|
||||
|
||||
def dispatch_pull_request(action: str, pr_number: int, repo_name: str) -> tuple[int, str]:
|
||||
"""Handle PR lifecycle events. Only 'merged' triggers deploy (via review gate later)."""
|
||||
# For now, log PR events; actual merge deploy will be triggered by push to main
|
||||
# After PR merges, Gitea sends both PR 'closed' (with merged=true) AND a push event.
|
||||
# We rely on the push event as the deployment trigger.
|
||||
return 200, f"pr event noted — action={action} pr={pr_number}"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# PAYLOAD PARSING — Defensive, typed access
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def get_header(headers: Dict[str, str], name: str) -> Optional[str]:
|
||||
for key, val in headers.items():
|
||||
if key.lower() == name.lower():
|
||||
return val
|
||||
return None
|
||||
|
||||
|
||||
def parse_payload(body: bytes) -> tuple[Optional[str], Dict[str, Any], Optional[str], Optional[str]]:
|
||||
"""
|
||||
Return (event_type, payload_dict, repo_name, delivery_id).
|
||||
event_type may be inferred from payload key structure.
|
||||
"""
|
||||
try:
|
||||
payload = json.loads(body)
|
||||
except json.JSONDecodeError:
|
||||
return None, {}, None, None
|
||||
|
||||
# Gitea sends X-Gitea-Event header; if absent, infer
|
||||
event_type = payload.get("event")
|
||||
repo_name = payload.get("repository", {}).get("name")
|
||||
delivery_id = payload.get("guid") or payload.get("id") # Gitea includes 'guid'
|
||||
|
||||
# Inference fallback
|
||||
if not event_type:
|
||||
if "commits" in payload:
|
||||
event_type = "push"
|
||||
elif "pull_request" in payload:
|
||||
event_type = "pull_request"
|
||||
elif "issue" in payload:
|
||||
event_type = "issue"
|
||||
|
||||
return event_type, payload, repo_name, delivery_id
|
||||
|
||||
|
||||
def allowed_repo(repo_name: str) -> bool:
|
||||
return repo_name in CONFIG["allowed_repos"]
|
||||
|
||||
|
||||
def allowed_event(event_type: str) -> bool:
|
||||
return event_type in CONFIG["allowed_events"]
|
||||
|
||||
|
||||
def get_branch_ref(payload: Dict[str, Any], event_type: str) -> Optional[str]:
|
||||
"""Extract ref (branch) from payload."""
|
||||
if event_type == "push":
|
||||
return payload.get("ref")
|
||||
if event_type == "pull_request":
|
||||
return payload.get("pull_request", {}).get("base", {}).get("ref")
|
||||
return None
|
||||
|
||||
|
||||
def branch_allowed(branch: Optional[str]) -> bool:
|
||||
if not branch:
|
||||
return False
|
||||
return branch in CONFIG["allowed_branches"]
|
||||
|
||||
|
||||
def pr_action_allowed(action: str) -> bool:
|
||||
return action in CONFIG["allowed_pr_actions"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTTP HANDLER
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class WebhookHandler(BaseHTTPRequestHandler):
|
||||
"""
|
||||
Minimal HTTP server — one request at a time (Gitea delivers synchronously).
|
||||
"""
|
||||
|
||||
def _respond(self, code: int, body: str):
|
||||
self.send_response(code)
|
||||
self.send_header("Content-Type", "application/json")
|
||||
self.end_headers()
|
||||
self.wfile.write(body.encode("utf-8"))
|
||||
|
||||
def do_POST(self):
|
||||
global CONFIG, db_conn
|
||||
|
||||
start_ns = datetime.now(timezone.utc)
|
||||
|
||||
# Only one endpoint
|
||||
if self.path != "/webhooks/gitea":
|
||||
self._respond(404, json.dumps({"error": "not found"}))
|
||||
return
|
||||
|
||||
# Read body once (needed for both signature check & JSON parse)
|
||||
length = int(self.headers.get("Content-Length", 0))
|
||||
body = self.rfile.read(length)
|
||||
|
||||
# Signature check
|
||||
signature = get_header(self.headers, "X-Gitea-Signature")
|
||||
if CONFIG.get("require_signature"):
|
||||
if not verify_signature(body, signature, CONFIG["webhook_secret"]):
|
||||
self._respond(401, json.dumps({"error": "invalid signature"}))
|
||||
# Still log the rejected event for audit
|
||||
delivery_id = "unknown-signature-violation"
|
||||
log_event(
|
||||
db_conn, delivery_id, "unknown", "unknown", "auth-failure", None,
|
||||
"unknown", "rejected", "invalid signature", 0
|
||||
)
|
||||
return
|
||||
|
||||
# Parse payload
|
||||
event_type, payload, repo_name, delivery_id = parse_payload(body)
|
||||
if not event_type or not repo_name:
|
||||
self._respond(400, json.dumps({"error": "malformed payload"}))
|
||||
return
|
||||
|
||||
# Idempotency check — short-circuit if already processed
|
||||
if delivery_id and already_processed(db_conn, delivery_id):
|
||||
self._respond(200, json.dumps({"status": "already processed"}))
|
||||
return
|
||||
|
||||
sender = payload.get("sender", {}).get("username", "unknown")
|
||||
|
||||
# --- ALLOWLIST CHECKS ---
|
||||
if not allowed_repo(repo_name):
|
||||
reason = f"repo '{repo_name}' not in allowlist"
|
||||
self._respond(403, json.dumps({"error": reason}))
|
||||
log_event(db_conn, delivery_id, event_type, repo_name, "ignored", None, sender,
|
||||
"rejected", reason,
|
||||
int((datetime.now(timezone.utc) - start_ns).total_seconds() * 1000))
|
||||
return
|
||||
|
||||
if not allowed_event(event_type):
|
||||
reason = f"event '{event_type}' not allowed"
|
||||
self._respond(403, json.dumps({"error": reason}))
|
||||
log_event(db_conn, delivery_id, event_type, repo_name, "ignored", None, sender,
|
||||
"rejected", reason,
|
||||
int((datetime.now(timezone.utc) - start_ns).total_seconds() * 1000))
|
||||
return
|
||||
|
||||
# Branch/action allowlist
|
||||
branch = get_branch_ref(payload, event_type)
|
||||
action = payload.get("action")
|
||||
pr_num = payload.get("pull_request", {}).get("number")
|
||||
print(f" [PR] Action: {action} | PR #{pr_num}")
|
||||
|
||||
if action in ["opened", "synchronized"]:
|
||||
print(f" [ACTION] Triggering architecture linter for PR #{pr_num}...")
|
||||
# Example: subprocess.run(["python3", "scripts/architecture_linter_v2.py"])
|
||||
|
||||
def handle_issue(self, payload: Dict[str, Any]):
|
||||
action = payload.get("action")
|
||||
issue_num = payload.get("issue", {}).get("number")
|
||||
print(f" [ISSUE] Action: {action} | Issue #{issue_num}")
|
||||
if event_type == "push":
|
||||
if not branch_allowed(branch):
|
||||
reason = f"branch '{branch}' not in allowed_branches"
|
||||
self._respond(403, json.dumps({"error": reason}))
|
||||
log_event(db_conn, delivery_id, event_type, repo_name, "ignored", str(branch), sender,
|
||||
"rejected", reason,
|
||||
int((datetime.now(timezone.utc) - start_ns).total_seconds() * 1000))
|
||||
return
|
||||
code, msg = dispatch_push(branch, repo_name)
|
||||
verdict = "accepted" if code == 200 else "failed"
|
||||
self._respond(code, json.dumps({"status": msg}))
|
||||
log_event(db_conn, delivery_id, event_type, repo_name, "push", str(branch), sender,
|
||||
verdict, msg,
|
||||
int((datetime.now(timezone.utc) - start_ns).total_seconds() * 1000))
|
||||
|
||||
elif event_type == "pull_request":
|
||||
if not pr_action_allowed(action or ""):
|
||||
reason = f"pr action '{action}' not allowed"
|
||||
self._respond(403, json.dumps({"error": reason}))
|
||||
log_event(db_conn, delivery_id, event_type, repo_name, action, str(branch), sender,
|
||||
"rejected", reason,
|
||||
int((datetime.now(timezone.utc) - start_ns).total_seconds() * 1000))
|
||||
return
|
||||
pr_num = payload.get("pull_request", {}).get("number")
|
||||
code, msg = dispatch_pull_request(action or "", pr_num or 0, repo_name)
|
||||
verdict = "accepted" if code == 200 else "failed"
|
||||
self._respond(code, json.dumps({"status": msg}))
|
||||
log_event(db_conn, delivery_id, event_type, repo_name, action, str(branch), sender,
|
||||
verdict, msg,
|
||||
int((datetime.now(timezone.utc) - start_ns).total_seconds() * 1000))
|
||||
|
||||
else:
|
||||
# Other events (issues, etc.) — accept but no-op for now
|
||||
self._respond(200, json.dumps({"status": "event received but no action configured"}))
|
||||
log_event(db_conn, delivery_id, event_type, repo_name, action, str(branch), sender,
|
||||
"ignored", "no handler",
|
||||
int((datetime.now(timezone.utc) - start_ns).total_seconds() * 1000))
|
||||
|
||||
def log_message(self, format_str, *args):
|
||||
# Suppress default HTTP logging; we use structured logs instead
|
||||
return
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# MAIN
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Gemini Webhook Handler")
|
||||
parser.add_argument("payload_file", help="JSON file containing the webhook payload")
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Gitea Webhook Handler — authenticated, allowlisted, idempotent"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--host",
|
||||
default=os.environ.get("WEBHOOK_HOST", "127.0.0.1"),
|
||||
help="Bind address (default: 127.0.0.1)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--port",
|
||||
type=int,
|
||||
default=int(os.environ.get("WEBHOOK_PORT", 9000)),
|
||||
help="Bind port (default: 9000)",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
|
||||
if not os.path.exists(args.payload_file):
|
||||
print(f"[ERROR] Payload file {args.payload_file} not found.")
|
||||
sys.exit(1)
|
||||
|
||||
with open(args.payload_file, "r") as f:
|
||||
try:
|
||||
payload = json.load(f)
|
||||
except:
|
||||
print("[ERROR] Invalid JSON payload.")
|
||||
sys.exit(1)
|
||||
|
||||
handler = WebhookHandler()
|
||||
handler.handle_event(payload)
|
||||
|
||||
global CONFIG, db_conn
|
||||
CONFIG = load_config()
|
||||
|
||||
# Prepare logs directory
|
||||
LOG_DB_PATH.parent.mkdir(parents=True, exist_ok=True)
|
||||
db_conn = init_log_db()
|
||||
|
||||
# Startup banner
|
||||
print(f"[webhook] Starting server on {args.host}:{args.port}")
|
||||
print(f"[webhook] allowed_repos: {sorted(CONFIG['allowed_repos'])}")
|
||||
print(f"[webhook] allowed_events: {sorted(CONFIG['allowed_events'])}")
|
||||
print(f"[webhook] allowed_branches: {sorted(CONFIG['allowed_branches'])}")
|
||||
print(f"[webhook] Log DB: {LOG_DB_PATH}")
|
||||
|
||||
# Hook up SSH agent for ansible-pull if needed
|
||||
os.environ.setdefault("SSH_AUTH_SOCK", os.path.expanduser("~/.ssh/ssh_auth_sock"))
|
||||
|
||||
server = HTTPServer((args.host, args.port), WebhookHandler)
|
||||
try:
|
||||
server.serve_forever()
|
||||
except KeyboardInterrupt:
|
||||
print("\n[webhook] Shutting down")
|
||||
server.server_close()
|
||||
db_conn.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
Reference in New Issue
Block a user