# PR #436 summary (from the change description):
# - Rewrite scripts/gitea_webhook_handler.py as HTTP server with HMAC-SHA256 auth
# - Add config/webhook.yaml defining allowed repos/events/branches/actions
# - Implement dispatch_push calling ansible/scripts/deploy_on_webhook.sh safely
# - SQLite logging table with delivery_id dedup for replay safety
# - Add tests/test_gitea_webhook_handler.py covering push/PR/signature/idempotency
# - Add docs/webhook-deployment.md with security model, ops, and #288 alignment
# Closes #436
#!/usr/bin/env python3
"""
[OPS] Gitea Webhook Handler — Authenticated Runner
Part of the Gemini Sovereign Infrastructure Suite.

Replaces the print-only payload parser with a production-hardened
webhook receiver: signature verification, config-driven allowlists,
idempotent event logging, and safe action dispatch.
"""
import argparse
import hashlib
import hmac
import json
import logging
import os
import sqlite3
import subprocess
import sys
import threading
from datetime import datetime, timezone
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
from typing import Any, Dict, Optional
# ---------------------------------------------------------------------------
# CONFIG — Load once at startup (fail fast if missing)
# ---------------------------------------------------------------------------

# Paths are derived from this script's location so the handler works from any CWD.
SCRIPT_DIR = Path(__file__).parent.resolve()
REPO_ROOT = SCRIPT_DIR.parent.resolve()
CONFIG_PATH = REPO_ROOT / "config" / "webhook.yaml"
LOG_DB_PATH = REPO_ROOT / "logs" / "webhook_events.sqlite"
DEPLOY_SCRIPT = REPO_ROOT / "ansible" / "scripts" / "deploy_on_webhook.sh"

# Built-in fallbacks — config/webhook.yaml overrides each of these.
DEFAULT_ALLOWED_REPOS = {"timmy-config"}
DEFAULT_ALLOWED_EVENTS = {"push", "pull_request"}
DEFAULT_ALLOWED_BRANCHES = {"refs/heads/main"}
DEFAULT_ALLOWED_PR_ACTIONS = {"opened", "closed", "reopened", "synchronized"}

# Populated by load_config() in main(); read by the handler and dispatchers.
CONFIG: Dict[str, Any] = {}
def load_config() -> Dict[str, Any]:
    """Load webhook config from YAML. Exits if malformed or missing."""
    if not CONFIG_PATH.exists():
        print(f"[FATAL] Webhook config not found: {CONFIG_PATH}", file=sys.stderr)
        sys.exit(2)

    import yaml  # deferred import: only needed once at startup

    with open(CONFIG_PATH) as f:
        cfg = yaml.safe_load(f) or {}

    # The shared secret MUST come from the environment — never from VCS.
    secret = os.environ.get("GITEA_WEBHOOK_SECRET")
    if not secret:
        print("[FATAL] GITEA_WEBHOOK_SECRET not set in environment", file=sys.stderr)
        sys.exit(2)
    cfg["webhook_secret"] = secret

    # Apply allowlist fallbacks and normalize every allowlist to a set.
    fallbacks = {
        "allowed_repos": DEFAULT_ALLOWED_REPOS,
        "allowed_events": DEFAULT_ALLOWED_EVENTS,
        "allowed_branches": DEFAULT_ALLOWED_BRANCHES,
        "allowed_pr_actions": DEFAULT_ALLOWED_PR_ACTIONS,
    }
    for key, default in fallbacks.items():
        value = cfg.get(key, default)
        # A bare string means a single entry; any other iterable is copied.
        cfg[key] = {value} if isinstance(value, str) else set(value)

    cfg.setdefault("require_signature", True)
    return cfg
# ---------------------------------------------------------------------------
# SIGNATURE VERIFICATION
# ---------------------------------------------------------------------------

def verify_signature(payload: bytes, signature: str, secret: str) -> bool:
    """
    Verify a Gitea webhook HMAC-SHA256 signature.

    Gitea sends ``X-Gitea-Signature: sha256=<hexdigest>`` where the digest is
    the HMAC-SHA256 of the raw request body keyed with the shared secret.

    Returns True only when the signature is present, carries the ``sha256=``
    prefix, and matches the locally computed digest. hmac.compare_digest is
    used for a constant-time comparison (timing-attack resistant).
    """
    if not signature or not signature.startswith("sha256="):
        return False
    received = signature.removeprefix("sha256=")
    expected = hmac.new(secret.encode("utf-8"), payload, hashlib.sha256).hexdigest()
    return hmac.compare_digest(expected, received)
# ---------------------------------------------------------------------------
# IDEMPOTENCY & LOGGING
# ---------------------------------------------------------------------------

def init_log_db(db_path: Optional[Path] = None) -> sqlite3.Connection:
    """
    Initialize the SQLite event-log database and return an open connection.

    Parameters
    ----------
    db_path:
        Target database file; defaults to the module-level LOG_DB_PATH.
        Parent directories are created if missing.

    The ``webhook_events`` table keys on ``delivery_id`` (PRIMARY KEY) so
    replayed deliveries can be detected via already_processed().
    """
    path = LOG_DB_PATH if db_path is None else db_path
    path.parent.mkdir(parents=True, exist_ok=True)
    # timeout=30: wait on a locked DB (e.g. an ops query) instead of failing fast.
    conn = sqlite3.connect(str(path), timeout=30)
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS webhook_events (
            delivery_id TEXT PRIMARY KEY,
            received_at TEXT,
            event_type TEXT,
            repo TEXT,
            action TEXT,
            branch TEXT,
            sender TEXT,
            verdict TEXT,
            reason TEXT,
            handler_duration_ms INTEGER
        )
        """
    )
    conn.execute("CREATE INDEX IF NOT EXISTS idx_received ON webhook_events(received_at)")
    conn.commit()
    return conn
def already_processed(conn: sqlite3.Connection, delivery_id: str) -> bool:
    """Return True when this delivery_id was already logged (replay/duplicate)."""
    row = conn.execute(
        "SELECT 1 FROM webhook_events WHERE delivery_id = ?", (delivery_id,)
    ).fetchone()
    return row is not None
def log_event(
    conn: sqlite3.Connection,
    delivery_id: str,
    event_type: str,
    repo: str,
    action: str,
    branch: Optional[str],
    sender: str,
    verdict: str,
    reason: str,
    duration_ms: int,
):
    """Append one audit row for a webhook delivery and commit immediately."""
    row = (
        delivery_id,
        datetime.now(timezone.utc).isoformat(),  # received_at: UTC, ISO-8601
        event_type,
        repo,
        action,
        branch,
        sender,
        verdict,
        reason,
        duration_ms,
    )
    conn.execute(
        """
        INSERT INTO webhook_events (
            delivery_id, received_at, event_type, repo, action, branch,
            sender, verdict, reason, handler_duration_ms
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """,
        row,
    )
    conn.commit()
# ---------------------------------------------------------------------------
# ACTION DISPATCH — Safe, pre-approved actions only
# ---------------------------------------------------------------------------

def dispatch_push(branch: str, repo_name: str) -> tuple[int, str]:
    """
    Trigger ansible-pull for timmy-config on main branch merge.

    Returns an (http_status, message) pair used both for the HTTP response
    and the audit log. `repo_name` is unused here but kept for dispatcher
    symmetry with dispatch_pull_request.
    """
    # Defense in depth: the caller already checks the branch, re-check here.
    if branch not in CONFIG["allowed_branches"]:
        return 403, f"Branch '{branch}' not in allowed_branches allowlist"
    if not DEPLOY_SCRIPT.exists():
        return 500, f"Deploy script not found: {DEPLOY_SCRIPT}"

    # List-form argv with shell=False (the default): no shell-injection surface.
    cmd = ["/usr/bin/env", "bash", str(DEPLOY_SCRIPT)]
    try:
        proc = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=300,  # hard 5-minute ceiling on a deploy run
            cwd=str(REPO_ROOT),
        )
    except subprocess.TimeoutExpired:
        return 504, "deploy script timeout (5m)"
    except Exception as e:
        return 500, f"deploy exception: {e}"

    if proc.returncode != 0:
        return 500, f"deploy script failed: {proc.stderr[:200]}"
    return 200, "deploy triggered successfully"
def dispatch_pull_request(action: str, pr_number: int, repo_name: str) -> tuple[int, str]:
    """
    Handle PR lifecycle events. Only 'merged' triggers deploy (via review gate later).

    Deliberately log-only for now: after a PR merges, Gitea sends both a PR
    'closed' event (with merged=true) AND a push event, and the push event is
    the deployment trigger. `repo_name` is unused but kept for symmetry.
    """
    return 200, f"pr event noted — action={action} pr={pr_number}"
# ---------------------------------------------------------------------------
# PAYLOAD PARSING — Defensive, typed access
# ---------------------------------------------------------------------------

def get_header(headers: Dict[str, str], name: str) -> Optional[str]:
    """Case-insensitive header lookup; returns None when absent."""
    wanted = name.lower()
    return next((v for k, v in headers.items() if k.lower() == wanted), None)
def parse_payload(body: bytes) -> tuple[Optional[str], Dict[str, Any], Optional[str], Optional[str]]:
    """
    Parse a raw webhook body.

    Returns (event_type, payload_dict, repo_name, delivery_id), or
    (None, {}, None, None) when the body is not a JSON object.

    event_type may be inferred from payload key structure when no explicit
    "event" field is present.
    """
    try:
        payload = json.loads(body)
    except json.JSONDecodeError:
        return None, {}, None, None
    # BUGFIX: valid JSON that is not an object (e.g. b"[1,2]" or b'"x"')
    # previously crashed with AttributeError on .get() below.
    if not isinstance(payload, dict):
        return None, {}, None, None

    event_type = payload.get("event")
    repository = payload.get("repository")
    repo_name = repository.get("name") if isinstance(repository, dict) else None
    # NOTE(review): Gitea normally carries the delivery id in the
    # X-Gitea-Delivery header; body 'guid'/'id' is used here — confirm
    # against the actual Gitea version in use.
    delivery_id = payload.get("guid") or payload.get("id")

    # Inference fallback when the payload lacks an explicit event field.
    if not event_type:
        if "commits" in payload:
            event_type = "push"
        elif "pull_request" in payload:
            event_type = "pull_request"
        elif "issue" in payload:
            event_type = "issue"

    return event_type, payload, repo_name, delivery_id
def allowed_repo(repo_name: str) -> bool:
    """True when repo_name appears in the configured repository allowlist."""
    allowlist = CONFIG["allowed_repos"]
    return repo_name in allowlist
def allowed_event(event_type: str) -> bool:
    """True when event_type appears in the configured event allowlist."""
    allowlist = CONFIG["allowed_events"]
    return event_type in allowlist
def get_branch_ref(payload: Dict[str, Any], event_type: str) -> Optional[str]:
    """
    Extract the relevant git ref from a payload.

    push         -> the pushed ref (e.g. "refs/heads/main")
    pull_request -> the base (target) branch name
    anything else -> None
    """
    extractors = {
        "push": lambda p: p.get("ref"),
        "pull_request": lambda p: p.get("pull_request", {}).get("base", {}).get("ref"),
    }
    extract = extractors.get(event_type)
    return extract(payload) if extract else None
def branch_allowed(branch: Optional[str]) -> bool:
    """True when branch is non-empty and in the configured branch allowlist."""
    return bool(branch) and branch in CONFIG["allowed_branches"]
def pr_action_allowed(action: str) -> bool:
    """True when the PR lifecycle action is in the configured allowlist."""
    permitted = CONFIG["allowed_pr_actions"]
    return action in permitted
# ---------------------------------------------------------------------------
# HTTP HANDLER
# ---------------------------------------------------------------------------

class WebhookHandler(BaseHTTPRequestHandler):
    """
    Minimal HTTP server — one request at a time (Gitea delivers synchronously).

    NOTE(review): dispatch_push runs inline (up to 5 minutes); Gitea's delivery
    timeout may expire before the response is sent — confirm this is acceptable
    for the current single-repo setup.
    """

    def _respond(self, code: int, body: str):
        """Send a JSON response with the given HTTP status code."""
        self.send_response(code)
        self.send_header("Content-Type", "application/json")
        self.end_headers()
        self.wfile.write(body.encode("utf-8"))

    @staticmethod
    def _elapsed_ms(start: datetime) -> int:
        """Whole milliseconds elapsed since `start` (UTC-aware datetime)."""
        return int((datetime.now(timezone.utc) - start).total_seconds() * 1000)

    def do_POST(self):
        global CONFIG, db_conn

        start_time = datetime.now(timezone.utc)

        # Only one endpoint is served.
        if self.path != "/webhooks/gitea":
            self._respond(404, json.dumps({"error": "not found"}))
            return

        # Read body once (needed for both signature check & JSON parse).
        length = int(self.headers.get("Content-Length", 0))
        body = self.rfile.read(length)

        # --- SIGNATURE CHECK ---
        signature = get_header(self.headers, "X-Gitea-Signature")
        if CONFIG.get("require_signature"):
            if not verify_signature(body, signature, CONFIG["webhook_secret"]):
                self._respond(401, json.dumps({"error": "invalid signature"}))
                # Audit the rejection. BUGFIX: delivery_id is the table's
                # PRIMARY KEY — the previous constant id made every second
                # auth failure raise sqlite3.IntegrityError, so embed a
                # per-request timestamp to keep rows unique.
                delivery_id = f"signature-violation-{start_time.isoformat()}"
                log_event(
                    db_conn, delivery_id, "unknown", "unknown", "auth-failure",
                    None, "unknown", "rejected", "invalid signature", 0,
                )
                return

        # --- PARSE ---
        event_type, payload, repo_name, delivery_id = parse_payload(body)
        if not event_type or not repo_name:
            self._respond(400, json.dumps({"error": "malformed payload"}))
            return

        # --- IDEMPOTENCY — replayed deliveries are acknowledged, not re-run ---
        if delivery_id and already_processed(db_conn, delivery_id):
            self._respond(200, json.dumps({"status": "already processed"}))
            return

        sender = payload.get("sender", {}).get("username", "unknown")

        # --- ALLOWLIST CHECKS ---
        if not allowed_repo(repo_name):
            reason = f"repo '{repo_name}' not in allowlist"
            self._respond(403, json.dumps({"error": reason}))
            log_event(db_conn, delivery_id, event_type, repo_name, "ignored",
                      None, sender, "rejected", reason,
                      self._elapsed_ms(start_time))
            return

        if not allowed_event(event_type):
            reason = f"event '{event_type}' not allowed"
            self._respond(403, json.dumps({"error": reason}))
            log_event(db_conn, delivery_id, event_type, repo_name, "ignored",
                      None, sender, "rejected", reason,
                      self._elapsed_ms(start_time))
            return

        # Branch/action allowlist
        branch = get_branch_ref(payload, event_type)
        action = payload.get("action")

        if event_type == "push":
            if not branch_allowed(branch):
                reason = f"branch '{branch}' not in allowed_branches"
                self._respond(403, json.dumps({"error": reason}))
                log_event(db_conn, delivery_id, event_type, repo_name, "ignored",
                          str(branch), sender, "rejected", reason,
                          self._elapsed_ms(start_time))
                return
            code, msg = dispatch_push(branch, repo_name)
            verdict = "accepted" if code == 200 else "failed"
            self._respond(code, json.dumps({"status": msg}))
            log_event(db_conn, delivery_id, event_type, repo_name, "push",
                      str(branch), sender, verdict, msg,
                      self._elapsed_ms(start_time))

        elif event_type == "pull_request":
            if not pr_action_allowed(action or ""):
                reason = f"pr action '{action}' not allowed"
                self._respond(403, json.dumps({"error": reason}))
                log_event(db_conn, delivery_id, event_type, repo_name, action,
                          str(branch), sender, "rejected", reason,
                          self._elapsed_ms(start_time))
                return
            pr_num = payload.get("pull_request", {}).get("number")
            code, msg = dispatch_pull_request(action or "", pr_num or 0, repo_name)
            verdict = "accepted" if code == 200 else "failed"
            self._respond(code, json.dumps({"status": msg}))
            log_event(db_conn, delivery_id, event_type, repo_name, action,
                      str(branch), sender, verdict, msg,
                      self._elapsed_ms(start_time))

        else:
            # Allowed-but-unhandled events (issues, etc.) — acknowledge, no-op.
            self._respond(200, json.dumps({"status": "event received but no action configured"}))
            log_event(db_conn, delivery_id, event_type, repo_name, action,
                      str(branch), sender, "ignored", "no handler",
                      self._elapsed_ms(start_time))

    def log_message(self, format_str, *args):
        # Suppress BaseHTTPRequestHandler's stderr access log; the SQLite
        # audit table is the structured log instead.
        return
# ---------------------------------------------------------------------------
# MAIN
# ---------------------------------------------------------------------------

def main():
    """CLI entry point: load config, open the audit DB, serve until stopped."""
    parser = argparse.ArgumentParser(
        description="Gitea Webhook Handler — authenticated, allowlisted, idempotent"
    )
    parser.add_argument(
        "--host",
        default=os.environ.get("WEBHOOK_HOST", "127.0.0.1"),
        help="Bind address (default: 127.0.0.1)",
    )
    parser.add_argument(
        "--port",
        type=int,
        default=int(os.environ.get("WEBHOOK_PORT", 9000)),
        help="Bind port (default: 9000)",
    )
    args = parser.parse_args()

    global CONFIG, db_conn
    CONFIG = load_config()  # exits the process if config or secret is missing

    # init_log_db creates the logs/ directory and schema as needed.
    db_conn = init_log_db()

    # Startup banner
    print(f"[webhook] Starting server on {args.host}:{args.port}")
    print(f"[webhook] allowed_repos: {sorted(CONFIG['allowed_repos'])}")
    print(f"[webhook] allowed_events: {sorted(CONFIG['allowed_events'])}")
    print(f"[webhook] allowed_branches: {sorted(CONFIG['allowed_branches'])}")
    print(f"[webhook] Log DB: {LOG_DB_PATH}")

    # Hook up SSH agent for ansible-pull if needed
    os.environ.setdefault("SSH_AUTH_SOCK", os.path.expanduser("~/.ssh/ssh_auth_sock"))

    server = HTTPServer((args.host, args.port), WebhookHandler)
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\n[webhook] Shutting down")
    finally:
        # BUGFIX: cleanup previously ran only on KeyboardInterrupt; release
        # the socket and close the audit DB on any exit path.
        server.server_close()
        db_conn.close()
# Script entry point — importing this module has no side effects.
if __name__ == "__main__":
    main()