#!/usr/bin/env python3
"""
[OPS] Gitea Webhook Handler — Authenticated Runner
Part of the Gemini Sovereign Infrastructure Suite.

Replaces the print-only payload parser with a production-hardened webhook
receiver: signature verification, config-driven allowlists, idempotent event
logging, and safe action dispatch.
"""

import argparse
import hashlib
import hmac
import json
import logging
import os
import sqlite3
import subprocess
import sys
import threading
from datetime import datetime, timezone
from http.server import BaseHTTPRequestHandler, HTTPServer
from pathlib import Path
from typing import Any, Dict, Optional, Union

# ---------------------------------------------------------------------------
# CONFIG — Load once at startup (fail fast if missing)
# ---------------------------------------------------------------------------
SCRIPT_DIR = Path(__file__).parent.resolve()
REPO_ROOT = SCRIPT_DIR.parent.resolve()
CONFIG_PATH = REPO_ROOT / "config" / "webhook.yaml"
LOG_DB_PATH = REPO_ROOT / "logs" / "webhook_events.sqlite"
DEPLOY_SCRIPT = REPO_ROOT / "ansible" / "scripts" / "deploy_on_webhook.sh"

# Defaults — overridden by config/webhook.yaml
DEFAULT_ALLOWED_REPOS = {"timmy-config"}
DEFAULT_ALLOWED_EVENTS = {"push", "pull_request"}
DEFAULT_ALLOWED_BRANCHES = {"refs/heads/main"}
DEFAULT_ALLOWED_PR_ACTIONS = {"opened", "closed", "reopened", "synchronized"}

# Global config (loaded from YAML)
CONFIG: Dict[str, Any] = {}


def load_config() -> Dict[str, Any]:
    """Load webhook config from YAML and normalize it.

    The process exits with status 2 when the config file is missing, is not
    valid YAML, is not a mapping, or when ``GITEA_WEBHOOK_SECRET`` is unset.

    Returns:
        The config dict with ``webhook_secret`` taken from the environment,
        every ``allowed_*`` entry normalized to a ``set``, and
        ``require_signature`` defaulting to ``True``.
    """
    if not CONFIG_PATH.exists():
        print(f"[FATAL] Webhook config not found: {CONFIG_PATH}", file=sys.stderr)
        sys.exit(2)

    # Third-party dependency, imported lazily so the module itself can be
    # imported without PyYAML installed.
    import yaml

    try:
        with open(CONFIG_PATH, encoding="utf-8") as f:
            cfg = yaml.safe_load(f) or {}
    except yaml.YAMLError as exc:
        print(f"[FATAL] Webhook config is not valid YAML: {exc}", file=sys.stderr)
        sys.exit(2)
    if not isinstance(cfg, dict):
        print(f"[FATAL] Webhook config must be a mapping: {CONFIG_PATH}", file=sys.stderr)
        sys.exit(2)

    # Required: webhook_secret from env var (never in VCS)
    secret = os.environ.get("GITEA_WEBHOOK_SECRET")
    if not secret:
        print("[FATAL] GITEA_WEBHOOK_SECRET not set in environment", file=sys.stderr)
        sys.exit(2)
    cfg["webhook_secret"] = secret

    # Allowlist normalization: a missing or null entry falls back to the
    # default, a bare string becomes a one-element set, anything else is
    # converted with set().
    allowlist_defaults = {
        "allowed_repos": DEFAULT_ALLOWED_REPOS,
        "allowed_events": DEFAULT_ALLOWED_EVENTS,
        "allowed_branches": DEFAULT_ALLOWED_BRANCHES,
        "allowed_pr_actions": DEFAULT_ALLOWED_PR_ACTIONS,
    }
    for key, default in allowlist_defaults.items():
        value = cfg.get(key)
        if value is None:
            cfg[key] = set(default)
        elif isinstance(value, str):
            cfg[key] = {value}
        else:
            cfg[key] = set(value)

    # A null value must not silently switch signature checking off.
    if cfg.get("require_signature") is None:
        cfg["require_signature"] = True

    return cfg


# ---------------------------------------------------------------------------
# SIGNATURE VERIFICATION
# ---------------------------------------------------------------------------
def verify_signature(payload: bytes, signature: Optional[str], secret: str) -> bool:
    """Verify a webhook HMAC-SHA256 signature.

    Gitea's documented ``X-Gitea-Signature`` header carries the bare hex
    digest of the request body. The GitHub-style ``sha256=<hex>`` form is
    accepted as well, so callers that relied on the prefixed form keep
    working.

    Args:
        payload: Raw request body, exactly as received.
        signature: Header value; ``None`` or empty means "not signed".
        secret: Shared webhook secret.

    Returns:
        ``True`` only when the signature matches the payload.
    """
    if not signature:
        return False

    received = signature.strip().lower()
    if received.startswith("sha256="):
        received = received[len("sha256="):]

    expected = hmac.new(secret.encode("utf-8"), payload, hashlib.sha256).hexdigest()

    # compare_digest() raises TypeError for str arguments containing
    # non-ASCII characters, so compare bytes and reject such input up front.
    try:
        received_bytes = received.encode("ascii")
    except UnicodeEncodeError:
        return False
    return hmac.compare_digest(expected.encode("ascii"), received_bytes)


# ---------------------------------------------------------------------------
# IDEMPOTENCY & LOGGING
# ---------------------------------------------------------------------------
def init_log_db(db_path: Union[str, Path, None] = None) -> sqlite3.Connection:
    """Open the SQLite log DB and create the event table and index if absent.

    Args:
        db_path: Database location. Defaults to ``LOG_DB_PATH``; pass
            ``":memory:"`` for a throwaway in-memory database.

    Returns:
        An open connection to the database.
    """
    target = str(LOG_DB_PATH if db_path is None else db_path)
    if target != ":memory:":
        Path(target).parent.mkdir(parents=True, exist_ok=True)

    conn = sqlite3.connect(target, timeout=30)
    conn.execute(
        """
        CREATE TABLE IF NOT EXISTS webhook_events (
            delivery_id TEXT PRIMARY KEY,
            received_at TEXT,
            event_type TEXT,
            repo TEXT,
            action TEXT,
            branch TEXT,
            sender TEXT,
            verdict TEXT,
            reason TEXT,
            handler_duration_ms INTEGER
        )
        """
    )
    conn.execute("CREATE INDEX IF NOT EXISTS idx_received ON webhook_events(received_at)")
    conn.commit()
    return conn


def already_processed(conn: sqlite3.Connection, delivery_id: str) -> bool:
    """Return True when an event with this delivery id is already logged."""
    cur = conn.execute("SELECT 1 FROM webhook_events WHERE delivery_id = ?", (delivery_id,))
    return cur.fetchone() is not None


def log_event(
    conn: sqlite3.Connection,
    delivery_id: Optional[str],
    event_type: str,
    repo: str,
    action: Optional[str],
    branch: Optional[str],
    sender: str,
    verdict: str,
    reason: str,
    duration_ms: int,
) -> None:
    """Insert one audit row into ``webhook_events`` and commit.

    ``delivery_id`` is the table's primary key. When a row with the same id
    already exists (for example a caller reusing a constant id for repeated
    rejections), the row is stored under ``"<delivery_id>#<random hex>"``
    instead of raising, so the audit trail is never lost.
    """
    insert_sql = """
        INSERT INTO webhook_events (
            delivery_id, received_at, event_type, repo, action, branch,
            sender, verdict, reason, handler_duration_ms
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """
    details = (
        datetime.now(timezone.utc).isoformat(),
        event_type,
        repo,
        action,
        branch,
        sender,
        verdict,
        reason,
        duration_ms,
    )
    try:
        conn.execute(insert_sql, (delivery_id, *details))
    except sqlite3.IntegrityError:
        fallback_id = f"{delivery_id}#{os.urandom(8).hex()}"
        conn.execute(insert_sql, (fallback_id, *details))
    conn.commit()


# ---------------------------------------------------------------------------
# ACTION DISPATCH — Safe, pre-approved actions only
# ---------------------------------------------------------------------------
def dispatch_push(branch: str, repo_name: str) -> tuple[int, str]:
    """Run the deploy script for a push to an allowed branch.

    Args:
        branch: Full ref of the pushed branch (e.g. ``refs/heads/main``).
        repo_name: Repository name; currently not used by the dispatch itself.

    Returns:
        ``(http_status, message)``: 200 on success, 403 for a branch outside
        the allowlist, 500 when the script is missing or fails, 504 on
        timeout.
    """
    if branch not in CONFIG["allowed_branches"]:
        return 403, f"Branch '{branch}' not in allowed_branches allowlist"

    if not DEPLOY_SCRIPT.exists():
        return 500, f"Deploy script not found: {DEPLOY_SCRIPT}"

    # Run the deploy script; capture output so a failure can be reported.
    try:
        result = subprocess.run(
            ["/usr/bin/env", "bash", str(DEPLOY_SCRIPT)],
            capture_output=True,
            text=True,
            timeout=300,
            cwd=str(REPO_ROOT),
        )
    except subprocess.TimeoutExpired:
        return 504, "deploy script timeout (5m)"
    except Exception as e:
        # Deliberately broad: any launch failure is reported to the caller
        # as a 500 instead of crashing the request handler.
        return 500, f"deploy exception: {e}"

    if result.returncode == 0:
        return 200, "deploy triggered successfully"
    return 500, f"deploy script failed: {result.stderr[:200]}"


def dispatch_pull_request(action: str, pr_number: int, repo_name: str) -> tuple[int, str]:
    """Acknowledge a PR lifecycle event without triggering any deploy.

    PR events are only noted. Deployment is driven by the push event that
    follows a merge to the allowed branch, so nothing is executed here.
    """
    return 200, f"pr event noted — action={action} pr={pr_number}"


# ---------------------------------------------------------------------------
# PAYLOAD PARSING — Defensive, typed access
# ---------------------------------------------------------------------------
def get_header(headers: Dict[str, str], name: str) -> Optional[str]:
    """Return the value of header *name* (case-insensitive), or None."""
    wanted = name.lower()
    return next((val for key, val in headers.items() if key.lower() == wanted), None)


def _as_dict(value: Any) -> Dict[str, Any]:
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def parse_payload(body: bytes) -> tuple[Optional[str], Dict[str, Any], Optional[str], Optional[str]]:
    """Decode a webhook body.

    Returns:
        ``(event_type, payload_dict, repo_name, delivery_id)``. When the body
        is not a JSON object the result is ``(None, {}, None, None)``.
        ``event_type`` is read from the payload's ``event`` key or, failing
        that, inferred from the payload's key structure.
    """
    try:
        payload = json.loads(body)
    except ValueError:
        # Covers json.JSONDecodeError and the UnicodeDecodeError raised for
        # undecodable bytes; both are ValueError subclasses.
        return None, {}, None, None
    if not isinstance(payload, dict):
        return None, {}, None, None

    event_type = payload.get("event")
    if not isinstance(event_type, str):
        event_type = None

    repo_name = _as_dict(payload.get("repository")).get("name")
    if not isinstance(repo_name, str):
        repo_name = None

    # Delivery identifier, when the payload carries one.
    raw_id = payload.get("guid") or payload.get("id")
    delivery_id = str(raw_id) if raw_id else None

    # Inference fallback
    if not event_type:
        if "commits" in payload:
            event_type = "push"
        elif "pull_request" in payload:
            event_type = "pull_request"
        elif "issue" in payload:
            event_type = "issue"

    return event_type, payload, repo_name, delivery_id


def allowed_repo(repo_name: str) -> bool:
    """Return True when *repo_name* is in the configured repo allowlist."""
    return repo_name in CONFIG["allowed_repos"]


def allowed_event(event_type: str) -> bool:
    """Return True when *event_type* is in the configured event allowlist."""
    return event_type in CONFIG["allowed_events"]


def get_branch_ref(payload: Dict[str, Any], event_type: str) -> Optional[str]:
    """Extract the ref (branch) from a payload.

    For ``push`` this is the top-level ``ref``; for ``pull_request`` it is
    ``pull_request.base.ref``. Any other event type, or a missing/null
    intermediate object, yields ``None``.
    """
    if event_type == "push":
        return payload.get("ref")
    if event_type == "pull_request":
        base = _as_dict(_as_dict(payload.get("pull_request")).get("base"))
        return base.get("ref")
    return None


def branch_allowed(branch: Optional[str]) -> bool:
    """Return True when *branch* is non-empty and in the branch allowlist."""
    if not branch:
        return False
    return branch in CONFIG["allowed_branches"]


def pr_action_allowed(action: str) -> bool:
    """Return True when *action* is in the configured PR action allowlist."""
    return action in CONFIG["allowed_pr_actions"]
# ---------------------------------------------------------------------------
# HTTP HANDLER
# ---------------------------------------------------------------------------
class WebhookHandler(BaseHTTPRequestHandler):
    """
    Minimal webhook endpoint: accepts POST on ``/webhooks/gitea`` only.

    Each request is authenticated (when ``require_signature`` is set),
    checked against the configured allowlists, dispatched, and recorded in
    the event log. Relies on the module-level ``CONFIG`` and ``db_conn``
    that ``main()`` initializes.
    """

    # Largest request body that will be read. The body has to be buffered
    # before the signature can be checked, so it is bounded to keep an
    # unauthenticated client from making the server hold arbitrary amounts.
    MAX_BODY_BYTES = 10 * 1024 * 1024

    def _respond(self, code: int, body: str):
        """Send *body* as a JSON response with HTTP status *code*."""
        encoded = body.encode("utf-8")
        self.send_response(code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(encoded)))
        self.end_headers()
        self.wfile.write(encoded)

    def _read_body(self) -> Optional[bytes]:
        """Read the request body, or answer with an error and return None.

        A non-numeric or negative Content-Length yields 400 (a negative
        length would make ``rfile.read`` block until the client hangs up);
        a length above ``MAX_BODY_BYTES`` yields 413.
        """
        try:
            length = int(self.headers.get("Content-Length", "0"))
        except (TypeError, ValueError):
            length = -1
        if length < 0:
            self._respond(400, json.dumps({"error": "invalid Content-Length"}))
            return None
        if length > self.MAX_BODY_BYTES:
            self._respond(413, json.dumps({"error": "payload too large"}))
            return None
        return self.rfile.read(length)

    @staticmethod
    def _dig(data, *keys):
        """Walk nested dicts; return None once a level is missing or not a dict."""
        for key in keys:
            if not isinstance(data, dict):
                return None
            data = data.get(key)
        return data

    def do_POST(self):
        """Authenticate, validate, dispatch and log one webhook delivery."""
        started_at = datetime.now(timezone.utc)

        def elapsed_ms() -> int:
            delta = datetime.now(timezone.utc) - started_at
            return int(delta.total_seconds() * 1000)

        # Only one endpoint
        if self.path != "/webhooks/gitea":
            self._respond(404, json.dumps({"error": "not found"}))
            return

        # Read body once (needed for both signature check & JSON parse)
        body = self._read_body()
        if body is None:
            return

        # Signature check
        if CONFIG.get("require_signature"):
            signature = get_header(self.headers, "X-Gitea-Signature")
            if not verify_signature(body, signature, CONFIG["webhook_secret"]):
                self._respond(401, json.dumps({"error": "invalid signature"}))
                # Still log the rejected event for audit. The id is random
                # because delivery_id is the table's primary key: a constant
                # id would collide from the second rejection onward, and an
                # id taken from the unauthenticated request could shadow a
                # genuine delivery.
                log_event(
                    db_conn,
                    f"auth-failure-{os.urandom(8).hex()}",
                    "unknown",
                    "unknown",
                    "auth-failure",
                    None,
                    "unknown",
                    "rejected",
                    "invalid signature",
                    elapsed_ms(),
                )
                return

        # Parse payload. The X-Gitea-Event / X-Gitea-Delivery headers take
        # precedence; values found in (or inferred from) the payload are the
        # fallback when a header is absent.
        parsed_event, payload, repo_name, payload_delivery_id = parse_payload(body)
        event_type = get_header(self.headers, "X-Gitea-Event") or parsed_event
        delivery_id = get_header(self.headers, "X-Gitea-Delivery") or payload_delivery_id
        if not event_type or not repo_name:
            self._respond(400, json.dumps({"error": "malformed payload"}))
            return

        # Idempotency check — short-circuit if already processed
        if delivery_id and already_processed(db_conn, delivery_id):
            self._respond(200, json.dumps({"status": "already processed"}))
            return

        sender = self._dig(payload, "sender", "username") or "unknown"

        def finish(code, key, text, action, logged_branch, verdict, reason=None):
            """Send the response, then record the outcome in the event log."""
            self._respond(code, json.dumps({key: text}))
            log_event(
                db_conn,
                delivery_id,
                event_type,
                repo_name,
                action,
                logged_branch,
                sender,
                verdict,
                text if reason is None else reason,
                elapsed_ms(),
            )

        # --- ALLOWLIST CHECKS ---
        if not allowed_repo(repo_name):
            finish(403, "error", f"repo '{repo_name}' not in allowlist",
                   "ignored", None, "rejected")
            return

        if not allowed_event(event_type):
            finish(403, "error", f"event '{event_type}' not allowed",
                   "ignored", None, "rejected")
            return

        # Branch/action allowlist
        branch = get_branch_ref(payload, event_type)
        # Keep a missing branch as NULL in the log, not the text "None".
        branch_text = None if branch is None else str(branch)
        action = payload.get("action")

        if event_type == "push":
            if not branch_allowed(branch):
                finish(403, "error", f"branch '{branch}' not in allowed_branches",
                       "ignored", branch_text, "rejected")
                return
            code, msg = dispatch_push(branch, repo_name)
            finish(code, "status", msg, "push", branch_text,
                   "accepted" if code == 200 else "failed")

        elif event_type == "pull_request":
            if not pr_action_allowed(action or ""):
                finish(403, "error", f"pr action '{action}' not allowed",
                       action, branch_text, "rejected")
                return
            pr_num = self._dig(payload, "pull_request", "number")
            code, msg = dispatch_pull_request(action or "", pr_num or 0, repo_name)
            finish(code, "status", msg, action, branch_text,
                   "accepted" if code == 200 else "failed")

        else:
            # Other events (issues, etc.) — accept but no-op for now
            finish(200, "status", "event received but no action configured",
                   action, branch_text, "ignored", reason="no handler")

    def log_message(self, format_str, *args):
        # Suppress default HTTP logging; we use structured logs instead
        return


# ---------------------------------------------------------------------------
# MAIN
# ---------------------------------------------------------------------------
def main():
    """Parse CLI arguments, load config, open the log DB and serve forever."""
    global CONFIG, db_conn

    parser = argparse.ArgumentParser(
        description="Gitea Webhook Handler — authenticated, allowlisted, idempotent"
    )
    parser.add_argument(
        "--host",
        default=os.environ.get("WEBHOOK_HOST", "127.0.0.1"),
        help="Bind address (default: 127.0.0.1)",
    )
    parser.add_argument(
        "--port",
        type=int,
        # A string default is run through ``type`` by argparse, so an invalid
        # WEBHOOK_PORT produces a usage error instead of a traceback.
        default=os.environ.get("WEBHOOK_PORT", "9000"),
        help="Bind port (default: 9000)",
    )
    args = parser.parse_args()

    CONFIG = load_config()

    # init_log_db() creates the logs directory itself.
    db_conn = init_log_db()
    try:
        # Startup banner
        print(f"[webhook] Starting server on {args.host}:{args.port}")
        print(f"[webhook] allowed_repos: {sorted(CONFIG['allowed_repos'])}")
        print(f"[webhook] allowed_events: {sorted(CONFIG['allowed_events'])}")
        print(f"[webhook] allowed_branches: {sorted(CONFIG['allowed_branches'])}")
        print(f"[webhook] Log DB: {LOG_DB_PATH}")

        # Hook up SSH agent for ansible-pull if needed
        os.environ.setdefault("SSH_AUTH_SOCK", os.path.expanduser("~/.ssh/ssh_auth_sock"))

        server = HTTPServer((args.host, args.port), WebhookHandler)
        try:
            server.serve_forever()
        except KeyboardInterrupt:
            print("\n[webhook] Shutting down")
        finally:
            server.server_close()
    finally:
        # Release the DB even when startup or serving fails unexpectedly.
        db_conn.close()


if __name__ == "__main__":
    main()