#!/usr/bin/env python3 """ Authenticated Gitea Webhook Runner — GEMINI-HARDEN-04 Replaces print-only payload parser with production-hardened receiver: • HMAC-SHA256 signature verification (X-Gitea-Signature) • Event / repo / branch / action allowlists (config-driven) • Idempotent event processing (X-Gitea-Delivery idempotency key) • Structured JSON logging (stdout + optional file) • Safe dispatch — pre-approved script invocations only, no payload-driven shell Usage: python3 scripts/webhook_runner.py --server # Run HTTP server (default) python3 scripts/webhook_runner.py --test # Test a payload file locally python3 scripts/webhook_runner.py --validate-config Env vars (override config file): WEBHOOK_SECRET — Shared secret with Gitea (required for prod) WEBHOOK_ALLOWED_EVENTS — Comma-separated: push,pull_request,issue WEBHOOK_ALLOWED_REPOS — Comma-separated repo allowlist (org/repo or org/*) WEBHOOK_ALLOWED_BRANCHES — Comma-separated branch patterns (push only) WEBHOOK_ALLOWED_ACTIONS — Comma-separated PR/issue actions (opened,closed,…) WEBHOOK_HOST — Bind host (default 127.0.0.1) WEBHOOK_PORT — Bind port (default 7777) WEBHOOK_LOG_FILE — Optional structured log file path WEBHOOK_IDEMPOTENCY_DB — Path to idempotency state file (.webhook_idempotency.json) Config file (optional YAML at scripts/webhook_config.yaml) may override defaults. """ import argparse import hashlib import hmac import http.server import json import logging import os import re import subprocess import sys import threading import time from datetime import datetime, timezone from pathlib import Path from typing import Dict, Any, Optional, List, Tuple try: import yaml HAS_YAML = True except ImportError: HAS_YAML = False yaml = None # type: ignore # --------------------------------------------------------------------------- # Configuration # --------------------------------------------------------------------------- DEFAULT_CONFIG = { "webhook": { "enabled": True, "host": "127.0.0.1", "port": 7777, "secret": None, "allowed_events": ["push", "pull_request", "issues", "issue_comment"], "allowed_repos": [], "allowed_branches": ["main", "master", "develop"], "allowed_actions": ["opened", "closed", "synchronized", "reopened", "created", "edited"], "idempotency": { "state_file": ".webhook_idempotency.json", "max_entries": 10000, "ttl_days": 30, }, "logging": { "level": "INFO", "file": None, }, "dispatch": { "push": { "refs/heads/main": { "allowed": True, "action": "log_and_ack", "comment": "Main branch push — logged, no auto-deploy configured", }, "refs/heads/": { "allowed": True, "action": "log_and_ack", "comment": "Non-main branch push — logged", }, }, "pull_request": { "opened": { "allowed": True, "action": "log_and_ack", "comment": "PR opened — logged; linter triggers via separate scheduler", }, "synchronized": { "allowed": True, "action": "log_and_ack", "comment": "PR updated — logged", }, }, "issues": { "opened": { "allowed": True, "action": "log_and_ack", "comment": "Issue opened — logged", }, }, }, } } # --------------------------------------------------------------------------- # Structured Logger # --------------------------------------------------------------------------- class StructuredLogger: def __init__(self, log_file: Optional[str] = None, level: str = "INFO"): self.log_file = log_file self.level = getattr(logging, level.upper(), logging.INFO) self._lock = threading.Lock() self._file_handle = None if log_file: log_dir = Path(log_file).parent log_dir.mkdir(parents=True, exist_ok=True) self._file_handle = open(log_file, "a") def _emit(self, record: Dict[str, Any]): with self._lock: line = json.dumps(record, ensure_ascii=False) print(line, flush=True) if self._file_handle: self._file_handle.write(line + "\n") self._file_handle.flush() def log(self, level: str, **fields): if getattr(logging, level.upper(), logging.INFO) < self.level: return record = { "timestamp": datetime.now(timezone.utc).isoformat(), "level": level.upper(), **fields, } self._emit(record) def info(self, **fields): self.log("INFO", **fields) def warning(self, **fields): self.log("WARNING", **fields) def error(self, **fields): self.log("ERROR", **fields) def close(self): if self._file_handle: self._file_handle.close() _logger: Optional[StructuredLogger] = None def get_logger() -> StructuredLogger: global _logger if _logger is None: raise RuntimeError("Logger not initialized") return _logger # --------------------------------------------------------------------------- # Idempotency Store # --------------------------------------------------------------------------- class IdempotencyStore: """File-backed idempotency store with TTL and size cap.""" def __init__(self, path: str, max_entries: int = 10000, ttl_days: int = 30): self.path = Path(path) self.max_entries = max_entries self.ttl_seconds = ttl_days * 86400 self._lock = threading.RLock() self._data: Dict[str, Any] = {} self._load() def _load(self): if self.path.exists(): try: with self.path.open() as f: raw = json.load(f) now = time.time() kept = {} for delivery_id, entry in raw.items(): ts = entry.get("timestamp", 0) if now - ts < self.ttl_seconds: kept[delivery_id] = entry self._data = kept self._save_locked() except Exception: self._data = {} def _save_locked(self): try: self.path.parent.mkdir(parents=True, exist_ok=True) with self.path.open("w") as f: json.dump(self._data, f) except Exception: pass def seen(self, delivery_id: str) -> bool: with self._lock: return delivery_id in self._data def mark_seen(self, delivery_id: str, event_type: str, repo: str): with self._lock: self._data[delivery_id] = { "timestamp": time.time(), "event_type": event_type, "repository": repo, } if len(self._data) > self.max_entries: sorted_items = sorted(self._data.items(), key=lambda kv: kv[1]["timestamp"]) drop_count = len(self._data) - self.max_entries for key, _ in sorted_items[:drop_count]: del self._data[key] self._save_locked() _idempotency_store: Optional[IdempotencyStore] = None def get_idempotency_store() -> IdempotencyStore: global _idempotency_store if _idempotency_store is None: raise RuntimeError("Idempotency store not initialized") return _idempotency_store # --------------------------------------------------------------------------- # Policy Engine # --------------------------------------------------------------------------- class Policy: """Allowlist-based policy enforcement.""" def __init__( self, allowed_events: List[str], allowed_repos: List[str], allowed_branches: List[str], allowed_actions: List[str], dispatch_rules: Dict[str, Any], ): self.allowed_events = set(allowed_events) # Convert repo patterns to regex; org/* → ^org/.*$ self.allowed_repo_patterns = [self._pattern_to_regex(p) for p in allowed_repos] if allowed_repos else [] self.allowed_branches = set(allowed_branches) self.allowed_actions = set(allowed_actions) self.dispatch_rules = dispatch_rules @staticmethod def _pattern_to_regex(pattern: str) -> str: import re as _re escaped = _re.escape(pattern) escaped = escaped.replace(r'\*', '.*') return f"^{escaped}$" def _matches_repo_pattern(self, repo: str) -> bool: import re as _re for pat in self.allowed_repo_patterns: if _re.match(pat, repo): return True return False def validate_event(self, event_type: str, payload: Dict[str, Any]) -> Tuple[bool, Optional[str]]: if event_type not in self.allowed_events: return False, f"event type not allowed: {event_type}" repo_full = payload.get("repository", {}).get("full_name", "") or payload.get("repository", {}).get("name", "") if self.allowed_repo_patterns and not self._matches_repo_pattern(repo_full): return False, f"repository not allowed: {repo_full}" if event_type == "push": ref = payload.get("ref", "") branch = ref.split("/")[-1] if "/" in ref else ref if branch not in self.allowed_branches and ref not in self.allowed_branches: return False, f"branch not allowed: {ref}" elif event_type in ("pull_request", "issues", "issue_comment"): action = payload.get("action", "") if action not in self.allowed_actions: return False, f"action not allowed: {action}" return True, None def get_dispatch_action(self, event_type: str, payload: Dict[str, Any]) -> Tuple[bool, str, str]: rules = self.dispatch_rules.get(event_type, {}) if not rules: return False, "ignore", "No dispatch rules for this event type" if event_type == "push": ref = payload.get("ref", "") rule = rules.get(ref) if rule: return rule.get("allowed", False), rule.get("action", "ignore"), rule.get("comment", "") for pattern_key, rule_cfg in rules.items(): if pattern_key.endswith("/") or pattern_key in ("refs/heads/",): if ref.startswith(pattern_key): return rule_cfg.get("allowed", False), rule_cfg.get("action", "ignore"), rule_cfg.get("comment", "") return False, "ignore", "No matching ref rule" else: action = payload.get("action", "") rule = rules.get(action) if rule: return rule.get("allowed", False), rule.get("action", "ignore"), rule.get("comment", "") return False, "ignore", f"No rule for action '{action}'" # --------------------------------------------------------------------------- # Safe Dispatcher # --------------------------------------------------------------------------- class SafeDispatcher: def __init__(self, policy: Policy, logger: StructuredLogger): self.policy = policy self.log = logger def dispatch(self, event_type: str, payload: Dict[str, Any], delivery_id: str) -> Tuple[bool, str]: allowed, action, comment = self.policy.get_dispatch_action(event_type, payload) if not allowed: self.log.info( event_type=event_type, delivery_id=delivery_id, action="skip", reason=comment, msg="Dispatch skipped — policy disallows", ) return False, comment or "Action not allowed by policy" self.log.info( event_type=event_type, delivery_id=delivery_id, action=action, comment=comment, msg="Dispatch executed", ) if action == "log_and_ack": return True, comment or "Logged and acknowledged" # Example: safe subprocess invocations with hardcoded paths if action == "trigger_deploy": script = Path("/opt/timmy/bin/deploy.sh") if script.exists(): try: result = subprocess.run([str(script)], capture_output=True, text=True, timeout=300, check=False) self.log.info( event_type=event_type, delivery_id=delivery_id, script=str(script), exit_code=result.returncode, msg="Deploy script invoked", ) return result.returncode == 0, f"Deploy exit={result.returncode}" except Exception as exc: self.log.error(event_type=event_type, delivery_id=delivery_id, error=str(exc), msg="Deploy failed") return False, f"Deploy error: {exc}" return False, "Deploy script not found" if action == "trigger_ci_linter": script = Path("/opt/timmy/bin/run-architecture-linter.sh") if script.exists(): try: result = subprocess.run([str(script)], capture_output=True, text=True, timeout=180, check=False) self.log.info( event_type=event_type, delivery_id=delivery_id, script=str(script), exit_code=result.returncode, msg="CI linter invoked", ) return result.returncode == 0, f"CI exit={result.returncode}" except Exception as exc: self.log.error(event_type=event_type, delivery_id=delivery_id, error=str(exc), msg="CI linter failed") return False, f"CI error: {exc}" return False, "CI linter script not found" self.log.warning(event_type=event_type, delivery_id=delivery_id, action=action, msg="Unknown dispatch action") return False, f"Unknown action: {action}" # --------------------------------------------------------------------------- # Signature Verification # --------------------------------------------------------------------------- def verify_signature(payload_bytes: bytes, header_signature: str, secret: Optional[str]) -> bool: if not header_signature: return False if secret is None or secret == "": return True # dev mode sig = header_signature if "=" in header_signature: _, sig = header_signature.split("=", 1) expected = hmac.new(secret.encode(), payload_bytes, hashlib.sha256).hexdigest() return hmac.compare_digest(expected, sig.strip()) # --------------------------------------------------------------------------- # HTTP Request Handler # --------------------------------------------------------------------------- class WebhookHTTPHandler(http.server.BaseHTTPRequestHandler): policy: Optional[Policy] = None dispatcher: Optional[SafeDispatcher] = None idempotency_store: Optional[IdempotencyStore] = None logger: Optional[StructuredLogger] = None def _respond(self, status: int, body: Dict[str, Any]): self.send_response(status) self.send_header("Content-Type", "application/json") self.end_headers() self.wfile.write(json.dumps(body).encode()) def do_POST(self): from urllib.parse import urlparse parsed = urlparse(self.path) if parsed.path != "/webhook/gitea": self._respond(404, {"error": "not found", "path": self.path}) return length = int(self.headers.get("Content-Length", "0")) payload_bytes = self.rfile.read(length) if length > 0 else b"" try: payload = json.loads(payload_bytes.decode("utf-8")) except Exception: self._respond(400, {"error": "invalid_json", "msg": "Malformed JSON payload"}) return signature = self.headers.get("X-Gitea-Signature", "") event_type = self.headers.get("X-Gitea-Event", "") delivery_id = self.headers.get("X-Gitea-Delivery", "") or str(time.time_ns()) logger = self.logger or get_logger() policy = self.policy idemp_store = self.idempotency_store # 1. Signature secret = getattr(policy, "_secret", None) if not verify_signature(payload_bytes, signature, secret): logger.error(event_type=event_type or "unknown", delivery_id=delivery_id, reason="invalid_signature", msg="Signature mismatch") self._respond(401, {"error": "invalid_signature", "msg": "Webhook signature mismatch"}) return # 2. Idempotency if idemp_store and idemp_store.seen(delivery_id): logger.info(event_type=event_type, delivery_id=delivery_id, reason="duplicate", msg="Duplicate — skipped") self._respond(200, {"status": "duplicate", "msg": "Already processed"}) return if idemp_store: idemp_store.mark_seen(delivery_id, event_type, payload.get("repository", {}).get("full_name", "")) # 3. Policy ok, reason = policy.validate_event(event_type, payload) if not ok: logger.warning(event_type=event_type, delivery_id=delivery_id, reason="policy_deny", detail=reason, msg="Rejected by policy") self._respond(403, {"error": "denied", "reason": reason}) return # 4. Dispatch try: success, message = self.dispatcher.dispatch(event_type, payload, delivery_id) except Exception as exc: logger.error(event_type=event_type, delivery_id=delivery_id, error=str(exc), msg="Dispatch exception") success, message = False, f"Internal error: {exc}" status = 200 if success else 500 self._respond(status, {"status": "ok" if success else "error", "msg": message}) def log_message(self, format, *args): pass def log_error(self, format, *args): pass def _run_server(host: str, port: int, policy: Policy, dispatcher: SafeDispatcher, idemp_store: IdempotencyStore, logger: StructuredLogger): WebhookHTTPHandler.policy = policy WebhookHTTPHandler.dispatcher = dispatcher WebhookHTTPHandler.idempotency_store = idemp_store WebhookHTTPHandler.logger = logger server = http.server.HTTPServer((host, port), WebhookHTTPHandler) logger.info(host=host, port=port, msg="Webhook runner starting") try: server.serve_forever() except KeyboardInterrupt: logger.info(msg="Webhook runner shutting down") finally: server.server_close() logger.close() # --------------------------------------------------------------------------- # Local Test Mode # --------------------------------------------------------------------------- def test_payload_file(path: str, secret: Optional[str], policy: Policy, dispatcher: SafeDispatcher, idemp_store: IdempotencyStore, logger: StructuredLogger): p = Path(path) if not p.exists(): print(f"[ERROR] Payload file not found: {path}", file=sys.stderr) sys.exit(1) try: payload = json.loads(p.read_text()) except Exception: print(f"[ERROR] Invalid JSON in {path}", file=sys.stderr) sys.exit(1) delivery_id = f"local-{int(time.time())}" event_type = payload.get("event") or _infer_event_type(payload) repo = payload.get("repository", {}).get("full_name", "") or payload.get("repository", {}).get("name", "unknown") if secret and not verify_signature(p.read_bytes(), "local-test", secret): print("[ERROR] Signature verification failed (use WEBHOOK_SECRET='' for dev)", file=sys.stderr) sys.exit(1) if idemp_store.seen(delivery_id): print(f"[INFO] Duplicate delivery {delivery_id} — skipping") sys.exit(0) idemp_store.mark_seen(delivery_id, event_type, repo) ok, reason = policy.validate_event(event_type, payload) if not ok: print(f"[DENY] Policy rejection: {reason}", file=sys.stderr) sys.exit(1) success, message = dispatcher.dispatch(event_type, payload, delivery_id) print(f"[{'OK' if success else 'FAIL'}] {message}") sys.exit(0 if success else 1) def _infer_event_type(payload: Dict[str, Any]) -> str: if "commits" in payload: return "push" if "pull_request" in payload: return "pull_request" if "issue" in payload: return "issues" return "unknown" # --------------------------------------------------------------------------- # Config Loading # --------------------------------------------------------------------------- def load_config() -> Dict[str, Any]: """Load config from YAML file (if present) merged with environment overrides.""" cfg = DEFAULT_CONFIG.copy() script_dir = Path(__file__).parent yaml_path = script_dir / "webhook_config.yaml" if HAS_YAML and yaml_path.exists(): with yaml_path.open() as f: file_cfg = yaml.safe_load(f) or {} # Simple deep merge for webhook key if "webhook" in file_cfg: cfg["webhook"].update(file_cfg["webhook"]) wc = cfg["webhook"] # Env overrides env_secret = os.environ.get("WEBHOOK_SECRET") if env_secret is not None: wc["secret"] = env_secret # Simple keys for key, env_key in [("host", "WEBHOOK_HOST"), ("port", "WEBHOOK_PORT")]: val = os.environ.get(env_key) if val: wc[key] = val if key != "port" else int(val) # List overrides for key, env_key in [ ("allowed_events", "WEBHOOK_ALLOWED_EVENTS"), ("allowed_repos", "WEBHOOK_ALLOWED_REPOS"), ("allowed_branches", "WEBHOOK_ALLOWED_BRANCHES"), ("allowed_actions", "WEBHOOK_ALLOWED_ACTIONS"), ]: val = os.environ.get(env_key) if val: wc[key] = [v.strip() for v in val.split(",") if v.strip()] # Idempotency overrides idemp = wc.get("idempotency", {}) for ikey, ienv in [("state_file", "WEBHOOK_IDEMPOTENCY_DB"), ("max_entries", "WEBHOOK_IDEMPOTENCY_MAX"), ("ttl_days", "WEBHOOK_IDEMPOTENCY_TTL")]: val = os.environ.get(ienv) if val: if ikey in ("max_entries", "ttl_days"): idemp[ikey] = int(val) else: idemp[ikey] = val wc["idempotency"] = idemp # Logging overrides log_cfg = wc.get("logging", {}) log_file = os.environ.get("WEBHOOK_LOG_FILE") if log_file: log_cfg["file"] = log_file log_level = os.environ.get("WEBHOOK_LOG_LEVEL") if log_level: log_cfg["level"] = log_level wc["logging"] = log_cfg return cfg # --------------------------------------------------------------------------- # Main # --------------------------------------------------------------------------- def main(): parser = argparse.ArgumentParser(description="Authenticated Gitea Webhook Runner") parser.add_argument("--server", action="store_true", help="Run HTTP server (default if no other flag)") parser.add_argument("--test", metavar="PAYLOAD_JSON", help="Test a payload file locally") parser.add_argument("--validate-config", action="store_true", help="Validate config and exit") args = parser.parse_args() cfg = load_config() wc = cfg["webhook"] # Warn if no secret if not wc.get("secret"): print("[WARN] WEBHOOK_SECRET not set — signature verification DISABLED for local dev", file=sys.stderr) logger = StructuredLogger(log_file=wc.get("logging", {}).get("file"), level=wc.get("logging", {}).get("level", "INFO")) idemp_cfg = wc.get("idempotency", {}) idemp_store = IdempotencyStore( path=idemp_cfg.get("state_file", ".webhook_idempotency.json"), max_entries=idemp_cfg.get("max_entries", 10000), ttl_days=idemp_cfg.get("ttl_days", 30), ) policy = Policy( allowed_events=wc.get("allowed_events", []), allowed_repos=wc.get("allowed_repos", []), allowed_branches=wc.get("allowed_branches", []), allowed_actions=wc.get("allowed_actions", []), dispatch_rules=wc.get("dispatch", {}), ) policy._secret = wc.get("secret") dispatcher = SafeDispatcher(policy, logger) if args.validate_config: print("[OK] Config validated") print(f" Events: {sorted(policy.allowed_events)}") print(f" Repos: {policy.allowed_repo_patterns}") print(f" Branches: {sorted(policy.allowed_branches)}") print(f" Actions: {sorted(policy.allowed_actions)}") sys.exit(0) if args.test: test_payload_file(args.test, wc.get("secret"), policy, dispatcher, idemp_store, logger) return host = wc.get("host", "127.0.0.1") port = int(wc.get("port", 7777)) _run_server(host, port, policy, dispatcher, idemp_store, logger) if __name__ == "__main__": main()