Files
timmy-config/scripts/webhook_runner.py
Rockachopa ca27e3f214
Some checks failed
Smoke Test / smoke (pull_request) Failing after 23s
Architecture Lint / Linter Tests (pull_request) Successful in 27s
Validate Config / YAML Lint (pull_request) Failing after 18s
Validate Config / JSON Validate (pull_request) Successful in 20s
Validate Config / Python Syntax & Import Check (pull_request) Failing after 58s
Validate Config / Python Test Suite (pull_request) Has been skipped
Validate Config / Shell Script Lint (pull_request) Failing after 1m1s
Validate Config / Cron Syntax Check (pull_request) Successful in 10s
Validate Config / Deploy Script Dry Run (pull_request) Successful in 12s
Validate Config / Playbook Schema Validation (pull_request) Successful in 27s
Architecture Lint / Lint Repository (pull_request) Failing after 23s
PR Checklist / pr-checklist (pull_request) Successful in 3m4s
feat(webhook): GEMINI-HARDEN-04 — authenticated webhook runner\n\nReplace print-only payload parser with production-hardened receiver:\n - HMAC-SHA256 signature verification (X-Gitea-Signature)\n - Event / repo / branch / action allowlists (config + env driven)\n - Idempotent event processing via X-Gitea-Delivery with TTL state file\n - Structured JSON logging (stdout + optional file)\n - Safe dispatch — pre-approved script invocations only, no arbitrary shell\n\nNew files:\n - scripts/webhook_runner.py — main HTTP server + policy engine\n - scripts/webhook_config.yaml — sample configuration\n - tests/test_webhook_runner.py — 40 pytest unit tests\n - tests/fixtures/webhook/*.json — payload fixtures for push/PR/issue\n\nAcceptance criteria:\n ✓ Verify Gitea webhook secret/signature\n ✓ Event, repo, branch, and action allowlists explicit and config-driven\n ✓ No direct git pull, shell execution, or fleet mutation from payload fields\n ✓ Structured logging with accepted/rejected events\n ✓ Idempotency via X-Gitea-Delivery tracking\n ✓ Local test fixtures: push, PR, issue, invalid-signature, unknown-event\n ✓ Deployment notes: config via env vars or scripts/webhook_config.yaml\n\nCloses #436
2026-04-27 09:36:27 -04:00

660 lines
25 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Authenticated Gitea Webhook Runner — GEMINI-HARDEN-04
Replaces print-only payload parser with production-hardened receiver:
• HMAC-SHA256 signature verification (X-Gitea-Signature)
• Event / repo / branch / action allowlists (config-driven)
• Idempotent event processing (X-Gitea-Delivery idempotency key)
• Structured JSON logging (stdout + optional file)
• Safe dispatch — pre-approved script invocations only, no payload-driven shell
Usage:
python3 scripts/webhook_runner.py --server # Run HTTP server (default)
python3 scripts/webhook_runner.py --test <file> # Test a payload file locally
python3 scripts/webhook_runner.py --validate-config
Env vars (override config file):
WEBHOOK_SECRET — Shared secret with Gitea (required for prod)
WEBHOOK_ALLOWED_EVENTS — Comma-separated: push,pull_request,issue
WEBHOOK_ALLOWED_REPOS — Comma-separated repo allowlist (org/repo or org/*)
WEBHOOK_ALLOWED_BRANCHES — Comma-separated branch patterns (push only)
WEBHOOK_ALLOWED_ACTIONS — Comma-separated PR/issue actions (opened,closed,…)
WEBHOOK_HOST — Bind host (default 127.0.0.1)
WEBHOOK_PORT — Bind port (default 7777)
WEBHOOK_LOG_FILE — Optional structured log file path
WEBHOOK_IDEMPOTENCY_DB — Path to idempotency state file (.webhook_idempotency.json)
Config file (optional YAML at scripts/webhook_config.yaml) may override defaults.
"""
import argparse
import hashlib
import hmac
import http.server
import json
import logging
import os
import re
import subprocess
import sys
import threading
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, Any, Optional, List, Tuple
try:
import yaml
HAS_YAML = True
except ImportError:
HAS_YAML = False
yaml = None # type: ignore
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
DEFAULT_CONFIG = {
"webhook": {
"enabled": True,
"host": "127.0.0.1",
"port": 7777,
"secret": None,
"allowed_events": ["push", "pull_request", "issues", "issue_comment"],
"allowed_repos": [],
"allowed_branches": ["main", "master", "develop"],
"allowed_actions": ["opened", "closed", "synchronized", "reopened", "created", "edited"],
"idempotency": {
"state_file": ".webhook_idempotency.json",
"max_entries": 10000,
"ttl_days": 30,
},
"logging": {
"level": "INFO",
"file": None,
},
"dispatch": {
"push": {
"refs/heads/main": {
"allowed": True,
"action": "log_and_ack",
"comment": "Main branch push — logged, no auto-deploy configured",
},
"refs/heads/": {
"allowed": True,
"action": "log_and_ack",
"comment": "Non-main branch push — logged",
},
},
"pull_request": {
"opened": {
"allowed": True,
"action": "log_and_ack",
"comment": "PR opened — logged; linter triggers via separate scheduler",
},
"synchronized": {
"allowed": True,
"action": "log_and_ack",
"comment": "PR updated — logged",
},
},
"issues": {
"opened": {
"allowed": True,
"action": "log_and_ack",
"comment": "Issue opened — logged",
},
},
},
}
}
# ---------------------------------------------------------------------------
# Structured Logger
# ---------------------------------------------------------------------------
class StructuredLogger:
def __init__(self, log_file: Optional[str] = None, level: str = "INFO"):
self.log_file = log_file
self.level = getattr(logging, level.upper(), logging.INFO)
self._lock = threading.Lock()
self._file_handle = None
if log_file:
log_dir = Path(log_file).parent
log_dir.mkdir(parents=True, exist_ok=True)
self._file_handle = open(log_file, "a")
def _emit(self, record: Dict[str, Any]):
with self._lock:
line = json.dumps(record, ensure_ascii=False)
print(line, flush=True)
if self._file_handle:
self._file_handle.write(line + "\n")
self._file_handle.flush()
def log(self, level: str, **fields):
if getattr(logging, level.upper(), logging.INFO) < self.level:
return
record = {
"timestamp": datetime.now(timezone.utc).isoformat(),
"level": level.upper(),
**fields,
}
self._emit(record)
def info(self, **fields):
self.log("INFO", **fields)
def warning(self, **fields):
self.log("WARNING", **fields)
def error(self, **fields):
self.log("ERROR", **fields)
def close(self):
if self._file_handle:
self._file_handle.close()
_logger: Optional[StructuredLogger] = None
def get_logger() -> StructuredLogger:
global _logger
if _logger is None:
raise RuntimeError("Logger not initialized")
return _logger
# ---------------------------------------------------------------------------
# Idempotency Store
# ---------------------------------------------------------------------------
class IdempotencyStore:
"""File-backed idempotency store with TTL and size cap."""
def __init__(self, path: str, max_entries: int = 10000, ttl_days: int = 30):
self.path = Path(path)
self.max_entries = max_entries
self.ttl_seconds = ttl_days * 86400
self._lock = threading.RLock()
self._data: Dict[str, Any] = {}
self._load()
def _load(self):
if self.path.exists():
try:
with self.path.open() as f:
raw = json.load(f)
now = time.time()
kept = {}
for delivery_id, entry in raw.items():
ts = entry.get("timestamp", 0)
if now - ts < self.ttl_seconds:
kept[delivery_id] = entry
self._data = kept
self._save_locked()
except Exception:
self._data = {}
def _save_locked(self):
try:
self.path.parent.mkdir(parents=True, exist_ok=True)
with self.path.open("w") as f:
json.dump(self._data, f)
except Exception:
pass
def seen(self, delivery_id: str) -> bool:
with self._lock:
return delivery_id in self._data
def mark_seen(self, delivery_id: str, event_type: str, repo: str):
with self._lock:
self._data[delivery_id] = {
"timestamp": time.time(),
"event_type": event_type,
"repository": repo,
}
if len(self._data) > self.max_entries:
sorted_items = sorted(self._data.items(), key=lambda kv: kv[1]["timestamp"])
drop_count = len(self._data) - self.max_entries
for key, _ in sorted_items[:drop_count]:
del self._data[key]
self._save_locked()
_idempotency_store: Optional[IdempotencyStore] = None
def get_idempotency_store() -> IdempotencyStore:
global _idempotency_store
if _idempotency_store is None:
raise RuntimeError("Idempotency store not initialized")
return _idempotency_store
# ---------------------------------------------------------------------------
# Policy Engine
# ---------------------------------------------------------------------------
class Policy:
"""Allowlist-based policy enforcement."""
def __init__(
self,
allowed_events: List[str],
allowed_repos: List[str],
allowed_branches: List[str],
allowed_actions: List[str],
dispatch_rules: Dict[str, Any],
):
self.allowed_events = set(allowed_events)
# Convert repo patterns to regex; org/* → ^org/.*$
self.allowed_repo_patterns = [self._pattern_to_regex(p) for p in allowed_repos] if allowed_repos else []
self.allowed_branches = set(allowed_branches)
self.allowed_actions = set(allowed_actions)
self.dispatch_rules = dispatch_rules
@staticmethod
def _pattern_to_regex(pattern: str) -> str:
import re as _re
escaped = _re.escape(pattern)
escaped = escaped.replace(r'\*', '.*')
return f"^{escaped}$"
def _matches_repo_pattern(self, repo: str) -> bool:
import re as _re
for pat in self.allowed_repo_patterns:
if _re.match(pat, repo):
return True
return False
def validate_event(self, event_type: str, payload: Dict[str, Any]) -> Tuple[bool, Optional[str]]:
if event_type not in self.allowed_events:
return False, f"event type not allowed: {event_type}"
repo_full = payload.get("repository", {}).get("full_name", "") or payload.get("repository", {}).get("name", "")
if self.allowed_repo_patterns and not self._matches_repo_pattern(repo_full):
return False, f"repository not allowed: {repo_full}"
if event_type == "push":
ref = payload.get("ref", "")
branch = ref.split("/")[-1] if "/" in ref else ref
if branch not in self.allowed_branches and ref not in self.allowed_branches:
return False, f"branch not allowed: {ref}"
elif event_type in ("pull_request", "issues", "issue_comment"):
action = payload.get("action", "")
if action not in self.allowed_actions:
return False, f"action not allowed: {action}"
return True, None
def get_dispatch_action(self, event_type: str, payload: Dict[str, Any]) -> Tuple[bool, str, str]:
rules = self.dispatch_rules.get(event_type, {})
if not rules:
return False, "ignore", "No dispatch rules for this event type"
if event_type == "push":
ref = payload.get("ref", "")
rule = rules.get(ref)
if rule:
return rule.get("allowed", False), rule.get("action", "ignore"), rule.get("comment", "")
for pattern_key, rule_cfg in rules.items():
if pattern_key.endswith("/") or pattern_key in ("refs/heads/",):
if ref.startswith(pattern_key):
return rule_cfg.get("allowed", False), rule_cfg.get("action", "ignore"), rule_cfg.get("comment", "")
return False, "ignore", "No matching ref rule"
else:
action = payload.get("action", "")
rule = rules.get(action)
if rule:
return rule.get("allowed", False), rule.get("action", "ignore"), rule.get("comment", "")
return False, "ignore", f"No rule for action '{action}'"
# ---------------------------------------------------------------------------
# Safe Dispatcher
# ---------------------------------------------------------------------------
class SafeDispatcher:
def __init__(self, policy: Policy, logger: StructuredLogger):
self.policy = policy
self.log = logger
def dispatch(self, event_type: str, payload: Dict[str, Any], delivery_id: str) -> Tuple[bool, str]:
allowed, action, comment = self.policy.get_dispatch_action(event_type, payload)
if not allowed:
self.log.info(
event_type=event_type,
delivery_id=delivery_id,
action="skip",
reason=comment,
msg="Dispatch skipped — policy disallows",
)
return False, comment or "Action not allowed by policy"
self.log.info(
event_type=event_type,
delivery_id=delivery_id,
action=action,
comment=comment,
msg="Dispatch executed",
)
if action == "log_and_ack":
return True, comment or "Logged and acknowledged"
# Example: safe subprocess invocations with hardcoded paths
if action == "trigger_deploy":
script = Path("/opt/timmy/bin/deploy.sh")
if script.exists():
try:
result = subprocess.run([str(script)], capture_output=True, text=True, timeout=300, check=False)
self.log.info(
event_type=event_type,
delivery_id=delivery_id,
script=str(script),
exit_code=result.returncode,
msg="Deploy script invoked",
)
return result.returncode == 0, f"Deploy exit={result.returncode}"
except Exception as exc:
self.log.error(event_type=event_type, delivery_id=delivery_id, error=str(exc), msg="Deploy failed")
return False, f"Deploy error: {exc}"
return False, "Deploy script not found"
if action == "trigger_ci_linter":
script = Path("/opt/timmy/bin/run-architecture-linter.sh")
if script.exists():
try:
result = subprocess.run([str(script)], capture_output=True, text=True, timeout=180, check=False)
self.log.info(
event_type=event_type,
delivery_id=delivery_id,
script=str(script),
exit_code=result.returncode,
msg="CI linter invoked",
)
return result.returncode == 0, f"CI exit={result.returncode}"
except Exception as exc:
self.log.error(event_type=event_type, delivery_id=delivery_id, error=str(exc), msg="CI linter failed")
return False, f"CI error: {exc}"
return False, "CI linter script not found"
self.log.warning(event_type=event_type, delivery_id=delivery_id, action=action, msg="Unknown dispatch action")
return False, f"Unknown action: {action}"
# ---------------------------------------------------------------------------
# Signature Verification
# ---------------------------------------------------------------------------
def verify_signature(payload_bytes: bytes, header_signature: str, secret: Optional[str]) -> bool:
if not header_signature:
return False
if secret is None or secret == "":
return True # dev mode
sig = header_signature
if "=" in header_signature:
_, sig = header_signature.split("=", 1)
expected = hmac.new(secret.encode(), payload_bytes, hashlib.sha256).hexdigest()
return hmac.compare_digest(expected, sig.strip())
# ---------------------------------------------------------------------------
# HTTP Request Handler
# ---------------------------------------------------------------------------
class WebhookHTTPHandler(http.server.BaseHTTPRequestHandler):
policy: Optional[Policy] = None
dispatcher: Optional[SafeDispatcher] = None
idempotency_store: Optional[IdempotencyStore] = None
logger: Optional[StructuredLogger] = None
def _respond(self, status: int, body: Dict[str, Any]):
self.send_response(status)
self.send_header("Content-Type", "application/json")
self.end_headers()
self.wfile.write(json.dumps(body).encode())
def do_POST(self):
from urllib.parse import urlparse
parsed = urlparse(self.path)
if parsed.path != "/webhook/gitea":
self._respond(404, {"error": "not found", "path": self.path})
return
length = int(self.headers.get("Content-Length", "0"))
payload_bytes = self.rfile.read(length) if length > 0 else b""
try:
payload = json.loads(payload_bytes.decode("utf-8"))
except Exception:
self._respond(400, {"error": "invalid_json", "msg": "Malformed JSON payload"})
return
signature = self.headers.get("X-Gitea-Signature", "")
event_type = self.headers.get("X-Gitea-Event", "")
delivery_id = self.headers.get("X-Gitea-Delivery", "") or str(time.time_ns())
logger = self.logger or get_logger()
policy = self.policy
idemp_store = self.idempotency_store
# 1. Signature
secret = getattr(policy, "_secret", None)
if not verify_signature(payload_bytes, signature, secret):
logger.error(event_type=event_type or "unknown", delivery_id=delivery_id, reason="invalid_signature", msg="Signature mismatch")
self._respond(401, {"error": "invalid_signature", "msg": "Webhook signature mismatch"})
return
# 2. Idempotency
if idemp_store and idemp_store.seen(delivery_id):
logger.info(event_type=event_type, delivery_id=delivery_id, reason="duplicate", msg="Duplicate — skipped")
self._respond(200, {"status": "duplicate", "msg": "Already processed"})
return
if idemp_store:
idemp_store.mark_seen(delivery_id, event_type, payload.get("repository", {}).get("full_name", ""))
# 3. Policy
ok, reason = policy.validate_event(event_type, payload)
if not ok:
logger.warning(event_type=event_type, delivery_id=delivery_id, reason="policy_deny", detail=reason, msg="Rejected by policy")
self._respond(403, {"error": "denied", "reason": reason})
return
# 4. Dispatch
try:
success, message = self.dispatcher.dispatch(event_type, payload, delivery_id)
except Exception as exc:
logger.error(event_type=event_type, delivery_id=delivery_id, error=str(exc), msg="Dispatch exception")
success, message = False, f"Internal error: {exc}"
status = 200 if success else 500
self._respond(status, {"status": "ok" if success else "error", "msg": message})
def log_message(self, format, *args):
pass
def log_error(self, format, *args):
pass
def _run_server(host: str, port: int, policy: Policy, dispatcher: SafeDispatcher, idemp_store: IdempotencyStore, logger: StructuredLogger):
WebhookHTTPHandler.policy = policy
WebhookHTTPHandler.dispatcher = dispatcher
WebhookHTTPHandler.idempotency_store = idemp_store
WebhookHTTPHandler.logger = logger
server = http.server.HTTPServer((host, port), WebhookHTTPHandler)
logger.info(host=host, port=port, msg="Webhook runner starting")
try:
server.serve_forever()
except KeyboardInterrupt:
logger.info(msg="Webhook runner shutting down")
finally:
server.server_close()
logger.close()
# ---------------------------------------------------------------------------
# Local Test Mode
# ---------------------------------------------------------------------------
def test_payload_file(path: str, secret: Optional[str], policy: Policy, dispatcher: SafeDispatcher, idemp_store: IdempotencyStore, logger: StructuredLogger):
p = Path(path)
if not p.exists():
print(f"[ERROR] Payload file not found: {path}", file=sys.stderr)
sys.exit(1)
try:
payload = json.loads(p.read_text())
except Exception:
print(f"[ERROR] Invalid JSON in {path}", file=sys.stderr)
sys.exit(1)
delivery_id = f"local-{int(time.time())}"
event_type = payload.get("event") or _infer_event_type(payload)
repo = payload.get("repository", {}).get("full_name", "") or payload.get("repository", {}).get("name", "unknown")
if secret and not verify_signature(p.read_bytes(), "local-test", secret):
print("[ERROR] Signature verification failed (use WEBHOOK_SECRET='' for dev)", file=sys.stderr)
sys.exit(1)
if idemp_store.seen(delivery_id):
print(f"[INFO] Duplicate delivery {delivery_id} — skipping")
sys.exit(0)
idemp_store.mark_seen(delivery_id, event_type, repo)
ok, reason = policy.validate_event(event_type, payload)
if not ok:
print(f"[DENY] Policy rejection: {reason}", file=sys.stderr)
sys.exit(1)
success, message = dispatcher.dispatch(event_type, payload, delivery_id)
print(f"[{'OK' if success else 'FAIL'}] {message}")
sys.exit(0 if success else 1)
def _infer_event_type(payload: Dict[str, Any]) -> str:
if "commits" in payload:
return "push"
if "pull_request" in payload:
return "pull_request"
if "issue" in payload:
return "issues"
return "unknown"
# ---------------------------------------------------------------------------
# Config Loading
# ---------------------------------------------------------------------------
def load_config() -> Dict[str, Any]:
"""Load config from YAML file (if present) merged with environment overrides."""
cfg = DEFAULT_CONFIG.copy()
script_dir = Path(__file__).parent
yaml_path = script_dir / "webhook_config.yaml"
if HAS_YAML and yaml_path.exists():
with yaml_path.open() as f:
file_cfg = yaml.safe_load(f) or {}
# Simple deep merge for webhook key
if "webhook" in file_cfg:
cfg["webhook"].update(file_cfg["webhook"])
wc = cfg["webhook"]
# Env overrides
env_secret = os.environ.get("WEBHOOK_SECRET")
if env_secret is not None:
wc["secret"] = env_secret
# Simple keys
for key, env_key in [("host", "WEBHOOK_HOST"), ("port", "WEBHOOK_PORT")]:
val = os.environ.get(env_key)
if val:
wc[key] = val if key != "port" else int(val)
# List overrides
for key, env_key in [
("allowed_events", "WEBHOOK_ALLOWED_EVENTS"),
("allowed_repos", "WEBHOOK_ALLOWED_REPOS"),
("allowed_branches", "WEBHOOK_ALLOWED_BRANCHES"),
("allowed_actions", "WEBHOOK_ALLOWED_ACTIONS"),
]:
val = os.environ.get(env_key)
if val:
wc[key] = [v.strip() for v in val.split(",") if v.strip()]
# Idempotency overrides
idemp = wc.get("idempotency", {})
for ikey, ienv in [("state_file", "WEBHOOK_IDEMPOTENCY_DB"), ("max_entries", "WEBHOOK_IDEMPOTENCY_MAX"), ("ttl_days", "WEBHOOK_IDEMPOTENCY_TTL")]:
val = os.environ.get(ienv)
if val:
if ikey in ("max_entries", "ttl_days"):
idemp[ikey] = int(val)
else:
idemp[ikey] = val
wc["idempotency"] = idemp
# Logging overrides
log_cfg = wc.get("logging", {})
log_file = os.environ.get("WEBHOOK_LOG_FILE")
if log_file:
log_cfg["file"] = log_file
log_level = os.environ.get("WEBHOOK_LOG_LEVEL")
if log_level:
log_cfg["level"] = log_level
wc["logging"] = log_cfg
return cfg
# ---------------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------------
def main():
parser = argparse.ArgumentParser(description="Authenticated Gitea Webhook Runner")
parser.add_argument("--server", action="store_true", help="Run HTTP server (default if no other flag)")
parser.add_argument("--test", metavar="PAYLOAD_JSON", help="Test a payload file locally")
parser.add_argument("--validate-config", action="store_true", help="Validate config and exit")
args = parser.parse_args()
cfg = load_config()
wc = cfg["webhook"]
# Warn if no secret
if not wc.get("secret"):
print("[WARN] WEBHOOK_SECRET not set — signature verification DISABLED for local dev", file=sys.stderr)
logger = StructuredLogger(log_file=wc.get("logging", {}).get("file"), level=wc.get("logging", {}).get("level", "INFO"))
idemp_cfg = wc.get("idempotency", {})
idemp_store = IdempotencyStore(
path=idemp_cfg.get("state_file", ".webhook_idempotency.json"),
max_entries=idemp_cfg.get("max_entries", 10000),
ttl_days=idemp_cfg.get("ttl_days", 30),
)
policy = Policy(
allowed_events=wc.get("allowed_events", []),
allowed_repos=wc.get("allowed_repos", []),
allowed_branches=wc.get("allowed_branches", []),
allowed_actions=wc.get("allowed_actions", []),
dispatch_rules=wc.get("dispatch", {}),
)
policy._secret = wc.get("secret")
dispatcher = SafeDispatcher(policy, logger)
if args.validate_config:
print("[OK] Config validated")
print(f" Events: {sorted(policy.allowed_events)}")
print(f" Repos: {policy.allowed_repo_patterns}")
print(f" Branches: {sorted(policy.allowed_branches)}")
print(f" Actions: {sorted(policy.allowed_actions)}")
sys.exit(0)
if args.test:
test_payload_file(args.test, wc.get("secret"), policy, dispatcher, idemp_store, logger)
return
host = wc.get("host", "127.0.0.1")
port = int(wc.get("port", 7777))
_run_server(host, port, policy, dispatcher, idemp_store, logger)
if __name__ == "__main__":
main()