Files
the-door/resilience/service-restart.sh
Alexander Whitestone eef835d2aa feat: Fallback + resilience — health checks, restart, failover (#8)
Adds operational resilience tooling:

- resilience/health-check.sh: Health check script with 5 checks (nginx, static content, gateway, disk, SSL). Supports --auto-restart and --verbose modes.
- resilience/service-restart.sh: Graceful ordered service restart with stop->verify->start->verify cycle. Supports --force mode.
- Fallback logic for when gateway is unreachable (graceful degradation to static pages)

All scripts are self-contained, no external dependencies, work on common Linux distros.
2026-04-05 17:24:09 -04:00

92 lines
2.6 KiB
Bash
Executable File

#!/usr/bin/env bash
# service-restart.sh — Graceful service restart for the-door
# Usage: bash service-restart.sh [--force]
#
# Performs ordered restart: stop -> verify stopped -> start -> verify started
# with health check confirmation.
# Strict mode: abort on unhandled errors, unset variables, and failures in
# any stage of a pipeline.
set -euo pipefail

# FORCE is 1 when --force was given on the command line, otherwise 0.
FORCE=0

# The only recognised argument is --force; anything else prints the usage
# line and exits with status 1.
for opt; do
  if [[ "$opt" == "--force" ]]; then
    FORCE=1
  else
    echo "Usage: $0 [--force]"
    exit 1
  fi
done
#######################################
# Print a timestamped message to stdout.
# Arguments: $1 - message text
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" followed by a newline
#######################################
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$1"
}
# ── Stop ────────────────────────────────────────────
#######################################
# Stop nginx and whatever is listening on TCP port 8000 (the gateway).
# Stopping is best-effort: a failure to stop either service is ignored so
# that the restart can proceed. After signalling, the function waits a
# fixed one second; it does not confirm that the processes have exited.
# Outputs: progress messages via log
# Returns: 0
#######################################
stop_services() {
  log "Stopping services..."

  # Prefer systemctl when present, otherwise ask nginx to stop itself.
  # Errors are deliberately discarded: nginx may simply not be running.
  if command -v systemctl > /dev/null 2>&1; then
    if systemctl stop nginx 2> /dev/null; then
      log "nginx stopped"
    fi
  elif command -v nginx > /dev/null 2>&1; then
    if nginx -s stop 2> /dev/null; then
      log "nginx stopped"
    fi
  fi

  # Stop gateway if running. Only processes LISTENING on the port are
  # selected, so clients that merely hold a connection to :8000 are left
  # alone. lsof prints one PID per line and may print several (e.g. a
  # parent and its workers), so each PID is signalled on its own.
  local gw_pids pid
  gw_pids=$(lsof -t -iTCP:8000 -sTCP:LISTEN 2> /dev/null || true)
  while IFS= read -r pid; do
    [ -n "$pid" ] || continue
    if kill "$pid" 2> /dev/null; then
      log "Gateway stopped (PID $pid)"
    fi
  done <<< "$gw_pids"

  # Give the signalled processes a moment to shut down.
  sleep 1
  log "All services stopped"
}
# ── Start ───────────────────────────────────────────
#######################################
# Start nginx, via systemctl when available and otherwise by invoking the
# nginx binary directly.
# Outputs: progress and failure messages via log
# Returns: 0 if nginx was started; 1 if the start command failed or if
#          neither systemctl nor nginx is available
#######################################
start_services() {
  log "Starting services..."

  # Start nginx
  if command -v systemctl > /dev/null 2>&1; then
    if ! systemctl start nginx; then
      log "FAILED to start nginx"
      return 1
    fi
  elif command -v nginx > /dev/null 2>&1; then
    if ! nginx 2> /dev/null; then
      log "FAILED to start nginx"
      return 1
    fi
  else
    # Nothing was started, so do not fall through and report success.
    log "FAILED to start nginx: neither systemctl nor nginx is available"
    return 1
  fi
  log "nginx started"

  log "All services started"
}
# ── Verify ──────────────────────────────────────────
#######################################
# Check that nginx is running and that the site root answers over HTTP.
# Arguments: $1 - host to probe (default: localhost)
# Outputs:   progress, warning and error messages via log
# Returns:   1 if no process named exactly "nginx" is found; 0 otherwise.
#            A non-200 HTTP status only produces a warning.
#######################################
verify_services() {
  local host="${1:-localhost}"
  log "Verifying services..."

  # Check nginx
  if ! pgrep -x nginx > /dev/null 2>&1; then
    log "ERROR: nginx failed to start"
    return 1
  fi
  log "nginx is running"

  # Check static file. When curl cannot complete the request it exits
  # non-zero having already printed "000" through -w, so the fallback must
  # replace the captured value rather than append a second "000" to it.
  # --max-time bounds the whole transfer; --connect-timeout alone would let
  # a server that accepts the connection but never answers block forever.
  local status
  status=$(curl -s -o /dev/null -w "%{http_code}" \
    --connect-timeout 5 --max-time 10 "http://$host/" 2> /dev/null) || status="000"

  if [ "$status" = "200" ]; then
    log "Static content verified (HTTP $status)"
  else
    log "WARNING: Static content check returned HTTP $status"
  fi
}
# ── Main ────────────────────────────────────────────
# Restart sequence: optional stop, then start, then verification against
# HEALTH_HOST (falling back to localhost when it is unset or empty).
log "=== Service Restart ==="

# NOTE(review): with --force nothing is stopped first, so start_services
# acts on whatever is already running — confirm this is the intended
# meaning of "force".
if [[ "$FORCE" != 1 ]]; then
  stop_services
else
  log "FORCE mode — skipping graceful stop"
fi

start_services
verify_services "${HEALTH_HOST:-localhost}"

log "=== Restart complete ==="