#!/usr/bin/env bash
# cron-heartbeat-write.sh — Bezalel Cron Heartbeat Writer (poka-yoke #1096)
# Refs: https://forge.alexanderwhitestone.com/Timmy_Foundation/the-nexus/issues/1096
#
# POKA-YOKE design:
#   Prevention — Cron jobs declare their identity + expected interval up front.
#   Detection  — bezalel_heartbeat_check.py reads these files every 15 min and
#                alerts P1 if any job is silent for > 2× its interval.
#   Correction — Alerts fire fast enough for manual intervention or auto-restart
#                before the next scheduled run window expires.
#
# Usage:
#   cron-heartbeat-write.sh <job-name> [interval-seconds]
#
#   <job-name>          Unique identifier for this cron job (e.g. "morning-report")
#   [interval-seconds]  Expected run interval in seconds (default: 3600)
#
# The heartbeat file is written to:
#   /var/run/bezalel/heartbeats/<job-name>.last
# (set BEZALEL_HEARTBEAT_DIR to use a different directory)
#
# File format (JSON):
#   {"job":"<name>","timestamp":<epoch_float>,"interval":<secs>,"pid":<pid>}
#
# This script ALWAYS exits 0 — it must never crash the calling cron job.
#
# Typical crontab usage:
#   0 * * * * /root/wizards/the-nexus/scripts/cron-heartbeat-write.sh hourly-job 3600
#   0 6 * * * /root/wizards/the-nexus/scripts/cron-heartbeat-write.sh morning-report 86400
|
||
|
||
# Unset variables are errors and a pipeline fails if any of its stages fails.
# errexit is not enabled: failures below are handled explicitly.
set -o nounset -o pipefail
|
||
|
||
# ── Configuration ─────────────────────────────────────────────────────────────
|
||
# Directory that receives one "<job-name>.last" file per job. A non-empty
# BEZALEL_HEARTBEAT_DIR in the environment replaces the default location.
HEARTBEAT_DIR="/var/run/bezalel/heartbeats"
if [[ -n "${BEZALEL_HEARTBEAT_DIR:-}" ]]; then
  HEARTBEAT_DIR="$BEZALEL_HEARTBEAT_DIR"
fi
|
||
|
||
# ── Helpers ───────────────────────────────────────────────────────────────────
|
||
# log <message...> — print a timestamped informational line on stdout.
log() {
  printf '[%s] HEARTBEAT: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}
|
||
# warn <message...> — print a timestamped warning line on stderr.
warn() {
  printf '[%s] HEARTBEAT WARNING: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >&2
}
|
||
|
||
# ── Input validation ──────────────────────────────────────────────────────────
|
||
# Without a job name there is nothing to record: print the usage on stderr and
# stop with status 0 so the calling cron job is not disturbed.
if (( $# == 0 )); then
  warn "Usage: $0 <job-name> [interval-seconds]"
  warn "No job name provided — heartbeat not written."
  exit 0
fi
|
||
|
||
# Positional arguments: $1 is the job name, $2 the optional expected interval
# in seconds (3600 when omitted or empty).
JOB_NAME="$1"
INTERVAL_SECS="${2:-3600}"

# sanitize_job_name <raw-name> — print <raw-name> with every character outside
# [a-zA-Z0-9_.-] removed. The result contains no "/", so it cannot be used for
# path traversal when embedded in a file name.
#
# LC_ALL=C is forced for the substitution: in other locales bash may evaluate
# ranges such as a-z by collation order and let non-ASCII letters through.
sanitize_job_name() {
  local LC_ALL=C
  local raw=$1
  printf '%s\n' "${raw//[^a-zA-Z0-9_.-]/}"
}

SAFE_JOB_NAME=$(sanitize_job_name "$JOB_NAME")
|
||
# Nothing usable survived sanitization: report it and stop, still with status 0.
[[ -n "$SAFE_JOB_NAME" ]] || {
  warn "Job name '${JOB_NAME}' contains only unsafe characters — heartbeat not written."
  exit 0
}

# Tell the operator when the name used on disk differs from the one requested.
if ! [[ "$SAFE_JOB_NAME" == "$JOB_NAME" ]]; then
  warn "Job name sanitized: '${JOB_NAME}' → '${SAFE_JOB_NAME}'"
fi
|
||
|
||
# normalize_interval <raw> — validate an interval given in seconds.
# Prints the value as a canonical base-10 integer and returns 0 when <raw> is a
# positive whole number; prints nothing and returns 1 otherwise.
#
# The value is evaluated as 10#<raw> because bash arithmetic reads a leading
# zero as octal: "08" or "0900" would otherwise raise "value too great for
# base" and pass through validation unchanged. Significant digits are capped
# at 18 so the value always fits bash's 64-bit signed arithmetic.
normalize_interval() {
  local raw=$1
  [[ "$raw" =~ ^0*[0-9]{1,18}$ ]] || return 1
  (( 10#$raw >= 1 )) || return 1
  printf '%s\n' "$(( 10#$raw ))"
}

# Validate the interval; anything that is not a positive integer falls back to
# the default of 3600 seconds with a warning.
if ! normalized_interval=$(normalize_interval "$INTERVAL_SECS"); then
  warn "Invalid interval '${INTERVAL_SECS}' — using default 3600."
  normalized_interval=3600
fi
INTERVAL_SECS=$normalized_interval
|
||
|
||
# ── Create heartbeat directory ────────────────────────────────────────────────
|
||
# Make sure the target directory exists. mkdir's own diagnostics are discarded
# in favour of the single warning below; on failure the script still exits 0.
mkdir -p "$HEARTBEAT_DIR" 2>/dev/null || {
  warn "Cannot create heartbeat dir '${HEARTBEAT_DIR}' — heartbeat not written."
  exit 0
}
|
||
|
||
# ── Build JSON payload ────────────────────────────────────────────────────────
|
||
# build_payload <job> <interval> <pid> — print the heartbeat record as a single
# line of JSON with the keys job, timestamp, interval and pid.
#
# python3 is preferred: one interpreter run supplies both the fractional epoch
# timestamp and the JSON encoding. If python3 is missing or fails, the record
# is assembled by hand with a whole-second timestamp from date(1). The
# hand-built form does no JSON escaping, so callers must pass a job name that
# is already limited to safe characters and integer interval/pid values.
build_payload() {
  local job=$1 interval=$2 pid=$3
  local json

  json=$(python3 -c '
import json, sys, time
print(json.dumps({
    "job": sys.argv[1],
    "timestamp": time.time(),
    "interval": int(sys.argv[2]),
    "pid": int(sys.argv[3]),
}))
' "$job" "$interval" "$pid" 2>/dev/null) || json=""

  if [[ -z "$json" ]]; then
    # Minimal fallback if python3 fails
    json="{\"job\":\"${job}\",\"timestamp\":$(date +%s),\"interval\":${interval},\"pid\":${pid}}"
  fi
  printf '%s\n' "$json"
}

# PID of this writer process, recorded in the heartbeat.
CURRENT_PID=$$

PAYLOAD=$(build_payload "$SAFE_JOB_NAME" "$INTERVAL_SECS" "$CURRENT_PID")
|
||
|
||
# ── Atomic write via temp + rename ────────────────────────────────────────────
|
||
# The payload is written to a hidden temp file in the heartbeat directory and
# then renamed onto the final name, so bezalel_heartbeat_check.py never sees a
# partial file mid-write. This is the poka-yoke atomic guarantee.
TARGET_FILE="${HEARTBEAT_DIR}/${SAFE_JOB_NAME}.last"
TMP_FILE="${HEARTBEAT_DIR}/.${SAFE_JOB_NAME}.last.tmp.$$"

# commit_heartbeat <tmp-file> <target-file> <payload> — write <payload> plus a
# trailing newline to <tmp-file>, then rename it onto <target-file>.
# Returns 0 on success, 1 if the temp file could not be written, 2 if the
# rename failed. The temp file is removed on either failure.
#
# mv runs with -f so it can never stop to ask for confirmation (mv prompts when
# the target is not writable and stdin is a terminal; with stderr discarded
# that prompt would be invisible and the script would hang).
commit_heartbeat() {
  local tmp=$1 target=$2 payload=$3

  if ! printf '%s\n' "$payload" > "$tmp" 2>/dev/null; then
    rm -f -- "$tmp" 2>/dev/null
    return 1
  fi
  if ! mv -f -- "$tmp" "$target" 2>/dev/null; then
    rm -f -- "$tmp" 2>/dev/null
    return 2
  fi
  return 0
}

commit_heartbeat "$TMP_FILE" "$TARGET_FILE" "$PAYLOAD"
case $? in
  0) log "Heartbeat written: ${TARGET_FILE} (job=${SAFE_JOB_NAME}, interval=${INTERVAL_SECS}s)" ;;
  1) warn "Write to temp file '${TMP_FILE}' failed — heartbeat not written." ;;
  *) warn "mv failed for '${TMP_FILE}' → '${TARGET_FILE}' — heartbeat not committed." ;;
esac
|
||
|
||
# Always exit 0 — never crash the calling cron job.
|
||
exit 0
|