#!/usr/bin/env bash
# cron-heartbeat-write.sh — Bezalel Cron Heartbeat Writer (poka-yoke #1096)
# Refs: https://forge.alexanderwhitestone.com/Timmy_Foundation/the-nexus/issues/1096
#
# POKA-YOKE design:
#   Prevention — Cron jobs declare their identity + expected interval up front.
#   Detection  — bezalel_heartbeat_check.py reads these files every 15 min and
#                alerts P1 if any job is silent for > 2× its interval.
#   Correction — Alerts fire fast enough for manual intervention or auto-restart
#                before the next scheduled run window expires.
#
# Usage:
#   cron-heartbeat-write.sh <job-name> [interval-seconds]
#
#   <job-name>          Unique identifier for this cron job (e.g. "morning-report").
#                       Characters outside [a-zA-Z0-9_.-] are stripped.
#   [interval-seconds]  Expected run interval in seconds (default: 3600).
#                       Must be a positive decimal integer; leading zeros are
#                       dropped; anything else falls back to 3600.
#
# Environment:
#   BEZALEL_HEARTBEAT_DIR  Overrides the heartbeat directory
#                          (default: /var/run/bezalel/heartbeats).
#
# The heartbeat file is written to:
#   /var/run/bezalel/heartbeats/<job-name>.last
#
# File format (JSON):
#   {"job":"<name>","timestamp":<epoch-seconds>,"interval":<seconds>,"pid":<pid>}
#   <epoch-seconds> is a float when python3 is available, otherwise the integer
#   printed by `date +%s`.
#
# This script ALWAYS exits 0 — it must never crash the calling cron job.
#
# Typical crontab usage:
#   0 * * * * /root/wizards/the-nexus/scripts/cron-heartbeat-write.sh hourly-job 3600
#   0 6 * * * /root/wizards/the-nexus/scripts/cron-heartbeat-write.sh morning-report 86400

# Deliberately no `-e`: every failure below is handled explicitly so that the
# script can keep its "always exit 0" promise.
set -uo pipefail

# ── Configuration ─────────────────────────────────────────────────────────────
HEARTBEAT_DIR="${BEZALEL_HEARTBEAT_DIR:-/var/run/bezalel/heartbeats}"

# ── Helpers ───────────────────────────────────────────────────────────────────

# Print a timestamped informational message to stdout.
log() {
  printf '[%s] HEARTBEAT: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}

# Print a timestamped warning to stderr.
warn() {
  printf '[%s] HEARTBEAT WARNING: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >&2
}

# Print the current epoch time: a float from python3, or whole seconds from
# `date` when python3 is unavailable. Prints nothing if both fail.
current_timestamp() {
  python3 -c 'import time; print(time.time())' 2>/dev/null || date +%s
}

# Print the heartbeat JSON document.
# Arguments: $1 sanitized job name, $2 epoch timestamp, $3 interval, $4 pid
build_payload() {
  local job=$1 ts=$2 interval=$3 pid=$4
  local json

  # python3 is preferred for the JSON encoding; its stderr is silenced because
  # a failure is handled by the fallback below.
  json=$(python3 -c '
import json, sys
print(json.dumps({
    "job": sys.argv[1],
    "timestamp": float(sys.argv[2]),
    "interval": int(sys.argv[3]),
    "pid": int(sys.argv[4]),
}))
' "$job" "$ts" "$interval" "$pid" 2>/dev/null) || json=""

  if [[ -z "$json" ]]; then
    # Minimal fallback if python3 fails. Hand-built JSON is safe here because
    # the caller has already restricted the job name to [a-zA-Z0-9_.-] and the
    # remaining fields to plain decimal numbers.
    json="{\"job\":\"${job}\",\"timestamp\":${ts},\"interval\":${interval},\"pid\":${pid}}"
  fi

  printf '%s' "$json"
}

# Validate the arguments and write the heartbeat file atomically.
# Arguments: $1 job name, $2 optional interval in seconds (default 3600)
# Returns:   always 0; every failure is reported through warn() instead.
main() {
  if (( $# < 1 )); then
    warn "Usage: $0 <job-name> [interval-seconds]"
    warn "No job name provided — heartbeat not written."
    return 0
  fi

  local job_name=$1
  local interval_arg=${2:-3600}
  local safe_name interval timestamp payload target_file tmp_file
  local -r positive_int_re='^0*([1-9][0-9]*)$'
  local -r epoch_re='^[0-9]+([.][0-9]+)?$'

  # ── Input validation ────────────────────────────────────────────────────────
  # Sanitize job name to prevent path traversal / weird filenames.
  # Allow alphanumeric, dash, underscore, dot only.
  safe_name=${job_name//[^a-zA-Z0-9_.-]/}
  if [[ -z "$safe_name" ]]; then
    warn "Job name '${job_name}' contains only unsafe characters — heartbeat not written."
    return 0
  fi
  if [[ "$safe_name" != "$job_name" ]]; then
    warn "Job name sanitized: '${job_name}' → '${safe_name}'"
  fi

  # Validate that the interval is a positive integer. This is done with a regex
  # capture instead of (( … )) so that values with leading zeros ("08", "0090")
  # are not parsed as octal by bash arithmetic; the capture also strips those
  # zeros so the fallback JSON never contains an invalid number like 0090.
  if [[ "$interval_arg" =~ $positive_int_re ]]; then
    interval=${BASH_REMATCH[1]}
  else
    warn "Invalid interval '${interval_arg}' — using default 3600."
    interval=3600
  fi

  # ── Create heartbeat directory ──────────────────────────────────────────────
  # mkdir's own message is silenced; the warning below reports the failure.
  if ! mkdir -p -- "$HEARTBEAT_DIR" 2>/dev/null; then
    warn "Cannot create heartbeat dir '${HEARTBEAT_DIR}' — heartbeat not written."
    return 0
  fi

  # ── Build JSON payload ──────────────────────────────────────────────────────
  timestamp=$(current_timestamp)
  if ! [[ "$timestamp" =~ $epoch_re ]]; then
    # Without a numeric timestamp the payload would not be valid JSON.
    warn "Could not determine current time — heartbeat not written."
    return 0
  fi
  payload=$(build_payload "$safe_name" "$timestamp" "$interval" "$$")

  # ── Atomic write via temp + rename ──────────────────────────────────────────
  # Writes to a temp file in the same directory first, then renames it, so
  # bezalel_heartbeat_check.py never sees a partial file mid-write.
  # The temp file is created by plain redirection (not mktemp) so that it gets
  # the usual umask-derived permissions.
  target_file="${HEARTBEAT_DIR}/${safe_name}.last"
  tmp_file="${HEARTBEAT_DIR}/.${safe_name}.last.tmp.$$"

  if ! printf '%s\n' "$payload" > "$tmp_file" 2>/dev/null; then
    warn "Write to temp file '${tmp_file}' failed — heartbeat not written."
    rm -f -- "$tmp_file" 2>/dev/null || true
    return 0
  fi

  # -f: never prompt, even if the existing target is not writable.
  if ! mv -f -- "$tmp_file" "$target_file" 2>/dev/null; then
    warn "mv failed for '${tmp_file}' → '${target_file}' — heartbeat not committed."
    rm -f -- "$tmp_file" 2>/dev/null || true
    return 0
  fi

  log "Heartbeat written: ${target_file} (job=${safe_name}, interval=${interval}s)"
  return 0
}

# Always finish with status 0 — never crash the calling cron job.
main "$@" || true