Files
the-nexus/scripts/cron-heartbeat-write.sh
2026-04-07 14:44:05 +00:00

116 lines
5.0 KiB
Bash
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# cron-heartbeat-write.sh — Bezalel Cron Heartbeat Writer (poka-yoke #1096)
# Refs: https://forge.alexanderwhitestone.com/Timmy_Foundation/the-nexus/issues/1096
#
# POKA-YOKE design:
# Prevention — Cron jobs declare their identity + expected interval up front.
# Detection — bezalel_heartbeat_check.py reads these files every 15 min and
# alerts P1 if any job is silent for > 2× its interval.
# Correction — Alerts fire fast enough for manual intervention or auto-restart
# before the next scheduled run window expires.
#
# Usage:
# cron-heartbeat-write.sh <job-name> [interval-seconds]
#
# <job-name> Unique identifier for this cron job (e.g. "morning-report")
# [interval-seconds] Expected run interval in seconds (default: 3600)
#
# The heartbeat file is written to:
# /var/run/bezalel/heartbeats/<job-name>.last
#
# File format (JSON):
# {"job":"<name>","timestamp":<epoch_float>,"interval":<secs>,"pid":<pid>}
#
# This script ALWAYS exits 0 — it must never crash the calling cron job.
#
# Typical crontab usage:
# 0 * * * * /root/wizards/the-nexus/scripts/cron-heartbeat-write.sh hourly-job 3600
# 0 6 * * * /root/wizards/the-nexus/scripts/cron-heartbeat-write.sh morning-report 86400
# Shell options: referencing an unset variable is an error, and a pipeline
# reports failure if any stage fails. `-e` is not enabled; the script is
# documented to always exit 0.
set -u
set -o pipefail
# ── Configuration ─────────────────────────────────────────────────────────────
# Directory that receives the heartbeat files. BEZALEL_HEARTBEAT_DIR overrides
# the built-in default when it is set and non-empty.
HEARTBEAT_DIR=${BEZALEL_HEARTBEAT_DIR:-}
if [[ -z "$HEARTBEAT_DIR" ]]; then
  HEARTBEAT_DIR=/var/run/bezalel/heartbeats
fi
# ── Helpers ───────────────────────────────────────────────────────────────────
# Print a timestamped informational HEARTBEAT line to stdout.
# Arguments: the message; all arguments are joined via "$*".
log() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '[%s] HEARTBEAT: %s\n' "$stamp" "$*"
}
# Print a timestamped HEARTBEAT WARNING line to stderr.
# Arguments: the message; all arguments are joined via "$*".
warn() {
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  printf '[%s] HEARTBEAT WARNING: %s\n' "$stamp" "$*" >&2
}
# ── Input validation ──────────────────────────────────────────────────────────
# A job name is mandatory. Without one nothing is written, but the script
# still exits 0 so the calling cron job is not failed by the heartbeat writer.
if (( $# == 0 )); then
  warn "Usage: $0 <job-name> [interval-seconds]"
  warn "No job name provided — heartbeat not written."
  exit 0
fi
# $1: job identifier. $2 (optional): expected run interval in seconds,
# defaulting to 3600 when absent or empty.
JOB_NAME=$1
INTERVAL_SECS=${2:-3600}
# Reduce the job name to a filename-safe form: every character other than
# ASCII letters, digits, dash, underscore and dot is dropped. This keeps path
# separators and other odd characters out of the heartbeat filename.
SAFE_JOB_NAME=${JOB_NAME//[^a-zA-Z0-9_.-]/}
case "$SAFE_JOB_NAME" in
  '')
    # Nothing usable survived sanitization; skip the heartbeat but exit 0.
    warn "Job name '${JOB_NAME}' contains only unsafe characters — heartbeat not written."
    exit 0
    ;;
  "$JOB_NAME")
    # Name was already safe; nothing to report.
    ;;
  *)
    # Name was altered; tell the operator which name is actually used.
    warn "Job name sanitized: '${JOB_NAME}' → '${SAFE_JOB_NAME}'"
    ;;
esac
# Validate that the interval is a positive decimal integer and normalize it.
# The check is purely textual: optional leading zeros followed by a number
# starting with 1-9. Leading zeros are stripped, so values such as "08" or
# "010" are read as decimal 8 and 10. Doing this without shell arithmetic
# avoids bash interpreting a leading 0 as an octal constant (which errors on
# "08"/"09") and keeps the value valid when it is later embedded in JSON.
# Anything else (empty, zero, signs, non-digits) falls back to 3600.
if [[ "$INTERVAL_SECS" =~ ^0*([1-9][0-9]*)$ ]]; then
  INTERVAL_SECS="${BASH_REMATCH[1]}"
else
  warn "Invalid interval '${INTERVAL_SECS}' — using default 3600."
  INTERVAL_SECS=3600
fi
# ── Create heartbeat directory ────────────────────────────────────────────────
# Ensure the heartbeat directory exists (creating parents as needed). mkdir's
# own diagnostics are discarded; on failure a single warning is emitted and the
# script exits 0 without writing a heartbeat.
mkdir -p "$HEARTBEAT_DIR" 2>/dev/null || {
  warn "Cannot create heartbeat dir '${HEARTBEAT_DIR}' — heartbeat not written."
  exit 0
}
# ── Build JSON payload ────────────────────────────────────────────────────────
# PID of this script's shell, recorded in the heartbeat.
CURRENT_PID=$$
# Current epoch time: fractional seconds from python3 when it is available,
# otherwise whole seconds from date(1).
TIMESTAMP="$(python3 -c 'import time; print(time.time())' 2>/dev/null \
  || date '+%s')"
# Preferred path: let python3 do the JSON encoding. The values are passed as
# argv so no shell text is interpolated into the python source.
PAYLOAD="$(python3 -c '
import json, sys
job, ts, interval, pid = sys.argv[1:5]
print(json.dumps({
    "job": job,
    "timestamp": float(ts),
    "interval": int(interval),
    "pid": int(pid),
}))
' "$SAFE_JOB_NAME" "$TIMESTAMP" "$INTERVAL_SECS" "$CURRENT_PID" 2>/dev/null)"
# Fallback when python3 produced nothing: assemble the same fields by hand.
[[ -n "$PAYLOAD" ]] || printf -v PAYLOAD \
  '{"job":"%s","timestamp":%s,"interval":%s,"pid":%s}' \
  "$SAFE_JOB_NAME" "$TIMESTAMP" "$INTERVAL_SECS" "$CURRENT_PID"
# ── Atomic write via temp + rename ────────────────────────────────────────────
# Write the payload to a hidden, PID-suffixed temp file inside the heartbeat
# directory, then rename it onto the target, so bezalel_heartbeat_check.py
# never sees a partial file mid-write. This is the poka-yoke atomic guarantee.
TARGET_FILE="${HEARTBEAT_DIR}/${SAFE_JOB_NAME}.last"
TMP_FILE="${HEARTBEAT_DIR}/.${SAFE_JOB_NAME}.last.tmp.$$"
# Redirections are applied left to right, so a trailing `2>/dev/null` would be
# set up only after the shell had already reported a failure to open the temp
# file. Wrapping the write in a group applies the stderr suppression first, so
# the only diagnostic on failure is the warn line below.
if { printf '%s\n' "$PAYLOAD" > "$TMP_FILE"; } 2>/dev/null; then
  # `--` ends option parsing in case HEARTBEAT_DIR is a relative path that
  # begins with a dash.
  if mv -- "$TMP_FILE" "$TARGET_FILE" 2>/dev/null; then
    log "Heartbeat written: ${TARGET_FILE} (job=${SAFE_JOB_NAME}, interval=${INTERVAL_SECS}s)"
  else
    warn "mv failed for '${TMP_FILE}' → '${TARGET_FILE}' — heartbeat not committed."
    # Best-effort cleanup of the orphaned temp file; failure here is ignored.
    rm -f -- "$TMP_FILE" 2>/dev/null || true
  fi
else
  warn "Write to temp file '${TMP_FILE}' failed — heartbeat not written."
  # A partially written temp file may exist; remove it if possible.
  rm -f -- "$TMP_FILE" 2>/dev/null || true
fi
# Always exit 0 — never crash the calling cron job.
exit 0