Files
the-nexus/scripts/meta_heartbeat.sh
Bezalel a0ee7858ff
Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
feat(bezalel): MemPalace ecosystem — validation, audit, sync, auto-revert, Evennia integration
2026-04-07 14:47:12 +00:00

54 lines
1.3 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Meta-heartbeat: checks the Bezalel cron-job heartbeat files for stale
# timestamps.

# Strict mode: exit on unhandled errors, reject unset variables, and make a
# pipeline fail when any of its stages fails.
set -o errexit -o nounset -o pipefail

# Directory holding one "<name>.last" heartbeat file per monitored job.
HEARTBEAT_DIR=/var/lib/bezalel/heartbeats

# File that log() appends every message to.
ALERT_LOG=/var/log/bezalel_meta_heartbeat.log

# Staleness threshold in minutes.
# NOTE(review): the check loop compares against the per-job limits listed in
# HEARTBEATS — confirm where this value is meant to be consumed.
STALE_MINUTES=30
#######################################
# Print a timestamped message and append it to the alert log.
# Globals:   ALERT_LOG (read) - path of the log file to append to
# Arguments: $1 - message text
# Outputs:   "[<ISO-8601 timestamp>] <message>" on stdout and in ALERT_LOG;
#            a warning on stderr when ALERT_LOG cannot be appended to
# Returns:   0 even when the log file is not writable
#######################################
log() {
  local line
  printf -v line '[%s] %s' "$(date -Iseconds)" "$1"
  printf '%s\n' "$line"

  # Appending is best-effort: under `set -e` with `pipefail`, a failing write
  # to the log file would otherwise abort the caller on its first message,
  # before any heartbeat has been checked.
  if ! { printf '%s\n' "$line" >>"$ALERT_LOG"; } 2>/dev/null; then
    printf 'WARN: cannot append to %s\n' "$ALERT_LOG" >&2
  fi
  return 0
}
# Ensure the heartbeat directory exists; a no-op when it is already there.
mkdir -p "$HEARTBEAT_DIR"

# Expected heartbeats, one "name:max_stale_minutes" entry per job.
# An entry written without ":minutes" falls back to STALE_MINUTES.
HEARTBEATS=(
  "nightly_watch:150"     # 2.5h — runs at 02:00
  "mempalace_nightly:150" # 2.5h — runs at 03:00
  "db_backup:150"         # 2.5h — runs at 03:30
  "runner_health:15"      # 15m — every 5 min
)

NOW_EPOCH=$(date +%s)
FAILURES=0

for entry in "${HEARTBEATS[@]}"; do
  name="${entry%%:*}"
  if [[ "$entry" == *:* ]]; then
    max_minutes="${entry##*:}"
  else
    # Without a ":" both expansions would yield the job name, so use the
    # script-wide default threshold instead.
    max_minutes="${STALE_MINUTES:-30}"
  fi
  file="${HEARTBEAT_DIR}/${name}.last"

  if [[ ! -f "$file" ]]; then
    log "MISSING: $name heartbeat file not found ($file)"
    FAILURES=$((FAILURES + 1))
    continue
  fi

  # The file can vanish or become unreadable between the -f test and stat.
  # Count that as a failure instead of letting `set -e` abort the run before
  # the remaining jobs are checked and the summary is logged.
  if ! LAST_EPOCH=$(stat -c %Y -- "$file" 2>/dev/null); then
    log "MISSING: $name heartbeat file could not be read ($file)"
    FAILURES=$((FAILURES + 1))
    continue
  fi

  # Age is the file's modification time relative to the start of this run,
  # truncated to whole minutes.
  AGE_MIN=$(( (NOW_EPOCH - LAST_EPOCH) / 60 ))
  if (( AGE_MIN > max_minutes )); then
    log "STALE: $name is ${AGE_MIN}m old (max ${max_minutes}m)"
    FAILURES=$((FAILURES + 1))
  else
    log "OK: $name is ${AGE_MIN}m old"
  fi
done

# Exit non-zero when any heartbeat is missing or stale.
if (( FAILURES > 0 )); then
  log "ALERT: $FAILURES stale/missing heartbeat(s) detected."
  exit 1
fi
log "ALL_OK: All heartbeats healthy."