- fleet/fleet.sh: cross-VPS health, status, restart, deploy - nexus/retry_helper.py: retry decorator, dead letter queue, checkpoints - nexus/morning_report.py: automated 0600 overnight activity report - fleet/allegro/archived-scripts/README.md: burn script archive placeholder Fixes #910 Fixes #896 Fixes #897 Fixes #898
122 lines
3.7 KiB
Bash
Executable File
122 lines
3.7 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# fleet.sh — Cross-VPS fleet management
# Manages Allegro (167.99.126.228), Bezalel (159.203.146.185)
# and Ezra (143.198.27.163)
# Usage: fleet.sh <command> [options]
#
# Commands:
#   health         — Run health checks on all VPSes
#   restart <svc>  — Restart a service on all VPSes
#   status         — Show fleet status summary
#   ssh <host>     — SSH into a specific host (allegro|bezalel|ezra)
#   run <command>  — Run a command on all VPSes
#   deploy         — Deploy latest config to all VPSes
|
|
|
|
# Strict mode: abort on a failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Address of each VPS in the fleet.
ALLEGRO="167.99.126.228"
BEZALEL="159.203.146.185"
EZRA="143.198.27.163"

# Remote account used for every SSH connection.
# NOTE(review): this reuses the name of the conventional USER login
# variable, replacing its value for the rest of the script.
USER="root"

# Options handed to ssh. Kept as one space-separated string.
# NOTE(review): StrictHostKeyChecking=no makes ssh accept unknown or
# changed host keys without asking — confirm that is acceptable for
# root logins.
SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=10"

# Parallel space-separated lists: the Nth word of host_names labels the
# Nth address in hosts.
hosts="$ALLEGRO $BEZALEL $EZRA"
host_names="allegro bezalel ezra"
|
|
|
|
# Write one timestamped line tagged "FLEET:" to stdout.
# Arguments: all arguments form the message text ("$*").
log() {
  printf '[%s] FLEET: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}
|
|
|
|
# Run a command on one host over SSH.
# Globals:   SSH_OPTS (read), USER (read)
# Arguments: $1 - host address; remaining arguments are passed to ssh
#            as the remote command
# Returns:   the exit status of ssh
remote() {
  local host=$1
  shift
  # ssh gets /dev/null as stdin. Callers invoke this inside
  # `... | while read` loops; an ssh that inherits the loop's stdin
  # swallows the remaining host lines, so only the first host would be
  # processed.
  # SSH_OPTS is a space-separated option string and is split on purpose.
  # shellcheck disable=SC2086
  ssh $SSH_OPTS "$USER@$host" "$@" </dev/null
}
|
|
|
|
# Print a health report for every host in the fleet.
# Globals:   host_names, hosts (read) — parallel space-separated lists
# Outputs:   per host a banner, then either the remote report followed
#            by "---", or an "SSH: FAILED" line when the remote call
#            returns non-zero
cmd_health() {
  local -a names addrs
  local i name host
  log "Running fleet health check..."
  # Walk the lists by index instead of piping them into `while read`:
  # ssh inherits the loop's stdin and would swallow the remaining host
  # lines, ending the loop after the first host.
  read -r -a names <<<"$host_names"
  read -r -a addrs <<<"$hosts"
  for i in "${!names[@]}"; do
    name=${names[i]}
    host=${addrs[i]:-}
    echo ""
    echo "=== $name ($host) ==="
    if remote "$host" "echo 'SSH: OK'; uptime; free -m | head -2; df -h / | tail -1; systemctl list-units --state=failed --no-pager | head -10" 2>&1 </dev/null; then
      echo "---"
    else
      echo "SSH: FAILED — host unreachable"
    fi
  done
}
|
|
|
|
# Print a one-line status for every host in the fleet.
# Globals:   host_names, hosts (read) — parallel space-separated lists
# Outputs:   "<name> UP <uptime>" when the host answers, otherwise
#            "<name>  UNREACHABLE"
cmd_status() {
  local -a names addrs
  local i host uptime_str
  log "Fleet status summary..."
  # Walk the lists by index instead of piping them into `while read`:
  # ssh inherits the loop's stdin and would swallow the remaining host
  # lines, ending the loop after the first host.
  read -r -a names <<<"$host_names"
  read -r -a addrs <<<"$hosts"
  for i in "${!names[@]}"; do
    host=${addrs[i]:-}
    printf "%-12s " "${names[i]}"
    # First call is the reachability probe; it prints "UP" without a
    # trailing newline so the uptime text lands on the same line.
    if remote "$host" "echo -n 'UP' 2>/dev/null" 2>/dev/null </dev/null; then
      uptime_str=$(remote "$host" "uptime -p 2>/dev/null || uptime" 2>/dev/null </dev/null || echo "unknown")
      echo " $uptime_str"
    else
      echo " UNREACHABLE"
    fi
  done
}
|
|
|
|
# Restart one systemd service on every host in the fleet.
# Globals:   host_names, hosts (read) — parallel space-separated lists
# Arguments: $1 - service name; interpolated verbatim into the remote
#            shell command line
# Outputs:   per host "<name> restarted", "<name> FAILED" (as printed
#            by the remote command) or "<name> UNREACHABLE"
# Returns:   1 with a usage message when no service is given
cmd_restart() {
  local svc=${1:-}
  local -a names addrs
  local i
  if [ -z "$svc" ]; then
    echo "Usage: fleet.sh restart <service>"
    echo "Common: hermes-agent evennia nginx docker"
    return 1
  fi
  log "Restarting '$svc' on all hosts..."
  # Walk the lists by index instead of piping them into `while read`:
  # ssh inherits the loop's stdin and would swallow the remaining host
  # lines, so only the first host would be restarted.
  read -r -a names <<<"$host_names"
  read -r -a addrs <<<"$hosts"
  for i in "${!names[@]}"; do
    printf "%-12s " "${names[i]}"
    # The remote command always ends in an echo, so a non-zero status
    # here comes from the remote call itself rather than from systemctl.
    if remote "${addrs[i]:-}" "systemctl restart $svc 2>&1 && echo 'restarted' || echo 'FAILED'" 2>/dev/null </dev/null; then
      echo ""
    else
      echo "UNREACHABLE"
    fi
  done
}
|
|
|
|
# Run one shell command on every host in the fleet.
# Globals:   host_names, hosts (read) — parallel space-separated lists
# Arguments: $1 - command line passed to the remote shell
# Outputs:   per host a banner, the command's combined stdout/stderr,
#            and "(failed)" when the remote call returns non-zero
# Returns:   1 with a usage message when no command is given
cmd_run() {
  local cmd="${1:-}"
  local -a names addrs
  local i
  if [ -z "$cmd" ]; then
    echo "Usage: fleet.sh run '<command>'"
    return 1
  fi
  log "Running '$cmd' on all hosts..."
  # Walk the lists by index instead of piping them into `while read`:
  # ssh inherits the loop's stdin and would swallow the remaining host
  # lines, so the command would only run on the first host.
  read -r -a names <<<"$host_names"
  read -r -a addrs <<<"$hosts"
  for i in "${!names[@]}"; do
    echo "=== ${names[i]} (${addrs[i]:-}) ==="
    remote "${addrs[i]:-}" "$cmd" 2>&1 </dev/null || echo "(failed)"
    echo ""
  done
}
|
|
|
|
# Run the config update script and restart hermes-agent on the deploy
# targets.
# Globals:   ALLEGRO, BEZALEL (read)
# Outputs:   per host a banner followed by the remote output, or
#            "(unreachable)" when the remote call returns non-zero
# NOTE(review): only allegro and bezalel are targeted although the log
# line says "all hosts" and the fleet lists also contain ezra — confirm
# this is intentional.
cmd_deploy() {
  # Loop variables are local so they do not leak into the caller's scope.
  local pair name host
  log "Deploying config to all hosts..."
  # Push timmy-config updates to each host
  for pair in "allegro:$ALLEGRO" "bezalel:$BEZALEL"; do
    name="${pair%%:*}"
    host="${pair##*:}"
    echo ""
    echo "=== $name ==="
    remote "$host" "cd /root && ./update-config.sh 2>/dev/null || echo 'No update script found'; systemctl restart hermes-agent 2>/dev/null && echo 'hermes-agent restarted' || echo 'hermes-agent not found'" 2>&1 || echo "(unreachable)"
  done
}
|
|
|
|
# Open an interactive SSH session on one named host.
# Globals:   host_names, hosts, SSH_OPTS, USER (read)
# Arguments: $1 - host name as listed in host_names
# Returns:   1 with a usage message for a missing or unknown name,
#            otherwise the exit status of ssh
cmd_ssh() {
  local target=${1:-} addr="" i
  local -a names addrs
  read -r -a names <<<"$host_names"
  read -r -a addrs <<<"$hosts"
  for i in "${!names[@]}"; do
    if [[ -n "$target" && "${names[i]}" == "$target" ]]; then
      addr=${addrs[i]:-}
      break
    fi
  done
  if [[ -z "$addr" ]]; then
    echo "Usage: fleet.sh ssh <${host_names// /|}>"
    return 1
  fi
  # Calls ssh directly so the session keeps the caller's stdin.
  # SSH_OPTS is a space-separated option string and is split on purpose.
  # shellcheck disable=SC2086
  ssh $SSH_OPTS "$USER@$addr"
}

# Print the command summary on stdout.
fleet_usage() {
  echo "Usage: fleet.sh <command> [options]"
  echo ""
  echo "Commands:"
  echo " health — Run health checks on all VPSes"
  echo " status — Show fleet status summary"
  echo " restart <svc> — Restart a service on all VPSes"
  echo " run '<cmd>' — Run a command on all VPSes"
  echo " deploy — Deploy config to all VPSes"
  echo " ssh <host> — SSH into host (allegro|bezalel|ezra)"
}

# Main dispatch
# Arguments: $1 - command name (defaults to "help"); the rest are
#            command arguments
# Returns:   the command's status; non-zero for an unknown command
main() {
  case "${1:-help}" in
    health) cmd_health ;;
    status) cmd_status ;;
    restart) cmd_restart "${2:-}" ;;
    # Join everything after "run" so an unquoted multi-word command is
    # not silently truncated to its first word.
    run) cmd_run "${*:2}" ;;
    ssh) cmd_ssh "${2:-}" ;;
    deploy) cmd_deploy ;;
    help) fleet_usage ;;
    *)
      echo "fleet.sh: unknown command '$1'" >&2
      fleet_usage
      return 1
      ;;
  esac
}

main "$@"
|