feat(ops): add cross-VPS fleet management script
Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
- Local service control for alpha (4 agents) and beta (bezalel) hosts
- Status, restart, stop, start, update, and health commands
- Remote proxy via SSH with graceful fallback if keys are not configured

Closes #910
This commit is contained in:
114
scripts/fleet.sh
Normal file
114
scripts/fleet.sh
Normal file
@@ -0,0 +1,114 @@
|
||||
#!/usr/bin/env bash
# fleet.sh — Cross-VPS Fleet Management
# Usage: ./fleet.sh {status|restart|stop|start|update|health} [wizard|all|remote]
#
# Controls the hermes-<wizard> systemd services that are split across two
# hosts ("alpha" and "beta"). The script works out which host it is running
# on by comparing the machine's own addresses with the two known host IPs.

set -euo pipefail

readonly ALPHA_HOST="167.99.126.228"
readonly BETA_HOST="104.131.15.18"
readonly WIZARDS=(allegro adagio ezra bezalel bilbobagginshire)

# Collect every address the machine reports. `hostname -I` is not available
# on all systems and may fail; under `set -e`/`pipefail` that must not abort
# the script, because the "limited mode" fallback below handles that case.
HOST_ADDRS=()
read -r -a HOST_ADDRS <<<"$(hostname -I 2>/dev/null || true)"

# First reported address, used for display; "unknown" when none was reported.
THIS_HOST="${HOST_ADDRS[0]:-unknown}"

# Defaults describe a host that is neither Alpha nor Beta.
ROLE="unknown"
LOCAL_WIZARDS=()
REMOTE_HOST="$ALPHA_HOST"

# Detect which host we're on. Every reported address is checked (not only the
# first) so that a multi-homed machine is still recognised. The
# ${arr[@]+...} form keeps `set -u` quiet for an empty array on Bash < 4.4.
for _addr in ${HOST_ADDRS[@]+"${HOST_ADDRS[@]}"}; do
  if [[ "$_addr" == "$BETA_HOST" ]]; then
    ROLE="beta"
    LOCAL_WIZARDS=(bezalel)
    REMOTE_HOST="$ALPHA_HOST"
    THIS_HOST="$_addr"
    break
  elif [[ "$_addr" == "$ALPHA_HOST" ]]; then
    ROLE="alpha"
    LOCAL_WIZARDS=(allegro adagio ezra bilbobagginshire)
    REMOTE_HOST="$BETA_HOST"
    THIS_HOST="$_addr"
    break
  fi
done
unset _addr

if [[ "$ROLE" == "unknown" ]]; then
  echo "WARN: Host IP ($THIS_HOST) does not match Alpha or Beta. Running in limited mode."
fi
|
||||
|
||||
#######################################
# Print the command synopsis and a few examples, then terminate the script.
# Arguments: none
# Outputs:   help text on stdout
# Exits:     always exits with status 1; never returns to the caller
#######################################
usage() {
  # The synopsis lists `remote` because the target dispatcher accepts it and
  # the last example below demonstrates it.
  cat <<EOF
Usage: $0 {status|restart|stop|start|update|health} [wizard|all|remote]

Examples:
 $0 status all # Show all services on this host
 $0 restart allegro # Restart allegro (only works on Alpha)
 $0 health bezalel # Health check bezalel (only works on Beta)
 $0 status remote # Attempt status on remote host via SSH
EOF
  exit 1
}
|
||||
|
||||
# Positional arguments: $1 is the command, $2 is the target (a wizard name,
# "all", or "remote"). Both default to empty so `set -u` does not trip when
# they are missing.
cmd="${1:-}"
target="${2:-}"

# Both arguments are mandatory; usage prints help and exits.
if [[ -z "$cmd" || -z "$target" ]]; then
  usage
fi

# Reject unknown commands before doing any work. Otherwise a typo is only
# noticed once a local service is touched, and goes unnoticed when the
# request is proxied over SSH or when this host runs no wizards.
case "$cmd" in
  status|restart|stop|start|update|health) ;;
  *) usage ;;
esac
|
||||
|
||||
#######################################
# Apply the requested command to one wizard's service on this host.
# Globals:   cmd (read) - status|restart|stop|start|update|health
# Arguments: $1 - wizard name; the systemd unit is "hermes-<name>"
# Outputs:   human-readable result on stdout
# Returns:   0 for status and health (an inactive unit is reported, not
#            treated as an error); otherwise the status of the underlying
#            systemctl/git command. An unrecognised command calls usage,
#            which exits the script.
#######################################
run_local() {
  local wizard="$1"
  local svc="hermes-$wizard"
  local dir port_file port

  case "$cmd" in
    status)
      # is-active exits non-zero for a unit that is not active; that is a
      # valid answer here, so it must not trip `set -e`.
      systemctl is-active "$svc" || true
      ;;
    restart) sudo systemctl restart "$svc" ;;
    stop) sudo systemctl stop "$svc" ;;
    start) sudo systemctl start "$svc" ;;
    update)
      echo "Updating $wizard..."
      dir="/root/wizards/$wizard/hermes-agent"
      if [[ -d "$dir/.git" ]]; then
        git -C "$dir" pull origin main
      else
        echo "WARN: $dir is not a git repo. Skipping."
      fi
      ;;
    health)
      if systemctl is-active --quiet "$svc"; then
        echo "$svc: ACTIVE"
        # Quick API ping if gateway is expected
        port_file="/root/wizards/$wizard/home/.hermes/gateway_port"
        if [[ -f "$port_file" ]]; then
          # An unreadable file is treated like a missing port.
          port=$(cat "$port_file" 2>/dev/null) || port=""
          # Drop stray whitespace/CR so the URL below stays well-formed.
          port="${port//[[:space:]]/}"
          if [[ -n "$port" ]]; then
            # --max-time bounds the probe so a wedged gateway cannot hang
            # the whole health run.
            if curl -sf --max-time 5 "http://localhost:$port/health" >/dev/null; then
              echo " Gateway: OK"
            else
              echo " Gateway: NO RESPONSE"
            fi
          fi
        fi
      else
        echo "$svc: INACTIVE"
      fi
      ;;
    *) usage ;;
  esac
}
|
||||
|
||||
#######################################
# Run the current command on the other host by invoking its copy of
# fleet.sh over SSH.
# Globals:   cmd, target, REMOTE_HOST (read)
#            FLEET_REMOTE_HOP (read) - set on the remote side by this
#            function to mark an invocation that already arrived via SSH
# Arguments: none
# Outputs:   remote output, or setup hints when SSH is not usable, on stdout
# Returns:   1 if this invocation is itself a proxied call (no second hop);
#            0 otherwise, including when SSH or the remote script fails
#######################################
run_remote() {
  local -a ssh_opts=(-o ConnectTimeout=3 -o BatchMode=yes)
  local dest="root@$REMOTE_HOST"
  local remote_target="$target"
  local remote_cmd

  # Never proxy a request that was itself proxied: if the remote host
  # mis-detects its role it would bounce the call straight back, and the two
  # hosts would keep calling each other.
  if [[ -n "${FLEET_REMOTE_HOP:-}" ]]; then
    echo "Refusing to proxy again: this invocation already arrived via SSH."
    return 1
  fi

  # "remote" means "everything on the other host". Forwarding the literal
  # word would make the other host proxy back to this one.
  if [[ "$remote_target" == "remote" ]]; then
    remote_target="all"
  fi

  # Cheap connectivity probe; BatchMode makes ssh fail instead of prompting.
  if ssh "${ssh_opts[@]}" "$dest" "hostname" >/dev/null 2>&1; then
    echo "=== Remote: $REMOTE_HOST ==="
    # %q quotes the arguments for the remote shell that parses this string.
    printf -v remote_cmd \
      'cd /root/wizards/scripts && FLEET_REMOTE_HOP=1 ./fleet.sh %q %q' \
      "$cmd" "$remote_target"
    ssh "${ssh_opts[@]}" "$dest" "$remote_cmd" 2>/dev/null \
      || echo "Remote fleet.sh failed or not found."
  else
    echo "=== Remote: $REMOTE_HOST (SSH not configured) ==="
    echo "To enable remote fleet management, add an SSH key for root@$REMOTE_HOST"
    echo "and ensure fleet.sh is present at /root/wizards/scripts/fleet.sh on both hosts."
  fi
}
|
||||
|
||||
# Dispatch on the target: every local wizard, the other host, or one named
# wizard (handled locally when it runs here, otherwise proxied over SSH).
case "$target" in
  all)
    echo "=== Local host: $THIS_HOST ($ROLE) ==="
    # LOCAL_WIZARDS is empty in limited mode. Expanding an empty array with
    # "${arr[@]}" is an "unbound variable" error under `set -u` on
    # Bash < 4.4, hence the ${arr[@]+...} guard.
    for w in ${LOCAL_WIZARDS[@]+"${LOCAL_WIZARDS[@]}"}; do
      echo "--- $w ---"
      run_local "$w"
    done
    ;;
  remote)
    run_remote
    ;;
  allegro|adagio|ezra|bezalel|bilbobagginshire)
    # Literal membership test on the space-joined list; `-` supplies an
    # empty default so an empty array is safe under `set -u`.
    if [[ " ${LOCAL_WIZARDS[*]-} " == *" $target "* ]]; then
      run_local "$target"
    else
      echo "$target does not run on this host ($ROLE). Trying remote..."
      run_remote
    fi
    ;;
  *)
    echo "Unknown target: $target"
    usage
    ;;
esac
|
||||
Reference in New Issue
Block a user