Files
the-nexus/scripts/fleet.sh
Timmy Time 68ee170bbb
Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
feat(ops): add cross-VPS fleet management script
- Local service control for alpha (4 agents) and beta (bezalel) hosts
- Status, restart, stop, start, update, and health commands
- Remote proxy via SSH with graceful fallback if keys not configured

Closes #910
2026-04-07 02:48:05 +00:00

115 lines
3.7 KiB
Bash

#!/usr/bin/env bash
# fleet.sh — Cross-VPS Fleet Management
# Usage: ./fleet.sh {status|restart|stop|start|update|health} [wizard|all|remote]
#
# Manages the per-wizard services spread over two hosts (Alpha and Beta).
# The script decides which host it is running on by comparing the machine's
# IP addresses with the two known host addresses below.
set -euo pipefail

ALPHA_HOST="167.99.126.228"
BETA_HOST="104.131.15.18"
# Full roster across both hosts; the per-host subset is chosen by detect_role.
WIZARDS=(allegro adagio ezra bezalel bilbobagginshire)

#######################################
# Work out which fleet host this machine is from its IP addresses.
# Globals:
#   ALPHA_HOST, BETA_HOST (read)
#   THIS_HOST, ROLE, LOCAL_WIZARDS, REMOTE_HOST (written)
# Arguments:
#   $@ - every IP address assigned to this machine (may be empty)
# Returns:
#   0 always; an unmatched machine is reported through ROLE="unknown".
#######################################
detect_role() {
  local addr

  # Defaults describe an unrecognised machine: no local wizards, and Alpha
  # is treated as the remote side.
  THIS_HOST="${1:-}"
  ROLE="unknown"
  LOCAL_WIZARDS=()
  REMOTE_HOST="$ALPHA_HOST"

  # Check every address, not just the first one: the address that identifies
  # the host is not guaranteed to be listed first.
  for addr in "$@"; do
    case "$addr" in
      "$BETA_HOST")
        THIS_HOST="$addr"
        ROLE="beta"
        LOCAL_WIZARDS=(bezalel)
        REMOTE_HOST="$ALPHA_HOST"
        return 0
        ;;
      "$ALPHA_HOST")
        THIS_HOST="$addr"
        ROLE="alpha"
        LOCAL_WIZARDS=(allegro adagio ezra bilbobagginshire)
        REMOTE_HOST="$BETA_HOST"
        return 0
        ;;
    esac
  done
  return 0
}

# Detect which host we're on.
# `hostname -I` may be unsupported or fail; `|| true` keeps `set -e` from
# aborting so the script can still fall back to limited mode.
host_addrs=()
read -r -a host_addrs <<<"$(hostname -I 2>/dev/null || true)"
# The ${arr[@]+...} form expands to nothing for an empty array without
# tripping `set -u` on bash versions older than 4.4.
detect_role ${host_addrs[@]+"${host_addrs[@]}"}

if [[ "$ROLE" == "unknown" ]]; then
  echo "WARN: Host IP ($THIS_HOST) does not match Alpha or Beta. Running in limited mode."
fi
#######################################
# Print the command synopsis with a few example invocations, then
# terminate the script.
# Outputs:
#   Help text on stdout.
# Returns:
#   Never returns; exits with status 1.
#######################################
usage() {
  cat <<EOF
Usage: $0 {status|restart|stop|start|update|health} [wizard|all]

Examples:
 $0 status all # Show all services on this host
 $0 restart allegro # Restart allegro (only works on Alpha)
 $0 health bezalel # Health check bezalel (only works on Beta)
 $0 status remote # Attempt status on remote host via SSH
EOF
  exit 1
}
# Positional arguments: <command> <target>. Both are required.
cmd="${1:-}"
target="${2:-}"
if [[ -z "$cmd" || -z "$target" ]]; then
  usage
fi

# Reject unknown commands up front. Otherwise a typo is only noticed once a
# local wizard is processed: it is silently accepted when this host runs no
# wizards, and it is forwarded as-is when the target lives on the remote host.
case "$cmd" in
  status | restart | stop | start | update | health) ;;
  *) usage ;;
esac
#######################################
# Apply the requested fleet command to one wizard's service on this host.
# Globals:
#   cmd (read) - status|restart|stop|start|update|health
#   FLEET_WIZARDS_ROOT (read, optional) - directory that holds one
#     sub-directory per wizard; defaults to /root/wizards
# Arguments:
#   $1 - wizard name; the systemd unit is "hermes-<name>"
# Outputs:
#   Status lines on stdout.
# Returns:
#   Status of the last command run; any other value of cmd is handed
#   to usage().
#######################################
run_local() {
  local wizard="$1"
  local svc="hermes-$wizard"
  local root="${FLEET_WIZARDS_ROOT:-/root/wizards}"
  local dir port_file port

  case "$cmd" in
    status)
      # is-active prints the unit state and exits non-zero when the unit is
      # not active; that is a report here, not an error.
      systemctl is-active "$svc" || true
      ;;
    restart | stop | start)
      sudo systemctl "$cmd" "$svc"
      ;;
    update)
      echo "Updating $wizard..."
      dir="$root/$wizard/hermes-agent"
      if [[ -d "$dir/.git" ]]; then
        # Subshell so the working directory change does not leak out.
        (cd "$dir" && git pull origin main)
      else
        echo "WARN: $dir is not a git repo. Skipping."
      fi
      ;;
    health)
      if systemctl is-active "$svc" > /dev/null; then
        echo "$svc: ACTIVE"
        # Quick API ping if gateway is expected
        port_file="$root/$wizard/home/.hermes/gateway_port"
        if [[ -f "$port_file" ]]; then
          port=$(cat "$port_file" 2>/dev/null || echo "")
          if [[ -n "$port" ]]; then
            # --max-time bounds the probe so an unresponsive gateway cannot
            # stall the whole health run.
            if curl -sf --max-time 5 "http://localhost:$port/health" > /dev/null; then
              echo " Gateway: OK"
            else
              echo " Gateway: NO RESPONSE"
            fi
          fi
        fi
      else
        echo "$svc: INACTIVE"
      fi
      ;;
    *)
      usage
      ;;
  esac
}
#######################################
# Run the current command on the other fleet host over SSH.
# Globals:
#   REMOTE_HOST (read) - address of the peer host
#   cmd, target (read) - command and target to forward
# Outputs:
#   The remote script's stdout, or setup hints when SSH is not usable.
# Returns:
#   0; a failing remote run is reported with a message, not a status.
#######################################
run_remote() {
  local -a ssh_opts=(-o ConnectTimeout=3 -o BatchMode=yes)
  local dest="root@$REMOTE_HOST"
  local remote_target="$target"
  local remote_cmd

  # "remote" only has meaning on the calling side. Forwarding it unchanged
  # would make the peer SSH straight back here and the two hosts would keep
  # bouncing the request; ask the peer for all of its own services instead.
  if [[ "$remote_target" == "remote" ]]; then
    remote_target="all"
  fi

  # %q shell-quotes the arguments so they reach the remote fleet.sh as
  # exactly two words, whatever characters they contain.
  printf -v remote_cmd 'cd /root/wizards/scripts && ./fleet.sh %q %q' \
    "$cmd" "$remote_target"

  # Cheap connectivity probe first; BatchMode makes ssh fail instead of
  # prompting when no key is set up.
  if ssh "${ssh_opts[@]}" "$dest" "hostname" > /dev/null 2>&1; then
    echo "=== Remote: $REMOTE_HOST ==="
    # stderr is dropped on purpose: any failure is summarised by the
    # fallback message below.
    ssh "${ssh_opts[@]}" "$dest" "$remote_cmd" 2>/dev/null \
      || echo "Remote fleet.sh failed or not found."
  else
    echo "=== Remote: $REMOTE_HOST (SSH not configured) ==="
    echo "To enable remote fleet management, add an SSH key for root@$REMOTE_HOST"
    echo "and ensure fleet.sh is present at /root/wizards/scripts/fleet.sh on both hosts."
  fi
}
#######################################
# Route the requested command to the right place based on the target.
# Globals:
#   target, THIS_HOST, ROLE, LOCAL_WIZARDS (read)
# Outputs:
#   Section headers plus whatever run_local / run_remote print.
# Returns:
#   Status of the last handler run; an unknown target goes to usage().
#######################################
dispatch_target() {
  local w found=0

  case "$target" in
    all)
      echo "=== Local host: $THIS_HOST ($ROLE) ==="
      # ${arr[@]+...} keeps an empty LOCAL_WIZARDS from raising
      # "unbound variable" under `set -u` on bash older than 4.4.
      for w in ${LOCAL_WIZARDS[@]+"${LOCAL_WIZARDS[@]}"}; do
        found=1
        echo "--- $w ---"
        run_local "$w"
      done
      if [[ "$found" -eq 0 ]]; then
        echo "No wizards are configured to run on this host."
      fi
      ;;
    remote)
      run_remote
      ;;
    allegro | adagio | ezra | bezalel | bilbobagginshire)
      # Exact-match membership test against this host's wizards.
      for w in ${LOCAL_WIZARDS[@]+"${LOCAL_WIZARDS[@]}"}; do
        if [[ "$w" == "$target" ]]; then
          found=1
          break
        fi
      done
      if [[ "$found" -eq 1 ]]; then
        run_local "$target"
      else
        echo "$target does not run on this host ($ROLE). Trying remote..."
        run_remote
      fi
      ;;
    *)
      echo "Unknown target: $target"
      usage
      ;;
  esac
}

dispatch_target