Closes #126: bin/start-loops.sh -- health check + kill stale + launch all loops Closes #129: bin/gitea-api.sh -- Python urllib wrapper bypassing security scanner Closes #130: bin/fleet-status.sh -- one-liner health per wizard with color output All syntax-checked with bash -n.
269 lines
8.7 KiB
Bash
Executable File
269 lines
8.7 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# ── fleet-status.sh ───────────────────────────────────────────────────
|
|
# One-line-per-wizard health check for all Hermes houses.
|
|
# Exit 0 = all healthy, Exit 1 = something down.
|
|
# Usage: fleet-status.sh [--no-color] [--json]
|
|
# ───────────────────────────────────────────────────────────────────────
|
|
# A pipeline fails when any of its stages fails, not only the last one.
set -o pipefail

# ── Options ──
# Both flags default to off. Arguments that match neither flag are ignored.
# JSON_OUT is only recorded here; nothing else in this script reads it.
NO_COLOR=false
JSON_OUT=false
for arg in "$@"; do
  if [ "$arg" = "--no-color" ]; then
    NO_COLOR=true
  elif [ "$arg" = "--json" ]; then
    JSON_OUT=true
  fi
done
|
|
|
|
# ── Colors ──
|
|
# ANSI SGR sequences: G=green Y=yellow RD=red C=cyan M=magenta B=bold D=dim
# R=reset. They are stored unexpanded (literal backslash sequences) and only
# turned into escape bytes later by `echo -e` / printf format strings.
# Start with colour disabled, then enable it only when it was not switched
# off with --no-color and stdout is a terminal.
G="" ; Y="" ; RD="" ; C="" ; M="" ; B="" ; D="" ; R=""
if [ "$NO_COLOR" != true ] && [ -t 1 ]; then
  G='\033[32m'
  Y='\033[33m'
  RD='\033[31m'
  C='\033[36m'
  M='\033[35m'
  B='\033[1m'
  D='\033[2m'
  R='\033[0m'
fi
|
|
|
|
# ── Config ──
|
|
# SSH destinations of the two remote houses.
EZRA_HOST="root@143.198.27.163"
BEZALEL_HOST="root@67.205.155.108"

# Options shared by every ssh call. Kept as one flat string that is expanded
# unquoted at the call sites so it splits into separate arguments.
# NOTE(review): StrictHostKeyChecking=no skips host-key verification —
# confirm this is acceptable for these hosts.
SSH_OPTS="-o ConnectTimeout=4 -o StrictHostKeyChecking=no -o BatchMode=yes"

# Gitea REST endpoint and API token. The token is read from a file in the
# user's home directory; when the file cannot be read the token is empty.
GITEA_API="http://143.198.27.163:3000/api/v1"
GITEA_TOKEN=$(cat ~/.hermes/gitea_token_vps 2>/dev/null)

# Set to 1 by print_line for any row whose status is neither UP nor WARN;
# decides the script's exit code.
ANY_DOWN=0
|
|
|
|
# ── Helpers ──
|
|
# Print the current time as seconds since the Unix epoch.
now_epoch() {
  date '+%s'
}
|
|
|
|
#######################################
# Render an ISO-8601 timestamp as a coarse "how long ago" string.
# Arguments:
#   $1 - ISO-8601 timestamp; a "Z" suffix is accepted.
# Outputs:
#   "<n>s ago", "<n>m ago", "<h>h <m>m ago" or "<n>d ago" on stdout, or
#   "unknown" when the argument is empty or cannot be parsed.
# Returns:
#   0 always.
#######################################
time_ago() {
  local iso="$1"
  if [ -z "$iso" ]; then
    echo "unknown"
    return 0
  fi

  # The timestamp is handed to Python as an argument, never spliced into the
  # program text, so quotes or backslashes in it cannot alter the code.
  local ts
  ts=$(python3 -c '
import sys
from datetime import datetime

try:
    # "Z" is rewritten because older fromisoformat() rejects it.
    stamp = datetime.fromisoformat(sys.argv[1].replace("Z", "+00:00"))
    print(int(stamp.timestamp()))
except Exception:
    print(0)
' "$iso" 2>/dev/null)

  # Empty (python3 missing/crashed), 0 (parse failure) or anything that is
  # not a plain integer is reported as unknown.
  case "$ts" in
    '' | 0 | *[!0-9-]*)
      echo "unknown"
      return 0
      ;;
  esac

  local now
  now=$(now_epoch)
  local diff=$(( now - ts ))
  # A timestamp slightly in the future (clock skew) must not print a
  # negative age.
  if [ "$diff" -lt 0 ]; then
    diff=0
  fi

  if [ "$diff" -lt 60 ]; then
    echo "${diff}s ago"
  elif [ "$diff" -lt 3600 ]; then
    echo "$(( diff / 60 ))m ago"
  elif [ "$diff" -lt 86400 ]; then
    echo "$(( diff / 3600 ))h $(( (diff % 3600) / 60 ))m ago"
  else
    echo "$(( diff / 86400 ))d ago"
  fi
}
|
|
|
|
#######################################
# Fetch the newest commit of a Gitea repository.
# Globals:
#   GITEA_API   (read) - base URL of the Gitea REST API
#   GITEA_TOKEN (read) - API token; the auth header is omitted when empty
# Arguments:
#   $1 - repository as "owner/name"
# Outputs:
#   "<created>|<first 40 chars of the first message line>" on stdout, or
#   nothing when the request fails, the reply is not usable JSON, or the
#   repository has no commits.
#######################################
gitea_last_commit() {
  local repo="$1"

  local -a auth=()
  if [ -n "${GITEA_TOKEN:-}" ]; then
    auth=(-H "Authorization: token ${GITEA_TOKEN}")
  fi

  local result
  result=$(curl -sf --max-time 5 "${auth[@]}" \
    "${GITEA_API}/repos/${repo}/commits?limit=1" 2>/dev/null)
  if [ -z "$result" ]; then
    echo ""
    return 0
  fi

  # The response is piped to Python on stdin. Embedding it in the program
  # text would turn JSON "\n" escapes into raw newlines (which json rejects)
  # and would let repository content run as code.
  printf '%s' "$result" | python3 -c '
import json, sys

try:
    commits = json.loads(sys.stdin.buffer.read())
except ValueError:
    sys.exit(0)

if isinstance(commits, list) and commits:
    head = commits[0]
    ts = head.get("created", "")
    msg = head["commit"]["message"].split("\n")[0][:40]
    print(ts + "|" + msg)
else:
    print("")
' 2>/dev/null
}
|
|
|
|
#######################################
# Print one aligned status row.
# Globals:
#   G, Y, RD, D, R (read)  - colour sequences (may be empty)
#   ANY_DOWN       (write) - set to 1 when status is neither UP nor WARN
# Arguments:
#   $1 name, $2 status, $3 model/service text, $4 activity text
#######################################
print_line() {
  local name="$1" status="$2" model="$3" activity="$4"
  local tint

  case "$status" in
    UP)
      tint="$G"
      ;;
    WARN)
      tint="$Y"
      ;;
    *)
      # Anything else counts as down and flips the global failure flag.
      tint="$RD"
      ANY_DOWN=1
      ;;
  esac

  # The colour variables sit in the format string on purpose: printf is what
  # turns their backslash sequences into escape bytes.
  printf " ${tint}●${R} %-12s ${tint}%-4s${R} %-18s ${D}%s${R}\n" "$name" "$status" "$model" "$activity"
}
|
|
|
|
# ── Header ──
|
|
# Banner: a blank line, the title with the current local time, then the
# column headings framed by two horizontal rules. %b expands the backslash
# sequences held in the colour variables, as `echo -e` does.
echo ""
printf '%b\n' " ${B}${M}⚡ FLEET STATUS${R} ${D}$(date '+%Y-%m-%d %H:%M:%S')${R}"
printf '%b\n' " ${D}──────────────────────────────────────────────────────────────${R}"
printf " %-14s %-6s %-18s %s\n" "WIZARD" "STATE" "MODEL/SERVICE" "LAST ACTIVITY"
printf '%b\n' " ${D}──────────────────────────────────────────────────────────────${R}"
|
|
|
|
# ── 1. Timmy (local gateway + loops) ──
|
|
# Count processes whose full command line matches the pattern in $1 and
# print exactly one integer. (`pgrep -c ... || echo 0` is not used because
# pgrep prints "0" itself AND exits non-zero when nothing matches, which
# yields the two-line value "0<newline>0".)
_count_procs() {
  local matches
  matches=$(pgrep -f "$1" 2>/dev/null | wc -l)
  # Arithmetic expansion strips any padding wc adds around the number.
  echo $(( matches ))
}

TIMMY_STATUS="DOWN"
TIMMY_MODEL=""
TIMMY_ACTIVITY=""

# Gateway process: try the specific pattern first, then a looser one.
GW_PID=$(pgrep -f "hermes.*gateway.*run" 2>/dev/null | head -1)
if [ -z "$GW_PID" ]; then
  GW_PID=$(pgrep -f "gateway run" 2>/dev/null | head -1)
fi

# Local agent loops.
CLAUDE_LOOPS=$(_count_procs "claude-loop")
GEMINI_LOOPS=$(_count_procs "gemini-loop")

if [ -n "$GW_PID" ]; then
  TIMMY_STATUS="UP"
  TIMMY_MODEL="gateway(pid:${GW_PID})"
else
  TIMMY_STATUS="DOWN"
  TIMMY_MODEL="gateway:missing"
fi

# Local health endpoint. The body is parsed from stdin so that its content
# can never be interpreted as Python code.
# NOTE(review): the final status decision below overrides this result
# whenever no gateway PID was found, so the probe currently cannot change
# the outcome — confirm whether that is intended.
TIMMY_HEALTH=$(curl -sf --max-time 3 "http://localhost:8000/health" 2>/dev/null)
if [ -n "$TIMMY_HEALTH" ]; then
  HEALTH_STATUS=$(printf '%s' "$TIMMY_HEALTH" \
    | python3 -c 'import json, sys; print(json.loads(sys.stdin.buffer.read()).get("status", "?"))' 2>/dev/null)
  if [ "$HEALTH_STATUS" = "healthy" ] || [ "$HEALTH_STATUS" = "ok" ]; then
    TIMMY_STATUS="UP"
  fi
fi

TIMMY_ACTIVITY="loops: claude=${CLAUDE_LOOPS} gemini=${GEMINI_LOOPS}"

# Age of the latest timmy-config commit ("<created>|<message>").
TC_COMMIT=$(gitea_last_commit "Timmy_Foundation/timmy-config")
if [ -n "$TC_COMMIT" ]; then
  TC_TIME=${TC_COMMIT%%|*}
  TC_MSG=${TC_COMMIT#*|}
  TC_AGO=$(time_ago "$TC_TIME")
  TIMMY_ACTIVITY="${TIMMY_ACTIVITY} | cfg:${TC_AGO}"
fi

# Final verdict: no gateway and no loops is DOWN; loops without a gateway
# is WARN; a running gateway keeps the status chosen above.
if [ -z "$GW_PID" ] && [ "$CLAUDE_LOOPS" -eq 0 ] && [ "$GEMINI_LOOPS" -eq 0 ]; then
  TIMMY_STATUS="DOWN"
elif [ -z "$GW_PID" ]; then
  TIMMY_STATUS="WARN"
fi

print_line "Timmy" "$TIMMY_STATUS" "$TIMMY_MODEL" "$TIMMY_ACTIVITY"
|
|
|
|
# ── 2. Ezra (VPS 143.198.27.163) ──
|
|
#######################################
# Probe one remote house over SSH: systemd unit state, HTTP health endpoint
# (port 8080, then 8000) and the time the unit became active.
# Globals:
#   SSH_OPTS (read) - flat option string, word-split on purpose
#   HOUSE_STATUS, HOUSE_MODEL, HOUSE_ACTIVITY (written) - probe result
# Arguments:
#   $1 - ssh destination (user@host)
#   $2 - systemd unit name without the ".service" suffix
# Returns:
#   0 always; the outcome is carried by the HOUSE_* globals.
#######################################
probe_remote_house() {
  local host="$1" unit="$2"
  local svc health port started

  HOUSE_STATUS="DOWN"
  HOUSE_MODEL="$unit"
  HOUSE_ACTIVITY=""

  # shellcheck disable=SC2086  # SSH_OPTS must split into separate words
  svc=$(ssh $SSH_OPTS "$host" "systemctl is-active ${unit}.service" 2>/dev/null)
  if [ "$svc" != "active" ]; then
    # Empty output means ssh itself produced nothing (host unreachable,
    # auth failure, ...); otherwise show the state systemd reported.
    HOUSE_MODEL="${unit}(svc:${svc:-unreachable})"
    return 0
  fi

  HOUSE_STATUS="UP"

  # First port that returns a non-empty health body wins.
  health=""
  for port in 8080 8000; do
    # shellcheck disable=SC2086
    health=$(ssh $SSH_OPTS "$host" "curl -sf --max-time 3 http://localhost:${port}/health 2>/dev/null" 2>/dev/null)
    if [ -n "$health" ]; then
      break
    fi
  done

  if [ -n "$health" ]; then
    HOUSE_MODEL="${unit}(ok)"
  else
    # Unit is active but neither port answered.
    HOUSE_STATUS="WARN"
    HOUSE_MODEL="${unit}(svc:up,http:?)"
  fi

  # shellcheck disable=SC2086
  started=$(ssh $SSH_OPTS "$host" "systemctl show ${unit}.service --property=ActiveEnterTimestamp --value" 2>/dev/null)
  if [ -n "$started" ]; then
    HOUSE_ACTIVITY="since ${started}"
  fi
  return 0
}

probe_remote_house "$EZRA_HOST" "hermes-ezra"
EZRA_STATUS="$HOUSE_STATUS"
EZRA_MODEL="$HOUSE_MODEL"
EZRA_ACTIVITY="$HOUSE_ACTIVITY"

print_line "Ezra" "$EZRA_STATUS" "$EZRA_MODEL" "$EZRA_ACTIVITY"

# ── 3. Bezalel (VPS 67.205.155.108) ──
probe_remote_house "$BEZALEL_HOST" "hermes-bezalel"
BEZ_STATUS="$HOUSE_STATUS"
BEZ_MODEL="$HOUSE_MODEL"
BEZ_ACTIVITY="$HOUSE_ACTIVITY"

print_line "Bezalel" "$BEZ_STATUS" "$BEZ_MODEL" "$BEZ_ACTIVITY"
|
|
|
|
# ── 4. the-nexus last commit ──
|
|
# The Nexus row reflects whether the newest commit of the-nexus could be
# fetched: UP with "<age>: <message>" when it could, WARN otherwise.
NEXUS_MODEL="nexus-repo"
NX_COMMIT=$(gitea_last_commit "Timmy_Foundation/the-nexus")

if [ -z "$NX_COMMIT" ]; then
  NEXUS_STATUS="WARN"
  NEXUS_ACTIVITY="(could not fetch)"
else
  NEXUS_STATUS="UP"
  # NX_COMMIT is "<created>|<message>"; everything after the first "|" is
  # the message.
  NX_TIME=$(cut -d'|' -f1 <<<"$NX_COMMIT")
  NX_MSG=$(cut -d'|' -f2- <<<"$NX_COMMIT")
  NX_AGO=$(time_ago "$NX_TIME")
  NEXUS_ACTIVITY="${NX_AGO}: ${NX_MSG}"
fi

print_line "Nexus" "$NEXUS_STATUS" "$NEXUS_MODEL" "$NEXUS_ACTIVITY"
|
|
|
|
# ── 5. Gitea server itself ──
|
|
# Read a Gitea /version JSON document on stdin and print its "version"
# field. Prints "?" when the field is absent, the input is not a JSON
# object, or python3 is unavailable. Reading from stdin keeps the server's
# reply out of the Python program text.
gitea_version_from_json() {
  local parsed
  parsed=$(python3 -c '
import json, sys

try:
    print(json.loads(sys.stdin.buffer.read()).get("version", "?"))
except Exception:
    print("?")
' 2>/dev/null)
  echo "${parsed:-?}"
}

GITEA_STATUS="DOWN"
GITEA_MODEL="gitea(unreachable)"
GITEA_ACTIVITY=""

# Any non-empty reply from the version endpoint counts as "server is up".
GITEA_VER=$(curl -sf --max-time 5 "${GITEA_API}/version" 2>/dev/null)
if [ -n "$GITEA_VER" ]; then
  GITEA_STATUS="UP"
  VER=$(printf '%s' "$GITEA_VER" | gitea_version_from_json)
  GITEA_MODEL="gitea v${VER}"
  # Show host:port taken from GITEA_API (scheme and path stripped) so the
  # row cannot drift from the configured endpoint.
  GITEA_ACTIVITY=${GITEA_API#*://}
  GITEA_ACTIVITY=${GITEA_ACTIVITY%%/*}
fi

print_line "Gitea" "$GITEA_STATUS" "$GITEA_MODEL" "$GITEA_ACTIVITY"
|
|
|
|
# ── Footer ──
|
|
# Closing rule, one-line verdict, blank line, then exit with the matching
# code: 0 when ANY_DOWN is 0, otherwise 1.
echo -e " ${D}──────────────────────────────────────────────────────────────${R}"

if [ "$ANY_DOWN" -eq 0 ]; then
  summary="${G}${B}All systems operational${R}"
  exit_code=0
else
  summary="${RD}${B}⚠ One or more systems DOWN${R}"
  exit_code=1
fi

echo -e " ${summary}"
echo ""
exit "$exit_code"
|