Files
timmy-config/wizards/allegro/monitoring/wizard-health.sh
2026-03-31 20:02:01 +00:00

195 lines
6.3 KiB
Bash
Executable File

#!/bin/bash
# Multi-Wizard Health Monitor
# Monitors all wizards on Hermes VPS
#
# Outputs:
#   LOG_FILE    - append-only run log
#   REPORT_FILE - HTML status dashboard, rewritten on every run
#   ALERT_FILE  - plain-text alert, written when a critical service is down
LOG_FILE=/root/wizards/allegro/logs/wizard-health.log
REPORT_FILE=/root/wizards/allegro/monitoring/status.html
ALERT_FILE=/root/wizards/allegro/father-messages/wizard-alert.txt

# Make sure every output directory exists before anything is written to it
# (the alert directory included, so a later alert write cannot fail on a
# missing directory).
mkdir -p -- \
  "$(dirname -- "$LOG_FILE")" \
  "$(dirname -- "$REPORT_FILE")" \
  "$(dirname -- "$ALERT_FILE")"

# The timestamp carries a literal "UTC" suffix, so format it in UTC (-u)
# instead of relying on the host's time zone happening to be UTC.
TIMESTAMP=$(date -u '+%Y-%m-%d %H:%M:%S UTC')
echo "[$TIMESTAMP] Health check starting..." >> "$LOG_FILE"
# Initialize status variables: every service is DOWN until its probe succeeds.
AP_GATEWAY_STATUS="DOWN"
EZRA_GATEWAY_STATUS="DOWN"
OLLAMA_STATUS="DOWN"
GITEA_STATUS="DOWN"
SEARXNG_STATUS="DOWN"

# Upper bounds (seconds) for each probe, so a service that accepts the
# connection but never answers cannot stall the whole health check.
PROBE_CONNECT_TIMEOUT=3
PROBE_MAX_TIME=10

#######################################
# Probe an HTTP endpoint and discard the response.
# Globals:   PROBE_CONNECT_TIMEOUT, PROBE_MAX_TIME (read)
# Arguments: $1 - URL to request
# Returns:   curl's exit status. --fail is not used, so any HTTP response
#            received in time (whatever its status code) yields 0.
#######################################
probe_http() {
  curl -s \
    --connect-timeout "$PROBE_CONNECT_TIMEOUT" \
    --max-time "$PROBE_MAX_TIME" \
    "$1" > /dev/null 2>&1
}

#######################################
# Append a "DOWN" alert line for a service to the run log.
# Globals:   TIMESTAMP, LOG_FILE (read)
# Arguments: $1 - service label as it should appear in the log
#######################################
log_down() {
  echo "[$TIMESTAMP] ALERT: $1 DOWN" >> "$LOG_FILE"
}

# Check Allegro-Primus Gateway (port 8644)
if probe_http http://127.0.0.1:8644/health; then
  AP_GATEWAY_STATUS="UP"
  # First PID whose command line matches; set only when the probe succeeded.
  AP_PID=$(pgrep -f "allegro-primus.*gateway" | head -1)
else
  log_down "Allegro-Primus gateway"
fi

# Check Ezra Gateway (port 8643)
if probe_http http://127.0.0.1:8643/health; then
  EZRA_GATEWAY_STATUS="UP"
  EZRA_PID=$(pgrep -f "ezra.*gateway" | head -1)
else
  log_down "Ezra gateway"
fi

# Check Ollama (port 11434)
if probe_http http://localhost:11434/api/tags; then
  OLLAMA_STATUS="UP"
  OLLAMA_PID=$(pgrep -f "ollama serve" | head -1)
else
  log_down "Ollama"
fi

# Check Gitea (port 3000)
if probe_http http://localhost:3000/api/v1/version; then
  GITEA_STATUS="UP"
  GITEA_PID=$(pgrep -f "gitea" | head -1)
else
  log_down "Gitea"
fi

# Check SearXNG (port 8080)
if probe_http http://localhost:8080; then
  SEARXNG_STATUS="UP"
else
  log_down "SearXNG"
fi
# Count cron jobs
# Read the current user's crontab once; an absent crontab yields an empty list.
CRONTAB_ENTRIES=$(crontab -l 2>/dev/null || true)
# grep -c always prints the count (including "0") but exits 1 when nothing
# matches. Appending "|| echo 0" would therefore produce the two-line value
# "0<newline>0"; "|| true" only neutralises the exit status.
AP_CRON=$(printf '%s\n' "$CRONTAB_ENTRIES" | grep -c "allegro-primus" || true)
# NOTE(review): "wizards/allegro" is also a prefix of "wizards/allegro-primus",
# so entries under such a path would be counted here as well - confirm intent.
ALLEGRO_CRON=$(printf '%s\n' "$CRONTAB_ENTRIES" | grep -c "wizards/allegro" || true)
# Generate HTML Dashboard

#######################################
# Count how many of the given status strings are exactly "UP".
# Arguments: zero or more status strings
# Outputs:   the count on stdout
#######################################
count_up() {
  local status total=0
  for status in "$@"; do
    if [[ "$status" == "UP" ]]; then
      total=$((total + 1))
    fi
  done
  printf '%s\n' "$total"
}

#######################################
# Print the dashboard HTML to stdout.
# Globals (read): TIMESTAMP, AP_GATEWAY_STATUS, EZRA_GATEWAY_STATUS,
#                 OLLAMA_STATUS, GITEA_STATUS, SEARXNG_STATUS,
#                 AP_CRON, ALLEGRO_CRON
# Outputs:        the complete HTML document on stdout
#######################################
render_dashboard() {
  local active_wizards healthy_services
  # Both wizards count towards the "/ 2" total, and the three infrastructure
  # services are summed into one number for the "/ 3" total.
  active_wizards=$(count_up "$AP_GATEWAY_STATUS" "$EZRA_GATEWAY_STATUS")
  healthy_services=$(count_up "$OLLAMA_STATUS" "$GITEA_STATUS" "$SEARXNG_STATUS")

  # Unquoted delimiter: variables expand inside the document. ${VAR,,}
  # lower-cases the status so it matches the .up / .down CSS classes.
  cat << HTML
<!DOCTYPE html>
<html>
<head>
<title>Hermes VPS - Wizard Health Dashboard</title>
<meta http-equiv="refresh" content="60">
<style>
body { font-family: monospace; background: #1a1a1a; color: #eee; padding: 20px; }
h1 { color: #00ff88; border-bottom: 2px solid #00ff88; }
h2 { color: #66ccff; margin-top: 30px; }
.status { display: inline-block; padding: 5px 15px; border-radius: 3px; font-weight: bold; }
.up { background: #2d5016; color: #7fff00; }
.down { background: #501616; color: #ff4444; }
table { border-collapse: collapse; width: 100%; margin: 20px 0; }
th, td { padding: 12px; text-align: left; border-bottom: 1px solid #333; }
th { background: #2a2a2a; color: #66ccff; }
tr:hover { background: #2a2a2a; }
.metric { display: inline-block; margin: 10px 20px 10px 0; padding: 10px 20px;
background: #2a2a2a; border-left: 4px solid #66ccff; }
.timestamp { color: #888; font-size: 0.9em; }
.alert { background: #501616; padding: 10px; margin: 10px 0; border-left: 4px solid #ff4444; }
</style>
</head>
<body>
<h1>🏰 Hermes VPS - Wizard Health Dashboard</h1>
<p class="timestamp">Last updated: $TIMESTAMP</p>
<h2>🧙 Wizard Status</h2>
<table>
<tr>
<th>Wizard</th>
<th>Port</th>
<th>Status</th>
<th>Purpose</th>
</tr>
<tr>
<td>Allegro-Primus (AP)</td>
<td>8644</td>
<td><span class="status ${AP_GATEWAY_STATUS,,}">$AP_GATEWAY_STATUS</span></td>
<td>Child wizard, Ollama-local, autonomous</td>
</tr>
<tr>
<td>Ezra</td>
<td>8643</td>
<td><span class="status ${EZRA_GATEWAY_STATUS,,}">$EZRA_GATEWAY_STATUS</span></td>
<td>Scribe, archivist, Claude-Hermes</td>
</tr>
</table>
<h2>⚙️ Infrastructure Services</h2>
<table>
<tr>
<th>Service</th>
<th>Port</th>
<th>Status</th>
<th>Purpose</th>
</tr>
<tr>
<td>Ollama</td>
<td>11434</td>
<td><span class="status ${OLLAMA_STATUS,,}">$OLLAMA_STATUS</span></td>
<td>Local LLM inference engine</td>
</tr>
<tr>
<td>Gitea</td>
<td>3000</td>
<td><span class="status ${GITEA_STATUS,,}">$GITEA_STATUS</span></td>
<td>Git hosting, issue tracking</td>
</tr>
<tr>
<td>SearXNG</td>
<td>8080</td>
<td><span class="status ${SEARXNG_STATUS,,}">$SEARXNG_STATUS</span></td>
<td>Private search engine</td>
</tr>
</table>
<h2>📊 Metrics</h2>
<div class="metric">
<strong>Active Wizards:</strong>
$active_wizards / 2
</div>
<div class="metric">
<strong>Services Healthy:</strong>
$healthy_services / 3
</div>
<div class="metric">
<strong>AP Cron Jobs:</strong> $AP_CRON
</div>
<div class="metric">
<strong>Allegro Cron Jobs:</strong> $ALLEGRO_CRON
</div>
<h2>🔄 Auto-Refresh</h2>
<p>This page refreshes every 60 seconds.</p>
<p><a href="file:///root/wizards/allegro/monitoring/status.html"
style="color: #66ccff;">View local file</a></p>
</body>
</html>
HTML
}

# Only report success when the dashboard file was actually written.
if render_dashboard > "$REPORT_FILE"; then
  echo "[$TIMESTAMP] Dashboard updated: $REPORT_FILE" >> "$LOG_FILE"
else
  echo "[$TIMESTAMP] ERROR: could not write dashboard: $REPORT_FILE" >> "$LOG_FILE"
fi
# Alert if critical services down
# Ollama and Gitea are the two services treated as critical. Each gets one
# line in the alert body; the line is left empty when that service is not
# down, so the file always has the same number of lines.
ollama_line=""
gitea_line=""
if [[ "$OLLAMA_STATUS" == "DOWN" ]]; then
  ollama_line="- Ollama (port 11434) - LLM engine"
fi
if [[ "$GITEA_STATUS" == "DOWN" ]]; then
  gitea_line="- Gitea (port 3000) - Git hosting"
fi

if [[ -n "$ollama_line" || -n "$gitea_line" ]]; then
  # Overwrite (not append) the alert file with the current alert.
  printf '%s\n' \
    "CRITICAL ALERT - $(date)" \
    "The following critical services are DOWN:" \
    "$ollama_line" \
    "$gitea_line" \
    "Check logs:" \
    "- /root/wizards/allegro/logs/wizard-health.log" \
    "- /var/log/syslog" \
    "Auto-generated by wizard-health.sh" \
    > "$ALERT_FILE"
  echo "[$TIMESTAMP] ALERT written to $ALERT_FILE" >> "$LOG_FILE"
fi

echo "[$TIMESTAMP] Health check complete" >> "$LOG_FILE"
# Summary output
# One line on stdout with the status of every monitored service.
printf -v SUMMARY 'AP:%s Ezra:%s Ollama:%s Gitea:%s SearXNG:%s' \
  "$AP_GATEWAY_STATUS" \
  "$EZRA_GATEWAY_STATUS" \
  "$OLLAMA_STATUS" \
  "$GITEA_STATUS" \
  "$SEARXNG_STATUS"
printf '%s\n' "$SUMMARY"