#!/bin/bash
# Conduit Matrix Homeserver Health Check
# Location: /opt/conduit/scripts/health.sh
# Reference: docs/matrix-fleet-comms/README.md

# Strict mode: exit on unhandled command failure (-e), treat unset variables
# as errors (-u), and make a pipeline fail if any stage fails (pipefail).
set -euo pipefail

# Base URL of the homeserver queried by the HTTP checks.
readonly HOMESERVER_URL="https://matrix.timmy.foundation"
# Administrator contact address. Not referenced by any check visible in this
# file; kept so existing configuration is not lost.
# shellcheck disable=SC2034
readonly ADMIN_EMAIL="admin@timmy.foundation"

# Colors for output. The values are stored as literal backslash sequences
# (single-quoted), so they only become real escape characters when the log
# helpers expand them at print time.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color
#######################################
# Print an informational message prefixed with a green "[INFO]" tag.
# Globals:   GREEN, NC (read) - color sequences stored as backslash text
# Arguments: $* - message words, joined by the first character of IFS
# Outputs:   one line on stdout
#######################################
log_info() {
  # %b expands the backslash escapes held in the color variables; %s prints
  # the message verbatim, so backslashes inside it are never interpreted
  # (echo -e would expand them as escapes).
  printf '%b[INFO]%b %s\n' "${GREEN}" "${NC}" "$*"
}
#######################################
# Print a warning message prefixed with a yellow "[WARN]" tag.
# Globals:   YELLOW, NC (read) - color sequences stored as backslash text
# Arguments: $* - message words, joined by the first character of IFS
# Outputs:   one line on stdout
#######################################
log_warn() {
  # %b expands the backslash escapes held in the color variables; %s prints
  # the message verbatim, so backslashes inside it are never interpreted
  # (echo -e would expand them as escapes).
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$*"
}
#######################################
# Print an error message prefixed with a red "[ERROR]" tag.
# Globals:   RED, NC (read) - color sequences stored as backslash text
# Arguments: $* - message words, joined by the first character of IFS
# Outputs:   one line on stdout
#######################################
log_error() {
  # %b expands the backslash escapes held in the color variables; %s prints
  # the message verbatim, so backslashes inside it are never interpreted
  # (echo -e would expand them as escapes).
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$*"
}

# Check if Conduit process is running
#######################################
# Ask systemd whether the "conduit" unit is active and log the outcome.
# Arguments: none
# Outputs:   one log line via log_info or log_error
# Returns:   0 when the unit is active, 1 otherwise
#######################################
check_process() {
  # Guard clause: handle the failure case first and leave early.
  if ! systemctl is-active --quiet conduit; then
    log_error "Conduit service is not running"
    return 1
  fi

  log_info "Conduit service is running"
  return 0
}

# Check Matrix client-server API
#######################################
# Probe the Matrix client-server versions endpoint and report the HTTP
# status it returns.
# Globals:   HOMESERVER_URL (read)
# Arguments: none
# Outputs:   one log line via log_info or log_error
# Returns:   0 when the endpoint answers HTTP 200, 1 otherwise
#######################################
check_client_api() {
  local response

  # curl's -w '%{http_code}' already prints "000" when no HTTP response was
  # received, so the fallback must replace the captured text instead of
  # appending to it (appending produced "000000" in the log).
  # --max-time bounds the whole request so an unresponsive server cannot
  # stall the health check.
  response=$(curl -s -o /dev/null -w '%{http_code}' --max-time 10 \
    "${HOMESERVER_URL}/_matrix/client/versions" 2>/dev/null) || response="000"

  if [[ "$response" == "200" ]]; then
    log_info "Client-server API is responding (HTTP 200)"
    return 0
  fi

  log_error "Client-server API returned HTTP $response"
  return 1
}

# Check Matrix versions endpoint
#######################################
# Fetch the versions document from the homeserver and log the advertised
# Matrix versions as a comma-separated list.
# Globals:   HOMESERVER_URL (read)
# Arguments: none
# Outputs:   one log line via log_info or log_warn
# Returns:   0 when a non-empty version list was extracted, 1 otherwise
#######################################
check_versions() {
  local body versions

  # Fetch and parse in two steps so each failure is detected on its own and
  # the result does not depend on whether pipefail is enabled.
  # -f makes curl fail on HTTP error statuses; --max-time bounds the request.
  body=$(curl -fs --max-time 10 \
    "${HOMESERVER_URL}/_matrix/client/versions" 2>/dev/null) || body=""

  # jq exits non-zero when the body is not JSON or has no "versions" array.
  versions=$(jq -r '.versions | join(", ")' <<<"$body" 2>/dev/null) || versions=""

  # An empty result (empty body or empty list) counts as "unknown" too.
  if [[ -z "$versions" ]]; then
    log_warn "Could not determine Matrix versions"
    return 1
  fi

  log_info "Supported Matrix versions: $versions"
  return 0
}

# Check federation (self-test)
#######################################
# Request a report from the public Matrix federation tester and log the
# HTTP status of that request.
# Arguments: $1 - server name to test (optional, default "timmy.foundation")
# Outputs:   one log line via log_info or log_warn
# Returns:   0 when the tester answers HTTP 200, 1 otherwise
#######################################
check_federation() {
  local server_name="${1:-timmy.foundation}"
  local response

  # curl's -w '%{http_code}' already prints "000" when no HTTP response was
  # received, so the fallback replaces the captured text instead of appending
  # to it (appending produced "000000" in the log).
  # --max-time bounds the request so a slow third-party service cannot stall
  # the health check.
  # NOTE(review): only the HTTP status is inspected; the report body is
  # discarded, so whether it indicates working federation is not checked here.
  response=$(curl -s -o /dev/null -w '%{http_code}' --max-time 30 \
    "https://federationtester.matrix.org/api/report?server_name=${server_name}" \
    2>/dev/null) || response="000"

  if [[ "$response" == "200" ]]; then
    log_info "Federation tester can reach server"
    return 0
  fi

  log_warn "Federation tester returned HTTP $response (may be DNS propagation)"
  return 1
}

# Check disk space
#######################################
# Report how full the filesystem holding the Conduit data directory is.
# Arguments: $1 - path to inspect (optional, default "/opt/conduit/data")
# Outputs:   one log line via log_info, log_warn or log_error
# Returns:   0 when usage is below 80%, 1 when it is 80% or more or when
#            the usage could not be determined
#######################################
check_disk_space() {
  local data_dir="${1:-/opt/conduit/data}"
  local usage

  # -P selects the POSIX output format: exactly one line per filesystem, so
  # a long device name cannot wrap and shift the columns. Line 2 is the data
  # row and field 5 is the capacity percentage.
  usage=$(df -P -- "$data_dir" 2>/dev/null \
    | awk 'NR == 2 { sub(/%$/, "", $5); print $5 }') || usage=""

  # df fails when the path is missing; without this guard the numeric tests
  # below would error out on an empty value and report a bogus percentage.
  if [[ ! "$usage" =~ ^[0-9]+$ ]]; then
    log_error "Could not determine disk usage for $data_dir"
    return 1
  fi

  # 10# forces base 10 so a value with a leading zero is not read as octal.
  if (( 10#$usage < 80 )); then
    log_info "Disk usage: ${usage}% (healthy)"
    return 0
  elif (( 10#$usage < 90 )); then
    log_warn "Disk usage: ${usage}% (consider cleanup)"
    return 1
  else
    log_error "Disk usage: ${usage}% (critical!)"
    return 1
  fi
}

# Check database size
#######################################
# Log the on-disk size of the Conduit database file.
# Arguments: $1 - database file path (optional, default
#                 "/opt/conduit/data/conduit.db")
# Outputs:   one log line via log_info or log_warn
# Returns:   0 when the path is a regular file, 1 when it is not
#######################################
check_database() {
  local db_path="${1:-/opt/conduit/data/conduit.db}"
  local size

  if [[ ! -f "$db_path" ]]; then
    log_warn "Database file not found at $db_path"
    return 1
  fi

  # du prints "<size><TAB><path>"; keep only the human-readable size.
  # "--" stops option parsing in case the path begins with a dash.
  size=$(du -h -- "$db_path" | cut -f1)
  log_info "Database size: $size"
  return 0
}

# Main health check
#######################################
# Print a report header, run every health check in order, and summarize.
# Globals:   HOMESERVER_URL (read)
# Arguments: accepted but not used
# Outputs:   header, the output of each check, and a summary line on stdout
# Returns:   0 when every critical check passed, 1 otherwise
#######################################
main() {
  local rule="========================================"
  local status=0
  local entry severity check_fn

  # Checks run in this order; only entries tagged "critical" can fail the run.
  local -a checks=(
    "critical:check_process"
    "critical:check_client_api"
    "optional:check_versions"    # Non-critical
    "optional:check_federation"  # Non-critical during initial setup
    "critical:check_disk_space"
    "optional:check_database"    # Non-critical
  )

  printf '%s\n' \
    "$rule" \
    "Conduit Matrix Homeserver Health Check" \
    "Server: $HOMESERVER_URL" \
    "Time: $(date)" \
    "$rule" \
    ""

  for entry in "${checks[@]}"; do
    severity=${entry%%:*}
    check_fn=${entry#*:}
    # Running the check inside a condition keeps a failure from aborting the
    # script under "set -e".
    if ! "$check_fn" && [[ "$severity" == "critical" ]]; then
      status=1
    fi
  done

  printf '\n'
  case "$status" in
    0) log_info "All critical checks passed ✓" ;;
    *) log_error "Some critical checks failed ✗" ;;
  esac

  return "$status"
}
# Entry point: forward the script's arguments to main. As the last command,
# main's return value becomes the script's exit status.
main "$@"