Architecture: - ADR-1: Conduit selected over Synapse/Dendrite (Rust, low resource) - ADR-2: Deploy on existing Gitea VPS initially - ADR-3: Full federation enabled Artifacts: - docs/matrix-fleet-comms/README.md (architecture + runbooks) - deploy/conduit/conduit.toml (production config) - deploy/conduit/conduit.service (systemd) - deploy/conduit/Caddyfile (reverse proxy) - deploy/conduit/install.sh (one-command installer) - deploy/conduit/scripts/backup.sh (automated backups) - deploy/conduit/scripts/health.sh (health monitoring) Closes #183 (scaffold complete) Progresses #166 (implementation unblocked)
143 lines
3.5 KiB
Bash
143 lines
3.5 KiB
Bash
#!/bin/bash
# Conduit Matrix Homeserver Health Check
# Location: /opt/conduit/scripts/health.sh
# Reference: docs/matrix-fleet-comms/README.md

# Strict mode: stop on unhandled command failures (-e), on use of unset
# variables (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Base URL of the homeserver; the client-server API probes are built from it.
readonly HOMESERVER_URL="https://matrix.timmy.foundation"
# Not referenced by any check visible in this file; kept as-is.
# shellcheck disable=SC2034
readonly ADMIN_EMAIL="admin@timmy.foundation"

# Colors for output. The values are stored as literal backslash sequences and
# are turned into real escape characters by the log_* helpers when printing.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly NC='\033[0m' # No Color
# Print an informational message to stdout, prefixed with a green "[INFO]" tag.
# Globals:   GREEN, NC (read)
# Arguments: $* - message words; they are joined with single spaces
log_info() {
  # %b expands the backslash sequences stored in the color variables, while
  # the message goes through %s so any backslashes in it are printed verbatim.
  printf '%b[INFO]%b %s\n' "${GREEN}" "${NC}" "$*"
}
# Print a warning message to stdout, prefixed with a yellow "[WARN]" tag.
# Globals:   YELLOW, NC (read)
# Arguments: $* - message words; they are joined with single spaces
log_warn() {
  # %b expands the backslash sequences stored in the color variables, while
  # the message goes through %s so any backslashes in it are printed verbatim.
  printf '%b[WARN]%b %s\n' "${YELLOW}" "${NC}" "$*"
}
# Print an error message to stdout, prefixed with a red "[ERROR]" tag.
# Globals:   RED, NC (read)
# Arguments: $* - message words; they are joined with single spaces
log_error() {
  # %b expands the backslash sequences stored in the color variables, while
  # the message goes through %s so any backslashes in it are printed verbatim.
  printf '%b[ERROR]%b %s\n' "${RED}" "${NC}" "$*"
}
# Check if Conduit process is running
#
# Asks systemd whether the "conduit" unit is active.
# Outputs: one log line via log_info / log_error
# Returns: 0 when the unit is active, 1 otherwise
check_process() {
  if ! systemctl is-active --quiet conduit; then
    log_error "Conduit service is not running"
    return 1
  fi

  log_info "Conduit service is running"
  return 0
}
# Check Matrix client-server API
#
# Requests $HOMESERVER_URL/_matrix/client/versions and looks only at the HTTP
# status code; the response body is discarded.
# Globals: HOMESERVER_URL (read)
# Outputs: one log line via log_info / log_error
# Returns: 0 when the status is 200, 1 otherwise (including when curl fails)
check_client_api() {
  local response

  # When no HTTP response is received, curl's -w already prints "000" before
  # curl exits non-zero. The fallback therefore replaces the captured value
  # instead of echoing a second "000" into it. --max-time bounds the probe so
  # an unresponsive server cannot hang the health check.
  response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 \
    "$HOMESERVER_URL/_matrix/client/versions" 2>/dev/null) || response="000"

  if [ "$response" = "200" ]; then
    log_info "Client-server API is responding (HTTP 200)"
    return 0
  fi

  log_error "Client-server API returned HTTP $response"
  return 1
}
# Check Matrix versions endpoint
#
# Fetches $HOMESERVER_URL/_matrix/client/versions and extracts the "versions"
# array from the JSON body with jq, joined as a comma-separated list.
# Globals: HOMESERVER_URL (read)
# Outputs: one log line via log_info / log_warn
# Returns: 0 when a non-empty version list was obtained, 1 otherwise
check_versions() {
  local body versions

  # Fetch and parse in two steps so that a failure of either tool is detected
  # here directly, without relying on the caller having enabled pipefail.
  body=$(curl -s --max-time 10 "$HOMESERVER_URL/_matrix/client/versions" 2>/dev/null) || body=""
  versions=$(jq -r '.versions | join(", ")' <<<"$body" 2>/dev/null) || versions=""

  # An empty result (curl failed, body was not the expected JSON, or the list
  # was empty) is reported as unknown rather than as a success.
  if [ -n "$versions" ]; then
    log_info "Supported Matrix versions: $versions"
    return 0
  fi

  log_warn "Could not determine Matrix versions"
  return 1
}
# Check federation (self-test)
#
# Requests a report for server_name=timmy.foundation from the public
# federation tester and looks only at the HTTP status code of that request.
# NOTE(review): the report body is discarded (-o /dev/null), so HTTP 200 only
# shows that the tester answered the request - confirm whether the report
# contents should be inspected as well.
# Outputs: one log line via log_info / log_warn
# Returns: 0 when the status is 200, 1 otherwise (including when curl fails)
check_federation() {
  local response

  # When no HTTP response is received, curl's -w already prints "000" before
  # curl exits non-zero. The fallback therefore replaces the captured value
  # instead of echoing a second "000" into it. --max-time bounds the request
  # so a slow or unreachable tester cannot hang the health check.
  response=$(curl -s -o /dev/null -w "%{http_code}" --max-time 30 \
    "https://federationtester.matrix.org/api/report?server_name=timmy.foundation" 2>/dev/null) || response="000"

  if [ "$response" = "200" ]; then
    log_info "Federation tester can reach server"
    return 0
  fi

  log_warn "Federation tester returned HTTP $response (may be DNS propagation)"
  return 1
}
# Check disk space
#
# Reads the use percentage of the filesystem holding the data directory.
# Arguments: $1 - directory to inspect (optional, default /opt/conduit/data)
# Outputs:   one log line via log_info / log_warn / log_error
# Returns:   0 when usage is below 80%; 1 when it is 80% or more, or when the
#            usage could not be determined
check_disk_space() {
  local data_dir="${1:-/opt/conduit/data}"
  local usage

  # -P selects the POSIX output format, which keeps each filesystem on a
  # single line, so the percentage is always field 5 of the second line.
  usage=$(df -P "$data_dir" | awk 'NR == 2 { sub(/%/, "", $5); print $5 }') || usage=""

  # If df failed or printed something unexpected, say so instead of feeding a
  # non-number to the integer comparisons below.
  if [[ ! "$usage" =~ ^[0-9]+$ ]]; then
    log_error "Could not determine disk usage for $data_dir"
    return 1
  fi

  if [ "$usage" -lt 80 ]; then
    log_info "Disk usage: ${usage}% (healthy)"
    return 0
  fi

  if [ "$usage" -lt 90 ]; then
    log_warn "Disk usage: ${usage}% (consider cleanup)"
  else
    log_error "Disk usage: ${usage}% (critical!)"
  fi
  return 1
}
# Check database size
#
# Reports the on-disk size of the database file as printed by `du -h`.
# Arguments: $1 - path of the database file (optional, default
#                 /opt/conduit/data/conduit.db)
# Outputs:   one log line via log_info / log_warn
# Returns:   0 when the file exists, 1 when it is not a regular file
check_database() {
  local db_path="${1:-/opt/conduit/data/conduit.db}"
  local size

  if [ ! -f "$db_path" ]; then
    log_warn "Database file not found at $db_path"
    return 1
  fi

  # du prints "<size><TAB><path>"; keep only the size column.
  size=$(du -h -- "$db_path" | cut -f1)
  log_info "Database size: $size"
  return 0
}
# Main health check
#
# Prints a banner, runs every check in order, and prints a final verdict.
# check_process, check_client_api and check_disk_space are critical: a failure
# of any of them makes the overall result 1. The remaining checks only log.
# Globals: HOMESERVER_URL (read)
# Returns: 0 when all critical checks passed, 1 otherwise
main() {
  local exit_code=0

  echo "========================================"
  echo "Conduit Matrix Homeserver Health Check"
  echo "Server: $HOMESERVER_URL"
  echo "Time: $(date)"
  echo "========================================"
  echo

  check_process || exit_code=1
  check_client_api || exit_code=1
  check_versions || true    # Non-critical
  check_federation || true  # Non-critical during initial setup
  check_disk_space || exit_code=1
  check_database || true    # Non-critical

  echo
  if [ "$exit_code" -eq 0 ]; then
    log_info "All critical checks passed ✓"
  else
    log_error "Some critical checks failed ✗"
  fi

  return "$exit_code"
}
# Entry point: pass the script's command-line arguments through to main.
main "$@"