Files
timmy-home/tests/test_nexus_alert.sh
Timmy Bot 64a954f4d9 Enhance Kimi heartbeat with Nexus Watchdog alerting for stale lockfiles (#386)
- Add nexus_alert() function to send alerts to Nexus Watchdog
- Alerts are written as JSON files to $NEXUS_ALERT_DIR (default: /tmp/nexus-alerts)
- Alert includes: alert_id, timestamp, source, host, alert_type, severity, message, data
- Send 'stale_lock_reclaimed' warning alert when stale lock detected (age > 600s)
- Send 'heartbeat_resumed' info alert after successful recovery
- Include lock age, lockfile path, action taken, and stat info in alert data
- Add configurable NEXUS_ALERT_DIR and NEXUS_ALERT_ENABLED settings
- Add test script for validating alert functionality
2026-04-05 07:04:57 +00:00

147 lines
3.8 KiB
Bash
Executable File

#!/bin/bash
# Test script for Nexus Watchdog alerting functionality
set -euo pipefail
TEST_DIR="/tmp/test-nexus-alerts-$$"
export NEXUS_ALERT_DIR="$TEST_DIR"
export NEXUS_ALERT_ENABLED=true
echo "=== Nexus Watchdog Alert Test ==="
echo "Test alert directory: $TEST_DIR"
# Source the alert function from the heartbeat script
# Extract just the nexus_alert function for testing
cat > /tmp/test_alert_func.sh << 'ALEOF'
#!/bin/bash
NEXUS_ALERT_DIR="${NEXUS_ALERT_DIR:-/tmp/nexus-alerts}"
NEXUS_ALERT_ENABLED=true
HOSTNAME=$(hostname -s 2>/dev/null || echo "unknown")
SCRIPT_NAME="kimi-heartbeat-test"
nexus_alert() {
local alert_type="$1"
local message="$2"
local severity="${3:-info}"
local extra_data="${4:-{}}"
if [ "$NEXUS_ALERT_ENABLED" != "true" ]; then
return 0
fi
mkdir -p "$NEXUS_ALERT_DIR" 2>/dev/null || return 0
local timestamp
timestamp=$(date -u '+%Y-%m-%dT%H:%M:%SZ')
local nanoseconds=$(date +%N 2>/dev/null || echo "$$")
local alert_id="${SCRIPT_NAME}_$(date +%s)_${nanoseconds}_$$"
local alert_file="$NEXUS_ALERT_DIR/${alert_id}.json"
cat > "$alert_file" << EOF
{
"alert_id": "$alert_id",
"timestamp": "$timestamp",
"source": "$SCRIPT_NAME",
"host": "$HOSTNAME",
"alert_type": "$alert_type",
"severity": "$severity",
"message": "$message",
"data": $extra_data
}
EOF
if [ -f "$alert_file" ]; then
echo "NEXUS_ALERT: $alert_type [$severity] - $message"
return 0
else
echo "NEXUS_ALERT_FAILED: Could not write alert"
return 1
fi
}
ALEOF
source /tmp/test_alert_func.sh
# Test 1: Basic alert
echo -e "\n[TEST 1] Sending basic info alert..."
nexus_alert "test_alert" "Test message from heartbeat" "info" '{"test": true}'
# Test 2: Stale lock alert simulation
echo -e "\n[TEST 2] Sending stale lock alert..."
nexus_alert \
"stale_lock_reclaimed" \
"Stale lockfile deadlock cleared after 650s" \
"warning" \
'{"lock_age_seconds": 650, "lockfile": "/tmp/kimi-heartbeat.lock", "action": "removed"}'
# Test 3: Heartbeat resumed alert
echo -e "\n[TEST 3] Sending heartbeat resumed alert..."
nexus_alert \
"heartbeat_resumed" \
"Kimi heartbeat resumed after clearing stale lock" \
"info" \
'{"recovery": "successful", "continuing": true}'
# Check results
echo -e "\n=== Alert Files Created ==="
alert_count=$(find "$TEST_DIR" -name "*.json" 2>/dev/null | wc -l)
echo "Total alert files: $alert_count"
if [ "$alert_count" -eq 3 ]; then
echo "✅ All 3 alerts were created successfully"
else
echo "❌ Expected 3 alerts, found $alert_count"
exit 1
fi
echo -e "\n=== Alert Contents ==="
for f in "$TEST_DIR"/*.json; do
echo -e "\n--- $(basename "$f") ---"
cat "$f" | python3 -m json.tool 2>/dev/null || cat "$f"
done
# Validate JSON structure
echo -e "\n=== JSON Validation ==="
all_valid=true
for f in "$TEST_DIR"/*.json; do
if python3 -c "import json; json.load(open('$f'))" 2>/dev/null; then
echo "$(basename "$f") - Valid JSON"
else
echo "$(basename "$f") - Invalid JSON"
all_valid=false
fi
done
# Check for required fields
echo -e "\n=== Required Fields Check ==="
for f in "$TEST_DIR"/*.json; do
basename=$(basename "$f")
missing=()
python3 -c "import json; d=json.load(open('$f'))" 2>/dev/null || continue
for field in alert_id timestamp source host alert_type severity message data; do
if ! python3 -c "import json; d=json.load(open('$f')); exit(0 if '$field' in d else 1)" 2>/dev/null; then
missing+=("$field")
fi
done
if [ ${#missing[@]} -eq 0 ]; then
echo "$basename - All required fields present"
else
echo "$basename - Missing fields: ${missing[*]}"
all_valid=false
fi
done
# Cleanup
rm -rf "$TEST_DIR" /tmp/test_alert_func.sh
echo -e "\n=== Test Summary ==="
if [ "$all_valid" = true ]; then
echo "✅ All tests passed!"
exit 0
else
echo "❌ Some tests failed"
exit 1
fi