41 lines
1.6 KiB
Python
41 lines
1.6 KiB
Python
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
|
||
|
|
# Base directories; both are relative, so they resolve against the working
# directory the tests are run from.
_SCRIPTS_DIR = Path("scripts")
_CONFIGS_DIR = Path("configs")

# Shell scripts whose text the tests in this file inspect.
HEALTH_PROBE = _SCRIPTS_DIR / "fleet_health_probe.sh"
AUTO_RESTART = _SCRIPTS_DIR / "auto_restart_agent.sh"
BACKUP_PIPELINE = _SCRIPTS_DIR / "backup_pipeline.sh"

# Service unit files whose restart settings the tests in this file inspect.
HEALTH_SERVICE = _CONFIGS_DIR / "timmy-health.service"
TASK_ROUTER_SERVICE = _CONFIGS_DIR / "timmy-task-router.service"
AGENT_SERVICE = _CONFIGS_DIR / "timmy-agent.service"
|
||
|
|
|
||
|
|
|
||
|
|
def test_health_probe_has_thresholds_and_heartbeat() -> None:
    """Check that the health probe script contains its expected settings.

    This is a static text check: the script is read, never executed. It
    passes only if every snippet below appears verbatim in the file.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale's default encoding of the machine running the tests.
    content = HEALTH_PROBE.read_text(encoding="utf-8")

    required = (
        # Disk and memory threshold assignments.
        "DISK_THRESHOLD=90",
        "MEM_THRESHOLD=90",
        # Heartbeat file touched by the probe.
        'touch "${HEARTBEAT_DIR}/fleet_health.last"',
        # Overridable process list with act_runner as the fallback value.
        'CRITICAL_PROCESSES="${CRITICAL_PROCESSES:-act_runner}"',
    )

    # Collect every absent snippet so a single failure reports all of them.
    missing = [snippet for snippet in required if snippet not in content]
    assert not missing, f"{HEALTH_PROBE} is missing expected snippets: {missing}"
|
||
|
|
|
||
|
|
|
||
|
|
def test_auto_restart_agent_has_retry_cap_and_escalation() -> None:
    """Check that the auto-restart script contains its retry and escalation text.

    This is a static text check: the script is read, never executed. It
    passes only if every snippet below appears verbatim in the file.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # locale's default encoding of the machine running the tests.
    content = AUTO_RESTART.read_text(encoding="utf-8")

    required = (
        # Attempt counter increment and the comparison against 3.
        'count=$((count + 1))',
        '[[ "$count" -le 3 ]]',
        # Escalation message text.
        'ESCALATION: $proc_name still dead after 3 restart attempts.',
        # State file touched by the script.
        'touch "${STATE_DIR}/auto_restart.last"',
    )

    # Collect every absent snippet so a single failure reports all of them.
    missing = [snippet for snippet in required if snippet not in content]
    assert not missing, f"{AUTO_RESTART} is missing expected snippets: {missing}"
|
||
|
|
|
||
|
|
|
||
|
|
def test_backup_pipeline_has_offsite_sync_and_retention() -> None:
    """Check that the backup pipeline script contains its sync and retention text.

    This is a static text check: the script is read, never executed. It
    passes only if every snippet below appears verbatim in the file.
    """
    # The expected notification line contains a non-ASCII character, so the
    # file must be decoded as UTF-8 rather than with the locale's default
    # encoding, which could mis-decode it or raise UnicodeDecodeError.
    content = BACKUP_PIPELINE.read_text(encoding="utf-8")

    required = (
        # Off-site target variable that defaults to empty.
        'OFFSITE_TARGET="${OFFSITE_TARGET:-}"',
        # rsync invocation flags.
        'rsync -az --delete',
        # Removal of top-level backup directories matched by -mtime +7.
        'find "$BACKUP_ROOT" -mindepth 1 -maxdepth 1 -type d -mtime +7 -exec rm -rf {} +',
        # Completion notification.
        'send_telegram "✅ Daily backup completed: ${DATESTAMP}"',
    )

    # Collect every absent snippet so a single failure reports all of them.
    missing = [snippet for snippet in required if snippet not in content]
    assert not missing, f"{BACKUP_PIPELINE} is missing expected snippets: {missing}"
|
||
|
|
|
||
|
|
|
||
|
|
def test_self_healing_services_restart_automatically() -> None:
    """Check that each service unit file configures automatic restarts.

    This is a static text check: every unit file must contain the literal
    ``Restart=always`` and a ``RestartSec=`` setting (any value).
    """
    for path in (HEALTH_SERVICE, TASK_ROUTER_SERVICE, AGENT_SERVICE):
        # Decode explicitly as UTF-8 so the result does not depend on the
        # locale's default encoding of the machine running the tests.
        content = path.read_text(encoding="utf-8")

        # Name the file in each message; otherwise a failure inside the loop
        # does not say which unit is misconfigured.
        assert "Restart=always" in content, f"{path} does not set Restart=always"
        assert "RestartSec=" in content, f"{path} does not set RestartSec="
|