[kimi] Implement graceful shutdown and health checks (#1397) (#1457)
Some checks failed
Tests / lint (push) Failing after 7s
Tests / test (push) Has been skipped

This commit was merged in pull request #1457.
This commit is contained in:
2026-03-24 19:31:14 +00:00
parent 640d78742a
commit 8518db921e
3 changed files with 641 additions and 11 deletions

View File

@@ -15,13 +15,19 @@ import pytest
from dashboard.routes.health import (
DependencyStatus,
DetailedHealthStatus,
HealthStatus,
LivenessStatus,
ReadinessStatus,
SovereigntyReport,
_calculate_overall_score,
_check_lightning,
_check_ollama_sync,
_check_sqlite,
_generate_recommendations,
get_shutdown_info,
is_shutting_down,
request_shutdown,
)
# ---------------------------------------------------------------------------
@@ -497,3 +503,283 @@ class TestSnapshotEndpoint:
data = client.get("/health/snapshot").json()
assert data["overall_status"] == "unknown"
# -----------------------------------------------------------------------------
# Shutdown State Tests
# -----------------------------------------------------------------------------
class TestShutdownState:
    """Cover the shutdown-flag helpers exported by the health routes module."""

    @pytest.fixture(autouse=True)
    def _reset_shutdown_state(self):
        """Clear the module-level shutdown globals around every test."""
        import dashboard.routes.health as health_mod

        def _clear():
            # Same three assignments before and after the test body, so
            # state set by one test is not seen by the next.
            health_mod._shutdown_requested = False
            health_mod._shutdown_reason = None
            health_mod._shutdown_start_time = None

        _clear()
        yield
        _clear()

    def test_is_shutting_down_initial(self):
        """With the flags reset, no shutdown is reported."""
        shutting_down = is_shutting_down()
        assert shutting_down is False

    def test_request_shutdown_sets_state(self):
        """Requesting a shutdown flips the flag reported by the helper."""
        request_shutdown(reason="test")
        shutting_down = is_shutting_down()
        assert shutting_down is True

    def test_get_shutdown_info_when_not_shutting_down(self):
        """No shutdown requested means there is no info payload."""
        assert get_shutdown_info() is None

    def test_get_shutdown_info_when_shutting_down(self):
        """The info payload carries the reason plus the timing keys."""
        request_shutdown(reason="test_reason")
        details = get_shutdown_info()

        assert details is not None
        assert details["requested"] is True
        assert details["reason"] == "test_reason"
        for timing_key in ("elapsed_seconds", "timeout_seconds"):
            assert timing_key in details
# -----------------------------------------------------------------------------
# Detailed Health Endpoint Tests
# -----------------------------------------------------------------------------
class TestDetailedHealthEndpoint:
    """Tests for GET /health/detailed."""

    @staticmethod
    def _ollama_reports(status, details):
        """Return a patcher making ``_check_ollama_sync`` yield a canned status."""
        canned = DependencyStatus(
            name="Ollama AI",
            status=status,
            sovereignty_score=10,
            details=details,
        )
        return patch("dashboard.routes.health._check_ollama_sync", return_value=canned)

    def test_returns_200_when_healthy(self, client):
        """A healthy Ollama check yields 200 and the expected top-level keys."""
        with self._ollama_reports("healthy", {}):
            response = client.get("/health/detailed")

        assert response.status_code == 200
        body = response.json()
        assert body["status"] in ["healthy", "degraded", "unhealthy"]
        for field in ("timestamp", "version", "uptime_seconds", "services", "system"):
            assert field in body

    def test_returns_503_when_service_unhealthy(self, client):
        """An unavailable Ollama check turns the endpoint into a 503."""
        with self._ollama_reports("unavailable", {"error": "down"}):
            response = client.get("/health/detailed")

        assert response.status_code == 503
        body = response.json()
        assert body["status"] == "unhealthy"

    def test_includes_shutdown_info_when_shutting_down(self, client):
        """While shutting down, the response is 503 and embeds shutdown info."""
        fake_info = {
            "requested": True,
            "reason": "test",
            "elapsed_seconds": 1.5,
            "timeout_seconds": 30.0,
        }
        shutting_down = patch("dashboard.routes.health.is_shutting_down", return_value=True)
        shutdown_info = patch("dashboard.routes.health.get_shutdown_info", return_value=fake_info)

        # Patchers are entered in the same order as the original nested blocks.
        with self._ollama_reports("healthy", {}), shutting_down, shutdown_info:
            response = client.get("/health/detailed")

        assert response.status_code == 503
        body = response.json()
        assert "shutdown" in body
        assert body["shutdown"]["requested"] is True

    def test_services_structure(self, client):
        """Every service entry exposes ``status`` and a boolean ``healthy``."""
        with self._ollama_reports("healthy", {"model": "test"}):
            response = client.get("/health/detailed")

        body = response.json()
        assert "services" in body
        services = body["services"]
        assert "ollama" in services
        assert "sqlite" in services

        # Only the per-service payloads matter here; the names were checked above.
        for entry in services.values():
            assert "status" in entry
            assert "healthy" in entry
            assert isinstance(entry["healthy"], bool)
# -----------------------------------------------------------------------------
# Readiness Probe Tests
# -----------------------------------------------------------------------------
class TestReadinessProbe:
    """Tests for GET /ready."""

    def test_returns_200_when_ready(self, client):
        """The probe answers 200 or 503, and the code agrees with ``ready``.

        This test does not wait for startup to finish, so either outcome is
        acceptable. What must hold is that the HTTP status and the ``ready``
        flag in the body tell the same story.
        """
        response = client.get("/ready")
        data = response.json()

        # Should return either 200 (ready) or 503 (not ready)
        assert response.status_code in [200, 503]
        assert "ready" in data
        assert isinstance(data["ready"], bool)
        assert "timestamp" in data
        assert "checks" in data

        # Accepting both codes alone would let a "503 but ready=True" (or the
        # reverse) response slip through; pin the code to the flag.
        expected_status = 200 if data["ready"] else 503
        assert response.status_code == expected_status

    def test_checks_structure(self, client):
        """The ``checks`` mapping lists the core readiness checks."""
        response = client.get("/ready")
        data = response.json()

        assert "checks" in data
        checks = data["checks"]
        # Core checks that should be present
        for check_name in ("startup_complete", "database", "not_shutting_down"):
            assert check_name in checks

    def test_not_ready_during_shutdown(self, client):
        """A shutdown in progress makes the probe report 503 / not ready."""
        shutting_down = patch("dashboard.routes.health.is_shutting_down", return_value=True)
        shutdown_reason = patch("dashboard.routes.health._shutdown_reason", "test shutdown")

        with shutting_down, shutdown_reason:
            response = client.get("/ready")

        assert response.status_code == 503
        data = response.json()
        assert data["ready"] is False
        assert data["checks"]["not_shutting_down"] is False
        assert "reason" in data
# -----------------------------------------------------------------------------
# Liveness Probe Tests
# -----------------------------------------------------------------------------
class TestLivenessProbe:
    """Tests for GET /live."""

    def test_returns_200_when_alive(self, client):
        """Without a shutdown in play the probe answers 200 and alive=True."""
        response = client.get("/live")
        assert response.status_code == 200

        body = response.json()
        assert body["alive"] is True
        for field in ("timestamp", "uptime_seconds", "shutdown_requested"):
            assert field in body

    def test_shutdown_requested_field(self, client):
        """``shutdown_requested`` is False when no shutdown is reported."""
        with patch("dashboard.routes.health.is_shutting_down", return_value=False):
            response = client.get("/live")

        body = response.json()
        assert body["shutdown_requested"] is False

    def test_alive_false_after_shutdown_timeout(self, client):
        """A shutdown started far beyond the timeout yields 503 / alive=False."""
        import dashboard.routes.health as health_mod

        # Start time 999 s in the past against a 30 s timeout.
        long_ago = time.monotonic() - 999
        requested = patch.object(health_mod, "_shutdown_requested", True)
        started = patch.object(health_mod, "_shutdown_start_time", long_ago)
        timeout = patch.object(health_mod, "GRACEFUL_SHUTDOWN_TIMEOUT", 30.0)

        with requested, started, timeout:
            response = client.get("/live")

        assert response.status_code == 503
        body = response.json()
        assert body["alive"] is False
        assert body["shutdown_requested"] is True
# -----------------------------------------------------------------------------
# New Pydantic Model Tests
# -----------------------------------------------------------------------------
class TestDetailedHealthStatusModel:
    """Validate DetailedHealthStatus model."""

    def test_fields(self):
        """Constructed values are readable back from the model attributes."""
        payload = {
            "status": "healthy",
            "timestamp": "2026-01-01T00:00:00+00:00",
            "version": "2.0.0",
            "uptime_seconds": 42.5,
            "services": {"db": {"status": "up", "healthy": True, "details": {}}},
            "system": {"memory_mb": 100.5},
        }
        detailed = DetailedHealthStatus(**payload)

        assert detailed.status == "healthy"
        assert detailed.services["db"]["healthy"] is True
class TestReadinessStatusModel:
    """Validate ReadinessStatus model."""

    def test_fields(self):
        """``ready`` and ``checks`` round-trip through the model."""
        payload = {
            "ready": True,
            "timestamp": "2026-01-01T00:00:00+00:00",
            "checks": {"db": True, "cache": True},
        }
        readiness = ReadinessStatus(**payload)

        assert readiness.ready is True
        assert readiness.checks["db"] is True

    def test_with_reason(self):
        """A ``reason`` passed at construction is kept on the model."""
        payload = {
            "ready": False,
            "timestamp": "2026-01-01T00:00:00+00:00",
            "checks": {"db": False},
            "reason": "Database unavailable",
        }
        readiness = ReadinessStatus(**payload)

        assert readiness.ready is False
        assert readiness.reason == "Database unavailable"
class TestLivenessStatusModel:
    """Validate LivenessStatus model."""

    def test_fields(self):
        """All explicitly supplied fields are readable back from the model."""
        payload = {
            "alive": True,
            "timestamp": "2026-01-01T00:00:00+00:00",
            "uptime_seconds": 3600.0,
            "shutdown_requested": False,
        }
        liveness = LivenessStatus(**payload)

        assert liveness.alive is True
        assert liveness.uptime_seconds == 3600.0
        assert liveness.shutdown_requested is False

    def test_defaults(self):
        """Omitting ``shutdown_requested`` leaves it at False."""
        payload = {
            "alive": True,
            "timestamp": "2026-01-01T00:00:00+00:00",
            "uptime_seconds": 0.0,
        }
        liveness = LivenessStatus(**payload)

        assert liveness.shutdown_requested is False