"""Unit tests for the Hermes health monitor. Tests all five checks (memory, disk, Ollama, processes, network) using mocks so no real subprocesses or network calls are made. Refs: #1073 """ import json from unittest.mock import MagicMock, patch import pytest from infrastructure.hermes.monitor import CheckResult, HealthLevel, HealthReport, HermesMonitor @pytest.fixture() def monitor(): return HermesMonitor() # ── Unit helpers ────────────────────────────────────────────────────────────── class _FakeHTTPResponse: """Minimal urllib response stub.""" def __init__(self, body: bytes, status: int = 200): self._body = body self.status = status def read(self) -> bytes: return self._body def __enter__(self): return self def __exit__(self, *_): pass # ── Memory check ────────────────────────────────────────────────────────────── def test_get_memory_info_parses_vm_stat(monitor): vm_stat_output = ( "Mach Virtual Memory Statistics: (page size of 16384 bytes)\n" "Pages free: 12800.\n" "Pages active: 50000.\n" "Pages inactive: 25600.\n" "Pages speculative: 1000.\n" ) with ( patch("subprocess.run") as mock_run, ): # First call: sysctl hw.memsize (total) sysctl_result = MagicMock() sysctl_result.stdout = "68719476736\n" # 64 GB # Second call: vm_stat vmstat_result = MagicMock() vmstat_result.stdout = vm_stat_output mock_run.side_effect = [sysctl_result, vmstat_result] info = monitor._get_memory_info() assert info["total_gb"] == pytest.approx(64.0, abs=0.1) # pages free (12800) + inactive (25600) = 38400 * 16384 bytes = 629145600 bytes ≈ 0.586 GB expected_free_gb = (38400 * 16384) / (1024**3) assert info["free_gb"] == pytest.approx(expected_free_gb, abs=0.001) def test_get_memory_info_handles_subprocess_failure(monitor): with patch("subprocess.run", side_effect=OSError("no sysctl")): info = monitor._get_memory_info() assert info["total_gb"] == 0.0 assert info["free_gb"] == 0.0 @pytest.mark.asyncio async def test_check_memory_ok(monitor): with patch.object( monitor, "_get_memory_info", return_value={"free_gb": 20.0, "total_gb": 64.0} ): result = await monitor._check_memory() assert result.name == "memory" assert result.level == HealthLevel.OK assert "20.0GB" in result.message @pytest.mark.asyncio async def test_check_memory_low_triggers_unload(monitor): with ( patch.object(monitor, "_get_memory_info", return_value={"free_gb": 2.0, "total_gb": 64.0}), patch.object(monitor, "_unload_ollama_models", return_value=2), ): result = await monitor._check_memory() assert result.level == HealthLevel.WARNING assert result.auto_resolved is True assert "unloaded 2" in result.message @pytest.mark.asyncio async def test_check_memory_critical_no_models_to_unload(monitor): with ( patch.object(monitor, "_get_memory_info", return_value={"free_gb": 1.0, "total_gb": 64.0}), patch.object(monitor, "_unload_ollama_models", return_value=0), ): result = await monitor._check_memory() assert result.level == HealthLevel.CRITICAL assert result.needs_human is True @pytest.mark.asyncio async def test_check_memory_exception_returns_unknown(monitor): with patch.object(monitor, "_get_memory_info", side_effect=RuntimeError("boom")): result = await monitor._check_memory() assert result.level == HealthLevel.UNKNOWN # ── Disk check ──────────────────────────────────────────────────────────────── @pytest.mark.asyncio async def test_check_disk_ok(monitor): usage = MagicMock() usage.free = 100 * (1024**3) # 100 GB usage.total = 500 * (1024**3) # 500 GB usage.used = 400 * (1024**3) with patch("shutil.disk_usage", return_value=usage): result = await monitor._check_disk() assert result.level == HealthLevel.OK assert "100.0GB free" in result.message @pytest.mark.asyncio async def test_check_disk_low_triggers_cleanup(monitor): usage = MagicMock() usage.free = 5 * (1024**3) # 5 GB — below threshold usage.total = 500 * (1024**3) usage.used = 495 * (1024**3) with ( patch("shutil.disk_usage", return_value=usage), patch.object(monitor, "_cleanup_temp_files", return_value=2.5), ): result = await monitor._check_disk() assert result.level == HealthLevel.WARNING assert result.auto_resolved is True assert "cleaned 2.50GB" in result.message @pytest.mark.asyncio async def test_check_disk_critical_when_cleanup_fails(monitor): usage = MagicMock() usage.free = 5 * (1024**3) usage.total = 500 * (1024**3) usage.used = 495 * (1024**3) with ( patch("shutil.disk_usage", return_value=usage), patch.object(monitor, "_cleanup_temp_files", return_value=0.0), ): result = await monitor._check_disk() assert result.level == HealthLevel.CRITICAL assert result.needs_human is True # ── Ollama check ────────────────────────────────────────────────────────────── def test_get_ollama_status_reachable(monitor): tags_body = json.dumps({"models": [{"name": "qwen3:30b"}, {"name": "llama3.1:8b"}]}).encode() ps_body = json.dumps({"models": [{"name": "qwen3:30b", "size": 1000}]}).encode() responses = [ _FakeHTTPResponse(tags_body), _FakeHTTPResponse(ps_body), ] with patch("urllib.request.urlopen", side_effect=responses): status = monitor._get_ollama_status() assert status["reachable"] is True assert len(status["models"]) == 2 assert len(status["loaded_models"]) == 1 def test_get_ollama_status_unreachable(monitor): with patch("urllib.request.urlopen", side_effect=OSError("connection refused")): status = monitor._get_ollama_status() assert status["reachable"] is False assert status["models"] == [] assert status["loaded_models"] == [] @pytest.mark.asyncio async def test_check_ollama_ok(monitor): status = { "reachable": True, "models": [{"name": "qwen3:30b"}], "loaded_models": [], } with patch.object(monitor, "_get_ollama_status", return_value=status): result = await monitor._check_ollama() assert result.level == HealthLevel.OK assert result.details["reachable"] is True @pytest.mark.asyncio async def test_check_ollama_unreachable_restart_success(monitor): status = {"reachable": False, "models": [], "loaded_models": []} with ( patch.object(monitor, "_get_ollama_status", return_value=status), patch.object(monitor, "_restart_ollama", return_value=True), ): result = await monitor._check_ollama() assert result.level == HealthLevel.WARNING assert result.auto_resolved is True @pytest.mark.asyncio async def test_check_ollama_unreachable_restart_fails(monitor): status = {"reachable": False, "models": [], "loaded_models": []} with ( patch.object(monitor, "_get_ollama_status", return_value=status), patch.object(monitor, "_restart_ollama", return_value=False), ): result = await monitor._check_ollama() assert result.level == HealthLevel.CRITICAL assert result.needs_human is True # ── Process check ───────────────────────────────────────────────────────────── def test_get_zombie_processes_none(monitor): ps_output = ( "USER PID %CPU %MEM VSZ RSS TT STAT STARTED TIME COMMAND\n" "alex 123 0.1 0.2 100 200 s0 S 1:00 0:01 python\n" "alex 456 0.0 0.1 50 100 s0 S 1:01 0:00 bash\n" ) result = MagicMock() result.stdout = ps_output with patch("subprocess.run", return_value=result): info = monitor._get_zombie_processes() assert info["zombies"] == [] def test_get_zombie_processes_found(monitor): ps_output = ( "USER PID %CPU %MEM VSZ RSS TT STAT STARTED TIME COMMAND\n" "alex 123 0.1 0.2 100 200 s0 S 1:00 0:01 python\n" "alex 789 0.0 0.0 0 0 s0 Z 1:02 0:00 defunct\n" ) result = MagicMock() result.stdout = ps_output with patch("subprocess.run", return_value=result): info = monitor._get_zombie_processes() assert len(info["zombies"]) == 1 assert info["zombies"][0]["pid"] == "789" @pytest.mark.asyncio async def test_check_processes_no_zombies(monitor): with patch.object(monitor, "_get_zombie_processes", return_value={"zombies": []}): result = await monitor._check_processes() assert result.level == HealthLevel.OK @pytest.mark.asyncio async def test_check_processes_zombies_warning(monitor): zombies = [{"pid": "100", "command": "defunct"}, {"pid": "101", "command": "defunct"}] with patch.object(monitor, "_get_zombie_processes", return_value={"zombies": zombies}): result = await monitor._check_processes() assert result.level == HealthLevel.WARNING assert result.needs_human is False # Only 2, threshold is >3 @pytest.mark.asyncio async def test_check_processes_many_zombies_needs_human(monitor): zombies = [{"pid": str(i), "command": "defunct"} for i in range(5)] with patch.object(monitor, "_get_zombie_processes", return_value={"zombies": zombies}): result = await monitor._check_processes() assert result.needs_human is True # ── Network check ───────────────────────────────────────────────────────────── def test_check_gitea_connectivity_ok(monitor): body = json.dumps({"version": "1.22.0"}).encode() with patch("urllib.request.urlopen", return_value=_FakeHTTPResponse(body, status=200)): info = monitor._check_gitea_connectivity() assert info["reachable"] is True assert info["latency_ms"] >= 0 def test_check_gitea_connectivity_unreachable(monitor): with patch("urllib.request.urlopen", side_effect=OSError("refused")): info = monitor._check_gitea_connectivity() assert info["reachable"] is False assert "error" in info @pytest.mark.asyncio async def test_check_network_ok(monitor): with patch.object( monitor, "_check_gitea_connectivity", return_value={"reachable": True, "latency_ms": 5.0, "url": "http://localhost:3000"}, ): result = await monitor._check_network() assert result.level == HealthLevel.OK assert "Gitea reachable" in result.message @pytest.mark.asyncio async def test_check_network_unreachable(monitor): with patch.object( monitor, "_check_gitea_connectivity", return_value={"reachable": False, "error": "refused", "url": "http://localhost:3000"}, ): result = await monitor._check_network() assert result.level == HealthLevel.WARNING assert result.needs_human is True # ── Full cycle ──────────────────────────────────────────────────────────────── @pytest.mark.asyncio async def test_run_cycle_all_ok(monitor): ok_result = CheckResult(name="test", level=HealthLevel.OK, message="ok") async def _ok_check(): return ok_result with ( patch.object(monitor, "_check_memory", _ok_check), patch.object(monitor, "_check_disk", _ok_check), patch.object(monitor, "_check_ollama", _ok_check), patch.object(monitor, "_check_processes", _ok_check), patch.object(monitor, "_check_network", _ok_check), patch.object(monitor, "_handle_alerts"), ): report = await monitor.run_cycle() assert report.overall == HealthLevel.OK assert not report.has_issues assert monitor.last_report is report @pytest.mark.asyncio async def test_run_cycle_sets_overall_to_worst(monitor): async def _ok(): return CheckResult(name="ok", level=HealthLevel.OK, message="ok") async def _critical(): return CheckResult(name="critical", level=HealthLevel.CRITICAL, message="bad") with ( patch.object(monitor, "_check_memory", _ok), patch.object(monitor, "_check_disk", _critical), patch.object(monitor, "_check_ollama", _ok), patch.object(monitor, "_check_processes", _ok), patch.object(monitor, "_check_network", _ok), patch.object(monitor, "_handle_alerts"), ): report = await monitor.run_cycle() assert report.overall == HealthLevel.CRITICAL assert report.has_issues is True @pytest.mark.asyncio async def test_run_cycle_exception_becomes_unknown(monitor): async def _ok(): return CheckResult(name="ok", level=HealthLevel.OK, message="ok") async def _boom(): raise RuntimeError("unexpected error") with ( patch.object(monitor, "_check_memory", _ok), patch.object(monitor, "_check_disk", _ok), patch.object(monitor, "_check_ollama", _boom), patch.object(monitor, "_check_processes", _ok), patch.object(monitor, "_check_network", _ok), patch.object(monitor, "_handle_alerts"), ): report = await monitor.run_cycle() levels = {c.level for c in report.checks} assert HealthLevel.UNKNOWN in levels # ── to_dict serialisation ──────────────────────────────────────────────────── def test_check_result_to_dict(): c = CheckResult( name="memory", level=HealthLevel.WARNING, message="low", details={"free_gb": 3.5}, auto_resolved=True, ) d = c.to_dict() assert d["name"] == "memory" assert d["level"] == "warning" assert d["auto_resolved"] is True assert d["details"]["free_gb"] == 3.5 def test_health_report_to_dict(): checks = [ CheckResult(name="disk", level=HealthLevel.OK, message="ok"), ] report = HealthReport( timestamp="2026-01-01T00:00:00+00:00", checks=checks, overall=HealthLevel.OK, ) d = report.to_dict() assert d["overall"] == "ok" assert d["has_issues"] is False assert len(d["checks"]) == 1