forked from Rockachopa/Timmy-time-dashboard
Co-authored-by: Claude (Opus 4.6) <claude@hermes.local>
Co-committed-by: Claude (Opus 4.6) <claude@hermes.local>
This commit is contained in:
@@ -4,6 +4,6 @@ Monitors the local machine (Hermes/M3 Max) for memory pressure, disk usage,
|
||||
Ollama model health, zombie processes, and network connectivity.
|
||||
"""
|
||||
|
||||
from infrastructure.hermes.monitor import HermesMonitor, HealthLevel, HealthReport, hermes_monitor
|
||||
from infrastructure.hermes.monitor import HealthLevel, HealthReport, HermesMonitor, hermes_monitor
|
||||
|
||||
__all__ = ["HermesMonitor", "HealthLevel", "HealthReport", "hermes_monitor"]
|
||||
|
||||
@@ -19,11 +19,12 @@ import json
|
||||
import logging
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import time
|
||||
import urllib.request
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
from enum import Enum
|
||||
from enum import StrEnum
|
||||
from typing import Any
|
||||
|
||||
from config import settings
|
||||
@@ -31,7 +32,7 @@ from config import settings
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class HealthLevel(str, Enum):
|
||||
class HealthLevel(StrEnum):
|
||||
"""Severity level for a health check result."""
|
||||
|
||||
OK = "ok"
|
||||
@@ -194,8 +195,7 @@ class HermesMonitor:
|
||||
name="memory",
|
||||
level=HealthLevel.CRITICAL,
|
||||
message=(
|
||||
f"Critical: only {free_gb:.1f}GB free "
|
||||
f"(threshold: {memory_free_min_gb}GB)"
|
||||
f"Critical: only {free_gb:.1f}GB free (threshold: {memory_free_min_gb}GB)"
|
||||
),
|
||||
details=details,
|
||||
needs_human=True,
|
||||
@@ -302,8 +302,7 @@ class HermesMonitor:
|
||||
name="disk",
|
||||
level=HealthLevel.CRITICAL,
|
||||
message=(
|
||||
f"Critical: only {free_gb:.1f}GB free "
|
||||
f"(threshold: {disk_free_min_gb}GB)"
|
||||
f"Critical: only {free_gb:.1f}GB free (threshold: {disk_free_min_gb}GB)"
|
||||
),
|
||||
details=details,
|
||||
needs_human=True,
|
||||
@@ -335,7 +334,7 @@ class HermesMonitor:
|
||||
cutoff = time.time() - 86400 # 24 hours ago
|
||||
|
||||
try:
|
||||
tmp = Path("/tmp")
|
||||
tmp = Path(tempfile.gettempdir())
|
||||
for item in tmp.iterdir():
|
||||
try:
|
||||
stat = item.stat()
|
||||
@@ -345,11 +344,7 @@ class HermesMonitor:
|
||||
freed_bytes += stat.st_size
|
||||
item.unlink(missing_ok=True)
|
||||
elif item.is_dir():
|
||||
dir_size = sum(
|
||||
f.stat().st_size
|
||||
for f in item.rglob("*")
|
||||
if f.is_file()
|
||||
)
|
||||
dir_size = sum(f.stat().st_size for f in item.rglob("*") if f.is_file())
|
||||
freed_bytes += dir_size
|
||||
shutil.rmtree(str(item), ignore_errors=True)
|
||||
except (PermissionError, OSError):
|
||||
@@ -392,10 +387,7 @@ class HermesMonitor:
|
||||
return CheckResult(
|
||||
name="ollama",
|
||||
level=HealthLevel.OK,
|
||||
message=(
|
||||
f"Ollama OK — {len(models)} model(s) available, "
|
||||
f"{len(loaded)} loaded"
|
||||
),
|
||||
message=(f"Ollama OK — {len(models)} model(s) available, {len(loaded)} loaded"),
|
||||
details={
|
||||
"reachable": True,
|
||||
"model_count": len(models),
|
||||
|
||||
Reference in New Issue
Block a user