Files
Timmy-time-dashboard/src/infrastructure/world/hardening/monitor.py

197 lines
6.0 KiB
Python
Raw Normal View History

"""Resource monitoring — CPU, RAM, and disk usage under load.

``ResourceMonitor`` collects lightweight resource snapshots. Disk usage and
load average always come from stdlib primitives (``shutil.disk_usage``,
``os.getloadavg``). When ``psutil`` is installed it additionally records
system-wide CPU and memory usage; otherwise those fields stay at ``-1``.

Usage::

    monitor = ResourceMonitor()
    monitor.sample()                      # single reading
    monitor.sample_n(10, interval_s=0.5)  # 10 readings, 0.5 s apart
    print(monitor.summary())
"""
from __future__ import annotations
import logging
import os
import shutil
import time
from dataclasses import dataclass
from datetime import UTC, datetime
logger = logging.getLogger(__name__)
@dataclass
class ResourceSnapshot:
    """Point-in-time resource usage reading.

    Every numeric field defaults to ``-1.0``, which doubles as the
    "unavailable" sentinel.

    Attributes:
        timestamp: ISO-8601 timestamp of when the reading was taken.
        cpu_percent: CPU usage 0–100; ``-1`` if unavailable.
        memory_used_mb: Memory in use in MiB, taken from
            ``psutil.virtual_memory().used`` (a system-wide figure per
            psutil's documentation, not this process's resident set);
            ``-1`` if unavailable.
        memory_total_mb: Total system memory in MiB; ``-1`` if unavailable.
        disk_used_gb: Disk used for the watched path in GiB; ``-1`` if the
            path could not be queried.
        disk_total_gb: Total disk for the watched path in GiB; ``-1`` if the
            path could not be queried.
        load_avg_1m: 1-minute load average; ``-1`` on Windows.
    """

    timestamp: str
    cpu_percent: float = -1.0
    memory_used_mb: float = -1.0
    memory_total_mb: float = -1.0
    disk_used_gb: float = -1.0
    disk_total_gb: float = -1.0
    load_avg_1m: float = -1.0
class ResourceMonitor:
"""Lightweight resource monitor for multi-agent load testing.
Captures ``ResourceSnapshot`` readings and retains the last
*max_history* entries. Uses ``psutil`` when available, with a
graceful fallback to stdlib primitives.
Parameters
----------
max_history:
Maximum number of snapshots retained in memory.
watch_path:
Filesystem path used for disk-usage measurement.
"""
def __init__(
self,
*,
max_history: int = 100,
watch_path: str = ".",
) -> None:
self._max = max_history
self._watch = watch_path
self._history: list[ResourceSnapshot] = []
self._psutil = self._try_import_psutil()
# -- public API --------------------------------------------------------
def sample(self) -> ResourceSnapshot:
"""Take a single resource snapshot and add it to history."""
snap = self._collect()
self._history.append(snap)
if len(self._history) > self._max:
self._history = self._history[-self._max :]
return snap
def sample_n(
self,
n: int,
*,
interval_s: float = 0.1,
) -> list[ResourceSnapshot]:
"""Take *n* samples spaced *interval_s* seconds apart.
Useful for profiling resource usage during a stress test run.
"""
results: list[ResourceSnapshot] = []
for i in range(n):
results.append(self.sample())
if i < n - 1:
time.sleep(interval_s)
return results
@property
def history(self) -> list[ResourceSnapshot]:
return list(self._history)
def peak_cpu(self) -> float:
"""Return the highest cpu_percent seen, or ``-1`` if no samples."""
valid = [s.cpu_percent for s in self._history if s.cpu_percent >= 0]
return max(valid) if valid else -1.0
def peak_memory_mb(self) -> float:
"""Return the highest memory_used_mb seen, or ``-1`` if no samples."""
valid = [s.memory_used_mb for s in self._history if s.memory_used_mb >= 0]
return max(valid) if valid else -1.0
def summary(self) -> str:
"""Human-readable summary of recorded resource snapshots."""
if not self._history:
return "ResourceMonitor: no samples collected"
return (
f"ResourceMonitor: {len(self._history)} samples — "
f"peak CPU {self.peak_cpu():.1f}%, "
f"peak RAM {self.peak_memory_mb():.1f} MiB"
)
# -- internal ----------------------------------------------------------
def _collect(self) -> ResourceSnapshot:
ts = datetime.now(UTC).isoformat()
# Disk (always available via stdlib)
try:
usage = shutil.disk_usage(self._watch)
disk_used_gb = round((usage.total - usage.free) / (1024**3), 3)
disk_total_gb = round(usage.total / (1024**3), 3)
except OSError:
disk_used_gb = -1.0
disk_total_gb = -1.0
# Load average (POSIX only)
try:
load_avg_1m = round(os.getloadavg()[0], 3)
except AttributeError:
load_avg_1m = -1.0 # Windows
if self._psutil:
return self._collect_psutil(ts, disk_used_gb, disk_total_gb, load_avg_1m)
return ResourceSnapshot(
timestamp=ts,
disk_used_gb=disk_used_gb,
disk_total_gb=disk_total_gb,
load_avg_1m=load_avg_1m,
)
def _collect_psutil(
self,
ts: str,
disk_used_gb: float,
disk_total_gb: float,
load_avg_1m: float,
) -> ResourceSnapshot:
psutil = self._psutil
try:
cpu = round(psutil.cpu_percent(interval=None), 2)
except Exception:
cpu = -1.0
try:
vm = psutil.virtual_memory()
mem_used = round(vm.used / (1024**2), 2)
mem_total = round(vm.total / (1024**2), 2)
except Exception:
mem_used = -1.0
mem_total = -1.0
return ResourceSnapshot(
timestamp=ts,
cpu_percent=cpu,
memory_used_mb=mem_used,
memory_total_mb=mem_total,
disk_used_gb=disk_used_gb,
disk_total_gb=disk_total_gb,
load_avg_1m=load_avg_1m,
)
@staticmethod
def _try_import_psutil():
try:
import psutil
return psutil
except ImportError:
logger.debug(
"ResourceMonitor: psutil not available — using stdlib fallback"
)
return None