Files
Timmy-time-dashboard/src/infrastructure/energy/monitor.py
Claude (Opus 4.6) 6b2e6d9e8c
Some checks failed
Tests / lint (push) Has been cancelled
Tests / test (push) Has been cancelled
[claude] feat: Agent Energy Budget Monitoring (#1009) (#1267)
2026-03-24 01:35:49 +00:00

372 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Energy Budget Monitor — estimates GPU/CPU power draw during LLM inference.
Tracks estimated power consumption to optimize for "metabolic efficiency".
Three estimation strategies attempted in priority order:
1. Battery discharge via ioreg (macOS — works without sudo, on-battery only)
2. CPU utilisation proxy via top (user + sys CPU% scaled against a 40 W ceiling)
3. Model-size heuristic (model_size_gb × 2 W/GB estimate), used when no live reading is available
Energy Efficiency score (0-10):
efficiency = tokens_per_second / estimated_watts, normalised to 0-10.
Low Power Mode:
Activated manually or automatically when draw exceeds the configured
threshold. In low power mode the cascade router is advised to prefer the
configured low_power_model (e.g. qwen3:1b or similar compact model).
Refs: #1009
"""
import asyncio
import json
import logging
import subprocess
import time
from collections import deque
from dataclasses import dataclass, field
from datetime import UTC, datetime
from typing import Any
from config import settings
logger = logging.getLogger(__name__)
# Approximate model-size lookup (GB) used for the heuristic power estimate.
# Keys are lowercase; a model name is matched exactly first and then by
# substring, so a tagged name such as "qwen3:8b-q4" still resolves.
_MODEL_SIZE_GB: dict[str, float] = {
    "qwen3:1b": 0.8,
    "qwen3:3b": 2.0,
    "qwen3:4b": 2.5,
    "qwen3:8b": 5.5,
    "qwen3:14b": 9.0,
    "qwen3:30b": 20.0,
    "qwen3:32b": 20.0,
    "llama3:8b": 5.5,
    "llama3:70b": 45.0,
    "mistral:7b": 4.5,
    "gemma3:4b": 2.5,
    "gemma3:12b": 8.0,
    "gemma3:27b": 17.0,
    "phi4:14b": 9.0,
}
_DEFAULT_MODEL_SIZE_GB = 5.0  # fallback size (GB) when the model is not in the table
_WATTS_PER_GB_HEURISTIC = 2.0  # rough W/GB for Apple Silicon unified memory
# Efficiency score normalisation: an efficiency of this many tok/s per W
# (or more) maps to the maximum score of 10.
_EFFICIENCY_SCORE_CEILING = 5.0  # tok/s per W → score 10
# Rolling window: maximum number of inference samples kept in memory.
_HISTORY_MAXLEN = 60
@dataclass
class InferenceSample:
    """A single inference event captured by record_inference().

    Field order is part of the constructor signature and must not change.
    """

    timestamp: str  # ISO-8601 timestamp of when the sample was recorded
    model: str  # model name as passed to record_inference()
    tokens_per_second: float  # measured throughput
    estimated_watts: float  # power estimate used for this sample
    efficiency: float  # tokens/s per watt
    efficiency_score: float  # 0-10 scale (record_inference caps it at 10)
@dataclass
class EnergyReport:
    """Point-in-time view of the energy budget: power draw, mode and efficiency."""

    timestamp: str
    low_power_mode: bool
    current_watts: float
    strategy: str  # "battery", "cpu_proxy", "heuristic", "unavailable"
    efficiency_score: float  # 0-10; -1 if no inference samples yet
    recent_samples: list[InferenceSample]
    recommendation: str
    details: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict form of the report with floats rounded for display.

        Returns:
            A dict holding the report fields; every entry of
            ``recent_samples`` is flattened into its own dict.
        """
        serialised_samples: list[dict[str, Any]] = []
        for sample in self.recent_samples:
            serialised_samples.append(
                {
                    "timestamp": sample.timestamp,
                    "model": sample.model,
                    "tokens_per_second": round(sample.tokens_per_second, 1),
                    "estimated_watts": round(sample.estimated_watts, 2),
                    "efficiency": round(sample.efficiency, 3),
                    "efficiency_score": round(sample.efficiency_score, 2),
                }
            )

        payload: dict[str, Any] = {
            "timestamp": self.timestamp,
            "low_power_mode": self.low_power_mode,
            "current_watts": round(self.current_watts, 2),
            "strategy": self.strategy,
            "efficiency_score": round(self.efficiency_score, 2),
            "recent_samples": serialised_samples,
            "recommendation": self.recommendation,
            "details": self.details,
        }
        return payload
class EnergyBudgetMonitor:
"""Estimates power consumption and tracks LLM inference efficiency.
All blocking I/O (subprocess calls) is wrapped in asyncio.to_thread()
so the event loop is never blocked. Results are cached.
Usage::
# Record an inference event
energy_monitor.record_inference("qwen3:8b", tokens_per_second=42.0)
# Get the current report
report = await energy_monitor.get_report()
# Toggle low power mode
energy_monitor.set_low_power_mode(True)
"""
_POWER_CACHE_TTL = 10.0 # seconds between fresh power readings
def __init__(self) -> None:
self._low_power_mode: bool = False
self._samples: deque[InferenceSample] = deque(maxlen=_HISTORY_MAXLEN)
self._cached_watts: float = 0.0
self._cached_strategy: str = "unavailable"
self._cache_ts: float = 0.0
# ── Public API ────────────────────────────────────────────────────────────
@property
def low_power_mode(self) -> bool:
return self._low_power_mode
def set_low_power_mode(self, enabled: bool) -> None:
"""Enable or disable low power mode."""
self._low_power_mode = enabled
state = "enabled" if enabled else "disabled"
logger.info("Energy budget: low power mode %s", state)
def record_inference(self, model: str, tokens_per_second: float) -> InferenceSample:
"""Record an inference event for efficiency tracking.
Call this after each LLM inference completes with the model name and
measured throughput. The current power estimate is used to compute
the efficiency score.
Args:
model: Ollama model name (e.g. "qwen3:8b").
tokens_per_second: Measured decode throughput.
Returns:
The recorded InferenceSample.
"""
watts = self._cached_watts if self._cached_watts > 0 else self._estimate_watts_sync(model)
efficiency = tokens_per_second / max(watts, 0.1)
score = min(10.0, (efficiency / _EFFICIENCY_SCORE_CEILING) * 10.0)
sample = InferenceSample(
timestamp=datetime.now(UTC).isoformat(),
model=model,
tokens_per_second=tokens_per_second,
estimated_watts=watts,
efficiency=efficiency,
efficiency_score=score,
)
self._samples.append(sample)
# Auto-engage low power mode if above threshold and budget is enabled
threshold = getattr(settings, "energy_budget_watts_threshold", 15.0)
if watts > threshold and not self._low_power_mode:
logger.info(
"Energy budget: %.1fW exceeds threshold %.1fW — auto-engaging low power mode",
watts,
threshold,
)
self.set_low_power_mode(True)
return sample
async def get_report(self) -> EnergyReport:
"""Return the current energy budget report.
Refreshes the power estimate if the cache is stale.
"""
await self._refresh_power_cache()
score = self._compute_mean_efficiency_score()
recommendation = self._build_recommendation(score)
return EnergyReport(
timestamp=datetime.now(UTC).isoformat(),
low_power_mode=self._low_power_mode,
current_watts=self._cached_watts,
strategy=self._cached_strategy,
efficiency_score=score,
recent_samples=list(self._samples)[-10:],
recommendation=recommendation,
details={"sample_count": len(self._samples)},
)
# ── Power estimation ──────────────────────────────────────────────────────
async def _refresh_power_cache(self) -> None:
"""Refresh the cached power reading if stale."""
now = time.monotonic()
if now - self._cache_ts < self._POWER_CACHE_TTL:
return
try:
watts, strategy = await asyncio.to_thread(self._read_power)
except Exception as exc:
logger.debug("Energy: power read failed: %s", exc)
watts, strategy = 0.0, "unavailable"
self._cached_watts = watts
self._cached_strategy = strategy
self._cache_ts = now
def _read_power(self) -> tuple[float, str]:
"""Synchronous power reading — tries strategies in priority order.
Returns:
Tuple of (watts, strategy_name).
"""
# Strategy 1: battery discharge via ioreg (on-battery Macs)
try:
watts = self._read_battery_watts()
if watts > 0:
return watts, "battery"
except Exception:
pass
# Strategy 2: CPU utilisation proxy via top
try:
cpu_pct = self._read_cpu_pct()
if cpu_pct >= 0:
# M3 Max TDP ≈ 40W; scale linearly
watts = (cpu_pct / 100.0) * 40.0
return watts, "cpu_proxy"
except Exception:
pass
# Strategy 3: heuristic from loaded model size
return 0.0, "unavailable"
def _estimate_watts_sync(self, model: str) -> float:
"""Estimate watts from model size when no live reading is available."""
size_gb = self._model_size_gb(model)
return size_gb * _WATTS_PER_GB_HEURISTIC
def _read_battery_watts(self) -> float:
"""Read instantaneous battery discharge via ioreg.
Returns watts if on battery, 0.0 if plugged in or unavailable.
Requires macOS; no sudo needed.
"""
result = subprocess.run(
["ioreg", "-r", "-c", "AppleSmartBattery", "-d", "1"],
capture_output=True,
text=True,
timeout=3,
)
amperage_ma = 0.0
voltage_mv = 0.0
is_charging = True # assume charging unless we see ExternalConnected = No
for line in result.stdout.splitlines():
stripped = line.strip()
if '"InstantAmperage"' in stripped:
try:
amperage_ma = float(stripped.split("=")[-1].strip())
except ValueError:
pass
elif '"Voltage"' in stripped:
try:
voltage_mv = float(stripped.split("=")[-1].strip())
except ValueError:
pass
elif '"ExternalConnected"' in stripped:
is_charging = "Yes" in stripped
if is_charging or voltage_mv == 0 or amperage_ma <= 0:
return 0.0
# ioreg reports amperage in mA, voltage in mV
return (abs(amperage_ma) * voltage_mv) / 1_000_000
def _read_cpu_pct(self) -> float:
"""Read CPU utilisation from macOS top.
Returns aggregate CPU% (0100), or -1.0 on failure.
"""
result = subprocess.run(
["top", "-l", "1", "-n", "0", "-stats", "cpu"],
capture_output=True,
text=True,
timeout=5,
)
for line in result.stdout.splitlines():
if "CPU usage:" in line:
# "CPU usage: 12.5% user, 8.3% sys, 79.1% idle"
parts = line.split()
try:
user = float(parts[2].rstrip("%"))
sys_ = float(parts[4].rstrip("%"))
return user + sys_
except (IndexError, ValueError):
pass
return -1.0
# ── Helpers ───────────────────────────────────────────────────────────────
@staticmethod
def _model_size_gb(model: str) -> float:
"""Look up approximate model size in GB by name substring."""
lower = model.lower()
# Exact match first
if lower in _MODEL_SIZE_GB:
return _MODEL_SIZE_GB[lower]
# Substring match
for key, size in _MODEL_SIZE_GB.items():
if key in lower:
return size
return _DEFAULT_MODEL_SIZE_GB
def _compute_mean_efficiency_score(self) -> float:
"""Mean efficiency score over recent samples, or -1 if none."""
if not self._samples:
return -1.0
recent = list(self._samples)[-10:]
return sum(s.efficiency_score for s in recent) / len(recent)
def _build_recommendation(self, score: float) -> str:
"""Generate a human-readable recommendation from the efficiency score."""
threshold = getattr(settings, "energy_budget_watts_threshold", 15.0)
low_power_model = getattr(settings, "energy_low_power_model", "qwen3:1b")
if score < 0:
return "No inference data yet — run some tasks to populate efficiency metrics."
if self._low_power_mode:
return (
f"Low power mode active — routing to {low_power_model}. "
"Disable when power draw normalises."
)
if score < 3.0:
return (
f"Low efficiency (score {score:.1f}/10). "
f"Consider enabling low power mode to favour smaller models "
f"(threshold: {threshold}W)."
)
if score < 6.0:
return f"Moderate efficiency (score {score:.1f}/10). System operating normally."
return f"Good efficiency (score {score:.1f}/10). No action needed."
# Module-level singleton: import `energy_monitor` instead of constructing a
# new EnergyBudgetMonitor, so samples and the low power flag live in one place.
energy_monitor = EnergyBudgetMonitor()