Files
Timmy-time-dashboard/tests/unit/test_energy_monitor.py
Claude (Opus 4.6) 6b2e6d9e8c
Some checks failed
Tests / lint (push) Has been cancelled
Tests / test (push) Has been cancelled
[claude] feat: Agent Energy Budget Monitoring (#1009) (#1267)
2026-03-24 01:35:49 +00:00

298 lines
9.4 KiB
Python

"""Unit tests for the Energy Budget Monitor.
Tests power estimation strategies, inference recording, efficiency scoring,
and low power mode logic — all without real subprocesses.
Refs: #1009
"""
from unittest.mock import MagicMock, patch
import pytest
from infrastructure.energy.monitor import (
EnergyBudgetMonitor,
InferenceSample,
_DEFAULT_MODEL_SIZE_GB,
_EFFICIENCY_SCORE_CEILING,
_WATTS_PER_GB_HEURISTIC,
)
@pytest.fixture()
def monitor():
    """Provide a fresh ``EnergyBudgetMonitor`` built with no arguments."""
    fresh_monitor = EnergyBudgetMonitor()
    return fresh_monitor
# ── Model size lookup ─────────────────────────────────────────────────────────
def test_model_size_exact_match(monitor):
    """Looking up ``"qwen3:8b"`` is expected to yield a size of 5.5."""
    size_gb = monitor._model_size_gb("qwen3:8b")
    assert size_gb == 5.5
def test_model_size_substring_match(monitor):
    """A name that merely contains ``qwen3:14b`` is expected to yield 9.0."""
    size_gb = monitor._model_size_gb("some-qwen3:14b-custom")
    assert size_gb == 9.0
def test_model_size_unknown_returns_default(monitor):
    """An unrecognised model name yields ``_DEFAULT_MODEL_SIZE_GB``."""
    size_gb = monitor._model_size_gb("unknownmodel:99b")
    assert size_gb == _DEFAULT_MODEL_SIZE_GB
# ── Battery power reading ─────────────────────────────────────────────────────
def test_read_battery_watts_on_battery(monitor):
    """With no external power connected, the reading should be about 30 W.

    ``subprocess.run`` is patched so the monitor sees canned command output
    instead of launching a real process.
    """
    fake_output = (
        "{\n"
        ' "InstantAmperage" = 2500\n'
        ' "Voltage" = 12000\n'
        ' "ExternalConnected" = No\n'
        "}"
    )
    completed = MagicMock(stdout=fake_output)
    with patch("subprocess.run", return_value=completed):
        measured = monitor._read_battery_watts()
    # 2500 mA * 12000 mV / 1_000_000 = 30 W
    assert measured == pytest.approx(30.0, abs=0.01)
def test_read_battery_watts_plugged_in_returns_zero(monitor):
    """When the canned output reports external power, the reading is 0.0.

    ``subprocess.run`` is patched so no real process is launched.
    """
    fake_output = (
        "{\n"
        ' "InstantAmperage" = 1000\n'
        ' "Voltage" = 12000\n'
        ' "ExternalConnected" = Yes\n'
        "}"
    )
    completed = MagicMock(stdout=fake_output)
    with patch("subprocess.run", return_value=completed):
        measured = monitor._read_battery_watts()
    assert measured == 0.0
def test_read_battery_watts_subprocess_failure_raises(monitor):
    """An ``OSError`` raised by ``subprocess.run`` reaches the caller."""
    failing_run = patch("subprocess.run", side_effect=OSError("no ioreg"))
    # Both context managers are entered in order: patch first, then raises.
    with failing_run, pytest.raises(OSError):
        monitor._read_battery_watts()
# ── CPU proxy reading ─────────────────────────────────────────────────────────
def test_read_cpu_pct_parses_top(monitor):
    """A ``CPU usage`` line in the canned output is parsed to about 24%.

    ``subprocess.run`` is patched so no real process is launched.
    """
    fake_output = (
        "Processes: 450 total\n"
        "CPU usage: 15.2% user, 8.8% sys, 76.0% idle\n"
    )
    completed = MagicMock(stdout=fake_output)
    with patch("subprocess.run", return_value=completed):
        cpu_pct = monitor._read_cpu_pct()
    # 15.2% user + 8.8% sys (equivalently 100% - 76.0% idle) = 24.0%
    assert cpu_pct == pytest.approx(24.0, abs=0.1)
def test_read_cpu_pct_no_match_returns_negative(monitor):
    """Output without a CPU usage line yields the sentinel value -1.0."""
    completed = MagicMock(stdout="No CPU line here\n")
    with patch("subprocess.run", return_value=completed):
        cpu_pct = monitor._read_cpu_pct()
    assert cpu_pct == -1.0
# ── Power strategy selection ──────────────────────────────────────────────────
def test_read_power_uses_battery_first(monitor):
    """A positive battery reading is reported under the "battery" strategy."""
    battery_stub = patch.object(monitor, "_read_battery_watts", return_value=25.0)
    with battery_stub:
        watts, strategy = monitor._read_power()
    assert (watts, strategy) == (25.0, "battery")
def test_read_power_falls_back_to_cpu_proxy(monitor):
    """A 0.0 W battery reading makes the monitor use the CPU-based estimate."""
    with patch.object(monitor, "_read_battery_watts", return_value=0.0):
        with patch.object(monitor, "_read_cpu_pct", return_value=50.0):
            watts, strategy = monitor._read_power()
    assert strategy == "cpu_proxy"
    # 50% of 40W TDP
    assert watts == pytest.approx(20.0, abs=0.1)
def test_read_power_unavailable_when_both_fail(monitor):
    """With both sources failing, the result is 0.0 W and "unavailable".

    The battery read raises ``OSError`` and the CPU read returns -1.0.
    """
    with patch.object(monitor, "_read_battery_watts", side_effect=OSError):
        with patch.object(monitor, "_read_cpu_pct", return_value=-1.0):
            watts, strategy = monitor._read_power()
    assert strategy == "unavailable"
    assert watts == 0.0
# ── Inference recording ───────────────────────────────────────────────────────
def test_record_inference_produces_sample(monitor):
    """``record_inference`` returns a fully populated ``InferenceSample``.

    The power cache is pre-seeded with 10 W so no real power read is needed.
    """
    monitor._cached_watts = 10.0
    # far future — cache won't expire
    monitor._cache_ts = 9999999999.0

    result = monitor.record_inference("qwen3:8b", tokens_per_second=40.0)

    assert isinstance(result, InferenceSample)
    assert result.model == "qwen3:8b"
    assert result.tokens_per_second == 40.0
    assert result.estimated_watts == pytest.approx(10.0)
    # efficiency = 40 / 10 = 4.0 tok/s per W
    assert result.efficiency == pytest.approx(4.0)
    # score = min(10, (4.0 / 5.0) * 10) = 8.0
    assert result.efficiency_score == pytest.approx(8.0)
def test_record_inference_stores_in_history(monitor):
    """Each recorded inference adds one entry to ``monitor._samples``."""
    monitor._cached_watts = 5.0
    # Far-future timestamp so the seeded wattage is treated as still cached.
    monitor._cache_ts = 9999999999.0

    monitor.record_inference("qwen3:8b", 30.0)
    monitor.record_inference("qwen3:14b", 20.0)

    recorded = len(monitor._samples)
    assert recorded == 2
def test_record_inference_auto_activates_low_power(monitor):
    """Recording an inference at 20 W switches low power mode on."""
    # above default 15W threshold
    monitor._cached_watts = 20.0
    monitor._cache_ts = 9999999999.0

    # Precondition: the mode starts out disabled.
    assert not monitor.low_power_mode

    monitor.record_inference("qwen3:30b", 8.0)

    assert monitor.low_power_mode
def test_record_inference_no_auto_low_power_below_threshold(monitor):
    """Recording an inference at 10 W leaves low power mode off."""
    # below default 15W threshold
    monitor._cached_watts = 10.0
    monitor._cache_ts = 9999999999.0

    monitor.record_inference("qwen3:8b", 40.0)

    assert not monitor.low_power_mode
# ── Efficiency score ──────────────────────────────────────────────────────────
def test_efficiency_score_caps_at_10(monitor):
    """An extreme 1000 tok/s at 1 W still scores no higher than 10.0."""
    monitor._cached_watts = 1.0
    # Far-future timestamp so the seeded wattage is treated as still cached.
    monitor._cache_ts = 9999999999.0

    result = monitor.record_inference("qwen3:1b", tokens_per_second=1000.0)

    assert result.efficiency_score == pytest.approx(10.0)
def test_efficiency_score_no_samples_returns_negative_one(monitor):
    """With nothing recorded, the mean efficiency score is -1.0."""
    mean_score = monitor._compute_mean_efficiency_score()
    assert mean_score == -1.0
def test_mean_efficiency_score_averages_last_10(monitor):
    """The mean efficiency score reflects only the ten most recent samples.

    Fifteen samples are recorded in total. The five oldest score 10.0 and the
    ten newest score 5.0, so an average over *all* samples (about 6.67) is
    distinguishable from an average over the last ten (5.0).

    NOTE(review): the ten-sample window is taken from this test's name —
    confirm against ``_compute_mean_efficiency_score``.
    """
    monitor._cached_watts = 10.0
    # far future — cache won't expire
    monitor._cache_ts = 9999999999.0

    # Older samples that must fall outside the window:
    # efficiency = 50 / 10 = 5.0 → score = min(10, (5.0 / 5.0) * 10) = 10.0
    for _ in range(5):
        monitor.record_inference("qwen3:8b", tokens_per_second=50.0)

    # The ten most recent samples: efficiency=2.5 → score=5.0
    for _ in range(10):
        monitor.record_inference("qwen3:8b", tokens_per_second=25.0)

    score = monitor._compute_mean_efficiency_score()
    assert score == pytest.approx(5.0, abs=0.01)
# ── Low power mode ────────────────────────────────────────────────────────────
def test_set_low_power_mode_toggle(monitor):
    """``set_low_power_mode`` switches the flag on and back off again."""
    # The mode starts out disabled.
    assert not monitor.low_power_mode

    monitor.set_low_power_mode(True)
    enabled_state = monitor.low_power_mode
    assert enabled_state

    monitor.set_low_power_mode(False)
    disabled_state = monitor.low_power_mode
    assert not disabled_state
# ── get_report ────────────────────────────────────────────────────────────────
@pytest.mark.asyncio
async def test_get_report_structure(monitor):
    """The report exposes the expected fields with the expected types.

    ``_read_power`` is stubbed to return ``(8.0, "battery")``.
    """
    known_strategies = ("battery", "cpu_proxy", "heuristic", "unavailable")
    power_stub = patch.object(monitor, "_read_power", return_value=(8.0, "battery"))
    with power_stub:
        report = await monitor.get_report()

    assert report.timestamp
    assert isinstance(report.low_power_mode, bool)
    assert isinstance(report.current_watts, float)
    assert report.strategy in known_strategies
    assert isinstance(report.recommendation, str)
@pytest.mark.asyncio
async def test_get_report_to_dict(monitor):
    """``to_dict`` on a report includes every expected top-level key.

    ``_read_power`` is stubbed to return ``(5.0, "cpu_proxy")``.
    """
    expected_keys = (
        "timestamp",
        "low_power_mode",
        "current_watts",
        "strategy",
        "efficiency_score",
        "recent_samples",
        "recommendation",
    )
    power_stub = patch.object(monitor, "_read_power", return_value=(5.0, "cpu_proxy"))
    with power_stub:
        report = await monitor.get_report()

    data = report.to_dict()
    # Checked one at a time, in the listed order, so a failure names the key.
    for key in expected_keys:
        assert key in data
@pytest.mark.asyncio
async def test_get_report_caches_power_reading(monitor):
    """Two back-to-back reports trigger only one underlying power read.

    ``_read_power`` is replaced by a mock returning ``(10.0, "battery")``; the
    mock's built-in ``call_count`` records how many times it was invoked, so
    no hand-written counter is needed.
    """
    with patch.object(
        monitor, "_read_power", return_value=(10.0, "battery")
    ) as read_power:
        await monitor.get_report()
        await monitor.get_report()

    # Cache TTL is 10s — should only call once
    assert read_power.call_count == 1
# ── Recommendation text ───────────────────────────────────────────────────────
def test_recommendation_no_data(monitor):
    """A score of -1.0 produces a message mentioning missing inference data."""
    message = monitor._build_recommendation(-1.0)
    assert "No inference data" in message
def test_recommendation_low_power_mode(monitor):
    """With low power mode on, the message says the mode is active."""
    monitor.set_low_power_mode(True)
    message = monitor._build_recommendation(2.0)
    assert "Low power mode active" in message
def test_recommendation_low_efficiency(monitor):
    """A score of 1.5 produces a message flagging low efficiency."""
    message = monitor._build_recommendation(1.5)
    assert "Low efficiency" in message
def test_recommendation_good_efficiency(monitor):
    """A score of 8.0 produces a message reporting good efficiency."""
    message = monitor._build_recommendation(8.0)
    assert "Good efficiency" in message