122 lines
3.9 KiB
Python
122 lines
3.9 KiB
Python
"""Energy Budget Monitoring routes.

Exposes the energy budget monitor via REST API so the dashboard and
external tools can query power draw, efficiency scores, and toggle
low power mode.

Refs: #1009
"""
|
||
|
||
import logging
|
||
|
||
from fastapi import APIRouter, HTTPException
|
||
from pydantic import BaseModel
|
||
|
||
from config import settings
|
||
from infrastructure.energy.monitor import energy_monitor
|
||
|
||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Every route declared on this router is served under the /energy prefix
# and carries the "energy" tag.
router = APIRouter(prefix="/energy", tags=["energy"])
|
||
|
||
|
||
class LowPowerRequest(BaseModel):
    """Payload accepted by the low power mode toggle endpoint."""

    # Desired state: True switches low power mode on, False switches it off.
    enabled: bool
|
||
|
||
|
||
class InferenceEventRequest(BaseModel):
    """Payload describing a single inference event to be recorded."""

    # Model identifier; handed to the energy monitor unchanged.
    model: str
    # Throughput of the inference, in tokens per second.
    tokens_per_second: float
|
||
|
||
|
||
@router.get("/status")
async def energy_status():
    """Report the current state of the energy budget monitor.

    When the ``energy_budget_enabled`` setting is falsy, the monitor is not
    queried; a short payload with ``"enabled": False`` and an explanatory
    message is returned instead.

    Otherwise the monitor's report (live power estimate, efficiency score
    on a 0–10 scale, recent inference samples, low power mode flag) is
    serialised with ``to_dict()`` and returned with ``"enabled": True``.
    """
    monitoring_on = getattr(settings, "energy_budget_enabled", True)

    if monitoring_on:
        current = await energy_monitor.get_report()
        payload = dict(current.to_dict())
        payload["enabled"] = True
        return payload

    return {
        "enabled": False,
        "message": "Energy budget monitoring is disabled (ENERGY_BUDGET_ENABLED=false)",
    }
|
||
|
||
|
||
@router.get("/report")
async def energy_report():
    """Return the energy budget report together with every retained sample.

    The payload has the same shape as ``/energy/status``, except that
    ``recent_samples`` is rebuilt from the monitor's whole sample window
    rather than the shortened list produced by ``to_dict()``.

    Raises:
        HTTPException: 503 when the ``energy_budget_enabled`` setting is
            falsy.
    """
    if not getattr(settings, "energy_budget_enabled", True):
        raise HTTPException(status_code=503, detail="Energy budget monitoring is disabled")

    def _as_dict(sample):
        # Rounded, JSON-friendly view of one recorded sample.
        return {
            "timestamp": sample.timestamp,
            "model": sample.model,
            "tokens_per_second": round(sample.tokens_per_second, 1),
            "estimated_watts": round(sample.estimated_watts, 2),
            "efficiency": round(sample.efficiency, 3),
            "efficiency_score": round(sample.efficiency_score, 2),
        }

    current = await energy_monitor.get_report()
    payload = current.to_dict()

    # Copy the sample container before iterating so the serialisation works
    # on a fixed snapshot of the window.
    window = list(energy_monitor._samples)
    payload["recent_samples"] = [_as_dict(entry) for entry in window]

    return {**payload, "enabled": True}
|
||
|
||
|
||
@router.post("/low-power")
async def set_low_power_mode(body: LowPowerRequest):
    """Switch low power mode on or off.

    The requested state is forwarded to the energy monitor.  While low
    power mode is on, the cascade router is advised to prefer the model
    named by the ``energy_low_power_model`` setting.

    Args:
        body: Request payload carrying the desired ``enabled`` flag.

    Returns:
        A dict with the requested ``low_power_mode`` flag, the
        ``preferred_model`` (``None`` when disabling) and a human-readable
        ``message``.

    Raises:
        HTTPException: 503 when the ``energy_budget_enabled`` setting is
            falsy.
    """
    if not getattr(settings, "energy_budget_enabled", True):
        raise HTTPException(status_code=503, detail="Energy budget monitoring is disabled")

    turn_on = body.enabled
    energy_monitor.set_low_power_mode(turn_on)

    fallback_model = getattr(settings, "energy_low_power_model", "qwen3:1b")

    if turn_on:
        preferred = fallback_model
        routing_note = f"Routing to {fallback_model}."
    else:
        preferred = None
        routing_note = "Routing restored to default."

    summary = f"Low power mode {'enabled' if turn_on else 'disabled'}. " + routing_note

    return {
        "low_power_mode": turn_on,
        "preferred_model": preferred,
        "message": summary,
    }
|
||
|
||
|
||
@router.post("/record")
async def record_inference_event(body: InferenceEventRequest):
    """Record an inference event for efficiency tracking.

    Intended to be called after each LLM inference completes.  The event is
    forwarded to the energy monitor, which updates the rolling efficiency
    score and may auto-activate low power mode if watts exceed the
    configured threshold.

    Args:
        body: Request payload with the model name and its measured
            ``tokens_per_second``.

    Returns:
        ``{"recorded": False, "message": ...}`` when the
        ``energy_budget_enabled`` setting is falsy; otherwise a dict with
        ``recorded: True``, the sample's rounded ``efficiency_score`` and
        ``estimated_watts``, and the monitor's current ``low_power_mode``.

    Raises:
        HTTPException: 422 when ``tokens_per_second`` is zero, negative,
            NaN or infinite.
    """
    # Function-scope import so this handler does not depend on a change to
    # the module's import block.
    import math

    if not getattr(settings, "energy_budget_enabled", True):
        return {"recorded": False, "message": "Energy budget monitoring is disabled"}

    throughput = body.tokens_per_second

    if throughput <= 0:
        raise HTTPException(status_code=422, detail="tokens_per_second must be positive")

    # NaN compares False against everything and +inf is > 0, so both slip
    # past the check above.  Python's JSON parser accepts the NaN/Infinity
    # literals, so reject them here before they reach the monitor.
    # NOTE(review): presumably a non-finite sample would also poison the
    # rolling efficiency score — confirm against the monitor implementation.
    if not math.isfinite(throughput):
        raise HTTPException(
            status_code=422, detail="tokens_per_second must be a finite number"
        )

    sample = energy_monitor.record_inference(body.model, throughput)
    return {
        "recorded": True,
        "efficiency_score": round(sample.efficiency_score, 2),
        "estimated_watts": round(sample.estimated_watts, 2),
        "low_power_mode": energy_monitor.low_power_mode,
    }
|