feat: add real-time monitoring dashboard for all agent systems
Implements a comprehensive operational monitoring UI at /monitoring covering all subsystems described in issue #862: - Agent Status: lists configured agents with name, model, status, last action - System Resources: RAM/disk/CPU usage with live progress bars via psutil - Economy: sats balance, earned/spent, transaction count (Lightning ledger) - Stream Health: viewer count, bitrate, uptime (graceful fallback when offline) - Content Pipeline: episode/highlight/clip counts from data/episodes/ - Alerts: auto-derived from resource thresholds, Ollama reachability, wallet balance Implementation details: - New route: GET /monitoring (HTML page), GET /monitoring/status (JSON), GET /monitoring/alerts (JSON) - /monitoring/status aggregates all subsystems concurrently with asyncio.gather - Frontend polls every 10 seconds with vanilla JS (no blocking) - All optional services degrade gracefully per project convention - CSS appended to mission-control.css (no inline styles) - "MONITORING" link added to desktop nav in base.html - 13 unit tests covering page render and all API endpoints Fixes #862 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -57,6 +57,7 @@ from dashboard.routes.telegram import router as telegram_router
|
||||
from dashboard.routes.thinking import router as thinking_router
|
||||
from dashboard.routes.self_correction import router as self_correction_router
|
||||
from dashboard.routes.three_strike import router as three_strike_router
|
||||
from dashboard.routes.monitoring import router as monitoring_router
|
||||
from dashboard.routes.tools import router as tools_router
|
||||
from dashboard.routes.tower import router as tower_router
|
||||
from dashboard.routes.voice import router as voice_router
|
||||
@@ -684,6 +685,7 @@ app.include_router(tasks_router)
|
||||
app.include_router(work_orders_router)
|
||||
app.include_router(loop_qa_router)
|
||||
app.include_router(system_router)
|
||||
app.include_router(monitoring_router)
|
||||
app.include_router(experiments_router)
|
||||
app.include_router(db_explorer_router)
|
||||
app.include_router(world_router)
|
||||
|
||||
323
src/dashboard/routes/monitoring.py
Normal file
323
src/dashboard/routes/monitoring.py
Normal file
@@ -0,0 +1,323 @@
|
||||
"""Real-time monitoring dashboard routes.
|
||||
|
||||
Provides a unified operational view of all agent systems:
|
||||
- Agent status and vitals
|
||||
- System resources (CPU, RAM, disk, Ollama reachability)
|
||||
- Economy (sats earned/spent, injection count)
|
||||
- Stream health (viewer count, bitrate, uptime)
|
||||
- Content pipeline (episodes, highlights, clips)
|
||||
- Alerts (resource thresholds, LLM backend offline, agent offline, low balance)
|
||||
|
||||
Refs: #862
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import UTC, datetime
|
||||
|
||||
from fastapi import APIRouter, Request
|
||||
from fastapi.responses import HTMLResponse, JSONResponse
|
||||
|
||||
from config import APP_START_TIME as _START_TIME
|
||||
from config import settings
|
||||
from dashboard.templating import templates
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
router = APIRouter(prefix="/monitoring", tags=["monitoring"])
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def _get_agent_status() -> list[dict]:
    """Return one status entry per configured agent.

    Reads the ``agents`` mapping from ``settings.agents_config``. When that
    mapping is empty or missing, a single entry built from
    ``settings.agent_name`` and ``settings.ollama_model`` is returned instead.

    ``status`` and ``last_action`` are fixed placeholder values
    (``"running"`` / ``"idle"``); this function performs no liveness probe.

    Returns:
        A list of dicts with keys ``name``, ``model``, ``status``,
        ``last_action`` and ``cell``. An empty list is returned if the
        configuration could not be read at all.
    """
    try:
        agents_raw = (settings.agents_config or {}).get("agents") or {}
        result = []
        for name, info in agents_raw.items():
            # An agent declared without any settings may come through as
            # None; treat it as "all defaults" rather than letting one entry
            # wipe out the whole list via the except below.
            info = info or {}
            result.append(
                {
                    "name": name,
                    "model": info.get("model", "default"),
                    "status": "running",
                    "last_action": "idle",
                    "cell": info.get("cell", "—"),
                }
            )
        if not result:
            # No agents configured: fall back to the single default agent.
            result.append(
                {
                    "name": settings.agent_name,
                    "model": settings.ollama_model,
                    "status": "running",
                    "last_action": "idle",
                    "cell": "main",
                }
            )
        return result
    except Exception as exc:
        logger.warning("agent status fetch failed: %s", exc)
        return []
|
||||
|
||||
|
||||
async def _get_system_resources() -> dict:
    """Return a CPU / RAM / disk / Ollama snapshot for the dashboard.

    RAM, disk and Ollama data come from ``get_system_snapshot()``. CPU usage
    is sampled with ``psutil.cpu_percent`` (0.1 s interval) in a worker thread
    so the sampling sleep does not block the event loop; ``psutil`` is
    optional and ``cpu_percent`` stays ``None`` when it cannot be sampled.

    Returns:
        A dict with keys ``cpu_percent``, ``ram_percent``, ``ram_total_gb``,
        ``ram_available_gb``, ``disk_percent``, ``disk_total_gb``,
        ``disk_free_gb``, ``ollama_reachable``, ``loaded_models`` and
        ``warnings``. If the snapshot itself fails, every metric is ``None``,
        ``ollama_reachable`` is ``False`` and ``warnings`` carries the error
        text.
    """
    try:
        from timmy.vassal.house_health import get_system_snapshot

        snap = await get_system_snapshot()

        cpu_pct: float | None = None
        try:
            import psutil  # optional

            cpu_pct = await asyncio.to_thread(psutil.cpu_percent, 0.1)
        except Exception as exc:
            # Best-effort metric: keep cpu_percent as None, but leave a trace
            # instead of swallowing the reason silently.
            logger.debug("cpu sample unavailable: %s", exc)

        return {
            "cpu_percent": cpu_pct,
            "ram_percent": snap.memory.percent_used,
            "ram_total_gb": snap.memory.total_gb,
            "ram_available_gb": snap.memory.available_gb,
            "disk_percent": snap.disk.percent_used,
            "disk_total_gb": snap.disk.total_gb,
            "disk_free_gb": snap.disk.free_gb,
            "ollama_reachable": snap.ollama.reachable,
            "loaded_models": snap.ollama.loaded_models,
            "warnings": snap.warnings,
        }
    except Exception as exc:
        logger.warning("system resources fetch failed: %s", exc)
        return {
            "cpu_percent": None,
            "ram_percent": None,
            "ram_total_gb": None,
            "ram_available_gb": None,
            "disk_percent": None,
            "disk_total_gb": None,
            "disk_free_gb": None,
            "ollama_reachable": False,
            "loaded_models": [],
            "warnings": [str(exc)],
        }
|
||||
|
||||
|
||||
async def _get_economy() -> dict:
    """Summarise the Lightning ledger for the economy panel.

    Starts from an all-zero payload and fills in the balance, transaction
    count and the incoming/outgoing sat totals from ``lightning.ledger``.
    Any failure (including the ledger module being unavailable) is logged at
    debug level and whatever was gathered so far is returned.
    ``injection_count`` and ``auction_active`` are never updated here.
    """
    stats: dict = dict(
        balance_sats=0,
        earned_sats=0,
        spent_sats=0,
        injection_count=0,
        auction_active=False,
        tx_count=0,
    )
    try:
        from lightning.ledger import get_balance, get_transactions

        stats["balance_sats"] = get_balance()
        ledger_entries = get_transactions()
        stats["tx_count"] = len(ledger_entries)
        for entry in ledger_entries:
            direction = entry.get("direction")
            if direction == "incoming":
                bucket = "earned_sats"
            elif direction == "outgoing":
                bucket = "spent_sats"
            else:
                # Any other direction counts toward tx_count only.
                continue
            stats[bucket] += entry.get("amount_sats", 0)
    except Exception as exc:
        logger.debug("economy fetch failed: %s", exc)
    return stats
|
||||
|
||||
|
||||
async def _get_stream_health() -> dict:
|
||||
"""Return stream health stats.
|
||||
|
||||
Graceful fallback when no streaming backend is configured.
|
||||
"""
|
||||
return {
|
||||
"live": False,
|
||||
"viewer_count": 0,
|
||||
"bitrate_kbps": 0,
|
||||
"uptime_seconds": 0,
|
||||
"title": "No active stream",
|
||||
"source": "unavailable",
|
||||
}
|
||||
|
||||
|
||||
def _scan_episode_dir(output_dir) -> dict:
    """Scan *output_dir* and return the pipeline fields that could be derived.

    Blocking filesystem work; intended to be run off the event loop.
    """
    found: dict = {}
    if not output_dir.exists():
        return found

    # highlights_*.json / clips_*.json live in the same directory and are
    # counted separately below, so they must not compete for "last episode".
    derived_prefixes = ("highlights_", "clips_")
    episodes = [
        path
        for path in output_dir.glob("*.json")
        if not path.name.startswith(derived_prefixes)
    ]
    if episodes:
        found["last_episode"] = max(episodes, key=lambda p: p.stat().st_mtime).stem
    found["highlight_count"] = len(list(output_dir.glob("highlights_*.json")))
    found["clip_count"] = len(list(output_dir.glob("clips_*.json")))
    return found


async def _get_content_pipeline() -> dict:
    """Return content pipeline stats — last episode, highlight/clip counts.

    Looks in ``<settings.repo_root>/data/episodes``. The most recently
    modified episode JSON (excluding highlight and clip files) supplies
    ``last_episode``; highlight and clip counts are the number of
    ``highlights_*.json`` / ``clips_*.json`` files. The directory scan runs
    in a worker thread. On any failure the defaults are returned and the
    error is logged at debug level.

    Returns:
        A dict with keys ``last_episode``, ``highlight_count``,
        ``clip_count`` and ``pipeline_healthy``.
    """
    result: dict = {
        "last_episode": None,
        "highlight_count": 0,
        "clip_count": 0,
        "pipeline_healthy": True,
    }
    try:
        from pathlib import Path

        output_dir = Path(settings.repo_root) / "data" / "episodes"
        result.update(await asyncio.to_thread(_scan_episode_dir, output_dir))
    except Exception as exc:
        logger.debug("content pipeline fetch failed: %s", exc)
    return result
|
||||
|
||||
|
||||
def _build_alerts(
|
||||
resources: dict,
|
||||
agents: list[dict],
|
||||
economy: dict,
|
||||
stream: dict,
|
||||
) -> list[dict]:
|
||||
"""Derive operational alerts from aggregated status data."""
|
||||
alerts: list[dict] = []
|
||||
|
||||
# Resource alerts
|
||||
if resources.get("ram_percent") and resources["ram_percent"] > 90:
|
||||
alerts.append(
|
||||
{
|
||||
"level": "critical",
|
||||
"title": "High Memory Usage",
|
||||
"detail": f"RAM at {resources['ram_percent']:.0f}%",
|
||||
}
|
||||
)
|
||||
elif resources.get("ram_percent") and resources["ram_percent"] > 80:
|
||||
alerts.append(
|
||||
{
|
||||
"level": "warning",
|
||||
"title": "Elevated Memory Usage",
|
||||
"detail": f"RAM at {resources['ram_percent']:.0f}%",
|
||||
}
|
||||
)
|
||||
|
||||
if resources.get("disk_percent") and resources["disk_percent"] > 90:
|
||||
alerts.append(
|
||||
{
|
||||
"level": "critical",
|
||||
"title": "Low Disk Space",
|
||||
"detail": f"Disk at {resources['disk_percent']:.0f}% used",
|
||||
}
|
||||
)
|
||||
elif resources.get("disk_percent") and resources["disk_percent"] > 80:
|
||||
alerts.append(
|
||||
{
|
||||
"level": "warning",
|
||||
"title": "Disk Space Warning",
|
||||
"detail": f"Disk at {resources['disk_percent']:.0f}% used",
|
||||
}
|
||||
)
|
||||
|
||||
if resources.get("cpu_percent") and resources["cpu_percent"] > 95:
|
||||
alerts.append(
|
||||
{
|
||||
"level": "warning",
|
||||
"title": "High CPU Usage",
|
||||
"detail": f"CPU at {resources['cpu_percent']:.0f}%",
|
||||
}
|
||||
)
|
||||
|
||||
# Ollama alert
|
||||
if not resources.get("ollama_reachable", True):
|
||||
alerts.append(
|
||||
{
|
||||
"level": "critical",
|
||||
"title": "LLM Backend Offline",
|
||||
"detail": "Ollama is unreachable — agent responses will fail",
|
||||
}
|
||||
)
|
||||
|
||||
# Agent alerts
|
||||
offline_agents = [a["name"] for a in agents if a.get("status") == "offline"]
|
||||
if offline_agents:
|
||||
alerts.append(
|
||||
{
|
||||
"level": "critical",
|
||||
"title": "Agent Offline",
|
||||
"detail": f"Offline: {', '.join(offline_agents)}",
|
||||
}
|
||||
)
|
||||
|
||||
# Economy alerts
|
||||
balance = economy.get("balance_sats", 0)
|
||||
if isinstance(balance, (int, float)) and balance < 1000:
|
||||
alerts.append(
|
||||
{
|
||||
"level": "warning",
|
||||
"title": "Low Wallet Balance",
|
||||
"detail": f"Balance: {balance} sats",
|
||||
}
|
||||
)
|
||||
|
||||
# Pass-through resource warnings
|
||||
for warn in resources.get("warnings", []):
|
||||
alerts.append({"level": "warning", "title": "System Warning", "detail": warn})
|
||||
|
||||
return alerts
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Routes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@router.get("", response_class=HTMLResponse)
async def monitoring_page(request: Request):
    """Serve the monitoring dashboard page from ``monitoring.html``.

    The template is rendered with an empty context.
    """
    context: dict = {}
    return templates.TemplateResponse(request, "monitoring.html", context)
|
||||
|
||||
|
||||
@router.get("/status")
async def monitoring_status():
    """Return the combined status payload polled by the dashboard.

    Runs every subsystem collector concurrently, derives alerts from their
    results, and returns everything in one JSON object together with a UTC
    timestamp and the process uptime in seconds.
    """
    uptime = (datetime.now(UTC) - _START_TIME).total_seconds()

    collectors = (
        _get_agent_status(),
        _get_system_resources(),
        _get_economy(),
        _get_stream_health(),
        _get_content_pipeline(),
    )
    agents, resources, economy, stream, pipeline = await asyncio.gather(*collectors)

    alerts = _build_alerts(resources, agents, economy, stream)

    payload = {"timestamp": datetime.now(UTC).isoformat(), "uptime_seconds": uptime}
    payload["agents"] = agents
    payload["resources"] = resources
    payload["economy"] = economy
    payload["stream"] = stream
    payload["pipeline"] = pipeline
    payload["alerts"] = alerts
    return payload
|
||||
|
||||
|
||||
@router.get("/alerts")
async def monitoring_alerts():
    """Return the current alerts and how many there are.

    Gathers agent, resource, economy and stream data concurrently (the
    content pipeline is not consulted) and derives alerts from them.
    """
    collectors = (
        _get_agent_status(),
        _get_system_resources(),
        _get_economy(),
        _get_stream_health(),
    )
    agents, resources, economy, stream = await asyncio.gather(*collectors)
    current = _build_alerts(resources, agents, economy, stream)
    return {"alerts": current, "count": len(current)}
|
||||
@@ -50,6 +50,7 @@
|
||||
<a href="/briefing" class="mc-test-link">BRIEFING</a>
|
||||
<a href="/thinking" class="mc-test-link mc-link-thinking">THINKING</a>
|
||||
<a href="/swarm/mission-control" class="mc-test-link">MISSION CTRL</a>
|
||||
<a href="/monitoring" class="mc-test-link">MONITORING</a>
|
||||
<a href="/swarm/live" class="mc-test-link">SWARM</a>
|
||||
<a href="/scorecards" class="mc-test-link">SCORECARDS</a>
|
||||
<a href="/bugs" class="mc-test-link mc-link-bugs">BUGS</a>
|
||||
|
||||
429
src/dashboard/templates/monitoring.html
Normal file
429
src/dashboard/templates/monitoring.html
Normal file
@@ -0,0 +1,429 @@
|
||||
{% extends "base.html" %}
|
||||
|
||||
{% block title %}Monitoring — Timmy Time{% endblock %}
|
||||
|
||||
{% block content %}
|
||||
<!-- Page header -->
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h2 class="card-title">Real-Time Monitoring</h2>
|
||||
<div class="d-flex align-items-center gap-2">
|
||||
<span class="badge" id="mon-overall-badge">Loading...</span>
|
||||
<span class="mon-last-updated" id="mon-last-updated"></span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Uptime stat bar -->
|
||||
<div class="grid grid-4">
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-uptime">—</div>
|
||||
<div class="stat-label">Uptime</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-agents-count">—</div>
|
||||
<div class="stat-label">Agents</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-alerts-count">0</div>
|
||||
<div class="stat-label">Alerts</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-ollama-badge">—</div>
|
||||
<div class="stat-label">LLM Backend</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Alerts panel (conditionally shown) -->
|
||||
<div class="card mc-card-spaced" id="mon-alerts-card" style="display:none">
|
||||
<div class="card-header">
|
||||
<h2 class="card-title">Alerts</h2>
|
||||
<span class="badge badge-danger" id="mon-alerts-badge">0</span>
|
||||
</div>
|
||||
<div id="mon-alerts-list"></div>
|
||||
</div>
|
||||
|
||||
<!-- Agent Status -->
|
||||
<div class="card mc-card-spaced">
|
||||
<div class="card-header">
|
||||
<h2 class="card-title">Agent Status</h2>
|
||||
</div>
|
||||
<div id="mon-agents-list">
|
||||
<p class="chat-history-placeholder">Loading agents...</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- System Resources + Economy row -->
|
||||
<div class="grid grid-2 mc-card-spaced mc-section-gap">
|
||||
|
||||
<!-- System Resources -->
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h2 class="card-title">System Resources</h2>
|
||||
</div>
|
||||
<div class="grid grid-2">
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-cpu">—</div>
|
||||
<div class="stat-label">CPU</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-ram">—</div>
|
||||
<div class="stat-label">RAM</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-disk">—</div>
|
||||
<div class="stat-label">Disk</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-models-loaded">—</div>
|
||||
<div class="stat-label">Models Loaded</div>
|
||||
</div>
|
||||
</div>
|
||||
<!-- Resource bars -->
|
||||
<div class="mon-resource-bars" id="mon-resource-bars">
|
||||
<div class="mon-bar-row">
|
||||
<span class="mon-bar-label">RAM</span>
|
||||
<div class="mon-bar-track">
|
||||
<div class="mon-bar-fill" id="mon-ram-bar" style="width:0%"></div>
|
||||
</div>
|
||||
<span class="mon-bar-pct" id="mon-ram-pct">—</span>
|
||||
</div>
|
||||
<div class="mon-bar-row">
|
||||
<span class="mon-bar-label">Disk</span>
|
||||
<div class="mon-bar-track">
|
||||
<div class="mon-bar-fill" id="mon-disk-bar" style="width:0%"></div>
|
||||
</div>
|
||||
<span class="mon-bar-pct" id="mon-disk-pct">—</span>
|
||||
</div>
|
||||
<div class="mon-bar-row" id="mon-cpu-bar-row">
|
||||
<span class="mon-bar-label">CPU</span>
|
||||
<div class="mon-bar-track">
|
||||
<div class="mon-bar-fill" id="mon-cpu-bar" style="width:0%"></div>
|
||||
</div>
|
||||
<span class="mon-bar-pct" id="mon-cpu-pct">—</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Economy -->
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h2 class="card-title">Economy</h2>
|
||||
</div>
|
||||
<div class="grid grid-2">
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-balance">—</div>
|
||||
<div class="stat-label">Balance (sats)</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-earned">—</div>
|
||||
<div class="stat-label">Earned</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-spent">—</div>
|
||||
<div class="stat-label">Spent</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-injections">—</div>
|
||||
<div class="stat-label">Injections</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="grid grid-2 mc-section-heading">
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-tx-count">—</div>
|
||||
<div class="stat-label">Transactions</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-auction">—</div>
|
||||
<div class="stat-label">Auction</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Stream Health + Content Pipeline row -->
|
||||
<div class="grid grid-2 mc-card-spaced mc-section-gap">
|
||||
|
||||
<!-- Stream Health -->
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h2 class="card-title">Stream Health</h2>
|
||||
<span class="badge" id="mon-stream-badge">Offline</span>
|
||||
</div>
|
||||
<div class="grid grid-2">
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-viewers">—</div>
|
||||
<div class="stat-label">Viewers</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-bitrate">—</div>
|
||||
<div class="stat-label">Bitrate (kbps)</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-stream-uptime">—</div>
|
||||
<div class="stat-label">Stream Uptime</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value mon-stream-title" id="mon-stream-title">—</div>
|
||||
<div class="stat-label">Title</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Content Pipeline -->
|
||||
<div class="card">
|
||||
<div class="card-header">
|
||||
<h2 class="card-title">Content Pipeline</h2>
|
||||
<span class="badge" id="mon-pipeline-badge">—</span>
|
||||
</div>
|
||||
<div class="grid grid-2">
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-highlights">—</div>
|
||||
<div class="stat-label">Highlights</div>
|
||||
</div>
|
||||
<div class="stat">
|
||||
<div class="stat-value" id="mon-clips">—</div>
|
||||
<div class="stat-label">Clips</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="mon-last-episode" id="mon-last-episode-wrap" style="display:none">
|
||||
<span class="mon-bar-label">Last episode: </span>
|
||||
<span id="mon-last-episode">—</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
// -----------------------------------------------------------------------
|
||||
// Utility
|
||||
// -----------------------------------------------------------------------
|
||||
function _pct(val) {
  // Whole-number percentage for display; null/undefined render as an em dash.
  var missing = (val === null || val === undefined);
  return missing ? '—' : val.toFixed(0) + '%';
}
|
||||
|
||||
function _barColor(pct) {
  // Map a usage percentage to a theme colour: red from 90, amber from 75, else green.
  return pct >= 90 ? 'var(--red)'
       : pct >= 75 ? 'var(--amber)'
       : 'var(--green)';
}
|
||||
|
||||
function _setBar(barId, pct) {
  // Size and colour one progress bar; the width is clamped to 0–100 and a
  // missing value is drawn as an empty bar.
  var fill = document.getElementById(barId);
  if (!fill) return;
  var clamped = Math.max(0, pct || 0);
  clamped = Math.min(100, clamped);
  fill.style.width = clamped + '%';
  fill.style.background = _barColor(clamped);
}
|
||||
|
||||
function _uptime(secs) {
  // Human-readable duration: "12s", "5m" or "3h 7m"; an em dash when the
  // value is missing (0 is a valid duration).
  if (!secs && secs !== 0) return '—';
  var total = Math.floor(secs);
  var hours = Math.floor(total / 3600);
  var minutes = Math.floor((total % 3600) / 60);
  if (total < 60) return total + 's';
  if (total < 3600) return minutes + 'm';
  return hours + 'h ' + minutes + 'm';
}
|
||||
|
||||
function _setText(id, val) {
  // Write a value into an element by id; null/undefined show as an em dash.
  // Unknown ids are ignored.
  var target = document.getElementById(id);
  if (!target) return;
  var missing = (val === null || val === undefined);
  target.textContent = missing ? '—' : val;
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Render helpers
|
||||
// -----------------------------------------------------------------------
|
||||
function renderAgents(agents) {
  // Rebuild the agent list from scratch on every poll. Each row shows a
  // status dot, name, model, status text and last action.
  var container = document.getElementById('mon-agents-list');
  if (!container) return;
  container.innerHTML = '';

  if (!agents || agents.length === 0) {
    var placeholder = document.createElement('p');
    placeholder.className = 'chat-history-placeholder';
    placeholder.textContent = 'No agents configured';
    container.appendChild(placeholder);
    return;
  }

  // Build one <span> cell; textContent keeps server-provided strings inert.
  function cell(className, text) {
    var span = document.createElement('span');
    span.className = className;
    if (text !== undefined) span.textContent = text;
    return span;
  }

  agents.forEach(function(a) {
    var statusColor = a.status === 'running' ? 'var(--green)' :
                      a.status === 'idle' ? 'var(--amber)' : 'var(--red)';

    var row = document.createElement('div');
    row.className = 'mon-agent-row';

    var dot = cell('mon-agent-dot');
    dot.style.background = statusColor;

    var status = cell('mon-agent-status', a.status || '—');
    // The stylesheet colours this label green unconditionally; override it
    // so a non-running agent's label matches its dot.
    status.style.color = statusColor;

    row.appendChild(dot);
    row.appendChild(cell('mon-agent-name', a.name));
    row.appendChild(cell('mon-agent-model', a.model));
    row.appendChild(status);
    row.appendChild(cell('mon-agent-action', a.last_action || '—'));
    container.appendChild(row);
  });
}
|
||||
|
||||
function renderAlerts(alerts) {
  // Update both alert counters, then show the alerts card with one row per
  // alert, or hide the card when there is nothing to report.
  var card = document.getElementById('mon-alerts-card');
  var list = document.getElementById('mon-alerts-list');
  var total = alerts.length;

  document.getElementById('mon-alerts-badge').textContent = total;
  document.getElementById('mon-alerts-count').textContent = total;

  if (total === 0) {
    card.style.display = 'none';
    return;
  }

  card.style.display = '';
  list.innerHTML = '';
  for (var i = 0; i < total; i++) {
    var entry = alerts[i];

    var heading = document.createElement('strong');
    heading.textContent = entry.title;

    var body = document.createElement('span');
    body.className = 'mon-alert-detail';
    body.textContent = ' — ' + (entry.detail || '');

    var item = document.createElement('div');
    // Level selects the colour variant; unknown/missing levels use "warning".
    item.className = 'mon-alert-item mon-alert-' + (entry.level || 'warning');
    item.appendChild(heading);
    item.appendChild(body);
    list.appendChild(item);
  }
}
|
||||
|
||||
function renderResources(r) {
  // Fill the System Resources card and the LLM Backend stat from the
  // "resources" object of /monitoring/status. Missing metrics (null or
  // absent) show an em dash and an empty bar instead of a stale reading.
  function freeGb(v) {
    return (v === null || v === undefined) ? '—' : v.toFixed(1) + ' GB free';
  }

  _setText('mon-cpu', _pct(r.cpu_percent));
  _setText('mon-ram', freeGb(r.ram_available_gb));
  _setText('mon-disk', freeGb(r.disk_free_gb));
  _setText('mon-models-loaded', r.loaded_models ? r.loaded_models.length : '—');

  // Always refresh every bar: _setBar draws null as 0% and _pct renders it
  // as '—', so a metric that disappears does not keep its previous value.
  [
    ['ram', r.ram_percent],
    ['disk', r.disk_percent],
    ['cpu', r.cpu_percent]
  ].forEach(function(metric) {
    _setBar('mon-' + metric[0] + '-bar', metric[1]);
    _setText('mon-' + metric[0] + '-pct', _pct(metric[1]));
  });

  var ollamaBadge = document.getElementById('mon-ollama-badge');
  if (ollamaBadge) {
    ollamaBadge.textContent = r.ollama_reachable ? 'Online' : 'Offline';
    ollamaBadge.style.color = r.ollama_reachable ? 'var(--green)' : 'var(--red)';
  }
}
|
||||
|
||||
function renderEconomy(e) {
  // Copy each economy figure into its stat tile; the auction flag is shown
  // as "Active" or "None".
  var tiles = [
    ['mon-balance', e.balance_sats],
    ['mon-earned', e.earned_sats],
    ['mon-spent', e.spent_sats],
    ['mon-injections', e.injection_count],
    ['mon-tx-count', e.tx_count],
    ['mon-auction', e.auction_active ? 'Active' : 'None']
  ];
  tiles.forEach(function(tile) {
    _setText(tile[0], tile[1]);
  });
}
|
||||
|
||||
function renderStream(s) {
  // Update the Stream Health card: LIVE/Offline badge plus viewer count,
  // bitrate, uptime and title.
  var badge = document.getElementById('mon-stream-badge');
  var isLive = Boolean(s.live);
  badge.textContent = isLive ? 'LIVE' : 'Offline';
  badge.className = isLive ? 'badge badge-success' : 'badge badge-danger';

  _setText('mon-viewers', s.viewer_count);
  _setText('mon-bitrate', s.bitrate_kbps);
  _setText('mon-stream-uptime', _uptime(s.uptime_seconds));
  _setText('mon-stream-title', s.title || '—');
}
|
||||
|
||||
function renderPipeline(p) {
  // Update the Content Pipeline card: health badge, highlight/clip counts
  // and the optional "Last episode" row.
  var badge = document.getElementById('mon-pipeline-badge');
  badge.textContent = p.pipeline_healthy ? 'Healthy' : 'Degraded';
  badge.className = p.pipeline_healthy ? 'badge badge-success' : 'badge badge-warning';
  _setText('mon-highlights', p.highlight_count);
  _setText('mon-clips', p.clip_count);

  // Show the row only while an episode is reported; hide it again if a
  // later poll has none, so an outdated name is not left on screen.
  var wrap = document.getElementById('mon-last-episode-wrap');
  if (wrap) wrap.style.display = p.last_episode ? '' : 'none';
  if (p.last_episode) _setText('mon-last-episode', p.last_episode);
}
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
// Poll /monitoring/status
|
||||
// -----------------------------------------------------------------------
|
||||
// Delay between the end of one poll and the start of the next.
var MON_POLL_INTERVAL_MS = 10000;

async function pollMonitoring() {
  // Fetch /monitoring/status once and refresh every panel. Any failure
  // (network, non-2xx, render error) is logged and shown as "Poll Error".
  try {
    var resp = await fetch('/monitoring/status');
    if (!resp.ok) throw new Error('HTTP ' + resp.status);
    var data = await resp.json();
    var alerts = data.alerts || [];

    // Overall badge
    var overall = document.getElementById('mon-overall-badge');
    if (alerts.length === 0) {
      overall.textContent = 'All Systems Nominal';
      overall.className = 'badge badge-success';
    } else {
      var critical = alerts.filter(function(a) { return a.level === 'critical'; });
      overall.textContent = critical.length > 0 ? 'Critical Issues' : 'Warnings';
      overall.className = critical.length > 0 ? 'badge badge-danger' : 'badge badge-warning';
    }

    // Uptime
    _setText('mon-uptime', _uptime(data.uptime_seconds));
    _setText('mon-agents-count', (data.agents || []).length);

    // Last updated
    var updEl = document.getElementById('mon-last-updated');
    if (updEl) updEl.textContent = 'Updated ' + new Date().toLocaleTimeString();

    // Panels
    renderAgents(data.agents || []);
    renderAlerts(alerts);
    if (data.resources) renderResources(data.resources);
    if (data.economy) renderEconomy(data.economy);
    if (data.stream) renderStream(data.stream);
    if (data.pipeline) renderPipeline(data.pipeline);

  } catch (err) {
    console.error('Monitoring poll failed:', err);
    var errBadge = document.getElementById('mon-overall-badge');
    // Guard the lookup: throwing here would reject the poll promise.
    if (errBadge) {
      errBadge.textContent = 'Poll Error';
      errBadge.className = 'badge badge-danger';
    }
  }
}

// Start immediately, then poll again 10 s after each poll settles. Chaining
// setTimeout (rather than setInterval) means a slow response can never
// overlap with the next request or render out of order.
function _monitoringLoop() {
  function scheduleNext() {
    setTimeout(_monitoringLoop, MON_POLL_INTERVAL_MS);
  }
  pollMonitoring().then(scheduleNext, scheduleNext);
}

_monitoringLoop();
|
||||
</script>
|
||||
{% endblock %}
|
||||
@@ -2785,3 +2785,120 @@
|
||||
color: var(--text-bright);
|
||||
word-break: break-all;
|
||||
}
|
||||
|
||||
/* =========================================================
|
||||
Monitoring Dashboard — #862
|
||||
========================================================= */
|
||||
|
||||
.mon-last-updated {
|
||||
font-size: 0.7rem;
|
||||
color: var(--text-dim);
|
||||
letter-spacing: 0.04em;
|
||||
}
|
||||
|
||||
/* Agent rows */
|
||||
.mon-agent-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.75rem;
|
||||
padding: 0.5rem 0.25rem;
|
||||
border-bottom: 1px solid var(--border);
|
||||
font-size: 0.82rem;
|
||||
}
|
||||
.mon-agent-row:last-child { border-bottom: none; }
|
||||
|
||||
.mon-agent-dot {
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
border-radius: 50%;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
.mon-agent-name { font-weight: 700; color: var(--text-bright); min-width: 7rem; }
|
||||
.mon-agent-model { color: var(--text-dim); min-width: 8rem; }
|
||||
.mon-agent-status {
|
||||
font-size: 0.72rem;
|
||||
font-weight: 700;
|
||||
letter-spacing: 0.06em;
|
||||
color: var(--green);
|
||||
min-width: 4rem;
|
||||
}
|
||||
.mon-agent-action { color: var(--text-dim); font-style: italic; }
|
||||
|
||||
/* Resource progress bars */
|
||||
.mon-resource-bars {
|
||||
margin-top: 0.75rem;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
.mon-bar-row {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
font-size: 0.75rem;
|
||||
}
|
||||
.mon-bar-label {
|
||||
min-width: 2.8rem;
|
||||
font-size: 0.68rem;
|
||||
font-weight: 700;
|
||||
letter-spacing: 0.06em;
|
||||
color: var(--text-dim);
|
||||
text-transform: uppercase;
|
||||
}
|
||||
.mon-bar-track {
|
||||
flex: 1;
|
||||
height: 6px;
|
||||
background: var(--bg-card);
|
||||
border-radius: 3px;
|
||||
overflow: hidden;
|
||||
border: 1px solid var(--border);
|
||||
}
|
||||
.mon-bar-fill {
|
||||
height: 100%;
|
||||
background: var(--green);
|
||||
border-radius: 3px;
|
||||
transition: width 0.4s ease, background 0.4s ease;
|
||||
}
|
||||
.mon-bar-pct {
|
||||
min-width: 2.5rem;
|
||||
text-align: right;
|
||||
color: var(--text-dim);
|
||||
font-size: 0.7rem;
|
||||
}
|
||||
|
||||
/* Alert items */
|
||||
.mon-alert-item {
|
||||
padding: 0.5rem 0.75rem;
|
||||
border-left: 3px solid var(--amber);
|
||||
background: rgba(255,179,0,0.06);
|
||||
margin-bottom: 0.4rem;
|
||||
border-radius: 0 3px 3px 0;
|
||||
font-size: 0.82rem;
|
||||
}
|
||||
.mon-alert-item.mon-alert-critical {
|
||||
border-left-color: var(--red);
|
||||
background: rgba(255,59,59,0.06);
|
||||
}
|
||||
.mon-alert-item.mon-alert-info {
|
||||
border-left-color: var(--green);
|
||||
background: rgba(0,255,136,0.05);
|
||||
}
|
||||
.mon-alert-detail { color: var(--text-dim); }
|
||||
|
||||
/* Stream title truncation */
|
||||
.mon-stream-title {
|
||||
font-size: 0.75rem;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
max-width: 10rem;
|
||||
}
|
||||
|
||||
/* Last episode label */
|
||||
.mon-last-episode {
|
||||
margin-top: 0.75rem;
|
||||
font-size: 0.78rem;
|
||||
color: var(--text-dim);
|
||||
padding-top: 0.5rem;
|
||||
border-top: 1px solid var(--border);
|
||||
}
|
||||
|
||||
95
tests/dashboard/test_monitoring.py
Normal file
95
tests/dashboard/test_monitoring.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""Tests for the real-time monitoring dashboard routes. Refs: #862"""
|
||||
|
||||
|
||||
class TestMonitoringPage:
    """Smoke tests for the HTML page served at /monitoring."""

    def test_monitoring_page_returns_200(self, client):
        assert client.get("/monitoring").status_code == 200

    def test_monitoring_page_contains_key_headings(self, client):
        page = client.get("/monitoring")
        assert page.status_code == 200
        html = page.text
        # The page title plus one heading per dashboard panel.
        expected_headings = (
            "Real-Time Monitoring",
            "Agent Status",
            "System Resources",
            "Economy",
            "Stream Health",
            "Content Pipeline",
        )
        for heading in expected_headings:
            assert heading in html
|
||||
|
||||
|
||||
class TestMonitoringStatusEndpoint:
    """Tests for /monitoring/status JSON endpoint.

    The per-section field checks cover every key the endpoint returns for
    that section, including the ones the dashboard script reads
    (e.g. ``cpu_percent``, ``ram_available_gb``, ``title``).
    """

    def test_status_returns_200(self, client):
        response = client.get("/monitoring/status")
        assert response.status_code == 200

    def test_status_has_required_keys(self, client):
        response = client.get("/monitoring/status")
        assert response.status_code == 200
        data = response.json()
        for key in (
            "timestamp",
            "uptime_seconds",
            "agents",
            "resources",
            "economy",
            "stream",
            "pipeline",
            "alerts",
        ):
            assert key in data, f"Missing key: {key}"

    def test_agents_is_list(self, client):
        response = client.get("/monitoring/status")
        data = response.json()
        assert isinstance(data["agents"], list)

    def test_alerts_is_list(self, client):
        response = client.get("/monitoring/status")
        data = response.json()
        assert isinstance(data["alerts"], list)

    def test_resources_has_expected_fields(self, client):
        response = client.get("/monitoring/status")
        data = response.json()
        resources = data["resources"]
        for field in (
            "cpu_percent",
            "ram_percent",
            "ram_total_gb",
            "ram_available_gb",
            "disk_percent",
            "disk_total_gb",
            "disk_free_gb",
            "ollama_reachable",
            "loaded_models",
            "warnings",
        ):
            assert field in resources, f"Missing resource field: {field}"

    def test_economy_has_expected_fields(self, client):
        response = client.get("/monitoring/status")
        data = response.json()
        economy = data["economy"]
        for field in (
            "balance_sats",
            "earned_sats",
            "spent_sats",
            "injection_count",
            "auction_active",
            "tx_count",
        ):
            assert field in economy, f"Missing economy field: {field}"

    def test_stream_has_expected_fields(self, client):
        response = client.get("/monitoring/status")
        data = response.json()
        stream = data["stream"]
        for field in (
            "live",
            "viewer_count",
            "bitrate_kbps",
            "uptime_seconds",
            "title",
            "source",
        ):
            assert field in stream, f"Missing stream field: {field}"

    def test_uptime_is_non_negative(self, client):
        response = client.get("/monitoring/status")
        data = response.json()
        assert data["uptime_seconds"] >= 0
|
||||
|
||||
|
||||
class TestMonitoringAlertsEndpoint:
    """Checks on the JSON returned by /monitoring/alerts."""

    def test_alerts_returns_200(self, client):
        assert client.get("/monitoring/alerts").status_code == 200

    def test_alerts_has_alerts_and_count(self, client):
        payload = client.get("/monitoring/alerts").json()
        assert "alerts" in payload
        assert "count" in payload
        assert isinstance(payload["alerts"], list)
        # The reported count must agree with the list actually returned.
        assert payload["count"] == len(payload["alerts"])

    def test_alert_items_have_level_and_title(self, client):
        payload = client.get("/monitoring/alerts").json()
        allowed_levels = ("info", "warning", "critical")
        for item in payload["alerts"]:
            assert "level" in item
            assert "title" in item
            assert item["level"] in allowed_levels
|
||||
Reference in New Issue
Block a user