Compare commits

..

1 Commits

Author SHA1 Message Date
Alexander Whitestone
c7dfb8a5e6 fix: [MONITORING] Integrate Kimi Heartbeat status into Nexus Watchdog (closes #800)
Some checks failed
CI / test (pull_request) Failing after 8s
CI / validate (pull_request) Failing after 12s
2026-04-10 20:16:43 -04:00
5 changed files with 139 additions and 170 deletions

79
app.js
View File

@@ -1,5 +1,3 @@
shell-init: error retrieving current directory: getcwd: cannot access parent directories: No such file or directory
chdir: error retrieving current directory: getcwd: cannot access parent directories: No such file or directory
import * as THREE from 'three';
import { EffectComposer } from 'three/addons/postprocessing/EffectComposer.js';
import { RenderPass } from 'three/addons/postprocessing/RenderPass.js';
@@ -1984,67 +1982,24 @@ function setupControls() {
document.getElementById('chat-quick-actions').addEventListener('click', (e) => {
const btn = e.target.closest('.quick-action-btn');
if (!btn) return;
handleQuickAction(btn.dataset.action);
const action = btn.dataset.action;
switch(action) {
case 'status':
sendChatMessage("Timmy, what is the current system status?");
break;
case 'agents':
sendChatMessage("Timmy, check on all active agents.");
break;
case 'portals':
openPortalAtlas();
break;
case 'help':
sendChatMessage("Timmy, I need assistance with Nexus navigation.");
break;
}
});
// ═══ QUICK ACTION HANDLER ═══
function handleQuickAction(action) {
switch(action) {
case 'status': {
const portalCount = portals.length;
const onlinePortals = portals.filter(p => p.userData && p.userData.status === 'online').length;
const agentCount = agents.length;
const wsState = wsConnected ? 'ONLINE' : 'OFFLINE';
const wsColor = wsConnected ? '#4af0c0' : '#ff4466';
addChatMessage('system', `[SYSTEM STATUS]`);
addChatMessage('timmy', `Nexus operational. ${portalCount} portals registered (${onlinePortals} online). ${agentCount} agent presences active. Hermes WebSocket: ${wsState}. Navigation mode: ${NAV_MODES[navModeIdx].toUpperCase()}. Performance tier: ${performanceTier.toUpperCase()}.`);
break;
}
case 'agents': {
addChatMessage('system', `[AGENT ROSTER]`);
if (agents.length === 0) {
addChatMessage('timmy', 'No active agent presences detected in the Nexus. The thought stream and harness pulse are the primary indicators of system activity.');
} else {
const roster = agents.map(a => `- ${(a.userData && a.userData.name) || a.name || 'Unknown'}: ${(a.userData && a.userData.status) || 'active'}`).join('\n');
addChatMessage('timmy', `Active agents:\n${roster}`);
}
break;
}
case 'portals':
openPortalAtlas();
break;
case 'heartbeat': {
const agentLog = document.getElementById('agent-log-content');
const recentEntries = agentLog ? agentLog.querySelectorAll('.agent-log-entry') : [];
const entryCount = recentEntries.length;
addChatMessage('system', `[HEARTBEAT INSPECTION]`);
addChatMessage('timmy', `Hermes heartbeat ${wsConnected ? 'active' : 'inactive'}. ${entryCount} recent entries in thought stream. WebSocket reconnect timer: ${wsReconnectTimer ? 'active' : 'idle'}. Harness pulse mesh: ${harnessPulseMesh ? 'rendering' : 'standby'}.`);
break;
}
case 'thoughts': {
const agentLog = document.getElementById('agent-log-content');
const entries = agentLog ? Array.from(agentLog.querySelectorAll('.agent-log-entry')).slice(0, 5) : [];
addChatMessage('system', `[THOUGHT STREAM]`);
if (entries.length === 0) {
addChatMessage('timmy', 'The thought stream is quiet. No recent agent entries detected.');
} else {
const summary = entries.map(e => '> ' + e.textContent.trim()).join('\n');
addChatMessage('timmy', `Recent thoughts:\n${summary}`);
}
break;
}
case 'help': {
addChatMessage('system', `[NEXUS HELP]`);
addChatMessage('timmy', `Navigation: WASD to move, mouse to look around.\n` +
`Press V to cycle: Walk / Orbit / Fly mode.\n` +
`Enter to chat. Escape to close overlays.\n` +
`Press F near a portal to enter. Press E near a vision point to read.\n` +
`Press Tab for Portal Atlas.\n` +
`The Batcave Terminal shows system logs. The Workshop Terminal shows tool output.`);
break;
}
}
}
document.getElementById('portal-close-btn').addEventListener('click', closePortalOverlay);
document.getElementById('vision-close-btn').addEventListener('click', closeVisionOverlay);

View File

@@ -60,6 +60,23 @@ If the heartbeat is older than --stale-threshold seconds, the
mind is considered dead even if the process is still running
(e.g., hung on a blocking call).
KIMI HEARTBEAT
==============
The Kimi triage pipeline writes a cron heartbeat file after each run:
/var/run/bezalel/heartbeats/kimi-heartbeat.last
(fallback: ~/.bezalel/heartbeats/kimi-heartbeat.last)
{
"job": "kimi-heartbeat",
"timestamp": 1711843200.0,
"interval_seconds": 900,
"pid": 12345,
"status": "ok"
}
If the heartbeat is stale (>2x declared interval), the watchdog reports
a Kimi Heartbeat failure alongside the other checks.
ZERO DEPENDENCIES
=================
Pure stdlib. No pip installs. Same machine as the nexus.
@@ -104,6 +121,10 @@ DEFAULT_HEARTBEAT_PATH = Path.home() / ".nexus" / "heartbeat.json"
DEFAULT_STALE_THRESHOLD = 300 # 5 minutes without a heartbeat = dead
DEFAULT_INTERVAL = 60 # seconds between checks in watch mode
# Kimi Heartbeat — cron job heartbeat file written by the triage pipeline
KIMI_HEARTBEAT_JOB = "kimi-heartbeat"
KIMI_HEARTBEAT_STALE_MULTIPLIER = 2.0 # stale at 2x declared interval
GITEA_URL = os.environ.get("GITEA_URL", "https://forge.alexanderwhitestone.com")
GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")
GITEA_REPO = os.environ.get("NEXUS_REPO", "Timmy_Foundation/the-nexus")
@@ -345,6 +366,93 @@ def check_syntax_health() -> CheckResult:
)
def check_kimi_heartbeat(
job: str = KIMI_HEARTBEAT_JOB,
stale_multiplier: float = KIMI_HEARTBEAT_STALE_MULTIPLIER,
) -> CheckResult:
"""Check if the Kimi Heartbeat cron job is alive.
Reads the ``<job>.last`` file from the standard Bezalel heartbeat
directory (``/var/run/bezalel/heartbeats/`` or fallback
``~/.bezalel/heartbeats/``). The file is written atomically by the
cron_heartbeat module after each successful triage pipeline run.
A job is stale when:
``time.time() - timestamp > stale_multiplier * interval_seconds``
(same rule used by ``check_cron_heartbeats.py``).
"""
# Resolve heartbeat directory — same logic as cron_heartbeat._resolve
primary = Path("/var/run/bezalel/heartbeats")
fallback = Path.home() / ".bezalel" / "heartbeats"
env_dir = os.environ.get("BEZALEL_HEARTBEAT_DIR")
if env_dir:
hb_dir = Path(env_dir)
elif primary.exists():
hb_dir = primary
elif fallback.exists():
hb_dir = fallback
else:
return CheckResult(
name="Kimi Heartbeat",
healthy=False,
message="Heartbeat directory not found — no triage pipeline deployed yet",
details={"searched": [str(primary), str(fallback)]},
)
hb_file = hb_dir / f"{job}.last"
if not hb_file.exists():
return CheckResult(
name="Kimi Heartbeat",
healthy=False,
message=f"No heartbeat file at {hb_file} — Kimi triage pipeline has never reported",
details={"path": str(hb_file)},
)
try:
data = json.loads(hb_file.read_text())
except (json.JSONDecodeError, OSError) as e:
return CheckResult(
name="Kimi Heartbeat",
healthy=False,
message=f"Heartbeat file corrupt: {e}",
details={"path": str(hb_file), "error": str(e)},
)
timestamp = float(data.get("timestamp", 0))
interval = int(data.get("interval_seconds", 0))
raw_status = data.get("status", "unknown")
age = time.time() - timestamp
if interval <= 0:
# No declared interval — use raw timestamp age (30 min default)
interval = 1800
threshold = stale_multiplier * interval
is_stale = age > threshold
age_str = f"{int(age)}s" if age < 3600 else f"{int(age // 3600)}h {int((age % 3600) // 60)}m"
interval_str = f"{int(interval)}s" if interval < 3600 else f"{int(interval // 3600)}h {int((interval % 3600) // 60)}m"
if is_stale:
return CheckResult(
name="Kimi Heartbeat",
healthy=False,
message=(
f"Silent for {age_str} "
f"(threshold: {stale_multiplier}x {interval_str} = {int(threshold)}s). "
f"Status: {raw_status}"
),
details=data,
)
return CheckResult(
name="Kimi Heartbeat",
healthy=True,
message=f"Alive — last beat {age_str} ago (interval {interval_str}, status={raw_status})",
details=data,
)
# ── Gitea alerting ───────────────────────────────────────────────────
def _gitea_request(method: str, path: str, data: Optional[dict] = None) -> Any:
@@ -446,6 +554,7 @@ def run_health_checks(
check_mind_process(),
check_heartbeat(heartbeat_path, stale_threshold),
check_syntax_health(),
check_kimi_heartbeat(),
]
return HealthReport(timestamp=time.time(), checks=checks)
@@ -545,6 +654,14 @@ def main():
"--json", action="store_true", dest="output_json",
help="Output results as JSON (for integration with other tools)",
)
parser.add_argument(
"--kimi-job", default=KIMI_HEARTBEAT_JOB,
help=f"Kimi heartbeat job name (default: {KIMI_HEARTBEAT_JOB})",
)
parser.add_argument(
"--kimi-stale-multiplier", type=float, default=KIMI_HEARTBEAT_STALE_MULTIPLIER,
help=f"Kimi heartbeat staleness multiplier (default: {KIMI_HEARTBEAT_STALE_MULTIPLIER})",
)
args = parser.parse_args()

View File

@@ -1,5 +1,3 @@
shell-init: error retrieving current directory: getcwd: cannot access parent directories: No such file or directory
chdir: error retrieving current directory: getcwd: cannot access parent directories: No such file or directory
<!DOCTYPE html>
<html lang="en" data-theme="dark">
<head>
@@ -145,39 +143,10 @@ chdir: error retrieving current directory: getcwd: cannot access parent director
</div>
</div>
<div id="chat-quick-actions" class="chat-quick-actions">
<div class="starter-label">STARTER PROMPTS</div>
<div class="starter-grid">
<button class="starter-btn" data-action="heartbeat" title="Check Timmy heartbeat and system health">
<span class="starter-icon"></span>
<span class="starter-text">Inspect Heartbeat</span>
<span class="starter-desc">System health &amp; connectivity</span>
</button>
<button class="starter-btn" data-action="portals" title="Browse the portal atlas">
<span class="starter-icon">🌐</span>
<span class="starter-text">Portal Atlas</span>
<span class="starter-desc">Browse connected worlds</span>
</button>
<button class="starter-btn" data-action="agents" title="Check active agent status">
<span class="starter-icon"></span>
<span class="starter-text">Agent Status</span>
<span class="starter-desc">Who is in the fleet</span>
</button>
<button class="starter-btn" data-action="memory" title="View memory crystals">
<span class="starter-icon"></span>
<span class="starter-text">Memory Crystals</span>
<span class="starter-desc">Inspect stored knowledge</span>
</button>
<button class="starter-btn" data-action="ask" title="Ask Timmy anything">
<span class="starter-icon"></span>
<span class="starter-text">Ask Timmy</span>
<span class="starter-desc">Start a conversation</span>
</button>
<button class="starter-btn" data-action="sovereignty" title="Learn about sovereignty">
<span class="starter-icon"></span>
<span class="starter-text">Sovereignty</span>
<span class="starter-desc">What this space is</span>
</button>
</div>
<button class="quick-action-btn" data-action="status">System Status</button>
<button class="quick-action-btn" data-action="agents">Agent Check</button>
<button class="quick-action-btn" data-action="portals">Portal Atlas</button>
<button class="quick-action-btn" data-action="help">Help</button>
</div>
<div class="chat-input-row">
<input type="text" id="chat-input" class="chat-input" placeholder="Speak to Timmy..." autocomplete="off">

View File

@@ -983,7 +983,7 @@ canvas#nexus-canvas {
.chat-quick-actions {
display: flex;
flex-direction: column;
flex-wrap: wrap;
gap: 6px;
padding: 8px 12px;
border-top: 1px solid var(--color-border);
@@ -991,75 +991,6 @@ canvas#nexus-canvas {
pointer-events: auto;
}
.chat-quick-actions.hidden {
display: none;
}
.starter-label {
font-family: var(--font-display);
font-size: 9px;
letter-spacing: 0.15em;
color: var(--color-primary-dim);
text-transform: uppercase;
padding: 0 2px;
}
.starter-grid {
display: grid;
grid-template-columns: repeat(3, 1fr);
gap: 4px;
}
.starter-btn {
display: flex;
flex-direction: column;
align-items: flex-start;
gap: 1px;
background: rgba(74, 240, 192, 0.06);
border: 1px solid rgba(74, 240, 192, 0.15);
color: var(--color-primary);
font-family: var(--font-body);
padding: 6px 8px;
cursor: pointer;
transition: all var(--transition-ui);
text-align: left;
}
.starter-btn:hover {
background: rgba(74, 240, 192, 0.15);
border-color: var(--color-primary);
color: #fff;
}
.starter-btn:hover .starter-icon {
color: #fff;
}
.starter-btn:active {
transform: scale(0.97);
}
.starter-icon {
font-size: 12px;
color: var(--color-primary);
line-height: 1;
}
.starter-text {
font-size: 10px;
font-weight: 600;
white-space: nowrap;
}
.starter-desc {
font-size: 8px;
color: rgba(74, 240, 192, 0.5);
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
max-width: 100%;
}
/* Add hover effect for MemPalace mining button */
.quick-action-btn:hover {
background: var(--color-primary-dim);
@@ -1205,9 +1136,6 @@ canvas#nexus-canvas {
.hud-location {
font-size: var(--text-xs);
}
.starter-grid {
grid-template-columns: repeat(2, 1fr);
}
}
@media (max-width: 480px) {