From cd0c288e9f5933b2fee2fb8aee6d1a4fa01af0c2 Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Tue, 28 Apr 2026 22:51:03 -0400 Subject: [PATCH] =?UTF-8?q?Audit=20cron/launchd/daemon=20=E2=80=94=20remov?= =?UTF-8?q?e=20dead=20jobs=20and=20document=20canonical=20services?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove Triage Heartbeat and PR Review Sweep (dashboard-era dead jobs) - These were paused on 2026-04-04: "Dashboard repo frozen - loops redirected to the-nexus" - Document current canonical fleet services in docs/CANONICAL_SERVICES.md - Update cron/audit-report.json to reflect removal Hard rule compliance: VPS crontabs untouched (per #880) Closes #880 --- cron/audit-report.json | 32 ++------------ cron/jobs.json | 59 +------------------------- docs/CANONICAL_SERVICES.md | 85 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 86 deletions(-) create mode 100644 docs/CANONICAL_SERVICES.md diff --git a/cron/audit-report.json b/cron/audit-report.json index 13869499..fbf3f5c4 100644 --- a/cron/audit-report.json +++ b/cron/audit-report.json @@ -1,42 +1,16 @@ { "audit_time": "2026-04-17T05:34:45.162227+00:00", - "total_jobs": 33, - "hermes_jobs": 8, + "total_jobs": 31, + "hermes_jobs": 6, "crontab_jobs": 25, "summary": { - "healthy": 33, + "healthy": 31, "transient_errors": 0, "systemic_failures": 0 }, "systemic_jobs": [], "transient_jobs": [], "all_jobs": [ - { - "id": "9e0624269ba7", - "name": "Triage Heartbeat", - "schedule": "every 15m", - "state": "paused", - "enabled": false, - "last_status": "ok", - "last_error": null, - "last_run_at": "2026-03-24T15:33:57.749458-04:00", - "category": "healthy", - "reason": "Dashboard repo frozen - loops redirected to the-nexus", - "action": "none \u2014 paused intentionally" - }, - { - "id": "e29eda4a8548", - "name": "PR Review Sweep", - "schedule": "every 30m", - "state": "paused", - "enabled": false, - "last_status": "ok", - "last_error": 
null, - "last_run_at": "2026-03-24T15:21:42.995715-04:00", - "category": "healthy", - "reason": "Dashboard repo frozen - loops redirected to the-nexus", - "action": "none \u2014 paused intentionally" - }, { "id": "a77a87392582", "name": "Health Monitor", diff --git a/cron/jobs.json b/cron/jobs.json index f08a8ec3..9e641a2d 100644 --- a/cron/jobs.json +++ b/cron/jobs.json @@ -1,61 +1,5 @@ { "jobs": [ - { - "id": "9e0624269ba7", - "name": "Triage Heartbeat", - "prompt": "Scan all Timmy_Foundation/* repos for unassigned issues, auto-assign to appropriate agents based on labels/complexity", - "schedule": { - "kind": "interval", - "minutes": 15, - "display": "every 15m" - }, - "schedule_display": "every 15m", - "repeat": { - "times": null, - "completed": 6 - }, - "enabled": false, - "created_at": "2026-03-24T11:28:46.408551-04:00", - "next_run_at": "2026-03-24T15:48:57.749458-04:00", - "last_run_at": "2026-03-24T15:33:57.749458-04:00", - "last_status": "ok", - "last_error": null, - "deliver": "local", - "origin": null, - "state": "paused", - "paused_at": "2026-03-24T16:23:01.614552-04:00", - "paused_reason": "Dashboard repo frozen - loops redirected to the-nexus", - "skills": [], - "skill": null - }, - { - "id": "e29eda4a8548", - "name": "PR Review Sweep", - "prompt": "Check all Timmy_Foundation/* repos for open PRs, review diffs, merge passing ones, comment on problems", - "schedule": { - "kind": "interval", - "minutes": 30, - "display": "every 30m" - }, - "schedule_display": "every 30m", - "repeat": { - "times": null, - "completed": 2 - }, - "enabled": false, - "created_at": "2026-03-24T11:28:46.408986-04:00", - "next_run_at": "2026-03-24T15:51:42.995715-04:00", - "last_run_at": "2026-03-24T15:21:42.995715-04:00", - "last_status": "ok", - "last_error": null, - "deliver": "local", - "origin": null, - "state": "paused", - "paused_at": "2026-03-24T16:23:02.731437-04:00", - "paused_reason": "Dashboard repo frozen - loops redirected to the-nexus", - "skills": [], - 
"skill": null - }, { "id": "a77a87392582", "name": "Health Monitor", @@ -108,7 +52,8 @@ "deliver": "local", "origin": null, "skills": [], - "skill": null + "skill": null, + "state": "unknown" }, { "id": "muda-audit-weekly", diff --git a/docs/CANONICAL_SERVICES.md b/docs/CANONICAL_SERVICES.md new file mode 100644 index 00000000..f245639e --- /dev/null +++ b/docs/CANONICAL_SERVICES.md @@ -0,0 +1,85 @@ +# Canonical Fleet Services + +**Last updated:** 2026-04-28 (audit #880) +**Parent:** #478 +**Scope:** Local cron jobs, launchd agents, daemon scripts, and watchdog processes in Timmy's sovereign fleet. + +> This document is the source-of-truth inventory of what services are **intentionally running** and what has been deliberately removed. It is not a live diagnostic — for that, see `docs/automation-inventory.md` (launchd) and `scripts/cron-audit-662.py` (cron health). + +--- + +## Quick state summary + +| Layer | Total | Canonical | Dead / superseded | Action taken | +|-------|-------|-----------|-------------------|--------------| +| Hermes cron jobs | 8 → **6** | 6 | 2 (Triage Heartbeat, PR Review Sweep) | Removed from `cron/jobs.json` | +| VPS crontab jobs | 25 | 25 | 0 | Untouched (per #880 hard rule) | +| launchd agents | 5 (live) | 5 | 3 quarantined in 2026-04-04 cleanup | Documented only | +| daemon/watchdog | see automation-inventory.md | — | — | — | + +--- + +## Hermes cron jobs (source: `cron/jobs.json`) + +These are managed by the Hermes cron system (`~/.hermes/cron/jobs.json`). Jobs marked **REMOVED** have been excised from source control as dead, superseded, or non-canonical. 
+ +| Name | Schedule | Enabled | Owner | Purpose | Status | +|------|----------|---------|-------|---------|--------| +| Health Monitor | every 5m | yes | Ops | Ollama/disk/memory/GPU health check | ✅ Canonical | +| Muda Audit | 0 21 * * 0 (Sun) | yes | Ezra | Weekly fleet audit (`fleet/muda-audit.sh`) | ✅ Canonical | +| Kaizen Retro | daily 07:30 | yes | Ezra | Post-burn retrospective (`scripts/kaizen_retro.py`) | ✅ Canonical | +| Overnight R&D Loop | nightly 22:00 EDT | yes | Research | Deep dive papers, tool-use training data | ✅ Canonical | +| Autonomous Cron Supervisor | every 7m | yes | Timmy | Monitors dev/timmy tmux sessions (`tmux-supervisor`) | ✅ Canonical | +| Hermes Philosophy Loop | every 1440m | no | Timmy | Draft — issues to hermes-agent | ⏸️ Disabled (draft) | +| **Triage Heartbeat** | every 15m | no | **Dashboard** | Scan & auto-assign issues | **❌ REMOVED** — dashboard repo frozen, loops redirected to the-nexus | +| **PR Review Sweep** | every 30m | no | **Dashboard** | Review diffs, merge passing PRs | **❌ REMOVED** — dashboard repo frozen, loops redirected to the-nexus | + +**Removal rationale (issue #880):** Triage Heartbeat and PR Review Sweep were dashboard-era jobs paused on 2026-03-24 with the explicit reason: *"Dashboard repo frozen - loops redirected to the-nexus."* They have been superseded by the-nexus coordinator flows and pose state-rot risk if accidentally re-enabled. They are deleted from `cron/jobs.json`. + +--- + +## VPS crontab jobs + +Per the hard rule in #880, VPS-specific crontab entries are **NOT modified** in this issue. They remain as-is in `cron/vps/*-crontab-backup.txt`.
+ +**Allegro** (7 jobs) — model download guard, heartbeat daemon, burn-mode loops, dead-man monitor +**Ezra** (8 jobs) — burn-mode, gitea/awareness loops, kt compiler, mempalace nightly, dispatch +**Bezalel** (8 jobs) — nightly watch, act runner daemon, backups, heartbeat, secret guard, ultraplan + +See individual files for accurate listings: +- `cron/vps/allegro-crontab-backup.txt` +- `cron/vps/ezra-crontab-backup.txt` +- `cron/vps/bezalel-crontab-backup.txt` + +--- + +## Launchd agents (macOS local) + +Fully documented in [`docs/automation-inventory.md`](automation-inventory.md#current-live-automations). + +| Name | Plist | Interval | Status | +|------|-------|----------|--------| +| ai.hermes.gateway | `~/Library/LaunchAgents/ai.hermes.gateway.plist` | KeepAlive | ✅ Active | +| ai.hermes.gateway-fenrir | `~/Library/LaunchAgents/ai.hermes.gateway-fenrir.plist` | KeepAlive | ✅ Active | +| ai.timmy.kimi-heartbeat | `~/Library/LaunchAgents/ai.timmy.kimi-heartbeat.plist` | 300s | ✅ Active | +| ai.timmy.claudemax-watchdog | `~/Library/LaunchAgents/ai.timmy.claudemax-watchdog.plist` | 300s | ✅ Active | +| (quarantined legacy) | — | — | ❌ Moved 2026-04-04 | + +--- + +## Daemons / tmux watchdogs + +Long-running autonomous processes managed by launchd or tmux supervisors. Status is not tracked here — see live diagnostics or the automation-inventory for details. + +- `autonomous-cron-supervisor` (Hermes cron job above triggers this) +- `tmux-supervisor` — monitors dev/timmy tmux panes +- `claudemax-watchdog` — watches Claude loop quota +- `burn-mode` loops on each VPS (via crontab) + +--- + +## Change log + +| Date | Change | By | +|------|--------|-----| +| 2026-04-28 | Removed Triage Heartbeat & PR Review Sweep from `cron/jobs.json` (issue #880) | STEP35 audit |