ops: source-control nightly burn automation
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@@ -8,3 +8,7 @@
|
||||
*.db-wal
|
||||
*.db-shm
|
||||
__pycache__/
|
||||
|
||||
# Logs and runtime churn
|
||||
logs/
|
||||
*.log
|
||||
|
||||
52
bin/burn-cycle-deadman.sh
Executable file
52
bin/burn-cycle-deadman.sh
Executable file
@@ -0,0 +1,52 @@
|
||||
#!/usr/bin/env bash
# burn-cycle-deadman.sh — Dead-man switch for burn mode cron jobs
# Run after each burn cycle to detect silent failures.
# Alert if cron ran but no log/heartbeat was produced.

set -euo pipefail

LOG_DIR="$HOME/.hermes/burn-logs"
ALERT_FILE="${LOG_DIR}/ALERT.log"
# Machine-readable status: overwritten every run with a single token
# ("OK:<mins>", "ALERT:<mins>", or "DEAD") — consumed by morning-report-compiler.py.
STATUS_FILE="${LOG_DIR}/deadman-status.log"
# Append-only, human-readable trail of successful checks. The OK summary must
# NOT go to STATUS_FILE: the truncating write below would clobber it instantly.
HISTORY_FILE="${LOG_DIR}/deadman-history.log"
MAIN_LOG="${LOG_DIR}/timmy.log"
HEARTBEAT_FILE="${LOG_DIR}/bounded-burn-heartbeat.txt"

# Bound the allowed silence. The overnight burn runs every 15m.
MAX_SILENT_MINS=120

mkdir -p "$LOG_DIR"

# mtime (epoch seconds) of each proof file; try BSD stat (-f %m) first,
# fall back to GNU stat (-c %Y), then 0 if both fail.
last_log_mod=0
last_heartbeat_mod=0
if [ -f "$MAIN_LOG" ]; then
    last_log_mod=$(stat -f %m "$MAIN_LOG" 2>/dev/null || stat -c %Y "$MAIN_LOG" 2>/dev/null || echo "0")
fi
if [ -f "$HEARTBEAT_FILE" ]; then
    last_heartbeat_mod=$(stat -f %m "$HEARTBEAT_FILE" 2>/dev/null || stat -c %Y "$HEARTBEAT_FILE" 2>/dev/null || echo "0")
fi

# Whichever proof file changed most recently counts as the last sign of life.
latest_mod=$last_log_mod
if [ "$last_heartbeat_mod" -gt "$latest_mod" ]; then
    latest_mod=$last_heartbeat_mod
fi

# Neither proof file exists: the burn lane never produced any output.
if [ "$latest_mod" -eq 0 ]; then
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] ALERT: no burn proof files exist (timmy.log or bounded-burn-heartbeat.txt)" >> "$ALERT_FILE"
    echo "DEAD" > "$STATUS_FILE"
    exit 1
fi

now=$(date +%s)
gap_secs=$((now - latest_mod))
gap_mins=$((gap_secs / 60))

if [ "$gap_secs" -gt "$((MAX_SILENT_MINS * 60))" ]; then
    # date -r works on BSD/macOS; fall back to "now" if unavailable.
    last_update=$(date -r "$latest_mod" '+%Y-%m-%d %H:%M:%S' 2>/dev/null || date '+%Y-%m-%d %H:%M:%S')
    echo "[$(date '+%Y-%m-%d %H:%M:%S')] ALERT: No burn proof output for ${gap_mins}m (threshold: ${MAX_SILENT_MINS}m). Last update: ${last_update}" >> "$ALERT_FILE"
    echo "ALERT:${gap_mins}" > "$STATUS_FILE"
    exit 1
fi

# BUG FIX: this OK summary used to be appended to STATUS_FILE and then
# immediately wiped by the truncating status write below, so no OK record
# ever survived. Keep the history in its own append-only file.
echo "[$(date '+%Y-%m-%d %H:%M:%S')] OK: Burn proof updated ${gap_mins}m ago (threshold: ${MAX_SILENT_MINS}m)" >> "$HISTORY_FILE"
echo "OK:${gap_mins}" > "$STATUS_FILE"
exit 0
|
||||
177
bin/morning-report-compiler.py
Executable file
177
bin/morning-report-compiler.py
Executable file
@@ -0,0 +1,177 @@
|
||||
#!/usr/bin/env python3
|
||||
"""morning-report-compiler.py — Aggregate burn-logs into a raw overnight brief.
|
||||
Runs at 6 AM via cron / manual trigger.
|
||||
|
||||
Note: this compiler writes the raw cycle brief.
|
||||
The delivery cron can reformat that artifact into a phone-readable report.
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
# Root of the Hermes runtime state for the current user.
HERMES_HOME = Path.home() / ".hermes"
# Directory holding every overnight burn artifact (logs, heartbeats, reports).
BURN_LOGS = HERMES_HOME / "burn-logs"
# Append-only alert trail written by burn-cycle-deadman.sh.
ALERT_LOG = BURN_LOGS / "ALERT.log"
# Single-token status written by the dead-man script on each run
# ("OK:<mins>", "ALERT:<mins>", or "DEAD").
ALERT_STATUS_FILE = BURN_LOGS / "deadman-status.log"
# Matches cycle headers such as "=== BURN CYCLE: 2026-04-06 03:15 ===";
# the colon is optional and group(1) captures the header payload.
CYCLE_HEADER_RE = re.compile(r"=== BURN CYCLE:?\s*(.+?)\s*===")
|
||||
|
||||
|
||||
def parse_cycle_timestamp(raw: str) -> datetime | None:
|
||||
match = re.search(r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2})", raw)
|
||||
if not match:
|
||||
return None
|
||||
return datetime.strptime(match.group(1), "%Y-%m-%d %H:%M")
|
||||
|
||||
|
||||
def extract_repo_lines(block: str) -> list[dict]:
    """Parse the 'REPOS SURVEYED:' bullet list of a cycle block.

    Returns one ``{"name": ..., "status": ...}`` dict per bullet; status is
    an empty string when the bullet carries no ``name: status`` colon.
    """
    found = re.search(r"REPOS SURVEYED:\s*\n((?:\s*-\s+.*\n?)*)", block)
    if found is None:
        return []

    entries: list[dict] = []
    for raw in found.group(1).splitlines():
        raw = raw.strip()
        if not raw.startswith("-"):
            continue
        body = raw.lstrip("- ").strip()
        name, sep, status = body.partition(":")
        if sep:
            entries.append({"name": name.strip(), "status": status.strip()})
        else:
            entries.append({"name": body, "status": ""})
    return entries
|
||||
|
||||
|
||||
def extract_next_tasks(block: str) -> list[str]:
    """Collect bullet items under a NEXT / NEXT TARGET / NEXT CYCLE TARGET heading."""
    found = re.search(
        r"NEXT(?: CYCLE TARGET| TARGET| cycle targets)?\s*:\s*\n((?:\s*-\s+.*\n?)*)",
        block,
        re.IGNORECASE,
    )
    if found is None:
        return []
    return [
        item.strip().lstrip("- ").strip()
        for item in found.group(1).splitlines()
        if item.strip().startswith("-")
    ]
|
||||
|
||||
|
||||
def find_cycles(log_path: Path) -> list[dict]:
    """Split one burn log into per-cycle dicts keyed off '=== BURN CYCLE ===' headers.

    Headers whose payload yields no parseable timestamp are skipped entirely.
    A missing file returns an empty list.
    """
    if not log_path.exists():
        return []

    text = log_path.read_text()
    markers = list(CYCLE_HEADER_RE.finditer(text))
    results: list[dict] = []

    for pos, marker in enumerate(markers):
        raw_ts = marker.group(1).strip()
        when = parse_cycle_timestamp(raw_ts)
        if when is None:
            continue

        # A cycle block runs from this header up to the next header (or EOF).
        upper = markers[pos + 1].start() if pos + 1 < len(markers) else len(text)
        chunk = text[marker.start():upper]
        results.append(
            {
                "timestamp": raw_ts,
                "parsed_at": when,
                "repos": extract_repo_lines(chunk),
                "next_tasks": extract_next_tasks(chunk),
            }
        )

    return results
|
||||
|
||||
|
||||
def get_alerts(hours: int = 12) -> list[str]:
    """Return ALERT.log lines whose leading '[timestamp]' falls within *hours*.

    Lines without a bracketed, parseable timestamp are ignored.
    """
    if not ALERT_LOG.exists():
        return []

    threshold = datetime.now() - timedelta(hours=hours)
    recent: list[str] = []
    for entry in ALERT_LOG.read_text().splitlines():
        if not entry.startswith("["):
            continue
        stamp = re.match(r"\[([^\]]+)\]", entry)
        if stamp is None:
            continue
        try:
            when = datetime.strptime(stamp.group(1), "%Y-%m-%d %H:%M:%S")
        except ValueError:
            # Bracketed prefix that isn't a timestamp — skip it.
            continue
        if when >= threshold:
            recent.append(entry)
    return recent
|
||||
|
||||
|
||||
def build_report(cycles: list[dict], alerts: list[str], hours: int = 12) -> str:
    """Render the markdown morning brief for the given cycles and alerts.

    Shows at most the last 5 alerts, the first 12 cycles, and 5 next-targets
    per cycle. *cycles* is expected newest-first (caller sorts).
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M")
    out: list[str] = [
        f"# Burn Mode Daily Brief — {stamp}",
        f"## Period: Last {hours} hours",
        "",
        "## Overview",
        f"- Total cycles: {len(cycles)}",
        f"- Alerts raised: {len(alerts)}",
        "",
    ]

    if alerts:
        out.append("## Alerts")
        # Cap the section at the five most recent alert lines.
        out.extend(f"⚠️ {entry}" for entry in alerts[-5:])
        out.append("")

    if cycles:
        out.append("## Cycles")
        out.append("")
        for cycle in cycles[:12]:
            out.append(f"### Cycle: {cycle['timestamp']}")
            for repo in cycle["repos"]:
                suffix = f": {repo['status']}" if repo["status"] else ""
                out.append(f"- **{repo['name']}**{suffix}")
            if cycle["next_tasks"]:
                out.append("")
                out.append("**Next cycle targets:**")
                out.extend(f"- {task}" for task in cycle["next_tasks"][:5])
            out.append("")
    else:
        out.append("No parseable burn-cycle activity in the reporting period.")
        out.append("")

    out.append("---")
    out.append(f"*Generated by morning-report-compiler.py at {stamp}*")
    return "\n".join(out)
|
||||
|
||||
|
||||
def main() -> int:
    """Compile the last N hours of burn logs into a markdown report file.

    N comes from argv[1] (default 12 hours). Writes the report under
    BURN_LOGS and prints a short summary; always returns 0.
    """
    hours = int(sys.argv[1]) if len(sys.argv) > 1 else 12
    threshold = datetime.now() - timedelta(hours=hours)

    # Newest logs first so the freshest cycles are parsed first.
    log_files = sorted(BURN_LOGS.glob("*.log"), key=lambda p: p.stat().st_mtime, reverse=True)
    cycles: list[dict] = []
    for log_file in log_files:
        cycles.extend(find_cycles(log_file))

    in_window = [c for c in cycles if c["parsed_at"] >= threshold]
    in_window.sort(key=lambda c: c["parsed_at"], reverse=True)

    alerts = get_alerts(hours)
    deadman = ALERT_STATUS_FILE.read_text().strip() if ALERT_STATUS_FILE.exists() else ""
    report = build_report(in_window, alerts, hours)

    out_path = BURN_LOGS / f"morning-report-{datetime.now().strftime('%Y-%m-%d-%H%M')}.md"
    out_path.write_text(report)

    print(f"Morning report saved: {out_path}")
    print(f"Cycles found: {len(in_window)}")
    print(f"Alerts found: {len(alerts)}")
    print(f"Dead-man status: {deadman}")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # sys.exit raises SystemExit with main()'s return code, same as before.
    sys.exit(main())
|
||||
@@ -39,7 +39,8 @@ Rule:
|
||||
|
||||
### A. launchd-loaded automations
|
||||
|
||||
These are loaded right now according to `launchctl list`.
|
||||
These are loaded right now according to `launchctl list` after the 2026-04-04 phase-2 cleanup.
|
||||
The only Timmy-specific launchd jobs still loaded are the ones below.
|
||||
|
||||
#### 1. ai.hermes.gateway
|
||||
- Plist: ~/Library/LaunchAgents/ai.hermes.gateway.plist
|
||||
@@ -91,6 +92,25 @@ launchctl bootstrap gui/$(id -u) ~/Library/LaunchAgents/ai.hermes.gateway.plist
|
||||
- Old-state risk:
|
||||
- long-lived gateway survives toolchain assumptions and keeps accepting work even if upstream routing changed
|
||||
|
||||
#### 3a. ai.hermes.gateway-bezalel
|
||||
- Plist: ~/Library/LaunchAgents/ai.hermes.gateway-bezalel.plist
|
||||
- Command: Hermes gateway under the Bezalel profile
|
||||
- HERMES_HOME: `~/.hermes/profiles/bezalel`
|
||||
- Logs:
|
||||
- `~/.hermes/profiles/bezalel/logs/gateway.log`
|
||||
- `~/.hermes/profiles/bezalel/logs/gateway.error.log`
|
||||
- KeepAlive: yes
|
||||
- RunAtLoad: yes
|
||||
- Old-state risk:
|
||||
- Bezalel can keep reviving a broken provider/auth chain unless the profile itself is repaired
|
||||
|
||||
#### 3b. ai.timmy.codeclaw-qwen-heartbeat
|
||||
- Plist: ~/Library/LaunchAgents/ai.timmy.codeclaw-qwen-heartbeat.plist
|
||||
- Purpose: monitor/revive the CodeClaw Qwen lane
|
||||
- Old-state risk:
|
||||
- can resurrect a side lane whose model/provider assumptions no longer match current truth
|
||||
- should be audited any time CodeClaw routing changes
|
||||
|
||||
#### 4. ai.timmy.kimi-heartbeat
|
||||
- Plist: ~/Library/LaunchAgents/ai.timmy.kimi-heartbeat.plist
|
||||
- Command: `/bin/bash ~/.timmy/uniwizard/kimi-heartbeat.sh`
|
||||
@@ -137,31 +157,41 @@ rm -f /tmp/kimi-heartbeat.lock
|
||||
- any watchdog can resurrect a loop you meant to leave dead
|
||||
- this is the first place to check when a loop "comes back"
|
||||
|
||||
### B. quarantined legacy launch agents
|
||||
|
||||
These were moved out of `~/Library/LaunchAgents` on 2026-04-04 to:
|
||||
`~/Library/LaunchAgents.quarantine/timmy-legacy-20260404/`
|
||||
|
||||
#### 6. com.timmy.dashboard-backend
|
||||
- Plist: ~/Library/LaunchAgents/com.timmy.dashboard-backend.plist
|
||||
- Command: uvicorn `dashboard.app:app`
|
||||
- Working directory: `~/worktrees/kimi-repo`
|
||||
- Port: 8100
|
||||
- Logs: `~/.hermes/logs/dashboard-backend.log`
|
||||
- KeepAlive: yes
|
||||
- RunAtLoad: yes
|
||||
- Old-state risk:
|
||||
- this serves code from a specific worktree, not from current repo truth in the abstract
|
||||
- if `~/worktrees/kimi-repo` is stale, launchd will faithfully keep serving stale code
|
||||
- Former plist: `com.timmy.dashboard-backend.plist`
|
||||
- Former command: uvicorn `dashboard.app:app`
|
||||
- Former working directory: `~/worktrees/kimi-repo`
|
||||
- Quarantine reason:
|
||||
- served code from a specific stale worktree
|
||||
- could revive old backend state by launchd KeepAlive alone
|
||||
|
||||
#### 7. com.timmy.matrix-frontend
|
||||
- Plist: ~/Library/LaunchAgents/com.timmy.matrix-frontend.plist
|
||||
- Command: `npx vite --host`
|
||||
- Working directory: `~/worktrees/the-matrix`
|
||||
- Logs: `~/.hermes/logs/matrix-frontend.log`
|
||||
- KeepAlive: yes
|
||||
- RunAtLoad: yes
|
||||
- Old-state risk:
|
||||
- HIGH
|
||||
- this still points at `~/worktrees/the-matrix`, even though the live 3D world work moved to `Timmy_Foundation/the-nexus`
|
||||
- if this is left loaded, it can revive the old frontend lineage
|
||||
- Former plist: `com.timmy.matrix-frontend.plist`
|
||||
- Former command: `npx vite --host`
|
||||
- Former working directory: `~/worktrees/the-matrix`
|
||||
- Quarantine reason:
|
||||
- pointed at the old `the-matrix` lineage instead of current nexus truth
|
||||
- could revive a stale frontend every login
|
||||
|
||||
### B. running now but NOT launchd-managed
|
||||
#### 8. ai.hermes.startup
|
||||
- Former plist: `ai.hermes.startup.plist`
|
||||
- Former command: `~/.hermes/bin/hermes-startup.sh`
|
||||
- Quarantine reason:
|
||||
- startup path still expected missing `timmy-tmux.sh`
|
||||
- could recreate old webhook/tmux assumptions at login
|
||||
|
||||
#### 9. com.timmy.tick
|
||||
- Former plist: `com.timmy.tick.plist`
|
||||
- Former command: `/Users/apayne/Timmy-time-dashboard/deploy/timmy-tick-mac.sh`
|
||||
- Quarantine reason:
|
||||
- pure dashboard-era legacy path
|
||||
|
||||
### C. running now but NOT launchd-managed
|
||||
|
||||
These are live processes, but not currently represented by a loaded launchd plist.
|
||||
They can still persist because they were started with `nohup` or by other parent scripts.
|
||||
@@ -207,7 +237,7 @@ printf '{}\n' > ~/.hermes/logs/gemini-active.json
|
||||
- can repopulate agent queues even after you thought they were cleared
|
||||
- not represented in timmy-config/bin yet as of this audit
|
||||
|
||||
### C. Hermes cron automations
|
||||
### D. Hermes cron automations
|
||||
|
||||
Current cron inventory from `cronjob(list, include_disabled=true)`:
|
||||
|
||||
@@ -224,27 +254,11 @@ Old-state risk:
|
||||
- paused crons are not dead forever; they are resumable state
|
||||
- LLM-wrapped crons can revive old routing/model assumptions if resumed blindly
|
||||
|
||||
### D. file exists but NOT currently loaded
|
||||
### E. file exists but NOT currently loaded
|
||||
|
||||
These are the ones most likely to surprise us later because they still exist and point at old realities.
|
||||
|
||||
#### 10. ai.hermes.startup
|
||||
- Plist: `~/Library/LaunchAgents/ai.hermes.startup.plist`
|
||||
- Points to: `~/.hermes/bin/hermes-startup.sh`
|
||||
- Not loaded in launchctl at audit time
|
||||
- High-risk notes:
|
||||
- startup script still expects `~/.hermes/bin/timmy-tmux.sh`
|
||||
- that file is MISSING at audit time
|
||||
- script also tries to start webhook listener and the old `timmy-loop` tmux world
|
||||
- This is a dormant old-state resurrection path
|
||||
|
||||
#### 11. com.timmy.tick
|
||||
- Plist: `~/Library/LaunchAgents/com.timmy.tick.plist`
|
||||
- Points to: `/Users/apayne/Timmy-time-dashboard/deploy/timmy-tick-mac.sh`
|
||||
- Not loaded at audit time
|
||||
- Definitely legacy dashboard-era automation
|
||||
|
||||
#### 12. com.tower.pr-automerge
|
||||
#### 10. com.tower.pr-automerge
|
||||
- Plist: `~/Library/LaunchAgents/com.tower.pr-automerge.plist`
|
||||
- Points to: `/Users/apayne/hermes-config/bin/pr-automerge.sh`
|
||||
- Not loaded at audit time
|
||||
@@ -317,8 +331,6 @@ launchctl bootout gui/$(id -u) ~/Library/LaunchAgents/ai.timmy.claudemax-watchdo
|
||||
launchctl bootout gui/$(id -u) ~/Library/LaunchAgents/ai.hermes.gateway.plist || true
|
||||
launchctl bootout gui/$(id -u) ~/Library/LaunchAgents/ai.hermes.gateway-fenrir.plist || true
|
||||
launchctl bootout gui/$(id -u) ~/Library/LaunchAgents/ai.openclaw.gateway.plist || true
|
||||
launchctl bootout gui/$(id -u) ~/Library/LaunchAgents/com.timmy.dashboard-backend.plist || true
|
||||
launchctl bootout gui/$(id -u) ~/Library/LaunchAgents/com.timmy.matrix-frontend.plist || true
|
||||
```
|
||||
|
||||
2. Kill manual loops
|
||||
@@ -349,10 +361,9 @@ cp ~/.hermes/sessions/sessions.json ~/.hermes/sessions/sessions.json.bak.$(date
|
||||
|
||||
## Current contradictions to fix later
|
||||
|
||||
1. README still describes `bin/` as "NOT deprecated loops" but live runtime still contains revived loop scripts.
|
||||
2. `DEPRECATED.md` says claude-loop/gemini-loop/timmy-orchestrator/claudemax-watchdog were removed, but reality disagrees.
|
||||
3. `com.timmy.matrix-frontend` still points at `~/worktrees/the-matrix` rather than the nexus lineage.
|
||||
4. `ai.hermes.startup` still points at a startup path that expects missing `timmy-tmux.sh`.
|
||||
5. `gemini-loop.sh` and `timmy-orchestrator.sh` are live but not yet mirrored into timmy-config/bin/.
|
||||
1. README and DEPRECATED were corrected on 2026-04-04, but older local clones may still have stale prose.
|
||||
2. The quarantined launch agents now live under `~/Library/LaunchAgents.quarantine/timmy-legacy-20260404/`; if someone moves them back, the old state can return.
|
||||
3. `gemini-loop.sh` and `timmy-orchestrator.sh` are live but not yet mirrored into timmy-config/bin/.
|
||||
4. The open docs PR must be kept clean: do not mix operational script recovery and documentation history on the same branch.
|
||||
|
||||
Until those are reconciled, trust this inventory over older prose.
|
||||
|
||||
133
docs/nightly-burn-mode.md
Normal file
133
docs/nightly-burn-mode.md
Normal file
@@ -0,0 +1,133 @@
|
||||
# Nightly Burn Mode — Canonical Lineup
|
||||
|
||||
Status: active pattern as of 2026-04-06
|
||||
Owner: Timmy
|
||||
Scope: overnight burn automation for the local Mac
|
||||
|
||||
## Canonical overnight lineup
|
||||
|
||||
Keep the overnight stack small, bounded, and proof-bearing.
|
||||
|
||||
1. one bounded burn job
|
||||
2. one dead-man job
|
||||
3. one morning-report job
|
||||
4. one health monitor
|
||||
|
||||
Do not run duplicate burn jobs on the same lane.
|
||||
Do not leave structurally broken jobs enabled overnight.
|
||||
|
||||
## Canonical jobs
|
||||
|
||||
### 1. Burn Mode — Timmy Orchestrator
|
||||
Purpose: one bounded overnight burn cycle every 15 minutes.
|
||||
|
||||
Rules:
|
||||
- no repo cloning
|
||||
- no rebases
|
||||
- no deep repairs
|
||||
- at most one tangible action per cycle
|
||||
- if there is no clear quick win, leave proof of a healthy no-op and stay silent
|
||||
- do not step into Evennia automation from this lane
|
||||
|
||||
Quick-win priority:
|
||||
1. merge one obviously safe PR
|
||||
2. answer one unresolved human comment on a Timmy-touched issue
|
||||
3. leave one stale/unblocked proof-first comment
|
||||
4. otherwise write a healthy no-op heartbeat
|
||||
|
||||
Required proof per non-silent cycle:
|
||||
- what was touched
|
||||
- evidence link(s)
|
||||
- next target
|
||||
|
||||
### 2. Burn Deadman
|
||||
Purpose: detect when the burn lane has gone silent.
|
||||
|
||||
Command:
|
||||
```bash
|
||||
bash ~/.hermes/bin/burn-cycle-deadman.sh
|
||||
```
|
||||
|
||||
Signal source:
|
||||
- `~/.hermes/burn-logs/timmy.log`
|
||||
- `~/.hermes/burn-logs/bounded-burn-heartbeat.txt`
|
||||
|
||||
A healthy no-op cycle must still update one of those proof files, otherwise deadman will false-alert.
|
||||
|
||||
### 3. Morning Report — Burn Mode
|
||||
Purpose: compile the overnight burn into a raw structured brief, then let the delivery cron reformat it into a phone-readable morning report.
|
||||
|
||||
Command:
|
||||
```bash
|
||||
python3 ~/.hermes/bin/morning-report-compiler.py 12
|
||||
```
|
||||
|
||||
Delivery shape (applied by the cron prompt that reads the generated markdown):
|
||||
- Shipped
|
||||
- Failed
|
||||
- Fleet Status
|
||||
- Stakes Cleared
|
||||
- Next 3
|
||||
|
||||
### 4. Health Monitor
|
||||
Purpose: basic local machine health.
|
||||
|
||||
Checks:
|
||||
- Ollama reachability
|
||||
- disk
|
||||
- memory
|
||||
- process count
|
||||
|
||||
## Jobs to keep paused unless repaired
|
||||
|
||||
### Duplicate burn loops
|
||||
If two 15-minute burns point at the same lane, pause one.
|
||||
|
||||
### velocity-engine
|
||||
Pause overnight if it shows any of:
|
||||
- 0 claimed
|
||||
- 0 created
|
||||
- repeated HTTP 422 self-generation failures
|
||||
- KeyError `total_claimed`
|
||||
|
||||
### wolf-eval-cycle
|
||||
Pause overnight if it times out and does not directly help the burn lane.
|
||||
|
||||
## Source-of-truth rule
|
||||
|
||||
Do not leave this pattern as live-only cron state.
|
||||
Repo-truth must include:
|
||||
- the canonical overnight lineup
|
||||
- the deadman script
|
||||
- the morning report compiler
|
||||
- the bounded-burn prompt/rules
|
||||
|
||||
## No-op heartbeat rule
|
||||
|
||||
Bounded overnight burns often find no safe merge and no fresh human comment.
|
||||
That is fine.
|
||||
|
||||
What is not fine:
|
||||
- returning silent with no proof of liveness
|
||||
- letting deadman conclude the lane died when the lane merely had no quick win
|
||||
|
||||
Healthy no-op cycles should update:
|
||||
- `~/.hermes/burn-logs/bounded-burn-heartbeat.txt`
|
||||
|
||||
Recommended contents:
|
||||
- UTC timestamp
|
||||
- repos polled
|
||||
- blocker proof links
|
||||
- note that no low-risk quick win existed
|
||||
|
||||
## Why this pattern won
|
||||
|
||||
Compared with the old sprawling burn loop, the bounded pattern produced:
|
||||
- one real merge when available
|
||||
- multiple proof-first human-comment wins
|
||||
- useful stale/unblocked nudges
|
||||
- much better morning visibility
|
||||
- less falsework
|
||||
|
||||
The goal is not drama.
|
||||
The goal is consistent, bounded, sovereign overnight work.
|
||||
266529
logs/huey.error.log
266529
logs/huey.error.log
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user