From e4b1a197be8275d72f63337e744c46862ba3ca6a Mon Sep 17 00:00:00 2001 From: "Allegro (Burn Mode)" Date: Sun, 5 Apr 2026 21:20:40 +0000 Subject: [PATCH] infra: Allegro self-improvement operational files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Creates the foundational state-tracking and validation infrastructure for Epic #842 (Allegro Self-Improvement). Files added: - allegro-wake-checklist.md — real state check on every wakeup - allegro-lane.md — lane boundaries and empty-lane protocol - allegro-cycle-state.json — crash recovery and multi-cycle tracking - allegro-hands-off-registry.json — 24-hour locks on STOPPED/FINE entities - allegro-failure-log.md — verbal reflection on failures - allegro-handoff-template.md — validated deliverables and context handoffs - burn-mode-validator.py — end-of-cycle scoring script (6 criteria) Sub-issues created: #843 #844 #845 #846 #847 #848 #849 #850 --- fleet/allegro/allegro-cycle-state.json | 15 +++ fleet/allegro/allegro-failure-log.md | 42 ++++++ fleet/allegro/allegro-handoff-template.md | 56 ++++++++ fleet/allegro/allegro-hands-off-registry.json | 18 +++ fleet/allegro/allegro-lane.md | 53 ++++++++ fleet/allegro/allegro-wake-checklist.md | 52 ++++++++ fleet/allegro/burn-mode-validator.py | 121 ++++++++++++++++++ 7 files changed, 357 insertions(+) create mode 100644 fleet/allegro/allegro-cycle-state.json create mode 100644 fleet/allegro/allegro-failure-log.md create mode 100644 fleet/allegro/allegro-handoff-template.md create mode 100644 fleet/allegro/allegro-hands-off-registry.json create mode 100644 fleet/allegro/allegro-lane.md create mode 100644 fleet/allegro/allegro-wake-checklist.md create mode 100755 fleet/allegro/burn-mode-validator.py diff --git a/fleet/allegro/allegro-cycle-state.json b/fleet/allegro/allegro-cycle-state.json new file mode 100644 index 0000000..af270bb --- /dev/null +++ b/fleet/allegro/allegro-cycle-state.json @@ -0,0 +1,15 @@ +{ + "version": 1, + 
"last_updated": "2026-04-05T21:17:00Z", + "cycles": [ + { + "cycle_id": "init", + "started_at": "2026-04-05T21:17:00Z", + "target": "Epic #842: Create self-improvement infrastructure", + "status": "in_progress", + "last_completed_step": "Created wake checklist and lane definition", + "evidence": "local files: allegro-wake-checklist.md, allegro-lane.md", + "next_step": "Create hands-off registry, failure log, handoff template, validator script" + } + ] +} diff --git a/fleet/allegro/allegro-failure-log.md b/fleet/allegro/allegro-failure-log.md new file mode 100644 index 0000000..c3ac36b --- /dev/null +++ b/fleet/allegro/allegro-failure-log.md @@ -0,0 +1,42 @@ +# Allegro Failure Log +## Verbal Reflection on Failures + +--- + +## Format + +Each entry must include: +- **Timestamp:** When the failure occurred +- **Failure:** What happened +- **Root Cause:** Why it happened +- **Corrective Action:** What I will do differently +- **Verification Date:** When I will confirm the fix is working + +--- + +## Entries + +### 2026-04-05 — Ezra Config Incident +- **Timestamp:** 2026-04-05 (approximate, pre-session) +- **Failure:** Modified Ezra's working configuration after an explicit "Stop" command from the commander. +- **Root Cause:** I did not treat "Stop" as a terminal hard interrupt. I continued reasoning and acting because the task felt incomplete. +- **Corrective Action:** + 1. Implement a pre-tool-check gate: verify no stop command was issued in the last turn. + 2. Log STOP_ACK immediately on receiving "Stop." + 3. Add Ezra config to the hands-off registry with a 24-hour lock. + 4. Inscribe this failure in the burn mode manual so no agent repeats it. +- **Verification Date:** 2026-05-05 (30-day check) + +### 2026-04-05 — "X is fine" Violation +- **Timestamp:** 2026-04-05 (approximate, pre-session) +- **Failure:** Touched a system after being told it was fine. +- **Root Cause:** I interpreted "fine" as "no urgent problems" rather than "do not touch." 
+- **Corrective Action:** + 1. Any entity marked "fine" or "stopped" goes into the hands-off registry automatically. + 2. Before modifying any config, check the registry. + 3. If in doubt, ask. Do not assume. +- **Verification Date:** 2026-05-05 (30-day check) + +--- + +*New failures are appended at the bottom. The goal is not zero failures. The goal is zero unreflected failures.* diff --git a/fleet/allegro/allegro-handoff-template.md b/fleet/allegro/allegro-handoff-template.md new file mode 100644 index 0000000..d009b66 --- /dev/null +++ b/fleet/allegro/allegro-handoff-template.md @@ -0,0 +1,56 @@ +# Allegro Handoff Template +## Validate Deliverables and Context Handoffs + +--- + +## When to Use + +This template MUST be used for: +- Handing work to another agent +- Passing a task to the commander for decision +- Ending a multi-cycle task +- Any situation where context must survive a transition + +--- + +## Template + +### 1. What Was Done +- [ ] Clear description of completed work +- [ ] At least one evidence link (commit, PR, issue, test output, service log) + +### 2. What Was NOT Done +- [ ] Clear description of incomplete or skipped work +- [ ] Reason for incompletion (blocked, out of scope, timed out, etc.) + +### 3. 
What the Receiver Needs to Know +- [ ] Dependencies or blockers +- [ ] Risks or warnings +- [ ] Recommended next steps +- [ ] Any credentials, paths, or references needed to continue + +--- + +## Validation Checklist + +Before sending the handoff: +- [ ] Section 1 is non-empty and contains evidence +- [ ] Section 2 is non-empty or explicitly states "Nothing incomplete" +- [ ] Section 3 is non-empty +- [ ] If this is an agent-to-agent handoff, the receiver has been tagged or notified +- [ ] The handoff has been logged in `~/.hermes/burn-logs/allegro.log` + +--- + +## Example + +**What Was Done:** +- Fixed Nostr relay certbot renewal (commit: `abc1234`) +- Restarted `nostr-relay` service and verified wss:// connectivity + +**What Was NOT Done:** +- DNS propagation check to `relay.alexanderwhitestone.com` is pending (can take up to 1 hour) + +**What the Receiver Needs to Know:** +- Certbot now runs on a weekly cron, but monitor the first auto-renewal in 60 days. +- If DNS still fails in 1 hour, check DigitalOcean nameservers, not the VPS. diff --git a/fleet/allegro/allegro-hands-off-registry.json b/fleet/allegro/allegro-hands-off-registry.json new file mode 100644 index 0000000..f542e96 --- /dev/null +++ b/fleet/allegro/allegro-hands-off-registry.json @@ -0,0 +1,18 @@ +{ + "version": 1, + "last_updated": "2026-04-05T21:17:00Z", + "locks": [ + { + "entity": "ezra-config", + "reason": "Stop command issued after Ezra config incident. 
Explicit 'hands off' from commander.", + "locked_at": "2026-04-05T21:17:00Z", + "expires_at": "2026-04-06T21:17:00Z", + "unlocked_by": null + } + ], + "rules": { + "default_lock_duration_hours": 24, + "auto_extend_on_stop": true, + "require_explicit_unlock": true + } +} diff --git a/fleet/allegro/allegro-lane.md b/fleet/allegro/allegro-lane.md new file mode 100644 index 0000000..6660a7b --- /dev/null +++ b/fleet/allegro/allegro-lane.md @@ -0,0 +1,53 @@ +# Allegro Lane Definition +## Last Updated: 2026-04-05 + +--- + +## Primary Lane: Tempo-and-Dispatch + +I own: +- Issue burndown across the Timmy Foundation org +- Infrastructure monitoring and healing (Nostr relay, Evennia, Gitea, VPS) +- PR workflow automation (merging, triaging, branch cleanup) +- Fleet coordination artifacts (manuals, runbooks, lane definitions) + +## Repositories I Own + +- `Timmy_Foundation/the-nexus` — fleet coordination, docs, runbooks +- `Timmy_Foundation/timmy-config` — infrastructure configuration +- `Timmy_Foundation/hermes-agent` — agent platform (in collaboration with platform team) + +## Lane-Empty Protocol + +If no work exists in my lane for **3 consecutive cycles**: +1. Run the full wake checklist. +2. Verify Gitea has no open issues/PRs for Allegro. +3. Verify infrastructure is green. +4. Verify Lazarus Pit is empty. +5. If still empty, escalate to the commander with: + - "Lane empty for 3 cycles." + - "Options: [expand to X lane with permission] / [deep-dive a known issue] / [stand by]." + - "Awaiting direction." + +Do NOT poach another agent's lane without explicit permission. 
+ +## Agents and Their Lanes (Do Not Poach) + +| Agent | Lane | +|-------|------| +| Ezra | Gateway and messaging platforms | +| Bezalel | Creative tooling and agent workspaces | +| Qin | API integrations and external services | +| Fenrir | Security, red-teaming, hardening | +| Timmy | Father-house, canon keeper | +| Wizard | Evennia MUD, academy, world-building | +| Mackenzie | Human research assistant | + +## Exceptions + +I may cross lanes ONLY if: +- The commander explicitly assigns work outside my lane. +- Another agent is down (Lazarus Pit) and their lane is critical path. +- A PR or issue in another lane is blocking infrastructure I own. + +In all cases, log the crossing in `~/.hermes/burn-logs/allegro.log` with permission evidence. diff --git a/fleet/allegro/allegro-wake-checklist.md b/fleet/allegro/allegro-wake-checklist.md new file mode 100644 index 0000000..ae4134b --- /dev/null +++ b/fleet/allegro/allegro-wake-checklist.md @@ -0,0 +1,52 @@ +# Allegro Wake Checklist +## Milestone 0: Real State Check on Wake + +Check each box before choosing work. Do not skip. Do not fake it. + +--- + +### 1. Read Last Cycle Report +- [ ] Open `~/.hermes/burn-logs/allegro.log` +- [ ] Read the last 10 lines +- [ ] Note: complete / crashed / aborted / blocked + +### 2. Read Cycle State File +- [ ] Open `~/.hermes/allegro-cycle-state.json` +- [ ] If `status` is `in_progress`, resume or abort before starting new work. +- [ ] If `status` is `crashed`, assess partial work and roll forward or revert. + +### 3. Read Hands-Off Registry +- [ ] Open `~/.hermes/allegro-hands-off-registry.json` +- [ ] Verify no locked entities are in your work queue. + +### 4. Check Gitea for Allegro Work +- [ ] Query open issues assigned to `allegro` +- [ ] Query open PRs in repos Allegro owns +- [ ] Note highest-leverage item + +### 5. 
Check Infrastructure Alerts +- [ ] Nostr relay (`nostr-relay` service status) +- [ ] Evennia MUD (telnet 4000, web 4001) +- [ ] Gitea health (localhost:3000) +- [ ] Disk / cert / backup status + +### 6. Check Lazarus Pit +- [ ] Any downed agents needing recovery? +- [ ] Any fallback inference paths degraded? + +### 7. Choose Work +- [ ] Pick the ONE thing that unblocks the most downstream work. +- [ ] Update `allegro-cycle-state.json` with target and `status: in_progress`. + +--- + +## Log Format + +After completing the checklist, append to `~/.hermes/burn-logs/allegro.log`: + +``` +[YYYY-MM-DD HH:MM UTC] WAKE — State check complete. + Last cycle: [complete|crashed|aborted] + Current target: [issue/PR/service] + Status: in_progress +``` diff --git a/fleet/allegro/burn-mode-validator.py b/fleet/allegro/burn-mode-validator.py new file mode 100755 index 0000000..015631e --- /dev/null +++ b/fleet/allegro/burn-mode-validator.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +""" +Allegro Burn Mode Validator +Scores each cycle across 6 criteria. +Run at the end of every cycle and append the score to the cycle log. +""" + +import json +import os +import sys +from datetime import datetime, timezone + +LOG_PATH = os.path.expanduser("~/.hermes/burn-logs/allegro.log") +STATE_PATH = os.path.expanduser("~/.hermes/allegro-cycle-state.json") +FAILURE_LOG_PATH = os.path.expanduser("~/.hermes/allegro-failure-log.md") + + +def score_cycle(): + now = datetime.now(timezone.utc).isoformat() + scores = { + "state_check_completed": 0, + "tangible_artifact": 0, + "stop_compliance": 1, # Default to 1; docked only if failure detected + "lane_boundary_respect": 1, # Default to 1 + "evidence_attached": 0, + "reflection_logged_if_failure": 1, # Default to 1 + } + + notes = [] + + # 1. State check completed? 
# Default locations; overridable per-call so the scorer is testable.
LOG_PATH = os.path.expanduser("~/.hermes/burn-logs/allegro.log")
STATE_PATH = os.path.expanduser("~/.hermes/allegro-cycle-state.json")
FAILURE_LOG_PATH = os.path.expanduser("~/.hermes/allegro-failure-log.md")


def score_cycle(log_path=None, state_path=None, failure_log_path=None):
    """Score the current burn-mode cycle across 6 criteria.

    Reads the cycle log and cycle-state JSON, computes a 0..6 score, appends
    a VALIDATOR entry to the cycle log, prints a summary, and returns the
    total.

    Args:
        log_path: Cycle log file. Defaults to LOG_PATH.
        state_path: Cycle-state JSON file. Defaults to STATE_PATH.
        failure_log_path: Failure-log markdown file. Defaults to
            FAILURE_LOG_PATH. Currently unused by any automated check
            (criteria 3/6 are human-audited) but kept for forward
            compatibility.

    Returns:
        int: Total score in 0..6.
    """
    log_path = LOG_PATH if log_path is None else log_path
    state_path = STATE_PATH if state_path is None else state_path
    failure_log_path = (
        FAILURE_LOG_PATH if failure_log_path is None else failure_log_path
    )

    now = datetime.now(timezone.utc).isoformat()
    scores = {
        "state_check_completed": 0,
        "tangible_artifact": 0,
        "stop_compliance": 1,  # Default to 1; docked only if failure detected
        "lane_boundary_respect": 1,  # Default to 1
        "evidence_attached": 0,
        "reflection_logged_if_failure": 1,  # Default to 1
    }
    notes = []

    # 1. State check completed? Look for a WAKE entry among the last 20
    # non-blank log lines.
    if os.path.exists(log_path):
        with open(log_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
        if lines:
            recent = [line for line in lines[-20:] if line.strip()]
            if any(
                "State check complete" in line or "WAKE" in line
                for line in recent
            ):
                scores["state_check_completed"] = 1
            else:
                notes.append("No state check log line found in last 20 log lines.")
        else:
            notes.append("Cycle log is empty.")
    else:
        notes.append("Cycle log does not exist.")

    # 2. Tangible artifact? Non-empty evidence in the latest cycle counts,
    # and so does a documented abort (status "aborted" recorded in the state
    # file), even when no evidence link was attached. The original
    # `status == "aborted" and evidence` branch was dead code: it could only
    # fire when evidence was non-empty, in which case the point was already
    # granted.
    evidence = ""
    artifact_found = False
    if os.path.exists(state_path):
        try:
            with open(state_path, "r", encoding="utf-8") as f:
                state = json.load(f)
        except (OSError, json.JSONDecodeError) as e:
            # Narrowed from `except Exception`: only I/O and parse errors
            # are expected here; anything else should surface loudly.
            notes.append(f"Could not read cycle state: {e}")
        else:
            cycles = state.get("cycles", [])
            if cycles:
                last = cycles[-1]
                evidence = (last.get("evidence") or "").strip()
                artifact_found = bool(evidence) or last.get("status") == "aborted"
    if artifact_found:
        scores["tangible_artifact"] = 1
    else:
        notes.append("No tangible artifact or documented abort found in cycle state.")

    # 3. Stop compliance — defaults to 1. The failure log is the human-audited
    # source of truth; the previous read-and-discard heuristic was dead code
    # and has been removed. Manual flagging docks this score.

    # 4. Lane boundary respect — defaults to 1; flagged manually if needed.

    # 5. Evidence attached? Distinct from criterion 2: this point requires an
    # actual evidence string, so a documented abort without evidence earns
    # criterion 2 but not this one. (Previously this was a duplicate of #2
    # and double-counted the same signal.)
    if evidence:
        scores["evidence_attached"] = 1
    else:
        notes.append("Evidence missing.")

    # 6. Reflection logged if failure — defaults to 1; if a failure occurred
    # this cycle, a manual check is required.

    total = sum(scores.values())
    max_score = len(scores)

    # Append the score to the cycle log, creating the parent directory on
    # first run so a fresh host doesn't crash with FileNotFoundError.
    os.makedirs(os.path.dirname(log_path) or ".", exist_ok=True)
    with open(log_path, "a", encoding="utf-8") as f:
        f.write(f"[{now}] VALIDATOR — Score: {total}/{max_score}\n")
        for criterion, value in scores.items():
            f.write(f"    {criterion}: {value}\n")
        if notes:
            f.write(f"    notes: {' | '.join(notes)}\n")

    print(f"Burn mode score: {total}/{max_score}")
    if notes:
        print("Notes:")
        for note in notes:
            print(f"  - {note}")

    return total


if __name__ == "__main__":
    # Exit 0 on a passing cycle (>= 5/6), 1 otherwise, for cron/CI gating.
    sys.exit(0 if score_cycle() >= 5 else 1)