From afd377af42c6af8b9101ea3f3c2f2e9c50d9469d Mon Sep 17 00:00:00 2001 From: Allegro Date: Thu, 2 Apr 2026 01:05:01 +0000 Subject: [PATCH] Allegro heartbeat for Adagio: 2026-04-02 01:05:01 UTC --- config/agent-registry.json | 48 +++++ docs/agent-tick-protocol.md | 195 +++++++++++++++++++ heartbeats/allegro/20260402_010501.json | 12 ++ heartbeats/allegro/latest.json | 2 +- scripts/agent_tick_submitter.py | 239 ++++++++++++++++++++++++ 5 files changed, 495 insertions(+), 1 deletion(-) create mode 100644 config/agent-registry.json create mode 100644 docs/agent-tick-protocol.md create mode 100644 heartbeats/allegro/20260402_010501.json create mode 100644 scripts/agent_tick_submitter.py diff --git a/config/agent-registry.json b/config/agent-registry.json new file mode 100644 index 0000000..1a9aeb8 --- /dev/null +++ b/config/agent-registry.json @@ -0,0 +1,48 @@ +{ + "version": "1.0", + "last_updated": "2026-04-02T00:00:00Z", + "agents": [ + { + "id": "allegro", + "name": "Allegro", + "role": "tempo-and-dispatch", + "owner": "alexander", + "home_dir": "/root/wizards/allegro", + "config_path": "/root/wizards/allegro/home/config.yaml", + "expected_capabilities": ["telegram", "api_server", "gitea"], + "tick_schedule": "monthly", + "active": true + }, + { + "id": "adagio", + "name": "Adagio", + "role": "breath-and-design", + "owner": "alexander", + "home_dir": "/root/wizards/adagio", + "config_path": "/root/wizards/adagio/home/config.yaml", + "expected_capabilities": ["telegram", "api_server", "gitea"], + "tick_schedule": "monthly", + "active": true + }, + { + "id": "timmy", + "name": "Timmy Time", + "role": "father-house", + "owner": "alexander", + "home_dir": "/root/timmy", + "config_path": "/root/timmy/config.yaml", + "expected_capabilities": ["telegram", "api_server"], + "tick_schedule": "monthly", + "active": true + } + ], + "groups": { + "timmy-time": { + "name": "Timmy Time Core", + "agents": ["allegro", "adagio", "timmy"], + "tick_frequency": "monthly", + 
"tick_window_hours": 72, + "auto_rca": true + } + } +} diff --git a/docs/agent-tick-protocol.md b/docs/agent-tick-protocol.md new file mode 100644 index 0000000..53fce4e --- /dev/null +++ b/docs/agent-tick-protocol.md @@ -0,0 +1,195 @@ +# Agent Tick Protocol v1.0 + +**Purpose:** Establish baseline health monitoring for all Timmy Time agents with automated RCA on failure. + +**Frequency:** Monthly (1st of each month) + +**Scope:** All agents in Timmy Time group + +--- + +## Protocol Specification + +### What Constitutes a Tick + +Every agent must emit a **Tick Record** containing: + +```json +{ + "agent_id": "allegro", + "agent_name": "Allegro", + "role": "tempo-and-dispatch", + "timestamp": "2026-04-02T00:00:00Z", + "tick_month": "2026-04", + "status": "healthy", + "vitals": { + "gateway_running": true, + "home_directory_accessible": true, + "config_valid": true, + "last_user_interaction": "2026-04-01T23:45:00Z", + "work_items_completed_this_month": 47 + }, + "capabilities": { + "telegram": true, + "api_server": true, + "gitea_access": true + }, + "notes": "" +} +``` + +### Tick Status Values + +| Status | Meaning | Action Required | +|--------|---------|-----------------| +| `healthy` | All systems operational | None | +| `degraded` | Some capabilities impaired | Monitor | +| `critical` | Core functionality compromised | RCA required | +| `offline` | Agent unreachable | RCA + escalation | + +--- + +## Tick Submission + +### Method 1: Direct Commit to household-snapshots + +Agents commit their tick to: +``` +ticks/2026-04/allegro.json +ticks/2026-04/adagio.json +``` + +### Method 2: API Endpoint (Future) + +POST to Evenia world tick with tick payload. + +--- + +## Monitoring & Enforcement + +### Monthly Tick Collection Window + +- **Opens:** 1st of month at 00:00 UTC +- **Closes:** 3rd of month at 23:59 UTC +- **Grace Period:** 72 hours + +### Automated Checks + +1. 
**Tick Presence Check** (4th of month) + - Verify all registered agents have submitted ticks + - Missing ticks → Gitea issue created + +2. **Status Validation** (4th of month) + - Check all submitted ticks for `critical` or `offline` status + - Failed status → Gitea issue created + +3. **RCA Auto-Generation** (4th of month) + - Issues created with RCA template pre-filled + - Assigned to agent owner + - Due date: 7 days + +--- + +## RCA Template + +When an agent fails to tick or reports critical status, this template is used: + +```markdown +## Agent Health Failure: [AGENT_ID] + +**Detected:** [DATE] +**Agent:** [AGENT_NAME] +**Failure Type:** [MISSING_TICK | CRITICAL_STATUS | OFFLINE] + +### Expected Behavior +Agent should emit monthly tick within 72-hour window. + +### Actual Behavior +- Tick Status: [STATUS] +- Last Known Good: [DATE] +- Capabilities Lost: [LIST] + +### Root Cause Analysis Required + +Please investigate and document: + +1. **What happened?** + - Last successful operation + - Error logs (if any) + - System state at failure + +2. **Why did it happen?** + - Configuration drift + - Resource exhaustion + - External dependency failure + - Code regression + +3. **How do we prevent recurrence?** + - Monitoring improvements + - Automated recovery + - Alert tuning + +4. 
**Recovery steps taken** + - Actions performed + - Current status + - Validation performed + +### Timeline + +- [ ] T+0: Issue created (auto) +- [ ] T+1h: Initial response +- [ ] T+24h: RCA submitted +- [ ] T+7d: Resolution verified + +### Related +- Previous tick: [LINK] +- Agent config: [LINK] +- Logs: [LINK] + +--- +*Auto-generated by Agent Tick Monitor* +``` + +--- + +## Agent Registry + +Current agents in scope: + +| Agent ID | Name | Role | Owner | Status | +|----------|------|------|-------|--------| +| allegro | Allegro | tempo-and-dispatch | Alexander | active | +| adagio | Adagio | breath-and-design | Alexander | active | +| timmy | Timmy Time | father-house | Alexander | active | + +--- + +## Implementation + +### Files + +- `scripts/agent_tick_monitor.py` - Monthly monitoring +- `scripts/agent_tick_submitter.py` - Agent self-reporting +- `templates/rca-template.md` - RCA issue template +- `config/agent-registry.json` - Agent definitions + +### Cron Schedule + +```cron +# Monthly tick collection - 1st of month at 00:00 +0 0 1 * * /usr/bin/python3 /root/wizards/household-snapshots/scripts/agent_tick_submitter.py --all + +# Tick validation and RCA trigger - 4th of month at 00:00 +0 0 4 * * /usr/bin/python3 /root/wizards/household-snapshots/scripts/agent_tick_monitor.py --check-and-report +``` + +--- + +## Version History + +| Version | Date | Changes | +|---------|------|---------| +| 1.0 | 2026-04-02 | Initial protocol | + +--- +*Evenia binds us. Health is monitored. 
Failures are learned from.* diff --git a/heartbeats/allegro/20260402_010501.json b/heartbeats/allegro/20260402_010501.json new file mode 100644 index 0000000..ad6829e --- /dev/null +++ b/heartbeats/allegro/20260402_010501.json @@ -0,0 +1,12 @@ +{ + "timestamp": "2026-04-02T01:05:01.485463Z", + "wizard": "adagio", + "checked_by": "allegro", + "checks": { + "home_exists": true, + "soul_present": true, + "config_present": true, + "gateway_running": true + }, + "status": "healthy" +} \ No newline at end of file diff --git a/heartbeats/allegro/latest.json b/heartbeats/allegro/latest.json index 3cacd09..5abb363 120000 --- a/heartbeats/allegro/latest.json +++ b/heartbeats/allegro/latest.json @@ -1 +1 @@ -20260402_005001.json \ No newline at end of file +20260402_010501.json \ No newline at end of file diff --git a/scripts/agent_tick_submitter.py b/scripts/agent_tick_submitter.py new file mode 100644 index 0000000..f3388fe --- /dev/null +++ b/scripts/agent_tick_submitter.py @@ -0,0 +1,239 @@ +#!/usr/bin/env python3 +""" +Agent Tick Submitter + +Each agent runs this monthly to submit their health tick. +Can also be run centrally for all agents. 
import os
import sys
import json
import subprocess
import argparse
from datetime import datetime, timezone
from pathlib import Path

# Local checkout of the snapshots repository that ticks are written into.
REPO_DIR = Path("/root/wizards/household-snapshots")
TICKS_DIR = REPO_DIR / "ticks"
REGISTRY_FILE = REPO_DIR / "config" / "agent-registry.json"
# NOTE(review): GITEA_URL is not read anywhere in this script; it is kept
# because other code may import it from this module.
GITEA_URL = os.environ.get("CLAW_CODE_GITEA_URL", "http://143.198.27.163:3000")
GITEA_TOKEN = os.environ.get("GITEA_TOKEN", "")


def _utc_now():
    """Return the current UTC time as a naive datetime.

    The tzinfo is dropped on purpose so that ``isoformat() + "Z"`` keeps
    producing ``2026-04-02T01:05:01.485463Z`` rather than ``...+00:00Z``.
    """
    return datetime.now(timezone.utc).replace(tzinfo=None)


def _utc_from_timestamp(timestamp):
    """Convert a POSIX timestamp to a naive datetime expressed in UTC."""
    return datetime.fromtimestamp(timestamp, tz=timezone.utc).replace(tzinfo=None)


def _read_text(path):
    """Return the full text of *path*; raises OSError/UnicodeDecodeError."""
    with open(path, encoding="utf-8") as f:
        return f.read()


def _agent_subdir(agent_id, name, home_dir=None):
    """Return ``<home>/<name>`` for an agent.

    *home_dir* is the registry's ``home_dir`` when the caller has it;
    otherwise the historical ``/root/wizards/<agent_id>`` layout is assumed.
    """
    base = Path(home_dir) if home_dir else Path(f"/root/wizards/{agent_id}")
    return base / name


def run_cmd(cmd, cwd=None):
    """Run a command and return ``(stdout, stderr, returncode)``.

    Args:
        cmd: Either an argv list (executed directly, no shell) or a string
            (executed through the shell, as before). Prefer the list form:
            it needs no quoting and no shell process is spawned.
        cwd: Optional working directory.

    Returns:
        Stripped stdout, stripped stderr and the exit status. If the
        process cannot be started at all (missing executable or working
        directory) the result is ``("", <error text>, 127)`` instead of an
        exception, so callers can always unpack three values.
    """
    try:
        result = subprocess.run(
            cmd,
            shell=isinstance(cmd, str),
            cwd=cwd,
            capture_output=True,
            text=True,
        )
    except OSError as exc:
        return "", str(exc), 127
    return result.stdout.strip(), result.stderr.strip(), result.returncode


def load_registry():
    """Load and return the agent registry parsed from REGISTRY_FILE."""
    with open(REGISTRY_FILE, encoding="utf-8") as f:
        return json.load(f)


def check_gateway_running(agent_id):
    """Return True if a ``hermes gateway ... <agent_id>`` process exists.

    pgrep is invoked without a shell. When the same pattern is passed via
    ``sh -c "pgrep -f '...' || true"`` the wrapping shell carries the
    pattern in its own command line and can be matched by ``pgrep -f``,
    which makes the check report a gateway that is not there.

    *agent_id* is interpolated into the pgrep regular expression unescaped.
    """
    stdout, _, _ = run_cmd(["pgrep", "-f", f"hermes gateway.*{agent_id}"])
    return bool(stdout)


def check_home_accessible(home_dir):
    """Return True if *home_dir* exists on disk."""
    return Path(home_dir).exists()


def check_config_valid(config_path):
    """Return True if the config file is readable and mentions both
    ``model:`` and ``platforms:``.

    This is a substring check, not a YAML parse. A missing, unreadable or
    undecodable file counts as invalid.
    """
    try:
        content = _read_text(config_path)
    except (OSError, UnicodeDecodeError):
        return False
    return "model:" in content and "platforms:" in content


def get_last_interaction(agent_id, home_dir=None):
    """Return the UTC mtime of the agent's newest ``*.log`` file, or None.

    The newest log mtime is used as a stand-in for the last user
    interaction; the log contents are not inspected.

    Args:
        agent_id: Registry id of the agent.
        home_dir: Optional agent home; logs are looked up in
            ``<home_dir>/logs``. Defaults to ``/root/wizards/<agent_id>``.

    Returns:
        ISO-8601 string with a trailing ``Z``, or None when there is no
        logs directory, no log file, or the files cannot be stat'ed.
    """
    log_dir = _agent_subdir(agent_id, "logs", home_dir)
    if not log_dir.is_dir():
        return None
    try:
        mtimes = [log.stat().st_mtime for log in log_dir.glob("*.log")]
    except OSError:
        return None
    if not mtimes:
        return None
    # mtimes are converted in UTC so the "Z" suffix is truthful on hosts
    # whose local timezone is not UTC.
    return _utc_from_timestamp(max(mtimes)).isoformat() + "Z"


def count_work_items(agent_id, home_dir=None):
    """Count files under the agent's ``work`` directory modified this month.

    Both "this month" and each file's mtime are evaluated in UTC so files
    touched near a month boundary are attributed consistently.

    Args:
        agent_id: Registry id of the agent.
        home_dir: Optional agent home; defaults to
            ``/root/wizards/<agent_id>``.

    Returns:
        Number of regular files (recursively) whose mtime falls in the
        current UTC month; 0 when the directory does not exist.
    """
    work_dir = _agent_subdir(agent_id, "work", home_dir)
    if not work_dir.exists():
        return 0

    current_month = _utc_now().strftime("%Y-%m")
    count = 0
    for item in work_dir.rglob("*"):
        try:
            if not item.is_file():
                continue
            modified = _utc_from_timestamp(item.stat().st_mtime)
        except OSError:
            # File vanished or is unreadable between listing and stat.
            continue
        if modified.strftime("%Y-%m") == current_month:
            count += 1
    return count


def generate_tick(agent_id, registry_data):
    """Build the tick record for one agent.

    Args:
        agent_id: Id to look up in ``registry_data["agents"]``.
        registry_data: Parsed registry (see load_registry).

    Returns:
        Dict with agent identity, UTC timestamp, ``tick_month``, overall
        ``status`` ("healthy" / "degraded" / "critical"), ``vitals`` and
        per-capability booleans.

    Raises:
        ValueError: If *agent_id* is not present in the registry.
        KeyError: If the registry entry lacks a required field.
    """
    agent = next(
        (entry for entry in registry_data["agents"] if entry["id"] == agent_id),
        None,
    )
    if not agent:
        raise ValueError(f"Agent {agent_id} not found in registry")

    now = _utc_now()
    home_dir = agent["home_dir"]
    config_path = agent["config_path"]

    # Vitals
    gateway_running = check_gateway_running(agent_id)
    home_accessible = check_home_accessible(home_dir)
    config_valid = check_config_valid(config_path)

    # A dead gateway or missing home is critical; a bad config only degrades.
    if not gateway_running or not home_accessible:
        status = "critical"
    elif not config_valid:
        status = "degraded"
    else:
        status = "healthy"

    capabilities = {}
    for cap in agent.get("expected_capabilities", []):
        if cap == "telegram":
            # Coarse check: any "enabled: true" in the config counts,
            # not specifically the telegram section.
            try:
                capabilities[cap] = "enabled: true" in _read_text(config_path)
            except (OSError, UnicodeDecodeError):
                capabilities[cap] = False
        elif cap == "api_server":
            capabilities[cap] = gateway_running
        elif cap == "gitea":
            capabilities[cap] = bool(GITEA_TOKEN)
        else:
            # Unknown capabilities are not probed and are reported as present.
            capabilities[cap] = True

    return {
        "agent_id": agent_id,
        "agent_name": agent["name"],
        "role": agent["role"],
        "timestamp": now.isoformat() + "Z",
        "tick_month": now.strftime("%Y-%m"),
        "status": status,
        "vitals": {
            "gateway_running": gateway_running,
            "home_directory_accessible": home_accessible,
            "config_valid": config_valid,
            "last_user_interaction": get_last_interaction(agent_id, home_dir),
            "work_items_completed_this_month": count_work_items(agent_id, home_dir),
        },
        "capabilities": capabilities,
        "notes": "",
    }


def save_tick(tick):
    """Write *tick* to ``TICKS_DIR/<tick_month>/<agent_id>.json``.

    Creates the month directory if needed and overwrites an existing tick
    for the same agent and month. Returns the path written.
    """
    month_dir = TICKS_DIR / tick["tick_month"]
    month_dir.mkdir(parents=True, exist_ok=True)

    tick_file = month_dir / f"{tick['agent_id']}.json"
    with open(tick_file, "w", encoding="utf-8") as f:
        json.dump(tick, f, indent=2)

    return tick_file


def commit_tick(agent_id, tick_month):
    """Commit the agent's tick file and push it to ``origin main``.

    Only the tick file is staged, so unrelated working-tree changes are
    not swept into a commit labelled as a monthly tick.

    Returns:
        ``(ok, message)`` where *ok* is False if the tick file is missing
        or any git step fails.
    """
    tick_file = TICKS_DIR / tick_month / f"{agent_id}.json"
    if not tick_file.exists():
        return False, "Tick file not found"

    _, stderr, code = run_cmd(["git", "add", "--", str(tick_file)], cwd=REPO_DIR)
    if code != 0:
        return False, f"Commit failed: {stderr}"

    # `git diff --cached --quiet` exits 0 when nothing is staged. Checking
    # this up front replaces matching git's "nothing to commit" text, which
    # git prints on stdout (not stderr) and words differently depending on
    # the state of the working tree.
    _, _, staged = run_cmd(["git", "diff", "--cached", "--quiet"], cwd=REPO_DIR)
    if staged != 0:
        stdout, stderr, code = run_cmd(
            ["git", "commit", "-m", f"Monthly tick: {agent_id} for {tick_month}"],
            cwd=REPO_DIR,
        )
        if code != 0:
            return False, f"Commit failed: {stderr or stdout}"

    _, stderr, code = run_cmd(["git", "push", "origin", "main"], cwd=REPO_DIR)
    if code != 0:
        return False, f"Push failed: {stderr}"

    return True, "Tick committed successfully"


def submit_agent_tick(agent_id, registry_data, commit=True):
    """Generate, save and optionally commit the tick for one agent.

    Returns:
        The tick dict on success, or False if the commit/push step failed
        (the tick file is still left on disk in that case).

    Raises:
        ValueError: If *agent_id* is not in the registry.
    """
    print(f"Generating tick for {agent_id}...")

    tick = generate_tick(agent_id, registry_data)
    tick_file = save_tick(tick)
    print(f"  ✓ Tick saved: {tick_file}")
    print(f"  Status: {tick['status']}")

    if commit:
        success, msg = commit_tick(agent_id, tick["tick_month"])
        if not success:
            print(f"  ✗ Commit failed: {msg}")
            return False
        print("  ✓ Committed to Gitea")

    return tick


def _submit_safely(agent_id, registry_data, commit):
    """Submit one tick; report errors instead of raising. Returns success."""
    try:
        return bool(submit_agent_tick(agent_id, registry_data, commit=commit))
    except (ValueError, KeyError, OSError) as exc:
        print(f"  ✗ Tick failed for {agent_id}: {exc}")
        return False


def main():
    """CLI entry point.

    Returns the process exit status: 0 when every requested tick was
    submitted, 1 if the registry could not be loaded, no agent could be
    determined, or any submission failed.
    """
    parser = argparse.ArgumentParser(description="Agent Tick Submitter")
    parser.add_argument("--agent", help="Submit tick for specific agent")
    parser.add_argument("--all", action="store_true", help="Submit ticks for all active agents")
    parser.add_argument("--no-commit", action="store_true", help="Save locally but don't commit")
    args = parser.parse_args()

    try:
        registry = load_registry()
    except (OSError, ValueError) as exc:
        print(f"Error: Could not load agent registry {REGISTRY_FILE}: {exc}")
        return 1

    if args.all:
        print("=== Submitting ticks for all agents ===")
        # Entries without an explicit "active" flag are treated as inactive.
        targets = [a["id"] for a in registry["agents"] if a.get("active", False)]
    elif args.agent:
        targets = [args.agent]
    else:
        # Auto-detect: the machine's hostname must equal a registered agent id.
        hostname = os.uname().nodename
        if not any(a["id"] == hostname for a in registry["agents"]):
            print("Error: Could not auto-detect agent. Use --agent or --all")
            return 1
        targets = [hostname]

    commit = not args.no_commit
    failed = False
    for agent_id in targets:
        # Keep going after a failure so one broken agent does not block
        # the ticks of the others.
        if not _submit_safely(agent_id, registry, commit):
            failed = True
        if args.all:
            print()

    return 1 if failed else 0


if __name__ == "__main__":
    sys.exit(main())