#!/usr/bin/env python3 """ Lazarus Checkpoint / Restore ============================ Save and resume mission cell state for agent resurrection. Usage: python scripts/lazarus_checkpoint.py python scripts/lazarus_checkpoint.py --restore python scripts/lazarus_checkpoint.py --list """ import os import sys import argparse import json import tarfile import subprocess from datetime import datetime, timezone from pathlib import Path CHECKPOINT_DIR = Path("/var/lib/lazarus/checkpoints") MISSION_DIRS = { "bezalel": "/root/wizards/bezalel", "the-nexus": "/root/wizards/bezalel/workspace/the-nexus", "hermes-agent": "/root/wizards/bezalel/workspace/hermes-agent", } def shell(cmd: str, timeout: int = 60) -> tuple[int, str, str]: try: r = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=timeout) return r.returncode, r.stdout.strip(), r.stderr.strip() except Exception as e: return -1, "", str(e) def checkpoint(mission: str) -> Path: src = Path(MISSION_DIRS.get(mission, mission)) if not src.exists(): print(f"ERROR: Source directory not found: {src}") sys.exit(1) ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S") out_dir = CHECKPOINT_DIR / mission out_dir.mkdir(parents=True, exist_ok=True) tar_path = out_dir / f"{mission}_{ts}.tar.gz" # Git commit checkpoint git_sha = "" git_path = src / ".git" if git_path.exists(): code, out, _ = shell(f"cd {src} && git rev-parse HEAD") if code == 0: git_sha = out meta = { "mission": mission, "created_at": datetime.now(timezone.utc).isoformat(), "source": str(src), "git_sha": git_sha, } meta_path = out_dir / f"{mission}_{ts}.json" with open(meta_path, "w") as f: json.dump(meta, f, indent=2) # Tar.gz checkpoint (respect .gitignore if possible) with tarfile.open(tar_path, "w:gz") as tar: tar.add(src, arcname=src.name) print(f"CHECKPOINT {mission}: {tar_path}") print(f" Meta: {meta_path}") print(f" Git SHA: {git_sha or 'n/a'}") return tar_path def restore(mission: str, identifier: str | None = None): out_dir = CHECKPOINT_DIR / mission if not out_dir.exists(): print(f"ERROR: No checkpoints found for {mission}") sys.exit(1) tars = sorted(out_dir.glob("*.tar.gz")) if not tars: print(f"ERROR: No tar.gz checkpoints for {mission}") sys.exit(1) if identifier: tar_path = out_dir / f"{mission}_{identifier}.tar.gz" if not tar_path.exists(): print(f"ERROR: Checkpoint not found: {tar_path}") sys.exit(1) else: tar_path = tars[-1] src = Path(MISSION_DIRS.get(mission, mission)) print(f"RESTORE {mission}: {tar_path} → {src}") with tarfile.open(tar_path, "r:gz") as tar: tar.extractall(path=src.parent) print("Restore complete. Restart agent to resume from checkpoint.") def list_checkpoints(): if not CHECKPOINT_DIR.exists(): print("No checkpoints stored.") return for mission_dir in sorted(CHECKPOINT_DIR.iterdir()): if mission_dir.is_dir(): tars = sorted(mission_dir.glob("*.tar.gz")) print(f"{mission_dir.name}: {len(tars)} checkpoint(s)") for t in tars[-5:]: print(f" {t.name}") def main() -> int: parser = argparse.ArgumentParser(description="Lazarus Checkpoint / Restore") parser.add_argument("mission", nargs="?", help="Mission name to checkpoint/restore") parser.add_argument("--restore", action="store_true", help="Restore mode") parser.add_argument("--identifier", help="Specific checkpoint identifier (YYYYMMDD_HHMMSS)") parser.add_argument("--list", action="store_true", help="List all checkpoints") args = parser.parse_args() if args.list: list_checkpoints() return 0 if not args.mission: print("ERROR: mission name required (or use --list)") return 1 if args.restore: restore(args.mission, args.identifier) else: checkpoint(args.mission) return 0 if __name__ == "__main__": raise SystemExit(main())