141 lines
4.2 KiB
Python
141 lines
4.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Lazarus Checkpoint / Restore
============================
Save and resume mission cell state for agent resurrection.

Usage:
    python scripts/lazarus_checkpoint.py <mission_name>
    python scripts/lazarus_checkpoint.py --restore <mission_name>
    python scripts/lazarus_checkpoint.py --restore <mission_name> --identifier <YYYYMMDD_HHMMSS>
    python scripts/lazarus_checkpoint.py --list
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import argparse
|
|
import json
|
|
import tarfile
|
|
import subprocess
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
# Root directory for stored checkpoints; each mission gets its own
# sub-directory named after the mission.
CHECKPOINT_DIR: Path = Path("/var/lib/lazarus/checkpoints")

# Known mission names and the directory each one checkpoints/restores.
# A mission name that is not a key here is used as the source path itself.
MISSION_DIRS: dict[str, str] = {
    "bezalel": "/root/wizards/bezalel",
    "the-nexus": "/root/wizards/bezalel/workspace/the-nexus",
    "hermes-agent": "/root/wizards/bezalel/workspace/hermes-agent",
}
|
|
|
|
|
|
def shell(cmd: str, timeout: int = 60) -> tuple[int, str, str]:
    """Run *cmd* through the system shell and capture its outcome.

    Args:
        cmd: Command line handed to the shell as a single string.
        timeout: Seconds to wait before the run is abandoned.

    Returns:
        ``(returncode, stdout, stderr)`` with surrounding whitespace stripped
        from both streams. If the command could not be run or timed out,
        ``(-1, "", <exception text>)`` is returned instead of raising.
    """
    try:
        proc = subprocess.run(
            cmd,
            shell=True,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
        outcome = (proc.returncode, proc.stdout.strip(), proc.stderr.strip())
    except Exception as exc:
        # Best effort by design: callers only inspect the status tuple.
        outcome = (-1, "", str(exc))
    return outcome
|
|
|
|
|
|
def _git_head(repo: Path) -> str:
    """Return the HEAD commit of the Git checkout at *repo*, or "" if unavailable."""
    try:
        # Argument list + cwd instead of a shell string: the path is never
        # parsed by a shell, so spaces or metacharacters in it are harmless.
        proc = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            cwd=repo,
            capture_output=True,
            text=True,
            timeout=60,
        )
    except (OSError, subprocess.SubprocessError):
        # git missing, not runnable, or timed out: the SHA is optional metadata.
        return ""
    return proc.stdout.strip() if proc.returncode == 0 else ""


def checkpoint(mission: str) -> Path:
    """Archive a mission's directory and record metadata about the archive.

    ``mission`` is looked up in ``MISSION_DIRS``; a name that is not a key
    there is used as the source path itself. The source is written to
    ``CHECKPOINT_DIR/<mission>/<mission>_<UTC timestamp>.tar.gz`` and a JSON
    file with the same stem stores the mission name, creation time, source
    path and, when the source contains a ``.git`` entry, the HEAD commit.

    Args:
        mission: Key of ``MISSION_DIRS`` or a path to the directory to save.

    Returns:
        Path of the ``.tar.gz`` archive that was written.

    Raises:
        SystemExit: With status 1 when the source path does not exist.
        OSError: When the archive or metadata file cannot be written.
        tarfile.TarError: When building the archive fails.
    """
    src = Path(MISSION_DIRS.get(mission, mission))
    if not src.exists():
        print(f"ERROR: Source directory not found: {src}")
        sys.exit(1)

    # A single clock reading feeds both the file names and ``created_at`` so
    # the metadata always agrees with the archive it sits next to.
    now = datetime.now(timezone.utc)
    ts = now.strftime("%Y%m%d_%H%M%S")
    out_dir = CHECKPOINT_DIR / mission
    out_dir.mkdir(parents=True, exist_ok=True)
    tar_path = out_dir / f"{mission}_{ts}.tar.gz"
    meta_path = out_dir / f"{mission}_{ts}.json"

    # Git commit checkpoint
    git_sha = _git_head(src) if (src / ".git").exists() else ""

    # Tar.gz checkpoint of the whole tree (files ignored by Git are included).
    try:
        with tarfile.open(tar_path, "w:gz") as tar:
            tar.add(src, arcname=src.name)
    except BaseException:
        # restore() picks the newest *.tar.gz by name, so a truncated archive
        # must not be left behind, even when interrupted with Ctrl-C.
        tar_path.unlink(missing_ok=True)
        raise

    # The metadata is written only after the archive is complete, so a JSON
    # file never describes an archive that failed to materialise.
    meta = {
        "mission": mission,
        "created_at": now.isoformat(),
        "source": str(src),
        "git_sha": git_sha,
    }
    with open(meta_path, "w", encoding="utf-8") as f:
        json.dump(meta, f, indent=2)

    print(f"CHECKPOINT {mission}: {tar_path}")
    print(f" Meta: {meta_path}")
    print(f" Git SHA: {git_sha or 'n/a'}")
    return tar_path
|
|
|
|
|
|
def restore(mission: str, identifier: str | None = None) -> None:
    """Unpack a stored checkpoint over the mission's directory.

    The archive is extracted into the parent of the mission directory
    (``MISSION_DIRS[mission]``, or ``mission`` itself when it is not a key),
    so the archive's top-level folder lands on the mission directory.
    Existing files are overwritten; files absent from the archive are left
    in place.

    Args:
        mission: Mission name whose checkpoints live in
            ``CHECKPOINT_DIR/<mission>``.
        identifier: Timestamp part of the archive name
            (``<mission>_<identifier>.tar.gz``). When omitted, the archive
            whose file name sorts last is used.

    Raises:
        SystemExit: With status 1 when the mission has no checkpoint
            directory, no archives, or the requested archive is missing.
        tarfile.TarError: When the archive cannot be read.
        OSError: When files cannot be read or written.
    """
    out_dir = CHECKPOINT_DIR / mission
    if not out_dir.exists():
        print(f"ERROR: No checkpoints found for {mission}")
        sys.exit(1)

    tars = sorted(out_dir.glob("*.tar.gz"))
    if not tars:
        print(f"ERROR: No tar.gz checkpoints for {mission}")
        sys.exit(1)

    if identifier:
        tar_path = out_dir / f"{mission}_{identifier}.tar.gz"
        if not tar_path.exists():
            print(f"ERROR: Checkpoint not found: {tar_path}")
            sys.exit(1)
    else:
        # Names embed a sortable UTC timestamp, so the last one is the newest.
        tar_path = tars[-1]

    # Pin the extraction filter explicitly. Without it, Python 3.12/3.13 emit
    # a DeprecationWarning and Python 3.14 switches to the "data" filter,
    # which rejects absolute symlinks and drops ownership -- an incomplete
    # restore. Archives here are produced by checkpoint() itself, so they are
    # extracted verbatim. Interpreters that predate extraction filters do not
    # accept the keyword at all, hence the feature check.
    extract_kwargs = {}
    if hasattr(tarfile, "fully_trusted_filter"):
        extract_kwargs["filter"] = "fully_trusted"

    src = Path(MISSION_DIRS.get(mission, mission))
    print(f"RESTORE {mission}: {tar_path} → {src}")
    with tarfile.open(tar_path, "r:gz") as tar:
        tar.extractall(path=src.parent, **extract_kwargs)
    print("Restore complete. Restart agent to resume from checkpoint.")
|
|
|
|
|
|
def list_checkpoints():
    """Print a per-mission summary of the stored checkpoints.

    For every sub-directory of ``CHECKPOINT_DIR`` (in sorted order) the
    number of ``*.tar.gz`` archives is printed, followed by the names of the
    last five archives in sorted order. Prints a notice and returns when
    ``CHECKPOINT_DIR`` does not exist.
    """
    if not CHECKPOINT_DIR.exists():
        print("No checkpoints stored.")
        return

    for entry in sorted(CHECKPOINT_DIR.iterdir()):
        # Stray files next to the mission folders are not missions.
        if not entry.is_dir():
            continue
        archives = sorted(entry.glob("*.tar.gz"))
        print(f"{entry.name}: {len(archives)} checkpoint(s)")
        recent_names = [archive.name for archive in archives[-5:]]
        for archive_name in recent_names:
            print(f" {archive_name}")
|
|
|
|
|
|
def main() -> int:
    """Parse the command line and run the requested action.

    ``--list`` takes precedence over everything else; otherwise a mission
    name is required and is either restored (``--restore``, optionally with
    ``--identifier``) or checkpointed.

    Returns:
        0 after the action ran, 1 when no mission name was supplied and
        ``--list`` was not used.
    """
    cli = argparse.ArgumentParser(description="Lazarus Checkpoint / Restore")
    cli.add_argument("mission", nargs="?", help="Mission name to checkpoint/restore")
    cli.add_argument("--restore", action="store_true", help="Restore mode")
    cli.add_argument("--identifier", help="Specific checkpoint identifier (YYYYMMDD_HHMMSS)")
    cli.add_argument("--list", action="store_true", help="List all checkpoints")
    opts = cli.parse_args()

    if opts.list:
        list_checkpoints()
    elif not opts.mission:
        print("ERROR: mission name required (or use --list)")
        return 1
    elif opts.restore:
        restore(opts.mission, opts.identifier)
    else:
        checkpoint(opts.mission)
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Hand main()'s return value to the caller as the process exit status.
    sys.exit(main())
|