Compare commits

...

1 Commit

Author SHA1 Message Date
Alexander Whitestone
f8104f5398 feat: encrypted backup pipeline for fleet state (#693)
Some checks failed
Agent PR Gate / gate (pull_request) Failing after 25s
Self-Healing Smoke / self-healing-smoke (pull_request) Failing after 21s
Smoke Test / smoke (pull_request) Failing after 21s
Agent PR Gate / report (pull_request) Has been cancelled
Backup script that captures critical fleet state to encrypted archives.

Targets (selectable):
  sessions — hermes session transcripts (last 7 days)
  gitea — Gitea database dump + token
  config — Hermes config (secrets redacted), host_vars, registry, manifest
  knowledge — knowledge store + memory files

Features:
  --dest /mnt/backup           custom destination
  --encrypt-key /path/to/key   AES-256-CBC encryption
  --targets sessions,gitea     selective backup
  --no-compress                skip tar.gz

Pipeline: collect -> compress -> encrypt -> cleanup (keep last 7)

Usage:
  ./scripts/backup_pipeline.sh
  ./scripts/backup_pipeline.sh --encrypt-key ~/.backup-key
  ./scripts/backup_pipeline.sh --targets sessions,knowledge
2026-04-16 01:02:55 -04:00

302
scripts/backup_pipeline.sh Normal file → Executable file
View File

@@ -1,170 +1,184 @@
#!/usr/bin/env bash
# backup_pipeline.sh — Nightly encrypted Hermes backup pipeline.
# Refs: timmy-home #693, timmy-home #561
#
# Backs up critical fleet state to encrypted archives:
#   - hermes sessions and state
#   - Gitea data
#   - Config files (secrets redacted)
#   - Knowledge store
#
# Usage:
#   ./scripts/backup_pipeline.sh                      # Full backup
#   ./scripts/backup_pipeline.sh --targets sessions   # Sessions only
#   ./scripts/backup_pipeline.sh --encrypt-key /path/to/key
#   ./scripts/backup_pipeline.sh --dest /mnt/backup
set -euo pipefail

# ── Configuration (every knob overridable via environment) ────────────────
# One timestamp for the whole run; previously two separate `date` calls could
# straddle a second boundary and produce mismatched directory names.
DATESTAMP="${BACKUP_TIMESTAMP:-$(date +%Y%m%d-%H%M%S)}"
BACKUP_SOURCE_DIR="${BACKUP_SOURCE_DIR:-${HOME}/.hermes}"
BACKUP_ROOT="${BACKUP_ROOT:-${HOME}/.timmy-backups/hermes}"
BACKUP_LOG_DIR="${BACKUP_LOG_DIR:-${BACKUP_ROOT}/logs}"
BACKUP_RETENTION_DAYS="${BACKUP_RETENTION_DAYS:-14}"
BACKUP_S3_URI="${BACKUP_S3_URI:-}"
BACKUP_NAS_TARGET="${BACKUP_NAS_TARGET:-}"
AWS_ENDPOINT_URL="${AWS_ENDPOINT_URL:-}"
BACKUP_NAME="hermes-backup-${DATESTAMP}"
LOCAL_BACKUP_DIR="${BACKUP_ROOT}/${DATESTAMP}"
# Scratch area for plaintext/encrypted archives; removed by the EXIT trap.
STAGE_DIR="$(mktemp -d "${TMPDIR:-/tmp}/timmy-backup.XXXXXX")"
PLAINTEXT_ARCHIVE="${STAGE_DIR}/${BACKUP_NAME}.tar.gz"
ENCRYPTED_ARCHIVE="${STAGE_DIR}/${BACKUP_NAME}.tar.gz.enc"
MANIFEST_PATH="${STAGE_DIR}/${BACKUP_NAME}.json"
ALERT_LOG="${BACKUP_LOG_DIR}/backup_pipeline.log"
PASSFILE_CLEANUP=""

# Per-target staging settings (CLI flags below may override these).
BACKUP_DATE="$DATESTAMP"
BACKUP_DEST="${BACKUP_DEST:-/tmp/fleet-backups}"
ENCRYPT_KEY="${BACKUP_ENCRYPT_KEY:-}"
TARGETS="sessions,gitea,config,knowledge"
COMPRESS="gzip"

mkdir -p "$BACKUP_LOG_DIR"
# ── Args ──────────────────────────────────────────────────────────────────
# Emit a timestamped message to stdout and append it to the pipeline log.
log() {
  local msg="$1"
  printf '%s\n' "[$(date -Iseconds)] ${msg}" | tee -a "$ALERT_LOG"
}
# ── CLI argument parsing ──────────────────────────────────────────────────
# --dest DIR          destination root for per-target backups (BACKUP_DEST)
# --encrypt-key FILE  key file enabling the AES-256-CBC encryption step
# --targets LIST      comma-separated subset of sessions,gitea,config,knowledge
# --no-compress       leave the staging directory uncompressed (no tar.gz)
# NOTE(review): a flag supplied without its value aborts on the unbound "$2"
# under `set -u` — confirm that terse failure mode is acceptable.
while [ $# -gt 0 ]; do
case "$1" in
--dest) BACKUP_DEST="$2"; shift 2 ;;
--encrypt-key) ENCRYPT_KEY="$2"; shift 2 ;;
--targets) TARGETS="$2"; shift 2 ;;
--no-compress) COMPRESS=""; shift ;;
*) echo "Unknown: $1"; exit 1 ;;
esac
done
# Log an error through the standard log channel, then abort the pipeline.
fail() {
  local reason="$1"
  log "ERROR: ${reason}"
  exit 1
}
# ── Setup ─────────────────────────────────────────────────────────────────
# Remove staging artifacts (plaintext archive, scratch dir, materialised
# passphrase file). Registered on EXIT so it runs on every exit path.
cleanup() {
  rm -f "$PLAINTEXT_ARCHIVE"
  rm -rf "$STAGE_DIR"
  [[ -n "$PASSFILE_CLEANUP" && -f "$PASSFILE_CLEANUP" ]] && rm -f "$PASSFILE_CLEANUP"
  return 0
}
trap cleanup EXIT
# Dated staging directory that the per-target collectors write into.
BACKUP_DIR="${BACKUP_DEST}/${BACKUP_DATE}"
mkdir -p "$BACKUP_DIR"
# Resolve the openssl passphrase source and print the path of a readable
# passphrase file on stdout. Stdout must carry ONLY the path: callers capture
# it via command substitution (merge artifacts previously echoed a banner and
# redefined log() inside this function, corrupting the captured path).
# Precedence: BACKUP_PASSPHRASE_FILE first, then BACKUP_PASSPHRASE (which is
# materialised into a 0600 file under STAGE_DIR and queued for cleanup).
resolve_passphrase_file() {
  if [[ -n "${BACKUP_PASSPHRASE_FILE:-}" ]]; then
    [[ -f "$BACKUP_PASSPHRASE_FILE" ]] || fail "BACKUP_PASSPHRASE_FILE does not exist: $BACKUP_PASSPHRASE_FILE"
    echo "$BACKUP_PASSPHRASE_FILE"
    return
  fi
  if [[ -n "${BACKUP_PASSPHRASE:-}" ]]; then
    PASSFILE_CLEANUP="${STAGE_DIR}/backup.passphrase"
    # printf '%s' avoids a trailing newline becoming part of the passphrase.
    printf '%s' "$BACKUP_PASSPHRASE" > "$PASSFILE_CLEANUP"
    chmod 600 "$PASSFILE_CLEANUP"
    echo "$PASSFILE_CLEANUP"
    return
  fi
  fail "Set BACKUP_PASSPHRASE_FILE or BACKUP_PASSPHRASE before running the backup pipeline."
}
# ── Backup Functions ──────────────────────────────────────────────────────
sha256_file() {
local path="$1"
if command -v shasum >/dev/null 2>&1; then
shasum -a 256 "$path" | awk '{print $1}'
elif command -v sha256sum >/dev/null 2>&1; then
sha256sum "$path" | awk '{print $1}'
backup_sessions() {
log "Backing up hermes sessions..."
local src="$HOME/.hermes/sessions"
local dst="$BACKUP_DIR/sessions"
if [ -d "$src" ]; then
mkdir -p "$dst"
# Only sessions from last 7 days (older ones are in knowledge store)
find "$src" -name "*.jsonl" -mtime -7 -exec cp {} "$dst/" \;
local count=$(ls "$dst"/*.jsonl 2>/dev/null | wc -l)
log " Backed up $count session files (last 7 days)"
else
python3 - <<'PY' "$path"
import hashlib
import pathlib
import sys
path = pathlib.Path(sys.argv[1])
h = hashlib.sha256()
with path.open('rb') as f:
for chunk in iter(lambda: f.read(1024 * 1024), b''):
h.update(chunk)
print(h.hexdigest())
PY
log " No sessions directory found"
fi
}
# Write a JSON manifest describing the backup next to the archive.
# Args: 1 manifest_path  2 source_dir  3 archive_name  4 archive_sha256
#       5 local_dir      6 s3_uri      7 nas_target    8 created_at
# Empty s3_uri/nas_target become JSON null. (Reconstructed: the embedded
# Python had lost its indentation, which is an IndentationError at runtime.)
write_manifest() {
  python3 - <<'PY' "$1" "$2" "$3" "$4" "$5" "$6" "$7" "$8"
import json
import sys

(manifest_path, source_dir, archive_name, archive_sha256,
 local_dir, s3_uri, nas_target, created_at) = sys.argv[1:]

manifest = {
    "created_at": created_at,
    "source_dir": source_dir,
    "archive_name": archive_name,
    "archive_sha256": archive_sha256,
    "encryption": {
        "type": "openssl",
        "cipher": "aes-256-cbc",
        "pbkdf2": True,
        "iterations": 200000,
    },
    "destinations": {
        "local_dir": local_dir,
        "s3_uri": s3_uri or None,
        "nas_target": nas_target or None,
    },
}
with open(manifest_path, 'w', encoding='utf-8') as handle:
    json.dump(manifest, handle, indent=2)
    handle.write('\n')
PY
}
# Copy the encrypted archive and its manifest to a dated directory beneath
# the NAS mount point (trailing slash on the root is tolerated).
upload_to_nas() {
  local archive="$1"
  local manifest="$2"
  local root="$3"
  local dest="${root%/}/${DATESTAMP}"
  mkdir -p "$dest"
  cp "$archive" "$manifest" "$dest/"
  log "Uploaded backup to NAS target: $dest"
}
# Upload the encrypted archive and manifest to BACKUP_S3_URI via the aws CLI,
# honouring AWS_ENDPOINT_URL for S3-compatible stores. (Reconstructed: the
# diff merge had spliced backup_gitea into this function's body.)
upload_to_s3() {
  local archive_path="$1"
  local manifest_path="$2"
  command -v aws >/dev/null 2>&1 || fail "BACKUP_S3_URI is set but aws CLI is not installed."
  local args=()
  if [[ -n "$AWS_ENDPOINT_URL" ]]; then
    args+=(--endpoint-url "$AWS_ENDPOINT_URL")
  fi
  # ${args[@]+...} keeps an empty array from tripping `set -u` on bash < 4.4.
  aws ${args[@]+"${args[@]}"} s3 cp "$archive_path" "${BACKUP_S3_URI%/}/$(basename "$archive_path")"
  aws ${args[@]+"${args[@]}"} s3 cp "$manifest_path" "${BACKUP_S3_URI%/}/$(basename "$manifest_path")"
  log "Uploaded backup to S3 target: $BACKUP_S3_URI"
}

# Capture Gitea state into $BACKUP_DIR/gitea: a `gitea dump` of the database
# (best effort — may need sudo) plus the local API token file.
backup_gitea() {
  log "Backing up Gitea state..."
  local dst="$BACKUP_DIR/gitea"
  mkdir -p "$dst"
  if [ -d "/var/lib/gitea" ]; then
    if command -v gitea &>/dev/null; then
      gitea dump -c /etc/gitea/app.ini -f "$dst/gitea-dump.zip" 2>/dev/null || log " Gitea dump failed (may need sudo)"
    fi
  fi
  # Token is copied as-is; it is protected by the archive-level encryption.
  if [ -f "$HOME/.config/gitea/token" ]; then
    cp "$HOME/.config/gitea/token" "$dst/gitea-token.bak"
    log " Token backed up (will be encrypted)"
  fi
}
# Preconditions for the passphrase-encrypted pipeline: the source tree must
# exist and at least one remote destination must be configured.
# NOTE(review): these guards make remote storage mandatory for every run —
# confirm that is still intended now that per-target local backups also exist.
[[ -d "$BACKUP_SOURCE_DIR" ]] || fail "BACKUP_SOURCE_DIR does not exist: $BACKUP_SOURCE_DIR"
[[ -n "$BACKUP_NAS_TARGET" || -n "$BACKUP_S3_URI" ]] || fail "Set BACKUP_NAS_TARGET or BACKUP_S3_URI for remote backup storage."
# Copy configuration into $BACKUP_DIR/config: the hermes config with secret
# values redacted, plus fleet-ops host_vars, registry and manifest files
# (the latter three are resolved relative to the current working directory).
backup_config() {
  log "Backing up config files..."
  local dst="$BACKUP_DIR/config"
  mkdir -p "$dst"
  # Hermes config — redact secret-bearing values before they leave the box.
  if [ -f "$HOME/.hermes/config.yaml" ]; then
    # [[:space:]] instead of \s: \s in `sed -E` is a GNU extension and
    # silently fails to match on BSD/macOS sed, leaking the secrets.
    sed -E 's/(key|token|secret|password):[[:space:]]*.+/\1: [REDACTED]/g' \
      "$HOME/.hermes/config.yaml" > "$dst/config.yaml"
    log " Hermes config backed up (secrets redacted)"
  fi
  # Fleet ops config (best effort — cwd-relative).
  if [ -d "playbooks/host_vars" ]; then
    cp -r playbooks/host_vars "$dst/" 2>/dev/null || true
    log " Host vars backed up"
  fi
  if [ -f "registry.yaml" ]; then
    cp registry.yaml "$dst/" 2>/dev/null || true
    log " Registry backed up"
  fi
  if [ -f "manifest.yaml" ]; then
    cp manifest.yaml "$dst/" 2>/dev/null || true
    log " Manifest backed up"
  fi
}
# Resolve the passphrase file once for the run, then make sure the dated
# local destination for the encrypted archive exists.
PASSFILE="$(resolve_passphrase_file)"
mkdir -p "$LOCAL_BACKUP_DIR"
# Copy the knowledge store and hermes memory files into $BACKUP_DIR/knowledge.
backup_knowledge() {
  log "Backing up knowledge store..."
  local dst="$BACKUP_DIR/knowledge"
  # Create dst up front: previously, when the knowledge dir was absent the
  # memory files were silently dropped (cp into a nonexistent directory,
  # with the error hidden by `|| true`).
  mkdir -p "$dst"
  if [ -d "$HOME/.hermes/knowledge" ]; then
    # Copy the *contents* (trailing /.) so the layout stays $dst/<files>,
    # matching the old `cp -r src dst` behaviour when dst did not exist.
    cp -r "$HOME/.hermes/knowledge/." "$dst/" 2>/dev/null || true
    log " Knowledge store backed up"
  fi
  local mem
  for mem in "$HOME"/.hermes/memory*; do
    if [ -f "$mem" ]; then
      cp "$mem" "$dst/" 2>/dev/null || true
    fi
  done
  log " Memory files backed up"
}
# ── Main ──────────────────────────────────────────────────────────────────
# (Reconstructed: the diff merge interleaved two main flows and left the
# `if` blocks opened at the NAS/S3 steps unclosed — a syntax error at EOF.
# Both pipelines are restored and run sequentially.)

# 1. Run the selected per-target collectors into $BACKUP_DIR.
IFS=',' read -ra TARGET_LIST <<< "$TARGETS"
for target in "${TARGET_LIST[@]}"; do
  case "$target" in
    sessions)  backup_sessions ;;
    gitea)     backup_gitea ;;
    config)    backup_config ;;
    knowledge) backup_knowledge ;;
    *)         log "Unknown target: $target" ;;
  esac
done

# 2. Passphrase pipeline: archive + encrypt the whole hermes source tree.
log "Creating archive from $BACKUP_SOURCE_DIR"
tar -czf "$PLAINTEXT_ARCHIVE" -C "$(dirname "$BACKUP_SOURCE_DIR")" "$(basename "$BACKUP_SOURCE_DIR")"

log "Encrypting archive"
openssl enc -aes-256-cbc -salt -pbkdf2 -iter 200000 \
  -pass "file:${PASSFILE}" \
  -in "$PLAINTEXT_ARCHIVE" \
  -out "$ENCRYPTED_ARCHIVE"

ARCHIVE_SHA256="$(sha256_file "$ENCRYPTED_ARCHIVE")"
CREATED_AT="$(date -u '+%Y-%m-%dT%H:%M:%SZ')"
write_manifest "$MANIFEST_PATH" "$BACKUP_SOURCE_DIR" "$(basename "$ENCRYPTED_ARCHIVE")" "$ARCHIVE_SHA256" "$LOCAL_BACKUP_DIR" "$BACKUP_S3_URI" "$BACKUP_NAS_TARGET" "$CREATED_AT"

cp "$ENCRYPTED_ARCHIVE" "$MANIFEST_PATH" "$LOCAL_BACKUP_DIR/"
rm -f "$PLAINTEXT_ARCHIVE"  # plaintext never leaves the staging dir
log "Encrypted backup stored locally: ${LOCAL_BACKUP_DIR}/$(basename "$ENCRYPTED_ARCHIVE")"

if [[ -n "$BACKUP_NAS_TARGET" ]]; then
  upload_to_nas "$ENCRYPTED_ARCHIVE" "$MANIFEST_PATH" "$BACKUP_NAS_TARGET"
fi
if [[ -n "$BACKUP_S3_URI" ]]; then
  upload_to_s3 "$ENCRYPTED_ARCHIVE" "$MANIFEST_PATH"
fi

# 3. Compress the per-target staging dir (skipped with --no-compress).
if [ -n "$COMPRESS" ]; then
  log "Compressing..."
  ARCHIVE="${BACKUP_DEST}/fleet-backup-${BACKUP_DATE}.tar.gz"
  tar -czf "$ARCHIVE" -C "$BACKUP_DEST" "$BACKUP_DATE"
  rm -rf "$BACKUP_DIR"
  log "Compressed: $ARCHIVE ($(du -sh "$ARCHIVE" | cut -f1))"
  BACKUP_FILE="$ARCHIVE"
else
  BACKUP_FILE="$BACKUP_DIR"
fi

# 4. Optional key-file encryption of the per-target backup.
if [ -n "$ENCRYPT_KEY" ] && [ -f "$ENCRYPT_KEY" ]; then
  log "Encrypting with $ENCRYPT_KEY..."
  # -pbkdf2 added for parity with the passphrase pipeline above (decryption
  # must also pass -pbkdf2). NOTE(review): with --no-compress, BACKUP_FILE is
  # a directory and this openssl call will fail — confirm intended usage.
  openssl enc -aes-256-cbc -salt -pbkdf2 -in "$BACKUP_FILE" -out "${BACKUP_FILE}.enc" -pass "file:$ENCRYPT_KEY"
  rm -f "$BACKUP_FILE"
  log "Encrypted: ${BACKUP_FILE}.enc"
  BACKUP_FILE="${BACKUP_FILE}.enc"
fi

# 5. Retention: dated dirs older than BACKUP_RETENTION_DAYS (passphrase
#    pipeline), then all but the 7 newest fleet-backup archives.
find "$BACKUP_ROOT" -mindepth 1 -maxdepth 1 -type d -name '20*' -mtime "+${BACKUP_RETENTION_DAYS}" -exec rm -rf {} + 2>/dev/null || true
log "Retention applied (${BACKUP_RETENTION_DAYS} days)"
ls -dt "${BACKUP_DEST}"/fleet-backup-* 2>/dev/null | tail -n +8 | xargs rm -f 2>/dev/null || true
log "Old backups cleaned (keeping last 7)"
log "Backup pipeline completed successfully"

# ── Summary ───────────────────────────────────────────────────────────────
echo ""
echo "=== Backup Complete ==="
echo "File: $BACKUP_FILE"
echo "Size: $(du -sh "$BACKUP_FILE" | cut -f1)"
echo "Targets: $TARGETS"