feat: [Mnemosyne] add archive snapshot — point-in-time backup and restore
Some checks failed
CI / test (pull_request) Failing after 9s
CI / validate (pull_request) Failing after 15s
Review Approval Gate / verify-review (pull_request) Failing after 3s

Implements snapshot_create, snapshot_list, snapshot_restore, and
snapshot_diff on MnemosyneArchive. Adds CLI subcommand
`mnemosyne snapshot create|list|restore|diff`. 17 new tests covering
all operations. Snapshots are stored as JSON files in
~/.hermes/mnemosyne/snapshots/ next to the archive.

Fixes #1268

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Alexander Whitestone
2026-04-12 05:47:36 -04:00
parent bb21beccdd
commit 025e16cf4a
3 changed files with 487 additions and 1 deletions

View File

@@ -1105,6 +1105,175 @@ class MnemosyneArchive:
steps.append({"id": entry_id, "title": "[unknown]", "topics": []})
return steps
# ─── Snapshot / Backup ────────────────────────────────────
def _snapshot_dir(self) -> Path:
"""Return (and create) the snapshots directory next to the archive."""
d = self.path.parent / "snapshots"
d.mkdir(parents=True, exist_ok=True)
return d
@staticmethod
def _snapshot_filename(timestamp: str, label: str) -> str:
"""Build a deterministic snapshot filename."""
safe_label = "".join(c if c.isalnum() or c in "-_" else "_" for c in label) if label else "snapshot"
return f"{timestamp}_{safe_label}.json"
def snapshot_create(self, label: str = "") -> dict:
"""Serialize the current archive state to a timestamped snapshot file.
Args:
label: Human-readable label for the snapshot (optional).
Returns:
Dict with keys: snapshot_id, label, created_at, entry_count, path
"""
now = datetime.now(timezone.utc)
timestamp = now.strftime("%Y%m%d_%H%M%S")
filename = self._snapshot_filename(timestamp, label)
snapshot_id = filename[:-5] # strip .json
snap_path = self._snapshot_dir() / filename
payload = {
"snapshot_id": snapshot_id,
"label": label,
"created_at": now.isoformat(),
"entry_count": len(self._entries),
"archive_path": str(self.path),
"entries": [e.to_dict() for e in self._entries.values()],
}
with open(snap_path, "w") as f:
json.dump(payload, f, indent=2)
return {
"snapshot_id": snapshot_id,
"label": label,
"created_at": payload["created_at"],
"entry_count": payload["entry_count"],
"path": str(snap_path),
}
def snapshot_list(self) -> list[dict]:
"""List available snapshots, newest first.
Returns:
List of dicts with keys: snapshot_id, label, created_at, entry_count, path
"""
snap_dir = self._snapshot_dir()
snapshots = []
for snap_path in sorted(snap_dir.glob("*.json"), reverse=True):
try:
with open(snap_path) as f:
data = json.load(f)
snapshots.append({
"snapshot_id": data.get("snapshot_id", snap_path.stem),
"label": data.get("label", ""),
"created_at": data.get("created_at", ""),
"entry_count": data.get("entry_count", len(data.get("entries", []))),
"path": str(snap_path),
})
except (json.JSONDecodeError, OSError):
continue
return snapshots
def snapshot_restore(self, snapshot_id: str) -> dict:
"""Restore the archive from a snapshot, replacing all current entries.
Args:
snapshot_id: The snapshot_id returned by snapshot_create / snapshot_list.
Returns:
Dict with keys: snapshot_id, restored_count, previous_count
Raises:
FileNotFoundError: If no snapshot with that ID exists.
"""
snap_dir = self._snapshot_dir()
snap_path = snap_dir / f"{snapshot_id}.json"
if not snap_path.exists():
raise FileNotFoundError(f"Snapshot not found: {snapshot_id}")
with open(snap_path) as f:
data = json.load(f)
previous_count = len(self._entries)
self._entries = {}
for entry_data in data.get("entries", []):
entry = ArchiveEntry.from_dict(entry_data)
self._entries[entry.id] = entry
self._save()
return {
"snapshot_id": snapshot_id,
"restored_count": len(self._entries),
"previous_count": previous_count,
}
def snapshot_diff(self, snapshot_id: str) -> dict:
"""Compare a snapshot against the current archive state.
Args:
snapshot_id: The snapshot_id to compare against current state.
Returns:
Dict with keys:
- snapshot_id: str
- added: list of {id, title} — in current, not in snapshot
- removed: list of {id, title} — in snapshot, not in current
- modified: list of {id, title, snapshot_hash, current_hash}
- unchanged: int — count of identical entries
Raises:
FileNotFoundError: If no snapshot with that ID exists.
"""
snap_dir = self._snapshot_dir()
snap_path = snap_dir / f"{snapshot_id}.json"
if not snap_path.exists():
raise FileNotFoundError(f"Snapshot not found: {snapshot_id}")
with open(snap_path) as f:
data = json.load(f)
snap_entries: dict[str, dict] = {}
for entry_data in data.get("entries", []):
snap_entries[entry_data["id"]] = entry_data
current_ids = set(self._entries.keys())
snap_ids = set(snap_entries.keys())
added = []
for eid in current_ids - snap_ids:
e = self._entries[eid]
added.append({"id": e.id, "title": e.title})
removed = []
for eid in snap_ids - current_ids:
snap_e = snap_entries[eid]
removed.append({"id": snap_e["id"], "title": snap_e.get("title", "")})
modified = []
unchanged = 0
for eid in current_ids & snap_ids:
current_hash = self._entries[eid].content_hash
snap_hash = snap_entries[eid].get("content_hash")
if current_hash != snap_hash:
modified.append({
"id": eid,
"title": self._entries[eid].title,
"snapshot_hash": snap_hash,
"current_hash": current_hash,
})
else:
unchanged += 1
return {
"snapshot_id": snapshot_id,
"added": sorted(added, key=lambda x: x["title"]),
"removed": sorted(removed, key=lambda x: x["title"]),
"modified": sorted(modified, key=lambda x: x["title"]),
"unchanged": unchanged,
}
def rebuild_links(self, threshold: Optional[float] = None) -> int:
"""Recompute all links from scratch.

View File

@@ -6,7 +6,8 @@ Provides: mnemosyne ingest, mnemosyne search, mnemosyne link, mnemosyne stats,
mnemosyne tag, mnemosyne untag, mnemosyne retag,
mnemosyne timeline, mnemosyne neighbors, mnemosyne path,
mnemosyne touch, mnemosyne decay, mnemosyne vitality,
mnemosyne fading, mnemosyne vibrant
mnemosyne fading, mnemosyne vibrant,
mnemosyne snapshot create|list|restore|diff
"""
from __future__ import annotations
@@ -303,6 +304,68 @@ def cmd_fading(args):
print()
def cmd_snapshot(args):
archive = MnemosyneArchive()
if args.snapshot_cmd == "create":
result = archive.snapshot_create(label=args.label or "")
print(f"Snapshot created: {result['snapshot_id']}")
print(f" Label: {result['label'] or '(none)'}")
print(f" Entries: {result['entry_count']}")
print(f" Path: {result['path']}")
elif args.snapshot_cmd == "list":
snapshots = archive.snapshot_list()
if not snapshots:
print("No snapshots found.")
return
for s in snapshots:
print(f"[{s['snapshot_id']}]")
print(f" Label: {s['label'] or '(none)'}")
print(f" Created: {s['created_at']}")
print(f" Entries: {s['entry_count']}")
print()
elif args.snapshot_cmd == "restore":
try:
result = archive.snapshot_restore(args.snapshot_id)
except FileNotFoundError as e:
print(str(e))
sys.exit(1)
print(f"Restored from snapshot: {result['snapshot_id']}")
print(f" Entries restored: {result['restored_count']}")
print(f" Previous count: {result['previous_count']}")
elif args.snapshot_cmd == "diff":
try:
diff = archive.snapshot_diff(args.snapshot_id)
except FileNotFoundError as e:
print(str(e))
sys.exit(1)
print(f"Diff vs snapshot: {diff['snapshot_id']}")
print(f" Added ({len(diff['added'])}): ", end="")
if diff["added"]:
print()
for e in diff["added"]:
print(f" + [{e['id'][:8]}] {e['title']}")
else:
print("none")
print(f" Removed ({len(diff['removed'])}): ", end="")
if diff["removed"]:
print()
for e in diff["removed"]:
print(f" - [{e['id'][:8]}] {e['title']}")
else:
print("none")
print(f" Modified({len(diff['modified'])}): ", end="")
if diff["modified"]:
print()
for e in diff["modified"]:
print(f" ~ [{e['id'][:8]}] {e['title']}")
else:
print("none")
print(f" Unchanged: {diff['unchanged']}")
else:
print(f"Unknown snapshot subcommand: {args.snapshot_cmd}")
sys.exit(1)
def cmd_vibrant(args):
archive = MnemosyneArchive()
results = archive.vibrant(limit=args.limit)
@@ -401,10 +464,23 @@ def main():
vb = sub.add_parser("vibrant", help="Show most alive entries (highest vitality)")
vb.add_argument("-n", "--limit", type=int, default=10, help="Max entries to show")
sn = sub.add_parser("snapshot", help="Point-in-time backup and restore")
sn_sub = sn.add_subparsers(dest="snapshot_cmd")
sn_create = sn_sub.add_parser("create", help="Create a new snapshot")
sn_create.add_argument("--label", default="", help="Human-readable label for the snapshot")
sn_sub.add_parser("list", help="List available snapshots")
sn_restore = sn_sub.add_parser("restore", help="Restore archive from a snapshot")
sn_restore.add_argument("snapshot_id", help="Snapshot ID to restore")
sn_diff = sn_sub.add_parser("diff", help="Show what changed since a snapshot")
sn_diff.add_argument("snapshot_id", help="Snapshot ID to compare against")
args = parser.parse_args()
if not args.command:
parser.print_help()
sys.exit(1)
if args.command == "snapshot" and not args.snapshot_cmd:
sn.print_help()
sys.exit(1)
dispatch = {
"stats": cmd_stats,
@@ -430,6 +506,7 @@ def main():
"vitality": cmd_vitality,
"fading": cmd_fading,
"vibrant": cmd_vibrant,
"snapshot": cmd_snapshot,
}
dispatch[args.command](args)

View File

@@ -0,0 +1,240 @@
"""Tests for Mnemosyne snapshot (point-in-time backup/restore) feature."""
from __future__ import annotations
import json
import tempfile
from pathlib import Path
import pytest
from nexus.mnemosyne.archive import MnemosyneArchive
from nexus.mnemosyne.ingest import ingest_event
def _make_archive(tmp_dir: str) -> MnemosyneArchive:
path = Path(tmp_dir) / "archive.json"
return MnemosyneArchive(archive_path=path, auto_embed=False)
# ─── snapshot_create ─────────────────────────────────────────────────────────
def test_snapshot_create_returns_metadata():
with tempfile.TemporaryDirectory() as tmp:
archive = _make_archive(tmp)
ingest_event(archive, title="Alpha", content="First entry", topics=["a"])
ingest_event(archive, title="Beta", content="Second entry", topics=["b"])
result = archive.snapshot_create(label="before-bulk-op")
assert result["entry_count"] == 2
assert result["label"] == "before-bulk-op"
assert "snapshot_id" in result
assert "created_at" in result
assert "path" in result
assert Path(result["path"]).exists()
def test_snapshot_create_no_label():
with tempfile.TemporaryDirectory() as tmp:
archive = _make_archive(tmp)
ingest_event(archive, title="Gamma", content="Third entry", topics=[])
result = archive.snapshot_create()
assert result["label"] == ""
assert result["entry_count"] == 1
assert Path(result["path"]).exists()
def test_snapshot_file_contains_entries():
with tempfile.TemporaryDirectory() as tmp:
archive = _make_archive(tmp)
e = ingest_event(archive, title="Delta", content="Fourth entry", topics=["d"])
result = archive.snapshot_create(label="check-content")
with open(result["path"]) as f:
data = json.load(f)
assert data["entry_count"] == 1
assert len(data["entries"]) == 1
assert data["entries"][0]["id"] == e.id
assert data["entries"][0]["title"] == "Delta"
def test_snapshot_create_empty_archive():
with tempfile.TemporaryDirectory() as tmp:
archive = _make_archive(tmp)
result = archive.snapshot_create(label="empty")
assert result["entry_count"] == 0
assert Path(result["path"]).exists()
# ─── snapshot_list ───────────────────────────────────────────────────────────
def test_snapshot_list_empty():
with tempfile.TemporaryDirectory() as tmp:
archive = _make_archive(tmp)
assert archive.snapshot_list() == []
def test_snapshot_list_returns_all():
with tempfile.TemporaryDirectory() as tmp:
archive = _make_archive(tmp)
ingest_event(archive, title="One", content="c1", topics=[])
archive.snapshot_create(label="first")
ingest_event(archive, title="Two", content="c2", topics=[])
archive.snapshot_create(label="second")
snapshots = archive.snapshot_list()
assert len(snapshots) == 2
labels = {s["label"] for s in snapshots}
assert "first" in labels
assert "second" in labels
def test_snapshot_list_metadata_fields():
with tempfile.TemporaryDirectory() as tmp:
archive = _make_archive(tmp)
archive.snapshot_create(label="meta-check")
snapshots = archive.snapshot_list()
s = snapshots[0]
for key in ("snapshot_id", "label", "created_at", "entry_count", "path"):
assert key in s
def test_snapshot_list_newest_first():
with tempfile.TemporaryDirectory() as tmp:
archive = _make_archive(tmp)
archive.snapshot_create(label="a")
archive.snapshot_create(label="b")
snapshots = archive.snapshot_list()
# Filenames sort lexicographically; newest (b) should be first
# (filenames include timestamp so alphabetical = newest-last;
# snapshot_list reverses the glob order → newest first)
assert len(snapshots) == 2
# Both should be present; ordering is newest first
ids = [s["snapshot_id"] for s in snapshots]
assert ids == sorted(ids, reverse=True)
# ─── snapshot_restore ────────────────────────────────────────────────────────
def test_snapshot_restore_replaces_entries():
with tempfile.TemporaryDirectory() as tmp:
archive = _make_archive(tmp)
ingest_event(archive, title="Kept", content="original content", topics=["orig"])
snap = archive.snapshot_create(label="pre-change")
# Mutate archive after snapshot
ingest_event(archive, title="New entry", content="post-snapshot", topics=["new"])
assert archive.count == 2
result = archive.snapshot_restore(snap["snapshot_id"])
assert result["restored_count"] == 1
assert result["previous_count"] == 2
assert archive.count == 1
entry = list(archive._entries.values())[0]
assert entry.title == "Kept"
def test_snapshot_restore_persists_to_disk():
with tempfile.TemporaryDirectory() as tmp:
path = Path(tmp) / "archive.json"
archive = _make_archive(tmp)
ingest_event(archive, title="Persisted", content="should survive reload", topics=[])
snap = archive.snapshot_create(label="persist-test")
ingest_event(archive, title="Transient", content="added after snapshot", topics=[])
archive.snapshot_restore(snap["snapshot_id"])
# Reload from disk
archive2 = MnemosyneArchive(archive_path=path, auto_embed=False)
assert archive2.count == 1
assert list(archive2._entries.values())[0].title == "Persisted"
def test_snapshot_restore_missing_raises():
with tempfile.TemporaryDirectory() as tmp:
archive = _make_archive(tmp)
with pytest.raises(FileNotFoundError):
archive.snapshot_restore("nonexistent_snapshot_id")
# ─── snapshot_diff ───────────────────────────────────────────────────────────
def test_snapshot_diff_no_changes():
with tempfile.TemporaryDirectory() as tmp:
archive = _make_archive(tmp)
ingest_event(archive, title="Stable", content="unchanged content", topics=[])
snap = archive.snapshot_create(label="baseline")
diff = archive.snapshot_diff(snap["snapshot_id"])
assert diff["added"] == []
assert diff["removed"] == []
assert diff["modified"] == []
assert diff["unchanged"] == 1
def test_snapshot_diff_detects_added():
with tempfile.TemporaryDirectory() as tmp:
archive = _make_archive(tmp)
ingest_event(archive, title="Original", content="existing", topics=[])
snap = archive.snapshot_create(label="before-add")
ingest_event(archive, title="Newcomer", content="added after", topics=[])
diff = archive.snapshot_diff(snap["snapshot_id"])
assert len(diff["added"]) == 1
assert diff["added"][0]["title"] == "Newcomer"
assert diff["removed"] == []
assert diff["unchanged"] == 1
def test_snapshot_diff_detects_removed():
with tempfile.TemporaryDirectory() as tmp:
archive = _make_archive(tmp)
e1 = ingest_event(archive, title="Will Be Removed", content="doomed", topics=[])
ingest_event(archive, title="Survivor", content="stays", topics=[])
snap = archive.snapshot_create(label="pre-removal")
archive.remove(e1.id)
diff = archive.snapshot_diff(snap["snapshot_id"])
assert len(diff["removed"]) == 1
assert diff["removed"][0]["title"] == "Will Be Removed"
assert diff["added"] == []
assert diff["unchanged"] == 1
def test_snapshot_diff_detects_modified():
with tempfile.TemporaryDirectory() as tmp:
archive = _make_archive(tmp)
e = ingest_event(archive, title="Mutable", content="original content", topics=[])
snap = archive.snapshot_create(label="pre-edit")
archive.update_entry(e.id, content="updated content", auto_link=False)
diff = archive.snapshot_diff(snap["snapshot_id"])
assert len(diff["modified"]) == 1
assert diff["modified"][0]["title"] == "Mutable"
assert diff["modified"][0]["snapshot_hash"] != diff["modified"][0]["current_hash"]
assert diff["added"] == []
assert diff["removed"] == []
def test_snapshot_diff_missing_raises():
with tempfile.TemporaryDirectory() as tmp:
archive = _make_archive(tmp)
with pytest.raises(FileNotFoundError):
archive.snapshot_diff("no_such_snapshot")
def test_snapshot_diff_includes_snapshot_id():
with tempfile.TemporaryDirectory() as tmp:
archive = _make_archive(tmp)
snap = archive.snapshot_create(label="id-check")
diff = archive.snapshot_diff(snap["snapshot_id"])
assert diff["snapshot_id"] == snap["snapshot_id"]