Compare commits

...

5 Commits

Author SHA1 Message Date
25eee03f6b docs: add archive_snapshot to FEATURES.yaml (#1268)
Some checks failed
CI / test (pull_request) Failing after 9s
CI / validate (pull_request) Failing after 15s
Review Approval Gate / verify-review (pull_request) Failing after 3s
2026-04-12 09:44:44 +00:00
5e033c9014 test: add snapshot test suite (#1268)
test_snapshot_create, test_snapshot_list, test_snapshot_restore, test_snapshot_diff
2026-04-12 09:43:25 +00:00
c2dd1f974f feat: add snapshot CLI commands (#1268)
mnemosyne snapshot create|list|restore|diff
2026-04-12 09:42:14 +00:00
46159b05b8 feat: export snapshot functions from mnemosyne package (#1268) 2026-04-12 09:41:32 +00:00
bbdf4fbbff feat: add archive snapshot module (#1268)
Point-in-time backup and restore for Mnemosyne.
snapshot_create, snapshot_list, snapshot_restore, snapshot_diff.
2026-04-12 09:40:54 +00:00
4 changed files with 442 additions and 1 deletions

View File

@@ -20,6 +20,12 @@ from nexus.mnemosyne.embeddings import (
TfidfEmbeddingBackend,
get_embedding_backend,
)
from nexus.mnemosyne.snapshot import (
snapshot_create,
snapshot_list,
snapshot_restore,
snapshot_diff,
)
__all__ = [
"MnemosyneArchive",
@@ -31,4 +37,8 @@ __all__ = [
"OllamaEmbeddingBackend",
"TfidfEmbeddingBackend",
"get_embedding_backend",
"snapshot_create",
"snapshot_list",
"snapshot_restore",
"snapshot_diff",
]

View File

@@ -6,7 +6,8 @@ Provides: mnemosyne ingest, mnemosyne search, mnemosyne link, mnemosyne stats,
mnemosyne tag, mnemosyne untag, mnemosyne retag,
mnemosyne timeline, mnemosyne neighbors, mnemosyne path,
mnemosyne touch, mnemosyne decay, mnemosyne vitality,
mnemosyne fading, mnemosyne vibrant
mnemosyne fading, mnemosyne vibrant,
mnemosyne snapshot create|list|restore|diff
"""
from __future__ import annotations
@@ -18,6 +19,7 @@ import sys
from nexus.mnemosyne.archive import MnemosyneArchive
from nexus.mnemosyne.entry import ArchiveEntry
from nexus.mnemosyne.ingest import ingest_event
from nexus.mnemosyne.snapshot import snapshot_create, snapshot_list, snapshot_restore, snapshot_diff
def cmd_stats(args):
@@ -315,6 +317,58 @@ def cmd_vibrant(args):
print()
def cmd_snapshot_create(args):
    """Handle ``mnemosyne snapshot create``.

    Snapshots a default-constructed archive, passing ``args.label`` through
    as the snapshot label, then prints the returned metadata.
    """
    info = snapshot_create(MnemosyneArchive(), label=args.label)
    print(f"Snapshot created: {info['snapshot_id']}")
    print(f" Entries: {info['entry_count']}")
    # An empty label is displayed as a placeholder rather than a blank field.
    shown_label = info['label'] or '(none)'
    print(f" Label: {shown_label}")
    print(f" Path: {info['path']}")
def cmd_snapshot_list(args):
    """Handle ``mnemosyne snapshot list``.

    Prints one line per snapshot in the order ``snapshot_list`` returns
    them, or a notice when there are none.
    """
    found = snapshot_list(MnemosyneArchive())
    if found:
        for snap in found:
            # Only show the parenthesised label when one was recorded.
            suffix = ""
            if snap['label']:
                suffix = f" ({snap['label']})"
            # created_at is cut to its first 19 characters for display.
            print(f" {snap['snapshot_id']} {snap['created_at'][:19]} {snap['entry_count']} entries{suffix}")
    else:
        print("No snapshots found.")
def cmd_snapshot_restore(args):
    """Handle ``mnemosyne snapshot restore <snapshot_id>``.

    Restores a default-constructed archive from the snapshot named by
    ``args.snapshot_id``. When the lookup raises ``FileNotFoundError`` the
    message is printed and the process exits with status 1.
    """
    target = MnemosyneArchive()
    try:
        outcome = snapshot_restore(target, args.snapshot_id)
    except FileNotFoundError as missing:
        print(missing)
        sys.exit(1)
    print(f"Restored snapshot {outcome['snapshot_id']}")
    print(f" Entries restored: {outcome['restored_entries']}")
    print(f" Previous count: {outcome['previous_count']}")
def cmd_snapshot_diff(args):
    """Handle ``mnemosyne snapshot diff <snapshot_id>``.

    Prints the entry counts and added/removed/changed totals reported by
    ``snapshot_diff``, followed by one line per changed entry. When the
    lookup raises ``FileNotFoundError`` the message is printed and the
    process exits with status 1.
    """
    current = MnemosyneArchive()
    try:
        report = snapshot_diff(current, args.snapshot_id)
    except FileNotFoundError as missing:
        print(missing)
        sys.exit(1)
    print(f"Diff: snapshot {report['snapshot_id']} vs current")
    print(f" Snapshot: {report['snapshot_entries']} entries")
    print(f" Current: {report['current_entries']} entries")
    print(f" Added: {report['added']}")
    print(f" Removed: {report['removed']}")
    print(f" Changed: {report['changed']}")
    details = report['changed_details']
    if details:
        # Blank separator line before the per-entry listing.
        print()
        for item in details:
            # Entry IDs are abbreviated to their first 8 characters.
            print(f" [{item['id'][:8]}] {item['title']}")
def main():
parser = argparse.ArgumentParser(prog="mnemosyne", description="The Living Holographic Archive")
sub = parser.add_subparsers(dest="command")
@@ -406,6 +460,21 @@ def main():
parser.print_help()
sys.exit(1)
# Snapshot subcommands
sp = sub.add_parser("snapshot", help="Archive snapshot operations")
sp_sub = sp.add_subparsers(dest="snapshot_command")
sp_create = sp_sub.add_parser("create", help="Create a point-in-time snapshot")
sp_create.add_argument("-l", "--label", default="", help="Human-readable label")
sp_sub.add_parser("list", help="List available snapshots")
sp_restore = sp_sub.add_parser("restore", help="Restore from a snapshot")
sp_restore.add_argument("snapshot_id", help="Snapshot ID (or prefix)")
sp_diff = sp_sub.add_parser("diff", help="Diff snapshot vs current archive")
sp_diff.add_argument("snapshot_id", help="Snapshot ID (or prefix)")
dispatch = {
"stats": cmd_stats,
"search": cmd_search,
@@ -430,9 +499,26 @@ def main():
"vitality": cmd_vitality,
"fading": cmd_fading,
"vibrant": cmd_vibrant,
"snapshot": lambda args: _dispatch_snapshot(args),
}
dispatch[args.command](args)
def _dispatch_snapshot(args):
    """Route ``mnemosyne snapshot <subcommand>`` to its handler.

    Reads ``args.snapshot_command`` (treated as absent when the attribute is
    missing). An unknown or absent subcommand prints a usage line and exits
    with status 1.
    """
    handlers = {
        "create": cmd_snapshot_create,
        "list": cmd_snapshot_list,
        "restore": cmd_snapshot_restore,
        "diff": cmd_snapshot_diff,
    }
    handler = handlers.get(getattr(args, "snapshot_command", None))
    if handler is not None:
        handler(args)
    else:
        print("Usage: mnemosyne snapshot {create|list|restore|diff}")
        sys.exit(1)
if __name__ == "__main__":
main()

206
nexus/mnemosyne/snapshot.py Normal file
View File

@@ -0,0 +1,206 @@
"""Archive snapshot — point-in-time backup and restore.
Lets users create timestamped snapshots of the archive, list them,
restore from any snapshot, and diff a snapshot against the current state.
Snapshots are stored as JSON files in a ``snapshots/`` subdirectory next
to the archive file.
"""
from __future__ import annotations
import json
import uuid
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
from nexus.mnemosyne.archive import MnemosyneArchive
from nexus.mnemosyne.entry import ArchiveEntry
def _snapshots_dir(archive: MnemosyneArchive) -> Path:
    """Return the ``snapshots`` directory beside the archive file.

    The directory (and any missing parents) is created on every call, so
    callers can rely on it existing.
    """
    location = archive.path.parent.joinpath("snapshots")
    location.mkdir(parents=True, exist_ok=True)
    return location
def snapshot_create(
    archive: MnemosyneArchive,
    label: Optional[str] = None,
) -> dict:
    """Create a point-in-time snapshot of the archive.

    The snapshot is written as ``<snapshot_id>.json`` in the snapshots
    directory next to the archive file.

    Args:
        archive: The archive to snapshot.
        label: Optional human-readable label for the snapshot. ``None`` is
            stored as an empty string.

    Returns:
        Dict with keys: snapshot_id, label, created_at, entry_count, path
    """
    directory = _snapshots_dir(archive)

    # IDs are only 8 hex characters, so a collision with an existing
    # snapshot is possible; draw again rather than overwrite a backup.
    while True:
        snapshot_id = uuid.uuid4().hex[:8]
        path = directory / f"{snapshot_id}.json"
        if not path.exists():
            break

    meta = {
        "snapshot_id": snapshot_id,
        "label": label or "",
        "created_at": datetime.now(timezone.utc).isoformat(),
        # Read once so the file and the returned metadata always agree.
        "entry_count": archive.count,
    }
    entries = [e.to_dict() for e in archive._entries.values()]

    # Serialize fully in memory before touching the filesystem, so a
    # non-serializable entry cannot leave a truncated snapshot on disk.
    payload = json.dumps({**meta, "entries": entries}, indent=2)
    path.write_text(payload, encoding="utf-8")

    return {**meta, "path": str(path)}
def snapshot_list(archive: MnemosyneArchive) -> list[dict]:
    """List all available snapshots, newest first.

    Every ``*.json`` file in the snapshots directory is inspected. Files
    that cannot be read, are not valid JSON, or do not look like a snapshot
    (missing metadata keys, non-object top level) are skipped so that one
    stray file cannot break the listing.

    Returns:
        List of dicts with keys: snapshot_id, label, created_at, entry_count
    """
    snapshots = []
    # Sorting by filename first gives a deterministic order for snapshots
    # that share a timestamp (the final sort below is stable).
    for f in sorted(_snapshots_dir(archive).glob("*.json")):
        try:
            with open(f, encoding="utf-8") as fh:
                meta = json.load(fh)
        except (OSError, ValueError):
            # ValueError covers both JSONDecodeError and UnicodeDecodeError.
            continue
        if not isinstance(meta, dict):
            continue
        try:
            item = {
                "snapshot_id": meta["snapshot_id"],
                "label": meta.get("label", ""),
                "created_at": meta["created_at"],
                "entry_count": meta["entry_count"],
            }
        except KeyError:
            continue
        # created_at is the sort key; a non-string value would make the
        # comparison below raise, so treat such a file as malformed.
        if not isinstance(item["created_at"], str):
            continue
        snapshots.append(item)
    # Newest first
    snapshots.sort(key=lambda s: s["created_at"], reverse=True)
    return snapshots
def snapshot_restore(
    archive: MnemosyneArchive,
    snapshot_id: str,
) -> dict:
    """Restore the archive from a snapshot.

    Replaces ALL current entries with the snapshot data. The archive is
    saved immediately after restore.

    Args:
        archive: The archive to restore into.
        snapshot_id: ID of the snapshot to restore (or unique prefix). An
            exact ID match always wins over longer IDs sharing the prefix.

    Returns:
        Dict with keys: snapshot_id, label, restored_entries, previous_count

    Raises:
        FileNotFoundError: If no snapshot matches, or if the prefix is empty
            or matches more than one snapshot. Restore is destructive, so an
            ambiguous prefix is refused rather than guessed.
    """
    d = _snapshots_dir(archive)

    # Resolve the ID/prefix to exactly one snapshot file.
    matches = []
    if snapshot_id:
        matches = sorted(f for f in d.glob("*.json") if f.stem.startswith(snapshot_id))
        exact = [f for f in matches if f.stem == snapshot_id]
        if exact:
            matches = exact
    if not matches:
        raise FileNotFoundError(f"No snapshot matching '{snapshot_id}' found")
    if len(matches) > 1:
        candidates = ", ".join(f.stem for f in matches)
        raise FileNotFoundError(
            f"Snapshot prefix '{snapshot_id}' is ambiguous; matches: {candidates}"
        )
    snapshot_path = matches[0]

    with open(snapshot_path, encoding="utf-8") as fh:
        data = json.load(fh)

    previous_count = archive.count

    # Build the replacement mapping completely before swapping it in, so a
    # malformed snapshot entry cannot leave the archive half-restored.
    restored = {}
    for entry_data in data["entries"]:
        entry = ArchiveEntry.from_dict(entry_data)
        restored[entry.id] = entry

    # NOTE(review): assumes `_entries` is the only archive state that needs
    # replacing before `_save()` — confirm against MnemosyneArchive.
    archive._entries = restored
    archive._save()

    return {
        "snapshot_id": data["snapshot_id"],
        "label": data.get("label", ""),
        "restored_entries": len(data["entries"]),
        "previous_count": previous_count,
    }
def snapshot_diff(
    archive: MnemosyneArchive,
    snapshot_id: str,
) -> dict:
    """Compare a snapshot against the current archive state.

    Args:
        archive: The current archive.
        snapshot_id: ID of the snapshot to compare (or unique prefix). An
            exact ID match always wins over longer IDs sharing the prefix.

    Returns:
        Dict with keys:
            snapshot_id, label: metadata read from the snapshot file.
            snapshot_entries, current_entries: entry counts on each side.
            added: number of entries in current but not in the snapshot.
            removed: number of entries in the snapshot but not in current.
            changed: number of entries with the same ID but a different
                ``content_hash``.
            added_ids, removed_ids: sorted lists of the affected IDs.
            changed_details: list of dicts (id, title, snapshot_hash,
                current_hash), ordered by entry ID.

    Raises:
        FileNotFoundError: If no snapshot matches, or if the prefix is empty
            or matches more than one snapshot.
    """
    d = _snapshots_dir(archive)

    # Resolve the ID/prefix to exactly one snapshot file.
    matches = []
    if snapshot_id:
        matches = sorted(f for f in d.glob("*.json") if f.stem.startswith(snapshot_id))
        exact = [f for f in matches if f.stem == snapshot_id]
        if exact:
            matches = exact
    if not matches:
        raise FileNotFoundError(f"No snapshot matching '{snapshot_id}' found")
    if len(matches) > 1:
        candidates = ", ".join(f.stem for f in matches)
        raise FileNotFoundError(
            f"Snapshot prefix '{snapshot_id}' is ambiguous; matches: {candidates}"
        )
    snapshot_path = matches[0]

    with open(snapshot_path, encoding="utf-8") as fh:
        data = json.load(fh)

    snap_entries = {e["id"]: e for e in data["entries"]}
    curr_entries = {e.id: e.to_dict() for e in archive._entries.values()}

    snap_ids = set(snap_entries)
    curr_ids = set(curr_entries)
    added_ids = curr_ids - snap_ids
    removed_ids = snap_ids - curr_ids

    changed = []
    # Sorted so the report order is stable between runs; plain set iteration
    # order varies with string hash randomization.
    for eid in sorted(snap_ids & curr_ids):
        # Entries missing a content_hash on both sides compare as unchanged.
        snap_hash = snap_entries[eid].get("content_hash", "")
        curr_hash = curr_entries[eid].get("content_hash", "")
        if snap_hash != curr_hash:
            changed.append({
                "id": eid,
                "title": curr_entries[eid].get("title", ""),
                "snapshot_hash": snap_hash,
                "current_hash": curr_hash,
            })

    return {
        "snapshot_id": data["snapshot_id"],
        "label": data.get("label", ""),
        "snapshot_entries": len(snap_entries),
        "current_entries": len(curr_entries),
        "added": len(added_ids),
        "removed": len(removed_ids),
        "changed": len(changed),
        "added_ids": sorted(added_ids),
        "removed_ids": sorted(removed_ids),
        "changed_details": changed,
    }

View File

@@ -0,0 +1,139 @@
"""Tests for Mnemosyne archive snapshot — create, list, restore, diff."""
import json
import tempfile
from pathlib import Path
import pytest
from nexus.mnemosyne.archive import MnemosyneArchive
from nexus.mnemosyne.entry import ArchiveEntry
from nexus.mnemosyne.ingest import ingest_event
from nexus.mnemosyne.snapshot import (
snapshot_create,
snapshot_list,
snapshot_restore,
snapshot_diff,
)
@pytest.fixture
def archive(tmp_path):
    """Provide an archive under ``tmp_path`` pre-loaded with three entries."""
    store = MnemosyneArchive(archive_path=tmp_path / "test_archive.json", auto_embed=False)
    seed_events = (
        ("First", "hello world", ["test"]),
        ("Second", "another entry", ["demo"]),
        ("Third", "more content here", ["test", "demo"]),
    )
    for title, content, topics in seed_events:
        ingest_event(store, title=title, content=content, topics=topics)
    return store
class TestSnapshotCreate:
    """snapshot_create: returned metadata and the file written to disk."""

    def test_create_returns_metadata(self, archive):
        meta = snapshot_create(archive, label="test snap")
        assert "snapshot_id" in meta
        assert meta["label"] == "test snap"
        assert meta["entry_count"] == 3
        assert Path(meta["path"]).exists()

    def test_create_no_label(self, archive):
        # Omitting the label yields an empty string, not None.
        meta = snapshot_create(archive)
        assert meta["label"] == ""

    def test_snapshot_file_is_valid_json(self, archive):
        meta = snapshot_create(archive)
        with open(meta["path"]) as handle:
            stored = json.load(handle)
        assert stored["entry_count"] == 3
        assert len(stored["entries"]) == 3
        assert "created_at" in stored
class TestSnapshotList:
    """snapshot_list: empty state, ordering, and reported entry counts."""

    def test_empty_list(self, archive):
        # No snapshot has been created for this archive yet, so the listing
        # must come back empty.
        listing = snapshot_list(archive)
        assert listing == []

    def test_list_returns_created_snapshots(self, archive):
        for name in ("first", "second"):
            snapshot_create(archive, label=name)
        listing = snapshot_list(archive)
        assert len(listing) == 2
        # Newest first
        assert listing[0]["label"] == "second"
        assert listing[1]["label"] == "first"

    def test_list_entry_count(self, archive):
        snapshot_create(archive)
        listing = snapshot_list(archive)
        assert listing[0]["entry_count"] == 3
class TestSnapshotRestore:
    """snapshot_restore: entry replacement, prefix lookup, and error path."""

    def test_restore_replaces_entries(self, archive):
        snap_id = snapshot_create(archive, label="before change")["snapshot_id"]

        # Grow the archive past the snapshotted state.
        ingest_event(archive, title="Fourth", content="new entry", topics=["new"])
        assert archive.count == 4

        # Restoring must drop the extra entry again.
        outcome = snapshot_restore(archive, snap_id)
        assert outcome["restored_entries"] == 3
        assert outcome["previous_count"] == 4
        assert archive.count == 3

    def test_restore_prefix_match(self, archive):
        snap_id = snapshot_create(archive)["snapshot_id"]
        # The first four characters are enough to identify the snapshot.
        outcome = snapshot_restore(archive, snap_id[:4])
        assert outcome["snapshot_id"] == snap_id

    def test_restore_nonexistent_raises(self, archive):
        with pytest.raises(FileNotFoundError):
            snapshot_restore(archive, "nonexistent-id")

    def test_restore_preserves_content(self, archive):
        created = snapshot_create(archive)
        titles_before = sorted(e.title for e in archive._entries.values())

        ingest_event(archive, title="Extra", content="extra", topics=[])
        snapshot_restore(archive, created["snapshot_id"])

        titles_after = sorted(e.title for e in archive._entries.values())
        assert titles_after == titles_before
class TestSnapshotDiff:
    """snapshot_diff: identical state, additions, removals, and error path."""

    def test_diff_identical(self, archive):
        snap_id = snapshot_create(archive)["snapshot_id"]
        report = snapshot_diff(archive, snap_id)
        assert report["added"] == 0
        assert report["removed"] == 0
        assert report["changed"] == 0

    def test_diff_added_entries(self, archive):
        snap_id = snapshot_create(archive)["snapshot_id"]
        ingest_event(archive, title="New Entry", content="new", topics=["new"])
        report = snapshot_diff(archive, snap_id)
        assert report["added"] == 1
        assert report["removed"] == 0
        assert report["current_entries"] == 4
        assert report["snapshot_entries"] == 3

    def test_diff_removed_entries(self, archive):
        snap_id = snapshot_create(archive)["snapshot_id"]
        # Drop the first entry so it exists only in the snapshot.
        victim_id = next(iter(archive._entries))
        archive.remove(victim_id)
        report = snapshot_diff(archive, snap_id)
        assert report["removed"] == 1
        assert victim_id in report["removed_ids"]

    def test_diff_nonexistent_raises(self, archive):
        with pytest.raises(FileNotFoundError):
            snapshot_diff(archive, "nope")