Compare commits
5 Commits
mimo/code/
...
feat/mnemo
| Author | SHA1 | Date | |
|---|---|---|---|
| 25eee03f6b | |||
| 5e033c9014 | |||
| c2dd1f974f | |||
| 46159b05b8 | |||
| bbdf4fbbff |
@@ -20,6 +20,12 @@ from nexus.mnemosyne.embeddings import (
|
||||
TfidfEmbeddingBackend,
|
||||
get_embedding_backend,
|
||||
)
|
||||
from nexus.mnemosyne.snapshot import (
|
||||
snapshot_create,
|
||||
snapshot_list,
|
||||
snapshot_restore,
|
||||
snapshot_diff,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"MnemosyneArchive",
|
||||
@@ -31,4 +37,8 @@ __all__ = [
|
||||
"OllamaEmbeddingBackend",
|
||||
"TfidfEmbeddingBackend",
|
||||
"get_embedding_backend",
|
||||
"snapshot_create",
|
||||
"snapshot_list",
|
||||
"snapshot_restore",
|
||||
"snapshot_diff",
|
||||
]
|
||||
|
||||
@@ -6,7 +6,8 @@ Provides: mnemosyne ingest, mnemosyne search, mnemosyne link, mnemosyne stats,
|
||||
mnemosyne tag, mnemosyne untag, mnemosyne retag,
|
||||
mnemosyne timeline, mnemosyne neighbors, mnemosyne path,
|
||||
mnemosyne touch, mnemosyne decay, mnemosyne vitality,
|
||||
mnemosyne fading, mnemosyne vibrant
|
||||
mnemosyne fading, mnemosyne vibrant,
|
||||
mnemosyne snapshot create|list|restore|diff
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -18,6 +19,7 @@ import sys
|
||||
from nexus.mnemosyne.archive import MnemosyneArchive
|
||||
from nexus.mnemosyne.entry import ArchiveEntry
|
||||
from nexus.mnemosyne.ingest import ingest_event
|
||||
from nexus.mnemosyne.snapshot import snapshot_create, snapshot_list, snapshot_restore, snapshot_diff
|
||||
|
||||
|
||||
def cmd_stats(args):
|
||||
@@ -315,6 +317,58 @@ def cmd_vibrant(args):
|
||||
print()
|
||||
|
||||
|
||||
|
||||
def cmd_snapshot_create(args):
    """CLI handler: create a snapshot of the live archive and print a summary."""
    info = snapshot_create(MnemosyneArchive(), label=args.label)
    print(f"Snapshot created: {info['snapshot_id']}")
    print(f" Entries: {info['entry_count']}")
    print(f" Label: {info['label'] or '(none)'}")
    print(f" Path: {info['path']}")
|
||||
|
||||
|
||||
def cmd_snapshot_list(args):
    """CLI handler: print one line per stored snapshot, newest first."""
    listing = snapshot_list(MnemosyneArchive())
    if not listing:
        print("No snapshots found.")
        return
    for snap in listing:
        suffix = f" ({snap['label']})" if snap['label'] else ""
        print(f" {snap['snapshot_id']} {snap['created_at'][:19]} {snap['entry_count']} entries{suffix}")
|
||||
|
||||
|
||||
def cmd_snapshot_restore(args):
    """CLI handler: restore the archive from the snapshot named by ``args.snapshot_id``.

    On a missing snapshot the error text goes to stderr and the process
    exits with status 1, so stdout stays clean for scripted consumers.
    """
    archive = MnemosyneArchive()
    try:
        result = snapshot_restore(archive, args.snapshot_id)
    except FileNotFoundError as e:
        # Errors belong on stderr, not mixed into normal output.
        print(str(e), file=sys.stderr)
        sys.exit(1)
    print(f"Restored snapshot {result['snapshot_id']}")
    print(f" Entries restored: {result['restored_entries']}")
    print(f" Previous count: {result['previous_count']}")
|
||||
|
||||
|
||||
def cmd_snapshot_diff(args):
    """CLI handler: summarize the diff between a snapshot and the current archive.

    On a missing snapshot the error text goes to stderr and the process
    exits with status 1, so stdout stays clean for scripted consumers.
    """
    archive = MnemosyneArchive()
    try:
        result = snapshot_diff(archive, args.snapshot_id)
    except FileNotFoundError as e:
        # Errors belong on stderr, not mixed into normal output.
        print(str(e), file=sys.stderr)
        sys.exit(1)
    print(f"Diff: snapshot {result['snapshot_id']} vs current")
    print(f" Snapshot: {result['snapshot_entries']} entries")
    print(f" Current: {result['current_entries']} entries")
    print(f" Added: {result['added']}")
    print(f" Removed: {result['removed']}")
    print(f" Changed: {result['changed']}")
    if result['changed_details']:
        print()
        for c in result['changed_details']:
            print(f" [{c['id'][:8]}] {c['title']}")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(prog="mnemosyne", description="The Living Holographic Archive")
|
||||
sub = parser.add_subparsers(dest="command")
|
||||
@@ -406,6 +460,21 @@ def main():
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
|
||||
# Snapshot subcommands
|
||||
sp = sub.add_parser("snapshot", help="Archive snapshot operations")
|
||||
sp_sub = sp.add_subparsers(dest="snapshot_command")
|
||||
|
||||
sp_create = sp_sub.add_parser("create", help="Create a point-in-time snapshot")
|
||||
sp_create.add_argument("-l", "--label", default="", help="Human-readable label")
|
||||
|
||||
sp_sub.add_parser("list", help="List available snapshots")
|
||||
|
||||
sp_restore = sp_sub.add_parser("restore", help="Restore from a snapshot")
|
||||
sp_restore.add_argument("snapshot_id", help="Snapshot ID (or prefix)")
|
||||
|
||||
sp_diff = sp_sub.add_parser("diff", help="Diff snapshot vs current archive")
|
||||
sp_diff.add_argument("snapshot_id", help="Snapshot ID (or prefix)")
|
||||
|
||||
dispatch = {
|
||||
"stats": cmd_stats,
|
||||
"search": cmd_search,
|
||||
@@ -430,9 +499,26 @@ def main():
|
||||
"vitality": cmd_vitality,
|
||||
"fading": cmd_fading,
|
||||
"vibrant": cmd_vibrant,
|
||||
"snapshot": lambda args: _dispatch_snapshot(args),
|
||||
}
|
||||
dispatch[args.command](args)
|
||||
|
||||
|
||||
def _dispatch_snapshot(args):
    """Send a parsed ``snapshot`` command to the matching handler."""
    sub = getattr(args, "snapshot_command", None)
    # Guard clause: anything unrecognized (including a missing subcommand)
    # prints usage and exits non-zero.
    if sub not in ("create", "list", "restore", "diff"):
        print("Usage: mnemosyne snapshot {create|list|restore|diff}")
        sys.exit(1)
    if sub == "create":
        cmd_snapshot_create(args)
    elif sub == "list":
        cmd_snapshot_list(args)
    elif sub == "restore":
        cmd_snapshot_restore(args)
    else:
        cmd_snapshot_diff(args)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
206
nexus/mnemosyne/snapshot.py
Normal file
206
nexus/mnemosyne/snapshot.py
Normal file
@@ -0,0 +1,206 @@
|
||||
"""Archive snapshot — point-in-time backup and restore.
|
||||
|
||||
Lets users create timestamped snapshots of the archive, list them,
|
||||
restore from any snapshot, and diff a snapshot against the current state.
|
||||
Snapshots are stored as JSON files in a ``snapshots/`` subdirectory next
|
||||
to the archive file.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from nexus.mnemosyne.archive import MnemosyneArchive
|
||||
from nexus.mnemosyne.entry import ArchiveEntry
|
||||
|
||||
|
||||
def _snapshots_dir(archive: MnemosyneArchive) -> Path:
    """Locate the ``snapshots/`` directory beside the archive file, creating it on demand."""
    snapshots = archive.path.parent / "snapshots"
    snapshots.mkdir(parents=True, exist_ok=True)
    return snapshots
|
||||
|
||||
|
||||
def snapshot_create(
    archive: MnemosyneArchive,
    label: Optional[str] = None,
) -> dict:
    """Create a point-in-time snapshot of the archive.

    Args:
        archive: The archive to snapshot.
        label: Optional human-readable label for the snapshot.

    Returns:
        Dict with keys: snapshot_id, label, created_at, entry_count, path
    """
    # Shared metadata: written to the snapshot file and echoed to the caller.
    meta = {
        "snapshot_id": str(uuid.uuid4())[:8],
        "label": label or "",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "entry_count": archive.count,
    }

    payload = dict(meta, entries=[entry.to_dict() for entry in archive._entries.values()])
    target = _snapshots_dir(archive) / f"{meta['snapshot_id']}.json"
    with open(target, "w") as fh:
        json.dump(payload, fh, indent=2)

    return dict(meta, path=str(target))
|
||||
|
||||
|
||||
def snapshot_list(archive: MnemosyneArchive) -> list[dict]:
    """List all available snapshots, newest first.

    Returns:
        List of dicts with keys: snapshot_id, label, created_at, entry_count
    """
    found: list[dict] = []
    for path in sorted(_snapshots_dir(archive).glob("*.json")):
        try:
            with open(path) as fh:
                raw = json.load(fh)
            found.append({
                "snapshot_id": raw["snapshot_id"],
                "label": raw.get("label", ""),
                "created_at": raw["created_at"],
                "entry_count": raw["entry_count"],
            })
        except (json.JSONDecodeError, KeyError):
            # Skip corrupt or foreign JSON files rather than failing the listing.
            continue
    return sorted(found, key=lambda item: item["created_at"], reverse=True)
|
||||
|
||||
|
||||
def snapshot_restore(
    archive: MnemosyneArchive,
    snapshot_id: str,
) -> dict:
    """Restore the archive from a snapshot.

    Replaces ALL current entries with the snapshot data. The archive is
    saved immediately after restore.

    Args:
        archive: The archive to restore into.
        snapshot_id: ID of the snapshot to restore (or unique prefix).

    Returns:
        Dict with keys: snapshot_id, label, restored_entries, previous_count

    Raises:
        FileNotFoundError: If no snapshot matches, or the prefix matches
            more than one snapshot.
    """
    d = _snapshots_dir(archive)

    # Resolve the ID prefix. Collect every candidate so an ambiguous prefix
    # fails loudly instead of silently restoring an arbitrary snapshot.
    matches = [f for f in d.glob("*.json") if f.stem.startswith(snapshot_id)]
    if not matches:
        raise FileNotFoundError(f"No snapshot matching '{snapshot_id}' found")
    if len(matches) > 1:
        ids = ", ".join(sorted(m.stem for m in matches))
        raise FileNotFoundError(
            f"Ambiguous snapshot prefix '{snapshot_id}' (matches: {ids})"
        )

    with open(matches[0]) as fh:
        data = json.load(fh)

    previous_count = archive.count

    # Replace the in-memory entry map wholesale, then persist immediately.
    archive._entries = {}
    for entry_data in data["entries"]:
        entry = ArchiveEntry.from_dict(entry_data)
        archive._entries[entry.id] = entry
    archive._save()

    return {
        "snapshot_id": data["snapshot_id"],
        "label": data.get("label", ""),
        "restored_entries": len(data["entries"]),
        "previous_count": previous_count,
    }
|
||||
|
||||
|
||||
def snapshot_diff(
    archive: MnemosyneArchive,
    snapshot_id: str,
) -> dict:
    """Compare a snapshot against the current archive state.

    Args:
        archive: The current archive.
        snapshot_id: ID of the snapshot to compare (or unique prefix).

    Returns:
        Dict with keys: snapshot_id, label, snapshot_entries, current_entries,
        added (in current but not snapshot), removed (in snapshot but not current),
        changed (same ID but different content_hash), plus added_ids,
        removed_ids, and changed_details.

    Raises:
        FileNotFoundError: If no snapshot matches, or the prefix matches
            more than one snapshot.
    """
    d = _snapshots_dir(archive)

    # Resolve the ID prefix. Collect every candidate so an ambiguous prefix
    # fails loudly instead of silently diffing an arbitrary snapshot.
    matches = [f for f in d.glob("*.json") if f.stem.startswith(snapshot_id)]
    if not matches:
        raise FileNotFoundError(f"No snapshot matching '{snapshot_id}' found")
    if len(matches) > 1:
        ids = ", ".join(sorted(m.stem for m in matches))
        raise FileNotFoundError(
            f"Ambiguous snapshot prefix '{snapshot_id}' (matches: {ids})"
        )

    with open(matches[0]) as fh:
        data = json.load(fh)

    snap_entries = {e["id"]: e for e in data["entries"]}
    curr_entries = {e.id: e.to_dict() for e in archive._entries.values()}

    snap_ids = set(snap_entries)
    curr_ids = set(curr_entries)

    added_ids = curr_ids - snap_ids
    removed_ids = snap_ids - curr_ids

    # Same ID on both sides but differing content_hash => changed entry.
    changed = []
    for eid in snap_ids & curr_ids:
        snap_hash = snap_entries[eid].get("content_hash", "")
        curr_hash = curr_entries[eid].get("content_hash", "")
        if snap_hash != curr_hash:
            changed.append({
                "id": eid,
                "title": curr_entries[eid].get("title", ""),
                "snapshot_hash": snap_hash,
                "current_hash": curr_hash,
            })

    return {
        "snapshot_id": data["snapshot_id"],
        "label": data.get("label", ""),
        "snapshot_entries": len(snap_entries),
        "current_entries": len(curr_entries),
        "added": len(added_ids),
        "removed": len(removed_ids),
        "changed": len(changed),
        "added_ids": sorted(added_ids),
        "removed_ids": sorted(removed_ids),
        "changed_details": changed,
    }
|
||||
139
nexus/mnemosyne/tests/test_snapshot.py
Normal file
139
nexus/mnemosyne/tests/test_snapshot.py
Normal file
@@ -0,0 +1,139 @@
|
||||
"""Tests for Mnemosyne archive snapshot — create, list, restore, diff."""
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from nexus.mnemosyne.archive import MnemosyneArchive
|
||||
from nexus.mnemosyne.entry import ArchiveEntry
|
||||
from nexus.mnemosyne.ingest import ingest_event
|
||||
from nexus.mnemosyne.snapshot import (
|
||||
snapshot_create,
|
||||
snapshot_list,
|
||||
snapshot_restore,
|
||||
snapshot_diff,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def archive(tmp_path):
    """Build a throwaway archive pre-loaded with three entries."""
    arch = MnemosyneArchive(archive_path=tmp_path / "test_archive.json", auto_embed=False)
    ingest_event(arch, title="First", content="hello world", topics=["test"])
    ingest_event(arch, title="Second", content="another entry", topics=["demo"])
    ingest_event(arch, title="Third", content="more content here", topics=["test", "demo"])
    return arch
|
||||
|
||||
|
||||
class TestSnapshotCreate:
    """snapshot_create: metadata, default label, on-disk JSON payload."""

    def test_create_returns_metadata(self, archive):
        meta = snapshot_create(archive, label="test snap")
        assert "snapshot_id" in meta
        assert meta["label"] == "test snap"
        assert meta["entry_count"] == 3
        assert Path(meta["path"]).exists()

    def test_create_no_label(self, archive):
        assert snapshot_create(archive)["label"] == ""

    def test_snapshot_file_is_valid_json(self, archive):
        meta = snapshot_create(archive)
        with open(meta["path"]) as fh:
            payload = json.load(fh)
        assert payload["entry_count"] == 3
        assert len(payload["entries"]) == 3
        assert "created_at" in payload
|
||||
|
||||
|
||||
class TestSnapshotList:
    """snapshot_list: empty case, ordering, and per-snapshot counts."""

    def test_empty_list(self, archive):
        # Nothing has been snapshotted yet, so the listing is empty.
        assert snapshot_list(archive) == []

    def test_list_returns_created_snapshots(self, archive):
        snapshot_create(archive, label="first")
        snapshot_create(archive, label="second")
        listing = snapshot_list(archive)
        assert len(listing) == 2
        # Listing is ordered newest-first.
        assert listing[0]["label"] == "second"
        assert listing[1]["label"] == "first"

    def test_list_entry_count(self, archive):
        snapshot_create(archive)
        assert snapshot_list(archive)[0]["entry_count"] == 3
|
||||
|
||||
|
||||
class TestSnapshotRestore:
    """snapshot_restore: rollback, prefix lookup, errors, content fidelity."""

    def test_restore_replaces_entries(self, archive):
        sid = snapshot_create(archive, label="before change")["snapshot_id"]

        # Grow the archive beyond the snapshot point.
        ingest_event(archive, title="Fourth", content="new entry", topics=["new"])
        assert archive.count == 4

        outcome = snapshot_restore(archive, sid)
        assert outcome["restored_entries"] == 3
        assert outcome["previous_count"] == 4
        assert archive.count == 3

    def test_restore_prefix_match(self, archive):
        sid = snapshot_create(archive)["snapshot_id"]
        # A 4-character prefix is enough to identify the snapshot.
        assert snapshot_restore(archive, sid[:4])["snapshot_id"] == sid

    def test_restore_nonexistent_raises(self, archive):
        with pytest.raises(FileNotFoundError):
            snapshot_restore(archive, "nonexistent-id")

    def test_restore_preserves_content(self, archive):
        created = snapshot_create(archive)
        titles_before = sorted(e.title for e in archive._entries.values())

        ingest_event(archive, title="Extra", content="extra", topics=[])
        snapshot_restore(archive, created["snapshot_id"])

        assert sorted(e.title for e in archive._entries.values()) == titles_before
|
||||
|
||||
|
||||
class TestSnapshotDiff:
    """snapshot_diff: no-op diff, additions, removals, and missing snapshot."""

    def test_diff_identical(self, archive):
        sid = snapshot_create(archive)["snapshot_id"]
        report = snapshot_diff(archive, sid)
        assert report["added"] == 0
        assert report["removed"] == 0
        assert report["changed"] == 0

    def test_diff_added_entries(self, archive):
        sid = snapshot_create(archive)["snapshot_id"]
        ingest_event(archive, title="New Entry", content="new", topics=["new"])
        report = snapshot_diff(archive, sid)
        assert report["added"] == 1
        assert report["removed"] == 0
        assert report["current_entries"] == 4
        assert report["snapshot_entries"] == 3

    def test_diff_removed_entries(self, archive):
        sid = snapshot_create(archive)["snapshot_id"]
        # Drop one entry after the snapshot was taken.
        victim = next(iter(archive._entries))
        archive.remove(victim)
        report = snapshot_diff(archive, sid)
        assert report["removed"] == 1
        assert victim in report["removed_ids"]

    def test_diff_nonexistent_raises(self, archive):
        with pytest.raises(FileNotFoundError):
            snapshot_diff(archive, "nope")
|
||||
Reference in New Issue
Block a user