# ─── Snapshot / Backup ────────────────────────────────────

def _snapshot_dir(self) -> Path:
    """Return (and create) the snapshots directory next to the archive."""
    directory = self.path.parent / "snapshots"
    directory.mkdir(parents=True, exist_ok=True)
    return directory


@staticmethod
def _snapshot_filename(timestamp: str, label: str) -> str:
    """Build a deterministic snapshot filename.

    Every character of *label* that is not alphanumeric, ``-`` or ``_`` is
    replaced by ``_`` so the label cannot introduce path separators. An
    empty label becomes ``snapshot``.
    """
    if label:
        safe_label = "".join(
            ch if ch.isalnum() or ch in "-_" else "_" for ch in label
        )
    else:
        safe_label = "snapshot"
    return f"{timestamp}_{safe_label}.json"


def _snapshot_read(self, snapshot_id: str) -> dict:
    """Load and parse the snapshot file identified by *snapshot_id*.

    Raises:
        FileNotFoundError: If the ID is not a bare file name or no snapshot
            with that ID exists.
        ValueError: If the file is not valid JSON or does not hold a JSON
            object.
    """
    snap_dir = self._snapshot_dir()
    # An ID containing a path separator (e.g. "../archive") would resolve
    # outside the snapshots directory; treat it like any other unknown ID.
    is_bare_name = bool(snapshot_id) and Path(snapshot_id).name == snapshot_id
    snap_path = snap_dir / f"{snapshot_id}.json" if is_bare_name else None
    if snap_path is None or not snap_path.is_file():
        raise FileNotFoundError(f"Snapshot not found: {snapshot_id}")

    with open(snap_path, encoding="utf-8") as f:
        data = json.load(f)
    if not isinstance(data, dict):
        raise ValueError(f"Snapshot is not a JSON object: {snapshot_id}")
    return data


def snapshot_create(self, label: str = "") -> dict:
    """Serialize the current archive state to a timestamped snapshot file.

    The file name is ``<UTC timestamp>_<sanitized label>.json``. If a
    snapshot with that name already exists (same label within the same
    second), a numeric suffix ``_2``, ``_3``, ... is appended instead of
    overwriting the earlier snapshot.

    Args:
        label: Human-readable label for the snapshot (optional).

    Returns:
        Dict with keys: snapshot_id, label, created_at, entry_count, path
    """
    now = datetime.now(timezone.utc)
    timestamp = now.strftime("%Y%m%d_%H%M%S")
    base_id = self._snapshot_filename(timestamp, label)[: -len(".json")]
    snap_dir = self._snapshot_dir()

    payload = {
        "snapshot_id": base_id,
        "label": label,
        "created_at": now.isoformat(),
        "entry_count": len(self._entries),
        "archive_path": str(self.path),
        "entries": [e.to_dict() for e in self._entries.values()],
    }

    attempt = 1
    while True:
        snapshot_id = base_id if attempt == 1 else f"{base_id}_{attempt}"
        snap_path = snap_dir / f"{snapshot_id}.json"
        payload["snapshot_id"] = snapshot_id
        # Serialize before opening so a serialization error never leaves a
        # truncated snapshot file behind.
        text = json.dumps(payload, indent=2)
        try:
            # "x" fails instead of truncating when the file already exists.
            with open(snap_path, "x", encoding="utf-8") as f:
                f.write(text)
        except FileExistsError:
            attempt += 1
            continue
        break

    return {
        "snapshot_id": snapshot_id,
        "label": label,
        "created_at": payload["created_at"],
        "entry_count": payload["entry_count"],
        "path": str(snap_path),
    }


def snapshot_list(self) -> list[dict]:
    """List available snapshots, newest first.

    Files that cannot be read, are not valid JSON, or do not contain a JSON
    object are skipped. Ordering uses the recorded ``created_at`` value and
    falls back to the file name when two values are equal.

    Returns:
        List of dicts with keys: snapshot_id, label, created_at, entry_count, path
    """
    snapshots = []
    for snap_path in self._snapshot_dir().glob("*.json"):
        try:
            with open(snap_path, encoding="utf-8") as f:
                data = json.load(f)
        except (ValueError, OSError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            continue
        if not isinstance(data, dict):
            continue
        entries = data.get("entries", [])
        fallback_count = len(entries) if isinstance(entries, list) else 0
        snapshots.append({
            "snapshot_id": data.get("snapshot_id", snap_path.stem),
            "label": data.get("label", ""),
            "created_at": data.get("created_at", ""),
            "entry_count": data.get("entry_count", fallback_count),
            "path": str(snap_path),
        })

    snapshots.sort(
        key=lambda s: (str(s["created_at"]), Path(s["path"]).name),
        reverse=True,
    )
    return snapshots


def snapshot_restore(self, snapshot_id: str) -> dict:
    """Restore the archive from a snapshot, replacing all current entries.

    The snapshot is parsed completely before the current entries are
    replaced, and the previous entries are put back if saving fails, so a
    broken snapshot never leaves the archive partially restored.

    Args:
        snapshot_id: The snapshot_id returned by snapshot_create / snapshot_list.

    Returns:
        Dict with keys: snapshot_id, restored_count, previous_count

    Raises:
        FileNotFoundError: If no snapshot with that ID exists.
    """
    data = self._snapshot_read(snapshot_id)

    restored = {}
    for entry_data in data.get("entries", []):
        entry = ArchiveEntry.from_dict(entry_data)
        restored[entry.id] = entry

    previous = self._entries
    self._entries = restored
    try:
        self._save()
    except Exception:
        # Keep memory consistent with what is still on disk.
        self._entries = previous
        raise

    return {
        "snapshot_id": snapshot_id,
        "restored_count": len(restored),
        "previous_count": len(previous),
    }


def snapshot_diff(self, snapshot_id: str) -> dict:
    """Compare a snapshot against the current archive state.

    Entries present on both sides are compared by ``content_hash``. Result
    lists are sorted by title, then by id, so the output is deterministic.

    Args:
        snapshot_id: The snapshot_id to compare against current state.

    Returns:
        Dict with keys:
          - snapshot_id: str
          - added: list of {id, title} — in current, not in snapshot
          - removed: list of {id, title} — in snapshot, not in current
          - modified: list of {id, title, snapshot_hash, current_hash}
          - unchanged: int — count of identical entries

    Raises:
        FileNotFoundError: If no snapshot with that ID exists.
    """
    data = self._snapshot_read(snapshot_id)

    snap_entries: dict[str, dict] = {
        entry_data["id"]: entry_data for entry_data in data.get("entries", [])
    }
    current_ids = set(self._entries)
    snap_ids = set(snap_entries)

    added = [
        {"id": self._entries[eid].id, "title": self._entries[eid].title}
        for eid in current_ids - snap_ids
    ]
    removed = [
        {"id": snap_entries[eid]["id"], "title": snap_entries[eid].get("title", "")}
        for eid in snap_ids - current_ids
    ]

    modified = []
    unchanged = 0
    for eid in current_ids & snap_ids:
        current_hash = self._entries[eid].content_hash
        snap_hash = snap_entries[eid].get("content_hash")
        if current_hash == snap_hash:
            unchanged += 1
            continue
        modified.append({
            "id": eid,
            "title": self._entries[eid].title,
            "snapshot_hash": snap_hash,
            "current_hash": current_hash,
        })

    def by_title(item: dict) -> tuple:
        return (item["title"], item["id"])

    return {
        "snapshot_id": snapshot_id,
        "added": sorted(added, key=by_title),
        "removed": sorted(removed, key=by_title),
        "modified": sorted(modified, key=by_title),
        "unchanged": unchanged,
    }
def _print_snapshot_diff(diff):
    """Print the result dict of ``MnemosyneArchive.snapshot_diff``."""
    print(f"Diff vs snapshot: {diff['snapshot_id']}")
    # (heading up to the count, per-entry marker, entries) for each section.
    sections = (
        (" Added (", "+", diff["added"]),
        (" Removed (", "-", diff["removed"]),
        (" Modified(", "~", diff["modified"]),
    )
    for heading, marker, items in sections:
        print(f"{heading}{len(items)}): ", end="")
        if not items:
            print("none")
            continue
        print()  # finish the heading line before listing the entries
        for item in items:
            print(f" {marker} [{item['id'][:8]}] {item['title']}")
    print(f" Unchanged: {diff['unchanged']}")


def cmd_snapshot(args):
    """Handle ``mnemosyne snapshot create|list|restore|diff``.

    Reads ``args.snapshot_cmd`` to pick the action, plus ``args.label``
    (create) or ``args.snapshot_id`` (restore, diff). Results are printed
    to stdout. An unknown snapshot ID or an unknown subcommand is reported
    on stderr and the process exits with status 1.
    """
    archive = MnemosyneArchive()
    command = args.snapshot_cmd

    if command == "create":
        result = archive.snapshot_create(label=args.label or "")
        print(f"Snapshot created: {result['snapshot_id']}")
        print(f" Label: {result['label'] or '(none)'}")
        print(f" Entries: {result['entry_count']}")
        print(f" Path: {result['path']}")
    elif command == "list":
        snapshots = archive.snapshot_list()
        if not snapshots:
            print("No snapshots found.")
            return
        for snap in snapshots:
            print(f"[{snap['snapshot_id']}]")
            print(f" Label: {snap['label'] or '(none)'}")
            print(f" Created: {snap['created_at']}")
            print(f" Entries: {snap['entry_count']}")
            print()
    elif command in ("restore", "diff"):
        # Both actions take a snapshot ID and fail the same way when it is
        # unknown, so they share the error handling.
        if command == "restore":
            action = archive.snapshot_restore
        else:
            action = archive.snapshot_diff
        try:
            result = action(args.snapshot_id)
        except FileNotFoundError as exc:
            print(str(exc), file=sys.stderr)
            sys.exit(1)
        if command == "restore":
            print(f"Restored from snapshot: {result['snapshot_id']}")
            print(f" Entries restored: {result['restored_count']}")
            print(f" Previous count: {result['previous_count']}")
        else:
            _print_snapshot_diff(result)
    else:
        print(f"Unknown snapshot subcommand: {command}", file=sys.stderr)
        sys.exit(1)
/dev/null +++ b/nexus/mnemosyne/tests/test_snapshots.py @@ -0,0 +1,240 @@ +"""Tests for Mnemosyne snapshot (point-in-time backup/restore) feature.""" + +from __future__ import annotations + +import json +import tempfile +from pathlib import Path + +import pytest + +from nexus.mnemosyne.archive import MnemosyneArchive +from nexus.mnemosyne.ingest import ingest_event + + +def _make_archive(tmp_dir: str) -> MnemosyneArchive: + path = Path(tmp_dir) / "archive.json" + return MnemosyneArchive(archive_path=path, auto_embed=False) + + +# ─── snapshot_create ───────────────────────────────────────────────────────── + +def test_snapshot_create_returns_metadata(): + with tempfile.TemporaryDirectory() as tmp: + archive = _make_archive(tmp) + ingest_event(archive, title="Alpha", content="First entry", topics=["a"]) + ingest_event(archive, title="Beta", content="Second entry", topics=["b"]) + + result = archive.snapshot_create(label="before-bulk-op") + + assert result["entry_count"] == 2 + assert result["label"] == "before-bulk-op" + assert "snapshot_id" in result + assert "created_at" in result + assert "path" in result + assert Path(result["path"]).exists() + + +def test_snapshot_create_no_label(): + with tempfile.TemporaryDirectory() as tmp: + archive = _make_archive(tmp) + ingest_event(archive, title="Gamma", content="Third entry", topics=[]) + + result = archive.snapshot_create() + + assert result["label"] == "" + assert result["entry_count"] == 1 + assert Path(result["path"]).exists() + + +def test_snapshot_file_contains_entries(): + with tempfile.TemporaryDirectory() as tmp: + archive = _make_archive(tmp) + e = ingest_event(archive, title="Delta", content="Fourth entry", topics=["d"]) + result = archive.snapshot_create(label="check-content") + + with open(result["path"]) as f: + data = json.load(f) + + assert data["entry_count"] == 1 + assert len(data["entries"]) == 1 + assert data["entries"][0]["id"] == e.id + assert data["entries"][0]["title"] == "Delta" + + +def 
test_snapshot_create_empty_archive(): + with tempfile.TemporaryDirectory() as tmp: + archive = _make_archive(tmp) + result = archive.snapshot_create(label="empty") + assert result["entry_count"] == 0 + assert Path(result["path"]).exists() + + +# ─── snapshot_list ─────────────────────────────────────────────────────────── + +def test_snapshot_list_empty(): + with tempfile.TemporaryDirectory() as tmp: + archive = _make_archive(tmp) + assert archive.snapshot_list() == [] + + +def test_snapshot_list_returns_all(): + with tempfile.TemporaryDirectory() as tmp: + archive = _make_archive(tmp) + ingest_event(archive, title="One", content="c1", topics=[]) + archive.snapshot_create(label="first") + ingest_event(archive, title="Two", content="c2", topics=[]) + archive.snapshot_create(label="second") + + snapshots = archive.snapshot_list() + assert len(snapshots) == 2 + labels = {s["label"] for s in snapshots} + assert "first" in labels + assert "second" in labels + + +def test_snapshot_list_metadata_fields(): + with tempfile.TemporaryDirectory() as tmp: + archive = _make_archive(tmp) + archive.snapshot_create(label="meta-check") + snapshots = archive.snapshot_list() + s = snapshots[0] + for key in ("snapshot_id", "label", "created_at", "entry_count", "path"): + assert key in s + + +def test_snapshot_list_newest_first(): + with tempfile.TemporaryDirectory() as tmp: + archive = _make_archive(tmp) + archive.snapshot_create(label="a") + archive.snapshot_create(label="b") + snapshots = archive.snapshot_list() + # Filenames sort lexicographically; newest (b) should be first + # (filenames include timestamp so alphabetical = newest-last; + # snapshot_list reverses the glob order → newest first) + assert len(snapshots) == 2 + # Both should be present; ordering is newest first + ids = [s["snapshot_id"] for s in snapshots] + assert ids == sorted(ids, reverse=True) + + +# ─── snapshot_restore ──────────────────────────────────────────────────────── + +def 
test_snapshot_restore_replaces_entries(): + with tempfile.TemporaryDirectory() as tmp: + archive = _make_archive(tmp) + ingest_event(archive, title="Kept", content="original content", topics=["orig"]) + snap = archive.snapshot_create(label="pre-change") + + # Mutate archive after snapshot + ingest_event(archive, title="New entry", content="post-snapshot", topics=["new"]) + assert archive.count == 2 + + result = archive.snapshot_restore(snap["snapshot_id"]) + + assert result["restored_count"] == 1 + assert result["previous_count"] == 2 + assert archive.count == 1 + entry = list(archive._entries.values())[0] + assert entry.title == "Kept" + + +def test_snapshot_restore_persists_to_disk(): + with tempfile.TemporaryDirectory() as tmp: + path = Path(tmp) / "archive.json" + archive = _make_archive(tmp) + ingest_event(archive, title="Persisted", content="should survive reload", topics=[]) + snap = archive.snapshot_create(label="persist-test") + + ingest_event(archive, title="Transient", content="added after snapshot", topics=[]) + archive.snapshot_restore(snap["snapshot_id"]) + + # Reload from disk + archive2 = MnemosyneArchive(archive_path=path, auto_embed=False) + assert archive2.count == 1 + assert list(archive2._entries.values())[0].title == "Persisted" + + +def test_snapshot_restore_missing_raises(): + with tempfile.TemporaryDirectory() as tmp: + archive = _make_archive(tmp) + with pytest.raises(FileNotFoundError): + archive.snapshot_restore("nonexistent_snapshot_id") + + +# ─── snapshot_diff ─────────────────────────────────────────────────────────── + +def test_snapshot_diff_no_changes(): + with tempfile.TemporaryDirectory() as tmp: + archive = _make_archive(tmp) + ingest_event(archive, title="Stable", content="unchanged content", topics=[]) + snap = archive.snapshot_create(label="baseline") + + diff = archive.snapshot_diff(snap["snapshot_id"]) + + assert diff["added"] == [] + assert diff["removed"] == [] + assert diff["modified"] == [] + assert diff["unchanged"] 
== 1 + + +def test_snapshot_diff_detects_added(): + with tempfile.TemporaryDirectory() as tmp: + archive = _make_archive(tmp) + ingest_event(archive, title="Original", content="existing", topics=[]) + snap = archive.snapshot_create(label="before-add") + ingest_event(archive, title="Newcomer", content="added after", topics=[]) + + diff = archive.snapshot_diff(snap["snapshot_id"]) + + assert len(diff["added"]) == 1 + assert diff["added"][0]["title"] == "Newcomer" + assert diff["removed"] == [] + assert diff["unchanged"] == 1 + + +def test_snapshot_diff_detects_removed(): + with tempfile.TemporaryDirectory() as tmp: + archive = _make_archive(tmp) + e1 = ingest_event(archive, title="Will Be Removed", content="doomed", topics=[]) + ingest_event(archive, title="Survivor", content="stays", topics=[]) + snap = archive.snapshot_create(label="pre-removal") + archive.remove(e1.id) + + diff = archive.snapshot_diff(snap["snapshot_id"]) + + assert len(diff["removed"]) == 1 + assert diff["removed"][0]["title"] == "Will Be Removed" + assert diff["added"] == [] + assert diff["unchanged"] == 1 + + +def test_snapshot_diff_detects_modified(): + with tempfile.TemporaryDirectory() as tmp: + archive = _make_archive(tmp) + e = ingest_event(archive, title="Mutable", content="original content", topics=[]) + snap = archive.snapshot_create(label="pre-edit") + archive.update_entry(e.id, content="updated content", auto_link=False) + + diff = archive.snapshot_diff(snap["snapshot_id"]) + + assert len(diff["modified"]) == 1 + assert diff["modified"][0]["title"] == "Mutable" + assert diff["modified"][0]["snapshot_hash"] != diff["modified"][0]["current_hash"] + assert diff["added"] == [] + assert diff["removed"] == [] + + +def test_snapshot_diff_missing_raises(): + with tempfile.TemporaryDirectory() as tmp: + archive = _make_archive(tmp) + with pytest.raises(FileNotFoundError): + archive.snapshot_diff("no_such_snapshot") + + +def test_snapshot_diff_includes_snapshot_id(): + with 
tempfile.TemporaryDirectory() as tmp: + archive = _make_archive(tmp) + snap = archive.snapshot_create(label="id-check") + diff = archive.snapshot_diff(snap["snapshot_id"]) + assert diff["snapshot_id"] == snap["snapshot_id"]