diff --git a/nexus/mnemosyne/tests/__pycache__/test_archive.cpython-311.pyc b/nexus/mnemosyne/tests/__pycache__/test_archive.cpython-311.pyc new file mode 100644 index 00000000..8d83ad0a Binary files /dev/null and b/nexus/mnemosyne/tests/__pycache__/test_archive.cpython-311.pyc differ diff --git a/nexus/mnemosyne/tests/test_archive.py b/nexus/mnemosyne/tests/test_archive.py index 4a4ab610..04109bf7 100644 --- a/nexus/mnemosyne/tests/test_archive.py +++ b/nexus/mnemosyne/tests/test_archive.py @@ -491,3 +491,191 @@ def test_tag_persistence_across_reload(): fresh = a2.get(e.id) assert "beta" in fresh.topics assert "alpha" not in fresh.topics + + +# --- Entry update + dedup tests --- + +def test_content_hash_deterministic(): + e1 = ArchiveEntry(title="Test", content="Hello") + e2 = ArchiveEntry(title="Test", content="Hello") + assert e1.content_hash == e2.content_hash + + +def test_content_hash_differs_on_change(): + e = ArchiveEntry(title="Test", content="Hello") + h1 = e.content_hash + e.content = "World" + assert e.content_hash != h1 + + +def test_updated_at_set_on_creation(): + e = ArchiveEntry(title="T", content="c") + assert e.updated_at == e.created_at + + +def test_touch_updates_timestamp(): + import time + e = ArchiveEntry(title="T", content="c") + before = e.updated_at + time.sleep(0.01) + e.touch() + assert e.updated_at >= before + + +def test_update_entry_title(): + with tempfile.TemporaryDirectory() as tmp: + path = Path(tmp) / "test_archive.json" + archive = MnemosyneArchive(archive_path=path) + e = ingest_event(archive, title="Old", content="content", topics=["x"]) + old_hash = e.content_hash + updated = archive.update_entry(e.id, title="New Title") + assert updated.title == "New Title" + assert updated.content == "content" + assert updated.updated_at >= e.created_at + # Title is hashed along with the content, so a title-only change still changes the hash + assert updated.content_hash != old_hash # title is in hash + + +def test_update_entry_content_relinks(): + with
tempfile.TemporaryDirectory() as tmp: + path = Path(tmp) / "test_archive.json" + archive = MnemosyneArchive(archive_path=path) + e1 = ingest_event(archive, title="Python", content="Python programming language") + e2 = ingest_event(archive, title="Java", content="Java programming language") + # e1 and e2 should be linked via shared tokens + assert e2.id in e1.links or e1.id in e2.links + + # Update e1 to completely different content + archive.update_entry(e1.id, content="Cooking recipes for dinner") + e1_fresh = archive.get(e1.id) + e2_fresh = archive.get(e2.id) + # e1 should have been re-linked (likely unlinked from e2 now) + # e2 should no longer reference e1 + assert e1_fresh.content == "Cooking recipes for dinner" + + +def test_update_entry_metadata(): + with tempfile.TemporaryDirectory() as tmp: + path = Path(tmp) / "test_archive.json" + archive = MnemosyneArchive(archive_path=path) + e = ingest_event(archive, title="T", content="c") + archive.update_entry(e.id, metadata={"key": "value"}) + fresh = archive.get(e.id) + assert fresh.metadata == {"key": "value"} + + +def test_update_entry_missing_raises(): + with tempfile.TemporaryDirectory() as tmp: + path = Path(tmp) / "test_archive.json" + archive = MnemosyneArchive(archive_path=path) + try: + archive.update_entry("nonexistent", title="X") + assert False, "Expected KeyError" + except KeyError: + pass + + +def test_update_entry_no_change_no_relink(): + with tempfile.TemporaryDirectory() as tmp: + path = Path(tmp) / "test_archive.json" + archive = MnemosyneArchive(archive_path=path) + e = ingest_event(archive, title="T", content="c", topics=["x"]) + orig_links = list(e.links) + # Update only metadata (no content change) + archive.update_entry(e.id, metadata={"k": "v"}) + fresh = archive.get(e.id) + assert fresh.links == orig_links + + +def test_find_by_hash(): + with tempfile.TemporaryDirectory() as tmp: + path = Path(tmp) / "test_archive.json" + archive = MnemosyneArchive(archive_path=path) + e = 
ingest_event(archive, title="Unique", content="Unique content xyz") + found = archive.find_by_hash(e.content_hash) + assert found is not None + assert found.id == e.id + + +def test_find_by_hash_miss(): + with tempfile.TemporaryDirectory() as tmp: + path = Path(tmp) / "test_archive.json" + archive = MnemosyneArchive(archive_path=path) + found = archive.find_by_hash("nonexistent-hash") + assert found is None + + +def test_find_duplicates(): + with tempfile.TemporaryDirectory() as tmp: + path = Path(tmp) / "test_archive.json" + archive = MnemosyneArchive(archive_path=path) + e1 = ingest_event(archive, title="Same", content="Duplicate content") + # Manually add a second entry with identical title+content + e2 = ArchiveEntry(title="Same", content="Duplicate content", source="manual") + archive._entries[e2.id] = e2 + archive._save() + + dups = archive.find_duplicates() + assert len(dups) == 1 + assert len(dups[0]) == 2 + dup_ids = {d.id for d in dups[0]} + assert e1.id in dup_ids + assert e2.id in dup_ids + + +def test_find_duplicates_none(): + with tempfile.TemporaryDirectory() as tmp: + path = Path(tmp) / "test_archive.json" + archive = MnemosyneArchive(archive_path=path) + ingest_event(archive, title="A", content="unique a") + ingest_event(archive, title="B", content="unique b") + dups = archive.find_duplicates() + assert dups == [] + + +def test_add_skip_dups(): + with tempfile.TemporaryDirectory() as tmp: + path = Path(tmp) / "test_archive.json" + archive = MnemosyneArchive(archive_path=path) + e1 = ingest_event(archive, title="Test", content="Content here") + # Try to add exact same entry with skip_dups=True + e2 = ArchiveEntry(title="Test", content="Content here") + result = archive.add(e2, skip_dups=True) + assert result.id == e1.id # returned existing, not new + assert archive.count == 1 + + +def test_add_skip_dups_allows_different(): + with tempfile.TemporaryDirectory() as tmp: + path = Path(tmp) / "test_archive.json" + archive = 
MnemosyneArchive(archive_path=path) + e1 = ingest_event(archive, title="A", content="Content A") + e2 = ArchiveEntry(title="B", content="Content B") + result = archive.add(e2, skip_dups=True) + assert result.id == e2.id # new entry added + assert archive.count == 2 + + +def test_entry_roundtrip_with_updated_at(): + e = ArchiveEntry(title="T", content="c", topics=["x"]) + d = e.to_dict() + e2 = ArchiveEntry.from_dict(d) + assert e2.updated_at == e.updated_at + assert "content_hash" in d + + +def test_entry_from_dict_backfills_updated_at(): + """Legacy entries without updated_at should get it from created_at.""" + data = { + "id": "test-id", + "title": "Legacy", + "content": "old entry", + "source": "manual", + "source_ref": None, + "topics": [], + "metadata": {}, + "created_at": "2025-01-01T00:00:00+00:00", + "links": [], + } + e = ArchiveEntry.from_dict(data) + assert e.updated_at == "2025-01-01T00:00:00+00:00"