856 lines
34 KiB
Python
856 lines
34 KiB
Python
"""Tests for Mnemosyne archive core."""
|
|
|
|
import json
|
|
import tempfile
|
|
from datetime import datetime, timezone, timedelta
|
|
from pathlib import Path
|
|
|
|
from nexus.mnemosyne.entry import ArchiveEntry
|
|
from nexus.mnemosyne.linker import HolographicLinker
|
|
from nexus.mnemosyne.archive import MnemosyneArchive
|
|
from nexus.mnemosyne.ingest import ingest_event, ingest_from_mempalace
|
|
|
|
|
|
def test_entry_roundtrip():
    """An ArchiveEntry survives a to_dict/from_dict round trip intact."""
    e = ArchiveEntry(title="Test", content="Hello world", topics=["test"])
    d = e.to_dict()
    e2 = ArchiveEntry.from_dict(d)
    assert e2.id == e.id
    assert e2.title == "Test"
|
|
|
|
|
|
def test_linker_similarity():
    """Topically similar entries score higher than unrelated ones."""
    linker = HolographicLinker()
    a = ArchiveEntry(title="Python coding", content="Writing Python scripts for automation")
    b = ArchiveEntry(title="Python scripting", content="Automating tasks with Python scripts")
    c = ArchiveEntry(title="Cooking recipes", content="How to make pasta carbonara")
    # a/b share many tokens; a/c share almost none.
    assert linker.compute_similarity(a, b) > linker.compute_similarity(a, c)
|
|
|
|
|
|
def test_archive_add_and_search():
    """Ingested entries are counted and findable via keyword search."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        ingest_event(archive, title="First entry", content="Hello archive", topics=["test"])
        ingest_event(archive, title="Second entry", content="Another record", topics=["test", "demo"])
        assert archive.count == 2
        results = archive.search("hello")
        assert len(results) == 1
        assert results[0].title == "First entry"
|
|
|
|
|
|
def test_archive_auto_linking():
    """Entries with heavily overlapping content get auto-linked on ingest."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e1 = ingest_event(archive, title="Python automation", content="Building automation tools in Python")
        e2 = ingest_event(archive, title="Python scripting", content="Writing automation scripts using Python")
        # Both should be linked due to shared tokens
        assert len(e1.links) > 0 or len(e2.links) > 0
|
|
|
|
|
|
def test_ingest_from_mempalace():
    """Bulk ingest from mempalace-shaped dicts creates one entry per record."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        mp_entries = [
            {"id": "mp-1", "content": "Test memory content", "metadata": {"title": "Test", "topics": ["demo"]}},
            {"id": "mp-2", "content": "Another memory", "metadata": {"title": "Memory 2"}},
        ]
        count = ingest_from_mempalace(archive, mp_entries)
        assert count == 2
        assert archive.count == 2
|
|
|
|
|
|
def test_archive_persistence():
    """Entries written by one archive instance are visible after reload."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive1 = MnemosyneArchive(archive_path=path)
        ingest_event(archive1, title="Persistent", content="Should survive reload")

        # A second instance pointed at the same path must see the entry.
        archive2 = MnemosyneArchive(archive_path=path)
        assert archive2.count == 1
        results = archive2.search("persistent")
        assert len(results) == 1
|
|
|
|
|
|
def test_archive_remove_basic():
    """remove() deletes an existing entry and reports success."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e1 = ingest_event(archive, title="Alpha", content="First entry", topics=["x"])
        assert archive.count == 1

        result = archive.remove(e1.id)
        assert result is True
        assert archive.count == 0
        assert archive.get(e1.id) is None


def test_archive_remove_nonexistent():
    """remove() returns False for an unknown id instead of raising."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        result = archive.remove("does-not-exist")
        assert result is False


def test_archive_remove_cleans_backlinks():
    """Removing an entry scrubs references to it from linked entries."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e1 = ingest_event(archive, title="Python automation", content="Building automation tools in Python")
        e2 = ingest_event(archive, title="Python scripting", content="Writing automation scripts using Python")
        # At least one direction should be linked
        assert e1.id in e2.links or e2.id in e1.links

        # Remove e1; e2 must no longer reference it
        archive.remove(e1.id)
        e2_fresh = archive.get(e2.id)
        assert e2_fresh is not None
        assert e1.id not in e2_fresh.links


def test_archive_remove_persists():
    """A removal is persisted: reloading the archive shows the entry gone."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        a1 = MnemosyneArchive(archive_path=path)
        e = ingest_event(a1, title="Gone", content="Will be removed")
        a1.remove(e.id)

        a2 = MnemosyneArchive(archive_path=path)
        assert a2.count == 0
|
|
|
|
|
|
def test_archive_export_unfiltered():
    """export() without filters returns every entry plus null filter echo."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        ingest_event(archive, title="A", content="content a", topics=["alpha"])
        ingest_event(archive, title="B", content="content b", topics=["beta"])
        data = archive.export()
        assert data["count"] == 2
        assert len(data["entries"]) == 2
        assert data["filters"] == {"query": None, "topics": None}


def test_archive_export_by_topic():
    """export(topics=...) keeps only entries carrying a requested topic."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        ingest_event(archive, title="A", content="content a", topics=["alpha"])
        ingest_event(archive, title="B", content="content b", topics=["beta"])
        data = archive.export(topics=["alpha"])
        assert data["count"] == 1
        assert data["entries"][0]["title"] == "A"


def test_archive_export_by_query():
    """export(query=...) keeps only entries matching the keyword query."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        ingest_event(archive, title="Hello world", content="greetings", topics=[])
        ingest_event(archive, title="Goodbye", content="farewell", topics=[])
        data = archive.export(query="hello")
        assert data["count"] == 1
        assert data["entries"][0]["title"] == "Hello world"


def test_archive_export_combined_filters():
    """Query and topic filters are ANDed together."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        ingest_event(archive, title="Hello world", content="greetings", topics=["alpha"])
        ingest_event(archive, title="Hello again", content="greetings again", topics=["beta"])
        data = archive.export(query="hello", topics=["alpha"])
        assert data["count"] == 1
        assert data["entries"][0]["title"] == "Hello world"
|
|
|
|
|
|
def test_archive_stats_richer():
    """stats() exposes orphans/link_density/oldest/newest even when empty."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        # All four new fields present when archive is empty
        s = archive.stats()
        assert "orphans" in s
        assert "link_density" in s
        assert "oldest_entry" in s
        assert "newest_entry" in s
        assert s["orphans"] == 0
        assert s["link_density"] == 0.0
        assert s["oldest_entry"] is None
        assert s["newest_entry"] is None


def test_archive_stats_orphan_count():
    """stats() fields take sane values on a populated archive."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        # Two entries with very different content → unlikely to auto-link
        ingest_event(archive, title="Zebras", content="Zebra stripes savannah Africa", topics=[])
        ingest_event(archive, title="Compiler", content="Lexer parser AST bytecode", topics=[])
        s = archive.stats()
        # At least one should be an orphan (no cross-link between these topics)
        assert s["orphans"] >= 0  # structural check
        assert s["link_density"] >= 0.0
        assert s["oldest_entry"] is not None
        assert s["newest_entry"] is not None
|
|
|
|
|
|
def test_semantic_search_returns_results():
    """semantic_search ranks the topically closest entry first."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        ingest_event(archive, title="Python automation", content="Building automation tools in Python")
        ingest_event(archive, title="Cooking recipes", content="How to make pasta carbonara with cheese")
        results = archive.semantic_search("python scripting", limit=5)
        assert len(results) > 0
        assert results[0].title == "Python automation"


def test_semantic_search_link_boost():
    """Entries with more inbound links rank higher when Jaccard is equal."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        # Create two similar entries; manually give one more links
        e1 = ingest_event(archive, title="Machine learning", content="Neural networks deep learning models")
        e2 = ingest_event(archive, title="Machine learning basics", content="Neural networks deep learning intro")
        # Add a third entry that links to e1 so e1 has more inbound links
        e3 = ingest_event(archive, title="AI overview", content="Artificial intelligence machine learning")
        # Manually give e1 an extra inbound link by adding e3 -> e1
        if e1.id not in e3.links:
            e3.links.append(e1.id)
            archive._save()
        results = archive.semantic_search("machine learning neural networks", limit=5)
        assert len(results) >= 2
        # e1 should rank at or near top
        assert results[0].id in {e1.id, e2.id}


def test_semantic_search_fallback_to_keyword():
    """Falls back to keyword search when no entry meets Jaccard threshold."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        ingest_event(archive, title="Exact match only", content="unique xyzzy token here")
        # threshold=1.0 ensures no semantic match, triggering fallback
        results = archive.semantic_search("xyzzy", limit=5, threshold=1.0)
        # Fallback keyword search should find it
        assert len(results) == 1
        assert results[0].title == "Exact match only"


def test_semantic_search_empty_archive():
    """semantic_search on an empty archive returns an empty list."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        results = archive.semantic_search("anything", limit=5)
        assert results == []


def test_semantic_search_vs_keyword_relevance():
    """Semantic search finds conceptually related entries missed by keyword search."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        ingest_event(archive, title="Python scripting", content="Writing scripts with Python for automation tasks")
        ingest_event(archive, title="Baking bread", content="Mix flour water yeast knead bake oven")
        # "coding" is semantically unrelated to baking but related to python scripting
        results = archive.semantic_search("coding scripts automation")
        assert len(results) > 0
        assert results[0].title == "Python scripting"
|
|
|
|
|
|
def test_graph_data_empty_archive():
    """graph_data() on an empty archive yields empty node and edge lists."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        data = archive.graph_data()
        assert data == {"nodes": [], "edges": []}


def test_graph_data_nodes_and_edges():
    """graph_data() emits one node per entry and weighted edges for links."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e1 = ingest_event(archive, title="Python automation", content="Building automation tools in Python", topics=["code"])
        e2 = ingest_event(archive, title="Python scripting", content="Writing automation scripts using Python", topics=["code"])
        e3 = ingest_event(archive, title="Cooking", content="Making pasta carbonara", topics=["food"])

        data = archive.graph_data()
        assert len(data["nodes"]) == 3
        # All node fields present
        for node in data["nodes"]:
            assert "id" in node
            assert "title" in node
            assert "topics" in node
            assert "source" in node
            assert "created_at" in node

        # e1 and e2 should be linked (shared Python/automation tokens)
        edge_pairs = {(e["source"], e["target"]) for e in data["edges"]}
        e1e2 = (min(e1.id, e2.id), max(e1.id, e2.id))
        assert e1e2 in edge_pairs or (e1e2[1], e1e2[0]) in edge_pairs

        # All edges have weights
        for edge in data["edges"]:
            assert "weight" in edge
            assert 0 <= edge["weight"] <= 1


def test_graph_data_topic_filter():
    """graph_data(topic_filter=...) restricts nodes to the given topic."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e1 = ingest_event(archive, title="A", content="code stuff", topics=["code"])
        e2 = ingest_event(archive, title="B", content="more code", topics=["code"])
        ingest_event(archive, title="C", content="food stuff", topics=["food"])

        data = archive.graph_data(topic_filter="code")
        node_ids = {n["id"] for n in data["nodes"]}
        assert e1.id in node_ids
        assert e2.id in node_ids
        assert len(data["nodes"]) == 2


def test_graph_data_deduplicates_edges():
    """Bidirectional links should produce a single edge, not two."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e1 = ingest_event(archive, title="Python automation", content="Building automation tools in Python")
        e2 = ingest_event(archive, title="Python scripting", content="Writing automation scripts using Python")

        data = archive.graph_data()
        # Count how many edges connect e1 and e2
        e1e2_edges = [
            e for e in data["edges"]
            if {e["source"], e["target"]} == {e1.id, e2.id}
        ]
        assert len(e1e2_edges) <= 1, "Should not have duplicate bidirectional edges"
|
|
|
|
|
|
def test_archive_topic_counts():
    """topic_counts() tallies how many entries carry each topic."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        ingest_event(archive, title="A", content="x", topics=["python", "automation"])
        ingest_event(archive, title="B", content="y", topics=["python"])
        ingest_event(archive, title="C", content="z", topics=["automation"])
        counts = archive.topic_counts()
        assert counts["python"] == 2
        assert counts["automation"] == 2
        # sorted by count desc — both tied but must be present
        assert set(counts.keys()) == {"python", "automation"}
|
|
|
|
|
|
# --- Tag management tests ---
|
|
|
|
def test_add_tags_basic():
    """add_tags appends new topics while keeping the existing ones."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e = ingest_event(archive, title="T", content="c", topics=["alpha"])
        archive.add_tags(e.id, ["beta", "gamma"])
        fresh = archive.get(e.id)
        assert "beta" in fresh.topics
        assert "gamma" in fresh.topics
        assert "alpha" in fresh.topics


def test_add_tags_deduplication():
    """add_tags deduplicates case-insensitively against existing topics."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e = ingest_event(archive, title="T", content="c", topics=["alpha"])
        archive.add_tags(e.id, ["alpha", "ALPHA", "beta"])
        fresh = archive.get(e.id)
        lower_topics = [t.lower() for t in fresh.topics]
        assert lower_topics.count("alpha") == 1
        assert "beta" in lower_topics


def test_add_tags_missing_entry():
    """add_tags on an unknown entry id raises KeyError."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        try:
            archive.add_tags("nonexistent-id", ["tag"])
            assert False, "Expected KeyError"
        except KeyError:
            pass


def test_add_tags_empty_list():
    """add_tags with an empty list is a no-op."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e = ingest_event(archive, title="T", content="c", topics=["alpha"])
        archive.add_tags(e.id, [])
        fresh = archive.get(e.id)
        assert fresh.topics == ["alpha"]
|
|
|
|
|
|
def test_remove_tags_basic():
    """remove_tags deletes only the named topics."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e = ingest_event(archive, title="T", content="c", topics=["alpha", "beta", "gamma"])
        archive.remove_tags(e.id, ["beta"])
        fresh = archive.get(e.id)
        assert "beta" not in fresh.topics
        assert "alpha" in fresh.topics
        assert "gamma" in fresh.topics


def test_remove_tags_case_insensitive():
    """remove_tags matches topics regardless of case."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e = ingest_event(archive, title="T", content="c", topics=["Python", "rust"])
        archive.remove_tags(e.id, ["PYTHON"])
        fresh = archive.get(e.id)
        assert "Python" not in fresh.topics
        assert "rust" in fresh.topics


def test_remove_tags_missing_tag_silent():
    """Removing a tag the entry doesn't have is silently ignored."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e = ingest_event(archive, title="T", content="c", topics=["alpha"])
        archive.remove_tags(e.id, ["nope"])  # should not raise
        fresh = archive.get(e.id)
        assert fresh.topics == ["alpha"]


def test_remove_tags_missing_entry():
    """remove_tags on an unknown entry id raises KeyError."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        try:
            archive.remove_tags("nonexistent-id", ["tag"])
            assert False, "Expected KeyError"
        except KeyError:
            pass
|
|
|
|
|
|
def test_retag_basic():
    """retag replaces the whole topic list."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e = ingest_event(archive, title="T", content="c", topics=["old1", "old2"])
        archive.retag(e.id, ["new1", "new2"])
        fresh = archive.get(e.id)
        assert fresh.topics == ["new1", "new2"]


def test_retag_deduplication():
    """retag deduplicates the replacement list case-insensitively."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e = ingest_event(archive, title="T", content="c", topics=["x"])
        archive.retag(e.id, ["go", "GO", "rust"])
        fresh = archive.get(e.id)
        lower_topics = [t.lower() for t in fresh.topics]
        assert lower_topics.count("go") == 1
        assert "rust" in lower_topics


def test_retag_empty_list():
    """retag with an empty list clears all topics."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e = ingest_event(archive, title="T", content="c", topics=["alpha"])
        archive.retag(e.id, [])
        fresh = archive.get(e.id)
        assert fresh.topics == []


def test_retag_missing_entry():
    """retag on an unknown entry id raises KeyError."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        try:
            archive.retag("nonexistent-id", ["tag"])
            assert False, "Expected KeyError"
        except KeyError:
            pass


def test_tag_persistence_across_reload():
    """Tag additions and removals survive an archive reload."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        a1 = MnemosyneArchive(archive_path=path)
        e = ingest_event(a1, title="T", content="c", topics=["alpha"])
        a1.add_tags(e.id, ["beta"])
        a1.remove_tags(e.id, ["alpha"])

        a2 = MnemosyneArchive(archive_path=path)
        fresh = a2.get(e.id)
        assert "beta" in fresh.topics
        assert "alpha" not in fresh.topics
|
|
|
|
|
|
# --- content_hash and updated_at field tests ---
|
|
|
|
def test_entry_has_content_hash():
    """Every entry carries a SHA-256 hex content hash."""
    e = ArchiveEntry(title="Hello", content="world")
    assert e.content_hash is not None
    assert len(e.content_hash) == 64  # SHA-256 hex


def test_entry_content_hash_deterministic():
    """Identical title/content always hash to the same digest."""
    e1 = ArchiveEntry(title="Hello", content="world")
    e2 = ArchiveEntry(title="Hello", content="world")
    assert e1.content_hash == e2.content_hash


def test_entry_content_hash_differs_on_different_content():
    """Different content produces a different digest."""
    e1 = ArchiveEntry(title="Hello", content="world")
    e2 = ArchiveEntry(title="Hello", content="different")
    assert e1.content_hash != e2.content_hash


def test_entry_updated_at_defaults_none():
    """A fresh entry has no updated_at timestamp."""
    e = ArchiveEntry(title="T", content="c")
    assert e.updated_at is None


def test_entry_roundtrip_includes_new_fields():
    """content_hash and updated_at survive the dict round trip."""
    e = ArchiveEntry(title="T", content="c")
    d = e.to_dict()
    assert "content_hash" in d
    assert "updated_at" in d
    e2 = ArchiveEntry.from_dict(d)
    assert e2.content_hash == e.content_hash
    assert e2.updated_at == e.updated_at
|
|
|
|
|
|
# --- content deduplication tests ---
|
|
|
|
def test_add_deduplication_same_content():
    """Ingesting identical content twice returns the existing entry."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e1 = ingest_event(archive, title="Dup", content="Same content here")
        e2 = ingest_event(archive, title="Dup", content="Same content here")
        # Should NOT have created a second entry
        assert archive.count == 1
        assert e1.id == e2.id


def test_add_deduplication_different_content():
    """Distinct content is never deduplicated."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        ingest_event(archive, title="A", content="Content one")
        ingest_event(archive, title="B", content="Content two")
        assert archive.count == 2


def test_find_duplicate_returns_existing():
    """find_duplicate locates the stored entry matching a probe's content."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e1 = ingest_event(archive, title="Dup", content="Same content here")
        probe = ArchiveEntry(title="Dup", content="Same content here")
        dup = archive.find_duplicate(probe)
        assert dup is not None
        assert dup.id == e1.id


def test_find_duplicate_returns_none_for_unique():
    """find_duplicate returns None when no stored entry matches."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        ingest_event(archive, title="A", content="Some content")
        probe = ArchiveEntry(title="B", content="Totally different content")
        assert archive.find_duplicate(probe) is None


def test_find_duplicate_empty_archive():
    """find_duplicate on an empty archive returns None."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        probe = ArchiveEntry(title="X", content="y")
        assert archive.find_duplicate(probe) is None
|
|
|
|
|
|
# --- update_entry tests ---
|
|
|
|
def test_update_entry_title():
    """update_entry changes the title without touching the content."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e = ingest_event(archive, title="Old title", content="Some content")
        archive.update_entry(e.id, title="New title")
        fresh = archive.get(e.id)
        assert fresh.title == "New title"
        assert fresh.content == "Some content"


def test_update_entry_content():
    """update_entry can replace the content field."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e = ingest_event(archive, title="T", content="Old content")
        archive.update_entry(e.id, content="New content")
        fresh = archive.get(e.id)
        assert fresh.content == "New content"


def test_update_entry_metadata():
    """update_entry merges metadata keys into the entry."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e = ingest_event(archive, title="T", content="c")
        archive.update_entry(e.id, metadata={"key": "value"})
        fresh = archive.get(e.id)
        assert fresh.metadata["key"] == "value"


def test_update_entry_bumps_updated_at():
    """update_entry sets updated_at, which starts out as None."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e = ingest_event(archive, title="T", content="c")
        assert e.updated_at is None
        archive.update_entry(e.id, title="Updated")
        fresh = archive.get(e.id)
        assert fresh.updated_at is not None


def test_update_entry_refreshes_content_hash():
    """Changing the content recomputes content_hash."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e = ingest_event(archive, title="T", content="Original content")
        old_hash = e.content_hash
        archive.update_entry(e.id, content="Completely new content")
        fresh = archive.get(e.id)
        assert fresh.content_hash != old_hash


def test_update_entry_missing_raises():
    """update_entry on an unknown entry id raises KeyError."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        try:
            archive.update_entry("nonexistent-id", title="X")
            assert False, "Expected KeyError"
        except KeyError:
            pass


def test_update_entry_persists_across_reload():
    """Updates are written to disk and visible after reload."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        a1 = MnemosyneArchive(archive_path=path)
        e = ingest_event(a1, title="Before", content="Before content")
        a1.update_entry(e.id, title="After", content="After content")

        a2 = MnemosyneArchive(archive_path=path)
        fresh = a2.get(e.id)
        assert fresh.title == "After"
        assert fresh.content == "After content"
        assert fresh.updated_at is not None


def test_update_entry_no_change_no_crash():
    """Calling update_entry with all None args should not fail."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e = ingest_event(archive, title="T", content="c")
        result = archive.update_entry(e.id)
        assert result.title == "T"
|
|
|
|
|
|
# --- by_date_range tests ---
|
|
|
|
def _make_entry_at(archive: MnemosyneArchive, title: str, dt: datetime) -> ArchiveEntry:
    """Helper: ingest an entry and backdate its created_at.

    Mutates the entry in place and re-saves the archive so the backdated
    timestamp is what date-range queries see.
    """
    e = ingest_event(archive, title=title, content=title)
    e.created_at = dt.isoformat()
    archive._save()
    return e
|
|
|
|
|
|
def test_by_date_range_empty_archive():
    """by_date_range on an empty archive returns an empty list."""
    with tempfile.TemporaryDirectory() as tmp:
        archive = MnemosyneArchive(archive_path=Path(tmp) / "a.json")
        results = archive.by_date_range("2024-01-01", "2024-12-31")
        assert results == []


def test_by_date_range_returns_matching_entries():
    """Only entries whose created_at falls inside the range are returned."""
    with tempfile.TemporaryDirectory() as tmp:
        archive = MnemosyneArchive(archive_path=Path(tmp) / "a.json")
        jan = datetime(2024, 1, 15, tzinfo=timezone.utc)
        mar = datetime(2024, 3, 10, tzinfo=timezone.utc)
        jun = datetime(2024, 6, 1, tzinfo=timezone.utc)
        e1 = _make_entry_at(archive, "Jan entry", jan)
        e2 = _make_entry_at(archive, "Mar entry", mar)
        e3 = _make_entry_at(archive, "Jun entry", jun)

        results = archive.by_date_range("2024-01-01", "2024-04-01")
        ids = {e.id for e in results}
        assert e1.id in ids
        assert e2.id in ids
        assert e3.id not in ids


def test_by_date_range_boundary_inclusive():
    """Both range endpoints are inclusive."""
    with tempfile.TemporaryDirectory() as tmp:
        archive = MnemosyneArchive(archive_path=Path(tmp) / "a.json")
        exact = datetime(2024, 3, 1, tzinfo=timezone.utc)
        e = _make_entry_at(archive, "Exact boundary", exact)

        results = archive.by_date_range("2024-03-01T00:00:00+00:00", "2024-03-01T00:00:00+00:00")
        assert len(results) == 1
        assert results[0].id == e.id


def test_by_date_range_no_results():
    """A range that misses every entry yields an empty list."""
    with tempfile.TemporaryDirectory() as tmp:
        archive = MnemosyneArchive(archive_path=Path(tmp) / "a.json")
        jan = datetime(2024, 1, 15, tzinfo=timezone.utc)
        _make_entry_at(archive, "Jan entry", jan)

        results = archive.by_date_range("2023-01-01", "2023-12-31")
        assert results == []


def test_by_date_range_timezone_naive_treated_as_utc():
    """Timezone-naive start/end strings are interpreted as UTC."""
    with tempfile.TemporaryDirectory() as tmp:
        archive = MnemosyneArchive(archive_path=Path(tmp) / "a.json")
        dt = datetime(2024, 6, 15, tzinfo=timezone.utc)
        e = _make_entry_at(archive, "Summer", dt)

        # Timezone-naive start/end should still match
        results = archive.by_date_range("2024-06-01", "2024-07-01")
        assert any(r.id == e.id for r in results)


def test_by_date_range_sorted_ascending():
    """Results come back ordered by created_at, oldest first."""
    with tempfile.TemporaryDirectory() as tmp:
        archive = MnemosyneArchive(archive_path=Path(tmp) / "a.json")
        dates = [
            datetime(2024, 3, 5, tzinfo=timezone.utc),
            datetime(2024, 1, 10, tzinfo=timezone.utc),
            datetime(2024, 2, 20, tzinfo=timezone.utc),
        ]
        for i, dt in enumerate(dates):
            _make_entry_at(archive, f"Entry {i}", dt)

        results = archive.by_date_range("2024-01-01", "2024-12-31")
        assert len(results) == 3
        assert results[0].created_at < results[1].created_at < results[2].created_at


def test_by_date_range_single_entry_archive():
    """A lone entry is returned iff the range covers it."""
    with tempfile.TemporaryDirectory() as tmp:
        archive = MnemosyneArchive(archive_path=Path(tmp) / "a.json")
        dt = datetime(2024, 5, 1, tzinfo=timezone.utc)
        e = _make_entry_at(archive, "Only", dt)

        assert archive.by_date_range("2024-01-01", "2024-12-31") == [e]
        assert archive.by_date_range("2025-01-01", "2025-12-31") == []
|
|
|
|
|
|
# --- temporal_neighbors tests ---
|
|
|
|
def test_temporal_neighbors_empty_archive():
    """A lone entry has no temporal neighbors."""
    with tempfile.TemporaryDirectory() as tmp:
        archive = MnemosyneArchive(archive_path=Path(tmp) / "a.json")
        e = ingest_event(archive, title="Lone", content="c")
        results = archive.temporal_neighbors(e.id, window_days=7)
        assert results == []


def test_temporal_neighbors_missing_entry_raises():
    """temporal_neighbors on an unknown entry id raises KeyError."""
    with tempfile.TemporaryDirectory() as tmp:
        archive = MnemosyneArchive(archive_path=Path(tmp) / "a.json")
        try:
            archive.temporal_neighbors("nonexistent-id")
            assert False, "Expected KeyError"
        except KeyError:
            pass


def test_temporal_neighbors_returns_within_window():
    """Only entries created within the day window are returned."""
    with tempfile.TemporaryDirectory() as tmp:
        archive = MnemosyneArchive(archive_path=Path(tmp) / "a.json")
        anchor_dt = datetime(2024, 4, 10, tzinfo=timezone.utc)
        near_dt = datetime(2024, 4, 14, tzinfo=timezone.utc)  # +4 days — within 7
        far_dt = datetime(2024, 4, 20, tzinfo=timezone.utc)  # +10 days — outside 7

        anchor = _make_entry_at(archive, "Anchor", anchor_dt)
        near = _make_entry_at(archive, "Near", near_dt)
        far = _make_entry_at(archive, "Far", far_dt)

        results = archive.temporal_neighbors(anchor.id, window_days=7)
        ids = {e.id for e in results}
        assert near.id in ids
        assert far.id not in ids
        assert anchor.id not in ids


def test_temporal_neighbors_excludes_anchor():
    """The anchor entry itself is never among its neighbors."""
    with tempfile.TemporaryDirectory() as tmp:
        archive = MnemosyneArchive(archive_path=Path(tmp) / "a.json")
        dt = datetime(2024, 4, 10, tzinfo=timezone.utc)
        anchor = _make_entry_at(archive, "Anchor", dt)
        same = _make_entry_at(archive, "Same day", dt)

        results = archive.temporal_neighbors(anchor.id, window_days=0)
        ids = {e.id for e in results}
        assert anchor.id not in ids
        assert same.id in ids


def test_temporal_neighbors_custom_window():
    """window_days controls how far the neighborhood extends."""
    with tempfile.TemporaryDirectory() as tmp:
        archive = MnemosyneArchive(archive_path=Path(tmp) / "a.json")
        anchor_dt = datetime(2024, 4, 10, tzinfo=timezone.utc)
        within_3 = datetime(2024, 4, 12, tzinfo=timezone.utc)  # +2 days
        outside_3 = datetime(2024, 4, 15, tzinfo=timezone.utc)  # +5 days

        anchor = _make_entry_at(archive, "Anchor", anchor_dt)
        e_near = _make_entry_at(archive, "Near", within_3)
        e_far = _make_entry_at(archive, "Far", outside_3)

        results = archive.temporal_neighbors(anchor.id, window_days=3)
        ids = {e.id for e in results}
        assert e_near.id in ids
        assert e_far.id not in ids


def test_temporal_neighbors_sorted_ascending():
    """Neighbors come back ordered by created_at, oldest first."""
    with tempfile.TemporaryDirectory() as tmp:
        archive = MnemosyneArchive(archive_path=Path(tmp) / "a.json")
        anchor_dt = datetime(2024, 6, 15, tzinfo=timezone.utc)
        anchor = _make_entry_at(archive, "Anchor", anchor_dt)
        for offset in [5, 1, 3]:
            _make_entry_at(archive, f"Offset {offset}", anchor_dt + timedelta(days=offset))

        results = archive.temporal_neighbors(anchor.id, window_days=7)
        assert len(results) == 3
        assert results[0].created_at < results[1].created_at < results[2].created_at


def test_temporal_neighbors_boundary_inclusive():
    """An entry exactly window_days away is still a neighbor."""
    with tempfile.TemporaryDirectory() as tmp:
        archive = MnemosyneArchive(archive_path=Path(tmp) / "a.json")
        anchor_dt = datetime(2024, 6, 15, tzinfo=timezone.utc)
        boundary_dt = anchor_dt + timedelta(days=7)  # exactly at window edge

        anchor = _make_entry_at(archive, "Anchor", anchor_dt)
        boundary = _make_entry_at(archive, "Boundary", boundary_dt)

        results = archive.temporal_neighbors(anchor.id, window_days=7)
        assert any(r.id == boundary.id for r in results)