diff --git a/nexus/mnemosyne/tests/test_archive.py b/nexus/mnemosyne/tests/test_archive.py
index 11371e82..e269c1d0 100644
--- a/nexus/mnemosyne/tests/test_archive.py
+++ b/nexus/mnemosyne/tests/test_archive.py
@@ -262,6 +262,80 @@ def test_semantic_search_vs_keyword_relevance():
         assert results[0].title == "Python scripting"
 
 
+def test_graph_data_empty_archive():
+    """graph_data() on an archive with no entries returns empty node and edge lists."""
+    with tempfile.TemporaryDirectory() as tmp:
+        path = Path(tmp) / "test_archive.json"
+        archive = MnemosyneArchive(archive_path=path)
+        data = archive.graph_data()
+        assert data == {"nodes": [], "edges": []}
+
+
+def test_graph_data_nodes_and_edges():
+    """graph_data() reports one node per entry and a weighted edge between e1 and e2."""
+    with tempfile.TemporaryDirectory() as tmp:
+        path = Path(tmp) / "test_archive.json"
+        archive = MnemosyneArchive(archive_path=path)
+        e1 = ingest_event(archive, title="Python automation", content="Building automation tools in Python", topics=["code"])
+        e2 = ingest_event(archive, title="Python scripting", content="Writing automation scripts using Python", topics=["code"])
+        # Third entry is only needed for the node count, so its return value is not bound.
+        ingest_event(archive, title="Cooking", content="Making pasta carbonara", topics=["food"])
+
+        data = archive.graph_data()
+        assert len(data["nodes"]) == 3
+        # All node fields present
+        for node in data["nodes"]:
+            assert "id" in node
+            assert "title" in node
+            assert "topics" in node
+            assert "source" in node
+            assert "created_at" in node
+
+        # e1 and e2 should be linked (shared Python/automation tokens);
+        # the edge is accepted in either direction.
+        edge_pairs = {(e["source"], e["target"]) for e in data["edges"]}
+        assert (e1.id, e2.id) in edge_pairs or (e2.id, e1.id) in edge_pairs
+
+        # All edges have weights
+        for edge in data["edges"]:
+            assert "weight" in edge
+            assert 0 <= edge["weight"] <= 1
+
+
+def test_graph_data_topic_filter():
+    """graph_data(topic_filter=...) returns exactly the entries ingested with that topic."""
+    with tempfile.TemporaryDirectory() as tmp:
+        path = Path(tmp) / "test_archive.json"
+        archive = MnemosyneArchive(archive_path=path)
+        e1 = ingest_event(archive, title="A", content="code stuff", topics=["code"])
+        e2 = ingest_event(archive, title="B", content="more code", topics=["code"])
+        ingest_event(archive, title="C", content="food stuff", topics=["food"])
+
+        data = archive.graph_data(topic_filter="code")
+        node_ids = {n["id"] for n in data["nodes"]}
+        assert e1.id in node_ids
+        assert e2.id in node_ids
+        assert len(data["nodes"]) == 2
+
+
+def test_graph_data_deduplicates_edges():
+    """Bidirectional links should produce a single edge, not two."""
+    with tempfile.TemporaryDirectory() as tmp:
+        path = Path(tmp) / "test_archive.json"
+        archive = MnemosyneArchive(archive_path=path)
+        e1 = ingest_event(archive, title="Python automation", content="Building automation tools in Python")
+        e2 = ingest_event(archive, title="Python scripting", content="Writing automation scripts using Python")
+
+        data = archive.graph_data()
+        # Count how many edges connect e1 and e2, regardless of direction.
+        # NOTE(review): "<= 1" also passes when the two entries are not linked at all.
+        e1e2_edges = [
+            e for e in data["edges"]
+            if {e["source"], e["target"]} == {e1.id, e2.id}
+        ]
+        assert len(e1e2_edges) <= 1, "Should not have duplicate bidirectional edges"
+
+
 def test_archive_topic_counts():
     with tempfile.TemporaryDirectory() as tmp:
         path = Path(tmp) / "test_archive.json"