def graph_data(
    self,
    topic_filter: Optional[str] = None,
) -> dict:
    """Export the connection graph for 3D constellation visualization.

    Args:
        topic_filter: If set to a non-empty string, only entries that carry
            this topic (compared case-insensitively) are exported, together
            with the edges between them. ``None`` or ``""`` exports
            everything.

    Returns:
        A dict with two keys:

        - ``nodes``: one ``{id, title, topics, source, created_at}`` dict
          per exported entry. ``topics`` is a copy, so callers may mutate
          the export without touching the archive's entries.
        - ``edges``: one ``{source, target, weight}`` dict per linked pair
          of exported entries. ``source`` is the smaller id and ``target``
          the larger, so A→B and B→A collapse into a single edge.
          ``weight`` is ``self.linker.compute_similarity`` for the pair,
          rounded to 4 decimal places.
    """
    entries = list(self._entries.values())

    if topic_filter:
        wanted = topic_filter.lower()
        entries = [
            e for e in entries
            if any(t.lower() == wanted for t in e.topics)
        ]

    # Single id -> entry map: used both to restrict edges to exported nodes
    # and to resolve a link target without a second archive lookup.
    by_id = {e.id: e for e in entries}

    nodes = [
        {
            "id": e.id,
            "title": e.title,
            # Copy so the export never aliases the entry's own list.
            "topics": list(e.topics),
            "source": e.source,
            "created_at": e.created_at,
        }
        for e in entries
    ]

    # Build edges from links, dedup (A→B and B→A become one edge).
    seen_pairs: set[tuple[str, str]] = set()
    edges = []
    for entry in entries:
        for linked_id in entry.links:
            # A self-link would otherwise emit a degenerate A→A edge.
            if linked_id == entry.id:
                continue
            linked = by_id.get(linked_id)
            if linked is None:
                # Target is filtered out (or no longer exported): no edge.
                continue
            pair = (min(entry.id, linked_id), max(entry.id, linked_id))
            if pair in seen_pairs:
                continue
            seen_pairs.add(pair)
            # Live similarity score; always bound before the append below.
            weight = self.linker.compute_similarity(entry, linked)
            edges.append({
                "source": pair[0],
                "target": pair[1],
                "weight": round(weight, 4),
            })

    return {"nodes": nodes, "edges": edges}
def test_graph_data_empty_archive():
    """A freshly created archive exports no nodes and no edges."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        data = archive.graph_data()
        assert data == {"nodes": [], "edges": []}


def test_graph_data_nodes_and_edges():
    """Every entry becomes a node; linked entries yield a weighted edge."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e1 = ingest_event(archive, title="Python automation", content="Building automation tools in Python", topics=["code"])
        e2 = ingest_event(archive, title="Python scripting", content="Writing automation scripts using Python", topics=["code"])
        # Unrelated entry: only its presence as a node matters here.
        ingest_event(archive, title="Cooking", content="Making pasta carbonara", topics=["food"])

        data = archive.graph_data()
        assert len(data["nodes"]) == 3
        # All node fields present
        for node in data["nodes"]:
            for field in ("id", "title", "topics", "source", "created_at"):
                assert field in node

        # e1 and e2 should be linked (shared Python/automation tokens).
        # Compare as unordered pairs so the check does not depend on which
        # endpoint is reported as source.
        linked_pairs = {
            frozenset((edge["source"], edge["target"]))
            for edge in data["edges"]
        }
        assert frozenset((e1.id, e2.id)) in linked_pairs

        # All edges have weights
        for edge in data["edges"]:
            assert "weight" in edge
            assert 0 <= edge["weight"] <= 1


def test_graph_data_topic_filter():
    """Filtering by topic keeps only matching nodes and edges between them."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e1 = ingest_event(archive, title="A", content="code stuff", topics=["code"])
        e2 = ingest_event(archive, title="B", content="more code", topics=["code"])
        e3 = ingest_event(archive, title="C", content="food stuff", topics=["food"])

        data = archive.graph_data(topic_filter="code")
        node_ids = {n["id"] for n in data["nodes"]}
        assert e1.id in node_ids
        assert e2.id in node_ids
        assert e3.id not in node_ids
        assert len(data["nodes"]) == 2

        # Edges must never reference an entry that was filtered out.
        for edge in data["edges"]:
            assert edge["source"] in node_ids
            assert edge["target"] in node_ids


def test_graph_data_deduplicates_edges():
    """Bidirectional links should produce a single edge, not two."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e1 = ingest_event(archive, title="Python automation", content="Building automation tools in Python")
        e2 = ingest_event(archive, title="Python scripting", content="Writing automation scripts using Python")

        data = archive.graph_data()
        # Count how many edges connect e1 and e2
        e1e2_edges = [
            e for e in data["edges"]
            if {e["source"], e["target"]} == {e1.id, e2.id}
        ]
        assert len(e1e2_edges) <= 1, "Should not have duplicate bidirectional edges"