Compare commits

...

2 Commits

Author SHA1 Message Date
Alexander Whitestone
c961cf9122 test(mnemosyne): add graph_data() tests
Some checks failed
CI / test (pull_request) Failing after 12s
CI / validate (pull_request) Failing after 13s
Review Approval Gate / verify-review (pull_request) Failing after 2s
- empty archive returns empty nodes/edges
- nodes have all required fields
- edges have weights in [0,1]
- topic_filter returns subgraph
- bidirectional edges deduplicated
2026-04-11 18:14:34 -04:00
Alexander Whitestone
a1c038672b feat(mnemosyne): add graph_data() for 3D constellation export
Returns {nodes, edges} with live link weights. Supports topic_filter
for subgraph extraction. Edges are deduplicated (bidirectional links
become single undirected edges).

Closes #1232
2026-04-11 18:14:16 -04:00
2 changed files with 128 additions and 0 deletions

View File

@@ -212,6 +212,65 @@ class MnemosyneArchive:
def count(self) -> int:
    """Return the number of entries currently held in the archive."""
    stored = self._entries
    return len(stored)
def graph_data(
    self,
    topic_filter: Optional[str] = None,
) -> dict:
    """Export the connection graph for 3D constellation visualization.

    Args:
        topic_filter: If set (non-empty), only entries having this topic
            (compared case-insensitively) are exported, together with the
            edges whose two endpoints are both exported.

    Returns:
        A dict with two keys:

        - ``"nodes"``: list of ``{id, title, topics, source, created_at}``.
        - ``"edges"``: list of ``{source, target, weight}``. ``source`` and
          ``target`` are the two entry ids in sorted order, so a link stored
          in both directions yields one undirected edge. ``weight`` is the
          linker's similarity score rounded to 4 decimals.
    """
    entries = list(self._entries.values())
    if topic_filter:
        wanted = topic_filter.lower()
        entries = [
            e for e in entries
            if any(t.lower() == wanted for t in e.topics)
        ]

    # Index the exported entries by id: one lookup answers both "is the
    # link target part of this (sub)graph?" and "which entry is it?".
    by_id = {e.id: e for e in entries}

    nodes = [
        {
            "id": e.id,
            "title": e.title,
            # Copy so that callers mutating the export cannot alter the
            # entry's own topic list.
            "topics": list(e.topics),
            "source": e.source,
            "created_at": e.created_at,
        }
        for e in entries
    ]

    # Build edges from links, dedup (A→B and B→A become one edge).
    seen_edges: set[tuple[str, str]] = set()
    edges = []
    for e in entries:
        for linked_id in e.links:
            # A link from an entry to itself is not a connection worth
            # drawing; skip it instead of emitting a self-loop.
            if linked_id == e.id:
                continue
            linked = by_id.get(linked_id)
            if linked is None:
                # Target is unknown or was removed by the topic filter.
                continue
            pair = (min(e.id, linked_id), max(e.id, linked_id))
            if pair in seen_edges:
                continue
            seen_edges.add(pair)
            # Compute weight via linker for a live similarity score.
            weight = self.linker.compute_similarity(e, linked)
            edges.append({
                "source": pair[0],
                "target": pair[1],
                "weight": round(weight, 4),
            })
    return {"nodes": nodes, "edges": edges}
def stats(self) -> dict:
entries = list(self._entries.values())
total_links = sum(len(e.links) for e in entries)

View File

@@ -262,6 +262,75 @@ def test_semantic_search_vs_keyword_relevance():
assert results[0].title == "Python scripting"
def test_graph_data_empty_archive():
    """An archive with no entries exports an empty node and edge list."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        exported = store.graph_data()
        assert exported == {"nodes": [], "edges": []}
def test_graph_data_nodes_and_edges():
    """Every node carries the exported fields and similar entries share an edge."""
    required_fields = ("id", "title", "topics", "source", "created_at")
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        first = ingest_event(store, title="Python automation", content="Building automation tools in Python", topics=["code"])
        second = ingest_event(store, title="Python scripting", content="Writing automation scripts using Python", topics=["code"])
        ingest_event(store, title="Cooking", content="Making pasta carbonara", topics=["food"])

        graph = store.graph_data()
        assert len(graph["nodes"]) == 3

        # Each node must expose the full set of exported fields.
        for node in graph["nodes"]:
            for field in required_fields:
                assert field in node

        # The two Python entries should be connected; orientation of the
        # stored pair does not matter, so compare as unordered pairs.
        undirected = {
            frozenset((edge["source"], edge["target"]))
            for edge in graph["edges"]
        }
        assert frozenset((first.id, second.id)) in undirected

        # Every edge carries a weight within the unit interval.
        for edge in graph["edges"]:
            assert "weight" in edge
            assert 0 <= edge["weight"] <= 1
def test_graph_data_topic_filter():
    """topic_filter exports only matching entries and edges between them."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e1 = ingest_event(archive, title="A", content="code stuff", topics=["code"])
        e2 = ingest_event(archive, title="B", content="more code", topics=["code"])
        e3 = ingest_event(archive, title="C", content="food stuff", topics=["food"])
        data = archive.graph_data(topic_filter="code")
        node_ids = {n["id"] for n in data["nodes"]}
        assert e1.id in node_ids
        assert e2.id in node_ids
        # The non-matching entry must be excluded from the subgraph.
        assert e3.id not in node_ids
        assert len(data["nodes"]) == 2
        # A subgraph may only contain edges whose endpoints are both
        # exported nodes; nothing may point at a filtered-out entry.
        for edge in data["edges"]:
            assert edge["source"] in node_ids
            assert edge["target"] in node_ids
def test_graph_data_deduplicates_edges():
    """Bidirectional links should produce a single edge, not two."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        first = ingest_event(store, title="Python automation", content="Building automation tools in Python")
        second = ingest_event(store, title="Python scripting", content="Writing automation scripts using Python")
        graph = store.graph_data()

        # Tally the edges joining the two entries, whatever their orientation.
        endpoints = {first.id, second.id}
        joining = sum(
            1
            for edge in graph["edges"]
            if {edge["source"], edge["target"]} == endpoints
        )
        # NOTE(review): "<= 1" also passes when no edge was created at all;
        # confirm whether exactly one edge is the intended expectation.
        assert joining <= 1, "Should not have duplicate bidirectional edges"
def test_archive_topic_counts():
with tempfile.TemporaryDirectory() as tmp:
path = Path(tmp) / "test_archive.json"