test(mnemosyne): graph cluster analysis tests — 21 tests
- graph_clusters: empty, orphans, linked pairs, separate clusters, topics, density - hub_entries: ordering, limit, inbound/outbound counting - bridge_entries: triangle (none), chain (B is bridge), small cluster filtered - rebuild_links: creates links, threshold override, persistence
This commit is contained in:
271
nexus/mnemosyne/tests/test_graph_clusters.py
Normal file
271
nexus/mnemosyne/tests/test_graph_clusters.py
Normal file
@@ -0,0 +1,271 @@
|
||||
"""Tests for Mnemosyne graph cluster analysis features.
|
||||
|
||||
Tests: graph_clusters, hub_entries, bridge_entries, rebuild_links.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
|
||||
from nexus.mnemosyne.archive import MnemosyneArchive
|
||||
from nexus.mnemosyne.entry import ArchiveEntry
|
||||
|
||||
|
||||
@pytest.fixture
def archive():
    """Yield a MnemosyneArchive backed by a throwaway temp-dir JSON file."""
    with tempfile.TemporaryDirectory() as tmpdir:
        archive_file = Path(tmpdir) / "test_archive.json"
        yield MnemosyneArchive(archive_path=archive_file)
|
||||
|
||||
|
||||
def _make_entry(title="Test", content="test content", topics=None):
    """Build an ArchiveEntry with sensible defaults for these tests."""
    return ArchiveEntry(title=title, content=content, topics=topics if topics else [])
|
||||
|
||||
|
||||
class TestGraphClusters:
    """Exercise graph_clusters() connected-component discovery."""

    def test_empty_archive(self, archive):
        assert archive.graph_clusters() == []

    def test_single_orphan(self, archive):
        archive.add(_make_entry("Lone entry"), auto_link=False)
        # With min_size=1, even an unlinked entry counts as a cluster.
        result = archive.graph_clusters(min_size=1)
        assert len(result) == 1
        only = result[0]
        assert only["size"] == 1
        assert only["density"] == 0.0

    def test_single_orphan_filtered(self, archive):
        archive.add(_make_entry("Lone entry"), auto_link=False)
        assert archive.graph_clusters(min_size=2) == []

    def test_two_linked_entries(self, archive):
        """A manually linked pair forms a single two-node cluster."""
        first = archive.add(_make_entry("Alpha dogs", "canine training"), auto_link=False)
        second = archive.add(_make_entry("Beta cats", "feline behavior"), auto_link=False)
        # Wire the link by hand in both directions.
        first.links.append(second.id)
        second.links.append(first.id)
        archive._save()

        result = archive.graph_clusters(min_size=2)
        assert len(result) == 1
        cluster = result[0]
        assert cluster["size"] == 2
        assert cluster["internal_edges"] == 1
        assert cluster["density"] == 1.0  # 1 edge out of 1 possible

    def test_two_separate_clusters(self, archive):
        """Disconnected groups surface as distinct clusters."""
        a1 = archive.add(_make_entry("AI models", "neural networks"), auto_link=False)
        a2 = archive.add(_make_entry("AI training", "gradient descent"), auto_link=False)
        b1 = archive.add(_make_entry("Cooking pasta", "italian recipes"), auto_link=False)
        b2 = archive.add(_make_entry("Cooking sauces", "tomato basil"), auto_link=False)

        # Link within each group, never across them.
        for left, right in ((a1, a2), (b1, b2)):
            left.links.append(right.id)
            right.links.append(left.id)
        archive._save()

        result = archive.graph_clusters(min_size=2)
        assert len(result) == 2
        assert sorted(c["size"] for c in result) == [2, 2]

    def test_cluster_topics(self, archive):
        """Cluster metadata aggregates member topics."""
        first = archive.add(_make_entry("Alpha", "content", topics=["ai", "models"]), auto_link=False)
        second = archive.add(_make_entry("Beta", "content", topics=["ai", "training"]), auto_link=False)
        first.links.append(second.id)
        second.links.append(first.id)
        archive._save()

        result = archive.graph_clusters(min_size=2)
        assert "ai" in result[0]["top_topics"]

    def test_density_calculation(self, archive):
        """A fully connected triangle (3 nodes, 3 edges) has density 1.0."""
        nodes = [
            archive.add(_make_entry(title, body), auto_link=False)
            for title, body in (("A", "aaa"), ("B", "bbb"), ("C", "ccc"))
        ]
        # Connect every node to every other node.
        for node in nodes:
            node.links.extend(other.id for other in nodes if other is not node)
        archive._save()

        result = archive.graph_clusters(min_size=2)
        assert len(result) == 1
        assert result[0]["internal_edges"] == 3
        assert result[0]["density"] == 1.0  # 3 edges / 3 possible

    def test_chain_density(self, archive):
        """An A-B-C chain has 2 of 3 possible edges: density 2/3."""
        a = archive.add(_make_entry("A", "aaa"), auto_link=False)
        b = archive.add(_make_entry("B", "bbb"), auto_link=False)
        c = archive.add(_make_entry("C", "ccc"), auto_link=False)
        a.links.append(b.id)
        b.links.extend([a.id, c.id])
        c.links.append(b.id)
        archive._save()

        result = archive.graph_clusters(min_size=2)
        assert abs(result[0]["density"] - 2 / 3) < 0.01
|
||||
|
||||
|
||||
class TestHubEntries:
    """Exercise hub_entries() degree-centrality ranking."""

    def test_empty(self, archive):
        assert archive.hub_entries() == []

    def test_no_links(self, archive):
        archive.add(_make_entry("Lone"), auto_link=False)
        assert archive.hub_entries() == []

    def test_hub_ordering(self, archive):
        """The most-connected entry is ranked first."""
        hub = archive.add(_make_entry("Hub", "central node"), auto_link=False)
        spokes = [
            archive.add(_make_entry(f"Spoke {i}", "content"), auto_link=False)
            for i in (1, 2, 3)
        ]

        # Star topology: hub links out to every spoke and back.
        hub.links.extend(spoke.id for spoke in spokes)
        for spoke in spokes:
            spoke.links.append(hub.id)
        archive._save()

        ranked = archive.hub_entries()
        assert len(ranked) == 4
        assert ranked[0]["entry"].id == hub.id
        assert ranked[0]["degree"] == 3

    def test_limit(self, archive):
        left = archive.add(_make_entry("A", ""), auto_link=False)
        right = archive.add(_make_entry("B", ""), auto_link=False)
        left.links.append(right.id)
        right.links.append(left.id)
        archive._save()

        assert len(archive.hub_entries(limit=1)) == 1

    def test_inbound_outbound(self, archive):
        """Inbound counts links pointing AT an entry; outbound counts links it owns."""
        source = archive.add(_make_entry("Source", ""), auto_link=False)
        target = archive.add(_make_entry("Target", ""), auto_link=False)
        source.links.append(target.id)  # one-directional: source -> target only
        archive._save()

        by_id = {hub["entry"].id: hub for hub in archive.hub_entries()}
        assert by_id[source.id]["inbound"] == 0
        assert by_id[source.id]["outbound"] == 1
        assert by_id[target.id]["inbound"] == 1
        assert by_id[target.id]["outbound"] == 0
|
||||
|
||||
|
||||
class TestBridgeEntries:
    """Exercise bridge_entries() articulation-point detection."""

    def test_empty(self, archive):
        assert archive.bridge_entries() == []

    def test_no_bridges_in_triangle(self, archive):
        """A complete triangle has no articulation points."""
        nodes = [archive.add(_make_entry(title, ""), auto_link=False) for title in "ABC"]
        # Fully connect all three nodes.
        for node in nodes:
            node.links.extend(other.id for other in nodes if other is not node)
        archive._save()

        assert archive.bridge_entries() == []

    def test_bridge_in_chain(self, archive):
        """In an A-B-C chain, the middle node is the sole articulation point."""
        a = archive.add(_make_entry("A", ""), auto_link=False)
        b = archive.add(_make_entry("B", ""), auto_link=False)
        c = archive.add(_make_entry("C", ""), auto_link=False)
        a.links.append(b.id)
        b.links.extend([a.id, c.id])
        c.links.append(b.id)
        archive._save()

        bridges = archive.bridge_entries()
        assert len(bridges) == 1
        assert bridges[0]["entry"].id == b.id
        assert bridges[0]["components_after_removal"] == 2

    def test_no_bridges_in_small_cluster(self, archive):
        """Two-node clusters are below the bridge-detection size floor."""
        left = archive.add(_make_entry("A", ""), auto_link=False)
        right = archive.add(_make_entry("B", ""), auto_link=False)
        left.links.append(right.id)
        right.links.append(left.id)
        archive._save()

        assert archive.bridge_entries() == []
|
||||
|
||||
|
||||
class TestRebuildLinks:
    """Exercise rebuild_links() full link recomputation."""

    def test_empty_archive(self, archive):
        assert archive.rebuild_links() == 0

    def test_creates_links(self, archive):
        """Similar entries get linked during a rebuild."""
        archive.add(_make_entry("Alpha dogs canine training", "obedience training"), auto_link=False)
        archive.add(_make_entry("Beta dogs canine behavior", "behavior training"), auto_link=False)
        archive.add(_make_entry("Cat food feline nutrition", "fish meals"), auto_link=False)

        assert archive.rebuild_links() > 0

        # The two dog-themed entries should now carry links.
        dog_entries = [
            entry for entry in archive._entries.values()
            if "dog" in entry.title.lower()
        ]
        assert any(entry.links for entry in dog_entries)

    def test_override_threshold(self, archive):
        """A lower similarity threshold yields at least as many links."""
        for title in ("Alpha dogs", "Beta cats", "Gamma birds"):
            archive.add(_make_entry(title, "training"), auto_link=False)

        low_count = archive.rebuild_links(threshold=0.01)  # very permissive

        # Wipe links so the second rebuild starts from a clean slate.
        for entry in archive._entries.values():
            entry.links = []

        high_count = archive.rebuild_links(threshold=0.9)  # very strict

        assert low_count >= high_count

    def test_rebuild_persists(self, archive):
        """Links created by a rebuild survive a reload from disk."""
        archive.add(_make_entry("Alpha dogs", "training"), auto_link=False)
        archive.add(_make_entry("Beta dogs", "training"), auto_link=False)
        archive.rebuild_links()

        # Open a second archive against the same file and verify links survived.
        reloaded = MnemosyneArchive(archive_path=archive.path)
        assert sum(len(entry.links) for entry in reloaded._entries.values()) > 0
|
||||
Reference in New Issue
Block a user