"""Tests for Mnemosyne's graph cluster analysis features.

Covers: graph_clusters, hub_entries, bridge_entries, and rebuild_links.
"""

import tempfile
from pathlib import Path

import pytest

from nexus.mnemosyne.archive import MnemosyneArchive
from nexus.mnemosyne.entry import ArchiveEntry


@pytest.fixture
def archive():
    """Yield a fresh archive backed by a throwaway temp directory."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        yield MnemosyneArchive(archive_path=Path(tmp_dir) / "test_archive.json")


def _make_entry(title="Test", content="test content", topics=None):
    """Build an ArchiveEntry with test-friendly defaults."""
    return ArchiveEntry(title=title, content=content, topics=topics or [])


def _link_pair(first, second):
    """Create a symmetric manual link between two entries."""
    first.links.append(second.id)
    second.links.append(first.id)


class TestGraphClusters:
    """graph_clusters() should discover connected components."""

    def test_empty_archive(self, archive):
        assert archive.graph_clusters() == []

    def test_single_orphan(self, archive):
        archive.add(_make_entry("Lone entry"), auto_link=False)
        # min_size=1 keeps orphan nodes in the result.
        result = archive.graph_clusters(min_size=1)
        assert len(result) == 1
        assert result[0]["size"] == 1
        assert result[0]["density"] == 0.0

    def test_single_orphan_filtered(self, archive):
        archive.add(_make_entry("Lone entry"), auto_link=False)
        assert archive.graph_clusters(min_size=2) == []

    def test_two_linked_entries(self, archive):
        """A manually linked pair forms exactly one cluster."""
        first = archive.add(_make_entry("Alpha dogs", "canine training"), auto_link=False)
        second = archive.add(_make_entry("Beta cats", "feline behavior"), auto_link=False)
        _link_pair(first, second)
        archive._save()

        result = archive.graph_clusters(min_size=2)
        assert len(result) == 1
        assert result[0]["size"] == 2
        assert result[0]["internal_edges"] == 1
        assert result[0]["density"] == 1.0  # one edge out of one possible

    def test_two_separate_clusters(self, archive):
        """Disconnected groups show up as distinct clusters."""
        a1 = archive.add(_make_entry("AI models", "neural networks"), auto_link=False)
        a2 = archive.add(_make_entry("AI training", "gradient descent"), auto_link=False)
        b1 = archive.add(_make_entry("Cooking pasta", "italian recipes"), auto_link=False)
        b2 = archive.add(_make_entry("Cooking sauces", "tomato basil"), auto_link=False)

        _link_pair(a1, a2)  # cluster A
        _link_pair(b1, b2)  # cluster B
        archive._save()

        result = archive.graph_clusters(min_size=2)
        assert len(result) == 2
        assert sorted(c["size"] for c in result) == [2, 2]

    def test_cluster_topics(self, archive):
        """Each cluster reports its aggregated topics."""
        first = archive.add(_make_entry("Alpha", "content", topics=["ai", "models"]), auto_link=False)
        second = archive.add(_make_entry("Beta", "content", topics=["ai", "training"]), auto_link=False)
        _link_pair(first, second)
        archive._save()

        result = archive.graph_clusters(min_size=2)
        assert "ai" in result[0]["top_topics"]

    def test_density_calculation(self, archive):
        """A fully connected triangle (3 nodes, 3 edges) has density 1.0."""
        e1 = archive.add(_make_entry("A", "aaa"), auto_link=False)
        e2 = archive.add(_make_entry("B", "bbb"), auto_link=False)
        e3 = archive.add(_make_entry("C", "ccc"), auto_link=False)
        _link_pair(e1, e2)
        _link_pair(e1, e3)
        _link_pair(e2, e3)
        archive._save()

        result = archive.graph_clusters(min_size=2)
        assert len(result) == 1
        assert result[0]["internal_edges"] == 3
        assert result[0]["density"] == 1.0  # 3 edges / 3 possible

    def test_chain_density(self, archive):
        """An A-B-C chain has 2 of 3 possible edges: density 2/3."""
        e1 = archive.add(_make_entry("A", "aaa"), auto_link=False)
        e2 = archive.add(_make_entry("B", "bbb"), auto_link=False)
        e3 = archive.add(_make_entry("C", "ccc"), auto_link=False)
        _link_pair(e1, e2)
        _link_pair(e2, e3)
        archive._save()

        result = archive.graph_clusters(min_size=2)
        assert abs(result[0]["density"] - 2 / 3) < 0.01


class TestHubEntries:
    """hub_entries() should rank entries by degree centrality."""

    def test_empty(self, archive):
        assert archive.hub_entries() == []

    def test_no_links(self, archive):
        archive.add(_make_entry("Lone"), auto_link=False)
        assert archive.hub_entries() == []

    def test_hub_ordering(self, archive):
        """The most-connected entry is ranked first."""
        hub = archive.add(_make_entry("Hub", "central node"), auto_link=False)
        spokes = [
            archive.add(_make_entry("Spoke 1", "content"), auto_link=False),
            archive.add(_make_entry("Spoke 2", "content"), auto_link=False),
            archive.add(_make_entry("Spoke 3", "content"), auto_link=False),
        ]
        # The hub links to every spoke, and each spoke links back.
        for spoke in spokes:
            hub.links.append(spoke.id)
            spoke.links.append(hub.id)
        archive._save()

        ranked = archive.hub_entries()
        assert len(ranked) == 4
        assert ranked[0]["entry"].id == hub.id
        assert ranked[0]["degree"] == 3

    def test_limit(self, archive):
        first = archive.add(_make_entry("A", ""), auto_link=False)
        second = archive.add(_make_entry("B", ""), auto_link=False)
        _link_pair(first, second)
        archive._save()

        assert len(archive.hub_entries(limit=1)) == 1

    def test_inbound_outbound(self, archive):
        """Inbound counts links pointing AT an entry; outbound counts links it holds."""
        source = archive.add(_make_entry("Source", ""), auto_link=False)
        target = archive.add(_make_entry("Target", ""), auto_link=False)
        # Deliberately one-directional: only source -> target.
        source.links.append(target.id)
        archive._save()

        by_id = {h["entry"].id: h for h in archive.hub_entries()}
        assert by_id[source.id]["inbound"] == 0
        assert by_id[source.id]["outbound"] == 1
        assert by_id[target.id]["inbound"] == 1
        assert by_id[target.id]["outbound"] == 0


class TestBridgeEntries:
    """bridge_entries() should detect articulation points."""

    def test_empty(self, archive):
        assert archive.bridge_entries() == []

    def test_no_bridges_in_triangle(self, archive):
        """Removing any node of a triangle leaves it connected — no bridges."""
        e1 = archive.add(_make_entry("A", ""), auto_link=False)
        e2 = archive.add(_make_entry("B", ""), auto_link=False)
        e3 = archive.add(_make_entry("C", ""), auto_link=False)
        _link_pair(e1, e2)
        _link_pair(e1, e3)
        _link_pair(e2, e3)
        archive._save()

        assert archive.bridge_entries() == []

    def test_bridge_in_chain(self, archive):
        """In an A-B-C chain, the middle node is the articulation point."""
        left = archive.add(_make_entry("A", ""), auto_link=False)
        middle = archive.add(_make_entry("B", ""), auto_link=False)
        right = archive.add(_make_entry("C", ""), auto_link=False)
        _link_pair(left, middle)
        _link_pair(middle, right)
        archive._save()

        found = archive.bridge_entries()
        assert len(found) == 1
        assert found[0]["entry"].id == middle.id
        assert found[0]["components_after_removal"] == 2

    def test_no_bridges_in_small_cluster(self, archive):
        """A two-node cluster is below the size needed for bridge detection."""
        first = archive.add(_make_entry("A", ""), auto_link=False)
        second = archive.add(_make_entry("B", ""), auto_link=False)
        _link_pair(first, second)
        archive._save()

        assert archive.bridge_entries() == []


class TestRebuildLinks:
    """rebuild_links() should recompute all links from scratch."""

    def test_empty_archive(self, archive):
        assert archive.rebuild_links() == 0

    def test_creates_links(self, archive):
        """Similar entries end up linked after a rebuild."""
        archive.add(_make_entry("Alpha dogs canine training", "obedience training"), auto_link=False)
        archive.add(_make_entry("Beta dogs canine behavior", "behavior training"), auto_link=False)
        archive.add(_make_entry("Cat food feline nutrition", "fish meals"), auto_link=False)

        assert archive.rebuild_links() > 0

        # The dog-themed entries should have acquired links.
        dog_entries = [e for e in archive._entries.values() if "dog" in e.title.lower()]
        assert any(len(e.links) > 0 for e in dog_entries)

    def test_override_threshold(self, archive):
        """A lower similarity threshold yields at least as many links."""
        archive.add(_make_entry("Alpha dogs", "training"), auto_link=False)
        archive.add(_make_entry("Beta cats", "training"), auto_link=False)
        archive.add(_make_entry("Gamma birds", "training"), auto_link=False)

        # Very permissive threshold first.
        low_links = archive.rebuild_links(threshold=0.01)

        # Wipe links so the second rebuild starts clean.
        for entry in archive._entries.values():
            entry.links = []

        # Strict threshold second.
        high_links = archive.rebuild_links(threshold=0.9)

        assert low_links >= high_links

    def test_rebuild_persists(self, archive):
        """Links created by rebuild survive a reload from disk."""
        archive.add(_make_entry("Alpha dogs", "training"), auto_link=False)
        archive.add(_make_entry("Beta dogs", "training"), auto_link=False)
        archive.rebuild_links()

        # Reload from the same path and verify the links were saved.
        reloaded = MnemosyneArchive(archive_path=archive.path)
        assert sum(len(e.links) for e in reloaded._entries.values()) > 0