"""Tests for Mnemosyne graph cluster analysis features. Tests: graph_clusters, hub_entries, bridge_entries, rebuild_links. """ import pytest from pathlib import Path import tempfile from nexus.mnemosyne.archive import MnemosyneArchive from nexus.mnemosyne.entry import ArchiveEntry @pytest.fixture def archive(): """Create a fresh archive in a temp directory.""" with tempfile.TemporaryDirectory() as tmp: path = Path(tmp) / "test_archive.json" a = MnemosyneArchive(archive_path=path) yield a def _make_entry(title="Test", content="test content", topics=None): return ArchiveEntry(title=title, content=content, topics=topics or []) class TestGraphClusters: """Test graph_clusters() connected component discovery.""" def test_empty_archive(self, archive): clusters = archive.graph_clusters() assert clusters == [] def test_single_orphan(self, archive): archive.add(_make_entry("Lone entry"), auto_link=False) # min_size=1 includes orphans clusters = archive.graph_clusters(min_size=1) assert len(clusters) == 1 assert clusters[0]["size"] == 1 assert clusters[0]["density"] == 0.0 def test_single_orphan_filtered(self, archive): archive.add(_make_entry("Lone entry"), auto_link=False) clusters = archive.graph_clusters(min_size=2) assert clusters == [] def test_two_linked_entries(self, archive): """Two manually linked entries form a cluster.""" e1 = archive.add(_make_entry("Alpha dogs", "canine training"), auto_link=False) e2 = archive.add(_make_entry("Beta cats", "feline behavior"), auto_link=False) # Manual link e1.links.append(e2.id) e2.links.append(e1.id) archive._save() clusters = archive.graph_clusters(min_size=2) assert len(clusters) == 1 assert clusters[0]["size"] == 2 assert clusters[0]["internal_edges"] == 1 assert clusters[0]["density"] == 1.0 # 1 edge out of 1 possible def test_two_separate_clusters(self, archive): """Two disconnected groups form separate clusters.""" a1 = archive.add(_make_entry("AI models", "neural networks"), auto_link=False) a2 = archive.add(_make_entry("AI training", "gradient descent"), auto_link=False) b1 = archive.add(_make_entry("Cooking pasta", "italian recipes"), auto_link=False) b2 = archive.add(_make_entry("Cooking sauces", "tomato basil"), auto_link=False) # Link cluster A a1.links.append(a2.id) a2.links.append(a1.id) # Link cluster B b1.links.append(b2.id) b2.links.append(b1.id) archive._save() clusters = archive.graph_clusters(min_size=2) assert len(clusters) == 2 sizes = sorted(c["size"] for c in clusters) assert sizes == [2, 2] def test_cluster_topics(self, archive): """Cluster includes aggregated topics.""" e1 = archive.add(_make_entry("Alpha", "content", topics=["ai", "models"]), auto_link=False) e2 = archive.add(_make_entry("Beta", "content", topics=["ai", "training"]), auto_link=False) e1.links.append(e2.id) e2.links.append(e1.id) archive._save() clusters = archive.graph_clusters(min_size=2) assert "ai" in clusters[0]["top_topics"] def test_density_calculation(self, archive): """Triangle (3 nodes, 3 edges) has density 1.0.""" e1 = archive.add(_make_entry("A", "aaa"), auto_link=False) e2 = archive.add(_make_entry("B", "bbb"), auto_link=False) e3 = archive.add(_make_entry("C", "ccc"), auto_link=False) # Fully connected triangle for e, others in [(e1, [e2, e3]), (e2, [e1, e3]), (e3, [e1, e2])]: for o in others: e.links.append(o.id) archive._save() clusters = archive.graph_clusters(min_size=2) assert len(clusters) == 1 assert clusters[0]["internal_edges"] == 3 assert clusters[0]["density"] == 1.0 # 3 edges / 3 possible def test_chain_density(self, archive): """A-B-C chain has density 2/3 (2 edges out of 3 possible).""" e1 = archive.add(_make_entry("A", "aaa"), auto_link=False) e2 = archive.add(_make_entry("B", "bbb"), auto_link=False) e3 = archive.add(_make_entry("C", "ccc"), auto_link=False) # Chain: A-B-C e1.links.append(e2.id) e2.links.extend([e1.id, e3.id]) e3.links.append(e2.id) archive._save() clusters = archive.graph_clusters(min_size=2) assert abs(clusters[0]["density"] - 2/3) < 0.01 class TestHubEntries: """Test hub_entries() degree centrality ranking.""" def test_empty(self, archive): assert archive.hub_entries() == [] def test_no_links(self, archive): archive.add(_make_entry("Lone"), auto_link=False) assert archive.hub_entries() == [] def test_hub_ordering(self, archive): """Entry with most links is ranked first.""" e1 = archive.add(_make_entry("Hub", "central node"), auto_link=False) e2 = archive.add(_make_entry("Spoke 1", "content"), auto_link=False) e3 = archive.add(_make_entry("Spoke 2", "content"), auto_link=False) e4 = archive.add(_make_entry("Spoke 3", "content"), auto_link=False) # e1 connects to all spokes e1.links.extend([e2.id, e3.id, e4.id]) e2.links.append(e1.id) e3.links.append(e1.id) e4.links.append(e1.id) archive._save() hubs = archive.hub_entries() assert len(hubs) == 4 assert hubs[0]["entry"].id == e1.id assert hubs[0]["degree"] == 3 def test_limit(self, archive): e1 = archive.add(_make_entry("A", ""), auto_link=False) e2 = archive.add(_make_entry("B", ""), auto_link=False) e1.links.append(e2.id) e2.links.append(e1.id) archive._save() assert len(archive.hub_entries(limit=1)) == 1 def test_inbound_outbound(self, archive): """Inbound counts links TO an entry, outbound counts links FROM it.""" e1 = archive.add(_make_entry("Source", ""), auto_link=False) e2 = archive.add(_make_entry("Target", ""), auto_link=False) # Only e1 links to e2 e1.links.append(e2.id) archive._save() hubs = archive.hub_entries() h1 = next(h for h in hubs if h["entry"].id == e1.id) h2 = next(h for h in hubs if h["entry"].id == e2.id) assert h1["inbound"] == 0 assert h1["outbound"] == 1 assert h2["inbound"] == 1 assert h2["outbound"] == 0 class TestBridgeEntries: """Test bridge_entries() articulation point detection.""" def test_empty(self, archive): assert archive.bridge_entries() == [] def test_no_bridges_in_triangle(self, archive): """Fully connected triangle has no articulation points.""" e1 = archive.add(_make_entry("A", ""), auto_link=False) e2 = archive.add(_make_entry("B", ""), auto_link=False) e3 = archive.add(_make_entry("C", ""), auto_link=False) for e, others in [(e1, [e2, e3]), (e2, [e1, e3]), (e3, [e1, e2])]: for o in others: e.links.append(o.id) archive._save() assert archive.bridge_entries() == [] def test_bridge_in_chain(self, archive): """A-B-C chain: B is the articulation point.""" e1 = archive.add(_make_entry("A", ""), auto_link=False) e2 = archive.add(_make_entry("B", ""), auto_link=False) e3 = archive.add(_make_entry("C", ""), auto_link=False) e1.links.append(e2.id) e2.links.extend([e1.id, e3.id]) e3.links.append(e2.id) archive._save() bridges = archive.bridge_entries() assert len(bridges) == 1 assert bridges[0]["entry"].id == e2.id assert bridges[0]["components_after_removal"] == 2 def test_no_bridges_in_small_cluster(self, archive): """Two-node clusters are too small for bridge detection.""" e1 = archive.add(_make_entry("A", ""), auto_link=False) e2 = archive.add(_make_entry("B", ""), auto_link=False) e1.links.append(e2.id) e2.links.append(e1.id) archive._save() assert archive.bridge_entries() == [] class TestRebuildLinks: """Test rebuild_links() full recomputation.""" def test_empty_archive(self, archive): assert archive.rebuild_links() == 0 def test_creates_links(self, archive): """Rebuild creates links between similar entries.""" archive.add(_make_entry("Alpha dogs canine training", "obedience training"), auto_link=False) archive.add(_make_entry("Beta dogs canine behavior", "behavior training"), auto_link=False) archive.add(_make_entry("Cat food feline nutrition", "fish meals"), auto_link=False) total = archive.rebuild_links() assert total > 0 # Check that dog entries are linked to each other entries = list(archive._entries.values()) dog_entries = [e for e in entries if "dog" in e.title.lower()] assert any(len(e.links) > 0 for e in dog_entries) def test_override_threshold(self, archive): """Lower threshold creates more links.""" archive.add(_make_entry("Alpha dogs", "training"), auto_link=False) archive.add(_make_entry("Beta cats", "training"), auto_link=False) archive.add(_make_entry("Gamma birds", "training"), auto_link=False) # Very low threshold = more links low_links = archive.rebuild_links(threshold=0.01) # Reset for e in archive._entries.values(): e.links = [] # Higher threshold = fewer links high_links = archive.rebuild_links(threshold=0.9) assert low_links >= high_links def test_rebuild_persists(self, archive): """Rebuild saves to disk.""" archive.add(_make_entry("Alpha dogs", "training"), auto_link=False) archive.add(_make_entry("Beta dogs", "training"), auto_link=False) archive.rebuild_links() # Reload and verify links survived archive2 = MnemosyneArchive(archive_path=archive.path) entries = list(archive2._entries.values()) total_links = sum(len(e.links) for e in entries) assert total_links > 0