Test plan:
- graph_clusters: empty, orphans, linked pairs, separate clusters, topics, density
- hub_entries: ordering, limit, inbound/outbound counting
- bridge_entries: triangle (none), chain (B is bridge), small cluster filtered
- rebuild_links: creates links, threshold override, persistence

272 lines · 10 KiB · Python
"""Tests for Mnemosyne graph cluster analysis features.
|
|
|
|
Tests: graph_clusters, hub_entries, bridge_entries, rebuild_links.
|
|
"""
|
|
|
|
import tempfile
from pathlib import Path

import pytest

from nexus.mnemosyne.archive import MnemosyneArchive
from nexus.mnemosyne.entry import ArchiveEntry
|
|
|
|
|
|
@pytest.fixture
def archive():
    """Yield a MnemosyneArchive backed by a throwaway temp directory.

    The TemporaryDirectory (and the JSON file inside it) is removed
    automatically once the consuming test finishes.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        store_path = Path(tmpdir) / "test_archive.json"
        yield MnemosyneArchive(archive_path=store_path)
|
|
|
|
|
|
def _make_entry(title="Test", content="test content", topics=None):
    """Build an ArchiveEntry with test-friendly defaults (empty topic list)."""
    topic_list = topics or []
    return ArchiveEntry(title=title, content=content, topics=topic_list)
|
|
|
|
|
|
class TestGraphClusters:
    """Exercise graph_clusters() connected-component discovery."""

    @staticmethod
    def _link(first, second):
        """Create a mutual (bidirectional) link between two entries."""
        first.links.append(second.id)
        second.links.append(first.id)

    def test_empty_archive(self, archive):
        assert archive.graph_clusters() == []

    def test_single_orphan(self, archive):
        archive.add(_make_entry("Lone entry"), auto_link=False)
        # With min_size=1 even an unlinked entry counts as a cluster.
        result = archive.graph_clusters(min_size=1)
        assert len(result) == 1
        assert result[0]["size"] == 1
        assert result[0]["density"] == 0.0

    def test_single_orphan_filtered(self, archive):
        archive.add(_make_entry("Lone entry"), auto_link=False)
        assert archive.graph_clusters(min_size=2) == []

    def test_two_linked_entries(self, archive):
        """Two manually linked entries form a cluster."""
        dogs = archive.add(_make_entry("Alpha dogs", "canine training"), auto_link=False)
        cats = archive.add(_make_entry("Beta cats", "feline behavior"), auto_link=False)
        self._link(dogs, cats)
        archive._save()

        result = archive.graph_clusters(min_size=2)
        assert len(result) == 1
        assert result[0]["size"] == 2
        assert result[0]["internal_edges"] == 1
        # A pair carrying its single possible edge is fully dense.
        assert result[0]["density"] == 1.0

    def test_two_separate_clusters(self, archive):
        """Two disconnected groups form separate clusters."""
        a1 = archive.add(_make_entry("AI models", "neural networks"), auto_link=False)
        a2 = archive.add(_make_entry("AI training", "gradient descent"), auto_link=False)
        b1 = archive.add(_make_entry("Cooking pasta", "italian recipes"), auto_link=False)
        b2 = archive.add(_make_entry("Cooking sauces", "tomato basil"), auto_link=False)

        self._link(a1, a2)  # cluster A
        self._link(b1, b2)  # cluster B
        archive._save()

        result = archive.graph_clusters(min_size=2)
        assert len(result) == 2
        assert sorted(c["size"] for c in result) == [2, 2]

    def test_cluster_topics(self, archive):
        """Cluster includes aggregated topics."""
        left = archive.add(_make_entry("Alpha", "content", topics=["ai", "models"]), auto_link=False)
        right = archive.add(_make_entry("Beta", "content", topics=["ai", "training"]), auto_link=False)
        self._link(left, right)
        archive._save()

        result = archive.graph_clusters(min_size=2)
        assert "ai" in result[0]["top_topics"]

    def test_density_calculation(self, archive):
        """Triangle (3 nodes, 3 edges) has density 1.0."""
        e1 = archive.add(_make_entry("A", "aaa"), auto_link=False)
        e2 = archive.add(_make_entry("B", "bbb"), auto_link=False)
        e3 = archive.add(_make_entry("C", "ccc"), auto_link=False)
        # Wire every pair: a fully connected triangle.
        self._link(e1, e2)
        self._link(e1, e3)
        self._link(e2, e3)
        archive._save()

        result = archive.graph_clusters(min_size=2)
        assert len(result) == 1
        assert result[0]["internal_edges"] == 3
        assert result[0]["density"] == 1.0  # 3 edges / 3 possible

    def test_chain_density(self, archive):
        """A-B-C chain has density 2/3 (2 edges out of 3 possible)."""
        head = archive.add(_make_entry("A", "aaa"), auto_link=False)
        middle = archive.add(_make_entry("B", "bbb"), auto_link=False)
        tail = archive.add(_make_entry("C", "ccc"), auto_link=False)
        self._link(head, middle)
        self._link(middle, tail)
        archive._save()

        result = archive.graph_clusters(min_size=2)
        assert abs(result[0]["density"] - 2 / 3) < 0.01
|
|
|
|
|
|
class TestHubEntries:
    """Exercise hub_entries() degree-centrality ranking."""

    def test_empty(self, archive):
        assert archive.hub_entries() == []

    def test_no_links(self, archive):
        archive.add(_make_entry("Lone"), auto_link=False)
        assert archive.hub_entries() == []

    def test_hub_ordering(self, archive):
        """Entry with most links is ranked first."""
        center = archive.add(_make_entry("Hub", "central node"), auto_link=False)
        spokes = [
            archive.add(_make_entry("Spoke 1", "content"), auto_link=False),
            archive.add(_make_entry("Spoke 2", "content"), auto_link=False),
            archive.add(_make_entry("Spoke 3", "content"), auto_link=False),
        ]

        # Star topology: the hub touches every spoke, each spoke only the hub.
        for spoke in spokes:
            center.links.append(spoke.id)
            spoke.links.append(center.id)
        archive._save()

        hubs = archive.hub_entries()
        assert len(hubs) == 4
        assert hubs[0]["entry"].id == center.id
        assert hubs[0]["degree"] == 3

    def test_limit(self, archive):
        first = archive.add(_make_entry("A", ""), auto_link=False)
        second = archive.add(_make_entry("B", ""), auto_link=False)
        first.links.append(second.id)
        second.links.append(first.id)
        archive._save()

        assert len(archive.hub_entries(limit=1)) == 1

    def test_inbound_outbound(self, archive):
        """Inbound counts links TO an entry, outbound counts links FROM it."""
        source = archive.add(_make_entry("Source", ""), auto_link=False)
        target = archive.add(_make_entry("Target", ""), auto_link=False)
        # One-way link: source -> target only.
        source.links.append(target.id)
        archive._save()

        stats = {h["entry"].id: h for h in archive.hub_entries()}
        assert stats[source.id]["inbound"] == 0
        assert stats[source.id]["outbound"] == 1
        assert stats[target.id]["inbound"] == 1
        assert stats[target.id]["outbound"] == 0
|
|
|
|
|
|
class TestBridgeEntries:
    """Exercise bridge_entries() articulation-point detection."""

    def test_empty(self, archive):
        assert archive.bridge_entries() == []

    def test_no_bridges_in_triangle(self, archive):
        """Fully connected triangle has no articulation points."""
        nodes = [
            archive.add(_make_entry("A", ""), auto_link=False),
            archive.add(_make_entry("B", ""), auto_link=False),
            archive.add(_make_entry("C", ""), auto_link=False),
        ]
        # Link every node to every other node.
        for node in nodes:
            for other in nodes:
                if other is not node:
                    node.links.append(other.id)
        archive._save()

        assert archive.bridge_entries() == []

    def test_bridge_in_chain(self, archive):
        """A-B-C chain: B is the articulation point."""
        left = archive.add(_make_entry("A", ""), auto_link=False)
        center = archive.add(_make_entry("B", ""), auto_link=False)
        right = archive.add(_make_entry("C", ""), auto_link=False)
        left.links.append(center.id)
        center.links.extend([left.id, right.id])
        right.links.append(center.id)
        archive._save()

        bridges = archive.bridge_entries()
        assert len(bridges) == 1
        assert bridges[0]["entry"].id == center.id
        # Removing B splits the chain into two components.
        assert bridges[0]["components_after_removal"] == 2

    def test_no_bridges_in_small_cluster(self, archive):
        """Two-node clusters are too small for bridge detection."""
        one = archive.add(_make_entry("A", ""), auto_link=False)
        two = archive.add(_make_entry("B", ""), auto_link=False)
        one.links.append(two.id)
        two.links.append(one.id)
        archive._save()

        assert archive.bridge_entries() == []
|
|
|
|
|
|
class TestRebuildLinks:
    """Exercise rebuild_links() full recomputation."""

    def test_empty_archive(self, archive):
        assert archive.rebuild_links() == 0

    def test_creates_links(self, archive):
        """Rebuild creates links between similar entries."""
        archive.add(_make_entry("Alpha dogs canine training", "obedience training"), auto_link=False)
        archive.add(_make_entry("Beta dogs canine behavior", "behavior training"), auto_link=False)
        archive.add(_make_entry("Cat food feline nutrition", "fish meals"), auto_link=False)

        assert archive.rebuild_links() > 0

        # The two dog entries should end up linked to each other.
        dog_entries = [
            entry
            for entry in archive._entries.values()
            if "dog" in entry.title.lower()
        ]
        assert any(entry.links for entry in dog_entries)

    def test_override_threshold(self, archive):
        """Lower threshold creates more links."""
        archive.add(_make_entry("Alpha dogs", "training"), auto_link=False)
        archive.add(_make_entry("Beta cats", "training"), auto_link=False)
        archive.add(_make_entry("Gamma birds", "training"), auto_link=False)

        # A near-zero threshold links almost everything.
        low_links = archive.rebuild_links(threshold=0.01)

        # Wipe all links so the second rebuild starts from scratch.
        for entry in archive._entries.values():
            entry.links = []

        # A strict threshold produces at most as many links.
        high_links = archive.rebuild_links(threshold=0.9)

        assert low_links >= high_links

    def test_rebuild_persists(self, archive):
        """Rebuild saves to disk."""
        archive.add(_make_entry("Alpha dogs", "training"), auto_link=False)
        archive.add(_make_entry("Beta dogs", "training"), auto_link=False)
        archive.rebuild_links()

        # Reload from the same path and verify links survived.
        reloaded = MnemosyneArchive(archive_path=archive.path)
        total_links = sum(len(e.links) for e in reloaded._entries.values())
        assert total_links > 0