Files
the-nexus/nexus/mnemosyne/tests/test_graph_clusters.py
Alexander Whitestone 3848b6f4ea
Some checks failed
CI / test (pull_request) Failing after 10s
CI / validate (pull_request) Failing after 14s
Review Approval Gate / verify-review (pull_request) Failing after 3s
test(mnemosyne): graph cluster analysis tests — 22 tests
- graph_clusters: empty, orphans, linked pairs, separate clusters, topics, density
- hub_entries: ordering, limit, inbound/outbound counting
- bridge_entries: triangle (none), chain (B is bridge), small cluster filtered
- rebuild_links: creates links, threshold override, persistence
2026-04-11 18:44:58 -04:00

272 lines
10 KiB
Python

"""Tests for Mnemosyne graph cluster analysis features.
Tests: graph_clusters, hub_entries, bridge_entries, rebuild_links.
"""
import pytest
from pathlib import Path
import tempfile
from nexus.mnemosyne.archive import MnemosyneArchive
from nexus.mnemosyne.entry import ArchiveEntry
@pytest.fixture
def archive():
    """Yield a fresh MnemosyneArchive stored inside a temporary directory.

    The archive file is named ``test_archive.json``. The temporary
    directory is cleaned up when the ``with`` block exits, i.e. after the
    test that requested this fixture has finished.
    """
    with tempfile.TemporaryDirectory() as workdir:
        # Yield from inside the context manager so the directory is still
        # present for the whole duration of the test.
        yield MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
def _make_entry(title="Test", content="test content", topics=None):
    """Build an ArchiveEntry for tests.

    A missing (or otherwise falsy) ``topics`` argument is replaced by a new
    empty list, so no list object is shared between calls.
    """
    if not topics:
        topics = []
    return ArchiveEntry(title=title, content=content, topics=topics)
class TestGraphClusters:
    """Exercise graph_clusters() connected component discovery."""

    @staticmethod
    def _link_both_ways(first, second):
        """Append a reciprocal link between two entries (does not save)."""
        first.links.append(second.id)
        second.links.append(first.id)

    def test_empty_archive(self, archive):
        """An archive with no entries yields no clusters."""
        assert archive.graph_clusters() == []

    def test_single_orphan(self, archive):
        """With min_size=1 a lone, unlinked entry is reported as a cluster."""
        archive.add(_make_entry("Lone entry"), auto_link=False)

        found = archive.graph_clusters(min_size=1)

        assert len(found) == 1
        only = found[0]
        assert only["size"] == 1
        assert only["density"] == 0.0

    def test_single_orphan_filtered(self, archive):
        """With min_size=2 a lone entry is not reported."""
        archive.add(_make_entry("Lone entry"), auto_link=False)
        assert archive.graph_clusters(min_size=2) == []

    def test_two_linked_entries(self, archive):
        """Two manually linked entries form a cluster."""
        dogs = archive.add(_make_entry("Alpha dogs", "canine training"), auto_link=False)
        cats = archive.add(_make_entry("Beta cats", "feline behavior"), auto_link=False)
        # Links are written by hand because auto_link was disabled above.
        self._link_both_ways(dogs, cats)
        archive._save()

        found = archive.graph_clusters(min_size=2)

        assert len(found) == 1
        pair = found[0]
        assert pair["size"] == 2
        assert pair["internal_edges"] == 1
        # One edge present out of one possible between two nodes.
        assert pair["density"] == 1.0

    def test_two_separate_clusters(self, archive):
        """Two disconnected groups form separate clusters."""
        ai_models = archive.add(_make_entry("AI models", "neural networks"), auto_link=False)
        ai_training = archive.add(_make_entry("AI training", "gradient descent"), auto_link=False)
        pasta = archive.add(_make_entry("Cooking pasta", "italian recipes"), auto_link=False)
        sauces = archive.add(_make_entry("Cooking sauces", "tomato basil"), auto_link=False)
        # Each pair is linked internally; nothing connects the two pairs.
        self._link_both_ways(ai_models, ai_training)
        self._link_both_ways(pasta, sauces)
        archive._save()

        found = archive.graph_clusters(min_size=2)

        assert len(found) == 2
        assert sorted(cluster["size"] for cluster in found) == [2, 2]

    def test_cluster_topics(self, archive):
        """Cluster includes aggregated topics."""
        alpha = archive.add(
            _make_entry("Alpha", "content", topics=["ai", "models"]), auto_link=False
        )
        beta = archive.add(
            _make_entry("Beta", "content", topics=["ai", "training"]), auto_link=False
        )
        self._link_both_ways(alpha, beta)
        archive._save()

        found = archive.graph_clusters(min_size=2)

        # "ai" is the one topic both entries share.
        assert "ai" in found[0]["top_topics"]

    def test_density_calculation(self, archive):
        """Triangle (3 nodes, 3 edges) has density 1.0."""
        node_a = archive.add(_make_entry("A", "aaa"), auto_link=False)
        node_b = archive.add(_make_entry("B", "bbb"), auto_link=False)
        node_c = archive.add(_make_entry("C", "ccc"), auto_link=False)
        # Link every pair so the triangle is fully connected.
        self._link_both_ways(node_a, node_b)
        self._link_both_ways(node_a, node_c)
        self._link_both_ways(node_b, node_c)
        archive._save()

        found = archive.graph_clusters(min_size=2)

        assert len(found) == 1
        triangle = found[0]
        assert triangle["internal_edges"] == 3
        # Three edges present out of three possible.
        assert triangle["density"] == 1.0

    def test_chain_density(self, archive):
        """A-B-C chain has density 2/3 (2 edges out of 3 possible)."""
        node_a = archive.add(_make_entry("A", "aaa"), auto_link=False)
        node_b = archive.add(_make_entry("B", "bbb"), auto_link=False)
        node_c = archive.add(_make_entry("C", "ccc"), auto_link=False)
        # A and C are connected only through B.
        self._link_both_ways(node_a, node_b)
        self._link_both_ways(node_b, node_c)
        archive._save()

        found = archive.graph_clusters(min_size=2)

        expected_density = 2 / 3
        assert abs(found[0]["density"] - expected_density) < 0.01
class TestHubEntries:
    """Exercise hub_entries() degree centrality ranking."""

    def test_empty(self, archive):
        """An archive with no entries has no hubs."""
        assert archive.hub_entries() == []

    def test_no_links(self, archive):
        """An entry without any links is not reported as a hub."""
        archive.add(_make_entry("Lone"), auto_link=False)
        assert archive.hub_entries() == []

    def test_hub_ordering(self, archive):
        """Entry with most links is ranked first."""
        center = archive.add(_make_entry("Hub", "central node"), auto_link=False)
        spokes = []
        for spoke_title in ("Spoke 1", "Spoke 2", "Spoke 3"):
            spokes.append(archive.add(_make_entry(spoke_title, "content"), auto_link=False))

        # The center links out to every spoke, and every spoke links back.
        center.links.extend([spoke.id for spoke in spokes])
        for spoke in spokes:
            spoke.links.append(center.id)
        archive._save()

        ranked = archive.hub_entries()

        assert len(ranked) == 4
        top = ranked[0]
        assert top["entry"].id == center.id
        assert top["degree"] == 3

    def test_limit(self, archive):
        """limit=1 caps the result at a single hub."""
        left = archive.add(_make_entry("A", ""), auto_link=False)
        right = archive.add(_make_entry("B", ""), auto_link=False)
        left.links.append(right.id)
        right.links.append(left.id)
        archive._save()

        ranked = archive.hub_entries(limit=1)

        assert len(ranked) == 1

    def test_inbound_outbound(self, archive):
        """Inbound counts links TO an entry, outbound counts links FROM it."""
        source = archive.add(_make_entry("Source", ""), auto_link=False)
        target = archive.add(_make_entry("Target", ""), auto_link=False)
        # Deliberately one-directional: only source -> target.
        source.links.append(target.id)
        archive._save()

        ranked = archive.hub_entries()

        def stats_for(entry):
            # First hub record whose entry id matches the given entry.
            return next(record for record in ranked if record["entry"].id == entry.id)

        source_stats = stats_for(source)
        target_stats = stats_for(target)
        assert source_stats["inbound"] == 0
        assert source_stats["outbound"] == 1
        assert target_stats["inbound"] == 1
        assert target_stats["outbound"] == 0
class TestBridgeEntries:
    """Exercise bridge_entries() articulation point detection."""

    @staticmethod
    def _link_both_ways(first, second):
        """Append a reciprocal link between two entries (does not save)."""
        first.links.append(second.id)
        second.links.append(first.id)

    @staticmethod
    def _add_blank(archive, title):
        """Add an entry with the given title and empty content, no auto-linking."""
        return archive.add(_make_entry(title, ""), auto_link=False)

    def test_empty(self, archive):
        """An archive with no entries has no bridges."""
        assert archive.bridge_entries() == []

    def test_no_bridges_in_triangle(self, archive):
        """Fully connected triangle has no articulation points."""
        node_a = self._add_blank(archive, "A")
        node_b = self._add_blank(archive, "B")
        node_c = self._add_blank(archive, "C")
        # Every pair is linked, so removing any one node leaves the rest joined.
        self._link_both_ways(node_a, node_b)
        self._link_both_ways(node_a, node_c)
        self._link_both_ways(node_b, node_c)
        archive._save()

        assert archive.bridge_entries() == []

    def test_bridge_in_chain(self, archive):
        """A-B-C chain: B is the articulation point."""
        node_a = self._add_blank(archive, "A")
        node_b = self._add_blank(archive, "B")
        node_c = self._add_blank(archive, "C")
        # A and C are connected only through B.
        self._link_both_ways(node_a, node_b)
        self._link_both_ways(node_b, node_c)
        archive._save()

        found = archive.bridge_entries()

        assert len(found) == 1
        middle = found[0]
        assert middle["entry"].id == node_b.id
        assert middle["components_after_removal"] == 2

    def test_no_bridges_in_small_cluster(self, archive):
        """Two-node clusters are too small for bridge detection."""
        node_a = self._add_blank(archive, "A")
        node_b = self._add_blank(archive, "B")
        self._link_both_ways(node_a, node_b)
        archive._save()

        assert archive.bridge_entries() == []
class TestRebuildLinks:
    """Exercise rebuild_links() full recomputation."""

    @staticmethod
    def _add_all(archive, specs):
        """Add one entry per (title, content) pair with auto-linking disabled."""
        for title, content in specs:
            archive.add(_make_entry(title, content), auto_link=False)

    def test_empty_archive(self, archive):
        """Rebuilding an archive with no entries reports zero links."""
        assert archive.rebuild_links() == 0

    def test_creates_links(self, archive):
        """Rebuild creates links between similar entries."""
        self._add_all(
            archive,
            [
                ("Alpha dogs canine training", "obedience training"),
                ("Beta dogs canine behavior", "behavior training"),
                ("Cat food feline nutrition", "fish meals"),
            ],
        )

        link_count = archive.rebuild_links()

        assert link_count > 0
        # At least one of the two dog entries must have picked up a link.
        dog_entries = [
            entry for entry in archive._entries.values() if "dog" in entry.title.lower()
        ]
        assert any(len(entry.links) > 0 for entry in dog_entries)

    def test_override_threshold(self, archive):
        """Lower threshold creates more links."""
        self._add_all(
            archive,
            [
                ("Alpha dogs", "training"),
                ("Beta cats", "training"),
                ("Gamma birds", "training"),
            ],
        )

        # First pass with a very permissive threshold.
        permissive_count = archive.rebuild_links(threshold=0.01)

        # Clear the in-memory links before the second pass.
        for entry in archive._entries.values():
            entry.links = []

        # Second pass with a strict threshold.
        strict_count = archive.rebuild_links(threshold=0.9)

        assert permissive_count >= strict_count

    def test_rebuild_persists(self, archive):
        """Rebuild saves to disk."""
        self._add_all(
            archive,
            [
                ("Alpha dogs", "training"),
                ("Beta dogs", "training"),
            ],
        )
        archive.rebuild_links()

        # Open a second archive on the same path and check the links are there.
        reloaded = MnemosyneArchive(archive_path=archive.path)
        persisted_links = sum(len(entry.links) for entry in reloaded._entries.values())
        assert persisted_links > 0