Compare commits
3 Commits
feat/mnemo
...
feat/mnemo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3848b6f4ea | ||
|
|
3ed129ad2b | ||
|
|
392c73eb03 |
@@ -241,3 +241,247 @@ class MnemosyneArchive:
|
||||
"oldest_entry": oldest_entry,
|
||||
"newest_entry": newest_entry,
|
||||
}
|
||||
|
||||
def _build_adjacency(self) -> dict[str, set[str]]:
    """Build an undirected adjacency map from stored entry links.

    Dangling references (ids not present in the archive) and self-links
    are ignored; every surviving link becomes a symmetric edge.
    """
    neighbors: dict[str, set[str]] = {entry_id: set() for entry_id in self._entries}
    for entry_id, entry in self._entries.items():
        valid_targets = (
            target for target in entry.links
            if target != entry_id and target in self._entries
        )
        for target in valid_targets:
            neighbors[entry_id].add(target)
            neighbors[target].add(entry_id)
    return neighbors
|
||||
|
||||
def graph_clusters(self, min_size: int = 1) -> list[dict]:
    """Find connected component clusters in the holographic graph.

    Uses BFS to discover groups of entries that are reachable from each
    other through their links. Returns clusters sorted by size descending.

    Args:
        min_size: Minimum cluster size to include (filters out isolated entries).

    Returns:
        List of dicts with keys: cluster_id, size, entries, top_topics,
        internal_edges, density.
    """
    adj = self._build_adjacency()
    visited: set[str] = set()
    clusters: list[dict] = []
    cluster_id = 0

    for eid in self._entries:
        if eid in visited:
            continue
        # BFS from this entry. A moving cursor over the queue list avoids
        # the O(n) cost of list.pop(0) on every dequeue (was quadratic).
        component: list[str] = []
        queue = [eid]
        head = 0
        while head < len(queue):
            current = queue[head]
            head += 1
            if current in visited:
                continue
            visited.add(current)
            component.append(current)
            queue.extend(n for n in adj.get(current, ()) if n not in visited)

        # Drop clusters below the requested size (min_size=1 keeps orphans).
        if len(component) < min_size:
            continue

        # Aggregate topic frequencies and sum member degrees.
        cluster_topics: dict[str, int] = {}
        internal_edges = 0
        for cid in component:
            entry = self._entries[cid]
            for t in entry.topics:
                cluster_topics[t] = cluster_topics.get(t, 0) + 1
            internal_edges += len(adj.get(cid, set()))
        internal_edges //= 2  # undirected graph: each edge counted at both ends

        # Density: actual edges / possible edges for a clique of this size.
        n = len(component)
        max_edges = n * (n - 1) // 2
        density = round(internal_edges / max_edges, 4) if max_edges > 0 else 0.0

        # Top 5 topics by frequency.
        top_topics = sorted(cluster_topics.items(), key=lambda x: x[1], reverse=True)[:5]

        clusters.append({
            "cluster_id": cluster_id,
            "size": n,
            "entries": component,
            "top_topics": [t for t, _ in top_topics],
            "internal_edges": internal_edges,
            "density": density,
        })
        cluster_id += 1

    clusters.sort(key=lambda c: c["size"], reverse=True)
    return clusters
|
||||
|
||||
def hub_entries(self, limit: int = 10) -> list[dict]:
    """Find the most connected entries (highest degree centrality).

    These are the "hubs" of the holographic graph — entries that bridge
    many topics and attract many links.

    Args:
        limit: Maximum number of hubs to return.

    Returns:
        List of dicts with keys: entry, degree, inbound, outbound, topics.
    """
    adj = self._build_adjacency()

    # Raw inbound link counts (any stored link pointing at a known id).
    inbound: dict[str, int] = dict.fromkeys(self._entries, 0)
    for source in self._entries.values():
        for target_id in source.links:
            if target_id in inbound:
                inbound[target_id] += 1

    # Zero-degree entries are not hubs; everything else gets a record.
    ranked = [
        {
            "entry": entry,
            "degree": len(adj.get(eid, set())),
            "inbound": inbound.get(eid, 0),
            "outbound": len(entry.links),
            "topics": entry.topics,
        }
        for eid, entry in self._entries.items()
        if adj.get(eid)
    ]
    ranked.sort(key=lambda record: record["degree"], reverse=True)
    return ranked[:limit]
|
||||
|
||||
def bridge_entries(self) -> list[dict]:
    """Find articulation points — entries whose removal would split a cluster.

    These are "bridge" entries in the holographic graph. Removing them
    disconnects members that were previously reachable through the bridge.
    Uses Tarjan's algorithm for finding articulation points.

    Returns:
        List of dicts with keys: entry, cluster_size,
        components_after_removal, topics — sorted by
        components_after_removal descending.
    """
    adj = self._build_adjacency()

    # Find clusters first; clusters smaller than 3 cannot have an
    # articulation point, so they are filtered here and re-checked below.
    clusters = self.graph_clusters(min_size=3)
    if not clusters:
        return []

    # For each cluster, run Tarjan's algorithm
    bridges: list[dict] = []
    for cluster in clusters:
        members = set(cluster["entries"])
        if len(members) < 3:
            continue

        # Build subgraph adjacency restricted to this cluster's members.
        sub_adj = {eid: adj[eid] & members for eid in members}

        # Tarjan's DFS for articulation points.
        # discovery: DFS visit order; low: lowest discovery time reachable
        # from the subtree via at most one back edge; ap: articulation points.
        discovery: dict[str, int] = {}
        low: dict[str, int] = {}
        parent: dict[str, Optional[str]] = {}
        ap: set[str] = set()
        timer = [0]  # single-element list so the closure can mutate it

        # NOTE(review): recursive DFS — could hit Python's recursion limit
        # on very large clusters; confirm expected archive sizes.
        def dfs(u: str):
            children = 0
            discovery[u] = low[u] = timer[0]
            timer[0] += 1
            for v in sub_adj[u]:
                if v not in discovery:
                    # Tree edge: recurse, then propagate the child's low value.
                    children += 1
                    parent[v] = u
                    dfs(v)
                    low[u] = min(low[u], low[v])

                    # u is AP if: root with 2+ children, or non-root with low[v] >= disc[u]
                    if parent.get(u) is None and children > 1:
                        ap.add(u)
                    if parent.get(u) is not None and low[v] >= discovery[u]:
                        ap.add(u)
                elif v != parent.get(u):
                    # Back edge (not to the DFS parent): update low directly.
                    low[u] = min(low[u], discovery[v])

        # Roots get an explicit None parent so the root/non-root AP rules apply.
        for eid in members:
            if eid not in discovery:
                parent[eid] = None
                dfs(eid)

        # For each articulation point, estimate what it bridges
        for ap_id in ap:
            ap_entry = self._entries[ap_id]
            # Remove it temporarily and count resulting components
            temp_adj = {k: v.copy() for k, v in sub_adj.items()}
            del temp_adj[ap_id]
            for k in temp_adj:
                temp_adj[k].discard(ap_id)

            # BFS count components after removal
            temp_visited: set[str] = set()
            component_count = 0
            for mid in members:
                if mid == ap_id or mid in temp_visited:
                    continue
                component_count += 1
                queue = [mid]
                while queue:
                    cur = queue.pop(0)
                    if cur in temp_visited:
                        continue
                    temp_visited.add(cur)
                    for nb in temp_adj.get(cur, set()):
                        if nb not in temp_visited:
                            queue.append(nb)

            # Only report genuine splits (more than one surviving component).
            if component_count > 1:
                bridges.append({
                    "entry": ap_entry,
                    "cluster_size": cluster["size"],
                    "components_after_removal": component_count,
                    "topics": ap_entry.topics,
                })

    # Most "load-bearing" bridges first.
    bridges.sort(key=lambda b: b["components_after_removal"], reverse=True)
    return bridges
|
||||
|
||||
def rebuild_links(self, threshold: Optional[float] = None) -> int:
    """Recompute all links from scratch.

    Clears existing links and re-applies the holographic linker to every
    entry pair. Useful after bulk ingestion or threshold changes.

    Args:
        threshold: Override the linker's default similarity threshold.
            The previous threshold is restored afterwards, even if
            linking raises.

    Returns:
        Total number of links created.
    """
    if threshold is not None:
        old_threshold = self.linker.threshold
        self.linker.threshold = threshold

    try:
        # Clear all links so stale edges cannot survive the rebuild.
        for entry in self._entries.values():
            entry.links = []

        entries = list(self._entries.values())
        total_links = 0

        # Re-link each entry against all others.
        for entry in entries:
            candidates = [e for e in entries if e.id != entry.id]
            new_links = self.linker.apply_links(entry, candidates)
            total_links += new_links
    finally:
        # Restore the caller-visible threshold even on error (the original
        # code skipped this when apply_links raised, leaking the override).
        if threshold is not None:
            self.linker.threshold = old_threshold

    self._save()
    return total_links
|
||||
|
||||
@@ -1,7 +1,8 @@
|
||||
"""CLI interface for Mnemosyne.
|
||||
|
||||
Provides: mnemosyne ingest, mnemosyne search, mnemosyne link, mnemosyne stats,
|
||||
mnemosyne topics, mnemosyne remove, mnemosyne export
|
||||
mnemosyne topics, mnemosyne remove, mnemosyne export,
|
||||
mnemosyne clusters, mnemosyne hubs, mnemosyne bridges, mnemosyne rebuild
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -90,6 +91,58 @@ def cmd_export(args):
|
||||
print(json.dumps(data, indent=2))
|
||||
|
||||
|
||||
def cmd_clusters(args):
    """CLI handler: print graph clusters; with --verbose, list each member."""
    archive = MnemosyneArchive()
    clusters = archive.graph_clusters(min_size=args.min_size)
    if not clusters:
        print("No clusters found.")
        return
    for cluster in clusters:
        topic_text = ", ".join(cluster["top_topics"]) if cluster["top_topics"] else "(none)"
        print(f"Cluster {cluster['cluster_id']}: {cluster['size']} entries, density={cluster['density']}")
        print(f"  Topics: {topic_text}")
        if args.verbose:
            for entry_id in cluster["entries"]:
                member = archive.get(entry_id)
                if member:
                    print(f"    [{entry_id[:8]}] {member.title}")
        print()
|
||||
|
||||
|
||||
def cmd_hubs(args):
    """CLI handler: print the most-connected entries, ranked by degree."""
    archive = MnemosyneArchive()
    hubs = archive.hub_entries(limit=args.limit)
    if not hubs:
        print("No hubs found.")
        return
    for record in hubs:
        entry = record["entry"]
        topic_text = ", ".join(record["topics"]) if record["topics"] else "(none)"
        print(f"[{entry.id[:8]}] {entry.title}")
        print(f"  Degree: {record['degree']} (in: {record['inbound']}, out: {record['outbound']})")
        print(f"  Topics: {topic_text}")
        print()
|
||||
|
||||
|
||||
def cmd_bridges(args):
    """CLI handler: print bridge entries whose removal would split a cluster."""
    archive = MnemosyneArchive()
    bridges = archive.bridge_entries()
    if not bridges:
        print("No bridge entries found.")
        return
    for record in bridges:
        entry = record["entry"]
        topic_text = ", ".join(record["topics"]) if record["topics"] else "(none)"
        print(f"[{entry.id[:8]}] {entry.title}")
        print(f"  Bridges {record['components_after_removal']} components (cluster: {record['cluster_size']} entries)")
        print(f"  Topics: {topic_text}")
        print()
|
||||
|
||||
|
||||
def cmd_rebuild(args):
    """CLI handler: recompute every link, optionally overriding the threshold.

    args.threshold defaults to None (see the argparse setup), so it is passed
    straight through. The old `args.threshold if args.threshold else None`
    truthiness check silently discarded an explicit `--threshold 0.0`.
    """
    archive = MnemosyneArchive()
    total = archive.rebuild_links(threshold=args.threshold)
    print(f"Rebuilt links: {total} connections across {archive.count} entries")
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(prog="mnemosyne", description="The Living Holographic Archive")
|
||||
sub = parser.add_subparsers(dest="command")
|
||||
@@ -119,6 +172,18 @@ def main():
|
||||
ex.add_argument("-q", "--query", default="", help="Keyword filter")
|
||||
ex.add_argument("-t", "--topics", default="", help="Comma-separated topic filter")
|
||||
|
||||
cl = sub.add_parser("clusters", help="Show graph clusters (connected components)")
|
||||
cl.add_argument("-m", "--min-size", type=int, default=1, help="Minimum cluster size")
|
||||
cl.add_argument("-v", "--verbose", action="store_true", help="List entries in each cluster")
|
||||
|
||||
hu = sub.add_parser("hubs", help="Show most connected entries (hub analysis)")
|
||||
hu.add_argument("-n", "--limit", type=int, default=10, help="Max hubs to show")
|
||||
|
||||
sub.add_parser("bridges", help="Show bridge entries (articulation points)")
|
||||
|
||||
rb = sub.add_parser("rebuild", help="Recompute all links from scratch")
|
||||
rb.add_argument("-t", "--threshold", type=float, default=None, help="Similarity threshold override")
|
||||
|
||||
args = parser.parse_args()
|
||||
if not args.command:
|
||||
parser.print_help()
|
||||
@@ -132,6 +197,10 @@ def main():
|
||||
"topics": cmd_topics,
|
||||
"remove": cmd_remove,
|
||||
"export": cmd_export,
|
||||
"clusters": cmd_clusters,
|
||||
"hubs": cmd_hubs,
|
||||
"bridges": cmd_bridges,
|
||||
"rebuild": cmd_rebuild,
|
||||
}
|
||||
dispatch[args.command](args)
|
||||
|
||||
|
||||
271
nexus/mnemosyne/tests/test_graph_clusters.py
Normal file
271
nexus/mnemosyne/tests/test_graph_clusters.py
Normal file
@@ -0,0 +1,271 @@
|
||||
"""Tests for Mnemosyne graph cluster analysis features.
|
||||
|
||||
Tests: graph_clusters, hub_entries, bridge_entries, rebuild_links.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
|
||||
from nexus.mnemosyne.archive import MnemosyneArchive
|
||||
from nexus.mnemosyne.entry import ArchiveEntry
|
||||
|
||||
|
||||
@pytest.fixture
def archive():
    """Yield a fresh MnemosyneArchive backed by a throwaway temp file."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        store_path = Path(tmp_dir) / "test_archive.json"
        fresh = MnemosyneArchive(archive_path=store_path)
        yield fresh
|
||||
|
||||
|
||||
def _make_entry(title="Test", content="test content", topics=None):
    """Build an ArchiveEntry test fixture; topics falls back to a fresh list."""
    entry_topics = topics or []
    return ArchiveEntry(title=title, content=content, topics=entry_topics)
|
||||
|
||||
|
||||
class TestGraphClusters:
    """Test graph_clusters() connected component discovery."""

    def test_empty_archive(self, archive):
        """An archive with no entries yields no clusters."""
        clusters = archive.graph_clusters()
        assert clusters == []

    def test_single_orphan(self, archive):
        """An unlinked entry is its own size-1, zero-density cluster."""
        archive.add(_make_entry("Lone entry"), auto_link=False)
        # min_size=1 includes orphans
        clusters = archive.graph_clusters(min_size=1)
        assert len(clusters) == 1
        assert clusters[0]["size"] == 1
        assert clusters[0]["density"] == 0.0

    def test_single_orphan_filtered(self, archive):
        """min_size=2 drops isolated entries entirely."""
        archive.add(_make_entry("Lone entry"), auto_link=False)
        clusters = archive.graph_clusters(min_size=2)
        assert clusters == []

    def test_two_linked_entries(self, archive):
        """Two manually linked entries form a cluster."""
        e1 = archive.add(_make_entry("Alpha dogs", "canine training"), auto_link=False)
        e2 = archive.add(_make_entry("Beta cats", "feline behavior"), auto_link=False)
        # Manual link
        e1.links.append(e2.id)
        e2.links.append(e1.id)
        archive._save()

        clusters = archive.graph_clusters(min_size=2)
        assert len(clusters) == 1
        assert clusters[0]["size"] == 2
        assert clusters[0]["internal_edges"] == 1
        assert clusters[0]["density"] == 1.0  # 1 edge out of 1 possible

    def test_two_separate_clusters(self, archive):
        """Two disconnected groups form separate clusters."""
        a1 = archive.add(_make_entry("AI models", "neural networks"), auto_link=False)
        a2 = archive.add(_make_entry("AI training", "gradient descent"), auto_link=False)
        b1 = archive.add(_make_entry("Cooking pasta", "italian recipes"), auto_link=False)
        b2 = archive.add(_make_entry("Cooking sauces", "tomato basil"), auto_link=False)

        # Link cluster A
        a1.links.append(a2.id)
        a2.links.append(a1.id)
        # Link cluster B
        b1.links.append(b2.id)
        b2.links.append(b1.id)
        archive._save()

        clusters = archive.graph_clusters(min_size=2)
        assert len(clusters) == 2
        sizes = sorted(c["size"] for c in clusters)
        assert sizes == [2, 2]

    def test_cluster_topics(self, archive):
        """Cluster includes aggregated topics."""
        e1 = archive.add(_make_entry("Alpha", "content", topics=["ai", "models"]), auto_link=False)
        e2 = archive.add(_make_entry("Beta", "content", topics=["ai", "training"]), auto_link=False)
        e1.links.append(e2.id)
        e2.links.append(e1.id)
        archive._save()

        clusters = archive.graph_clusters(min_size=2)
        # "ai" appears in both entries, so it must rank among the top topics.
        assert "ai" in clusters[0]["top_topics"]

    def test_density_calculation(self, archive):
        """Triangle (3 nodes, 3 edges) has density 1.0."""
        e1 = archive.add(_make_entry("A", "aaa"), auto_link=False)
        e2 = archive.add(_make_entry("B", "bbb"), auto_link=False)
        e3 = archive.add(_make_entry("C", "ccc"), auto_link=False)
        # Fully connected triangle
        for e, others in [(e1, [e2, e3]), (e2, [e1, e3]), (e3, [e1, e2])]:
            for o in others:
                e.links.append(o.id)
        archive._save()

        clusters = archive.graph_clusters(min_size=2)
        assert len(clusters) == 1
        assert clusters[0]["internal_edges"] == 3
        assert clusters[0]["density"] == 1.0  # 3 edges / 3 possible

    def test_chain_density(self, archive):
        """A-B-C chain has density 2/3 (2 edges out of 3 possible)."""
        e1 = archive.add(_make_entry("A", "aaa"), auto_link=False)
        e2 = archive.add(_make_entry("B", "bbb"), auto_link=False)
        e3 = archive.add(_make_entry("C", "ccc"), auto_link=False)
        # Chain: A-B-C
        e1.links.append(e2.id)
        e2.links.extend([e1.id, e3.id])
        e3.links.append(e2.id)
        archive._save()

        clusters = archive.graph_clusters(min_size=2)
        # Density is rounded to 4 places, so compare with a tolerance.
        assert abs(clusters[0]["density"] - 2/3) < 0.01
|
||||
|
||||
|
||||
class TestHubEntries:
    """Test hub_entries() degree centrality ranking."""

    def test_empty(self, archive):
        """No entries means no hubs."""
        assert archive.hub_entries() == []

    def test_no_links(self, archive):
        """Zero-degree entries are never reported as hubs."""
        archive.add(_make_entry("Lone"), auto_link=False)
        assert archive.hub_entries() == []

    def test_hub_ordering(self, archive):
        """Entry with most links is ranked first."""
        e1 = archive.add(_make_entry("Hub", "central node"), auto_link=False)
        e2 = archive.add(_make_entry("Spoke 1", "content"), auto_link=False)
        e3 = archive.add(_make_entry("Spoke 2", "content"), auto_link=False)
        e4 = archive.add(_make_entry("Spoke 3", "content"), auto_link=False)

        # e1 connects to all spokes
        e1.links.extend([e2.id, e3.id, e4.id])
        e2.links.append(e1.id)
        e3.links.append(e1.id)
        e4.links.append(e1.id)
        archive._save()

        hubs = archive.hub_entries()
        assert len(hubs) == 4
        assert hubs[0]["entry"].id == e1.id
        assert hubs[0]["degree"] == 3

    def test_limit(self, archive):
        """The limit parameter truncates the ranked result."""
        e1 = archive.add(_make_entry("A", ""), auto_link=False)
        e2 = archive.add(_make_entry("B", ""), auto_link=False)
        e1.links.append(e2.id)
        e2.links.append(e1.id)
        archive._save()

        assert len(archive.hub_entries(limit=1)) == 1

    def test_inbound_outbound(self, archive):
        """Inbound counts links TO an entry, outbound counts links FROM it."""
        e1 = archive.add(_make_entry("Source", ""), auto_link=False)
        e2 = archive.add(_make_entry("Target", ""), auto_link=False)
        # Only e1 links to e2
        e1.links.append(e2.id)
        archive._save()

        hubs = archive.hub_entries()
        h1 = next(h for h in hubs if h["entry"].id == e1.id)
        h2 = next(h for h in hubs if h["entry"].id == e2.id)
        assert h1["inbound"] == 0
        assert h1["outbound"] == 1
        assert h2["inbound"] == 1
        assert h2["outbound"] == 0
|
||||
|
||||
|
||||
class TestBridgeEntries:
    """Test bridge_entries() articulation point detection."""

    def test_empty(self, archive):
        """No entries means no bridges."""
        assert archive.bridge_entries() == []

    def test_no_bridges_in_triangle(self, archive):
        """Fully connected triangle has no articulation points."""
        e1 = archive.add(_make_entry("A", ""), auto_link=False)
        e2 = archive.add(_make_entry("B", ""), auto_link=False)
        e3 = archive.add(_make_entry("C", ""), auto_link=False)
        for e, others in [(e1, [e2, e3]), (e2, [e1, e3]), (e3, [e1, e2])]:
            for o in others:
                e.links.append(o.id)
        archive._save()

        assert archive.bridge_entries() == []

    def test_bridge_in_chain(self, archive):
        """A-B-C chain: B is the articulation point."""
        e1 = archive.add(_make_entry("A", ""), auto_link=False)
        e2 = archive.add(_make_entry("B", ""), auto_link=False)
        e3 = archive.add(_make_entry("C", ""), auto_link=False)
        e1.links.append(e2.id)
        e2.links.extend([e1.id, e3.id])
        e3.links.append(e2.id)
        archive._save()

        bridges = archive.bridge_entries()
        assert len(bridges) == 1
        assert bridges[0]["entry"].id == e2.id
        # Removing B leaves A and C as two separate components.
        assert bridges[0]["components_after_removal"] == 2

    def test_no_bridges_in_small_cluster(self, archive):
        """Two-node clusters are too small for bridge detection."""
        e1 = archive.add(_make_entry("A", ""), auto_link=False)
        e2 = archive.add(_make_entry("B", ""), auto_link=False)
        e1.links.append(e2.id)
        e2.links.append(e1.id)
        archive._save()

        assert archive.bridge_entries() == []
|
||||
|
||||
|
||||
class TestRebuildLinks:
    """Test rebuild_links() full recomputation."""

    def test_empty_archive(self, archive):
        """Rebuilding an empty archive creates zero links."""
        assert archive.rebuild_links() == 0

    def test_creates_links(self, archive):
        """Rebuild creates links between similar entries."""
        archive.add(_make_entry("Alpha dogs canine training", "obedience training"), auto_link=False)
        archive.add(_make_entry("Beta dogs canine behavior", "behavior training"), auto_link=False)
        archive.add(_make_entry("Cat food feline nutrition", "fish meals"), auto_link=False)

        total = archive.rebuild_links()
        assert total > 0

        # Check that dog entries are linked to each other
        entries = list(archive._entries.values())
        dog_entries = [e for e in entries if "dog" in e.title.lower()]
        assert any(len(e.links) > 0 for e in dog_entries)

    def test_override_threshold(self, archive):
        """Lower threshold creates more links."""
        archive.add(_make_entry("Alpha dogs", "training"), auto_link=False)
        archive.add(_make_entry("Beta cats", "training"), auto_link=False)
        archive.add(_make_entry("Gamma birds", "training"), auto_link=False)

        # Very low threshold = more links
        low_links = archive.rebuild_links(threshold=0.01)

        # Reset links between runs so the second rebuild starts clean.
        for e in archive._entries.values():
            e.links = []

        # Higher threshold = fewer links
        high_links = archive.rebuild_links(threshold=0.9)

        assert low_links >= high_links

    def test_rebuild_persists(self, archive):
        """Rebuild saves to disk."""
        archive.add(_make_entry("Alpha dogs", "training"), auto_link=False)
        archive.add(_make_entry("Beta dogs", "training"), auto_link=False)
        archive.rebuild_links()

        # Reload and verify links survived
        archive2 = MnemosyneArchive(archive_path=archive.path)
        entries = list(archive2._entries.values())
        total_links = sum(len(e.links) for e in entries)
        assert total_links > 0
|
||||
Reference in New Issue
Block a user