Files
the-nexus/nexus/mnemosyne/linker.py
Alexander Whitestone b9f1602067
Some checks failed
Deploy Nexus / deploy (push) Failing after 3s
Staging Verification Gate / verify-staging (push) Failing after 3s
merge: Mnemosyne Phase 1 — Living Holographic Archive
Co-authored-by: Alexander Whitestone <alexander@alexanderwhitestone.com>
Co-committed-by: Alexander Whitestone <alexander@alexanderwhitestone.com>
2026-04-11 12:10:14 +00:00

74 lines
2.6 KiB
Python

"""Holographic link engine.
Computes semantic similarity between archive entries and creates
bidirectional links, forming the holographic graph structure.
"""
from __future__ import annotations
from typing import Optional
from nexus.mnemosyne.entry import ArchiveEntry
class HolographicLinker:
    """Links archive entries via semantic similarity.

    Phase 1 uses simple keyword overlap as the similarity metric.
    Phase 2 will integrate ChromaDB embeddings from MemPalace.

    The linker only reads ``id``, ``title`` and ``content`` from an entry and
    uses ``in`` / ``append`` on its ``links`` collection.
    """

    def __init__(self, similarity_threshold: float = 0.15):
        """Create a linker.

        Args:
            similarity_threshold: Minimum score (inclusive) a candidate must
                reach in ``find_links`` to be considered worth linking.
        """
        self.threshold = similarity_threshold

    def compute_similarity(self, a: ArchiveEntry, b: ArchiveEntry) -> float:
        """Compute similarity score between two entries.

        Phase 1: Jaccard similarity on combined title+content tokens.
        Phase 2: cosine similarity on ChromaDB embeddings.

        Args:
            a: First entry.
            b: Second entry.

        Returns:
            Float in [0, 1]. ``0.0`` when either entry yields no tokens, which
            also keeps the division below from ever seeing an empty union.
        """
        tokens_a = self._tokenize(f"{a.title} {a.content}")
        tokens_b = self._tokenize(f"{b.title} {b.content}")
        if not tokens_a or not tokens_b:
            return 0.0
        return len(tokens_a & tokens_b) / len(tokens_a | tokens_b)

    def find_links(
        self, entry: ArchiveEntry, candidates: list[ArchiveEntry]
    ) -> list[tuple[str, float]]:
        """Find entries worth linking to.

        Args:
            entry: The entry to find neighbours for.
            candidates: Entries to compare against. Any candidate whose ``id``
                equals ``entry.id`` is skipped.

        Returns:
            List of ``(entry_id, similarity_score)`` tuples whose score is at
            or above the threshold, sorted by score descending. Ties keep the
            order in which they appear in ``candidates``.
        """
        # Scoring goes through compute_similarity so that a subclass swapping
        # in a different metric automatically affects link discovery too.
        scored = (
            (candidate.id, self.compute_similarity(entry, candidate))
            for candidate in candidates
            if candidate.id != entry.id
        )
        results = [pair for pair in scored if pair[1] >= self.threshold]
        results.sort(key=lambda pair: pair[1], reverse=True)
        return results

    def apply_links(self, entry: ArchiveEntry, candidates: list[ArchiveEntry]) -> int:
        """Auto-link an entry to related entries.

        For every match from ``find_links`` the matched id is appended to
        ``entry.links`` (if absent) and ``entry.id`` is appended to the
        matched candidate's ``links`` (if absent). The reverse edge is ensured
        even when the forward edge already existed, so a previously
        half-linked pair is repaired instead of staying one-directional.

        Args:
            entry: The entry receiving new links; mutated in place.
            candidates: Entries eligible for linking; matched ones are mutated
                in place.

        Returns:
            Count of links newly added to ``entry.links`` (reverse edges are
            not counted).
        """
        matches = self.find_links(entry, candidates)

        # Index candidates by id once instead of rescanning the whole list for
        # every match. Every object sharing an id is kept so that duplicates
        # in ``candidates`` all receive the back-reference.
        by_id = {}
        for candidate in candidates:
            by_id.setdefault(candidate.id, []).append(candidate)

        new_links = 0
        for eid, _score in matches:
            if eid not in entry.links:
                entry.links.append(eid)
                new_links += 1
            # Bidirectional: make sure the matched entry points back at us.
            for target in by_id.get(eid, ()):
                if entry.id not in target.links:
                    target.links.append(entry.id)
        return new_links

    @staticmethod
    def _tokenize(text: str) -> set[str]:
        """Simple whitespace + punctuation tokenizer.

        Lowercases ``text``, extracts ``\\w+`` runs and drops tokens of one or
        two characters.
        """
        import re

        # Remove very short tokens
        return {tok for tok in re.findall(r"\w+", text.lower()) if len(tok) > 2}