Compare commits
8 Commits
mimo/code/
...
feat/mnemo
| Author | SHA1 | Date | |
|---|---|---|---|
| ef1fb5b735 | |||
| fb7b71d77e | |||
| af9ce582df | |||
| e16b513693 | |||
| 01f3e7fba0 | |||
| 99d0c0a308 | |||
| 7f3b9f9678 | |||
| 41b46c5e1c |
24
nexus/mnemosyne/__init__.py
Normal file
24
nexus/mnemosyne/__init__.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""nexus.mnemosyne — The Living Holographic Archive.
|
||||
|
||||
Phase 1: Foundation — core archive, entry model, holographic linker,
|
||||
ingestion pipeline, and CLI.
|
||||
|
||||
Builds on MemPalace vector memory to create interconnected meaning:
|
||||
entries auto-reference related entries via semantic similarity,
|
||||
forming a living archive that surfaces relevant context autonomously.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from nexus.mnemosyne.archive import MnemosyneArchive
|
||||
from nexus.mnemosyne.entry import ArchiveEntry
|
||||
from nexus.mnemosyne.linker import HolographicLinker
|
||||
from nexus.mnemosyne.ingest import ingest_from_mempalace, ingest_event
|
||||
|
||||
__all__ = [
|
||||
"MnemosyneArchive",
|
||||
"ArchiveEntry",
|
||||
"HolographicLinker",
|
||||
"ingest_from_mempalace",
|
||||
"ingest_event",
|
||||
]
|
||||
114
nexus/mnemosyne/archive.py
Normal file
114
nexus/mnemosyne/archive.py
Normal file
@@ -0,0 +1,114 @@
|
||||
"""MnemosyneArchive — core archive class.
|
||||
|
||||
The living holographic archive. Stores entries, maintains links,
|
||||
and provides query interfaces for retrieving connected knowledge.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from nexus.mnemosyne.entry import ArchiveEntry
|
||||
from nexus.mnemosyne.linker import HolographicLinker
|
||||
|
||||
|
||||
class MnemosyneArchive:
    """The holographic archive — stores and links entries.

    Phase 1 uses JSON file storage. Phase 2 will integrate with
    MemPalace (ChromaDB) for vector-semantic search.

    Entries are held in memory keyed by their ID, and the complete archive
    is rewritten to ``self.path`` after every :meth:`add`.
    """

    def __init__(self, archive_path: Optional[Path] = None):
        """Open the archive at *archive_path*, creating its directory if needed.

        Args:
            archive_path: Location of the JSON archive file. When omitted,
                ``~/.hermes/mnemosyne/archive.json`` is used. A plain string
                is accepted and converted to ``Path``.
        """
        if archive_path:
            self.path = Path(archive_path)
        else:
            self.path = Path.home() / ".hermes" / "mnemosyne" / "archive.json"
        self.path.parent.mkdir(parents=True, exist_ok=True)
        self.linker = HolographicLinker()
        self._entries: dict[str, ArchiveEntry] = {}
        self._load()

    def _load(self):
        """Read entries from ``self.path`` into memory, if the file exists.

        Loading is best-effort: an unparseable file or one with an unexpected
        top-level structure leaves the archive empty, and individual records
        that cannot be turned into entries are skipped. Every such case is
        logged as a warning instead of raising.
        """
        import logging

        if not self.path.exists():
            return
        log = logging.getLogger(__name__)
        try:
            # json.dump writes ASCII-only output by default, so UTF-8 reads
            # back anything this class has written.
            with open(self.path, encoding="utf-8") as f:
                data = json.load(f)
        except ValueError as exc:
            # JSONDecodeError and UnicodeDecodeError are both ValueErrors.
            log.warning("Ignoring unreadable archive %s: %s", self.path, exc)
            return

        records = data.get("entries", []) if isinstance(data, dict) else None
        if not isinstance(records, list):
            log.warning("Ignoring archive %s: unexpected structure", self.path)
            return

        for record in records:
            try:
                entry = ArchiveEntry.from_dict(record)
            except (AttributeError, KeyError, TypeError) as exc:
                # NOTE: the next _save() rewrites the file, so a skipped
                # record does not survive a subsequent add().
                log.warning("Skipping invalid archive record: %s", exc)
                continue
            self._entries[entry.id] = entry

    def _save(self):
        """Write all entries to ``self.path`` as indented JSON.

        The payload is written to a sibling ``.tmp`` file which is then moved
        over ``self.path``, so a failed or interrupted dump leaves the
        previous archive file untouched instead of truncated.
        """
        payload = {
            "entries": [e.to_dict() for e in self._entries.values()],
            "count": len(self._entries),
        }
        tmp_path = self.path.with_name(self.path.name + ".tmp")
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2)
        tmp_path.replace(self.path)

    def add(self, entry: ArchiveEntry, auto_link: bool = True) -> ArchiveEntry:
        """Add an entry to the archive and persist it.

        Args:
            entry: The entry to store; an existing entry with the same ID is
                replaced.
            auto_link: When true, the linker is asked to link *entry* against
                every entry currently in the archive.

        Returns:
            The same *entry* object that was passed in.
        """
        self._entries[entry.id] = entry
        if auto_link:
            self.linker.apply_links(entry, list(self._entries.values()))
        self._save()
        return entry

    def get(self, entry_id: str) -> Optional[ArchiveEntry]:
        """Return the entry with exactly this ID, or ``None`` if absent."""
        return self._entries.get(entry_id)

    def search(self, query: str, limit: int = 10) -> list[ArchiveEntry]:
        """Simple keyword search across titles, content and topics.

        The query is lower-cased and split on whitespace; an entry scores one
        point per query token that occurs as a substring of its lower-cased
        text. Entries with no hits are omitted.

        Returns:
            At most *limit* entries, highest score first (ties keep archive
            order). A non-positive *limit* yields an empty list.
        """
        query_tokens = set(query.lower().split())
        scored = []
        for entry in self._entries.values():
            text = f"{entry.title} {entry.content} {' '.join(entry.topics)}".lower()
            hits = sum(1 for t in query_tokens if t in text)
            if hits > 0:
                scored.append((hits, entry))
        scored.sort(key=lambda pair: pair[0], reverse=True)
        return [e for _, e in scored[:max(limit, 0)]]

    def get_linked(self, entry_id: str, depth: int = 1) -> list[ArchiveEntry]:
        """Get entries linked to a given entry, up to specified depth.

        Performs a breadth-first walk over ``links``. Each reachable entry is
        returned exactly once, in discovery order; the starting entry itself
        and link IDs that are not present in the archive are never returned.
        """
        seen = {entry_id}
        frontier = [entry_id]
        result = []
        for _ in range(depth):
            next_frontier = []
            for eid in frontier:
                entry = self._entries.get(eid)
                if entry is None:
                    continue
                for linked_id in entry.links:
                    if linked_id in seen:
                        continue
                    linked = self._entries.get(linked_id)
                    if linked is None:
                        continue
                    # Mark on discovery (not on expansion) so an entry that is
                    # reachable via several paths is reported only once.
                    seen.add(linked_id)
                    result.append(linked)
                    next_frontier.append(linked_id)
            if not next_frontier:
                break
            frontier = next_frontier
        return result

    def by_topic(self, topic: str) -> list[ArchiveEntry]:
        """Get all entries tagged with a topic (case-insensitive exact match)."""
        wanted = topic.lower()
        return [
            e for e in self._entries.values()
            if any(t.lower() == wanted for t in e.topics)
        ]

    @property
    def count(self) -> int:
        """Number of entries currently in the archive."""
        return len(self._entries)

    def stats(self) -> dict:
        """Return summary figures for the archive.

        Keys: ``entries`` (entry count), ``total_links`` (sum of every entry's
        link count), ``unique_topics`` (number of distinct topic strings) and
        ``topics`` (those topic strings, sorted).
        """
        topics = {t for e in self._entries.values() for t in e.topics}
        return {
            "entries": len(self._entries),
            "total_links": sum(len(e.links) for e in self._entries.values()),
            "unique_topics": len(topics),
            "topics": sorted(topics),
        }
|
||||
90
nexus/mnemosyne/cli.py
Normal file
90
nexus/mnemosyne/cli.py
Normal file
@@ -0,0 +1,90 @@
|
||||
"""CLI interface for Mnemosyne.
|
||||
|
||||
Provides: mnemosyne ingest, mnemosyne search, mnemosyne link, mnemosyne stats
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import sys
|
||||
|
||||
from nexus.mnemosyne.archive import MnemosyneArchive
|
||||
from nexus.mnemosyne.entry import ArchiveEntry
|
||||
from nexus.mnemosyne.ingest import ingest_event
|
||||
|
||||
|
||||
def cmd_stats(args):
    """Handle ``mnemosyne stats``: print archive statistics as indented JSON.

    *args* is the parsed CLI namespace; this command reads nothing from it.
    """
    summary = MnemosyneArchive().stats()
    print(json.dumps(summary, indent=2))
|
||||
|
||||
|
||||
def cmd_search(args):
    """Handle ``mnemosyne search``: print the entries matching ``args.query``.

    Reads ``args.query`` and ``args.limit``. For each hit, prints the first
    eight characters of its ID with the title, a line with source, topics and
    link count, and a preview of at most 120 characters of content. The
    preview ends in "..." only when the content was actually cut.
    """
    archive = MnemosyneArchive()
    results = archive.search(args.query, limit=args.limit)
    if not results:
        print("No results found.")
        return
    for entry in results:
        linked = len(entry.links)
        preview = entry.content[:120]
        if len(entry.content) > 120:
            # Only signal truncation when something was really dropped.
            preview += "..."
        print(f"[{entry.id[:8]}] {entry.title}")
        print(f" Source: {entry.source} | Topics: {', '.join(entry.topics)} | Links: {linked}")
        print(f" {preview}")
        print()
|
||||
|
||||
|
||||
def cmd_ingest(args):
    """Handle ``mnemosyne ingest``: store a new entry and report its links.

    Reads ``args.title``, ``args.content`` and ``args.topics``. The topics
    string is split on commas; surrounding whitespace is stripped and empty
    items are dropped, so ``"a, b,"`` yields ``["a", "b"]``.
    """
    archive = MnemosyneArchive()
    raw_topics = args.topics or ""
    topics = [t.strip() for t in raw_topics.split(",") if t.strip()]
    entry = ingest_event(
        archive,
        title=args.title,
        content=args.content,
        topics=topics,
    )
    print(f"Ingested: [{entry.id[:8]}] {entry.title} ({len(entry.links)} links)")
|
||||
|
||||
|
||||
def cmd_link(args):
    """Handle ``mnemosyne link``: list the entries linked to one entry.

    Reads ``args.entry_id`` and ``args.depth``. The ID is first looked up
    exactly; if that fails it is treated as a prefix, which must match exactly
    one entry. Exits with status 1 when the entry is not found or the prefix
    is ambiguous.
    """
    archive = MnemosyneArchive()
    entry = archive.get(args.entry_id)
    if entry is None and args.entry_id:
        # The other commands print only the first 8 characters of an ID and
        # the help text advertises prefix lookup, so resolve prefixes here.
        candidates = [
            e for e in archive._entries.values()
            if e.id.startswith(args.entry_id)
        ]
        if len(candidates) > 1:
            print(f"Ambiguous entry ID prefix: {args.entry_id} ({len(candidates)} matches)")
            sys.exit(1)
        if candidates:
            entry = candidates[0]
    if not entry:
        print(f"Entry not found: {args.entry_id}")
        sys.exit(1)
    linked = archive.get_linked(entry.id, depth=args.depth)
    if not linked:
        print("No linked entries found.")
        return
    for e in linked:
        print(f" [{e.id[:8]}] {e.title} (source: {e.source})")
|
||||
|
||||
|
||||
def main(argv=None):
    """Parse command-line arguments and run the selected sub-command.

    Args:
        argv: Argument list to parse, without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``, so existing
            ``main()`` callers are unaffected.

    Exits with status 1 after printing the help text when no sub-command is
    given; argparse itself exits with status 2 on invalid arguments.
    """
    parser = argparse.ArgumentParser(prog="mnemosyne", description="The Living Holographic Archive")
    sub = parser.add_subparsers(dest="command")

    sub.add_parser("stats", help="Show archive statistics")

    search_parser = sub.add_parser("search", help="Search the archive")
    search_parser.add_argument("query", help="Search query")
    search_parser.add_argument("-n", "--limit", type=int, default=10)

    ingest_parser = sub.add_parser("ingest", help="Ingest a new entry")
    ingest_parser.add_argument("--title", required=True)
    ingest_parser.add_argument("--content", required=True)
    ingest_parser.add_argument("--topics", default="", help="Comma-separated topics")

    link_parser = sub.add_parser("link", help="Show linked entries")
    link_parser.add_argument("entry_id", help="Entry ID (or prefix)")
    link_parser.add_argument("-d", "--depth", type=int, default=1)

    args = parser.parse_args(argv)
    if not args.command:
        parser.print_help()
        sys.exit(1)

    # Built only after a command is known to be present.
    handlers = {
        "stats": cmd_stats,
        "search": cmd_search,
        "ingest": cmd_ingest,
        "link": cmd_link,
    }
    handlers[args.command](args)


if __name__ == "__main__":
    main()
|
||||
44
nexus/mnemosyne/entry.py
Normal file
44
nexus/mnemosyne/entry.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""Archive entry model for Mnemosyne.
|
||||
|
||||
Each entry is a node in the holographic graph — a piece of meaning
|
||||
with metadata, content, and links to related entries.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
import uuid
|
||||
|
||||
|
||||
@dataclass
class ArchiveEntry:
    """A single node in the Mnemosyne holographic archive."""

    # Unique identifier; a random UUID4 string unless supplied.
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    title: str = ""
    content: str = ""
    source: str = ""  # "mempalace", "event", "manual", etc.
    source_ref: Optional[str] = None  # original MemPalace ID, event URI, etc.
    topics: list[str] = field(default_factory=list)
    metadata: dict = field(default_factory=dict)
    # Creation time as an ISO-8601 string in UTC unless supplied.
    created_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
    links: list[str] = field(default_factory=list)  # IDs of related entries

    def to_dict(self) -> dict:
        """Return the entry as a plain dict with one key per field.

        ``topics``, ``metadata`` and ``links`` are shallow copies, so mutating
        the returned dict does not change this entry.
        """
        return {
            "id": self.id,
            "title": self.title,
            "content": self.content,
            "source": self.source,
            "source_ref": self.source_ref,
            "topics": list(self.topics),
            "metadata": dict(self.metadata),
            "created_at": self.created_at,
            "links": list(self.links),
        }

    @classmethod
    def from_dict(cls, data: dict) -> ArchiveEntry:
        """Build an entry from a dict such as the one :meth:`to_dict` returns.

        Keys that are not dataclass fields are ignored and missing fields take
        their defaults. The ``topics``, ``links`` and ``metadata`` containers
        are shallow-copied so the entry never shares them with *data*; a
        ``None`` value for any of them (e.g. JSON ``null``) becomes an empty
        container.
        """
        known = {k: v for k, v in data.items() if k in cls.__dataclass_fields__}
        for name in ("topics", "links"):
            if name in known:
                value = known[name]
                known[name] = list(value) if value is not None else []
        if "metadata" in known:
            value = known["metadata"]
            known["metadata"] = dict(value) if value is not None else {}
        return cls(**known)
|
||||
62
nexus/mnemosyne/ingest.py
Normal file
62
nexus/mnemosyne/ingest.py
Normal file
@@ -0,0 +1,62 @@
|
||||
"""Ingestion pipeline — feeds data into the archive.
|
||||
|
||||
Supports ingesting from MemPalace, raw events, and manual entries.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from nexus.mnemosyne.archive import MnemosyneArchive
|
||||
from nexus.mnemosyne.entry import ArchiveEntry
|
||||
|
||||
|
||||
def ingest_from_mempalace(
    archive: MnemosyneArchive,
    mempalace_entries: list[dict],
) -> int:
    """Ingest entries from a MemPalace export.

    Each dict should have at least: content, metadata (optional).
    A record's ``id`` becomes the entry's ``source_ref``. Records whose ``id``
    already appears as a ``source_ref`` in the archive (or earlier in the same
    batch) are skipped. Records without an ``id`` cannot be de-duplicated and
    are always ingested.

    The title comes from ``metadata["title"]``, falling back to the first 80
    characters of the content; topics come from ``metadata["topics"]``.

    Returns count of new entries added.
    """
    # Build the set of known references once instead of rescanning the whole
    # archive for every incoming record. Empty/None refs are excluded so that
    # id-less records are not mistaken for duplicates of one another.
    known_refs = {
        e.source_ref for e in archive._entries.values() if e.source_ref
    }
    added = 0
    for mp_entry in mempalace_entries:
        content = mp_entry.get("content") or ""
        # Copy so the entry does not share (and later mutate) the caller's dict.
        metadata = dict(mp_entry.get("metadata") or {})
        source_ref = mp_entry.get("id", "")

        # Skip if already ingested
        if source_ref:
            if source_ref in known_refs:
                continue
            known_refs.add(source_ref)

        entry = ArchiveEntry(
            title=metadata.get("title", content[:80]),
            content=content,
            source="mempalace",
            source_ref=source_ref,
            topics=list(metadata.get("topics") or []),
            metadata=metadata,
        )
        archive.add(entry)
        added += 1
    return added
|
||||
|
||||
|
||||
def ingest_event(
    archive: MnemosyneArchive,
    title: str,
    content: str,
    topics: Optional[list[str]] = None,
    source: str = "event",
    metadata: Optional[dict] = None,
) -> ArchiveEntry:
    """Wrap one event in an ``ArchiveEntry`` and add it to *archive*.

    Missing (or empty) *topics* and *metadata* are replaced by a fresh empty
    list and dict respectively. Returns whatever ``archive.add`` returns for
    the new entry.
    """
    event_topics = topics if topics else []
    event_metadata = metadata if metadata else {}
    new_entry = ArchiveEntry(
        title=title,
        content=content,
        source=source,
        topics=event_topics,
        metadata=event_metadata,
    )
    return archive.add(new_entry)
|
||||
73
nexus/mnemosyne/linker.py
Normal file
73
nexus/mnemosyne/linker.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""Holographic link engine.
|
||||
|
||||
Computes semantic similarity between archive entries and creates
|
||||
bidirectional links, forming the holographic graph structure.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
from nexus.mnemosyne.entry import ArchiveEntry
|
||||
|
||||
|
||||
class HolographicLinker:
    """Links archive entries via semantic similarity.

    Phase 1 uses simple keyword overlap as the similarity metric.
    Phase 2 will integrate ChromaDB embeddings from MemPalace.
    """

    def __init__(self, similarity_threshold: float = 0.15):
        """Store the minimum similarity score at which two entries are linked."""
        self.threshold = similarity_threshold

    def compute_similarity(self, a: ArchiveEntry, b: ArchiveEntry) -> float:
        """Compute similarity score between two entries.

        Returns float in [0, 1]. Phase 1: Jaccard similarity on
        combined title+content tokens. Phase 2: cosine similarity
        on ChromaDB embeddings.

        If either entry yields no tokens the score is 0.0.
        """
        tokens_a = self._tokenize(f"{a.title} {a.content}")
        tokens_b = self._tokenize(f"{b.title} {b.content}")
        if not tokens_a or not tokens_b:
            return 0.0
        return len(tokens_a & tokens_b) / len(tokens_a | tokens_b)

    def find_links(self, entry: ArchiveEntry, candidates: list[ArchiveEntry]) -> list[tuple[str, float]]:
        """Find entries worth linking to.

        Candidates sharing *entry*'s ID are ignored.

        Returns list of (entry_id, similarity_score) tuples whose score is at
        or above the threshold, highest score first.
        """
        results = []
        for candidate in candidates:
            if candidate.id == entry.id:
                continue
            score = self.compute_similarity(entry, candidate)
            if score >= self.threshold:
                results.append((candidate.id, score))
        results.sort(key=lambda pair: pair[1], reverse=True)
        return results

    def apply_links(self, entry: ArchiveEntry, candidates: list[ArchiveEntry]) -> int:
        """Auto-link an entry to related entries. Returns count of new links.

        Every match found by :meth:`find_links` is appended to ``entry.links``
        unless already present, and ``entry.id`` is appended to the matching
        candidate's ``links`` unless already present, so links end up
        bidirectional. The returned count covers only additions to
        ``entry.links``.
        """
        matches = self.find_links(entry, candidates)

        # Index candidates by ID once so each back-link is a dictionary lookup
        # rather than a scan of every candidate per match.
        by_id = {}
        for candidate in candidates:
            by_id.setdefault(candidate.id, []).append(candidate)

        new_links = 0
        for eid, _score in matches:
            if eid not in entry.links:
                entry.links.append(eid)
                new_links += 1
            # Bidirectional
            for target in by_id.get(eid, ()):
                if entry.id not in target.links:
                    target.links.append(entry.id)
        return new_links

    @staticmethod
    def _tokenize(text: str) -> set[str]:
        """Return the set of lower-cased ``\\w+`` tokens longer than two characters."""
        import re

        tokens = set(re.findall(r"\w+", text.lower()))
        # Remove very short tokens
        return {t for t in tokens if len(t) > 2}
|
||||
0
nexus/mnemosyne/tests/__init__.py
Normal file
0
nexus/mnemosyne/tests/__init__.py
Normal file
73
nexus/mnemosyne/tests/test_archive.py
Normal file
73
nexus/mnemosyne/tests/test_archive.py
Normal file
@@ -0,0 +1,73 @@
|
||||
"""Tests for Mnemosyne archive core."""
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from nexus.mnemosyne.entry import ArchiveEntry
|
||||
from nexus.mnemosyne.linker import HolographicLinker
|
||||
from nexus.mnemosyne.archive import MnemosyneArchive
|
||||
from nexus.mnemosyne.ingest import ingest_event, ingest_from_mempalace
|
||||
|
||||
|
||||
def test_entry_roundtrip():
    """An entry keeps its ID and title through to_dict() / from_dict()."""
    original = ArchiveEntry(title="Test", content="Hello world", topics=["test"])
    restored = ArchiveEntry.from_dict(original.to_dict())
    assert restored.id == original.id
    assert restored.title == "Test"
|
||||
|
||||
|
||||
def test_linker_similarity():
    """Entries on the same subject score higher than unrelated ones."""
    coding = ArchiveEntry(title="Python coding", content="Writing Python scripts for automation")
    scripting = ArchiveEntry(title="Python scripting", content="Automating tasks with Python scripts")
    cooking = ArchiveEntry(title="Cooking recipes", content="How to make pasta carbonara")
    linker = HolographicLinker()
    related_score = linker.compute_similarity(coding, scripting)
    unrelated_score = linker.compute_similarity(coding, cooking)
    assert related_score > unrelated_score
|
||||
|
||||
|
||||
def test_archive_add_and_search():
    """Ingested entries are counted and can be found by keyword."""
    with tempfile.TemporaryDirectory() as tmp:
        archive = MnemosyneArchive(archive_path=Path(tmp) / "test_archive.json")
        ingest_event(archive, title="First entry", content="Hello archive", topics=["test"])
        ingest_event(archive, title="Second entry", content="Another record", topics=["test", "demo"])
        assert archive.count == 2
        hits = archive.search("hello")
        assert len(hits) == 1
        assert hits[0].title == "First entry"
|
||||
|
||||
|
||||
def test_archive_auto_linking():
    """Two entries with overlapping vocabulary are linked in both directions."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        e1 = ingest_event(archive, title="Python automation", content="Building automation tools in Python")
        e2 = ingest_event(archive, title="Python scripting", content="Writing automation scripts using Python")
        # Both should be linked due to shared tokens: check each direction
        # explicitly so a one-sided link cannot pass.
        assert e1.id in e2.links
        assert e2.id in e1.links
|
||||
|
||||
|
||||
def test_ingest_from_mempalace():
    """MemPalace records are ingested once; re-ingesting the same IDs adds nothing."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        mp_entries = [
            {"id": "mp-1", "content": "Test memory content", "metadata": {"title": "Test", "topics": ["demo"]}},
            {"id": "mp-2", "content": "Another memory", "metadata": {"title": "Memory 2"}},
        ]
        count = ingest_from_mempalace(archive, mp_entries)
        assert count == 2
        assert archive.count == 2

        # Records whose ID is already present as a source_ref are skipped.
        assert ingest_from_mempalace(archive, mp_entries) == 0
        assert archive.count == 2
|
||||
|
||||
|
||||
def test_archive_persistence():
    """An entry written by one archive instance is visible to a new one on the same file."""
    with tempfile.TemporaryDirectory() as tmp:
        archive_file = Path(tmp) / "test_archive.json"
        writer = MnemosyneArchive(archive_path=archive_file)
        ingest_event(writer, title="Persistent", content="Should survive reload")

        reader = MnemosyneArchive(archive_path=archive_file)
        assert reader.count == 1
        found = reader.search("persistent")
        assert len(found) == 1
|
||||
Reference in New Issue
Block a user