[claude] Mnemosyne: export, deletion, and richer stats (#1218) #1220

Merged
claude merged 1 commit from claude/issue-1218 into main 2026-04-11 18:50:30 +00:00
3 changed files with 273 additions and 7 deletions

View File

@@ -13,6 +13,8 @@ from typing import Optional
from nexus.mnemosyne.entry import ArchiveEntry
from nexus.mnemosyne.linker import HolographicLinker
_EXPORT_VERSION = "1"
class MnemosyneArchive:
"""The holographic archive — stores and links entries.
@@ -97,18 +99,98 @@ class MnemosyneArchive:
topic_lower = topic.lower()
return [e for e in self._entries.values() if topic_lower in [t.lower() for t in e.topics]]
def remove(self, entry_id: str) -> bool:
    """Remove an entry and strip every reference to it from other entries.

    Args:
        entry_id: exact ID of the entry to delete.

    Returns:
        True if the entry existed and was removed, False otherwise. The
        archive is saved only when something was actually removed.
    """
    if entry_id not in self._entries:
        return False
    del self._entries[entry_id]
    # Remove back-links from the remaining entries. A link collection may
    # hold the same ID more than once, and ``remove`` only drops the first
    # occurrence, so keep going until none are left. Mutating in place keeps
    # any other reference to the same collection consistent.
    for other in self._entries.values():
        while entry_id in other.links:
            other.links.remove(entry_id)
    self._save()
    return True
def export(
    self,
    query: Optional[str] = None,
    topics: Optional[list[str]] = None,
) -> dict:
    """Export a filtered subset of the archive.

    Args:
        query: whitespace-separated keywords. An entry matches when at
            least one keyword occurs as a case-insensitive substring of its
            title, content, or topics. ``None``, an empty string, or a
            whitespace-only string applies no keyword filter.
        topics: list of topic tags; entries must match at least one
            (case-insensitive). ``None`` or an empty list applies no
            topic filter.

    Returns:
        A dict with ``version``, ``filters`` (the arguments exactly as
        passed), ``count`` and ``entries`` (each entry's ``to_dict()``).
        It is JSON-serialisable provided each entry's ``to_dict()`` is.
    """
    candidates = list(self._entries.values())

    if topics:
        lower_topics = {t.lower() for t in topics}
        candidates = [
            e for e in candidates
            if any(t.lower() in lower_topics for t in e.topics)
        ]

    # A whitespace-only query splits into zero tokens; treat it as "no
    # filter" instead of letting ``any()`` over nothing reject every entry.
    query_tokens = set(query.lower().split()) if query else set()
    if query_tokens:
        matched = []
        for e in candidates:
            # Build the searchable text once per entry, not once per token.
            haystack = f"{e.title} {e.content} {' '.join(e.topics)}".lower()
            if any(token in haystack for token in query_tokens):
                matched.append(e)
        candidates = matched

    return {
        "version": _EXPORT_VERSION,
        "filters": {"query": query, "topics": topics},
        "count": len(candidates),
        "entries": [e.to_dict() for e in candidates],
    }
def topic_counts(self) -> dict[str, int]:
    """Tally how many entries carry each topic.

    Topics are counted exactly as stored on the entries. The returned dict
    is ordered from the most-used topic to the least-used.
    """
    tally: dict[str, int] = {}
    for item in self._entries.values():
        for name in item.topics:
            if name in tally:
                tally[name] += 1
            else:
                tally[name] = 1
    # Rank the topic names by their tally, highest first.
    ranked = sorted(tally, key=tally.get, reverse=True)
    return {name: tally[name] for name in ranked}
@property
def count(self) -> int:
    """Number of entries currently held in the archive."""
    held = self._entries
    return len(held)
def stats(self) -> dict:
    """Summarise the archive's size, linkage, topics and age range.

    Returns:
        A dict with these keys:
            entries: number of entries.
            total_links: sum of every entry's link count (a link stored on
                both of its endpoints is counted twice).
            unique_topics: number of distinct topic strings.
            topics: the distinct topics, sorted.
            orphans: number of entries that have no links at all.
            link_density: average links per entry, rounded to 4 decimal
                places; 0.0 for an empty archive.
            oldest_entry / newest_entry: smallest and largest
                ``created_at`` value, or None for an empty archive.
    """
    entries = list(self._entries.values())
    n = len(entries)

    total_links = sum(len(e.links) for e in entries)

    topics: set[str] = set()
    for e in entries:
        topics.update(e.topics)

    # Orphans: entries with no links at all
    orphans = sum(1 for e in entries if not e.links)

    # Link density: average links per entry (0 when empty)
    link_density = round(total_links / n, 4) if n else 0.0

    # Age range: only the extremes are needed, so skip a full sort.
    oldest_entry = min((e.created_at for e in entries), default=None)
    newest_entry = max((e.created_at for e in entries), default=None)

    return {
        "entries": n,
        "total_links": total_links,
        "unique_topics": len(topics),
        "topics": sorted(topics),
        "orphans": orphans,
        "link_density": link_density,
        "oldest_entry": oldest_entry,
        "newest_entry": newest_entry,
    }

View File

@@ -1,6 +1,7 @@
"""CLI interface for Mnemosyne.
Provides: mnemosyne ingest, mnemosyne search, mnemosyne link, mnemosyne stats,
mnemosyne topics, mnemosyne remove, mnemosyne export
"""
from __future__ import annotations
@@ -59,6 +60,33 @@ def cmd_link(args):
print(f" [{e.id[:8]}] {e.title} (source: {e.source})")
def cmd_topics(args):
    """Print each topic in the archive with its entry count.

    ``args`` is accepted for dispatch uniformity and is not read. When the
    archive reports no topics, a short notice is printed instead.
    """
    tally = MnemosyneArchive().topic_counts()
    if tally:
        for name, total in tally.items():
            print(f" {name}: {total}")
    else:
        print("No topics found.")
def cmd_remove(args):
    """Delete the entry named by ``args.entry_id`` and report the outcome.

    Exits the process with status 1 when the archive has no such entry.
    """
    target = args.entry_id
    store = MnemosyneArchive()
    if not store.remove(target):
        print(f"Entry not found: {target}")
        sys.exit(1)
    print(f"Removed entry: {target}")
def cmd_export(args):
    """Print a filtered export of the archive to stdout as indented JSON.

    ``args.topics`` is a comma-separated string. Blank items (from stray or
    trailing commas, e.g. ``"alpha,,beta,"``) are discarded, and when no
    item remains no topic filter is applied. An empty ``args.query`` means
    no keyword filter.
    """
    raw_topics = args.topics or ""
    # Strip each item and drop empties so "," does not become ["", ""],
    # which would be a truthy filter that matches no real topic.
    cleaned = [t.strip() for t in raw_topics.split(",")]
    topics = [t for t in cleaned if t] or None

    archive = MnemosyneArchive()
    data = archive.export(query=args.query or None, topics=topics)
    print(json.dumps(data, indent=2))
def main():
parser = argparse.ArgumentParser(prog="mnemosyne", description="The Living Holographic Archive")
sub = parser.add_subparsers(dest="command")
@@ -78,12 +106,30 @@ def main():
l.add_argument("entry_id", help="Entry ID (or prefix)")
l.add_argument("-d", "--depth", type=int, default=1)
sub.add_parser("topics", help="List all topics with entry counts")
r = sub.add_parser("remove", help="Remove an entry by ID")
r.add_argument("entry_id", help="Entry ID to remove")
ex = sub.add_parser("export", help="Export filtered archive data as JSON")
ex.add_argument("-q", "--query", default="", help="Keyword filter")
ex.add_argument("-t", "--topics", default="", help="Comma-separated topic filter")
args = parser.parse_args()
if not args.command:
parser.print_help()
sys.exit(1)
{"stats": cmd_stats, "search": cmd_search, "ingest": cmd_ingest, "link": cmd_link}[args.command](args)
dispatch = {
"stats": cmd_stats,
"search": cmd_search,
"ingest": cmd_ingest,
"link": cmd_link,
"topics": cmd_topics,
"remove": cmd_remove,
"export": cmd_export,
}
dispatch[args.command](args)
if __name__ == "__main__":

View File

@@ -66,8 +66,146 @@ def test_archive_persistence():
path = Path(tmp) / "test_archive.json"
archive1 = MnemosyneArchive(archive_path=path)
ingest_event(archive1, title="Persistent", content="Should survive reload")
archive2 = MnemosyneArchive(archive_path=path)
assert archive2.count == 1
results = archive2.search("persistent")
assert len(results) == 1
def test_archive_remove_basic():
    """Removing the only entry reports success and leaves the archive empty."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        entry = ingest_event(store, title="Alpha", content="First entry", topics=["x"])
        assert store.count == 1

        outcome = store.remove(entry.id)

        assert outcome is True
        assert store.count == 0
        assert store.get(entry.id) is None
def test_archive_remove_nonexistent():
    """Removing an unknown ID returns False."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        outcome = store.remove("does-not-exist")
        assert outcome is False
def test_archive_remove_cleans_backlinks():
    """After an entry is removed, a linked neighbour no longer references it."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        first = ingest_event(store, title="Python automation", content="Building automation tools in Python")
        second = ingest_event(store, title="Python scripting", content="Writing automation scripts using Python")

        # The two entries must be linked in at least one direction
        linked_forward = first.id in second.links
        linked_backward = second.id in first.links
        assert linked_forward or linked_backward

        # Drop the first entry; the survivor must not point at it any more
        store.remove(first.id)
        survivor = store.get(second.id)
        assert survivor is not None
        assert first.id not in survivor.links
def test_archive_remove_persists():
    """A removal is still in effect when the archive is reopened from disk."""
    with tempfile.TemporaryDirectory() as workdir:
        location = Path(workdir) / "test_archive.json"

        writer = MnemosyneArchive(archive_path=location)
        doomed = ingest_event(writer, title="Gone", content="Will be removed")
        writer.remove(doomed.id)

        reader = MnemosyneArchive(archive_path=location)
        assert reader.count == 0
def test_archive_export_unfiltered():
    """With no filters, export returns every entry and records null filters."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        ingest_event(store, title="A", content="content a", topics=["alpha"])
        ingest_event(store, title="B", content="content b", topics=["beta"])

        payload = store.export()

        assert payload["count"] == 2
        assert len(payload["entries"]) == 2
        assert payload["filters"] == {"query": None, "topics": None}
def test_archive_export_by_topic():
    """A topic filter keeps only the entries tagged with that topic."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        ingest_event(store, title="A", content="content a", topics=["alpha"])
        ingest_event(store, title="B", content="content b", topics=["beta"])

        payload = store.export(topics=["alpha"])

        assert payload["count"] == 1
        only = payload["entries"][0]
        assert only["title"] == "A"
def test_archive_export_by_query():
    """A keyword query keeps only the entries whose text contains it."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        ingest_event(store, title="Hello world", content="greetings", topics=[])
        ingest_event(store, title="Goodbye", content="farewell", topics=[])

        payload = store.export(query="hello")

        assert payload["count"] == 1
        only = payload["entries"][0]
        assert only["title"] == "Hello world"
def test_archive_export_combined_filters():
    """Query and topic filters together keep only entries passing both."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        ingest_event(store, title="Hello world", content="greetings", topics=["alpha"])
        ingest_event(store, title="Hello again", content="greetings again", topics=["beta"])

        payload = store.export(query="hello", topics=["alpha"])

        assert payload["count"] == 1
        only = payload["entries"][0]
        assert only["title"] == "Hello world"
def test_archive_stats_richer():
    """An empty archive still reports the four richer stats fields."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        summary = store.stats()

        # Every new field must be present even with nothing stored
        for field in ("orphans", "link_density", "oldest_entry", "newest_entry"):
            assert field in summary

        # ...and each carries its "empty" value
        assert summary["orphans"] == 0
        assert summary["link_density"] == 0.0
        assert summary["oldest_entry"] is None
        assert summary["newest_entry"] is None
def test_archive_stats_orphan_count():
    """Stats for a two-entry archive are internally consistent.

    Whether the linker connects these two entries is not assumed here, so
    the test checks relationships between the reported numbers that must
    hold either way, rather than a fixed orphan count.
    """
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        # Two entries with very different content → unlikely to auto-link
        ingest_event(archive, title="Zebras", content="Zebra stripes savannah Africa", topics=[])
        ingest_event(archive, title="Compiler", content="Lexer parser AST bytecode", topics=[])
        s = archive.stats()

        assert s["entries"] == 2
        # Orphans are a subset of the entries
        assert 0 <= s["orphans"] <= s["entries"]
        # Every entry is an orphan exactly when no link exists anywhere
        assert (s["orphans"] == s["entries"]) == (s["total_links"] == 0)
        # Density is the rounded average of links per entry
        assert s["link_density"] == round(s["total_links"] / s["entries"], 4)
        # Age range is populated and ordered
        assert s["oldest_entry"] is not None
        assert s["newest_entry"] is not None
        assert s["oldest_entry"] <= s["newest_entry"]
def test_archive_topic_counts():
    """topic_counts tallies entries per topic and orders them by count, highest first."""
    with tempfile.TemporaryDirectory() as tmp:
        path = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=path)
        ingest_event(archive, title="A", content="x", topics=["python", "automation"])
        ingest_event(archive, title="B", content="y", topics=["python"])
        ingest_event(archive, title="C", content="z", topics=["automation"])
        # A fourth entry breaks the tie so the descending order is observable
        ingest_event(archive, title="D", content="w", topics=["python"])
        counts = archive.topic_counts()
        assert counts["python"] == 3
        assert counts["automation"] == 2
        assert set(counts.keys()) == {"python", "automation"}
        # sorted by count desc
        assert list(counts.items()) == [("python", 3), ("automation", 2)]