diff --git a/nexus/mnemosyne/archive.py b/nexus/mnemosyne/archive.py
index b657093..847f9a4 100644
--- a/nexus/mnemosyne/archive.py
+++ b/nexus/mnemosyne/archive.py
@@ -13,6 +13,8 @@ from typing import Optional
 from nexus.mnemosyne.entry import ArchiveEntry
 from nexus.mnemosyne.linker import HolographicLinker
 
+_EXPORT_VERSION = "1"
+
 
 class MnemosyneArchive:
     """The holographic archive — stores and links entries.
@@ -97,18 +99,98 @@ class MnemosyneArchive:
         topic_lower = topic.lower()
         return [e for e in self._entries.values() if topic_lower in [t.lower() for t in e.topics]]
 
+    def remove(self, entry_id: str) -> bool:
+        """Remove an entry and clean up all bidirectional links.
+
+        Returns True if the entry existed and was removed, False otherwise.
+        """
+        if entry_id not in self._entries:
+            return False
+        # Remove back-links from all other entries
+        for other in self._entries.values():
+            if entry_id in other.links:
+                other.links.remove(entry_id)
+        del self._entries[entry_id]
+        self._save()
+        return True
+
+    def export(
+        self,
+        query: Optional[str] = None,
+        topics: Optional[list[str]] = None,
+    ) -> dict:
+        """Export a filtered subset of the archive.
+
+        Args:
+            query: keyword filter applied to title + content + topics (case-insensitive)
+            topics: list of topic tags; entries must match at least one
+
+        Returns a JSON-serialisable dict with an ``entries`` list and metadata.
+        """
+        candidates = list(self._entries.values())
+
+        if topics:
+            lower_topics = {t.lower() for t in topics}
+            candidates = [
+                e for e in candidates
+                if any(t.lower() in lower_topics for t in e.topics)
+            ]
+
+        if query:
+            query_tokens = set(query.lower().split())
+            candidates = [
+                e for e in candidates
+                if any(
+                    token in f"{e.title} {e.content} {' '.join(e.topics)}".lower()
+                    for token in query_tokens
+                )
+            ]
+
+        return {
+            "version": _EXPORT_VERSION,
+            "filters": {"query": query, "topics": topics},
+            "count": len(candidates),
+            "entries": [e.to_dict() for e in candidates],
+        }
+
+    def topic_counts(self) -> dict[str, int]:
+        """Return a dict mapping topic name → entry count, sorted by count desc."""
+        counts: dict[str, int] = {}
+        for entry in self._entries.values():
+            for topic in entry.topics:
+                counts[topic] = counts.get(topic, 0) + 1
+        return dict(sorted(counts.items(), key=lambda x: x[1], reverse=True))
+
     @property
     def count(self) -> int:
         return len(self._entries)
 
     def stats(self) -> dict:
-        total_links = sum(len(e.links) for e in self._entries.values())
-        topics = set()
-        for e in self._entries.values():
+        entries = list(self._entries.values())
+        total_links = sum(len(e.links) for e in entries)
+        topics: set[str] = set()
+        for e in entries:
             topics.update(e.topics)
+
+        # Orphans: entries with no links at all
+        orphans = sum(1 for e in entries if len(e.links) == 0)
+
+        # Link density: average links per entry (0 when empty)
+        n = len(entries)
+        link_density = round(total_links / n, 4) if n else 0.0
+
+        # Age distribution
+        timestamps = sorted(e.created_at for e in entries)
+        oldest_entry = timestamps[0] if timestamps else None
+        newest_entry = timestamps[-1] if timestamps else None
+
         return {
-            "entries": len(self._entries),
+            "entries": n,
             "total_links": total_links,
             "unique_topics": len(topics),
             "topics": sorted(topics),
+            "orphans": orphans,
+            "link_density": link_density,
+            "oldest_entry": oldest_entry,
+            "newest_entry": newest_entry,
         }
diff --git a/nexus/mnemosyne/cli.py b/nexus/mnemosyne/cli.py
index 180e8ca..1ab08f1 100644
--- a/nexus/mnemosyne/cli.py
+++ b/nexus/mnemosyne/cli.py
@@ -1,6 +1,7 @@
 """CLI interface for Mnemosyne.
 
-Provides: mnemosyne ingest, mnemosyne search, mnemosyne link, mnemosyne stats
+Provides: mnemosyne ingest, mnemosyne search, mnemosyne link, mnemosyne stats,
+          mnemosyne topics, mnemosyne remove, mnemosyne export
 """
 
 from __future__ import annotations
@@ -59,6 +60,33 @@ def cmd_link(args):
         print(f"  [{e.id[:8]}] {e.title} (source: {e.source})")
 
 
+def cmd_topics(args):
+    archive = MnemosyneArchive()
+    counts = archive.topic_counts()
+    if not counts:
+        print("No topics found.")
+        return
+    for topic, count in counts.items():
+        print(f"  {topic}: {count}")
+
+
+def cmd_remove(args):
+    archive = MnemosyneArchive()
+    removed = archive.remove(args.entry_id)
+    if removed:
+        print(f"Removed entry: {args.entry_id}")
+    else:
+        print(f"Entry not found: {args.entry_id}")
+        sys.exit(1)
+
+
+def cmd_export(args):
+    archive = MnemosyneArchive()
+    topics = [t.strip() for t in args.topics.split(",") if t.strip()] or None
+    data = archive.export(query=args.query or None, topics=topics)
+    print(json.dumps(data, indent=2))  # NOTE(review): assumes cli.py already imports json — confirm
+
+
 def main():
     parser = argparse.ArgumentParser(prog="mnemosyne", description="The Living Holographic Archive")
     sub = parser.add_subparsers(dest="command")
@@ -78,12 +106,30 @@
     l.add_argument("entry_id", help="Entry ID (or prefix)")
     l.add_argument("-d", "--depth", type=int, default=1)
 
+    sub.add_parser("topics", help="List all topics with entry counts")
+
+    r = sub.add_parser("remove", help="Remove an entry by ID")
+    r.add_argument("entry_id", help="Entry ID to remove")
+
+    ex = sub.add_parser("export", help="Export filtered archive data as JSON")
+    ex.add_argument("-q", "--query", default="", help="Keyword filter")
+    ex.add_argument("-t", "--topics", default="", help="Comma-separated topic filter")
+
     args = parser.parse_args()
     if not args.command:
         parser.print_help()
         sys.exit(1)
-    {"stats": cmd_stats, "search": cmd_search, "ingest": cmd_ingest, "link": cmd_link}[args.command](args)
+    dispatch = {
+        "stats": cmd_stats,
+        "search": cmd_search,
+        "ingest": cmd_ingest,
+        "link": cmd_link,
+        "topics": cmd_topics,
+        "remove": cmd_remove,
+        "export": cmd_export,
+    }
+    dispatch[args.command](args)
 
 
 if __name__ == "__main__":
     main()
diff --git a/nexus/mnemosyne/tests/test_archive.py b/nexus/mnemosyne/tests/test_archive.py
index 02e6cbf..0c2b354 100644
--- a/nexus/mnemosyne/tests/test_archive.py
+++ b/nexus/mnemosyne/tests/test_archive.py
@@ -66,8 +66,146 @@ def test_archive_persistence():
         path = Path(tmp) / "test_archive.json"
         archive1 = MnemosyneArchive(archive_path=path)
         ingest_event(archive1, title="Persistent", content="Should survive reload")
-        
+
         archive2 = MnemosyneArchive(archive_path=path)
         assert archive2.count == 1
         results = archive2.search("persistent")
         assert len(results) == 1
+
+
+def test_archive_remove_basic():
+    with tempfile.TemporaryDirectory() as tmp:
+        path = Path(tmp) / "test_archive.json"
+        archive = MnemosyneArchive(archive_path=path)
+        e1 = ingest_event(archive, title="Alpha", content="First entry", topics=["x"])
+        assert archive.count == 1
+
+        result = archive.remove(e1.id)
+        assert result is True
+        assert archive.count == 0
+        assert archive.get(e1.id) is None
+
+
+def test_archive_remove_nonexistent():
+    with tempfile.TemporaryDirectory() as tmp:
+        path = Path(tmp) / "test_archive.json"
+        archive = MnemosyneArchive(archive_path=path)
+        result = archive.remove("does-not-exist")
+        assert result is False
+
+
+def test_archive_remove_cleans_backlinks():
+    with tempfile.TemporaryDirectory() as tmp:
+        path = Path(tmp) / "test_archive.json"
+        archive = MnemosyneArchive(archive_path=path)
+        e1 = ingest_event(archive, title="Python automation", content="Building automation tools in Python")
+        e2 = ingest_event(archive, title="Python scripting", content="Writing automation scripts using Python")
+        # At least one direction should be linked
+        assert e1.id in e2.links or e2.id in e1.links
+
+        # Remove e1; e2 must no longer reference it
+        archive.remove(e1.id)
+        e2_fresh = archive.get(e2.id)
+        assert e2_fresh is not None
+        assert e1.id not in e2_fresh.links
+
+
+def test_archive_remove_persists():
+    with tempfile.TemporaryDirectory() as tmp:
+        path = Path(tmp) / "test_archive.json"
+        a1 = MnemosyneArchive(archive_path=path)
+        e = ingest_event(a1, title="Gone", content="Will be removed")
+        a1.remove(e.id)
+
+        a2 = MnemosyneArchive(archive_path=path)
+        assert a2.count == 0
+
+
+def test_archive_export_unfiltered():
+    with tempfile.TemporaryDirectory() as tmp:
+        path = Path(tmp) / "test_archive.json"
+        archive = MnemosyneArchive(archive_path=path)
+        ingest_event(archive, title="A", content="content a", topics=["alpha"])
+        ingest_event(archive, title="B", content="content b", topics=["beta"])
+        data = archive.export()
+        assert data["count"] == 2
+        assert len(data["entries"]) == 2
+        assert data["filters"] == {"query": None, "topics": None}
+
+
+def test_archive_export_by_topic():
+    with tempfile.TemporaryDirectory() as tmp:
+        path = Path(tmp) / "test_archive.json"
+        archive = MnemosyneArchive(archive_path=path)
+        ingest_event(archive, title="A", content="content a", topics=["alpha"])
+        ingest_event(archive, title="B", content="content b", topics=["beta"])
+        data = archive.export(topics=["alpha"])
+        assert data["count"] == 1
+        assert data["entries"][0]["title"] == "A"
+
+
+def test_archive_export_by_query():
+    with tempfile.TemporaryDirectory() as tmp:
+        path = Path(tmp) / "test_archive.json"
+        archive = MnemosyneArchive(archive_path=path)
+        ingest_event(archive, title="Hello world", content="greetings", topics=[])
+        ingest_event(archive, title="Goodbye", content="farewell", topics=[])
+        data = archive.export(query="hello")
+        assert data["count"] == 1
+        assert data["entries"][0]["title"] == "Hello world"
+
+
+def test_archive_export_combined_filters():
+    with tempfile.TemporaryDirectory() as tmp:
+        path = Path(tmp) / "test_archive.json"
+        archive = MnemosyneArchive(archive_path=path)
+        ingest_event(archive, title="Hello world", content="greetings", topics=["alpha"])
+        ingest_event(archive, title="Hello again", content="greetings again", topics=["beta"])
+        data = archive.export(query="hello", topics=["alpha"])
+        assert data["count"] == 1
+        assert data["entries"][0]["title"] == "Hello world"
+
+
+def test_archive_stats_richer():
+    with tempfile.TemporaryDirectory() as tmp:
+        path = Path(tmp) / "test_archive.json"
+        archive = MnemosyneArchive(archive_path=path)
+        # All four new fields present when archive is empty
+        s = archive.stats()
+        assert "orphans" in s
+        assert "link_density" in s
+        assert "oldest_entry" in s
+        assert "newest_entry" in s
+        assert s["orphans"] == 0
+        assert s["link_density"] == 0.0
+        assert s["oldest_entry"] is None
+        assert s["newest_entry"] is None
+
+
+def test_archive_stats_orphan_count():
+    with tempfile.TemporaryDirectory() as tmp:
+        path = Path(tmp) / "test_archive.json"
+        archive = MnemosyneArchive(archive_path=path)
+        # Two entries with very different content → unlikely to auto-link
+        ingest_event(archive, title="Zebras", content="Zebra stripes savannah Africa", topics=[])
+        ingest_event(archive, title="Compiler", content="Lexer parser AST bytecode", topics=[])
+        s = archive.stats()
+        # Orphan count depends on the auto-linker, so only bound it structurally
+        assert 0 <= s["orphans"] <= 2  # bounded by entry count
+        assert s["link_density"] >= 0.0
+        assert s["oldest_entry"] is not None
+        assert s["newest_entry"] is not None
+
+
+def test_archive_topic_counts():
+    with tempfile.TemporaryDirectory() as tmp:
+        path = Path(tmp) / "test_archive.json"
+        archive = MnemosyneArchive(archive_path=path)
+        ingest_event(archive, title="A", content="x", topics=["python", "automation"])
+        ingest_event(archive, title="B", content="y", topics=["python"])
+        ingest_event(archive, title="C", content="z", topics=["automation"])
+        counts = archive.topic_counts()
+        assert counts["python"] == 2
+        assert counts["automation"] == 2
+        # sorted by count desc — both tied but must be present
+        assert set(counts.keys()) == {"python", "automation"}