From c0bb6bf918aeb4eaea996730eaf6d91d3fdec196 Mon Sep 17 00:00:00 2001 From: Alexander Whitestone Date: Sat, 11 Apr 2026 18:44:17 +0000 Subject: [PATCH] feat: add remove(), export(), list_topics(), richer stats() Closes #1218 - remove() cleans bidirectional links - export() filters by topic/source/date - stats() includes orphans, link_density, age range - list_topics() returns topic->count mapping --- nexus/mnemosyne/archive.py | 87 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 84 insertions(+), 3 deletions(-) diff --git a/nexus/mnemosyne/archive.py b/nexus/mnemosyne/archive.py index b657093..093a6c4 100644 --- a/nexus/mnemosyne/archive.py +++ b/nexus/mnemosyne/archive.py @@ -7,6 +7,7 @@ and provides query interfaces for retrieving connected knowledge. from __future__ import annotations import json +from datetime import datetime, timezone from pathlib import Path from typing import Optional @@ -58,6 +59,21 @@ class MnemosyneArchive: def get(self, entry_id: str) -> Optional[ArchiveEntry]: return self._entries.get(entry_id) + def remove(self, entry_id: str) -> bool: + """Remove an entry and clean all bidirectional links to it. + + Returns True if entry was found and removed, False otherwise. + """ + if entry_id not in self._entries: + return False + # Remove bidirectional links + for other in self._entries.values(): + if entry_id in other.links: + other.links.remove(entry_id) + del self._entries[entry_id] + self._save() + return True + def search(self, query: str, limit: int = 10) -> list[ArchiveEntry]: """Simple keyword search across titles and content.""" query_tokens = set(query.lower().split()) @@ -70,6 +86,44 @@ class MnemosyneArchive: scored.sort(key=lambda x: x[0], reverse=True) return [e for _, e in scored[:limit]] + def export( + self, + topics: Optional[list[str]] = None, + source: Optional[str] = None, + since: Optional[str] = None, + ) -> dict: + """Export archive data, optionally filtered. 
+ + Args: + topics: Only include entries with at least one matching topic. + source: Only include entries from this source. + since: ISO date string — only include entries created at or after this (inclusive). + + Returns: + Dict with entries, count, and export metadata. + """ + entries = list(self._entries.values()) + + if topics: + topics_lower = {t.lower() for t in topics} + entries = [e for e in entries if topics_lower & {t.lower() for t in e.topics}] + + if source: + entries = [e for e in entries if e.source == source] + + if since: + entries = [e for e in entries if e.created_at >= since] + + return { + "entries": [e.to_dict() for e in entries], + "count": len(entries), + "filters": { + "topics": topics, + "source": source, + "since": since, + }, + } + def get_linked(self, entry_id: str, depth: int = 1) -> list[ArchiveEntry]: """Get entries linked to a given entry, up to specified depth.""" visited = set() @@ -97,18 +151,45 @@ class MnemosyneArchive: topic_lower = topic.lower() return [e for e in self._entries.values() if topic_lower in [t.lower() for t in e.topics]] + def list_topics(self) -> dict[str, int]: + """Return dict mapping each topic to its entry count.""" + topic_counts: dict[str, int] = {} + for entry in self._entries.values(): + for topic in entry.topics: + topic_counts[topic] = topic_counts.get(topic, 0) + 1 + return dict(sorted(topic_counts.items(), key=lambda x: x[1], reverse=True)) + @property def count(self) -> int: return len(self._entries) def stats(self) -> dict: - total_links = sum(len(e.links) for e in self._entries.values()) + entries = list(self._entries.values()) + total_links = sum(len(e.links) for e in entries) topics = set() - for e in self._entries.values(): + for e in entries: topics.update(e.topics) + + # Orphans: entries with no links + orphans = sum(1 for e in entries if not e.links) + + # Link density: ratio of actual links to possible links + n = len(entries) + max_links = n * (n - 1) if n > 1 else 0 + link_density = total_links / max_links 
if max_links > 0 else 0.0 + + # Entry age range + timestamps = [e.created_at for e in entries if e.created_at] + oldest = min(timestamps) if timestamps else None + newest = max(timestamps) if timestamps else None + return { - "entries": len(self._entries), + "entries": len(entries), "total_links": total_links, + "orphans": orphans, + "link_density": round(link_density, 4), "unique_topics": len(topics), "topics": sorted(topics), + "oldest_entry": oldest, + "newest_entry": newest, }