feat: add remove(), export(), list_topics(), richer stats()
Closes #1218

- remove() cleans bidirectional links
- export() filters by topic/source/date
- stats() includes orphans, link_density, age range
- list_topics() returns topic->count mapping
This commit is contained in:
@@ -7,6 +7,7 @@ and provides query interfaces for retrieving connected knowledge.
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
@@ -58,6 +59,21 @@ class MnemosyneArchive:
|
||||
def get(self, entry_id: str) -> Optional[ArchiveEntry]:
    """Look up the entry stored under ``entry_id``.

    Delegates to ``self._entries.get``, so an id with no stored entry
    yields that mapping's default (None) rather than raising.
    """
    found = self._entries.get(entry_id)
    return found
|
||||
|
||||
def remove(self, entry_id: str) -> bool:
    """Remove an entry and scrub every link that points to it.

    Args:
        entry_id: Id of the entry to delete.

    Returns:
        True if the entry was found and removed, False otherwise.
        ``self._save()`` is called only when something was removed.
    """
    if entry_id not in self._entries:
        return False

    # Drop the entry first so the scan below only visits survivors.
    del self._entries[entry_id]

    for other in self._entries.values():
        # ``links.remove`` deletes a single occurrence; loop until the id
        # is gone so a duplicated link cannot leave a dangling reference.
        while entry_id in other.links:
            other.links.remove(entry_id)

    self._save()
    return True
|
||||
|
||||
def search(self, query: str, limit: int = 10) -> list[ArchiveEntry]:
|
||||
"""Simple keyword search across titles and content."""
|
||||
query_tokens = set(query.lower().split())
|
||||
@@ -70,6 +86,44 @@ class MnemosyneArchive:
|
||||
scored.sort(key=lambda x: x[0], reverse=True)
|
||||
return [e for _, e in scored[:limit]]
|
||||
|
||||
def export(
    self,
    topics: Optional[list[str]] = None,
    source: Optional[str] = None,
    since: Optional[str] = None,
) -> dict:
    """Export archive data, optionally filtered.

    Filters combine with AND; a filter that is None or empty is skipped.

    Args:
        topics: Only include entries with at least one matching topic
            (compared case-insensitively).
        source: Only include entries whose ``source`` equals this value.
        since: ISO date/datetime string — only include entries created
            at or after this instant.

    Returns:
        Dict with "entries" (each entry's ``to_dict()``), "count", and
        "filters" echoing the arguments that were passed in.
    """

    def _as_utc(stamp):
        # Parse an ISO-8601 string into an aware datetime; None if it
        # cannot be parsed. A trailing "Z" is rewritten because
        # ``fromisoformat`` only accepts it on newer Python versions.
        try:
            text = stamp[:-1] + "+00:00" if stamp.endswith(("Z", "z")) else stamp
            moment = datetime.fromisoformat(text)
        except (AttributeError, TypeError, ValueError):
            return None
        if moment.tzinfo is None:
            # NOTE(review): offset-less timestamps are assumed to be UTC.
            moment = moment.replace(tzinfo=timezone.utc)
        return moment

    entries = list(self._entries.values())

    if topics:
        topics_lower = {t.lower() for t in topics}
        entries = [e for e in entries if topics_lower & {t.lower() for t in e.topics}]

    if source:
        entries = [e for e in entries if e.source == source]

    if since:
        cutoff = _as_utc(since)
        kept = []
        for e in entries:
            created = _as_utc(e.created_at) if cutoff is not None else None
            if created is None:
                # Either side is not parseable: keep the plain string
                # comparison so unusual formats behave as before.
                matches = e.created_at >= since
            else:
                # Compare real instants so differing UTC offsets or a
                # "Z" suffix cannot mis-order the timestamps.
                matches = created >= cutoff
            if matches:
                kept.append(e)
        entries = kept

    return {
        "entries": [e.to_dict() for e in entries],
        "count": len(entries),
        "filters": {
            "topics": topics,
            "source": source,
            "since": since,
        },
    }
|
||||
|
||||
def get_linked(self, entry_id: str, depth: int = 1) -> list[ArchiveEntry]:
|
||||
"""Get entries linked to a given entry, up to specified depth."""
|
||||
visited = set()
|
||||
@@ -97,18 +151,45 @@ class MnemosyneArchive:
|
||||
topic_lower = topic.lower()
|
||||
return [e for e in self._entries.values() if topic_lower in [t.lower() for t in e.topics]]
|
||||
|
||||
def list_topics(self) -> dict[str, int]:
    """Return a dict mapping each topic to how often it occurs.

    Topics are counted exactly as stored (case-sensitive), once per
    appearance in an entry's ``topics``. The dict is ordered by
    descending count; topics with equal counts keep first-seen order.
    """
    # Local import: the block needs nothing beyond the standard library.
    from collections import Counter

    topic_counts = Counter(
        topic for entry in self._entries.values() for topic in entry.topics
    )
    # most_common() sorts by count, highest first, and is stable for ties.
    return dict(topic_counts.most_common())
|
||||
|
||||
@property
def count(self) -> int:
    """Number of entries currently held in ``self._entries``."""
    size = len(self._entries)
    return size
|
||||
|
||||
def stats(self) -> dict:
    """Summarize the archive's size, linkage, topics, and age range.

    Returns:
        Dict with keys:
            entries: number of entries.
            total_links: sum of ``len(entry.links)`` over all entries, so
                a link recorded on both endpoints counts twice.
            orphans: number of entries whose ``links`` is empty.
            link_density: ``total_links / (n * (n - 1))`` rounded to 4
                places; 0.0 when there are fewer than two entries.
            unique_topics: number of distinct topic strings.
            topics: the distinct topics, sorted.
            oldest_entry / newest_entry: min / max of the non-empty
                ``created_at`` values, or None when there are none.
    """
    # Snapshot once so every figure is computed over the same entries.
    entries = list(self._entries.values())
    total_links = sum(len(e.links) for e in entries)

    topics = set()
    for e in entries:
        topics.update(e.topics)

    # Orphans: entries with no links
    orphans = sum(1 for e in entries if not e.links)

    # Link density: ratio of actual links to possible (ordered) pairs
    n = len(entries)
    max_links = n * (n - 1) if n > 1 else 0
    link_density = total_links / max_links if max_links > 0 else 0.0

    # Entry age range; entries with a falsy created_at are ignored
    timestamps = [e.created_at for e in entries if e.created_at]
    oldest = min(timestamps) if timestamps else None
    newest = max(timestamps) if timestamps else None

    return {
        "entries": len(entries),
        "total_links": total_links,
        "orphans": orphans,
        "link_density": round(link_density, 4),
        "unique_topics": len(topics),
        "topics": sorted(topics),
        "oldest_entry": oldest,
        "newest_entry": newest,
    }
|
||||
|
||||
Reference in New Issue
Block a user