Compare commits

...

4 Commits

Author SHA1 Message Date
1e170074ce chore: update module docstring for Phase 1
Some checks failed
CI / test (pull_request) Failing after 9s
CI / validate (pull_request) Failing after 13s
Review Approval Gate / verify-review (pull_request) Failing after 3s
2026-04-11 18:45:22 +00:00
27c211268f test: add tests for remove, export, list_topics, richer stats
Part of #1218
2026-04-11 18:45:07 +00:00
ddf4f8cd3d feat: add topics, remove, export CLI commands
Part of #1218
2026-04-11 18:44:41 +00:00
c0bb6bf918 feat: add remove(), export(), list_topics(), richer stats()
Closes #1218
- remove() cleans bidirectional links
- export() filters by topic/source/date
- stats() includes orphans, link_density, age range
- list_topics() returns topic->count mapping
2026-04-11 18:44:17 +00:00
3 changed files with 209 additions and 6 deletions

View File

@@ -7,6 +7,7 @@ and provides query interfaces for retrieving connected knowledge.
from __future__ import annotations
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional
@@ -58,6 +59,21 @@ class MnemosyneArchive:
def get(self, entry_id: str) -> Optional[ArchiveEntry]:
return self._entries.get(entry_id)
def remove(self, entry_id: str) -> bool:
"""Remove an entry and clean all bidirectional links to it.
Returns True if entry was found and removed, False otherwise.
"""
if entry_id not in self._entries:
return False
# Remove bidirectional links
for other in self._entries.values():
if entry_id in other.links:
other.links.remove(entry_id)
del self._entries[entry_id]
self._save()
return True
def search(self, query: str, limit: int = 10) -> list[ArchiveEntry]:
"""Simple keyword search across titles and content."""
query_tokens = set(query.lower().split())
@@ -70,6 +86,44 @@ class MnemosyneArchive:
scored.sort(key=lambda x: x[0], reverse=True)
return [e for _, e in scored[:limit]]
def export(
self,
topics: Optional[list[str]] = None,
source: Optional[str] = None,
since: Optional[str] = None,
) -> dict:
"""Export archive data, optionally filtered.
Args:
topics: Only include entries with at least one matching topic.
source: Only include entries from this source.
since: ISO date string — only include entries created after this.
Returns:
Dict with entries, count, and export metadata.
"""
entries = list(self._entries.values())
if topics:
topics_lower = {t.lower() for t in topics}
entries = [e for e in entries if topics_lower & {t.lower() for t in e.topics}]
if source:
entries = [e for e in entries if e.source == source]
if since:
entries = [e for e in entries if e.created_at >= since]
return {
"entries": [e.to_dict() for e in entries],
"count": len(entries),
"filters": {
"topics": topics,
"source": source,
"since": since,
},
}
def get_linked(self, entry_id: str, depth: int = 1) -> list[ArchiveEntry]:
"""Get entries linked to a given entry, up to specified depth."""
visited = set()
@@ -97,18 +151,45 @@ class MnemosyneArchive:
topic_lower = topic.lower()
return [e for e in self._entries.values() if topic_lower in [t.lower() for t in e.topics]]
def list_topics(self) -> dict[str, int]:
"""Return dict mapping each topic to its entry count."""
topic_counts: dict[str, int] = {}
for entry in self._entries.values():
for topic in entry.topics:
topic_counts[topic] = topic_counts.get(topic, 0) + 1
return dict(sorted(topic_counts.items(), key=lambda x: x[1], reverse=True))
@property
def count(self) -> int:
    """Total number of entries currently held in the archive."""
    return len(self._entries)
def stats(self) -> dict:
total_links = sum(len(e.links) for e in self._entries.values())
entries = list(self._entries.values())
total_links = sum(len(e.links) for e in entries)
topics = set()
for e in self._entries.values():
for e in entries:
topics.update(e.topics)
# Orphans: entries with no links
orphans = sum(1 for e in entries if not e.links)
# Link density: ratio of actual links to possible links
n = len(entries)
max_links = n * (n - 1) if n > 1 else 0
link_density = total_links / max_links if max_links > 0 else 0.0
# Entry age range
timestamps = [e.created_at for e in entries if e.created_at]
oldest = min(timestamps) if timestamps else None
newest = max(timestamps) if timestamps else None
return {
"entries": len(self._entries),
"entries": len(entries),
"total_links": total_links,
"orphans": orphans,
"link_density": round(link_density, 4),
"unique_topics": len(topics),
"topics": sorted(topics),
"oldest_entry": oldest,
"newest_entry": newest,
}

View File

@@ -1,6 +1,6 @@
"""CLI interface for Mnemosyne.
Provides: mnemosyne ingest, mnemosyne search, mnemosyne link, mnemosyne stats
Provides: mnemosyne ingest, mnemosyne search, mnemosyne link, mnemosyne stats, mnemosyne topics, mnemosyne remove, mnemosyne export
"""
from __future__ import annotations
@@ -59,6 +59,43 @@ def cmd_link(args):
print(f" [{e.id[:8]}] {e.title} (source: {e.source})")
def cmd_topics(args):
    """Print every topic with its entry count, busiest first."""
    archive = MnemosyneArchive()
    counts = archive.list_topics()
    if not counts:
        print("No topics found.")
        return
    for name, total in counts.items():
        noun = "entry" if total == 1 else "entries"
        print(f" {name}: {total} {noun}")
def cmd_remove(args):
    """Remove a single entry by ID, exiting non-zero on any failure."""
    archive = MnemosyneArchive()
    target = archive.get(args.entry_id)
    if not target:
        print(f"Entry not found: {args.entry_id}")
        sys.exit(1)
    # Capture the title before the entry object is discarded.
    title = target.title
    removed = archive.remove(args.entry_id)
    if not removed:
        print(f"Failed to remove: {args.entry_id}")
        sys.exit(1)
    print(f"Removed: [{args.entry_id[:8]}] {title}")
def cmd_export(args):
    """Serialize the (optionally filtered) archive to JSON, on screen or to a file."""
    archive = MnemosyneArchive()
    # --topics arrives as a single comma-separated string.
    topic_filter = args.topics.split(",") if args.topics else None
    payload = archive.export(topics=topic_filter, source=args.source, since=args.since)
    rendered = json.dumps(payload, indent=2)
    if not args.output:
        print(rendered)
        return
    with open(args.output, "w") as fh:
        fh.write(rendered)
    print(f"Exported {payload['count']} entries to {args.output}")
def main():
parser = argparse.ArgumentParser(prog="mnemosyne", description="The Living Holographic Archive")
sub = parser.add_subparsers(dest="command")
@@ -78,12 +115,25 @@ def main():
l.add_argument("entry_id", help="Entry ID (or prefix)")
l.add_argument("-d", "--depth", type=int, default=1)
sub.add_parser("topics", help="List all topics with entry counts")
r = sub.add_parser("remove", help="Remove an entry")
r.add_argument("entry_id", help="Entry ID (or prefix)")
e = sub.add_parser("export", help="Export archive data")
e.add_argument("--topics", default=None, help="Filter by comma-separated topics")
e.add_argument("--source", default=None, help="Filter by source")
e.add_argument("--since", default=None, help="Filter by date (ISO format)")
e.add_argument("-o", "--output", default=None, help="Output file path")
args = parser.parse_args()
if not args.command:
parser.print_help()
sys.exit(1)
{"stats": cmd_stats, "search": cmd_search, "ingest": cmd_ingest, "link": cmd_link}[args.command](args)
{"stats": cmd_stats, "search": cmd_search, "ingest": cmd_ingest,
"link": cmd_link, "topics": cmd_topics, "remove": cmd_remove,
"export": cmd_export}[args.command](args)
if __name__ == "__main__":

View File

@@ -66,8 +66,80 @@ def test_archive_persistence():
path = Path(tmp) / "test_archive.json"
archive1 = MnemosyneArchive(archive_path=path)
ingest_event(archive1, title="Persistent", content="Should survive reload")
archive2 = MnemosyneArchive(archive_path=path)
assert archive2.count == 1
results = archive2.search("persistent")
assert len(results) == 1
def test_archive_remove():
    """remove() deletes an entry, scrubs back-links, and reports misses."""
    with tempfile.TemporaryDirectory() as tmp:
        store = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=store)
        first = ingest_event(archive, title="Entry one", content="First content", topics=["test"])
        second = ingest_event(archive, title="Entry two", content="Second content", topics=["test"])
        assert archive.count == 2
        # Removing the first entry must also scrub any link back from the second.
        assert archive.remove(first.id) is True
        assert archive.count == 1
        assert first.id not in second.links
        assert archive.get(first.id) is None
        # Removing an unknown ID reports failure without raising.
        assert archive.remove("nonexistent") is False
def test_archive_export():
    """export() honors topic filters and reports accurate counts."""
    with tempfile.TemporaryDirectory() as tmp:
        store = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=store)
        ingest_event(archive, title="Python entry", content="About Python", topics=["coding", "python"])
        ingest_event(archive, title="Cooking entry", content="About food", topics=["cooking"])
        ingest_event(archive, title="Another python", content="More Python", topics=["python"])
        # Unfiltered export returns everything.
        assert archive.export()["count"] == 3
        # Topic filters narrow the result set.
        assert archive.export(topics=["python"])["count"] == 2
        assert archive.export(topics=["cooking"])["count"] == 1
        # An unknown topic matches nothing.
        assert archive.export(topics=["nonexistent"])["count"] == 0
def test_archive_list_topics():
    """list_topics() tallies how many entries carry each topic."""
    with tempfile.TemporaryDirectory() as tmp:
        store = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=store)
        fixtures = (("E1", ["alpha", "beta"]), ("E2", ["beta", "gamma"]), ("E3", ["beta"]))
        for title, tags in fixtures:
            ingest_event(archive, title=title, content="c", topics=tags)
        counts = archive.list_topics()
        assert counts["beta"] == 3
        assert counts["alpha"] == 1
        assert counts["gamma"] == 1
def test_archive_richer_stats():
    """stats() reports orphans, a 0..1 link density, and an age range."""
    with tempfile.TemporaryDirectory() as tmp:
        store = Path(tmp) / "test_archive.json"
        archive = MnemosyneArchive(archive_path=store)
        ingest_event(archive, title="Linked entry", content="Shared content here", topics=["test"])
        ingest_event(archive, title="Linked content", content="Shared entry here", topics=["test"])
        ingest_event(archive, title="Orphan entry", content="Nothing in common cooking", topics=["food"])
        report = archive.stats()
        assert report["entries"] == 3
        assert report["orphans"] >= 1  # At least the cooking entry should be orphan
        assert 0 <= report["link_density"] <= 1
        assert report["oldest_entry"] is not None
        assert report["newest_entry"] is not None