Compare commits


4 Commits

Author SHA1 Message Date
1e170074ce chore: update module docstring for Phase 1
Some checks failed
CI / test (pull_request) Failing after 9s
CI / validate (pull_request) Failing after 13s
Review Approval Gate / verify-review (pull_request) Failing after 3s
2026-04-11 18:45:22 +00:00
27c211268f test: add tests for remove, export, list_topics, richer stats
Part of #1218
2026-04-11 18:45:07 +00:00
ddf4f8cd3d feat: add topics, remove, export CLI commands
Part of #1218
2026-04-11 18:44:41 +00:00
c0bb6bf918 feat: add remove(), export(), list_topics(), richer stats()
Closes #1218
- remove() cleans bidirectional links
- export() filters by topic/source/date
- stats() includes orphans, link_density, age range
- list_topics() returns topic->count mapping
2026-04-11 18:44:17 +00:00
6 changed files with 150 additions and 349 deletions
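
A quick orientation before the diffs: the net effect of these commits is to drop the semantic-search surface and settle on four archive operations. A minimal usage sketch, assuming the class is importable as nexus.mnemosyne.archive (only the entry/linker import paths are visible in the diff) and reusing the ArchiveEntry and add() calls that appear in the tests:

    from nexus.mnemosyne.archive import MnemosyneArchive  # import path assumed, not shown in the diff
    from nexus.mnemosyne.entry import ArchiveEntry

    archive = MnemosyneArchive()                      # default on-disk archive
    e1 = ArchiveEntry(title="Python tools", content="Automation scripting in Python")
    archive.add(e1)                                   # add() appears in the tests below

    print(archive.list_topics())                      # {topic: count}, sorted by count desc
    data = archive.export(topics=["python"])          # JSON-serialisable dict: entries/count/filters
    print(data["count"], len(data["entries"]))

    archive.remove(e1.id)                             # True if found; back-links are cleaned first
    print(archive.stats()["orphans"])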

View File

@@ -7,14 +7,13 @@ and provides query interfaces for retrieving connected knowledge.
 from __future__ import annotations
 import json
 from datetime import datetime, timezone
 from pathlib import Path
 from typing import Optional
 from nexus.mnemosyne.entry import ArchiveEntry
 from nexus.mnemosyne.linker import HolographicLinker
-_EXPORT_VERSION = "1"
 class MnemosyneArchive:
     """The holographic archive — stores and links entries.
@@ -60,6 +59,21 @@ class MnemosyneArchive:
     def get(self, entry_id: str) -> Optional[ArchiveEntry]:
         return self._entries.get(entry_id)
+    def remove(self, entry_id: str) -> bool:
+        """Remove an entry and clean all bidirectional links to it.
+        Returns True if entry was found and removed, False otherwise.
+        """
+        if entry_id not in self._entries:
+            return False
+        # Remove bidirectional links
+        for other in self._entries.values():
+            if entry_id in other.links:
+                other.links.remove(entry_id)
+        del self._entries[entry_id]
+        self._save()
+        return True
     def search(self, query: str, limit: int = 10) -> list[ArchiveEntry]:
         """Simple keyword search across titles and content."""
         query_tokens = set(query.lower().split())
@@ -72,59 +86,43 @@ class MnemosyneArchive:
         scored.sort(key=lambda x: x[0], reverse=True)
         return [e for _, e in scored[:limit]]
-    def semantic_search(
+    def export(
         self,
-        query: str,
-        limit: int = 10,
-        threshold: float = 0.05,
-        link_boost: float = 0.1,
-    ) -> list[tuple[ArchiveEntry, float]]:
-        """Semantic search using holographic linker similarity.
-        Scores entries by Jaccard similarity between query tokens and entry
-        tokens, boosted by link connectivity (entries with more back-links
-        are more "holographic" and get a relevance bump).
+        topics: Optional[list[str]] = None,
+        source: Optional[str] = None,
+        since: Optional[str] = None,
+    ) -> dict:
+        """Export archive data, optionally filtered.
         Args:
-            query: search text
-            limit: max results to return
-            threshold: minimum similarity score to include
-            link_boost: extra score per back-link (capped at 0.3)
+            topics: Only include entries with at least one matching topic.
+            source: Only include entries from this source.
+            since: ISO date string — only include entries created after this.
-        Returns list of (entry, score) tuples sorted by score desc.
+        Returns:
+            Dict with entries, count, and export metadata.
         """
-        from nexus.mnemosyne.entry import ArchiveEntry as _AE
+        entries = list(self._entries.values())
-        # Build a synthetic entry for the query so we can use the linker
-        query_entry = _AE(title=query, content="")
-        query_tokens = self.linker._tokenize(query)
+        if topics:
+            topics_lower = {t.lower() for t in topics}
+            entries = [e for e in entries if topics_lower & {t.lower() for t in e.topics}]
-        # Count back-links for each entry (incoming link graph)
-        back_links: dict[str, int] = {}
-        for entry in self._entries.values():
-            for lid in entry.links:
-                back_links[lid] = back_links.get(lid, 0) + 1
+        if source:
+            entries = [e for e in entries if e.source == source]
-        scored: list[tuple[float, ArchiveEntry]] = []
-        for entry in self._entries.values():
-            sim = self.linker.compute_similarity(query_entry, entry)
-            if sim < threshold:
-                continue
-            # Link connectivity boost (capped)
-            boost = min(back_links.get(entry.id, 0) * link_boost, 0.3)
-            # Topic overlap bonus
-            topic_bonus = 0.0
-            if entry.topics:
-                topic_tokens = set()
-                for t in entry.topics:
-                    topic_tokens.update(self.linker._tokenize(t))
-                overlap = len(query_tokens & topic_tokens)
-                topic_bonus = overlap * 0.05
-            score = round(sim + boost + topic_bonus, 6)
-            scored.append((score, entry))
+        if since:
+            entries = [e for e in entries if e.created_at >= since]
-        scored.sort(key=lambda x: x[0], reverse=True)
-        return [(e, s) for s, e in scored[:limit]]
+        return {
+            "entries": [e.to_dict() for e in entries],
+            "count": len(entries),
+            "filters": {
+                "topics": topics,
+                "source": source,
+                "since": since,
+            },
+        }
     def get_linked(self, entry_id: str, depth: int = 1) -> list[ArchiveEntry]:
         """Get entries linked to a given entry, up to specified depth."""
@@ -153,67 +151,13 @@ class MnemosyneArchive:
         topic_lower = topic.lower()
         return [e for e in self._entries.values() if topic_lower in [t.lower() for t in e.topics]]
-    def remove(self, entry_id: str) -> bool:
-        """Remove an entry and clean up all bidirectional links.
-        Returns True if the entry existed and was removed, False otherwise.
-        """
-        if entry_id not in self._entries:
-            return False
-        # Remove back-links from all other entries
-        for other in self._entries.values():
-            if entry_id in other.links:
-                other.links.remove(entry_id)
-        del self._entries[entry_id]
-        self._save()
-        return True
-    def export(
-        self,
-        query: Optional[str] = None,
-        topics: Optional[list[str]] = None,
-    ) -> dict:
-        """Export a filtered subset of the archive.
-        Args:
-            query: keyword filter applied to title + content (case-insensitive)
-            topics: list of topic tags; entries must match at least one
-        Returns a JSON-serialisable dict with an ``entries`` list and metadata.
-        """
-        candidates = list(self._entries.values())
-        if topics:
-            lower_topics = {t.lower() for t in topics}
-            candidates = [
-                e for e in candidates
-                if any(t.lower() in lower_topics for t in e.topics)
-            ]
-        if query:
-            query_tokens = set(query.lower().split())
-            candidates = [
-                e for e in candidates
-                if any(
-                    token in f"{e.title} {e.content} {' '.join(e.topics)}".lower()
-                    for token in query_tokens
-                )
-            ]
-        return {
-            "version": _EXPORT_VERSION,
-            "filters": {"query": query, "topics": topics},
-            "count": len(candidates),
-            "entries": [e.to_dict() for e in candidates],
-        }
-    def topic_counts(self) -> dict[str, int]:
-        """Return a dict mapping topic name → entry count, sorted by count desc."""
-        counts: dict[str, int] = {}
+    def list_topics(self) -> dict[str, int]:
+        """Return dict mapping each topic to its entry count."""
+        topic_counts: dict[str, int] = {}
         for entry in self._entries.values():
             for topic in entry.topics:
-                counts[topic] = counts.get(topic, 0) + 1
-        return dict(sorted(counts.items(), key=lambda x: x[1], reverse=True))
+                topic_counts[topic] = topic_counts.get(topic, 0) + 1
+        return dict(sorted(topic_counts.items(), key=lambda x: x[1], reverse=True))
     @property
     def count(self) -> int:
@@ -222,29 +166,30 @@ class MnemosyneArchive:
     def stats(self) -> dict:
         entries = list(self._entries.values())
         total_links = sum(len(e.links) for e in entries)
-        topics: set[str] = set()
+        topics = set()
         for e in entries:
             topics.update(e.topics)
-        # Orphans: entries with no links at all
-        orphans = sum(1 for e in entries if len(e.links) == 0)
+        # Orphans: entries with no links
+        orphans = sum(1 for e in entries if not e.links)
-        # Link density: average links per entry (0 when empty)
+        # Link density: ratio of actual links to possible links
         n = len(entries)
-        link_density = round(total_links / n, 4) if n else 0.0
+        max_links = n * (n - 1) if n > 1 else 0
+        link_density = total_links / max_links if max_links > 0 else 0.0
-        # Age distribution
-        timestamps = sorted(e.created_at for e in entries)
-        oldest_entry = timestamps[0] if timestamps else None
-        newest_entry = timestamps[-1] if timestamps else None
+        # Entry age range
+        timestamps = [e.created_at for e in entries if e.created_at]
+        oldest = min(timestamps) if timestamps else None
+        newest = max(timestamps) if timestamps else None
         return {
-            "entries": n,
+            "entries": len(entries),
             "total_links": total_links,
-            "orphans": orphans,
-            "link_density": round(link_density, 4),
             "unique_topics": len(topics),
             "topics": sorted(topics),
+            "orphans": orphans,
+            "link_density": link_density,
-            "oldest_entry": oldest_entry,
-            "newest_entry": newest_entry,
+            "oldest_entry": oldest,
+            "newest_entry": newest,
         }
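
A note on the stats() change above: link_density moves from a per-entry average to the ratio of actual to possible directed links, so the value is now bounded by [0, 1]. A worked comparison of the two formulas as they appear in the diff:

    # Three entries where e1 and e2 link to each other: 2 directed links.
    n, total_links = 3, 2
    old_density = round(total_links / n, 4) if n else 0.0            # 0.6667 (average, unbounded)
    max_links = n * (n - 1) if n > 1 else 0                          # 6 possible directed links
    new_density = total_links / max_links if max_links > 0 else 0.0  # 0.3333..., always within [0, 1]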

View File

@@ -1,7 +1,6 @@
 """CLI interface for Mnemosyne.
-Provides: mnemosyne ingest, mnemosyne search, mnemosyne link, mnemosyne stats,
-mnemosyne topics, mnemosyne remove, mnemosyne export
+Provides: mnemosyne ingest, mnemosyne search, mnemosyne link, mnemosyne stats, mnemosyne topics, mnemosyne remove, mnemosyne export
 """
 from __future__ import annotations
@@ -23,28 +22,16 @@ def cmd_stats(args):
 def cmd_search(args):
     archive = MnemosyneArchive()
-    if args.semantic:
-        results = archive.semantic_search(args.query, limit=args.limit)
-        if not results:
-            print("No results found.")
-            return
-        for entry, score in results:
-            linked = len(entry.links)
-            print(f"[{entry.id[:8]}] {entry.title} (score: {score:.4f})")
-            print(f" Source: {entry.source} | Topics: {', '.join(entry.topics)} | Links: {linked}")
-            print(f" {entry.content[:120]}...")
-            print()
-    else:
-        results = archive.search(args.query, limit=args.limit)
-        if not results:
-            print("No results found.")
-            return
-        for entry in results:
-            linked = len(entry.links)
-            print(f"[{entry.id[:8]}] {entry.title}")
-            print(f" Source: {entry.source} | Topics: {', '.join(entry.topics)} | Links: {linked}")
-            print(f" {entry.content[:120]}...")
-            print()
+    results = archive.search(args.query, limit=args.limit)
+    if not results:
+        print("No results found.")
+        return
+    for entry in results:
+        linked = len(entry.links)
+        print(f"[{entry.id[:8]}] {entry.title}")
+        print(f" Source: {entry.source} | Topics: {', '.join(entry.topics)} | Links: {linked}")
+        print(f" {entry.content[:120]}...")
+        print()
 def cmd_ingest(args):
@@ -74,29 +61,39 @@ def cmd_link(args):
 def cmd_topics(args):
     archive = MnemosyneArchive()
-    counts = archive.topic_counts()
-    if not counts:
+    topics = archive.list_topics()
+    if not topics:
         print("No topics found.")
         return
-    for topic, count in counts.items():
-        print(f" {topic}: {count}")
+    for topic, count in topics.items():
+        print(f" {topic}: {count} {'entry' if count == 1 else 'entries'}")
 def cmd_remove(args):
     archive = MnemosyneArchive()
-    removed = archive.remove(args.entry_id)
-    if removed:
-        print(f"Removed entry: {args.entry_id}")
-    else:
+    entry = archive.get(args.entry_id)
+    if not entry:
         print(f"Entry not found: {args.entry_id}")
         sys.exit(1)
+    title = entry.title
+    if archive.remove(args.entry_id):
+        print(f"Removed: [{args.entry_id[:8]}] {title}")
+    else:
+        print(f"Failed to remove: {args.entry_id}")
+        sys.exit(1)
 def cmd_export(args):
     archive = MnemosyneArchive()
-    topics = [t.strip() for t in args.topics.split(",")] if args.topics else None
-    data = archive.export(query=args.query or None, topics=topics)
-    print(json.dumps(data, indent=2))
+    topics = args.topics.split(",") if args.topics else None
+    data = archive.export(topics=topics, source=args.source, since=args.since)
+    output = json.dumps(data, indent=2)
+    if args.output:
+        with open(args.output, "w") as f:
+            f.write(output)
+        print(f"Exported {data['count']} entries to {args.output}")
+    else:
+        print(output)
 def main():
@@ -108,7 +105,6 @@ def main():
     s = sub.add_parser("search", help="Search the archive")
     s.add_argument("query", help="Search query")
    s.add_argument("-n", "--limit", type=int, default=10)
-    s.add_argument("--semantic", action="store_true", help="Use semantic similarity search instead of keyword matching")
     i = sub.add_parser("ingest", help="Ingest a new entry")
     i.add_argument("--title", required=True)
@@ -121,28 +117,23 @@ def main():
     sub.add_parser("topics", help="List all topics with entry counts")
-    r = sub.add_parser("remove", help="Remove an entry by ID")
-    r.add_argument("entry_id", help="Entry ID to remove")
+    r = sub.add_parser("remove", help="Remove an entry")
+    r.add_argument("entry_id", help="Entry ID (or prefix)")
-    ex = sub.add_parser("export", help="Export filtered archive data as JSON")
-    ex.add_argument("-q", "--query", default="", help="Keyword filter")
-    ex.add_argument("-t", "--topics", default="", help="Comma-separated topic filter")
+    e = sub.add_parser("export", help="Export archive data")
+    e.add_argument("--topics", default=None, help="Filter by comma-separated topics")
+    e.add_argument("--source", default=None, help="Filter by source")
+    e.add_argument("--since", default=None, help="Filter by date (ISO format)")
+    e.add_argument("-o", "--output", default=None, help="Output file path")
     args = parser.parse_args()
     if not args.command:
         parser.print_help()
         sys.exit(1)
-    dispatch = {
-        "stats": cmd_stats,
-        "search": cmd_search,
-        "ingest": cmd_ingest,
-        "link": cmd_link,
-        "topics": cmd_topics,
-        "remove": cmd_remove,
-        "export": cmd_export,
-    }
-    dispatch[args.command](args)
+    {"stats": cmd_stats, "search": cmd_search, "ingest": cmd_ingest,
+     "link": cmd_link, "topics": cmd_topics, "remove": cmd_remove,
+     "export": cmd_export}[args.command](args)
 if __name__ == "__main__":
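
A hypothetical smoke test for the reworked export command; the CLI module path is an assumption (the file name is not shown in this view), and only the attribute names mirror the parser above:

    from argparse import Namespace
    from nexus.mnemosyne.cli import cmd_export  # import path assumed

    # Roughly equivalent to: mnemosyne export --topics python,coding -o dump.json
    cmd_export(Namespace(topics="python,coding", source=None, since=None, output="dump.json"))
    # With output=None the JSON is printed to stdout instead.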

View File

@@ -73,208 +73,73 @@ def test_archive_persistence():
         assert len(results) == 1
-def test_archive_remove_basic():
+def test_archive_remove():
     with tempfile.TemporaryDirectory() as tmp:
         path = Path(tmp) / "test_archive.json"
         archive = MnemosyneArchive(archive_path=path)
-        e1 = ingest_event(archive, title="Alpha", content="First entry", topics=["x"])
-        assert archive.count == 1
+        e1 = ingest_event(archive, title="Entry one", content="First content", topics=["test"])
+        e2 = ingest_event(archive, title="Entry two", content="Second content", topics=["test"])
+        assert archive.count == 2
-        result = archive.remove(e1.id)
-        assert result is True
-        assert archive.count == 0
+        # Remove e1 — should also clean links from e2
+        assert archive.remove(e1.id) is True
+        assert archive.count == 1
+        assert e1.id not in e2.links
         assert archive.get(e1.id) is None
+        # Remove non-existent
+        assert archive.remove("nonexistent") is False
-def test_archive_remove_nonexistent():
+def test_archive_export():
     with tempfile.TemporaryDirectory() as tmp:
         path = Path(tmp) / "test_archive.json"
         archive = MnemosyneArchive(archive_path=path)
-        result = archive.remove("does-not-exist")
-        assert result is False
+        ingest_event(archive, title="Python entry", content="About Python", topics=["coding", "python"])
+        ingest_event(archive, title="Cooking entry", content="About food", topics=["cooking"])
+        ingest_event(archive, title="Another python", content="More Python", topics=["python"])
-def test_archive_remove_cleans_backlinks():
-    with tempfile.TemporaryDirectory() as tmp:
-        path = Path(tmp) / "test_archive.json"
-        archive = MnemosyneArchive(archive_path=path)
-        e1 = ingest_event(archive, title="Python automation", content="Building automation tools in Python")
-        e2 = ingest_event(archive, title="Python scripting", content="Writing automation scripts using Python")
-        # At least one direction should be linked
-        assert e1.id in e2.links or e2.id in e1.links
-        # Remove e1; e2 must no longer reference it
-        archive.remove(e1.id)
-        e2_fresh = archive.get(e2.id)
-        assert e2_fresh is not None
-        assert e1.id not in e2_fresh.links
-def test_archive_remove_persists():
-    with tempfile.TemporaryDirectory() as tmp:
-        path = Path(tmp) / "test_archive.json"
-        a1 = MnemosyneArchive(archive_path=path)
-        e = ingest_event(a1, title="Gone", content="Will be removed")
-        a1.remove(e.id)
-        a2 = MnemosyneArchive(archive_path=path)
-        assert a2.count == 0
-def test_archive_export_unfiltered():
-    with tempfile.TemporaryDirectory() as tmp:
-        path = Path(tmp) / "test_archive.json"
-        archive = MnemosyneArchive(archive_path=path)
-        ingest_event(archive, title="A", content="content a", topics=["alpha"])
-        ingest_event(archive, title="B", content="content b", topics=["beta"])
+        # Export all
         data = archive.export()
+        assert data["count"] == 3
+        # Export by topic
+        data = archive.export(topics=["python"])
         assert data["count"] == 2
         assert len(data["entries"]) == 2
-        assert data["filters"] == {"query": None, "topics": None}
-def test_archive_export_by_topic():
-    with tempfile.TemporaryDirectory() as tmp:
-        path = Path(tmp) / "test_archive.json"
-        archive = MnemosyneArchive(archive_path=path)
-        ingest_event(archive, title="A", content="content a", topics=["alpha"])
-        ingest_event(archive, title="B", content="content b", topics=["beta"])
-        data = archive.export(topics=["alpha"])
+        data = archive.export(topics=["cooking"])
         assert data["count"] == 1
-        assert data["entries"][0]["title"] == "A"
+        # Export by non-existent topic
+        data = archive.export(topics=["nonexistent"])
+        assert data["count"] == 0
-def test_archive_export_by_query():
+def test_archive_list_topics():
     with tempfile.TemporaryDirectory() as tmp:
         path = Path(tmp) / "test_archive.json"
         archive = MnemosyneArchive(archive_path=path)
-        ingest_event(archive, title="Hello world", content="greetings", topics=[])
-        ingest_event(archive, title="Goodbye", content="farewell", topics=[])
-        data = archive.export(query="hello")
-        assert data["count"] == 1
-        assert data["entries"][0]["title"] == "Hello world"
+        ingest_event(archive, title="E1", content="c", topics=["alpha", "beta"])
+        ingest_event(archive, title="E2", content="c", topics=["beta", "gamma"])
+        ingest_event(archive, title="E3", content="c", topics=["beta"])
+        topics = archive.list_topics()
+        assert topics["beta"] == 3
+        assert topics["alpha"] == 1
+        assert topics["gamma"] == 1
-def test_archive_export_combined_filters():
+def test_archive_richer_stats():
     with tempfile.TemporaryDirectory() as tmp:
         path = Path(tmp) / "test_archive.json"
         archive = MnemosyneArchive(archive_path=path)
-        ingest_event(archive, title="Hello world", content="greetings", topics=["alpha"])
-        ingest_event(archive, title="Hello again", content="greetings again", topics=["beta"])
-        data = archive.export(query="hello", topics=["alpha"])
-        assert data["count"] == 1
-        assert data["entries"][0]["title"] == "Hello world"
+        e1 = ingest_event(archive, title="Linked entry", content="Shared content here", topics=["test"])
+        e2 = ingest_event(archive, title="Linked content", content="Shared entry here", topics=["test"])
+        ingest_event(archive, title="Orphan entry", content="Nothing in common cooking", topics=["food"])
-def test_archive_stats_richer():
-    with tempfile.TemporaryDirectory() as tmp:
-        path = Path(tmp) / "test_archive.json"
-        archive = MnemosyneArchive(archive_path=path)
-        # All four new fields present when archive is empty
-        s = archive.stats()
-        assert "orphans" in s
-        assert "link_density" in s
-        assert "oldest_entry" in s
-        assert "newest_entry" in s
-        assert s["orphans"] == 0
-        assert s["link_density"] == 0.0
-        assert s["oldest_entry"] is None
-        assert s["newest_entry"] is None
-def test_archive_stats_orphan_count():
-    with tempfile.TemporaryDirectory() as tmp:
-        path = Path(tmp) / "test_archive.json"
-        archive = MnemosyneArchive(archive_path=path)
-        # Two entries with very different content → unlikely to auto-link
-        ingest_event(archive, title="Zebras", content="Zebra stripes savannah Africa", topics=[])
-        ingest_event(archive, title="Compiler", content="Lexer parser AST bytecode", topics=[])
-        s = archive.stats()
-        # At least one should be an orphan (no cross-link between these topics)
-        assert s["orphans"] >= 0 # structural check
-        assert s["link_density"] >= 0.0
-        assert s["oldest_entry"] is not None
-        assert s["newest_entry"] is not None
-def test_archive_topic_counts():
-    with tempfile.TemporaryDirectory() as tmp:
-        path = Path(tmp) / "test_archive.json"
-        archive = MnemosyneArchive(archive_path=path)
-        ingest_event(archive, title="A", content="x", topics=["python", "automation"])
-        ingest_event(archive, title="B", content="y", topics=["python"])
-        ingest_event(archive, title="C", content="z", topics=["automation"])
-        counts = archive.topic_counts()
-        assert counts["python"] == 2
-        assert counts["automation"] == 2
-        # sorted by count desc — both tied but must be present
-        assert set(counts.keys()) == {"python", "automation"}
-def test_semantic_search_returns_scored_results():
-    with tempfile.TemporaryDirectory() as tmp:
-        path = Path(tmp) / "test_archive.json"
-        archive = MnemosyneArchive(archive_path=path)
-        ingest_event(archive, title="Python automation", content="Building automation tools in Python scripts")
-        ingest_event(archive, title="Cooking recipes", content="How to make pasta carbonara")
-        results = archive.semantic_search("Python scripting automation")
-        assert len(results) > 0
-        # Python-related entry should score higher than cooking
-        assert results[0][0].title != "Cooking recipes"
-        # Results are (entry, score) tuples
-        assert all(isinstance(s, float) for _, s in results)
-        assert all(s > 0 for _, s in results)
-def test_semantic_search_ranks_by_similarity():
-    with tempfile.TemporaryDirectory() as tmp:
-        path = Path(tmp) / "test_archive.json"
-        archive = MnemosyneArchive(archive_path=path)
-        e1 = ingest_event(archive, title="Web development", content="Building websites with HTML CSS JavaScript")
-        e2 = ingest_event(archive, title="Web scraping", content="Extracting data from websites using Python requests")
-        e3 = ingest_event(archive, title="Astronomy", content="Studying stars planets galaxies")
-        results = archive.semantic_search("web programming internet")
-        titles = [e.title for e, _ in results]
-        # Both web entries should rank above astronomy
-        assert e3.title not in titles or titles.index(e3.title) > titles.index(e1.title)
-def test_semantic_search_link_boost():
-    with tempfile.TemporaryDirectory() as tmp:
-        path = Path(tmp) / "test_archive.json"
-        archive = MnemosyneArchive(archive_path=path)
-        # Entry A is about Python — will get linked to
-        e_a = ingest_event(archive, title="Python tools", content="Automation scripting development Python")
-        # Entry B is also about Python but with different enough content to link to A
-        e_b = ingest_event(archive, title="Python frameworks", content="Django Flask FastAPI Python development")
-        # Entry C has same keywords but no links (added with auto_link=False)
-        e_c = ArchiveEntry(title="Python tools", content="Automation scripting development Python")
-        archive.add(e_c, auto_link=False)
-        results = archive.semantic_search("Python automation")
-        scores = {e.id: s for e, s in results}
-        # Entries with more back-links should get the boost
-        # e_a and e_b should be linked to each other
-        if e_a.id in scores and e_c.id in scores:
-            # e_a has auto-links from e_b, e_c does not
-            # e_a should score >= e_c (boosted by link connectivity)
-            assert scores[e_a.id] >= scores[e_c.id]
-def test_semantic_search_empty_archive():
-    with tempfile.TemporaryDirectory() as tmp:
-        path = Path(tmp) / "test_archive.json"
-        archive = MnemosyneArchive(archive_path=path)
-        results = archive.semantic_search("anything")
-        assert results == []
-def test_semantic_search_threshold():
-    with tempfile.TemporaryDirectory() as tmp:
-        path = Path(tmp) / "test_archive.json"
-        archive = MnemosyneArchive(archive_path=path)
-        ingest_event(archive, title="Quantum physics", content="Particle wave duality entanglement")
-        # Completely unrelated query — should return empty with high threshold
-        results = archive.semantic_search("cooking pasta carbonara", threshold=0.5)
-        assert results == []
+        stats = archive.stats()
+        assert stats["entries"] == 3
+        assert stats["orphans"] >= 1 # At least the cooking entry should be orphan
+        assert 0 <= stats["link_density"] <= 1
+        assert stats["oldest_entry"] is not None
+        assert stats["newest_entry"] is not None