Compare commits
3 Commits
feat/mnemo
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| | b3939179b9 | | |
| | a14bf80631 | | |
| | 217ffd7147 | | |
@@ -7,10 +7,11 @@ and provides query interfaces for retrieving connected knowledge.
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from nexus.mnemosyne.entry import ArchiveEntry
|
||||
from nexus.mnemosyne.entry import ArchiveEntry, _compute_content_hash
|
||||
from nexus.mnemosyne.linker import HolographicLinker
|
||||
|
||||
_EXPORT_VERSION = "1"
|
||||
@@ -49,14 +50,83 @@ class MnemosyneArchive:
|
||||
with open(self.path, "w") as f:
|
||||
json.dump(data, f, indent=2)
|
||||
|
||||
def find_duplicate(self, entry: ArchiveEntry) -> Optional[ArchiveEntry]:
    """Look up a stored entry whose content hash equals that of ``entry``.

    Entries that share ``entry``'s own id are skipped, so an entry is never
    reported as a duplicate of itself.

    Returns:
        The first match in ``self._entries`` iteration order, or None when
        no other entry carries the same hash.
    """
    matches = (
        candidate
        for candidate in self._entries.values()
        if candidate.content_hash == entry.content_hash
        and candidate.id != entry.id
    )
    return next(matches, None)
|
||||
|
||||
def add(self, entry: ArchiveEntry, auto_link: bool = True) -> ArchiveEntry:
    """Add an entry to the archive, skipping content duplicates.

    If an entry with the same content hash already exists, that existing
    entry is returned and the archive is left untouched (nothing is stored,
    linked, or saved).

    Args:
        entry: The entry to store.
        auto_link: If True, run the linker for the new entry against every
            entry currently held by the archive.

    Returns:
        The stored entry, or the pre-existing duplicate.
    """
    duplicate = self.find_duplicate(entry)
    if duplicate is not None:
        return duplicate
    self._entries[entry.id] = entry
    if auto_link:
        # The candidate list is built after insertion, so it includes the
        # new entry itself.
        self.linker.apply_links(entry, list(self._entries.values()))
    self._save()
    return entry
|
||||
|
||||
def update_entry(
    self,
    entry_id: str,
    title: Optional[str] = None,
    content: Optional[str] = None,
    metadata: Optional[dict] = None,
    auto_link: bool = True,
) -> ArchiveEntry:
    """Update title, content, and/or metadata on an existing entry.

    ``updated_at`` is bumped only when the call actually mutates the entry
    (a differing title/content, or a non-empty metadata dict); a call that
    changes nothing leaves the timestamp as it was. When the title or
    content changes, the content hash is recomputed and, if ``auto_link``
    is set, the entry's links are rebuilt from scratch.

    Args:
        entry_id: ID of the entry to update.
        title: New title, or None to leave unchanged.
        content: New content, or None to leave unchanged.
        metadata: Dict to merge into existing metadata (replaces keys present).
        auto_link: If True, re-run the linker after a title/content change.

    Returns:
        The updated ArchiveEntry.

    Raises:
        KeyError: If entry_id does not exist.
    """
    entry = self._entries.get(entry_id)
    if entry is None:
        raise KeyError(entry_id)

    # Title and content both feed the content hash, so either one changing
    # counts as a content change.
    content_changed = False
    if title is not None and title != entry.title:
        entry.title = title
        content_changed = True
    if content is not None and content != entry.content:
        entry.content = content
        content_changed = True

    metadata_changed = False
    if metadata:
        entry.metadata.update(metadata)
        metadata_changed = True

    if content_changed:
        entry.content_hash = _compute_content_hash(entry.title, entry.content)

    if content_changed or metadata_changed:
        entry.updated_at = datetime.now(timezone.utc).isoformat()

    if content_changed and auto_link:
        # Drop references to this entry from every entry's link list, reset
        # its own links, then let the linker recompute them.
        for other in self._entries.values():
            if entry_id in other.links:
                other.links.remove(entry_id)
        entry.links = []
        self.linker.apply_links(entry, list(self._entries.values()))

    self._save()
    return entry
|
||||
|
||||
def get(self, entry_id: str) -> Optional[ArchiveEntry]:
    """Fetch the entry stored under ``entry_id``; None when the id is unknown."""
    found = self._entries.get(entry_id)
    return found
|
||||
|
||||
@@ -510,6 +580,139 @@ class MnemosyneArchive:
|
||||
bridges.sort(key=lambda b: b["components_after_removal"], reverse=True)
|
||||
return bridges
|
||||
|
||||
def add_tags(self, entry_id: str, tags: list[str]) -> ArchiveEntry:
    """Append tags that the entry does not already carry.

    Tags are compared case-insensitively against the entry's topics (and
    against tags added earlier in the same call); the spelling of the first
    occurrence is the one that is kept. The archive is saved afterwards.

    Args:
        entry_id: ID of the entry to update.
        tags: Tags to add.

    Returns:
        The updated ArchiveEntry.

    Raises:
        KeyError: If entry_id does not exist.
    """
    target = self._entries.get(entry_id)
    if target is None:
        raise KeyError(entry_id)
    known = {topic.lower() for topic in target.topics}
    for candidate in tags:
        folded = candidate.lower()
        if folded in known:
            continue
        target.topics.append(candidate)
        known.add(folded)
    self._save()
    return target
|
||||
|
||||
def remove_tags(self, entry_id: str, tags: list[str]) -> ArchiveEntry:
    """Strip the given tags from an entry, matching case-insensitively.

    Tags the entry does not have are ignored. The entry's topic list is
    replaced by a new list and the archive is saved.

    Args:
        entry_id: ID of the entry to update.
        tags: Tags to remove.

    Returns:
        The updated ArchiveEntry.

    Raises:
        KeyError: If entry_id does not exist.
    """
    target = self._entries.get(entry_id)
    if target is None:
        raise KeyError(entry_id)
    doomed = {tag.lower() for tag in tags}
    kept = []
    for topic in target.topics:
        if topic.lower() not in doomed:
            kept.append(topic)
    target.topics = kept
    self._save()
    return target
|
||||
|
||||
def retag(self, entry_id: str, tags: list[str]) -> ArchiveEntry:
    """Replace an entry's whole tag list with ``tags``.

    Case-insensitive duplicates in ``tags`` collapse to their first
    spelling, keeping first-seen order. The archive is saved afterwards.

    Args:
        entry_id: ID of the entry to update.
        tags: New tag list.

    Returns:
        The updated ArchiveEntry.

    Raises:
        KeyError: If entry_id does not exist.
    """
    target = self._entries.get(entry_id)
    if target is None:
        raise KeyError(entry_id)
    # dicts keep insertion order, so the values come out in first-seen order.
    first_spelling: dict[str, str] = {}
    for tag in tags:
        first_spelling.setdefault(tag.lower(), tag)
    target.topics = list(first_spelling.values())
    self._save()
    return target
|
||||
|
||||
@staticmethod
def _parse_dt(dt_str: str) -> datetime:
    """Parse an ISO datetime string into a timezone-aware datetime.

    A trailing "Z" (UTC designator) is accepted on every Python version.
    Strings without timezone information are assumed to be UTC.

    Raises:
        ValueError: If the string is not a valid ISO datetime.
    """
    # ``datetime.fromisoformat`` only understands the "Z" suffix from
    # Python 3.11 on, so rewrite it as an explicit UTC offset first.
    if dt_str.endswith("Z"):
        dt_str = dt_str[:-1] + "+00:00"
    dt = datetime.fromisoformat(dt_str)
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    return dt
|
||||
|
||||
def by_date_range(self, start: str, end: str) -> list[ArchiveEntry]:
    """Return entries whose ``created_at`` falls within [start, end] (inclusive).

    Args:
        start: ISO datetime string for the range start (e.g. "2024-01-01" or
            "2024-01-01T00:00:00Z"). Timezone-naive strings are treated as UTC.
        end: ISO datetime string for the range end. Timezone-naive strings are
            treated as UTC.

    Returns:
        List of ArchiveEntry sorted chronologically by ``created_at``,
        oldest first.

    Raises:
        ValueError: If ``self._parse_dt`` rejects one of the strings.
    """
    start_dt = self._parse_dt(start)
    end_dt = self._parse_dt(end)
    dated = []
    for entry in self._entries.values():
        entry_dt = self._parse_dt(entry.created_at)
        if start_dt <= entry_dt <= end_dt:
            dated.append((entry_dt, entry))
    # Order by the parsed instant rather than the raw string: string order is
    # wrong as soon as two timestamps carry different UTC offsets.
    dated.sort(key=lambda pair: pair[0])
    return [entry for _, entry in dated]
|
||||
|
||||
def temporal_neighbors(self, entry_id: str, window_days: int = 7) -> list[ArchiveEntry]:
    """Return entries created within ``window_days`` of a given entry.

    The window is inclusive on both sides and the reference entry itself is
    excluded from results.

    Args:
        entry_id: ID of the anchor entry.
        window_days: Number of days around the anchor's ``created_at`` to search.

    Returns:
        List of ArchiveEntry sorted chronologically by ``created_at``,
        oldest first.

    Raises:
        KeyError: If ``entry_id`` does not exist in the archive.
    """
    anchor = self._entries.get(entry_id)
    if anchor is None:
        raise KeyError(entry_id)
    anchor_dt = self._parse_dt(anchor.created_at)
    delta = timedelta(days=window_days)
    window_start = anchor_dt - delta
    window_end = anchor_dt + delta
    dated = []
    for entry in self._entries.values():
        if entry.id == entry_id:
            continue
        entry_dt = self._parse_dt(entry.created_at)
        if window_start <= entry_dt <= window_end:
            dated.append((entry_dt, entry))
    # Order by the parsed instant rather than the raw string: string order is
    # wrong as soon as two timestamps carry different UTC offsets.
    dated.sort(key=lambda pair: pair[0])
    return [entry for _, entry in dated]
|
||||
|
||||
def rebuild_links(self, threshold: Optional[float] = None) -> int:
|
||||
"""Recompute all links from scratch.
|
||||
|
||||
|
||||
@@ -2,7 +2,9 @@
|
||||
|
||||
Provides: mnemosyne ingest, mnemosyne search, mnemosyne link, mnemosyne stats,
|
||||
mnemosyne topics, mnemosyne remove, mnemosyne export,
|
||||
mnemosyne clusters, mnemosyne hubs, mnemosyne bridges, mnemosyne rebuild
|
||||
mnemosyne clusters, mnemosyne hubs, mnemosyne bridges, mnemosyne rebuild,
|
||||
mnemosyne tag, mnemosyne untag, mnemosyne retag,
|
||||
mnemosyne timeline, mnemosyne neighbors
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -143,6 +145,74 @@ def cmd_rebuild(args):
|
||||
print(f"Rebuilt links: {total} connections across {archive.count} entries")
|
||||
|
||||
|
||||
def cmd_tag(args):
    """Handle ``mnemosyne tag``: add comma-separated tags to an entry.

    Blank items in ``args.tags`` are dropped. Prints the entry's short id,
    title and resulting topics; prints "Entry not found" and exits with
    status 1 when ``args.entry_id`` is unknown.
    """
    archive = MnemosyneArchive()
    requested = []
    for piece in args.tags.split(","):
        if piece.strip():
            requested.append(piece.strip())
    try:
        entry = archive.add_tags(args.entry_id, requested)
    except KeyError:
        print(f"Entry not found: {args.entry_id}")
        sys.exit(1)
    print(f"[{entry.id[:8]}] {entry.title}")
    topics_text = ", ".join(entry.topics) if entry.topics else "(none)"
    print(f" Topics: {topics_text}")
|
||||
|
||||
|
||||
def cmd_untag(args):
    """Handle ``mnemosyne untag``: remove comma-separated tags from an entry.

    Blank items in ``args.tags`` are dropped. Prints the entry's short id,
    title and remaining topics; prints "Entry not found" and exits with
    status 1 when ``args.entry_id`` is unknown.
    """
    archive = MnemosyneArchive()
    unwanted = []
    for piece in args.tags.split(","):
        if piece.strip():
            unwanted.append(piece.strip())
    try:
        entry = archive.remove_tags(args.entry_id, unwanted)
    except KeyError:
        print(f"Entry not found: {args.entry_id}")
        sys.exit(1)
    print(f"[{entry.id[:8]}] {entry.title}")
    topics_text = ", ".join(entry.topics) if entry.topics else "(none)"
    print(f" Topics: {topics_text}")
|
||||
|
||||
|
||||
def cmd_retag(args):
    """Handle ``mnemosyne retag``: replace all tags on an entry.

    Blank items in ``args.tags`` are dropped. Prints the entry's short id,
    title and new topics; prints "Entry not found" and exits with status 1
    when ``args.entry_id`` is unknown.
    """
    archive = MnemosyneArchive()
    replacement = []
    for piece in args.tags.split(","):
        if piece.strip():
            replacement.append(piece.strip())
    try:
        entry = archive.retag(args.entry_id, replacement)
    except KeyError:
        print(f"Entry not found: {args.entry_id}")
        sys.exit(1)
    print(f"[{entry.id[:8]}] {entry.title}")
    topics_text = ", ".join(entry.topics) if entry.topics else "(none)"
    print(f" Topics: {topics_text}")
|
||||
|
||||
|
||||
def cmd_timeline(args):
    """Handle ``mnemosyne timeline``: list entries created in a date range.

    Queries the archive with ``args.start`` and ``args.end``. A ValueError
    from the query is reported as an invalid date format and the process
    exits with status 1. Each hit is printed as a header line, a topics
    line and a blank line.
    """
    archive = MnemosyneArchive()
    try:
        hits = archive.by_date_range(args.start, args.end)
    except ValueError as e:
        print(f"Invalid date format: {e}")
        sys.exit(1)
    if not hits:
        print("No entries found in that date range.")
        return
    for hit in hits:
        topics_text = ", ".join(hit.topics) if hit.topics else "(none)"
        print(f"[{hit.id[:8]}] {hit.created_at[:10]} {hit.title}")
        print(f" Topics: {topics_text}")
        print()
|
||||
|
||||
|
||||
def cmd_neighbors(args):
    """Handle ``mnemosyne neighbors``: list entries created near an anchor entry.

    Uses ``args.days`` as the window size. Prints "Entry not found" and
    exits with status 1 when ``args.entry_id`` is unknown. Each hit is
    printed as a header line, a topics line and a blank line.
    """
    archive = MnemosyneArchive()
    try:
        hits = archive.temporal_neighbors(args.entry_id, window_days=args.days)
    except KeyError:
        print(f"Entry not found: {args.entry_id}")
        sys.exit(1)
    if not hits:
        print("No temporal neighbors found.")
        return
    for hit in hits:
        topics_text = ", ".join(hit.topics) if hit.topics else "(none)"
        print(f"[{hit.id[:8]}] {hit.created_at[:10]} {hit.title}")
        print(f" Topics: {topics_text}")
        print()
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(prog="mnemosyne", description="The Living Holographic Archive")
|
||||
sub = parser.add_subparsers(dest="command")
|
||||
@@ -184,6 +254,26 @@ def main():
|
||||
rb = sub.add_parser("rebuild", help="Recompute all links from scratch")
|
||||
rb.add_argument("-t", "--threshold", type=float, default=None, help="Similarity threshold override")
|
||||
|
||||
tg = sub.add_parser("tag", help="Add tags to an existing entry")
|
||||
tg.add_argument("entry_id", help="Entry ID")
|
||||
tg.add_argument("tags", help="Comma-separated tags to add")
|
||||
|
||||
ut = sub.add_parser("untag", help="Remove tags from an existing entry")
|
||||
ut.add_argument("entry_id", help="Entry ID")
|
||||
ut.add_argument("tags", help="Comma-separated tags to remove")
|
||||
|
||||
rt = sub.add_parser("retag", help="Replace all tags on an existing entry")
|
||||
rt.add_argument("entry_id", help="Entry ID")
|
||||
rt.add_argument("tags", help="Comma-separated new tag list")
|
||||
|
||||
tl = sub.add_parser("timeline", help="Show entries within an ISO date range")
|
||||
tl.add_argument("start", help="Start datetime (ISO format, e.g. 2024-01-01 or 2024-01-01T00:00:00Z)")
|
||||
tl.add_argument("end", help="End datetime (ISO format)")
|
||||
|
||||
nb = sub.add_parser("neighbors", help="Show entries temporally near a given entry")
|
||||
nb.add_argument("entry_id", help="Anchor entry ID")
|
||||
nb.add_argument("--days", type=int, default=7, help="Window in days (default: 7)")
|
||||
|
||||
args = parser.parse_args()
|
||||
if not args.command:
|
||||
parser.print_help()
|
||||
@@ -201,6 +291,11 @@ def main():
|
||||
"hubs": cmd_hubs,
|
||||
"bridges": cmd_bridges,
|
||||
"rebuild": cmd_rebuild,
|
||||
"tag": cmd_tag,
|
||||
"untag": cmd_untag,
|
||||
"retag": cmd_retag,
|
||||
"timeline": cmd_timeline,
|
||||
"neighbors": cmd_neighbors,
|
||||
}
|
||||
dispatch[args.command](args)
|
||||
|
||||
|
||||
@@ -6,12 +6,19 @@ with metadata, content, and links to related entries.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Optional
|
||||
import uuid
|
||||
|
||||
|
||||
def _compute_content_hash(title: str, content: str) -> str:
    """Return the SHA-256 hex digest used for deduplication.

    The title and content are joined with a NUL character, encoded as
    UTF-8, and hashed.
    """
    joined = "{}\x00{}".format(title, content)
    hasher = hashlib.sha256()
    hasher.update(joined.encode("utf-8"))
    return hasher.hexdigest()
|
||||
|
||||
|
||||
@dataclass
|
||||
class ArchiveEntry:
|
||||
"""A single node in the Mnemosyne holographic archive."""
|
||||
@@ -24,7 +31,13 @@ class ArchiveEntry:
|
||||
topics: list[str] = field(default_factory=list)
|
||||
metadata: dict = field(default_factory=dict)
|
||||
created_at: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
|
||||
updated_at: Optional[str] = None # Set on mutation; None means same as created_at
|
||||
links: list[str] = field(default_factory=list) # IDs of related entries
|
||||
content_hash: Optional[str] = None # SHA-256 of title+content for dedup
|
||||
|
||||
def __post_init__(self):
|
||||
if self.content_hash is None:
|
||||
self.content_hash = _compute_content_hash(self.title, self.content)
|
||||
|
||||
def to_dict(self) -> dict:
|
||||
return {
|
||||
@@ -36,7 +49,9 @@ class ArchiveEntry:
|
||||
"topics": self.topics,
|
||||
"metadata": self.metadata,
|
||||
"created_at": self.created_at,
|
||||
"updated_at": self.updated_at,
|
||||
"links": self.links,
|
||||
"content_hash": self.content_hash,
|
||||
}
|
||||
|
||||
@classmethod
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from datetime import datetime, timezone, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
from nexus.mnemosyne.entry import ArchiveEntry
|
||||
@@ -343,3 +344,512 @@ def test_archive_topic_counts():
|
||||
assert counts["automation"] == 2
|
||||
# sorted by count desc — both tied but must be present
|
||||
assert set(counts.keys()) == {"python", "automation"}
|
||||
|
||||
|
||||
# --- Tag management tests ---
|
||||
|
||||
def test_add_tags_basic():
    """add_tags appends new tags and keeps the one already present."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        created = ingest_event(store, title="T", content="c", topics=["alpha"])
        store.add_tags(created.id, ["beta", "gamma"])
        topics = store.get(created.id).topics
        assert "beta" in topics
        assert "gamma" in topics
        assert "alpha" in topics
|
||||
|
||||
|
||||
def test_add_tags_deduplication():
    """Tags differing only by case from an existing topic are not added again."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        created = ingest_event(store, title="T", content="c", topics=["alpha"])
        store.add_tags(created.id, ["alpha", "ALPHA", "beta"])
        folded = [topic.lower() for topic in store.get(created.id).topics]
        assert folded.count("alpha") == 1
        assert "beta" in folded
|
||||
|
||||
|
||||
def test_add_tags_missing_entry():
    """add_tags raises KeyError for an id that is not in the archive."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        raised = False
        try:
            store.add_tags("nonexistent-id", ["tag"])
        except KeyError:
            raised = True
        assert raised, "Expected KeyError"
|
||||
|
||||
|
||||
def test_add_tags_empty_list():
    """Adding an empty tag list leaves the topics unchanged."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        created = ingest_event(store, title="T", content="c", topics=["alpha"])
        store.add_tags(created.id, [])
        assert store.get(created.id).topics == ["alpha"]
|
||||
|
||||
|
||||
def test_remove_tags_basic():
    """remove_tags drops the named tag and leaves the others in place."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        created = ingest_event(store, title="T", content="c", topics=["alpha", "beta", "gamma"])
        store.remove_tags(created.id, ["beta"])
        topics = store.get(created.id).topics
        assert "beta" not in topics
        assert "alpha" in topics
        assert "gamma" in topics
|
||||
|
||||
|
||||
def test_remove_tags_case_insensitive():
    """A tag is removed even when the request uses different casing."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        created = ingest_event(store, title="T", content="c", topics=["Python", "rust"])
        store.remove_tags(created.id, ["PYTHON"])
        topics = store.get(created.id).topics
        assert "Python" not in topics
        assert "rust" in topics
|
||||
|
||||
|
||||
def test_remove_tags_missing_tag_silent():
    """Removing a tag the entry does not have neither raises nor changes topics."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        created = ingest_event(store, title="T", content="c", topics=["alpha"])
        # Must complete without raising.
        store.remove_tags(created.id, ["nope"])
        assert store.get(created.id).topics == ["alpha"]
|
||||
|
||||
|
||||
def test_remove_tags_missing_entry():
    """remove_tags raises KeyError for an id that is not in the archive."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        raised = False
        try:
            store.remove_tags("nonexistent-id", ["tag"])
        except KeyError:
            raised = True
        assert raised, "Expected KeyError"
|
||||
|
||||
|
||||
def test_retag_basic():
    """retag swaps the whole topic list for the new one."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        created = ingest_event(store, title="T", content="c", topics=["old1", "old2"])
        store.retag(created.id, ["new1", "new2"])
        assert store.get(created.id).topics == ["new1", "new2"]
|
||||
|
||||
|
||||
def test_retag_deduplication():
    """retag collapses tags that differ only by case."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        created = ingest_event(store, title="T", content="c", topics=["x"])
        store.retag(created.id, ["go", "GO", "rust"])
        folded = [topic.lower() for topic in store.get(created.id).topics]
        assert folded.count("go") == 1
        assert "rust" in folded
|
||||
|
||||
|
||||
def test_retag_empty_list():
    """Retagging with an empty list clears all topics."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        created = ingest_event(store, title="T", content="c", topics=["alpha"])
        store.retag(created.id, [])
        assert store.get(created.id).topics == []
|
||||
|
||||
|
||||
def test_retag_missing_entry():
    """retag raises KeyError for an id that is not in the archive."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        raised = False
        try:
            store.retag("nonexistent-id", ["tag"])
        except KeyError:
            raised = True
        assert raised, "Expected KeyError"
|
||||
|
||||
|
||||
def test_tag_persistence_across_reload():
    """Tag edits are visible from a second archive opened on the same file."""
    with tempfile.TemporaryDirectory() as workdir:
        archive_file = Path(workdir) / "test_archive.json"
        writer = MnemosyneArchive(archive_path=archive_file)
        created = ingest_event(writer, title="T", content="c", topics=["alpha"])
        writer.add_tags(created.id, ["beta"])
        writer.remove_tags(created.id, ["alpha"])

        reader = MnemosyneArchive(archive_path=archive_file)
        topics = reader.get(created.id).topics
        assert "beta" in topics
        assert "alpha" not in topics
|
||||
|
||||
|
||||
# --- content_hash and updated_at field tests ---
|
||||
|
||||
def test_entry_has_content_hash():
    """A new entry gets a content hash that is 64 characters long."""
    entry = ArchiveEntry(title="Hello", content="world")
    digest = entry.content_hash
    assert digest is not None
    # A SHA-256 hex digest is 64 characters.
    assert len(digest) == 64
|
||||
|
||||
|
||||
def test_entry_content_hash_deterministic():
    """Two entries with the same title and content share a content hash."""
    first = ArchiveEntry(title="Hello", content="world")
    second = ArchiveEntry(title="Hello", content="world")
    assert first.content_hash == second.content_hash
|
||||
|
||||
|
||||
def test_entry_content_hash_differs_on_different_content():
    """Entries with the same title but different content hash differently."""
    first = ArchiveEntry(title="Hello", content="world")
    second = ArchiveEntry(title="Hello", content="different")
    assert first.content_hash != second.content_hash
|
||||
|
||||
|
||||
def test_entry_updated_at_defaults_none():
    """A freshly constructed entry has no updated_at timestamp."""
    entry = ArchiveEntry(title="T", content="c")
    assert entry.updated_at is None
|
||||
|
||||
|
||||
def test_entry_roundtrip_includes_new_fields():
    """content_hash and updated_at survive a to_dict / from_dict round trip."""
    original = ArchiveEntry(title="T", content="c")
    payload = original.to_dict()
    assert "content_hash" in payload
    assert "updated_at" in payload
    restored = ArchiveEntry.from_dict(payload)
    assert restored.content_hash == original.content_hash
    assert restored.updated_at == original.updated_at
|
||||
|
||||
|
||||
# --- content deduplication tests ---
|
||||
|
||||
def test_add_deduplication_same_content():
    """Ingesting identical title and content twice yields a single entry."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        first = ingest_event(store, title="Dup", content="Same content here")
        second = ingest_event(store, title="Dup", content="Same content here")
        # No second entry may have been created.
        assert store.count == 1
        assert first.id == second.id
|
||||
|
||||
|
||||
def test_add_deduplication_different_content():
    """Entries with different title and content are both stored."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        ingest_event(store, title="A", content="Content one")
        ingest_event(store, title="B", content="Content two")
        assert store.count == 2
|
||||
|
||||
|
||||
def test_find_duplicate_returns_existing():
    """find_duplicate returns the stored entry matching a probe's content."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        stored = ingest_event(store, title="Dup", content="Same content here")
        probe = ArchiveEntry(title="Dup", content="Same content here")
        match = store.find_duplicate(probe)
        assert match is not None
        assert match.id == stored.id
|
||||
|
||||
|
||||
def test_find_duplicate_returns_none_for_unique():
    """find_duplicate returns None when no stored entry matches the probe."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        ingest_event(store, title="A", content="Some content")
        probe = ArchiveEntry(title="B", content="Totally different content")
        match = store.find_duplicate(probe)
        assert match is None
|
||||
|
||||
|
||||
def test_find_duplicate_empty_archive():
    """find_duplicate returns None on an archive with no entries."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        probe = ArchiveEntry(title="X", content="y")
        match = store.find_duplicate(probe)
        assert match is None
|
||||
|
||||
|
||||
# --- update_entry tests ---
|
||||
|
||||
def test_update_entry_title():
    """Updating only the title leaves the content untouched."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        created = ingest_event(store, title="Old title", content="Some content")
        store.update_entry(created.id, title="New title")
        current = store.get(created.id)
        assert current.title == "New title"
        assert current.content == "Some content"
|
||||
|
||||
|
||||
def test_update_entry_content():
    """Updating the content stores the new text."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        created = ingest_event(store, title="T", content="Old content")
        store.update_entry(created.id, content="New content")
        assert store.get(created.id).content == "New content"
|
||||
|
||||
|
||||
def test_update_entry_metadata():
    """Metadata passed to update_entry is readable from the entry afterwards."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        created = ingest_event(store, title="T", content="c")
        store.update_entry(created.id, metadata={"key": "value"})
        assert store.get(created.id).metadata["key"] == "value"
|
||||
|
||||
|
||||
def test_update_entry_bumps_updated_at():
    """updated_at starts as None and is set once the title is changed."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        created = ingest_event(store, title="T", content="c")
        assert created.updated_at is None
        store.update_entry(created.id, title="Updated")
        assert store.get(created.id).updated_at is not None
|
||||
|
||||
|
||||
def test_update_entry_refreshes_content_hash():
    """Changing the content changes the stored content hash."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        created = ingest_event(store, title="T", content="Original content")
        hash_before = created.content_hash
        store.update_entry(created.id, content="Completely new content")
        hash_after = store.get(created.id).content_hash
        assert hash_after != hash_before
|
||||
|
||||
|
||||
def test_update_entry_missing_raises():
    """update_entry raises KeyError for an id that is not in the archive."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        raised = False
        try:
            store.update_entry("nonexistent-id", title="X")
        except KeyError:
            raised = True
        assert raised, "Expected KeyError"
|
||||
|
||||
|
||||
def test_update_entry_persists_across_reload():
    """Updates are visible from a second archive opened on the same file."""
    with tempfile.TemporaryDirectory() as workdir:
        archive_file = Path(workdir) / "test_archive.json"
        writer = MnemosyneArchive(archive_path=archive_file)
        created = ingest_event(writer, title="Before", content="Before content")
        writer.update_entry(created.id, title="After", content="After content")

        reader = MnemosyneArchive(archive_path=archive_file)
        reloaded = reader.get(created.id)
        assert reloaded.title == "After"
        assert reloaded.content == "After content"
        assert reloaded.updated_at is not None
|
||||
|
||||
|
||||
def test_update_entry_no_change_no_crash():
    """update_entry called with only an id succeeds and keeps the title."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "test_archive.json")
        created = ingest_event(store, title="T", content="c")
        returned = store.update_entry(created.id)
        assert returned.title == "T"
|
||||
|
||||
|
||||
# --- by_date_range tests ---
|
||||
|
||||
def _make_entry_at(archive: MnemosyneArchive, title: str, dt: datetime) -> ArchiveEntry:
    """Test helper: ingest an entry, then overwrite its created_at with ``dt``.

    The title doubles as the content. The archive is saved again after the
    timestamp has been rewritten.
    """
    created = ingest_event(archive, title=title, content=title)
    backdated = dt.isoformat()
    created.created_at = backdated
    archive._save()
    return created
|
||||
|
||||
|
||||
def test_by_date_range_empty_archive():
    """An archive with no entries yields an empty range result."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "a.json")
        found = store.by_date_range("2024-01-01", "2024-12-31")
        assert found == []
|
||||
|
||||
|
||||
def test_by_date_range_returns_matching_entries():
    """Only entries dated inside the requested range are returned."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "a.json")
        in_jan = _make_entry_at(store, "Jan entry", datetime(2024, 1, 15, tzinfo=timezone.utc))
        in_mar = _make_entry_at(store, "Mar entry", datetime(2024, 3, 10, tzinfo=timezone.utc))
        in_jun = _make_entry_at(store, "Jun entry", datetime(2024, 6, 1, tzinfo=timezone.utc))

        found_ids = {hit.id for hit in store.by_date_range("2024-01-01", "2024-04-01")}
        assert in_jan.id in found_ids
        assert in_mar.id in found_ids
        assert in_jun.id not in found_ids
|
||||
|
||||
|
||||
def test_by_date_range_boundary_inclusive():
    """An entry dated exactly on both range bounds is included."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "a.json")
        on_edge = _make_entry_at(
            store, "Exact boundary", datetime(2024, 3, 1, tzinfo=timezone.utc)
        )

        found = store.by_date_range("2024-03-01T00:00:00+00:00", "2024-03-01T00:00:00+00:00")
        assert len(found) == 1
        assert found[0].id == on_edge.id
|
||||
|
||||
|
||||
def test_by_date_range_no_results():
    """A range that contains no entry dates yields an empty list."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "a.json")
        _make_entry_at(store, "Jan entry", datetime(2024, 1, 15, tzinfo=timezone.utc))

        found = store.by_date_range("2023-01-01", "2023-12-31")
        assert found == []
|
||||
|
||||
|
||||
def test_by_date_range_timezone_naive_treated_as_utc():
    """Timezone-naive range bounds still match a UTC-dated entry."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "a.json")
        summer = _make_entry_at(store, "Summer", datetime(2024, 6, 15, tzinfo=timezone.utc))

        # Neither bound carries timezone information.
        found = store.by_date_range("2024-06-01", "2024-07-01")
        assert any(hit.id == summer.id for hit in found)
|
||||
|
||||
|
||||
def test_by_date_range_sorted_ascending():
    """Results come back ordered by created_at, oldest first."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "a.json")
        # Deliberately inserted out of chronological order.
        out_of_order = [
            datetime(2024, 3, 5, tzinfo=timezone.utc),
            datetime(2024, 1, 10, tzinfo=timezone.utc),
            datetime(2024, 2, 20, tzinfo=timezone.utc),
        ]
        for index, moment in enumerate(out_of_order):
            _make_entry_at(store, f"Entry {index}", moment)

        found = store.by_date_range("2024-01-01", "2024-12-31")
        assert len(found) == 3
        stamps = [hit.created_at for hit in found]
        assert stamps[0] < stamps[1] < stamps[2]
|
||||
|
||||
|
||||
def test_by_date_range_single_entry_archive():
    """A one-entry archive returns that entry only for a covering range."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "a.json")
        only = _make_entry_at(store, "Only", datetime(2024, 5, 1, tzinfo=timezone.utc))

        covering = store.by_date_range("2024-01-01", "2024-12-31")
        assert covering == [only]
        disjoint = store.by_date_range("2025-01-01", "2025-12-31")
        assert disjoint == []
|
||||
|
||||
|
||||
# --- temporal_neighbors tests ---
|
||||
|
||||
def test_temporal_neighbors_empty_archive():
    """An archive holding only the anchor entry has no temporal neighbors."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "a.json")
        lone = ingest_event(store, title="Lone", content="c")
        found = store.temporal_neighbors(lone.id, window_days=7)
        assert found == []
|
||||
|
||||
|
||||
def test_temporal_neighbors_missing_entry_raises():
    """temporal_neighbors raises KeyError for an id that is not in the archive."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "a.json")
        raised = False
        try:
            store.temporal_neighbors("nonexistent-id")
        except KeyError:
            raised = True
        assert raised, "Expected KeyError"
|
||||
|
||||
|
||||
def test_temporal_neighbors_returns_within_window():
    """Entries inside the 7-day window are returned; farther ones and the anchor are not."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "a.json")
        anchor = _make_entry_at(store, "Anchor", datetime(2024, 4, 10, tzinfo=timezone.utc))
        # Four days after the anchor: inside the window.
        near = _make_entry_at(store, "Near", datetime(2024, 4, 14, tzinfo=timezone.utc))
        # Ten days after the anchor: outside the window.
        far = _make_entry_at(store, "Far", datetime(2024, 4, 20, tzinfo=timezone.utc))

        found_ids = {hit.id for hit in store.temporal_neighbors(anchor.id, window_days=7)}
        assert near.id in found_ids
        assert far.id not in found_ids
        assert anchor.id not in found_ids
|
||||
|
||||
|
||||
def test_temporal_neighbors_excludes_anchor():
    """With a zero-day window, a same-instant entry is returned but the anchor is not."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "a.json")
        moment = datetime(2024, 4, 10, tzinfo=timezone.utc)
        anchor = _make_entry_at(store, "Anchor", moment)
        twin = _make_entry_at(store, "Same day", moment)

        found_ids = {hit.id for hit in store.temporal_neighbors(anchor.id, window_days=0)}
        assert anchor.id not in found_ids
        assert twin.id in found_ids
|
||||
|
||||
|
||||
def test_temporal_neighbors_custom_window():
    """A 3-day window includes an entry at +2 days and excludes one at +5 days."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "a.json")
        anchor = _make_entry_at(store, "Anchor", datetime(2024, 4, 10, tzinfo=timezone.utc))
        # Two days after the anchor.
        inside = _make_entry_at(store, "Near", datetime(2024, 4, 12, tzinfo=timezone.utc))
        # Five days after the anchor.
        outside = _make_entry_at(store, "Far", datetime(2024, 4, 15, tzinfo=timezone.utc))

        found_ids = {hit.id for hit in store.temporal_neighbors(anchor.id, window_days=3)}
        assert inside.id in found_ids
        assert outside.id not in found_ids
|
||||
|
||||
|
||||
def test_temporal_neighbors_sorted_ascending():
    """Neighbors come back ordered by created_at, oldest first."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "a.json")
        anchor_dt = datetime(2024, 6, 15, tzinfo=timezone.utc)
        anchor = _make_entry_at(store, "Anchor", anchor_dt)
        # Deliberately inserted out of chronological order.
        for offset in (5, 1, 3):
            _make_entry_at(store, f"Offset {offset}", anchor_dt + timedelta(days=offset))

        found = store.temporal_neighbors(anchor.id, window_days=7)
        assert len(found) == 3
        stamps = [hit.created_at for hit in found]
        assert stamps[0] < stamps[1] < stamps[2]
|
||||
|
||||
|
||||
def test_temporal_neighbors_boundary_inclusive():
    """An entry exactly window_days after the anchor is still a neighbor."""
    with tempfile.TemporaryDirectory() as workdir:
        store = MnemosyneArchive(archive_path=Path(workdir) / "a.json")
        anchor_dt = datetime(2024, 6, 15, tzinfo=timezone.utc)
        # Sits exactly on the edge of the 7-day window.
        edge_dt = anchor_dt + timedelta(days=7)

        anchor = _make_entry_at(store, "Anchor", anchor_dt)
        on_edge = _make_entry_at(store, "Boundary", edge_dt)

        found = store.temporal_neighbors(anchor.id, window_days=7)
        assert any(hit.id == on_edge.id for hit in found)
|
||||
|
||||
Reference in New Issue
Block a user