wip: add update_entry, find_by_hash, find_duplicates, skip_dups to archive

This commit is contained in:
Alexander Whitestone
2026-04-11 19:38:17 -04:00
parent d7343d1be2
commit 5e26ee0a7d
2 changed files with 93 additions and 2 deletions

Binary file not shown.

View File

@@ -49,8 +49,22 @@ class MnemosyneArchive:
with open(self.path, "w") as f:
json.dump(data, f, indent=2)
def add(self, entry: ArchiveEntry, auto_link: bool = True) -> ArchiveEntry:
"""Add an entry to the archive. Auto-links to related entries."""
def add(self, entry: ArchiveEntry, auto_link: bool = True, skip_dups: bool = False) -> ArchiveEntry:
"""Add an entry to the archive. Auto-links to related entries.
Args:
entry: The entry to add.
auto_link: Whether to automatically compute holographic links.
skip_dups: If True, return existing entry instead of adding a duplicate
(same title+content hash).
Returns:
The added (or existing, if skip_dups=True and duplicate found) entry.
"""
if skip_dups:
existing = self.find_by_hash(entry.content_hash)
if existing:
return existing
self._entries[entry.id] = entry
if auto_link:
self.linker.apply_links(entry, list(self._entries.values()))
@@ -581,6 +595,83 @@ class MnemosyneArchive:
self._save()
return entry
def update_entry(
    self,
    entry_id: str,
    title: Optional[str] = None,
    content: Optional[str] = None,
    metadata: Optional[dict] = None,
    re_link: bool = True,
) -> ArchiveEntry:
    """Apply a partial update to a stored entry.

    Any argument left as None keeps the current value. The entry's
    updated_at timestamp is always bumped; holographic links are
    recomputed only when the title+content hash actually changed.

    Args:
        entry_id: ID of the entry to update.
        title: Replacement title, or None to leave untouched.
        content: Replacement content, or None to leave untouched.
        metadata: Replacement metadata dict (pass {} to clear, None to keep).
        re_link: Recompute holographic links when the content changed.

    Returns:
        The updated ArchiveEntry.

    Raises:
        KeyError: If no entry with entry_id exists.
    """
    entry = self._entries.get(entry_id)
    if entry is None:
        raise KeyError(entry_id)

    previous_hash = entry.content_hash
    requested = {"title": title, "content": content, "metadata": metadata}
    for field_name, new_value in requested.items():
        if new_value is not None:
            setattr(entry, field_name, new_value)
    entry.touch()

    # Skip the (expensive) re-link when the hash is unchanged, e.g. a
    # metadata-only update.
    if re_link and entry.content_hash != previous_hash:
        # Drop stale inbound links before recomputing from scratch.
        for neighbor in self._entries.values():
            if entry_id in neighbor.links:
                neighbor.links.remove(entry_id)
        entry.links = []
        self.linker.apply_links(entry, list(self._entries.values()))

    self._save()
    return entry
def find_by_hash(self, content_hash: str) -> Optional[ArchiveEntry]:
    """Locate an entry by its content hash (title + content SHA-256).

    Returns:
        The first entry (in insertion order) whose hash matches, or
        None when no entry carries this hash.
    """
    matches = (e for e in self._entries.values() if e.content_hash == content_hash)
    return next(matches, None)
def find_duplicates(self) -> list[list[ArchiveEntry]]:
    """Group entries that share an identical content hash.

    Returns:
        A list of groups, each a list of two or more entries with the
        same title+content, ordered largest group first.
    """
    buckets: dict[str, list[ArchiveEntry]] = {}
    for item in self._entries.values():
        buckets.setdefault(item.content_hash, []).append(item)
    duplicate_groups = [bucket for bucket in buckets.values() if len(bucket) > 1]
    return sorted(duplicate_groups, key=len, reverse=True)
def rebuild_links(self, threshold: Optional[float] = None) -> int:
"""Recompute all links from scratch.