diff --git a/nexus/mnemosyne/archive.py b/nexus/mnemosyne/archive.py
index 6a13b20..4e65b13 100644
--- a/nexus/mnemosyne/archive.py
+++ b/nexus/mnemosyne/archive.py
@@ -49,8 +49,22 @@ class MnemosyneArchive:
         with open(self.path, "w") as f:
             json.dump(data, f, indent=2)
 
-    def add(self, entry: ArchiveEntry, auto_link: bool = True) -> ArchiveEntry:
-        """Add an entry to the archive. Auto-links to related entries."""
+    def add(self, entry: ArchiveEntry, auto_link: bool = True, skip_dups: bool = False) -> ArchiveEntry:
+        """Add an entry to the archive. Auto-links to related entries.
+
+        Args:
+            entry: The entry to add.
+            auto_link: Whether to automatically compute holographic links.
+            skip_dups: If True, return existing entry instead of adding a duplicate
+                (same title+content hash).
+
+        Returns:
+            The added (or existing, if skip_dups=True and duplicate found) entry.
+        """
+        if skip_dups:
+            existing = self.find_by_hash(entry.content_hash)
+            if existing:
+                return existing
         self._entries[entry.id] = entry
         if auto_link:
             self.linker.apply_links(entry, list(self._entries.values()))
@@ -581,6 +595,83 @@ class MnemosyneArchive:
         self._save()
         return entry
 
+    def update_entry(
+        self,
+        entry_id: str,
+        title: Optional[str] = None,
+        content: Optional[str] = None,
+        metadata: Optional[dict] = None,
+        re_link: bool = True,
+    ) -> ArchiveEntry:
+        """Update fields on an existing entry.
+
+        Only provided fields are changed. Bumps updated_at and optionally
+        recomputes holographic links (since content changed).
+
+        Args:
+            entry_id: ID of the entry to update.
+            title: New title (None = keep existing).
+            content: New content (None = keep existing).
+            metadata: New metadata dict (None = keep existing, {} to clear).
+            re_link: Whether to recompute holographic links after update.
+
+        Returns:
+            The updated ArchiveEntry.
+
+        Raises:
+            KeyError: If entry_id does not exist.
+        """
+        entry = self._entries.get(entry_id)
+        if entry is None:
+            raise KeyError(entry_id)
+
+        old_hash = entry.content_hash
+
+        if title is not None:
+            entry.title = title
+        if content is not None:
+            entry.content = content
+        if metadata is not None:
+            entry.metadata = metadata
+        entry.touch()
+
+        # Re-link only if the title/content hash actually changed
+        if re_link and entry.content_hash != old_hash:
+            # Clear existing links to this entry from others
+            for other in self._entries.values():
+                if entry_id in other.links:
+                    other.links.remove(entry_id)
+            entry.links = []
+            # Re-apply
+            self.linker.apply_links(entry, list(self._entries.values()))
+
+        self._save()
+        return entry
+
+    def find_by_hash(self, content_hash: str) -> Optional[ArchiveEntry]:
+        """Find an entry by its content hash (title + content SHA-256).
+
+        Returns the first match, or None if no entry has this hash.
+        """
+        for entry in self._entries.values():
+            if entry.content_hash == content_hash:
+                return entry
+        return None
+
+    def find_duplicates(self) -> list[list[ArchiveEntry]]:
+        """Find groups of entries with identical content hashes.
+
+        Returns a list of groups, where each group is a list of 2+ entries
+        sharing the same title+content. Sorted by group size descending.
+        """
+        hash_groups: dict[str, list[ArchiveEntry]] = {}
+        for entry in self._entries.values():
+            h = entry.content_hash
+            hash_groups.setdefault(h, []).append(entry)
+        dups = [group for group in hash_groups.values() if len(group) > 1]
+        dups.sort(key=lambda g: len(g), reverse=True)
+        return dups
+
     def rebuild_links(self, threshold: Optional[float] = None) -> int:
         """Recompute all links from scratch.