""" AP Knowledge Base - Memory Directory Core Hierarchical memory storage with aging, relevance scoring, and consolidation. Based on the memdir pattern from Claude Code. """ import os import json import shutil from pathlib import Path from datetime import datetime from typing import List, Dict, Optional, Tuple, Set, Callable from collections import defaultdict import fnmatch from memory_types import ( MemoryEntry, MemoryQuery, MemoryType, MemoryScope, MemoryIndex, RelationshipType ) class MemoryDirectory: """ Core memory directory for hierarchical memory storage. Memory layout: / MEMORY.md # Index file facts/ # Fact memories procedures/ # Procedure memories observations/ # Observation memories lessons/ # Lesson memories archive/ # Archived memories relationships.json # Memory relationship graph """ MAX_INDEX_LINES = 200 MAX_INDEX_BYTES = 25000 MAX_MEMORY_FILES = 200 CONSOLIDATION_THRESHOLD = 0.85 # Similarity threshold for merging def __init__(self, base_dir: Path): self.base_dir = Path(base_dir) self._ensure_directories() self._memories: Dict[str, MemoryEntry] = {} self._relationships: Dict[str, List[Tuple[str, RelationshipType]]] = defaultdict(list) self._load_all() def _ensure_directories(self): """Create directory structure if it doesn't exist.""" dirs = ['facts', 'procedures', 'observations', 'lessons', 'archive'] for d in dirs: (self.base_dir / d).mkdir(parents=True, exist_ok=True) def _type_dir(self, mem_type: MemoryType) -> Path: """Get the directory for a memory type.""" return self.base_dir / f"{mem_type.value}s" def _memory_path(self, entry: MemoryEntry) -> Path: """Generate file path for a memory entry.""" safe_name = "".join(c if c.isalnum() or c in '-_' else '_' for c in entry.name) safe_name = safe_name[:50] # Limit length return self._type_dir(entry.type) / f"{entry.id}_{safe_name}.md" def _load_all(self): """Load all memories from disk.""" self._memories.clear() for mem_type in MemoryType: type_dir = self._type_dir(mem_type) if not type_dir.exists(): continue for file_path in type_dir.glob("*.md"): try: content = file_path.read_text(encoding='utf-8') entry = MemoryEntry.from_frontmatter(content, file_path) self._memories[entry.id] = entry except Exception as e: print(f"Warning: Failed to load {file_path}: {e}") # Load relationships rel_path = self.base_dir / "relationships.json" if rel_path.exists(): try: data = json.loads(rel_path.read_text()) for rel in data: self._relationships[rel['source']].append( (rel['target'], RelationshipType(rel['type'])) ) except Exception as e: print(f"Warning: Failed to load relationships: {e}") def save(self, entry: MemoryEntry) -> Path: """ Save a memory entry to disk. Returns: Path to the saved file """ entry.modified_at = datetime.utcnow() # Update file path entry.file_path = self._memory_path(entry) # Write to disk entry.file_path.parent.mkdir(parents=True, exist_ok=True) entry.file_path.write_text(entry.to_frontmatter(), encoding='utf-8') # Update cache self._memories[entry.id] = entry # Update index self._update_index() return entry.file_path def get(self, memory_id: str) -> Optional[MemoryEntry]: """Get a memory by ID.""" entry = self._memories.get(memory_id) if entry: entry.touch() return entry def delete(self, memory_id: str) -> bool: """Delete a memory by ID.""" entry = self._memories.get(memory_id) if not entry: return False # Remove file if entry.file_path and entry.file_path.exists(): entry.file_path.unlink() # Remove from cache del self._memories[memory_id] # Remove relationships if memory_id in self._relationships: del self._relationships[memory_id] for src, rels in self._relationships.items(): self._relationships[src] = [(t, r) for t, r in rels if t != memory_id] self._save_relationships() self._update_index() return True def archive(self, memory_id: str) -> bool: """Move a memory to the archive.""" entry = self._memories.get(memory_id) if not entry or not entry.file_path: return False archive_dir = self.base_dir / "archive" archive_dir.mkdir(exist_ok=True) new_path = archive_dir / entry.file_path.name shutil.move(str(entry.file_path), str(new_path)) entry.file_path = new_path entry.metadata['archived'] = True entry.metadata['archived_at'] = datetime.utcnow().isoformat() return True def search(self, query: MemoryQuery) -> List[Tuple[MemoryEntry, float]]: """ Search memories matching the query. Returns: List of (entry, score) tuples sorted by score """ results = [] for entry in self._memories.values(): if not query.matches(entry): continue # Calculate score based on sort criteria if query.sort_by == "relevance": score = entry.relevance_score(query.text) # Boost by freshness score = score * 0.7 + entry.freshness_score() * 0.3 elif query.sort_by == "freshness": score = entry.freshness_score() elif query.sort_by == "access": score = min(1.0, entry.access_count / 10.0) else: # date score = 1.0 / (1 + entry.age_days()) results.append((entry, score)) # Sort by score descending results.sort(key=lambda x: x[1], reverse=True) return results[:query.limit] def find_by_tags(self, tags: List[str], match_all: bool = False) -> List[MemoryEntry]: """Find memories by tags.""" tag_set = set(t.lower() for t in tags) results = [] for entry in self._memories.values(): entry_tags = set(t.lower() for t in entry.tags) if match_all: if tag_set <= entry_tags: results.append(entry) else: if tag_set & entry_tags: results.append(entry) return results def find_similar(self, entry: MemoryEntry, threshold: float = 0.8) -> List[MemoryEntry]: """ Find memories similar to the given entry. Uses simple text similarity based on name, description, and content. """ similar = [] for other in self._memories.values(): if other.id == entry.id: continue score = self._similarity(entry, other) if score >= threshold: similar.append(other) return similar def _similarity(self, a: MemoryEntry, b: MemoryEntry) -> float: """Calculate similarity between two memories (0.0-1.0).""" # Simple word-based Jaccard similarity def get_words(text: str) -> Set[str]: return set(w.lower() for w in text.split() if len(w) > 2) a_words = get_words(a.name + " " + a.description + " " + a.content[:500]) b_words = get_words(b.name + " " + b.description + " " + b.content[:500]) if not a_words or not b_words: return 0.0 intersection = len(a_words & b_words) union = len(a_words | b_words) return intersection / union if union > 0 else 0.0 def consolidate(self, dry_run: bool = False) -> List[Tuple[MemoryEntry, MemoryEntry]]: """ Consolidate similar memories by merging them. Returns: List of merged (old, new) pairs """ merged = [] to_delete = set() entries = list(self._memories.values()) for i, entry in enumerate(entries): if entry.id in to_delete: continue for other in entries[i+1:]: if other.id in to_delete: continue similarity = self._similarity(entry, other) if similarity >= self.CONSOLIDATION_THRESHOLD: # Merge other into entry if not dry_run: # Combine content entry.content = self._merge_content(entry, other) # Combine tags entry.tags = list(set(entry.tags + other.tags)) # Update description if len(other.description) > len(entry.description): entry.description = other.description # Boost confidence entry.confidence = max(entry.confidence, other.confidence) entry.modified_at = datetime.utcnow() # Save merged entry self.save(entry) # Delete duplicate self.delete(other.id) merged.append((other, entry)) to_delete.add(other.id) return merged def _merge_content(self, a: MemoryEntry, b: MemoryEntry) -> str: """Merge content from two memories intelligently.""" if a.content == b.content: return a.content # Simple merge: combine unique lines a_lines = set(a.content.strip().split('\n')) b_lines = set(b.content.strip().split('\n')) merged = list(a_lines | b_lines) return '\n'.join(merged) def get_stale_memories(self, max_age_days: int = 90) -> List[MemoryEntry]: """Get memories older than the specified age.""" cutoff = datetime.utcnow().timestamp() - (max_age_days * 86400) stale = [] for entry in self._memories.values(): if entry.created_at.timestamp() < cutoff: stale.append(entry) return stale def cleanup_archive(self, max_age_days: int = 365) -> int: """ Permanently delete archived memories older than max_age_days. Returns: Number of memories deleted """ archive_dir = self.base_dir / "archive" if not archive_dir.exists(): return 0 cutoff = datetime.utcnow().timestamp() - (max_age_days * 86400) deleted = 0 for file_path in archive_dir.glob("*.md"): try: mtime = file_path.stat().st_mtime if mtime < cutoff: file_path.unlink() deleted += 1 except Exception as e: print(f"Warning: Failed to delete {file_path}: {e}") return deleted def _update_index(self): """Update the MEMORY.md index file.""" index_path = self.base_dir / "MEMORY.md" # Build index entries entries = [] for entry in self._memories.values(): if entry.file_path: rel_path = entry.file_path.relative_to(self.base_dir) entries.append(MemoryIndex( name=entry.name, file_path=rel_path, description=entry.description, type=entry.type, created_at=entry.created_at )) # Sort by date descending entries.sort(key=lambda x: x.created_at, reverse=True) # Build content lines = ["# AP Knowledge Base - Memory Index", ""] lines.append("This is an auto-generated index of memories. Do not edit manually.") lines.append("") lines.append("## Recent Memories") lines.append("") for idx in entries[:self.MAX_INDEX_LINES]: lines.append(idx.to_line()) if len(entries) > self.MAX_INDEX_LINES: lines.append(f"\n... and {len(entries) - self.MAX_INDEX_LINES} more memories") lines.append("") lines.append("## Memory Types") lines.append("") for mem_type in MemoryType: count = sum(1 for e in self._memories.values() if e.type == mem_type) lines.append(f"- **{mem_type.value}**: {count} memories") content = '\n'.join(lines) # Truncate if too large if len(content) > self.MAX_INDEX_BYTES: content = content[:self.MAX_INDEX_BYTES] content += f"\n\n> WARNING: Index truncated ({len(content)} bytes, limit: {self.MAX_INDEX_BYTES})" index_path.write_text(content, encoding='utf-8') def _save_relationships(self): """Save relationships to disk.""" rel_path = self.base_dir / "relationships.json" data = [] for source, targets in self._relationships.items(): for target, rel_type in targets: data.append({ 'source': source, 'target': target, 'type': rel_type.value }) rel_path.write_text(json.dumps(data, indent=2), encoding='utf-8') def get_stats(self) -> Dict: """Get statistics about the memory directory.""" stats = { 'total_memories': len(self._memories), 'by_type': {}, 'by_scope': {}, 'by_source': {}, 'total_relationships': sum(len(r) for r in self._relationships.values()), 'avg_age_days': 0, 'avg_confidence': 0.0, } total_age = 0 total_confidence = 0.0 for entry in self._memories.values(): t = entry.type.value s = entry.scope.value src = entry.source stats['by_type'][t] = stats['by_type'].get(t, 0) + 1 stats['by_scope'][s] = stats['by_scope'].get(s, 0) + 1 stats['by_source'][src] = stats['by_source'].get(src, 0) + 1 total_age += entry.age_days() total_confidence += entry.confidence if self._memories: stats['avg_age_days'] = total_age / len(self._memories) stats['avg_confidence'] = total_confidence / len(self._memories) return stats def export_all(self) -> Dict: """Export all memories as a dictionary.""" return { 'memories': [entry.to_frontmatter() for entry in self._memories.values()], 'relationships': [ { 'source': src, 'target': tgt, 'type': rel.value } for src, rels in self._relationships.items() for tgt, rel in rels ], 'exported_at': datetime.utcnow().isoformat(), 'version': '1.0' } def import_data(self, data: Dict, overwrite: bool = False) -> Tuple[int, int]: """ Import memories from a dictionary. Returns: (imported_count, skipped_count) """ imported = 0 skipped = 0 for mem_text in data.get('memories', []): try: entry = MemoryEntry.from_frontmatter(mem_text) if entry.id in self._memories and not overwrite: skipped += 1 continue self.save(entry) imported += 1 except Exception as e: print(f"Warning: Failed to import memory: {e}") skipped += 1 # Import relationships for rel in data.get('relationships', []): self._relationships[rel['source']].append( (rel['target'], RelationshipType(rel['type'])) ) self._save_relationships() return imported, skipped