timmy-config/wizards/allegro-primus/knowledge/memdir.py

"""
AP Knowledge Base - Memory Directory Core
Hierarchical memory storage with aging, relevance scoring, and consolidation.
Based on the memdir pattern from Claude Code.
"""

import os
import json
import shutil
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Optional, Tuple, Set, Callable
from collections import defaultdict
import fnmatch

from memory_types import (
    MemoryEntry, MemoryQuery, MemoryType, MemoryScope,
    MemoryIndex, RelationshipType
)


class MemoryDirectory:
    """
    Core memory directory for hierarchical memory storage.

    Memory layout:
        <base_dir>/
            MEMORY.md           # Index file
            facts/              # Fact memories
            procedures/         # Procedure memories
            observations/       # Observation memories
            lessons/            # Lesson memories
            archive/            # Archived memories
            relationships.json  # Memory relationship graph
    """

    MAX_INDEX_LINES = 200
    MAX_INDEX_BYTES = 25000
    MAX_MEMORY_FILES = 200
    CONSOLIDATION_THRESHOLD = 0.85  # Similarity threshold for merging

    def __init__(self, base_dir: Path):
        self.base_dir = Path(base_dir)
        self._ensure_directories()
        self._memories: Dict[str, MemoryEntry] = {}
        self._relationships: Dict[str, List[Tuple[str, RelationshipType]]] = defaultdict(list)
        self._load_all()
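
    # Illustrative only (hypothetical path): constructing the directory
    # builds the on-disk layout from the class docstring and loads any
    # existing memories into the in-process cache.
    #
    #     mem = MemoryDirectory(Path("~/.ap/knowledge").expanduser())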

    def _ensure_directories(self):
        """Create directory structure if it doesn't exist."""
        dirs = ['facts', 'procedures', 'observations', 'lessons', 'archive']
        for d in dirs:
            (self.base_dir / d).mkdir(parents=True, exist_ok=True)

    def _type_dir(self, mem_type: MemoryType) -> Path:
        """Get the directory for a memory type."""
        return self.base_dir / f"{mem_type.value}s"

    def _memory_path(self, entry: MemoryEntry) -> Path:
        """Generate file path for a memory entry."""
        safe_name = "".join(c if c.isalnum() or c in '-_' else '_' for c in entry.name)
        safe_name = safe_name[:50]  # Limit length
        return self._type_dir(entry.type) / f"{entry.id}_{safe_name}.md"

    def _load_all(self):
        """Load all memories from disk."""
        self._memories.clear()
        for mem_type in MemoryType:
            type_dir = self._type_dir(mem_type)
            if not type_dir.exists():
                continue
            for file_path in type_dir.glob("*.md"):
                try:
                    content = file_path.read_text(encoding='utf-8')
                    entry = MemoryEntry.from_frontmatter(content, file_path)
                    self._memories[entry.id] = entry
                except Exception as e:
                    print(f"Warning: Failed to load {file_path}: {e}")

        # Load relationships
        rel_path = self.base_dir / "relationships.json"
        if rel_path.exists():
            try:
                data = json.loads(rel_path.read_text())
                for rel in data:
                    self._relationships[rel['source']].append(
                        (rel['target'], RelationshipType(rel['type']))
                    )
            except Exception as e:
                print(f"Warning: Failed to load relationships: {e}")

    def save(self, entry: MemoryEntry) -> Path:
        """
        Save a memory entry to disk.

        Returns:
            Path to the saved file
        """
        entry.modified_at = datetime.utcnow()
        # Update file path
        entry.file_path = self._memory_path(entry)
        # Write to disk
        entry.file_path.parent.mkdir(parents=True, exist_ok=True)
        entry.file_path.write_text(entry.to_frontmatter(), encoding='utf-8')
        # Update cache
        self._memories[entry.id] = entry
        # Update index
        self._update_index()
        return entry.file_path

    def get(self, memory_id: str) -> Optional[MemoryEntry]:
        """Get a memory by ID."""
        entry = self._memories.get(memory_id)
        if entry:
            entry.touch()
        return entry

    def delete(self, memory_id: str) -> bool:
        """Delete a memory by ID."""
        entry = self._memories.get(memory_id)
        if not entry:
            return False
        # Remove file
        if entry.file_path and entry.file_path.exists():
            entry.file_path.unlink()
        # Remove from cache
        del self._memories[memory_id]
        # Remove relationships
        if memory_id in self._relationships:
            del self._relationships[memory_id]
        for src, rels in self._relationships.items():
            self._relationships[src] = [(t, r) for t, r in rels if t != memory_id]
        self._save_relationships()
        self._update_index()
        return True

    def archive(self, memory_id: str) -> bool:
        """Move a memory to the archive."""
        entry = self._memories.get(memory_id)
        if not entry or not entry.file_path:
            return False
        archive_dir = self.base_dir / "archive"
        archive_dir.mkdir(exist_ok=True)
        new_path = archive_dir / entry.file_path.name
        shutil.move(str(entry.file_path), str(new_path))
        entry.file_path = new_path
        entry.metadata['archived'] = True
        entry.metadata['archived_at'] = datetime.utcnow().isoformat()
        # Persist the archived metadata and drop the entry from the active
        # set so it no longer appears in searches or the index.
        new_path.write_text(entry.to_frontmatter(), encoding='utf-8')
        del self._memories[memory_id]
        self._update_index()
        return True

    def search(self, query: MemoryQuery) -> List[Tuple[MemoryEntry, float]]:
        """
        Search memories matching the query.

        Returns:
            List of (entry, score) tuples sorted by score
        """
        results = []
        for entry in self._memories.values():
            if not query.matches(entry):
                continue
            # Calculate score based on sort criteria
            if query.sort_by == "relevance":
                score = entry.relevance_score(query.text)
                # Boost by freshness
                score = score * 0.7 + entry.freshness_score() * 0.3
            elif query.sort_by == "freshness":
                score = entry.freshness_score()
            elif query.sort_by == "access":
                score = min(1.0, entry.access_count / 10.0)
            else:  # date
                score = 1.0 / (1 + entry.age_days())
            results.append((entry, score))
        # Sort by score descending
        results.sort(key=lambda x: x[1], reverse=True)
        return results[:query.limit]
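
    # Illustrative only: a relevance search. The exact MemoryQuery
    # constructor lives in memory_types and is not shown here, so the
    # keyword arguments below are assumptions based on the fields this
    # method reads (text, sort_by, limit).
    #
    #     q = MemoryQuery(text="nginx restart", sort_by="relevance", limit=5)
    #     for entry, score in mem.search(q):
    #         print(f"{score:.2f}  {entry.name}")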

    def find_by_tags(self, tags: List[str], match_all: bool = False) -> List[MemoryEntry]:
        """Find memories by tags."""
        tag_set = set(t.lower() for t in tags)
        results = []
        for entry in self._memories.values():
            entry_tags = set(t.lower() for t in entry.tags)
            if match_all:
                # All requested tags must be present (subset test)
                if tag_set <= entry_tags:
                    results.append(entry)
            else:
                # Any overlap is enough
                if tag_set & entry_tags:
                    results.append(entry)
        return results

    def find_similar(self, entry: MemoryEntry, threshold: float = 0.8) -> List[MemoryEntry]:
        """
        Find memories similar to the given entry.

        Uses simple text similarity based on name, description, and content.
        """
        similar = []
        for other in self._memories.values():
            if other.id == entry.id:
                continue
            score = self._similarity(entry, other)
            if score >= threshold:
                similar.append(other)
        return similar

    def _similarity(self, a: MemoryEntry, b: MemoryEntry) -> float:
        """Calculate similarity between two memories (0.0-1.0)."""
        # Simple word-based Jaccard similarity
        def get_words(text: str) -> Set[str]:
            return set(w.lower() for w in text.split() if len(w) > 2)

        a_words = get_words(a.name + " " + a.description + " " + a.content[:500])
        b_words = get_words(b.name + " " + b.description + " " + b.content[:500])
        if not a_words or not b_words:
            return 0.0
        intersection = len(a_words & b_words)
        union = len(a_words | b_words)
        return intersection / union if union > 0 else 0.0
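
    # Worked example of the Jaccard measure above, with hypothetical word
    # sets: A = {"restart", "nginx", "after", "deploy"} and
    # B = {"restart", "nginx", "before", "deploy"} share 3 words out of
    # 5 distinct, so similarity = 3 / 5 = 0.6, below both the default
    # find_similar threshold (0.8) and CONSOLIDATION_THRESHOLD (0.85).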

    def consolidate(self, dry_run: bool = False) -> List[Tuple[MemoryEntry, MemoryEntry]]:
        """
        Consolidate similar memories by merging them.

        Returns:
            List of merged (duplicate, surviving) pairs
        """
        merged = []
        to_delete = set()
        entries = list(self._memories.values())
        for i, entry in enumerate(entries):
            if entry.id in to_delete:
                continue
            for other in entries[i+1:]:
                if other.id in to_delete:
                    continue
                similarity = self._similarity(entry, other)
                if similarity >= self.CONSOLIDATION_THRESHOLD:
                    # Merge other into entry
                    if not dry_run:
                        # Combine content
                        entry.content = self._merge_content(entry, other)
                        # Combine tags
                        entry.tags = list(set(entry.tags + other.tags))
                        # Keep the longer description
                        if len(other.description) > len(entry.description):
                            entry.description = other.description
                        # Keep the higher confidence
                        entry.confidence = max(entry.confidence, other.confidence)
                        entry.modified_at = datetime.utcnow()
                        # Save merged entry
                        self.save(entry)
                        # Delete duplicate
                        self.delete(other.id)
                    merged.append((other, entry))
                    to_delete.add(other.id)
        return merged
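
    # Illustrative only: preview merges without touching disk, then apply.
    #
    #     pairs = mem.consolidate(dry_run=True)
    #     for duplicate, kept in pairs:
    #         print(f"would merge {duplicate.name!r} into {kept.name!r}")
    #     mem.consolidate()  # perform the merges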

    def _merge_content(self, a: MemoryEntry, b: MemoryEntry) -> str:
        """Merge content from two memories, keeping a's line order."""
        if a.content == b.content:
            return a.content
        # Simple merge: a's lines in order, then any lines unique to b.
        # (Joining an unordered set union would scramble line order.)
        a_lines = a.content.strip().split('\n')
        seen = set(a_lines)
        b_unique = [ln for ln in b.content.strip().split('\n') if ln not in seen]
        return '\n'.join(a_lines + b_unique)

    def get_stale_memories(self, max_age_days: int = 90) -> List[MemoryEntry]:
        """Get memories older than the specified age."""
        cutoff = datetime.utcnow().timestamp() - (max_age_days * 86400)
        stale = []
        for entry in self._memories.values():
            if entry.created_at.timestamp() < cutoff:
                stale.append(entry)
        return stale

    def cleanup_archive(self, max_age_days: int = 365) -> int:
        """
        Permanently delete archived memories older than max_age_days.

        Returns:
            Number of memories deleted
        """
        archive_dir = self.base_dir / "archive"
        if not archive_dir.exists():
            return 0
        cutoff = datetime.utcnow().timestamp() - (max_age_days * 86400)
        deleted = 0
        for file_path in archive_dir.glob("*.md"):
            try:
                mtime = file_path.stat().st_mtime
                if mtime < cutoff:
                    file_path.unlink()
                    deleted += 1
            except Exception as e:
                print(f"Warning: Failed to delete {file_path}: {e}")
        return deleted

    def _update_index(self):
        """Update the MEMORY.md index file."""
        index_path = self.base_dir / "MEMORY.md"
        # Build index entries
        entries = []
        for entry in self._memories.values():
            if entry.file_path:
                rel_path = entry.file_path.relative_to(self.base_dir)
                entries.append(MemoryIndex(
                    name=entry.name,
                    file_path=rel_path,
                    description=entry.description,
                    type=entry.type,
                    created_at=entry.created_at
                ))
        # Sort by date descending
        entries.sort(key=lambda x: x.created_at, reverse=True)
        # Build content
        lines = ["# AP Knowledge Base - Memory Index", ""]
        lines.append("This is an auto-generated index of memories. Do not edit manually.")
        lines.append("")
        lines.append("## Recent Memories")
        lines.append("")
        for idx in entries[:self.MAX_INDEX_LINES]:
            lines.append(idx.to_line())
        if len(entries) > self.MAX_INDEX_LINES:
            lines.append(f"\n... and {len(entries) - self.MAX_INDEX_LINES} more memories")
        lines.append("")
        lines.append("## Memory Types")
        lines.append("")
        for mem_type in MemoryType:
            count = sum(1 for e in self._memories.values() if e.type == mem_type)
            lines.append(f"- **{mem_type.value}**: {count} memories")
        content = '\n'.join(lines)
        # Truncate if too large (report the pre-truncation size; measuring
        # after truncation would always equal the limit)
        if len(content) > self.MAX_INDEX_BYTES:
            original_len = len(content)
            content = content[:self.MAX_INDEX_BYTES]
            content += f"\n\n> WARNING: Index truncated ({original_len} bytes, limit: {self.MAX_INDEX_BYTES})"
        index_path.write_text(content, encoding='utf-8')

    def _save_relationships(self):
        """Save relationships to disk."""
        rel_path = self.base_dir / "relationships.json"
        data = []
        for source, targets in self._relationships.items():
            for target, rel_type in targets:
                data.append({
                    'source': source,
                    'target': target,
                    'type': rel_type.value
                })
        rel_path.write_text(json.dumps(data, indent=2), encoding='utf-8')

    def get_stats(self) -> Dict:
        """Get statistics about the memory directory."""
        stats = {
            'total_memories': len(self._memories),
            'by_type': {},
            'by_scope': {},
            'by_source': {},
            'total_relationships': sum(len(r) for r in self._relationships.values()),
            'avg_age_days': 0,
            'avg_confidence': 0.0,
        }
        total_age = 0
        total_confidence = 0.0
        for entry in self._memories.values():
            t = entry.type.value
            s = entry.scope.value
            src = entry.source
            stats['by_type'][t] = stats['by_type'].get(t, 0) + 1
            stats['by_scope'][s] = stats['by_scope'].get(s, 0) + 1
            stats['by_source'][src] = stats['by_source'].get(src, 0) + 1
            total_age += entry.age_days()
            total_confidence += entry.confidence
        if self._memories:
            stats['avg_age_days'] = total_age / len(self._memories)
            stats['avg_confidence'] = total_confidence / len(self._memories)
        return stats
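
    # Shape of the returned dict (all keys come from the code above; the
    # values and the type/scope/source names are hypothetical):
    #
    #     {'total_memories': 42,
    #      'by_type': {'fact': 20, 'lesson': 12},
    #      'by_scope': {'project': 30, 'global': 12},
    #      'by_source': {'user': 25, 'agent': 17},
    #      'total_relationships': 7,
    #      'avg_age_days': 14.3,
    #      'avg_confidence': 0.82}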

    def export_all(self) -> Dict:
        """Export all memories as a dictionary."""
        return {
            'memories': [entry.to_frontmatter() for entry in self._memories.values()],
            'relationships': [
                {
                    'source': src,
                    'target': tgt,
                    'type': rel.value
                }
                for src, rels in self._relationships.items()
                for tgt, rel in rels
            ],
            'exported_at': datetime.utcnow().isoformat(),
            'version': '1.0'
        }

    def import_data(self, data: Dict, overwrite: bool = False) -> Tuple[int, int]:
        """
        Import memories from a dictionary.

        Returns:
            (imported_count, skipped_count)
        """
        imported = 0
        skipped = 0
        for mem_text in data.get('memories', []):
            try:
                entry = MemoryEntry.from_frontmatter(mem_text)
                if entry.id in self._memories and not overwrite:
                    skipped += 1
                    continue
                self.save(entry)
                imported += 1
            except Exception as e:
                print(f"Warning: Failed to import memory: {e}")
                skipped += 1
        # Import relationships, skipping exact duplicates so repeated
        # imports don't multiply edges
        for rel in data.get('relationships', []):
            pair = (rel['target'], RelationshipType(rel['type']))
            if pair not in self._relationships[rel['source']]:
                self._relationships[rel['source']].append(pair)
        self._save_relationships()
        return imported, skipped
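

# Illustrative export/import round trip (not part of the module API; the
# paths are hypothetical):
#
#     mem = MemoryDirectory(Path("./knowledge"))
#     Path("backup.json").write_text(json.dumps(mem.export_all()))
#
#     restored = MemoryDirectory(Path("./knowledge-restored"))
#     data = json.loads(Path("backup.json").read_text())
#     imported, skipped = restored.import_data(data, overwrite=True)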