"""
|
|
AP Knowledge Base - Memory Directory Core
|
|
|
|
Hierarchical memory storage with aging, relevance scoring, and consolidation.
|
|
Based on the memdir pattern from Claude Code.
|
|
"""
|
|
|
|
import os
|
|
import json
|
|
import shutil
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
from typing import List, Dict, Optional, Tuple, Set, Callable
|
|
from collections import defaultdict
|
|
import fnmatch
|
|
|
|
from memory_types import (
|
|
MemoryEntry, MemoryQuery, MemoryType, MemoryScope,
|
|
MemoryIndex, RelationshipType
|
|
)
|
|
|
|
|
|
class MemoryDirectory:
    """
    Core memory directory for hierarchical memory storage.

    Memory layout:

        <base_dir>/
            MEMORY.md           # Index file
            facts/              # Fact memories
            procedures/         # Procedure memories
            observations/       # Observation memories
            lessons/            # Lesson memories
            archive/            # Archived memories
            relationships.json  # Memory relationship graph
    """

    # Hard limits on the generated MEMORY.md index.
    MAX_INDEX_LINES = 200
    MAX_INDEX_BYTES = 25000
    # Cap on stored memory files (not enforced anywhere in this class).
    MAX_MEMORY_FILES = 200
    # Similarity threshold for merging near-duplicate memories.
    CONSOLIDATION_THRESHOLD = 0.85
|
|
|
|
def __init__(self, base_dir: Path):
    """Initialize the store at *base_dir*, creating subdirectories and loading state."""
    self.base_dir = Path(base_dir)
    self._ensure_directories()
    # In-memory caches; populated from disk by _load_all() below.
    self._memories: Dict[str, MemoryEntry] = {}
    self._relationships: Dict[str, List[Tuple[str, RelationshipType]]] = defaultdict(list)
    self._load_all()
|
|
|
|
def _ensure_directories(self):
    """Create the per-type and archive subdirectories if they don't exist."""
    for name in ('facts', 'procedures', 'observations', 'lessons', 'archive'):
        (self.base_dir / name).mkdir(parents=True, exist_ok=True)
|
|
|
|
def _type_dir(self, mem_type: MemoryType) -> Path:
    """Return the storage directory for *mem_type* (pluralized, e.g. 'fact' -> facts/)."""
    return self.base_dir / (mem_type.value + "s")
|
|
|
|
def _memory_path(self, entry: MemoryEntry) -> Path:
    """Build the on-disk path for *entry*: <type dir>/<id>_<sanitized name>.md."""
    # Replace anything that is not alphanumeric, '-' or '_', then cap the length at 50.
    sanitized = ''.join(ch if ch.isalnum() or ch in '-_' else '_' for ch in entry.name)[:50]
    return self._type_dir(entry.type) / f"{entry.id}_{sanitized}.md"
|
|
|
|
def _load_all(self):
    """Load all memories and the relationship graph from disk.

    Clears the in-memory cache, re-reads every ``*.md`` file under each
    per-type directory (the ``archive/`` directory is NOT scanned), then
    loads ``relationships.json``.  Files that fail to parse are skipped
    with a warning rather than aborting the load.
    """
    self._memories.clear()

    for mem_type in MemoryType:
        type_dir = self._type_dir(mem_type)
        if not type_dir.exists():
            continue

        for file_path in type_dir.glob("*.md"):
            try:
                content = file_path.read_text(encoding='utf-8')
                # Entries are stored as markdown with frontmatter metadata.
                entry = MemoryEntry.from_frontmatter(content, file_path)
                self._memories[entry.id] = entry
            except Exception as e:
                # Best-effort load: one corrupt file must not block startup.
                print(f"Warning: Failed to load {file_path}: {e}")

    # Load relationships: a flat list of {source, target, type} records,
    # the shape written by _save_relationships().
    rel_path = self.base_dir / "relationships.json"
    if rel_path.exists():
        try:
            data = json.loads(rel_path.read_text())
            for rel in data:
                self._relationships[rel['source']].append(
                    (rel['target'], RelationshipType(rel['type']))
                )
        except Exception as e:
            print(f"Warning: Failed to load relationships: {e}")
|
|
|
|
def save(self, entry: MemoryEntry) -> Path:
    """
    Save a memory entry to disk.

    Updates the modification timestamp, (re)computes the target path,
    writes the entry as frontmatter markdown, then refreshes the
    in-memory cache and the MEMORY.md index.

    Returns:
        Path to the saved file
    """
    entry.modified_at = datetime.utcnow()

    # The path depends on the entry's type and name, so a rename/retype
    # moves the entry to a new file.
    old_path = entry.file_path
    entry.file_path = self._memory_path(entry)

    # Write to disk
    entry.file_path.parent.mkdir(parents=True, exist_ok=True)
    entry.file_path.write_text(entry.to_frontmatter(), encoding='utf-8')

    # Fix: remove the previous file when the path changed; otherwise a
    # rename leaves a stale duplicate behind that _load_all() would
    # re-load as a second copy of this memory.
    if old_path and old_path != entry.file_path and old_path.exists():
        old_path.unlink()

    # Update cache
    self._memories[entry.id] = entry

    # Update index
    self._update_index()

    return entry.file_path
|
|
|
|
def get(self, memory_id: str) -> Optional[MemoryEntry]:
    """Look up a memory by ID, recording the access; returns None if absent."""
    entry = self._memories.get(memory_id)
    if entry:
        # Bump access bookkeeping so usage-based scoring sees this hit.
        entry.touch()
    return entry
|
|
|
|
def delete(self, memory_id: str) -> bool:
    """Delete a memory by ID: its file, cache entry, and all relationship
    edges that mention it.  Returns True when something was removed."""
    entry = self._memories.get(memory_id)
    if not entry:
        return False

    # Remove the backing file, if one was ever written.
    if entry.file_path and entry.file_path.exists():
        entry.file_path.unlink()

    # Drop from the in-memory cache.
    self._memories.pop(memory_id)

    # Purge the relationship graph: outgoing edges (keyed by this id)
    # and incoming edges (this id as the target of another memory).
    self._relationships.pop(memory_id, None)
    for src in list(self._relationships):
        self._relationships[src] = [
            (target, rel) for target, rel in self._relationships[src]
            if target != memory_id
        ]

    self._save_relationships()
    self._update_index()

    return True
|
|
|
|
def archive(self, memory_id: str) -> bool:
    """
    Move a memory to the archive.

    The entry's file is moved into ``archive/``, tagged with archival
    metadata, and re-persisted there.  The memory is then removed from
    the active cache and the index is refreshed — this mirrors what a
    restart produces, since _load_all() only scans the per-type
    directories, never ``archive/``.

    Returns:
        True on success; False when the id is unknown or the entry has
        no backing file.
    """
    entry = self._memories.get(memory_id)
    if not entry or not entry.file_path:
        return False

    archive_dir = self.base_dir / "archive"
    archive_dir.mkdir(exist_ok=True)

    new_path = archive_dir / entry.file_path.name
    shutil.move(str(entry.file_path), str(new_path))

    entry.file_path = new_path
    entry.metadata['archived'] = True
    entry.metadata['archived_at'] = datetime.utcnow().isoformat()

    # Fix: persist the archival metadata. Previously it was only set on
    # the in-memory object and therefore lost on restart.
    new_path.write_text(entry.to_frontmatter(), encoding='utf-8')

    # Fix: keep cache/index consistent with on-disk state. Previously
    # the archived entry stayed in the active cache (and index) until
    # the next restart silently dropped it.
    del self._memories[memory_id]
    self._update_index()

    return True
|
|
|
|
def search(self, query: MemoryQuery) -> List[Tuple[MemoryEntry, float]]:
    """
    Search memories matching the query.

    Matching entries are scored according to ``query.sort_by``:
    "relevance" (text relevance blended 70/30 with freshness),
    "freshness", "access" (access count, capped at 10 accesses = 1.0),
    or recency of creation otherwise.

    Returns:
        List of (entry, score) tuples, best first, truncated to
        ``query.limit``.
    """
    def score_for(entry) -> float:
        # The scoring strategy is selected by the query's sort criterion.
        if query.sort_by == "relevance":
            # Blend text relevance with freshness (70% / 30%).
            return entry.relevance_score(query.text) * 0.7 + entry.freshness_score() * 0.3
        if query.sort_by == "freshness":
            return entry.freshness_score()
        if query.sort_by == "access":
            return min(1.0, entry.access_count / 10.0)
        # Default ("date"): newer memories score higher.
        return 1.0 / (1 + entry.age_days())

    results = [
        (entry, score_for(entry))
        for entry in self._memories.values()
        if query.matches(entry)
    ]

    # Best score first.
    results.sort(key=lambda item: item[1], reverse=True)
    return results[:query.limit]
|
|
|
|
def find_by_tags(self, tags: List[str], match_all: bool = False) -> List[MemoryEntry]:
    """Return memories carrying the given tags (case-insensitive).

    With ``match_all`` True an entry must carry every requested tag;
    otherwise a single shared tag is enough.
    """
    wanted = {t.lower() for t in tags}
    matches = []

    for entry in self._memories.values():
        have = {t.lower() for t in entry.tags}
        hit = wanted <= have if match_all else bool(wanted & have)
        if hit:
            matches.append(entry)

    return matches
|
|
|
|
def find_similar(self, entry: MemoryEntry, threshold: float = 0.8) -> List[MemoryEntry]:
    """
    Find memories similar to the given entry.

    Compares *entry* against every other cached memory with the
    word-overlap metric from _similarity(); candidates scoring at or
    above *threshold* are returned.
    """
    return [
        candidate
        for candidate in self._memories.values()
        if candidate.id != entry.id
        and self._similarity(entry, candidate) >= threshold
    ]
|
|
|
|
def _similarity(self, a: MemoryEntry, b: MemoryEntry) -> float:
    """Jaccard similarity (0.0-1.0) over lower-cased words drawn from each
    entry's name, description, and first 500 characters of content.
    Words of length <= 2 are ignored."""
    def words_of(entry) -> Set[str]:
        text = entry.name + " " + entry.description + " " + entry.content[:500]
        return {w.lower() for w in text.split() if len(w) > 2}

    wa = words_of(a)
    wb = words_of(b)

    if not wa or not wb:
        return 0.0

    union = len(wa | wb)
    return len(wa & wb) / union if union > 0 else 0.0
|
|
|
|
def consolidate(self, dry_run: bool = False) -> List[Tuple[MemoryEntry, MemoryEntry]]:
    """
    Consolidate similar memories by merging them.

    Performs a pairwise similarity scan (O(n^2)); when a pair scores at
    or above CONSOLIDATION_THRESHOLD, the later entry is merged into the
    earlier one and deleted.  With ``dry_run`` True nothing is modified;
    the would-be merges are only reported.

    Returns:
        List of merged (old, new) pairs — (duplicate that was removed,
        surviving entry it was folded into)
    """
    merged = []
    to_delete = set()  # ids already consumed by a merge in this pass

    entries = list(self._memories.values())

    for i, entry in enumerate(entries):
        if entry.id in to_delete:
            continue

        # Only compare against later entries so each pair is seen once.
        for other in entries[i+1:]:
            if other.id in to_delete:
                continue

            similarity = self._similarity(entry, other)

            if similarity >= self.CONSOLIDATION_THRESHOLD:
                # Merge other into entry
                if not dry_run:
                    # Combine content
                    entry.content = self._merge_content(entry, other)
                    # Combine tags (deduplicated; order not preserved)
                    entry.tags = list(set(entry.tags + other.tags))
                    # Update description: keep the longer of the two
                    if len(other.description) > len(entry.description):
                        entry.description = other.description
                    # Boost confidence
                    entry.confidence = max(entry.confidence, other.confidence)
                    entry.modified_at = datetime.utcnow()

                    # Save merged entry
                    self.save(entry)

                    # Delete duplicate
                    self.delete(other.id)

                merged.append((other, entry))
                to_delete.add(other.id)

    return merged
|
|
|
|
def _merge_content(self, a: MemoryEntry, b: MemoryEntry) -> str:
    """Merge content from two memories.

    Identical content is returned as-is.  Otherwise the result is the
    unique lines of a followed by b, in first-seen order.

    Fix: the previous implementation joined a *set* of lines, which
    scrambled line order nondeterministically (str hashing is salted per
    process), so merged memories changed from run to run.
    """
    if a.content == b.content:
        return a.content

    # Order-preserving de-duplication of the combined lines.
    merged = []
    seen = set()
    for line in a.content.strip().split('\n') + b.content.strip().split('\n'):
        if line not in seen:
            seen.add(line)
            merged.append(line)

    return '\n'.join(merged)
|
|
|
|
def get_stale_memories(self, max_age_days: int = 90) -> List[MemoryEntry]:
    """Return memories created more than *max_age_days* days ago."""
    cutoff = datetime.utcnow().timestamp() - max_age_days * 86400
    return [
        entry for entry in self._memories.values()
        if entry.created_at.timestamp() < cutoff
    ]
|
|
|
|
def cleanup_archive(self, max_age_days: int = 365) -> int:
    """
    Permanently delete archived memories older than max_age_days.

    Age is judged by the file's modification time; deletion failures are
    reported as warnings and skipped.

    Returns:
        Number of memories deleted
    """
    archive_dir = self.base_dir / "archive"
    if not archive_dir.exists():
        return 0

    cutoff = datetime.utcnow().timestamp() - max_age_days * 86400
    removed = 0

    for file_path in archive_dir.glob("*.md"):
        try:
            if file_path.stat().st_mtime < cutoff:
                file_path.unlink()
                removed += 1
        except Exception as e:
            print(f"Warning: Failed to delete {file_path}: {e}")

    return removed
|
|
|
|
def _update_index(self):
    """Regenerate the MEMORY.md index file from the in-memory cache.

    Lists the newest MAX_INDEX_LINES memories plus per-type counts, and
    truncates the file body to MAX_INDEX_BYTES (a warning note is
    appended after the cut).
    """
    index_path = self.base_dir / "MEMORY.md"

    # Build one index record per memory that has been written to disk.
    entries = []
    for entry in self._memories.values():
        if entry.file_path:
            rel_path = entry.file_path.relative_to(self.base_dir)
            entries.append(MemoryIndex(
                name=entry.name,
                file_path=rel_path,
                description=entry.description,
                type=entry.type,
                created_at=entry.created_at
            ))

    # Newest first.
    entries.sort(key=lambda x: x.created_at, reverse=True)

    # Build content
    lines = ["# AP Knowledge Base - Memory Index", ""]
    lines.append("This is an auto-generated index of memories. Do not edit manually.")
    lines.append("")
    lines.append("## Recent Memories")
    lines.append("")

    for idx in entries[:self.MAX_INDEX_LINES]:
        lines.append(idx.to_line())

    if len(entries) > self.MAX_INDEX_LINES:
        lines.append(f"\n... and {len(entries) - self.MAX_INDEX_LINES} more memories")

    lines.append("")
    lines.append("## Memory Types")
    lines.append("")
    for mem_type in MemoryType:
        count = sum(1 for e in self._memories.values() if e.type == mem_type)
        lines.append(f"- **{mem_type.value}**: {count} memories")

    content = '\n'.join(lines)

    # Truncate if too large.  Fix: report the pre-truncation size — the
    # old code measured len(content) *after* slicing, so the warning
    # always printed the limit value instead of the real size.
    if len(content) > self.MAX_INDEX_BYTES:
        original_size = len(content)
        content = content[:self.MAX_INDEX_BYTES]
        content += f"\n\n> WARNING: Index truncated ({original_size} bytes, limit: {self.MAX_INDEX_BYTES})"

    index_path.write_text(content, encoding='utf-8')
|
|
|
|
def _save_relationships(self):
    """Write the relationship graph to relationships.json as a flat list
    of {source, target, type} records (the format _load_all reads back)."""
    records = [
        {'source': source, 'target': target, 'type': rel_type.value}
        for source, targets in self._relationships.items()
        for target, rel_type in targets
    ]

    out_path = self.base_dir / "relationships.json"
    out_path.write_text(json.dumps(records, indent=2), encoding='utf-8')
|
|
|
|
def get_stats(self) -> Dict:
    """Summarize the store: counts by type/scope/source, total relationship
    edges, and average age (days) / confidence across all memories."""
    by_type: Dict[str, int] = {}
    by_scope: Dict[str, int] = {}
    by_source: Dict[str, int] = {}
    total_age = 0
    total_confidence = 0.0

    for entry in self._memories.values():
        by_type[entry.type.value] = by_type.get(entry.type.value, 0) + 1
        by_scope[entry.scope.value] = by_scope.get(entry.scope.value, 0) + 1
        by_source[entry.source] = by_source.get(entry.source, 0) + 1
        total_age += entry.age_days()
        total_confidence += entry.confidence

    count = len(self._memories)
    return {
        'total_memories': count,
        'by_type': by_type,
        'by_scope': by_scope,
        'by_source': by_source,
        'total_relationships': sum(len(r) for r in self._relationships.values()),
        # Averages fall back to zero for an empty store.
        'avg_age_days': (total_age / count) if count else 0,
        'avg_confidence': (total_confidence / count) if count else 0.0,
    }
|
|
|
|
def export_all(self) -> Dict:
    """Serialize every memory (as frontmatter text) plus the relationship
    graph into a plain dict, suitable for feeding back to import_data()."""
    memories = [entry.to_frontmatter() for entry in self._memories.values()]

    relationships = []
    for src, rels in self._relationships.items():
        for tgt, rel in rels:
            relationships.append({
                'source': src,
                'target': tgt,
                'type': rel.value
            })

    return {
        'memories': memories,
        'relationships': relationships,
        'exported_at': datetime.utcnow().isoformat(),
        'version': '1.0'
    }
|
|
|
|
def import_data(self, data: Dict, overwrite: bool = False) -> Tuple[int, int]:
    """
    Import memories from a dictionary (the shape produced by export_all).

    Existing memories are kept unless *overwrite* is True.  Entries that
    fail to parse or save are counted as skipped with a warning.
    Relationship records are appended to the graph and persisted.

    Returns:
        (imported_count, skipped_count)
    """
    imported = 0
    skipped = 0

    for mem_text in data.get('memories', []):
        try:
            entry = MemoryEntry.from_frontmatter(mem_text)

            if entry.id in self._memories and not overwrite:
                skipped += 1
                continue

            self.save(entry)
            imported += 1
        except Exception as e:
            # Best-effort import: a bad record must not abort the batch.
            print(f"Warning: Failed to import memory: {e}")
            skipped += 1

    # Import relationships
    for rel in data.get('relationships', []):
        self._relationships[rel['source']].append(
            (rel['target'], RelationshipType(rel['type']))
        )

    self._save_relationships()

    return imported, skipped
|