Files
timmy-config/wizards/allegro-primus/knowledge/sync.py
2026-03-31 20:02:01 +00:00

481 lines
18 KiB
Python

"""
AP Knowledge Base - Father Sync
Sync with father's knowledge:
- Export memories for father review
- Import father teachings
- Conflict resolution
"""
import json
import hashlib
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Optional, Tuple, Set
from dataclasses import dataclass, asdict
from memory_types import MemoryEntry, MemoryScope, SyncConflict, MemoryType
from memdir import MemoryDirectory
@dataclass
class SyncManifest:
    """Manifest describing one sync package.

    The manifest is stored under the ``manifest`` key of an export file and
    is read back from import files to validate their origin.
    """

    export_timestamp: str  # ISO-formatted time at which the package was built
    source: str  # 'child' or 'father'
    memory_count: int  # number of memories carried by the package
    version: str = "1.0"  # manifest format version

    def to_dict(self) -> Dict:
        """Return the manifest as a plain dict suitable for ``json.dumps``."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict) -> "SyncManifest":
        """Build a manifest from a decoded JSON mapping.

        Keys that are not manifest fields are ignored, so a package whose
        manifest carries extra keys can still be read instead of failing
        with ``TypeError: unexpected keyword argument``.

        Raises:
            TypeError: If a required field is missing from ``data``.
        """
        # Local import: the file's import block only brings in
        # ``dataclass`` and ``asdict``.
        from dataclasses import fields

        known = {f.name for f in fields(cls)}
        return cls(**{key: value for key, value in data.items() if key in known})
class FatherSync:
"""
Handles synchronization between AP (child) and father's knowledge base.
Key concepts:
- Export: Package memories for father's review
- Import: Incorporate father's teachings
- Conflict resolution: Handle divergent updates
"""
def __init__(self, memdir: MemoryDirectory, father_inbox: Optional[Path] = None):
self.memdir = memdir
self.base_dir = memdir.base_dir
# Father inbox/outbox directories
self.sync_dir = self.base_dir / "sync"
self.sync_dir.mkdir(exist_ok=True)
self.outbox = self.sync_dir / "outbox"
self.outbox.mkdir(exist_ok=True)
self.inbox = father_inbox or (self.sync_dir / "inbox")
self.inbox.mkdir(exist_ok=True)
self.processed = self.sync_dir / "processed"
self.processed.mkdir(exist_ok=True)
# Sync state tracking
self.sync_state_path = self.sync_dir / "sync_state.json"
self.sync_state = self._load_sync_state()
def _load_sync_state(self) -> Dict:
"""Load sync state from disk."""
if self.sync_state_path.exists():
try:
return json.loads(self.sync_state_path.read_text())
except Exception as e:
print(f"Warning: Failed to load sync state: {e}")
return {
'last_export': None,
'last_import': None,
'exported_memories': {}, # memory_id -> export_timestamp
'imported_memories': {}, # memory_id -> import_timestamp
'conflicts': [] # List of unresolved conflicts
}
def _save_sync_state(self):
"""Save sync state to disk."""
self.sync_state_path.write_text(json.dumps(self.sync_state, indent=2), encoding='utf-8')
def _hash_memory(self, entry: MemoryEntry) -> str:
"""Generate a hash of memory content for change detection."""
content = f"{entry.name}:{entry.description}:{entry.content}:{entry.modified_at.isoformat()}"
return hashlib.sha256(content.encode()).hexdigest()[:16]
def export_for_father(
    self,
    scope: Optional[MemoryScope] = None,
    types: Optional[List[MemoryType]] = None,
    tags: Optional[List[str]] = None,
    since_last_export: bool = True,
    include_archived: bool = False
) -> Path:
    """
    Export memories for father's review.

    Writes a JSON package (manifest, serialized memories, filter metadata)
    into the outbox and records the export in the sync state.

    Args:
        scope: Only export memories with this scope. When omitted, no scope
            filter is applied, so memories of every scope are eligible.
        types: Only export memories whose type is in this list.
        tags: Only export memories carrying at least one of these tags
            (case-insensitive).
        since_last_export: Skip memories that were not modified after the
            last export and whose content hash matches the recorded one.
        include_archived: Include memories flagged ``archived`` in metadata.

    Returns:
        Path to the exported file.
    """
    # One clock reading feeds the manifest, the file name and the sync
    # state, so the three can never disagree.
    now = datetime.utcnow()

    # Loop-invariant values, computed once.
    wanted_tags = {t.lower() for t in tags} if tags else None
    last_export = None
    if since_last_export and self.sync_state['last_export']:
        last_export = datetime.fromisoformat(self.sync_state['last_export'])
    exported_hashes = self.sync_state['exported_memories']

    # Collect memories to export
    to_export = []
    for entry in self.memdir._memories.values():
        # Scope filter (only applied when a scope was requested)
        if scope and entry.scope != scope:
            continue
        # Filter by type
        if types and entry.type not in types:
            continue
        # Filter by tags: keep the entry if it shares any requested tag
        if wanted_tags and wanted_tags.isdisjoint(t.lower() for t in entry.tags):
            continue
        # Skip archived unless requested
        if entry.metadata.get('archived') and not include_archived:
            continue
        # Skip if already exported and not modified since
        if (
            last_export is not None
            and entry.modified_at <= last_export
            and exported_hashes.get(entry.id) == self._hash_memory(entry)
        ):
            continue
        to_export.append(entry)

    # Build export package
    export_data = {
        'manifest': SyncManifest(
            export_timestamp=now.isoformat(),
            source='child',
            memory_count=len(to_export)
        ).to_dict(),
        'memories': [entry.to_frontmatter() for entry in to_export],
        'metadata': {
            'export_reason': 'father_review',
            # Report what the package actually contains: with no scope
            # filter, private memories can be exported too.
            'includes_private': any(
                entry.scope == MemoryScope.PRIVATE for entry in to_export
            ),
            'filters': {
                'scope': scope.value if scope else None,
                'types': [t.value for t in types] if types else None,
                'tags': tags
            }
        }
    }

    # Write to outbox. The name has one-second resolution, so add a numeric
    # suffix rather than overwrite an export made in the same second.
    stem = f"export_{now.strftime('%Y%m%d_%H%M%S')}"
    export_path = self.outbox / f"{stem}.json"
    suffix = 1
    while export_path.exists():
        export_path = self.outbox / f"{stem}_{suffix}.json"
        suffix += 1
    export_path.write_text(json.dumps(export_data, indent=2), encoding='utf-8')

    # Update sync state
    self.sync_state['last_export'] = now.isoformat()
    for entry in to_export:
        exported_hashes[entry.id] = self._hash_memory(entry)
    self._save_sync_state()
    return export_path
def import_from_father(self, auto_resolve: bool = False) -> Tuple[int, int, List[SyncConflict]]:
    """
    Import teachings from father.

    Every ``import_*.json`` file in the inbox is processed in name order.
    A file that is processed without error is moved to the processed
    directory; a file that raises is reported, counted as one skip and
    left in the inbox.

    Args:
        auto_resolve: Automatically resolve simple conflicts

    Returns:
        (imported_count, skipped_count, conflicts), where ``conflicts``
        holds the conflicts that still need manual resolution.
    """
    imported = 0
    skipped = 0
    conflicts = []

    # Materialize and sort the listing: files are renamed out of the inbox
    # during the loop, and sorted order makes processing deterministic.
    for import_file in sorted(self.inbox.glob("import_*.json")):
        try:
            data = json.loads(import_file.read_text(encoding='utf-8'))

            # Validate manifest
            manifest = SyncManifest.from_dict(data.get('manifest', {}))
            if manifest.source != 'father':
                print(f"Warning: Skipping import from unknown source: {manifest.source}")
                continue

            # Process each memory
            for mem_text in data.get('memories', []):
                entry = MemoryEntry.from_frontmatter(mem_text)
                existing = self.memdir.get(entry.id)

                if not existing:
                    # New memory from father
                    entry.source = 'father'
                    self._apply_import(entry)
                    imported += 1
                    continue

                if not self._has_conflict(existing, entry):
                    # No conflict, skip (already have it)
                    skipped += 1
                    continue

                conflict = SyncConflict(
                    local_entry=existing,
                    remote_entry=entry,
                    conflict_type='content'
                )
                resolution = self._auto_resolve(conflict) if auto_resolve else None
                if not resolution:
                    # Needs manual resolution. Count it as skipped whether
                    # or not auto-resolution was attempted.
                    conflicts.append(conflict)
                    skipped += 1
                    continue

                conflict.resolution = resolution
                if resolution == 'keep_remote':
                    # Carry over the local file path, as resolve_conflict
                    # does for the same strategy.
                    entry.file_path = existing.file_path
                    self._apply_import(entry)
                    imported += 1
                elif resolution == 'merge':
                    self.memdir.save(self._merge_memories(existing, entry))
                    imported += 1
                else:  # keep_local
                    skipped += 1

            # Move to processed
            import_file.rename(self.processed / import_file.name)
        except Exception as e:
            # Deliberate best effort: one bad file must not stop the rest.
            print(f"Error processing {import_file}: {e}")
            skipped += 1

    # Update sync state
    now = datetime.utcnow().isoformat()
    self.sync_state['last_import'] = now
    for conflict in conflicts:
        self.sync_state['conflicts'].append({
            'local_id': conflict.local_entry.id,
            'remote_id': conflict.remote_entry.id,
            'type': conflict.conflict_type,
            'detected_at': now
        })
    self._save_sync_state()
    return imported, skipped, conflicts
def _has_conflict(self, local: MemoryEntry, remote: MemoryEntry) -> bool:
"""Check if there's a conflict between local and remote."""
# Different content = potential conflict
if local.content != remote.content:
return True
if local.description != remote.description:
return True
return False
def _auto_resolve(self, conflict: SyncConflict) -> Optional[str]:
"""
Attempt to auto-resolve a conflict.
Returns:
Resolution strategy or None if manual resolution needed
"""
local = conflict.local_entry
remote = conflict.remote_entry
# Father's version is newer
if remote.modified_at > local.modified_at:
# If father has higher confidence, accept his
if remote.confidence > local.confidence:
return 'keep_remote'
# If local has been accessed recently, prefer local
if local.accessed_at:
days_since_access = (datetime.utcnow() - local.accessed_at).days
if days_since_access < 7:
return 'keep_local'
# If content can be merged
if self._can_merge(local, remote):
return 'merge'
# Default: no auto-resolution
return None
def _can_merge(self, a: MemoryEntry, b: MemoryEntry) -> bool:
"""Check if two memories can be automatically merged."""
# Simple check: non-overlapping content additions
a_lines = set(a.content.split('\n'))
b_lines = set(b.content.split('\n'))
# If one is subset of other, can merge
if a_lines <= b_lines or b_lines <= a_lines:
return True
# If small overlap, probably can merge
overlap = len(a_lines & b_lines)
total = len(a_lines | b_lines)
return overlap / total > 0.5 if total > 0 else True
def _merge_memories(self, local: MemoryEntry, remote: MemoryEntry) -> MemoryEntry:
    """Merge a local memory and father's version into a new entry.

    Content is merged line-wise while preserving the original line order
    (sorting the lines would scramble the text and collapse repeats):
    - if every remote line already exists locally, local content is kept;
    - if every local line exists remotely, remote content is used as-is;
    - otherwise the remote-only lines are appended after the local content.

    The merged entry keeps the local id, name, type, scope, creation time
    and file path, takes the longer description, the union of tags and the
    higher confidence, and records merge provenance in its metadata.
    """
    local_lines = local.content.split('\n')
    remote_lines = remote.content.split('\n')
    local_set = set(local_lines)
    remote_set = set(remote_lines)

    if remote_set <= local_set:
        merged_content = local.content
    elif local_set <= remote_set:
        merged_content = remote.content
    else:
        # dict.fromkeys drops repeated additions but keeps first-seen order.
        additions = list(dict.fromkeys(
            line for line in remote_lines if line not in local_set
        ))
        merged_content = '\n'.join(local_lines + additions)

    # Prefer father's description if longer
    description = remote.description if len(remote.description) > len(local.description) else local.description

    # Merge tags, local first, with a stable order and no duplicates
    tags = list(dict.fromkeys(local.tags + remote.tags))

    # One timestamp for both the entry and its metadata
    now = datetime.utcnow()

    # Create merged entry
    merged = MemoryEntry(
        id=local.id,  # Keep local ID
        name=local.name,
        description=description,
        type=local.type,
        scope=local.scope,
        content=merged_content,
        tags=tags,
        created_at=local.created_at,
        modified_at=now,
        source='merged',
        confidence=max(local.confidence, remote.confidence),
        file_path=local.file_path,
        metadata={
            **local.metadata,
            'merged_from': remote.id,
            'merged_at': now.isoformat(),
            'father_version': remote.modified_at.isoformat()
        }
    )
    return merged
def _apply_import(self, entry: MemoryEntry):
"""Apply an imported memory."""
entry.modified_at = datetime.utcnow()
self.memdir.save(entry)
# Track in sync state
self.sync_state['imported_memories'][entry.id] = {
'imported_at': datetime.utcnow().isoformat(),
'original_source': entry.source
}
def resolve_conflict(
self,
conflict: SyncConflict,
resolution: str, # 'keep_local', 'keep_remote', 'merge', 'custom'
custom_content: Optional[str] = None
) -> MemoryEntry:
"""
Manually resolve a conflict.
Args:
conflict: The conflict to resolve
resolution: Resolution strategy
custom_content: Custom content if resolution is 'custom'
Returns:
The resolved memory entry
"""
if resolution == 'keep_local':
result = conflict.local_entry
elif resolution == 'keep_remote':
result = conflict.remote_entry
result.id = conflict.local_entry.id
result.file_path = conflict.local_entry.file_path
elif resolution == 'merge':
result = self._merge_memories(conflict.local_entry, conflict.remote_entry)
elif resolution == 'custom' and custom_content:
result = conflict.local_entry
result.content = custom_content
result.modified_at = datetime.utcnow()
else:
raise ValueError(f"Invalid resolution: {resolution}")
# Save and track
result.metadata['conflict_resolved'] = True
result.metadata['resolution_strategy'] = resolution
result.metadata['resolved_at'] = datetime.utcnow().isoformat()
self.memdir.save(result)
# Remove from unresolved conflicts
self.sync_state['conflicts'] = [
c for c in self.sync_state['conflicts']
if not (c['local_id'] == conflict.local_entry.id and
c['remote_id'] == conflict.remote_entry.id)
]
self._save_sync_state()
return result
def get_pending_exports(self) -> List[Path]:
"""Get list of pending export files."""
return sorted(self.outbox.glob("export_*.json"))
def get_pending_imports(self) -> List[Path]:
"""Get list of pending import files."""
return sorted(self.inbox.glob("import_*.json"))
def get_unresolved_conflicts(self) -> List[SyncConflict]:
"""Get all unresolved conflicts."""
conflicts = []
for conf_data in self.sync_state.get('conflicts', []):
local = self.memdir.get(conf_data['local_id'])
# Remote entry would need to be reconstructed from import file
# For now, just track that there's a conflict
if local:
conflicts.append(SyncConflict(
local_entry=local,
remote_entry=MemoryEntry(id=conf_data['remote_id'], name="unknown"),
conflict_type=conf_data['type']
))
return conflicts
def create_father_package(self, notes: str = "") -> Path:
    """
    Build a review package for father on top of a full team-scope export.

    The export file produced by ``export_for_father`` is re-opened and
    extended with:
    - ``notes``: the child's notes
    - ``questions``: an empty list the child can fill with questions
    - ``stats``: the memory directory statistics

    Returns:
        Path to the package file (the same file the export wrote).
    """
    # Export team memories, ignoring what was exported before
    package_path = self.export_for_father(
        scope=MemoryScope.TEAM,
        since_last_export=False
    )

    # Enrich the package in place
    package = json.loads(package_path.read_text())
    package.update({
        'notes': notes,
        'questions': [],  # Child can ask father questions
        'stats': self.memdir.get_stats(),
    })
    serialized = json.dumps(package, indent=2)
    package_path.write_text(serialized, encoding='utf-8')
    return package_path
def sync_status(self) -> Dict:
"""Get current sync status."""
return {
'last_export': self.sync_state['last_export'],
'last_import': self.sync_state['last_import'],
'pending_exports': len(self.get_pending_exports()),
'pending_imports': len(self.get_pending_imports()),
'unresolved_conflicts': len(self.get_unresolved_conflicts()),
'total_exported': len(self.sync_state['exported_memories']),
'total_imported': len(self.sync_state['imported_memories'])
}