Implements the missing pieces of the MemPalace epic (#367): - sovereign_store.py: Self-contained memory store replacing the third-party mempalace CLI and its ONNX dependency. Uses: * SQLite + FTS5 for keyword search (porter stemmer, unicode61) * HRR phase vectors (SHA-256 deterministic, numpy optional) for semantic similarity * Reciprocal Rank Fusion to merge keyword and semantic rankings * Trust scoring with boost/decay lifecycle * Room-based organization matching the existing PalaceRoom model - promotion.py (MP-4, #371): Quality-gated scratchpad-to-palace promotion. Four heuristic gates, no LLM call: 1. Length gate (min 5 words, max 500) 2. Structure gate (rejects fragments and pure code) 3. Duplicate gate (FTS5 + Jaccard overlap detection) 4. Staleness gate (7-day threshold for old notes) Includes force override, batch promotion, and audit logging. - 21 unit tests covering HRR vectors, store operations, search, trust lifecycle, and all promotion gates. Zero external dependencies. Zero API calls. Zero cloud. Refs: #367 #370 #371
189 lines
6.5 KiB
Python
189 lines
6.5 KiB
Python
"""Memory Promotion — quality-gated scratchpad-to-palace promotion.
|
|
|
|
Implements MP-4 (#371): move session notes to durable memory only when
|
|
they pass quality gates. No LLM calls — all heuristic-based.
|
|
|
|
Quality gates:
|
|
1. Content length (too short = noise; too long = summarize first)
2. Structural quality (contains alphabetic text, not just a fragment)
3. Duplicate detection (hybrid search score + word-level Jaccard overlap)
|
|
4. Staleness check (don't promote stale notes from old sessions)
|
|
|
|
Refs: Epic #367, Sub-issue #371
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
import time
|
|
from typing import Optional
|
|
|
|
try:
|
|
from .sovereign_store import SovereignStore
|
|
except ImportError:
|
|
from sovereign_store import SovereignStore
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Quality gate thresholds
|
|
# ---------------------------------------------------------------------------
|
|
|
|
# Gate 1: a note must contain between MIN and MAX whitespace-separated words.
MIN_CONTENT_WORDS = 5
MAX_CONTENT_WORDS = 500

# Gate 3: a search hit scoring above this value is reported as a duplicate.
DUPLICATE_SIMILARITY = 0.85
# NOTE(review): not referenced in the visible part of this module.
DUPLICATE_FTS_THRESHOLD = 3

# Gate 4: notes older than seven days (expressed in seconds) are stale.
STALE_SECONDS = 7 * 24 * 60 * 60

# NOTE(review): not referenced in the visible part of this module.
MIN_TRUST_FOR_AUTO = 0.4
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Quality checks
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _check_length(content: str) -> tuple[bool, str]:
    """Gate 1: accept only notes whose word count lies within bounds.

    Words are whitespace-separated tokens of ``content``.

    Returns:
        ``(passed, message)`` where ``message`` is ``"OK"`` on success and a
        human-readable rejection reason otherwise.
    """
    word_count = len(content.split())
    if word_count < MIN_CONTENT_WORDS:
        return False, f"Too short ({word_count} words, minimum {MIN_CONTENT_WORDS})"
    if word_count > MAX_CONTENT_WORDS:
        return False, f"Too long ({word_count} words, maximum {MAX_CONTENT_WORDS}). Summarize first."
    return True, "OK"
|
|
|
|
|
|
def _check_structure(content: str) -> tuple[bool, str]:
|
|
"""Gate 2: Basic structural quality."""
|
|
if not re.search(r"[a-zA-Z]", content):
|
|
return False, "No alphabetic content — pure code/numbers are not memory-worthy"
|
|
if len(content.split()) < 3:
|
|
return False, "Fragment — needs at least subject + predicate"
|
|
return True, "OK"
|
|
|
|
|
|
def _check_duplicate(content: str, store: SovereignStore, room: str) -> tuple[bool, str]:
    """Gate 3: reject notes that closely match an existing memory.

    Asks ``store.search`` for up to five candidates in ``room`` (with
    ``min_trust=0.0``). A candidate causes rejection when its search score
    exceeds ``DUPLICATE_SIMILARITY`` or when its word-level Jaccard overlap
    with ``content`` exceeds 0.8.

    Returns:
        ``(passed, message)`` where ``message`` is ``"OK"`` on success and
        otherwise names the conflicting memory id.
    """
    candidates = store.search(content, room=room, limit=5, min_trust=0.0)
    for hit in candidates:
        score = hit["score"]
        if score > DUPLICATE_SIMILARITY:
            return False, f"Duplicate detected: memory #{hit['memory_id']} (score {score:.3f})"
        overlap = _text_overlap(content, hit["content"])
        if overlap > 0.8:
            return False, f"Near-duplicate text: memory #{hit['memory_id']}"
    return True, "OK"
|
|
|
|
|
|
def _check_staleness(written_at: float) -> tuple[bool, str]:
    """Gate 4: reject notes written more than ``STALE_SECONDS`` ago.

    Args:
        written_at: Timestamp compared against ``time.time()``.

    Returns:
        ``(passed, message)`` where ``message`` is ``"OK"`` on success and
        otherwise reports the note's age in whole days.
    """
    elapsed = time.time() - written_at
    is_stale = elapsed > STALE_SECONDS
    if not is_stale:
        return True, "OK"
    age_days = int(elapsed / 86400)
    return False, f"Stale ({age_days} days old). Review manually before promoting."
|
|
|
|
|
|
def _text_overlap(a: str, b: str) -> float:
|
|
"""Jaccard similarity between two texts (word-level)."""
|
|
words_a = set(a.lower().split())
|
|
words_b = set(b.lower().split())
|
|
if not words_a or not words_b:
|
|
return 0.0
|
|
intersection = words_a & words_b
|
|
union = words_a | words_b
|
|
return len(intersection) / len(union)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Public API
|
|
# ---------------------------------------------------------------------------
|
|
|
|
class PromotionResult:
    """Outcome of a single promotion attempt.

    The constructor stores its four arguments unchanged as instance
    attributes of the same names.
    """

    def __init__(self, success: bool, memory_id: Optional[int], reason: str, gates: dict):
        # success / memory_id: whether the attempt succeeded and the id it produced, if any.
        self.success, self.memory_id = success, memory_id
        # reason / gates: human-readable explanation and the per-gate results.
        self.reason, self.gates = reason, gates

    def __repr__(self):
        if self.success:
            label = "PROMOTED"
        else:
            label = "REJECTED"
        return f"PromotionResult({label}: {self.reason})"
|
|
|
|
|
|
def evaluate_for_promotion(
    content: str,
    store: SovereignStore,
    room: str = "general",
    written_at: Optional[float] = None,
) -> dict:
    """Run every quality gate against a note without storing it.

    Args:
        content: Note text to evaluate.
        store: Store passed to the duplicate gate.
        room: Room passed to the duplicate gate.
        written_at: Timestamp passed to the staleness gate; defaults to
            ``time.time()`` when ``None``.

    Returns:
        A dict with ``"eligible"`` (True when all gates passed), ``"gates"``
        (gate name -> ``(passed, message)``) and ``"content_preview"`` (the
        first 100 characters, followed by ``"..."`` if truncated).
    """
    note_time = time.time() if written_at is None else written_at

    # Dict literal values are evaluated in order, so gates run as listed.
    gates = {
        "length": _check_length(content),
        "structure": _check_structure(content),
        "duplicate": _check_duplicate(content, store, room),
        "staleness": _check_staleness(note_time),
    }

    preview = content[:100]
    if len(content) > 100:
        preview += "..."

    return {
        "eligible": all(passed for passed, _ in gates.values()),
        "gates": gates,
        "content_preview": preview,
    }
|
|
|
|
|
|
def promote(
    content: str,
    store: SovereignStore,
    session_id: str,
    scratch_key: str,
    room: str = "general",
    category: str = "",
    trust: float = 0.5,
    written_at: Optional[float] = None,
    force: bool = False,
) -> PromotionResult:
    """Promote a scratchpad note to durable palace memory.

    Unless ``force`` is set, the note must pass the length, structure,
    duplicate and staleness gates; the first failing gate (in that order)
    produces a rejected result and nothing is stored.

    Args:
        content: Note text to store.
        store: Destination store; also used by the duplicate gate.
        session_id: Session identifier recorded in the promotion log.
        scratch_key: Scratchpad key recorded in the promotion log.
        room: Room the memory is stored in and checked against.
        category: Category forwarded to ``store.store``.
        trust: Trust value forwarded to ``store.store``.
        written_at: Timestamp for the staleness gate; defaults to
            ``time.time()`` when ``None``.
        force: Skip all gates and log the promotion as ``"forced"``.

    Returns:
        A ``PromotionResult`` carrying the new memory id on success, or the
        failure reason and gate results on rejection.
    """
    note_time = time.time() if written_at is None else written_at

    gates: dict = {}
    if not force:
        gates = {
            "length": _check_length(content),
            "structure": _check_structure(content),
            "duplicate": _check_duplicate(content, store, room),
            "staleness": _check_staleness(note_time),
        }
        first_failure = next(
            ((name, message) for name, (passed, message) in gates.items() if not passed),
            None,
        )
        if first_failure is not None:
            gate_name, message = first_failure
            return PromotionResult(
                success=False,
                memory_id=None,
                reason=f"Failed gate '{gate_name}': {message}",
                gates=gates,
            )

    memory_id = store.store(content, room=room, category=category, trust=trust)
    log_reason = "forced" if force else "auto"
    store.log_promotion(session_id, scratch_key, memory_id, reason=log_reason)
    return PromotionResult(
        success=True,
        memory_id=memory_id,
        reason="Promoted to durable memory",
        gates=gates,
    )
|
|
|
|
|
|
def _parse_written_at(raw) -> Optional[float]:
    """Convert a scratchpad ``written_at`` value to a POSIX timestamp.

    Accepts an int/float (taken as an epoch timestamp), a string in
    ``"%Y-%m-%d %H:%M:%S"`` format, or any string understood by
    ``datetime.fromisoformat``. Returns ``None`` when the value cannot be
    interpreted.
    """
    import datetime

    # bool is an int subclass; True/False is never a meaningful timestamp.
    if isinstance(raw, bool):
        return None
    if isinstance(raw, (int, float)):
        return float(raw)
    if not isinstance(raw, str):
        return None

    parsers = (
        lambda text: datetime.datetime.strptime(text, "%Y-%m-%d %H:%M:%S"),
        datetime.datetime.fromisoformat,
    )
    for parse in parsers:
        try:
            # NOTE(review): naive datetimes are interpreted as local time by
            # .timestamp(); confirm the scratchpad does not write UTC strings.
            return parse(raw).timestamp()
        except ValueError:
            continue
    return None


def promote_session_batch(
    store: SovereignStore,
    session_id: str,
    notes: dict[str, dict],
    room: str = "general",
    force: bool = False,
) -> list[PromotionResult]:
    """Promote all notes from a session scratchpad.

    Args:
        store: Destination store, forwarded to ``promote``.
        session_id: Session identifier, forwarded to ``promote``.
        notes: Mapping of scratchpad key to entry. A dict entry supplies its
            text under ``"value"`` (falling back to ``str(entry)``) and may
            carry a ``"written_at"`` timestamp; any other entry is
            stringified and used as the text.
        room: Room forwarded to ``promote``.
        force: Forwarded to ``promote``.

    Returns:
        One ``PromotionResult`` per note, in iteration order of ``notes``.
    """
    results = []
    for key, entry in notes.items():
        written_at = None
        if isinstance(entry, dict):
            content = entry["value"] if "value" in entry else str(entry)
            if "written_at" in entry:
                # An unparseable timestamp yields None, which promote() treats
                # as "written now" (best effort, as before). Numeric epochs and
                # ISO-8601 strings are now honoured instead of being dropped,
                # so they no longer bypass the staleness gate.
                written_at = _parse_written_at(entry["written_at"])
        else:
            content = str(entry)

        results.append(
            promote(
                content=str(content),
                store=store,
                session_id=session_id,
                scratch_key=key,
                room=room,
                written_at=written_at,
                force=force,
            )
        )
    return results
|