Files
timmy-config/hermes-sovereign/mempalace/promotion.py
perplexity 593621c5e0 feat: sovereign memory store — zero-API durable memory (SQLite + FTS5 + HRR)
Implements the missing pieces of the MemPalace epic (#367):

- sovereign_store.py: Self-contained memory store replacing the third-party
  mempalace CLI and its ONNX dependency. Uses:
  * SQLite + FTS5 for keyword search (porter stemmer, unicode61)
  * HRR phase vectors (SHA-256 deterministic, numpy optional) for semantic similarity
  * Reciprocal Rank Fusion to merge keyword and semantic rankings
  * Trust scoring with boost/decay lifecycle
  * Room-based organization matching the existing PalaceRoom model

- promotion.py (MP-4, #371): Quality-gated scratchpad-to-palace promotion.
  Four heuristic gates, no LLM call:
  1. Length gate (min 5 words, max 500)
  2. Structure gate (rejects fragments and pure code)
  3. Duplicate gate (FTS5 + Jaccard overlap detection)
  4. Staleness gate (7-day threshold for old notes)
  Includes force override, batch promotion, and audit logging.

- 21 unit tests covering HRR vectors, store operations, search,
  trust lifecycle, and all promotion gates.

Zero external dependencies. Zero API calls. Zero cloud.

Refs: #367 #370 #371
2026-04-07 22:41:37 +00:00

189 lines
6.5 KiB
Python

"""Memory Promotion — quality-gated scratchpad-to-palace promotion.
Implements MP-4 (#371): move session notes to durable memory only when
they pass quality gates. No LLM calls — all heuristic-based.
Quality gates:
1. Minimum content length (too short = noise)
2. Duplicate detection (FTS5 + HRR similarity check)
3. Structural quality (has subject-verb structure, not just a fragment)
4. Staleness check (don't promote stale notes from old sessions)
Refs: Epic #367, Sub-issue #371
"""
from __future__ import annotations
import re
import time
from typing import Optional
try:
from .sovereign_store import SovereignStore
except ImportError:
from sovereign_store import SovereignStore
# ---------------------------------------------------------------------------
# Quality gate thresholds
# ---------------------------------------------------------------------------
MIN_CONTENT_WORDS = 5
MAX_CONTENT_WORDS = 500
DUPLICATE_SIMILARITY = 0.85
DUPLICATE_FTS_THRESHOLD = 3
STALE_SECONDS = 86400 * 7
MIN_TRUST_FOR_AUTO = 0.4
# ---------------------------------------------------------------------------
# Quality checks
# ---------------------------------------------------------------------------
def _check_length(content: str) -> tuple[bool, str]:
"""Gate 1: Content length check."""
words = content.split()
if len(words) < MIN_CONTENT_WORDS:
return False, f"Too short ({len(words)} words, minimum {MIN_CONTENT_WORDS})"
if len(words) > MAX_CONTENT_WORDS:
return False, f"Too long ({len(words)} words, maximum {MAX_CONTENT_WORDS}). Summarize first."
return True, "OK"
def _check_structure(content: str) -> tuple[bool, str]:
"""Gate 2: Basic structural quality."""
if not re.search(r"[a-zA-Z]", content):
return False, "No alphabetic content — pure code/numbers are not memory-worthy"
if len(content.split()) < 3:
return False, "Fragment — needs at least subject + predicate"
return True, "OK"
def _check_duplicate(content: str, store: SovereignStore, room: str) -> tuple[bool, str]:
"""Gate 3: Duplicate detection via hybrid search."""
results = store.search(content, room=room, limit=5, min_trust=0.0)
for r in results:
if r["score"] > DUPLICATE_SIMILARITY:
return False, f"Duplicate detected: memory #{r['memory_id']} (score {r['score']:.3f})"
if _text_overlap(content, r["content"]) > 0.8:
return False, f"Near-duplicate text: memory #{r['memory_id']}"
return True, "OK"
def _check_staleness(written_at: float) -> tuple[bool, str]:
"""Gate 4: Staleness check."""
age = time.time() - written_at
if age > STALE_SECONDS:
days = int(age / 86400)
return False, f"Stale ({days} days old). Review manually before promoting."
return True, "OK"
def _text_overlap(a: str, b: str) -> float:
"""Jaccard similarity between two texts (word-level)."""
words_a = set(a.lower().split())
words_b = set(b.lower().split())
if not words_a or not words_b:
return 0.0
intersection = words_a & words_b
union = words_a | words_b
return len(intersection) / len(union)
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
class PromotionResult:
"""Result of a promotion attempt."""
def __init__(self, success: bool, memory_id: Optional[int], reason: str, gates: dict):
self.success = success
self.memory_id = memory_id
self.reason = reason
self.gates = gates
def __repr__(self):
status = "PROMOTED" if self.success else "REJECTED"
return f"PromotionResult({status}: {self.reason})"
def evaluate_for_promotion(
content: str,
store: SovereignStore,
room: str = "general",
written_at: Optional[float] = None,
) -> dict:
"""Run all quality gates without actually promoting."""
if written_at is None:
written_at = time.time()
gates = {}
gates["length"] = _check_length(content)
gates["structure"] = _check_structure(content)
gates["duplicate"] = _check_duplicate(content, store, room)
gates["staleness"] = _check_staleness(written_at)
all_passed = all(passed for passed, _ in gates.values())
return {
"eligible": all_passed,
"gates": gates,
"content_preview": content[:100] + ("..." if len(content) > 100 else ""),
}
def promote(
content: str,
store: SovereignStore,
session_id: str,
scratch_key: str,
room: str = "general",
category: str = "",
trust: float = 0.5,
written_at: Optional[float] = None,
force: bool = False,
) -> PromotionResult:
"""Promote a scratchpad note to durable palace memory."""
if written_at is None:
written_at = time.time()
gates = {}
if not force:
gates["length"] = _check_length(content)
gates["structure"] = _check_structure(content)
gates["duplicate"] = _check_duplicate(content, store, room)
gates["staleness"] = _check_staleness(written_at)
for gate_name, (passed, message) in gates.items():
if not passed:
return PromotionResult(
success=False, memory_id=None,
reason=f"Failed gate '{gate_name}': {message}", gates=gates,
)
memory_id = store.store(content, room=room, category=category, trust=trust)
store.log_promotion(session_id, scratch_key, memory_id, reason="auto" if not force else "forced")
return PromotionResult(success=True, memory_id=memory_id, reason="Promoted to durable memory", gates=gates)
def promote_session_batch(
store: SovereignStore,
session_id: str,
notes: dict[str, dict],
room: str = "general",
force: bool = False,
) -> list[PromotionResult]:
"""Promote all notes from a session scratchpad."""
results = []
for key, entry in notes.items():
content = entry.get("value", str(entry)) if isinstance(entry, dict) else str(entry)
written_at = None
if isinstance(entry, dict) and "written_at" in entry:
try:
import datetime
written_at = datetime.datetime.strptime(
entry["written_at"], "%Y-%m-%d %H:%M:%S"
).timestamp()
except (ValueError, TypeError):
pass
result = promote(
content=str(content), store=store, session_id=session_id,
scratch_key=key, room=room, written_at=written_at, force=force,
)
results.append(result)
return results