# timmy-config/hermes-sovereign/mempalace/promotion.py

"""Memory Promotion — quality-gated scratchpad-to-palace promotion.

Implements MP-4 (#371): move session notes to durable memory only when
they pass quality gates. No LLM calls — all heuristic-based.

Quality gates (in the order they are evaluated):
  1. Content length (too short = noise; overly long notes must be summarized first)
  2. Structural quality (contains letters and at least three words, not just a fragment)
  3. Duplicate detection (FTS5 + HRR similarity check, plus word-level overlap)
  4. Staleness check (don't promote stale notes from old sessions)

Refs: Epic #367, Sub-issue #371
"""

from __future__ import annotations

import re
import time
from typing import Optional

try:
    from .sovereign_store import SovereignStore
except ImportError:
    from sovereign_store import SovereignStore


# ---------------------------------------------------------------------------
# Quality gate thresholds
# ---------------------------------------------------------------------------

# Inclusive word-count bounds enforced by the length gate.
MIN_CONTENT_WORDS = 5
MAX_CONTENT_WORDS = 500

# A search result scoring strictly above this is treated as a duplicate.
DUPLICATE_SIMILARITY = 0.85
# NOTE(review): not referenced in the visible part of this module — confirm
# whether it is still needed.
DUPLICATE_FTS_THRESHOLD = 3

# Notes older than this many seconds (seven days) fail the staleness gate.
STALE_SECONDS = 7 * 24 * 60 * 60

# NOTE(review): not referenced in the visible part of this module — presumably
# a minimum trust level for automatic promotion; confirm against callers.
MIN_TRUST_FOR_AUTO = 0.4


# ---------------------------------------------------------------------------
# Quality checks
# ---------------------------------------------------------------------------

def _check_length(content: str) -> tuple[bool, str]:
    """Gate 1: reject content whose word count is outside the allowed range.

    Words are whitespace-separated tokens. Both bounds are inclusive: exactly
    ``MIN_CONTENT_WORDS`` or ``MAX_CONTENT_WORDS`` words still passes.

    Returns:
        ``(passed, message)`` — the message is ``"OK"`` on success, otherwise
        a human-readable rejection reason.
    """
    word_count = len(content.split())

    if word_count < MIN_CONTENT_WORDS:
        return False, f"Too short ({word_count} words, minimum {MIN_CONTENT_WORDS})"

    if word_count > MAX_CONTENT_WORDS:
        return False, f"Too long ({word_count} words, maximum {MAX_CONTENT_WORDS}). Summarize first."

    return True, "OK"


def _check_structure(content: str) -> tuple[bool, str]:
    """Gate 2: Basic structural quality."""
    if not re.search(r"[a-zA-Z]", content):
        return False, "No alphabetic content — pure code/numbers are not memory-worthy"
    if len(content.split()) < 3:
        return False, "Fragment — needs at least subject + predicate"
    return True, "OK"


def _check_duplicate(content: str, store: SovereignStore, room: str) -> tuple[bool, str]:
    """Gate 3: reject content that closely matches an existing memory.

    Asks the store for up to five search hits in ``room`` (no trust floor)
    and inspects them in the order returned. A hit fails the gate when its
    search score exceeds ``DUPLICATE_SIMILARITY`` or when its word-level
    overlap with ``content`` is above 0.8.

    Returns:
        ``(passed, message)`` — the message names the offending memory id on
        rejection and is ``"OK"`` otherwise.
    """
    candidates = store.search(content, room=room, limit=5, min_trust=0.0)

    for hit in candidates:
        score = hit["score"]
        if score > DUPLICATE_SIMILARITY:
            return False, f"Duplicate detected: memory #{hit['memory_id']} (score {score:.3f})"

        # The score can miss a paraphrase-free copy; compare the raw wording too.
        word_overlap = _text_overlap(content, hit["content"])
        if word_overlap > 0.8:
            return False, f"Near-duplicate text: memory #{hit['memory_id']}"

    return True, "OK"


def _check_staleness(written_at: float) -> tuple[bool, str]:
    """Gate 4: reject notes written more than ``STALE_SECONDS`` ago.

    Args:
        written_at: Epoch timestamp (seconds) of when the note was written;
            compared against ``time.time()``.

    Returns:
        ``(passed, message)`` — on rejection the message reports the age in
        whole days.
    """
    age_seconds = time.time() - written_at
    is_stale = age_seconds > STALE_SECONDS
    if not is_stale:
        return True, "OK"

    whole_days = int(age_seconds / 86400)
    return False, f"Stale ({whole_days} days old). Review manually before promoting."


def _text_overlap(a: str, b: str) -> float:
    """Jaccard similarity between two texts (word-level)."""
    words_a = set(a.lower().split())
    words_b = set(b.lower().split())
    if not words_a or not words_b:
        return 0.0
    intersection = words_a & words_b
    union = words_a | words_b
    return len(intersection) / len(union)


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

class PromotionResult:
    """Outcome of one promotion attempt.

    Attributes are plain and public:
        success: ``True`` when the note was stored, ``False`` when rejected.
        memory_id: Id handed to the constructor — ``None`` for rejections.
        reason: Human-readable explanation of the outcome.
        gates: Mapping of gate name to the result that gate produced.
    """

    def __init__(self, success: bool, memory_id: Optional[int], reason: str, gates: dict):
        # Outcome flag and explanation.
        self.success = success
        self.reason = reason
        # Where the note ended up, and the gate results that led there.
        self.memory_id = memory_id
        self.gates = gates

    def __repr__(self):
        if self.success:
            label = "PROMOTED"
        else:
            label = "REJECTED"
        return f"PromotionResult({label}: {self.reason})"


def evaluate_for_promotion(
    content: str,
    store: SovereignStore,
    room: str = "general",
    written_at: Optional[float] = None,
) -> dict:
    """Dry-run the quality gates for ``content`` without storing anything.

    Args:
        content: Note text to evaluate.
        store: Store queried by the duplicate gate.
        room: Room the duplicate search is restricted to.
        written_at: Epoch timestamp of the note; defaults to the current time.

    Returns:
        A dict with ``"eligible"`` (``True`` only if every gate passed),
        ``"gates"`` (gate name -> ``(passed, message)``) and
        ``"content_preview"`` (first 100 characters, with ``...`` appended
        when the content is longer).
    """
    if written_at is None:
        written_at = time.time()

    # Every gate is always evaluated, in this order, so the report is complete.
    gate_results = {
        "length": _check_length(content),
        "structure": _check_structure(content),
        "duplicate": _check_duplicate(content, store, room),
        "staleness": _check_staleness(written_at),
    }

    preview = content[:100]
    if len(content) > 100:
        preview += "..."

    return {
        "eligible": all(outcome[0] for outcome in gate_results.values()),
        "gates": gate_results,
        "content_preview": preview,
    }


def promote(
    content: str,
    store: SovereignStore,
    session_id: str,
    scratch_key: str,
    room: str = "general",
    category: str = "",
    trust: float = 0.5,
    written_at: Optional[float] = None,
    force: bool = False,
) -> PromotionResult:
    """Store a scratchpad note as durable memory if it clears the quality gates.

    Args:
        content: Note text to promote.
        store: Target store; also queried by the duplicate gate.
        session_id: Session the note came from (recorded in the promotion log).
        scratch_key: Scratchpad key of the note (recorded in the promotion log).
        room: Room to store the memory in and to search for duplicates.
        category: Category passed through to the store.
        trust: Trust value passed through to the store.
        written_at: Epoch timestamp of the note; defaults to the current time.
        force: When ``True`` no gate is run and the note is stored regardless.

    Returns:
        A ``PromotionResult``. On rejection it names the first failing gate
        and carries the results of all gates; on success it carries the new
        memory id. ``gates`` is empty for forced promotions.
    """
    if written_at is None:
        written_at = time.time()

    gates = {}
    if not force:
        # Same gate run as the dry-run API: all gates are evaluated, then the
        # first failure (in evaluation order) decides the rejection reason.
        gates = evaluate_for_promotion(
            content, store, room=room, written_at=written_at
        )["gates"]
        first_failure = next(
            ((name, message) for name, (passed, message) in gates.items() if not passed),
            None,
        )
        if first_failure is not None:
            gate_name, message = first_failure
            return PromotionResult(
                success=False,
                memory_id=None,
                reason=f"Failed gate '{gate_name}': {message}",
                gates=gates,
            )

    memory_id = store.store(content, room=room, category=category, trust=trust)
    log_reason = "forced" if force else "auto"
    store.log_promotion(session_id, scratch_key, memory_id, reason=log_reason)
    return PromotionResult(
        success=True,
        memory_id=memory_id,
        reason="Promoted to durable memory",
        gates=gates,
    )


def promote_session_batch(
    store: SovereignStore,
    session_id: str,
    notes: dict[str, dict],
    room: str = "general",
    force: bool = False,
) -> list[PromotionResult]:
    """Promote every note of a session scratchpad, one ``promote`` call each.

    Args:
        store: Store handed through to ``promote``.
        session_id: Session the notes belong to; handed through to ``promote``.
        notes: Mapping of scratchpad key to entry. A dict entry supplies its
            text under ``"value"`` (falling back to the ``str()`` of the whole
            entry) and may carry ``"written_at"``; a non-dict entry is
            promoted as its ``str()`` form.
        room: Target room for every note.
        force: Handed through to ``promote``.

    Returns:
        One ``PromotionResult`` per note, in the iteration order of ``notes``.

    ``"written_at"`` may be a ``"%Y-%m-%d %H:%M:%S"`` string or a numeric
    epoch timestamp. Anything else is ignored (best effort) and the note is
    promoted without a timestamp.
    """
    import datetime  # function-scope import, done once instead of per note

    results = []
    for key, entry in notes.items():
        written_at = None
        if isinstance(entry, dict):
            content = entry.get("value", str(entry))
            raw_stamp = entry.get("written_at")
            if isinstance(raw_stamp, (int, float)) and not isinstance(raw_stamp, bool):
                # Already an epoch value. Previously this fell into strptime,
                # raised a swallowed TypeError, and the note lost its age.
                written_at = float(raw_stamp)
            elif raw_stamp is not None:
                try:
                    # NOTE(review): the parsed datetime is naive, so
                    # .timestamp() interprets it as local time — confirm the
                    # scratchpad does not record written_at in UTC.
                    written_at = datetime.datetime.strptime(
                        raw_stamp, "%Y-%m-%d %H:%M:%S"
                    ).timestamp()
                except (ValueError, TypeError):
                    # Best effort: an unparseable timestamp is dropped rather
                    # than aborting the whole batch.
                    written_at = None
        else:
            content = str(entry)

        results.append(
            promote(
                content=str(content),
                store=store,
                session_id=session_id,
                scratch_key=key,
                room=room,
                written_at=written_at,
                force=force,
            )
        )
    return results