"""Hybrid keyword/BM25 retrieval for the memory store.
|
|||
|
|
|
|||
|
|
Ported from KIK memory_agent.py — combines FTS5 full-text search with
|
|||
|
|
Jaccard similarity reranking and trust-weighted scoring.
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
from __future__ import annotations
|
|||
|
|
|
|||
|
|
import math
|
|||
|
|
from datetime import datetime, timezone
|
|||
|
|
from typing import TYPE_CHECKING
|
|||
|
|
|
|||
|
|
if TYPE_CHECKING:
|
|||
|
|
from .store import MemoryStore
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
from . import holographic as hrr
|
|||
|
|
except ImportError:
|
|||
|
|
import holographic as hrr # type: ignore[no-redef]
|
|||
|
|
|
|||
|
|
|
|||
|
|
class FactRetriever:
    """Multi-strategy fact retrieval with trust-weighted scoring."""

    def __init__(
        self,
        store: MemoryStore,
        temporal_decay_half_life: int = 0,  # days, 0 = disabled
        fts_weight: float = 0.4,
        jaccard_weight: float = 0.3,
        hrr_weight: float = 0.3,
        hrr_dim: int = 1024,
    ):
        self.store = store
        self.half_life = temporal_decay_half_life
        self.hrr_dim = hrr_dim

        # Without numpy the HRR channel cannot run — fold its share of the
        # scoring weight back into the two lexical channels.
        if hrr_weight > 0 and not hrr._HAS_NUMPY:
            fts_weight, jaccard_weight, hrr_weight = 0.6, 0.4, 0.0

        self.fts_weight = fts_weight
        self.jaccard_weight = jaccard_weight
        self.hrr_weight = hrr_weight
|
|||
|
|
|
|||
|
|
def search(
    self,
    query: str,
    category: str | None = None,
    min_trust: float = 0.3,
    limit: int = 10,
) -> list[dict]:
    """Hybrid search: FTS5 candidates → Jaccard rerank → trust weighting.

    Pipeline:
        1. FTS5 search: get ``limit * 3`` candidates from SQLite full-text search
        2. Jaccard boost: token overlap between query and fact content/tags
        3. HRR boost: holographic similarity against the stored fact vector
        4. Trust weighting: ``final_score = relevance * trust_score``
        5. Temporal decay (optional): ``decay = 0.5 ** (age_days / half_life)``

    Args:
        query: Free-text query string (passed through to FTS5 MATCH).
        category: Optional category filter.
        min_trust: Minimum trust_score a candidate must have.
        limit: Maximum number of results returned.

    Returns:
        List of fact dicts with an added 'score' field, sorted by score
        descending. Raw HRR vector bytes are stripped from the output.
    """
    # Stage 1: over-fetch candidates to give the reranker headroom.
    candidates = self._fts_candidates(query, category, min_trust, limit * 3)
    if not candidates:
        return []

    # Stage 2: rerank with Jaccard + HRR + trust + optional decay.
    query_tokens = self._tokenize(query)
    # FIX: the query's HRR encoding is loop-invariant; previously it was
    # re-encoded once per candidate. Computed lazily so the work is also
    # skipped entirely when no candidate carries an HRR vector.
    query_vec = None

    scored = []
    for fact in candidates:
        all_tokens = self._tokenize(fact["content"]) | self._tokenize(fact.get("tags", ""))
        jaccard = self._jaccard_similarity(query_tokens, all_tokens)
        fts_score = fact.get("fts_rank", 0.0)

        # HRR similarity, shifted from [-1, 1] into [0, 1].
        if self.hrr_weight > 0 and fact.get("hrr_vector"):
            if query_vec is None:
                query_vec = hrr.encode_text(query, self.hrr_dim)
            fact_vec = hrr.bytes_to_phases(fact["hrr_vector"])
            hrr_sim = (hrr.similarity(query_vec, fact_vec) + 1.0) / 2.0
        else:
            hrr_sim = 0.5  # neutral when the channel is unavailable

        # Combine FTS5 + Jaccard + HRR, then weight by trust.
        relevance = (self.fts_weight * fts_score
                     + self.jaccard_weight * jaccard
                     + self.hrr_weight * hrr_sim)
        score = relevance * fact["trust_score"]

        # Optional temporal decay keyed off the most recent timestamp.
        if self.half_life > 0:
            score *= self._temporal_decay(fact.get("updated_at") or fact.get("created_at"))

        fact["score"] = score
        scored.append(fact)

    scored.sort(key=lambda x: x["score"], reverse=True)
    results = scored[:limit]
    # Strip raw HRR bytes — callers expect JSON-serializable dicts.
    for fact in results:
        fact.pop("hrr_vector", None)
    return results
|
|||
|
|
|
|||
|
|
def probe(
    self,
    entity: str,
    category: str | None = None,
    limit: int = 10,
) -> list[dict]:
    """Compositional entity query using HRR algebra.

    Unbinds the entity from the memory bank to extract associated content.
    This is NOT keyword search — it uses algebraic structure to find facts
    where the entity plays a structural role.

    Falls back to FTS5 search if numpy is unavailable.
    """
    if not hrr._HAS_NUMPY:
        # Fallback to keyword search on the entity name
        return self.search(entity, category=category, limit=limit)

    conn = self.store._conn

    # Encode entity as a role-bound probe key.
    role_entity = hrr.encode_atom("__hrr_role_entity__", self.hrr_dim)
    entity_vec = hrr.encode_atom(entity.lower(), self.hrr_dim)
    probe_key = hrr.bind(entity_vec, role_entity)

    # Try the category-specific bank first, then fall through to all facts.
    if category:
        bank_name = f"cat:{category}"
        bank_row = conn.execute(
            "SELECT vector FROM memory_banks WHERE bank_name = ?",
            (bank_name,),
        ).fetchone()
        if bank_row:
            bank_vec = hrr.bytes_to_phases(bank_row["vector"])
            extracted = hrr.unbind(bank_vec, probe_key)
            # Use the extracted signal to score individual facts.
            return self._score_facts_by_vector(
                extracted, category=category, limit=limit
            )

    # Score against individual fact vectors directly.
    where = "WHERE hrr_vector IS NOT NULL"
    params: list = []
    if category:
        where += " AND category = ?"
        params.append(category)

    rows = conn.execute(
        f"""
        SELECT fact_id, content, category, tags, trust_score,
               retrieval_count, helpful_count, created_at, updated_at,
               hrr_vector
        FROM facts
        {where}
        """,
        params,
    ).fetchall()

    if not rows:
        # Final fallback: keyword search
        return self.search(entity, category=category, limit=limit)

    # FIX: the content-role atom is loop-invariant — previously it was
    # re-encoded once per row; encode it a single time here.
    role_content = hrr.encode_atom("__hrr_role_content__", self.hrr_dim)

    scored = []
    for row in rows:
        fact = dict(row)
        fact_vec = hrr.bytes_to_phases(fact.pop("hrr_vector"))
        # Unbind the probe key: if the entity is structurally present,
        # the residual should resemble the fact's role-bound content.
        residual = hrr.unbind(fact_vec, probe_key)
        content_vec = hrr.bind(hrr.encode_text(fact["content"], self.hrr_dim), role_content)
        sim = hrr.similarity(residual, content_vec)
        fact["score"] = (sim + 1.0) / 2.0 * fact["trust_score"]
        scored.append(fact)

    scored.sort(key=lambda x: x["score"], reverse=True)
    return scored[:limit]
|
|||
|
|
|
|||
|
|
def related(
    self,
    entity: str,
    category: str | None = None,
    limit: int = 10,
) -> list[dict]:
    """Discover facts that share structural connections with an entity.

    Unlike probe (which finds facts *about* an entity), related finds
    facts that are connected through shared context — e.g., other entities
    mentioned alongside this one, or content that overlaps structurally.

    Falls back to FTS5 search if numpy is unavailable.
    """
    if not hrr._HAS_NUMPY:
        return self.search(entity, category=category, limit=limit)

    conn = self.store._conn

    # Encode entity as a bare atom (not role-bound — we want ANY structural match)
    entity_vec = hrr.encode_atom(entity.lower(), self.hrr_dim)

    # Get all facts with vectors
    where = "WHERE hrr_vector IS NOT NULL"
    params: list = []
    if category:
        where += " AND category = ?"
        params.append(category)

    rows = conn.execute(
        f"""
        SELECT fact_id, content, category, tags, trust_score,
               retrieval_count, helpful_count, created_at, updated_at,
               hrr_vector
        FROM facts
        {where}
        """,
        params,
    ).fetchall()

    if not rows:
        return self.search(entity, category=category, limit=limit)

    # FIX: both role atoms are loop-invariant — previously they were
    # re-encoded for every row; encode them once up front.
    role_entity = hrr.encode_atom("__hrr_role_entity__", self.hrr_dim)
    role_content = hrr.encode_atom("__hrr_role_content__", self.hrr_dim)

    # Score each fact by how much the entity's atom appears in its vector.
    # This catches both role-bound entity matches AND content word matches.
    scored = []
    for row in rows:
        fact = dict(row)
        fact_vec = hrr.bytes_to_phases(fact.pop("hrr_vector"))

        # Unbind the entity from the fact: a residual similar to ANY known
        # role vector means the entity plays a structural role in the fact.
        residual = hrr.unbind(fact_vec, entity_vec)
        entity_role_sim = hrr.similarity(residual, role_entity)
        content_role_sim = hrr.similarity(residual, role_content)
        # Take the max — the entity could appear in either role.
        best_sim = max(entity_role_sim, content_role_sim)

        fact["score"] = (best_sim + 1.0) / 2.0 * fact["trust_score"]
        scored.append(fact)

    scored.sort(key=lambda x: x["score"], reverse=True)
    return scored[:limit]
|
|||
|
|
|
|||
|
|
def reason(
    self,
    entities: list[str],
    category: str | None = None,
    limit: int = 10,
) -> list[dict]:
    """Multi-entity compositional query — vector-space JOIN.

    Given multiple entities, algebraically intersects their structural
    connections to find facts related to ALL of them simultaneously.
    This is compositional reasoning that no embedding DB can do.

    Example: reason(["peppi", "backend"]) finds facts where peppi AND
    backend both play structural roles — without keyword matching.

    Falls back to FTS5 search if numpy is unavailable.
    """
    if not hrr._HAS_NUMPY or not entities:
        # Fallback: treat every entity name as a keyword and run one search.
        return self.search(" ".join(entities), category=category, limit=limit)

    conn = self.store._conn
    role_entity = hrr.encode_atom("__hrr_role_entity__", self.hrr_dim)

    # One role-bound probe key per entity.
    probe_keys = [
        hrr.bind(hrr.encode_atom(name.lower(), self.hrr_dim), role_entity)
        for name in entities
    ]

    # Fetch every fact that carries an HRR vector.
    where = "WHERE hrr_vector IS NOT NULL"
    params: list = []
    if category:
        where += " AND category = ?"
        params.append(category)

    rows = conn.execute(
        f"""
        SELECT fact_id, content, category, tags, trust_score,
               retrieval_count, helpful_count, created_at, updated_at,
               hrr_vector
        FROM facts
        {where}
        """,
        params,
    ).fetchall()

    if not rows:
        return self.search(" ".join(entities), category=category, limit=limit)

    # AND semantics: a fact is scored by its WEAKEST entity match (min),
    # so it ranks high only when every entity is structurally present.
    # (OR semantics would use mean/max instead.)
    role_content = hrr.encode_atom("__hrr_role_content__", self.hrr_dim)

    scored = []
    for row in rows:
        fact = dict(row)
        fact_vec = hrr.bytes_to_phases(fact.pop("hrr_vector"))

        min_sim = min(
            hrr.similarity(hrr.unbind(fact_vec, key), role_content)
            for key in probe_keys
        )
        fact["score"] = (min_sim + 1.0) / 2.0 * fact["trust_score"]
        scored.append(fact)

    scored.sort(key=lambda f: f["score"], reverse=True)
    return scored[:limit]
|
|||
|
|
|
|||
|
|
def contradict(
    self,
    category: str | None = None,
    threshold: float = 0.3,
    limit: int = 10,
) -> list[dict]:
    """Find potentially contradictory facts via entity overlap + content divergence.

    Two facts contradict when they share entities (same subject) but have
    low content-vector similarity (different claims). This is automated
    memory hygiene — no other memory system does this.

    Returns pairs of facts with a contradiction score.
    Falls back to an empty list if numpy is unavailable.
    """
    if not hrr._HAS_NUMPY:
        return []

    conn = self.store._conn

    # Get all facts with vectors and their linked entities
    where = "WHERE f.hrr_vector IS NOT NULL"
    params: list = []
    if category:
        where += " AND f.category = ?"
        params.append(category)

    rows = conn.execute(
        f"""
        SELECT f.fact_id, f.content, f.category, f.tags, f.trust_score,
               f.created_at, f.updated_at, f.hrr_vector
        FROM facts f
        {where}
        """,
        params,
    ).fetchall()

    if len(rows) < 2:
        return []

    # Guard against O(n²) explosion on large fact stores.
    # At 500 facts, that's ~125K comparisons — acceptable.
    # Above that, only check the most recently updated facts.
    _MAX_CONTRADICT_FACTS = 500
    if len(rows) > _MAX_CONTRADICT_FACTS:
        rows = sorted(rows, key=lambda r: r["updated_at"] or r["created_at"], reverse=True)
        rows = rows[:_MAX_CONTRADICT_FACTS]

    # Build entity sets per fact (one small query each; bounded by the cap above).
    fact_entities: dict[int, set[str]] = {}
    for row in rows:
        fid = row["fact_id"]
        entity_rows = conn.execute(
            """
            SELECT e.name FROM entities e
            JOIN fact_entities fe ON fe.entity_id = e.entity_id
            WHERE fe.fact_id = ?
            """,
            (fid,),
        ).fetchall()
        fact_entities[fid] = {r["name"].lower() for r in entity_rows}

    facts = [dict(r) for r in rows]

    # FIX: decode each fact's HRR vector at most once. Previously
    # bytes_to_phases ran inside the pair loop, re-decoding the same
    # bytes up to n-1 times. Decoding stays lazy so facts that never
    # pass the entity-overlap gate are never decoded at all.
    _vec_cache: dict[int, object] = {}

    def _decoded(fact: dict):
        # Memoized decode of a fact's stored HRR vector.
        fid = fact["fact_id"]
        vec = _vec_cache.get(fid)
        if vec is None:
            vec = hrr.bytes_to_phases(fact["hrr_vector"])
            _vec_cache[fid] = vec
        return vec

    # Compare all pairs: high entity overlap + low content similarity = contradiction
    contradictions = []
    for i in range(len(facts)):
        for j in range(i + 1, len(facts)):
            f1, f2 = facts[i], facts[j]
            ents1 = fact_entities.get(f1["fact_id"], set())
            ents2 = fact_entities.get(f2["fact_id"], set())

            if not ents1 or not ents2:
                continue

            # Entity overlap (Jaccard)
            entity_overlap = len(ents1 & ents2) / len(ents1 | ents2) if (ents1 | ents2) else 0.0

            if entity_overlap < 0.3:
                continue  # Not enough entity overlap to be contradictory

            # Content similarity via HRR vectors
            content_sim = hrr.similarity(_decoded(f1), _decoded(f2))

            # High entity overlap + low content similarity = potential contradiction
            # contradiction_score: higher = more contradictory
            contradiction_score = entity_overlap * (1.0 - (content_sim + 1.0) / 2.0)

            if contradiction_score >= threshold:
                # Strip hrr_vector from output (not JSON serializable)
                f1_clean = {k: v for k, v in f1.items() if k != "hrr_vector"}
                f2_clean = {k: v for k, v in f2.items() if k != "hrr_vector"}
                contradictions.append({
                    "fact_a": f1_clean,
                    "fact_b": f2_clean,
                    "entity_overlap": round(entity_overlap, 3),
                    "content_similarity": round(content_sim, 3),
                    "contradiction_score": round(contradiction_score, 3),
                    "shared_entities": sorted(ents1 & ents2),
                })

    contradictions.sort(key=lambda x: x["contradiction_score"], reverse=True)
    return contradictions[:limit]
|
|||
|
|
|
|||
|
|
def _score_facts_by_vector(
    self,
    target_vec: "np.ndarray",
    category: str | None = None,
    limit: int = 10,
) -> list[dict]:
    """Score facts by similarity to a target vector."""
    conn = self.store._conn

    where = "WHERE hrr_vector IS NOT NULL"
    params: list = []
    if category:
        where += " AND category = ?"
        params.append(category)

    rows = conn.execute(
        f"""
        SELECT fact_id, content, category, tags, trust_score,
               retrieval_count, helpful_count, created_at, updated_at,
               hrr_vector
        FROM facts
        {where}
        """,
        params,
    ).fetchall()

    def _as_scored(row) -> dict:
        # Shift similarity from [-1, 1] to [0, 1] and weight by trust.
        fact = dict(row)
        vec = hrr.bytes_to_phases(fact.pop("hrr_vector"))
        cosine = hrr.similarity(target_vec, vec)
        fact["score"] = (cosine + 1.0) / 2.0 * fact["trust_score"]
        return fact

    ranked = sorted(
        (_as_scored(r) for r in rows),
        key=lambda f: f["score"],
        reverse=True,
    )
    return ranked[:limit]
|
|||
|
|
|
|||
|
|
def _fts_candidates(
    self,
    query: str,
    category: str | None,
    min_trust: float,
    limit: int,
) -> list[dict]:
    """Get raw FTS5 candidates from the store.

    Uses the store's database connection directly for FTS5 MATCH
    with rank scoring. Normalizes FTS5 rank to [0, 1] range.
    """
    conn = self.store._conn

    # FTS5 rank is negative (lower = better match); join facts_fts back
    # to facts to pull every fact column alongside the rank.
    filters = ["facts_fts MATCH ?"]
    params: list = [query]

    if category:
        filters.append("f.category = ?")
        params.append(category)

    filters.append("f.trust_score >= ?")
    params.append(min_trust)

    sql = f"""
        SELECT f.*, facts_fts.rank as fts_rank_raw
        FROM facts_fts
        JOIN facts f ON f.fact_id = facts_fts.rowid
        WHERE {" AND ".join(filters)}
        ORDER BY facts_fts.rank
        LIMIT ?
    """
    params.append(limit)

    try:
        rows = conn.execute(sql, params).fetchall()
    except Exception:
        # FTS5 MATCH can fail on malformed queries — fall back to empty
        return []

    if not rows:
        return []

    # Normalize: |rank| is larger for better matches, so dividing by the
    # max magnitude lands every score in [0, 1] with the best match at 1.
    magnitudes = [abs(row["fts_rank_raw"]) for row in rows]
    scale = max(magnitudes) if magnitudes else 1.0
    scale = max(scale, 1e-6)  # avoid div by zero

    results = []
    for row, magnitude in zip(rows, magnitudes):
        fact = dict(row)
        fact.pop("fts_rank_raw", None)
        fact["fts_rank"] = magnitude / scale
        results.append(fact)

    return results
|
|||
|
|
|
|||
|
|
@staticmethod
|
|||
|
|
def _tokenize(text: str) -> set[str]:
|
|||
|
|
"""Simple whitespace tokenization with lowercasing.
|
|||
|
|
|
|||
|
|
Strips common punctuation. No stemming/lemmatization (Phase 1).
|
|||
|
|
"""
|
|||
|
|
if not text:
|
|||
|
|
return set()
|
|||
|
|
# Split on whitespace, lowercase, strip punctuation
|
|||
|
|
tokens = set()
|
|||
|
|
for word in text.lower().split():
|
|||
|
|
cleaned = word.strip(".,;:!?\"'()[]{}#@<>")
|
|||
|
|
if cleaned:
|
|||
|
|
tokens.add(cleaned)
|
|||
|
|
return tokens
|
|||
|
|
|
|||
|
|
@staticmethod
|
|||
|
|
def _jaccard_similarity(set_a: set, set_b: set) -> float:
|
|||
|
|
"""Jaccard similarity coefficient: |A ∩ B| / |A ∪ B|."""
|
|||
|
|
if not set_a or not set_b:
|
|||
|
|
return 0.0
|
|||
|
|
intersection = len(set_a & set_b)
|
|||
|
|
union = len(set_a | set_b)
|
|||
|
|
return intersection / union if union > 0 else 0.0
|
|||
|
|
|
|||
|
|
def _temporal_decay(self, timestamp_str: str | None) -> float:
|
|||
|
|
"""Exponential decay: 0.5^(age_days / half_life_days).
|
|||
|
|
|
|||
|
|
Returns 1.0 if decay is disabled or timestamp is missing.
|
|||
|
|
"""
|
|||
|
|
if not self.half_life or not timestamp_str:
|
|||
|
|
return 1.0
|
|||
|
|
|
|||
|
|
try:
|
|||
|
|
if isinstance(timestamp_str, str):
|
|||
|
|
# Parse ISO format timestamp from SQLite
|
|||
|
|
ts = datetime.fromisoformat(timestamp_str.replace("Z", "+00:00"))
|
|||
|
|
else:
|
|||
|
|
ts = timestamp_str
|
|||
|
|
|
|||
|
|
if ts.tzinfo is None:
|
|||
|
|
ts = ts.replace(tzinfo=timezone.utc)
|
|||
|
|
|
|||
|
|
age_days = (datetime.now(timezone.utc) - ts).total_seconds() / 86400
|
|||
|
|
if age_days < 0:
|
|||
|
|
return 1.0
|
|||
|
|
|
|||
|
|
return math.pow(0.5, age_days / self.half_life)
|
|||
|
|
except (ValueError, TypeError):
|
|||
|
|
return 1.0
|