""" SQLite-backed fact store with entity resolution and trust scoring. Single-user Hermes memory store plugin. """ import re import sqlite3 import threading from pathlib import Path try: from . import holographic as hrr except ImportError: import holographic as hrr # type: ignore[no-redef] _SCHEMA = """ CREATE TABLE IF NOT EXISTS facts ( fact_id INTEGER PRIMARY KEY AUTOINCREMENT, content TEXT NOT NULL UNIQUE, category TEXT DEFAULT 'general', tags TEXT DEFAULT '', trust_score REAL DEFAULT 0.5, retrieval_count INTEGER DEFAULT 0, helpful_count INTEGER DEFAULT 0, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, hrr_vector BLOB ); CREATE TABLE IF NOT EXISTS entities ( entity_id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT NOT NULL, entity_type TEXT DEFAULT 'unknown', aliases TEXT DEFAULT '', created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); CREATE TABLE IF NOT EXISTS fact_entities ( fact_id INTEGER REFERENCES facts(fact_id), entity_id INTEGER REFERENCES entities(entity_id), PRIMARY KEY (fact_id, entity_id) ); CREATE INDEX IF NOT EXISTS idx_facts_trust ON facts(trust_score DESC); CREATE INDEX IF NOT EXISTS idx_facts_category ON facts(category); CREATE INDEX IF NOT EXISTS idx_entities_name ON entities(name); CREATE VIRTUAL TABLE IF NOT EXISTS facts_fts USING fts5(content, tags, content=facts, content_rowid=fact_id); CREATE TRIGGER IF NOT EXISTS facts_ai AFTER INSERT ON facts BEGIN INSERT INTO facts_fts(rowid, content, tags) VALUES (new.fact_id, new.content, new.tags); END; CREATE TRIGGER IF NOT EXISTS facts_ad AFTER DELETE ON facts BEGIN INSERT INTO facts_fts(facts_fts, rowid, content, tags) VALUES ('delete', old.fact_id, old.content, old.tags); END; CREATE TRIGGER IF NOT EXISTS facts_au AFTER UPDATE ON facts BEGIN INSERT INTO facts_fts(facts_fts, rowid, content, tags) VALUES ('delete', old.fact_id, old.content, old.tags); INSERT INTO facts_fts(rowid, content, tags) VALUES (new.fact_id, new.content, new.tags); END; CREATE TABLE IF NOT EXISTS memory_banks ( bank_id INTEGER PRIMARY KEY AUTOINCREMENT, bank_name TEXT NOT NULL UNIQUE, vector BLOB NOT NULL, dim INTEGER NOT NULL, fact_count INTEGER DEFAULT 0, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ); """ # Trust adjustment constants _HELPFUL_DELTA = 0.05 _UNHELPFUL_DELTA = -0.10 _TRUST_MIN = 0.0 _TRUST_MAX = 1.0 # Entity extraction patterns _RE_CAPITALIZED = re.compile(r'\b([A-Z][a-z]+(?:\s+[A-Z][a-z]+)+)\b') _RE_DOUBLE_QUOTE = re.compile(r'"([^"]+)"') _RE_SINGLE_QUOTE = re.compile(r"'([^']+)'") _RE_AKA = re.compile( r'(\w+(?:\s+\w+)*)\s+(?:aka|also known as)\s+(\w+(?:\s+\w+)*)', re.IGNORECASE, ) def _clamp_trust(value: float) -> float: return max(_TRUST_MIN, min(_TRUST_MAX, value)) class MemoryStore: """SQLite-backed fact store with entity resolution and trust scoring.""" def __init__( self, db_path: "str | Path | None" = None, default_trust: float = 0.5, hrr_dim: int = 1024, ) -> None: if db_path is None: from hermes_constants import get_hermes_home db_path = str(get_hermes_home() / "memory_store.db") self.db_path = Path(db_path).expanduser() self.db_path.parent.mkdir(parents=True, exist_ok=True) self.default_trust = _clamp_trust(default_trust) self.hrr_dim = hrr_dim self._hrr_available = hrr._HAS_NUMPY self._conn: sqlite3.Connection = sqlite3.connect( str(self.db_path), check_same_thread=False, timeout=10.0, ) self._lock = threading.RLock() self._conn.row_factory = sqlite3.Row self._init_db() # ------------------------------------------------------------------ # Initialisation # ------------------------------------------------------------------ def _init_db(self) -> None: """Create tables, indexes, and triggers if they do not exist. Enable WAL mode.""" self._conn.execute("PRAGMA journal_mode=WAL") self._conn.executescript(_SCHEMA) # Migrate: add hrr_vector column if missing (safe for existing databases) columns = {row[1] for row in self._conn.execute("PRAGMA table_info(facts)").fetchall()} if "hrr_vector" not in columns: self._conn.execute("ALTER TABLE facts ADD COLUMN hrr_vector BLOB") self._conn.commit() # ------------------------------------------------------------------ # Public API # ------------------------------------------------------------------ def add_fact( self, content: str, category: str = "general", tags: str = "", ) -> int: """Insert a fact and return its fact_id. Deduplicates by content (UNIQUE constraint). On duplicate, returns the existing fact_id without modifying the row. Extracts entities from the content and links them to the fact. """ with self._lock: content = content.strip() if not content: raise ValueError("content must not be empty") try: cur = self._conn.execute( """ INSERT INTO facts (content, category, tags, trust_score) VALUES (?, ?, ?, ?) """, (content, category, tags, self.default_trust), ) self._conn.commit() fact_id: int = cur.lastrowid # type: ignore[assignment] except sqlite3.IntegrityError: # Duplicate content — return existing id row = self._conn.execute( "SELECT fact_id FROM facts WHERE content = ?", (content,) ).fetchone() return int(row["fact_id"]) # Entity extraction and linking for name in self._extract_entities(content): entity_id = self._resolve_entity(name) self._link_fact_entity(fact_id, entity_id) # Compute HRR vector after entity linking self._compute_hrr_vector(fact_id, content) self._rebuild_bank(category) return fact_id def search_facts( self, query: str, category: str | None = None, min_trust: float = 0.3, limit: int = 10, ) -> list[dict]: """Full-text search over facts using FTS5. Returns a list of fact dicts ordered by FTS5 rank, then trust_score descending. Also increments retrieval_count for matched facts. """ with self._lock: query = query.strip() if not query: return [] params: list = [query, min_trust] category_clause = "" if category is not None: category_clause = "AND f.category = ?" params.append(category) params.append(limit) sql = f""" SELECT f.fact_id, f.content, f.category, f.tags, f.trust_score, f.retrieval_count, f.helpful_count, f.created_at, f.updated_at FROM facts f JOIN facts_fts fts ON fts.rowid = f.fact_id WHERE facts_fts MATCH ? AND f.trust_score >= ? {category_clause} ORDER BY fts.rank, f.trust_score DESC LIMIT ? """ rows = self._conn.execute(sql, params).fetchall() results = [self._row_to_dict(r) for r in rows] if results: ids = [r["fact_id"] for r in results] placeholders = ",".join("?" * len(ids)) self._conn.execute( f"UPDATE facts SET retrieval_count = retrieval_count + 1 WHERE fact_id IN ({placeholders})", ids, ) self._conn.commit() return results def update_fact( self, fact_id: int, content: str | None = None, trust_delta: float | None = None, tags: str | None = None, category: str | None = None, ) -> bool: """Partially update a fact. Trust is clamped to [0, 1]. Returns True if the row existed, False otherwise. """ with self._lock: row = self._conn.execute( "SELECT fact_id, trust_score FROM facts WHERE fact_id = ?", (fact_id,) ).fetchone() if row is None: return False assignments: list[str] = ["updated_at = CURRENT_TIMESTAMP"] params: list = [] if content is not None: assignments.append("content = ?") params.append(content.strip()) if tags is not None: assignments.append("tags = ?") params.append(tags) if category is not None: assignments.append("category = ?") params.append(category) if trust_delta is not None: new_trust = _clamp_trust(row["trust_score"] + trust_delta) assignments.append("trust_score = ?") params.append(new_trust) params.append(fact_id) self._conn.execute( f"UPDATE facts SET {', '.join(assignments)} WHERE fact_id = ?", params, ) self._conn.commit() # If content changed, re-extract entities if content is not None: self._conn.execute( "DELETE FROM fact_entities WHERE fact_id = ?", (fact_id,) ) for name in self._extract_entities(content): entity_id = self._resolve_entity(name) self._link_fact_entity(fact_id, entity_id) self._conn.commit() # Recompute HRR vector if content changed if content is not None: self._compute_hrr_vector(fact_id, content) # Rebuild bank for relevant category cat = category or self._conn.execute( "SELECT category FROM facts WHERE fact_id = ?", (fact_id,) ).fetchone()["category"] self._rebuild_bank(cat) return True def remove_fact(self, fact_id: int) -> bool: """Delete a fact and its entity links. Returns True if the row existed.""" with self._lock: row = self._conn.execute( "SELECT fact_id, category FROM facts WHERE fact_id = ?", (fact_id,) ).fetchone() if row is None: return False self._conn.execute( "DELETE FROM fact_entities WHERE fact_id = ?", (fact_id,) ) self._conn.execute("DELETE FROM facts WHERE fact_id = ?", (fact_id,)) self._conn.commit() self._rebuild_bank(row["category"]) return True def list_facts( self, category: str | None = None, min_trust: float = 0.0, limit: int = 50, ) -> list[dict]: """Browse facts ordered by trust_score descending. Optionally filter by category and minimum trust score. """ with self._lock: params: list = [min_trust] category_clause = "" if category is not None: category_clause = "AND category = ?" params.append(category) params.append(limit) sql = f""" SELECT fact_id, content, category, tags, trust_score, retrieval_count, helpful_count, created_at, updated_at FROM facts WHERE trust_score >= ? {category_clause} ORDER BY trust_score DESC LIMIT ? """ rows = self._conn.execute(sql, params).fetchall() return [self._row_to_dict(r) for r in rows] def record_feedback(self, fact_id: int, helpful: bool) -> dict: """Record user feedback and adjust trust asymmetrically. helpful=True -> trust += 0.05, helpful_count += 1 helpful=False -> trust -= 0.10 Returns a dict with fact_id, old_trust, new_trust, helpful_count. Raises KeyError if fact_id does not exist. """ with self._lock: row = self._conn.execute( "SELECT fact_id, trust_score, helpful_count FROM facts WHERE fact_id = ?", (fact_id,), ).fetchone() if row is None: raise KeyError(f"fact_id {fact_id} not found") old_trust: float = row["trust_score"] delta = _HELPFUL_DELTA if helpful else _UNHELPFUL_DELTA new_trust = _clamp_trust(old_trust + delta) helpful_increment = 1 if helpful else 0 self._conn.execute( """ UPDATE facts SET trust_score = ?, helpful_count = helpful_count + ?, updated_at = CURRENT_TIMESTAMP WHERE fact_id = ? """, (new_trust, helpful_increment, fact_id), ) self._conn.commit() return { "fact_id": fact_id, "old_trust": old_trust, "new_trust": new_trust, "helpful_count": row["helpful_count"] + helpful_increment, } # ------------------------------------------------------------------ # Entity helpers # ------------------------------------------------------------------ def _extract_entities(self, text: str) -> list[str]: """Extract entity candidates from text using simple regex rules. Rules applied (in order): 1. Capitalized multi-word phrases e.g. "John Doe" 2. Double-quoted terms e.g. "Python" 3. Single-quoted terms e.g. 'pytest' 4. AKA patterns e.g. "Guido aka BDFL" -> two entities Returns a deduplicated list preserving first-seen order. """ seen: set[str] = set() candidates: list[str] = [] def _add(name: str) -> None: stripped = name.strip() if stripped and stripped.lower() not in seen: seen.add(stripped.lower()) candidates.append(stripped) for m in _RE_CAPITALIZED.finditer(text): _add(m.group(1)) for m in _RE_DOUBLE_QUOTE.finditer(text): _add(m.group(1)) for m in _RE_SINGLE_QUOTE.finditer(text): _add(m.group(1)) for m in _RE_AKA.finditer(text): _add(m.group(1)) _add(m.group(2)) return candidates def _resolve_entity(self, name: str) -> int: """Find an existing entity by name or alias (case-insensitive) or create one. Returns the entity_id. """ # Exact name match row = self._conn.execute( "SELECT entity_id FROM entities WHERE name LIKE ?", (name,) ).fetchone() if row is not None: return int(row["entity_id"]) # Search aliases — aliases stored as comma-separated; use LIKE with % boundaries alias_row = self._conn.execute( """ SELECT entity_id FROM entities WHERE ',' || aliases || ',' LIKE '%,' || ? || ',%' """, (name,), ).fetchone() if alias_row is not None: return int(alias_row["entity_id"]) # Create new entity cur = self._conn.execute( "INSERT INTO entities (name) VALUES (?)", (name,) ) self._conn.commit() return int(cur.lastrowid) # type: ignore[return-value] def _link_fact_entity(self, fact_id: int, entity_id: int) -> None: """Insert into fact_entities, silently ignore if the link already exists.""" self._conn.execute( """ INSERT OR IGNORE INTO fact_entities (fact_id, entity_id) VALUES (?, ?) """, (fact_id, entity_id), ) self._conn.commit() def _compute_hrr_vector(self, fact_id: int, content: str) -> None: """Compute and store HRR vector for a fact. No-op if numpy unavailable.""" with self._lock: if not self._hrr_available: return # Get entities linked to this fact rows = self._conn.execute( """ SELECT e.name FROM entities e JOIN fact_entities fe ON fe.entity_id = e.entity_id WHERE fe.fact_id = ? """, (fact_id,), ).fetchall() entities = [row["name"] for row in rows] vector = hrr.encode_fact(content, entities, self.hrr_dim) self._conn.execute( "UPDATE facts SET hrr_vector = ? WHERE fact_id = ?", (hrr.phases_to_bytes(vector), fact_id), ) self._conn.commit() def _rebuild_bank(self, category: str) -> None: """Full rebuild of a category's memory bank from all its fact vectors.""" with self._lock: if not self._hrr_available: return bank_name = f"cat:{category}" rows = self._conn.execute( "SELECT hrr_vector FROM facts WHERE category = ? AND hrr_vector IS NOT NULL", (category,), ).fetchall() if not rows: self._conn.execute("DELETE FROM memory_banks WHERE bank_name = ?", (bank_name,)) self._conn.commit() return vectors = [hrr.bytes_to_phases(row["hrr_vector"]) for row in rows] bank_vector = hrr.bundle(*vectors) fact_count = len(vectors) # Check SNR hrr.snr_estimate(self.hrr_dim, fact_count) self._conn.execute( """ INSERT INTO memory_banks (bank_name, vector, dim, fact_count, updated_at) VALUES (?, ?, ?, ?, CURRENT_TIMESTAMP) ON CONFLICT(bank_name) DO UPDATE SET vector = excluded.vector, dim = excluded.dim, fact_count = excluded.fact_count, updated_at = excluded.updated_at """, (bank_name, hrr.phases_to_bytes(bank_vector), self.hrr_dim, fact_count), ) self._conn.commit() def rebuild_all_vectors(self, dim: int | None = None) -> int: """Recompute all HRR vectors + banks from text. For recovery/migration. Returns the number of facts processed. """ with self._lock: if not self._hrr_available: return 0 if dim is not None: self.hrr_dim = dim rows = self._conn.execute( "SELECT fact_id, content, category FROM facts" ).fetchall() categories: set[str] = set() for row in rows: self._compute_hrr_vector(row["fact_id"], row["content"]) categories.add(row["category"]) for category in categories: self._rebuild_bank(category) return len(rows) # ------------------------------------------------------------------ # Utilities # ------------------------------------------------------------------ def _row_to_dict(self, row: sqlite3.Row) -> dict: """Convert a sqlite3.Row to a plain dict.""" return dict(row) def close(self) -> None: """Close the database connection.""" self._conn.close() def __enter__(self) -> "MemoryStore": return self def __exit__(self, *_: object) -> None: self.close()