[loop-cycle-54] refactor: consolidate three memory stores into single table (#37) (#223)

2026-03-15 13:33:24 -04:00
parent 4a68f6cb8b
commit b4cb3e9975
14 changed files with 1425 additions and 1084 deletions
--- a/src/config.py
+++ b/src/config.py
@@ -1,12 +1,12 @@
 import logging as _logging
 import os
 import sys
+from datetime import UTC
+from datetime import datetime as _datetime
 from typing import Literal

 from pydantic_settings import BaseSettings, SettingsConfigDict

-from datetime import UTC, datetime as _datetime
-
 APP_START_TIME: _datetime = _datetime.now(UTC)


--- a/src/dashboard/app.py
+++ b/src/dashboard/app.py
@@ -305,7 +305,7 @@ async def lifespan(app: FastAPI):
    # Auto-prune old vector store memories on startup
    if settings.memory_prune_days > 0:
        try:
-            from timmy.memory.vector_store import prune_memories
+            from timmy.memory_system import prune_memories

            pruned = prune_memories(
                older_than_days=settings.memory_prune_days,
--- a/src/dashboard/routes/memory.py
+++ b/src/dashboard/routes/memory.py
@@ -4,7 +4,7 @@ from fastapi import APIRouter, Form, HTTPException, Request
 from fastapi.responses import HTMLResponse, JSONResponse

 from dashboard.templating import templates
-from timmy.memory.vector_store import (
+from timmy.memory_system import (
    delete_memory,
    get_memory_stats,
    recall_personal_facts_with_ids,
--- a/src/dashboard/routes/system.py
+++ b/src/dashboard/routes/system.py
@@ -180,7 +180,7 @@ async def api_briefing_status():
@router.get("/api/memory/status", response_class=JSONResponse)
 async def api_memory_status():
    """Return memory database status including file info and indexed files count."""
-    from timmy.memory.vector_store import get_memory_stats
+    from timmy.memory_system import get_memory_stats

    db_path = Path(settings.repo_root) / "data" / "memory.db"
    db_exists = db_path.exists()
--- a/src/timmy/memory/unified.py
+++ b/src/timmy/memory/unified.py
@@ -1,28 +1,29 @@
-"""Unified memory database — single SQLite DB for all memory types.
+"""Unified memory schema and connection management.

-Consolidates three previously separate stores into one:
- **facts**: Long-term knowledge (user preferences, learned patterns)
- **chunks**: Indexed vault documents (markdown files from memory/)
- **episodes**: Runtime memories (conversations, agent observations)
-
-All three tables live in ``data/memory.db``.  Existing APIs in
-``vector_store.py`` and ``semantic_memory.py`` are updated to point here.
+This module provides the central database schema for Timmy's consolidated
+memory system. All memory types (facts, conversations, documents, vault chunks)
+are stored in a single `memories` table with a `memory_type` discriminator.
 """

 import logging
 import sqlite3
+import uuid
 from collections.abc import Generator
 from contextlib import closing, contextmanager
+from dataclasses import dataclass, field
+from datetime import UTC, datetime
 from pathlib import Path

 logger = logging.getLogger(__name__)

-DB_PATH = Path(__file__).parent.parent.parent.parent / "data" / "memory.db"
+# Paths
+PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
+DB_PATH = PROJECT_ROOT / "data" / "memory.db"


@contextmanager
 def get_connection() -> Generator[sqlite3.Connection, None, None]:
-    """Open (and lazily create) the unified memory database."""
+    """Get database connection to unified memory database."""
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    with closing(sqlite3.connect(str(DB_PATH))) as conn:
        conn.row_factory = sqlite3.Row
@@ -33,56 +34,166 @@ def get_connection() -> Generator[sqlite3.Connection, None, None]:


 def _ensure_schema(conn: sqlite3.Connection) -> None:
-    """Create the three core tables and indexes if they don't exist."""
-
-    # --- facts ---------------------------------------------------------------
+    """Create the unified ``memories`` table and its indexes if missing.
+
+    Every DDL statement uses ``IF NOT EXISTS``, so it is a no-op when the
+    schema is already present. Once the table and indexes are committed,
+    ``_migrate_schema`` is called to move rows out of any legacy tables.
+
+    Args:
+        conn: Open connection to the unified memory database.
+    """
    conn.execute("""
-        CREATE TABLE IF NOT EXISTS facts (
+        CREATE TABLE IF NOT EXISTS memories (
            id TEXT PRIMARY KEY,
-            category TEXT NOT NULL DEFAULT 'general',
            content TEXT NOT NULL,
-            confidence REAL NOT NULL DEFAULT 0.8,
+            memory_type TEXT NOT NULL DEFAULT 'fact',
            source TEXT NOT NULL DEFAULT 'agent',
+            embedding TEXT,
+            metadata TEXT,
+            source_hash TEXT,
+            agent_id TEXT,
+            task_id TEXT,
+            session_id TEXT,
+            confidence REAL NOT NULL DEFAULT 0.8,
            tags TEXT NOT NULL DEFAULT '[]',
            created_at TEXT NOT NULL,
            last_accessed TEXT,
            access_count INTEGER NOT NULL DEFAULT 0
        )
    """)
-    conn.execute("CREATE INDEX IF NOT EXISTS idx_facts_category ON facts(category)")
-    conn.execute("CREATE INDEX IF NOT EXISTS idx_facts_confidence ON facts(confidence)")
+    
+    # Create indexes for efficient querying
+    conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(memory_type)")
+    conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_time ON memories(created_at)")
+    conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_session ON memories(session_id)")
+    conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_agent ON memories(agent_id)")
+    conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_source ON memories(source)")
+    conn.commit()

-    # --- chunks (vault document fragments) -----------------------------------
-    conn.execute("""
-        CREATE TABLE IF NOT EXISTS chunks (
-            id TEXT PRIMARY KEY,
-            source TEXT NOT NULL,
-            content TEXT NOT NULL,
-            embedding TEXT NOT NULL,
-            created_at TEXT NOT NULL,
-            source_hash TEXT NOT NULL
-        )
-    """)
-    conn.execute("CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source)")
+    # Run migration if needed (schema is committed first, so a failed
+    # migration cannot undo table/index creation)
+    _migrate_schema(conn)

-    # --- episodes (runtime memory entries) -----------------------------------
-    conn.execute("""
-        CREATE TABLE IF NOT EXISTS episodes (
-            id TEXT PRIMARY KEY,
-            content TEXT NOT NULL,
-            source TEXT NOT NULL,
-            context_type TEXT NOT NULL DEFAULT 'conversation',
-            embedding TEXT,
-            metadata TEXT,
-            agent_id TEXT,
-            task_id TEXT,
-            session_id TEXT,
-            timestamp TEXT NOT NULL
-        )
-    """)
-    conn.execute("CREATE INDEX IF NOT EXISTS idx_episodes_type ON episodes(context_type)")
-    conn.execute("CREATE INDEX IF NOT EXISTS idx_episodes_time ON episodes(timestamp)")
-    conn.execute("CREATE INDEX IF NOT EXISTS idx_episodes_session ON episodes(session_id)")
-    conn.execute("CREATE INDEX IF NOT EXISTS idx_episodes_agent ON episodes(agent_id)")
+
+def _migrate_schema(conn: sqlite3.Connection) -> None:
+    """Migrate data from the legacy three-table schema into ``memories``.
+
+    Each data-migration step is best-effort and atomic: on success it is
+    committed; on ``sqlite3.Error`` it is rolled back, a warning is logged,
+    and the legacy table is left in place so the step can be retried on a
+    later call.
+
+    Migration paths:
+    - episodes table -> memories (context_type -> memory_type)
+    - chunks table -> memories with memory_type='vault_chunk'
+      (source_hash is carried over when the legacy table has it)
+    - facts table -> dropped when empty (unused, 0 rows expected); kept and
+      reported with a warning if it unexpectedly holds rows
+
+    Args:
+        conn: Open connection to the unified memory database.
+    """
+    cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
+    tables = {row[0] for row in cursor.fetchall()}
+
+    has_memories = "memories" in tables
+    has_episodes = "episodes" in tables
+    has_chunks = "chunks" in tables
+    has_facts = "facts" in tables
+
+    # _ensure_schema creates the table before calling this function, so this
+    # branch is only reached when the function is invoked on its own; the
+    # episodes/chunks migrations below are then skipped.
+    if not has_memories:
+        logger.info("Migration: Creating unified memories table")
+
+    # Migrate episodes -> memories
+    if has_episodes and has_memories:
+        logger.info("Migration: Converting episodes table to memories")
+        try:
+            cols = _get_table_columns(conn, "episodes")
+            context_type_col = "context_type" if "context_type" in cols else "'conversation'"
+
+            conn.execute(f"""
+                INSERT INTO memories (
+                    id, content, memory_type, source, embedding,
+                    metadata, agent_id, task_id, session_id,
+                    created_at, access_count, last_accessed
+                )
+                SELECT 
+                    id, content, 
+                    COALESCE({context_type_col}, 'conversation'),
+                    COALESCE(source, 'agent'),
+                    embedding,
+                    metadata, agent_id, task_id, session_id,
+                    COALESCE(timestamp, datetime('now')), 0, NULL
+                FROM episodes
+            """)
+            conn.execute("DROP TABLE episodes")
+            # Commit copy + drop together so a later failing step cannot
+            # leave this one half-applied.
+            conn.commit()
+            logger.info("Migration: Migrated episodes to memories")
+        except sqlite3.Error as exc:
+            # Undo a copy whose DROP failed; otherwise the copied rows would be
+            # committed later and every retry would hit primary-key collisions.
+            conn.rollback()
+            logger.warning("Migration: Failed to migrate episodes: %s", exc)
+
+    # Migrate chunks -> memories as vault_chunk
+    if has_chunks and has_memories:
+        logger.info("Migration: Converting chunks table to memories")
+        try:
+            cols = _get_table_columns(conn, "chunks")
+
+            # Older chunk tables used different column names; fall back to an
+            # SQL expression when a column is absent.
+            id_col = "id" if "id" in cols else "CAST(rowid AS TEXT)"
+            content_col = "content" if "content" in cols else "text"
+            source_col = "filepath" if "filepath" in cols else ("source" if "source" in cols else "'vault'")
+            embedding_col = "embedding" if "embedding" in cols else "NULL"
+            # Preserve the per-source content hash so it is not lost in the move.
+            source_hash_col = "source_hash" if "source_hash" in cols else "NULL"
+            created_col = "created_at" if "created_at" in cols else "datetime('now')"
+
+            conn.execute(f"""
+                INSERT INTO memories (
+                    id, content, memory_type, source, embedding,
+                    source_hash, created_at, access_count
+                )
+                SELECT
+                    {id_col}, {content_col}, 'vault_chunk', {source_col},
+                    {embedding_col}, {source_hash_col}, {created_col}, 0
+                FROM chunks
+            """)
+            conn.execute("DROP TABLE chunks")
+            conn.commit()
+            logger.info("Migration: Migrated chunks to memories")
+        except sqlite3.Error as exc:
+            conn.rollback()
+            logger.warning("Migration: Failed to migrate chunks: %s", exc)
+
+    # Drop old facts table, but never destroy rows on an assumption
+    if has_facts:
+        try:
+            fact_rows = conn.execute("SELECT COUNT(*) FROM facts").fetchone()[0]
+            if fact_rows:
+                logger.warning(
+                    "Migration: facts table has %d rows; leaving it in place", fact_rows
+                )
+            else:
+                conn.execute("DROP TABLE facts")
+                logger.info("Migration: Dropped old facts table")
+        except sqlite3.Error as exc:
+            logger.warning("Migration: Failed to drop facts: %s", exc)

    conn.commit()
+
+
+def _get_table_columns(conn: sqlite3.Connection, table_name: str) -> set[str]:
+    """Return the set of column names defined on ``table_name``.
+
+    ``table_name`` is interpolated directly into the PRAGMA statement, so it
+    must be a trusted identifier (callers in this module pass literals).
+
+    Args:
+        conn: Open SQLite connection.
+        table_name: Name of the table to inspect.
+
+    Returns:
+        Column names reported by ``PRAGMA table_info``.
+    """
+    pragma_rows = conn.execute(f"PRAGMA table_info({table_name})").fetchall()
+    # Index 1 of each PRAGMA table_info row holds the column name.
+    return {info[1] for info in pragma_rows}
+
+
+# Backward compatibility aliases
+get_conn = get_connection
+
+
+@dataclass
+class MemoryEntry:
+    """A single memory record, optionally carrying a vector embedding.
+
+    Note: The DB column is `memory_type` but this field is named `context_type`
+    for backward API compatibility.
+
+    Every field has a default: ``id`` is a fresh UUID4 string and
+    ``timestamp`` is the current UTC time in ISO format, both generated per
+    instance.
+    """
+
+    id: str = field(default_factory=lambda: str(uuid.uuid4()))
+    content: str = ""  # The actual text content
+    source: str = ""  # Where it came from (agent, user, system)
+    context_type: str = "conversation"  # API field name; DB column is memory_type
+    agent_id: str | None = None
+    task_id: str | None = None
+    session_id: str | None = None
+    metadata: dict | None = None
+    embedding: list[float] | None = None  # None when no embedding is attached
+    timestamp: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
+    relevance_score: float | None = None  # Set during search
+
+
+@dataclass
+class MemoryChunk:
+    """A searchable chunk of memory.
+
+    All fields are required; none has a default.
+    """
+
+    id: str
+    source: str  # filepath
+    content: str
+    embedding: list[float]
+    created_at: str  # presumably an ISO timestamp string; confirm against the writer
+
+
+# Note: the memory functions themselves live in the memory_system module;
+# import them from there, e.g.:
+#   from timmy.memory_system import store_memory, search_memories
--- a/src/timmy/memory/vector_store.py
+++ b/src/timmy/memory/vector_store.py
@@ -1,425 +1,37 @@
-"""Vector store for semantic memory using sqlite-vss.
-
-Provides embedding-based similarity search for the Echo agent
-to retrieve relevant context from conversation history.
-"""
-
-import json
-import logging
-import sqlite3
-import uuid
-from collections.abc import Generator
-from contextlib import contextmanager
-from dataclasses import dataclass, field
-from datetime import UTC, datetime
-
-logger = logging.getLogger(__name__)
-
-
-def _check_embedding_model() -> bool | None:
-    """Check if the canonical embedding model is available."""
-    try:
-        from timmy.semantic_memory import _get_embedding_model
-
-        model = _get_embedding_model()
-        return model is not None and model is not False
-    except (ImportError, AttributeError) as exc:
-        logger.debug("Embedding model check failed: %s", exc)
-        return None
-
-
-def _compute_embedding(text: str) -> list[float]:
-    """Compute embedding vector for text.
-
-    Delegates to the canonical embedding provider in semantic_memory
-    to avoid loading the model multiple times.
-    """
-    from timmy.semantic_memory import embed_text
-
-    return embed_text(text)
-
-
-@dataclass
-class MemoryEntry:
-    """A memory entry with vector embedding."""
-
-    id: str = field(default_factory=lambda: str(uuid.uuid4()))
-    content: str = ""  # The actual text content
-    source: str = ""  # Where it came from (agent, user, system)
-    context_type: str = "conversation"  # conversation, document, fact, etc.
-    agent_id: str | None = None
-    task_id: str | None = None
-    session_id: str | None = None
-    metadata: dict | None = None
-    embedding: list[float] | None = None
-    timestamp: str = field(default_factory=lambda: datetime.now(UTC).isoformat())
-    relevance_score: float | None = None  # Set during search
-
-
-@contextmanager
-def _get_conn() -> Generator[sqlite3.Connection, None, None]:
-    """Get database connection to unified memory.db."""
-    from timmy.memory.unified import get_connection
-
-    with get_connection() as conn:
-        yield conn
-
-
-def store_memory(
-    content: str,
-    source: str,
-    context_type: str = "conversation",
-    agent_id: str | None = None,
-    task_id: str | None = None,
-    session_id: str | None = None,
-    metadata: dict | None = None,
-    compute_embedding: bool = True,
-) -> MemoryEntry:
-    """Store a memory entry with optional embedding.
-
-    Args:
-        content: The text content to store
-        source: Source of the memory (agent name, user, system)
-        context_type: Type of context (conversation, document, fact)
-        agent_id: Associated agent ID
-        task_id: Associated task ID
-        session_id: Session identifier
-        metadata: Additional structured data
-        compute_embedding: Whether to compute vector embedding
-
-    Returns:
-        The stored MemoryEntry
-    """
-    embedding = None
-    if compute_embedding:
-        embedding = _compute_embedding(content)
-
-    entry = MemoryEntry(
-        content=content,
-        source=source,
-        context_type=context_type,
-        agent_id=agent_id,
-        task_id=task_id,
-        session_id=session_id,
-        metadata=metadata,
-        embedding=embedding,
-    )
-
-    with _get_conn() as conn:
-        conn.execute(
-            """
-            INSERT INTO episodes
-            (id, content, source, context_type, agent_id, task_id, session_id,
-             metadata, embedding, timestamp)
-            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
-            """,
-            (
-                entry.id,
-                entry.content,
-                entry.source,
-                entry.context_type,
-                entry.agent_id,
-                entry.task_id,
-                entry.session_id,
-                json.dumps(metadata) if metadata else None,
-                json.dumps(embedding) if embedding else None,
-                entry.timestamp,
-            ),
-        )
-        conn.commit()
-
-    return entry
-
-
-def search_memories(
-    query: str,
-    limit: int = 10,
-    context_type: str | None = None,
-    agent_id: str | None = None,
-    session_id: str | None = None,
-    min_relevance: float = 0.0,
-) -> list[MemoryEntry]:
-    """Search for memories by semantic similarity.
-
-    Args:
-        query: Search query text
-        limit: Maximum results
-        context_type: Filter by context type
-        agent_id: Filter by agent
-        session_id: Filter by session
-        min_relevance: Minimum similarity score (0-1)
-
-    Returns:
-        List of MemoryEntry objects sorted by relevance
-    """
-    query_embedding = _compute_embedding(query)
-
-    # Build query with filters
-    conditions = []
-    params = []
-
-    if context_type:
-        conditions.append("context_type = ?")
-        params.append(context_type)
-    if agent_id:
-        conditions.append("agent_id = ?")
-        params.append(agent_id)
-    if session_id:
-        conditions.append("session_id = ?")
-        params.append(session_id)
-
-    where_clause = "WHERE " + " AND ".join(conditions) if conditions else ""
-
-    # Fetch candidates (we'll do in-memory similarity for now)
-    # For production with sqlite-vss, this would use vector similarity index
-    query_sql = f"""
-        SELECT * FROM episodes
-        {where_clause}
-        ORDER BY timestamp DESC
-        LIMIT ?
-    """
-    params.append(limit * 3)  # Get more candidates for ranking
-
-    with _get_conn() as conn:
-        rows = conn.execute(query_sql, params).fetchall()
-
-    # Compute similarity scores
-    results = []
-    for row in rows:
-        entry = MemoryEntry(
-            id=row["id"],
-            content=row["content"],
-            source=row["source"],
-            context_type=row["context_type"],
-            agent_id=row["agent_id"],
-            task_id=row["task_id"],
-            session_id=row["session_id"],
-            metadata=json.loads(row["metadata"]) if row["metadata"] else None,
-            embedding=json.loads(row["embedding"]) if row["embedding"] else None,
-            timestamp=row["timestamp"],
-        )
-
-        if entry.embedding:
-            # Cosine similarity
-            score = _cosine_similarity(query_embedding, entry.embedding)
-            entry.relevance_score = score
-            if score >= min_relevance:
-                results.append(entry)
-        else:
-            # Fallback: check for keyword overlap
-            score = _keyword_overlap(query, entry.content)
-            entry.relevance_score = score
-            if score >= min_relevance:
-                results.append(entry)
-
-    # Sort by relevance and return top results
-    results.sort(key=lambda x: x.relevance_score or 0, reverse=True)
-    return results[:limit]
-
-
-def _cosine_similarity(a: list[float], b: list[float]) -> float:
-    """Compute cosine similarity between two vectors."""
-    dot = sum(x * y for x, y in zip(a, b, strict=False))
-    norm_a = sum(x * x for x in a) ** 0.5
-    norm_b = sum(x * x for x in b) ** 0.5
-    if norm_a == 0 or norm_b == 0:
-        return 0.0
-    return dot / (norm_a * norm_b)
-
-
-def _keyword_overlap(query: str, content: str) -> float:
-    """Simple keyword overlap score as fallback."""
-    query_words = set(query.lower().split())
-    content_words = set(content.lower().split())
-    if not query_words:
-        return 0.0
-    overlap = len(query_words & content_words)
-    return overlap / len(query_words)
-
-
-def get_memory_context(query: str, max_tokens: int = 2000, **filters) -> str:
-    """Get relevant memory context as formatted text for LLM prompts.
-
-    Args:
-        query: Search query
-        max_tokens: Approximate maximum tokens to return
-        **filters: Additional filters (agent_id, session_id, etc.)
-
-    Returns:
-        Formatted context string for inclusion in prompts
-    """
-    memories = search_memories(query, limit=20, **filters)
-
-    context_parts = []
-    total_chars = 0
-    max_chars = max_tokens * 4  # Rough approximation
-
-    for mem in memories:
-        formatted = f"[{mem.source}]: {mem.content}"
-        if total_chars + len(formatted) > max_chars:
-            break
-        context_parts.append(formatted)
-        total_chars += len(formatted)
-
-    if not context_parts:
-        return ""
-
-    return "Relevant context from memory:\n" + "\n\n".join(context_parts)
-
-
-def recall_personal_facts(agent_id: str | None = None) -> list[str]:
-    """Recall personal facts about the user or system.
-
-    Args:
-        agent_id: Optional agent filter
-
-    Returns:
-        List of fact strings
-    """
-    with _get_conn() as conn:
-        if agent_id:
-            rows = conn.execute(
-                """
-                SELECT content FROM episodes
-                WHERE context_type = 'fact' AND agent_id = ?
-                ORDER BY timestamp DESC
-                LIMIT 100
-                """,
-                (agent_id,),
-            ).fetchall()
-        else:
-            rows = conn.execute(
-                """
-                SELECT content FROM episodes
-                WHERE context_type = 'fact'
-                ORDER BY timestamp DESC
-                LIMIT 100
-                """,
-            ).fetchall()
-
-    return [r["content"] for r in rows]
-
-
-def recall_personal_facts_with_ids(agent_id: str | None = None) -> list[dict]:
-    """Recall personal facts with their IDs for edit/delete operations."""
-    with _get_conn() as conn:
-        if agent_id:
-            rows = conn.execute(
-                "SELECT id, content FROM episodes WHERE context_type = 'fact' AND agent_id = ? ORDER BY timestamp DESC LIMIT 100",
-                (agent_id,),
-            ).fetchall()
-        else:
-            rows = conn.execute(
-                "SELECT id, content FROM episodes WHERE context_type = 'fact' ORDER BY timestamp DESC LIMIT 100",
-            ).fetchall()
-    return [{"id": r["id"], "content": r["content"]} for r in rows]
-
-
-def update_personal_fact(memory_id: str, new_content: str) -> bool:
-    """Update a personal fact's content."""
-    with _get_conn() as conn:
-        cursor = conn.execute(
-            "UPDATE episodes SET content = ? WHERE id = ? AND context_type = 'fact'",
-            (new_content, memory_id),
-        )
-        conn.commit()
-        updated = cursor.rowcount > 0
-    return updated
-
-
-def store_personal_fact(fact: str, agent_id: str | None = None) -> MemoryEntry:
-    """Store a personal fact about the user or system.
-
-    Args:
-        fact: The fact to store
-        agent_id: Associated agent
-
-    Returns:
-        The stored MemoryEntry
-    """
-    return store_memory(
-        content=fact,
-        source="system",
-        context_type="fact",
-        agent_id=agent_id,
-        metadata={"auto_extracted": False},
-    )
-
-
-def delete_memory(memory_id: str) -> bool:
-    """Delete a memory entry by ID.
-
-    Returns:
-        True if deleted, False if not found
-    """
-    with _get_conn() as conn:
-        cursor = conn.execute(
-            "DELETE FROM episodes WHERE id = ?",
-            (memory_id,),
-        )
-        conn.commit()
-        deleted = cursor.rowcount > 0
-    return deleted
-
-
-def get_memory_stats() -> dict:
-    """Get statistics about the memory store.
-
-    Returns:
-        Dict with counts by type, total entries, etc.
-    """
-    with _get_conn() as conn:
-        total = conn.execute("SELECT COUNT(*) as count FROM episodes").fetchone()["count"]
-
-        by_type = {}
-        rows = conn.execute(
-            "SELECT context_type, COUNT(*) as count FROM episodes GROUP BY context_type"
-        ).fetchall()
-        for row in rows:
-            by_type[row["context_type"]] = row["count"]
-
-        with_embeddings = conn.execute(
-            "SELECT COUNT(*) as count FROM episodes WHERE embedding IS NOT NULL"
-        ).fetchone()["count"]
-
-    return {
-        "total_entries": total,
-        "by_type": by_type,
-        "with_embeddings": with_embeddings,
-        "has_embedding_model": _check_embedding_model(),
-    }
-
-
-def prune_memories(older_than_days: int = 90, keep_facts: bool = True) -> int:
-    """Delete old memories to manage storage.
-
-    Args:
-        older_than_days: Delete memories older than this
-        keep_facts: Whether to preserve fact-type memories
-
-    Returns:
-        Number of entries deleted
-    """
-    from datetime import timedelta
-
-    cutoff = (datetime.now(UTC) - timedelta(days=older_than_days)).isoformat()
-
-    with _get_conn() as conn:
-        if keep_facts:
-            cursor = conn.execute(
-                """
-                DELETE FROM episodes
-                WHERE timestamp < ? AND context_type != 'fact'
-                """,
-                (cutoff,),
-            )
-        else:
-            cursor = conn.execute(
-                "DELETE FROM episodes WHERE timestamp < ?",
-                (cutoff,),
-            )
-
-        deleted = cursor.rowcount
-        conn.commit()
-
-    return deleted
+"""Backward compatibility — all memory functions live in memory_system now."""
+
+from timmy.memory_system import (
+    DB_PATH,
+    MemoryEntry,
+    _cosine_similarity,
+    _keyword_overlap,
+    delete_memory,
+    get_memory_context,
+    get_memory_stats,
+    get_memory_system,
+    prune_memories,
+    recall_personal_facts,
+    recall_personal_facts_with_ids,
+    search_memories,
+    store_memory,
+    store_personal_fact,
+    update_personal_fact,
+)
+
+__all__ = [
+    "DB_PATH",
+    "MemoryEntry",
+    "delete_memory",
+    "get_memory_context",
+    "get_memory_stats",
+    "get_memory_system",
+    "prune_memories",
+    "recall_personal_facts",
+    "recall_personal_facts_with_ids",
+    "search_memories",
+    "store_memory",
+    "store_personal_fact",
+    "update_personal_fact",
+    "_cosine_similarity",
+    "_keyword_overlap",
+]
--- a/src/timmy/memory_system.py
+++ b/src/timmy/memory_system.py
--- a/src/timmy/semantic_memory.py
+++ b/src/timmy/semantic_memory.py
@@ -1,486 +1,41 @@
-"""Tier 3: Semantic Memory — Vector search over vault files.
-
-Uses lightweight local embeddings (no cloud) for similarity search
-over all vault content. This is the "escape valve" when hot memory
-doesn't have the answer.
-
-Architecture:
- Indexes all markdown files in memory/ nightly or on-demand
- Uses sentence-transformers (local, no API calls)
- Stores vectors in SQLite (no external vector DB needed)
- memory_search() retrieves relevant context by similarity
-"""
-
-import hashlib
-import json
-import logging
-import sqlite3
-from contextlib import closing
-from dataclasses import dataclass
-from datetime import UTC, datetime
-from pathlib import Path
-
-logger = logging.getLogger(__name__)
-
-# Paths
-PROJECT_ROOT = Path(__file__).parent.parent.parent
-VAULT_PATH = PROJECT_ROOT / "memory"
-SEMANTIC_DB_PATH = PROJECT_ROOT / "data" / "memory.db"
-
-# Embedding model - small, fast, local
-# Using 'all-MiniLM-L6-v2' (~80MB) or fallback to simple keyword matching
-EMBEDDING_MODEL = None
-EMBEDDING_DIM = 384  # MiniLM dimension
-
-
-def _get_embedding_model():
-    """Lazy-load embedding model."""
-    global EMBEDDING_MODEL
-    if EMBEDDING_MODEL is None:
-        from config import settings
-
-        if settings.timmy_skip_embeddings:
-            EMBEDDING_MODEL = False
-            return EMBEDDING_MODEL
-        try:
-            from sentence_transformers import SentenceTransformer
-
-            EMBEDDING_MODEL = SentenceTransformer("all-MiniLM-L6-v2")
-            logger.info("SemanticMemory: Loaded embedding model")
-        except ImportError:
-            logger.warning("SemanticMemory: sentence-transformers not installed, using fallback")
-            EMBEDDING_MODEL = False  # Use fallback
-    return EMBEDDING_MODEL
-
-
-def _simple_hash_embedding(text: str) -> list[float]:
-    """Fallback: Simple hash-based embedding when transformers unavailable."""
-    # Create a deterministic pseudo-embedding from word hashes
-    words = text.lower().split()
-    vec = [0.0] * 128
-    for i, word in enumerate(words[:50]):  # First 50 words
-        h = hashlib.md5(word.encode()).hexdigest()
-        for j in range(8):
-            idx = (i * 8 + j) % 128
-            vec[idx] += int(h[j * 2 : j * 2 + 2], 16) / 255.0
-    # Normalize
-    import math
-
-    mag = math.sqrt(sum(x * x for x in vec)) or 1.0
-    return [x / mag for x in vec]
-
-
-def embed_text(text: str) -> list[float]:
-    """Generate embedding for text."""
-    model = _get_embedding_model()
-    if model and model is not False:
-        embedding = model.encode(text)
-        return embedding.tolist()
-    else:
-        return _simple_hash_embedding(text)
-
-
-def cosine_similarity(a: list[float], b: list[float]) -> float:
-    """Calculate cosine similarity between two vectors."""
-    import math
-
-    dot = sum(x * y for x, y in zip(a, b, strict=False))
-    mag_a = math.sqrt(sum(x * x for x in a))
-    mag_b = math.sqrt(sum(x * x for x in b))
-    if mag_a == 0 or mag_b == 0:
-        return 0.0
-    return dot / (mag_a * mag_b)
-
-
-@dataclass
-class MemoryChunk:
-    """A searchable chunk of memory."""
-
-    id: str
-    source: str  # filepath
-    content: str
-    embedding: list[float]
-    created_at: str
-
-
-class SemanticMemory:
-    """Vector-based semantic search over vault content."""
-
-    def __init__(self) -> None:
-        self.db_path = SEMANTIC_DB_PATH
-        self.vault_path = VAULT_PATH
-        self._init_db()
-
-    def _init_db(self) -> None:
-        """Initialize SQLite with vector storage."""
-        self.db_path.parent.mkdir(parents=True, exist_ok=True)
-        with closing(sqlite3.connect(str(self.db_path))) as conn:
-            conn.execute("""
-                CREATE TABLE IF NOT EXISTS chunks (
-                    id TEXT PRIMARY KEY,
-                    source TEXT NOT NULL,
-                    content TEXT NOT NULL,
-                    embedding TEXT NOT NULL,
-                    created_at TEXT NOT NULL,
-                    source_hash TEXT NOT NULL
-                )
-            """)
-            conn.execute("CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source)")
-            conn.commit()
-
-    def index_file(self, filepath: Path) -> int:
-        """Index a single file into semantic memory."""
-        if not filepath.exists():
-            return 0
-
-        content = filepath.read_text()
-        file_hash = hashlib.md5(content.encode()).hexdigest()
-
-        with closing(sqlite3.connect(str(self.db_path))) as conn:
-            # Check if already indexed with same hash
-            cursor = conn.execute(
-                "SELECT source_hash FROM chunks WHERE source = ? LIMIT 1", (str(filepath),)
-            )
-            existing = cursor.fetchone()
-            if existing and existing[0] == file_hash:
-                return 0  # Already indexed
-
-            # Delete old chunks for this file
-            conn.execute("DELETE FROM chunks WHERE source = ?", (str(filepath),))
-
-            # Split into chunks (paragraphs)
-            chunks = self._split_into_chunks(content)
-
-            # Index each chunk
-            now = datetime.now(UTC).isoformat()
-            for i, chunk_text in enumerate(chunks):
-                if len(chunk_text.strip()) < 20:  # Skip tiny chunks
-                    continue
-
-                chunk_id = f"{filepath.stem}_{i}"
-                embedding = embed_text(chunk_text)
-
-                conn.execute(
-                    """INSERT INTO chunks (id, source, content, embedding, created_at, source_hash)
-                       VALUES (?, ?, ?, ?, ?, ?)""",
-                    (chunk_id, str(filepath), chunk_text, json.dumps(embedding), now, file_hash),
-                )
-
-            conn.commit()
-
-        logger.info("SemanticMemory: Indexed %s (%d chunks)", filepath.name, len(chunks))
-        return len(chunks)
-
-    def _split_into_chunks(self, text: str, max_chunk_size: int = 500) -> list[str]:
-        """Split text into semantic chunks."""
-        # Split by paragraphs first
-        paragraphs = text.split("\n\n")
-        chunks = []
-
-        for para in paragraphs:
-            para = para.strip()
-            if not para:
-                continue
-
-            # If paragraph is small enough, keep as one chunk
-            if len(para) <= max_chunk_size:
-                chunks.append(para)
-            else:
-                # Split long paragraphs by sentences
-                sentences = para.replace(". ", ".\n").split("\n")
-                current_chunk = ""
-
-                for sent in sentences:
-                    if len(current_chunk) + len(sent) < max_chunk_size:
-                        current_chunk += " " + sent if current_chunk else sent
-                    else:
-                        if current_chunk:
-                            chunks.append(current_chunk.strip())
-                        current_chunk = sent
-
-                if current_chunk:
-                    chunks.append(current_chunk.strip())
-
-        return chunks
-
-    def index_vault(self) -> int:
-        """Index entire vault directory."""
-        total_chunks = 0
-
-        for md_file in self.vault_path.rglob("*.md"):
-            # Skip handoff file (handled separately)
-            if "last-session-handoff" in md_file.name:
-                continue
-            total_chunks += self.index_file(md_file)
-
-        logger.info("SemanticMemory: Indexed vault (%d total chunks)", total_chunks)
-        return total_chunks
-
-    def search(self, query: str, top_k: int = 5) -> list[tuple[str, float]]:
-        """Search for relevant memory chunks."""
-        query_embedding = embed_text(query)
-
-        with closing(sqlite3.connect(str(self.db_path))) as conn:
-            conn.row_factory = sqlite3.Row
-
-            # Get all chunks (in production, use vector index)
-            rows = conn.execute("SELECT source, content, embedding FROM chunks").fetchall()
-
-        # Calculate similarities
-        scored = []
-        for row in rows:
-            embedding = json.loads(row["embedding"])
-            score = cosine_similarity(query_embedding, embedding)
-            scored.append((row["source"], row["content"], score))
-
-        # Sort by score descending
-        scored.sort(key=lambda x: x[2], reverse=True)
-
-        # Return top_k
-        return [(content, score) for _, content, score in scored[:top_k]]
-
-    def get_relevant_context(self, query: str, max_chars: int = 2000) -> str:
-        """Get formatted context string for a query."""
-        results = self.search(query, top_k=3)
-
-        if not results:
-            return ""
-
-        parts = []
-        total_chars = 0
-
-        for content, score in results:
-            if score < 0.3:  # Similarity threshold
-                continue
-
-            chunk = f"[Relevant memory - score {score:.2f}]: {content[:400]}..."
-            if total_chars + len(chunk) > max_chars:
-                break
-
-            parts.append(chunk)
-            total_chars += len(chunk)
-
-        return "\n\n".join(parts) if parts else ""
-
-    def stats(self) -> dict:
-        """Get indexing statistics."""
-        with closing(sqlite3.connect(str(self.db_path))) as conn:
-            cursor = conn.execute("SELECT COUNT(*), COUNT(DISTINCT source) FROM chunks")
-            total_chunks, total_files = cursor.fetchone()
-
-        return {
-            "total_chunks": total_chunks,
-            "total_files": total_files,
-            "embedding_dim": EMBEDDING_DIM if _get_embedding_model() else 128,
-        }
-
-
-class MemorySearcher:
-    """High-level interface for memory search."""
-
-    def __init__(self) -> None:
-        self.semantic = SemanticMemory()
-
-    def search(self, query: str, tiers: list[str] = None) -> dict:
-        """Search across memory tiers.
-
-        Args:
-            query: Search query
-            tiers: List of tiers to search ["hot", "vault", "semantic"]
-
-        Returns:
-            Dict with results from each tier
-        """
-        tiers = tiers or ["semantic"]  # Default to semantic only
-        results = {}
-
-        if "semantic" in tiers:
-            semantic_results = self.semantic.search(query, top_k=5)
-            results["semantic"] = [
-                {"content": content, "score": score} for content, score in semantic_results
-            ]
-
-        return results
-
-    def get_context_for_query(self, query: str) -> str:
-        """Get comprehensive context for a user query."""
-        # Get semantic context
-        semantic_context = self.semantic.get_relevant_context(query)
-
-        if semantic_context:
-            return f"## Relevant Past Context\n\n{semantic_context}"
-
-        return ""
-
-
-# Module-level singleton
-semantic_memory = SemanticMemory()
-memory_searcher = MemorySearcher()
-
-
-def memory_search(query: str, top_k: int = 5) -> str:
-    """Search past conversations, notes, and stored facts for relevant context.
-
-    Searches across both the vault (indexed markdown files) and the
-    runtime memory store (facts and conversation fragments stored via
-    memory_write).
-
-    Args:
-        query: What to search for (e.g. "Bitcoin strategy", "server setup").
-        top_k: Number of results to return (default 5).
-
-    Returns:
-        Formatted string of relevant memory results.
-    """
-    # Guard: model sometimes passes None for top_k
-    if top_k is None:
-        top_k = 5
-
-    parts: list[str] = []
-
-    # 1. Search semantic vault (indexed markdown files)
-    vault_results = semantic_memory.search(query, top_k)
-    for content, score in vault_results:
-        if score < 0.2:
-            continue
-        parts.append(f"[vault score {score:.2f}] {content[:300]}")
-
-    # 2. Search runtime vector store (stored facts/conversations)
-    try:
-        from timmy.memory.vector_store import search_memories
-
-        runtime_results = search_memories(query, limit=top_k, min_relevance=0.2)
-        for entry in runtime_results:
-            label = entry.context_type or "memory"
-            parts.append(f"[{label}] {entry.content[:300]}")
-    except Exception as exc:
-        logger.debug("Vector store search unavailable: %s", exc)
-
-    if not parts:
-        return "No relevant memories found."
-    return "\n\n".join(parts)
-
-
-def memory_read(query: str = "", top_k: int = 5) -> str:
-    """Read from persistent memory — search facts, notes, and past conversations.
-
-    This is the primary tool for recalling stored information. If no query
-    is given, returns the most recent personal facts.  With a query, it
-    searches semantically across all stored memories.
-
-    Args:
-        query: Optional search term. Leave empty to list recent facts.
-        top_k: Maximum results to return (default 5).
-
-    Returns:
-        Formatted string of memory contents.
-    """
-    if top_k is None:
-        top_k = 5
-
-    parts: list[str] = []
-
-    # Always include personal facts first
-    try:
-        from timmy.memory.vector_store import search_memories
-
-        facts = search_memories(query or "", limit=top_k, min_relevance=0.0)
-        fact_entries = [e for e in facts if (e.context_type or "") == "fact"]
-        if fact_entries:
-            parts.append("## Personal Facts")
-            for entry in fact_entries[:top_k]:
-                parts.append(f"- {entry.content[:300]}")
-    except Exception as exc:
-        logger.debug("Vector store unavailable for memory_read: %s", exc)
-
-    # If a query was provided, also do semantic search
-    if query:
-        search_result = memory_search(query, top_k)
-        if search_result and search_result != "No relevant memories found.":
-            parts.append("\n## Search Results")
-            parts.append(search_result)
-
-    if not parts:
-        return "No memories stored yet. Use memory_write to store information."
-    return "\n".join(parts)
-
-
-def memory_write(content: str, context_type: str = "fact") -> str:
-    """Store a piece of information in persistent memory.
-
-    Use this tool when the user explicitly asks you to remember something.
-    Stored memories are searchable via memory_search across all channels
-    (web GUI, Discord, Telegram, etc.).
-
-    Args:
-        content: The information to remember (e.g. a phrase, fact, or note).
-        context_type: Type of memory — "fact" for permanent facts,
-                      "conversation" for conversation context,
-                      "document" for document fragments.
-
-    Returns:
-        Confirmation that the memory was stored.
-    """
-    if not content or not content.strip():
-        return "Nothing to store — content is empty."
-
-    valid_types = ("fact", "conversation", "document")
-    if context_type not in valid_types:
-        context_type = "fact"
-
-    try:
-        from timmy.memory.vector_store import search_memories, store_memory
-
-        # Dedup check for facts — skip if a similar fact already exists
-        # Threshold 0.75 catches paraphrases (was 0.9 which only caught near-exact)
-        if context_type == "fact":
-            existing = search_memories(
-                content.strip(), limit=3, context_type="fact", min_relevance=0.75
-            )
-            if existing:
-                return f"Similar fact already stored (id={existing[0].id[:8]}). Skipping duplicate."
-
-        entry = store_memory(
-            content=content.strip(),
-            source="agent",
-            context_type=context_type,
-        )
-        return f"Stored in memory (type={context_type}, id={entry.id[:8]}). This is now searchable across all channels."
-    except Exception as exc:
-        logger.error("Failed to write memory: %s", exc)
-        return f"Failed to store memory: {exc}"
-
-
-def memory_forget(query: str) -> str:
-    """Remove a stored memory that is outdated, incorrect, or no longer relevant.
-
-    Searches for memories matching the query and deletes the closest match.
-    Use this when the user says to forget something or when stored information
-    has changed.
-
-    Args:
-        query: Description of the memory to forget (e.g. "my phone number",
-               "the old server address").
-
-    Returns:
-        Confirmation of what was forgotten, or a message if nothing matched.
-    """
-    if not query or not query.strip():
-        return "Nothing to forget — query is empty."
-
-    try:
-        from timmy.memory.vector_store import delete_memory, search_memories
-
-        results = search_memories(query.strip(), limit=3, min_relevance=0.3)
-        if not results:
-            return "No matching memories found to forget."
-
-        # Delete the closest match
-        best = results[0]
-        deleted = delete_memory(best.id)
-        if deleted:
-            return f'Forgotten: "{best.content[:80]}" (type={best.context_type})'
-        return "Memory not found (may have already been deleted)."
-    except Exception as exc:
-        logger.error("Failed to forget memory: %s", exc)
-        return f"Failed to forget: {exc}"
+"""Backward compatibility — all memory functions live in memory_system now."""
+
+from timmy.memory_system import (
+    DB_PATH,
+    EMBEDDING_DIM,
+    EMBEDDING_MODEL,
+    MemoryChunk,
+    MemoryEntry,
+    MemorySearcher,
+    SemanticMemory,
+    _get_embedding_model,
+    _simple_hash_embedding,
+    cosine_similarity,
+    embed_text,
+    memory_forget,
+    memory_read,
+    memory_search,
+    memory_searcher,
+    memory_write,
+    semantic_memory,
+)
+
+__all__ = [
+    "DB_PATH",
+    "EMBEDDING_DIM",
+    "EMBEDDING_MODEL",
+    "MemoryChunk",
+    "MemoryEntry",
+    "MemorySearcher",
+    "SemanticMemory",
+    "_get_embedding_model",
+    "_simple_hash_embedding",
+    "cosine_similarity",
+    "embed_text",
+    "memory_forget",
+    "memory_read",
+    "memory_search",
+    "memory_searcher",
+    "memory_write",
+    "semantic_memory",
+]
--- a/src/timmy/thinking.py
+++ b/src/timmy/thinking.py
@@ -462,7 +462,7 @@ class ThinkingEngine:
        Args:
            facts: List of fact strings to filter and store.
        """
-        from timmy.semantic_memory import memory_write
+        from timmy.memory_system import memory_write

        for fact in facts[:3]:  # Safety cap
            if not isinstance(fact, str) or len(fact.strip()) <= 10:
--- a/src/timmy/tools.py
+++ b/src/timmy/tools.py
@@ -508,7 +508,7 @@ def _register_grok_tool(toolkit: Toolkit) -> None:
 def _register_memory_tools(toolkit: Toolkit) -> None:
    """Register memory search, write, and forget tools."""
    try:
-        from timmy.semantic_memory import memory_forget, memory_read, memory_search, memory_write
+        from timmy.memory_system import memory_forget, memory_read, memory_search, memory_write

        toolkit.register(memory_search, name="memory_search")
        toolkit.register(memory_write, name="memory_write")
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -94,7 +94,8 @@ def clean_database(tmp_path):
        "infrastructure.models.registry",
    ]
    _memory_db_modules = [
-        "timmy.memory.unified",
+        "timmy.memory_system",  # Canonical location
+        "timmy.memory.unified",  # Backward compat
    ]
    _spark_db_modules = [
        "spark.memory",
@@ -122,14 +123,8 @@ def clean_database(tmp_path):
        except Exception:
            pass

-    # Redirect semantic memory DB path (uses SEMANTIC_DB_PATH, not DB_PATH)
-    try:
-        import timmy.semantic_memory as _sem_mod
-
-        originals[("timmy.semantic_memory", "SEMANTIC_DB_PATH")] = _sem_mod.SEMANTIC_DB_PATH
-        _sem_mod.SEMANTIC_DB_PATH = tmp_memory_db
-    except Exception:
-        pass
+    # Note: semantic_memory now re-exports from memory_system,
+    # so DB_PATH is already patched via _memory_db_modules above

    for mod_name in _spark_db_modules:
        try:
--- a/tests/dashboard/test_chat_persistence.py
+++ b/tests/dashboard/test_chat_persistence.py
@@ -1,7 +1,7 @@
 """Tests for SQLite-backed chat persistence (issue #46)."""

-from dashboard.store import Message, MessageLog
 import infrastructure.chat_store as _chat_store
+from dashboard.store import Message, MessageLog


 def test_persistence_across_instances(tmp_path):
--- a/tests/timmy/test_semantic_memory.py
+++ b/tests/timmy/test_semantic_memory.py
@@ -5,7 +5,7 @@ from unittest.mock import MagicMock, patch

 import pytest

-from timmy.semantic_memory import (
+from timmy.memory_system import (
    MemoryChunk,
    MemorySearcher,
    SemanticMemory,
@@ -201,6 +201,11 @@ class TestSemanticMemory:
        return sm

    def test_init_creates_db(self, mem):
+        # After consolidation the DB file is created lazily, so run _init_db()
+        # explicitly to make sure the schema is ready before checking the file.
+
+        mem._init_db()
+        # If we get here without error, the DB is initialized
        assert mem.db_path.exists()

    def test_split_into_chunks_short(self, mem):
@@ -283,8 +288,12 @@ class TestSemanticMemory:
        mem.index_file(md_file)

        # Check DB directly - tiny chunks should NOT be stored
+        # After consolidation: chunks are stored in 'memories' table with memory_type='vault_chunk'
        conn = sqlite3.connect(str(mem.db_path))
-        cursor = conn.execute("SELECT COUNT(*) FROM chunks WHERE source = ?", (str(md_file),))
+        cursor = conn.execute(
+            "SELECT COUNT(*) FROM memories WHERE source = ? AND memory_type = 'vault_chunk'",
+            (str(md_file),)
+        )
        stored_count = cursor.fetchone()[0]
        conn.close()

@@ -316,13 +325,16 @@ class TestSemanticMemory:
        import sqlite3

        conn = sqlite3.connect(str(mem.db_path))
-        conn.execute("DELETE FROM chunks")
+        # After consolidation: chunks are stored in 'memories' table with memory_type='vault_chunk'
+        conn.execute("DELETE FROM memories WHERE memory_type = 'vault_chunk'")
        conn.commit()
        conn.close()

        mem.index_vault()
        conn = sqlite3.connect(str(mem.db_path))
-        rows = conn.execute("SELECT DISTINCT source FROM chunks").fetchall()
+        rows = conn.execute(
+            "SELECT DISTINCT source FROM memories WHERE memory_type = 'vault_chunk'"
+        ).fetchall()
        conn.close()
        sources = [r[0] for r in rows]
        # Only the real file should be indexed, not the handoff
@@ -517,8 +529,8 @@ class TestMemoryWrite:
        """Mock vector_store functions for memory_write tests."""
        # Patch where it's imported from, not where it's used
        with (
-            patch("timmy.memory.vector_store.search_memories") as mock_search,
-            patch("timmy.memory.vector_store.store_memory") as mock_store,
+            patch("timmy.memory_system.search_memories") as mock_search,
+            patch("timmy.memory_system.store_memory") as mock_store,
        ):
            # Default: no existing memories (no duplicates)
            mock_search.return_value = []
@@ -610,8 +622,8 @@ class TestMemoryForget:
        """Mock vector_store functions for memory_forget tests."""
        # Patch where it's imported from, not where it's used
        with (
-            patch("timmy.memory.vector_store.search_memories") as mock_search,
-            patch("timmy.memory.vector_store.delete_memory") as mock_delete,
+            patch("timmy.memory_system.search_memories") as mock_search,
+            patch("timmy.memory_system.delete_memory") as mock_delete,
        ):
            # Default: no results
            mock_search.return_value = []
--- a/tests/timmy/test_vector_store.py
+++ b/tests/timmy/test_vector_store.py
@@ -2,7 +2,7 @@

 import pytest

-from timmy.memory.vector_store import (
+from timmy.memory_system import (
    _cosine_similarity,
    _keyword_overlap,
    delete_memory,