# forked from Rockachopa/Timmy-time-dashboard
"""Unified memory schema and connection management.
|
|
|
|
This module provides the central database schema for Timmy's consolidated
|
|
memory system. All memory types (facts, conversations, documents, vault chunks)
|
|
are stored in a single `memories` table with a `memory_type` discriminator.
|
|
"""
|
|
|
|
import logging
|
|
import sqlite3
|
|
import uuid
|
|
from collections.abc import Generator
|
|
from contextlib import closing, contextmanager
|
|
from dataclasses import dataclass, field
|
|
from datetime import UTC, datetime
|
|
from pathlib import Path
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Paths
|
|
PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
|
|
DB_PATH = PROJECT_ROOT / "data" / "memory.db"
|
|
|
|
|
|
@contextmanager
def get_connection() -> Generator[sqlite3.Connection, None, None]:
    """Yield a connection to the unified memory database.

    Creates the data directory on first use, enables WAL journaling and a
    5-second busy timeout, and ensures the schema exists before yielding.
    The connection is always closed when the context exits, whether the
    body completed normally or raised.
    """
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    conn = sqlite3.connect(str(DB_PATH))
    try:
        # Row access by column name for all callers.
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA journal_mode=WAL")
        conn.execute("PRAGMA busy_timeout=5000")
        _ensure_schema(conn)
        yield conn
    finally:
        conn.close()
def _ensure_schema(conn: sqlite3.Connection) -> None:
    """Create the unified memories table and indexes if they don't exist."""
    ddl = """
        CREATE TABLE IF NOT EXISTS memories (
            id TEXT PRIMARY KEY,
            content TEXT NOT NULL,
            memory_type TEXT NOT NULL DEFAULT 'fact',
            source TEXT NOT NULL DEFAULT 'agent',
            embedding TEXT,
            metadata TEXT,
            source_hash TEXT,
            agent_id TEXT,
            task_id TEXT,
            session_id TEXT,
            confidence REAL NOT NULL DEFAULT 0.8,
            tags TEXT NOT NULL DEFAULT '[]',
            created_at TEXT NOT NULL,
            last_accessed TEXT,
            access_count INTEGER NOT NULL DEFAULT 0
        )
    """
    conn.execute(ddl)

    # Indexes backing the common lookup patterns: by type, recency,
    # session, agent, and source.
    index_specs = (
        ("idx_memories_type", "memory_type"),
        ("idx_memories_time", "created_at"),
        ("idx_memories_session", "session_id"),
        ("idx_memories_agent", "agent_id"),
        ("idx_memories_source", "source"),
    )
    for index_name, column in index_specs:
        conn.execute(f"CREATE INDEX IF NOT EXISTS {index_name} ON memories({column})")
    conn.commit()

    # Run migration if needed
    _migrate_schema(conn)
def _migrate_schema(conn: sqlite3.Connection) -> None:
    """Migrate from old three-table schema to unified memories table.

    Migration paths:
    - episodes table -> memories (context_type -> memory_type)
    - chunks table -> memories with memory_type='vault_chunk'
    - facts table -> dropped (unused, 0 rows expected)

    Each step is best-effort: a failure is logged as a warning and the old
    table is left in place, so no data is lost on a partial migration.
    """
    cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
    tables = {row[0] for row in cursor.fetchall()}

    has_memories = "memories" in tables
    has_episodes = "episodes" in tables
    has_chunks = "chunks" in tables
    has_facts = "facts" in tables

    # Check if we need to migrate (old schema exists but new one doesn't fully)
    if not has_memories:
        # _ensure_schema() creates the table before calling us, so this is
        # informational only.
        logger.info("Migration: Creating unified memories table")

    # Migrate episodes -> memories
    if has_episodes and has_memories:
        logger.info("Migration: Converting episodes table to memories")
        try:
            cols = _get_table_columns(conn, "episodes")

            # Guard every column (as the chunks path below already does) so
            # the INSERT also succeeds against older episodes schemas that
            # lack some of them, instead of failing the whole step.
            def _col(name: str, fallback: str) -> str:
                return name if name in cols else fallback

            context_type_col = _col("context_type", "'conversation'")
            source_col = _col("source", "'agent'")
            embedding_col = _col("embedding", "NULL")
            metadata_col = _col("metadata", "NULL")
            agent_col = _col("agent_id", "NULL")
            task_col = _col("task_id", "NULL")
            session_col = _col("session_id", "NULL")
            timestamp_col = _col("timestamp", "NULL")

            conn.execute(f"""
                INSERT INTO memories (
                    id, content, memory_type, source, embedding,
                    metadata, agent_id, task_id, session_id,
                    created_at, access_count, last_accessed
                )
                SELECT
                    id, content,
                    COALESCE({context_type_col}, 'conversation'),
                    COALESCE({source_col}, 'agent'),
                    {embedding_col},
                    {metadata_col}, {agent_col}, {task_col}, {session_col},
                    COALESCE({timestamp_col}, datetime('now')), 0, NULL
                FROM episodes
            """)
            conn.execute("DROP TABLE episodes")
            logger.info("Migration: Migrated episodes to memories")
        except sqlite3.Error as exc:
            logger.warning("Migration: Failed to migrate episodes: %s", exc)

    # Migrate chunks -> memories as vault_chunk
    if has_chunks and has_memories:
        logger.info("Migration: Converting chunks table to memories")
        try:
            cols = _get_table_columns(conn, "chunks")

            # Older chunks schemas varied; fall back per column.
            id_col = "id" if "id" in cols else "CAST(rowid AS TEXT)"
            content_col = "content" if "content" in cols else "text"
            source_col = (
                "filepath" if "filepath" in cols else ("source" if "source" in cols else "'vault'")
            )
            embedding_col = "embedding" if "embedding" in cols else "NULL"
            created_col = "created_at" if "created_at" in cols else "datetime('now')"

            conn.execute(f"""
                INSERT INTO memories (
                    id, content, memory_type, source, embedding,
                    created_at, access_count
                )
                SELECT
                    {id_col}, {content_col}, 'vault_chunk', {source_col},
                    {embedding_col}, {created_col}, 0
                FROM chunks
            """)
            conn.execute("DROP TABLE chunks")
            logger.info("Migration: Migrated chunks to memories")
        except sqlite3.Error as exc:
            logger.warning("Migration: Failed to migrate chunks: %s", exc)

    # Drop old facts table (unused; expected to hold 0 rows)
    if has_facts:
        try:
            conn.execute("DROP TABLE facts")
            logger.info("Migration: Dropped old facts table")
        except sqlite3.Error as exc:
            logger.warning("Migration: Failed to drop facts: %s", exc)

    conn.commit()
def _get_table_columns(conn: sqlite3.Connection, table_name: str) -> set[str]:
|
|
"""Get the column names for a table."""
|
|
cursor = conn.execute(f"PRAGMA table_info({table_name})")
|
|
return {row[1] for row in cursor.fetchall()}
|
|
|
|
|
|
# Backward compatibility aliases
# Older call sites used `get_conn`; keep it as an alias of get_connection.
get_conn = get_connection
@dataclass
class MemoryEntry:
    """A memory entry with vector embedding.

    Note: The DB column is `memory_type` but this field is named `context_type`
    for backward API compatibility.
    """

    # Primary key; a fresh UUID4 string per entry by default.
    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    content: str = ""  # The actual text content
    source: str = ""  # Where it came from (agent, user, system)
    context_type: str = "conversation"  # API field name; DB column is memory_type
    agent_id: str | None = None  # Owning agent, when known
    task_id: str | None = None  # Associated task, when known
    session_id: str | None = None  # Session/conversation grouping, when known
    metadata: dict | None = None  # Extra structured data — presumably serialized to the TEXT `metadata` column; verify against writer
    embedding: list[float] | None = None  # Vector embedding, when computed
    timestamp: str = field(default_factory=lambda: datetime.now(UTC).isoformat())  # ISO-8601 UTC creation time
    relevance_score: float | None = None  # Set during search
@dataclass
class MemoryChunk:
    """A searchable chunk of memory (stored with memory_type='vault_chunk')."""

    id: str  # Chunk identifier
    source: str  # filepath
    content: str  # Chunk text
    embedding: list[float]  # Vector embedding of `content`
    created_at: str  # Creation timestamp string
# Note: Functions are available via memory_system module directly
# from timmy.memory_system import store_memory, search_memories, etc.