refactor: consolidate three memory stores into single table (#37)
Replace the three separate memory stores with a single unified `memories` table: - New schema uses `memory_type` column (fact, conversation, document, vault_chunk) - Migration automatically converts old `episodes` and `chunks` tables - HotMemory now reads computed view from DB facts (with file fallback) - All public APIs remain unchanged for backward compatibility - SemanticMemory uses `vault_chunk` memory_type for indexed documents Fixes: - MEMORY.md staleness (now computed from DB) - Duplicate cosine_similarity functions (single source in memory_system) - Facts table dropped (0 rows, unused) - User profile corruption (user facts now stored in DB) All 1494 tests pass. Closes #37
This commit is contained in:
@@ -1,5 +1,199 @@
|
||||
"""Backward compatibility — schema lives in memory_system now."""
|
||||
"""Unified memory schema and connection management.
|
||||
|
||||
from timmy.memory_system import DB_PATH, get_connection
|
||||
This module provides the central database schema for Timmy's consolidated
|
||||
memory system. All memory types (facts, conversations, documents, vault chunks)
|
||||
are stored in a single `memories` table with a `memory_type` discriminator.
|
||||
"""
|
||||
|
||||
__all__ = ["DB_PATH", "get_connection"]
|
||||
import logging
|
||||
import sqlite3
|
||||
import uuid
|
||||
from collections.abc import Generator
|
||||
from contextlib import closing, contextmanager
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Paths
|
||||
PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
|
||||
DB_PATH = PROJECT_ROOT / "data" / "memory.db"
|
||||
|
||||
|
||||
@contextmanager
def get_connection() -> Generator[sqlite3.Connection, None, None]:
    """Yield a connection to the unified memory database.

    Creates the data directory and schema on demand, enables WAL journaling
    and a 5s busy timeout for concurrent access, and closes the connection
    when the caller's ``with`` block exits.
    """
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    connection = sqlite3.connect(str(DB_PATH))
    with closing(connection) as conn:
        conn.row_factory = sqlite3.Row
        # WAL allows concurrent readers; busy_timeout avoids immediate
        # "database is locked" errors under contention.
        for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA busy_timeout=5000"):
            conn.execute(pragma)
        _ensure_schema(conn)
        yield conn
||||
|
||||
|
||||
def _ensure_schema(conn: sqlite3.Connection) -> None:
    """Create the unified memories table and indexes if they don't exist."""
    conn.execute("""
        CREATE TABLE IF NOT EXISTS memories (
            id TEXT PRIMARY KEY,
            content TEXT NOT NULL,
            memory_type TEXT NOT NULL DEFAULT 'fact',
            source TEXT NOT NULL DEFAULT 'agent',
            embedding TEXT,
            metadata TEXT,
            source_hash TEXT,
            agent_id TEXT,
            task_id TEXT,
            session_id TEXT,
            confidence REAL NOT NULL DEFAULT 0.8,
            tags TEXT NOT NULL DEFAULT '[]',
            created_at TEXT NOT NULL,
            last_accessed TEXT,
            access_count INTEGER NOT NULL DEFAULT 0
        )
    """)

    # Secondary indexes backing the common query filters (type, time,
    # session, agent, source).
    index_specs = {
        "idx_memories_type": "memory_type",
        "idx_memories_time": "created_at",
        "idx_memories_session": "session_id",
        "idx_memories_agent": "agent_id",
        "idx_memories_source": "source",
    }
    for index_name, column in index_specs.items():
        conn.execute(f"CREATE INDEX IF NOT EXISTS {index_name} ON memories({column})")
    conn.commit()

    # Fold any pre-consolidation tables (episodes/chunks/facts) into the
    # unified schema.
    _migrate_schema(conn)
|
||||
|
||||
|
||||
def _migrate_schema(conn: sqlite3.Connection) -> None:
    """Migrate from old three-table schema to unified memories table.

    Migration paths:
    - episodes table -> memories (context_type -> memory_type)
    - chunks table -> memories with memory_type='vault_chunk'
    - facts table -> dropped (unused, 0 rows expected)

    Each step is best-effort: a failure is logged and the remaining steps
    still run, so one bad legacy table cannot block the others.
    """
    cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
    tables = {row[0] for row in cursor.fetchall()}

    if "memories" not in tables:
        # _ensure_schema() creates the memories table before invoking this
        # function. If it is somehow missing there is nothing to migrate
        # into, so bail out explicitly instead of silently skipping every
        # step behind per-step "and has_memories" guards.
        logger.warning("Migration: memories table missing; nothing to migrate into")
        return

    # Migrate episodes -> memories
    if "episodes" in tables:
        logger.info("Migration: Converting episodes table to memories")
        try:
            cols = _get_table_columns(conn, "episodes")
            # Very old databases predate the context_type column; fall back
            # to a literal so the SELECT below stays valid either way.
            context_type_col = "context_type" if "context_type" in cols else "'conversation'"

            conn.execute(f"""
                INSERT INTO memories (
                    id, content, memory_type, source, embedding,
                    metadata, agent_id, task_id, session_id,
                    created_at, access_count, last_accessed
                )
                SELECT
                    id, content,
                    COALESCE({context_type_col}, 'conversation'),
                    COALESCE(source, 'agent'),
                    embedding,
                    metadata, agent_id, task_id, session_id,
                    COALESCE(timestamp, datetime('now')), 0, NULL
                FROM episodes
            """)
            conn.execute("DROP TABLE episodes")
            logger.info("Migration: Migrated episodes to memories")
        except sqlite3.Error as exc:
            logger.warning("Migration: Failed to migrate episodes: %s", exc)

    # Migrate chunks -> memories as vault_chunk
    if "chunks" in tables:
        logger.info("Migration: Converting chunks table to memories")
        try:
            cols = _get_table_columns(conn, "chunks")

            # The chunks table shape varied across versions; pick whichever
            # columns exist and substitute literals/derived values otherwise.
            id_col = "id" if "id" in cols else "CAST(rowid AS TEXT)"
            content_col = "content" if "content" in cols else "text"
            source_col = "filepath" if "filepath" in cols else ("source" if "source" in cols else "'vault'")
            embedding_col = "embedding" if "embedding" in cols else "NULL"
            created_col = "created_at" if "created_at" in cols else "datetime('now')"

            conn.execute(f"""
                INSERT INTO memories (
                    id, content, memory_type, source, embedding,
                    created_at, access_count
                )
                SELECT
                    {id_col}, {content_col}, 'vault_chunk', {source_col},
                    {embedding_col}, {created_col}, 0
                FROM chunks
            """)
            conn.execute("DROP TABLE chunks")
            logger.info("Migration: Migrated chunks to memories")
        except sqlite3.Error as exc:
            logger.warning("Migration: Failed to migrate chunks: %s", exc)

    # Drop old facts table (expected to be empty/unused)
    if "facts" in tables:
        try:
            conn.execute("DROP TABLE facts")
            logger.info("Migration: Dropped old facts table")
        except sqlite3.Error as exc:
            logger.warning("Migration: Failed to drop facts: %s", exc)

    conn.commit()
|
||||
|
||||
|
||||
def _get_table_columns(conn: sqlite3.Connection, table_name: str) -> set[str]:
|
||||
"""Get the column names for a table."""
|
||||
cursor = conn.execute(f"PRAGMA table_info({table_name})")
|
||||
return {row[1] for row in cursor.fetchall()}
|
||||
|
||||
|
||||
# Backward compatibility alias — older call sites import `get_conn`.
get_conn = get_connection
|
||||
|
||||
|
||||
@dataclass
class MemoryEntry:
    """A memory entry with vector embedding.

    Note: The DB column is `memory_type` but this field is named `context_type`
    for backward API compatibility.
    """

    id: str = field(default_factory=lambda: str(uuid.uuid4()))  # UUID4 primary key
    content: str = ""  # The actual text content
    source: str = ""  # Where it came from (agent, user, system)
    context_type: str = "conversation"  # API field name; DB column is memory_type
    agent_id: str | None = None  # Associated agent, if any
    task_id: str | None = None  # Associated task, if any
    session_id: str | None = None  # Session the memory was recorded in
    metadata: dict | None = None  # Stored as JSON text in the DB
    embedding: list[float] | None = None  # Vector embedding; stored as JSON text
    timestamp: str = field(default_factory=lambda: datetime.now(UTC).isoformat())  # ISO-8601 UTC creation time
    relevance_score: float | None = None  # Set during search
|
||||
|
||||
|
||||
@dataclass
class MemoryChunk:
    """A searchable chunk of memory.

    One embedded slice of an indexed file, used for similarity search.
    """

    id: str  # Unique chunk identifier
    source: str  # filepath
    content: str  # Raw chunk text
    embedding: list[float]  # Vector embedding of the chunk
    created_at: str  # Timestamp the chunk was indexed
|
||||
|
||||
|
||||
# Note: Functions are available via memory_system module directly
|
||||
# from timmy.memory_system import store_memory, search_memories, etc.
|
||||
|
||||
@@ -134,66 +134,81 @@ def _ensure_schema(conn: sqlite3.Connection) -> None:
|
||||
CREATE TABLE IF NOT EXISTS memories (
|
||||
id TEXT PRIMARY KEY,
|
||||
content TEXT NOT NULL,
|
||||
memory_type TEXT NOT NULL DEFAULT 'fact',
|
||||
source TEXT NOT NULL DEFAULT 'agent',
|
||||
context_type TEXT NOT NULL DEFAULT 'conversation',
|
||||
embedding TEXT,
|
||||
metadata TEXT,
|
||||
source_hash TEXT,
|
||||
agent_id TEXT,
|
||||
task_id TEXT,
|
||||
session_id TEXT,
|
||||
metadata TEXT,
|
||||
embedding TEXT,
|
||||
timestamp TEXT NOT NULL,
|
||||
access_count INTEGER NOT NULL DEFAULT 0,
|
||||
last_accessed TEXT
|
||||
confidence REAL NOT NULL DEFAULT 0.8,
|
||||
tags TEXT NOT NULL DEFAULT '[]',
|
||||
created_at TEXT NOT NULL,
|
||||
last_accessed TEXT,
|
||||
access_count INTEGER NOT NULL DEFAULT 0
|
||||
)
|
||||
""")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(context_type)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_timestamp ON memories(timestamp)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_source ON memories(source)")
|
||||
|
||||
# Create indexes for efficient querying
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(memory_type)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_time ON memories(created_at)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_session ON memories(session_id)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_agent ON memories(agent_id)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_source ON memories(source)")
|
||||
conn.commit()
|
||||
|
||||
# Run migration if needed
|
||||
_migrate_schema(conn)
|
||||
|
||||
|
||||
def _get_table_columns(conn: sqlite3.Connection, table_name: str) -> set[str]:
|
||||
"""Get the column names for a table."""
|
||||
cursor = conn.execute(f"PRAGMA table_info({table_name})")
|
||||
return {row[1] for row in cursor.fetchall()}
|
||||
|
||||
|
||||
def _migrate_schema(conn: sqlite3.Connection) -> None:
|
||||
"""Migrate from old three-table schema to unified memories table."""
|
||||
"""Migrate from old three-table schema to unified memories table.
|
||||
|
||||
Migration paths:
|
||||
- episodes table -> memories (context_type -> memory_type)
|
||||
- chunks table -> memories with memory_type='vault_chunk'
|
||||
- facts table -> dropped (unused, 0 rows expected)
|
||||
"""
|
||||
cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
|
||||
tables = {row[0] for row in cursor.fetchall()}
|
||||
|
||||
# Check if migration is needed
|
||||
has_memories = "memories" in tables
|
||||
has_episodes = "episodes" in tables
|
||||
has_chunks = "chunks" in tables
|
||||
has_facts = "facts" in tables
|
||||
|
||||
if has_memories:
|
||||
# Already migrated, just clean up old tables if they exist
|
||||
if has_chunks:
|
||||
conn.execute("DROP TABLE chunks")
|
||||
logger.info("Migration: Dropped old chunks table")
|
||||
if has_facts:
|
||||
conn.execute("DROP TABLE facts")
|
||||
logger.info("Migration: Dropped old facts table")
|
||||
if has_episodes:
|
||||
conn.execute("DROP TABLE episodes")
|
||||
logger.info("Migration: Dropped old episodes table")
|
||||
conn.commit()
|
||||
return
|
||||
# Check if we need to migrate (old schema exists)
|
||||
if not has_memories and (has_episodes or has_chunks or has_facts):
|
||||
logger.info("Migration: Creating unified memories table")
|
||||
# Schema will be created by _ensure_schema above
|
||||
|
||||
if has_episodes:
|
||||
# Migrate episodes to memories
|
||||
# Migrate episodes -> memories
|
||||
if has_episodes and has_memories:
|
||||
logger.info("Migration: Converting episodes table to memories")
|
||||
try:
|
||||
conn.execute("""
|
||||
cols = _get_table_columns(conn, "episodes")
|
||||
context_type_col = "context_type" if "context_type" in cols else "'conversation'"
|
||||
|
||||
conn.execute(f"""
|
||||
INSERT INTO memories (
|
||||
id, content, source, context_type, agent_id, task_id, session_id,
|
||||
metadata, embedding, timestamp, access_count, last_accessed
|
||||
id, content, memory_type, source, embedding,
|
||||
metadata, agent_id, task_id, session_id,
|
||||
created_at, access_count, last_accessed
|
||||
)
|
||||
SELECT
|
||||
id, content, source, context_type, agent_id, task_id, session_id,
|
||||
metadata, embedding, timestamp, 0, NULL
|
||||
id, content,
|
||||
COALESCE({context_type_col}, 'conversation'),
|
||||
COALESCE(source, 'agent'),
|
||||
embedding,
|
||||
metadata, agent_id, task_id, session_id,
|
||||
COALESCE(timestamp, datetime('now')), 0, NULL
|
||||
FROM episodes
|
||||
""")
|
||||
conn.execute("DROP TABLE episodes")
|
||||
@@ -201,13 +216,40 @@ def _migrate_schema(conn: sqlite3.Connection) -> None:
|
||||
except sqlite3.Error as exc:
|
||||
logger.warning("Migration: Failed to migrate episodes: %s", exc)
|
||||
|
||||
# Drop old tables that we don't migrate
|
||||
if has_chunks:
|
||||
conn.execute("DROP TABLE chunks")
|
||||
logger.info("Migration: Dropped chunks table (vault can be re-indexed)")
|
||||
# Migrate chunks -> memories as vault_chunk
|
||||
if has_chunks and has_memories:
|
||||
logger.info("Migration: Converting chunks table to memories")
|
||||
try:
|
||||
cols = _get_table_columns(conn, "chunks")
|
||||
|
||||
id_col = "id" if "id" in cols else "CAST(rowid AS TEXT)"
|
||||
content_col = "content" if "content" in cols else "text"
|
||||
source_col = "filepath" if "filepath" in cols else ("source" if "source" in cols else "'vault'")
|
||||
embedding_col = "embedding" if "embedding" in cols else "NULL"
|
||||
created_col = "created_at" if "created_at" in cols else "datetime('now')"
|
||||
|
||||
conn.execute(f"""
|
||||
INSERT INTO memories (
|
||||
id, content, memory_type, source, embedding,
|
||||
created_at, access_count
|
||||
)
|
||||
SELECT
|
||||
{id_col}, {content_col}, 'vault_chunk', {source_col},
|
||||
{embedding_col}, {created_col}, 0
|
||||
FROM chunks
|
||||
""")
|
||||
conn.execute("DROP TABLE chunks")
|
||||
logger.info("Migration: Migrated chunks to memories")
|
||||
except sqlite3.Error as exc:
|
||||
logger.warning("Migration: Failed to migrate chunks: %s", exc)
|
||||
|
||||
# Drop old tables
|
||||
if has_facts:
|
||||
conn.execute("DROP TABLE facts")
|
||||
logger.info("Migration: Dropped facts table (0 rows expected)")
|
||||
try:
|
||||
conn.execute("DROP TABLE facts")
|
||||
logger.info("Migration: Dropped old facts table")
|
||||
except sqlite3.Error as exc:
|
||||
logger.warning("Migration: Failed to drop facts: %s", exc)
|
||||
|
||||
conn.commit()
|
||||
|
||||
@@ -223,12 +265,16 @@ get_conn = get_connection
|
||||
|
||||
@dataclass
|
||||
class MemoryEntry:
|
||||
"""A memory entry with vector embedding."""
|
||||
"""A memory entry with vector embedding.
|
||||
|
||||
Note: The DB column is `memory_type` but this field is named `context_type`
|
||||
for backward API compatibility.
|
||||
"""
|
||||
|
||||
id: str = field(default_factory=lambda: str(uuid.uuid4()))
|
||||
content: str = "" # The actual text content
|
||||
source: str = "" # Where it came from (agent, user, system)
|
||||
context_type: str = "conversation" # conversation, document, fact, chunk
|
||||
context_type: str = "conversation" # API field name; DB column is memory_type
|
||||
agent_id: str | None = None
|
||||
task_id: str | None = None
|
||||
session_id: str | None = None
|
||||
@@ -269,7 +315,7 @@ def store_memory(
|
||||
Args:
|
||||
content: The text content to store
|
||||
source: Source of the memory (agent name, user, system)
|
||||
context_type: Type of context (conversation, document, fact, chunk)
|
||||
context_type: Type of context (conversation, document, fact, vault_chunk)
|
||||
agent_id: Associated agent ID
|
||||
task_id: Associated task ID
|
||||
session_id: Session identifier
|
||||
@@ -298,15 +344,15 @@ def store_memory(
|
||||
conn.execute(
|
||||
"""
|
||||
INSERT INTO memories
|
||||
(id, content, source, context_type, agent_id, task_id, session_id,
|
||||
metadata, embedding, timestamp)
|
||||
(id, content, memory_type, source, agent_id, task_id, session_id,
|
||||
metadata, embedding, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
entry.id,
|
||||
entry.content,
|
||||
entry.context_type, # DB column is memory_type
|
||||
entry.source,
|
||||
entry.context_type,
|
||||
entry.agent_id,
|
||||
entry.task_id,
|
||||
entry.session_id,
|
||||
@@ -333,7 +379,7 @@ def search_memories(
|
||||
Args:
|
||||
query: Search query text
|
||||
limit: Maximum results
|
||||
context_type: Filter by context type
|
||||
context_type: Filter by memory type (maps to DB memory_type column)
|
||||
agent_id: Filter by agent
|
||||
session_id: Filter by session
|
||||
min_relevance: Minimum similarity score (0-1)
|
||||
@@ -348,7 +394,7 @@ def search_memories(
|
||||
params = []
|
||||
|
||||
if context_type:
|
||||
conditions.append("context_type = ?")
|
||||
conditions.append("memory_type = ?")
|
||||
params.append(context_type)
|
||||
if agent_id:
|
||||
conditions.append("agent_id = ?")
|
||||
@@ -363,7 +409,7 @@ def search_memories(
|
||||
query_sql = f"""
|
||||
SELECT * FROM memories
|
||||
{where_clause}
|
||||
ORDER BY timestamp DESC
|
||||
ORDER BY created_at DESC
|
||||
LIMIT ?
|
||||
"""
|
||||
params.append(limit * 3) # Get more candidates for ranking
|
||||
@@ -378,13 +424,13 @@ def search_memories(
|
||||
id=row["id"],
|
||||
content=row["content"],
|
||||
source=row["source"],
|
||||
context_type=row["context_type"],
|
||||
context_type=row["memory_type"], # DB column -> API field
|
||||
agent_id=row["agent_id"],
|
||||
task_id=row["task_id"],
|
||||
session_id=row["session_id"],
|
||||
metadata=json.loads(row["metadata"]) if row["metadata"] else None,
|
||||
embedding=json.loads(row["embedding"]) if row["embedding"] else None,
|
||||
timestamp=row["timestamp"],
|
||||
timestamp=row["created_at"],
|
||||
)
|
||||
|
||||
if entry.embedding:
|
||||
@@ -430,10 +476,10 @@ def get_memory_stats() -> dict:
|
||||
|
||||
by_type = {}
|
||||
rows = conn.execute(
|
||||
"SELECT context_type, COUNT(*) as count FROM memories GROUP BY context_type"
|
||||
"SELECT memory_type, COUNT(*) as count FROM memories GROUP BY memory_type"
|
||||
).fetchall()
|
||||
for row in rows:
|
||||
by_type[row["context_type"]] = row["count"]
|
||||
by_type[row["memory_type"]] = row["count"]
|
||||
|
||||
with_embeddings = conn.execute(
|
||||
"SELECT COUNT(*) as count FROM memories WHERE embedding IS NOT NULL"
|
||||
@@ -464,13 +510,13 @@ def prune_memories(older_than_days: int = 90, keep_facts: bool = True) -> int:
|
||||
cursor = conn.execute(
|
||||
"""
|
||||
DELETE FROM memories
|
||||
WHERE timestamp < ? AND context_type != 'fact'
|
||||
WHERE created_at < ? AND memory_type != 'fact'
|
||||
""",
|
||||
(cutoff,),
|
||||
)
|
||||
else:
|
||||
cursor = conn.execute(
|
||||
"DELETE FROM memories WHERE timestamp < ?",
|
||||
"DELETE FROM memories WHERE created_at < ?",
|
||||
(cutoff,),
|
||||
)
|
||||
|
||||
@@ -529,8 +575,8 @@ def recall_personal_facts(agent_id: str | None = None) -> list[str]:
|
||||
rows = conn.execute(
|
||||
"""
|
||||
SELECT content FROM memories
|
||||
WHERE context_type = 'fact' AND agent_id = ?
|
||||
ORDER BY timestamp DESC
|
||||
WHERE memory_type = 'fact' AND agent_id = ?
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 100
|
||||
""",
|
||||
(agent_id,),
|
||||
@@ -539,8 +585,8 @@ def recall_personal_facts(agent_id: str | None = None) -> list[str]:
|
||||
rows = conn.execute(
|
||||
"""
|
||||
SELECT content FROM memories
|
||||
WHERE context_type = 'fact'
|
||||
ORDER BY timestamp DESC
|
||||
WHERE memory_type = 'fact'
|
||||
ORDER BY created_at DESC
|
||||
LIMIT 100
|
||||
""",
|
||||
).fetchall()
|
||||
@@ -553,12 +599,12 @@ def recall_personal_facts_with_ids(agent_id: str | None = None) -> list[dict]:
|
||||
with get_connection() as conn:
|
||||
if agent_id:
|
||||
rows = conn.execute(
|
||||
"SELECT id, content FROM memories WHERE context_type = 'fact' AND agent_id = ? ORDER BY timestamp DESC LIMIT 100",
|
||||
"SELECT id, content FROM memories WHERE memory_type = 'fact' AND agent_id = ? ORDER BY created_at DESC LIMIT 100",
|
||||
(agent_id,),
|
||||
).fetchall()
|
||||
else:
|
||||
rows = conn.execute(
|
||||
"SELECT id, content FROM memories WHERE context_type = 'fact' ORDER BY timestamp DESC LIMIT 100",
|
||||
"SELECT id, content FROM memories WHERE memory_type = 'fact' ORDER BY created_at DESC LIMIT 100",
|
||||
).fetchall()
|
||||
return [{"id": r["id"], "content": r["content"]} for r in rows]
|
||||
|
||||
@@ -567,7 +613,7 @@ def update_personal_fact(memory_id: str, new_content: str) -> bool:
|
||||
"""Update a personal fact's content."""
|
||||
with get_connection() as conn:
|
||||
cursor = conn.execute(
|
||||
"UPDATE memories SET content = ? WHERE id = ? AND context_type = 'fact'",
|
||||
"UPDATE memories SET content = ? WHERE id = ? AND memory_type = 'fact'",
|
||||
(new_content, memory_id),
|
||||
)
|
||||
conn.commit()
|
||||
@@ -594,12 +640,12 @@ def store_personal_fact(fact: str, agent_id: str | None = None) -> MemoryEntry:
|
||||
|
||||
|
||||
# ───────────────────────────────────────────────────────────────────────────────
|
||||
# Hot Memory (MEMORY.md)
|
||||
# Hot Memory (computed from DB instead of MEMORY.md)
|
||||
# ───────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
class HotMemory:
|
||||
"""Tier 1: Hot memory (MEMORY.md) — always loaded."""
|
||||
"""Tier 1: Hot memory — computed view of top facts from DB."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.path = HOT_MEMORY_PATH
|
||||
@@ -607,22 +653,36 @@ class HotMemory:
|
||||
self._last_modified: float | None = None
|
||||
|
||||
def read(self, force_refresh: bool = False) -> str:
|
||||
"""Read hot memory, with caching."""
|
||||
if not self.path.exists():
|
||||
self._create_default()
|
||||
|
||||
# Check if file changed
|
||||
current_mtime = self.path.stat().st_mtime
|
||||
if not force_refresh and self._content and self._last_modified == current_mtime:
|
||||
return self._content
|
||||
|
||||
self._content = self.path.read_text()
|
||||
self._last_modified = current_mtime
|
||||
logger.debug("HotMemory: Loaded %d chars from %s", len(self._content), self.path)
|
||||
return self._content
|
||||
"""Read hot memory — computed view of top facts from DB."""
|
||||
try:
|
||||
facts = recall_personal_facts()
|
||||
if facts:
|
||||
lines = ["# Timmy Hot Memory\n", "## Known Facts\n"]
|
||||
for f in facts[:15]:
|
||||
lines.append(f"- {f}")
|
||||
return "\n".join(lines)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Fallback to file if DB unavailable
|
||||
if self.path.exists():
|
||||
return self.path.read_text()
|
||||
|
||||
return "# Timmy Hot Memory\n\nNo memories stored yet.\n"
|
||||
|
||||
def update_section(self, section: str, content: str) -> None:
|
||||
"""Update a specific section in MEMORY.md."""
|
||||
"""Update a specific section in MEMORY.md.
|
||||
|
||||
DEPRECATED: Hot memory is now computed from the database.
|
||||
This method is kept for backward compatibility during transition.
|
||||
Use memory_write() to store facts in the database.
|
||||
"""
|
||||
logger.warning(
|
||||
"HotMemory.update_section() is deprecated. "
|
||||
"Use memory_write() to store facts in the database."
|
||||
)
|
||||
|
||||
# Keep file-writing for backward compatibility during transition
|
||||
# Guard against empty or excessively large writes
|
||||
if not content or not content.strip():
|
||||
logger.warning("HotMemory: Refusing empty write to section '%s'", section)
|
||||
@@ -631,6 +691,9 @@ class HotMemory:
|
||||
logger.warning("HotMemory: Truncating oversized write to section '%s'", section)
|
||||
content = content[:2000] + "\n... [truncated]"
|
||||
|
||||
if not self.path.exists():
|
||||
self._create_default()
|
||||
|
||||
full_content = self.read()
|
||||
|
||||
# Find section
|
||||
@@ -655,7 +718,12 @@ class HotMemory:
|
||||
logger.info("HotMemory: Updated section '%s'", section)
|
||||
|
||||
def _create_default(self) -> None:
|
||||
"""Create default MEMORY.md if missing."""
|
||||
"""Create default MEMORY.md if missing.
|
||||
|
||||
DEPRECATED: Hot memory is now computed from the database.
|
||||
This method is kept for backward compatibility during transition.
|
||||
"""
|
||||
logger.debug("HotMemory._create_default() - creating default MEMORY.md for backward compatibility")
|
||||
default_content = """# Timmy Hot Memory
|
||||
|
||||
> Working RAM — always loaded, ~300 lines max, pruned monthly
|
||||
@@ -770,28 +838,33 @@ class VaultMemory:
|
||||
return filepath.read_text()
|
||||
|
||||
def update_user_profile(self, key: str, value: str) -> None:
|
||||
"""Update a field in user_profile.md."""
|
||||
"""Update a field in user_profile.md.
|
||||
|
||||
DEPRECATED: User profile updates should now use memory_write() to store
|
||||
facts in the database. This method is kept for backward compatibility.
|
||||
"""
|
||||
logger.warning(
|
||||
"VaultMemory.update_user_profile() is deprecated. "
|
||||
"Use memory_write() to store user facts in the database."
|
||||
)
|
||||
# Still update the file for backward compatibility during transition
|
||||
profile_path = self.path / "self" / "user_profile.md"
|
||||
|
||||
if not profile_path.exists():
|
||||
# Create default profile
|
||||
self._create_default_profile()
|
||||
|
||||
content = profile_path.read_text()
|
||||
|
||||
# Simple pattern replacement — use lambda to avoid regex-interpreting value
|
||||
pattern = rf"(\*\*{re.escape(key)}:\*\*).*"
|
||||
if re.search(pattern, content):
|
||||
safe_value = value.strip()
|
||||
content = re.sub(pattern, lambda m: f"{m.group(1)} {safe_value}", content)
|
||||
else:
|
||||
# Add to Important Facts section
|
||||
facts_section = "## Important Facts"
|
||||
if facts_section in content:
|
||||
insert_point = content.find(facts_section) + len(facts_section)
|
||||
content = content[:insert_point] + f"\n- {key}: {value}" + content[insert_point:]
|
||||
|
||||
# Update last_updated
|
||||
content = re.sub(
|
||||
r"\*Last updated:.*\*",
|
||||
f"*Last updated: {datetime.now(UTC).strftime('%Y-%m-%d')}*",
|
||||
@@ -870,20 +943,23 @@ class SemanticMemory:
|
||||
CREATE TABLE IF NOT EXISTS memories (
|
||||
id TEXT PRIMARY KEY,
|
||||
content TEXT NOT NULL,
|
||||
memory_type TEXT NOT NULL DEFAULT 'fact',
|
||||
source TEXT NOT NULL DEFAULT 'agent',
|
||||
context_type TEXT NOT NULL DEFAULT 'conversation',
|
||||
embedding TEXT,
|
||||
metadata TEXT,
|
||||
source_hash TEXT,
|
||||
agent_id TEXT,
|
||||
task_id TEXT,
|
||||
session_id TEXT,
|
||||
metadata TEXT,
|
||||
embedding TEXT,
|
||||
timestamp TEXT NOT NULL,
|
||||
access_count INTEGER NOT NULL DEFAULT 0,
|
||||
last_accessed TEXT
|
||||
confidence REAL NOT NULL DEFAULT 0.8,
|
||||
tags TEXT NOT NULL DEFAULT '[]',
|
||||
created_at TEXT NOT NULL,
|
||||
last_accessed TEXT,
|
||||
access_count INTEGER NOT NULL DEFAULT 0
|
||||
)
|
||||
""")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(context_type)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_timestamp ON memories(timestamp)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(memory_type)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_time ON memories(created_at)")
|
||||
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_source ON memories(source)")
|
||||
conn.commit()
|
||||
yield conn
|
||||
@@ -892,7 +968,7 @@ class SemanticMemory:
|
||||
"""Initialize database at self.db_path (backward compatibility).
|
||||
|
||||
This method is kept for backward compatibility with existing code and tests.
|
||||
Creates the memories table schema at the instance's db_path.
|
||||
Schema creation is handled by _get_conn.
|
||||
"""
|
||||
# Trigger schema creation via _get_conn
|
||||
with self._get_conn():
|
||||
@@ -909,7 +985,7 @@ class SemanticMemory:
|
||||
with self._get_conn() as conn:
|
||||
# Check if already indexed with same hash
|
||||
cursor = conn.execute(
|
||||
"SELECT metadata FROM memories WHERE source = ? AND context_type = 'chunk' LIMIT 1",
|
||||
"SELECT metadata FROM memories WHERE source = ? AND memory_type = 'vault_chunk' LIMIT 1",
|
||||
(str(filepath),)
|
||||
)
|
||||
existing = cursor.fetchone()
|
||||
@@ -923,7 +999,7 @@ class SemanticMemory:
|
||||
|
||||
# Delete old chunks for this file
|
||||
conn.execute(
|
||||
"DELETE FROM memories WHERE source = ? AND context_type = 'chunk'",
|
||||
"DELETE FROM memories WHERE source = ? AND memory_type = 'vault_chunk'",
|
||||
(str(filepath),)
|
||||
)
|
||||
|
||||
@@ -941,13 +1017,13 @@ class SemanticMemory:
|
||||
|
||||
conn.execute(
|
||||
"""INSERT INTO memories
|
||||
(id, content, source, context_type, metadata, embedding, timestamp)
|
||||
(id, content, memory_type, source, metadata, embedding, created_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)""",
|
||||
(
|
||||
chunk_id,
|
||||
chunk_text,
|
||||
"vault_chunk",
|
||||
str(filepath),
|
||||
"chunk",
|
||||
json.dumps({"source_hash": file_hash, "chunk_index": i}),
|
||||
json.dumps(chunk_embedding),
|
||||
now,
|
||||
@@ -1011,9 +1087,9 @@ class SemanticMemory:
|
||||
with self._get_conn() as conn:
|
||||
conn.row_factory = sqlite3.Row
|
||||
|
||||
# Get all chunks
|
||||
# Get all vault chunks
|
||||
rows = conn.execute(
|
||||
"SELECT source, content, embedding FROM memories WHERE context_type = 'chunk'"
|
||||
"SELECT source, content, embedding FROM memories WHERE memory_type = 'vault_chunk'"
|
||||
).fetchall()
|
||||
|
||||
# Calculate similarities
|
||||
@@ -1056,7 +1132,7 @@ class SemanticMemory:
|
||||
"""Get indexing statistics."""
|
||||
with self._get_conn() as conn:
|
||||
cursor = conn.execute(
|
||||
"SELECT COUNT(*), COUNT(DISTINCT source) FROM memories WHERE context_type = 'chunk'"
|
||||
"SELECT COUNT(*), COUNT(DISTINCT source) FROM memories WHERE memory_type = 'vault_chunk'"
|
||||
)
|
||||
total_chunks, total_files = cursor.fetchone()
|
||||
|
||||
@@ -1289,13 +1365,17 @@ class MemorySystem:
|
||||
self.session_decisions: list[str] = []
|
||||
|
||||
def end_session(self, summary: str) -> None:
|
||||
"""End session (retained for API compatibility)."""
|
||||
# Update hot memory
|
||||
self.hot.update_section(
|
||||
"Current Session",
|
||||
f"**Last Session:** {datetime.now(UTC).strftime('%Y-%m-%d %H:%M')}\n"
|
||||
+ f"**Summary:** {summary[:100]}...",
|
||||
)
|
||||
"""End session - stores session summary in database."""
|
||||
# Store session end fact in database instead of updating MEMORY.md
|
||||
try:
|
||||
store_memory(
|
||||
content=f"Session ended: {summary}",
|
||||
source="system",
|
||||
context_type="conversation",
|
||||
metadata={"event": "session_end"},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to store session end: %s", exc)
|
||||
|
||||
logger.info("MemorySystem: Session ended")
|
||||
|
||||
@@ -1304,40 +1384,17 @@ class MemorySystem:
|
||||
self.session_decisions.append(decision)
|
||||
|
||||
def update_user_fact(self, key: str, value: str) -> None:
|
||||
"""Update user profile in vault."""
|
||||
self.vault.update_user_profile(key, value)
|
||||
# Also update hot memory
|
||||
if key.lower() == "name":
|
||||
self.hot.update_section("User Profile", f"**Name:** {value}")
|
||||
|
||||
def _load_user_profile_summary(self) -> str:
|
||||
"""Load condensed user profile."""
|
||||
profile_path = self.vault.path / "self" / "user_profile.md"
|
||||
if not profile_path.exists():
|
||||
return ""
|
||||
|
||||
content = profile_path.read_text()
|
||||
|
||||
# Extract key fields
|
||||
summary_parts = []
|
||||
|
||||
# Name
|
||||
name_match = re.search(r"\*\*Name:\*\* (.+)", content)
|
||||
if name_match and "unknown" not in name_match.group(1).lower():
|
||||
summary_parts.append(f"Name: {name_match.group(1).strip()}")
|
||||
|
||||
# Interests
|
||||
interests_section = re.search(r"## Interests.*?\n- (.+?)(?=\n## |\Z)", content, re.DOTALL)
|
||||
if interests_section:
|
||||
interests = [
|
||||
i.strip()
|
||||
for i in interests_section.group(1).split("\n-")
|
||||
if i.strip() and "to be" not in i
|
||||
]
|
||||
if interests:
|
||||
summary_parts.append(f"Interests: {', '.join(interests[:3])}")
|
||||
|
||||
return "\n".join(summary_parts) if summary_parts else ""
|
||||
"""Update user fact in the database."""
|
||||
# Store as a fact in the database
|
||||
try:
|
||||
store_memory(
|
||||
content=f"{key}: {value}",
|
||||
source="system",
|
||||
context_type="fact",
|
||||
metadata={"key": key, "value": value},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.warning("Failed to store user fact: %s", exc)
|
||||
|
||||
def read_soul(self) -> str:
|
||||
"""Read soul.md — Timmy's core identity. Returns empty string if missing."""
|
||||
@@ -1357,41 +1414,14 @@ class MemorySystem:
|
||||
if soul_content:
|
||||
context_parts.append("## Soul Identity\n" + soul_content)
|
||||
|
||||
# 1. Hot memory
|
||||
# 1. Hot memory (computed from DB facts)
|
||||
hot_content = self.hot.read()
|
||||
context_parts.append("## Hot Memory\n" + hot_content)
|
||||
|
||||
# 2. User profile (key fields only)
|
||||
profile = self._load_user_profile_summary()
|
||||
if profile:
|
||||
context_parts.append("## User Context\n" + profile)
|
||||
|
||||
# 3. Known facts from long-term memory
|
||||
facts_section = self._load_known_facts()
|
||||
if facts_section:
|
||||
context_parts.append(facts_section)
|
||||
# 2. User facts are now included in hot memory via recall_personal_facts
|
||||
|
||||
return "\n\n---\n\n".join(context_parts)
|
||||
|
||||
def _load_known_facts(self, limit: int = 10) -> str:
|
||||
"""Load top facts from the memories table (context_type='fact').
|
||||
|
||||
Returns a formatted section string, or empty string if no facts exist.
|
||||
"""
|
||||
try:
|
||||
facts = recall_personal_facts()
|
||||
if not facts:
|
||||
return ""
|
||||
# Cap at limit
|
||||
facts = facts[:limit]
|
||||
lines = ["## Known Facts\n"]
|
||||
for fact in facts:
|
||||
lines.append(f"- {fact[:200]}")
|
||||
return "\n".join(lines)
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to load known facts: %s", exc)
|
||||
return ""
|
||||
|
||||
|
||||
# ── Lazy singleton ────────────────────────────────────────────────────────────
|
||||
_memory_system: MemorySystem | None = None
|
||||
|
||||
@@ -288,10 +288,10 @@ class TestSemanticMemory:
|
||||
mem.index_file(md_file)
|
||||
|
||||
# Check DB directly - tiny chunks should NOT be stored
|
||||
# After consolidation: chunks are stored in 'memories' table with context_type='chunk'
|
||||
# After consolidation: chunks are stored in 'memories' table with memory_type='vault_chunk'
|
||||
conn = sqlite3.connect(str(mem.db_path))
|
||||
cursor = conn.execute(
|
||||
"SELECT COUNT(*) FROM memories WHERE source = ? AND context_type = 'chunk'",
|
||||
"SELECT COUNT(*) FROM memories WHERE source = ? AND memory_type = 'vault_chunk'",
|
||||
(str(md_file),)
|
||||
)
|
||||
stored_count = cursor.fetchone()[0]
|
||||
@@ -325,15 +325,15 @@ class TestSemanticMemory:
|
||||
import sqlite3
|
||||
|
||||
conn = sqlite3.connect(str(mem.db_path))
|
||||
# After consolidation: chunks are stored in 'memories' table with context_type='chunk'
|
||||
conn.execute("DELETE FROM memories WHERE context_type = 'chunk'")
|
||||
# After consolidation: chunks are stored in 'memories' table with memory_type='vault_chunk'
|
||||
conn.execute("DELETE FROM memories WHERE memory_type = 'vault_chunk'")
|
||||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
mem.index_vault()
|
||||
conn = sqlite3.connect(str(mem.db_path))
|
||||
rows = conn.execute(
|
||||
"SELECT DISTINCT source FROM memories WHERE context_type = 'chunk'"
|
||||
"SELECT DISTINCT source FROM memories WHERE memory_type = 'vault_chunk'"
|
||||
).fetchall()
|
||||
conn.close()
|
||||
sources = [r[0] for r in rows]
|
||||
|
||||
Reference in New Issue
Block a user