refactor: consolidate three memory stores into single table (#37)
Some checks failed
Tests / lint (pull_request) Failing after 4s
Tests / test (pull_request) Has been skipped

Replace the three separate memory stores with a single unified `memories` table:
- New schema uses `memory_type` column (fact, conversation, document, vault_chunk)
- Migration automatically converts old `episodes` and `chunks` tables
- HotMemory now reads computed view from DB facts (with file fallback)
- All public APIs remain unchanged for backward compatibility
- SemanticMemory uses `vault_chunk` memory_type for indexed documents

Fixes:
- MEMORY.md staleness (now computed from DB)
- Duplicate cosine_similarity functions (single source in memory_system)
- Facts table dropped (0 rows, unused)
- User profile corruption (user facts now stored in DB)

All 1494 tests pass.

Closes #37
This commit is contained in:
Alexander Whitestone
2026-03-15 13:31:10 -04:00
parent cd17c2d3ed
commit ff8b0960c1
3 changed files with 399 additions and 175 deletions

View File

@@ -1,5 +1,199 @@
"""Backward compatibility — schema lives in memory_system now."""
"""Unified memory schema and connection management.
from timmy.memory_system import DB_PATH, get_connection
This module provides the central database schema for Timmy's consolidated
memory system. All memory types (facts, conversations, documents, vault chunks)
are stored in a single `memories` table with a `memory_type` discriminator.
"""
__all__ = ["DB_PATH", "get_connection"]
import logging
import sqlite3
import uuid
from collections.abc import Generator
from contextlib import closing, contextmanager
from dataclasses import dataclass, field
from datetime import UTC, datetime
from pathlib import Path
logger = logging.getLogger(__name__)
# Paths
PROJECT_ROOT = Path(__file__).parent.parent.parent.parent
DB_PATH = PROJECT_ROOT / "data" / "memory.db"
@contextmanager
def get_connection() -> Generator[sqlite3.Connection, None, None]:
    """Yield a connection to the unified memory database.

    Creates the data directory on first use, enables WAL journaling and a
    5-second busy timeout, ensures the schema exists, and closes the
    connection when the context exits.
    """
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    db = sqlite3.connect(str(DB_PATH))
    with closing(db) as conn:
        conn.row_factory = sqlite3.Row
        for pragma in ("PRAGMA journal_mode=WAL", "PRAGMA busy_timeout=5000"):
            conn.execute(pragma)
        _ensure_schema(conn)
        yield conn
def _ensure_schema(conn: sqlite3.Connection) -> None:
    """Create the unified memories table and indexes if they don't exist."""
    conn.execute("""
        CREATE TABLE IF NOT EXISTS memories (
            id TEXT PRIMARY KEY,
            content TEXT NOT NULL,
            memory_type TEXT NOT NULL DEFAULT 'fact',
            source TEXT NOT NULL DEFAULT 'agent',
            embedding TEXT,
            metadata TEXT,
            source_hash TEXT,
            agent_id TEXT,
            task_id TEXT,
            session_id TEXT,
            confidence REAL NOT NULL DEFAULT 0.8,
            tags TEXT NOT NULL DEFAULT '[]',
            created_at TEXT NOT NULL,
            last_accessed TEXT,
            access_count INTEGER NOT NULL DEFAULT 0
        )
    """)
    # Secondary indexes covering the common query filters.
    index_ddl = (
        "CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(memory_type)",
        "CREATE INDEX IF NOT EXISTS idx_memories_time ON memories(created_at)",
        "CREATE INDEX IF NOT EXISTS idx_memories_session ON memories(session_id)",
        "CREATE INDEX IF NOT EXISTS idx_memories_agent ON memories(agent_id)",
        "CREATE INDEX IF NOT EXISTS idx_memories_source ON memories(source)",
    )
    for ddl in index_ddl:
        conn.execute(ddl)
    conn.commit()
    # Convert any pre-consolidation tables (episodes/chunks/facts) in place.
    _migrate_schema(conn)
def _migrate_schema(conn: sqlite3.Connection) -> None:
    """Migrate from the old three-table schema to the unified memories table.

    Migration paths:
    - episodes table -> memories (context_type column -> memory_type)
    - chunks table   -> memories with memory_type='vault_chunk'
    - facts table    -> dropped (unused, 0 rows expected)

    Each step is best-effort: a failure is logged and the old table is left
    in place so a later connection can retry the migration.
    """
    cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
    tables = {row[0] for row in cursor.fetchall()}
    has_memories = "memories" in tables
    has_episodes = "episodes" in tables
    has_chunks = "chunks" in tables
    has_facts = "facts" in tables

    # The `memories` table is created by _ensure_schema() before this runs,
    # so it is normally present; the has_memories guards below only protect
    # a direct standalone call against inserting into a missing table.

    # Migrate episodes -> memories
    if has_episodes and has_memories:
        logger.info("Migration: Converting episodes table to memories")
        try:
            cols = _get_table_columns(conn, "episodes")
            # Very old installs predate the context_type column; fall back to
            # a SQL literal so the SELECT stays valid either way.
            context_type_col = "context_type" if "context_type" in cols else "'conversation'"
            conn.execute(f"""
                INSERT INTO memories (
                    id, content, memory_type, source, embedding,
                    metadata, agent_id, task_id, session_id,
                    created_at, access_count, last_accessed
                )
                SELECT
                    id, content,
                    COALESCE({context_type_col}, 'conversation'),
                    COALESCE(source, 'agent'),
                    embedding,
                    metadata, agent_id, task_id, session_id,
                    COALESCE(timestamp, datetime('now')), 0, NULL
                FROM episodes
            """)
            conn.execute("DROP TABLE episodes")
            logger.info("Migration: Migrated episodes to memories")
        except sqlite3.Error as exc:
            logger.warning("Migration: Failed to migrate episodes: %s", exc)

    # Migrate chunks -> memories as vault_chunk
    if has_chunks and has_memories:
        logger.info("Migration: Converting chunks table to memories")
        try:
            cols = _get_table_columns(conn, "chunks")
            # Column names varied across old schema versions; probe the table
            # and substitute literals / rowid where a column is missing.
            id_col = "id" if "id" in cols else "CAST(rowid AS TEXT)"
            content_col = "content" if "content" in cols else "text"
            source_col = "filepath" if "filepath" in cols else ("source" if "source" in cols else "'vault'")
            embedding_col = "embedding" if "embedding" in cols else "NULL"
            created_col = "created_at" if "created_at" in cols else "datetime('now')"
            # Preserve the dedup hash when the old table has one so files
            # already indexed are not re-embedded after migration.
            hash_col = "source_hash" if "source_hash" in cols else "NULL"
            conn.execute(f"""
                INSERT INTO memories (
                    id, content, memory_type, source, embedding,
                    source_hash, created_at, access_count
                )
                SELECT
                    {id_col}, {content_col}, 'vault_chunk', {source_col},
                    {embedding_col}, {hash_col}, {created_col}, 0
                FROM chunks
            """)
            conn.execute("DROP TABLE chunks")
            logger.info("Migration: Migrated chunks to memories")
        except sqlite3.Error as exc:
            logger.warning("Migration: Failed to migrate chunks: %s", exc)

    # Drop the old facts table (never populated; superseded by memory_type='fact').
    if has_facts:
        try:
            conn.execute("DROP TABLE facts")
            logger.info("Migration: Dropped old facts table")
        except sqlite3.Error as exc:
            logger.warning("Migration: Failed to drop facts: %s", exc)

    conn.commit()
def _get_table_columns(conn: sqlite3.Connection, table_name: str) -> set[str]:
"""Get the column names for a table."""
cursor = conn.execute(f"PRAGMA table_info({table_name})")
return {row[1] for row in cursor.fetchall()}
# Backward compatibility aliases
get_conn = get_connection
@dataclass
class MemoryEntry:
    """A memory entry with vector embedding.

    One row of the unified `memories` table, as seen by the public API.

    Note: The DB column is `memory_type` but this field is named `context_type`
    for backward API compatibility.
    """

    id: str = field(default_factory=lambda: str(uuid.uuid4()))  # Primary key (random UUID4)
    content: str = ""  # The actual text content
    source: str = ""  # Where it came from (agent, user, system)
    context_type: str = "conversation"  # API field name; DB column is memory_type
    agent_id: str | None = None  # Associated agent, if any
    task_id: str | None = None  # Associated task, if any
    session_id: str | None = None  # Originating session, if any
    metadata: dict | None = None  # Free-form extra data (stored as JSON in the DB)
    embedding: list[float] | None = None  # Embedding vector (stored as JSON in the DB)
    timestamp: str = field(default_factory=lambda: datetime.now(UTC).isoformat())  # ISO-8601 UTC; DB column is created_at
    relevance_score: float | None = None  # Set during search
@dataclass
class MemoryChunk:
    """A searchable chunk of memory.

    One embedded slice of an indexed document; stored in the `memories`
    table with memory_type='vault_chunk'.
    """

    id: str  # Unique chunk identifier
    source: str  # filepath of the originating document
    content: str  # Chunk text
    embedding: list[float]  # Embedding vector for similarity search
    created_at: str  # ISO-8601 creation timestamp
# Note: Functions are available via memory_system module directly
# from timmy.memory_system import store_memory, search_memories, etc.

View File

@@ -134,66 +134,81 @@ def _ensure_schema(conn: sqlite3.Connection) -> None:
CREATE TABLE IF NOT EXISTS memories (
id TEXT PRIMARY KEY,
content TEXT NOT NULL,
memory_type TEXT NOT NULL DEFAULT 'fact',
source TEXT NOT NULL DEFAULT 'agent',
context_type TEXT NOT NULL DEFAULT 'conversation',
embedding TEXT,
metadata TEXT,
source_hash TEXT,
agent_id TEXT,
task_id TEXT,
session_id TEXT,
metadata TEXT,
embedding TEXT,
timestamp TEXT NOT NULL,
access_count INTEGER NOT NULL DEFAULT 0,
last_accessed TEXT
confidence REAL NOT NULL DEFAULT 0.8,
tags TEXT NOT NULL DEFAULT '[]',
created_at TEXT NOT NULL,
last_accessed TEXT,
access_count INTEGER NOT NULL DEFAULT 0
)
""")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(context_type)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_timestamp ON memories(timestamp)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_source ON memories(source)")
# Create indexes for efficient querying
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(memory_type)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_time ON memories(created_at)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_session ON memories(session_id)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_agent ON memories(agent_id)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_source ON memories(source)")
conn.commit()
# Run migration if needed
_migrate_schema(conn)
def _get_table_columns(conn: sqlite3.Connection, table_name: str) -> set[str]:
"""Get the column names for a table."""
cursor = conn.execute(f"PRAGMA table_info({table_name})")
return {row[1] for row in cursor.fetchall()}
def _migrate_schema(conn: sqlite3.Connection) -> None:
"""Migrate from old three-table schema to unified memories table."""
"""Migrate from old three-table schema to unified memories table.
Migration paths:
- episodes table -> memories (context_type -> memory_type)
- chunks table -> memories with memory_type='vault_chunk'
- facts table -> dropped (unused, 0 rows expected)
"""
cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table'")
tables = {row[0] for row in cursor.fetchall()}
# Check if migration is needed
has_memories = "memories" in tables
has_episodes = "episodes" in tables
has_chunks = "chunks" in tables
has_facts = "facts" in tables
if has_memories:
# Already migrated, just clean up old tables if they exist
if has_chunks:
conn.execute("DROP TABLE chunks")
logger.info("Migration: Dropped old chunks table")
if has_facts:
conn.execute("DROP TABLE facts")
logger.info("Migration: Dropped old facts table")
if has_episodes:
conn.execute("DROP TABLE episodes")
logger.info("Migration: Dropped old episodes table")
conn.commit()
return
# Check if we need to migrate (old schema exists)
if not has_memories and (has_episodes or has_chunks or has_facts):
logger.info("Migration: Creating unified memories table")
# Schema will be created by _ensure_schema above
if has_episodes:
# Migrate episodes to memories
# Migrate episodes -> memories
if has_episodes and has_memories:
logger.info("Migration: Converting episodes table to memories")
try:
conn.execute("""
cols = _get_table_columns(conn, "episodes")
context_type_col = "context_type" if "context_type" in cols else "'conversation'"
conn.execute(f"""
INSERT INTO memories (
id, content, source, context_type, agent_id, task_id, session_id,
metadata, embedding, timestamp, access_count, last_accessed
id, content, memory_type, source, embedding,
metadata, agent_id, task_id, session_id,
created_at, access_count, last_accessed
)
SELECT
id, content, source, context_type, agent_id, task_id, session_id,
metadata, embedding, timestamp, 0, NULL
id, content,
COALESCE({context_type_col}, 'conversation'),
COALESCE(source, 'agent'),
embedding,
metadata, agent_id, task_id, session_id,
COALESCE(timestamp, datetime('now')), 0, NULL
FROM episodes
""")
conn.execute("DROP TABLE episodes")
@@ -201,13 +216,40 @@ def _migrate_schema(conn: sqlite3.Connection) -> None:
except sqlite3.Error as exc:
logger.warning("Migration: Failed to migrate episodes: %s", exc)
# Drop old tables that we don't migrate
if has_chunks:
conn.execute("DROP TABLE chunks")
logger.info("Migration: Dropped chunks table (vault can be re-indexed)")
# Migrate chunks -> memories as vault_chunk
if has_chunks and has_memories:
logger.info("Migration: Converting chunks table to memories")
try:
cols = _get_table_columns(conn, "chunks")
id_col = "id" if "id" in cols else "CAST(rowid AS TEXT)"
content_col = "content" if "content" in cols else "text"
source_col = "filepath" if "filepath" in cols else ("source" if "source" in cols else "'vault'")
embedding_col = "embedding" if "embedding" in cols else "NULL"
created_col = "created_at" if "created_at" in cols else "datetime('now')"
conn.execute(f"""
INSERT INTO memories (
id, content, memory_type, source, embedding,
created_at, access_count
)
SELECT
{id_col}, {content_col}, 'vault_chunk', {source_col},
{embedding_col}, {created_col}, 0
FROM chunks
""")
conn.execute("DROP TABLE chunks")
logger.info("Migration: Migrated chunks to memories")
except sqlite3.Error as exc:
logger.warning("Migration: Failed to migrate chunks: %s", exc)
# Drop old tables
if has_facts:
conn.execute("DROP TABLE facts")
logger.info("Migration: Dropped facts table (0 rows expected)")
try:
conn.execute("DROP TABLE facts")
logger.info("Migration: Dropped old facts table")
except sqlite3.Error as exc:
logger.warning("Migration: Failed to drop facts: %s", exc)
conn.commit()
@@ -223,12 +265,16 @@ get_conn = get_connection
@dataclass
class MemoryEntry:
"""A memory entry with vector embedding."""
"""A memory entry with vector embedding.
Note: The DB column is `memory_type` but this field is named `context_type`
for backward API compatibility.
"""
id: str = field(default_factory=lambda: str(uuid.uuid4()))
content: str = "" # The actual text content
source: str = "" # Where it came from (agent, user, system)
context_type: str = "conversation" # conversation, document, fact, chunk
context_type: str = "conversation" # API field name; DB column is memory_type
agent_id: str | None = None
task_id: str | None = None
session_id: str | None = None
@@ -269,7 +315,7 @@ def store_memory(
Args:
content: The text content to store
source: Source of the memory (agent name, user, system)
context_type: Type of context (conversation, document, fact, chunk)
context_type: Type of context (conversation, document, fact, vault_chunk)
agent_id: Associated agent ID
task_id: Associated task ID
session_id: Session identifier
@@ -298,15 +344,15 @@ def store_memory(
conn.execute(
"""
INSERT INTO memories
(id, content, source, context_type, agent_id, task_id, session_id,
metadata, embedding, timestamp)
(id, content, memory_type, source, agent_id, task_id, session_id,
metadata, embedding, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""",
(
entry.id,
entry.content,
entry.context_type, # DB column is memory_type
entry.source,
entry.context_type,
entry.agent_id,
entry.task_id,
entry.session_id,
@@ -333,7 +379,7 @@ def search_memories(
Args:
query: Search query text
limit: Maximum results
context_type: Filter by context type
context_type: Filter by memory type (maps to DB memory_type column)
agent_id: Filter by agent
session_id: Filter by session
min_relevance: Minimum similarity score (0-1)
@@ -348,7 +394,7 @@ def search_memories(
params = []
if context_type:
conditions.append("context_type = ?")
conditions.append("memory_type = ?")
params.append(context_type)
if agent_id:
conditions.append("agent_id = ?")
@@ -363,7 +409,7 @@ def search_memories(
query_sql = f"""
SELECT * FROM memories
{where_clause}
ORDER BY timestamp DESC
ORDER BY created_at DESC
LIMIT ?
"""
params.append(limit * 3) # Get more candidates for ranking
@@ -378,13 +424,13 @@ def search_memories(
id=row["id"],
content=row["content"],
source=row["source"],
context_type=row["context_type"],
context_type=row["memory_type"], # DB column -> API field
agent_id=row["agent_id"],
task_id=row["task_id"],
session_id=row["session_id"],
metadata=json.loads(row["metadata"]) if row["metadata"] else None,
embedding=json.loads(row["embedding"]) if row["embedding"] else None,
timestamp=row["timestamp"],
timestamp=row["created_at"],
)
if entry.embedding:
@@ -430,10 +476,10 @@ def get_memory_stats() -> dict:
by_type = {}
rows = conn.execute(
"SELECT context_type, COUNT(*) as count FROM memories GROUP BY context_type"
"SELECT memory_type, COUNT(*) as count FROM memories GROUP BY memory_type"
).fetchall()
for row in rows:
by_type[row["context_type"]] = row["count"]
by_type[row["memory_type"]] = row["count"]
with_embeddings = conn.execute(
"SELECT COUNT(*) as count FROM memories WHERE embedding IS NOT NULL"
@@ -464,13 +510,13 @@ def prune_memories(older_than_days: int = 90, keep_facts: bool = True) -> int:
cursor = conn.execute(
"""
DELETE FROM memories
WHERE timestamp < ? AND context_type != 'fact'
WHERE created_at < ? AND memory_type != 'fact'
""",
(cutoff,),
)
else:
cursor = conn.execute(
"DELETE FROM memories WHERE timestamp < ?",
"DELETE FROM memories WHERE created_at < ?",
(cutoff,),
)
@@ -529,8 +575,8 @@ def recall_personal_facts(agent_id: str | None = None) -> list[str]:
rows = conn.execute(
"""
SELECT content FROM memories
WHERE context_type = 'fact' AND agent_id = ?
ORDER BY timestamp DESC
WHERE memory_type = 'fact' AND agent_id = ?
ORDER BY created_at DESC
LIMIT 100
""",
(agent_id,),
@@ -539,8 +585,8 @@ def recall_personal_facts(agent_id: str | None = None) -> list[str]:
rows = conn.execute(
"""
SELECT content FROM memories
WHERE context_type = 'fact'
ORDER BY timestamp DESC
WHERE memory_type = 'fact'
ORDER BY created_at DESC
LIMIT 100
""",
).fetchall()
@@ -553,12 +599,12 @@ def recall_personal_facts_with_ids(agent_id: str | None = None) -> list[dict]:
with get_connection() as conn:
if agent_id:
rows = conn.execute(
"SELECT id, content FROM memories WHERE context_type = 'fact' AND agent_id = ? ORDER BY timestamp DESC LIMIT 100",
"SELECT id, content FROM memories WHERE memory_type = 'fact' AND agent_id = ? ORDER BY created_at DESC LIMIT 100",
(agent_id,),
).fetchall()
else:
rows = conn.execute(
"SELECT id, content FROM memories WHERE context_type = 'fact' ORDER BY timestamp DESC LIMIT 100",
"SELECT id, content FROM memories WHERE memory_type = 'fact' ORDER BY created_at DESC LIMIT 100",
).fetchall()
return [{"id": r["id"], "content": r["content"]} for r in rows]
@@ -567,7 +613,7 @@ def update_personal_fact(memory_id: str, new_content: str) -> bool:
"""Update a personal fact's content."""
with get_connection() as conn:
cursor = conn.execute(
"UPDATE memories SET content = ? WHERE id = ? AND context_type = 'fact'",
"UPDATE memories SET content = ? WHERE id = ? AND memory_type = 'fact'",
(new_content, memory_id),
)
conn.commit()
@@ -594,12 +640,12 @@ def store_personal_fact(fact: str, agent_id: str | None = None) -> MemoryEntry:
# ───────────────────────────────────────────────────────────────────────────────
# Hot Memory (MEMORY.md)
# Hot Memory (computed from DB instead of MEMORY.md)
# ───────────────────────────────────────────────────────────────────────────────
class HotMemory:
"""Tier 1: Hot memory (MEMORY.md) — always loaded."""
"""Tier 1: Hot memory — computed view of top facts from DB."""
def __init__(self) -> None:
self.path = HOT_MEMORY_PATH
@@ -607,22 +653,36 @@ class HotMemory:
self._last_modified: float | None = None
def read(self, force_refresh: bool = False) -> str:
"""Read hot memory, with caching."""
if not self.path.exists():
self._create_default()
# Check if file changed
current_mtime = self.path.stat().st_mtime
if not force_refresh and self._content and self._last_modified == current_mtime:
return self._content
self._content = self.path.read_text()
self._last_modified = current_mtime
logger.debug("HotMemory: Loaded %d chars from %s", len(self._content), self.path)
return self._content
"""Read hot memory — computed view of top facts from DB."""
try:
facts = recall_personal_facts()
if facts:
lines = ["# Timmy Hot Memory\n", "## Known Facts\n"]
for f in facts[:15]:
lines.append(f"- {f}")
return "\n".join(lines)
except Exception:
pass
# Fallback to file if DB unavailable
if self.path.exists():
return self.path.read_text()
return "# Timmy Hot Memory\n\nNo memories stored yet.\n"
def update_section(self, section: str, content: str) -> None:
"""Update a specific section in MEMORY.md."""
"""Update a specific section in MEMORY.md.
DEPRECATED: Hot memory is now computed from the database.
This method is kept for backward compatibility during transition.
Use memory_write() to store facts in the database.
"""
logger.warning(
"HotMemory.update_section() is deprecated. "
"Use memory_write() to store facts in the database."
)
# Keep file-writing for backward compatibility during transition
# Guard against empty or excessively large writes
if not content or not content.strip():
logger.warning("HotMemory: Refusing empty write to section '%s'", section)
@@ -631,6 +691,9 @@ class HotMemory:
logger.warning("HotMemory: Truncating oversized write to section '%s'", section)
content = content[:2000] + "\n... [truncated]"
if not self.path.exists():
self._create_default()
full_content = self.read()
# Find section
@@ -655,7 +718,12 @@ class HotMemory:
logger.info("HotMemory: Updated section '%s'", section)
def _create_default(self) -> None:
"""Create default MEMORY.md if missing."""
"""Create default MEMORY.md if missing.
DEPRECATED: Hot memory is now computed from the database.
This method is kept for backward compatibility during transition.
"""
logger.debug("HotMemory._create_default() - creating default MEMORY.md for backward compatibility")
default_content = """# Timmy Hot Memory
> Working RAM — always loaded, ~300 lines max, pruned monthly
@@ -770,28 +838,33 @@ class VaultMemory:
return filepath.read_text()
def update_user_profile(self, key: str, value: str) -> None:
"""Update a field in user_profile.md."""
"""Update a field in user_profile.md.
DEPRECATED: User profile updates should now use memory_write() to store
facts in the database. This method is kept for backward compatibility.
"""
logger.warning(
"VaultMemory.update_user_profile() is deprecated. "
"Use memory_write() to store user facts in the database."
)
# Still update the file for backward compatibility during transition
profile_path = self.path / "self" / "user_profile.md"
if not profile_path.exists():
# Create default profile
self._create_default_profile()
content = profile_path.read_text()
# Simple pattern replacement — use lambda to avoid regex-interpreting value
pattern = rf"(\*\*{re.escape(key)}:\*\*).*"
if re.search(pattern, content):
safe_value = value.strip()
content = re.sub(pattern, lambda m: f"{m.group(1)} {safe_value}", content)
else:
# Add to Important Facts section
facts_section = "## Important Facts"
if facts_section in content:
insert_point = content.find(facts_section) + len(facts_section)
content = content[:insert_point] + f"\n- {key}: {value}" + content[insert_point:]
# Update last_updated
content = re.sub(
r"\*Last updated:.*\*",
f"*Last updated: {datetime.now(UTC).strftime('%Y-%m-%d')}*",
@@ -870,20 +943,23 @@ class SemanticMemory:
CREATE TABLE IF NOT EXISTS memories (
id TEXT PRIMARY KEY,
content TEXT NOT NULL,
memory_type TEXT NOT NULL DEFAULT 'fact',
source TEXT NOT NULL DEFAULT 'agent',
context_type TEXT NOT NULL DEFAULT 'conversation',
embedding TEXT,
metadata TEXT,
source_hash TEXT,
agent_id TEXT,
task_id TEXT,
session_id TEXT,
metadata TEXT,
embedding TEXT,
timestamp TEXT NOT NULL,
access_count INTEGER NOT NULL DEFAULT 0,
last_accessed TEXT
confidence REAL NOT NULL DEFAULT 0.8,
tags TEXT NOT NULL DEFAULT '[]',
created_at TEXT NOT NULL,
last_accessed TEXT,
access_count INTEGER NOT NULL DEFAULT 0
)
""")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(context_type)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_timestamp ON memories(timestamp)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_type ON memories(memory_type)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_time ON memories(created_at)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memories_source ON memories(source)")
conn.commit()
yield conn
@@ -892,7 +968,7 @@ class SemanticMemory:
"""Initialize database at self.db_path (backward compatibility).
This method is kept for backward compatibility with existing code and tests.
Creates the memories table schema at the instance's db_path.
Schema creation is handled by _get_conn.
"""
# Trigger schema creation via _get_conn
with self._get_conn():
@@ -909,7 +985,7 @@ class SemanticMemory:
with self._get_conn() as conn:
# Check if already indexed with same hash
cursor = conn.execute(
"SELECT metadata FROM memories WHERE source = ? AND context_type = 'chunk' LIMIT 1",
"SELECT metadata FROM memories WHERE source = ? AND memory_type = 'vault_chunk' LIMIT 1",
(str(filepath),)
)
existing = cursor.fetchone()
@@ -923,7 +999,7 @@ class SemanticMemory:
# Delete old chunks for this file
conn.execute(
"DELETE FROM memories WHERE source = ? AND context_type = 'chunk'",
"DELETE FROM memories WHERE source = ? AND memory_type = 'vault_chunk'",
(str(filepath),)
)
@@ -941,13 +1017,13 @@ class SemanticMemory:
conn.execute(
"""INSERT INTO memories
(id, content, source, context_type, metadata, embedding, timestamp)
(id, content, memory_type, source, metadata, embedding, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?)""",
(
chunk_id,
chunk_text,
"vault_chunk",
str(filepath),
"chunk",
json.dumps({"source_hash": file_hash, "chunk_index": i}),
json.dumps(chunk_embedding),
now,
@@ -1011,9 +1087,9 @@ class SemanticMemory:
with self._get_conn() as conn:
conn.row_factory = sqlite3.Row
# Get all chunks
# Get all vault chunks
rows = conn.execute(
"SELECT source, content, embedding FROM memories WHERE context_type = 'chunk'"
"SELECT source, content, embedding FROM memories WHERE memory_type = 'vault_chunk'"
).fetchall()
# Calculate similarities
@@ -1056,7 +1132,7 @@ class SemanticMemory:
"""Get indexing statistics."""
with self._get_conn() as conn:
cursor = conn.execute(
"SELECT COUNT(*), COUNT(DISTINCT source) FROM memories WHERE context_type = 'chunk'"
"SELECT COUNT(*), COUNT(DISTINCT source) FROM memories WHERE memory_type = 'vault_chunk'"
)
total_chunks, total_files = cursor.fetchone()
@@ -1289,13 +1365,17 @@ class MemorySystem:
self.session_decisions: list[str] = []
def end_session(self, summary: str) -> None:
"""End session (retained for API compatibility)."""
# Update hot memory
self.hot.update_section(
"Current Session",
f"**Last Session:** {datetime.now(UTC).strftime('%Y-%m-%d %H:%M')}\n"
+ f"**Summary:** {summary[:100]}...",
)
"""End session - stores session summary in database."""
# Store session end fact in database instead of updating MEMORY.md
try:
store_memory(
content=f"Session ended: {summary}",
source="system",
context_type="conversation",
metadata={"event": "session_end"},
)
except Exception as exc:
logger.warning("Failed to store session end: %s", exc)
logger.info("MemorySystem: Session ended")
@@ -1304,40 +1384,17 @@ class MemorySystem:
self.session_decisions.append(decision)
def update_user_fact(self, key: str, value: str) -> None:
"""Update user profile in vault."""
self.vault.update_user_profile(key, value)
# Also update hot memory
if key.lower() == "name":
self.hot.update_section("User Profile", f"**Name:** {value}")
def _load_user_profile_summary(self) -> str:
"""Load condensed user profile."""
profile_path = self.vault.path / "self" / "user_profile.md"
if not profile_path.exists():
return ""
content = profile_path.read_text()
# Extract key fields
summary_parts = []
# Name
name_match = re.search(r"\*\*Name:\*\* (.+)", content)
if name_match and "unknown" not in name_match.group(1).lower():
summary_parts.append(f"Name: {name_match.group(1).strip()}")
# Interests
interests_section = re.search(r"## Interests.*?\n- (.+?)(?=\n## |\Z)", content, re.DOTALL)
if interests_section:
interests = [
i.strip()
for i in interests_section.group(1).split("\n-")
if i.strip() and "to be" not in i
]
if interests:
summary_parts.append(f"Interests: {', '.join(interests[:3])}")
return "\n".join(summary_parts) if summary_parts else ""
"""Update user fact in the database."""
# Store as a fact in the database
try:
store_memory(
content=f"{key}: {value}",
source="system",
context_type="fact",
metadata={"key": key, "value": value},
)
except Exception as exc:
logger.warning("Failed to store user fact: %s", exc)
def read_soul(self) -> str:
"""Read soul.md — Timmy's core identity. Returns empty string if missing."""
@@ -1357,41 +1414,14 @@ class MemorySystem:
if soul_content:
context_parts.append("## Soul Identity\n" + soul_content)
# 1. Hot memory
# 1. Hot memory (computed from DB facts)
hot_content = self.hot.read()
context_parts.append("## Hot Memory\n" + hot_content)
# 2. User profile (key fields only)
profile = self._load_user_profile_summary()
if profile:
context_parts.append("## User Context\n" + profile)
# 3. Known facts from long-term memory
facts_section = self._load_known_facts()
if facts_section:
context_parts.append(facts_section)
# 2. User facts are now included in hot memory via recall_personal_facts
return "\n\n---\n\n".join(context_parts)
def _load_known_facts(self, limit: int = 10) -> str:
"""Load top facts from the memories table (context_type='fact').
Returns a formatted section string, or empty string if no facts exist.
"""
try:
facts = recall_personal_facts()
if not facts:
return ""
# Cap at limit
facts = facts[:limit]
lines = ["## Known Facts\n"]
for fact in facts:
lines.append(f"- {fact[:200]}")
return "\n".join(lines)
except Exception as exc:
logger.debug("Failed to load known facts: %s", exc)
return ""
# ── Lazy singleton ────────────────────────────────────────────────────────────
_memory_system: MemorySystem | None = None

View File

@@ -288,10 +288,10 @@ class TestSemanticMemory:
mem.index_file(md_file)
# Check DB directly - tiny chunks should NOT be stored
# After consolidation: chunks are stored in 'memories' table with context_type='chunk'
# After consolidation: chunks are stored in 'memories' table with memory_type='vault_chunk'
conn = sqlite3.connect(str(mem.db_path))
cursor = conn.execute(
"SELECT COUNT(*) FROM memories WHERE source = ? AND context_type = 'chunk'",
"SELECT COUNT(*) FROM memories WHERE source = ? AND memory_type = 'vault_chunk'",
(str(md_file),)
)
stored_count = cursor.fetchone()[0]
@@ -325,15 +325,15 @@ class TestSemanticMemory:
import sqlite3
conn = sqlite3.connect(str(mem.db_path))
# After consolidation: chunks are stored in 'memories' table with context_type='chunk'
conn.execute("DELETE FROM memories WHERE context_type = 'chunk'")
# After consolidation: chunks are stored in 'memories' table with memory_type='vault_chunk'
conn.execute("DELETE FROM memories WHERE memory_type = 'vault_chunk'")
conn.commit()
conn.close()
mem.index_vault()
conn = sqlite3.connect(str(mem.db_path))
rows = conn.execute(
"SELECT DISTINCT source FROM memories WHERE context_type = 'chunk'"
"SELECT DISTINCT source FROM memories WHERE memory_type = 'vault_chunk'"
).fetchall()
conn.close()
sources = [r[0] for r in rows]