- Replace dead `from swarm` imports in tools_delegation and tools_intro with working implementations sourced from _PERSONAS
- Add `memory_write` tool so the agent can actually persist memories when users ask it to remember something
- Enhance `memory_search` to search both vault files AND the runtime vector store for cross-channel recall (Discord/web/Telegram)
- Add memory management config: memory_prune_days, memory_prune_keep_facts, memory_vault_max_mb (sketched below)
- Auto-prune old vector store entries and warn on vault size at startup
- Update tests for new delegation agent list (mace removed)

Co-authored-by: Trip T <trip@local>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
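The new memory-management settings are read from the shared `settings` object (the module below already does `from config import settings` for `timmy_skip_embeddings`). A minimal sketch of what the added fields could look like: the field names come from the commit message above, while the pydantic-style base class, types, and defaults are assumptions for illustration only.

```python
# Hypothetical excerpt of config.py; names from this commit, defaults are illustrative.
from pydantic_settings import BaseSettings


class Settings(BaseSettings):
    timmy_skip_embeddings: bool = False    # already read by the semantic memory module
    memory_prune_days: int = 90            # prune vector-store entries older than this at startup
    memory_prune_keep_facts: bool = True   # never prune entries stored with context_type="fact"
    memory_vault_max_mb: int = 50          # warn at startup when the vault exceeds this size


settings = Settings()
```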
"""Tier 3: Semantic Memory — Vector search over vault files.
|
|
|
|
Uses lightweight local embeddings (no cloud) for similarity search
|
|
over all vault content. This is the "escape valve" when hot memory
|
|
doesn't have the answer.
|
|
|
|
Architecture:
|
|
- Indexes all markdown files in memory/ nightly or on-demand
|
|
- Uses sentence-transformers (local, no API calls)
|
|
- Stores vectors in SQLite (no external vector DB needed)
|
|
- memory_search() retrieves relevant context by similarity
|
|
"""
|
|
|
|
import hashlib
import json
import logging
import math
import sqlite3
from dataclasses import dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Optional

logger = logging.getLogger(__name__)

# Paths
PROJECT_ROOT = Path(__file__).parent.parent.parent
VAULT_PATH = PROJECT_ROOT / "memory"
SEMANTIC_DB_PATH = PROJECT_ROOT / "data" / "semantic_memory.db"

# Embedding model - small, fast, local.
# Uses 'all-MiniLM-L6-v2' (~80MB) or falls back to hash-based pseudo-embeddings.
EMBEDDING_MODEL = None
EMBEDDING_DIM = 384  # MiniLM dimension


def _get_embedding_model():
    """Lazy-load embedding model."""
    global EMBEDDING_MODEL
    if EMBEDDING_MODEL is None:
        from config import settings
        if settings.timmy_skip_embeddings:
            EMBEDDING_MODEL = False
            return EMBEDDING_MODEL
        try:
            from sentence_transformers import SentenceTransformer
            EMBEDDING_MODEL = SentenceTransformer('all-MiniLM-L6-v2')
            logger.info("SemanticMemory: Loaded embedding model")
        except ImportError:
            logger.warning("SemanticMemory: sentence-transformers not installed, using fallback")
            EMBEDDING_MODEL = False  # Use fallback
    return EMBEDDING_MODEL


def _simple_hash_embedding(text: str) -> list[float]:
    """Fallback: Simple hash-based embedding when transformers unavailable."""
    # Create a deterministic pseudo-embedding from word hashes
    words = text.lower().split()
    vec = [0.0] * 128
    for i, word in enumerate(words[:50]):  # First 50 words
        h = hashlib.md5(word.encode()).hexdigest()
        for j in range(8):
            idx = (i * 8 + j) % 128
            vec[idx] += int(h[j*2:j*2+2], 16) / 255.0
    # Normalize
    mag = math.sqrt(sum(x*x for x in vec)) or 1.0
    return [x/mag for x in vec]


def embed_text(text: str) -> list[float]:
    """Generate embedding for text."""
    model = _get_embedding_model()
    if model:  # a loaded SentenceTransformer; False means "use the fallback"
        embedding = model.encode(text)
        return embedding.tolist()
    return _simple_hash_embedding(text)


def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Calculate cosine similarity between two vectors."""
    if len(a) != len(b):
        # Mismatched dimensions (e.g. model vs fallback embeddings) can't be compared meaningfully.
        return 0.0
    dot = sum(x*y for x, y in zip(a, b))
    mag_a = math.sqrt(sum(x*x for x in a))
    mag_b = math.sqrt(sum(x*x for x in b))
    if mag_a == 0 or mag_b == 0:
        return 0.0
    return dot / (mag_a * mag_b)
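
# Quick illustrative check (not executed at import time): identical vectors score 1.0,
# orthogonal vectors score 0.0.
#
#     >>> cosine_similarity([1.0, 0.0], [1.0, 0.0])
#     1.0
#     >>> cosine_similarity([1.0, 0.0], [0.0, 1.0])
#     0.0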


@dataclass
class MemoryChunk:
    """A searchable chunk of memory."""
    id: str
    source: str  # filepath
    content: str
    embedding: list[float]
    created_at: str


class SemanticMemory:
    """Vector-based semantic search over vault content."""

    def __init__(self) -> None:
        self.db_path = SEMANTIC_DB_PATH
        self.vault_path = VAULT_PATH
        self._init_db()

    def _init_db(self) -> None:
        """Initialize SQLite with vector storage."""
        self.db_path.parent.mkdir(parents=True, exist_ok=True)
        conn = sqlite3.connect(str(self.db_path))
        conn.execute("""
            CREATE TABLE IF NOT EXISTS chunks (
                id TEXT PRIMARY KEY,
                source TEXT NOT NULL,
                content TEXT NOT NULL,
                embedding TEXT NOT NULL,  -- JSON array
                created_at TEXT NOT NULL,
                source_hash TEXT NOT NULL
            )
        """)
        conn.execute("CREATE INDEX IF NOT EXISTS idx_source ON chunks(source)")
        conn.commit()
        conn.close()

    def index_file(self, filepath: Path) -> int:
        """Index a single file into semantic memory."""
        if not filepath.exists():
            return 0

        content = filepath.read_text(encoding="utf-8", errors="ignore")
        file_hash = hashlib.md5(content.encode()).hexdigest()

        # Check if already indexed with same hash
        conn = sqlite3.connect(str(self.db_path))
        cursor = conn.execute(
            "SELECT source_hash FROM chunks WHERE source = ? LIMIT 1",
            (str(filepath),)
        )
        existing = cursor.fetchone()
        if existing and existing[0] == file_hash:
            conn.close()
            return 0  # Already indexed

        # Delete old chunks for this file
        conn.execute("DELETE FROM chunks WHERE source = ?", (str(filepath),))

        # Split into chunks (paragraphs)
        chunks = self._split_into_chunks(content)

        # Index each chunk, counting only the ones actually stored
        now = datetime.now(timezone.utc).isoformat()
        indexed = 0
        for i, chunk_text in enumerate(chunks):
            if len(chunk_text.strip()) < 20:  # Skip tiny chunks
                continue

            chunk_id = f"{filepath.stem}_{i}"
            embedding = embed_text(chunk_text)

            conn.execute(
                """INSERT INTO chunks (id, source, content, embedding, created_at, source_hash)
                   VALUES (?, ?, ?, ?, ?, ?)""",
                (chunk_id, str(filepath), chunk_text, json.dumps(embedding), now, file_hash)
            )
            indexed += 1

        conn.commit()
        conn.close()

        logger.info("SemanticMemory: Indexed %s (%d chunks)", filepath.name, indexed)
        return indexed

    def _split_into_chunks(self, text: str, max_chunk_size: int = 500) -> list[str]:
        """Split text into semantic chunks."""
        # Split by paragraphs first
        paragraphs = text.split('\n\n')
        chunks = []

        for para in paragraphs:
            para = para.strip()
            if not para:
                continue

            # If paragraph is small enough, keep as one chunk
            if len(para) <= max_chunk_size:
                chunks.append(para)
            else:
                # Split long paragraphs by sentences
                sentences = para.replace('. ', '.\n').split('\n')
                current_chunk = ""

                for sent in sentences:
                    if len(current_chunk) + len(sent) < max_chunk_size:
                        current_chunk += " " + sent if current_chunk else sent
                    else:
                        if current_chunk:
                            chunks.append(current_chunk.strip())
                        current_chunk = sent

                if current_chunk:
                    chunks.append(current_chunk.strip())

        return chunks

    def index_vault(self) -> int:
        """Index entire vault directory."""
        total_chunks = 0

        for md_file in self.vault_path.rglob("*.md"):
            # Skip handoff file (handled separately)
            if "last-session-handoff" in md_file.name:
                continue
            total_chunks += self.index_file(md_file)

        logger.info("SemanticMemory: Indexed vault (%d total chunks)", total_chunks)
        return total_chunks

    def search(self, query: str, top_k: int = 5) -> list[tuple[str, float]]:
        """Search for relevant memory chunks."""
        query_embedding = embed_text(query)

        conn = sqlite3.connect(str(self.db_path))
        conn.row_factory = sqlite3.Row

        # Get all chunks (in production, use vector index)
        rows = conn.execute(
            "SELECT source, content, embedding FROM chunks"
        ).fetchall()

        conn.close()

        # Calculate similarities
        scored = []
        for row in rows:
            embedding = json.loads(row["embedding"])
            score = cosine_similarity(query_embedding, embedding)
            scored.append((row["source"], row["content"], score))

        # Sort by score descending
        scored.sort(key=lambda x: x[2], reverse=True)

        # Return top_k
        return [(content, score) for _, content, score in scored[:top_k]]

    def get_relevant_context(self, query: str, max_chars: int = 2000) -> str:
        """Get formatted context string for a query."""
        results = self.search(query, top_k=3)

        if not results:
            return ""

        parts = []
        total_chars = 0

        for content, score in results:
            if score < 0.3:  # Similarity threshold
                continue

            chunk = f"[Relevant memory - score {score:.2f}]: {content[:400]}..."
            if total_chars + len(chunk) > max_chars:
                break

            parts.append(chunk)
            total_chars += len(chunk)

        return "\n\n".join(parts) if parts else ""

    def stats(self) -> dict:
        """Get indexing statistics."""
        conn = sqlite3.connect(str(self.db_path))
        cursor = conn.execute("SELECT COUNT(*), COUNT(DISTINCT source) FROM chunks")
        total_chunks, total_files = cursor.fetchone()
        conn.close()

        return {
            "total_chunks": total_chunks,
            "total_files": total_files,
            "embedding_dim": EMBEDDING_DIM if _get_embedding_model() else 128,  # 128 = fallback hash-embedding dim
        }
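
# Illustrative usage (not executed at import time): index the vault, then query it.
#
#     sm = SemanticMemory()
#     sm.index_vault()                               # walk memory/**/*.md and embed each chunk
#     for content, score in sm.search("Bitcoin strategy", top_k=3):
#         print(f"{score:.2f}  {content[:80]}")
#     print(sm.stats())                              # {'total_chunks': ..., 'total_files': ..., 'embedding_dim': ...}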


class MemorySearcher:
    """High-level interface for memory search."""

    def __init__(self) -> None:
        self.semantic = SemanticMemory()

    def search(self, query: str, tiers: Optional[list[str]] = None) -> dict:
        """Search across memory tiers.

        Args:
            query: Search query
            tiers: List of tiers to search ["hot", "vault", "semantic"]

        Returns:
            Dict with results from each tier
        """
        tiers = tiers or ["semantic"]  # Default to semantic only
        results = {}

        if "semantic" in tiers:
            semantic_results = self.semantic.search(query, top_k=5)
            results["semantic"] = [
                {"content": content, "score": score}
                for content, score in semantic_results
            ]

        return results

    def get_context_for_query(self, query: str) -> str:
        """Get comprehensive context for a user query."""
        # Get semantic context
        semantic_context = self.semantic.get_relevant_context(query)

        if semantic_context:
            return f"## Relevant Past Context\n\n{semantic_context}"

        return ""


# Module-level singletons
semantic_memory = SemanticMemory()
memory_searcher = MemorySearcher()
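
# Illustrative call path: downstream channels (web GUI, Discord, Telegram) are expected to
# use these singletons rather than constructing fresh instances.
#
#     context = memory_searcher.get_context_for_query("what did we decide about the server?")
#     hits = memory_searcher.search("server setup", tiers=["semantic"])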


def memory_search(query: str, top_k: int = 5) -> str:
    """Search past conversations, notes, and stored facts for relevant context.

    Searches across both the vault (indexed markdown files) and the
    runtime memory store (facts and conversation fragments stored via
    memory_write).

    Args:
        query: What to search for (e.g. "Bitcoin strategy", "server setup").
        top_k: Number of results to return (default 5).

    Returns:
        Formatted string of relevant memory results.
    """
    # Guard: model sometimes passes None for top_k
    if top_k is None:
        top_k = 5

    parts: list[str] = []

    # 1. Search semantic vault (indexed markdown files)
    vault_results = semantic_memory.search(query, top_k)
    for content, score in vault_results:
        if score < 0.2:
            continue
        parts.append(f"[vault score {score:.2f}] {content[:300]}")

    # 2. Search runtime vector store (stored facts/conversations)
    try:
        from timmy.memory.vector_store import search_memories
        runtime_results = search_memories(query, limit=top_k, min_relevance=0.2)
        for entry in runtime_results:
            label = entry.context_type or "memory"
            parts.append(f"[{label}] {entry.content[:300]}")
    except Exception as exc:
        logger.debug("Vector store search unavailable: %s", exc)

    if not parts:
        return "No relevant memories found."
    return "\n\n".join(parts)
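
# Example of the returned string (illustrative content; the prefixes come from the
# f-strings above: "[vault score X.XX]" for vault hits, "[<context_type>]" for
# runtime-store hits, with results separated by blank lines):
#
#     [vault score 0.61] ## Server setup
#     The droplet was provisioned with ...
#
#     [fact] (a fact previously stored via memory_write)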


def memory_write(content: str, context_type: str = "fact") -> str:
    """Store a piece of information in persistent memory.

    Use this tool when the user explicitly asks you to remember something.
    Stored memories are searchable via memory_search across all channels
    (web GUI, Discord, Telegram, etc.).

    Args:
        content: The information to remember (e.g. a phrase, fact, or note).
        context_type: Type of memory — "fact" for permanent facts,
            "conversation" for conversation context,
            "document" for document fragments.

    Returns:
        Confirmation that the memory was stored.
    """
    if not content or not content.strip():
        return "Nothing to store — content is empty."

    valid_types = ("fact", "conversation", "document")
    if context_type not in valid_types:
        context_type = "fact"

    try:
        from timmy.memory.vector_store import store_memory
        entry = store_memory(
            content=content.strip(),
            source="agent",
            context_type=context_type,
        )
        return f"Stored in memory (type={context_type}, id={entry.id[:8]}). This is now searchable across all channels."
    except Exception as exc:
        logger.error("Failed to write memory: %s", exc)
        return f"Failed to store memory: {exc}"
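

# Minimal smoke test (illustrative only): store a note, then confirm it can be recalled.
# Nothing here runs on import; executing the module directly will write to the real
# vector store, and the write step requires timmy.memory.vector_store to be importable.
if __name__ == "__main__":
    print(memory_write("Remember: the staging server reboots every Sunday."))
    print(memory_search("staging server reboot"))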