forked from Rockachopa/Timmy-time-dashboard
This commit is contained in:
@@ -1 +1,7 @@
|
||||
"""Memory — Persistent conversation and knowledge memory."""
|
||||
"""Memory — Persistent conversation and knowledge memory.
|
||||
|
||||
Sub-modules:
|
||||
embeddings — text-to-vector embedding + similarity functions
|
||||
unified — unified memory schema and connection management
|
||||
vector_store — backward compatibility re-exports from memory_system
|
||||
"""
|
||||
|
||||
88
src/timmy/memory/embeddings.py
Normal file
88
src/timmy/memory/embeddings.py
Normal file
@@ -0,0 +1,88 @@
|
||||
"""Embedding functions for Timmy's memory system.
|
||||
|
||||
Provides text-to-vector embedding using sentence-transformers (preferred)
|
||||
with a deterministic hash-based fallback when the ML library is unavailable.
|
||||
|
||||
Also includes vector similarity utilities (cosine similarity, keyword overlap).
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
import math
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Embedding model - small, fast, local
|
||||
EMBEDDING_MODEL = None
|
||||
EMBEDDING_DIM = 384 # MiniLM dimension
|
||||
|
||||
|
||||
def _get_embedding_model():
|
||||
"""Lazy-load embedding model."""
|
||||
global EMBEDDING_MODEL
|
||||
if EMBEDDING_MODEL is None:
|
||||
try:
|
||||
from config import settings
|
||||
|
||||
if settings.timmy_skip_embeddings:
|
||||
EMBEDDING_MODEL = False
|
||||
return EMBEDDING_MODEL
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
EMBEDDING_MODEL = SentenceTransformer("all-MiniLM-L6-v2")
|
||||
logger.info("MemorySystem: Loaded embedding model")
|
||||
except ImportError:
|
||||
logger.warning("MemorySystem: sentence-transformers not installed, using fallback")
|
||||
EMBEDDING_MODEL = False # Use fallback
|
||||
return EMBEDDING_MODEL
|
||||
|
||||
|
||||
def _simple_hash_embedding(text: str) -> list[float]:
|
||||
"""Fallback: Simple hash-based embedding when transformers unavailable."""
|
||||
words = text.lower().split()
|
||||
vec = [0.0] * 128
|
||||
for i, word in enumerate(words[:50]): # First 50 words
|
||||
h = hashlib.md5(word.encode()).hexdigest()
|
||||
for j in range(8):
|
||||
idx = (i * 8 + j) % 128
|
||||
vec[idx] += int(h[j * 2 : j * 2 + 2], 16) / 255.0
|
||||
# Normalize
|
||||
mag = math.sqrt(sum(x * x for x in vec)) or 1.0
|
||||
return [x / mag for x in vec]
|
||||
|
||||
|
||||
def embed_text(text: str) -> list[float]:
|
||||
"""Generate embedding for text."""
|
||||
model = _get_embedding_model()
|
||||
if model and model is not False:
|
||||
embedding = model.encode(text)
|
||||
return embedding.tolist()
|
||||
return _simple_hash_embedding(text)
|
||||
|
||||
|
||||
def cosine_similarity(a: list[float], b: list[float]) -> float:
|
||||
"""Calculate cosine similarity between two vectors."""
|
||||
dot = sum(x * y for x, y in zip(a, b, strict=False))
|
||||
mag_a = math.sqrt(sum(x * x for x in a))
|
||||
mag_b = math.sqrt(sum(x * x for x in b))
|
||||
if mag_a == 0 or mag_b == 0:
|
||||
return 0.0
|
||||
return dot / (mag_a * mag_b)
|
||||
|
||||
|
||||
# Alias for backward compatibility
|
||||
_cosine_similarity = cosine_similarity
|
||||
|
||||
|
||||
def _keyword_overlap(query: str, content: str) -> float:
|
||||
"""Simple keyword overlap score as fallback."""
|
||||
query_words = set(query.lower().split())
|
||||
content_words = set(content.lower().split())
|
||||
if not query_words:
|
||||
return 0.0
|
||||
overlap = len(query_words & content_words)
|
||||
return overlap / len(query_words)
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
Architecture:
|
||||
- Database: Single `memories` table with unified schema
|
||||
- Embeddings: Local sentence-transformers with hash fallback
|
||||
- Embeddings: timmy.memory.embeddings (extracted)
|
||||
- CRUD: store_memory, search_memories, delete_memory, etc.
|
||||
- Tool functions: memory_search, memory_read, memory_write, memory_forget
|
||||
- Classes: HotMemory, VaultMemory, MemorySystem, SemanticMemory, MemorySearcher
|
||||
@@ -11,7 +11,6 @@ Architecture:
|
||||
import hashlib
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import re
|
||||
import sqlite3
|
||||
import uuid
|
||||
@@ -21,6 +20,17 @@ from dataclasses import dataclass, field
|
||||
from datetime import UTC, datetime, timedelta
|
||||
from pathlib import Path
|
||||
|
||||
from timmy.memory.embeddings import (
|
||||
EMBEDDING_DIM,
|
||||
EMBEDDING_MODEL, # noqa: F401 — re-exported for backward compatibility
|
||||
_cosine_similarity, # noqa: F401 — re-exported for backward compatibility
|
||||
_get_embedding_model,
|
||||
_keyword_overlap,
|
||||
_simple_hash_embedding, # noqa: F401 — re-exported for backward compatibility
|
||||
cosine_similarity,
|
||||
embed_text,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Paths
|
||||
@@ -30,86 +40,6 @@ VAULT_PATH = PROJECT_ROOT / "memory"
|
||||
SOUL_PATH = VAULT_PATH / "self" / "soul.md"
|
||||
DB_PATH = PROJECT_ROOT / "data" / "memory.db"
|
||||
|
||||
# Embedding model - small, fast, local
|
||||
EMBEDDING_MODEL = None
|
||||
EMBEDDING_DIM = 384 # MiniLM dimension
|
||||
|
||||
|
||||
# ───────────────────────────────────────────────────────────────────────────────
|
||||
# Embedding Functions
|
||||
# ───────────────────────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _get_embedding_model():
|
||||
"""Lazy-load embedding model."""
|
||||
global EMBEDDING_MODEL
|
||||
if EMBEDDING_MODEL is None:
|
||||
try:
|
||||
from config import settings
|
||||
|
||||
if settings.timmy_skip_embeddings:
|
||||
EMBEDDING_MODEL = False
|
||||
return EMBEDDING_MODEL
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
try:
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
EMBEDDING_MODEL = SentenceTransformer("all-MiniLM-L6-v2")
|
||||
logger.info("MemorySystem: Loaded embedding model")
|
||||
except ImportError:
|
||||
logger.warning("MemorySystem: sentence-transformers not installed, using fallback")
|
||||
EMBEDDING_MODEL = False # Use fallback
|
||||
return EMBEDDING_MODEL
|
||||
|
||||
|
||||
def _simple_hash_embedding(text: str) -> list[float]:
|
||||
"""Fallback: Simple hash-based embedding when transformers unavailable."""
|
||||
words = text.lower().split()
|
||||
vec = [0.0] * 128
|
||||
for i, word in enumerate(words[:50]): # First 50 words
|
||||
h = hashlib.md5(word.encode()).hexdigest()
|
||||
for j in range(8):
|
||||
idx = (i * 8 + j) % 128
|
||||
vec[idx] += int(h[j * 2 : j * 2 + 2], 16) / 255.0
|
||||
# Normalize
|
||||
mag = math.sqrt(sum(x * x for x in vec)) or 1.0
|
||||
return [x / mag for x in vec]
|
||||
|
||||
|
||||
def embed_text(text: str) -> list[float]:
|
||||
"""Generate embedding for text."""
|
||||
model = _get_embedding_model()
|
||||
if model and model is not False:
|
||||
embedding = model.encode(text)
|
||||
return embedding.tolist()
|
||||
return _simple_hash_embedding(text)
|
||||
|
||||
|
||||
def cosine_similarity(a: list[float], b: list[float]) -> float:
|
||||
"""Calculate cosine similarity between two vectors."""
|
||||
dot = sum(x * y for x, y in zip(a, b, strict=False))
|
||||
mag_a = math.sqrt(sum(x * x for x in a))
|
||||
mag_b = math.sqrt(sum(x * x for x in b))
|
||||
if mag_a == 0 or mag_b == 0:
|
||||
return 0.0
|
||||
return dot / (mag_a * mag_b)
|
||||
|
||||
|
||||
# Alias for backward compatibility
|
||||
_cosine_similarity = cosine_similarity
|
||||
|
||||
|
||||
def _keyword_overlap(query: str, content: str) -> float:
|
||||
"""Simple keyword overlap score as fallback."""
|
||||
query_words = set(query.lower().split())
|
||||
content_words = set(content.lower().split())
|
||||
if not query_words:
|
||||
return 0.0
|
||||
overlap = len(query_words & content_words)
|
||||
return overlap / len(query_words)
|
||||
|
||||
|
||||
# ───────────────────────────────────────────────────────────────────────────────
|
||||
# Database Connection
|
||||
|
||||
Reference in New Issue
Block a user