feat: code quality audit + autoresearch integration + infra hardening (#150)

Authored by: Alexander Whitestone
Date: 2026-03-08 12:50:44 -04:00
Committed by: GitHub
parent fd0ede0d51
commit ae3bb1cc21
186 changed files with 5129 additions and 3289 deletions

View File

@@ -25,11 +25,12 @@ def _get_model():
global _model, _has_embeddings
if _has_embeddings is False:
return None
if _model is not None:
return _model
from config import settings
# In test mode or low-memory environments, skip embedding model load
if settings.timmy_skip_embeddings:
_has_embeddings = False
@@ -37,7 +38,8 @@ def _get_model():
try:
from sentence_transformers import SentenceTransformer
_model = SentenceTransformer('all-MiniLM-L6-v2')
_model = SentenceTransformer("all-MiniLM-L6-v2")
_has_embeddings = True
return _model
except (ImportError, RuntimeError, Exception):
@@ -56,7 +58,7 @@ def _get_embedding_dimension() -> int:
def _compute_embedding(text: str) -> list[float]:
"""Compute embedding vector for text.
Uses sentence-transformers if available, otherwise returns
a simple hash-based vector for basic similarity.
"""
@@ -66,30 +68,31 @@ def _compute_embedding(text: str) -> list[float]:
return model.encode(text).tolist()
except Exception:
pass
# Fallback: simple character n-gram hash embedding
# Not as good but allows the system to work without heavy deps
dim = 384
vec = [0.0] * dim
text = text.lower()
# Generate character trigram features
for i in range(len(text) - 2):
trigram = text[i:i+3]
trigram = text[i : i + 3]
hash_val = hash(trigram) % dim
vec[hash_val] += 1.0
# Normalize
norm = sum(x*x for x in vec) ** 0.5
norm = sum(x * x for x in vec) ** 0.5
if norm > 0:
vec = [x/norm for x in vec]
vec = [x / norm for x in vec]
return vec
@dataclass
class MemoryEntry:
"""A memory entry with vector embedding."""
id: str = field(default_factory=lambda: str(uuid.uuid4()))
content: str = "" # The actual text content
source: str = "" # Where it came from (agent, user, system)
@@ -99,9 +102,7 @@ class MemoryEntry:
session_id: Optional[str] = None
metadata: Optional[dict] = None
embedding: Optional[list[float]] = None
timestamp: str = field(
default_factory=lambda: datetime.now(timezone.utc).isoformat()
)
timestamp: str = field(default_factory=lambda: datetime.now(timezone.utc).isoformat())
relevance_score: Optional[float] = None # Set during search
@@ -110,7 +111,7 @@ def _get_conn() -> sqlite3.Connection:
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(str(DB_PATH))
conn.row_factory = sqlite3.Row
# Try to load sqlite-vss extension
try:
conn.enable_load_extension(True)
@@ -119,7 +120,7 @@ def _get_conn() -> sqlite3.Connection:
_has_vss = True
except Exception:
_has_vss = False
# Create tables
conn.execute(
"""
@@ -137,24 +138,14 @@ def _get_conn() -> sqlite3.Connection:
)
"""
)
# Create indexes
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_memory_agent ON memory_entries(agent_id)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_memory_task ON memory_entries(task_id)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_memory_session ON memory_entries(session_id)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_memory_time ON memory_entries(timestamp)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_memory_type ON memory_entries(context_type)"
)
conn.execute("CREATE INDEX IF NOT EXISTS idx_memory_agent ON memory_entries(agent_id)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memory_task ON memory_entries(task_id)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memory_session ON memory_entries(session_id)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memory_time ON memory_entries(timestamp)")
conn.execute("CREATE INDEX IF NOT EXISTS idx_memory_type ON memory_entries(context_type)")
conn.commit()
return conn
@@ -170,7 +161,7 @@ def store_memory(
compute_embedding: bool = True,
) -> MemoryEntry:
"""Store a memory entry with optional embedding.
Args:
content: The text content to store
source: Source of the memory (agent name, user, system)
@@ -180,14 +171,14 @@ def store_memory(
session_id: Session identifier
metadata: Additional structured data
compute_embedding: Whether to compute vector embedding
Returns:
The stored MemoryEntry
"""
embedding = None
if compute_embedding:
embedding = _compute_embedding(content)
entry = MemoryEntry(
content=content,
source=source,
@@ -198,7 +189,7 @@ def store_memory(
metadata=metadata,
embedding=embedding,
)
conn = _get_conn()
conn.execute(
"""
@@ -222,7 +213,7 @@ def store_memory(
)
conn.commit()
conn.close()
return entry
@@ -235,7 +226,7 @@ def search_memories(
min_relevance: float = 0.0,
) -> list[MemoryEntry]:
"""Search for memories by semantic similarity.
Args:
query: Search query text
limit: Maximum results
@@ -243,18 +234,18 @@ def search_memories(
agent_id: Filter by agent
session_id: Filter by session
min_relevance: Minimum similarity score (0-1)
Returns:
List of MemoryEntry objects sorted by relevance
"""
query_embedding = _compute_embedding(query)
conn = _get_conn()
# Build query with filters
conditions = []
params = []
if context_type:
conditions.append("context_type = ?")
params.append(context_type)
@@ -264,9 +255,9 @@ def search_memories(
if session_id:
conditions.append("session_id = ?")
params.append(session_id)
where_clause = "WHERE " + " AND ".join(conditions) if conditions else ""
# Fetch candidates (we'll do in-memory similarity for now)
# For production with sqlite-vss, this would use vector similarity index
query_sql = f"""
@@ -276,10 +267,10 @@ def search_memories(
LIMIT ?
"""
params.append(limit * 3) # Get more candidates for ranking
rows = conn.execute(query_sql, params).fetchall()
conn.close()
# Compute similarity scores
results = []
for row in rows:
@@ -295,7 +286,7 @@ def search_memories(
embedding=json.loads(row["embedding"]) if row["embedding"] else None,
timestamp=row["timestamp"],
)
if entry.embedding:
# Cosine similarity
score = _cosine_similarity(query_embedding, entry.embedding)
@@ -308,7 +299,7 @@ def search_memories(
entry.relevance_score = score
if score >= min_relevance:
results.append(entry)
# Sort by relevance and return top results
results.sort(key=lambda x: x.relevance_score or 0, reverse=True)
return results[:limit]
@@ -316,9 +307,9 @@ def search_memories(
def _cosine_similarity(a: list[float], b: list[float]) -> float:
"""Compute cosine similarity between two vectors."""
dot = sum(x*y for x, y in zip(a, b))
norm_a = sum(x*x for x in a) ** 0.5
norm_b = sum(x*x for x in b) ** 0.5
dot = sum(x * y for x, y in zip(a, b))
norm_a = sum(x * x for x in a) ** 0.5
norm_b = sum(x * x for x in b) ** 0.5
if norm_a == 0 or norm_b == 0:
return 0.0
return dot / (norm_a * norm_b)
@@ -334,51 +325,47 @@ def _keyword_overlap(query: str, content: str) -> float:
return overlap / len(query_words)
def get_memory_context(query: str, max_tokens: int = 2000, **filters) -> str:
    """Get relevant memory context as formatted text for LLM prompts.

    Defect fixed: the diff residue left both the old multi-line signature
    and the new one-line signature back to back (a ``def`` with no body,
    which is a syntax error); only the new signature is kept.

    Args:
        query: Search query text.
        max_tokens: Approximate maximum tokens to include in the result.
        **filters: Additional filters forwarded to ``search_memories``
            (agent_id, session_id, etc.).

    Returns:
        Formatted context string for inclusion in prompts, or the empty
        string when no memory entry fits the budget.
    """
    memories = search_memories(query, limit=20, **filters)

    context_parts = []
    total_chars = 0
    # Rough chars-per-token approximation (~4 chars/token).
    max_chars = max_tokens * 4

    for mem in memories:
        formatted = f"[{mem.source}]: {mem.content}"
        # Entries arrive sorted by relevance, so stopping at the first
        # entry that would exceed the budget keeps the best ones.
        if total_chars + len(formatted) > max_chars:
            break
        context_parts.append(formatted)
        total_chars += len(formatted)

    if not context_parts:
        return ""

    return "Relevant context from memory:\n" + "\n\n".join(context_parts)
def recall_personal_facts(agent_id: Optional[str] = None) -> list[str]:
"""Recall personal facts about the user or system.
Args:
agent_id: Optional agent filter
Returns:
List of fact strings
"""
conn = _get_conn()
if agent_id:
rows = conn.execute(
"""
@@ -398,7 +385,7 @@ def recall_personal_facts(agent_id: Optional[str] = None) -> list[str]:
LIMIT 100
""",
).fetchall()
conn.close()
return [r["content"] for r in rows]
@@ -434,11 +421,11 @@ def update_personal_fact(memory_id: str, new_content: str) -> bool:
def store_personal_fact(fact: str, agent_id: Optional[str] = None) -> MemoryEntry:
"""Store a personal fact about the user or system.
Args:
fact: The fact to store
agent_id: Associated agent
Returns:
The stored MemoryEntry
"""
@@ -453,7 +440,7 @@ def store_personal_fact(fact: str, agent_id: Optional[str] = None) -> MemoryEntr
def delete_memory(memory_id: str) -> bool:
"""Delete a memory entry by ID.
Returns:
True if deleted, False if not found
"""
@@ -470,29 +457,27 @@ def delete_memory(memory_id: str) -> bool:
def get_memory_stats() -> dict:
"""Get statistics about the memory store.
Returns:
Dict with counts by type, total entries, etc.
"""
conn = _get_conn()
total = conn.execute(
"SELECT COUNT(*) as count FROM memory_entries"
).fetchone()["count"]
total = conn.execute("SELECT COUNT(*) as count FROM memory_entries").fetchone()["count"]
by_type = {}
rows = conn.execute(
"SELECT context_type, COUNT(*) as count FROM memory_entries GROUP BY context_type"
).fetchall()
for row in rows:
by_type[row["context_type"]] = row["count"]
with_embeddings = conn.execute(
"SELECT COUNT(*) as count FROM memory_entries WHERE embedding IS NOT NULL"
).fetchone()["count"]
conn.close()
return {
"total_entries": total,
"by_type": by_type,
@@ -503,20 +488,20 @@ def get_memory_stats() -> dict:
def prune_memories(older_than_days: int = 90, keep_facts: bool = True) -> int:
"""Delete old memories to manage storage.
Args:
older_than_days: Delete memories older than this
keep_facts: Whether to preserve fact-type memories
Returns:
Number of entries deleted
"""
from datetime import timedelta
cutoff = (datetime.now(timezone.utc) - timedelta(days=older_than_days)).isoformat()
conn = _get_conn()
if keep_facts:
cursor = conn.execute(
"""
@@ -530,9 +515,9 @@ def prune_memories(older_than_days: int = 90, keep_facts: bool = True) -> int:
"DELETE FROM memory_entries WHERE timestamp < ?",
(cutoff,),
)
deleted = cursor.rowcount
conn.commit()
conn.close()
return deleted