Compare commits
1 Commits
main
...
claude/iss
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
aa3cad5707 |
@@ -1,7 +1,10 @@
|
|||||||
"""Memory — Persistent conversation and knowledge memory.
|
"""Memory — Persistent conversation and knowledge memory.
|
||||||
|
|
||||||
Sub-modules:
|
Sub-modules:
|
||||||
embeddings — text-to-vector embedding + similarity functions
|
embeddings — text-to-vector embedding + similarity functions
|
||||||
unified — unified memory schema and connection management
|
unified — unified memory schema and connection management
|
||||||
vector_store — backward compatibility re-exports from memory_system
|
chain — CRUD operations (store, search, delete, stats)
|
||||||
|
semantic — SemanticMemory and MemorySearcher classes
|
||||||
|
consolidation — HotMemory and VaultMemory classes
|
||||||
|
vector_store — backward compatibility re-exports from memory_system
|
||||||
"""
|
"""
|
||||||
|
|||||||
387
src/timmy/memory/chain.py
Normal file
387
src/timmy/memory/chain.py
Normal file
@@ -0,0 +1,387 @@
|
|||||||
|
"""CRUD operations for Timmy's unified memory database.
|
||||||
|
|
||||||
|
Provides store, search, delete, and management functions for the
|
||||||
|
`memories` table defined in timmy.memory.unified.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import sqlite3
|
||||||
|
import uuid
|
||||||
|
from contextlib import contextmanager
|
||||||
|
from datetime import UTC, datetime, timedelta
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from config import settings
|
||||||
|
from timmy.memory.embeddings import (
|
||||||
|
_keyword_overlap,
|
||||||
|
cosine_similarity,
|
||||||
|
embed_text,
|
||||||
|
)
|
||||||
|
from timmy.memory.unified import (
|
||||||
|
DB_PATH,
|
||||||
|
MemoryEntry,
|
||||||
|
_ensure_schema,
|
||||||
|
get_connection,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def store_memory(
    content: str,
    source: str,
    context_type: str = "conversation",
    agent_id: str | None = None,
    task_id: str | None = None,
    session_id: str | None = None,
    metadata: dict | None = None,
    compute_embedding: bool = True,
) -> MemoryEntry:
    """Persist a new row in the `memories` table and return its entry.

    Args:
        content: Text to remember.
        source: Origin of the memory (agent name, user, system).
        context_type: Kind of memory (conversation, document, fact,
            vault_chunk). Written to the `memory_type` DB column.
        agent_id: Agent the memory belongs to, if any.
        task_id: Task the memory belongs to, if any.
        session_id: Session the memory belongs to, if any.
        metadata: Extra structured data; stored as JSON when non-empty.
        compute_embedding: When true, `embed_text(content)` is called and
            the resulting vector is stored alongside the text.

    Returns:
        The MemoryEntry that was inserted.
    """
    vector = embed_text(content) if compute_embedding else None

    entry = MemoryEntry(
        content=content,
        source=source,
        context_type=context_type,
        agent_id=agent_id,
        task_id=task_id,
        session_id=session_id,
        metadata=metadata,
        embedding=vector,
    )

    insert_sql = """
        INSERT INTO memories
        (id, content, memory_type, source, agent_id, task_id, session_id,
         metadata, embedding, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    """

    with get_connection() as conn:
        # Empty metadata / embedding are stored as NULL rather than as JSON.
        row_values = (
            entry.id,
            entry.content,
            entry.context_type,  # API field -> DB column memory_type
            entry.source,
            entry.agent_id,
            entry.task_id,
            entry.session_id,
            json.dumps(metadata) if metadata else None,
            json.dumps(vector) if vector else None,
            entry.timestamp,
        )
        conn.execute(insert_sql, row_values)
        conn.commit()

    return entry
|
||||||
|
|
||||||
|
|
||||||
|
def _build_search_filters(
|
||||||
|
context_type: str | None,
|
||||||
|
agent_id: str | None,
|
||||||
|
session_id: str | None,
|
||||||
|
) -> tuple[str, list]:
|
||||||
|
"""Build SQL WHERE clause and params from search filters."""
|
||||||
|
conditions: list[str] = []
|
||||||
|
params: list = []
|
||||||
|
|
||||||
|
if context_type:
|
||||||
|
conditions.append("memory_type = ?")
|
||||||
|
params.append(context_type)
|
||||||
|
if agent_id:
|
||||||
|
conditions.append("agent_id = ?")
|
||||||
|
params.append(agent_id)
|
||||||
|
if session_id:
|
||||||
|
conditions.append("session_id = ?")
|
||||||
|
params.append(session_id)
|
||||||
|
|
||||||
|
where_clause = "WHERE " + " AND ".join(conditions) if conditions else ""
|
||||||
|
return where_clause, params
|
||||||
|
|
||||||
|
|
||||||
|
def _fetch_memory_candidates(
|
||||||
|
where_clause: str, params: list, candidate_limit: int
|
||||||
|
) -> list[sqlite3.Row]:
|
||||||
|
"""Fetch candidate memory rows from the database."""
|
||||||
|
query_sql = f"""
|
||||||
|
SELECT * FROM memories
|
||||||
|
{where_clause}
|
||||||
|
ORDER BY created_at DESC
|
||||||
|
LIMIT ?
|
||||||
|
"""
|
||||||
|
params.append(candidate_limit)
|
||||||
|
|
||||||
|
with get_connection() as conn:
|
||||||
|
return conn.execute(query_sql, params).fetchall()
|
||||||
|
|
||||||
|
|
||||||
|
def _row_to_entry(row: sqlite3.Row) -> MemoryEntry:
    """Build a MemoryEntry from one `memories` row.

    JSON-encoded `metadata` and `embedding` columns are decoded; empty or
    NULL values become None. The `memory_type` column is exposed as
    `context_type` and `created_at` as `timestamp`.
    """
    raw_metadata = row["metadata"]
    raw_embedding = row["embedding"]

    decoded_metadata = json.loads(raw_metadata) if raw_metadata else None
    decoded_embedding = json.loads(raw_embedding) if raw_embedding else None

    return MemoryEntry(
        id=row["id"],
        content=row["content"],
        source=row["source"],
        context_type=row["memory_type"],  # DB column -> API field
        agent_id=row["agent_id"],
        task_id=row["task_id"],
        session_id=row["session_id"],
        metadata=decoded_metadata,
        embedding=decoded_embedding,
        timestamp=row["created_at"],
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _score_and_filter(
    rows: list[sqlite3.Row],
    query: str,
    query_embedding: list[float],
    min_relevance: float,
) -> list[MemoryEntry]:
    """Rank candidate rows against the query and drop low-scoring ones.

    Entries with a stored embedding are scored with `cosine_similarity`;
    entries without one fall back to `_keyword_overlap` on the raw text.
    Each entry gets its score in `relevance_score`. Only entries scoring at
    least `min_relevance` are returned, highest score first.
    """
    kept: list[MemoryEntry] = []

    for entry in map(_row_to_entry, rows):
        score = (
            cosine_similarity(query_embedding, entry.embedding)
            if entry.embedding
            else _keyword_overlap(query, entry.content)
        )
        entry.relevance_score = score
        if score >= min_relevance:
            kept.append(entry)

    return sorted(kept, key=lambda item: item.relevance_score or 0, reverse=True)
|
||||||
|
|
||||||
|
|
||||||
|
def search_memories(
    query: str,
    limit: int = 10,
    context_type: str | None = None,
    agent_id: str | None = None,
    session_id: str | None = None,
    min_relevance: float = 0.0,
) -> list[MemoryEntry]:
    """Return the stored memories most relevant to `query`.

    The newest ``limit * 3`` rows matching the filters are fetched, scored
    against the query, and the best `limit` of them returned.

    Args:
        query: Search query text.
        limit: Maximum number of results.
        context_type: Filter by memory type (DB column `memory_type`).
        agent_id: Filter by agent.
        session_id: Filter by session.
        min_relevance: Minimum score an entry needs to be included.

    Returns:
        MemoryEntry objects sorted by descending relevance.
    """
    query_vector = embed_text(query)
    where_clause, bind_params = _build_search_filters(context_type, agent_id, session_id)

    # Over-fetch so that relevance filtering still leaves enough results.
    candidate_rows = _fetch_memory_candidates(where_clause, bind_params, limit * 3)
    ranked = _score_and_filter(candidate_rows, query, query_vector, min_relevance)

    return ranked[:limit]
|
||||||
|
|
||||||
|
|
||||||
|
def delete_memory(memory_id: str) -> bool:
    """Remove the memory row with the given ID.

    Returns:
        True when a row was deleted, False when no row had that ID.
    """
    delete_sql = "DELETE FROM memories WHERE id = ?"

    with get_connection() as conn:
        removed_rows = conn.execute(delete_sql, (memory_id,)).rowcount
        conn.commit()
        return removed_rows > 0
|
||||||
|
|
||||||
|
|
||||||
|
def get_memory_stats() -> dict:
    """Summarise the contents of the memory store.

    Returns:
        Dict with keys `total_entries`, `by_type` (memory_type -> count),
        `with_embeddings` (rows whose embedding is not NULL) and
        `has_embedding_model` (True unless `_get_embedding_model()` returns
        the literal False).
    """
    from timmy.memory.embeddings import _get_embedding_model

    with get_connection() as conn:
        total_row = conn.execute("SELECT COUNT(*) as count FROM memories").fetchone()
        type_rows = conn.execute(
            "SELECT memory_type, COUNT(*) as count FROM memories GROUP BY memory_type"
        ).fetchall()
        embedded_row = conn.execute(
            "SELECT COUNT(*) as count FROM memories WHERE embedding IS NOT NULL"
        ).fetchone()

        counts_by_type = {r["memory_type"]: r["count"] for r in type_rows}

        return {
            "total_entries": total_row["count"],
            "by_type": counts_by_type,
            "with_embeddings": embedded_row["count"],
            "has_embedding_model": _get_embedding_model() is not False,
        }
|
||||||
|
|
||||||
|
|
||||||
|
def prune_memories(older_than_days: int = 90, keep_facts: bool = True) -> int:
    """Delete memories created before a cutoff to limit storage growth.

    Args:
        older_than_days: Rows whose `created_at` is earlier than now (UTC)
            minus this many days are deleted.
        keep_facts: When true, rows with memory_type 'fact' are never deleted.

    Returns:
        Number of rows deleted.
    """
    threshold = datetime.now(UTC) - timedelta(days=older_than_days)
    cutoff = threshold.isoformat()

    with get_connection() as conn:
        if not keep_facts:
            cursor = conn.execute(
                "DELETE FROM memories WHERE created_at < ?",
                (cutoff,),
            )
        else:
            cursor = conn.execute(
                """
                DELETE FROM memories
                WHERE created_at < ? AND memory_type != 'fact'
                """,
                (cutoff,),
            )

        deleted = cursor.rowcount
        conn.commit()

    return deleted
|
||||||
|
|
||||||
|
|
||||||
|
def get_memory_context(query: str, max_tokens: int = 2000, **filters) -> str:
    """Format the most relevant memories as a text block for an LLM prompt.

    Args:
        query: Search query.
        max_tokens: Approximate token budget; converted to a character
            budget at four characters per token.
        **filters: Extra keyword arguments forwarded to `search_memories`
            (agent_id, session_id, etc.).

    Returns:
        A header line followed by ``[source]: content`` entries separated by
        blank lines, or an empty string when nothing fits or nothing matched.
    """
    char_budget = max_tokens * 4  # Rough approximation
    used_chars = 0
    snippets: list[str] = []

    for mem in search_memories(query, limit=20, **filters):
        snippet = f"[{mem.source}]: {mem.content}"
        used_chars += len(snippet)
        # Stop at the first entry that would exceed the budget.
        if used_chars > char_budget:
            break
        snippets.append(snippet)

    if snippets:
        return "Relevant context from memory:\n" + "\n\n".join(snippets)
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
def recall_personal_facts(agent_id: str | None = None) -> list[str]:
    """Return the text of the most recent 'fact' memories.

    Args:
        agent_id: When given (and truthy), only facts for this agent.

    Returns:
        Up to 100 fact strings, newest first.
    """
    if agent_id:
        fact_sql = """
            SELECT content FROM memories
            WHERE memory_type = 'fact' AND agent_id = ?
            ORDER BY created_at DESC
            LIMIT 100
        """
        bind_params: tuple = (agent_id,)
    else:
        fact_sql = """
            SELECT content FROM memories
            WHERE memory_type = 'fact'
            ORDER BY created_at DESC
            LIMIT 100
        """
        bind_params = ()

    with get_connection() as conn:
        fact_rows = conn.execute(fact_sql, bind_params).fetchall()

    return [fact_row["content"] for fact_row in fact_rows]
|
||||||
|
|
||||||
|
|
||||||
|
def recall_personal_facts_with_ids(agent_id: str | None = None) -> list[dict]:
    """Return recent 'fact' memories as ``{"id", "content"}`` dicts.

    Same selection as `recall_personal_facts` (newest first, at most 100,
    optionally restricted to one agent) but keeps each row's ID so callers
    can edit or delete individual facts.
    """
    if agent_id:
        fact_sql = "SELECT id, content FROM memories WHERE memory_type = 'fact' AND agent_id = ? ORDER BY created_at DESC LIMIT 100"
        bind_params: tuple = (agent_id,)
    else:
        fact_sql = "SELECT id, content FROM memories WHERE memory_type = 'fact' ORDER BY created_at DESC LIMIT 100"
        bind_params = ()

    with get_connection() as conn:
        fact_rows = conn.execute(fact_sql, bind_params).fetchall()

    return [{"id": fact_row["id"], "content": fact_row["content"]} for fact_row in fact_rows]
|
||||||
|
|
||||||
|
|
||||||
|
def update_personal_fact(memory_id: str, new_content: str) -> bool:
    """Replace the text of a stored personal fact and refresh its embedding.

    Only rows with memory_type 'fact' are touched. The embedding column is
    rewritten together with the content so that similarity search does not
    keep ranking the fact by its previous text.

    Args:
        memory_id: ID of the fact row to update.
        new_content: New fact text.

    Returns:
        True when a fact row was updated, False when no fact has that ID.
    """
    new_embedding = embed_text(new_content)

    with get_connection() as conn:
        cursor = conn.execute(
            "UPDATE memories SET content = ?, embedding = ? "
            "WHERE id = ? AND memory_type = 'fact'",
            (
                new_content,
                # Same encoding as store_memory: no embedding is stored as NULL.
                json.dumps(new_embedding) if new_embedding else None,
                memory_id,
            ),
        )
        conn.commit()
        return cursor.rowcount > 0
|
||||||
|
|
||||||
|
|
||||||
|
def store_personal_fact(fact: str, agent_id: str | None = None) -> MemoryEntry:
    """Save a personal fact about the user or system.

    Thin wrapper around `store_memory` that fixes the source to "system",
    the context type to "fact", and tags the entry as not auto-extracted.

    Args:
        fact: The fact text to store.
        agent_id: Agent the fact is associated with, if any.

    Returns:
        The stored MemoryEntry.
    """
    fact_entry = store_memory(
        content=fact,
        source="system",
        context_type="fact",
        agent_id=agent_id,
        metadata={"auto_extracted": False},
    )
    return fact_entry
|
||||||
310
src/timmy/memory/consolidation.py
Normal file
310
src/timmy/memory/consolidation.py
Normal file
@@ -0,0 +1,310 @@
|
|||||||
|
"""Hot and Vault memory classes for Timmy's memory consolidation tier.
|
||||||
|
|
||||||
|
HotMemory: Tier 1 — computed view of top facts from the database.
|
||||||
|
VaultMemory: Tier 2 — structured vault (memory/ directory), append-only markdown.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from timmy.memory.unified import PROJECT_ROOT
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
VAULT_PATH = PROJECT_ROOT / "memory"
|
||||||
|
|
||||||
|
_DEFAULT_HOT_MEMORY_TEMPLATE = """\
|
||||||
|
# Timmy Hot Memory
|
||||||
|
|
||||||
|
> Working RAM — always loaded, ~300 lines max, pruned monthly
|
||||||
|
> Last updated: {date}
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Current Status
|
||||||
|
|
||||||
|
**Agent State:** Operational
|
||||||
|
**Mode:** Development
|
||||||
|
**Active Tasks:** 0
|
||||||
|
**Pending Decisions:** None
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Standing Rules
|
||||||
|
|
||||||
|
1. **Sovereignty First** — No cloud dependencies
|
||||||
|
2. **Local-Only Inference** — Ollama on localhost
|
||||||
|
3. **Privacy by Design** — Telemetry disabled
|
||||||
|
4. **Tool Minimalism** — Use tools only when necessary
|
||||||
|
5. **Memory Discipline** — Write handoffs at session end
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Agent Roster
|
||||||
|
|
||||||
|
| Agent | Role | Status |
|
||||||
|
|-------|------|--------|
|
||||||
|
| Timmy | Core | Active |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## User Profile
|
||||||
|
|
||||||
|
**Name:** (not set)
|
||||||
|
**Interests:** (to be learned)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Key Decisions
|
||||||
|
|
||||||
|
(none yet)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Pending Actions
|
||||||
|
|
||||||
|
- [ ] Learn user's name
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*Prune date: {prune_date}*
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
class HotMemory:
|
||||||
|
"""Tier 1: Hot memory — computed view of top facts from DB."""
|
||||||
|
|
||||||
|
def __init__(self, path=None) -> None:
|
||||||
|
if path is None:
|
||||||
|
path = PROJECT_ROOT / "MEMORY.md"
|
||||||
|
self.path = path
|
||||||
|
self._content: str | None = None
|
||||||
|
self._last_modified: float | None = None
|
||||||
|
|
||||||
|
def read(self, force_refresh: bool = False) -> str:
|
||||||
|
"""Read hot memory — computed view of top facts + last reflection from DB."""
|
||||||
|
from timmy.memory.chain import recall_personal_facts
|
||||||
|
# Import recall_last_reflection lazily to support patching in memory_system
|
||||||
|
try:
|
||||||
|
# Use the version from memory_system so patches work correctly
|
||||||
|
import timmy.memory_system as _ms
|
||||||
|
recall_last_reflection = _ms.recall_last_reflection
|
||||||
|
except Exception:
|
||||||
|
from timmy.memory.chain import recall_personal_facts as _rpf # noqa: F811
|
||||||
|
recall_last_reflection = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
facts = recall_personal_facts()
|
||||||
|
lines = ["# Timmy Hot Memory\n"]
|
||||||
|
|
||||||
|
if facts:
|
||||||
|
lines.append("## Known Facts\n")
|
||||||
|
for f in facts[:15]:
|
||||||
|
lines.append(f"- {f}")
|
||||||
|
|
||||||
|
# Include the last reflection if available
|
||||||
|
if recall_last_reflection is not None:
|
||||||
|
try:
|
||||||
|
reflection = recall_last_reflection()
|
||||||
|
if reflection:
|
||||||
|
lines.append("\n## Last Reflection\n")
|
||||||
|
lines.append(reflection)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
if len(lines) > 1:
|
||||||
|
return "\n".join(lines)
|
||||||
|
except Exception:
|
||||||
|
logger.debug("DB context read failed, falling back to file")
|
||||||
|
|
||||||
|
# Fallback to file if DB unavailable
|
||||||
|
if self.path.exists():
|
||||||
|
return self.path.read_text()
|
||||||
|
|
||||||
|
return "# Timmy Hot Memory\n\nNo memories stored yet.\n"
|
||||||
|
|
||||||
|
def update_section(self, section: str, content: str) -> None:
|
||||||
|
"""Update a specific section in MEMORY.md.
|
||||||
|
|
||||||
|
DEPRECATED: Hot memory is now computed from the database.
|
||||||
|
This method is kept for backward compatibility during transition.
|
||||||
|
Use memory_write() to store facts in the database.
|
||||||
|
"""
|
||||||
|
logger.warning(
|
||||||
|
"HotMemory.update_section() is deprecated. "
|
||||||
|
"Use memory_write() to store facts in the database."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Keep file-writing for backward compatibility during transition
|
||||||
|
# Guard against empty or excessively large writes
|
||||||
|
if not content or not content.strip():
|
||||||
|
logger.warning("HotMemory: Refusing empty write to section '%s'", section)
|
||||||
|
return
|
||||||
|
if len(content) > 2000:
|
||||||
|
logger.warning("HotMemory: Truncating oversized write to section '%s'", section)
|
||||||
|
content = content[:2000] + "\n... [truncated]"
|
||||||
|
|
||||||
|
if not self.path.exists():
|
||||||
|
self._create_default()
|
||||||
|
|
||||||
|
full_content = self.read()
|
||||||
|
|
||||||
|
# Find section
|
||||||
|
pattern = rf"(## {re.escape(section)}.*?)(?=\n## |\Z)"
|
||||||
|
match = re.search(pattern, full_content, re.DOTALL)
|
||||||
|
|
||||||
|
if match:
|
||||||
|
# Replace section
|
||||||
|
new_section = f"## {section}\n\n{content}\n\n"
|
||||||
|
full_content = full_content[: match.start()] + new_section + full_content[match.end() :]
|
||||||
|
else:
|
||||||
|
# Append section — guard against missing prune marker
|
||||||
|
insert_point = full_content.rfind("*Prune date:")
|
||||||
|
new_section = f"## {section}\n\n{content}\n\n"
|
||||||
|
if insert_point < 0:
|
||||||
|
# No prune marker — just append at end
|
||||||
|
full_content = full_content.rstrip() + "\n\n" + new_section
|
||||||
|
else:
|
||||||
|
full_content = (
|
||||||
|
full_content[:insert_point] + new_section + "\n" + full_content[insert_point:]
|
||||||
|
)
|
||||||
|
|
||||||
|
self.path.write_text(full_content)
|
||||||
|
self._content = full_content
|
||||||
|
self._last_modified = self.path.stat().st_mtime
|
||||||
|
logger.info("HotMemory: Updated section '%s'", section)
|
||||||
|
|
||||||
|
def _create_default(self) -> None:
|
||||||
|
"""Create default MEMORY.md if missing.
|
||||||
|
|
||||||
|
DEPRECATED: Hot memory is now computed from the database.
|
||||||
|
This method is kept for backward compatibility during transition.
|
||||||
|
"""
|
||||||
|
logger.debug(
|
||||||
|
"HotMemory._create_default() - creating default MEMORY.md for backward compatibility"
|
||||||
|
)
|
||||||
|
now = datetime.now(UTC)
|
||||||
|
content = _DEFAULT_HOT_MEMORY_TEMPLATE.format(
|
||||||
|
date=now.strftime("%Y-%m-%d"),
|
||||||
|
prune_date=now.replace(day=25).strftime("%Y-%m-%d"),
|
||||||
|
)
|
||||||
|
self.path.write_text(content)
|
||||||
|
logger.info("HotMemory: Created default MEMORY.md")
|
||||||
|
|
||||||
|
|
||||||
|
class VaultMemory:
|
||||||
|
"""Tier 2: Structured vault (memory/) — append-only markdown."""
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.path = VAULT_PATH
|
||||||
|
self._ensure_structure()
|
||||||
|
|
||||||
|
def _ensure_structure(self) -> None:
|
||||||
|
"""Ensure vault directory structure exists."""
|
||||||
|
(self.path / "self").mkdir(parents=True, exist_ok=True)
|
||||||
|
(self.path / "notes").mkdir(parents=True, exist_ok=True)
|
||||||
|
(self.path / "aar").mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
def write_note(self, name: str, content: str, namespace: str = "notes") -> Path:
|
||||||
|
"""Write a note to the vault."""
|
||||||
|
# Add timestamp to filename
|
||||||
|
timestamp = datetime.now(UTC).strftime("%Y%m%d")
|
||||||
|
filename = f"{timestamp}_{name}.md"
|
||||||
|
filepath = self.path / namespace / filename
|
||||||
|
|
||||||
|
# Add header
|
||||||
|
full_content = f"""# {name.replace("_", " ").title()}
|
||||||
|
|
||||||
|
> Created: {datetime.now(UTC).isoformat()}
|
||||||
|
> Namespace: {namespace}
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
{content}
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*Auto-generated by Timmy Memory System*
|
||||||
|
"""
|
||||||
|
|
||||||
|
filepath.write_text(full_content)
|
||||||
|
logger.info("VaultMemory: Wrote %s", filepath)
|
||||||
|
return filepath
|
||||||
|
|
||||||
|
def read_file(self, filepath: Path) -> str:
|
||||||
|
"""Read a file from the vault."""
|
||||||
|
if not filepath.exists():
|
||||||
|
return ""
|
||||||
|
return filepath.read_text()
|
||||||
|
|
||||||
|
def update_user_profile(self, key: str, value: str) -> None:
|
||||||
|
"""Update a field in user_profile.md.
|
||||||
|
|
||||||
|
DEPRECATED: User profile updates should now use memory_write() to store
|
||||||
|
facts in the database. This method is kept for backward compatibility.
|
||||||
|
"""
|
||||||
|
logger.warning(
|
||||||
|
"VaultMemory.update_user_profile() is deprecated. "
|
||||||
|
"Use memory_write() to store user facts in the database."
|
||||||
|
)
|
||||||
|
# Still update the file for backward compatibility during transition
|
||||||
|
profile_path = self.path / "self" / "user_profile.md"
|
||||||
|
|
||||||
|
if not profile_path.exists():
|
||||||
|
self._create_default_profile()
|
||||||
|
|
||||||
|
content = profile_path.read_text()
|
||||||
|
|
||||||
|
pattern = rf"(\*\*{re.escape(key)}:\*\*).*"
|
||||||
|
if re.search(pattern, content):
|
||||||
|
safe_value = value.strip()
|
||||||
|
content = re.sub(pattern, lambda m: f"{m.group(1)} {safe_value}", content)
|
||||||
|
else:
|
||||||
|
facts_section = "## Important Facts"
|
||||||
|
if facts_section in content:
|
||||||
|
insert_point = content.find(facts_section) + len(facts_section)
|
||||||
|
content = content[:insert_point] + f"\n- {key}: {value}" + content[insert_point:]
|
||||||
|
|
||||||
|
content = re.sub(
|
||||||
|
r"\*Last updated:.*\*",
|
||||||
|
f"*Last updated: {datetime.now(UTC).strftime('%Y-%m-%d')}*",
|
||||||
|
content,
|
||||||
|
)
|
||||||
|
|
||||||
|
profile_path.write_text(content)
|
||||||
|
logger.info("VaultMemory: Updated user profile: %s = %s", key, value)
|
||||||
|
|
||||||
|
def _create_default_profile(self) -> None:
|
||||||
|
"""Create default user profile."""
|
||||||
|
profile_path = self.path / "self" / "user_profile.md"
|
||||||
|
default = """# User Profile
|
||||||
|
|
||||||
|
> Learned information about the user.
|
||||||
|
|
||||||
|
## Basic Information
|
||||||
|
|
||||||
|
**Name:** (unknown)
|
||||||
|
**Location:** (unknown)
|
||||||
|
**Occupation:** (unknown)
|
||||||
|
|
||||||
|
## Interests & Expertise
|
||||||
|
|
||||||
|
- (to be learned)
|
||||||
|
|
||||||
|
## Preferences
|
||||||
|
|
||||||
|
- Response style: concise, technical
|
||||||
|
- Tool usage: minimal
|
||||||
|
|
||||||
|
## Important Facts
|
||||||
|
|
||||||
|
- (to be extracted)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*Last updated: {date}*
|
||||||
|
""".format(date=datetime.now(UTC).strftime("%Y-%m-%d"))
|
||||||
|
|
||||||
|
profile_path.write_text(default)
|
||||||
278
src/timmy/memory/semantic.py
Normal file
278
src/timmy/memory/semantic.py
Normal file
@@ -0,0 +1,278 @@
|
|||||||
|
"""Semantic memory and search classes for Timmy.
|
||||||
|
|
||||||
|
Provides SemanticMemory (vector search over vault content) and
|
||||||
|
MemorySearcher (high-level multi-tier search interface).
|
||||||
|
"""
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import sqlite3
|
||||||
|
from contextlib import closing, contextmanager
|
||||||
|
from collections.abc import Generator
|
||||||
|
from datetime import UTC, datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from config import settings
|
||||||
|
from timmy.memory.embeddings import (
|
||||||
|
EMBEDDING_DIM,
|
||||||
|
_get_embedding_model,
|
||||||
|
cosine_similarity,
|
||||||
|
embed_text,
|
||||||
|
)
|
||||||
|
from timmy.memory.unified import (
|
||||||
|
DB_PATH,
|
||||||
|
PROJECT_ROOT,
|
||||||
|
_ensure_schema,
|
||||||
|
get_connection,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
VAULT_PATH = PROJECT_ROOT / "memory"
|
||||||
|
|
||||||
|
|
||||||
|
class SemanticMemory:
|
||||||
|
"""Vector-based semantic search over vault content."""
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.db_path = DB_PATH
|
||||||
|
self.vault_path = VAULT_PATH
|
||||||
|
|
||||||
|
@contextmanager
|
||||||
|
def _get_conn(self) -> Generator[sqlite3.Connection, None, None]:
|
||||||
|
"""Get connection to the instance's db_path (backward compatibility).
|
||||||
|
|
||||||
|
Uses self.db_path if set differently from global DB_PATH,
|
||||||
|
otherwise uses the global get_connection().
|
||||||
|
"""
|
||||||
|
if self.db_path == DB_PATH:
|
||||||
|
# Use global connection (normal production path)
|
||||||
|
with get_connection() as conn:
|
||||||
|
yield conn
|
||||||
|
else:
|
||||||
|
# Use instance-specific db_path (test path)
|
||||||
|
self.db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
with closing(sqlite3.connect(str(self.db_path))) as conn:
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
conn.execute("PRAGMA journal_mode=WAL")
|
||||||
|
conn.execute(f"PRAGMA busy_timeout={settings.db_busy_timeout_ms}")
|
||||||
|
# Ensure schema exists
|
||||||
|
_ensure_schema(conn)
|
||||||
|
yield conn
|
||||||
|
|
||||||
|
def _init_db(self) -> None:
|
||||||
|
"""Initialize database at self.db_path (backward compatibility).
|
||||||
|
|
||||||
|
This method is kept for backward compatibility with existing code and tests.
|
||||||
|
Schema creation is handled by _get_conn.
|
||||||
|
"""
|
||||||
|
# Trigger schema creation via _get_conn
|
||||||
|
with self._get_conn():
|
||||||
|
pass
|
||||||
|
|
||||||
|
def index_file(self, filepath: Path) -> int:
|
||||||
|
"""Index a single file into semantic memory."""
|
||||||
|
if not filepath.exists():
|
||||||
|
return 0
|
||||||
|
|
||||||
|
content = filepath.read_text()
|
||||||
|
file_hash = hashlib.md5(content.encode()).hexdigest()
|
||||||
|
|
||||||
|
with self._get_conn() as conn:
|
||||||
|
# Check if already indexed with same hash
|
||||||
|
cursor = conn.execute(
|
||||||
|
"SELECT metadata FROM memories WHERE source = ? AND memory_type = 'vault_chunk' LIMIT 1",
|
||||||
|
(str(filepath),),
|
||||||
|
)
|
||||||
|
existing = cursor.fetchone()
|
||||||
|
if existing and existing[0]:
|
||||||
|
try:
|
||||||
|
meta = json.loads(existing[0])
|
||||||
|
if meta.get("source_hash") == file_hash:
|
||||||
|
return 0 # Already indexed
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Delete old chunks for this file
|
||||||
|
conn.execute(
|
||||||
|
"DELETE FROM memories WHERE source = ? AND memory_type = 'vault_chunk'",
|
||||||
|
(str(filepath),),
|
||||||
|
)
|
||||||
|
|
||||||
|
# Split into chunks (paragraphs)
|
||||||
|
chunks = self._split_into_chunks(content)
|
||||||
|
|
||||||
|
# Index each chunk
|
||||||
|
now = datetime.now(UTC).isoformat()
|
||||||
|
for i, chunk_text in enumerate(chunks):
|
||||||
|
if len(chunk_text.strip()) < 20: # Skip tiny chunks
|
||||||
|
continue
|
||||||
|
|
||||||
|
chunk_id = f"{filepath.stem}_{i}"
|
||||||
|
chunk_embedding = embed_text(chunk_text)
|
||||||
|
|
||||||
|
conn.execute(
|
||||||
|
"""INSERT INTO memories
|
||||||
|
(id, content, memory_type, source, metadata, embedding, created_at)
|
||||||
|
VALUES (?, ?, ?, ?, ?, ?, ?)""",
|
||||||
|
(
|
||||||
|
chunk_id,
|
||||||
|
chunk_text,
|
||||||
|
"vault_chunk",
|
||||||
|
str(filepath),
|
||||||
|
json.dumps({"source_hash": file_hash, "chunk_index": i}),
|
||||||
|
json.dumps(chunk_embedding),
|
||||||
|
now,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
logger.info("SemanticMemory: Indexed %s (%d chunks)", filepath.name, len(chunks))
|
||||||
|
return len(chunks)
|
||||||
|
|
||||||
|
def _split_into_chunks(self, text: str, max_chunk_size: int = 500) -> list[str]:
|
||||||
|
"""Split text into semantic chunks."""
|
||||||
|
# Split by paragraphs first
|
||||||
|
paragraphs = text.split("\n\n")
|
||||||
|
chunks = []
|
||||||
|
|
||||||
|
for para in paragraphs:
|
||||||
|
para = para.strip()
|
||||||
|
if not para:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# If paragraph is small enough, keep as one chunk
|
||||||
|
if len(para) <= max_chunk_size:
|
||||||
|
chunks.append(para)
|
||||||
|
else:
|
||||||
|
# Split long paragraphs by sentences
|
||||||
|
sentences = para.replace(". ", ".\n").split("\n")
|
||||||
|
current_chunk = ""
|
||||||
|
|
||||||
|
for sent in sentences:
|
||||||
|
if len(current_chunk) + len(sent) < max_chunk_size:
|
||||||
|
current_chunk += " " + sent if current_chunk else sent
|
||||||
|
else:
|
||||||
|
if current_chunk:
|
||||||
|
chunks.append(current_chunk.strip())
|
||||||
|
current_chunk = sent
|
||||||
|
|
||||||
|
if current_chunk:
|
||||||
|
chunks.append(current_chunk.strip())
|
||||||
|
|
||||||
|
return chunks
|
||||||
|
|
||||||
|
def index_vault(self) -> int:
|
||||||
|
"""Index entire vault directory."""
|
||||||
|
total_chunks = 0
|
||||||
|
|
||||||
|
for md_file in self.vault_path.rglob("*.md"):
|
||||||
|
# Skip handoff file (handled separately)
|
||||||
|
if "last-session-handoff" in md_file.name:
|
||||||
|
continue
|
||||||
|
total_chunks += self.index_file(md_file)
|
||||||
|
|
||||||
|
logger.info("SemanticMemory: Indexed vault (%d total chunks)", total_chunks)
|
||||||
|
return total_chunks
|
||||||
|
|
||||||
|
def search(self, query: str, top_k: int = 5) -> list[tuple[str, float]]:
|
||||||
|
"""Search for relevant memory chunks."""
|
||||||
|
query_embedding = embed_text(query)
|
||||||
|
|
||||||
|
with self._get_conn() as conn:
|
||||||
|
conn.row_factory = sqlite3.Row
|
||||||
|
|
||||||
|
# Get all vault chunks
|
||||||
|
rows = conn.execute(
|
||||||
|
"SELECT source, content, embedding FROM memories WHERE memory_type = 'vault_chunk'"
|
||||||
|
).fetchall()
|
||||||
|
|
||||||
|
# Calculate similarities
|
||||||
|
scored = []
|
||||||
|
for row in rows:
|
||||||
|
embedding = json.loads(row["embedding"])
|
||||||
|
score = cosine_similarity(query_embedding, embedding)
|
||||||
|
scored.append((row["source"], row["content"], score))
|
||||||
|
|
||||||
|
# Sort by score descending
|
||||||
|
scored.sort(key=lambda x: x[2], reverse=True)
|
||||||
|
|
||||||
|
# Return top_k
|
||||||
|
return [(content, score) for _, content, score in scored[:top_k]]
|
||||||
|
|
||||||
|
def get_relevant_context(self, query: str, max_chars: int = 2000) -> str:
|
||||||
|
"""Get formatted context string for a query."""
|
||||||
|
results = self.search(query, top_k=3)
|
||||||
|
|
||||||
|
if not results:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
parts = []
|
||||||
|
total_chars = 0
|
||||||
|
|
||||||
|
for content, score in results:
|
||||||
|
if score < 0.3: # Similarity threshold
|
||||||
|
continue
|
||||||
|
|
||||||
|
chunk = f"[Relevant memory - score {score:.2f}]: {content[:400]}..."
|
||||||
|
if total_chars + len(chunk) > max_chars:
|
||||||
|
break
|
||||||
|
|
||||||
|
parts.append(chunk)
|
||||||
|
total_chars += len(chunk)
|
||||||
|
|
||||||
|
return "\n\n".join(parts) if parts else ""
|
||||||
|
|
||||||
|
def stats(self) -> dict:
|
||||||
|
"""Get indexing statistics."""
|
||||||
|
with self._get_conn() as conn:
|
||||||
|
cursor = conn.execute(
|
||||||
|
"SELECT COUNT(*), COUNT(DISTINCT source) FROM memories WHERE memory_type = 'vault_chunk'"
|
||||||
|
)
|
||||||
|
total_chunks, total_files = cursor.fetchone()
|
||||||
|
|
||||||
|
return {
|
||||||
|
"total_chunks": total_chunks,
|
||||||
|
"total_files": total_files,
|
||||||
|
"embedding_dim": EMBEDDING_DIM if _get_embedding_model() else 128,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class MemorySearcher:
|
||||||
|
"""High-level interface for memory search."""
|
||||||
|
|
||||||
|
def __init__(self) -> None:
|
||||||
|
self.semantic = SemanticMemory()
|
||||||
|
|
||||||
|
def search(self, query: str, tiers: list[str] = None) -> dict:
|
||||||
|
"""Search across memory tiers.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
query: Search query
|
||||||
|
tiers: List of tiers to search ["hot", "vault", "semantic"]
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with results from each tier
|
||||||
|
"""
|
||||||
|
tiers = tiers or ["semantic"] # Default to semantic only
|
||||||
|
results = {}
|
||||||
|
|
||||||
|
if "semantic" in tiers:
|
||||||
|
semantic_results = self.semantic.search(query, top_k=5)
|
||||||
|
results["semantic"] = [
|
||||||
|
{"content": content, "score": score} for content, score in semantic_results
|
||||||
|
]
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
def get_context_for_query(self, query: str) -> str:
|
||||||
|
"""Get comprehensive context for a user query."""
|
||||||
|
# Get semantic context
|
||||||
|
semantic_context = self.semantic.get_relevant_context(query)
|
||||||
|
|
||||||
|
if semantic_context:
|
||||||
|
return f"## Relevant Past Context\n\n{semantic_context}"
|
||||||
|
|
||||||
|
return ""
|
||||||
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user