WIP: Gemini Code progress on #976

Automated salvage commit — agent session ended (exit 124). Work in progress, may need continuation.
feat: Implement semantic index for research outputs (#976)
2026-03-23 15:26:23 -04:00 · 2026-03-23 14:15:40 -04:00
5 changed files with 179 additions and 81 deletions
--- a/index_research_docs.py
+++ b/index_research_docs.py
@@ -0,0 +1,33 @@
+
+import os
+import sys
+from pathlib import Path
+
+# Add the src directory to the Python path
+sys.path.insert(0, str(Path(__file__).parent / "src"))
+
+from timmy.memory_system import memory_store
+
+def index_research_documents():
+    """Store every ``docs/research/*.md`` file via ``memory_store`` and print a summary."""
+    research_dir = Path("docs/research")
+    if not research_dir.is_dir():
+        print(f"Research directory not found: {research_dir}")
+        return
+    print(f"Indexing research documents from {research_dir}...")
+    indexed_count = 0
+    for file_path in sorted(research_dir.glob("*.md")):  # Sorted for a reproducible order
+        try:
+            content = file_path.read_text(encoding="utf-8")  # Do not depend on the locale default
+            topic = file_path.stem.replace("-", " ").title()  # Derive topic from filename
+            print(f"Storing '{topic}' from {file_path.name}...")
+            result = memory_store(topic=topic, report=content, type="research")
+            print(f"  Result: {result}")
+            if result.startswith("Stored"):  # Failures/duplicates come back as text, not exceptions
+                indexed_count += 1
+        except Exception as e:
+            print(f"Error indexing {file_path.name}: {e}")
+    print(f"Finished indexing. Total documents indexed: {indexed_count}")
+
+if __name__ == "__main__":
+    index_research_documents()
--- a/src/config.py
+++ b/src/config.py
@@ -217,6 +217,10 @@ class Settings(BaseSettings):
    # ── Test / Diagnostics ─────────────────────────────────────────────
    # Skip loading heavy embedding models (for tests / low-memory envs).
    timmy_skip_embeddings: bool = False
+    # Embedding backend: "ollama" for Ollama, "local" for sentence-transformers.
+    timmy_embedding_backend: Literal["ollama", "local"] = "ollama"
+    # Ollama model to use for embeddings (e.g., "nomic-embed-text").
+    ollama_embedding_model: str = "nomic-embed-text"
    # Disable CSRF middleware entirely (for tests).
    timmy_disable_csrf: bool = False
    # Mark the process as running in test mode.
--- a/src/timmy/memory/embeddings.py
+++ b/src/timmy/memory/embeddings.py
@@ -9,35 +9,81 @@ Also includes vector similarity utilities (cosine similarity, keyword overlap).
 import hashlib
 import logging
 import math
+import json
+import httpx # Import httpx for Ollama API calls
+
+from config import settings

 logger = logging.getLogger(__name__)

 # Embedding model - small, fast, local
 EMBEDDING_MODEL = None
-EMBEDDING_DIM = 384  # MiniLM dimension
+EMBEDDING_DIM = 384  # MiniLM dimension, will be overridden if Ollama model has different dim

+class OllamaEmbedder:
+    """Mimics the SentenceTransformer ``encode`` interface on top of Ollama's embeddings API."""
+    def __init__(self, model_name: str, ollama_url: str):
+        self.model_name = model_name
+        self.ollama_url = ollama_url
+        self.dimension = 0  # Unknown until the first successful response
+
+    def encode(self, sentences: str | list[str], convert_to_numpy: bool = False, normalize_embeddings: bool = True) -> list[list[float]] | list[float]:
+        """Embed a single string (one vector) or a list of strings (list of vectors).
+
+        ``convert_to_numpy`` and ``normalize_embeddings`` are accepted only for
+        interface compatibility and are ignored. A sentence whose request fails gets
+        ``_simple_hash_embedding(sentence)`` instead, so there is one vector per input.
+        """
+        global EMBEDDING_DIM
+        single = isinstance(sentences, str)  # Record the input shape before wrapping it
+        batch = [sentences] if single else list(sentences)
+        all_embeddings = []
+        for sentence in batch:
+            try:
+                response = httpx.post(
+                    f"{self.ollama_url}/api/embeddings",
+                    json={"model": self.model_name, "prompt": sentence},
+                    timeout=settings.mcp_bridge_timeout,
+                )
+                response.raise_for_status()
+                embedding = response.json()["embedding"]
+            except (httpx.HTTPError, json.JSONDecodeError, KeyError) as exc:
+                # HTTPError covers both transport failures and non-2xx status responses
+                logger.error("Ollama embeddings request failed: %s", exc)
+                all_embeddings.append(_simple_hash_embedding(sentence))
+                continue
+            if not self.dimension:
+                # First success: publish the model's real dimension module-wide
+                self.dimension = EMBEDDING_DIM = len(embedding)
+            all_embeddings.append(embedding)
+
+        return all_embeddings[0] if single else all_embeddings

 def _get_embedding_model():
-    """Lazy-load embedding model."""
+    """Lazy-load embedding model, preferring Ollama if configured."""
    global EMBEDDING_MODEL
+    global EMBEDDING_DIM
    if EMBEDDING_MODEL is None:
-        try:
-            from config import settings
+        if settings.timmy_skip_embeddings:
+            EMBEDDING_MODEL = False
+            return EMBEDDING_MODEL

-            if settings.timmy_skip_embeddings:
-                EMBEDDING_MODEL = False
-                return EMBEDDING_MODEL
-        except ImportError:
-            pass
+        if settings.timmy_embedding_backend == "ollama":
+            logger.info("MemorySystem: Using Ollama for embeddings with model %s", settings.ollama_embedding_model)
+            EMBEDDING_MODEL = OllamaEmbedder(settings.ollama_embedding_model, settings.normalized_ollama_url)
+            # We don't know the dimension until after the first call, so keep it default for now.
+            # It will be updated dynamically in OllamaEmbedder.encode
+            return EMBEDDING_MODEL
+        else:
+            try:
+                from sentence_transformers import SentenceTransformer

-        try:
-            from sentence_transformers import SentenceTransformer
-
-            EMBEDDING_MODEL = SentenceTransformer("all-MiniLM-L6-v2")
-            logger.info("MemorySystem: Loaded embedding model")
-        except ImportError:
-            logger.warning("MemorySystem: sentence-transformers not installed, using fallback")
-            EMBEDDING_MODEL = False  # Use fallback
+                EMBEDDING_MODEL = SentenceTransformer("all-MiniLM-L6-v2")
+                EMBEDDING_DIM = 384 # Reset to MiniLM dimension
+                logger.info("MemorySystem: Loaded local embedding model (all-MiniLM-L6-v2)")
+            except ImportError:
+                logger.warning("MemorySystem: sentence-transformers not installed, using fallback")
+                EMBEDDING_MODEL = False  # Use fallback
    return EMBEDDING_MODEL


@@ -60,10 +106,14 @@ def embed_text(text: str) -> list[float]:
    model = _get_embedding_model()
    if model and model is not False:
        embedding = model.encode(text)
-        return embedding.tolist()
+        # Ensure it's a list of floats, not numpy array
+        if hasattr(embedding, 'tolist'):
+            return embedding.tolist()
+        return embedding
    return _simple_hash_embedding(text)


+
 def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Calculate cosine similarity between two vectors."""
    dot = sum(x * y for x, y in zip(a, b, strict=False))
--- a/src/timmy/memory_system.py
+++ b/src/timmy/memory_system.py
@@ -1206,7 +1206,7 @@ memory_searcher = MemorySearcher()
 # ───────────────────────────────────────────────────────────────────────────────


-def memory_search(query: str, top_k: int = 5) -> str:
+def memory_search(query: str, limit: int = 10) -> str:
    """Search past conversations, notes, and stored facts for relevant context.

    Searches across both the vault (indexed markdown files) and the
@@ -1215,19 +1215,19 @@ def memory_search(query: str, top_k: int = 5) -> str:

    Args:
        query: What to search for (e.g. "Bitcoin strategy", "server setup").
-        top_k: Number of results to return (default 5).
+        limit: Number of results to return (default 10).

    Returns:
        Formatted string of relevant memory results.
    """
-    # Guard: model sometimes passes None for top_k
-    if top_k is None:
-        top_k = 5
+    # Guard: model sometimes passes None for limit
+    if limit is None:
+        limit = 10

    parts: list[str] = []

    # 1. Search semantic vault (indexed markdown files)
-    vault_results = semantic_memory.search(query, top_k)
+    vault_results = semantic_memory.search(query, limit)
    for content, score in vault_results:
        if score < 0.2:
            continue
@@ -1235,7 +1235,7 @@ def memory_search(query: str, top_k: int = 5) -> str:

    # 2. Search runtime vector store (stored facts/conversations)
    try:
-        runtime_results = search_memories(query, limit=top_k, min_relevance=0.2)
+        runtime_results = search_memories(query, limit=limit, min_relevance=0.2)
        for entry in runtime_results:
            label = entry.context_type or "memory"
            parts.append(f"[{label}] {entry.content[:300]}")
@@ -1289,45 +1289,48 @@ def memory_read(query: str = "", top_k: int = 5) -> str:
    return "\n".join(parts)


-def memory_write(content: str, context_type: str = "fact") -> str:
-    """Store a piece of information in persistent memory.
+def memory_store(topic: str, report: str, type: str = "research") -> str:
+    """Store a piece of information in persistent memory, particularly for research outputs.

-    Use this tool when the user explicitly asks you to remember something.
-    Stored memories are searchable via memory_search across all channels
-    (web GUI, Discord, Telegram, etc.).
+    Use this tool to store structured research findings or other important documents.
+    Stored memories are searchable via memory_search across all channels.

    Args:
-        content: The information to remember (e.g. a phrase, fact, or note).
-        context_type: Type of memory — "fact" for permanent facts,
-                      "conversation" for conversation context,
-                      "document" for document fragments.
+        topic: A concise title or topic for the research output.
+        report: The detailed content of the research output or document.
+        type: Type of memory — "research" for research outputs (default),
+              "fact" for permanent facts, "conversation" for conversation context,
+              "document" for other document fragments.

    Returns:
        Confirmation that the memory was stored.
    """
-    if not content or not content.strip():
-        return "Nothing to store — content is empty."
+    if not report or not report.strip():
+        return "Nothing to store — report is empty."

-    valid_types = ("fact", "conversation", "document")
-    if context_type not in valid_types:
-        context_type = "fact"
+    # Combine topic and report for embedding and storage content
+    full_content = f"Topic: {topic.strip()}\n\nReport: {report.strip()}"
+
+    valid_types = ("fact", "conversation", "document", "research")
+    if type not in valid_types:
+        type = "research"

    try:
-        # Dedup check for facts — skip if a similar fact already exists
-        # Threshold 0.75 catches paraphrases (was 0.9 which only caught near-exact)
-        if context_type == "fact":
+        # Dedup check for facts and research — skip if similar exists
+        if type in ("fact", "research"):
            existing = search_memories(
-                content.strip(), limit=3, context_type="fact", min_relevance=0.75
+                full_content, limit=3, context_type=type, min_relevance=0.75
            )
            if existing:
-                return f"Similar fact already stored (id={existing[0].id[:8]}). Skipping duplicate."
+                return f"Similar {type} already stored (id={existing[0].id[:8]}). Skipping duplicate."

        entry = store_memory(
-            content=content.strip(),
+            content=full_content,
            source="agent",
-            context_type=context_type,
+            context_type=type,
+            metadata={"topic": topic},
        )
-        return f"Stored in memory (type={context_type}, id={entry.id[:8]}). This is now searchable across all channels."
+        return f"Stored in memory (type={type}, id={entry.id[:8]}). This is now searchable across all channels."
    except Exception as exc:
        logger.error("Failed to write memory: %s", exc)
        return f"Failed to store memory: {exc}"
--- a/tests/timmy/test_semantic_memory.py
+++ b/tests/timmy/test_semantic_memory.py
@@ -16,7 +16,7 @@ from timmy.memory_system import (
    memory_forget,
    memory_read,
    memory_search,
-    memory_write,
+    memory_store,
 )


@@ -490,7 +490,7 @@ class TestMemorySearch:
        assert isinstance(result, str)

    def test_none_top_k_handled(self):
-        result = memory_search("test", top_k=None)
+        result = memory_search("test", limit=None)
        assert isinstance(result, str)

    def test_basic_search_returns_string(self):
@@ -521,12 +521,12 @@ class TestMemoryRead:
        assert isinstance(result, str)


-class TestMemoryWrite:
-    """Test module-level memory_write function."""
+class TestMemoryStore:
+    """Test module-level memory_store function."""

    @pytest.fixture(autouse=True)
    def mock_vector_store(self):
-        """Mock vector_store functions for memory_write tests."""
+        """Mock vector_store functions for memory_store tests."""
        # Patch where it's imported from, not where it's used
        with (
            patch("timmy.memory_system.search_memories") as mock_search,
@@ -542,75 +542,83 @@ class TestMemoryWrite:

            yield {"search": mock_search, "store": mock_store}

-    def test_memory_write_empty_content(self):
-        """Test that empty content returns error message."""
-        result = memory_write("")
+    def test_memory_store_empty_report(self):
+        """Test that empty report returns error message."""
+        result = memory_store(topic="test", report="")
        assert "empty" in result.lower()

-    def test_memory_write_whitespace_only(self):
-        """Test that whitespace-only content returns error."""
-        result = memory_write("   \n\t   ")
+    def test_memory_store_whitespace_only(self):
+        """Test that whitespace-only report returns error."""
+        result = memory_store(topic="test", report="   \n\t   ")
        assert "empty" in result.lower()

-    def test_memory_write_valid_content(self, mock_vector_store):
+    def test_memory_store_valid_content(self, mock_vector_store):
        """Test writing valid content."""
-        result = memory_write("Remember this important fact.")
+        result = memory_store(topic="fact about Timmy", report="Remember this important fact.")
        assert "stored" in result.lower() or "memory" in result.lower()
        mock_vector_store["store"].assert_called_once()

-    def test_memory_write_dedup_for_facts(self, mock_vector_store):
-        """Test that duplicate facts are skipped."""
+    def test_memory_store_dedup_for_facts_or_research(self, mock_vector_store):
+        """Test that duplicate facts or research are skipped."""
        # Simulate existing similar fact
        mock_entry = MagicMock()
        mock_entry.id = "existing-id"
        mock_vector_store["search"].return_value = [mock_entry]

-        result = memory_write("Similar fact text", context_type="fact")
+        # Test with 'fact'
+        result = memory_store(topic="Similar fact", report="Similar fact text", type="fact")
        assert "similar" in result.lower() or "duplicate" in result.lower()
        mock_vector_store["store"].assert_not_called()

-    def test_memory_write_no_dedup_for_conversation(self, mock_vector_store):
+        mock_vector_store["store"].reset_mock()
+        # Test with 'research'
+        result = memory_store(topic="Similar research", report="Similar research content", type="research")
+        assert "similar" in result.lower() or "duplicate" in result.lower()
+        mock_vector_store["store"].assert_not_called()
+
+    def test_memory_store_no_dedup_for_conversation(self, mock_vector_store):
        """Test that conversation entries are not deduplicated."""
        # Even with existing entries, conversations should be stored
        mock_entry = MagicMock()
        mock_entry.id = "existing-id"
        mock_vector_store["search"].return_value = [mock_entry]

-        memory_write("Conversation text", context_type="conversation")
+        memory_store(topic="Conversation", report="Conversation text", type="conversation")
        # Should still store (no duplicate check for non-fact)
        mock_vector_store["store"].assert_called_once()

-    def test_memory_write_invalid_context_type(self, mock_vector_store):
-        """Test that invalid context_type defaults to 'fact'."""
-        memory_write("Some content", context_type="invalid_type")
-        # Should still succeed, using "fact" as default
+    def test_memory_store_invalid_type_defaults_to_research(self, mock_vector_store):
+        """Test that invalid type defaults to 'research'."""
+        memory_store(topic="Invalid type test", report="Some content", type="invalid_type")
+        # Should still succeed, using "research" as default
        mock_vector_store["store"].assert_called_once()
        call_kwargs = mock_vector_store["store"].call_args.kwargs
-        assert call_kwargs.get("context_type") == "fact"
+        assert call_kwargs.get("context_type") == "research"

-    def test_memory_write_valid_context_types(self, mock_vector_store):
+    def test_memory_store_valid_types(self, mock_vector_store):
        """Test all valid context types."""
-        valid_types = ["fact", "conversation", "document"]
+        valid_types = ["fact", "conversation", "document", "research"]
        for ctx_type in valid_types:
            mock_vector_store["store"].reset_mock()
-            memory_write(f"Content for {ctx_type}", context_type=ctx_type)
+            memory_store(topic=f"Topic for {ctx_type}", report=f"Content for {ctx_type}", type=ctx_type)
            mock_vector_store["store"].assert_called_once()

-    def test_memory_write_strips_content(self, mock_vector_store):
-        """Test that content is stripped of leading/trailing whitespace."""
-        memory_write("  padded content  ")
+    def test_memory_store_strips_report_and_adds_topic(self, mock_vector_store):
+        """Test that report is stripped of leading/trailing whitespace and combined with topic."""
+        memory_store(topic="  My Topic  ", report="  padded content  ")
        call_kwargs = mock_vector_store["store"].call_args.kwargs
-        assert call_kwargs.get("content") == "padded content"
+        assert call_kwargs.get("content") == "Topic: My Topic\n\nReport: padded content"
+        assert call_kwargs.get("metadata") == {"topic": "  My Topic  "}

-    def test_memory_write_unicode_content(self, mock_vector_store):
+    def test_memory_store_unicode_report(self, mock_vector_store):
        """Test writing unicode content."""
-        result = memory_write("Unicode content: 你好世界 🎉")
+        result = memory_store(topic="Unicode", report="Unicode content: 你好世界 🎉")
        assert "stored" in result.lower() or "memory" in result.lower()

-    def test_memory_write_handles_exception(self, mock_vector_store):
+    def test_memory_store_handles_exception(self, mock_vector_store):
        """Test handling of store_memory exceptions."""
        mock_vector_store["store"].side_effect = Exception("DB error")
-        result = memory_write("This will fail")
+        result = memory_store(topic="Failing", report="This will fail")
        assert "failed" in result.lower() or "error" in result.lower()