[gemini] Implement semantic index for research outputs (#976) (#1227)

2026-03-23 22:45:29 +00:00
parent e6391c599d
commit 697575e561
5 changed files with 179 additions and 81 deletions
--- a/src/config.py
+++ b/src/config.py
@@ -228,6 +228,10 @@ class Settings(BaseSettings):
    # ── Test / Diagnostics ─────────────────────────────────────────────
    # Skip loading heavy embedding models (for tests / low-memory envs).
    timmy_skip_embeddings: bool = False
+    # Embedding backend: "ollama" for Ollama, "local" for sentence-transformers.
+    timmy_embedding_backend: Literal["ollama", "local"] = "local"
+    # Ollama model to use for embeddings (e.g., "nomic-embed-text").
+    ollama_embedding_model: str = "nomic-embed-text"
    # Disable CSRF middleware entirely (for tests).
    timmy_disable_csrf: bool = False
    # Mark the process as running in test mode.
--- a/src/timmy/memory/embeddings.py
+++ b/src/timmy/memory/embeddings.py
@@ -9,35 +9,81 @@ Also includes vector similarity utilities (cosine similarity, keyword overlap).
 import hashlib
 import logging
 import math
+import json
+import httpx # Import httpx for Ollama API calls
+
+from config import settings

 logger = logging.getLogger(__name__)

 # Embedding model - small, fast, local
 EMBEDDING_MODEL = None
-EMBEDDING_DIM = 384  # MiniLM dimension
+EMBEDDING_DIM = 384  # MiniLM dimension, will be overridden if Ollama model has different dim

+class OllamaEmbedder:
+    """Mimics SentenceTransformer's ``encode`` interface on top of Ollama's HTTP embeddings API."""
+
+    def __init__(self, model_name: str, ollama_url: str):
+        self.model_name = model_name
+        self.ollama_url = ollama_url
+        self.dimension = 0  # Set from the first successful response
+
+    def encode(self, sentences: str | list[str], convert_to_numpy: bool = False, normalize_embeddings: bool = True) -> list[list[float]] | list[float]:
+        """Embed text via Ollama: a single ``str`` yields one vector, a list yields a list of vectors.
+
+        If any request fails, the whole input falls back to ``_simple_hash_embedding`` in the
+        same shape. ``convert_to_numpy``/``normalize_embeddings`` exist only for signature parity.
+        """
+        global EMBEDDING_DIM
+        # Remember the input shape before wrapping it; the return shape must mirror it.
+        single = isinstance(sentences, str)
+        batch = [sentences] if single else list(sentences)
+        all_embeddings = []
+        try:
+            for sentence in batch:
+                response = httpx.post(
+                    f"{self.ollama_url}/api/embeddings",
+                    json={"model": self.model_name, "prompt": sentence},
+                    timeout=settings.mcp_bridge_timeout,
+                )
+                response.raise_for_status()
+                embedding = response.json()["embedding"]
+                if not self.dimension:
+                    self.dimension = EMBEDDING_DIM = len(embedding)
+                all_embeddings.append(embedding)
+        except httpx.HTTPError as exc:  # transport errors and raise_for_status() failures
+            logger.error("Ollama embeddings request failed: %s", exc)
+            all_embeddings = [_simple_hash_embedding(s) for s in batch]
+        except (json.JSONDecodeError, KeyError, TypeError) as exc:  # malformed or incomplete payload
+            logger.error("Failed to decode Ollama embeddings response: %s", exc)
+            all_embeddings = [_simple_hash_embedding(s) for s in batch]
+        return all_embeddings[0] if single else all_embeddings

 def _get_embedding_model():
-    """Lazy-load embedding model."""
+    """Lazy-load and cache the embedding backend (Ollama if configured, else sentence-transformers); False means use the fallback."""
    global EMBEDDING_MODEL
+    global EMBEDDING_DIM
    if EMBEDDING_MODEL is None:
-        try:
-            from config import settings
+        if settings.timmy_skip_embeddings:
+            EMBEDDING_MODEL = False  # Embeddings disabled by settings: use fallback
+            return EMBEDDING_MODEL

-            if settings.timmy_skip_embeddings:
-                EMBEDDING_MODEL = False
-                return EMBEDDING_MODEL
-        except ImportError:
-            pass
+        if settings.timmy_embedding_backend == "ollama":
+            logger.info("MemorySystem: Using Ollama for embeddings with model %s", settings.ollama_embedding_model)
+            EMBEDDING_MODEL = OllamaEmbedder(settings.ollama_embedding_model, settings.normalized_ollama_url)
+            # The Ollama vector size is unknown until the first request, so EMBEDDING_DIM keeps its current value here.
+            # OllamaEmbedder.encode updates it after the first successful call.
+            return EMBEDDING_MODEL
+        else:
+            try:
+                from sentence_transformers import SentenceTransformer

-        try:
-            from sentence_transformers import SentenceTransformer
-
-            EMBEDDING_MODEL = SentenceTransformer("all-MiniLM-L6-v2")
-            logger.info("MemorySystem: Loaded embedding model")
-        except ImportError:
-            logger.warning("MemorySystem: sentence-transformers not installed, using fallback")
-            EMBEDDING_MODEL = False  # Use fallback
+                EMBEDDING_MODEL = SentenceTransformer("all-MiniLM-L6-v2")
+                EMBEDDING_DIM = 384 # Reset to MiniLM dimension
+                logger.info("MemorySystem: Loaded local embedding model (all-MiniLM-L6-v2)")
+            except ImportError:
+                logger.warning("MemorySystem: sentence-transformers not installed, using fallback")
+                EMBEDDING_MODEL = False  # Use fallback
    return EMBEDDING_MODEL


@@ -60,10 +106,14 @@ def embed_text(text: str) -> list[float]:
    model = _get_embedding_model()
    if model and model is not False:
        embedding = model.encode(text)
-        return embedding.tolist()
+        # Ensure it's a list of floats, not numpy array
+        if hasattr(embedding, 'tolist'):
+            return embedding.tolist()
+        return embedding
    return _simple_hash_embedding(text)


+
 def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Calculate cosine similarity between two vectors."""
    dot = sum(x * y for x, y in zip(a, b, strict=False))
--- a/src/timmy/memory_system.py
+++ b/src/timmy/memory_system.py
@@ -1206,7 +1206,7 @@ memory_searcher = MemorySearcher()
 # ───────────────────────────────────────────────────────────────────────────────


-def memory_search(query: str, top_k: int = 5) -> str:
+def memory_search(query: str, limit: int = 10) -> str:
    """Search past conversations, notes, and stored facts for relevant context.

    Searches across both the vault (indexed markdown files) and the
@@ -1215,19 +1215,19 @@ def memory_search(query: str, top_k: int = 5) -> str:

    Args:
        query: What to search for (e.g. "Bitcoin strategy", "server setup").
-        top_k: Number of results to return (default 5).
+        limit: Number of results to return (default 10).

    Returns:
        Formatted string of relevant memory results.
    """
-    # Guard: model sometimes passes None for top_k
-    if top_k is None:
-        top_k = 5
+    # Guard: model sometimes passes None for limit
+    if limit is None:
+        limit = 10

    parts: list[str] = []

    # 1. Search semantic vault (indexed markdown files)
-    vault_results = semantic_memory.search(query, top_k)
+    vault_results = semantic_memory.search(query, limit)
    for content, score in vault_results:
        if score < 0.2:
            continue
@@ -1235,7 +1235,7 @@ def memory_search(query: str, top_k: int = 5) -> str:

    # 2. Search runtime vector store (stored facts/conversations)
    try:
-        runtime_results = search_memories(query, limit=top_k, min_relevance=0.2)
+        runtime_results = search_memories(query, limit=limit, min_relevance=0.2)
        for entry in runtime_results:
            label = entry.context_type or "memory"
            parts.append(f"[{label}] {entry.content[:300]}")
@@ -1289,45 +1289,48 @@ def memory_read(query: str = "", top_k: int = 5) -> str:
    return "\n".join(parts)


-def memory_write(content: str, context_type: str = "fact") -> str:
-    """Store a piece of information in persistent memory.
+def memory_store(topic: str, report: str, type: str = "research") -> str:
+    """Store a piece of information in persistent memory, particularly for research outputs.

-    Use this tool when the user explicitly asks you to remember something.
-    Stored memories are searchable via memory_search across all channels
-    (web GUI, Discord, Telegram, etc.).
+    Use this tool to store structured research findings or other important documents.
+    Stored memories are searchable via memory_search across all channels.

    Args:
-        content: The information to remember (e.g. a phrase, fact, or note).
-        context_type: Type of memory — "fact" for permanent facts,
-                      "conversation" for conversation context,
-                      "document" for document fragments.
+        topic: A concise title or topic for the research output (None is treated as empty).
+        report: The detailed content of the research output or document.
+        type: Type of memory — "research" for research outputs (default),
+              "fact" for permanent facts, "conversation" for conversation context,
+              "document" for other document fragments. Unknown values fall back to "research".

    Returns:
        Confirmation that the memory was stored.
    """
-    if not content or not content.strip():
-        return "Nothing to store — content is empty."
+    if not report or not report.strip():
+        return "Nothing to store — report is empty."

-    valid_types = ("fact", "conversation", "document")
-    if context_type not in valid_types:
-        context_type = "fact"
+    # Combine topic and report for embedding and storage; a None topic must not raise here
+    full_content = f"Topic: {(topic or '').strip()}\n\nReport: {report.strip()}"
+
+    valid_types = ("fact", "conversation", "document", "research")
+    if type not in valid_types:
+        type = "research"

    try:
-        # Dedup check for facts — skip if a similar fact already exists
-        # Threshold 0.75 catches paraphrases (was 0.9 which only caught near-exact)
-        if context_type == "fact":
+        # Dedup check for facts and research — skip if similar exists (0.75 catches paraphrases)
+        if type in ("fact", "research"):
            existing = search_memories(
-                content.strip(), limit=3, context_type="fact", min_relevance=0.75
+                full_content, limit=3, context_type=type, min_relevance=0.75
            )
            if existing:
-                return f"Similar fact already stored (id={existing[0].id[:8]}). Skipping duplicate."
+                return f"Similar {type} already stored (id={existing[0].id[:8]}). Skipping duplicate."

        entry = store_memory(
-            content=content.strip(),
+            content=full_content,
            source="agent",
-            context_type=context_type,
+            context_type=type,
+            metadata={"topic": topic},
        )
-        return f"Stored in memory (type={context_type}, id={entry.id[:8]}). This is now searchable across all channels."
+        return f"Stored in memory (type={type}, id={entry.id[:8]}). This is now searchable across all channels."
    except Exception as exc:
        logger.error("Failed to write memory: %s", exc)
        return f"Failed to store memory: {exc}"