[gemini] Implement semantic index for research outputs (#976) (#1227)

2026-03-23 22:45:29 +00:00
parent e6391c599d
commit 697575e561
5 changed files with 179 additions and 81 deletions
--- a/src/timmy/memory_system.py
+++ b/src/timmy/memory_system.py
@@ -1206,7 +1206,7 @@ memory_searcher = MemorySearcher()
 # ───────────────────────────────────────────────────────────────────────────────


-def memory_search(query: str, top_k: int = 5) -> str:
+def memory_search(query: str, limit: int = 10) -> str:
    """Search past conversations, notes, and stored facts for relevant context.

    Searches across both the vault (indexed markdown files) and the
@@ -1215,19 +1215,19 @@ def memory_search(query: str, top_k: int = 5) -> str:

    Args:
        query: What to search for (e.g. "Bitcoin strategy", "server setup").
-        top_k: Number of results to return (default 5).
+        limit: Number of results to return (default 10).

    Returns:
        Formatted string of relevant memory results.
    """
-    # Guard: model sometimes passes None for top_k
-    if top_k is None:
-        top_k = 5
+    # Guard: model sometimes passes None for limit
+    if limit is None:
+        limit = 10

    parts: list[str] = []

    # 1. Search semantic vault (indexed markdown files)
-    vault_results = semantic_memory.search(query, top_k)
+    vault_results = semantic_memory.search(query, limit)
    for content, score in vault_results:
        if score < 0.2:
            continue
@@ -1235,7 +1235,7 @@ def memory_search(query: str, top_k: int = 5) -> str:

    # 2. Search runtime vector store (stored facts/conversations)
    try:
-        runtime_results = search_memories(query, limit=top_k, min_relevance=0.2)
+        runtime_results = search_memories(query, limit=limit, min_relevance=0.2)
        for entry in runtime_results:
            label = entry.context_type or "memory"
            parts.append(f"[{label}] {entry.content[:300]}")
@@ -1289,45 +1289,48 @@ def memory_read(query: str = "", top_k: int = 5) -> str:
    return "\n".join(parts)


-def memory_write(content: str, context_type: str = "fact") -> str:
-    """Store a piece of information in persistent memory.
+def memory_store(topic: str, report: str, type: str = "research") -> str:
+    """Store a piece of information in persistent memory, particularly for research outputs.

-    Use this tool when the user explicitly asks you to remember something.
-    Stored memories are searchable via memory_search across all channels
-    (web GUI, Discord, Telegram, etc.).
+    Use this tool to store structured research findings or other important documents.
+    Stored memories are searchable via memory_search across all channels.

    Args:
-        content: The information to remember (e.g. a phrase, fact, or note).
-        context_type: Type of memory — "fact" for permanent facts,
-                      "conversation" for conversation context,
-                      "document" for document fragments.
+        topic: A concise title or topic for the research output.
+        report: The detailed content of the research output or document.
+        type: Type of memory — "research" for research outputs (default),
+              "fact" for permanent facts, "conversation" for conversation context,
+              "document" for other document fragments.

    Returns:
        Confirmation that the memory was stored.
    """
-    if not content or not content.strip():
-        return "Nothing to store — content is empty."
+    if not report or not report.strip():
+        return "Nothing to store — report is empty."

-    valid_types = ("fact", "conversation", "document")
-    if context_type not in valid_types:
-        context_type = "fact"
+    # Combine topic and report for embedding and storage content
+    full_content = f"Topic: {topic.strip()}\n\nReport: {report.strip()}"
+
+    valid_types = ("fact", "conversation", "document", "research")
+    if type not in valid_types:
+        type = "research"

    try:
-        # Dedup check for facts — skip if a similar fact already exists
-        # Threshold 0.75 catches paraphrases (was 0.9 which only caught near-exact)
-        if context_type == "fact":
+        # Dedup check for facts and research — skip if a similar entry (relevance >= 0.75) already exists
+        if type in ("fact", "research"):
            existing = search_memories(
-                content.strip(), limit=3, context_type="fact", min_relevance=0.75
+                full_content, limit=3, context_type=type, min_relevance=0.75
            )
            if existing:
-                return f"Similar fact already stored (id={existing[0].id[:8]}). Skipping duplicate."
+                return f"Similar {type} already stored (id={existing[0].id[:8]}). Skipping duplicate."

        entry = store_memory(
-            content=content.strip(),
+            content=full_content,
            source="agent",
-            context_type=context_type,
+            context_type=type,
+            metadata={"topic": topic},
        )
-        return f"Stored in memory (type={context_type}, id={entry.id[:8]}). This is now searchable across all channels."
+        return f"Stored in memory (type={type}, id={entry.id[:8]}). This is now searchable across all channels."
    except Exception as exc:
        logger.error("Failed to write memory: %s", exc)
        return f"Failed to store memory: {exc}"