Compare commits


1 Commit

Author SHA1 Message Date
Alexander Whitestone
a2f8989c39 feat: export conversation trajectories to ShareGPT JSONL for LoRA fine-tuning
Some checks failed
Tests / test (pull_request) Has been skipped
Tests / lint (pull_request) Failing after 16s
Implements AutoLoRA Step 3 of 7: a script that reads Timmy's session logs
and chat history, groups entries into conversation trajectories, and writes
ShareGPT-compatible JSONL suitable for Hermes 4 LoRA fine-tuning.

Sources (priority order):
  1. logs/session_*.jsonl — rich logs with tool calls
  2. data/chat.db         — SQLite chat history fallback

Usage:
  python scripts/export_trajectories.py [--output ~/timmy-training-data.jsonl]
  python scripts/export_trajectories.py --validate-only --output <file>
  python scripts/export_trajectories.py --min-examples 100
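
Example (illustrative only; the entry fields follow the session-log format
exercised by the unit tests in this commit):

  input entries in logs/session_*.jsonl:
    {"type": "message", "role": "user", "content": "list my files", "timestamp": "2026-03-01T10:00:00"}
    {"type": "tool_call", "tool": "shell", "args": {"cmd": "ls"}, "result": "a.py", "timestamp": "2026-03-01T10:01:00"}
    {"type": "message", "role": "timmy", "content": "One file: a.py", "timestamp": "2026-03-01T10:02:00"}

  exported JSONL line (human turn, tool-result turn, then gpt turn carrying the tool call):
    {"conversations": [{"from": "human", "value": "list my files"}, {"from": "tool", "value": "a.py", "tool": "shell"}, {"from": "gpt", "value": "One file: a.py", "tool_calls": [{"name": "shell", "arguments": {"cmd": "ls"}}]}]}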

Fixes #1102

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-23 14:20:04 -04:00
7 changed files with 745 additions and 179 deletions


@@ -1,33 +0,0 @@
import os
import sys
from pathlib import Path
# Add the src directory to the Python path
sys.path.insert(0, str(Path(__file__).parent / "src"))
from timmy.memory_system import memory_store
def index_research_documents():
research_dir = Path("docs/research")
if not research_dir.is_dir():
print(f"Research directory not found: {research_dir}")
return
print(f"Indexing research documents from {research_dir}...")
indexed_count = 0
for file_path in research_dir.glob("*.md"):
try:
content = file_path.read_text()
topic = file_path.stem.replace("-", " ").title() # Derive topic from filename
print(f"Storing '{topic}' from {file_path.name}...")
# Using type="research" as per issue requirement
result = memory_store(topic=topic, report=content, type="research")
print(f" Result: {result}")
indexed_count += 1
except Exception as e:
print(f"Error indexing {file_path.name}: {e}")
print(f"Finished indexing. Total documents indexed: {indexed_count}")
if __name__ == "__main__":
index_research_documents()


@@ -0,0 +1,358 @@
#!/usr/bin/env python3
"""Export Claude conversation trajectories to ShareGPT JSONL format for LoRA fine-tuning.
Reads from two sources (in priority order):
1. logs/session_*.jsonl — rich logs with tool calls (preferred)
2. data/chat.db — SQLite chat history (fallback)
Output is a ShareGPT-compatible JSONL file where each line is one conversation:
{"conversations": [
{"from": "human", "value": "..."},
{"from": "gpt", "value": "...", "tool_calls": [...]},
{"from": "tool", "value": "..."},
{"from": "gpt", "value": "..."}
]}
Epic: #1091 Project Bannerlord — AutoLoRA Sovereignty Loop (Step 3 of 7)
Refs: #1102
"""
from __future__ import annotations
import argparse
import json
import sqlite3
import sys
from datetime import datetime, timedelta
from pathlib import Path
from typing import Any
# ── Constants ────────────────────────────────────────────────────────────────
REPO_ROOT = Path(__file__).resolve().parents[1]
DEFAULT_LOGS_DIR = REPO_ROOT / "logs"
DEFAULT_DB_PATH = REPO_ROOT / "data" / "chat.db"
DEFAULT_OUTPUT = Path.home() / "timmy-training-data.jsonl"
# Time gap that signals a new conversation boundary
CONVERSATION_GAP_MINUTES = 30
# Role mappings → ShareGPT "from" values
ROLE_MAP = {
"user": "human",
"timmy": "gpt",
"agent": "gpt",
"assistant": "gpt",
"system": "system",
}
# ── Session log reader ───────────────────────────────────────────────────────
def _parse_ts(ts: str) -> datetime | None:
"""Parse an ISO timestamp string, returning None on failure."""
try:
return datetime.fromisoformat(ts)
except (ValueError, TypeError):
return None
def _group_into_conversations(
entries: list[dict],
gap_minutes: int = CONVERSATION_GAP_MINUTES,
) -> list[list[dict]]:
"""Split a flat list of session entries into conversation windows.
A new conversation starts whenever there is a gap ≥ *gap_minutes* between
consecutive entries. Entries without a parseable timestamp never start a new
conversation; they are kept with the current one.
"""
if not entries:
return []
conversations: list[list[dict]] = []
current: list[dict] = []
last_ts: datetime | None = None
for entry in entries:
ts = _parse_ts(entry.get("timestamp", ""))
if last_ts is not None and ts is not None:
gap = ts - last_ts
if gap >= timedelta(minutes=gap_minutes):
if current:
conversations.append(current)
current = []
current.append(entry)
if ts is not None:
last_ts = ts
if current:
conversations.append(current)
return conversations
def _conversation_to_sharegpt(entries: list[dict]) -> dict[str, Any] | None:
"""Convert a list of session entries into a ShareGPT conversation dict.
Returns None if the conversation has fewer than 2 turns (not useful for
training).
"""
turns: list[dict[str, Any]] = []
pending_tool_calls: list[dict] = []
for entry in entries:
etype = entry.get("type")
if etype == "message":
role_raw = entry.get("role", "")
from_role = ROLE_MAP.get(role_raw, "gpt")
content = entry.get("content", "")
if not content:
continue
turn: dict[str, Any] = {"from": from_role, "value": content}
# Attach any accumulated tool calls to this gpt turn
if pending_tool_calls and from_role == "gpt":
turn["tool_calls"] = pending_tool_calls
pending_tool_calls = []
turns.append(turn)
elif etype == "tool_call":
tool_name = entry.get("tool", "unknown")
args = entry.get("args", {})
result = entry.get("result", "")
# Record call for the next gpt turn
pending_tool_calls.append({
"name": tool_name,
"arguments": args,
})
# Also emit a tool-result turn immediately after
turns.append({"from": "tool", "value": str(result), "tool": tool_name})
# Discard conversations with < 2 meaningful turns
meaningful = [t for t in turns if t["from"] in ("human", "gpt")]
if len(meaningful) < 2:
return None
return {"conversations": turns}
def load_from_session_logs(logs_dir: Path) -> list[dict[str, Any]]:
"""Load all session JSONL logs and return ShareGPT-formatted conversations."""
log_files = sorted(logs_dir.glob("session_*.jsonl"))
if not log_files:
return []
all_entries: list[dict] = []
for log_file in log_files:
try:
with open(log_file) as f:
for line in f:
line = line.strip()
if line:
try:
all_entries.append(json.loads(line))
except json.JSONDecodeError:
continue
except OSError:
continue
# Sort by timestamp for correct ordering across files
all_entries.sort(key=lambda e: e.get("timestamp", ""))
conversation_groups = _group_into_conversations(all_entries)
results: list[dict[str, Any]] = []
for group in conversation_groups:
conv = _conversation_to_sharegpt(group)
if conv is not None:
results.append(conv)
return results
# ── SQLite fallback reader ───────────────────────────────────────────────────
def load_from_sqlite(db_path: Path) -> list[dict[str, Any]]:
"""Read chat.db and return ShareGPT-formatted conversations."""
if not db_path.exists():
return []
try:
conn = sqlite3.connect(str(db_path))
conn.row_factory = sqlite3.Row
rows = conn.execute(
"SELECT role, content, timestamp FROM chat_messages ORDER BY id"
).fetchall()
conn.close()
except sqlite3.Error:
return []
entries = [
{
"type": "message",
"role": row["role"],
"content": row["content"],
"timestamp": row["timestamp"],
}
for row in rows
]
conversation_groups = _group_into_conversations(entries)
results: list[dict[str, Any]] = []
for group in conversation_groups:
conv = _conversation_to_sharegpt(group)
if conv is not None:
results.append(conv)
return results
# ── Validation ───────────────────────────────────────────────────────────────
def validate_output(output_path: Path) -> dict[str, Any]:
"""Validate the exported JSONL and return stats."""
if not output_path.exists():
return {"error": "Output file not found"}
total = 0
with_tools = 0
turn_counts: list[int] = []
with open(output_path) as f:
for line in f:
line = line.strip()
if not line:
continue
try:
obj = json.loads(line)
except json.JSONDecodeError:
continue
total += 1
turns = obj.get("conversations", [])
turn_counts.append(len(turns))
has_tool = any(
t.get("from") == "tool" or t.get("tool_calls")
for t in turns
)
if has_tool:
with_tools += 1
avg_turns = sum(turn_counts) / len(turn_counts) if turn_counts else 0
return {
"total_conversations": total,
"with_tool_calls": with_tools,
"avg_turns_per_conversation": round(avg_turns, 1),
"output_path": str(output_path),
}
# ── Main ─────────────────────────────────────────────────────────────────────
def build_parser() -> argparse.ArgumentParser:
p = argparse.ArgumentParser(
description="Export Timmy conversation trajectories to ShareGPT JSONL",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
p.add_argument(
"--logs-dir",
type=Path,
default=DEFAULT_LOGS_DIR,
help="Directory containing session_*.jsonl files",
)
p.add_argument(
"--db",
type=Path,
default=DEFAULT_DB_PATH,
help="Path to chat.db (used if no session logs found)",
)
p.add_argument(
"--output",
type=Path,
default=DEFAULT_OUTPUT,
help="Output JSONL file path",
)
p.add_argument(
"--gap-minutes",
type=int,
default=CONVERSATION_GAP_MINUTES,
help="Time gap (minutes) between entries that marks a new conversation",
)
p.add_argument(
"--validate-only",
action="store_true",
help="Skip export; just validate an existing output file",
)
p.add_argument(
"--min-examples",
type=int,
default=0,
help="Exit non-zero if fewer than this many examples are exported",
)
return p
def main(argv: list[str] | None = None) -> int:
args = build_parser().parse_args(argv)
if args.validate_only:
stats = validate_output(args.output)
print(json.dumps(stats, indent=2))
return 0
# ── Load conversations ───────────────────────────────────────────────────
print(f"[1/3] Loading from session logs: {args.logs_dir}")
conversations = load_from_session_logs(args.logs_dir)
if not conversations:
print(f"[1/3] No session logs found — falling back to SQLite: {args.db}")
conversations = load_from_sqlite(args.db)
if not conversations:
print(
"WARNING: No conversation data found.\n"
" • Run the dashboard and have some conversations first.\n"
" • Session logs are written to logs/session_YYYY-MM-DD.jsonl\n"
" • Chat history is stored in data/chat.db",
file=sys.stderr,
)
# Still write empty file so downstream steps don't error on missing file
args.output.parent.mkdir(parents=True, exist_ok=True)
args.output.write_text("")
return 0
# ── Write output ─────────────────────────────────────────────────────────
print(f"[2/3] Writing {len(conversations)} conversations → {args.output}")
args.output.parent.mkdir(parents=True, exist_ok=True)
with open(args.output, "w") as f:
for conv in conversations:
f.write(json.dumps(conv) + "\n")
# ── Validate ─────────────────────────────────────────────────────────────
print("[3/3] Validating output…")
stats = validate_output(args.output)
print(json.dumps(stats, indent=2))
if args.min_examples and stats.get("total_conversations", 0) < args.min_examples:
print(
f"ERROR: Only {stats['total_conversations']} examples exported "
f"(need ≥ {args.min_examples})",
file=sys.stderr,
)
return 1
return 0
if __name__ == "__main__":
sys.exit(main())


@@ -217,10 +217,6 @@ class Settings(BaseSettings):
# ── Test / Diagnostics ─────────────────────────────────────────────
# Skip loading heavy embedding models (for tests / low-memory envs).
timmy_skip_embeddings: bool = False
# Embedding backend: "ollama" for Ollama, "local" for sentence-transformers.
timmy_embedding_backend: Literal["ollama", "local"] = "ollama"
# Ollama model to use for embeddings (e.g., "nomic-embed-text").
ollama_embedding_model: str = "nomic-embed-text"
# Disable CSRF middleware entirely (for tests).
timmy_disable_csrf: bool = False
# Mark the process as running in test mode.


@@ -9,81 +9,35 @@ Also includes vector similarity utilities (cosine similarity, keyword overlap).
import hashlib
import logging
import math
import json
import httpx # Import httpx for Ollama API calls
from config import settings
logger = logging.getLogger(__name__)
# Embedding model - small, fast, local
EMBEDDING_MODEL = None
EMBEDDING_DIM = 384 # MiniLM dimension, will be overridden if Ollama model has different dim
EMBEDDING_DIM = 384 # MiniLM dimension
class OllamaEmbedder:
"""Mimics SentenceTransformer interface for Ollama."""
def __init__(self, model_name: str, ollama_url: str):
self.model_name = model_name
self.ollama_url = ollama_url
self.dimension = 0 # Will be updated after first call
def encode(self, sentences: str | list[str], convert_to_numpy: bool = False, normalize_embeddings: bool = True) -> list[list[float]] | list[float]:
"""Generate embeddings using Ollama."""
if isinstance(sentences, str):
sentences = [sentences]
all_embeddings = []
for sentence in sentences:
try:
response = httpx.post(
f"{self.ollama_url}/api/embeddings",
json={"model": self.model_name, "prompt": sentence},
timeout=settings.mcp_bridge_timeout,
)
response.raise_for_status()
embedding = response.json()["embedding"]
if not self.dimension:
self.dimension = len(embedding) # Set dimension on first successful call
global EMBEDDING_DIM
EMBEDDING_DIM = self.dimension # Update global EMBEDDING_DIM
all_embeddings.append(embedding)
except httpx.RequestError as exc:
logger.error("Ollama embeddings request failed: %s", exc)
# Fallback to simple hash embedding on Ollama error
return _simple_hash_embedding(sentence)
except json.JSONDecodeError as exc:
logger.error("Failed to decode Ollama embeddings response: %s", exc)
return _simple_hash_embedding(sentence)
if len(all_embeddings) == 1 and isinstance(sentences, str):
return all_embeddings[0]
return all_embeddings
def _get_embedding_model():
"""Lazy-load embedding model, preferring Ollama if configured."""
"""Lazy-load embedding model."""
global EMBEDDING_MODEL
global EMBEDDING_DIM
if EMBEDDING_MODEL is None:
if settings.timmy_skip_embeddings:
EMBEDDING_MODEL = False
return EMBEDDING_MODEL
try:
from config import settings
if settings.timmy_embedding_backend == "ollama":
logger.info("MemorySystem: Using Ollama for embeddings with model %s", settings.ollama_embedding_model)
EMBEDDING_MODEL = OllamaEmbedder(settings.ollama_embedding_model, settings.normalized_ollama_url)
# We don't know the dimension until after the first call, so keep it default for now.
# It will be updated dynamically in OllamaEmbedder.encode
return EMBEDDING_MODEL
else:
try:
from sentence_transformers import SentenceTransformer
if settings.timmy_skip_embeddings:
EMBEDDING_MODEL = False
return EMBEDDING_MODEL
except ImportError:
pass
EMBEDDING_MODEL = SentenceTransformer("all-MiniLM-L6-v2")
EMBEDDING_DIM = 384 # Reset to MiniLM dimension
logger.info("MemorySystem: Loaded local embedding model (all-MiniLM-L6-v2)")
except ImportError:
logger.warning("MemorySystem: sentence-transformers not installed, using fallback")
EMBEDDING_MODEL = False # Use fallback
try:
from sentence_transformers import SentenceTransformer
EMBEDDING_MODEL = SentenceTransformer("all-MiniLM-L6-v2")
logger.info("MemorySystem: Loaded embedding model")
except ImportError:
logger.warning("MemorySystem: sentence-transformers not installed, using fallback")
EMBEDDING_MODEL = False # Use fallback
return EMBEDDING_MODEL
@@ -106,14 +60,10 @@ def embed_text(text: str) -> list[float]:
model = _get_embedding_model()
if model and model is not False:
embedding = model.encode(text)
# Ensure it's a list of floats, not numpy array
if hasattr(embedding, 'tolist'):
return embedding.tolist()
return embedding
return embedding.tolist()
return _simple_hash_embedding(text)
def cosine_similarity(a: list[float], b: list[float]) -> float:
"""Calculate cosine similarity between two vectors."""
dot = sum(x * y for x, y in zip(a, b, strict=False))


@@ -1206,7 +1206,7 @@ memory_searcher = MemorySearcher()
# ───────────────────────────────────────────────────────────────────────────────
def memory_search(query: str, limit: int = 10) -> str:
def memory_search(query: str, top_k: int = 5) -> str:
"""Search past conversations, notes, and stored facts for relevant context.
Searches across both the vault (indexed markdown files) and the
@@ -1215,19 +1215,19 @@ def memory_search(query: str, limit: int = 10) -> str:
Args:
query: What to search for (e.g. "Bitcoin strategy", "server setup").
limit: Number of results to return (default 10).
top_k: Number of results to return (default 5).
Returns:
Formatted string of relevant memory results.
"""
# Guard: model sometimes passes None for limit
if limit is None:
limit = 10
# Guard: model sometimes passes None for top_k
if top_k is None:
top_k = 5
parts: list[str] = []
# 1. Search semantic vault (indexed markdown files)
vault_results = semantic_memory.search(query, limit)
vault_results = semantic_memory.search(query, top_k)
for content, score in vault_results:
if score < 0.2:
continue
@@ -1235,7 +1235,7 @@ def memory_search(query: str, limit: int = 10) -> str:
# 2. Search runtime vector store (stored facts/conversations)
try:
runtime_results = search_memories(query, limit=limit, min_relevance=0.2)
runtime_results = search_memories(query, limit=top_k, min_relevance=0.2)
for entry in runtime_results:
label = entry.context_type or "memory"
parts.append(f"[{label}] {entry.content[:300]}")
@@ -1289,48 +1289,45 @@ def memory_read(query: str = "", top_k: int = 5) -> str:
return "\n".join(parts)
def memory_store(topic: str, report: str, type: str = "research") -> str:
"""Store a piece of information in persistent memory, particularly for research outputs.
def memory_write(content: str, context_type: str = "fact") -> str:
"""Store a piece of information in persistent memory.
Use this tool to store structured research findings or other important documents.
Stored memories are searchable via memory_search across all channels.
Use this tool when the user explicitly asks you to remember something.
Stored memories are searchable via memory_search across all channels
(web GUI, Discord, Telegram, etc.).
Args:
topic: A concise title or topic for the research output.
report: The detailed content of the research output or document.
type: Type of memory — "research" for research outputs (default),
"fact" for permanent facts, "conversation" for conversation context,
"document" for other document fragments.
content: The information to remember (e.g. a phrase, fact, or note).
context_type: Type of memory — "fact" for permanent facts,
"conversation" for conversation context,
"document" for document fragments.
Returns:
Confirmation that the memory was stored.
"""
if not report or not report.strip():
return "Nothing to store — report is empty."
if not content or not content.strip():
return "Nothing to store — content is empty."
# Combine topic and report for embedding and storage content
full_content = f"Topic: {topic.strip()}\n\nReport: {report.strip()}"
valid_types = ("fact", "conversation", "document", "research")
if type not in valid_types:
type = "research"
valid_types = ("fact", "conversation", "document")
if context_type not in valid_types:
context_type = "fact"
try:
# Dedup check for facts and research — skip if similar exists
if type in ("fact", "research"):
# Dedup check for facts — skip if a similar fact already exists
# Threshold 0.75 catches paraphrases (was 0.9 which only caught near-exact)
if context_type == "fact":
existing = search_memories(
full_content, limit=3, context_type=type, min_relevance=0.75
content.strip(), limit=3, context_type="fact", min_relevance=0.75
)
if existing:
return f"Similar {type} already stored (id={existing[0].id[:8]}). Skipping duplicate."
return f"Similar fact already stored (id={existing[0].id[:8]}). Skipping duplicate."
entry = store_memory(
content=full_content,
content=content.strip(),
source="agent",
context_type=type,
metadata={"topic": topic},
context_type=context_type,
)
return f"Stored in memory (type={type}, id={entry.id[:8]}). This is now searchable across all channels."
return f"Stored in memory (type={context_type}, id={entry.id[:8]}). This is now searchable across all channels."
except Exception as exc:
logger.error("Failed to write memory: %s", exc)
return f"Failed to store memory: {exc}"


@@ -0,0 +1,306 @@
"""Unit tests for scripts/export_trajectories.py."""
from __future__ import annotations
import json
import sqlite3
from datetime import datetime, timedelta
from pathlib import Path
import pytest
import scripts.export_trajectories as et
# ── Helpers ──────────────────────────────────────────────────────────────────
def _ts(base: datetime, offset_minutes: int = 0) -> str:
return (base + timedelta(minutes=offset_minutes)).isoformat()
BASE = datetime(2026, 3, 1, 10, 0, 0)
def _make_session_entries(base: datetime = BASE) -> list[dict]:
"""Minimal session log entries: user → tool_call → timmy reply."""
return [
{"type": "message", "role": "user", "content": "list my files", "timestamp": _ts(base, 0)},
{"type": "tool_call", "tool": "shell", "args": {"cmd": "ls"}, "result": "a.py\nb.py", "timestamp": _ts(base, 1)},
{"type": "message", "role": "timmy", "content": "You have two files.", "timestamp": _ts(base, 2)},
]
# ── _group_into_conversations ─────────────────────────────────────────────────
class TestGroupIntoConversations:
def test_empty(self):
assert et._group_into_conversations([]) == []
def test_single_group_no_gap(self):
entries = _make_session_entries()
groups = et._group_into_conversations(entries, gap_minutes=30)
assert len(groups) == 1
assert groups[0] == entries
def test_split_on_large_gap(self):
entries_a = _make_session_entries(BASE)
# Second set starts 60 minutes later
entries_b = _make_session_entries(BASE + timedelta(hours=1))
groups = et._group_into_conversations(entries_a + entries_b, gap_minutes=30)
assert len(groups) == 2
assert len(groups[0]) == 3
assert len(groups[1]) == 3
def test_no_split_within_gap(self):
entries = _make_session_entries()
groups = et._group_into_conversations(entries, gap_minutes=60)
assert len(groups) == 1
def test_entries_without_timestamp(self):
entries = [
{"type": "message", "role": "user", "content": "hello"},
{"type": "message", "role": "timmy", "content": "hi"},
]
groups = et._group_into_conversations(entries, gap_minutes=30)
assert len(groups) == 1
# ── _conversation_to_sharegpt ─────────────────────────────────────────────────
class TestConversationToSharegpt:
def test_basic_exchange(self):
entries = _make_session_entries()
result = et._conversation_to_sharegpt(entries)
assert result is not None
turns = result["conversations"]
human_turns = [t for t in turns if t["from"] == "human"]
gpt_turns = [t for t in turns if t["from"] == "gpt"]
tool_turns = [t for t in turns if t["from"] == "tool"]
assert len(human_turns) == 1
assert len(gpt_turns) == 1
assert len(tool_turns) == 1
def test_tool_calls_attached_to_gpt_turn(self):
entries = [
{"type": "message", "role": "user", "content": "run ls", "timestamp": _ts(BASE, 0)},
{"type": "tool_call", "tool": "shell", "args": {}, "result": "ok", "timestamp": _ts(BASE, 1)},
{"type": "message", "role": "timmy", "content": "done", "timestamp": _ts(BASE, 2)},
]
result = et._conversation_to_sharegpt(entries)
assert result is not None
gpt_turns = [t for t in result["conversations"] if t["from"] == "gpt"]
assert len(gpt_turns) == 1
assert "tool_calls" in gpt_turns[0]
assert gpt_turns[0]["tool_calls"][0]["name"] == "shell"
def test_too_short_returns_none(self):
# Only one meaningful turn → not useful for training
entries = [{"type": "message", "role": "user", "content": "hi", "timestamp": _ts(BASE)}]
assert et._conversation_to_sharegpt(entries) is None
def test_empty_content_skipped(self):
entries = [
{"type": "message", "role": "user", "content": "", "timestamp": _ts(BASE, 0)},
{"type": "message", "role": "timmy", "content": "pong", "timestamp": _ts(BASE, 1)},
]
# Only one non-empty turn → should return None
assert et._conversation_to_sharegpt(entries) is None
def test_role_mapping(self):
entries = [
{"type": "message", "role": "user", "content": "q", "timestamp": _ts(BASE, 0)},
{"type": "message", "role": "assistant", "content": "a", "timestamp": _ts(BASE, 1)},
]
result = et._conversation_to_sharegpt(entries)
assert result is not None
roles = [t["from"] for t in result["conversations"]]
assert "human" in roles
assert "gpt" in roles
def test_decision_entries_ignored(self):
"""Non-message, non-tool entries (decisions, errors) should be skipped."""
entries = _make_session_entries() + [
{"type": "decision", "decision": "do something", "timestamp": _ts(BASE, 10)},
]
result = et._conversation_to_sharegpt(entries)
assert result is not None
assert all(t["from"] != "decision" for t in result["conversations"])
# ── load_from_session_logs ────────────────────────────────────────────────────
class TestLoadFromSessionLogs:
def test_empty_directory(self, tmp_path):
assert et.load_from_session_logs(tmp_path) == []
def test_missing_directory(self, tmp_path):
assert et.load_from_session_logs(tmp_path / "nonexistent") == []
def test_reads_single_log(self, tmp_path):
entries = _make_session_entries()
log = tmp_path / "session_2026-03-01.jsonl"
log.write_text("\n".join(json.dumps(e) for e in entries) + "\n")
result = et.load_from_session_logs(tmp_path)
assert len(result) == 1
assert result[0]["conversations"][0]["from"] == "human"
def test_reads_multiple_logs(self, tmp_path):
for day in range(3):
entries = _make_session_entries(BASE + timedelta(days=day, hours=2 * day))
log = tmp_path / f"session_2026-03-0{day + 1}.jsonl"
log.write_text("\n".join(json.dumps(e) for e in entries) + "\n")
result = et.load_from_session_logs(tmp_path)
# 3 log files, each a separate conversation (days apart)
assert len(result) == 3
def test_skips_malformed_lines(self, tmp_path):
log = tmp_path / "session_2026-03-01.jsonl"
entries = _make_session_entries()
lines = [json.dumps(e) for e in entries]
lines.insert(1, "not valid json{{{")
log.write_text("\n".join(lines) + "\n")
# Should still parse valid entries
result = et.load_from_session_logs(tmp_path)
assert len(result) == 1
# ── load_from_sqlite ──────────────────────────────────────────────────────────
class TestLoadFromSqlite:
def _make_db(self, tmp_path: Path, rows: list[tuple]) -> Path:
db = tmp_path / "chat.db"
conn = sqlite3.connect(str(db))
conn.execute("""
CREATE TABLE IF NOT EXISTS chat_messages (
id INTEGER PRIMARY KEY AUTOINCREMENT,
role TEXT, content TEXT, timestamp TEXT, source TEXT
)
""")
conn.executemany(
"INSERT INTO chat_messages (role, content, timestamp, source) VALUES (?,?,?,?)",
rows,
)
conn.commit()
conn.close()
return db
def test_missing_db(self, tmp_path):
assert et.load_from_sqlite(tmp_path / "missing.db") == []
def test_reads_conversation(self, tmp_path):
rows = [
("user", "hello", _ts(BASE, 0), "browser"),
("agent", "hi there", _ts(BASE, 5), "browser"),
]
db = self._make_db(tmp_path, rows)
result = et.load_from_sqlite(db)
assert len(result) == 1
turns = result[0]["conversations"]
assert turns[0]["from"] == "human"
assert turns[1]["from"] == "gpt"
def test_splits_on_gap(self, tmp_path):
rows = [
("user", "a", _ts(BASE, 0), "browser"),
("agent", "b", _ts(BASE, 5), "browser"),
("user", "c", _ts(BASE, 120), "browser"), # 2h gap
("agent", "d", _ts(BASE, 125), "browser"),
]
db = self._make_db(tmp_path, rows)
result = et.load_from_sqlite(db)
assert len(result) == 2
# ── validate_output ───────────────────────────────────────────────────────────
class TestValidateOutput:
def test_missing_file(self, tmp_path):
stats = et.validate_output(tmp_path / "missing.jsonl")
assert "error" in stats
def test_counts_conversations(self, tmp_path):
out = tmp_path / "out.jsonl"
convs = [
{"conversations": [{"from": "human", "value": "hi"}, {"from": "gpt", "value": "ho"}]},
{"conversations": [{"from": "human", "value": "a"}, {"from": "gpt", "value": "b"}]},
]
out.write_text("\n".join(json.dumps(c) for c in convs) + "\n")
stats = et.validate_output(out)
assert stats["total_conversations"] == 2
assert stats["with_tool_calls"] == 0
def test_counts_tool_calls(self, tmp_path):
out = tmp_path / "out.jsonl"
conv = {"conversations": [
{"from": "human", "value": "run"},
{"from": "gpt", "value": "ok", "tool_calls": [{"name": "shell", "arguments": {}}]},
{"from": "tool", "value": "done", "tool": "shell"},
]}
out.write_text(json.dumps(conv) + "\n")
stats = et.validate_output(out)
assert stats["with_tool_calls"] == 1
# ── CLI (main) ────────────────────────────────────────────────────────────────
class TestMain:
def test_no_data_exits_0(self, tmp_path):
out = tmp_path / "out.jsonl"
code = et.main([
"--logs-dir", str(tmp_path / "logs"),
"--db", str(tmp_path / "missing.db"),
"--output", str(out),
])
assert code == 0
assert out.exists()
def test_exports_from_logs(self, tmp_path):
logs = tmp_path / "logs"
logs.mkdir()
entries = _make_session_entries()
(logs / "session_2026-03-01.jsonl").write_text(
"\n".join(json.dumps(e) for e in entries) + "\n"
)
out = tmp_path / "out.jsonl"
code = et.main([
"--logs-dir", str(logs),
"--db", str(tmp_path / "missing.db"),
"--output", str(out),
])
assert code == 0
lines = [line for line in out.read_text().splitlines() if line.strip()]
assert len(lines) == 1
def test_validate_only(self, tmp_path, capsys):
out = tmp_path / "out.jsonl"
conv = {"conversations": [
{"from": "human", "value": "x"},
{"from": "gpt", "value": "y"},
]}
out.write_text(json.dumps(conv) + "\n")
code = et.main(["--validate-only", "--output", str(out)])
assert code == 0
captured = capsys.readouterr()
stats = json.loads(captured.out)
assert stats["total_conversations"] == 1
def test_min_examples_fails(self, tmp_path):
logs = tmp_path / "logs"
logs.mkdir()
entries = _make_session_entries()
(logs / "session_2026-03-01.jsonl").write_text(
"\n".join(json.dumps(e) for e in entries) + "\n"
)
out = tmp_path / "out.jsonl"
code = et.main([
"--logs-dir", str(logs),
"--db", str(tmp_path / "missing.db"),
"--output", str(out),
"--min-examples", "100",
])
assert code == 1


@@ -16,7 +16,7 @@ from timmy.memory_system import (
memory_forget,
memory_read,
memory_search,
memory_store,
memory_write,
)
@@ -490,7 +490,7 @@ class TestMemorySearch:
assert isinstance(result, str)
def test_none_top_k_handled(self):
result = memory_search("test", limit=None)
result = memory_search("test", top_k=None)
assert isinstance(result, str)
def test_basic_search_returns_string(self):
@@ -521,12 +521,12 @@ class TestMemoryRead:
assert isinstance(result, str)
class TestMemoryStore:
"""Test module-level memory_store function."""
class TestMemoryWrite:
"""Test module-level memory_write function."""
@pytest.fixture(autouse=True)
def mock_vector_store(self):
"""Mock vector_store functions for memory_store tests."""
"""Mock vector_store functions for memory_write tests."""
# Patch where it's imported from, not where it's used
with (
patch("timmy.memory_system.search_memories") as mock_search,
@@ -542,83 +542,75 @@ class TestMemoryStore:
yield {"search": mock_search, "store": mock_store}
def test_memory_store_empty_report(self):
"""Test that empty report returns error message."""
result = memory_store(topic="test", report="")
def test_memory_write_empty_content(self):
"""Test that empty content returns error message."""
result = memory_write("")
assert "empty" in result.lower()
def test_memory_store_whitespace_only(self):
"""Test that whitespace-only report returns error."""
result = memory_store(topic="test", report=" \n\t ")
def test_memory_write_whitespace_only(self):
"""Test that whitespace-only content returns error."""
result = memory_write(" \n\t ")
assert "empty" in result.lower()
def test_memory_store_valid_content(self, mock_vector_store):
def test_memory_write_valid_content(self, mock_vector_store):
"""Test writing valid content."""
result = memory_store(topic="fact about Timmy", report="Remember this important fact.")
result = memory_write("Remember this important fact.")
assert "stored" in result.lower() or "memory" in result.lower()
mock_vector_store["store"].assert_called_once()
def test_memory_store_dedup_for_facts_or_research(self, mock_vector_store):
"""Test that duplicate facts or research are skipped."""
def test_memory_write_dedup_for_facts(self, mock_vector_store):
"""Test that duplicate facts are skipped."""
# Simulate existing similar fact
mock_entry = MagicMock()
mock_entry.id = "existing-id"
mock_vector_store["search"].return_value = [mock_entry]
# Test with 'fact'
result = memory_store(topic="Similar fact", report="Similar fact text", type="fact")
result = memory_write("Similar fact text", context_type="fact")
assert "similar" in result.lower() or "duplicate" in result.lower()
mock_vector_store["store"].assert_not_called()
mock_vector_store["store"].reset_mock()
# Test with 'research'
result = memory_store(topic="Similar research", report="Similar research content", type="research")
assert "similar" in result.lower() or "duplicate" in result.lower()
mock_vector_store["store"].assert_not_called()
def test_memory_store_no_dedup_for_conversation(self, mock_vector_store):
def test_memory_write_no_dedup_for_conversation(self, mock_vector_store):
"""Test that conversation entries are not deduplicated."""
# Even with existing entries, conversations should be stored
mock_entry = MagicMock()
mock_entry.id = "existing-id"
mock_vector_store["search"].return_value = [mock_entry]
memory_store(topic="Conversation", report="Conversation text", type="conversation")
memory_write("Conversation text", context_type="conversation")
# Should still store (no duplicate check for non-fact)
mock_vector_store["store"].assert_called_once()
def test_memory_store_invalid_type_defaults_to_research(self, mock_vector_store):
"""Test that invalid type defaults to 'research'."""
memory_store(topic="Invalid type test", report="Some content", type="invalid_type")
# Should still succeed, using "research" as default
def test_memory_write_invalid_context_type(self, mock_vector_store):
"""Test that invalid context_type defaults to 'fact'."""
memory_write("Some content", context_type="invalid_type")
# Should still succeed, using "fact" as default
mock_vector_store["store"].assert_called_once()
call_kwargs = mock_vector_store["store"].call_args.kwargs
assert call_kwargs.get("context_type") == "research"
assert call_kwargs.get("context_type") == "fact"
def test_memory_store_valid_types(self, mock_vector_store):
def test_memory_write_valid_context_types(self, mock_vector_store):
"""Test all valid context types."""
valid_types = ["fact", "conversation", "document", "research"]
valid_types = ["fact", "conversation", "document"]
for ctx_type in valid_types:
mock_vector_store["store"].reset_mock()
memory_store(topic=f"Topic for {ctx_type}", report=f"Content for {ctx_type}", type=ctx_type)
memory_write(f"Content for {ctx_type}", context_type=ctx_type)
mock_vector_store["store"].assert_called_once()
def test_memory_store_strips_report_and_adds_topic(self, mock_vector_store):
"""Test that report is stripped of leading/trailing whitespace and combined with topic."""
memory_store(topic=" My Topic ", report=" padded content ")
def test_memory_write_strips_content(self, mock_vector_store):
"""Test that content is stripped of leading/trailing whitespace."""
memory_write(" padded content ")
call_kwargs = mock_vector_store["store"].call_args.kwargs
assert call_kwargs.get("content") == "Topic: My Topic\n\nReport: padded content"
assert call_kwargs.get("metadata") == {"topic": " My Topic "}
assert call_kwargs.get("content") == "padded content"
def test_memory_store_unicode_report(self, mock_vector_store):
def test_memory_write_unicode_content(self, mock_vector_store):
"""Test writing unicode content."""
result = memory_store(topic="Unicode", report="Unicode content: 你好世界 🎉")
result = memory_write("Unicode content: 你好世界 🎉")
assert "stored" in result.lower() or "memory" in result.lower()
def test_memory_store_handles_exception(self, mock_vector_store):
def test_memory_write_handles_exception(self, mock_vector_store):
"""Test handling of store_memory exceptions."""
mock_vector_store["store"].side_effect = Exception("DB error")
result = memory_store(topic="Failing", report="This will fail")
result = memory_write("This will fail")
assert "failed" in result.lower() or "error" in result.lower()