Improve test coverage from 63.6% to 73.4% and fix test infrastructure (#137)

2026-03-06 13:21:05 -05:00
parent 23f744f296
commit 3f06e7231d
17 changed files with 2312 additions and 16 deletions
--- a/tests/timmy/test_semantic_memory.py
+++ b/tests/timmy/test_semantic_memory.py
@@ -0,0 +1,247 @@
+"""Tests for timmy.semantic_memory — semantic search, chunking, indexing."""
+
+import pytest
+from pathlib import Path
+from unittest.mock import patch
+
+from timmy.semantic_memory import (
+    _simple_hash_embedding,
+    embed_text,
+    cosine_similarity,
+    SemanticMemory,
+    MemorySearcher,
+    MemoryChunk,
+    memory_search,
+    _get_embedding_model,
+)
+
+
+class TestSimpleHashEmbedding:
+    """Test the fallback hash-based embedding."""
+
+    def test_returns_list_of_floats(self):
+        vec = _simple_hash_embedding("hello world")
+        assert isinstance(vec, list)
+        assert len(vec) == 128
+        assert all(isinstance(x, float) for x in vec)
+
+    def test_deterministic(self):
+        a = _simple_hash_embedding("same text")
+        b = _simple_hash_embedding("same text")
+        assert a == b
+
+    def test_different_texts_differ(self):
+        a = _simple_hash_embedding("hello world")
+        b = _simple_hash_embedding("goodbye universe")
+        assert a != b
+
+    def test_normalized(self):
+        import math
+        vec = _simple_hash_embedding("test normalization")
+        magnitude = math.sqrt(sum(x * x for x in vec))
+        assert abs(magnitude - 1.0) < 0.01
+
+
+class TestEmbedText:
+    """Test embed_text with fallback."""
+
+    def test_returns_embedding(self):
+        # TIMMY_SKIP_EMBEDDINGS=1 in conftest, so uses fallback
+        vec = embed_text("test text")
+        assert isinstance(vec, list)
+        assert len(vec) > 0
+
+
+class TestCosineSimilarity:
+    """Test cosine_similarity function."""
+
+    def test_identical_vectors(self):
+        v = [1.0, 0.0, 0.0]
+        assert cosine_similarity(v, v) == pytest.approx(1.0)
+
+    def test_orthogonal_vectors(self):
+        a = [1.0, 0.0]
+        b = [0.0, 1.0]
+        assert cosine_similarity(a, b) == pytest.approx(0.0)
+
+    def test_opposite_vectors(self):
+        a = [1.0, 0.0]
+        b = [-1.0, 0.0]
+        assert cosine_similarity(a, b) == pytest.approx(-1.0)
+
+    def test_zero_vector(self):
+        a = [0.0, 0.0]
+        b = [1.0, 0.0]
+        assert cosine_similarity(a, b) == 0.0
+
+
+class TestSemanticMemory:
+    """Test SemanticMemory class."""
+
+    @pytest.fixture
+    def mem(self, tmp_path):
+        sm = SemanticMemory()
+        sm.db_path = tmp_path / "test_semantic.db"
+        sm.vault_path = tmp_path / "vault"
+        sm.vault_path.mkdir()
+        sm._init_db()
+        return sm
+
+    def test_init_creates_db(self, mem):
+        assert mem.db_path.exists()
+
+    def test_split_into_chunks_short(self, mem):
+        text = "Short paragraph."
+        chunks = mem._split_into_chunks(text)
+        assert len(chunks) == 1
+        assert chunks[0] == "Short paragraph."
+
+    def test_split_into_chunks_multiple_paragraphs(self, mem):
+        text = "First paragraph.\n\nSecond paragraph.\n\nThird paragraph."
+        chunks = mem._split_into_chunks(text)
+        assert len(chunks) == 3
+
+    def test_split_into_chunks_long_paragraph(self, mem):
+        text = ". ".join([f"Sentence {i}" for i in range(50)])
+        chunks = mem._split_into_chunks(text, max_chunk_size=100)
+        assert len(chunks) > 1
+
+    def test_split_empty_text(self, mem):
+        assert mem._split_into_chunks("") == []
+
+    def test_index_file(self, mem):
+        md_file = mem.vault_path / "test.md"
+        md_file.write_text("# Title\n\nThis is a test document with enough content to index properly.\n\nAnother paragraph with more content here.")
+        count = mem.index_file(md_file)
+        assert count > 0
+
+    def test_index_nonexistent_file(self, mem):
+        count = mem.index_file(Path("/nonexistent/file.md"))
+        assert count == 0
+
+    def test_index_file_skips_already_indexed(self, mem):
+        md_file = mem.vault_path / "cached.md"
+        md_file.write_text("# Cached\n\nContent that should only be indexed once if unchanged.")
+        count1 = mem.index_file(md_file)
+        count2 = mem.index_file(md_file)
+        assert count1 > 0
+        assert count2 == 0  # Already indexed, same hash
+
+    def test_index_vault(self, mem):
+        (mem.vault_path / "a.md").write_text("# File A\n\nContent of file A with some meaningful text here.")
+        (mem.vault_path / "b.md").write_text("# File B\n\nContent of file B with different meaningful text.")
+        total = mem.index_vault()
+        assert total >= 2
+
+    def test_index_vault_skips_handoff(self, mem):
+        """Verify handoff files are excluded from indexing."""
+        handoff = mem.vault_path / "last-session-handoff.md"
+        handoff.write_text("# Handoff\n\nThis should be skipped completely from indexing.")
+        real = mem.vault_path / "real.md"
+        real.write_text("# Real\n\nThis should be indexed with enough meaningful content.")
+
+        # index_file on the handoff file should NOT skip it
+        # (that's only index_vault logic), so test the vault logic directly
+        count = mem.index_file(handoff)
+        assert count > 0  # index_file indexes everything
+
+        # Wipe and re-test via index_vault
+        import sqlite3
+        conn = sqlite3.connect(str(mem.db_path))
+        conn.execute("DELETE FROM chunks")
+        conn.commit()
+        conn.close()
+
+        mem.index_vault()
+        conn = sqlite3.connect(str(mem.db_path))
+        rows = conn.execute("SELECT DISTINCT source FROM chunks").fetchall()
+        conn.close()
+        sources = [r[0] for r in rows]
+        # Only the real file should be indexed, not the handoff
+        assert any("real" in s for s in sources)
+        assert not any("last-session-handoff" in s for s in sources)
+
+    def test_search_returns_results(self, mem):
+        md = mem.vault_path / "searchable.md"
+        md.write_text("# Python\n\nPython is a programming language used for web development and data science.")
+        mem.index_file(md)
+
+        results = mem.search("programming language")
+        assert len(results) > 0
+        # Each result is (content, score)
+        assert isinstance(results[0], tuple)
+        assert len(results[0]) == 2
+
+    def test_search_empty_db(self, mem):
+        results = mem.search("anything")
+        assert results == []
+
+    def test_get_relevant_context(self, mem):
+        md = mem.vault_path / "context.md"
+        md.write_text("# Important\n\nThis is very important information about the system architecture.")
+        mem.index_file(md)
+
+        ctx = mem.get_relevant_context("architecture")
+        # May or may not match depending on hash-based similarity
+        assert isinstance(ctx, str)
+
+    def test_get_relevant_context_empty(self, mem):
+        assert mem.get_relevant_context("anything") == ""
+
+    def test_stats(self, mem):
+        stats = mem.stats()
+        assert "total_chunks" in stats
+        assert "total_files" in stats
+        assert stats["total_chunks"] == 0
+
+
+class TestMemorySearcher:
+    """Test MemorySearcher high-level interface."""
+
+    @pytest.fixture
+    def searcher(self, tmp_path):
+        ms = MemorySearcher()
+        ms.semantic.db_path = tmp_path / "searcher.db"
+        ms.semantic.vault_path = tmp_path / "vault"
+        ms.semantic.vault_path.mkdir()
+        ms.semantic._init_db()
+        return ms
+
+    def test_search_semantic_tier(self, searcher):
+        results = searcher.search("test query", tiers=["semantic"])
+        assert "semantic" in results
+
+    def test_search_defaults_to_semantic(self, searcher):
+        results = searcher.search("test")
+        assert "semantic" in results
+
+    def test_get_context_for_query_empty(self, searcher):
+        ctx = searcher.get_context_for_query("test")
+        assert ctx == ""  # Empty DB
+
+
+class TestMemorySearch:
+    """Test module-level memory_search function."""
+
+    def test_no_results(self):
+        result = memory_search("something obscure that won't match anything")
+        assert isinstance(result, str)
+
+    def test_none_top_k_handled(self):
+        result = memory_search("test", top_k=None)
+        assert isinstance(result, str)
+
+
+class TestMemoryChunk:
+    """Test MemoryChunk dataclass."""
+
+    def test_create(self):
+        chunk = MemoryChunk(
+            id="c1",
+            source="/path/to/file.md",
+            content="chunk text",
+            embedding=[0.1, 0.2],
+            created_at="2026-03-06",
+        )
+        assert chunk.id == "c1"
+        assert chunk.content == "chunk text"