fix: add unit tests for memory/embeddings.py
Some checks failed
Tests / lint (pull_request) Successful in 3s
Tests / test (pull_request) Failing after 1m7s

Tests cover _simple_hash_embedding, embed_text, cosine_similarity,
_keyword_overlap, and _get_embedding_model with full branch coverage.

Fixes #431

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
kimi
2026-03-19 10:49:44 -04:00
parent aeadca1a9b
commit ae2285f241

View File

@@ -0,0 +1,180 @@
"""Unit tests for timmy.memory.embeddings module."""
import math
from unittest.mock import MagicMock, patch
import pytest
from timmy.memory.embeddings import (
_keyword_overlap,
_simple_hash_embedding,
cosine_similarity,
embed_text,
)
# ---------------------------------------------------------------------------
# _simple_hash_embedding
# ---------------------------------------------------------------------------
class TestSimpleHashEmbedding:
    """Behavioural checks for the ``_simple_hash_embedding`` helper."""

    def test_returns_128_dim_vector(self):
        """The embedding of ordinary text has exactly 128 components."""
        embedding = _simple_hash_embedding("hello world")
        assert len(embedding) == 128

    def test_deterministic(self):
        """Embedding the same text twice produces equal vectors."""
        first = _simple_hash_embedding("test")
        second = _simple_hash_embedding("test")
        assert first == second

    def test_normalized(self):
        """A non-empty text yields a vector of unit Euclidean length."""
        embedding = _simple_hash_embedding("the quick brown fox")
        squared_total = sum(component * component for component in embedding)
        assert math.sqrt(squared_total) == pytest.approx(1.0, abs=1e-6)

    def test_different_texts_differ(self):
        """Two unrelated words do not share an embedding."""
        hello_vec = _simple_hash_embedding("hello")
        goodbye_vec = _simple_hash_embedding("goodbye")
        assert hello_vec != goodbye_vec

    def test_empty_string(self):
        """An empty string still gives 128 components, all exactly zero."""
        embedding = _simple_hash_embedding("")
        assert len(embedding) == 128
        # With no words to hash, every component is expected to stay at 0.0.
        assert all(component == 0.0 for component in embedding)

    def test_unicode(self):
        """Accented text is embedded and normalised like ASCII text."""
        embedding = _simple_hash_embedding("café résumé naïve")
        assert len(embedding) == 128
        squared_total = sum(component * component for component in embedding)
        assert math.sqrt(squared_total) == pytest.approx(1.0, abs=1e-6)

    def test_long_text_truncates_to_50_words(self):
        """Words beyond the first 50 do not change the embedding."""
        words = [f"word{i}" for i in range(100)]
        full_text = " ".join(words)
        first_fifty_text = " ".join(words[:50])
        embedding_full = _simple_hash_embedding(full_text)
        embedding_first_fifty = _simple_hash_embedding(first_fifty_text)
        assert embedding_full == embedding_first_fifty
# ---------------------------------------------------------------------------
# embed_text
# ---------------------------------------------------------------------------
class TestEmbedText:
    """Tests for ``embed_text`` with ``_get_embedding_model`` patched out.

    Each test replaces the model loader so no real embedding model is
    loaded; only the dispatch between model and fallback is exercised.
    """

    @patch("timmy.memory.embeddings._get_embedding_model")
    def test_uses_fallback_when_model_false(self, mock_get):
        """A loader result of ``False`` yields a 128-component vector."""
        mock_get.return_value = False
        vec = embed_text("hello")
        assert len(vec) == 128  # hash fallback dimension

    @patch("timmy.memory.embeddings._get_embedding_model")
    def test_uses_model_when_available(self, mock_get):
        """A model object has ``encode`` called once and its values returned."""
        # numpy is needed only to build the fake encode() output. Skip the
        # test instead of erroring out when numpy is not installed.
        np = pytest.importorskip("numpy")
        fake_model = MagicMock()
        fake_model.encode.return_value = np.array([0.1, 0.2, 0.3])
        mock_get.return_value = fake_model
        result = embed_text("test")
        fake_model.encode.assert_called_once_with("test")
        assert result == pytest.approx([0.1, 0.2, 0.3])

    @patch("timmy.memory.embeddings._get_embedding_model")
    def test_uses_fallback_when_model_none(self, mock_get):
        """A loader result of ``None`` also yields a 128-component vector."""
        mock_get.return_value = None
        vec = embed_text("hello")
        assert len(vec) == 128
# ---------------------------------------------------------------------------
# cosine_similarity
# ---------------------------------------------------------------------------
class TestCosineSimilarity:
    """Checks of ``cosine_similarity`` on small hand-built vectors."""

    def test_identical_vectors(self):
        """A vector compared with itself scores 1.0."""
        vector = [1.0, 2.0, 3.0]
        similarity = cosine_similarity(vector, vector)
        assert similarity == pytest.approx(1.0)

    def test_orthogonal_vectors(self):
        """Perpendicular unit vectors score 0.0."""
        x_axis = [1.0, 0.0]
        y_axis = [0.0, 1.0]
        similarity = cosine_similarity(x_axis, y_axis)
        assert similarity == pytest.approx(0.0)

    def test_opposite_vectors(self):
        """Vectors pointing in opposite directions score -1.0."""
        forward = [1.0, 0.0]
        backward = [-1.0, 0.0]
        similarity = cosine_similarity(forward, backward)
        assert similarity == pytest.approx(-1.0)

    def test_zero_vector_returns_zero(self):
        """A zero vector on either side (or both) gives exactly 0.0."""
        zero_cases = (
            ([0.0, 0.0], [1.0, 2.0]),
            ([1.0, 2.0], [0.0, 0.0]),
            ([0.0, 0.0], [0.0, 0.0]),
        )
        for left, right in zero_cases:
            assert cosine_similarity(left, right) == 0.0

    def test_different_length_vectors(self):
        """Vectors of unequal length are accepted and still compared."""
        # NOTE(review): presumably the implementation pairs components with
        # zip(), so the unmatched trailing 0.0 is ignored - confirm against
        # cosine_similarity itself.
        longer = [1.0, 0.0, 0.0]
        shorter = [1.0, 0.0]
        similarity = cosine_similarity(longer, shorter)
        assert similarity == pytest.approx(1.0)
# ---------------------------------------------------------------------------
# _keyword_overlap
# ---------------------------------------------------------------------------
class TestKeywordOverlap:
    """Checks of ``_keyword_overlap(query, content)`` scoring."""

    def test_full_overlap(self):
        """Identical query and content score 1.0."""
        score = _keyword_overlap("hello world", "hello world")
        assert score == pytest.approx(1.0)

    def test_partial_overlap(self):
        """One shared word out of a two-word query scores 0.5."""
        score = _keyword_overlap("hello world", "hello there")
        assert score == pytest.approx(0.5)

    def test_no_overlap(self):
        """Texts with no word in common score 0.0."""
        score = _keyword_overlap("hello", "world")
        assert score == pytest.approx(0.0)

    def test_empty_query(self):
        """An empty query scores exactly 0.0."""
        score = _keyword_overlap("", "some content")
        assert score == 0.0

    def test_empty_content(self):
        """Empty content scores exactly 0.0."""
        score = _keyword_overlap("hello", "")
        assert score == 0.0

    def test_case_insensitive(self):
        """Letter case differences do not reduce the score."""
        score = _keyword_overlap("Hello World", "hello world")
        assert score == pytest.approx(1.0)

    def test_superset_content(self):
        """Extra words in the content do not lower a fully matched query."""
        score = _keyword_overlap("cat", "the cat sat on the mat")
        assert score == pytest.approx(1.0)
# ---------------------------------------------------------------------------
# _get_embedding_model
# ---------------------------------------------------------------------------
class TestGetEmbeddingModel:
    """Tests for ``_get_embedding_model`` and its module-level cache."""

    def test_skip_embeddings_setting(self):
        """With ``timmy_skip_embeddings`` set to True the loader returns False."""
        import timmy.memory.embeddings as embeddings_module

        # patch.object clears the lazy cache for the duration of the test and
        # puts the previous EMBEDDING_MODEL value back on exit, even if the
        # loader overwrote it.
        with patch.object(embeddings_module, "EMBEDDING_MODEL", None):
            with patch("timmy.memory.embeddings.settings", create=True) as fake_settings:
                fake_settings.timmy_skip_embeddings = True
                # Stand-in for the ``config`` module the loader imports from.
                fake_config = MagicMock(settings=fake_settings)
                with patch.dict("sys.modules", {"config": fake_config}):
                    loaded = embeddings_module._get_embedding_model()
            assert loaded is False

    def test_caches_model(self):
        """A value already stored in ``EMBEDDING_MODEL`` is returned as-is."""
        import timmy.memory.embeddings as embeddings_module

        with patch.object(embeddings_module, "EMBEDDING_MODEL", "cached_sentinel"):
            loaded = embeddings_module._get_embedding_model()
            assert loaded == "cached_sentinel"