#!/usr/bin/env python3
"""Tests for Phase 2: Relevance Engine"""

import sys
from datetime import datetime
from pathlib import Path

import pytest

# Make the directory one level above this file's directory importable, so
# that ``pipeline`` resolves when the tests are run from a source checkout.
sys.path.insert(0, str(Path(__file__).parent.parent))

from pipeline import RelevanceScorer, FeedItem


class TestRelevanceScorer:
    """Test suite for relevance scoring."""

    @pytest.fixture
    def scorer(self):
        """Return a ``RelevanceScorer`` built with its default arguments."""
        return RelevanceScorer()

    @pytest.fixture
    def sample_items(self):
        """Return three feed items: two on RL/GRPO/LLM topics, one on quantum.

        NOTE(review): ``datetime.utcnow()`` yields naive datetimes and is
        deprecated since Python 3.12. It is kept here because it is unclear
        whether ``pipeline`` accepts timezone-aware values -- confirm before
        migrating to ``datetime.now(timezone.utc)``.
        """
        return [
            FeedItem(
                title="New RL algorithm for LLM agents",
                summary="We propose a reinforcement learning approach for training LLM agents...",
                url="http://example.com/1",
                source="arxiv",
                published=datetime.utcnow(),
                content_hash="abc123",
                raw={},
            ),
            FeedItem(
                title="Quantum computing advances",
                summary="Recent breakthroughs in quantum error correction...",
                url="http://example.com/2",
                source="arxiv",
                published=datetime.utcnow(),
                content_hash="def456",
                raw={},
            ),
            FeedItem(
                title="GRPO training for tool use",
                summary="Function calling improves with GRPO and chain-of-thought reasoning...",
                url="http://example.com/3",
                source="openai",
                published=datetime.utcnow(),
                content_hash="ghi789",
                raw={},
            ),
        ]

    def test_keyword_score_high_relevance(self, scorer):
        """High relevance item should score above 0.5."""
        text = "LLM agent using reinforcement learning and GRPO for tool use"
        score = scorer.keyword_score(text)
        assert score > 0.5, f"Expected >0.5, got {score}"

    def test_keyword_score_low_relevance(self, scorer):
        """Low relevance item should score below 0.5."""
        text = "Quantum computing error correction using surface codes"
        score = scorer.keyword_score(text)
        assert score < 0.5, f"Expected <0.5, got {score}"

    def test_ranking_order(self, scorer, sample_items):
        """Ranking should put high-relevance items first."""
        ranked = scorer.rank(sample_items, top_n=10, min_score=0.1)
        assert len(ranked) > 0
        # Each ranked entry is indexed as a pair whose first element is the
        # feed item; the top entry should be one of the RL/GRPO items.
        top_title = ranked[0][0].title
        assert "GRPO" in top_title or "RL" in top_title

    def test_min_score_filtering(self, scorer, sample_items):
        """Items below min_score should be filtered."""
        ranked = scorer.rank(sample_items, top_n=10, min_score=1.0)
        titles = [item.title for item, _ in ranked]
        # The previous assertion compared whole titles against the literal
        # "Quantum" and therefore could never fail. Check by substring so the
        # low-relevance quantum item is actually required to be absent.
        assert not any("Quantum" in title for title in titles), (
            f"Low-relevance item survived min_score filtering: {titles}"
        )


if __name__ == "__main__":
    # Propagate pytest's exit code so a failing run is visible to the shell.
    sys.exit(pytest.main([__file__, "-v"]))