#!/usr/bin/env python3
"""Tests for Phase 1: Source Aggregation.

Reconstructed from the patch that adds
``intelligence/deepdive/tests/test_aggregator.py``.

The two ``test_fetch_*`` tests request the live arXiv RSS URLs, so they need
network access.  They are coroutine tests marked with ``pytest.mark.asyncio``
and therefore need an asyncio pytest plugin (presumably ``pytest-asyncio`` --
confirm against the project's test requirements).
"""

import asyncio  # noqa: F401  (present in the original file; not referenced directly here)
import sys
from datetime import datetime, timedelta
from pathlib import Path

import pytest

# ``pipeline`` lives in the directory above this tests folder; put that
# directory first on sys.path so the import below resolves.
sys.path.insert(0, str(Path(__file__).parent.parent))

from pipeline import FeedItem, RSSAggregator  # noqa: E402


class TestRSSAggregator:
    """Test suite for RSS aggregation."""

    @pytest.fixture
    def aggregator(self, tmp_path):
        """Return an ``RSSAggregator`` whose ``cache_dir`` is pytest's per-test tmp dir."""
        return RSSAggregator(cache_dir=tmp_path)

    @pytest.mark.asyncio
    async def test_fetch_arxiv_cs_ai(self, aggregator):
        """Test fetching real arXiv cs.AI feed."""
        items = await aggregator.fetch_feed(
            url="http://export.arxiv.org/rss/cs.AI",
            name="test_arxiv",
            max_items=5,
        )

        assert len(items) > 0, "Should fetch items from arXiv"
        assert all(isinstance(item, FeedItem) for item in items), (
            "Every fetched entry should be a FeedItem"
        )
        assert all(item.title for item in items), (
            "Every fetched entry should have a non-empty title"
        )
        assert all(item.url.startswith("http") for item in items), (
            "Every fetched entry should have an http(s) URL"
        )
        print(f"Fetched {len(items)} items from arXiv cs.AI")

    @pytest.mark.asyncio
    async def test_fetch_all_sources(self, aggregator):
        """Test fetching from multiple sources."""
        sources = [
            {"name": "arxiv_ai", "url": "http://export.arxiv.org/rss/cs.AI", "max_items": 3},
            {"name": "arxiv_cl", "url": "http://export.arxiv.org/rss/cs.CL", "max_items": 3},
        ]

        # NOTE(review): datetime.utcnow() returns a naive datetime and is
        # deprecated since Python 3.12.  Left as-is because switching to an
        # aware datetime is only safe once it is confirmed how
        # RSSAggregator.fetch_all compares ``since`` against item timestamps.
        since = datetime.utcnow() - timedelta(hours=48)
        items = await aggregator.fetch_all(sources, since=since)

        assert len(items) > 0, "Should fetch items from at least one source"
        # Check deduplication: no two returned items may share a content hash.
        hashes = [item.content_hash for item in items]
        assert len(hashes) == len(set(hashes)), "Should deduplicate items"

    def test_content_hash_consistency(self, aggregator):
        """Test that identical content produces identical hashes."""
        # Uses the tmp_path-backed fixture like the other tests instead of a
        # default-constructed aggregator.
        first = aggregator._compute_hash("Test content")
        repeat = aggregator._compute_hash("Test content")
        other = aggregator._compute_hash("Different content")

        assert first == repeat, "Same content should hash identically"
        assert first != other, "Different content should hash differently"


if __name__ == "__main__":
    # Propagate pytest's exit code so running this file as a script reports
    # test failures through the process exit status.
    sys.exit(pytest.main([__file__, "-v"]))