65 lines
2.1 KiB
Python
65 lines
2.1 KiB
Python
#!/usr/bin/env python3
|
|
"""Tests for Phase 1: Source Aggregation"""
|
|
|
|
import asyncio
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path

import pytest

# Put the directory above this file's folder on sys.path; this must run
# before the `pipeline` import below.
sys.path.insert(0, str(Path(__file__).parent.parent))

from pipeline import RSSAggregator, FeedItem
|
|
|
|
|
|
class TestRSSAggregator:
    """Test suite for RSS aggregation.

    The ``test_fetch_*`` tests request real arXiv RSS feeds through the
    aggregator; ``test_content_hash_consistency`` only exercises hashing.
    """

    @pytest.fixture
    def aggregator(self, tmp_path):
        """Return an ``RSSAggregator`` built with ``cache_dir`` set to pytest's ``tmp_path``."""
        return RSSAggregator(cache_dir=tmp_path)

    @pytest.mark.asyncio
    async def test_fetch_arxiv_cs_ai(self, aggregator):
        """Test fetching real arXiv cs.AI feed."""
        items = await aggregator.fetch_feed(
            url="http://export.arxiv.org/rss/cs.AI",
            name="test_arxiv",
            max_items=5,
        )

        assert len(items) > 0, "Should fetch items from arXiv"
        assert all(isinstance(i, FeedItem) for i in items)
        assert all(i.title for i in items)
        assert all(i.url.startswith("http") for i in items)
        print(f"Fetched {len(items)} items from arXiv cs.AI")

    @pytest.mark.asyncio
    async def test_fetch_all_sources(self, aggregator):
        """Test fetching from multiple sources."""
        sources = [
            {"name": "arxiv_ai", "url": "http://export.arxiv.org/rss/cs.AI", "max_items": 3},
            {"name": "arxiv_cl", "url": "http://export.arxiv.org/rss/cs.CL", "max_items": 3},
        ]

        # datetime.utcnow() is deprecated since Python 3.12. Build the same
        # naive UTC timestamp from an aware "now"; tzinfo is stripped so the
        # value passed to fetch_all is the same kind of object as before.
        now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
        since = now_utc - timedelta(hours=48)
        items = await aggregator.fetch_all(sources, since=since)

        assert len(items) > 0
        # Check deduplication
        hashes = [i.content_hash for i in items]
        assert len(hashes) == len(set(hashes)), "Should deduplicate items"

    def test_content_hash_consistency(self, aggregator):
        """Test that identical content produces identical hashes."""
        # Reuse the tmp_path-backed fixture, like the other tests, instead of
        # constructing an aggregator with the default cache location.
        h1 = aggregator._compute_hash("Test content")
        h2 = aggregator._compute_hash("Test content")
        h3 = aggregator._compute_hash("Different content")

        assert h1 == h2
        assert h1 != h3
|
|
|
|
|
|
if __name__ == "__main__":
    # pytest.main() returns the run's exit code; hand it to the interpreter so
    # running this file directly exits non-zero when a test fails.
    raise SystemExit(pytest.main([__file__, "-v"]))
|