[BURN] #830: Phase 1 tests (arXiv RSS aggregation)
Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
This commit is contained in:
64
intelligence/deepdive/tests/test_aggregator.py
Normal file
@@ -0,0 +1,64 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Tests for Phase 1: Source Aggregation"""
|
||||
|
||||
import asyncio
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path

import pytest

# Make the package under test importable when running this file directly.
sys.path.insert(0, str(Path(__file__).parent.parent))

from pipeline import RSSAggregator, FeedItem
|
||||
|
||||
|
||||
class TestRSSAggregator:
    """Test suite for Phase 1 RSS aggregation.

    NOTE(review): the two fetch tests hit the live arXiv RSS endpoints, so
    they require network access and can be flaky offline or when arXiv is
    slow — consider guarding them with a dedicated network marker.
    """

    @pytest.fixture
    def aggregator(self, tmp_path):
        """Provide an RSSAggregator caching into a per-test temp directory."""
        return RSSAggregator(cache_dir=tmp_path)

    @pytest.mark.asyncio
    async def test_fetch_arxiv_cs_ai(self, aggregator):
        """Fetch the real arXiv cs.AI feed and sanity-check the items."""
        items = await aggregator.fetch_feed(
            url="http://export.arxiv.org/rss/cs.AI",
            name="test_arxiv",
            max_items=5
        )

        assert len(items) > 0, "Should fetch items from arXiv"
        assert all(isinstance(i, FeedItem) for i in items)
        assert all(i.title for i in items)
        assert all(i.url.startswith("http") for i in items)
        print(f"Fetched {len(items)} items from arXiv cs.AI")

    @pytest.mark.asyncio
    async def test_fetch_all_sources(self, aggregator):
        """Fetch from multiple sources and verify cross-source deduplication."""
        sources = [
            {"name": "arxiv_ai", "url": "http://export.arxiv.org/rss/cs.AI", "max_items": 3},
            {"name": "arxiv_cl", "url": "http://export.arxiv.org/rss/cs.CL", "max_items": 3},
        ]

        # datetime.utcnow() is deprecated since Python 3.12; build the same
        # naive-UTC timestamp from an aware "now" so comparisons inside
        # fetch_all keep their original (naive) semantics.
        since = datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=48)
        items = await aggregator.fetch_all(sources, since=since)

        assert len(items) > 0
        # Check deduplication: every content hash in the merged result
        # must be unique.
        hashes = [i.content_hash for i in items]
        assert len(hashes) == len(set(hashes)), "Should deduplicate items"

    def test_content_hash_consistency(self):
        """Identical content must hash identically; different content must not."""
        agg = RSSAggregator()
        h1 = agg._compute_hash("Test content")
        h2 = agg._compute_hash("Test content")
        h3 = agg._compute_hash("Different content")

        assert h1 == h2
        assert h1 != h3
|
||||
|
||||
if __name__ == "__main__":
    # Propagate pytest's exit status: pytest.main() returns the exit code,
    # and discarding it makes a direct script run exit 0 even when tests fail.
    sys.exit(pytest.main([__file__, "-v"]))
Reference in New Issue
Block a user