#!/usr/bin/env python3
"""Tests for Phase 1: Source Aggregation.

Exercises the RSSAggregator against live arXiv RSS feeds (network required)
and checks hash-based deduplication behavior.
"""

import asyncio
import sys
from datetime import datetime, timedelta, timezone
from pathlib import Path

import pytest

# Make the project root importable so `pipeline` resolves when this file
# is run directly from the tests directory.
sys.path.insert(0, str(Path(__file__).parent.parent))

from pipeline import RSSAggregator, FeedItem


class TestRSSAggregator:
    """Test suite for RSS aggregation."""

    @pytest.fixture
    def aggregator(self, tmp_path):
        """Fresh aggregator whose cache lives in a per-test temp dir."""
        return RSSAggregator(cache_dir=tmp_path)

    @pytest.mark.asyncio
    async def test_fetch_arxiv_cs_ai(self, aggregator):
        """Test fetching real arXiv cs.AI feed."""
        items = await aggregator.fetch_feed(
            url="http://export.arxiv.org/rss/cs.AI",
            name="test_arxiv",
            max_items=5,
        )
        assert len(items) > 0, "Should fetch items from arXiv"
        assert all(isinstance(i, FeedItem) for i in items)
        assert all(i.title for i in items)
        assert all(i.url.startswith("http") for i in items)
        print(f"Fetched {len(items)} items from arXiv cs.AI")

    @pytest.mark.asyncio
    async def test_fetch_all_sources(self, aggregator):
        """Test fetching from multiple sources."""
        sources = [
            {"name": "arxiv_ai", "url": "http://export.arxiv.org/rss/cs.AI", "max_items": 3},
            {"name": "arxiv_cl", "url": "http://export.arxiv.org/rss/cs.CL", "max_items": 3},
        ]
        # datetime.utcnow() is deprecated (Python 3.12); use an aware "now"
        # and strip tzinfo so `since` stays naive, matching prior behavior.
        since = datetime.now(timezone.utc).replace(tzinfo=None) - timedelta(hours=48)
        items = await aggregator.fetch_all(sources, since=since)
        assert len(items) > 0
        # Check deduplication: every content hash must be unique.
        hashes = [i.content_hash for i in items]
        assert len(hashes) == len(set(hashes)), "Should deduplicate items"

    def test_content_hash_consistency(self):
        """Test that identical content produces identical hashes."""
        agg = RSSAggregator()
        h1 = agg._compute_hash("Test content")
        h2 = agg._compute_hash("Test content")
        h3 = agg._compute_hash("Different content")
        assert h1 == h2
        assert h1 != h3


if __name__ == "__main__":
    pytest.main([__file__, "-v"])