---
# Deep Dive Source Configuration
# Define RSS feeds, API endpoints, and scrapers for content aggregation.
#
# Layout:
#   feeds        - sources keyed by a short identifier; each entry has a
#                  display name, a URL, a type (rss or scraper), a polling
#                  interval in hours, and an enabled flag.
#   api_sources  - API endpoints (all currently disabled).
#   settings     - global values that are not tied to a single source.

feeds:
  # arXiv Categories
  arxiv_ai:
    name: "arXiv Artificial Intelligence"
    url: "http://export.arxiv.org/rss/cs.AI"
    type: rss
    poll_interval_hours: 24
    enabled: true

  arxiv_cl:
    name: "arXiv Computation and Language"
    url: "http://export.arxiv.org/rss/cs.CL"
    type: rss
    poll_interval_hours: 24
    enabled: true

  arxiv_lg:
    name: "arXiv Learning"
    url: "http://export.arxiv.org/rss/cs.LG"
    type: rss
    poll_interval_hours: 24
    enabled: true

  # NOTE(review): this URL is identical to arxiv_lg's (cs.LG), so the same
  # feed is configured twice under two keys. Confirm the intended category
  # for this entry, or disable one of the two.
  arxiv_lm:
    name: "arXiv Large Language Models"
    url: "http://export.arxiv.org/rss/cs.LG"
    type: rss
    poll_interval_hours: 24
    enabled: true

  # AI Lab Blogs
  openai_blog:
    name: "OpenAI Blog"
    url: "https://openai.com/blog/rss.xml"
    type: rss
    poll_interval_hours: 6
    enabled: true

  deepmind_news:
    name: "Google DeepMind News"
    url: "https://deepmind.google/news/rss.xml"
    type: rss
    poll_interval_hours: 12
    enabled: true

  google_research:
    name: "Google Research Blog"
    url: "https://research.google/blog/rss/"
    type: rss
    poll_interval_hours: 12
    enabled: true

  anthropic_news:
    name: "Anthropic News"
    url: "https://www.anthropic.com/news"
    type: scraper  # Custom scraper required
    poll_interval_hours: 12
    enabled: false  # Enable when scraper implemented
    # CSS selectors for the scraper; presumably `container` matches one
    # item and the other selectors are evaluated inside it - TODO confirm
    # against the scraper implementation.
    selectors:
      container: "article"
      title: "h2, .title"
      link: "a[href^='/news']"
      date: "time"
      summary: ".summary, p"

  # Newsletters
  importai:
    name: "Import AI"
    url: "https://importai.substack.com/feed"
    type: rss
    poll_interval_hours: 24
    enabled: true

  tldr_ai:
    name: "TLDR AI"
    url: "https://tldr.tech/ai/rss"
    type: rss
    poll_interval_hours: 24
    enabled: true

  the_batch:
    name: "The Batch (DeepLearning.AI)"
    url: "https://read.deeplearning.ai/the-batch/rss"
    type: rss
    poll_interval_hours: 24
    enabled: false

# API Sources (for future expansion)
api_sources:
  huggingface_papers:
    name: "Hugging Face Daily Papers"
    url: "https://huggingface.co/api/daily_papers"
    type: api
    enabled: false
    auth_required: false

  semanticscholar:
    name: "Semantic Scholar"
    url: "https://api.semanticscholar.org/graph/v1/"
    type: api
    enabled: false
    auth_required: true
    # Name of an environment variable, not the key itself; the secret is
    # kept out of this file.
    api_key_env: "SEMANTIC_SCHOLAR_API_KEY"

# Global settings
settings:
  max_entries_per_source: 50
  min_summary_length: 100
  request_timeout_seconds: 30
  user_agent: "DeepDive-Bot/1.0 (Research Aggregation)"
  respect_robots_txt: true
  rate_limit_delay_seconds: 2