Files
the-nexus/config/deepdive_sources.yaml
Ezra 4f3a163541
Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
[ezra] Deep Dive source configuration template (#830)
2026-04-05 05:19:00 +00:00

116 lines
2.7 KiB
YAML

# Deep Dive Source Configuration
# Define RSS feeds, API endpoints, and scrapers for content aggregation
feeds:
  # Each entry is keyed by a source id and carries: name, url, type
  # (rss or scraper), poll_interval_hours, and enabled. Scraper entries
  # additionally carry a selectors mapping of CSS selectors.

  # arXiv categories.
  # URLs use the HTTPS endpoint arXiv documents for RSS
  # (rss.arxiv.org/rss/<category>) instead of plain-HTTP export.arxiv.org.
  arxiv_ai:
    name: "arXiv Artificial Intelligence"
    url: "https://rss.arxiv.org/rss/cs.AI"
    type: rss
    poll_interval_hours: 24
    enabled: true
  arxiv_cl:
    name: "arXiv Computation and Language"
    url: "https://rss.arxiv.org/rss/cs.CL"
    type: rss
    poll_interval_hours: 24
    enabled: true
  arxiv_lg:
    name: "arXiv Learning"
    url: "https://rss.arxiv.org/rss/cs.LG"
    type: rss
    poll_interval_hours: 24
    enabled: true
  # NOTE(review): arXiv has no LLM-specific category, and this entry points
  # at the same cs.LG feed as arxiv_lg, so the same feed was polled twice
  # under two ids. Kept for compatibility but disabled until it targets a
  # distinct source.
  arxiv_lm:
    name: "arXiv Large Language Models"
    url: "https://rss.arxiv.org/rss/cs.LG"
    type: rss
    poll_interval_hours: 24
    enabled: false

  # AI lab blogs
  openai_blog:
    name: "OpenAI Blog"
    url: "https://openai.com/blog/rss.xml"
    type: rss
    poll_interval_hours: 6
    enabled: true
  deepmind_news:
    name: "Google DeepMind News"
    url: "https://deepmind.google/news/rss.xml"
    type: rss
    poll_interval_hours: 12
    enabled: true
  google_research:
    name: "Google Research Blog"
    url: "https://research.google/blog/rss/"
    type: rss
    poll_interval_hours: 12
    enabled: true
  anthropic_news:
    name: "Anthropic News"
    url: "https://www.anthropic.com/news"
    type: scraper  # Custom scraper required
    poll_interval_hours: 12
    enabled: false  # Enable when scraper implemented
    # CSS selectors for the scraper; quoted because they contain
    # commas, brackets, and quotes.
    selectors:
      container: "article"
      title: "h2, .title"
      link: "a[href^='/news']"
      date: "time"
      summary: ".summary, p"

  # Newsletters
  importai:
    name: "Import AI"
    url: "https://importai.substack.com/feed"
    type: rss
    poll_interval_hours: 24
    enabled: true
  tldr_ai:
    name: "TLDR AI"
    url: "https://tldr.tech/ai/rss"
    type: rss
    poll_interval_hours: 24
    enabled: true
  the_batch:
    name: "The Batch (DeepLearning.AI)"
    url: "https://read.deeplearning.ai/the-batch/rss"
    type: rss
    poll_interval_hours: 24
    enabled: false
# API Sources (for future expansion)
api_sources:
  # Every entry here is currently disabled (enabled: false).
  # auth_required flags whether the endpoint needs credentials; when it does,
  # api_key_env presumably names the environment variable holding the key
  # (confirm against the consumer).
  huggingface_papers:
    name: "Hugging Face Daily Papers"
    type: api
    url: "https://huggingface.co/api/daily_papers"
    auth_required: false
    enabled: false
  semanticscholar:
    name: "Semantic Scholar"
    type: api
    url: "https://api.semanticscholar.org/graph/v1/"
    auth_required: true
    api_key_env: "SEMANTIC_SCHOLAR_API_KEY"
    enabled: false
# Global settings
settings:
  # Units are carried in the key names (_seconds, _length, per_source).

  # Outbound request behaviour
  user_agent: "DeepDive-Bot/1.0 (Research Aggregation)"
  respect_robots_txt: true
  request_timeout_seconds: 30
  rate_limit_delay_seconds: 2

  # Content limits
  max_entries_per_source: 50
  min_summary_length: 100