---
# Deep Dive — Phase 1 Configuration
# Parent: the-nexus#830
#
# Settings for the Phase 1 stage: which arXiv categories to monitor, how the
# feed URL is formed, how fast to request, and where raw output is written.

# ArXiv categories to monitor.
categories:
  - cs.AI  # Artificial Intelligence
  - cs.CL  # Computation and Language (NLP)
  - cs.LG  # Machine Learning

# Feed URL template (arXiv RSS format).
# Presumably `{category}` is replaced with each entry of `categories` by the
# consumer — verify against the fetching code.
# NOTE(review): this is a plain-http URL; consider https if the endpoint
# supports it — confirm before changing.
feed_template: "http://export.arxiv.org/rss/{category}"

# Rate limiting: seconds between requests.
rate_limit: 3

# Storage: directory for output files.
output_dir: "data/deepdive/raw"

# Date format for output files. Must stay quoted: a plain scalar cannot
# start with `%`.
date_format: "%Y-%m-%d"

# User agent for requests.
user_agent: "DeepDiveBot/0.1 (research aggregator; ezra@timmy.local)"

# Keywords for pre-filtering (Phase 2 does real scoring).
keywords:
  - "agent"
  - "llm"
  - "language model"
  - "reinforcement learning"
  - "rl"
  - "grpo"
  - "fine-tuning"
  - "training"
  - "inference"
  - "open source"
  - "local"
  - "gemma"
  - "llama"
  - "hermes"
  - "tool use"
  - "rag"
  - "embeddings"