# the-nexus/scaffold/deepdive/phase1/config.yaml

# Deep Dive — Phase 1 Configuration
# Parent: the-nexus#830

# arXiv subject categories to watch (arXiv category identifiers).
categories:
  - 'cs.AI'  # Artificial Intelligence
  - 'cs.CL'  # Computation and Language (NLP)
  - 'cs.LG'  # Machine Learning

# arXiv RSS feed URL template. "{category}" is a placeholder, presumably
# filled with each entry of `categories` by the fetcher (verify in the caller).
# Uses HTTPS on arXiv's documented RSS host (rss.arxiv.org) rather than plain
# HTTP on the older export.arxiv.org path.
feed_template: 'https://rss.arxiv.org/rss/{category}'

# Rate limiting: delay in seconds between consecutive requests.
# NOTE(review): arXiv's API terms of use ask for at most one request every
# three seconds; confirm before lowering this value.
rate_limit: 3

# Storage location for output files.
# NOTE(review): relative path; presumably resolved against the process
# working directory. Confirm in the consumer.
output_dir: 'data/deepdive/raw'

# Date pattern used for output files (strftime-style directives).
# Must stay quoted: a plain scalar cannot start with "%" in YAML.
date_format: '%Y-%m-%d'

# User agent string sent with requests.
# NOTE(review): the contact address uses a ".local" domain, which is not
# reachable from the public internet. Confirm this is intentional.
user_agent: 'DeepDiveBot/0.1 (research aggregator; ezra@timmy.local)'

# Coarse pre-filter terms; the real relevance scoring happens in Phase 2.
# NOTE(review): short entries such as 'rl', 'rag' and 'local' would also hit
# unrelated words (e.g. "world", "average") if matching is by plain substring.
# Confirm the matcher uses word boundaries.
keywords:
  - 'agent'
  - 'llm'
  - 'language model'
  - 'reinforcement learning'
  - 'rl'
  - 'grpo'
  - 'fine-tuning'
  - 'training'
  - 'inference'
  - 'open source'
  - 'local'
  - 'gemma'
  - 'llama'
  - 'hermes'
  - 'tool use'
  - 'rag'
  - 'embeddings'