---
# Deep Dive — Phase 1 Configuration
# Parent: the-nexus#830
#
# Settings for the Phase 1 fetch step: which arXiv categories to pull,
# how to build each feed URL, how fast to request, and where/how raw
# output is written. Keywords below are only a coarse pre-filter.

# ArXiv categories to monitor
categories:
  - cs.AI  # Artificial Intelligence
  - cs.CL  # Computation and Language (NLP)
  - cs.LG  # Machine Learning

# Feed URLs (arXiv RSS format)
# `{category}` is a placeholder; presumably filled with each entry of
# `categories` by the consumer — confirm against the fetch code.
# NOTE(review): this is a plain-HTTP URL; consider switching to HTTPS
# after checking that the endpoint and the fetcher both handle it.
feed_template: "http://export.arxiv.org/rss/{category}"

# Rate limiting (seconds between requests)
rate_limit: 3

# Storage
# Relative path; what it is resolved against depends on the consumer.
output_dir: "data/deepdive/raw"

# Date format for output files
# Looks like a strftime-style pattern (e.g. 2024-01-31) — confirm.
# Must stay quoted: a plain scalar cannot start with `%`.
date_format: "%Y-%m-%d"

# User agent for requests
user_agent: "DeepDiveBot/0.1 (research aggregator; ezra@timmy.local)"

# Keywords for pre-filtering (Phase 2 does real scoring)
# NOTE(review): very short entries such as "rl" and "rag" will also hit
# inside longer words (e.g. "world", "fragment") if the consumer does
# plain substring matching — verify how matching is implemented.
keywords:
  - "agent"
  - "llm"
  - "language model"
  - "reinforcement learning"
  - "rl"
  - "grpo"
  - "fine-tuning"
  - "training"
  - "inference"
  - "open source"
  - "local"
  - "gemma"
  - "llama"
  - "hermes"
  - "tool use"
  - "rag"
  - "embeddings"