Files
the-nexus/scaffold/deepdive/phase1/config.yaml

44 lines
879 B
YAML
Raw Normal View History

---
# Deep Dive — Phase 1 Configuration
# Parent: the-nexus#830

# arXiv categories to monitor.
categories:
  - cs.AI  # Artificial Intelligence
  - cs.CL  # Computation and Language (NLP)
  - cs.LG  # Machine Learning

# Feed URL template (arXiv RSS format).
# "{category}" is presumably replaced with each entry of `categories`
# by the consumer — TODO confirm against the fetcher.
# Uses HTTPS so feed content cannot be read or altered in transit.
# NOTE(review): arXiv's RSS help page lists rss.arxiv.org as the feed host;
# confirm whether this export.arxiv.org URL is still served directly.
feed_template: 'https://export.arxiv.org/rss/{category}'

# Rate limiting: seconds to wait between requests.
# NOTE(review): arXiv's API terms ask for at most one request every three
# seconds; confirm the same limit applies to the RSS host before lowering.
rate_limit: 3

# Storage: directory for output files.
# NOTE(review): relative path — presumably resolved against the process
# working directory; verify in the consumer.
output_dir: 'data/deepdive/raw'

# Date format for output file names (strftime-style directives,
# i.e. four-digit year, two-digit month, two-digit day).
# Must stay quoted: a plain scalar cannot start with "%".
date_format: '%Y-%m-%d'

# User-Agent header sent with requests.
user_agent: 'DeepDiveBot/0.1 (research aggregator; ezra@timmy.local)'

# Keywords for coarse pre-filtering (Phase 2 does the real scoring).
# NOTE(review): entries are all lower-case, so matching is presumably
# case-insensitive — confirm. Short entries such as "rl" and "rag" will also
# match inside longer words if the consumer uses plain substring matching.
keywords:
  - 'agent'
  - 'llm'
  - 'language model'
  - 'reinforcement learning'
  - 'rl'
  - 'grpo'
  - 'fine-tuning'
  - 'training'
  - 'inference'
  - 'open source'
  - 'local'
  - 'gemma'
  - 'llama'
  - 'hermes'
  - 'tool use'
  - 'rag'
  - 'embeddings'