---
# Deep Dive Configuration
# NOTE(review): the original header said "Copy to config.yaml and customize",
# but this file IS committed as config.yaml (it ships a live channel_id).
# If it is meant as a template, rename it to config.example.yaml so local
# customizations are not committed to VCS.

deepdive:
  # Schedule — when the daily briefing pipeline runs.
  schedule:
    daily_time: "06:00"            # quoted: bare 06:00 would hit YAML 1.1 sexagesimal parsing
    timezone: "America/New_York"

  # Phase 1: Aggregation — feeds polled each run.
  # NOTE(review): URLs changed from http:// to https:// (arXiv serves both;
  # TLS avoids redirect/downgrade). arXiv has also migrated RSS to
  # https://rss.arxiv.org/rss/<cat> — confirm which endpoint the fetcher expects.
  sources:
    - name: "arxiv_cs_ai"
      url: "https://export.arxiv.org/rss/cs.AI"
      type: "rss"
      fetch_window_hours: 24       # only items published within this window are kept
      max_items: 50

    - name: "arxiv_cs_cl"
      url: "https://export.arxiv.org/rss/cs.CL"
      type: "rss"
      fetch_window_hours: 24
      max_items: 50

    - name: "arxiv_cs_lg"
      url: "https://export.arxiv.org/rss/cs.LG"
      type: "rss"
      fetch_window_hours: 24
      max_items: 50

    - name: "openai_blog"
      url: "https://openai.com/blog/rss.xml"
      type: "rss"
      fetch_window_hours: 48       # blogs post less often; wider window
      max_items: 5

    - name: "anthropic_blog"
      url: "https://www.anthropic.com/blog/rss.xml"
      type: "rss"
      fetch_window_hours: 48
      max_items: 5

    - name: "deepmind_blog"
      url: "https://deepmind.google/blog/rss.xml"
      type: "rss"
      fetch_window_hours: 48
      max_items: 5

  # Phase 2: Relevance — embedding-based scoring against the keyword list.
  relevance:
    model: "all-MiniLM-L6-v2"      # ~80MB embeddings model
    top_n: 10                      # items selected for briefing
    min_score: 0.25                # hard cutoff; items below are dropped
    keywords:
      - "LLM agent"
      - "agent architecture"
      - "tool use"
      - "function calling"
      - "chain of thought"
      - "reasoning"
      - "reinforcement learning"
      - "RLHF"
      - "GRPO"
      - "PPO"
      - "fine-tuning"
      - "transformer"
      - "attention mechanism"
      - "inference optimization"
      - "quantization"
      - "local LLM"
      - "llama.cpp"
      - "ollama"
      - "vLLM"
      - "Hermes"
      - "open source AI"

  # Phase 3: Synthesis — summarization via a local OpenAI-compatible endpoint.
  synthesis:
    llm_endpoint: "http://localhost:4000/v1"   # local llama-server; loopback, so http is fine
    llm_model: "gemma-4-it"
    max_summary_length: 800
    temperature: 0.7

  # Phase 4: Audio — text-to-speech rendering of the briefing.
  # NOTE(review): "~" is not expanded by YAML — the consumer must expanduser()
  # these paths; confirm it does.
  tts:
    engine: "piper"
    model_path: "~/.local/share/piper/models"
    voice: "en_US-amy-medium"
    speed: 1.0
    output_format: "mp3"           # piper outputs WAV, convert for Telegram

  # Phase 5: Delivery — push the finished briefing.
  delivery:
    method: "telegram"
    bot_token: "${TELEGRAM_BOT_TOKEN}"   # from env; never hardcode the token here
    channel_id: "-1003664764329"         # quoted: bare value would parse as a negative int
    send_text_summary: true

  output_dir: "~/briefings"        # see tilde-expansion note above
  log_level: "INFO"