[scaffold] Deep Dive intelligence pipeline: intelligence/deepdive/config.yaml

2026-04-05 06:19:50 +00:00
parent a8b4f7a8c0
commit cca5909cf9
1 changed files with 99 additions and 0 deletions
--- a/intelligence/deepdive/config.yaml
+++ b/intelligence/deepdive/config.yaml
@@ -0,0 +1,99 @@
+# Deep Dive Configuration
+# This file is config.yaml itself; edit the values below to customize.
+
+deepdive:
+  # Schedule (presumably one pipeline run per day at daily_time - confirm in the scheduler)
+  schedule:
+    daily_time: "06:00"  # HH:MM; keep quoted so a digits-and-colon value is never retyped
+    timezone: "America/New_York"  # IANA zone name; presumably the zone daily_time is read in - confirm
+
+  # Phase 1: Aggregation - feeds to pull items from (every entry below uses type "rss")
+  sources:
+    - name: "arxiv_cs_ai"
+      url: "https://rss.arxiv.org/rss/cs.AI"  # arXiv's documented RSS host, fetched over HTTPS
+      type: "rss"
+      fetch_window_hours: 24  # presumably the look-back window per run - confirm in the fetcher
+      max_items: 50  # presumably a per-source cap on items kept - confirm
+
+    - name: "arxiv_cs_cl"
+      url: "https://rss.arxiv.org/rss/cs.CL"
+      type: "rss"
+      fetch_window_hours: 24
+      max_items: 50
+
+    - name: "arxiv_cs_lg"
+      url: "https://rss.arxiv.org/rss/cs.LG"
+      type: "rss"
+      fetch_window_hours: 24
+      max_items: 50
+
+    - name: "openai_blog"
+      url: "https://openai.com/blog/rss.xml"
+      type: "rss"
+      fetch_window_hours: 48  # blog feeds use a wider window and a smaller cap than arXiv
+      max_items: 5
+
+    - name: "anthropic_blog"
+      url: "https://www.anthropic.com/blog/rss.xml"  # NOTE(review): confirm this feed URL resolves
+      type: "rss"
+      fetch_window_hours: 48
+      max_items: 5
+
+    - name: "deepmind_blog"
+      url: "https://deepmind.google/blog/rss.xml"
+      type: "rss"
+      fetch_window_hours: 48
+      max_items: 5
+
+  # Phase 2: Relevance - picks which aggregated items go into the briefing
+  relevance:
+    model: "all-MiniLM-L6-v2"  # ~80MB embeddings model
+    top_n: 10  # Items selected for briefing
+    min_score: 0.25  # Hard cutoff; presumably items scoring below this are dropped - confirm
+    keywords:  # topics of interest; presumably matched against items via the model above - confirm
+      - "LLM agent"
+      - "agent architecture"
+      - "tool use"
+      - "function calling"
+      - "chain of thought"
+      - "reasoning"
+      - "reinforcement learning"
+      - "RLHF"
+      - "GRPO"
+      - "PPO"
+      - "fine-tuning"
+      - "transformer"
+      - "attention mechanism"
+      - "inference optimization"
+      - "quantization"
+      - "local LLM"
+      - "llama.cpp"
+      - "ollama"
+      - "vLLM"
+      - "Hermes"
+      - "open source AI"
+
+  # Phase 3: Synthesis - LLM settings used to write the briefing summary
+  synthesis:
+    llm_endpoint: "http://localhost:4000/v1"  # Local llama-server; /v1 suggests an OpenAI-style API - confirm
+    llm_model: "gemma-4-it"  # presumably must match a model the endpoint serves - confirm
+    max_summary_length: 800  # NOTE(review): unit (characters/words/tokens) is not stated - confirm
+    temperature: 0.7  # presumably the sampling temperature passed to the LLM - confirm
+
+  # Phase 4: Audio - text-to-speech settings for the spoken briefing
+  tts:
+    engine: "piper"
+    model_path: "~/.local/share/piper/models"  # YAML does not expand "~"; NOTE(review): confirm the loader does
+    voice: "en_US-amy-medium"  # presumably a piper voice located under model_path - confirm
+    speed: 1.0  # presumably 1.0 means normal speaking rate - confirm
+    output_format: "mp3"  # piper outputs WAV, convert for Telegram
+
+  # Phase 5: Delivery - where the finished briefing is sent
+  delivery:
+    method: "telegram"
+    bot_token: "${TELEGRAM_BOT_TOKEN}"  # From env; YAML does not expand ${...}, the loader must substitute it
+    channel_id: "-1003664764329"  # quoted so the ID stays a string, not a negative integer
+    send_text_summary: true  # presumably also posts the summary as a text message - confirm
+
+  output_dir: "~/briefings"  # YAML does not expand "~"; NOTE(review): confirm the loader does
+  log_level: "INFO"