[ezra] Deep Dive keywords configuration (#830)

2026-04-05 05:19:01 +00:00
parent 4f3a163541
commit e83892d282
1 changed files with 149 additions and 0 deletions
--- a/config/deepdive_keywords.yaml
+++ b/config/deepdive_keywords.yaml
@@ -0,0 +1,149 @@
+# Deep Dive Relevance Keywords
+# Keyword tiers, tier weights, source bonuses, and filter settings for scoring entries
+
+# Weight multiplier per keyword tier (keys match the lists below): high 3.0x, medium 1.5x, low 0.5x
+weights:
+  high: 3.0
+  medium: 1.5
+  low: 0.5
+
+# High-priority keywords (critical to Hermes/Timmy work)
+high:
+  # Framework specific
+  - hermes
+  - timmy
+  - timmy foundation
+  - langchain
+  - langgraph
+  - crewai
+  - autogen
+  - autogpt
+  - babyagi
+  
+  # Agent concepts
+  - llm agent
+  - llm agents
+  - agent framework
+  - agent frameworks
+  - multi-agent
+  - multi agent
+  - agent orchestration
+  - agentic
+  - agentic workflow
+  - agent system
+  
+  # Tool use
+  - tool use
+  - tool calling
+  - function calling
+  - mcp
+  - model context protocol
+  - toolformer
+  - gorilla
+  
+  # Reasoning
+  - chain-of-thought
+  - chain of thought
+  - reasoning
+  - planning
+  - reflection
+  - self-reflection
+  
+  # RL and training. NOTE(review): upper-case acronyms below; confirm matching ignores case
+  - reinforcement learning
+  - RLHF
+  - DPO
+  - GRPO
+  - PPO
+  - preference optimization
+  - alignment
+  
+  # Fine-tuning
+  - fine-tuning
+  - finetuning
+  - instruction tuning
+  - supervised fine-tuning
+  - sft
+  - peft
+  - lora
+  
+  # Safety
+  - ai safety
+  - constitutional ai
+  - red teaming
+  - adversarial
+
+# Medium-priority keywords (relevant to AI work)
+medium:
+  # Core concepts
+  - llm
+  - large language model
+  - foundation model
+  - transformer
+  - attention mechanism
+  - prompting
+  - prompt engineering
+  - few-shot
+  - zero-shot
+  - in-context learning
+  
+  # Architecture and retrieval
+  - mixture of experts
+  - MoE
+  - retrieval augmented generation
+  - RAG
+  - vector database
+  - embeddings
+  - semantic search
+  
+  # Inference
+  - inference optimization
+  - quantization
+  - model distillation
+  - knowledge distillation
+  - KV cache
+  - speculative decoding
+  - vLLM
+  
+  # Open-source and open-weight models
+  - open source
+  - open weight
+  - llama
+  - mistral
+  - qwen
+  - deepseek
+  
+  # Companies and their models
+  - openai
+  - anthropic
+  - claude
+  - gpt
+  - gemini
+  - deepmind
+  - google ai
+
+# Low-priority keywords (general AI)
+low:
+  - artificial intelligence
+  - machine learning
+  - deep learning
+  - neural network
+  - natural language processing
+  - NLP
+  - computer vision
+
+# Source-specific bonuses: points added to an entry's score, keyed by source name
+source_bonuses:
+  arxiv_ai: 0.5
+  arxiv_cl: 0.5
+  arxiv_lg: 0.5
+  openai_blog: 0.3
+  anthropic_news: 0.4
+  deepmind_news: 0.3
+
+# Filter settings: minimum score, per-briefing entry cap, and embedding options
+filter:
+  min_relevance_score: 2.0
+  max_entries_per_briefing: 15
+  embedding_model: "all-MiniLM-L6-v2"
+  use_embeddings: true