diff --git a/config/deepdive_keywords.yaml b/config/deepdive_keywords.yaml
new file mode 100644
index 0000000..4d42497
--- /dev/null
+++ b/config/deepdive_keywords.yaml
@@ -0,0 +1,149 @@
+# Deep Dive relevance keywords
+# Keyword lists and per-tier weights used for scoring entries.
+
+# Tier multipliers: high = 3.0x, medium = 1.5x, low = 0.5x
+weights:
+  high: 3.0
+  medium: 1.5
+  low: 0.5
+
+# High tier: keywords critical to Hermes/Timmy work
+high:
+  # Framework-specific names
+  - hermes
+  - timmy
+  - timmy foundation
+  - langchain
+  - langgraph
+  - crewai
+  - autogen
+  - autogpt
+  - babyagi
+
+  # Agent concepts (singular/plural and hyphenated/spaced variants listed separately)
+  - llm agent
+  - llm agents
+  - agent framework
+  - agent frameworks
+  - multi-agent
+  - multi agent
+  - agent orchestration
+  - agentic
+  - agentic workflow
+  - agent system
+
+  # Tool use and function calling
+  - tool use
+  - tool calling
+  - function calling
+  - mcp
+  - model context protocol
+  - toolformer
+  - gorilla
+
+  # Reasoning and planning
+  - chain-of-thought
+  - chain of thought
+  - reasoning
+  - planning
+  - reflection
+  - self-reflection
+
+  # Reinforcement learning and training
+  - reinforcement learning
+  - RLHF
+  - DPO
+  - GRPO
+  - PPO
+  - preference optimization
+  - alignment
+
+  # Fine-tuning
+  - fine-tuning
+  - finetuning
+  - instruction tuning
+  - supervised fine-tuning
+  - sft
+  - peft
+  - lora
+
+  # AI safety
+  - ai safety
+  - constitutional ai
+  - red teaming
+  - adversarial
+
+# Medium tier: keywords relevant to AI work
+medium:
+  # Core concepts
+  - llm
+  - large language model
+  - foundation model
+  - transformer
+  - attention mechanism
+  - prompting
+  - prompt engineering
+  - few-shot
+  - zero-shot
+  - in-context learning
+
+  # Architecture and retrieval
+  - mixture of experts
+  - MoE
+  - retrieval augmented generation
+  - RAG
+  - vector database
+  - embeddings
+  - semantic search
+
+  # Inference and serving
+  - inference optimization
+  - quantization
+  - model distillation
+  - knowledge distillation
+  - KV cache
+  - speculative decoding
+  - vLLM
+
+  # Open research and open-weight models
+  - open source
+  - open weight
+  - llama
+  - mistral
+  - qwen
+  - deepseek
+
+  # Companies and their models
+  - openai
+  - anthropic
+  - claude
+  - gpt
+  - gemini
+  - deepmind
+  - google ai
+
+# Low tier: general AI terms
+low:
+  - artificial intelligence
+  - machine learning
+  - deep learning
+  - neural network
+  - natural language processing
+  - NLP
+  - computer vision
+
+# Per-source bonuses: points added to an entry's score based on its source
+source_bonuses:
+  arxiv_ai: 0.5
+  arxiv_cl: 0.5
+  arxiv_lg: 0.5
+  openai_blog: 0.3
+  anthropic_news: 0.4
+  deepmind_news: 0.3
+
+# Filtering options applied to scored entries
+filter:
+  min_relevance_score: 2.0
+  max_entries_per_briefing: 15
+  embedding_model: 'all-MiniLM-L6-v2'
+  use_embeddings: true