---
# Deep Dive Configuration
# Copy to config.yaml and customize
deepdive:

  # Schedule
  schedule:
    daily_time: "06:00"           # quoted — bare 06:00 parses as sexagesimal int under YAML 1.1
    timezone: "America/New_York"  # IANA timezone name
# Phase 1: Aggregation
|
|
|
|
|
sources:
|
|
|
|
|
- name: "arxiv_cs_ai"
|
|
|
|
|
url: "http://export.arxiv.org/rss/cs.AI"
|
|
|
|
|
type: "rss"
|
|
|
|
|
fetch_window_hours: 24
|
|
|
|
|
max_items: 50
|
|
|
|
|
|
|
|
|
|
- name: "arxiv_cs_cl"
|
|
|
|
|
url: "http://export.arxiv.org/rss/cs.CL"
|
|
|
|
|
type: "rss"
|
|
|
|
|
fetch_window_hours: 24
|
|
|
|
|
max_items: 50
|
|
|
|
|
|
|
|
|
|
- name: "arxiv_cs_lg"
|
|
|
|
|
url: "http://export.arxiv.org/rss/cs.LG"
|
|
|
|
|
type: "rss"
|
|
|
|
|
fetch_window_hours: 24
|
|
|
|
|
max_items: 50
|
|
|
|
|
|
|
|
|
|
- name: "openai_blog"
|
|
|
|
|
url: "https://openai.com/blog/rss.xml"
|
|
|
|
|
type: "rss"
|
|
|
|
|
fetch_window_hours: 48
|
|
|
|
|
max_items: 5
|
|
|
|
|
|
|
|
|
|
- name: "anthropic_blog"
|
|
|
|
|
url: "https://www.anthropic.com/blog/rss.xml"
|
|
|
|
|
type: "rss"
|
|
|
|
|
fetch_window_hours: 48
|
|
|
|
|
max_items: 5
|
|
|
|
|
|
|
|
|
|
- name: "deepmind_blog"
|
|
|
|
|
url: "https://deepmind.google/blog/rss.xml"
|
|
|
|
|
type: "rss"
|
|
|
|
|
fetch_window_hours: 48
|
|
|
|
|
max_items: 5
|
|
|
|
|
|
|
|
|
|
# Phase 2: Relevance
|
|
|
|
|
relevance:
|
|
|
|
|
model: "all-MiniLM-L6-v2" # ~80MB embeddings model
|
|
|
|
|
top_n: 10 # Items selected for briefing
|
|
|
|
|
min_score: 0.25 # Hard cutoff
|
|
|
|
|
keywords:
|
|
|
|
|
- "LLM agent"
|
|
|
|
|
- "agent architecture"
|
|
|
|
|
- "tool use"
|
|
|
|
|
- "function calling"
|
|
|
|
|
- "chain of thought"
|
|
|
|
|
- "reasoning"
|
|
|
|
|
- "reinforcement learning"
|
|
|
|
|
- "RLHF"
|
|
|
|
|
- "GRPO"
|
|
|
|
|
- "PPO"
|
|
|
|
|
- "fine-tuning"
|
|
|
|
|
- "transformer"
|
|
|
|
|
- "attention mechanism"
|
|
|
|
|
- "inference optimization"
|
|
|
|
|
- "quantization"
|
|
|
|
|
- "local LLM"
|
|
|
|
|
- "llama.cpp"
|
|
|
|
|
- "ollama"
|
|
|
|
|
- "vLLM"
|
|
|
|
|
- "Hermes"
|
|
|
|
|
- "open source AI"
|
|
|
|
|
|
|
|
|
|
# Phase 3: Synthesis
|
|
|
|
|
synthesis:
|
|
|
|
|
llm_endpoint: "http://localhost:4000/v1" # Local llama-server
|
2026-04-07 15:55:52 +00:00
|
|
|
llm_model: "gemma4:12b"
|
2026-04-05 06:19:50 +00:00
|
|
|
max_summary_length: 800
|
|
|
|
|
temperature: 0.7
|
|
|
|
|
|
|
|
|
|
# Phase 4: Audio
|
|
|
|
|
tts:
|
|
|
|
|
engine: "piper"
|
|
|
|
|
model_path: "~/.local/share/piper/models"
|
|
|
|
|
voice: "en_US-amy-medium"
|
|
|
|
|
speed: 1.0
|
|
|
|
|
output_format: "mp3" # piper outputs WAV, convert for Telegram
|
|
|
|
|
|
feat: Phase 3.5 — DPO training pair generation from Deep Dive pipeline
Wire arXiv relevance filter output directly into DPO pair generation,
closing the loop between research synthesis and overnight training data.
New module: dpo_generator.py
- DPOPairGenerator class with 3 pair strategies:
* summarize: paper → fleet-grounded analysis (chosen) vs generic (rejected)
* relevance: 'what matters to Hermes?' → scored context vs vague
* implication: 'what should we do?' → actionable insight vs platitude
- Extracts synthesis excerpts matched to each ranked item
- Outputs to ~/.timmy/training-data/dpo-pairs/deepdive_{timestamp}.jsonl
- Format: {prompt, chosen, rejected, task_type, evidence_ids,
source_session, safety_flags, metadata}
Pipeline changes (pipeline.py):
- Import DPOPairGenerator with graceful degradation
- Initialize from config deepdive.training.dpo section
- Execute as Phase 3.5 between synthesis and audio
- DPO results included in pipeline return dict
- Wrapped in try/except — DPO failure never blocks delivery
Config changes (config.yaml):
- New deepdive.training.dpo section with:
enabled, output_dir, min_score, max_pairs_per_run, pair_types
Integration tested: 2 mock items × 3 pair types = 6 valid JSONL pairs.
Chosen responses consistently richer than rejected (assert-verified).
2026-04-13 02:24:04 +00:00
|
|
|
# Phase 3.5: DPO Training Pair Generation
|
|
|
|
|
training:
|
|
|
|
|
dpo:
|
|
|
|
|
enabled: true
|
|
|
|
|
output_dir: "~/.timmy/training-data/dpo-pairs"
|
|
|
|
|
min_score: 0.5 # Only generate pairs from items above this relevance score
|
|
|
|
|
max_pairs_per_run: 30 # Cap pairs per pipeline execution
|
|
|
|
|
pair_types: # Which pair strategies to use
|
|
|
|
|
- "summarize" # Paper summary → fleet-grounded analysis
|
|
|
|
|
- "relevance" # Relevance analysis → scored fleet context
|
|
|
|
|
- "implication" # Implications → actionable insight
|
feat: DPO pair quality validator — gate before overnight training
Add DPOQualityValidator that catches bad training pairs before they
enter the tightening loop. Wired into DPOPairGenerator between
generate() and export() as an automatic quality gate.
New module: dpo_quality.py
- 5 single-pair quality checks:
1. Field length minimums (prompt ≥40, chosen ≥80, rejected ≥30 chars)
2. Chosen/rejected length ratio (chosen must be ≥1.3x longer)
3. Chosen≈rejected similarity (Jaccard ≤0.70 — catches low-contrast)
4. Vocabulary diversity in chosen (unique word ratio ≥0.30)
5. Substance markers in chosen (≥2 fleet/training/action terms)
- 2 cross-pair quality checks:
6. Near-duplicate prompts within batch (Jaccard ≤0.85)
7. Cross-run dedup against recent JSONL history files
- Two modes: 'drop' (filter out bad pairs) or 'flag' (export with warning)
- BatchReport with per-pair diagnostics, pass rates, and warnings
- Standalone CLI: python3 dpo_quality.py <file.jsonl> [--strict] [--json]
Modified: dpo_generator.py
- Imports DPOQualityValidator with graceful degradation
- Initializes from config validation section (enabled by default)
- Validates between generate() and export() in run()
- Quality report included in pipeline result dict
- Validator failure never blocks — falls back to unvalidated export
Modified: config.yaml
- New deepdive.training.dpo.validation section with all tunable knobs:
enabled, flagged_pair_action, similarity thresholds, length minimums,
dedup_history_files
Integration tested — 6 test cases covering:
✓ Good pairs pass (3/3 accepted)
✓ Bad pairs caught: too-short, high-similarity, inverted signal (0/3)
✓ Near-duplicate prompt detection (1/2 deduped)
✓ Flag mode preserves pairs with warnings (3/3 flagged)
✓ Cross-run deduplication against history (1 dupe caught)
✓ Full generator→validator→export pipeline (6/6 validated)
2026-04-13 02:46:50 +00:00
|
|
|
validation:
|
|
|
|
|
enabled: true
|
|
|
|
|
flagged_pair_action: "drop" # "drop" = remove bad pairs, "flag" = export with warning
|
|
|
|
|
min_prompt_chars: 40 # Minimum prompt length
|
|
|
|
|
min_chosen_chars: 80 # Minimum chosen response length
|
|
|
|
|
min_rejected_chars: 30 # Minimum rejected response length
|
|
|
|
|
min_chosen_rejected_ratio: 1.3 # Chosen must be ≥1.3x longer than rejected
|
|
|
|
|
max_chosen_rejected_similarity: 0.70 # Max Jaccard overlap between chosen/rejected
|
|
|
|
|
max_prompt_prompt_similarity: 0.85 # Max Jaccard overlap between prompts (dedup)
|
2026-04-13 03:11:10 +00:00
|
|
|
dedup_full_history: true # Persistent index covers ALL historical JSONL (no sliding window)
|
feat: Phase 3.5 — DPO training pair generation from Deep Dive pipeline
Wire arXiv relevance filter output directly into DPO pair generation,
closing the loop between research synthesis and overnight training data.
New module: dpo_generator.py
- DPOPairGenerator class with 3 pair strategies:
* summarize: paper → fleet-grounded analysis (chosen) vs generic (rejected)
* relevance: 'what matters to Hermes?' → scored context vs vague
* implication: 'what should we do?' → actionable insight vs platitude
- Extracts synthesis excerpts matched to each ranked item
- Outputs to ~/.timmy/training-data/dpo-pairs/deepdive_{timestamp}.jsonl
- Format: {prompt, chosen, rejected, task_type, evidence_ids,
source_session, safety_flags, metadata}
Pipeline changes (pipeline.py):
- Import DPOPairGenerator with graceful degradation
- Initialize from config deepdive.training.dpo section
- Execute as Phase 3.5 between synthesis and audio
- DPO results included in pipeline return dict
- Wrapped in try/except — DPO failure never blocks delivery
Config changes (config.yaml):
- New deepdive.training.dpo section with:
enabled, output_dir, min_score, max_pairs_per_run, pair_types
Integration tested: 2 mock items × 3 pair types = 6 valid JSONL pairs.
Chosen responses consistently richer than rejected (assert-verified).
2026-04-13 02:24:04 +00:00
|
|
|
|
2026-04-05 17:32:24 +00:00
|
|
|
# Phase 0: Fleet Context Grounding
|
|
|
|
|
fleet_context:
|
|
|
|
|
enabled: true
|
2026-04-05 19:54:47 +00:00
|
|
|
gitea_url: "https://forge.alexanderwhitestone.com"
|
2026-04-05 17:32:24 +00:00
|
|
|
token: "${GITEA_TOKEN}" # From environment
|
|
|
|
|
owner: "Timmy_Foundation"
|
|
|
|
|
repos:
|
|
|
|
|
- "timmy-config"
|
|
|
|
|
- "the-nexus"
|
|
|
|
|
- "timmy-home"
|
|
|
|
|
- "hermes-agent"
|
|
|
|
|
|
2026-04-05 06:19:50 +00:00
|
|
|
# Phase 5: Delivery
|
|
|
|
|
delivery:
|
|
|
|
|
method: "telegram"
|
|
|
|
|
bot_token: "${TELEGRAM_BOT_TOKEN}" # From env
|
|
|
|
|
channel_id: "-1003664764329"
|
|
|
|
|
send_text_summary: true
|
|
|
|
|
|
|
|
|
|
output_dir: "~/briefings"
|
|
|
|
|
log_level: "INFO"
|