Files
ezra-environment/the-nexus/deepdive/bin/phase3_synthesize.py
Ezra 9f010ad044 [BURN] Deep Dive scaffold: 5-phase sovereign NotebookLM (#830)
Complete production-ready scaffold for automated daily AI intelligence briefings:

- Phase 1: Source aggregation (arXiv + lab blogs)
- Phase 2: Relevance ranking (keyword + source authority scoring)
- Phase 3: LLM synthesis (Hermes-context briefing generation)
- Phase 4: TTS audio (edge-tts/OpenAI/ElevenLabs)
- Phase 5: Telegram delivery (voice message)

Deliverables:
- docs/ARCHITECTURE.md (9000+ lines) - system design
- docs/OPERATIONS.md - runbook and troubleshooting
- 5 executable phase scripts (bin/)
- Full pipeline orchestrator (run_full_pipeline.py)
- requirements.txt, README.md

Addresses all 9 acceptance criteria from #830.
Ready for host selection, credential config, and cron activation.

Author: Ezra | Burn mode | 2026-04-05
2026-04-05 05:48:12 +00:00

265 lines
9.6 KiB
Python

#!/usr/bin/env python3
"""
Deep Dive Phase 3: Synthesis Engine
Generates structured intelligence briefing via LLM.
Usage:
python phase3_synthesize.py [--date YYYY-MM-DD] [--output-dir DIR]
Issue: the-nexus#830
"""
import argparse
import json
import os
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import List, Optional
# System prompt engineered for Hermes/Timmy context.
# NOTE: sent verbatim as the "system" message by every backend in
# SynthesisEngine (OpenAI, Anthropic, local Hermes); edits here change the
# briefing structure and tone for all of them.
BRIEFING_SYSTEM_PROMPT = """You are Deep Dive, an intelligence briefing system for the Hermes Agent Framework and Timmy organization.
Your task is to synthesize AI/ML research sources into a structured daily intelligence briefing tailored for Alexander Whitestone (founder) and the Hermes development team.
CONTEXT ABOUT HERMES/TIMMY:
- Hermes is an open-source AI agent framework with tool use, multi-agent orchestration, and MCP (Model Context Protocol) support
- Timmy is the fleet coordinator managing multiple AI coding agents
- Current priorities: agent reliability, context compression, distributed execution, sovereign infrastructure
- Technology stack: Python, asyncio, SQLite, FastAPI, llama.cpp, vLLM
BRIEFING STRUCTURE:
1. HEADLINES (3-5 bullets): Major developments with impact assessment
2. DEEP DIVES (2-3 items): Detailed analysis of most relevant papers/posts
3. IMPLICATIONS FOR HERMES: How this research affects our roadmap
4. ACTION ITEMS: Specific follow-ups for the team
5. SOURCES: Cited with URLs
TONE:
- Professional intelligence briefing
- Concise but substantive
- Technical depth appropriate for AI engineers
- Forward-looking implications
RULES:
- Prioritize sources by relevance to agent systems and LLM architecture
- Include specific techniques/methods when applicable
- Connect findings to Hermes' current challenges
- Always cite sources
"""
@dataclass
class Source:
    """Ranked source item loaded from Phase 2 output.

    Field names mirror the keys of the Phase 2 JSON items
    (``score`` comes from the ``total_score`` key).
    """
    title: str  # article/paper title
    url: str  # canonical link; cited in the briefing
    source: str  # origin label of the item — presumably the feed/site name; confirm against Phase 2
    summary: str  # abstract/summary text; truncated to 800 chars before prompting
    score: float  # Phase 2 'total_score' relevance value
class SynthesisEngine:
    """Generate intelligence briefings via LLM.

    Reads the ranked-source JSON emitted by Phase 2 from
    ``<output_dir>/ranked/<date>.json``, synthesizes a markdown briefing
    through one of three backends (OpenAI, Anthropic, or a local Hermes
    endpoint), and writes the result to ``<output_dir>/briefings/<date>.md``.
    Any backend failure degrades to a plain source digest instead of
    aborting the pipeline.
    """

    def __init__(self, output_dir: Path, date: str, model: str = "openai/gpt-4o-mini"):
        """
        Args:
            output_dir: Root data directory shared by all pipeline phases.
            date: Target date string (YYYY-MM-DD); selects input/output files.
            model: Backend selector, e.g. "openai/gpt-4o-mini", an
                "anthropic/claude-..." name, or "hermes" for the local endpoint.
        """
        self.output_dir = output_dir
        self.date = date
        self.model = model
        self.ranked_dir = output_dir / "ranked"
        self.briefings_dir = output_dir / "briefings"
        self.briefings_dir.mkdir(parents=True, exist_ok=True)

    def load_ranked_sources(self) -> List[Source]:
        """Load ranked sources from Phase 2.

        Returns:
            Sources in the order stored by Phase 2.

        Raises:
            FileNotFoundError: If Phase 2 has produced no output for this date.
        """
        ranked_file = self.ranked_dir / f"{self.date}.json"
        if not ranked_file.exists():
            raise FileNotFoundError(f"Phase 2 output not found: {ranked_file}")
        with open(ranked_file, encoding='utf-8') as f:
            data = json.load(f)
        return [
            Source(
                title=item.get('title', ''),
                url=item.get('url', ''),
                source=item.get('source', ''),
                summary=item.get('summary', ''),
                score=item.get('total_score', 0)
            )
            for item in data.get('items', [])
        ]

    def format_sources_for_llm(self, sources: List[Source]) -> str:
        """Format sources as a plain-text digest for LLM consumption.

        Only the top 15 sources are included and each summary is truncated
        to 800 characters to bound the prompt size.
        """
        lines = []
        for i, src in enumerate(sources[:15], 1):  # Top 15 sources
            lines.append(f"\n--- Source {i} [{src.source}] (score: {src.score}) ---")
            lines.append(f"Title: {src.title}")
            lines.append(f"URL: {src.url}")
            lines.append(f"Summary: {src.summary[:800]}")
        return "\n".join(lines)

    def generate_briefing_openai(self, sources_text: str) -> str:
        """Generate the briefing via the OpenAI chat completions API.

        Any failure (missing package, missing OPENAI_API_KEY, network error)
        falls back to the offline source digest.
        """
        try:
            from openai import OpenAI
            client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
            # BUG FIX: the model name was hard-coded to "gpt-4o-mini",
            # silently ignoring --model. Honor an explicit "openai/<name>"
            # selector; otherwise keep the previous default.
            if 'openai' in self.model.lower():
                model_name = self.model.split('/', 1)[-1]
            else:
                model_name = "gpt-4o-mini"
            response = client.chat.completions.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": BRIEFING_SYSTEM_PROMPT},
                    {"role": "user", "content": f"Generate today's Deep Dive briefing ({self.date}) based on these sources:\n\n{sources_text}"}
                ],
                temperature=0.7,
                max_tokens=4000
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"[ERROR] OpenAI generation failed: {e}")
            return self._fallback_briefing(sources_text)

    def generate_briefing_anthropic(self, sources_text: str) -> str:
        """Generate the briefing via the Anthropic Messages API.

        Any failure falls back to the offline source digest.
        """
        try:
            import anthropic
            client = anthropic.Anthropic(api_key=os.environ.get('ANTHROPIC_API_KEY'))
            # Honor an explicit "...claude..." selector (stripping an optional
            # "provider/" prefix), consistent with the OpenAI backend;
            # otherwise keep the previous hard-coded default.
            if 'claude' in self.model.lower():
                model_name = self.model.split('/', 1)[-1]
            else:
                model_name = "claude-3-haiku-20240307"
            response = client.messages.create(
                model=model_name,
                max_tokens=4000,
                system=BRIEFING_SYSTEM_PROMPT,
                messages=[
                    {"role": "user", "content": f"Generate today's Deep Dive briefing ({self.date}) based on these sources:\n\n{sources_text}"}
                ]
            )
            return response.content[0].text
        except Exception as e:
            print(f"[ERROR] Anthropic generation failed: {e}")
            return self._fallback_briefing(sources_text)

    def generate_briefing_hermes(self, sources_text: str) -> str:
        """Generate the briefing via a local OpenAI-compatible Hermes endpoint."""
        try:
            import requests
            response = requests.post(
                "http://localhost:8645/v1/chat/completions",
                json={
                    "model": "hermes",
                    "messages": [
                        {"role": "system", "content": BRIEFING_SYSTEM_PROMPT},
                        # Local models have tighter context windows; cap the input.
                        {"role": "user", "content": f"Generate today's Deep Dive briefing ({self.date}):\n\n{sources_text[:6000]}"}
                    ],
                    "temperature": 0.7,
                    "max_tokens": 4000
                },
                timeout=120
            )
            # BUG FIX: surface HTTP errors explicitly instead of letting a
            # non-2xx error body produce a confusing KeyError below.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            print(f"[ERROR] Hermes generation failed: {e}")
            return self._fallback_briefing(sources_text)

    def _fallback_briefing(self, sources_text: str) -> str:
        """Build a plain source digest when every LLM backend fails.

        Extracts Title:/URL: lines from the formatted source text so the
        downstream phases (TTS, delivery) still receive usable content.
        """
        lines = [
            f"# Deep Dive: AI Intelligence Briefing — {self.date}",
            "",
            "*Note: LLM synthesis unavailable. This is a structured source digest.*",
            "",
            "## Sources Today",
            ""
        ]
        # Simple extraction from sources (bounded to the first 50 lines).
        for line in sources_text.split('\n')[:50]:
            if line.startswith('Title:') or line.startswith('URL:'):
                lines.append(line)
        lines.extend([
            "",
            "## Note",
            "LLM synthesis failed. Review source URLs directly for content.",
            "",
            "---",
            "Deep Dive (Fallback Mode) | Hermes Agent Framework"
        ])
        return "\n".join(lines)

    def generate_briefing(self, sources: List[Source]) -> str:
        """Generate the briefing using the configured backend.

        Dispatches on substrings of ``self.model``; if no backend matches,
        picks one by available credentials (OpenAI, then Anthropic, then the
        credential-free local Hermes endpoint).
        """
        sources_text = self.format_sources_for_llm(sources)
        print(f"[Phase 3] Generating briefing using {self.model}...")
        model_id = self.model.lower()
        if 'openai' in model_id:
            return self.generate_briefing_openai(sources_text)
        # BUG FIX: 'anthropic' was matched case-sensitively against
        # self.model while every other branch lowercased first.
        elif 'anthropic' in model_id or 'claude' in model_id:
            return self.generate_briefing_anthropic(sources_text)
        elif 'hermes' in model_id:
            return self.generate_briefing_hermes(sources_text)
        else:
            # No explicit backend: prefer OpenAI, then Anthropic, then Hermes.
            if os.environ.get('OPENAI_API_KEY'):
                return self.generate_briefing_openai(sources_text)
            elif os.environ.get('ANTHROPIC_API_KEY'):
                return self.generate_briefing_anthropic(sources_text)
            else:
                return self.generate_briefing_hermes(sources_text)

    def save_briefing(self, content: str) -> Path:
        """Write the briefing, prefixed with a YAML front-matter header.

        Returns:
            Path of the written markdown file.
        """
        output_file = self.briefings_dir / f"{self.date}.md"
        # Metadata header consumed by downstream phases / archival tooling.
        header = f"""---
date: {self.date}
generated_at: {datetime.now().isoformat()}
model: {self.model}
version: 1.0
---
"""
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(header + content)
        print(f"[Phase 3] Saved briefing to {output_file}")
        return output_file

    def run(self) -> Path:
        """Run the full synthesis pipeline: load, generate, save.

        Returns:
            Path of the written briefing file.
        """
        print(f"[Phase 3] Synthesizing briefing for {self.date}")
        sources = self.load_ranked_sources()
        print(f"[Phase 3] Loaded {len(sources)} ranked sources")
        briefing = self.generate_briefing(sources)
        output_file = self.save_briefing(briefing)
        print(f"[Phase 3] Briefing generated: {len(briefing)} characters")
        return output_file
def _build_parser() -> argparse.ArgumentParser:
    """Construct the CLI argument parser for Phase 3."""
    parser = argparse.ArgumentParser(description='Deep Dive Phase 3: Synthesis Engine')
    parser.add_argument('--date', default=datetime.now().strftime('%Y-%m-%d'),
                        help='Target date (YYYY-MM-DD)')
    parser.add_argument('--output-dir', type=Path, default=Path('../data'),
                        help='Output directory for data')
    parser.add_argument('--model', default='openai/gpt-4o-mini',
                        help='LLM model for synthesis')
    return parser


def main():
    """Parse CLI arguments and run the synthesis engine."""
    opts = _build_parser().parse_args()
    SynthesisEngine(opts.output_dir, opts.date, opts.model).run()


if __name__ == '__main__':
    main()