Complete production-ready scaffold for automated daily AI intelligence briefings. Phases: (1) source aggregation (arXiv + lab blogs); (2) relevance ranking (keyword + source-authority scoring); (3) LLM synthesis (Hermes-context briefing generation); (4) TTS audio (edge-tts / OpenAI / ElevenLabs); (5) Telegram delivery (voice message). Deliverables: docs/ARCHITECTURE.md (9000+ lines) — system design; docs/OPERATIONS.md — runbook and troubleshooting; 5 executable phase scripts (bin/); full pipeline orchestrator (run_full_pipeline.py); requirements.txt; README.md. Addresses all 9 acceptance criteria from #830. Ready for host selection, credential configuration, and cron activation. Author: Ezra | Burn mode | 2026-04-05
265 lines · 9.6 KiB · Python
#!/usr/bin/env python3
|
|
"""
|
|
Deep Dive Phase 3: Synthesis Engine
|
|
Generates structured intelligence briefing via LLM.
|
|
|
|
Usage:
|
|
python phase3_synthesize.py [--date YYYY-MM-DD] [--output-dir DIR]
|
|
|
|
Issue: the-nexus#830
|
|
"""
|
|
|
|
import argparse
|
|
import json
|
|
import os
|
|
from dataclasses import dataclass
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from typing import List, Optional
|
|
|
|
|
|
# System prompt engineered for Hermes/Timmy context.
# NOTE: this string is sent verbatim as the LLM "system" message by all three
# backends (OpenAI, Anthropic, local Hermes) — edit with prompt behavior in mind.
BRIEFING_SYSTEM_PROMPT: str = """You are Deep Dive, an intelligence briefing system for the Hermes Agent Framework and Timmy organization.

Your task is to synthesize AI/ML research sources into a structured daily intelligence briefing tailored for Alexander Whitestone (founder) and the Hermes development team.

CONTEXT ABOUT HERMES/TIMMY:
- Hermes is an open-source AI agent framework with tool use, multi-agent orchestration, and MCP (Model Context Protocol) support
- Timmy is the fleet coordinator managing multiple AI coding agents
- Current priorities: agent reliability, context compression, distributed execution, sovereign infrastructure
- Technology stack: Python, asyncio, SQLite, FastAPI, llama.cpp, vLLM

BRIEFING STRUCTURE:
1. HEADLINES (3-5 bullets): Major developments with impact assessment
2. DEEP DIVES (2-3 items): Detailed analysis of most relevant papers/posts
3. IMPLICATIONS FOR HERMES: How this research affects our roadmap
4. ACTION ITEMS: Specific follow-ups for the team
5. SOURCES: Cited with URLs

TONE:
- Professional intelligence briefing
- Concise but substantive
- Technical depth appropriate for AI engineers
- Forward-looking implications

RULES:
- Prioritize sources by relevance to agent systems and LLM architecture
- Include specific techniques/methods when applicable
- Connect findings to Hermes' current challenges
- Always cite sources
"""
|
|
|
|
|
|
@dataclass
class Source:
    """One ranked item emitted by Phase 2 and consumed by the synthesis step.

    Field order matters: it is the positional-argument order of the
    generated __init__, so it must not change.
    """
    title: str    # headline/title of the paper or post
    url: str      # link cited in the generated briefing
    source: str   # originating feed/site label (as emitted by Phase 2)
    summary: str  # abstract text; truncated before being shown to the LLM
    score: float  # Phase 2 relevance value (from 'total_score'; 0 if missing)
|
|
|
|
|
|
class SynthesisEngine:
    """Generate intelligence briefings via LLM.

    Phase 3 of the Deep Dive pipeline: reads ranked sources produced by
    Phase 2 from ``<output_dir>/ranked/<date>.json``, formats the top items
    into an LLM prompt, generates a briefing through one of three backends
    (OpenAI, Anthropic, or a local Hermes endpoint), and writes the result
    to ``<output_dir>/briefings/<date>.md``. Every backend degrades to a
    plain source digest (_fallback_briefing) on failure, so run() always
    produces an output file.
    """

    def __init__(self, output_dir: Path, date: str, model: str = "openai/gpt-4o-mini"):
        """
        Args:
            output_dir: Root data directory; must contain Phase 2's ``ranked/``.
            date: Target date, formatted YYYY-MM-DD.
            model: Backend selector. Substrings route dispatch — 'openai',
                'anthropic'/'claude', or 'hermes' (see generate_briefing).
        """
        self.output_dir = output_dir
        self.date = date
        self.model = model
        self.ranked_dir = output_dir / "ranked"
        self.briefings_dir = output_dir / "briefings"
        # Create the output directory eagerly so save_briefing cannot fail
        # later on a missing parent.
        self.briefings_dir.mkdir(parents=True, exist_ok=True)

    def load_ranked_sources(self) -> List[Source]:
        """Load ranked sources from Phase 2.

        Returns:
            All items from the ranked JSON file, in file order.

        Raises:
            FileNotFoundError: if Phase 2 produced no output for self.date.
        """
        ranked_file = self.ranked_dir / f"{self.date}.json"
        if not ranked_file.exists():
            raise FileNotFoundError(f"Phase 2 output not found: {ranked_file}")

        # Explicit UTF-8: don't depend on the platform's default encoding.
        with open(ranked_file, encoding='utf-8') as f:
            data = json.load(f)

        # Missing keys degrade to empty strings / 0 rather than raising, so a
        # partially malformed Phase 2 record does not abort the whole run.
        return [
            Source(
                title=item.get('title', ''),
                url=item.get('url', ''),
                source=item.get('source', ''),
                summary=item.get('summary', ''),
                score=item.get('total_score', 0)
            )
            for item in data.get('items', [])
        ]

    def format_sources_for_llm(self, sources: List[Source]) -> str:
        """Format sources for LLM consumption.

        Only the top 15 sources are included and each summary is truncated to
        800 characters, to keep the prompt within model context limits.
        """
        lines = []
        for i, src in enumerate(sources[:15], 1):  # Top 15 sources
            lines.append(f"\n--- Source {i} [{src.source}] (score: {src.score}) ---")
            lines.append(f"Title: {src.title}")
            lines.append(f"URL: {src.url}")
            lines.append(f"Summary: {src.summary[:800]}")
        return "\n".join(lines)

    def generate_briefing_openai(self, sources_text: str) -> str:
        """Generate briefing using the OpenAI API.

        Falls back to _fallback_briefing on any error (missing package,
        missing OPENAI_API_KEY, network/API failure).
        """
        try:
            from openai import OpenAI
            client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))

            # Honor the configured model ("openai/gpt-4o-mini" -> "gpt-4o-mini");
            # keep the previous hard-coded default when routed here implicitly
            # via the credential fallback in generate_briefing.
            if 'openai' in self.model.lower():
                model_name = self.model.split('/', 1)[-1]
            else:
                model_name = "gpt-4o-mini"

            response = client.chat.completions.create(
                model=model_name,
                messages=[
                    {"role": "system", "content": BRIEFING_SYSTEM_PROMPT},
                    {"role": "user", "content": f"Generate today's Deep Dive briefing ({self.date}) based on these sources:\n\n{sources_text}"}
                ],
                temperature=0.7,
                max_tokens=4000
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"[ERROR] OpenAI generation failed: {e}")
            return self._fallback_briefing(sources_text)

    def generate_briefing_anthropic(self, sources_text: str) -> str:
        """Generate briefing using the Anthropic API.

        Falls back to _fallback_briefing on any error (missing package,
        missing ANTHROPIC_API_KEY, network/API failure).
        """
        try:
            import anthropic
            client = anthropic.Anthropic(api_key=os.environ.get('ANTHROPIC_API_KEY'))

            # Honor an explicit claude model name ("anthropic/claude-..." ->
            # "claude-..."); otherwise keep the previous hard-coded default.
            if 'claude' in self.model.lower():
                model_name = self.model.split('/', 1)[-1]
            else:
                model_name = "claude-3-haiku-20240307"

            response = client.messages.create(
                model=model_name,
                max_tokens=4000,
                system=BRIEFING_SYSTEM_PROMPT,
                messages=[
                    {"role": "user", "content": f"Generate today's Deep Dive briefing ({self.date}) based on these sources:\n\n{sources_text}"}
                ]
            )
            return response.content[0].text
        except Exception as e:
            print(f"[ERROR] Anthropic generation failed: {e}")
            return self._fallback_briefing(sources_text)

    def generate_briefing_hermes(self, sources_text: str) -> str:
        """Generate briefing using a local Hermes (OpenAI-compatible) endpoint.

        Sources are truncated to 6000 characters for the smaller local
        context window. Falls back to _fallback_briefing on any error.
        """
        try:
            import requests

            response = requests.post(
                "http://localhost:8645/v1/chat/completions",
                json={
                    "model": "hermes",
                    "messages": [
                        {"role": "system", "content": BRIEFING_SYSTEM_PROMPT},
                        {"role": "user", "content": f"Generate today's Deep Dive briefing ({self.date}):\n\n{sources_text[:6000]}"}
                    ],
                    "temperature": 0.7,
                    "max_tokens": 4000
                },
                timeout=120
            )
            # Surface HTTP errors (e.g. a 500 from the local server) instead
            # of failing with an opaque KeyError while parsing the body.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            print(f"[ERROR] Hermes generation failed: {e}")
            return self._fallback_briefing(sources_text)

    def _fallback_briefing(self, sources_text: str) -> str:
        """Generate a minimal source digest when LLM synthesis fails.

        Extracts Title:/URL: lines from the formatted source text so the
        daily artifact still exists and downstream phases can proceed.
        """
        lines = [
            f"# Deep Dive: AI Intelligence Briefing — {self.date}",
            "",
            "*Note: LLM synthesis unavailable. This is a structured source digest.*",
            "",
            "## Sources Today",
            ""
        ]
        # Simple extraction from sources (first 50 lines only, to bound size).
        for line in sources_text.split('\n')[:50]:
            if line.startswith(('Title:', 'URL:')):
                lines.append(line)

        lines.extend([
            "",
            "## Note",
            "LLM synthesis failed. Review source URLs directly for content.",
            "",
            "---",
            "Deep Dive (Fallback Mode) | Hermes Agent Framework"
        ])

        return "\n".join(lines)

    def generate_briefing(self, sources: List[Source]) -> str:
        """Generate a briefing with the backend selected by self.model.

        Dispatch is substring-based and case-insensitive (the original
        'anthropic' check was case-sensitive, unlike its siblings). When the
        model string matches no backend, pick by available credentials:
        OpenAI, then Anthropic, then the local Hermes endpoint.
        """
        sources_text = self.format_sources_for_llm(sources)

        print(f"[Phase 3] Generating briefing using {self.model}...")

        model_lower = self.model.lower()
        if 'openai' in model_lower:
            return self.generate_briefing_openai(sources_text)
        elif 'anthropic' in model_lower or 'claude' in model_lower:
            return self.generate_briefing_anthropic(sources_text)
        elif 'hermes' in model_lower:
            return self.generate_briefing_hermes(sources_text)
        else:
            # Unknown model string: choose by whichever API key is configured.
            if os.environ.get('OPENAI_API_KEY'):
                return self.generate_briefing_openai(sources_text)
            elif os.environ.get('ANTHROPIC_API_KEY'):
                return self.generate_briefing_anthropic(sources_text)
            else:
                return self.generate_briefing_hermes(sources_text)

    def save_briefing(self, content: str) -> Path:
        """Prepend a YAML front-matter header and write the briefing to disk.

        Returns:
            Path of the written markdown file.
        """
        output_file = self.briefings_dir / f"{self.date}.md"

        # Add metadata header. NOTE(review): generated_at is a naive local
        # timestamp — confirm whether downstream phases expect UTC.
        header = f"""---
date: {self.date}
generated_at: {datetime.now().isoformat()}
model: {self.model}
version: 1.0
---

"""

        full_content = header + content

        # Explicit UTF-8 so briefings render identically on any host.
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(full_content)

        print(f"[Phase 3] Saved briefing to {output_file}")
        return output_file

    def run(self) -> Path:
        """Run the full synthesis pipeline: load, generate, save.

        Returns:
            Path of the generated briefing file.

        Raises:
            FileNotFoundError: if Phase 2 output is missing (via
                load_ranked_sources).
        """
        print(f"[Phase 3] Synthesizing briefing for {self.date}")

        sources = self.load_ranked_sources()
        print(f"[Phase 3] Loaded {len(sources)} ranked sources")

        briefing = self.generate_briefing(sources)
        output_file = self.save_briefing(briefing)

        print(f"[Phase 3] Briefing generated: {len(briefing)} characters")
        return output_file
|
|
|
|
|
|
def _build_parser() -> argparse.ArgumentParser:
    """Define the Phase 3 command-line interface."""
    parser = argparse.ArgumentParser(description='Deep Dive Phase 3: Synthesis Engine')
    parser.add_argument('--date', default=datetime.now().strftime('%Y-%m-%d'),
                        help='Target date (YYYY-MM-DD)')
    parser.add_argument('--output-dir', type=Path, default=Path('../data'),
                        help='Output directory for data')
    parser.add_argument('--model', default='openai/gpt-4o-mini',
                        help='LLM model for synthesis')
    return parser


def main():
    """Parse CLI arguments and run the synthesis pipeline."""
    args = _build_parser().parse_args()
    SynthesisEngine(args.output_dir, args.date, args.model).run()


if __name__ == '__main__':
    main()
|