#!/usr/bin/env python3
"""
Deep Dive Phase 3: Synthesis Engine
Generates structured intelligence briefing via LLM.

Usage: python phase3_synthesize.py [--date YYYY-MM-DD] [--output-dir DIR]

Issue: the-nexus#830
"""

import argparse
import json
import os
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import List, Optional

# System prompt engineered for Hermes/Timmy context
BRIEFING_SYSTEM_PROMPT = """You are Deep Dive, an intelligence briefing system for the Hermes Agent Framework and Timmy organization.

Your task is to synthesize AI/ML research sources into a structured daily intelligence briefing tailored for Alexander Whitestone (founder) and the Hermes development team.

CONTEXT ABOUT HERMES/TIMMY:
- Hermes is an open-source AI agent framework with tool use, multi-agent orchestration, and MCP (Model Context Protocol) support
- Timmy is the fleet coordinator managing multiple AI coding agents
- Current priorities: agent reliability, context compression, distributed execution, sovereign infrastructure
- Technology stack: Python, asyncio, SQLite, FastAPI, llama.cpp, vLLM

BRIEFING STRUCTURE:
1. HEADLINES (3-5 bullets): Major developments with impact assessment
2. DEEP DIVES (2-3 items): Detailed analysis of most relevant papers/posts
3. IMPLICATIONS FOR HERMES: How this research affects our roadmap
4. ACTION ITEMS: Specific follow-ups for the team
5. SOURCES: Cited with URLs

TONE:
- Professional intelligence briefing
- Concise but substantive
- Technical depth appropriate for AI engineers
- Forward-looking implications

RULES:
- Prioritize sources by relevance to agent systems and LLM architecture
- Include specific techniques/methods when applicable
- Connect findings to Hermes' current challenges
- Always cite sources
"""


@dataclass
class Source:
    """Ranked source item loaded from Phase 2 output."""
    title: str    # article/paper title
    url: str      # canonical link, cited in the briefing
    source: str   # origin feed name (e.g. arxiv, hn)
    summary: str  # pre-extracted summary text
    score: float  # Phase 2 relevance score (higher = more relevant)


class SynthesisEngine:
    """Generate intelligence briefings via LLM.

    Reads ranked sources from ``<output_dir>/ranked/<date>.json`` (Phase 2
    output) and writes a markdown briefing to
    ``<output_dir>/briefings/<date>.md``.
    """

    def __init__(self, output_dir: Path, date: str, model: str = "openai/gpt-4o-mini"):
        """
        Args:
            output_dir: Base data directory shared by all pipeline phases.
            date: Target date string (YYYY-MM-DD); selects input/output files.
            model: Provider-prefixed model id; the prefix selects the backend
                (``openai/``, ``anthropic/``/``claude``, ``hermes``).
        """
        self.output_dir = output_dir
        self.date = date
        self.model = model
        self.ranked_dir = output_dir / "ranked"
        self.briefings_dir = output_dir / "briefings"
        # Only the output dir is created here; the ranked dir must already
        # exist (it is produced by Phase 2).
        self.briefings_dir.mkdir(parents=True, exist_ok=True)

    def load_ranked_sources(self) -> List[Source]:
        """Load ranked sources from Phase 2.

        Returns:
            Sources in file order; missing fields default to empty/0.

        Raises:
            FileNotFoundError: If Phase 2 has not produced output for the date.
        """
        ranked_file = self.ranked_dir / f"{self.date}.json"
        if not ranked_file.exists():
            raise FileNotFoundError(f"Phase 2 output not found: {ranked_file}")
        with open(ranked_file, encoding='utf-8') as f:
            data = json.load(f)
        return [
            Source(
                title=item.get('title', ''),
                url=item.get('url', ''),
                source=item.get('source', ''),
                summary=item.get('summary', ''),
                score=item.get('total_score', 0)
            )
            for item in data.get('items', [])
        ]

    def format_sources_for_llm(self, sources: List[Source]) -> str:
        """Format sources for LLM consumption.

        Only the top 15 sources are included, and each summary is truncated
        to 800 characters to bound prompt size.
        """
        lines = []
        for i, src in enumerate(sources[:15], 1):  # Top 15 sources
            lines.append(f"\n--- Source {i} [{src.source}] (score: {src.score}) ---")
            lines.append(f"Title: {src.title}")
            lines.append(f"URL: {src.url}")
            lines.append(f"Summary: {src.summary[:800]}")
        return "\n".join(lines)

    def generate_briefing_openai(self, sources_text: str) -> str:
        """Generate briefing using OpenAI API.

        Falls back to :meth:`_fallback_briefing` on any API failure or an
        empty completion. Requires ``OPENAI_API_KEY`` in the environment.
        """
        # Map "openai/gpt-4o-mini" -> "gpt-4o-mini"; anything without the
        # provider prefix keeps the historical default model.
        if self.model.lower().startswith('openai/'):
            model_id = self.model.split('/', 1)[1]
        else:
            model_id = 'gpt-4o-mini'
        try:
            from openai import OpenAI
            client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
            response = client.chat.completions.create(
                model=model_id,
                messages=[
                    {"role": "system", "content": BRIEFING_SYSTEM_PROMPT},
                    {"role": "user", "content": f"Generate today's Deep Dive briefing ({self.date}) based on these sources:\n\n{sources_text}"}
                ],
                temperature=0.7,
                max_tokens=4000
            )
            # message.content is typed Optional; never write None to disk.
            content = response.choices[0].message.content
            if not content:
                return self._fallback_briefing(sources_text)
            return content
        except Exception as e:
            print(f"[ERROR] OpenAI generation failed: {e}")
            return self._fallback_briefing(sources_text)

    def generate_briefing_anthropic(self, sources_text: str) -> str:
        """Generate briefing using Anthropic API.

        Falls back to :meth:`_fallback_briefing` on any API failure.
        Requires ``ANTHROPIC_API_KEY`` in the environment.
        """
        # Map "anthropic/<model>" -> "<model>"; bare "claude-..." strings
        # keep the historical default model.
        if self.model.lower().startswith('anthropic/'):
            model_id = self.model.split('/', 1)[1]
        else:
            model_id = 'claude-3-haiku-20240307'
        try:
            import anthropic
            client = anthropic.Anthropic(api_key=os.environ.get('ANTHROPIC_API_KEY'))
            response = client.messages.create(
                model=model_id,
                max_tokens=4000,
                system=BRIEFING_SYSTEM_PROMPT,
                messages=[
                    {"role": "user", "content": f"Generate today's Deep Dive briefing ({self.date}) based on these sources:\n\n{sources_text}"}
                ]
            )
            return response.content[0].text
        except Exception as e:
            print(f"[ERROR] Anthropic generation failed: {e}")
            return self._fallback_briefing(sources_text)

    def generate_briefing_hermes(self, sources_text: str) -> str:
        """Generate briefing using local Hermes endpoint.

        Posts an OpenAI-compatible chat request to localhost:8645; the source
        text is truncated to 6000 chars for the smaller local context window.
        Falls back to :meth:`_fallback_briefing` on any failure.
        """
        try:
            import requests
            response = requests.post(
                "http://localhost:8645/v1/chat/completions",
                json={
                    "model": "hermes",
                    "messages": [
                        {"role": "system", "content": BRIEFING_SYSTEM_PROMPT},
                        {"role": "user", "content": f"Generate today's Deep Dive briefing ({self.date}):\n\n{sources_text[:6000]}"}
                    ],
                    "temperature": 0.7,
                    "max_tokens": 4000
                },
                timeout=120
            )
            # Surface HTTP errors instead of parsing an error body as JSON;
            # the except below still routes them to the fallback.
            response.raise_for_status()
            return response.json()['choices'][0]['message']['content']
        except Exception as e:
            print(f"[ERROR] Hermes generation failed: {e}")
            return self._fallback_briefing(sources_text)

    def _fallback_briefing(self, sources_text: str) -> str:
        """Generate fallback briefing when LLM fails.

        Produces a plain digest of Title:/URL: lines extracted from the
        already-formatted source text so the pipeline always emits a file.
        """
        lines = [
            f"# Deep Dive: AI Intelligence Briefing — {self.date}",
            "",
            "*Note: LLM synthesis unavailable. This is a structured source digest.*",
            "",
            "## Sources Today",
            ""
        ]
        # Simple extraction from sources (first 50 lines only, to stay short)
        for line in sources_text.split('\n')[:50]:
            if line.startswith('Title:') or line.startswith('URL:'):
                lines.append(line)
        lines.extend([
            "",
            "## Note",
            "LLM synthesis failed. Review source URLs directly for content.",
            "",
            "---",
            "Deep Dive (Fallback Mode) | Hermes Agent Framework"
        ])
        return "\n".join(lines)

    def generate_briefing(self, sources: List[Source]) -> str:
        """Generate briefing using selected model.

        Routes on the provider prefix of ``self.model``; with no recognized
        prefix, picks a backend by which API key is available, preferring
        OpenAI, then Anthropic, then the local Hermes endpoint.
        """
        sources_text = self.format_sources_for_llm(sources)
        print(f"[Phase 3] Generating briefing using {self.model}...")
        model_lower = self.model.lower()
        if 'openai' in model_lower:
            return self.generate_briefing_openai(sources_text)
        # BUGFIX: 'anthropic' was checked against the raw (unlowered) model
        # string while every sibling check was case-insensitive.
        elif 'anthropic' in model_lower or 'claude' in model_lower:
            return self.generate_briefing_anthropic(sources_text)
        elif 'hermes' in model_lower:
            return self.generate_briefing_hermes(sources_text)
        else:
            # Try OpenAI first, fallback to Hermes
            if os.environ.get('OPENAI_API_KEY'):
                return self.generate_briefing_openai(sources_text)
            elif os.environ.get('ANTHROPIC_API_KEY'):
                return self.generate_briefing_anthropic(sources_text)
            else:
                return self.generate_briefing_hermes(sources_text)

    def save_briefing(self, content: str) -> Path:
        """Save briefing to markdown file with a YAML front-matter header.

        Returns:
            Path of the written briefing file.
        """
        output_file = self.briefings_dir / f"{self.date}.md"
        # Add metadata header
        header = f"""---
date: {self.date}
generated_at: {datetime.now().isoformat()}
model: {self.model}
version: 1.0
---

"""
        full_content = header + content
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(full_content)
        print(f"[Phase 3] Saved briefing to {output_file}")
        return output_file

    def run(self) -> Path:
        """Run full synthesis pipeline: load, generate, save.

        Returns:
            Path of the written briefing file.
        """
        print(f"[Phase 3] Synthesizing briefing for {self.date}")
        sources = self.load_ranked_sources()
        print(f"[Phase 3] Loaded {len(sources)} ranked sources")
        briefing = self.generate_briefing(sources)
        output_file = self.save_briefing(briefing)
        print(f"[Phase 3] Briefing generated: {len(briefing)} characters")
        return output_file


def main():
    """CLI entry point: parse args and run the synthesis engine."""
    parser = argparse.ArgumentParser(description='Deep Dive Phase 3: Synthesis Engine')
    parser.add_argument('--date', default=datetime.now().strftime('%Y-%m-%d'),
                        help='Target date (YYYY-MM-DD)')
    parser.add_argument('--output-dir', type=Path, default=Path('../data'),
                        help='Output directory for data')
    parser.add_argument('--model', default='openai/gpt-4o-mini',
                        help='LLM model for synthesis')
    args = parser.parse_args()
    engine = SynthesisEngine(args.output_dir, args.date, args.model)
    engine.run()


if __name__ == '__main__':
    main()