#!/usr/bin/env python3
"""
Synthesis Engine for Deep Dive
Generates intelligence briefings from filtered content

Repository path: scaffold/deep-dive/synthesis/synthesis_engine.py

The system prompt is read from ``synthesis_prompt.txt`` the first time it is
needed rather than at import time, so the module can be imported (and the
mock path of ``generate_briefing`` exercised) without the template present.
``SYSTEM_PROMPT`` remains available as a module attribute for callers.
"""

import json
from dataclasses import dataclass
from datetime import datetime
from functools import lru_cache
from pathlib import Path
from typing import Any, List, Optional

# File name of the prompt template.
_PROMPT_FILENAME = "synthesis_prompt.txt"

# Limits applied when building the LLM input payload.
_MAX_PAPERS = 10            # only the first N (paper, score) pairs are sent
_MAX_BLOGS = 5              # only the first N blog posts are sent
_ABSTRACT_CHAR_LIMIT = 500  # abstracts longer than this get cut and "..." appended
_SUMMARY_CHAR_LIMIT = 300   # blog summaries are cut to this length (no suffix)


@lru_cache(maxsize=1)
def _load_system_prompt() -> str:
    """Read the prompt template once and cache its text.

    The current working directory is tried first (the original lookup
    location), then the directory containing this module, so the template is
    also found when the process runs from a different working directory.

    Raises:
        FileNotFoundError: if the template exists in neither location.
    """
    candidates = (
        Path(_PROMPT_FILENAME),
        Path(__file__).resolve().parent / _PROMPT_FILENAME,
    )
    for candidate in candidates:
        try:
            # Explicit encoding: the default depends on the platform locale.
            return candidate.read_text(encoding="utf-8")
        except FileNotFoundError:
            continue
    searched = ", ".join(str(c) for c in candidates)
    raise FileNotFoundError(f"{_PROMPT_FILENAME} not found (searched: {searched})")


def __getattr__(name: str) -> Any:
    """Resolve ``SYSTEM_PROMPT`` lazily for external callers (PEP 562).

    Keeps ``module.SYSTEM_PROMPT`` and ``from module import SYSTEM_PROMPT``
    working. Code inside this module must call ``_load_system_prompt()``
    directly, because plain global-name lookups do not go through this hook.
    """
    if name == "SYSTEM_PROMPT":
        return _load_system_prompt()
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


@dataclass
class Briefing:
    """Result of one synthesis run.

    ``generate_briefing`` currently fills in only ``date`` and ``raw_text``;
    the structured fields are returned empty because parsing of the raw text
    is not implemented yet.
    """

    date: str                 # briefing date string as passed in or defaulted
    headlines: List[dict]     # currently always []
    deep_dives: List[dict]    # currently always []
    implications: str         # currently always ""
    reading_list: List[dict]  # currently always []
    raw_text: str             # model response, or the mock placeholder text


def _truncate(text: Optional[str], limit: int, suffix: str = "") -> str:
    """Return *text* cut to *limit* characters; ``None``/empty becomes ""."""
    if not text:
        return ""
    if len(text) > limit:
        return text[:limit] + suffix
    return text


def _build_input(papers: List[Any], blogs: List[Any], date: str) -> dict:
    """Assemble the JSON-serializable payload handed to the model."""
    return {
        "date": date,
        "papers": [
            {
                "title": p.title,
                "authors": p.authors,
                "abstract": _truncate(p.abstract, _ABSTRACT_CHAR_LIMIT, suffix="..."),
                "url": p.url,
                "arxiv_id": p.arxiv_id,
                "relevance_score": score,
            }
            for p, score in papers[:_MAX_PAPERS]  # Top 10 papers
        ],
        "blogs": [
            {
                "title": b.title,
                "source": b.source,
                "url": b.url,
                "summary": _truncate(b.summary, _SUMMARY_CHAR_LIMIT),
            }
            for b in blogs[:_MAX_BLOGS]  # Top 5 blog posts
        ],
    }


def generate_briefing(
    papers: List[Any],
    blogs: List[Any],
    model_client=None,  # Hermes AIAgent or similar
    date: Optional[str] = None,
) -> Briefing:
    """Generate a briefing from ranked papers and blog posts.

    Args:
        papers: Sequence of ``(paper, score)`` pairs; only the first 10 are
            used. Each paper must expose ``title``, ``authors``, ``abstract``,
            ``url`` and ``arxiv_id``. A missing (``None``) abstract is sent
            as an empty string.
        blogs: Sequence of objects exposing ``title``, ``source``, ``url``
            and ``summary``; only the first 5 are used.
        model_client: Optional object with a
            ``chat(system_message=..., message=...)`` method whose return
            value is used as the briefing text. When falsy, a mock
            placeholder text is produced instead and the prompt template is
            never read.
        date: Briefing date string; defaults to today's local date formatted
            as ``YYYY-MM-DD`` when omitted or empty.

    Returns:
        A ``Briefing`` with ``date`` and ``raw_text`` set and all structured
        fields empty.

    Raises:
        FileNotFoundError: if a model client is supplied but the prompt
            template cannot be found.
        TypeError: if a model client is supplied and a payload value is not
            JSON-serializable (raised by ``json.dumps``).
    """
    date = date or datetime.now().strftime("%Y-%m-%d")

    # Build input for LLM. Done on the mock path too, so malformed inputs
    # fail the same way with or without a client.
    input_data = _build_input(papers, blogs, date)

    # Call LLM for synthesis (placeholder - integrate with Hermes routing)
    if model_client:
        raw_text = model_client.chat(
            system_message=_load_system_prompt(),
            message=f"Generate briefing from this data:\n```json\n{json.dumps(input_data, indent=2)}\n```",
        )
    else:
        # Mock output for testing
        raw_text = f"# Deep Dive Briefing — {date}\n\n(Mock output - integrate LLM)"

    # Parse structured data from raw_text
    # (In production, use structured output or regex parsing)

    return Briefing(
        date=date,
        headlines=[],
        deep_dives=[],
        implications="",
        reading_list=[],
        raw_text=raw_text,
    )


if __name__ == "__main__":
    print("Synthesis engine loaded")
    print(f"Prompt length: {len(_load_system_prompt())} chars")