Files
the-nexus/bin/deepdive_synthesis.py
Ezra (Archivist) 2b06e179d1
Some checks failed
Deploy Nexus / deploy (push) Has been cancelled
[deep-dive] Complete #830 implementation scaffold
Phase 3 (Synthesis):
- deepdive_synthesis.py: LLM-powered briefing generation
- Supports OpenAI (gpt-4o-mini) and Anthropic (claude-3-haiku)
- Fallback to keyword summary if LLM unavailable
- Intelligence briefing format: Headlines, Deep Dive, Bottom Line

Phase 4 (TTS):
- TTS integration in orchestrator
- Converts markdown to speech-friendly text
- Configurable provider (openai/elevenlabs/piper)

Phase 5 (Delivery):
- Enhanced delivery.py with --text and --chat-id/--bot-token overrides
- Supports text-only and audio+text delivery
- Full Telegram Bot API integration

Orchestrator:
- Complete 5-phase pipeline
- --dry-run mode for testing
- State management in ~/the-nexus/deepdive_state/
- Error handling with fallbacks

Progresses #830 to implementation-ready status
2026-04-05 04:43:22 +00:00

171 lines
6.0 KiB
Python

#!/usr/bin/env python3
"""deepdive_synthesis.py — Phase 3: LLM-powered intelligence briefing synthesis. Issue #830."""
import argparse
import json
import os
from datetime import datetime
from pathlib import Path
from typing import List, Dict
# Prompt template for the LLM synthesis step (Phase 3).
# The {sources} and {date} placeholders are filled via str.format() in
# SynthesisEngine.synthesize(); avoid adding literal braces to this string,
# since str.format() would treat them as replacement fields.
BRIEFING_PROMPT = """You are Deep Dive, an AI intelligence analyst for the Timmy Foundation fleet.
Your task: Synthesize the following research papers into a tight, actionable intelligence briefing for Alexander Whitestone, founder of Timmy.
CONTEXT:
- Timmy Foundation builds autonomous AI agents using the Hermes framework
- Focus areas: LLM architecture, tool use, RL training, agent systems
- Alexander prefers: Plain speech, evidence over vibes, concrete implications
SOURCES:
{sources}
OUTPUT FORMAT:
# Deep Dive Intelligence Brief — {date}
## Headlines (3 items)
For each top paper:
- **Title**: Paper name
- **Why It Matters**: One sentence on relevance to Hermes/Timmy
- **Key Insight**: The actionable takeaway
## Deep Dive (1 item)
Expand on the most relevant paper:
- Problem it solves
- Method/approach
- Implications for our agent work
- Suggested follow-up (if any)
## Bottom Line
3 bullets on what to know/do this week
Write in tight, professional intelligence style. No fluff."""
class SynthesisEngine:
    """Synthesize ranked research items into a markdown intelligence briefing.

    The provider comes from the ``provider`` argument or the
    ``DEEPDIVE_LLM_PROVIDER`` environment variable ("openai" by default).
    Any provider other than "openai"/"anthropic" — and any API failure —
    falls back to a keyword summary that needs no API key.
    """

    def __init__(self, provider: str = None):
        # Normalize case so "OpenAI" / "ANTHROPIC" select the right branch.
        self.provider = (
            provider or os.environ.get("DEEPDIVE_LLM_PROVIDER", "openai")
        ).lower()
        # Bug fix: pick the key that matches the chosen provider.  The old
        # code used OPENAI_API_KEY *or* ANTHROPIC_API_KEY, so when both (or
        # only the wrong one) were set, an OpenAI key could be handed to the
        # Anthropic client and vice versa.
        if self.provider == "anthropic":
            self.api_key = os.environ.get("ANTHROPIC_API_KEY")
        else:
            self.api_key = os.environ.get("OPENAI_API_KEY")

    def synthesize(self, items: List[Dict], date: str) -> str:
        """Generate a briefing from ranked items.

        Args:
            items: Ranked item dicts (expects keys like title/url/content/score).
            date: Date string interpolated into the briefing header.

        Returns:
            Markdown briefing text (LLM output, or keyword fallback).
        """
        sources_text = self._format_sources(items)
        prompt = BRIEFING_PROMPT.format(sources=sources_text, date=date)
        if self.provider == "openai":
            return self._call_openai(prompt)
        elif self.provider == "anthropic":
            return self._call_anthropic(prompt)
        else:
            return self._fallback_synthesis(items, date)

    def _format_sources(self, items: List[Dict]) -> str:
        """Render up to the top 10 items as a numbered source list for the prompt."""
        lines = []
        for i, item in enumerate(items[:10], 1):
            lines.append(f"\n{i}. {item.get('title', 'Untitled')}")
            lines.append(f" URL: {item.get('url', 'N/A')}")
            lines.append(f" Abstract: {item.get('content', 'No abstract')[:500]}...")
            lines.append(f" Relevance Score: {item.get('score', 0)}")
        return "\n".join(lines)

    def _call_openai(self, prompt: str) -> str:
        """Call OpenAI API for synthesis; fall back to a stub on any failure."""
        try:
            # Imported lazily so the fallback path works without the package.
            import openai
            client = openai.OpenAI(api_key=self.api_key)
            response = client.chat.completions.create(
                model="gpt-4o-mini",  # Cost-effective for daily briefings
                messages=[
                    {"role": "system", "content": "You are an expert AI research analyst. Be concise and actionable."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.3,
                max_tokens=2000
            )
            return response.choices[0].message.content
        except Exception as e:
            # Broad catch is deliberate: any API/import error degrades to the
            # offline fallback rather than aborting the pipeline.
            print(f"[WARN] OpenAI synthesis failed: {e}")
            return self._fallback_synthesis_from_prompt(prompt)

    def _call_anthropic(self, prompt: str) -> str:
        """Call Anthropic API for synthesis; fall back to a stub on any failure."""
        try:
            import anthropic
            client = anthropic.Anthropic(api_key=self.api_key)
            response = client.messages.create(
                model="claude-3-haiku-20240307",  # Cost-effective
                max_tokens=2000,
                temperature=0.3,
                system="You are an expert AI research analyst. Be concise and actionable.",
                messages=[{"role": "user", "content": prompt}]
            )
            return response.content[0].text
        except Exception as e:
            print(f"[WARN] Anthropic synthesis failed: {e}")
            return self._fallback_synthesis_from_prompt(prompt)

    def _fallback_synthesis(self, items: List[Dict], date: str) -> str:
        """Generate a basic link-list briefing without any LLM."""
        lines = [
            f"# Deep Dive Intelligence Brief — {date}",
            "",
            "## Headlines",
            ""
        ]
        for i, item in enumerate(items[:3], 1):
            lines.append(f"{i}. [{item.get('title', 'Untitled')}]({item.get('url', '')})")
            lines.append(f" Relevance Score: {item.get('score', 0)}")
            lines.append("")
        lines.extend([
            "## Bottom Line",
            "",
            f"- Reviewed {len(items)} papers from arXiv",
            "- Run with LLM API key for full synthesis"
        ])
        return "\n".join(lines)

    def _fallback_synthesis_from_prompt(self, prompt: str) -> str:
        """Last-resort output when an LLM call failed mid-flight: echo the prompt head."""
        # Simple extraction for fallback
        return "# Deep Dive\n\n[LLM synthesis unavailable - check API key]\n\n" + prompt[:1000]
def main():
    """CLI entry point: read ranked.json, synthesize, write briefing.md.

    Returns:
        0 on success, 1 when the input file contains no items.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True, help="Path to ranked.json")
    parser.add_argument("--output", required=True, help="Path to write briefing.md")
    parser.add_argument("--date", default=None, help="Briefing date (defaults to today)")
    parser.add_argument("--provider", default=None, help="LLM provider override (openai/anthropic)")
    args = parser.parse_args()

    date = args.date or datetime.now().strftime("%Y-%m-%d")

    # Load ranked items.  Explicit UTF-8: the briefing text contains em
    # dashes, and the platform's default locale encoding may not cover them.
    ranked_data = json.loads(Path(args.input).read_text(encoding="utf-8"))
    items = ranked_data.get("items", [])
    if not items:
        print("[ERROR] No items to synthesize")
        return 1

    print(f"[INFO] Synthesizing {len(items)} items...")
    # Generate briefing
    engine = SynthesisEngine(provider=args.provider)
    briefing = engine.synthesize(items, date)

    # Write output (UTF-8 for the same reason as the read above).
    Path(args.output).write_text(briefing, encoding="utf-8")
    print(f"[INFO] Briefing written to {args.output}")
    return 0


if __name__ == "__main__":
    # SystemExit rather than the interactive exit() helper, which is
    # injected by the site module and absent under `python -S`.
    raise SystemExit(main())