Files
ezra-environment/the-nexus/deepdive/bin/run_full_pipeline.py
Ezra 9f010ad044 [BURN] Deep Dive scaffold: 5-phase sovereign NotebookLM (#830)
Complete production-ready scaffold for automated daily AI intelligence briefings:

- Phase 1: Source aggregation (arXiv + lab blogs)
- Phase 2: Relevance ranking (keyword + source authority scoring)
- Phase 3: LLM synthesis (Hermes-context briefing generation)
- Phase 4: TTS audio (edge-tts/OpenAI/ElevenLabs)
- Phase 5: Telegram delivery (voice message)

Deliverables:
- docs/ARCHITECTURE.md (9000+ lines) - system design
- docs/OPERATIONS.md - runbook and troubleshooting
- 5 executable phase scripts (bin/)
- Full pipeline orchestrator (run_full_pipeline.py)
- requirements.txt, README.md

Addresses all 9 acceptance criteria from #830.
Ready for host selection, credential config, and cron activation.

Author: Ezra | Burn mode | 2026-04-05
2026-04-05 05:48:12 +00:00

196 lines
6.3 KiB
Python

#!/usr/bin/env python3
"""
Deep Dive: Full Pipeline Orchestrator
Runs all 5 phases: Aggregate → Rank → Synthesize → Audio → Deliver
Usage:
./run_full_pipeline.py [--date YYYY-MM-DD] [--phases PHASES] [--dry-run]
Issue: the-nexus#830
"""
import argparse
import asyncio
import sys
from datetime import datetime
from pathlib import Path
# Import phase modules
sys.path.insert(0, str(Path(__file__).parent))
import phase1_aggregate
import phase2_rank
import phase3_synthesize
import phase4_generate_audio
import phase5_deliver
class PipelineOrchestrator:
    """Orchestrate the full Deep Dive pipeline.

    Runs the selected phases in order:
    aggregate → rank → synthesize → audio → deliver.
    Each phase is delegated to its phase module; a per-phase result
    summary is printed when the run completes.
    """

    # Phase number → (short name, implementing module).
    PHASES = {
        1: ('aggregate', phase1_aggregate),
        2: ('rank', phase2_rank),
        3: ('synthesize', phase3_synthesize),
        4: ('audio', phase4_generate_audio),
        5: ('deliver', phase5_deliver),
    }

    # Human-readable success labels for phases whose runner returns a
    # bare truthy flag (phases 3 and 4 record str(result) instead).
    _SUCCESS_LABELS = {1: 'aggregated', 2: 'ranked', 5: 'delivered'}

    def __init__(self, date: str, output_dir: Path, phases: list, dry_run: bool = False):
        """
        Args:
            date: Target date string (YYYY-MM-DD) passed to each phase.
            output_dir: Directory phase artifacts are read from / written to.
            phases: Phase numbers (1-5) to run, in order.
            dry_run: If True, only report what would run; no phase executes.
        """
        self.date = date
        self.output_dir = output_dir
        self.phases = phases
        self.dry_run = dry_run

    def run_phase1(self):
        """Run the source-aggregation phase; returns its (truthy on success) result."""
        print("=" * 60)
        print("PHASE 1: SOURCE AGGREGATION")
        print("=" * 60)
        aggregator = phase1_aggregate.SourceAggregator(self.output_dir, self.date)
        # Aggregator is async; drive it to completion on a fresh event loop.
        return asyncio.run(aggregator.run())

    def run_phase2(self):
        """Run the relevance-ranking phase; keeps the top 20 items."""
        print("\n" + "=" * 60)
        print("PHASE 2: RELEVANCE RANKING")
        print("=" * 60)
        engine = phase2_rank.RelevanceEngine(self.output_dir, self.date)
        return engine.run(top_n=20)

    def run_phase3(self):
        """Run the LLM synthesis phase; returns the engine's result."""
        print("\n" + "=" * 60)
        print("PHASE 3: SYNTHESIS")
        print("=" * 60)
        engine = phase3_synthesize.SynthesisEngine(self.output_dir, self.date)
        return engine.run()

    def run_phase4(self):
        """Run the TTS audio-generation phase; returns the generator's result."""
        print("\n" + "=" * 60)
        print("PHASE 4: AUDIO GENERATION")
        print("=" * 60)
        generator = phase4_generate_audio.AudioGenerator(self.output_dir, self.date)
        return generator.run()

    def run_phase5(self):
        """Run the Telegram delivery phase; returns its (truthy on success) result."""
        print("\n" + "=" * 60)
        print("PHASE 5: DELIVERY")
        print("=" * 60)
        pipeline = phase5_deliver.DeliveryPipeline(self.output_dir, self.date)
        # Delivery pipeline is async; drive it to completion.
        return asyncio.run(pipeline.run())

    def run(self):
        """Run the selected phases in order and print a summary.

        A failed phase is recorded as "failed" but does not stop the
        remaining phases (preserves the original best-effort behavior).

        Returns:
            bool: True iff every executed phase succeeded.
        """
        print("🎙️ DEEP DIVE — Daily AI Intelligence Briefing")
        print(f"Date: {self.date}")
        print(f"Phases: {', '.join(str(p) for p in self.phases)}")
        print(f"Output: {self.output_dir}")
        if self.dry_run:
            print("[DRY RUN] No actual API calls or deliveries")
        print()
        results = {}
        try:
            for phase in self.phases:
                if self.dry_run:
                    print(f"[DRY RUN] Would run phase {phase}")
                    continue
                # BUG FIX: the original called run_phase3()/run_phase4()
                # twice each — once for the truthiness check and again
                # inside str(...) — executing those phases twice per run.
                # Call each runner exactly once and reuse the result.
                outcome = getattr(self, f'run_phase{phase}')()
                if not outcome:
                    results[phase] = "failed"
                elif phase in (3, 4):
                    # Synthesis/audio return a result worth recording verbatim.
                    results[phase] = str(outcome)
                else:
                    results[phase] = self._SUCCESS_LABELS[phase]
            print("\n" + "=" * 60)
            print("PIPELINE COMPLETE")
            print("=" * 60)
            for phase, result in results.items():
                # NOTE(review): the original status glyphs were empty strings
                # in both branches (emoji apparently lost in transit), making
                # the conditional dead; restored as check/cross marks — confirm
                # against the repo copy.
                status = "✓" if result != "failed" else "✗"
                print(f"{status} Phase {phase}: {result}")
            return all(r != "failed" for r in results.values())
        except Exception as e:
            # Top-level boundary: report, print traceback, signal failure.
            print(f"\n[ERROR] Pipeline failed: {e}")
            import traceback
            traceback.print_exc()
            return False
def main():
    """CLI entry point: parse arguments, run the orchestrator, exit.

    Exits with status 0 on full success, 1 on any phase failure or on
    invalid --phases input.
    """
    parser = argparse.ArgumentParser(
        description='Deep Dive: Full Pipeline Orchestrator'
    )
    parser.add_argument('--date', default=datetime.now().strftime('%Y-%m-%d'),
                        help='Target date (YYYY-MM-DD)')
    parser.add_argument('--output-dir', type=Path,
                        default=Path(__file__).parent.parent / 'data',
                        help='Output directory for data')
    parser.add_argument('--phases', default='1,2,3,4,5',
                        help='Comma-separated phase numbers to run (e.g., 1,2,3)')
    parser.add_argument('--dry-run', action='store_true',
                        help='Dry run (no API calls)')
    # One --phaseN-only convenience flag per phase (same CLI as before).
    phase_labels = {1: 'aggregate', 2: 'rank', 3: 'synthesize',
                    4: 'audio', 5: 'deliver'}
    for n, label in phase_labels.items():
        parser.add_argument(f'--phase{n}-only', action='store_true',
                            help=f'Run only Phase {n} ({label})')
    args = parser.parse_args()

    # Phase-specific flags take precedence, lowest phase number first
    # (matches the original if/elif precedence).
    phases = None
    for n in phase_labels:
        if getattr(args, f'phase{n}_only'):
            phases = [n]
            break
    if phases is None:
        # ROBUSTNESS FIX: a malformed --phases value (e.g. "1,x") previously
        # escaped as an uncaught ValueError traceback; route it through the
        # script's own [ERROR] exit path instead.
        try:
            phases = [int(p) for p in args.phases.split(',')]
        except ValueError:
            print(f"[ERROR] Invalid --phases value: {args.phases}")
            sys.exit(1)

    # Validate phase numbers before running anything.
    for p in phases:
        if p not in range(1, 6):
            print(f"[ERROR] Invalid phase: {p}")
            sys.exit(1)

    # Deduplicate and run in ascending order.
    phases = sorted(set(phases))
    orchestrator = PipelineOrchestrator(
        date=args.date,
        output_dir=args.output_dir,
        phases=phases,
        dry_run=args.dry_run
    )
    success = orchestrator.run()
    sys.exit(0 if success else 1)
# Script entry point: main() parses argv and calls sys.exit itself.
if __name__ == '__main__':
    main()