Complete production-ready scaffold for automated daily AI intelligence briefings:
- Phase 1: Source aggregation (arXiv + lab blogs)
- Phase 2: Relevance ranking (keyword + source-authority scoring)
- Phase 3: LLM synthesis (Hermes-context briefing generation)
- Phase 4: TTS audio (edge-tts / OpenAI / ElevenLabs)
- Phase 5: Telegram delivery (voice message)

Deliverables:
- docs/ARCHITECTURE.md — system design
- docs/OPERATIONS.md — runbook and troubleshooting
- 5 executable phase scripts (bin/)
- Full pipeline orchestrator (run_full_pipeline.py)
- requirements.txt, README.md

Addresses all 9 acceptance criteria from #830. Ready for host selection,
credential config, and cron activation.

Author: Ezra | Burn mode | 2026-04-05
196 lines · 6.3 KiB · Python
#!/usr/bin/env python3
|
|
"""
|
|
Deep Dive: Full Pipeline Orchestrator
|
|
Runs all 5 phases: Aggregate → Rank → Synthesize → Audio → Deliver
|
|
|
|
Usage:
|
|
./run_full_pipeline.py [--date YYYY-MM-DD] [--phases PHASES] [--dry-run]
|
|
|
|
Issue: the-nexus#830
|
|
"""
|
|
|
|
import argparse
|
|
import asyncio
|
|
import sys
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
# Import phase modules
|
|
sys.path.insert(0, str(Path(__file__).parent))
|
|
|
|
import phase1_aggregate
|
|
import phase2_rank
|
|
import phase3_synthesize
|
|
import phase4_generate_audio
|
|
import phase5_deliver
|
|
|
|
|
|
class PipelineOrchestrator:
    """Orchestrate the full Deep Dive pipeline.

    Runs the requested subset of the five phases, in ascending order as
    supplied by the caller:

        1. aggregate  -> phase1_aggregate.SourceAggregator (async)
        2. rank       -> phase2_rank.RelevanceEngine
        3. synthesize -> phase3_synthesize.SynthesisEngine
        4. audio      -> phase4_generate_audio.AudioGenerator
        5. deliver    -> phase5_deliver.DeliveryPipeline (async)
    """

    # Phase number -> (short name, implementing module). Kept as a class
    # attribute so external tooling can introspect the available phases.
    PHASES = {
        1: ('aggregate', phase1_aggregate),
        2: ('rank', phase2_rank),
        3: ('synthesize', phase3_synthesize),
        4: ('audio', phase4_generate_audio),
        5: ('deliver', phase5_deliver),
    }

    def __init__(self, date: str, output_dir: Path, phases: list, dry_run: bool = False):
        """Store run configuration.

        Args:
            date: Target date, 'YYYY-MM-DD'.
            output_dir: Directory where phase outputs are written/read.
            phases: Phase numbers (1-5) to execute, assumed pre-validated.
            dry_run: When True, print the plan but execute nothing.
        """
        self.date = date
        self.output_dir = output_dir
        self.phases = phases
        self.dry_run = dry_run

    def _banner(self, title: str, leading_newline: bool = True) -> None:
        """Print a boxed section banner for a phase."""
        if leading_newline:
            print("\n" + "=" * 60)
        else:
            print("=" * 60)
        print(title)
        print("=" * 60)

    def run_phase1(self):
        """Run aggregation phase (async). Returns the aggregator's result."""
        self._banner("PHASE 1: SOURCE AGGREGATION", leading_newline=False)
        aggregator = phase1_aggregate.SourceAggregator(self.output_dir, self.date)
        return asyncio.run(aggregator.run())

    def run_phase2(self):
        """Run ranking phase, keeping the top 20 items."""
        self._banner("PHASE 2: RELEVANCE RANKING")
        engine = phase2_rank.RelevanceEngine(self.output_dir, self.date)
        return engine.run(top_n=20)

    def run_phase3(self):
        """Run synthesis phase. Returns the synthesis result (truthy on success)."""
        self._banner("PHASE 3: SYNTHESIS")
        engine = phase3_synthesize.SynthesisEngine(self.output_dir, self.date)
        return engine.run()

    def run_phase4(self):
        """Run audio generation phase. Returns the generator result."""
        self._banner("PHASE 4: AUDIO GENERATION")
        generator = phase4_generate_audio.AudioGenerator(self.output_dir, self.date)
        return generator.run()

    def run_phase5(self):
        """Run delivery phase (async). Returns the delivery result."""
        self._banner("PHASE 5: DELIVERY")
        pipeline = phase5_deliver.DeliveryPipeline(self.output_dir, self.date)
        return asyncio.run(pipeline.run())

    def run(self):
        """Run selected phases in order.

        Returns:
            True if every executed phase succeeded (trivially True for a
            dry run, where nothing executes), False otherwise. Any
            uncaught exception from a phase is reported and yields False.
        """
        print("🎙️ DEEP DIVE — Daily AI Intelligence Briefing")
        print(f"Date: {self.date}")
        print(f"Phases: {', '.join(str(p) for p in self.phases)}")
        print(f"Output: {self.output_dir}")
        if self.dry_run:
            print("[DRY RUN] No actual API calls or deliveries")
        print()

        results = {}

        try:
            for phase in self.phases:
                if self.dry_run:
                    print(f"[DRY RUN] Would run phase {phase}")
                    continue

                if phase == 1:
                    results[1] = "aggregated" if self.run_phase1() else "failed"
                elif phase == 2:
                    results[2] = "ranked" if self.run_phase2() else "failed"
                elif phase == 3:
                    # BUGFIX: previously called run_phase3() twice (once for
                    # the truthiness test, once for the value), executing the
                    # expensive LLM synthesis twice per run. Call once.
                    outcome = self.run_phase3()
                    results[3] = str(outcome) if outcome else "failed"
                elif phase == 4:
                    # BUGFIX: same double-execution as phase 3 — TTS audio
                    # was generated twice. Call once and reuse the result.
                    outcome = self.run_phase4()
                    results[4] = str(outcome) if outcome else "failed"
                elif phase == 5:
                    results[5] = "delivered" if self.run_phase5() else "failed"

            self._banner("PIPELINE COMPLETE")
            for phase, result in results.items():
                status = "✅" if result != "failed" else "❌"
                print(f"{status} Phase {phase}: {result}")

            # all() over an empty dict (dry run) is True by design.
            return all(r != "failed" for r in results.values())

        except Exception as e:
            # Top-level boundary: report and signal failure to the caller.
            print(f"\n[ERROR] Pipeline failed: {e}")
            import traceback
            traceback.print_exc()
            return False
|
|
|
|
|
|
def main():
    """CLI entry point: parse arguments, select phases, run the pipeline.

    Exits with status 0 on success, 1 on any phase failure or invalid
    phase selection.
    """
    parser = argparse.ArgumentParser(
        description='Deep Dive: Full Pipeline Orchestrator'
    )
    parser.add_argument('--date', default=datetime.now().strftime('%Y-%m-%d'),
                        help='Target date (YYYY-MM-DD)')
    parser.add_argument('--output-dir', type=Path,
                        default=Path(__file__).parent.parent / 'data',
                        help='Output directory for data')
    parser.add_argument('--phases', default='1,2,3,4,5',
                        help='Comma-separated phase numbers to run (e.g., 1,2,3)')
    parser.add_argument('--dry-run', action='store_true',
                        help='Dry run (no API calls)')
    # Single-phase convenience flags (--phase1-only .. --phase5-only).
    _phase_labels = {1: 'aggregate', 2: 'rank', 3: 'synthesize',
                     4: 'audio', 5: 'deliver'}
    for num, label in _phase_labels.items():
        parser.add_argument(f'--phase{num}-only', action='store_true',
                            help=f'Run only Phase {num} ({label})')
    args = parser.parse_args()

    # Handle phase-specific flags: the lowest-numbered --phaseN-only flag
    # wins (same precedence as the original elif chain); otherwise fall
    # back to the --phases list.
    only_flags = [n for n in _phase_labels if getattr(args, f'phase{n}_only')]
    if only_flags:
        phases = [only_flags[0]]
    else:
        try:
            phases = [int(p) for p in args.phases.split(',')]
        except ValueError:
            # ROBUSTNESS: previously a malformed --phases value (e.g. '1,x')
            # crashed with an uncaught ValueError traceback.
            print(f"[ERROR] Invalid --phases value: {args.phases!r}")
            sys.exit(1)

    # Validate phases: only 1-5 are defined.
    for p in phases:
        if p not in range(1, 6):
            print(f"[ERROR] Invalid phase: {p}")
            sys.exit(1)

    # Deduplicate and run in ascending order.
    phases = sorted(set(phases))

    orchestrator = PipelineOrchestrator(
        date=args.date,
        output_dir=args.output_dir,
        phases=phases,
        dry_run=args.dry_run
    )

    success = orchestrator.run()
    sys.exit(0 if success else 1)
|
|
|
|
|
|
# Script entry point: main() parses CLI args and calls sys.exit itself.
if __name__ == '__main__':
    main()
|