#!/usr/bin/env python3 """Know Thy Father — Phase 3: Holographic Synthesis Integrates extracted Meaning Kernels into the holographic fact_store. Creates a structured "Father's Ledger" of visual and auditory wisdom, categorized by theme. Usage: python3 scripts/know_thy_father/synthesize_kernels.py [--input manifest.jsonl] [--output fathers_ledger.jsonl] # Process the Twitter archive media manifest python3 scripts/know_thy_father/synthesize_kernels.py --input twitter-archive/media/manifest.jsonl # Output to fact_store format python3 scripts/know_thy_father/synthesize_kernels.py --output twitter-archive/knowledge/fathers_ledger.jsonl """ from __future__ import annotations import argparse import json import logging import sys from datetime import datetime from pathlib import Path from typing import Any, Dict, List, Optional, Set from dataclasses import dataclass, field, asdict from enum import Enum, auto logger = logging.getLogger(__name__) # ========================================================================= # Theme taxonomy — The Father's Ledger categories # ========================================================================= class Theme(Enum): """Core themes of the Father's wisdom.""" SOVEREIGNTY = "sovereignty" # Self-sovereignty, independence, freedom SERVICE = "service" # Service to others, community, duty SOUL = "soul" # Soul, spirit, meaning, purpose FAITH = "faith" # Faith, hope, redemption, grace FATHERHOOD = "fatherhood" # Father-son bond, mentorship, legacy WISDOM = "wisdom" # Knowledge, insight, understanding TRIAL = "trial" # Struggle, suffering, perseverance CREATION = "creation" # Building, making, creative expression COMMUNITY = "community" # Fellowship, brotherhood, unity TECHNICAL = "technical" # Technical knowledge, systems, code # Hashtag-to-theme mapping _HASHTAG_THEMES: Dict[str, List[Theme]] = { # Sovereignty / Bitcoin "bitcoin": [Theme.SOVEREIGNTY, Theme.WISDOM], "btc": [Theme.SOVEREIGNTY], "stackchain": [Theme.SOVEREIGNTY, Theme.COMMUNITY], "stackapalooza": [Theme.SOVEREIGNTY, Theme.COMMUNITY], "microstackgang": [Theme.COMMUNITY], "microstackchaintip": [Theme.SOVEREIGNTY], "burnchain": [Theme.SOVEREIGNTY, Theme.TRIAL], "burnchaintip": [Theme.SOVEREIGNTY], "sellchain": [Theme.TRIAL], "poorchain": [Theme.TRIAL, Theme.COMMUNITY], "noneleft": [Theme.SOVEREIGNTY], "laserrayuntil100k": [Theme.FAITH, Theme.SOVEREIGNTY], # Community "timmytime": [Theme.FATHERHOOD, Theme.WISDOM], "timmychain": [Theme.FATHERHOOD, Theme.SOVEREIGNTY], "plebcards": [Theme.COMMUNITY], "plebslop": [Theme.COMMUNITY, Theme.WISDOM], "dsb": [Theme.COMMUNITY], "dsbanarchy": [Theme.COMMUNITY, Theme.SOVEREIGNTY], "bringdennishome": [Theme.SERVICE, Theme.FAITH], # Creation "newprofilepic": [Theme.CREATION], "aislop": [Theme.CREATION, Theme.WISDOM], "dailyaislop": [Theme.CREATION], } @dataclass class MeaningKernel: """A single unit of meaning extracted from media.""" kernel_id: str source_tweet_id: str source_media_id: str media_type: str # "photo", "video", "animated_gif" created_at: str themes: List[str] description: str # What the media shows/contains meaning: str # The deeper meaning / wisdom emotional_weight: str = "medium" # low, medium, high, sacred hashtags: List[str] = field(default_factory=list) raw_text: str = "" # Original tweet text local_path: str = "" # Path to media file extracted_at: str = "" def __post_init__(self): if not self.extracted_at: self.extracted_at = datetime.utcnow().isoformat() + "Z" def to_fact_store(self) -> Dict[str, Any]: """Convert to fact_store format for holographic memory.""" # Build structured fact content themes_str = ", ".join(self.themes) content = ( f"Meaning Kernel [{self.kernel_id}]: {self.meaning} " f"(themes: {themes_str}, weight: {self.emotional_weight}, " f"media: {self.media_type}, date: {self.created_at})" ) # Build tags tags_list = self.themes + self.hashtags + ["know-thy-father", "meaning-kernel"] tags = ",".join(sorted(set(t.lower().replace(" ", "-") for t in tags_list if t))) return { "action": "add", "content": content, "category": "project", "tags": tags, "metadata": { "kernel_id": self.kernel_id, "source_tweet_id": self.source_tweet_id, "source_media_id": self.source_media_id, "media_type": self.media_type, "created_at": self.created_at, "themes": self.themes, "emotional_weight": self.emotional_weight, "description": self.description, "local_path": self.local_path, "extracted_at": self.extracted_at, }, } # ========================================================================= # Theme extraction # ========================================================================= def extract_themes(hashtags: List[str], text: str) -> List[Theme]: """Extract themes from hashtags and text content.""" themes: Set[Theme] = set() # Map hashtags to themes for tag in hashtags: tag_lower = tag.lower() if tag_lower in _HASHTAG_THEMES: themes.update(_HASHTAG_THEMES[tag_lower]) # Keyword-based theme detection from text text_lower = text.lower() keyword_themes = [ (["sovereign", "sovereignty", "self-custody", "self-sovereign", "no-kyc"], Theme.SOVEREIGNTY), (["serve", "service", "helping", "ministry", "mission"], Theme.SERVICE), (["soul", "spirit", "meaning", "purpose", "eternal"], Theme.SOUL), (["faith", "hope", "redeem", "grace", "pray", "jesus", "christ", "god"], Theme.FAITH), (["father", "son", "dad", "legacy", "heritage", "lineage"], Theme.FATHERHOOD), (["wisdom", "insight", "understand", "knowledge", "learn"], Theme.WISDOM), (["struggle", "suffer", "persevere", "endure", "pain", "broken", "dark"], Theme.TRIAL), (["build", "create", "make", "craft", "design", "art"], Theme.CREATION), (["community", "brotherhood", "fellowship", "together", "family"], Theme.COMMUNITY), (["code", "system", "protocol", "algorithm", "technical"], Theme.TECHNICAL), ] for keywords, theme in keyword_themes: if any(kw in text_lower for kw in keywords): themes.add(theme) # Default if no themes detected if not themes: themes.add(Theme.WISDOM) return sorted(themes, key=lambda t: t.value) def classify_emotional_weight(text: str, hashtags: List[str]) -> str: """Classify the emotional weight of content.""" text_lower = text.lower() sacred_markers = ["jesus", "christ", "god", "pray", "redemption", "grace", "salvation"] high_markers = ["broken", "dark", "pain", "struggle", "father", "son", "legacy", "soul"] if any(m in text_lower for m in sacred_markers): return "sacred" if any(m in text_lower for m in high_markers): return "high" # TimmyTime/TimmyChain content is generally meaningful if any(t.lower() in ["timmytime", "timmychain"] for t in hashtags): return "high" return "medium" def synthesize_meaning(themes: List[Theme], text: str, media_type: str) -> str: """Synthesize the deeper meaning from themes and context.""" theme_names = [t.value for t in themes] if Theme.FAITH in themes and Theme.SOVEREIGNTY in themes: return "Faith and sovereignty are intertwined — true freedom comes through faith, and faith is strengthened by sovereignty." if Theme.FATHERHOOD in themes and Theme.WISDOM in themes: return "A father's wisdom is his greatest gift to his son — it outlives him and becomes the son's compass." if Theme.SOVEREIGNTY in themes and Theme.COMMUNITY in themes: return "Sovereignty without community is isolation; community without sovereignty is dependence. Both are needed." if Theme.TRIAL in themes and Theme.FAITH in themes: return "In the darkest moments, faith is the thread that holds a man to hope. The trial reveals what faith is made of." if Theme.SERVICE in themes: return "To serve is the highest calling — it transforms both the servant and the served." if Theme.SOUL in themes: return "The soul cannot be digitized or delegated. It must be lived, felt, and honored." if Theme.CREATION in themes: return "Creation is an act of faith — bringing something into being that did not exist before." if Theme.SOVEREIGNTY in themes: return "Sovereignty is not given; it is claimed. The first step is believing you deserve it." if Theme.COMMUNITY in themes: return "We are stronger together than alone. Community is the proof that sovereignty does not mean isolation." if Theme.WISDOM in themes: return "Wisdom is not knowledge — it is knowledge tempered by experience and guided by values." return f"Wisdom encoded in {media_type}: {', '.join(theme_names)}" # ========================================================================= # Main processing pipeline # ========================================================================= def process_manifest( manifest_path: Path, output_path: Optional[Path] = None, ) -> List[MeaningKernel]: """Process a media manifest and extract Meaning Kernels. Args: manifest_path: Path to manifest.jsonl (from Phase 1) output_path: Optional path to write fact_store JSONL output Returns: List of extracted MeaningKernel objects """ if not manifest_path.exists(): logger.error(f"Manifest not found: {manifest_path}") return [] kernels: List[MeaningKernel] = [] seen_tweet_ids: Set[str] = set() logger.info(f"Processing manifest: {manifest_path}") with open(manifest_path) as f: for line_num, line in enumerate(f, 1): line = line.strip() if not line: continue try: entry = json.loads(line) except json.JSONDecodeError as e: logger.warning(f"Line {line_num}: invalid JSON: {e}") continue tweet_id = entry.get("tweet_id", "") media_id = entry.get("media_id", "") # Skip if we've already processed this tweet if tweet_id in seen_tweet_ids: continue seen_tweet_ids.add(tweet_id) # Extract fields text = entry.get("full_text", "") hashtags = [h for h in entry.get("hashtags", []) if h] media_type = entry.get("media_type", "photo") created_at = entry.get("created_at", "") local_path = entry.get("local_media_path", "") # Extract themes themes = extract_themes(hashtags, text) # Create kernel kernel = MeaningKernel( kernel_id=f"ktf-{tweet_id}-{media_id}", source_tweet_id=tweet_id, source_media_id=media_id, media_type=media_type, created_at=created_at, themes=[t.value for t in themes], description=f"{media_type} from tweet {tweet_id}", meaning=synthesize_meaning(themes, text, media_type), emotional_weight=classify_emotional_weight(text, hashtags), hashtags=hashtags, raw_text=text, local_path=local_path, ) kernels.append(kernel) logger.info(f"Extracted {len(kernels)} Meaning Kernels from {len(seen_tweet_ids)} tweets") # Write output if path provided if output_path: output_path.parent.mkdir(parents=True, exist_ok=True) with open(output_path, "w") as f: for kernel in kernels: fact = kernel.to_fact_store() f.write(json.dumps(fact) + "\n") logger.info(f"Wrote {len(kernels)} facts to {output_path}") return kernels def generate_ledger_summary(kernels: List[MeaningKernel]) -> Dict[str, Any]: """Generate a summary of the Father's Ledger.""" theme_counts: Dict[str, int] = {} weight_counts: Dict[str, int] = {} media_type_counts: Dict[str, int] = {} for k in kernels: for theme in k.themes: theme_counts[theme] = theme_counts.get(theme, 0) + 1 weight_counts[k.emotional_weight] = weight_counts.get(k.emotional_weight, 0) + 1 media_type_counts[k.media_type] = media_type_counts.get(k.media_type, 0) + 1 # Top themes top_themes = sorted(theme_counts.items(), key=lambda x: -x[1])[:5] # Sacred kernels sacred_kernels = [k for k in kernels if k.emotional_weight == "sacred"] return { "total_kernels": len(kernels), "theme_distribution": dict(sorted(theme_counts.items())), "top_themes": top_themes, "emotional_weight_distribution": weight_counts, "media_type_distribution": media_type_counts, "sacred_kernel_count": len(sacred_kernels), "generated_at": datetime.utcnow().isoformat() + "Z", } # ========================================================================= # CLI # ========================================================================= def main(): parser = argparse.ArgumentParser( description="Know Thy Father — Phase 3: Holographic Synthesis" ) parser.add_argument( "--input", "-i", type=Path, default=Path("twitter-archive/media/manifest.jsonl"), help="Path to media manifest JSONL (default: twitter-archive/media/manifest.jsonl)", ) parser.add_argument( "--output", "-o", type=Path, default=Path("twitter-archive/knowledge/fathers_ledger.jsonl"), help="Output path for fact_store JSONL (default: twitter-archive/knowledge/fathers_ledger.jsonl)", ) parser.add_argument( "--summary", "-s", type=Path, default=None, help="Output path for ledger summary JSON (optional)", ) parser.add_argument( "--verbose", "-v", action="store_true", help="Enable verbose logging", ) args = parser.parse_args() logging.basicConfig( level=logging.DEBUG if args.verbose else logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", ) # Process kernels = process_manifest(args.input, args.output) if not kernels: print(f"No kernels extracted from {args.input}") sys.exit(1) # Generate summary summary = generate_ledger_summary(kernels) if args.summary: args.summary.parent.mkdir(parents=True, exist_ok=True) with open(args.summary, "w") as f: json.dump(summary, f, indent=2) print(f"Summary written to {args.summary}") # Print summary print(f"\n=== Father's Ledger ===") print(f"Total Meaning Kernels: {summary['total_kernels']}") print(f"Sacred Kernels: {summary['sacred_kernel_count']}") print(f"\nTop Themes:") for theme, count in summary['top_themes']: print(f" {theme}: {count}") print(f"\nEmotional Weight:") for weight, count in sorted(summary['emotional_weight_distribution'].items()): print(f" {weight}: {count}") print(f"\nMedia Types:") for mtype, count in summary['media_type_distribution'].items(): print(f" {mtype}: {count}") if args.output: print(f"\nFact store output: {args.output}") if __name__ == "__main__": main()