Some checks failed
Smoke Test / smoke (push) Has been cancelled
Merge PR #631
417 lines
16 KiB
Python
417 lines
16 KiB
Python
#!/usr/bin/env python3
|
|
"""Know Thy Father — Phase 3: Holographic Synthesis
|
|
|
|
Integrates extracted Meaning Kernels into the holographic fact_store.
|
|
Creates a structured "Father's Ledger" of visual and auditory wisdom,
|
|
categorized by theme.
|
|
|
|
Usage:
|
|
python3 scripts/know_thy_father/synthesize_kernels.py [--input manifest.jsonl] [--output fathers_ledger.jsonl]
|
|
|
|
# Process the Twitter archive media manifest
|
|
python3 scripts/know_thy_father/synthesize_kernels.py --input twitter-archive/media/manifest.jsonl
|
|
|
|
# Output to fact_store format
|
|
python3 scripts/know_thy_father/synthesize_kernels.py --output twitter-archive/knowledge/fathers_ledger.jsonl
|
|
"""
|
|
|
|
from __future__ import annotations

import argparse
import json
import logging
import re
import sys
from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone
from enum import Enum, auto
from pathlib import Path
from typing import Any, Dict, List, Optional, Set
|
|
|
|
# Module-level logger; level and format are configured in main() via logging.basicConfig.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# =========================================================================
|
|
# Theme taxonomy — The Father's Ledger categories
|
|
# =========================================================================
|
|
|
|
class Theme(Enum):
    """Closed set of categories under which Father's Ledger entries are filed.

    Each member's value is the lowercase string form of the theme.
    """

    # Self-sovereignty, independence, freedom
    SOVEREIGNTY = "sovereignty"

    # Service to others, community, duty
    SERVICE = "service"

    # Soul, spirit, meaning, purpose
    SOUL = "soul"

    # Faith, hope, redemption, grace
    FAITH = "faith"

    # Father-son bond, mentorship, legacy
    FATHERHOOD = "fatherhood"

    # Knowledge, insight, understanding
    WISDOM = "wisdom"

    # Struggle, suffering, perseverance
    TRIAL = "trial"

    # Building, making, creative expression
    CREATION = "creation"

    # Fellowship, brotherhood, unity
    COMMUNITY = "community"

    # Technical knowledge, systems, code
    TECHNICAL = "technical"
|
|
|
|
|
|
# Lookup table: lowercase hashtag (without the leading "#") -> themes it implies.
# extract_themes() lowercases each incoming tag before consulting this table.
_HASHTAG_THEMES: Dict[str, List[Theme]] = {
    # --- Sovereignty / Bitcoin ---
    "bitcoin": [Theme.SOVEREIGNTY, Theme.WISDOM],
    "btc": [Theme.SOVEREIGNTY],
    "stackchain": [Theme.SOVEREIGNTY, Theme.COMMUNITY],
    "stackapalooza": [Theme.SOVEREIGNTY, Theme.COMMUNITY],
    "microstackgang": [Theme.COMMUNITY],
    "microstackchaintip": [Theme.SOVEREIGNTY],
    "burnchain": [Theme.SOVEREIGNTY, Theme.TRIAL],
    "burnchaintip": [Theme.SOVEREIGNTY],
    "sellchain": [Theme.TRIAL],
    "poorchain": [Theme.TRIAL, Theme.COMMUNITY],
    "noneleft": [Theme.SOVEREIGNTY],
    "laserrayuntil100k": [Theme.FAITH, Theme.SOVEREIGNTY],

    # --- Community ---
    "timmytime": [Theme.FATHERHOOD, Theme.WISDOM],
    "timmychain": [Theme.FATHERHOOD, Theme.SOVEREIGNTY],
    "plebcards": [Theme.COMMUNITY],
    "plebslop": [Theme.COMMUNITY, Theme.WISDOM],
    "dsb": [Theme.COMMUNITY],
    "dsbanarchy": [Theme.COMMUNITY, Theme.SOVEREIGNTY],
    "bringdennishome": [Theme.SERVICE, Theme.FAITH],

    # --- Creation ---
    "newprofilepic": [Theme.CREATION],
    "aislop": [Theme.CREATION, Theme.WISDOM],
    "dailyaislop": [Theme.CREATION],
}
|
|
|
|
|
|
@dataclass
class MeaningKernel:
    """A single unit of meaning extracted from media.

    Instances are plain records; ``to_fact_store`` renders one as an "add"
    action dictionary for the fact_store.
    """

    kernel_id: str
    source_tweet_id: str
    source_media_id: str
    media_type: str  # "photo", "video", "animated_gif"
    created_at: str
    themes: List[str]
    description: str  # What the media shows/contains
    meaning: str  # The deeper meaning / wisdom
    emotional_weight: str = "medium"  # low, medium, high, sacred
    hashtags: List[str] = field(default_factory=list)
    raw_text: str = ""  # Original tweet text
    local_path: str = ""  # Path to media file
    extracted_at: str = ""  # Filled in by __post_init__ when left empty

    def __post_init__(self) -> None:
        """Stamp ``extracted_at`` with the current UTC time if the caller left it empty."""
        if not self.extracted_at:
            # datetime.utcnow() is deprecated since Python 3.12. Take an aware
            # UTC "now" and drop tzinfo so isoformat() does not append "+00:00"
            # ahead of the explicit "Z" suffix (same string shape as before).
            now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
            self.extracted_at = now_utc.isoformat() + "Z"

    def to_fact_store(self) -> Dict[str, Any]:
        """Convert to fact_store format for holographic memory.

        Returns:
            A dict with keys ``action`` ("add"), ``content`` (one-line
            human-readable summary), ``category`` ("project"), ``tags``
            (comma-separated, de-duplicated, sorted) and ``metadata``
            (structured copy of the kernel's fields; ``hashtags`` and
            ``raw_text`` are not included there).
        """
        themes_str = ", ".join(self.themes)
        content = (
            f"Meaning Kernel [{self.kernel_id}]: {self.meaning} "
            f"(themes: {themes_str}, weight: {self.emotional_weight}, "
            f"media: {self.media_type}, date: {self.created_at})"
        )

        # Tags = themes + hashtags + two fixed project markers, normalised to
        # lowercase with spaces turned into hyphens; empty entries are dropped.
        tags_list = self.themes + self.hashtags + ["know-thy-father", "meaning-kernel"]
        tags = ",".join(sorted(set(t.lower().replace(" ", "-") for t in tags_list if t)))

        return {
            "action": "add",
            "content": content,
            "category": "project",
            "tags": tags,
            "metadata": {
                "kernel_id": self.kernel_id,
                "source_tweet_id": self.source_tweet_id,
                "source_media_id": self.source_media_id,
                "media_type": self.media_type,
                "created_at": self.created_at,
                "themes": self.themes,
                "emotional_weight": self.emotional_weight,
                "description": self.description,
                "local_path": self.local_path,
                "extracted_at": self.extracted_at,
            },
        }
|
|
|
|
|
|
# =========================================================================
|
|
# Theme extraction
|
|
# =========================================================================
|
|
|
|
def _theme_keyword_pattern(keywords: List[str]) -> re.Pattern:
    """Compile *keywords* into one regex that matches any of them at the start of a word."""
    return re.compile(r"\b(?:" + "|".join(re.escape(kw) for kw in keywords) + ")")


# Keyword stems -> theme, compiled once at import time. Matching is anchored
# to the beginning of a word so inflections still hit ("fathers", "building",
# "suffering") while unrelated words that merely contain a stem do not
# ("person" no longer triggers "son", "start"/"heart" no longer trigger "art").
_KEYWORD_THEME_PATTERNS = (
    (_theme_keyword_pattern(["sovereign", "sovereignty", "self-custody", "self-sovereign", "no-kyc"]), Theme.SOVEREIGNTY),
    (_theme_keyword_pattern(["serve", "service", "helping", "ministry", "mission"]), Theme.SERVICE),
    (_theme_keyword_pattern(["soul", "spirit", "meaning", "purpose", "eternal"]), Theme.SOUL),
    (_theme_keyword_pattern(["faith", "hope", "redeem", "grace", "pray", "jesus", "christ", "god"]), Theme.FAITH),
    (_theme_keyword_pattern(["father", "son", "dad", "legacy", "heritage", "lineage"]), Theme.FATHERHOOD),
    (_theme_keyword_pattern(["wisdom", "insight", "understand", "knowledge", "learn"]), Theme.WISDOM),
    (_theme_keyword_pattern(["struggle", "suffer", "persevere", "endure", "pain", "broken", "dark"]), Theme.TRIAL),
    (_theme_keyword_pattern(["build", "create", "make", "craft", "design", "art"]), Theme.CREATION),
    (_theme_keyword_pattern(["community", "brotherhood", "fellowship", "together", "family"]), Theme.COMMUNITY),
    (_theme_keyword_pattern(["code", "system", "protocol", "algorithm", "technical"]), Theme.TECHNICAL),
)


def extract_themes(hashtags: List[str], text: str) -> List[Theme]:
    """Extract themes from hashtags and text content.

    Args:
        hashtags: Hashtag strings; each is lowercased and looked up in
            ``_HASHTAG_THEMES``. Unknown tags contribute nothing.
        text: Free text scanned (case-insensitively) for theme keywords.
            A keyword counts only when it begins a word, so it also matches
            longer words sharing that prefix (e.g. "son" still matches
            "song") but never the middle or end of a word.

    Returns:
        The detected themes, de-duplicated and sorted by their string value.
        Falls back to ``[Theme.WISDOM]`` when nothing is detected.
    """
    themes: Set[Theme] = set()

    # Hashtag-driven themes.
    for tag in hashtags:
        themes.update(_HASHTAG_THEMES.get(tag.lower(), ()))

    # Keyword-driven themes.
    text_lower = text.lower()
    themes.update(
        theme for pattern, theme in _KEYWORD_THEME_PATTERNS if pattern.search(text_lower)
    )

    # Default if no themes detected.
    if not themes:
        themes.add(Theme.WISDOM)

    return sorted(themes, key=lambda t: t.value)
|
|
|
|
|
|
# Marker stems, matched at the start of a word only. Plain substring tests
# misfire on everyday words ("person"/"reason" contain "son", "spray" contains
# "pray"), inflating the weight of unrelated tweets.
_SACRED_MARKER_RE = re.compile(
    r"\b(?:"
    + "|".join(
        re.escape(m)
        for m in ("jesus", "christ", "god", "pray", "redemption", "grace", "salvation")
    )
    + ")"
)
_HIGH_MARKER_RE = re.compile(
    r"\b(?:"
    + "|".join(
        re.escape(m)
        for m in ("broken", "dark", "pain", "struggle", "father", "son", "legacy", "soul")
    )
    + ")"
)
# Hashtags whose presence alone raises the weight to "high".
_HIGH_WEIGHT_HASHTAGS = frozenset({"timmytime", "timmychain"})


def classify_emotional_weight(text: str, hashtags: List[str]) -> str:
    """Classify the emotional weight of content.

    Checks are applied in priority order and the first hit wins:

    1. a sacred marker word in *text*  -> ``"sacred"``
    2. a high marker word in *text*    -> ``"high"``
    3. a TimmyTime/TimmyChain hashtag  -> ``"high"``
    4. otherwise                       -> ``"medium"``

    Args:
        text: Tweet text; compared case-insensitively. Markers match only at
            the beginning of a word (so "prayer" and "fathers" match, while
            "spray" and "person" do not).
        hashtags: Hashtag strings; compared case-insensitively.

    Returns:
        One of ``"sacred"``, ``"high"`` or ``"medium"``.
    """
    text_lower = text.lower()

    if _SACRED_MARKER_RE.search(text_lower):
        return "sacred"
    if _HIGH_MARKER_RE.search(text_lower):
        return "high"

    # TimmyTime/TimmyChain content is generally meaningful
    if any(tag.lower() in _HIGH_WEIGHT_HASHTAGS for tag in hashtags):
        return "high"

    return "medium"
|
|
|
|
|
|
def synthesize_meaning(themes: List[Theme], text: str, media_type: str) -> str:
    """Pick the canned statement of meaning that best fits a set of themes.

    Rules are tried top to bottom and the first rule whose required themes
    are all present decides the result; two-theme combinations are listed
    ahead of single themes. ``text`` is accepted but not consulted.

    Args:
        themes: Themes detected for the media item.
        text: Original tweet text (unused here).
        media_type: Media type label, used only in the fallback sentence.

    Returns:
        The matching statement, or a generic sentence naming the media type
        and theme values when no rule applies.
    """
    theme_names = [t.value for t in themes]

    ordered_rules = (
        (
            (Theme.FAITH, Theme.SOVEREIGNTY),
            "Faith and sovereignty are intertwined — true freedom comes through faith, and faith is strengthened by sovereignty.",
        ),
        (
            (Theme.FATHERHOOD, Theme.WISDOM),
            "A father's wisdom is his greatest gift to his son — it outlives him and becomes the son's compass.",
        ),
        (
            (Theme.SOVEREIGNTY, Theme.COMMUNITY),
            "Sovereignty without community is isolation; community without sovereignty is dependence. Both are needed.",
        ),
        (
            (Theme.TRIAL, Theme.FAITH),
            "In the darkest moments, faith is the thread that holds a man to hope. The trial reveals what faith is made of.",
        ),
        (
            (Theme.SERVICE,),
            "To serve is the highest calling — it transforms both the servant and the served.",
        ),
        (
            (Theme.SOUL,),
            "The soul cannot be digitized or delegated. It must be lived, felt, and honored.",
        ),
        (
            (Theme.CREATION,),
            "Creation is an act of faith — bringing something into being that did not exist before.",
        ),
        (
            (Theme.SOVEREIGNTY,),
            "Sovereignty is not given; it is claimed. The first step is believing you deserve it.",
        ),
        (
            (Theme.COMMUNITY,),
            "We are stronger together than alone. Community is the proof that sovereignty does not mean isolation.",
        ),
        (
            (Theme.WISDOM,),
            "Wisdom is not knowledge — it is knowledge tempered by experience and guided by values.",
        ),
    )

    for required, statement in ordered_rules:
        if all(needed in themes for needed in required):
            return statement

    # No rule matched (e.g. only FATHERHOOD, TRIAL, FAITH or TECHNICAL alone).
    return f"Wisdom encoded in {media_type}: {', '.join(theme_names)}"
|
|
|
|
|
|
# =========================================================================
|
|
# Main processing pipeline
|
|
# =========================================================================
|
|
|
|
def process_manifest(
    manifest_path: Path,
    output_path: Optional[Path] = None,
) -> List[MeaningKernel]:
    """Process a media manifest and extract Meaning Kernels.

    Each non-blank line of the manifest is parsed as one JSON object. Lines
    that are not valid JSON, or whose JSON value is not an object, are logged
    and skipped. Only the first entry seen for a given ``tweet_id`` produces a
    kernel.

    Args:
        manifest_path: Path to manifest.jsonl (from Phase 1)
        output_path: Optional path to write fact_store JSONL output; parent
            directories are created as needed and an existing file is
            overwritten.

    Returns:
        List of extracted MeaningKernel objects (empty if the manifest does
        not exist).
    """
    if not manifest_path.exists():
        logger.error("Manifest not found: %s", manifest_path)
        return []

    kernels: List[MeaningKernel] = []
    seen_tweet_ids: Set[str] = set()

    logger.info("Processing manifest: %s", manifest_path)

    # Tweet text routinely contains emoji and other non-ASCII characters, so
    # decode as UTF-8 explicitly instead of relying on the platform default.
    with open(manifest_path, encoding="utf-8") as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue

            try:
                entry = json.loads(line)
            except json.JSONDecodeError as e:
                logger.warning("Line %d: invalid JSON: %s", line_num, e)
                continue

            # A line can be valid JSON without being an object (e.g. a list
            # or a bare string); such values have no .get().
            if not isinstance(entry, dict):
                logger.warning(
                    "Line %d: expected a JSON object, got %s",
                    line_num,
                    type(entry).__name__,
                )
                continue

            tweet_id = entry.get("tweet_id", "")
            media_id = entry.get("media_id", "")

            # Skip if we've already processed this tweet.
            # NOTE(review): this keeps only the first media item per tweet,
            # and all entries lacking a tweet_id share the "" key — confirm
            # that per-tweet (not per-media) dedup is intended.
            if tweet_id in seen_tweet_ids:
                continue
            seen_tweet_ids.add(tweet_id)

            # Extract fields. `or` guards against explicit JSON nulls, which
            # .get()'s default does not cover.
            text = entry.get("full_text") or ""
            hashtags = [h for h in (entry.get("hashtags") or []) if h]
            media_type = entry.get("media_type", "photo")
            created_at = entry.get("created_at", "")
            local_path = entry.get("local_media_path", "")

            themes = extract_themes(hashtags, text)

            kernels.append(
                MeaningKernel(
                    kernel_id=f"ktf-{tweet_id}-{media_id}",
                    source_tweet_id=tweet_id,
                    source_media_id=media_id,
                    media_type=media_type,
                    created_at=created_at,
                    themes=[t.value for t in themes],
                    description=f"{media_type} from tweet {tweet_id}",
                    meaning=synthesize_meaning(themes, text, media_type),
                    emotional_weight=classify_emotional_weight(text, hashtags),
                    hashtags=hashtags,
                    raw_text=text,
                    local_path=local_path,
                )
            )

    logger.info(
        "Extracted %d Meaning Kernels from %d tweets",
        len(kernels),
        len(seen_tweet_ids),
    )

    # Write output if path provided
    if output_path:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            for kernel in kernels:
                f.write(json.dumps(kernel.to_fact_store()) + "\n")
        logger.info("Wrote %d facts to %s", len(kernels), output_path)

    return kernels
|
|
|
|
|
|
def generate_ledger_summary(kernels: List[MeaningKernel]) -> Dict[str, Any]:
    """Generate a summary of the Father's Ledger.

    Args:
        kernels: Kernels to aggregate; only their ``themes``,
            ``emotional_weight`` and ``media_type`` attributes are read.

    Returns:
        A dict with:
          * ``total_kernels`` – number of kernels
          * ``theme_distribution`` – theme -> count, keys in sorted order
          * ``top_themes`` – up to five ``(theme, count)`` pairs, highest
            count first (ties keep first-seen order)
          * ``emotional_weight_distribution`` – weight -> count
          * ``media_type_distribution`` – media type -> count
          * ``sacred_kernel_count`` – kernels whose weight is ``"sacred"``
          * ``generated_at`` – current UTC time, ISO 8601 with a ``Z`` suffix
    """
    theme_counts: Dict[str, int] = {}
    weight_counts: Dict[str, int] = {}
    media_type_counts: Dict[str, int] = {}

    for k in kernels:
        for theme in k.themes:
            theme_counts[theme] = theme_counts.get(theme, 0) + 1
        weight_counts[k.emotional_weight] = weight_counts.get(k.emotional_weight, 0) + 1
        media_type_counts[k.media_type] = media_type_counts.get(k.media_type, 0) + 1

    # Stable sort on descending count, then keep the five most frequent.
    top_themes = sorted(theme_counts.items(), key=lambda x: -x[1])[:5]

    # datetime.utcnow() is deprecated since Python 3.12. Use an aware UTC
    # "now" and drop tzinfo so isoformat() does not emit "+00:00" before "Z".
    generated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    return {
        "total_kernels": len(kernels),
        "theme_distribution": dict(sorted(theme_counts.items())),
        "top_themes": top_themes,
        "emotional_weight_distribution": weight_counts,
        "media_type_distribution": media_type_counts,
        # Already tallied above; no need to build a second list just to count.
        "sacred_kernel_count": weight_counts.get("sacred", 0),
        "generated_at": generated_at,
    }
|
|
|
|
|
|
# =========================================================================
|
|
# CLI
|
|
# =========================================================================
|
|
|
|
def _build_arg_parser() -> argparse.ArgumentParser:
    """Assemble the command-line interface for the synthesis script."""
    cli = argparse.ArgumentParser(
        description="Know Thy Father — Phase 3: Holographic Synthesis"
    )
    cli.add_argument(
        "--input", "-i",
        type=Path,
        default=Path("twitter-archive/media/manifest.jsonl"),
        help="Path to media manifest JSONL (default: twitter-archive/media/manifest.jsonl)",
    )
    cli.add_argument(
        "--output", "-o",
        type=Path,
        default=Path("twitter-archive/knowledge/fathers_ledger.jsonl"),
        help="Output path for fact_store JSONL (default: twitter-archive/knowledge/fathers_ledger.jsonl)",
    )
    cli.add_argument(
        "--summary", "-s",
        type=Path,
        default=None,
        help="Output path for ledger summary JSON (optional)",
    )
    cli.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose logging",
    )
    return cli


def _print_ledger(summary: Dict[str, Any]) -> None:
    """Print the human-readable ledger report for *summary* to stdout."""
    print("\n=== Father's Ledger ===")
    print(f"Total Meaning Kernels: {summary['total_kernels']}")
    print(f"Sacred Kernels: {summary['sacred_kernel_count']}")

    print("\nTop Themes:")
    for theme, count in summary['top_themes']:
        print(f" {theme}: {count}")

    print("\nEmotional Weight:")
    for weight, count in sorted(summary['emotional_weight_distribution'].items()):
        print(f" {weight}: {count}")

    print("\nMedia Types:")
    for mtype, count in summary['media_type_distribution'].items():
        print(f" {mtype}: {count}")


def main():
    """CLI entry point: parse arguments, build the ledger, and report on it.

    Exits with status 1 when no kernels could be extracted from the input
    manifest. When ``--summary`` is given, the summary is also written there
    as indented JSON.
    """
    args = _build_arg_parser().parse_args()

    log_level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s [%(levelname)s] %(message)s",
    )

    kernels = process_manifest(args.input, args.output)
    if not kernels:
        print(f"No kernels extracted from {args.input}")
        sys.exit(1)

    summary = generate_ledger_summary(kernels)

    if args.summary:
        args.summary.parent.mkdir(parents=True, exist_ok=True)
        with open(args.summary, "w") as f:
            json.dump(summary, f, indent=2)
        print(f"Summary written to {args.summary}")

    _print_ledger(summary)

    if args.output:
        print(f"\nFact store output: {args.output}")
|
|
|
|
|
|
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|