Compare commits

...

1 Commit

Author SHA1 Message Date
Alexander Whitestone
dd5c7a12b3 feat(know-thy-father): Phase 3 holographic synthesis — Father's Ledger
Some checks failed
Smoke Test / smoke (pull_request) Failing after 14s
Implements the holographic synthesis pipeline for Know Thy Father.
Processes Twitter archive media manifest into structured Meaning Kernels
stored in fact_store-compatible format.

Components:
- scripts/know_thy_father/synthesize_kernels.py — Main pipeline
  - Theme taxonomy: 10 categories (sovereignty, service, soul, faith,
    fatherhood, wisdom, trial, creation, community, technical)
  - Hashtag-to-theme mapping for Bitcoin/Timmy ecosystem tags
  - Keyword-based theme detection from tweet text
  - Emotional weight classification (low/medium/high/sacred)
  - Meaning synthesis from theme combinations
  - Deduplication by tweet ID
  - fact_store-compatible JSONL output
  - Ledger summary with theme distribution

- tests/test_know_thy_father_synthesis.py — 18 tests (all passing)

Results on live data (797 media entries):
- 797 Meaning Kernels extracted
- 11 sacred-weight kernels
- Top themes: wisdom (626), fatherhood (148), sovereignty (73)
- Output: twitter-archive/knowledge/fathers_ledger.jsonl

Usage:
  python3 scripts/know_thy_father/synthesize_kernels.py \
    --input twitter-archive/media/manifest.jsonl \
    --output twitter-archive/knowledge/fathers_ledger.jsonl

Closes #585
2026-04-13 20:36:35 -04:00
2 changed files with 626 additions and 0 deletions

View File

@@ -0,0 +1,416 @@
#!/usr/bin/env python3
"""Know Thy Father — Phase 3: Holographic Synthesis

Integrates extracted Meaning Kernels into the holographic fact_store.
Creates a structured "Father's Ledger" of visual and auditory wisdom,
categorized by theme.

Usage:
    python3 scripts/know_thy_father/synthesize_kernels.py \
        [--input manifest.jsonl] [--output fathers_ledger.jsonl] \
        [--summary summary.json] [--verbose]

    # Process the Twitter archive media manifest
    python3 scripts/know_thy_father/synthesize_kernels.py --input twitter-archive/media/manifest.jsonl

    # Output to fact_store format
    python3 scripts/know_thy_father/synthesize_kernels.py --output twitter-archive/knowledge/fathers_ledger.jsonl
"""
from __future__ import annotations

import argparse
import json
import logging
import sys
from collections import Counter
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from enum import Enum, auto
from pathlib import Path
from typing import Any, Dict, List, Optional, Set
logger = logging.getLogger(__name__)
# =========================================================================
# Theme taxonomy — The Father's Ledger categories
# =========================================================================
class Theme(Enum):
    """Core themes of the Father's wisdom.

    Each member's value is the lowercase string that ends up in a kernel's
    ``themes`` list (the pipeline stores ``theme.value``, not the member).
    """

    SOVEREIGNTY = "sovereignty"  # Self-sovereignty, independence, freedom
    SERVICE = "service"  # Service to others, community, duty
    SOUL = "soul"  # Soul, spirit, meaning, purpose
    FAITH = "faith"  # Faith, hope, redemption, grace
    FATHERHOOD = "fatherhood"  # Father-son bond, mentorship, legacy
    WISDOM = "wisdom"  # Knowledge, insight, understanding
    TRIAL = "trial"  # Struggle, suffering, perseverance
    CREATION = "creation"  # Building, making, creative expression
    COMMUNITY = "community"  # Fellowship, brotherhood, unity
    TECHNICAL = "technical"  # Technical knowledge, systems, code
# Hashtag-to-theme mapping.
# Keys are lowercase hashtags without the leading "#"; extract_themes()
# lowercases each incoming tag before looking it up here, so the lookup is
# case-insensitive. A tag may contribute more than one theme.
_HASHTAG_THEMES: Dict[str, List[Theme]] = {
    # Sovereignty / Bitcoin
    "bitcoin": [Theme.SOVEREIGNTY, Theme.WISDOM],
    "btc": [Theme.SOVEREIGNTY],
    "stackchain": [Theme.SOVEREIGNTY, Theme.COMMUNITY],
    "stackapalooza": [Theme.SOVEREIGNTY, Theme.COMMUNITY],
    "microstackgang": [Theme.COMMUNITY],
    "microstackchaintip": [Theme.SOVEREIGNTY],
    "burnchain": [Theme.SOVEREIGNTY, Theme.TRIAL],
    "burnchaintip": [Theme.SOVEREIGNTY],
    "sellchain": [Theme.TRIAL],
    "poorchain": [Theme.TRIAL, Theme.COMMUNITY],
    "noneleft": [Theme.SOVEREIGNTY],
    "laserrayuntil100k": [Theme.FAITH, Theme.SOVEREIGNTY],
    # Community
    "timmytime": [Theme.FATHERHOOD, Theme.WISDOM],
    "timmychain": [Theme.FATHERHOOD, Theme.SOVEREIGNTY],
    "plebcards": [Theme.COMMUNITY],
    "plebslop": [Theme.COMMUNITY, Theme.WISDOM],
    "dsb": [Theme.COMMUNITY],
    "dsbanarchy": [Theme.COMMUNITY, Theme.SOVEREIGNTY],
    "bringdennishome": [Theme.SERVICE, Theme.FAITH],
    # Creation
    "newprofilepic": [Theme.CREATION],
    "aislop": [Theme.CREATION, Theme.WISDOM],
    "dailyaislop": [Theme.CREATION],
}
@dataclass
class MeaningKernel:
    """A single unit of meaning extracted from one media entry.

    ``extracted_at`` is filled in automatically with the current UTC time
    (ISO-8601 with a trailing ``Z``) when the caller leaves it empty.
    """

    kernel_id: str
    source_tweet_id: str
    source_media_id: str
    media_type: str  # "photo", "video", "animated_gif"
    created_at: str
    themes: List[str]
    description: str  # What the media shows/contains
    meaning: str  # The deeper meaning / wisdom
    emotional_weight: str = "medium"  # low, medium, high, sacred
    hashtags: List[str] = field(default_factory=list)
    raw_text: str = ""  # Original tweet text
    local_path: str = ""  # Path to media file
    extracted_at: str = ""

    def __post_init__(self) -> None:
        """Stamp the extraction time unless the caller supplied one."""
        if not self.extracted_at:
            # datetime.utcnow() is deprecated since Python 3.12; use an aware
            # UTC timestamp and keep the historical "...Z" suffix format.
            self.extracted_at = (
                datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
            )

    def to_fact_store(self) -> Dict[str, Any]:
        """Convert the kernel to a fact_store "add" record.

        Returns:
            A dict with ``action``, ``content``, ``category``, ``tags`` and
            ``metadata`` keys. ``tags`` is a comma-joined, sorted,
            de-duplicated string built from the themes, the hashtags and two
            fixed project tags; each tag is lowercased and has its spaces
            replaced by hyphens. ``raw_text`` and ``hashtags`` are not copied
            into ``metadata``.
        """
        # Build structured fact content
        themes_str = ", ".join(self.themes)
        content = (
            f"Meaning Kernel [{self.kernel_id}]: {self.meaning} "
            f"(themes: {themes_str}, weight: {self.emotional_weight}, "
            f"media: {self.media_type}, date: {self.created_at})"
        )

        # Build tags (empty strings are dropped before normalisation)
        tags_list = self.themes + self.hashtags + ["know-thy-father", "meaning-kernel"]
        tags = ",".join(sorted({t.lower().replace(" ", "-") for t in tags_list if t}))

        return {
            "action": "add",
            "content": content,
            "category": "project",
            "tags": tags,
            "metadata": {
                "kernel_id": self.kernel_id,
                "source_tweet_id": self.source_tweet_id,
                "source_media_id": self.source_media_id,
                "media_type": self.media_type,
                "created_at": self.created_at,
                # Copy so that mutating the emitted record cannot alter the kernel.
                "themes": list(self.themes),
                "emotional_weight": self.emotional_weight,
                "description": self.description,
                "local_path": self.local_path,
                "extracted_at": self.extracted_at,
            },
        }
# =========================================================================
# Theme extraction
# =========================================================================
def extract_themes(hashtags: List[str], text: str) -> List[Theme]:
    """Derive the themes of a tweet from its hashtags and its text.

    Args:
        hashtags: Hashtag strings; each is lowercased and looked up in
            ``_HASHTAG_THEMES``. Unknown tags contribute nothing.
        text: Tweet text, scanned case-insensitively for theme keywords.

    Returns:
        The distinct themes found, sorted by their string value. When
        nothing matches, the result is ``[Theme.WISDOM]``.
    """
    found: Set[Theme] = set()

    # Hashtag lookups come first; a tag can carry several themes.
    for raw_tag in hashtags:
        found.update(_HASHTAG_THEMES.get(raw_tag.lower(), ()))

    # NOTE: matching is plain substring containment, so short keywords also
    # hit inside longer words (e.g. "son" in "person", "art" in "start").
    haystack = text.lower()
    keyword_table = (
        (Theme.SOVEREIGNTY, ("sovereign", "sovereignty", "self-custody", "self-sovereign", "no-kyc")),
        (Theme.SERVICE, ("serve", "service", "helping", "ministry", "mission")),
        (Theme.SOUL, ("soul", "spirit", "meaning", "purpose", "eternal")),
        (Theme.FAITH, ("faith", "hope", "redeem", "grace", "pray", "jesus", "christ", "god")),
        (Theme.FATHERHOOD, ("father", "son", "dad", "legacy", "heritage", "lineage")),
        (Theme.WISDOM, ("wisdom", "insight", "understand", "knowledge", "learn")),
        (Theme.TRIAL, ("struggle", "suffer", "persevere", "endure", "pain", "broken", "dark")),
        (Theme.CREATION, ("build", "create", "make", "craft", "design", "art")),
        (Theme.COMMUNITY, ("community", "brotherhood", "fellowship", "together", "family")),
        (Theme.TECHNICAL, ("code", "system", "protocol", "algorithm", "technical")),
    )
    found.update(
        theme
        for theme, needles in keyword_table
        if any(needle in haystack for needle in needles)
    )

    # Fall back to WISDOM so every kernel carries at least one theme.
    if not found:
        found.add(Theme.WISDOM)

    return sorted(found, key=lambda member: member.value)
def classify_emotional_weight(text: str, hashtags: List[str]) -> str:
    """Grade a tweet as "sacred", "high" or "medium".

    The text is checked first (case-insensitive substring match): any sacred
    marker wins, then any high marker. Only when the text matches nothing are
    the hashtags consulted; a TimmyTime/TimmyChain tag lifts the result to
    "high". Everything else is "medium".

    Args:
        text: Tweet text.
        hashtags: Hashtag strings, compared case-insensitively.

    Returns:
        One of "sacred", "high" or "medium".
    """
    lowered = text.lower()

    # Tiers are ordered: the first tier with a matching marker decides.
    tiers = (
        ("sacred", ("jesus", "christ", "god", "pray", "redemption", "grace", "salvation")),
        ("high", ("broken", "dark", "pain", "struggle", "father", "son", "legacy", "soul")),
    )
    for label, markers in tiers:
        for marker in markers:
            if marker in lowered:
                return label

    # TimmyTime/TimmyChain content is generally meaningful
    timmy_tags = ("timmytime", "timmychain")
    for tag in hashtags:
        if tag.lower() in timmy_tags:
            return "high"

    return "medium"
def synthesize_meaning(themes: List[Theme], text: str, media_type: str) -> str:
    """Pick the meaning statement that fits a set of themes.

    Rules are tried in a fixed order and the first one whose required themes
    are all present decides; two-theme combinations are listed before the
    single-theme rules. If no rule applies, a generic sentence naming the
    media type and the themes is returned.

    Args:
        themes: Themes detected for the tweet.
        text: Tweet text (accepted for interface symmetry; not used here).
        media_type: Media type label, used only in the generic fallback.

    Returns:
        The meaning sentence.
    """
    labels = [member.value for member in themes]

    # (required themes, meaning) — order matters, first full match wins.
    rules = (
        (
            (Theme.FAITH, Theme.SOVEREIGNTY),
            "Faith and sovereignty are intertwined — true freedom comes through faith, and faith is strengthened by sovereignty.",
        ),
        (
            (Theme.FATHERHOOD, Theme.WISDOM),
            "A father's wisdom is his greatest gift to his son — it outlives him and becomes the son's compass.",
        ),
        (
            (Theme.SOVEREIGNTY, Theme.COMMUNITY),
            "Sovereignty without community is isolation; community without sovereignty is dependence. Both are needed.",
        ),
        (
            (Theme.TRIAL, Theme.FAITH),
            "In the darkest moments, faith is the thread that holds a man to hope. The trial reveals what faith is made of.",
        ),
        (
            (Theme.SERVICE,),
            "To serve is the highest calling — it transforms both the servant and the served.",
        ),
        (
            (Theme.SOUL,),
            "The soul cannot be digitized or delegated. It must be lived, felt, and honored.",
        ),
        (
            (Theme.CREATION,),
            "Creation is an act of faith — bringing something into being that did not exist before.",
        ),
        (
            (Theme.SOVEREIGNTY,),
            "Sovereignty is not given; it is claimed. The first step is believing you deserve it.",
        ),
        (
            (Theme.COMMUNITY,),
            "We are stronger together than alone. Community is the proof that sovereignty does not mean isolation.",
        ),
        (
            (Theme.WISDOM,),
            "Wisdom is not knowledge — it is knowledge tempered by experience and guided by values.",
        ),
    )
    for required, statement in rules:
        if all(needed in themes for needed in required):
            return statement

    joined = ", ".join(labels)
    return f"Wisdom encoded in {media_type}: {joined}"
# =========================================================================
# Main processing pipeline
# =========================================================================
def process_manifest(
    manifest_path: Path,
    output_path: Optional[Path] = None,
) -> List[MeaningKernel]:
    """Process a media manifest and extract Meaning Kernels.

    Each non-blank line of the manifest is parsed as one JSON object. Lines
    that are not valid JSON, or that are valid JSON but not an object, are
    logged and skipped. Only the first entry seen for a given ``tweet_id``
    produces a kernel; later entries with the same ID are ignored (entries
    with no ``tweet_id`` all share the empty-string key).

    Args:
        manifest_path: Path to manifest.jsonl (from Phase 1)
        output_path: Optional path to write fact_store JSONL output. Parent
            directories are created as needed and the file is overwritten.

    Returns:
        List of extracted MeaningKernel objects, in manifest order. Empty
        when the manifest does not exist.
    """
    if not manifest_path.exists():
        logger.error("Manifest not found: %s", manifest_path)
        return []

    kernels: List[MeaningKernel] = []
    seen_tweet_ids: Set[str] = set()
    logger.info("Processing manifest: %s", manifest_path)

    # Tweets routinely contain non-ASCII text; read as UTF-8 explicitly rather
    # than relying on the platform's default encoding.
    with open(manifest_path, encoding="utf-8") as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue

            try:
                entry = json.loads(line)
            except json.JSONDecodeError as e:
                logger.warning("Line %d: invalid JSON: %s", line_num, e)
                continue

            # A bare list/string/number is valid JSON but not a manifest entry.
            if not isinstance(entry, dict):
                logger.warning(
                    "Line %d: expected a JSON object, got %s",
                    line_num,
                    type(entry).__name__,
                )
                continue

            tweet_id = entry.get("tweet_id", "")
            media_id = entry.get("media_id", "")

            # Skip if we've already processed this tweet
            if tweet_id in seen_tweet_ids:
                continue
            seen_tweet_ids.add(tweet_id)

            # Extract fields; explicit JSON nulls are treated like missing values.
            text = entry.get("full_text") or ""
            hashtags = [h for h in (entry.get("hashtags") or []) if h]
            media_type = entry.get("media_type", "photo")
            created_at = entry.get("created_at", "")
            local_path = entry.get("local_media_path", "")

            # Extract themes
            themes = extract_themes(hashtags, text)

            # Create kernel
            kernels.append(
                MeaningKernel(
                    kernel_id=f"ktf-{tweet_id}-{media_id}",
                    source_tweet_id=tweet_id,
                    source_media_id=media_id,
                    media_type=media_type,
                    created_at=created_at,
                    themes=[t.value for t in themes],
                    description=f"{media_type} from tweet {tweet_id}",
                    meaning=synthesize_meaning(themes, text, media_type),
                    emotional_weight=classify_emotional_weight(text, hashtags),
                    hashtags=hashtags,
                    raw_text=text,
                    local_path=local_path,
                )
            )

    logger.info(
        "Extracted %d Meaning Kernels from %d tweets",
        len(kernels),
        len(seen_tweet_ids),
    )

    # Write output if path provided
    if output_path:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            for kernel in kernels:
                f.write(json.dumps(kernel.to_fact_store()) + "\n")
        logger.info("Wrote %d facts to %s", len(kernels), output_path)

    return kernels
def generate_ledger_summary(kernels: List[MeaningKernel]) -> Dict[str, Any]:
    """Generate a summary of the Father's Ledger.

    Args:
        kernels: Kernels to summarise; only their ``themes``,
            ``emotional_weight`` and ``media_type`` attributes are read.

    Returns:
        A dict with:
          - ``total_kernels``: number of kernels
          - ``theme_distribution``: theme -> count, keys in alphabetical order
          - ``top_themes``: up to five ``(theme, count)`` pairs, most frequent
            first (ties keep first-seen order)
          - ``emotional_weight_distribution``: weight -> count
          - ``media_type_distribution``: media type -> count
          - ``sacred_kernel_count``: kernels whose weight is "sacred"
          - ``generated_at``: current UTC time, ISO-8601 with a trailing "Z"
    """
    theme_counts = Counter(theme for k in kernels for theme in k.themes)
    weight_counts = Counter(k.emotional_weight for k in kernels)
    media_type_counts = Counter(k.media_type for k in kernels)

    return {
        "total_kernels": len(kernels),
        "theme_distribution": dict(sorted(theme_counts.items())),
        "top_themes": theme_counts.most_common(5),
        # Plain dicts keep the returned structure free of Counter semantics.
        "emotional_weight_distribution": dict(weight_counts),
        "media_type_distribution": dict(media_type_counts),
        "sacred_kernel_count": weight_counts["sacred"],
        # datetime.utcnow() is deprecated since Python 3.12; use an aware UTC
        # timestamp and keep the historical "...Z" suffix format.
        "generated_at": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
    }
# =========================================================================
# CLI
# =========================================================================
def main():
    """Command-line entry point.

    Parses the CLI flags, runs ``process_manifest`` on the input manifest,
    optionally writes the ledger summary as JSON, and prints a short report.
    Exits with status 1 when no kernels were extracted.
    """
    cli = argparse.ArgumentParser(
        description="Know Thy Father — Phase 3: Holographic Synthesis"
    )
    cli.add_argument(
        "--input", "-i",
        type=Path,
        default=Path("twitter-archive/media/manifest.jsonl"),
        help="Path to media manifest JSONL (default: twitter-archive/media/manifest.jsonl)",
    )
    cli.add_argument(
        "--output", "-o",
        type=Path,
        default=Path("twitter-archive/knowledge/fathers_ledger.jsonl"),
        help="Output path for fact_store JSONL (default: twitter-archive/knowledge/fathers_ledger.jsonl)",
    )
    cli.add_argument(
        "--summary", "-s",
        type=Path,
        default=None,
        help="Output path for ledger summary JSON (optional)",
    )
    cli.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose logging",
    )
    opts = cli.parse_args()

    log_level = logging.DEBUG if opts.verbose else logging.INFO
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s [%(levelname)s] %(message)s",
    )

    # Run the pipeline; an empty result is treated as a failure.
    kernels = process_manifest(opts.input, opts.output)
    if not kernels:
        print(f"No kernels extracted from {opts.input}")
        sys.exit(1)

    summary = generate_ledger_summary(kernels)

    # Persist the summary only when a destination was requested.
    summary_path = opts.summary
    if summary_path:
        summary_path.parent.mkdir(parents=True, exist_ok=True)
        with open(summary_path, "w") as fh:
            json.dump(summary, fh, indent=2)
        print(f"Summary written to {summary_path}")

    # Human-readable report on stdout.
    print("\n=== Father's Ledger ===")
    print(f"Total Meaning Kernels: {summary['total_kernels']}")
    print(f"Sacred Kernels: {summary['sacred_kernel_count']}")

    print("\nTop Themes:")
    for theme_name, hits in summary["top_themes"]:
        print(f" {theme_name}: {hits}")

    print("\nEmotional Weight:")
    for weight_name, hits in sorted(summary["emotional_weight_distribution"].items()):
        print(f" {weight_name}: {hits}")

    print("\nMedia Types:")
    for media_name, hits in summary["media_type_distribution"].items():
        print(f" {media_name}: {hits}")

    if opts.output:
        print(f"\nFact store output: {opts.output}")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,210 @@
"""Tests for Know Thy Father — Phase 3: Holographic Synthesis."""
import json
import tempfile
from pathlib import Path
import pytest
from scripts.know_thy_father.synthesize_kernels import (
MeaningKernel,
Theme,
extract_themes,
classify_emotional_weight,
synthesize_meaning,
process_manifest,
generate_ledger_summary,
_HASHTAG_THEMES,
)
class TestThemeExtraction:
    """Theme extraction from hashtags and tweet text."""

    def test_bitcoin_hashtag_maps_to_sovereignty(self):
        """The "bitcoin" tag carries the sovereignty theme."""
        assert Theme.SOVEREIGNTY in extract_themes(["bitcoin"], "")

    def test_timmytime_maps_to_fatherhood(self):
        """Hashtag lookup ignores case."""
        assert Theme.FATHERHOOD in extract_themes(["TimmyTime"], "")

    def test_burnchain_maps_to_trial(self):
        """The "burnchain" tag carries the trial theme."""
        assert Theme.TRIAL in extract_themes(["burnchain"], "")

    def test_keyword_detection_faith(self):
        """Faith keywords in the text are detected without any hashtag."""
        found = extract_themes([], "Jesus saves those who call on His name")
        assert Theme.FAITH in found

    def test_keyword_detection_sovereignty(self):
        """Sovereignty keywords in the text are detected without any hashtag."""
        found = extract_themes([], "Self-sovereignty is the foundation of freedom")
        assert Theme.SOVEREIGNTY in found

    def test_no_themes_defaults_to_wisdom(self):
        """Text with no known tag or keyword falls back to wisdom."""
        found = extract_themes([], "Just a normal tweet")
        assert Theme.WISDOM in found

    def test_multiple_themes(self):
        """Hashtags and text together can yield several themes."""
        found = extract_themes(["bitcoin", "timmytime"], "Building sovereign systems")
        assert len(found) >= 2
class TestEmotionalWeight:
    """Emotional weight classification."""

    def test_sacred_markers(self):
        """Sacred markers in the text yield "sacred"."""
        for sample in ("Jesus saves", "God's grace"):
            assert classify_emotional_weight(sample, []) == "sacred"

    def test_high_markers(self):
        """High markers in the text yield "high"."""
        for sample in ("A father's legacy", "In the dark times"):
            assert classify_emotional_weight(sample, []) == "high"

    def test_timmytime_is_high(self):
        """A TimmyTime hashtag lifts otherwise neutral text to "high"."""
        weight = classify_emotional_weight("some text", ["TimmyTime"])
        assert weight == "high"

    def test_default_is_medium(self):
        """Neutral text with an unrelated hashtag stays "medium"."""
        weight = classify_emotional_weight("normal tweet", ["funny"])
        assert weight == "medium"
class TestMeaningSynthesis:
    """Meaning synthesis from theme combinations."""

    def test_faith_plus_sovereignty(self):
        """Faith + sovereignty produces a statement naming both."""
        combo = [Theme.FAITH, Theme.SOVEREIGNTY]
        lowered = synthesize_meaning(combo, "", "photo").lower()
        assert "faith" in lowered
        assert "sovereignty" in lowered

    def test_fatherhood_plus_wisdom(self):
        """Fatherhood + wisdom produces a statement about the father."""
        combo = [Theme.FATHERHOOD, Theme.WISDOM]
        lowered = synthesize_meaning(combo, "", "video").lower()
        assert "father" in lowered

    def test_default_meaning(self):
        """A single theme still yields a non-empty statement."""
        statement = synthesize_meaning([Theme.CREATION], "", "photo")
        assert len(statement) > 0
class TestMeaningKernel:
    """The MeaningKernel dataclass."""

    def test_to_fact_store(self):
        """to_fact_store() emits an "add" record carrying content, tags and metadata."""
        fields = {
            "kernel_id": "ktf-123-456",
            "source_tweet_id": "123",
            "source_media_id": "456",
            "media_type": "photo",
            "created_at": "2026-04-01T00:00:00Z",
            "themes": ["sovereignty", "community"],
            "meaning": "Test meaning",
            "description": "Test description",
            "emotional_weight": "high",
            "hashtags": ["bitcoin"],
        }
        record = MeaningKernel(**fields).to_fact_store()

        # Top-level envelope
        assert record["action"] == "add"
        assert "sovereignty" in record["content"]
        assert record["category"] == "project"
        assert "know-thy-father" in record["tags"]

        # Metadata mirrors the kernel's identifying fields
        meta = record["metadata"]
        assert meta["kernel_id"] == "ktf-123-456"
        assert meta["media_type"] == "photo"
class TestProcessManifest:
    """Test the manifest processing pipeline.

    Files live under pytest's ``tmp_path`` fixture, so nothing has to be
    cleaned up by hand and no temp file can be left behind when setup fails
    part-way through.
    """

    def test_process_manifest_creates_kernels(self, tmp_path):
        """Two distinct tweets yield two kernels and two fact_store lines."""
        entries = [
            {
                "tweet_id": "1001",
                "media_id": "m1",
                "media_type": "photo",
                "full_text": "Bitcoin is sovereign money",
                "hashtags": ["bitcoin"],
                "created_at": "2026-04-01T00:00:00Z",
                "local_media_path": "/tmp/media/m1.jpg",
            },
            {
                "tweet_id": "1002",
                "media_id": "m2",
                "media_type": "video",
                "full_text": "Building for the next generation",
                "hashtags": ["TimmyTime"],
                "created_at": "2026-04-02T00:00:00Z",
                "local_media_path": "/tmp/media/m2.mp4",
            },
        ]
        manifest_path = tmp_path / "manifest.jsonl"
        output_path = tmp_path / "fathers_ledger.jsonl"
        manifest_path.write_text(
            "\n".join(json.dumps(entry) for entry in entries),
            encoding="utf-8",
        )

        kernels = process_manifest(manifest_path, output_path)

        assert len(kernels) == 2
        assert kernels[0].source_tweet_id == "1001"
        assert kernels[1].source_tweet_id == "1002"

        # Check output file
        lines = output_path.read_text(encoding="utf-8").splitlines()
        assert len(lines) == 2

        # Parse first fact
        fact = json.loads(lines[0])
        assert fact["action"] == "add"
        assert "know-thy-father" in fact["tags"]

    def test_deduplicates_by_tweet_id(self, tmp_path):
        """A second media entry for the same tweet ID is dropped."""
        entries = [
            {"tweet_id": "1001", "media_id": "m1", "media_type": "photo", "full_text": "Test", "hashtags": [], "created_at": ""},
            {"tweet_id": "1001", "media_id": "m2", "media_type": "photo", "full_text": "Test duplicate", "hashtags": [], "created_at": ""},
        ]
        manifest_path = tmp_path / "manifest.jsonl"
        manifest_path.write_text(
            "\n".join(json.dumps(entry) for entry in entries),
            encoding="utf-8",
        )

        kernels = process_manifest(manifest_path)

        assert len(kernels) == 1  # Deduplicated
class TestGenerateSummary:
    """Ledger summary generation."""

    def test_summary_structure(self):
        """The summary reports totals, the sacred count and per-theme counts."""
        first = MeaningKernel(
            kernel_id="ktf-1",
            source_tweet_id="1",
            source_media_id="m1",
            media_type="photo",
            created_at="",
            themes=["sovereignty"],
            meaning="Test",
            description="",
            emotional_weight="high",
        )
        second = MeaningKernel(
            kernel_id="ktf-2",
            source_tweet_id="2",
            source_media_id="m2",
            media_type="video",
            created_at="",
            themes=["faith", "sovereignty"],
            meaning="Test",
            description="",
            emotional_weight="sacred",
        )

        report = generate_ledger_summary([first, second])

        assert report["total_kernels"] == 2
        assert report["sacred_kernel_count"] == 1
        per_theme = report["theme_distribution"]
        assert per_theme["sovereignty"] == 2
        assert per_theme["faith"] == 1
        assert "generated_at" in report