Compare commits
1 Commits
main
...
burn/585-1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dd5c7a12b3 |
416
scripts/know_thy_father/synthesize_kernels.py
Normal file
416
scripts/know_thy_father/synthesize_kernels.py
Normal file
@@ -0,0 +1,416 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Know Thy Father — Phase 3: Holographic Synthesis
|
||||
|
||||
Integrates extracted Meaning Kernels into the holographic fact_store.
|
||||
Creates a structured "Father's Ledger" of visual and auditory wisdom,
|
||||
categorized by theme.
|
||||
|
||||
Usage:
|
||||
python3 scripts/know_thy_father/synthesize_kernels.py [--input manifest.jsonl] [--output fathers_ledger.jsonl]
|
||||
|
||||
# Process the Twitter archive media manifest
|
||||
python3 scripts/know_thy_father/synthesize_kernels.py --input twitter-archive/media/manifest.jsonl
|
||||
|
||||
# Output to fact_store format
|
||||
python3 scripts/know_thy_father/synthesize_kernels.py --output twitter-archive/knowledge/fathers_ledger.jsonl
|
||||
"""
|
||||
|
||||
from __future__ import annotations

import argparse
import json
import logging
import sys
from collections import Counter
from dataclasses import asdict, dataclass, field
from datetime import datetime, timezone
from enum import Enum, auto
from pathlib import Path
from typing import Any, Dict, List, Optional, Set
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Theme taxonomy — The Father's Ledger categories
|
||||
# =========================================================================
|
||||
|
||||
class Theme(Enum):
    """Closed set of categories under which the Father's wisdom is filed.

    Each member's value is the lowercase name of the theme.
    """

    # Self-sovereignty, independence, freedom
    SOVEREIGNTY = "sovereignty"
    # Service to others, community, duty
    SERVICE = "service"
    # Soul, spirit, meaning, purpose
    SOUL = "soul"
    # Faith, hope, redemption, grace
    FAITH = "faith"
    # Father-son bond, mentorship, legacy
    FATHERHOOD = "fatherhood"
    # Knowledge, insight, understanding
    WISDOM = "wisdom"
    # Struggle, suffering, perseverance
    TRIAL = "trial"
    # Building, making, creative expression
    CREATION = "creation"
    # Fellowship, brotherhood, unity
    COMMUNITY = "community"
    # Technical knowledge, systems, code
    TECHNICAL = "technical"
|
||||
|
||||
|
||||
# Lookup table: lowercase hashtag (without "#") -> themes that hashtag implies.
# Callers are expected to lowercase a tag before looking it up.
_HASHTAG_THEMES: Dict[str, List[Theme]] = {
    # --- Sovereignty / Bitcoin -------------------------------------------
    "bitcoin": [Theme.SOVEREIGNTY, Theme.WISDOM],
    "btc": [Theme.SOVEREIGNTY],
    "stackchain": [Theme.SOVEREIGNTY, Theme.COMMUNITY],
    "stackapalooza": [Theme.SOVEREIGNTY, Theme.COMMUNITY],
    "microstackgang": [Theme.COMMUNITY],
    "microstackchaintip": [Theme.SOVEREIGNTY],
    "burnchain": [Theme.SOVEREIGNTY, Theme.TRIAL],
    "burnchaintip": [Theme.SOVEREIGNTY],
    "sellchain": [Theme.TRIAL],
    "poorchain": [Theme.TRIAL, Theme.COMMUNITY],
    "noneleft": [Theme.SOVEREIGNTY],
    "laserrayuntil100k": [Theme.FAITH, Theme.SOVEREIGNTY],
    # --- Community --------------------------------------------------------
    "timmytime": [Theme.FATHERHOOD, Theme.WISDOM],
    "timmychain": [Theme.FATHERHOOD, Theme.SOVEREIGNTY],
    "plebcards": [Theme.COMMUNITY],
    "plebslop": [Theme.COMMUNITY, Theme.WISDOM],
    "dsb": [Theme.COMMUNITY],
    "dsbanarchy": [Theme.COMMUNITY, Theme.SOVEREIGNTY],
    "bringdennishome": [Theme.SERVICE, Theme.FAITH],
    # --- Creation ---------------------------------------------------------
    "newprofilepic": [Theme.CREATION],
    "aislop": [Theme.CREATION, Theme.WISDOM],
    "dailyaislop": [Theme.CREATION],
}
|
||||
|
||||
|
||||
@dataclass
class MeaningKernel:
    """A single unit of meaning extracted from media.

    If ``extracted_at`` is left empty it is filled in at construction time
    with the current UTC time, formatted as ISO 8601 with a trailing ``Z``.
    """

    kernel_id: str
    source_tweet_id: str
    source_media_id: str
    media_type: str  # "photo", "video", "animated_gif"
    created_at: str
    themes: List[str]
    description: str  # What the media shows/contains
    meaning: str  # The deeper meaning / wisdom
    emotional_weight: str = "medium"  # low, medium, high, sacred
    hashtags: List[str] = field(default_factory=list)
    raw_text: str = ""  # Original tweet text
    local_path: str = ""  # Path to media file
    extracted_at: str = ""

    def __post_init__(self) -> None:
        """Stamp the kernel with the current UTC time if none was supplied."""
        if not self.extracted_at:
            # datetime.utcnow() is deprecated since Python 3.12. Take an aware
            # UTC "now" and drop the tzinfo so the text keeps the original
            # "<naive ISO timestamp>Z" shape instead of gaining "+00:00".
            now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
            self.extracted_at = now_utc.isoformat() + "Z"

    def to_fact_store(self) -> Dict[str, Any]:
        """Convert to fact_store format for holographic memory.

        Returns:
            A dict with the keys ``action`` (always ``"add"``), ``content``
            (a one-line human-readable summary), ``category`` (always
            ``"project"``), ``tags`` (a comma-separated string) and
            ``metadata`` (the kernel's identifying fields).
        """
        # Build structured fact content
        themes_str = ", ".join(self.themes)
        content = (
            f"Meaning Kernel [{self.kernel_id}]: {self.meaning} "
            f"(themes: {themes_str}, weight: {self.emotional_weight}, "
            f"media: {self.media_type}, date: {self.created_at})"
        )

        # Build tags: themes + hashtags + two fixed markers, normalized to
        # lowercase with spaces turned into hyphens, de-duplicated and sorted
        # so the resulting string is deterministic.
        tags_list = self.themes + self.hashtags + ["know-thy-father", "meaning-kernel"]
        tags = ",".join(sorted({t.lower().replace(" ", "-") for t in tags_list if t}))

        return {
            "action": "add",
            "content": content,
            "category": "project",
            "tags": tags,
            "metadata": {
                "kernel_id": self.kernel_id,
                "source_tweet_id": self.source_tweet_id,
                "source_media_id": self.source_media_id,
                "media_type": self.media_type,
                "created_at": self.created_at,
                "themes": self.themes,
                "emotional_weight": self.emotional_weight,
                "description": self.description,
                "local_path": self.local_path,
                "extracted_at": self.extracted_at,
            },
        }
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Theme extraction
|
||||
# =========================================================================
|
||||
|
||||
def extract_themes(hashtags: List[str], text: str) -> List[Theme]:
    """Derive the themes implied by a tweet's hashtags and text.

    Hashtags are lowercased and looked up in ``_HASHTAG_THEMES``. The text is
    lowercased and scanned for keywords using plain substring containment, so
    a keyword also matches inside a longer word. When nothing matches, the
    result falls back to ``Theme.WISDOM``.

    Returns:
        The distinct themes found, sorted by their string value.
    """
    # Keywords whose presence anywhere in the lowercased text implies a theme.
    keywords_by_theme = {
        Theme.SOVEREIGNTY: ("sovereign", "sovereignty", "self-custody", "self-sovereign", "no-kyc"),
        Theme.SERVICE: ("serve", "service", "helping", "ministry", "mission"),
        Theme.SOUL: ("soul", "spirit", "meaning", "purpose", "eternal"),
        Theme.FAITH: ("faith", "hope", "redeem", "grace", "pray", "jesus", "christ", "god"),
        Theme.FATHERHOOD: ("father", "son", "dad", "legacy", "heritage", "lineage"),
        Theme.WISDOM: ("wisdom", "insight", "understand", "knowledge", "learn"),
        Theme.TRIAL: ("struggle", "suffer", "persevere", "endure", "pain", "broken", "dark"),
        Theme.CREATION: ("build", "create", "make", "craft", "design", "art"),
        Theme.COMMUNITY: ("community", "brotherhood", "fellowship", "together", "family"),
        Theme.TECHNICAL: ("code", "system", "protocol", "algorithm", "technical"),
    }

    found: Set[Theme] = set()

    # Hashtag pass: unknown tags contribute nothing.
    for raw_tag in hashtags:
        found.update(_HASHTAG_THEMES.get(raw_tag.lower(), ()))

    # Keyword pass over the lowercased text.
    haystack = text.lower()
    found |= {
        candidate
        for candidate, words in keywords_by_theme.items()
        if any(word in haystack for word in words)
    }

    # Default if no themes detected
    if not found:
        found = {Theme.WISDOM}

    ordered = list(found)
    ordered.sort(key=lambda member: member.value)
    return ordered
|
||||
|
||||
|
||||
def classify_emotional_weight(text: str, hashtags: List[str]) -> str:
    """Return the emotional weight label for a piece of content.

    The lowercased text is checked against two marker lists using substring
    containment (a marker also matches inside a longer word). Sacred markers
    take priority over high markers. If neither list matches, a TimmyTime /
    TimmyChain hashtag (case-insensitive) still yields ``"high"``.

    Returns:
        ``"sacred"``, ``"high"`` or ``"medium"``.
    """
    lowered = text.lower()

    # Ordered from highest to lowest priority; first tier with a hit wins.
    tiers = (
        ("sacred", ("jesus", "christ", "god", "pray", "redemption", "grace", "salvation")),
        ("high", ("broken", "dark", "pain", "struggle", "father", "son", "legacy", "soul")),
    )
    for label, markers in tiers:
        for marker in markers:
            if marker in lowered:
                return label

    # TimmyTime/TimmyChain content is generally meaningful
    timmy_tags = {"timmytime", "timmychain"}
    if not timmy_tags.isdisjoint(tag.lower() for tag in hashtags):
        return "high"

    return "medium"
|
||||
|
||||
|
||||
def synthesize_meaning(themes: List[Theme], text: str, media_type: str) -> str:
    """Pick the sentence of meaning that corresponds to a set of themes.

    Rules are tried in a fixed priority order: theme pairs first, then single
    themes. The first rule whose themes are all present wins. If no rule
    applies, a generic sentence naming the media type and the theme values is
    returned. ``text`` is accepted but not consulted.
    """
    theme_names = [t.value for t in themes]

    # (themes that must all be present, sentence to return), highest priority first.
    rules = (
        (
            (Theme.FAITH, Theme.SOVEREIGNTY),
            "Faith and sovereignty are intertwined — true freedom comes through faith, and faith is strengthened by sovereignty.",
        ),
        (
            (Theme.FATHERHOOD, Theme.WISDOM),
            "A father's wisdom is his greatest gift to his son — it outlives him and becomes the son's compass.",
        ),
        (
            (Theme.SOVEREIGNTY, Theme.COMMUNITY),
            "Sovereignty without community is isolation; community without sovereignty is dependence. Both are needed.",
        ),
        (
            (Theme.TRIAL, Theme.FAITH),
            "In the darkest moments, faith is the thread that holds a man to hope. The trial reveals what faith is made of.",
        ),
        (
            (Theme.SERVICE,),
            "To serve is the highest calling — it transforms both the servant and the served.",
        ),
        (
            (Theme.SOUL,),
            "The soul cannot be digitized or delegated. It must be lived, felt, and honored.",
        ),
        (
            (Theme.CREATION,),
            "Creation is an act of faith — bringing something into being that did not exist before.",
        ),
        (
            (Theme.SOVEREIGNTY,),
            "Sovereignty is not given; it is claimed. The first step is believing you deserve it.",
        ),
        (
            (Theme.COMMUNITY,),
            "We are stronger together than alone. Community is the proof that sovereignty does not mean isolation.",
        ),
        (
            (Theme.WISDOM,),
            "Wisdom is not knowledge — it is knowledge tempered by experience and guided by values.",
        ),
    )

    for required, sentence in rules:
        if all(needed in themes for needed in required):
            return sentence

    return f"Wisdom encoded in {media_type}: {', '.join(theme_names)}"
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# Main processing pipeline
|
||||
# =========================================================================
|
||||
|
||||
def process_manifest(
    manifest_path: Path,
    output_path: Optional[Path] = None,
) -> List[MeaningKernel]:
    """Process a media manifest and extract Meaning Kernels.

    Each non-blank line of the manifest is parsed as one JSON object. Lines
    that are not valid JSON, or that are valid JSON but not an object, are
    logged and skipped. Only the first entry seen for a given ``tweet_id``
    produces a kernel; later entries for the same tweet are dropped.

    Args:
        manifest_path: Path to manifest.jsonl (from Phase 1)
        output_path: Optional path to write fact_store JSONL output

    Returns:
        List of extracted MeaningKernel objects (empty if the manifest does
        not exist).
    """
    if not manifest_path.exists():
        logger.error(f"Manifest not found: {manifest_path}")
        return []

    kernels: List[MeaningKernel] = []
    seen_tweet_ids: Set[str] = set()

    logger.info(f"Processing manifest: {manifest_path}")

    # Tweet text routinely contains emoji and other non-ASCII characters, so
    # read as UTF-8 explicitly rather than with the platform default encoding.
    with open(manifest_path, encoding="utf-8") as f:
        for line_num, line in enumerate(f, 1):
            line = line.strip()
            if not line:
                continue

            try:
                entry = json.loads(line)
            except json.JSONDecodeError as e:
                logger.warning(f"Line {line_num}: invalid JSON: {e}")
                continue

            # A line such as `[]` or `"text"` is valid JSON but has no fields
            # to read; skip it instead of crashing on `.get`.
            if not isinstance(entry, dict):
                logger.warning(
                    f"Line {line_num}: expected a JSON object, got {type(entry).__name__}"
                )
                continue

            tweet_id = entry.get("tweet_id", "")
            media_id = entry.get("media_id", "")

            # Skip if we've already processed this tweet
            if tweet_id in seen_tweet_ids:
                continue
            seen_tweet_ids.add(tweet_id)

            # Extract fields. `or` also covers explicit JSON nulls, which
            # `.get(key, default)` would pass through as None.
            text = entry.get("full_text") or ""
            hashtags = [h for h in (entry.get("hashtags") or []) if h]
            media_type = entry.get("media_type", "photo")
            created_at = entry.get("created_at", "")
            local_path = entry.get("local_media_path", "")

            # Extract themes
            themes = extract_themes(hashtags, text)

            # Create kernel
            kernel = MeaningKernel(
                kernel_id=f"ktf-{tweet_id}-{media_id}",
                source_tweet_id=tweet_id,
                source_media_id=media_id,
                media_type=media_type,
                created_at=created_at,
                themes=[t.value for t in themes],
                description=f"{media_type} from tweet {tweet_id}",
                meaning=synthesize_meaning(themes, text, media_type),
                emotional_weight=classify_emotional_weight(text, hashtags),
                hashtags=hashtags,
                raw_text=text,
                local_path=local_path,
            )

            kernels.append(kernel)

    logger.info(f"Extracted {len(kernels)} Meaning Kernels from {len(seen_tweet_ids)} tweets")

    # Write output if path provided
    if output_path:
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            for kernel in kernels:
                fact = kernel.to_fact_store()
                f.write(json.dumps(fact) + "\n")
        logger.info(f"Wrote {len(kernels)} facts to {output_path}")

    return kernels
|
||||
|
||||
|
||||
def generate_ledger_summary(kernels: List[MeaningKernel]) -> Dict[str, Any]:
    """Generate a summary of the Father's Ledger.

    Args:
        kernels: Kernels to summarize. Only their ``themes``,
            ``emotional_weight`` and ``media_type`` attributes are read.

    Returns:
        A dict with the total kernel count, per-theme counts (sorted by theme
        name), the five most frequent themes as ``(theme, count)`` pairs,
        per-weight and per-media-type counts, the number of ``"sacred"``
        kernels, and a UTC ``generated_at`` timestamp ending in ``Z``.
    """
    theme_counts = Counter(theme for k in kernels for theme in k.themes)
    weight_counts = Counter(k.emotional_weight for k in kernels)
    media_type_counts = Counter(k.media_type for k in kernels)

    # Top themes: sorted() is stable, so ties keep first-seen order.
    top_themes = sorted(theme_counts.items(), key=lambda x: -x[1])[:5]

    # datetime.utcnow() is deprecated since Python 3.12. Use an aware UTC
    # "now" and strip tzinfo so the text stays "<naive ISO timestamp>Z".
    generated_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    return {
        "total_kernels": len(kernels),
        "theme_distribution": dict(sorted(theme_counts.items())),
        "top_themes": top_themes,
        # Plain dicts so callers see the same types as before.
        "emotional_weight_distribution": dict(weight_counts),
        "media_type_distribution": dict(media_type_counts),
        # Counter returns 0 for a missing key.
        "sacred_kernel_count": weight_counts["sacred"],
        "generated_at": generated_at,
    }
|
||||
|
||||
|
||||
# =========================================================================
|
||||
# CLI
|
||||
# =========================================================================
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses the CLI options, runs ``process_manifest`` on the input manifest,
    optionally writes the ledger summary as JSON, and prints a human-readable
    report. Exits with status 1 when no kernels were extracted.
    """
    default_manifest = Path("twitter-archive/media/manifest.jsonl")
    default_ledger = Path("twitter-archive/knowledge/fathers_ledger.jsonl")

    cli = argparse.ArgumentParser(
        description="Know Thy Father — Phase 3: Holographic Synthesis"
    )
    cli.add_argument(
        "--input", "-i",
        type=Path,
        default=default_manifest,
        help="Path to media manifest JSONL (default: twitter-archive/media/manifest.jsonl)",
    )
    cli.add_argument(
        "--output", "-o",
        type=Path,
        default=default_ledger,
        help="Output path for fact_store JSONL (default: twitter-archive/knowledge/fathers_ledger.jsonl)",
    )
    cli.add_argument(
        "--summary", "-s",
        type=Path,
        default=None,
        help="Output path for ledger summary JSON (optional)",
    )
    cli.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Enable verbose logging",
    )
    args = cli.parse_args()

    log_level = logging.INFO
    if args.verbose:
        log_level = logging.DEBUG
    logging.basicConfig(
        level=log_level,
        format="%(asctime)s [%(levelname)s] %(message)s",
    )

    # Process
    kernels = process_manifest(args.input, args.output)
    if not kernels:
        print(f"No kernels extracted from {args.input}")
        sys.exit(1)

    # Generate summary
    summary = generate_ledger_summary(kernels)

    summary_path = args.summary
    if summary_path:
        summary_path.parent.mkdir(parents=True, exist_ok=True)
        with open(summary_path, "w") as handle:
            json.dump(summary, handle, indent=2)
        print(f"Summary written to {summary_path}")

    # Print summary
    print("\n=== Father's Ledger ===")
    print(f"Total Meaning Kernels: {summary['total_kernels']}")
    print(f"Sacred Kernels: {summary['sacred_kernel_count']}")

    print("\nTop Themes:")
    for name, tally in summary["top_themes"]:
        print(f" {name}: {tally}")

    print("\nEmotional Weight:")
    by_weight = summary["emotional_weight_distribution"]
    for label in sorted(by_weight):
        print(f" {label}: {by_weight[label]}")

    print("\nMedia Types:")
    for kind, tally in summary["media_type_distribution"].items():
        print(f" {kind}: {tally}")

    if args.output:
        print(f"\nFact store output: {args.output}")
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script, not when imported (the tests
# import this module for its functions).
if __name__ == "__main__":
    main()
|
||||
210
tests/test_know_thy_father_synthesis.py
Normal file
210
tests/test_know_thy_father_synthesis.py
Normal file
@@ -0,0 +1,210 @@
|
||||
"""Tests for Know Thy Father — Phase 3: Holographic Synthesis."""
|
||||
|
||||
import json
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from scripts.know_thy_father.synthesize_kernels import (
|
||||
MeaningKernel,
|
||||
Theme,
|
||||
extract_themes,
|
||||
classify_emotional_weight,
|
||||
synthesize_meaning,
|
||||
process_manifest,
|
||||
generate_ledger_summary,
|
||||
_HASHTAG_THEMES,
|
||||
)
|
||||
|
||||
|
||||
class TestThemeExtraction:
    """Hashtags and tweet text should resolve to the expected themes."""

    def test_bitcoin_hashtag_maps_to_sovereignty(self):
        assert Theme.SOVEREIGNTY in extract_themes(["bitcoin"], "")

    def test_timmytime_maps_to_fatherhood(self):
        # Mixed-case tag: lookup is expected to be case-insensitive.
        assert Theme.FATHERHOOD in extract_themes(["TimmyTime"], "")

    def test_burnchain_maps_to_trial(self):
        assert Theme.TRIAL in extract_themes(["burnchain"], "")

    def test_keyword_detection_faith(self):
        tweet = "Jesus saves those who call on His name"
        assert Theme.FAITH in extract_themes([], tweet)

    def test_keyword_detection_sovereignty(self):
        tweet = "Self-sovereignty is the foundation of freedom"
        assert Theme.SOVEREIGNTY in extract_themes([], tweet)

    def test_no_themes_defaults_to_wisdom(self):
        # No hashtag and no keyword hit: the fallback theme applies.
        assert Theme.WISDOM in extract_themes([], "Just a normal tweet")

    def test_multiple_themes(self):
        found = extract_themes(["bitcoin", "timmytime"], "Building sovereign systems")
        assert len(found) >= 2
|
||||
|
||||
|
||||
class TestEmotionalWeight:
    """Emotional weight labels for representative texts and hashtags."""

    def test_sacred_markers(self):
        for phrase in ("Jesus saves", "God's grace"):
            assert classify_emotional_weight(phrase, []) == "sacred"

    def test_high_markers(self):
        for phrase in ("A father's legacy", "In the dark times"):
            assert classify_emotional_weight(phrase, []) == "high"

    def test_timmytime_is_high(self):
        weight = classify_emotional_weight("some text", ["TimmyTime"])
        assert weight == "high"

    def test_default_is_medium(self):
        weight = classify_emotional_weight("normal tweet", ["funny"])
        assert weight == "medium"
|
||||
|
||||
|
||||
class TestMeaningSynthesis:
    """The synthesized sentence should reflect the themes passed in."""

    def test_faith_plus_sovereignty(self):
        sentence = synthesize_meaning([Theme.FAITH, Theme.SOVEREIGNTY], "", "photo")
        lowered = sentence.lower()
        assert "faith" in lowered
        assert "sovereignty" in lowered

    def test_fatherhood_plus_wisdom(self):
        sentence = synthesize_meaning([Theme.FATHERHOOD, Theme.WISDOM], "", "video")
        assert "father" in sentence.lower()

    def test_default_meaning(self):
        # Only checks that a single theme yields a non-empty sentence.
        sentence = synthesize_meaning([Theme.CREATION], "", "photo")
        assert len(sentence) > 0
|
||||
|
||||
|
||||
class TestMeaningKernel:
    """Serialization of a MeaningKernel into the fact_store record shape."""

    def test_to_fact_store(self):
        fields = {
            "kernel_id": "ktf-123-456",
            "source_tweet_id": "123",
            "source_media_id": "456",
            "media_type": "photo",
            "created_at": "2026-04-01T00:00:00Z",
            "themes": ["sovereignty", "community"],
            "meaning": "Test meaning",
            "description": "Test description",
            "emotional_weight": "high",
            "hashtags": ["bitcoin"],
        }
        record = MeaningKernel(**fields).to_fact_store()

        assert record["action"] == "add"
        assert "sovereignty" in record["content"]
        assert record["category"] == "project"
        assert "know-thy-father" in record["tags"]

        meta = record["metadata"]
        assert meta["kernel_id"] == "ktf-123-456"
        assert meta["media_type"] == "photo"
|
||||
|
||||
|
||||
class TestProcessManifest:
    """End-to-end checks of process_manifest against temporary JSONL files."""

    @staticmethod
    def _write_manifest(entries):
        """Write ``entries`` as JSON lines to a temp file and return its path.

        The file is created with ``delete=False``; the caller removes it.
        """
        payload = "\n".join(json.dumps(entry) for entry in entries)
        with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as handle:
            handle.write(payload)
            return Path(handle.name)

    def test_process_manifest_creates_kernels(self):
        manifest_path = self._write_manifest([
            {
                "tweet_id": "1001",
                "media_id": "m1",
                "media_type": "photo",
                "full_text": "Bitcoin is sovereign money",
                "hashtags": ["bitcoin"],
                "created_at": "2026-04-01T00:00:00Z",
                "local_media_path": "/tmp/media/m1.jpg",
            },
            {
                "tweet_id": "1002",
                "media_id": "m2",
                "media_type": "video",
                "full_text": "Building for the next generation",
                "hashtags": ["TimmyTime"],
                "created_at": "2026-04-02T00:00:00Z",
                "local_media_path": "/tmp/media/m2.mp4",
            },
        ])

        # Reserve a path for the output; process_manifest overwrites it.
        with tempfile.NamedTemporaryFile(suffix=".jsonl", delete=False) as handle:
            output_path = Path(handle.name)

        try:
            kernels = process_manifest(manifest_path, output_path)

            assert len(kernels) == 2
            assert [k.source_tweet_id for k in kernels] == ["1001", "1002"]

            # Check output file
            with open(output_path) as written:
                lines = written.readlines()
            assert len(lines) == 2

            # Parse first fact
            first_fact = json.loads(lines[0])
            assert first_fact["action"] == "add"
            assert "know-thy-father" in first_fact["tags"]
        finally:
            for leftover in (manifest_path, output_path):
                leftover.unlink(missing_ok=True)

    def test_deduplicates_by_tweet_id(self):
        shared = {"tweet_id": "1001", "media_type": "photo", "hashtags": [], "created_at": ""}
        manifest_path = self._write_manifest([
            {**shared, "media_id": "m1", "full_text": "Test"},
            {**shared, "media_id": "m2", "full_text": "Test duplicate"},
        ])

        try:
            kernels = process_manifest(manifest_path)
            assert len(kernels) == 1  # Deduplicated
        finally:
            manifest_path.unlink(missing_ok=True)
|
||||
|
||||
|
||||
class TestGenerateSummary:
    """Shape and counts of the ledger summary."""

    def test_summary_structure(self):
        high_photo = MeaningKernel(
            kernel_id="ktf-1",
            source_tweet_id="1",
            source_media_id="m1",
            media_type="photo",
            created_at="",
            themes=["sovereignty"],
            meaning="Test",
            description="",
            emotional_weight="high",
        )
        sacred_video = MeaningKernel(
            kernel_id="ktf-2",
            source_tweet_id="2",
            source_media_id="m2",
            media_type="video",
            created_at="",
            themes=["faith", "sovereignty"],
            meaning="Test",
            description="",
            emotional_weight="sacred",
        )

        summary = generate_ledger_summary([high_photo, sacred_video])

        assert summary["total_kernels"] == 2
        assert summary["sacred_kernel_count"] == 1

        distribution = summary["theme_distribution"]
        assert distribution["sovereignty"] == 2
        assert distribution["faith"] == 1

        assert "generated_at" in summary
|
||||
Reference in New Issue
Block a user