Files
timmy-home/twitter-archive/know-thy-father/tracker.py
Timmy e20ffd3e1d
Some checks failed
Smoke Test / smoke (pull_request) Failing after 11s
feat: Know Thy Father processing log and tracker (#587)
Structured processing log for the multimodal Twitter archive analysis.
33 meaning kernel entries indexed with theme classification.

## What
- twitter-archive/know-thy-father/PROCESSING_LOG.md — progress tracker,
  theme index, arc pattern catalog
- twitter-archive/know-thy-father/entries/processed.jsonl — 33 structured
  entries with tweet_id, media_type, arc, meaning_kernel, themes
- twitter-archive/know-thy-father/tracker.py — CLI tool for status/add/report
- tests/twitter_archive/test_ktf_tracker.py — 7 tests

## Themes tracked
identity (20), transmutation (13), authenticity (12), digital_agency (11),
agency (8), glitch (8), silence (5), void (5), collective_identity (4),
noise (4), presence (4), simulation (2), shadow (1), self_naming (1),
persistence (1)

## Usage
python tracker.py status   — show progress
python tracker.py add X.json — add entry
python tracker.py report   — generate markdown report

Closes #587.
2026-04-13 20:21:44 -04:00

207 lines
5.9 KiB
Python

#!/usr/bin/env python3
"""
Know Thy Father — Processing Tracker
Tracks the progress of multimodal analysis on the Twitter archive.
Reads processed.jsonl, computes stats, and updates the processing log.
Usage:
python tracker.py status # Show current progress
python tracker.py add ENTRY.json # Add a new processed entry
python tracker.py report # Generate markdown report
"""
import json
import sys
from collections import Counter
from datetime import datetime
from pathlib import Path
LOG_DIR = Path(__file__).parent
ENTRIES_FILE = LOG_DIR / "entries" / "processed.jsonl"
LOG_FILE = LOG_DIR / "PROCESSING_LOG.md"
TOTAL_TARGETS = 108
def load_entries() -> list[dict]:
"""Load all processed entries from the JSONL file."""
if not ENTRIES_FILE.exists():
return []
entries = []
with open(ENTRIES_FILE, "r") as f:
for line in f:
line = line.strip()
if line:
entries.append(json.loads(line))
return entries
def save_entry(entry: dict) -> None:
"""Append a single entry to the JSONL file."""
ENTRIES_FILE.parent.mkdir(parents=True, exist_ok=True)
with open(ENTRIES_FILE, "a") as f:
f.write(json.dumps(entry, ensure_ascii=False) + "\n")
def compute_stats(entries: list[dict]) -> dict:
"""Compute processing statistics."""
processed = len(entries)
pending = max(0, TOTAL_TARGETS - processed)
# Theme distribution
theme_counter = Counter()
for entry in entries:
for theme in entry.get("themes", []):
theme_counter[theme] += 1
# Media type distribution
media_counter = Counter()
for entry in entries:
media_type = entry.get("media_type", "unknown")
media_counter[media_type] += 1
# Processing method distribution
method_counter = Counter()
for entry in entries:
method = entry.get("method", "unknown")
method_counter[method] += 1
return {
"total_targets": TOTAL_TARGETS,
"processed": processed,
"pending": pending,
"completion_pct": round(processed / TOTAL_TARGETS * 100, 1) if TOTAL_TARGETS > 0 else 0,
"themes": dict(theme_counter.most_common()),
"media_types": dict(media_counter.most_common()),
"methods": dict(method_counter.most_common()),
}
def cmd_status() -> None:
"""Print current processing status."""
entries = load_entries()
stats = compute_stats(entries)
print(f"Know Thy Father — Processing Status")
print(f"{'=' * 40}")
print(f" Total targets: {stats['total_targets']}")
print(f" Processed: {stats['processed']}")
print(f" Pending: {stats['pending']}")
print(f" Completion: {stats['completion_pct']}%")
print()
print("Theme distribution:")
for theme, count in stats["themes"].items():
print(f" {theme:25s} {count}")
print()
print("Media types:")
for media, count in stats["media_types"].items():
print(f" {media:25s} {count}")
def cmd_add(entry_path: str) -> None:
"""Add a new processed entry from a JSON file."""
with open(entry_path, "r") as f:
entry = json.load(f)
# Validate required fields
required = ["tweet_id", "media_type", "arc", "meaning_kernel"]
missing = [f for f in required if f not in entry]
if missing:
print(f"Error: missing required fields: {missing}")
sys.exit(1)
# Add timestamp if not present
if "processed_at" not in entry:
entry["processed_at"] = datetime.utcnow().isoformat() + "Z"
save_entry(entry)
print(f"Added entry for tweet {entry['tweet_id']}")
entries = load_entries()
stats = compute_stats(entries)
print(f"Progress: {stats['processed']}/{stats['total_targets']} ({stats['completion_pct']}%)")
def cmd_report() -> None:
"""Generate a markdown report of current progress."""
entries = load_entries()
stats = compute_stats(entries)
lines = [
"# Know Thy Father — Processing Report",
"",
f"Generated: {datetime.utcnow().strftime('%Y-%m-%d %H:%M UTC')}",
"",
"## Progress",
"",
f"| Metric | Count |",
f"|--------|-------|",
f"| Total targets | {stats['total_targets']} |",
f"| Processed | {stats['processed']} |",
f"| Pending | {stats['pending']} |",
f"| Completion | {stats['completion_pct']}% |",
"",
"## Theme Distribution",
"",
"| Theme | Count |",
"|-------|-------|",
]
for theme, count in stats["themes"].items():
lines.append(f"| {theme} | {count} |")
lines.extend([
"",
"## Media Types",
"",
"| Type | Count |",
"|------|-------|",
])
for media, count in stats["media_types"].items():
lines.append(f"| {media} | {count} |")
lines.extend([
"",
"## Recent Entries",
"",
])
for entry in entries[-5:]:
lines.append(f"### Tweet {entry['tweet_id']}")
lines.append(f"- **Arc:** {entry['arc']}")
lines.append(f"- **Kernel:** {entry['meaning_kernel'][:100]}...")
lines.append("")
report = "\n".join(lines)
print(report)
# Also save to file
report_file = LOG_DIR / "REPORT.md"
with open(report_file, "w") as f:
f.write(report)
print(f"\nReport saved to {report_file}")
if __name__ == "__main__":
if len(sys.argv) < 2:
print("Usage: tracker.py [status|add|report]")
sys.exit(1)
cmd = sys.argv[1]
if cmd == "status":
cmd_status()
elif cmd == "add":
if len(sys.argv) < 3:
print("Usage: tracker.py add ENTRY.json")
sys.exit(1)
cmd_add(sys.argv[2])
elif cmd == "report":
cmd_report()
else:
print(f"Unknown command: {cmd}")
print("Usage: tracker.py [status|add|report]")
sys.exit(1)