Some checks failed
Smoke Test / smoke (pull_request) Failing after 11s
Structured processing log for the multimodal Twitter archive analysis. 33 meaning kernel entries indexed with theme classification.

## What

- `twitter-archive/know-thy-father/PROCESSING_LOG.md` — progress tracker, theme index, arc pattern catalog
- `twitter-archive/know-thy-father/entries/processed.jsonl` — 33 structured entries with tweet_id, media_type, arc, meaning_kernel, themes
- `twitter-archive/know-thy-father/tracker.py` — CLI tool for status/add/report
- `tests/twitter_archive/test_ktf_tracker.py` — 7 tests

## Themes tracked

identity (20), transmutation (13), authenticity (12), digital_agency (11), agency (8), glitch (8), silence (5), void (5), collective_identity (4), noise (4), presence (4), simulation (2), shadow (1), self_naming (1), persistence (1)

## Usage

- `python tracker.py status` — show progress
- `python tracker.py add X.json` — add entry
- `python tracker.py report` — generate markdown report

Closes #587.
207 lines
5.9 KiB
Python
207 lines
5.9 KiB
Python
#!/usr/bin/env python3
"""
Know Thy Father — Processing Tracker

Tracks the progress of multimodal analysis on the Twitter archive.
Reads processed.jsonl, computes stats, and updates the processing log.

Usage:
    python tracker.py status            # Show current progress
    python tracker.py add ENTRY.json    # Add a new processed entry
    python tracker.py report            # Generate markdown report
"""
|
|
|
|
import json
import sys
from collections import Counter
from datetime import datetime, timezone
from pathlib import Path
|
|
|
|
# All tracker paths are resolved relative to the directory holding this script.
LOG_DIR = Path(__file__).parent
ENTRIES_FILE = LOG_DIR.joinpath("entries", "processed.jsonl")
LOG_FILE = LOG_DIR.joinpath("PROCESSING_LOG.md")

# Denominator used for the pending / completion figures.
TOTAL_TARGETS = 108
|
|
|
|
|
|
def load_entries() -> list[dict]:
    """Load all processed entries from the JSONL file.

    Returns:
        One parsed JSON value per non-blank line of ``ENTRIES_FILE``, in
        file order, or an empty list when the file does not exist.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if not ENTRIES_FILE.exists():
        return []
    # save_entry() emits raw non-ASCII characters (ensure_ascii=False), so the
    # file must be decoded as UTF-8 explicitly instead of relying on the
    # platform's locale encoding.
    with open(ENTRIES_FILE, "r", encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        return [json.loads(line) for line in stripped if line]


def save_entry(entry: dict) -> None:
    """Append a single entry to the JSONL file.

    The parent directory of ``ENTRIES_FILE`` is created when missing. The
    entry is serialized on one line followed by a newline.

    Args:
        entry: JSON-serializable mapping to append.
    """
    ENTRIES_FILE.parent.mkdir(parents=True, exist_ok=True)
    # ensure_ascii=False keeps non-ASCII text readable in the file; pin the
    # encoding to UTF-8 so the write cannot fail (or be mis-read later) under
    # a non-UTF-8 locale.
    with open(ENTRIES_FILE, "a", encoding="utf-8") as f:
        f.write(json.dumps(entry, ensure_ascii=False) + "\n")
|
|
|
|
|
|
def compute_stats(entries: list[dict], total_targets: "int | None" = None) -> dict:
    """Compute processing statistics.

    Args:
        entries: Processed entries. Each may carry ``themes`` (a list of
            theme names, a single theme string, or missing/``None``),
            ``media_type`` and ``method``.
        total_targets: Denominator for the progress figures. Defaults to
            the module-level ``TOTAL_TARGETS`` when omitted.

    Returns:
        A dict with ``total_targets``, ``processed``, ``pending`` (never
        negative), ``completion_pct`` (rounded to one decimal; 0 when the
        target is not positive) and the ``themes`` / ``media_types`` /
        ``methods`` distributions ordered from most to least common.
    """
    if total_targets is None:
        total_targets = TOTAL_TARGETS
    processed = len(entries)

    theme_counter = Counter()
    for entry in entries:
        # Tolerate "themes": null, and treat a bare string as one theme
        # instead of iterating over its characters.
        themes = entry.get("themes") or []
        if isinstance(themes, str):
            themes = [themes]
        theme_counter.update(themes)

    media_counter = Counter(entry.get("media_type", "unknown") for entry in entries)
    method_counter = Counter(entry.get("method", "unknown") for entry in entries)

    if total_targets > 0:
        completion_pct = round(processed / total_targets * 100, 1)
    else:
        completion_pct = 0

    return {
        "total_targets": total_targets,
        "processed": processed,
        "pending": max(0, total_targets - processed),
        "completion_pct": completion_pct,
        "themes": dict(theme_counter.most_common()),
        "media_types": dict(media_counter.most_common()),
        "methods": dict(method_counter.most_common()),
    }
|
|
|
|
|
|
def cmd_status() -> None:
    """Print current processing status.

    Loads the stored entries, computes the statistics and writes the
    progress figures, theme distribution and media-type distribution to
    standard output.
    """
    stats = compute_stats(load_entries())

    print("Know Thy Father — Processing Status")
    print("=" * 40)
    print(f" Total targets: {stats['total_targets']}")
    print(f" Processed: {stats['processed']}")
    print(f" Pending: {stats['pending']}")
    print(f" Completion: {stats['completion_pct']}%")
    print()
    print("Theme distribution:")
    for theme, count in stats["themes"].items():
        # !s converts first: a bare ``:25s`` spec raises for keys that are not
        # strings (e.g. a null or numeric value loaded from the JSONL file).
        print(f" {theme!s:25s} {count}")
    print()
    print("Media types:")
    for media, count in stats["media_types"].items():
        print(f" {media!s:25s} {count}")
|
|
|
|
|
|
def cmd_add(entry_path: str) -> None:
    """Add a new processed entry from a JSON file.

    The file must contain a single JSON object with at least ``tweet_id``,
    ``media_type``, ``arc`` and ``meaning_kernel``. A ``processed_at`` UTC
    timestamp is added when absent, the entry is appended to the store and
    the updated progress is printed.

    Args:
        entry_path: Path of the JSON file describing the entry.

    Raises:
        SystemExit: With status 1 when the payload is not a JSON object or
            a required field is missing.
    """
    # Read as UTF-8 explicitly rather than with the locale's default encoding.
    with open(entry_path, "r", encoding="utf-8") as f:
        entry = json.load(f)

    # A list or string would slip through the ``in`` checks below whenever it
    # happens to contain the field names, then fail on item assignment.
    if not isinstance(entry, dict):
        print(f"Error: entry must be a JSON object, got {type(entry).__name__}")
        sys.exit(1)

    # Validate required fields
    required = ["tweet_id", "media_type", "arc", "meaning_kernel"]
    missing = [field for field in required if field not in entry]
    if missing:
        print(f"Error: missing required fields: {missing}")
        sys.exit(1)

    # Add timestamp if not present. datetime.utcnow() is deprecated; take an
    # aware UTC time and drop tzinfo so the stored text keeps its "...Z" form.
    if "processed_at" not in entry:
        now = datetime.now(timezone.utc)
        entry["processed_at"] = now.replace(tzinfo=None).isoformat() + "Z"

    save_entry(entry)
    print(f"Added entry for tweet {entry['tweet_id']}")

    stats = compute_stats(load_entries())
    print(f"Progress: {stats['processed']}/{stats['total_targets']} ({stats['completion_pct']}%)")
|
|
|
|
|
|
def cmd_report() -> None:
    """Generate a markdown report of current progress.

    The report holds a progress table, the theme and media-type
    distributions and the last five entries. It is printed to standard
    output and also written to ``REPORT.md`` inside ``LOG_DIR``.
    """
    entries = load_entries()
    stats = compute_stats(entries)
    # datetime.utcnow() is deprecated; an aware UTC "now" formats identically.
    generated = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

    lines = [
        "# Know Thy Father — Processing Report",
        "",
        f"Generated: {generated}",
        "",
        "## Progress",
        "",
        "| Metric | Count |",
        "|--------|-------|",
        f"| Total targets | {stats['total_targets']} |",
        f"| Processed | {stats['processed']} |",
        f"| Pending | {stats['pending']} |",
        f"| Completion | {stats['completion_pct']}% |",
        "",
        "## Theme Distribution",
        "",
        "| Theme | Count |",
        "|-------|-------|",
    ]
    lines.extend(
        f"| {theme} | {count} |" for theme, count in stats["themes"].items()
    )

    lines.extend([
        "",
        "## Media Types",
        "",
        "| Type | Count |",
        "|------|-------|",
    ])
    lines.extend(
        f"| {media} | {count} |" for media, count in stats["media_types"].items()
    )

    lines.extend([
        "",
        "## Recent Entries",
        "",
    ])
    for entry in entries[-5:]:
        # Entries appended outside cmd_add may lack fields; fall back instead
        # of aborting the whole report.
        kernel = str(entry.get("meaning_kernel", ""))
        # Only mark the kernel as elided when it really was cut short.
        if len(kernel) > 100:
            kernel = kernel[:100] + "..."
        lines.append(f"### Tweet {entry.get('tweet_id', 'unknown')}")
        lines.append(f"- **Arc:** {entry.get('arc', 'unknown')}")
        lines.append(f"- **Kernel:** {kernel}")
        lines.append("")

    report = "\n".join(lines)
    print(report)

    # Also save to file. The report contains non-ASCII text (the title's em
    # dash, arbitrary kernel text), so write UTF-8 explicitly.
    report_file = LOG_DIR / "REPORT.md"
    with open(report_file, "w", encoding="utf-8") as f:
        f.write(report)
    print(f"\nReport saved to {report_file}")
|
|
|
|
|
|
def main(argv: "list[str] | None" = None) -> int:
    """Run the tracker command line and return a process exit status.

    Args:
        argv: Arguments following the program name. Defaults to
            ``sys.argv[1:]`` when omitted.

    Returns:
        0 when the command was dispatched, 1 for a missing or unknown
        command or a missing ``add`` argument.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    usage = "Usage: tracker.py [status|add|report]"

    if not args:
        print(usage)
        return 1

    cmd = args[0]
    if cmd == "status":
        cmd_status()
    elif cmd == "add":
        if len(args) < 2:
            print("Usage: tracker.py add ENTRY.json")
            return 1
        cmd_add(args[1])
    elif cmd == "report":
        cmd_report()
    else:
        print(f"Unknown command: {cmd}")
        print(usage)
        return 1
    return 0


if __name__ == "__main__":
    # Keeping the logic in main() lets the CLI be exercised without spawning
    # a subprocess; the exit status is the same as before.
    sys.exit(main())
|