55 lines
1.7 KiB
Python
55 lines
1.7 KiB
Python
|
|
"""Split a Twitter archive ``tweets.js`` into JSONL files and a manifest.

Reads the archive's ``tweets.js``, separates the entries flagged as retweets
from the rest, writes each group to ``extracted/tweets.jsonl`` and
``extracted/retweets.jsonl`` (one JSON object per line), and records the
counts plus the earliest and latest ``created_at`` values in
``manifest.json``.

Usage: ``python <script> [TWEETS_JS_PATH [ARCHIVE_DIR]]``. Both arguments are
optional and fall back to the defaults below.
"""
import json
import os
import sys
from datetime import datetime

DEFAULT_TWEETS_JS = "~/Downloads/twitter-2026-03-27-d4471cc6eb6703034d592f870933561ebee374d9d9b90c9b8923abff064afc1e/data/tweets.js"
DEFAULT_ARCHIVE_DIR = "~/.timmy/twitter-archive"

# JavaScript assignment that precedes the JSON payload in tweets.js.
JS_PREFIX = 'window.YTD.tweets.part0 = '

# Layout of ``created_at`` values such as "Wed Jan 05 10:00:00 +0000 2022".
CREATED_AT_FORMAT = "%a %b %d %H:%M:%S %z %Y"


def _load_tweets(tweets_js_path):
    """Return the list of inner ``tweet`` dicts stored in *tweets_js_path*.

    Raises:
        OSError: if the file cannot be read.
        json.JSONDecodeError: if the payload is not valid JSON.
        KeyError: if an entry has no ``'tweet'`` key.
    """
    with open(tweets_js_path, 'r', encoding='utf-8') as f:
        text = f.read().strip()
    # Only drop the prefix at the very start, so tweet text that happens to
    # contain the same characters is left untouched.
    if text.startswith(JS_PREFIX):
        text = text[len(JS_PREFIX):].strip()
    return [entry['tweet'] for entry in json.loads(text)]


def _parse_created_at(value):
    """Parse a ``created_at`` string; return ``None`` if it does not match."""
    try:
        return datetime.strptime(value, CREATED_AT_FORMAT)
    except (TypeError, ValueError):
        return None


def _date_range(tweets):
    """Return ``(earliest, latest)`` raw ``created_at`` strings, or Nones.

    The values are ordered chronologically rather than alphabetically:
    comparing the raw strings would sort by weekday name. Tweets without a
    ``created_at`` key, or whose value does not match ``CREATED_AT_FORMAT``,
    are ignored.
    """
    dated = []
    for tweet in tweets:
        if 'created_at' not in tweet:
            continue
        raw = tweet['created_at']
        parsed = _parse_created_at(raw)
        if parsed is not None:
            dated.append((parsed, raw))
    if not dated:
        return None, None
    earliest = min(dated, key=lambda pair: pair[0])
    latest = max(dated, key=lambda pair: pair[0])
    return earliest[1], latest[1]


def _write_jsonl(path, records):
    """Write *records* to *path*, one JSON document per line."""
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(json.dumps(record) + '\n' for record in records)


def main(tweets_js_path=DEFAULT_TWEETS_JS, archive_dir=DEFAULT_ARCHIVE_DIR):
    """Extract tweets and retweets and write the manifest.

    Args:
        tweets_js_path: Location of the archive's ``tweets.js``; ``~`` is
            expanded.
        archive_dir: Output root; ``~`` is expanded. JSONL files go to
            ``<archive_dir>/extracted`` and the manifest to
            ``<archive_dir>/manifest.json``. Missing directories are created.

    Returns:
        The manifest dict that was written to disk.
    """
    tweets = _load_tweets(os.path.expanduser(tweets_js_path))

    # NOTE(review): classification relies solely on the ``retweeted`` flag.
    # If the export leaves that flag false for retweets, retweets.jsonl will
    # come out empty -- confirm against a real archive.
    retweets = [tweet for tweet in tweets if tweet.get('retweeted')]
    original_tweets = [tweet for tweet in tweets if not tweet.get('retweeted')]

    archive_dir = os.path.expanduser(archive_dir)
    extracted_dir = os.path.join(archive_dir, 'extracted')
    # The output tree may not exist yet on a fresh machine.
    os.makedirs(extracted_dir, exist_ok=True)

    _write_jsonl(os.path.join(extracted_dir, 'tweets.jsonl'), original_tweets)
    _write_jsonl(os.path.join(extracted_dir, 'retweets.jsonl'), retweets)

    earliest_date, latest_date = _date_range(tweets)
    manifest = {
        'original_count': len(original_tweets),
        'retweet_count': len(retweets),
        'earliest_date': earliest_date,
        'latest_date': latest_date,
    }
    with open(os.path.join(archive_dir, 'manifest.json'), 'w', encoding='utf-8') as f:
        json.dump(manifest, f)
    return manifest


if __name__ == "__main__":
    # Optional overrides: argv[1] = tweets.js path, argv[2] = archive dir.
    main(*sys.argv[1:3])
|