"""Split a Twitter archive ``tweets.js`` into original tweets and retweets.

Reads the archive's ``tweets.js``, writes ``tweets.jsonl`` and
``retweets.jsonl`` (one tweet object per line) into the extraction directory,
and writes a ``manifest.json`` holding the two counts plus the earliest and
latest ``created_at`` values found.
"""
import json
import os
import sys
from datetime import datetime

TWEETS_JS_PATH = os.path.expanduser("~/Downloads/twitter-2026-03-27-d4471cc6eb6703034d592f870933561ebee374d9d9b90c9b8923abff064afc1e/data/tweets.js")
EXTRACTED_DIR = os.path.expanduser("~/.timmy/twitter-archive/extracted")
MANIFEST_PATH = os.path.expanduser("~/.timmy/twitter-archive/manifest.json")

# Layout of Twitter's ``created_at`` strings, e.g.
# "Wed Oct 10 20:19:24 +0000 2018".
# NOTE: %a / %b are locale-dependent; this assumes the default C/English locale.
TWITTER_DATE_FORMAT = "%a %b %d %H:%M:%S %z %Y"


def parse_archive_js(text):
    """Return the JSON payload of an archive ``.js`` file.

    Archive files wrap their JSON in a JavaScript assignment such as
    ``window.YTD.tweets.part0 = [...]``. Only that leading assignment is
    removed, so tweet text that happens to contain the same characters is
    left untouched. Text without the prefix is parsed as plain JSON.

    Raises:
        json.JSONDecodeError: if the remaining text is not valid JSON.
    """
    text = text.strip()
    if text.startswith("window.YTD."):
        # Everything up to the first "=" is the JS variable name.
        _, _, text = text.partition("=")
    return json.loads(text)


def is_retweet(tweet):
    """Return True when *tweet* looks like a retweet.

    The ``retweeted`` flag is honoured when set. NOTE(review): archive exports
    reportedly always store ``retweeted`` as false, so the conventional
    ``RT @`` text prefix is used as a second signal -- confirm against a real
    export.
    """
    if tweet.get("retweeted"):
        return True
    text = tweet.get("full_text") or tweet.get("text") or ""
    return isinstance(text, str) and text.startswith("RT @")


def split_tweets(tweets):
    """Partition *tweets* into ``(originals, retweets)``, preserving order."""
    originals = []
    retweets = []
    for tweet in tweets:
        (retweets if is_retweet(tweet) else originals).append(tweet)
    return originals, retweets


def date_range(tweets):
    """Return ``(earliest, latest)`` ``created_at`` strings among *tweets*.

    Tweets without a ``created_at`` key are ignored; ``(None, None)`` is
    returned when none carry one. Values are ordered chronologically by
    parsing them with ``TWITTER_DATE_FORMAT`` -- comparing the raw strings
    would sort by weekday name. The returned values are the original,
    unmodified strings.
    """
    stamps = [tweet["created_at"] for tweet in tweets if "created_at" in tweet]
    if not stamps:
        return None, None
    try:
        keyed = [(datetime.strptime(s, TWITTER_DATE_FORMAT), s) for s in stamps]
    except (TypeError, ValueError):
        # Not Twitter's layout (e.g. ISO 8601): keep plain string ordering,
        # which is what this script did before.
        return min(stamps), max(stamps)
    earliest = min(keyed, key=lambda pair: pair[0])[1]
    latest = max(keyed, key=lambda pair: pair[0])[1]
    return earliest, latest


def write_jsonl(path, records):
    """Write *records* to *path* as JSON Lines (one JSON document per line)."""
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(json.dumps(record) + "\n" for record in records)


def main():
    """Run the extraction: read tweets.js, write the JSONL files and manifest."""
    # Tweets routinely contain non-ASCII text; do not rely on the platform
    # default encoding.
    with open(TWEETS_JS_PATH, "r", encoding="utf-8") as f:
        entries = parse_archive_js(f.read())

    # Each archive entry wraps the actual tweet object under the "tweet" key.
    tweets = [entry["tweet"] for entry in entries]
    original_tweets, retweets = split_tweets(tweets)

    # The output directory may not exist on a first run.
    os.makedirs(EXTRACTED_DIR, exist_ok=True)
    write_jsonl(os.path.join(EXTRACTED_DIR, "tweets.jsonl"), original_tweets)
    write_jsonl(os.path.join(EXTRACTED_DIR, "retweets.jsonl"), retweets)

    earliest_date, latest_date = date_range(tweets)
    manifest = {
        "original_count": len(original_tweets),
        "retweet_count": len(retweets),
        "earliest_date": earliest_date,
        "latest_date": latest_date,
    }
    os.makedirs(os.path.dirname(MANIFEST_PATH), exist_ok=True)
    with open(MANIFEST_PATH, "w", encoding="utf-8") as f:
        json.dump(manifest, f)


if __name__ == "__main__":
    main()