fix: update fetch_transcript.py for youtube-transcript-api v1.x

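youtube-transcript-api v1.x no longer provides the static
YouTubeTranscriptApi.get_transcript() method. Migrate to the new
instance-based YouTubeTranscriptApi().fetch() API and normalize the
FetchedTranscriptSnippet objects it yields back to dicts with 'text',
'start', and 'duration' keys, for compatibility with the rest of the
script.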
This commit is contained in:
analista
2026-04-01 12:49:03 +00:00
committed by Teknium
parent e905768ffd
commit 3400098481

View File

@@ -48,7 +48,11 @@ def format_timestamp(seconds: float) -> str:
def fetch_transcript(video_id: str, languages: list = None):
"""Fetch transcript segments from YouTube."""
"""Fetch transcript segments from YouTube.
Returns a list of dicts with 'text', 'start', and 'duration' keys.
Compatible with youtube-transcript-api v1.x.
"""
try:
from youtube_transcript_api import YouTubeTranscriptApi
except ImportError:
@@ -56,9 +60,17 @@ def fetch_transcript(video_id: str, languages: list = None):
file=sys.stderr)
sys.exit(1)
api = YouTubeTranscriptApi()
if languages:
return YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
return YouTubeTranscriptApi.get_transcript(video_id)
result = api.fetch(video_id, languages=languages)
else:
result = api.fetch(video_id)
# v1.x returns FetchedTranscriptSnippet objects; normalize to dicts
return [
{"text": seg.text, "start": seg.start, "duration": seg.duration}
for seg in result
]
def main():