1212import logging
1313from glob import glob
1414from yt_dlp import YoutubeDL
15- from yt_dlp .utils import DownloadError , ExtractorError , PostProcessingError , UnavailableVideoError
15+ from yt_dlp .utils import (
16+ DownloadError ,
17+ ExtractorError ,
18+ PostProcessingError ,
19+ UnavailableVideoError ,
20+ )
1621from youtube_transcript_api import YouTubeTranscriptApi
17- from youtube_transcript_api import TranscriptsDisabled , NoTranscriptFound , VideoUnavailable , TooManyRequests
22+ from youtube_transcript_api import (
23+ TranscriptsDisabled ,
24+ NoTranscriptFound ,
25+ VideoUnavailable ,
26+ TooManyRequests ,
27+ )
1828from youtube_transcript_api .formatters import JSONFormatter
1929from tqdm import tqdm
2030
@@ -56,7 +66,9 @@ def download_transcripts():
5666 for video_id in tqdm (list (ids ), desc = "Downloading transcripts" ):
5767 try :
5868 time .sleep (sleep_time ) # Rate limiting pause
59- transcript = YouTubeTranscriptApi .get_transcript (video_id , languages = c .LANGUAGE )
69+ transcript = YouTubeTranscriptApi .get_transcript (
70+ video_id , languages = c .LANGUAGE
71+ )
6072 json_transcript = formatter .format_transcript (transcript )
6173 transcript_path = os .path .join (c .TRANSCRIPT_DIR , f"{ video_id } .json" )
6274 with open (transcript_path , "w" , encoding = "utf-8" ) as out_file :
@@ -65,7 +77,7 @@ def download_transcripts():
6577 except TranscriptsDisabled as e :
6678 logger .error ("Transcripts are disabled for %s. Error: %s" , video_id , e )
6779 except NoTranscriptFound as e :
68- logger .error ("No transcript found for %s in the specified languages . Error: %s" , video_id , e )
80+ logger .error ("No transcript %s in specified langs . Error: %s" , video_id , e )
6981 except VideoUnavailable as e :
7082 logger .error ("Video %s is unavailable. Error: %s" , video_id , e )
7183 except TooManyRequests as e :
@@ -75,7 +87,7 @@ def download_transcripts():
7587 logger .error ("An unexpected error occurred for %s. Error: %s" , video_id , e )
7688
7789
78- def process_youtube_video (video_id , download_options ):
90+ def download_single_video (video_id , download_options ):
7991 """Download a YouTube video using specified options."""
8092 video_url = f"https://www.youtube.com/watch?v={ video_id } "
8193 try :
@@ -91,41 +103,33 @@ def process_youtube_video(video_id, download_options):
91103 except UnavailableVideoError as e :
92104 logger .error ("Video %s is unavailable. Error: %s" , video_id , e )
93105 except Exception as e :
94- logger .error ("An unexpected error occurred for video %s. Error: %s" , video_id , e )
106+ logger .error ("An unexpected error occurred for %s. Error: %s" , video_id , e )
95107
96108
def download_videos():
    """Download videos for the IDs listed in ``c.ID`` that are not yet on disk.

    Creates ``c.VIDEO_DIR`` if needed, reads one video ID per non-blank line
    from the file at ``c.ID``, and skips every ID that ``get_existing_ids``
    already reports for ``c.VIDEO_DIR`` with the ``mp4`` extension. The
    remaining IDs are downloaded one at a time with ``c.YT_CONFIG`` behind a
    tqdm progress bar, pausing one second before each download.

    Returns:
        None. A summary with the number of attempted and failed downloads is
        logged at INFO level.
    """
    os.makedirs(c.VIDEO_DIR, exist_ok=True)
    existing_ids = get_existing_ids(c.VIDEO_DIR, "mp4")

    with open(c.ID, "r", encoding="utf-8") as f:
        all_ids = {line.strip() for line in f if line.strip()}

    # Materialize as a list so tqdm can report a total.
    ids = list(all_ids - existing_ids)

    if not ids:
        logger.info("All videos have already been downloaded.")
        return

    error_count = 0
    # Use tqdm progress bar to show progress
    with tqdm(ids, desc="Downloading videos", unit="video") as pbar:
        for video_id in pbar:
            time.sleep(1)  # Rate limiting pause
            # NOTE(review): a falsy return value is counted as a failure;
            # confirm download_single_video returns a truthy value on success.
            if not download_single_video(video_id, c.YT_CONFIG):
                error_count += 1
                pbar.set_postfix(errors=error_count)

    # Both placeholders need an argument; with only one, logging fails to
    # format the record and the summary never reaches the log.
    logger.info(
        "Video download completed: Total %d, Errors %d.", len(ids), error_count
    )
129133
130134
131135def main ():
0 commit comments