Skip to content

Commit 6358247

Browse files
committed
Update youtube-dl 2021.01.08
1 parent 10b9cce commit 6358247

22 files changed

+913
-380
lines changed

youtube_dl/downloader/hls.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -172,8 +172,12 @@ def is_ad_fragment_end(s):
172172
iv = decrypt_info.get('IV') or compat_struct_pack('>8xq', media_sequence)
173173
decrypt_info['KEY'] = decrypt_info.get('KEY') or self.ydl.urlopen(
174174
self._prepare_url(info_dict, info_dict.get('_decryption_key_url') or decrypt_info['URI'])).read()
175-
frag_content = AES.new(
176-
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
175+
# Don't decrypt the content in tests since the data is explicitly truncated and its length is no longer a multiple of the block
176+
# size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data was downloaded,
177+
# not what it decrypts to.
178+
if not test:
179+
frag_content = AES.new(
180+
decrypt_info['KEY'], AES.MODE_CBC, iv).decrypt(frag_content)
177181
self._append_fragment(ctx, frag_content)
178182
# We only download the first fragment during the test
179183
if test:

youtube_dl/extractor/acast.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from .common import InfoExtractor
77
from ..utils import (
88
clean_html,
9+
clean_podcast_url,
910
int_or_none,
1011
parse_iso8601,
1112
)
@@ -17,7 +18,7 @@ def _extract_episode(self, episode, show_info):
1718
info = {
1819
'id': episode['id'],
1920
'display_id': episode.get('episodeUrl'),
20-
'url': episode['url'],
21+
'url': clean_podcast_url(episode['url']),
2122
'title': title,
2223
'description': clean_html(episode.get('description') or episode.get('summary')),
2324
'thumbnail': episode.get('image'),

youtube_dl/extractor/applepodcasts.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# coding: utf-8
2+
from __future__ import unicode_literals
3+
4+
from .common import InfoExtractor
5+
from ..utils import (
6+
clean_podcast_url,
7+
int_or_none,
8+
parse_iso8601,
9+
try_get,
10+
)
11+
12+
13+
class ApplePodcastsIE(InfoExtractor):
    # Extractor for a single podcast episode page on podcasts.apple.com.
    # The episode id is the numeric value of the ``i=`` query argument.
    _VALID_URL = r'https?://podcasts\.apple\.com/(?:[^/]+/)?podcast(?:/[^/]+){1,2}.*?\bi=(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://podcasts.apple.com/us/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
            'md5': 'df02e6acb11c10e844946a39e7222b08',
            'info_dict': {
                'id': '1000482637777',
                'ext': 'mp3',
                'title': '207 - Whitney Webb Returns',
                'description': 'md5:13a73bade02d2e43737751e3987e1399',
                'upload_date': '20200705',
                'timestamp': 1593921600,
                'duration': 6425,
                'series': 'The Tim Dillon Show',
            },
        },
        {
            'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns/id1135137367?i=1000482637777',
            'only_matching': True,
        },
        {
            'url': 'https://podcasts.apple.com/podcast/207-whitney-webb-returns?i=1000482637777',
            'only_matching': True,
        },
        {
            'url': 'https://podcasts.apple.com/podcast/id1135137367?i=1000482637777',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        # Download the episode page and read the JSON object embedded in the
        # element whose id is "shoebox-ember-data-store".
        episode_id = self._match_id(url)
        webpage = self._download_webpage(url, episode_id)
        raw_store = self._search_regex(
            r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
            webpage, 'ember data')
        store = self._parse_json(raw_store, episode_id)

        attrs = store['data']['attributes']
        # A missing or empty description is treated as an empty mapping so
        # the .get() lookups below stay safe.
        descriptions = attrs.get('description') or {}

        # The show name comes from the "media/podcast" entries of the
        # "included" list. Every matching entry overwrites the previous
        # value, so the last one wins (including a result of None).
        series = None
        for included in store.get('included') or []:
            if included.get('type') != 'media/podcast':
                continue
            series = try_get(included, lambda x: x['attributes']['name'])

        # 'name' and 'assetUrl' are mandatory (KeyError if absent); all other
        # fields are optional.
        title = attrs['name']
        media_url = clean_podcast_url(attrs['assetUrl'])

        return {
            'id': episode_id,
            'title': title,
            'url': media_url,
            'description': descriptions.get('standard') or descriptions.get('short'),
            'timestamp': parse_iso8601(attrs.get('releaseDateTime')),
            'duration': int_or_none(attrs.get('durationInMilliseconds'), 1000),
            'series': series,
        }

youtube_dl/extractor/bfmtv.py

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
# coding: utf-8
2+
from __future__ import unicode_literals
3+
4+
import re
5+
6+
from .common import InfoExtractor
7+
from ..utils import extract_attributes
8+
9+
10+
class BFMTVBaseIE(InfoExtractor):
    # Shared URL patterns and Brightcove helper for the bfmtv.com extractors.

    # Common scheme/host prefix of every bfmtv.com URL pattern.
    _VALID_URL_BASE = r'https?://(?:www\.)?bfmtv\.com/'
    # Page URL template: '%s' is filled in by subclasses with the letter that
    # precedes the trailing [A-Z] type letter; the id is the 12-digit number.
    _VALID_URL_TMPL = _VALID_URL_BASE + r'(?:[^/]+/)*[^/?&#]+_%s[A-Z]-(?P<id>\d{12})\.html'
    # Captures the opening <div ... class="video_block" ...> tag.
    _VIDEO_BLOCK_REGEX = r'(<div[^>]+class="video_block"[^>]*>)'
    # Placeholders, in order: account id, player id, video id.
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/%s_default/index.html?videoId=%s'

    def _brightcove_url_result(self, video_id, video_block):
        # Build a url_result that hands the video over to the 'BrightcoveNew'
        # extractor. `video_block` is a mapping of the video block's
        # attributes; missing or empty account/player ids fall back to the
        # hard-coded defaults below.
        # NOTE(review): the tests in this file expect uploader_id
        # '876450610001', which differs from this fallback account id --
        # confirm which value is intended.
        account_id = video_block.get('accountid') or '876450612001'
        player_id = video_block.get('playerid') or 'I2qBTln4u'
        brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % (
            account_id, player_id, video_id)
        return self.url_result(brightcove_url, 'BrightcoveNew', video_id)
22+
23+
24+
class BFMTVIE(BFMTVBaseIE):
    # Extractor for bfmtv.com pages whose URL matches the 'V' variant of the
    # shared URL template.
    IE_NAME = 'bfmtv'
    _VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'V'
    _TESTS = [
        {
            'url': 'https://www.bfmtv.com/politique/emmanuel-macron-l-islam-est-une-religion-qui-vit-une-crise-aujourd-hui-partout-dans-le-monde_VN-202010020146.html',
            'info_dict': {
                'id': '6196747868001',
                'ext': 'mp4',
                'title': 'Emmanuel Macron: "L\'Islam est une religion qui vit une crise aujourd’hui, partout dans le monde"',
                'description': 'Le Président s\'exprime sur la question du séparatisme depuis les Mureaux, dans les Yvelines.',
                'uploader_id': '876450610001',
                'upload_date': '20201002',
                'timestamp': 1601629620,
            },
        },
    ]

    def _real_extract(self, url):
        # Find the first video block tag on the page and delegate to
        # Brightcove using its 'videoid' attribute (KeyError if the
        # attribute is absent).
        bfmtv_id = self._match_id(url)
        webpage = self._download_webpage(url, bfmtv_id)
        block_tag = self._search_regex(
            self._VIDEO_BLOCK_REGEX, webpage, 'video block')
        block_attrs = extract_attributes(block_tag)
        return self._brightcove_url_result(block_attrs['videoid'], block_attrs)
46+
47+
48+
class BFMTVLiveIE(BFMTVIE):
    # Extractor for the bfmtv.com "en-direct" pages. Only the URL pattern,
    # name and tests differ; extraction is inherited from BFMTVIE.
    IE_NAME = 'bfmtv:live'
    # The id group is the path itself: an optional single section followed
    # by 'en-direct'.
    _VALID_URL = BFMTVBaseIE._VALID_URL_BASE + r'(?P<id>(?:[^/]+/)?en-direct)'
    _TESTS = [
        {
            'url': 'https://www.bfmtv.com/en-direct/',
            'info_dict': {
                'id': '5615950982001',
                'ext': 'mp4',
                'title': r're:^le direct BFMTV WEB \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
                'uploader_id': '876450610001',
                'upload_date': '20171018',
                'timestamp': 1508329950,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://www.bfmtv.com/economie/en-direct/',
            'only_matching': True,
        },
    ]
68+
69+
70+
class BFMTVArticleIE(BFMTVBaseIE):
    # Extractor for bfmtv.com pages whose URL matches the 'A' variant of the
    # shared URL template; the result is a playlist of the page's videos.
    IE_NAME = 'bfmtv:article'
    _VALID_URL = BFMTVBaseIE._VALID_URL_TMPL % 'A'
    _TESTS = [
        {
            'url': 'https://www.bfmtv.com/sante/covid-19-un-responsable-de-l-institut-pasteur-se-demande-quand-la-france-va-se-reconfiner_AV-202101060198.html',
            'info_dict': {
                'id': '202101060198',
                'title': 'Covid-19: un responsable de l\'Institut Pasteur se demande "quand la France va se reconfiner"',
                'description': 'md5:947974089c303d3ac6196670ae262843',
            },
            'playlist_count': 2,
        },
        {
            'url': 'https://www.bfmtv.com/international/pour-bolsonaro-le-bresil-est-en-faillite-mais-il-ne-peut-rien-faire_AD-202101060232.html',
            'only_matching': True,
        },
        {
            'url': 'https://www.bfmtv.com/sante/covid-19-oui-le-vaccin-de-pfizer-distribue-en-france-a-bien-ete-teste-sur-des-personnes-agees_AN-202101060275.html',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        # Collect one Brightcove entry per video block found on the page and
        # return them as a playlist keyed by the page id.
        bfmtv_id = self._match_id(url)
        webpage = self._download_webpage(url, bfmtv_id)

        entries = []
        for block_tag in re.findall(self._VIDEO_BLOCK_REGEX, webpage):
            block_attrs = extract_attributes(block_tag)
            video_id = block_attrs.get('videoid')
            # Blocks without a (non-empty) 'videoid' attribute are ignored.
            if video_id:
                entries.append(
                    self._brightcove_url_result(video_id, block_attrs))

        # The title lookup is non-fatal; the description is taken from the
        # 'og:description' or 'description' meta tag.
        playlist_title = self._og_search_title(webpage, fatal=False)
        playlist_description = self._html_search_meta(
            ['og:description', 'description'], webpage)
        return self.playlist_result(
            entries, bfmtv_id, playlist_title, playlist_description)

youtube_dl/extractor/bibeltv.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# coding: utf-8
2+
from __future__ import unicode_literals
3+
4+
from .common import InfoExtractor
5+
6+
7+
class BibelTVIE(InfoExtractor):
    # Extractor for bibeltv.de Mediathek video pages; the numeric id in the
    # URL is passed to Brightcove as a "ref:" reference id.
    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?:crn/)?(?P<id>\d+)'
    # '%s' receives the numeric id matched from the page URL.
    BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/5840105145001/default_default/index.html?videoId=ref:%s'
    _TESTS = [
        {
            'url': 'https://www.bibeltv.de/mediathek/videos/329703-sprachkurs-in-malaiisch',
            'md5': '252f908192d611de038b8504b08bf97f',
            'info_dict': {
                'id': 'ref:329703',
                'ext': 'mp4',
                'title': 'Sprachkurs in Malaiisch',
                'description': 'md5:3e9f197d29ee164714e67351cf737dfe',
                'timestamp': 1608316701,
                'uploader_id': '5840105145001',
                'upload_date': '20201218',
            },
        },
        {
            'url': 'https://www.bibeltv.de/mediathek/videos/crn/326374',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        # No page download: the Brightcove player URL is built directly from
        # the id in the URL and handed to the 'BrightcoveNew' extractor.
        crn_id = self._match_id(url)
        brightcove_url = self.BRIGHTCOVE_URL_TEMPLATE % crn_id
        return self.url_result(brightcove_url, 'BrightcoveNew')

0 commit comments

Comments
 (0)