Skip to content

Commit 2765503

Browse files
committed
[vimeo:review] improve config url extraction and extract original format(closes #20305)
1 parent 0d08bcd commit 2765503

File tree

1 file changed

+38
-26
lines changed

1 file changed

+38
-26
lines changed

youtube_dl/extractor/vimeo.py

Lines changed: 38 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,32 @@ def _parse_config(self, config, video_id):
195195
'subtitles': subtitles,
196196
}
197197

198+
def _extract_original_format(self, url, video_id):
    """Build a format dict for the source file advertised for a video.

    Requests ``url`` with the ``action=load_download_config`` query
    parameter and an ``X-Requested-With: XMLHttpRequest`` header. The
    request is non-fatal (``fatal=False``). The ``source_file`` entry of
    the JSON response is then turned into a format dict.

    Returns a dict with the keys ``url``, ``ext``, ``width``, ``height``,
    ``filesize``, ``format_id`` and ``preference``. Returns None when:

    * the request yields no data,
    * ``source_file`` is not a dict,
    * ``download_url`` is missing or empty,
    * the file is flagged ``is_cold`` or ``is_defrosting``, or
    * ``_is_valid_url`` rejects the download URL.
    """
    config = self._download_json(
        url, video_id, fatal=False,
        query={'action': 'load_download_config'},
        headers={'X-Requested-With': 'XMLHttpRequest'})
    if not config:
        return None

    source = config.get('source_file')
    if not isinstance(source, dict):
        return None

    source_url = source.get('download_url')
    if not source_url:
        return None
    # Files flagged as cold or defrosting are not offered as a format.
    if source.get('is_cold') or source.get('is_defrosting'):
        return None

    # The public name doubles as the format id and as the label used in
    # the URL check note.
    name = source.get('public_name', 'Original')
    if not self._is_valid_url(source_url, video_id, '%s video' % name):
        return None

    # Extension preference: the string declared in the response, then
    # whatever can be determined from the URL, then 'mp4' as last resort.
    declared_ext = try_get(source, lambda x: x['extension'], compat_str)
    ext = (declared_ext or determine_ext(source_url, None) or 'mp4').lower()

    return {
        'url': source_url,
        'ext': ext,
        'width': int_or_none(source.get('width')),
        'height': int_or_none(source.get('height')),
        'filesize': parse_filesize(source.get('size')),
        'format_id': name,
        'preference': 1,
    }
223+
198224

199225
class VimeoIE(VimeoBaseInfoExtractor):
200226
"""Information extractor for vimeo.com."""
@@ -659,29 +685,11 @@ def is_rented():
659685
comment_count = None
660686

661687
formats = []
662-
download_request = sanitized_Request('https://vimeo.com/%s?action=load_download_config' % video_id, headers={
663-
'X-Requested-With': 'XMLHttpRequest'})
664-
download_data = self._download_json(download_request, video_id, fatal=False)
665-
if download_data:
666-
source_file = download_data.get('source_file')
667-
if isinstance(source_file, dict):
668-
download_url = source_file.get('download_url')
669-
if download_url and not source_file.get('is_cold') and not source_file.get('is_defrosting'):
670-
source_name = source_file.get('public_name', 'Original')
671-
if self._is_valid_url(download_url, video_id, '%s video' % source_name):
672-
ext = (try_get(
673-
source_file, lambda x: x['extension'],
674-
compat_str) or determine_ext(
675-
download_url, None) or 'mp4').lower()
676-
formats.append({
677-
'url': download_url,
678-
'ext': ext,
679-
'width': int_or_none(source_file.get('width')),
680-
'height': int_or_none(source_file.get('height')),
681-
'filesize': parse_filesize(source_file.get('size')),
682-
'format_id': source_name,
683-
'preference': 1,
684-
})
688+
689+
source_format = self._extract_original_format(
690+
'https://vimeo.com/' + video_id, video_id)
691+
if source_format:
692+
formats.append(source_format)
685693

686694
info_dict_config = self._parse_config(config, video_id)
687695
formats.extend(info_dict_config['formats'])
@@ -940,7 +948,7 @@ def _real_extract(self, url):
940948
class VimeoReviewIE(VimeoBaseInfoExtractor):
941949
IE_NAME = 'vimeo:review'
942950
IE_DESC = 'Review pages on vimeo'
943-
_VALID_URL = r'https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)'
951+
_VALID_URL = r'(?P<url>https://vimeo\.com/[^/]+/review/(?P<id>[^/]+)/[0-9a-f]{10})'
944952
_TESTS = [{
945953
'url': 'https://vimeo.com/user21297594/review/75524534/3c257a1b5d',
946954
'md5': 'c507a72f780cacc12b2248bb4006d253',
@@ -992,18 +1000,22 @@ def _get_config_url(self, webpage_url, video_id, video_password_verified=False):
9921000
data = self._parse_json(self._search_regex(
9931001
r'window\s*=\s*_extend\(window,\s*({.+?})\);', webpage, 'data',
9941002
default=NO_DEFAULT if video_password_verified else '{}'), video_id)
995-
config_url = data.get('vimeo_esi', {}).get('config', {}).get('configUrl')
1003+
config = data.get('vimeo_esi', {}).get('config', {})
1004+
config_url = config.get('configUrl') or try_get(config, lambda x: x['clipData']['configUrl'])
9961005
if config_url is None:
9971006
self._verify_video_password(webpage_url, video_id, webpage)
9981007
config_url = self._get_config_url(
9991008
webpage_url, video_id, video_password_verified=True)
10001009
return config_url
10011010

10021011
def _real_extract(self, url):
1003-
video_id = self._match_id(url)
1012+
page_url, video_id = re.match(self._VALID_URL, url).groups()
10041013
config_url = self._get_config_url(url, video_id)
10051014
config = self._download_json(config_url, video_id)
10061015
info_dict = self._parse_config(config, video_id)
1016+
source_format = self._extract_original_format(page_url, video_id)
1017+
if source_format:
1018+
info_dict['formats'].append(source_format)
10071019
self._vimeo_sort_formats(info_dict['formats'])
10081020
info_dict['id'] = video_id
10091021
return info_dict

0 commit comments

Comments
 (0)