Skip to content

Commit 8e65950

Browse files
committed
Update youtube-dl 2021.04.07
1 parent 2e6100a commit 8e65950

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

75 files changed

+3689
-1475
lines changed

youtube_dl/YoutubeDL.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1511,14 +1511,18 @@ def sanitize_numeric_fields(info):
15111511
if 'display_id' not in info_dict and 'id' in info_dict:
15121512
info_dict['display_id'] = info_dict['id']
15131513

1514-
if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
1515-
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
1516-
# see http://bugs.python.org/issue1646728)
1517-
try:
1518-
upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
1519-
info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
1520-
except (ValueError, OverflowError, OSError):
1521-
pass
1514+
for ts_key, date_key in (
1515+
('timestamp', 'upload_date'),
1516+
('release_timestamp', 'release_date'),
1517+
):
1518+
if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
1519+
# Working around out-of-range timestamp values (e.g. negative ones on Windows,
1520+
# see http://bugs.python.org/issue1646728)
1521+
try:
1522+
upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
1523+
info_dict[date_key] = upload_date.strftime('%Y%m%d')
1524+
except (ValueError, OverflowError, OSError):
1525+
pass
15221526

15231527
# Auto generate title fields corresponding to the *_number fields when missing
15241528
# in order to always have clean titles. This is very common for TV series.

youtube_dl/compat.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,15 @@ def __init__(self, version, name, value, *args, **kwargs):
7373
except ImportError: # Python 2
7474
import Cookie as compat_cookies
7575

76+
# Export a SimpleCookie class under a single compat name.  On every
# interpreter except Python 2 the stock class is used unchanged; on Python 2 a
# thin subclass converts compat_str input with str() before delegating to the
# stock load().
if sys.version_info[0] != 2:
    compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
else:
    class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
        def load(self, rawdata):
            # Only compat_str instances are coerced; any other input is
            # passed through to the parent implementation untouched.
            cookie_data = str(rawdata) if isinstance(rawdata, compat_str) else rawdata
            return super(compat_cookies_SimpleCookie, self).load(cookie_data)
84+
7685
try:
7786
import html.entities as compat_html_entities
7887
except ImportError: # Python 2
@@ -3000,6 +3009,7 @@ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
30003009
'compat_cookiejar',
30013010
'compat_cookiejar_Cookie',
30023011
'compat_cookies',
3012+
'compat_cookies_SimpleCookie',
30033013
'compat_ctypes_WINFUNCTYPE',
30043014
'compat_etree_Element',
30053015
'compat_etree_fromstring',

youtube_dl/extractor/apa.py

Lines changed: 24 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -6,25 +6,21 @@
66
from .common import InfoExtractor
77
from ..utils import (
88
determine_ext,
9-
js_to_json,
9+
int_or_none,
1010
url_or_none,
1111
)
1212

1313

1414
class APAIE(InfoExtractor):
15-
_VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
15+
_VALID_URL = r'(?P<base_url>https?://[^/]+\.apa\.at)/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
1616
_TESTS = [{
1717
'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
1818
'md5': '2b12292faeb0a7d930c778c7a5b4759b',
1919
'info_dict': {
20-
'id': 'jjv85FdZ',
20+
'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
2121
'ext': 'mp4',
22-
'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
23-
'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
22+
'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
2423
'thumbnail': r're:^https?://.*\.jpg$',
25-
'duration': 254,
26-
'timestamp': 1519211149,
27-
'upload_date': '20180221',
2824
},
2925
}, {
3026
'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
@@ -46,9 +42,11 @@ def _extract_urls(webpage):
4642
webpage)]
4743

4844
def _real_extract(self, url):
49-
video_id = self._match_id(url)
45+
mobj = re.match(self._VALID_URL, url)
46+
video_id, base_url = mobj.group('id', 'base_url')
5047

51-
webpage = self._download_webpage(url, video_id)
48+
webpage = self._download_webpage(
49+
'%s/player/%s' % (base_url, video_id), video_id)
5250

5351
jwplatform_id = self._search_regex(
5452
r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
@@ -59,16 +57,18 @@ def _real_extract(self, url):
5957
'jwplatform:' + jwplatform_id, ie='JWPlatform',
6058
video_id=video_id)
6159

62-
sources = self._parse_json(
63-
self._search_regex(
64-
r'sources\s*=\s*(\[.+?\])\s*;', webpage, 'sources'),
65-
video_id, transform_source=js_to_json)
60+
def extract(field, name=None):
61+
return self._search_regex(
62+
r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
63+
webpage, name or field, default=None, group='value')
64+
65+
title = extract('title') or video_id
66+
description = extract('description')
67+
thumbnail = extract('poster', 'thumbnail')
6668

6769
formats = []
68-
for source in sources:
69-
if not isinstance(source, dict):
70-
continue
71-
source_url = url_or_none(source.get('file'))
70+
for format_id in ('hls', 'progressive'):
71+
source_url = url_or_none(extract(format_id))
7272
if not source_url:
7373
continue
7474
ext = determine_ext(source_url)
@@ -77,18 +77,19 @@ def _real_extract(self, url):
7777
source_url, video_id, 'mp4', entry_protocol='m3u8_native',
7878
m3u8_id='hls', fatal=False))
7979
else:
80+
height = int_or_none(self._search_regex(
81+
r'(\d+)\.mp4', source_url, 'height', default=None))
8082
formats.append({
8183
'url': source_url,
84+
'format_id': format_id,
85+
'height': height,
8286
})
8387
self._sort_formats(formats)
8488

85-
thumbnail = self._search_regex(
86-
r'image\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
87-
'thumbnail', fatal=False, group='url')
88-
8989
return {
9090
'id': video_id,
91-
'title': video_id,
91+
'title': title,
92+
'description': description,
9293
'thumbnail': thumbnail,
9394
'formats': formats,
9495
}

youtube_dl/extractor/applepodcasts.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ def _real_extract(self, url):
4242
ember_data = self._parse_json(self._search_regex(
4343
r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
4444
webpage, 'ember data'), episode_id)
45+
ember_data = ember_data.get(episode_id) or ember_data
4546
episode = ember_data['data']['attributes']
4647
description = episode.get('description') or {}
4748

youtube_dl/extractor/ard.py

Lines changed: 44 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -284,20 +284,42 @@ def _real_extract(self, url):
284284

285285
formats = []
286286
for a in video_node.findall('.//asset'):
287+
file_name = xpath_text(a, './fileName', default=None)
288+
if not file_name:
289+
continue
290+
format_type = a.attrib.get('type')
291+
format_url = url_or_none(file_name)
292+
if format_url:
293+
ext = determine_ext(file_name)
294+
if ext == 'm3u8':
295+
formats.extend(self._extract_m3u8_formats(
296+
format_url, display_id, 'mp4', entry_protocol='m3u8_native',
297+
m3u8_id=format_type or 'hls', fatal=False))
298+
continue
299+
elif ext == 'f4m':
300+
formats.extend(self._extract_f4m_formats(
301+
update_url_query(format_url, {'hdcore': '3.7.0'}),
302+
display_id, f4m_id=format_type or 'hds', fatal=False))
303+
continue
287304
f = {
288-
'format_id': a.attrib['type'],
289-
'width': int_or_none(a.find('./frameWidth').text),
290-
'height': int_or_none(a.find('./frameHeight').text),
291-
'vbr': int_or_none(a.find('./bitrateVideo').text),
292-
'abr': int_or_none(a.find('./bitrateAudio').text),
293-
'vcodec': a.find('./codecVideo').text,
294-
'tbr': int_or_none(a.find('./totalBitrate').text),
305+
'format_id': format_type,
306+
'width': int_or_none(xpath_text(a, './frameWidth')),
307+
'height': int_or_none(xpath_text(a, './frameHeight')),
308+
'vbr': int_or_none(xpath_text(a, './bitrateVideo')),
309+
'abr': int_or_none(xpath_text(a, './bitrateAudio')),
310+
'vcodec': xpath_text(a, './codecVideo'),
311+
'tbr': int_or_none(xpath_text(a, './totalBitrate')),
295312
}
296-
if a.find('./serverPrefix').text:
297-
f['url'] = a.find('./serverPrefix').text
298-
f['playpath'] = a.find('./fileName').text
313+
server_prefix = xpath_text(a, './serverPrefix', default=None)
314+
if server_prefix:
315+
f.update({
316+
'url': server_prefix,
317+
'playpath': file_name,
318+
})
299319
else:
300-
f['url'] = a.find('./fileName').text
320+
if not format_url:
321+
continue
322+
f['url'] = format_url
301323
formats.append(f)
302324
self._sort_formats(formats)
303325

@@ -313,7 +335,7 @@ def _real_extract(self, url):
313335

314336

315337
class ARDBetaMediathekIE(ARDMediathekBaseIE):
316-
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
338+
_VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?:[^/]+/)?(?:player|live|video)/(?:[^/]+/)*(?P<id>Y3JpZDovL[a-zA-Z0-9]+)'
317339
_TESTS = [{
318340
'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
319341
'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
@@ -343,22 +365,22 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
343365
}, {
344366
'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
345367
'only_matching': True,
368+
}, {
369+
'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
370+
'only_matching': True,
371+
}, {
372+
'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
373+
'only_matching': True,
346374
}]
347375

348376
def _real_extract(self, url):
349-
mobj = re.match(self._VALID_URL, url)
350-
video_id = mobj.group('video_id')
351-
display_id = mobj.group('display_id')
352-
if display_id:
353-
display_id = display_id.rstrip('/')
354-
if not display_id:
355-
display_id = video_id
377+
video_id = self._match_id(url)
356378

357379
player_page = self._download_json(
358380
'https://api.ardmediathek.de/public-gateway',
359-
display_id, data=json.dumps({
381+
video_id, data=json.dumps({
360382
'query': '''{
361-
playerPage(client:"%s", clipId: "%s") {
383+
playerPage(client: "ard", clipId: "%s") {
362384
blockedByFsk
363385
broadcastedOn
364386
maturityContentRating
@@ -388,7 +410,7 @@ def _real_extract(self, url):
388410
}
389411
}
390412
}
391-
}''' % (mobj.group('client'), video_id),
413+
}''' % video_id,
392414
}).encode(), headers={
393415
'Content-Type': 'application/json'
394416
})['data']['playerPage']
@@ -413,7 +435,6 @@ def _real_extract(self, url):
413435
r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
414436
info.update({
415437
'age_limit': age_limit,
416-
'display_id': display_id,
417438
'title': title,
418439
'description': description,
419440
'timestamp': unified_timestamp(player_page.get('broadcastedOn')),

youtube_dl/extractor/arnes.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
# coding: utf-8
2+
from __future__ import unicode_literals
3+
4+
from .common import InfoExtractor
5+
from ..compat import (
6+
compat_parse_qs,
7+
compat_urllib_parse_urlparse,
8+
)
9+
from ..utils import (
10+
float_or_none,
11+
int_or_none,
12+
parse_iso8601,
13+
remove_start,
14+
)
15+
16+
17+
class ArnesIE(InfoExtractor):
18+
IE_NAME = 'video.arnes.si'
19+
IE_DESC = 'Arnes Video'
20+
_VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P<id>[0-9a-zA-Z]{12})'
21+
_TESTS = [{
22+
'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
23+
'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
24+
'info_dict': {
25+
'id': 'a1qrWTOQfVoU',
26+
'ext': 'mp4',
27+
'title': 'Linearna neodvisnost, definicija',
28+
'description': 'Linearna neodvisnost, definicija',
29+
'license': 'PRIVATE',
30+
'creator': 'Polona Oblak',
31+
'timestamp': 1585063725,
32+
'upload_date': '20200324',
33+
'channel': 'Polona Oblak',
34+
'channel_id': 'q6pc04hw24cj',
35+
'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
36+
'duration': 596.75,
37+
'view_count': int,
38+
'tags': ['linearna_algebra'],
39+
'start_time': 10,
40+
}
41+
}, {
42+
'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
43+
'only_matching': True,
44+
}, {
45+
'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
46+
'only_matching': True,
47+
}, {
48+
'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
49+
'only_matching': True,
50+
}, {
51+
'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
52+
'only_matching': True,
53+
}, {
54+
'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
55+
'only_matching': True,
56+
}]
57+
_BASE_URL = 'https://video.arnes.si'
58+
59+
def _real_extract(self, url):
60+
video_id = self._match_id(url)
61+
62+
video = self._download_json(
63+
self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
64+
title = video['title']
65+
66+
formats = []
67+
for media in (video.get('media') or []):
68+
media_url = media.get('url')
69+
if not media_url:
70+
continue
71+
formats.append({
72+
'url': self._BASE_URL + media_url,
73+
'format_id': remove_start(media.get('format'), 'FORMAT_'),
74+
'format_note': media.get('formatTranslation'),
75+
'width': int_or_none(media.get('width')),
76+
'height': int_or_none(media.get('height')),
77+
})
78+
self._sort_formats(formats)
79+
80+
channel = video.get('channel') or {}
81+
channel_id = channel.get('url')
82+
thumbnail = video.get('thumbnailUrl')
83+
84+
return {
85+
'id': video_id,
86+
'title': title,
87+
'formats': formats,
88+
'thumbnail': self._BASE_URL + thumbnail,
89+
'description': video.get('description'),
90+
'license': video.get('license'),
91+
'creator': video.get('author'),
92+
'timestamp': parse_iso8601(video.get('creationTime')),
93+
'channel': channel.get('name'),
94+
'channel_id': channel_id,
95+
'channel_url': self._BASE_URL + '/?channel=' + channel_id if channel_id else None,
96+
'duration': float_or_none(video.get('duration'), 1000),
97+
'view_count': int_or_none(video.get('views')),
98+
'tags': video.get('hashtags'),
99+
'start_time': int_or_none(compat_parse_qs(
100+
compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
101+
}

0 commit comments

Comments
 (0)