Skip to content

Commit 2a54da5

Browse files
committed
Update youtube-dl 2021.02.10
1 parent 0f06946 commit 2a54da5

32 files changed

+1637
-1872
lines changed

youtube_dl/extractor/abcnews.py

Lines changed: 72 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
# coding: utf-8
22
from __future__ import unicode_literals
33

4-
import calendar
54
import re
6-
import time
75

86
from .amp import AMPIE
97
from .common import InfoExtractor
10-
from .youtube import YoutubeIE
11-
from ..compat import compat_urlparse
8+
from ..utils import (
9+
parse_duration,
10+
parse_iso8601,
11+
try_get,
12+
)
1213

1314

1415
class AbcNewsVideoIE(AMPIE):
@@ -18,8 +19,8 @@ class AbcNewsVideoIE(AMPIE):
1819
(?:
1920
abcnews\.go\.com/
2021
(?:
21-
[^/]+/video/(?P<display_id>[0-9a-z-]+)-|
22-
video/embed\?.*?\bid=
22+
(?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-|
23+
video/(?:embed|itemfeed)\?.*?\bid=
2324
)|
2425
fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
2526
)
@@ -36,6 +37,8 @@ class AbcNewsVideoIE(AMPIE):
3637
'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
3738
'duration': 180,
3839
'thumbnail': r're:^https?://.*\.jpg$',
40+
'timestamp': 1380454200,
41+
'upload_date': '20130929',
3942
},
4043
'params': {
4144
# m3u8 download
@@ -47,6 +50,12 @@ class AbcNewsVideoIE(AMPIE):
4750
}, {
4851
'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
4952
'only_matching': True,
53+
}, {
54+
'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
55+
'only_matching': True,
56+
}, {
57+
'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
58+
'only_matching': True,
5059
}]
5160

5261
def _real_extract(self, url):
@@ -67,28 +76,23 @@ class AbcNewsIE(InfoExtractor):
6776
_VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
6877

6978
_TESTS = [{
70-
'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
79+
# Youtube Embeds
80+
'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
7181
'info_dict': {
72-
'id': '10505354',
73-
'ext': 'flv',
74-
'display_id': 'dramatic-video-rare-death-job-america',
75-
'title': 'Occupational Hazards',
76-
'description': 'Nightline investigates the dangers that lurk at various jobs.',
77-
'thumbnail': r're:^https?://.*\.jpg$',
78-
'upload_date': '20100428',
79-
'timestamp': 1272412800,
82+
'id': '51286501',
83+
'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player",
84+
'description': 'Billingsley went from a child actor to Hollywood power player.',
8085
},
81-
'add_ie': ['AbcNewsVideo'],
86+
'playlist_count': 5,
8287
}, {
8388
'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
8489
'info_dict': {
8590
'id': '38897857',
8691
'ext': 'mp4',
87-
'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
8892
'title': 'Justin Timberlake Drops Hints For Secret Single',
8993
'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
90-
'upload_date': '20160515',
91-
'timestamp': 1463329500,
94+
'upload_date': '20160505',
95+
'timestamp': 1462442280,
9296
},
9397
'params': {
9498
# m3u8 download
@@ -100,49 +104,55 @@ class AbcNewsIE(InfoExtractor):
100104
}, {
101105
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
102106
'only_matching': True,
107+
}, {
108+
# inline.type == 'video'
109+
'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
110+
'only_matching': True,
103111
}]
104112

105113
def _real_extract(self, url):
106-
mobj = re.match(self._VALID_URL, url)
107-
display_id = mobj.group('display_id')
108-
video_id = mobj.group('id')
109-
110-
webpage = self._download_webpage(url, video_id)
111-
video_url = self._search_regex(
112-
r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
113-
full_video_url = compat_urlparse.urljoin(url, video_url)
114-
115-
youtube_url = YoutubeIE._extract_url(webpage)
116-
117-
timestamp = None
118-
date_str = self._html_search_regex(
119-
r'<span[^>]+class="timestamp">([^<]+)</span>',
120-
webpage, 'timestamp', fatal=False)
121-
if date_str:
122-
tz_offset = 0
123-
if date_str.endswith(' ET'): # Eastern Time
124-
tz_offset = -5
125-
date_str = date_str[:-3]
126-
date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
127-
for date_format in date_formats:
128-
try:
129-
timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
130-
except ValueError:
131-
continue
132-
if timestamp is not None:
133-
timestamp -= tz_offset * 3600
134-
135-
entry = {
136-
'_type': 'url_transparent',
137-
'ie_key': AbcNewsVideoIE.ie_key(),
138-
'url': full_video_url,
139-
'id': video_id,
140-
'display_id': display_id,
141-
'timestamp': timestamp,
142-
}
143-
144-
if youtube_url:
145-
entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
146-
return self.playlist_result(entries)
147-
148-
return entry
114+
story_id = self._match_id(url)
115+
webpage = self._download_webpage(url, story_id)
116+
story = self._parse_json(self._search_regex(
117+
r"window\['__abcnews__'\]\s*=\s*({.+?});",
118+
webpage, 'data'), story_id)['page']['content']['story']['everscroll'][0]
119+
article_contents = story.get('articleContents') or {}
120+
121+
def entries():
122+
featured_video = story.get('featuredVideo') or {}
123+
feed = try_get(featured_video, lambda x: x['video']['feed'])
124+
if feed:
125+
yield {
126+
'_type': 'url',
127+
'id': featured_video.get('id'),
128+
'title': featured_video.get('name'),
129+
'url': feed,
130+
'thumbnail': featured_video.get('images'),
131+
'description': featured_video.get('description'),
132+
'timestamp': parse_iso8601(featured_video.get('uploadDate')),
133+
'duration': parse_duration(featured_video.get('duration')),
134+
'ie_key': AbcNewsVideoIE.ie_key(),
135+
}
136+
137+
for inline in (article_contents.get('inlines') or []):
138+
inline_type = inline.get('type')
139+
if inline_type == 'iframe':
140+
iframe_url = try_get(inline, lambda x: x['attrs']['src'])
141+
if iframe_url:
142+
yield self.url_result(iframe_url)
143+
elif inline_type == 'video':
144+
video_id = inline.get('id')
145+
if video_id:
146+
yield {
147+
'_type': 'url',
148+
'id': video_id,
149+
'url': 'http://abcnews.go.com/video/embed?id=' + video_id,
150+
'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'),
151+
'description': inline.get('description'),
152+
'duration': parse_duration(inline.get('duration')),
153+
'ie_key': AbcNewsVideoIE.ie_key(),
154+
}
155+
156+
return self.playlist_result(
157+
entries(), story_id, article_contents.get('headline'),
158+
article_contents.get('subHead'))

youtube_dl/extractor/adn.py

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
strip_or_none,
2727
try_get,
2828
unified_strdate,
29+
urlencode_postdata,
2930
)
3031

3132

@@ -51,9 +52,12 @@ class ADNIE(InfoExtractor):
5152
}
5253
}
5354

55+
_NETRC_MACHINE = 'animedigitalnetwork'
5456
_BASE_URL = 'http://animedigitalnetwork.fr'
5557
_API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
5658
_PLAYER_BASE_URL = _API_BASE_URL + 'player/'
59+
_HEADERS = {}
60+
_LOGIN_ERR_MESSAGE = 'Unable to log in'
5761
_RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
5862
_POS_ALIGN_MAP = {
5963
'start': 1,
@@ -129,19 +133,42 @@ def _get_subtitles(self, sub_url, video_id):
129133
}])
130134
return subtitles
131135

136+
def _real_initialize(self):
    """Log in to the ADN API when credentials are configured.

    On success the bearer token is stored in ``self._HEADERS``, which
    ``_real_extract`` passes along with the player configuration request.
    A failed login is never fatal: a warning is reported and extraction
    continues without authentication headers.
    """
    username, password = self._get_login_info()
    if not username:
        return
    try:
        # Deliberately no fatal=False here: with it, _download_json reports
        # the failure itself and returns a falsy value instead of raising,
        # so the handler below (which surfaces the API's own message for a
        # 401 response) could never run.
        response = self._download_json(
            self._API_BASE_URL + 'authentication/login', None,
            'Logging in', self._LOGIN_ERR_MESSAGE,
            data=urlencode_postdata({
                'password': password,
                'rememberMe': False,
                'source': 'Web',
                'username': username,
            }))
    except ExtractorError as e:
        message = None
        if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:
            # Prefer the server-provided explanation over the generic one.
            resp = self._parse_json(
                e.cause.read().decode(), None, fatal=False) or {}
            message = resp.get('message') or resp.get('code')
        self.report_warning(message or self._LOGIN_ERR_MESSAGE)
        return
    access_token = (response or {}).get('accessToken')
    if access_token:
        self._HEADERS = {'authorization': 'Bearer ' + access_token}
159+
132160
def _real_extract(self, url):
133161
video_id = self._match_id(url)
134162
video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
135163
player = self._download_json(
136164
video_base_url + 'configuration', video_id,
137-
'Downloading player config JSON metadata')['player']
165+
'Downloading player config JSON metadata',
166+
headers=self._HEADERS)['player']
138167
options = player['options']
139168

140169
user = options['user']
141170
if not user.get('hasAccess'):
142-
raise ExtractorError(
143-
'This video is only available for paying users', expected=True)
144-
# self.raise_login_required() # FIXME: Login is not implemented
171+
self.raise_login_required()
145172

146173
token = self._download_json(
147174
user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
@@ -188,8 +215,7 @@ def _real_extract(self, url):
188215
message = error.get('message')
189216
if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
190217
self.raise_geo_restricted(msg=message)
191-
else:
192-
raise ExtractorError(message)
218+
raise ExtractorError(message)
193219
else:
194220
raise ExtractorError('Giving up retrying')
195221

youtube_dl/extractor/aenetworks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ class AENetworksShowIE(AENetworksListBaseIE):
252252
_TESTS = [{
253253
'url': 'http://www.history.com/shows/ancient-aliens',
254254
'info_dict': {
255-
'id': 'SH012427480000',
255+
'id': 'SERIES1574',
256256
'title': 'Ancient Aliens',
257257
'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
258258
},

youtube_dl/extractor/amp.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
int_or_none,
99
mimetype2ext,
1010
parse_iso8601,
11+
unified_timestamp,
1112
url_or_none,
1213
)
1314

@@ -88,7 +89,7 @@ def get_media_node(name, default=None):
8889

8990
self._sort_formats(formats)
9091

91-
timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
92+
timestamp = unified_timestamp(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
9293

9394
return {
9495
'id': video_id,

youtube_dl/extractor/archiveorg.py

Lines changed: 42 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,15 +2,17 @@
22

33
from .common import InfoExtractor
44
from ..utils import (
5-
unified_strdate,
65
clean_html,
6+
extract_attributes,
7+
unified_strdate,
8+
unified_timestamp,
79
)
810

911

1012
class ArchiveOrgIE(InfoExtractor):
1113
IE_NAME = 'archive.org'
1214
IE_DESC = 'archive.org videos'
13-
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
15+
_VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#&]+)'
1416
_TESTS = [{
1517
'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
1618
'md5': '8af1d4cf447933ed3c7f4871162602db',
@@ -19,8 +21,11 @@ class ArchiveOrgIE(InfoExtractor):
1921
'ext': 'ogg',
2022
'title': '1968 Demo - FJCC Conference Presentation Reel #1',
2123
'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
22-
'upload_date': '19681210',
23-
'uploader': 'SRI International'
24+
'creator': 'SRI International',
25+
'release_date': '19681210',
26+
'uploader': 'SRI International',
27+
'timestamp': 1268695290,
28+
'upload_date': '20100315',
2429
}
2530
}, {
2631
'url': 'https://archive.org/details/Cops1922',
@@ -29,22 +34,43 @@ class ArchiveOrgIE(InfoExtractor):
2934
'id': 'Cops1922',
3035
'ext': 'mp4',
3136
'title': 'Buster Keaton\'s "Cops" (1922)',
32-
'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6',
37+
'description': 'md5:43a603fd6c5b4b90d12a96b921212b9c',
38+
'timestamp': 1387699629,
39+
'upload_date': '20131222',
3340
}
3441
}, {
3542
'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
3643
'only_matching': True,
44+
}, {
45+
'url': 'https://archive.org/details/MSNBCW_20131125_040000_To_Catch_a_Predator/',
46+
'only_matching': True,
3747
}]
3848

3949
def _real_extract(self, url):
4050
video_id = self._match_id(url)
4151
webpage = self._download_webpage(
4252
'http://archive.org/embed/' + video_id, video_id)
43-
jwplayer_playlist = self._parse_json(self._search_regex(
44-
r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
45-
webpage, 'jwplayer playlist'), video_id)
46-
info = self._parse_jwplayer_data(
47-
{'playlist': jwplayer_playlist}, video_id, base_url=url)
53+
54+
playlist = None
55+
play8 = self._search_regex(
56+
r'(<[^>]+\bclass=["\']js-play8-playlist[^>]+>)', webpage,
57+
'playlist', default=None)
58+
if play8:
59+
attrs = extract_attributes(play8)
60+
playlist = attrs.get('value')
61+
if not playlist:
62+
# Old jwplayer fallback
63+
playlist = self._search_regex(
64+
r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
65+
webpage, 'jwplayer playlist', default='[]')
66+
jwplayer_playlist = self._parse_json(playlist, video_id, fatal=False)
67+
if jwplayer_playlist:
68+
info = self._parse_jwplayer_data(
69+
{'playlist': jwplayer_playlist}, video_id, base_url=url)
70+
else:
71+
# HTML5 media fallback
72+
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
73+
info['id'] = video_id
4874

4975
def get_optional(metadata, field):
5076
return metadata.get(field, [None])[0]
@@ -58,8 +84,12 @@ def get_optional(metadata, field):
5884
'description': clean_html(get_optional(metadata, 'description')),
5985
})
6086
if info.get('_type') != 'playlist':
87+
creator = get_optional(metadata, 'creator')
6188
info.update({
62-
'uploader': get_optional(metadata, 'creator'),
63-
'upload_date': unified_strdate(get_optional(metadata, 'date')),
89+
'creator': creator,
90+
'release_date': unified_strdate(get_optional(metadata, 'date')),
91+
'uploader': get_optional(metadata, 'publisher') or creator,
92+
'timestamp': unified_timestamp(get_optional(metadata, 'publicdate')),
93+
'language': get_optional(metadata, 'language'),
6494
})
6595
return info

0 commit comments

Comments
 (0)