yasoob
diff --git a/youtube_dl/extractor/abcnews.py b/youtube_dl/extractor/abcnews.py
Lines changed: 72 additions & 62 deletions
diff --git a/youtube_dl/extractor/adn.py b/youtube_dl/extractor/adn.py
Lines changed: 32 additions & 6 deletions
diff --git a/youtube_dl/extractor/aenetworks.py b/youtube_dl/extractor/aenetworks.py
Lines changed: 1 addition & 1 deletion
diff --git a/youtube_dl/extractor/amp.py b/youtube_dl/extractor/amp.py
Lines changed: 2 additions & 1 deletion
diff --git a/youtube_dl/extractor/archiveorg.py b/youtube_dl/extractor/archiveorg.py
Lines changed: 42 additions & 12 deletions
@@ -1,14 +1,15 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import calendar
 import re
-import time
 
 from .amp import AMPIE
 from .common import InfoExtractor
-from .youtube import YoutubeIE
-from ..compat import compat_urlparse
+from ..utils import (
+    parse_duration,
+    parse_iso8601,
+    try_get,
+)
 
 
 class AbcNewsVideoIE(AMPIE):
@@ -18,8 +19,8 @@ class AbcNewsVideoIE(AMPIE):
                         (?:
                             abcnews\.go\.com/
                             (?:
-                                [^/]+/video/(?P<display_id>[0-9a-z-]+)-|
-                                video/embed\?.*?\bid=
+                                (?:[^/]+/)*video/(?P<display_id>[0-9a-z-]+)-|
+                                video/(?:embed|itemfeed)\?.*?\bid=
                             )|
                             fivethirtyeight\.abcnews\.go\.com/video/embed/\d+/
                         )
@@ -36,6 +37,8 @@ class AbcNewsVideoIE(AMPIE):
             'description': 'George Stephanopoulos goes one-on-one with Iranian Foreign Minister Dr. Javad Zarif.',
             'duration': 180,
             'thumbnail': r're:^https?://.*\.jpg$',
+            'timestamp': 1380454200,
+            'upload_date': '20130929',
         },
         'params': {
             # m3u8 download
@@ -47,6 +50,12 @@ class AbcNewsVideoIE(AMPIE):
     }, {
         'url': 'http://abcnews.go.com/2020/video/2020-husband-stands-teacher-jail-student-affairs-26119478',
         'only_matching': True,
+    }, {
+        'url': 'http://abcnews.go.com/video/itemfeed?id=46979033',
+        'only_matching': True,
+    }, {
+        'url': 'https://abcnews.go.com/GMA/News/video/history-christmas-story-67894761',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
@@ -67,28 +76,23 @@ class AbcNewsIE(InfoExtractor):
     _VALID_URL = r'https?://abcnews\.go\.com/(?:[^/]+/)+(?P<display_id>[0-9a-z-]+)/story\?id=(?P<id>\d+)'
 
     _TESTS = [{
-        'url': 'http://abcnews.go.com/Blotter/News/dramatic-video-rare-death-job-america/story?id=10498713#.UIhwosWHLjY',
+        # Youtube Embeds
+        'url': 'https://abcnews.go.com/Entertainment/peter-billingsley-child-actor-christmas-story-hollywood-power/story?id=51286501',
         'info_dict': {
-            'id': '10505354',
-            'ext': 'flv',
-            'display_id': 'dramatic-video-rare-death-job-america',
-            'title': 'Occupational Hazards',
-            'description': 'Nightline investigates the dangers that lurk at various jobs.',
-            'thumbnail': r're:^https?://.*\.jpg$',
-            'upload_date': '20100428',
-            'timestamp': 1272412800,
+            'id': '51286501',
+            'title': "Peter Billingsley: From child actor in 'A Christmas Story' to Hollywood power player",
+            'description': 'Billingsley went from a child actor to Hollywood power player.',
         },
-        'add_ie': ['AbcNewsVideo'],
+        'playlist_count': 5,
     }, {
         'url': 'http://abcnews.go.com/Entertainment/justin-timberlake-performs-stop-feeling-eurovision-2016/story?id=39125818',
         'info_dict': {
             'id': '38897857',
             'ext': 'mp4',
-            'display_id': 'justin-timberlake-performs-stop-feeling-eurovision-2016',
             'title': 'Justin Timberlake Drops Hints For Secret Single',
             'description': 'Lara Spencer reports the buzziest stories of the day in "GMA" Pop News.',
-            'upload_date': '20160515',
-            'timestamp': 1463329500,
+            'upload_date': '20160505',
+            'timestamp': 1462442280,
         },
         'params': {
             # m3u8 download
@@ -100,49 +104,55 @@ class AbcNewsIE(InfoExtractor):
     }, {
         'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
         'only_matching': True,
+    }, {
+        # inline.type == 'video'
+        'url': 'http://abcnews.go.com/Technology/exclusive-apple-ceo-tim-cook-iphone-cracking-software/story?id=37173343',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        display_id = mobj.group('display_id')
-        video_id = mobj.group('id')
-
-        webpage = self._download_webpage(url, video_id)
-        video_url = self._search_regex(
-            r'window\.abcnvideo\.url\s*=\s*"([^"]+)"', webpage, 'video URL')
-        full_video_url = compat_urlparse.urljoin(url, video_url)
-
-        youtube_url = YoutubeIE._extract_url(webpage)
-
-        timestamp = None
-        date_str = self._html_search_regex(
-            r'<span[^>]+class="timestamp">([^<]+)</span>',
-            webpage, 'timestamp', fatal=False)
-        if date_str:
-            tz_offset = 0
-            if date_str.endswith(' ET'):  # Eastern Time
-                tz_offset = -5
-                date_str = date_str[:-3]
-            date_formats = ['%b. %d, %Y', '%b %d, %Y, %I:%M %p']
-            for date_format in date_formats:
-                try:
-                    timestamp = calendar.timegm(time.strptime(date_str.strip(), date_format))
-                except ValueError:
-                    continue
-            if timestamp is not None:
-                timestamp -= tz_offset * 3600
-
-        entry = {
-            '_type': 'url_transparent',
-            'ie_key': AbcNewsVideoIE.ie_key(),
-            'url': full_video_url,
-            'id': video_id,
-            'display_id': display_id,
-            'timestamp': timestamp,
-        }
-
-        if youtube_url:
-            entries = [entry, self.url_result(youtube_url, ie=YoutubeIE.ie_key())]
-            return self.playlist_result(entries)
-
-        return entry
+        story_id = self._match_id(url)
+        webpage = self._download_webpage(url, story_id)
+        story = self._parse_json(self._search_regex(  # story data is read from the window['__abcnews__'] JSON object embedded in the page
+            r"window\['__abcnews__'\]\s*=\s*({.+?});",
+            webpage, 'data'), story_id)['page']['content']['story']['everscroll'][0]  # only the first 'everscroll' item is used
+        article_contents = story.get('articleContents') or {}
+
+        def entries():  # generator: the featured video first, then each inline iframe/video of the article
+            featured_video = story.get('featuredVideo') or {}
+            feed = try_get(featured_video, lambda x: x['video']['feed'])
+            if feed:  # the featured video is only emitted when it carries a feed URL
+                yield {
+                    '_type': 'url',
+                    'id': featured_video.get('id'),
+                    'title': featured_video.get('name'),
+                    'url': feed,
+                    'thumbnail': featured_video.get('images'),  # NOTE(review): passed through as-is; confirm 'images' is a single URL
+                    'description': featured_video.get('description'),
+                    'timestamp': parse_iso8601(featured_video.get('uploadDate')),
+                    'duration': parse_duration(featured_video.get('duration')),
+                    'ie_key': AbcNewsVideoIE.ie_key(),  # the feed URL is handed to AbcNewsVideoIE
+                }
+
+            for inline in (article_contents.get('inlines') or []):
+                inline_type = inline.get('type')
+                if inline_type == 'iframe':  # embedded iframe: its src URL is passed on without naming an extractor
+                    iframe_url = try_get(inline, lambda x: x['attrs']['src'])
+                    if iframe_url:
+                        yield self.url_result(iframe_url)
+                elif inline_type == 'video':  # inline video: routed to AbcNewsVideoIE through an embed URL built from its id
+                    video_id = inline.get('id')
+                    if video_id:
+                        yield {
+                            '_type': 'url',
+                            'id': video_id,
+                            'url': 'http://abcnews.go.com/video/embed?id=' + video_id,  # NOTE(review): assumes the inline id is a string - TODO confirm
+                            'thumbnail': inline.get('imgSrc') or inline.get('imgDefault'),
+                            'description': inline.get('description'),
+                            'duration': parse_duration(inline.get('duration')),
+                            'ie_key': AbcNewsVideoIE.ie_key(),
+                        }
+
+        return self.playlist_result(  # entries() is passed as an un-consumed generator
+            entries(), story_id, article_contents.get('headline'),
+            article_contents.get('subHead'))
@@ -26,6 +26,7 @@
     strip_or_none,
     try_get,
     unified_strdate,
+    urlencode_postdata,
 )
 
 
@@ -51,9 +52,12 @@ class ADNIE(InfoExtractor):
         }
     }
 
+    _NETRC_MACHINE = 'animedigitalnetwork'
     _BASE_URL = 'http://animedigitalnetwork.fr'
     _API_BASE_URL = 'https://gw.api.animedigitalnetwork.fr/'
     _PLAYER_BASE_URL = _API_BASE_URL + 'player/'
+    _HEADERS = {}
+    _LOGIN_ERR_MESSAGE = 'Unable to log in'
     _RSA_KEY = (0x9B42B08905199A5CCE2026274399CA560ECB209EE9878A708B1C0812E1BB8CB5D1FB7441861147C1A1F2F3A0476DD63A9CAC20D3E983613346850AA6CB38F16DC7D720FD7D86FC6E5B3D5BBC72E14CD0BF9E869F2CEA2CCAD648F1DCE38F1FF916CEFB2D339B64AA0264372344BC775E265E8A852F88144AB0BD9AA06C1A4ABB, 65537)
     _POS_ALIGN_MAP = {
         'start': 1,
@@ -129,19 +133,42 @@ def _get_subtitles(self, sub_url, video_id):
             }])
         return subtitles
 
+    def _real_initialize(self):  # optional login; any failure only produces a warning and _HEADERS keeps its current value
+        username, password = self._get_login_info()
+        if not username:  # no credentials supplied: nothing to do
+            return
+        try:
+            access_token = (self._download_json(
+                self._API_BASE_URL + 'authentication/login', None,
+                'Logging in', self._LOGIN_ERR_MESSAGE,  # left fatal so a failed request raises ExtractorError and reaches the handler below
+                data=urlencode_postdata({
+                    'password': password,
+                    'rememberMe': False,
+                    'source': 'Web',
+                    'username': username,
+                })) or {}).get('accessToken')
+            if access_token:
+                self._HEADERS = {'authorization': 'Bearer ' + access_token}
+        except ExtractorError as e:  # login stays best-effort: report and continue
+            message = None
+            if isinstance(e.cause, compat_HTTPError) and e.cause.code == 401:  # prefer the message/code from the JSON error body
+                resp = self._parse_json(
+                    e.cause.read().decode(), None, fatal=False) or {}
+                message = resp.get('message') or resp.get('code')
+            self.report_warning(message or self._LOGIN_ERR_MESSAGE)
+            self.report_warning(message or self._LOGIN_ERR_MESSAGE)
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video_base_url = self._PLAYER_BASE_URL + 'video/%s/' % video_id
         player = self._download_json(
             video_base_url + 'configuration', video_id,
-            'Downloading player config JSON metadata')['player']
+            'Downloading player config JSON metadata',
+            headers=self._HEADERS)['player']
         options = player['options']
 
         user = options['user']
         if not user.get('hasAccess'):
-            raise ExtractorError(
-                'This video is only available for paying users', expected=True)
-            # self.raise_login_required() # FIXME: Login is not implemented
+            self.raise_login_required()
 
         token = self._download_json(
             user.get('refreshTokenUrl') or (self._PLAYER_BASE_URL + 'refresh/token'),
@@ -188,8 +215,7 @@ def _real_extract(self, url):
                 message = error.get('message')
                 if e.cause.code == 403 and error.get('code') == 'player-bad-geolocation-country':
                     self.raise_geo_restricted(msg=message)
-                else:
-                    raise ExtractorError(message)
+                raise ExtractorError(message)
         else:
             raise ExtractorError('Giving up retrying')
 
 
@@ -252,7 +252,7 @@ class AENetworksShowIE(AENetworksListBaseIE):
     _TESTS = [{
         'url': 'http://www.history.com/shows/ancient-aliens',
         'info_dict': {
-            'id': 'SH012427480000',
+            'id': 'SERIES1574',
             'title': 'Ancient Aliens',
             'description': 'md5:3f6d74daf2672ff3ae29ed732e37ea7f',
         },
 
@@ -8,6 +8,7 @@
     int_or_none,
     mimetype2ext,
     parse_iso8601,
+    unified_timestamp,
     url_or_none,
 )
 
@@ -88,7 +89,7 @@ def get_media_node(name, default=None):
 
         self._sort_formats(formats)
 
-        timestamp = parse_iso8601(item.get('pubDate'), ' ') or parse_iso8601(item.get('dc-date'))
+        timestamp = unified_timestamp(item.get('pubDate')) or parse_iso8601(item.get('dc-date'))
 
         return {
             'id': video_id,
 
@@ -2,15 +2,17 @@
 
 from .common import InfoExtractor
 from ..utils import (
-    unified_strdate,
     clean_html,
+    extract_attributes,
+    unified_strdate,
+    unified_timestamp,
 )
 
 
 class ArchiveOrgIE(InfoExtractor):
     IE_NAME = 'archive.org'
     IE_DESC = 'archive.org videos'
-    _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#]+)(?:[?].*)?$'
+    _VALID_URL = r'https?://(?:www\.)?archive\.org/(?:details|embed)/(?P<id>[^/?#&]+)'
     _TESTS = [{
         'url': 'http://archive.org/details/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
         'md5': '8af1d4cf447933ed3c7f4871162602db',
@@ -19,8 +21,11 @@ class ArchiveOrgIE(InfoExtractor):
             'ext': 'ogg',
             'title': '1968 Demo - FJCC Conference Presentation Reel #1',
             'description': 'md5:da45c349df039f1cc8075268eb1b5c25',
-            'upload_date': '19681210',
-            'uploader': 'SRI International'
+            'creator': 'SRI International',
+            'release_date': '19681210',
+            'uploader': 'SRI International',
+            'timestamp': 1268695290,
+            'upload_date': '20100315',
         }
     }, {
         'url': 'https://archive.org/details/Cops1922',
@@ -29,22 +34,43 @@ class ArchiveOrgIE(InfoExtractor):
             'id': 'Cops1922',
             'ext': 'mp4',
             'title': 'Buster Keaton\'s "Cops" (1922)',
-            'description': 'md5:89e7c77bf5d965dd5c0372cfb49470f6',
+            'description': 'md5:43a603fd6c5b4b90d12a96b921212b9c',
+            'timestamp': 1387699629,
+            'upload_date': '20131222',
         }
     }, {
         'url': 'http://archive.org/embed/XD300-23_68HighlightsAResearchCntAugHumanIntellect',
         'only_matching': True,
+    }, {
+        'url': 'https://archive.org/details/MSNBCW_20131125_040000_To_Catch_a_Predator/',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
         webpage = self._download_webpage(
             'http://archive.org/embed/' + video_id, video_id)
-        jwplayer_playlist = self._parse_json(self._search_regex(
-            r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
-            webpage, 'jwplayer playlist'), video_id)
-        info = self._parse_jwplayer_data(
-            {'playlist': jwplayer_playlist}, video_id, base_url=url)
+
+        playlist = None
+        play8 = self._search_regex(
+            r'(<[^>]+\bclass=["\']js-play8-playlist[^>]+>)', webpage,
+            'playlist', default=None)
+        if play8:
+            attrs = extract_attributes(play8)
+            playlist = attrs.get('value')
+        if not playlist:
+            # Old jwplayer fallback
+            playlist = self._search_regex(
+                r"(?s)Play\('[^']+'\s*,\s*(\[.+\])\s*,\s*{.*?}\)",
+                webpage, 'jwplayer playlist', default='[]')
+        jwplayer_playlist = self._parse_json(playlist, video_id, fatal=False)
+        if jwplayer_playlist:
+            info = self._parse_jwplayer_data(
+                {'playlist': jwplayer_playlist}, video_id, base_url=url)
+        else:
+            # HTML5 media fallback
+            info = self._parse_html5_media_entries(url, webpage, video_id)[0]
+            info['id'] = video_id
 
         def get_optional(metadata, field):
             return metadata.get(field, [None])[0]
@@ -58,8 +84,12 @@ def get_optional(metadata, field):
             'description': clean_html(get_optional(metadata, 'description')),
         })
         if info.get('_type') != 'playlist':
+            creator = get_optional(metadata, 'creator')
             info.update({
-                'uploader': get_optional(metadata, 'creator'),
-                'upload_date': unified_strdate(get_optional(metadata, 'date')),
+                'creator': creator,
+                'release_date': unified_strdate(get_optional(metadata, 'date')),
+                'uploader': get_optional(metadata, 'publisher') or creator,
+                'timestamp': unified_timestamp(get_optional(metadata, 'publicdate')),
+                'language': get_optional(metadata, 'language'),
             })
         return info