yasoob
diff --git a/youtube_dl/YoutubeDL.py b/youtube_dl/YoutubeDL.py
Lines changed: 12 additions & 8 deletions
diff --git a/youtube_dl/compat.py b/youtube_dl/compat.py
Lines changed: 10 additions & 0 deletions
diff --git a/youtube_dl/extractor/apa.py b/youtube_dl/extractor/apa.py
Lines changed: 24 additions & 23 deletions
diff --git a/youtube_dl/extractor/applepodcasts.py b/youtube_dl/extractor/applepodcasts.py
Lines changed: 1 addition & 0 deletions
diff --git a/youtube_dl/extractor/ard.py b/youtube_dl/extractor/ard.py
Lines changed: 44 additions & 23 deletions
diff --git a/youtube_dl/extractor/arnes.py b/youtube_dl/extractor/arnes.py
Lines changed: 101 additions & 0 deletions
@@ -1511,14 +1511,18 @@ def sanitize_numeric_fields(info):
         if 'display_id' not in info_dict and 'id' in info_dict:
             info_dict['display_id'] = info_dict['id']
 
-        if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
-            # Working around out-of-range timestamp values (e.g. negative ones on Windows,
-            # see http://bugs.python.org/issue1646728)
-            try:
-                upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
-                info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
-            except (ValueError, OverflowError, OSError):
-                pass
+        for ts_key, date_key in (
+                ('timestamp', 'upload_date'),
+                ('release_timestamp', 'release_date'),
+        ):
+            if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
+                # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+                # see http://bugs.python.org/issue1646728)
+                try:
+                    upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+                    info_dict[date_key] = upload_date.strftime('%Y%m%d')
+                except (ValueError, OverflowError, OSError):
+                    pass
 
         # Auto generate title fields corresponding to the *_number fields when missing
         # in order to always have clean titles. This is very common for TV series.
 
@@ -73,6 +73,15 @@ def __init__(self, version, name, value, *args, **kwargs):
 except ImportError:  # Python 2
     import Cookie as compat_cookies
 
+if sys.version_info[0] == 2:  # Python 2 only: wrap SimpleCookie so load() also accepts compat_str
+    class compat_cookies_SimpleCookie(compat_cookies.SimpleCookie):
+        def load(self, rawdata):
+            if isinstance(rawdata, compat_str):  # compat_str (presumably unicode here) is coerced to a native str before parsing
+                rawdata = str(rawdata)
+            return super(compat_cookies_SimpleCookie, self).load(rawdata)
+else:  # every other major version: the stock class is exposed under the same compat name
+    compat_cookies_SimpleCookie = compat_cookies.SimpleCookie
+
 try:
     import html.entities as compat_html_entities
 except ImportError:  # Python 2
@@ -3000,6 +3009,7 @@ def compat_ctypes_WINFUNCTYPE(*args, **kwargs):
     'compat_cookiejar',
     'compat_cookiejar_Cookie',
     'compat_cookies',
+    'compat_cookies_SimpleCookie',
     'compat_ctypes_WINFUNCTYPE',
     'compat_etree_Element',
     'compat_etree_fromstring',
 
@@ -6,25 +6,21 @@
 from .common import InfoExtractor
 from ..utils import (
     determine_ext,
-    js_to_json,
+    int_or_none,
     url_or_none,
 )
 
 
 class APAIE(InfoExtractor):
-    _VALID_URL = r'https?://[^/]+\.apa\.at/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
+    _VALID_URL = r'(?P<base_url>https?://[^/]+\.apa\.at)/embed/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
     _TESTS = [{
         'url': 'http://uvp.apa.at/embed/293f6d17-692a-44e3-9fd5-7b178f3a1029',
         'md5': '2b12292faeb0a7d930c778c7a5b4759b',
         'info_dict': {
-            'id': 'jjv85FdZ',
+            'id': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
             'ext': 'mp4',
-            'title': '"Blau ist mysteriös": Die Blue Man Group im Interview',
-            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
+            'title': '293f6d17-692a-44e3-9fd5-7b178f3a1029',
             'thumbnail': r're:^https?://.*\.jpg$',
-            'duration': 254,
-            'timestamp': 1519211149,
-            'upload_date': '20180221',
         },
     }, {
         'url': 'https://uvp-apapublisher.sf.apa.at/embed/2f94e9e6-d945-4db2-9548-f9a41ebf7b78',
@@ -46,9 +42,11 @@ def _extract_urls(webpage):
                 webpage)]
 
     def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id, base_url = mobj.group('id', 'base_url')
 
-        webpage = self._download_webpage(url, video_id)
+        webpage = self._download_webpage(
+            '%s/player/%s' % (base_url, video_id), video_id)
 
         jwplatform_id = self._search_regex(
             r'media[iI]d\s*:\s*["\'](?P<id>[a-zA-Z0-9]{8})', webpage,
@@ -59,16 +57,18 @@ def _real_extract(self, url):
                 'jwplatform:' + jwplatform_id, ie='JWPlatform',
                 video_id=video_id)
 
-        sources = self._parse_json(
-            self._search_regex(
-                r'sources\s*=\s*(\[.+?\])\s*;', webpage, 'sources'),
-            video_id, transform_source=js_to_json)
+        def extract(field, name=None):
+            return self._search_regex(
+                r'\b%s["\']\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1' % field,
+                webpage, name or field, default=None, group='value')
+
+        title = extract('title') or video_id
+        description = extract('description')
+        thumbnail = extract('poster', 'thumbnail')
 
         formats = []
-        for source in sources:
-            if not isinstance(source, dict):
-                continue
-            source_url = url_or_none(source.get('file'))
+        for format_id in ('hls', 'progressive'):
+            source_url = url_or_none(extract(format_id))
             if not source_url:
                 continue
             ext = determine_ext(source_url)
@@ -77,18 +77,19 @@ def _real_extract(self, url):
                     source_url, video_id, 'mp4', entry_protocol='m3u8_native',
                     m3u8_id='hls', fatal=False))
             else:
+                height = int_or_none(self._search_regex(
+                    r'(\d+)\.mp4', source_url, 'height', default=None))
                 formats.append({
                     'url': source_url,
+                    'format_id': format_id,
+                    'height': height,
                 })
         self._sort_formats(formats)
 
-        thumbnail = self._search_regex(
-            r'image\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
-            'thumbnail', fatal=False, group='url')
-
         return {
             'id': video_id,
-            'title': video_id,
+            'title': title,
+            'description': description,
             'thumbnail': thumbnail,
             'formats': formats,
         }
@@ -42,6 +42,7 @@ def _real_extract(self, url):
         ember_data = self._parse_json(self._search_regex(
             r'id="shoebox-ember-data-store"[^>]*>\s*({.+?})\s*<',
             webpage, 'ember data'), episode_id)
+        ember_data = ember_data.get(episode_id) or ember_data
         episode = ember_data['data']['attributes']
         description = episode.get('description') or {}
 
 
@@ -284,20 +284,42 @@ def _real_extract(self, url):
 
         formats = []
         for a in video_node.findall('.//asset'):
+            file_name = xpath_text(a, './fileName', default=None)
+            if not file_name:
+                continue
+            format_type = a.attrib.get('type')
+            format_url = url_or_none(file_name)
+            if format_url:
+                ext = determine_ext(file_name)
+                if ext == 'm3u8':
+                    formats.extend(self._extract_m3u8_formats(
+                        format_url, display_id, 'mp4', entry_protocol='m3u8_native',
+                        m3u8_id=format_type or 'hls', fatal=False))
+                    continue
+                elif ext == 'f4m':
+                    formats.extend(self._extract_f4m_formats(
+                        update_url_query(format_url, {'hdcore': '3.7.0'}),
+                        display_id, f4m_id=format_type or 'hds', fatal=False))
+                    continue
             f = {
-                'format_id': a.attrib['type'],
-                'width': int_or_none(a.find('./frameWidth').text),
-                'height': int_or_none(a.find('./frameHeight').text),
-                'vbr': int_or_none(a.find('./bitrateVideo').text),
-                'abr': int_or_none(a.find('./bitrateAudio').text),
-                'vcodec': a.find('./codecVideo').text,
-                'tbr': int_or_none(a.find('./totalBitrate').text),
+                'format_id': format_type,
+                'width': int_or_none(xpath_text(a, './frameWidth')),
+                'height': int_or_none(xpath_text(a, './frameHeight')),
+                'vbr': int_or_none(xpath_text(a, './bitrateVideo')),
+                'abr': int_or_none(xpath_text(a, './bitrateAudio')),
+                'vcodec': xpath_text(a, './codecVideo'),
+                'tbr': int_or_none(xpath_text(a, './totalBitrate')),
             }
-            if a.find('./serverPrefix').text:
-                f['url'] = a.find('./serverPrefix').text
-                f['playpath'] = a.find('./fileName').text
+            server_prefix = xpath_text(a, './serverPrefix', default=None)
+            if server_prefix:
+                f.update({
+                    'url': server_prefix,
+                    'playpath': file_name,
+                })
             else:
-                f['url'] = a.find('./fileName').text
+                if not format_url:
+                    continue
+                f['url'] = format_url
             formats.append(f)
         self._sort_formats(formats)
 
@@ -313,7 +335,7 @@ def _real_extract(self, url):
 
 
 class ARDBetaMediathekIE(ARDMediathekBaseIE):
-    _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?P<client>[^/]+)/(?:player|live|video)/(?P<display_id>(?:[^/]+/)*)(?P<video_id>[a-zA-Z0-9]+)'
+    _VALID_URL = r'https://(?:(?:beta|www)\.)?ardmediathek\.de/(?:[^/]+/)?(?:player|live|video)/(?:[^/]+/)*(?P<id>Y3JpZDovL[a-zA-Z0-9]+)'
     _TESTS = [{
         'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
         'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
@@ -343,22 +365,22 @@ class ARDBetaMediathekIE(ARDMediathekBaseIE):
     }, {
         'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
         'only_matching': True,
+    }, {
+        'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet',
+        'only_matching': True,
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('video_id')
-        display_id = mobj.group('display_id')
-        if display_id:
-            display_id = display_id.rstrip('/')
-        if not display_id:
-            display_id = video_id
+        video_id = self._match_id(url)
 
         player_page = self._download_json(
             'https://api.ardmediathek.de/public-gateway',
-            display_id, data=json.dumps({
+            video_id, data=json.dumps({
                 'query': '''{
-  playerPage(client:"%s", clipId: "%s") {
+  playerPage(client: "ard", clipId: "%s") {
     blockedByFsk
     broadcastedOn
     maturityContentRating
@@ -388,7 +410,7 @@ def _real_extract(self, url):
       }
     }
   }
-}''' % (mobj.group('client'), video_id),
+}''' % video_id,
             }).encode(), headers={
                 'Content-Type': 'application/json'
             })['data']['playerPage']
@@ -413,7 +435,6 @@ def _real_extract(self, url):
                 r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))
         info.update({
             'age_limit': age_limit,
-            'display_id': display_id,
             'title': title,
             'description': description,
             'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
 
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+from ..compat import (
+    compat_parse_qs,
+    compat_urllib_parse_urlparse,
+)
+from ..utils import (
+    float_or_none,
+    int_or_none,
+    parse_iso8601,
+    remove_start,
+)
+
+
+class ArnesIE(InfoExtractor):  # extractor for watch/embed/API URLs on video.arnes.si
+    IE_NAME = 'video.arnes.si'
+    IE_DESC = 'Arnes Video'
+    _VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P<id>[0-9a-zA-Z]{12})'
+    _TESTS = [{
+        'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
+        'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
+        'info_dict': {
+            'id': 'a1qrWTOQfVoU',
+            'ext': 'mp4',
+            'title': 'Linearna neodvisnost, definicija',
+            'description': 'Linearna neodvisnost, definicija',
+            'license': 'PRIVATE',
+            'creator': 'Polona Oblak',
+            'timestamp': 1585063725,
+            'upload_date': '20200324',
+            'channel': 'Polona Oblak',
+            'channel_id': 'q6pc04hw24cj',
+            'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
+            'duration': 596.75,
+            'view_count': int,
+            'tags': ['linearna_algebra'],
+            'start_time': 10,
+        }
+    }, {
+        'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
+        'only_matching': True,
+    }, {
+        'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
+        'only_matching': True,
+    }]
+    _BASE_URL = 'https://video.arnes.si'  # site root; the API paths used below are appended to it
+
+    def _real_extract(self, url):
+        """Fetch the public API record for the video id in *url* and return its info dict."""
+        video_id = self._match_id(url)
+
+        video = self._download_json(self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
+        title = video['title']  # the only field read without a fallback
+
+        formats = []
+        for media in (video.get('media') or []):
+            media_url = media.get('url')
+            if not media_url:
+                continue
+            formats.append({
+                'url': self._BASE_URL + media_url,
+                'format_id': remove_start(media.get('format'), 'FORMAT_'),
+                'format_note': media.get('formatTranslation'),
+                'width': int_or_none(media.get('width')),
+                'height': int_or_none(media.get('height')),
+            })
+        self._sort_formats(formats)
+
+        channel = video.get('channel') or {}  # tolerate a missing or null channel object
+        channel_id = channel.get('url')
+        thumbnail = video.get('thumbnailUrl')
+
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'thumbnail': self._BASE_URL + thumbnail if thumbnail else None,  # optional field: never concatenate onto None
+            'description': video.get('description'),
+            'license': video.get('license'),
+            'creator': video.get('author'),
+            'timestamp': parse_iso8601(video.get('creationTime')),
+            'channel': channel.get('name'),
+            'channel_id': channel_id,
+            'channel_url': self._BASE_URL + '/?channel=' + channel_id if channel_id else None,
+            'duration': float_or_none(video.get('duration'), 1000),  # 1000 is presumably a ms -> s scale (test expects 596.75) - confirm
+            'view_count': int_or_none(video.get('views')),
+            'tags': video.get('hashtags'),
+            'start_time': int_or_none(compat_parse_qs(  # value of the 't' query parameter of the input URL, if present
+                compat_urllib_parse_urlparse(url).query).get('t', [None])[0]),
+        }