Update to 2020.12.29

5 years ago · 262db5f1ce
parent 8bff4f84b5
commit 262db5f1ce
16 changed files with 212 additions and 169 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -322,7 +322,6 @@
 - **Funk**
 - **Fusion**
 - **Fux**
 - **FXNetworks**
 - **Gaia**
 - **GameInformer**
 - **GameSpot**
--- a/youtube_dlc/extractor/amcnetworks.py
+++ b/youtube_dlc/extractor/amcnetworks.py
@ -80,7 +80,8 @@ class AMCNetworksIE(ThePlatformIE):
        title = theplatform_metadata['title']
        rating = try_get(
            theplatform_metadata, lambda x: x['ratings'][0]['rating'])
-        if properties.get('videoCategory') == 'TVE-Auth':
+        video_category = properties.get('videoCategory')
        if video_category and video_category.endswith('-Auth'):
            resource = self._get_mvpd_resource(
                requestor_id, title, video_id, rating)
            query['auth'] = self._extract_mvpd_auth(
--- a/youtube_dlc/extractor/aparat.py
+++ b/youtube_dlc/extractor/aparat.py
@ -3,6 +3,7 @@ from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import (
    get_element_by_id,
    int_or_none,
    merge_dicts,
    mimetype2ext,
@ -39,23 +40,15 @@ class AparatIE(InfoExtractor):
        webpage = self._download_webpage(url, video_id, fatal=False)
        if not webpage:
            # Note: There is an easier-to-parse configuration at
            # http://www.aparat.com/video/video/config/videohash/%video_id
            # but the URL in there does not work
            webpage = self._download_webpage(
                'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
                video_id)
-        options = self._parse_json(
+        options = self._parse_json(self._search_regex(
-            self._search_regex(
+            r'options\s*=\s*({.+?})\s*;', webpage, 'options'), video_id)
                r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
                webpage, 'options', group='value'),
            video_id)
        player = options['plugins']['sabaPlayerPlugin']
        formats = []
-        for sources in player['multiSRC']:
+        for sources in (options.get('multiSRC') or []):
            for item in sources:
                if not isinstance(item, dict):
                    continue
@ -85,11 +78,12 @@ class AparatIE(InfoExtractor):
        info = self._search_json_ld(webpage, video_id, default={})
        if not info.get('title'):
-            info['title'] = player['title']
+            info['title'] = get_element_by_id('videoTitle', webpage) or \
                self._html_search_meta(['og:title', 'twitter:title', 'DC.Title', 'title'], webpage, fatal=True)
        return merge_dicts(info, {
            'id': video_id,
            'thumbnail': url_or_none(options.get('poster')),
-            'duration': int_or_none(player.get('duration')),
+            'duration': int_or_none(options.get('duration')),
            'formats': formats,
        })
--- a/youtube_dlc/extractor/brightcove.py
+++ b/youtube_dlc/extractor/brightcove.py
@ -543,15 +543,6 @@ class BrightcoveNewIE(AdobePassIE):
        if sources_num == key_systems_present:
            raise ExtractorError('This video is DRM protected', expected=True)
        if not formats:
            # for sonyliv.com DRM protected videos
            s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
            if s3_source_url:
                formats.append({
                    'url': s3_source_url,
                    'format_id': 'source',
                })
        errors = json_data.get('errors')
        if not formats and errors:
            error = errors[0]
--- a/youtube_dlc/extractor/extractors.py
+++ b/youtube_dlc/extractor/extractors.py
@ -415,7 +415,6 @@ from .fujitv import FujiTVFODPlus7IE
 from .funimation import FunimationIE
 from .funk import FunkIE
 from .fusion import FusionIE
 from .fxnetworks import FXNetworksIE
 from .gaia import GaiaIE
 from .gameinformer import GameInformerIE
 from .gamespot import GameSpotIE
--- a/youtube_dlc/extractor/generic.py
+++ b/youtube_dlc/extractor/generic.py
@ -64,7 +64,10 @@ from .tube8 import Tube8IE
 from .mofosex import MofosexEmbedIE
 from .spankwire import SpankwireIE
 from .youporn import YouPornIE
-from .vimeo import VimeoIE
+from .vimeo import (
    VimeoIE,
    VHXEmbedIE,
 )
 from .dailymotion import DailymotionIE
 from .dailymail import DailyMailIE
 from .onionstudios import OnionStudiosIE
@ -2191,7 +2194,18 @@ class GenericIE(InfoExtractor):
        #     'params': {
        #         'force_generic_extractor': True,
        #     },
-        # }
+        # },
        {
            # VHX Embed
            'url': 'https://demo.vhx.tv/category-c/videos/file-example-mp4-480-1-5mg-copy',
            'info_dict': {
                'id': '858208',
                'ext': 'mp4',
                'title': 'Untitled',
                'uploader_id': 'user80538407',
                'uploader': 'OTT Videos',
            },
        },
    ]
    def report_following_redirect(self, new_url):
@ -2569,6 +2583,10 @@ class GenericIE(InfoExtractor):
        if vimeo_urls:
            return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
        vhx_url = VHXEmbedIE._extract_url(webpage)
        if vhx_url:
            return self.url_result(vhx_url, VHXEmbedIE.ie_key())
        vid_me_embed_url = self._search_regex(
            r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
            webpage, 'vid.me embed', default=None)
--- a/youtube_dlc/extractor/go.py
+++ b/youtube_dlc/extractor/go.py
@ -38,13 +38,17 @@ class GoIE(AdobePassIE):
        'disneynow': {
            'brand': '011',
            'resource_id': 'Disney',
-        }
+        },
        'fxnow.fxnetworks': {
            'brand': '025',
            'requestor_id': 'dtci',
        },
    }
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            (?:(?P<sub_domain>%s)\.)?go|
-                            (?P<sub_domain_2>abc|freeform|disneynow)
+                            (?P<sub_domain_2>abc|freeform|disneynow|fxnow\.fxnetworks)
                        )\.com/
                        (?:
                            (?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
@ -99,6 +103,19 @@ class GoIE(AdobePassIE):
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841',
        'info_dict': {
            'id': 'VDKA12782841',
            'ext': 'mp4',
            'title': 'First Look: Better Things - Season 2',
            'description': 'md5:fa73584a95761c605d9d54904e35b407',
        },
        'params': {
            'geo_bypass_ip_block': '3.244.239.0/24',
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
        'only_matching': True,
--- a/youtube_dlc/extractor/mitele.py
+++ b/youtube_dlc/extractor/mitele.py
@ -2,15 +2,14 @@
 from __future__ import unicode_literals
 import json
-from .common import InfoExtractor
+from .telecinco import TelecincoIE
 from ..utils import (
    int_or_none,
    parse_iso8601,
    smuggle_url,
 )
-class MiTeleIE(InfoExtractor):
+class MiTeleIE(TelecincoIE):
    IE_DESC = 'mitele.es'
    _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
@ -53,7 +52,7 @@ class MiTeleIE(InfoExtractor):
        },
        'params': {
            'skip_download': True,
-        }
+        },
    }, {
        'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
        'only_matching': True,
@ -69,13 +68,11 @@ class MiTeleIE(InfoExtractor):
            r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
            webpage, 'Pre Player'), display_id)['prePlayer']
        title = pre_player['title']
-        video = pre_player['video']
+        video_info = self._parse_content(pre_player['video'], url)
        video_id = video['dataMediaId']
        content = pre_player.get('content') or {}
        info = content.get('info') or {}
-        info = {
+        video_info.update({
            'id': video_id,
            'title': title,
            'description': info.get('synopsis'),
            'series': content.get('title'),
@ -83,38 +80,7 @@ class MiTeleIE(InfoExtractor):
            'episode': content.get('subtitle'),
            'episode_number': int_or_none(info.get('episode_number')),
            'duration': int_or_none(info.get('duration')),
            'thumbnail': video.get('dataPoster'),
            'age_limit': int_or_none(info.get('rating')),
            'timestamp': parse_iso8601(pre_player.get('publishedTime')),
-        }
+        })
-
+        return video_info
        if video.get('dataCmsId') == 'ooyala':
            info.update({
                '_type': 'url_transparent',
                # for some reason only HLS is supported
                'url': smuggle_url('ooyala:' + video_id, {'supportedformats': 'm3u8,dash'}),
            })
        else:
            config = self._download_json(
                video['dataConfig'], video_id, 'Downloading config JSON')
            services = config['services']
            gbx = self._download_json(
                services['gbx'], video_id, 'Downloading gbx JSON')
            caronte = self._download_json(
                services['caronte'], video_id, 'Downloading caronte JSON')
            cerbero = self._download_json(
                caronte['cerbero'], video_id, 'Downloading cerbero JSON',
                headers={
                    'Content-Type': 'application/json;charset=UTF-8',
                    'Origin': 'https://www.mitele.es'
                },
                data=json.dumps({
                    'bbx': caronte['bbx'],
                    'gbx': gbx['gbx']
                }).encode('utf-8'))
            formats = self._extract_m3u8_formats(
                caronte['dls'][0]['stream'], video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
                query=dict([cerbero['tokens']['1']['cdn'].split('=', 1)]))
            info['formats'] = formats
        return info
--- a/youtube_dlc/extractor/nhk.py
+++ b/youtube_dlc/extractor/nhk.py
@ -90,7 +90,7 @@ class NhkVodIE(NhkBaseIE):
    _TESTS = [{
        # video clip
        'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
-        'md5': '256a1be14f48d960a7e61e2532d95ec3',
+        'md5': '7a90abcfe610ec22a6bfe15bd46b30ca',
        'info_dict': {
            'id': 'a95j5iza',
            'ext': 'mp4',
--- a/youtube_dlc/extractor/piksel.py
+++ b/youtube_dlc/extractor/piksel.py
@ -6,16 +6,33 @@ import re
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
    ExtractorError,
    dict_get,
    ExtractorError,
    int_or_none,
    unescapeHTML,
    parse_iso8601,
    try_get,
    unescapeHTML,
 )
 class PikselIE(InfoExtractor):
-    _VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P<id>[a-z0-9_]+)'
+    _VALID_URL = r'''(?x)https?://
        (?:
            (?:
                player\.
                    (?:
                        olympusattelecom|
                        vibebyvista
                    )|
                (?:api|player)\.multicastmedia|
                (?:api-ovp|player)\.piksel
            )\.com|
            (?:
                mz-edge\.stream\.co|
                movie-s\.nhk\.or
            )\.jp|
            vidego\.baltimorecity\.gov
        )/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)'''
    _TESTS = [
        {
            'url': 'http://player.piksel.com/v/ums2867l',
@ -56,46 +73,41 @@ class PikselIE(InfoExtractor):
        if mobj:
            return mobj.group('url')
    def _call_api(self, app_token, resource, display_id, query, fatal=True):
        response = (self._download_json(
            'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token),
            display_id, query=query, fatal=fatal) or {}).get('response')
        failure = try_get(response, lambda x: x['failure']['reason'])
        if failure:
            if fatal:
                raise ExtractorError(failure, expected=True)
            self.report_warning(failure)
        return response
    def _real_extract(self, url):
-        display_id = self._match_id(url)
+        ref_id, display_id = re.match(self._VALID_URL, url).groups()
        webpage = self._download_webpage(url, display_id)
        video_id = self._search_regex(
            r'data-de-program-uuid=[\'"]([a-z0-9]+)',
            webpage, 'program uuid', default=display_id)
        app_token = self._search_regex([
            r'clientAPI\s*:\s*"([^"]+)"',
            r'data-de-api-key\s*=\s*"([^"]+)"'
        ], webpage, 'app token')
-        response = self._download_json(
+        query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id}
-            'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token,
+        program = self._call_api(
-            video_id, query={
+            app_token, 'program', display_id, query)['WsProgramResponse']['program']
-                'v': video_id
+        video_id = program['uuid']
-            })['response']
+        video_data = program['asset']
        failure = response.get('failure')
        if failure:
            raise ExtractorError(response['failure']['reason'], expected=True)
        video_data = response['WsProgramResponse']['program']['asset']
        title = video_data['title']
        asset_type = dict_get(video_data, ['assetType', 'asset_type'])
        formats = []
-        m3u8_url = dict_get(video_data, [
+        def process_asset_file(asset_file):
-            'm3u8iPadURL',
+            if not asset_file:
-            'ipadM3u8Url',
+                return
            'm3u8AndroidURL',
            'm3u8iPhoneURL',
            'iphoneM3u8Url'])
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))
        asset_type = dict_get(video_data, ['assetType', 'asset_type'])
        for asset_file in video_data.get('assetFiles', []):
            # TODO: extract rtmp formats
            http_url = asset_file.get('http_url')
            if not http_url:
-                continue
+                return
            tbr = None
            vbr = int_or_none(asset_file.get('videoBitrate'), 1024)
            abr = int_or_none(asset_file.get('audioBitrate'), 1024)
@ -118,6 +130,43 @@ class PikselIE(InfoExtractor):
                'filesize': int_or_none(asset_file.get('filesize')),
                'tbr': tbr,
            })
        def process_asset_files(asset_files):
            for asset_file in (asset_files or []):
                process_asset_file(asset_file)
        process_asset_files(video_data.get('assetFiles'))
        process_asset_file(video_data.get('referenceFile'))
        if not formats:
            asset_id = video_data.get('assetid') or program.get('assetid')
            if asset_id:
                process_asset_files(try_get(self._call_api(
                    app_token, 'asset_file', display_id, {
                        'assetid': asset_id,
                    }, False), lambda x: x['WsAssetFileResponse']['AssetFiles']))
        m3u8_url = dict_get(video_data, [
            'm3u8iPadURL',
            'ipadM3u8Url',
            'm3u8AndroidURL',
            'm3u8iPhoneURL',
            'iphoneM3u8Url'])
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))
        smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil'])
        if smil_url:
            transform_source = None
            if ref_id == 'nhkworld':
                # TODO: figure out if this is something to be fixed in urljoin,
                # _parse_smil_formats or keep it here
                transform_source = lambda x: x.replace('src="/', 'src="').replace('/media"', '/media/"')
            formats.extend(self._extract_smil_formats(
                re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id,
                transform_source=transform_source, fatal=False))
        self._sort_formats(formats)
        subtitles = {}
--- a/youtube_dlc/extractor/teachable.py
+++ b/youtube_dlc/extractor/teachable.py
@ -140,7 +140,7 @@ class TeachableIE(TeachableBaseIE):
    @staticmethod
    def _is_teachable(webpage):
        return 'teachableTracker.linker:autoLink' in webpage and re.search(
-            r'<link[^>]+href=["\']https?://process\.fs\.teachablecdn\.com',
+            r'<link[^>]+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com',
            webpage)
    @staticmethod
--- a/youtube_dlc/extractor/telecinco.py
+++ b/youtube_dlc/extractor/telecinco.py
@ -5,14 +5,11 @@ import json
 import re
 from .common import InfoExtractor
 from .ooyala import OoyalaIE
 from ..utils import (
    clean_html,
    determine_ext,
    int_or_none,
    str_or_none,
    try_get,
    urljoin,
 )
@ -28,7 +25,7 @@ class TelecincoIE(InfoExtractor):
            'description': 'md5:716caf5601e25c3c5ab6605b1ae71529',
        },
        'playlist': [{
-            'md5': 'adb28c37238b675dad0f042292f209a7',
+            'md5': '7ee56d665cfd241c0e6d80fd175068b0',
            'info_dict': {
                'id': 'JEA5ijCnF6p5W08A1rNKn7',
                'ext': 'mp4',
@ -38,7 +35,7 @@ class TelecincoIE(InfoExtractor):
        }]
    }, {
        'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
-        'md5': '9468140ebc300fbb8b9d65dc6e5c4b43',
+        'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a',
        'info_dict': {
            'id': 'jn24Od1zGLG4XUZcnUnZB6',
            'ext': 'mp4',
@ -48,7 +45,7 @@ class TelecincoIE(InfoExtractor):
        },
    }, {
        'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
-        'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6',
+        'md5': 'eddb50291df704ce23c74821b995bcac',
        'info_dict': {
            'id': 'aywerkD2Sv1vGNqq9b85Q2',
            'ext': 'mp4',
@ -90,58 +87,24 @@ class TelecincoIE(InfoExtractor):
    def _parse_content(self, content, url):
        video_id = content['dataMediaId']
        if content.get('dataCmsId') == 'ooyala':
            return self.url_result(
                'ooyala:%s' % video_id, OoyalaIE.ie_key(), video_id)
        config_url = urljoin(url, content['dataConfig'])
        config = self._download_json(
-            config_url, video_id, 'Downloading config JSON')
+            content['dataConfig'], video_id, 'Downloading config JSON')
        title = config['info']['title']
-
+        services = config['services']
-        def mmc_url(mmc_type):
+        caronte = self._download_json(services['caronte'], video_id)
-            return re.sub(
+        stream = caronte['dls'][0]['stream']
-                r'/(?:flash|html5)\.json', '/%s.json' % mmc_type,
+        headers = self.geo_verification_headers()
-                config['services']['mmc'])
+        headers.update({
-
+            'Content-Type': 'application/json;charset=UTF-8',
-        duration = None
+            'Origin': re.match(r'https?://[^/]+', url).group(0),
-        formats = []
+        })
-        for mmc_type in ('flash', 'html5'):
+        cdn = self._download_json(
-            mmc = self._download_json(
+            caronte['cerbero'], video_id, data=json.dumps({
-                mmc_url(mmc_type), video_id,
+                'bbx': caronte['bbx'],
-                'Downloading %s mmc JSON' % mmc_type, fatal=False)
+                'gbx': self._download_json(services['gbx'], video_id)['gbx'],
-            if not mmc:
+            }).encode(), headers=headers)['tokens']['1']['cdn']
-                continue
+        formats = self._extract_m3u8_formats(
-            if not duration:
+            stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
                duration = int_or_none(mmc.get('duration'))
            for location in mmc['locations']:
                gat = self._proto_relative_url(location.get('gat'), 'http:')
                gcp = location.get('gcp')
                ogn = location.get('ogn')
                if None in (gat, gcp, ogn):
                    continue
                token_data = {
                    'gcp': gcp,
                    'ogn': ogn,
                    'sta': 0,
                }
                media = self._download_json(
                    gat, video_id, data=json.dumps(token_data).encode('utf-8'),
                    headers={
                        'Content-Type': 'application/json;charset=utf-8',
                        'Referer': url,
                    }, fatal=False) or {}
                stream = media.get('stream') or media.get('file')
                if not stream:
                    continue
                ext = determine_ext(stream)
                if ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
                        video_id, f4m_id='hds', fatal=False))
                elif ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        stream, video_id, 'mp4', 'm3u8_native',
                        m3u8_id='hls', fatal=False))
        self._sort_formats(formats)
        return {
@ -149,7 +112,7 @@ class TelecincoIE(InfoExtractor):
            'title': title,
            'formats': formats,
            'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
-            'duration': duration,
+            'duration': int_or_none(content.get('dataDuration')),
        }
    def _real_extract(self, url):
--- a/youtube_dlc/extractor/toggle.py
+++ b/youtube_dlc/extractor/toggle.py
@ -200,7 +200,7 @@ class ToggleIE(InfoExtractor):
 class MeWatchIE(InfoExtractor):
    IE_NAME = 'mewatch'
-    _VALID_URL = r'https?://(?:www\.)?mewatch\.sg/watch/[^/?#&]+-(?P<id>[0-9]+)'
+    _VALID_URL = r'https?://(?:(?:www|live)\.)?mewatch\.sg/watch/[^/?#&]+-(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371',
        'info_dict': {
@ -220,6 +220,9 @@ class MeWatchIE(InfoExtractor):
    }, {
        'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-%E6%90%9C%E5%AF%86%E3%80%82%E6%89%93%E5%8D%A1%E3%80%82%E5%B0%8F%E7%BA%A2%E7%82%B9-S2-E1-176232',
        'only_matching': True,
    }, {
        'url': 'https://live.mewatch.sg/watch/Recipe-Of-Life-E41-189759',
        'only_matching': True,
    }]
    def _real_extract(self, url):
--- a/youtube_dlc/extractor/vimeo.py
+++ b/youtube_dlc/extractor/vimeo.py
@ -1119,6 +1119,12 @@ class VHXEmbedIE(VimeoBaseInfoExtractor):
    IE_NAME = 'vhx:embed'
    _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
    @staticmethod
    def _extract_url(webpage):
        mobj = re.search(
            r'<iframe[^>]+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage)
        return unescapeHTML(mobj.group(1)) if mobj else None
    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
@ -1127,5 +1133,6 @@ class VHXEmbedIE(VimeoBaseInfoExtractor):
            'ott data'), video_id, js_to_json)['config_url']
        config = self._download_json(config_url, video_id)
        info = self._parse_config(config, video_id)
        info['id'] = video_id
        self._vimeo_sort_formats(info['formats'])
        return info
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter
 from ..swfinterp import SWFInterpreter
 from ..compat import (
    compat_chr,
    compat_HTTPError,
    compat_kwargs,
    compat_parse_qs,
    compat_urllib_parse_unquote,
@ -64,7 +65,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
    _RESERVED_NAMES = (
-        r'embed|e|channel|c|user|playlist|watch|w|v|results|shared|'
+        r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
        r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')
@ -303,6 +304,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
    _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
    _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
    _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
    def _call_api(self, ep, query, video_id):
        data = self._DEFAULT_API_DATA.copy()
@ -320,7 +322,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
    def _extract_yt_initial_data(self, video_id, webpage):
        return self._parse_json(
            self._search_regex(
-                (r'%s\s*\n' % self._YT_INITIAL_DATA_RE,
+                (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
                 self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
            video_id)
@ -345,7 +347,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                            # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
                            (?:(?:www|dev)\.)?invidio\.us/|
                            (?:(?:www|no)\.)?invidiou\.sh/|
-                            (?:(?:www|fi|de)\.)?invidious\.snopyta\.org/|
+                            (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
                            (?:www\.)?invidious\.kabi\.tk/|
                            (?:www\.)?invidious\.13ad\.de/|
                            (?:www\.)?invidious\.mastodon\.host/|
@ -1120,6 +1122,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                'skip_download': True,
            },
        },
        {
            # another example of '};' in ytInitialData
            'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
            'only_matching': True,
        },
        {
            'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
            'only_matching': True,
        },
    ]
    def __init__(self, *args, **kwargs):
@ -1779,7 +1790,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
        if not video_info and not player_response:
            player_response = extract_player_response(
                self._search_regex(
-                    (r'%s\s*(?:var\s+meta|</script|\n)' % self._YT_INITIAL_PLAYER_RESPONSE_RE,
+                    (r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
                     self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
                    'initial player response', default='{}'),
                video_id)
@ -2830,6 +2841,11 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
        'only_matching': True,
    }]
    @classmethod
    def suitable(cls, url):
        return False if YoutubeIE.suitable(url) else super(
            YoutubeTabIE, cls).suitable(url)
    def _extract_channel_id(self, webpage):
        channel_id = self._html_search_meta(
            'channelId', webpage, 'channel id', default=None)
@ -3143,10 +3159,24 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
        for page_num in itertools.count(1):
            if not continuation:
                break
-            browse = self._download_json(
+            count = 0
-                'https://www.youtube.com/browse_ajax', None,
+            retries = 3
-                'Downloading page %d' % page_num,
+            while count <= retries:
-                headers=headers, query=continuation, fatal=False)
+                try:
                    # Downloading page may result in intermittent 5xx HTTP error
                    # that is usually worked around with a retry
                    browse = self._download_json(
                        'https://www.youtube.com/browse_ajax', None,
                        'Downloading page %d%s'
                        % (page_num, ' (retry #%d)' % count if count else ''),
                        headers=headers, query=continuation)
                    break
                except ExtractorError as e:
                    if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
                        count += 1
                        if count <= retries:
                            continue
                    raise
            if not browse:
                break
            response = try_get(browse, lambda x: x[1]['response'], dict)
--- a/youtube_dlc/extractor/zype.py
+++ b/youtube_dlc/extractor/zype.py
@ -85,7 +85,13 @@ class ZypeIE(InfoExtractor):
        else:
            m3u8_url = self._search_regex(
                r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
-                body, 'm3u8 url', group='url')
+                body, 'm3u8 url', group='url', default=None)
            if not m3u8_url:
                source = self._parse_json(self._search_regex(
                    r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body,
                    'source'), video_id, js_to_json)
                if source.get('integration') == 'verizon-media':
                    m3u8_url = 'https://content.uplynk.com/%s.m3u8' % source['id']
            formats = self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
            text_tracks = self._search_regex(