Update to 2020.12.26

5 years ago · 8bff4f84b5
parent 3221ffac20
commit 8bff4f84b5
14 changed files with 425 additions and 281 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -112,6 +112,7 @@
 - **blinkx**
 - **Bloomberg**
 - **BokeCC**
+ - **BongaCams**
 - **BostonGlobe**
 - **Box**
 - **Bpb**: Bundeszentrale für politische Bildung
@@ -146,6 +147,7 @@
 - **CBS**
 - **CBSInteractive**
 - **CBSLocal**
+ - **CBSLocalArticle**
 - **cbsnews**: CBS News
 - **cbsnews:embed**
 - **cbsnews:livevideo**: CBS News Live Videos
@@ -198,6 +200,7 @@
 - **CSNNE**
 - **CSpan**: C-SPAN
 - **CtsNews**: 華視新聞
+ - **CTV**
 - **CTVNews**
 - **cu.ntv.co.jp**: Nippon Television Network
 - **Culturebox**
@@ -1119,6 +1122,7 @@
 - **WeiboMobile**
 - **WeiqiTV**: WQTV
 - **Wistia**
+ - **WistiaPlaylist**
 - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
 - **WorldStarHipHop**
 - **WSJ**: Wall Street Journal
--- a/test/test_all_urls.py
+++ b/test/test_all_urls.py
@@ -39,7 +39,7 @@ class TestAllURLsMatching(unittest.TestCase):
        assertTab('https://www.youtube.com/embedded')
        assertTab('https://www.youtube.com/feed')  # Own channel's home page
        assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q')
-        assertPlaylist('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
+        assertTab('https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8')
        assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC')
        assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')  # 668
        self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M'))
@@ -60,8 +60,8 @@ class TestAllURLsMatching(unittest.TestCase):
        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec')
        assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos')

-    # def test_youtube_user_matching(self):
-    #     self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])
+    def test_youtube_user_matching(self):
+        self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab'])

    def test_youtube_feeds(self):
        self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab'])
--- a/youtube_dlc/extractor/bbc.py
+++ b/youtube_dlc/extractor/bbc.py
@@ -49,22 +49,17 @@ class BBCCoUkIE(InfoExtractor):
    _LOGIN_URL = 'https://account.bbc.com/signin'
    _NETRC_MACHINE = 'bbc'

-    _MEDIASELECTOR_URLS = [
+    _MEDIA_SELECTOR_URL_TEMPL = 'https://open.live.bbc.co.uk/mediaselector/6/select/version/2.0/mediaset/%s/vpid/%s'
+    _MEDIA_SETS = [
        # Provides HQ HLS streams with even better quality that pc mediaset but fails
        # with geolocation in some cases when it's even not geo restricted at all (e.g.
        # http://www.bbc.co.uk/programmes/b06bp7lf). Also may fail with selectionunavailable.
-        'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
-        'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/pc/vpid/%s',
+        'iptv-all',
+        'pc',
    ]

-    _MEDIASELECTION_NS = 'http://bbc.co.uk/2008/mp/mediaselection'
    _EMP_PLAYLIST_NS = 'http://bbc.co.uk/2008/emp/playlist'

-    _NAMESPACES = (
-        _MEDIASELECTION_NS,
-        _EMP_PLAYLIST_NS,
-    )
-
    _TESTS = [
        {
            'url': 'http://www.bbc.co.uk/programmes/b039g8p7',
@@ -261,8 +256,6 @@ class BBCCoUkIE(InfoExtractor):
            'only_matching': True,
        }]

-    _USP_RE = r'/([^/]+?)\.ism(?:\.hlsv2\.ism)?/[^/]+\.m3u8'
-
    def _login(self):
        username, password = self._get_login_info()
        if username is None:
@@ -307,22 +300,14 @@ class BBCCoUkIE(InfoExtractor):
    def _extract_items(self, playlist):
        return playlist.findall('./{%s}item' % self._EMP_PLAYLIST_NS)

-    def _findall_ns(self, element, xpath):
-        elements = []
-        for ns in self._NAMESPACES:
-            elements.extend(element.findall(xpath % ns))
-        return elements
-
    def _extract_medias(self, media_selection):
-        error = media_selection.find('./{%s}error' % self._MEDIASELECTION_NS)
-        if error is None:
-            media_selection.find('./{%s}error' % self._EMP_PLAYLIST_NS)
-        if error is not None:
-            raise BBCCoUkIE.MediaSelectionError(error.get('id'))
-        return self._findall_ns(media_selection, './{%s}media')
+        error = media_selection.get('result')
+        if error:
+            raise BBCCoUkIE.MediaSelectionError(error)
+        return media_selection.get('media') or []

    def _extract_connections(self, media):
-        return self._findall_ns(media, './{%s}connection')
+        return media.get('connection') or []

    def _get_subtitles(self, media, programme_id):
        subtitles = {}
@@ -334,13 +319,13 @@ class BBCCoUkIE(InfoExtractor):
                cc_url, programme_id, 'Downloading captions', fatal=False)
            if not isinstance(captions, compat_etree_Element):
                continue
-            lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
-            subtitles[lang] = [
+            subtitles['en'] = [
                {
                    'url': connection.get('href'),
                    'ext': 'ttml',
                },
            ]
+            break
        return subtitles

    def _raise_extractor_error(self, media_selection_error):
@@ -350,10 +335,10 @@ class BBCCoUkIE(InfoExtractor):

    def _download_media_selector(self, programme_id):
        last_exception = None
-        for mediaselector_url in self._MEDIASELECTOR_URLS:
+        for media_set in self._MEDIA_SETS:
            try:
                return self._download_media_selector_url(
-                    mediaselector_url % programme_id, programme_id)
+                    self._MEDIA_SELECTOR_URL_TEMPL % (media_set, programme_id), programme_id)
            except BBCCoUkIE.MediaSelectionError as e:
                if e.id in ('notukerror', 'geolocation', 'selectionunavailable'):
                    last_exception = e
@@ -362,8 +347,8 @@ class BBCCoUkIE(InfoExtractor):
        self._raise_extractor_error(last_exception)

    def _download_media_selector_url(self, url, programme_id=None):
-        media_selection = self._download_xml(
-            url, programme_id, 'Downloading media selection XML',
+        media_selection = self._download_json(
+            url, programme_id, 'Downloading media selection JSON',
            expected_status=(403, 404))
        return self._process_media_selector(media_selection, programme_id)

@@ -377,7 +362,6 @@ class BBCCoUkIE(InfoExtractor):
            if kind in ('video', 'audio'):
                bitrate = int_or_none(media.get('bitrate'))
                encoding = media.get('encoding')
-                service = media.get('service')
                width = int_or_none(media.get('width'))
                height = int_or_none(media.get('height'))
                file_size = int_or_none(media.get('media_file_size'))
@@ -392,8 +376,6 @@ class BBCCoUkIE(InfoExtractor):
                    supplier = connection.get('supplier')
                    transfer_format = connection.get('transferFormat')
                    format_id = supplier or conn_kind or protocol
-                    if service:
-                        format_id = '%s_%s' % (service, format_id)
                    # ASX playlist
                    if supplier == 'asx':
                        for i, ref in enumerate(self._extract_asx_playlist(connection, programme_id)):
@@ -408,20 +390,11 @@ class BBCCoUkIE(InfoExtractor):
                        formats.extend(self._extract_m3u8_formats(
                            href, programme_id, ext='mp4', entry_protocol='m3u8_native',
                            m3u8_id=format_id, fatal=False))
-                        if re.search(self._USP_RE, href):
-                            usp_formats = self._extract_m3u8_formats(
-                                re.sub(self._USP_RE, r'/\1.ism/\1.m3u8', href),
-                                programme_id, ext='mp4', entry_protocol='m3u8_native',
-                                m3u8_id=format_id, fatal=False)
-                            for f in usp_formats:
-                                if f.get('height') and f['height'] > 720:
-                                    continue
-                                formats.append(f)
                    elif transfer_format == 'hds':
                        formats.extend(self._extract_f4m_formats(
                            href, programme_id, f4m_id=format_id, fatal=False))
                    else:
-                        if not service and not supplier and bitrate:
+                        if not supplier and bitrate:
                            format_id += '-%d' % bitrate
                        fmt = {
                            'format_id': format_id,
@@ -554,7 +527,7 @@ class BBCCoUkIE(InfoExtractor):
        webpage = self._download_webpage(url, group_id, 'Downloading video page')

        error = self._search_regex(
-            r'<div\b[^>]+\bclass=["\']smp__message delta["\'][^>]*>([^<]+)<',
+            r'<div\b[^>]+\bclass=["\'](?:smp|playout)__message delta["\'][^>]*>\s*([^<]+?)\s*<',
            webpage, 'error', default=None)
        if error:
            raise ExtractorError(error, expected=True)
@@ -607,16 +580,9 @@ class BBCIE(BBCCoUkIE):
    IE_DESC = 'BBC'
    _VALID_URL = r'https?://(?:www\.)?bbc\.(?:com|co\.uk)/(?:[^/]+/)+(?P<id>[^/#?]+)'

-    _MEDIASELECTOR_URLS = [
-        # Provides HQ HLS streams but fails with geolocation in some cases when it's
-        # even not geo restricted at all
-        'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/iptv-all/vpid/%s',
-        # Provides more formats, namely direct mp4 links, but fails on some videos with
-        # notukerror for non UK (?) users (e.g.
-        # http://www.bbc.com/travel/story/20150625-sri-lankas-spicy-secret)
-        'http://open.live.bbc.co.uk/mediaselector/4/mtis/stream/%s',
-        # Provides fewer formats, but works everywhere for everybody (hopefully)
-        'http://open.live.bbc.co.uk/mediaselector/5/select/version/2.0/mediaset/journalism-pc/vpid/%s',
+    _MEDIA_SETS = [
+        'mobile-tablet-main',
+        'pc',
    ]

    _TESTS = [{
--- a/youtube_dlc/extractor/bongacams.py
+++ b/youtube_dlc/extractor/bongacams.py
@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..compat import compat_str
+from ..utils import (
+    int_or_none,
+    try_get,
+    urlencode_postdata,
+)
+
+
+class BongaCamsIE(InfoExtractor):
+    _VALID_URL = r'https?://(?P<host>(?:[^/]+\.)?bongacams\d*\.com)/(?P<id>[^/?&#]+)'
+    _TESTS = [{
+        'url': 'https://de.bongacams.com/azumi-8',
+        'only_matching': True,
+    }, {
+        'url': 'https://cn.bongacams.com/azumi-8',
+        'only_matching': True,
+    }]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        host = mobj.group('host')
+        channel_id = mobj.group('id')
+
+        amf = self._download_json(
+            'https://%s/tools/amf.php' % host, channel_id,
+            data=urlencode_postdata((
+                ('method', 'getRoomData'),
+                ('args[]', channel_id),
+                ('args[]', 'false'),
+            )), headers={'X-Requested-With': 'XMLHttpRequest'})
+
+        server_url = amf['localData']['videoServerUrl']
+
+        uploader_id = try_get(
+            amf, lambda x: x['performerData']['username'], compat_str) or channel_id
+        uploader = try_get(
+            amf, lambda x: x['performerData']['displayName'], compat_str)
+        like_count = int_or_none(try_get(
+            amf, lambda x: x['performerData']['loversCount']))
+
+        formats = self._extract_m3u8_formats(
+            '%s/hls/stream_%s/playlist.m3u8' % (server_url, uploader_id),
+            channel_id, 'mp4', m3u8_id='hls', live=True)
+        self._sort_formats(formats)
+
+        return {
+            'id': channel_id,
+            'title': self._live_title(uploader or uploader_id),
+            'uploader': uploader,
+            'uploader_id': uploader_id,
+            'like_count': like_count,
+            'age_limit': 18,
+            'is_live': True,
+            'formats': formats,
+        }
--- a/youtube_dlc/extractor/extractors.py
+++ b/youtube_dlc/extractor/extractors.py
@@ -127,6 +127,7 @@ from .bleacherreport import (
 from .blinkx import BlinkxIE
 from .bloomberg import BloombergIE
 from .bokecc import BokeCCIE
+from .bongacams import BongaCamsIE
 from .bostonglobe import BostonGlobeIE
 from .box import BoxIE
 from .bpb import BpbIE
@@ -1492,7 +1493,10 @@ from .weibo import (
    WeiboMobileIE
 )
 from .weiqitv import WeiqiTVIE
-from .wistia import WistiaIE
+from .wistia import (
+    WistiaIE,
+    WistiaPlaylistIE,
+)
 from .worldstarhiphop import WorldStarHipHopIE
 from .wsj import (
    WSJIE,
--- a/youtube_dlc/extractor/generic.py
+++ b/youtube_dlc/extractor/generic.py
@@ -2022,22 +2022,6 @@ class GenericIE(InfoExtractor):
            },
            'add_ie': [SpringboardPlatformIE.ie_key()],
        },
-        {
-            'url': 'https://www.youtube.com/shared?ci=1nEzmT-M4fU',
-            'info_dict': {
-                'id': 'uPDB5I9wfp8',
-                'ext': 'webm',
-                'title': 'Pocoyo: 90 minutos de episódios completos Português para crianças - PARTE 3',
-                'description': 'md5:d9e4d9346a2dfff4c7dc4c8cec0f546d',
-                'upload_date': '20160219',
-                'uploader': 'Pocoyo - Português (BR)',
-                'uploader_id': 'PocoyoBrazil',
-            },
-            'add_ie': [YoutubeIE.ie_key()],
-            'params': {
-                'skip_download': True,
-            },
-        },
        {
            'url': 'https://www.yapfiles.ru/show/1872528/690b05d3054d2dbe1e69523aa21bb3b1.mp4.html',
            'info_dict': {
--- a/youtube_dlc/extractor/instagram.py
+++ b/youtube_dlc/extractor/instagram.py
@@ -22,7 +22,7 @@ from ..utils import (


 class InstagramIE(InfoExtractor):
-    _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv)/(?P<id>[^/?#&]+))'
+    _VALID_URL = r'(?P<url>https?://(?:www\.)?instagram\.com/(?:p|tv|reel)/(?P<id>[^/?#&]+))'
    _TESTS = [{
        'url': 'https://instagram.com/p/aye83DjauH/?foo=bar#abc',
        'md5': '0d2da106a9d2631273e192b372806516',
@@ -35,7 +35,7 @@ class InstagramIE(InfoExtractor):
            'timestamp': 1371748545,
            'upload_date': '20130620',
            'uploader_id': 'naomipq',
-            'uploader': 'Naomi Leonor Phan-Quang',
+            'uploader': 'B E A U T Y  F O R  A S H E S',
            'like_count': int,
            'comment_count': int,
            'comments': list,
@@ -95,6 +95,9 @@ class InstagramIE(InfoExtractor):
    }, {
        'url': 'https://www.instagram.com/tv/aye83DjauH/',
        'only_matching': True,
+    }, {
+        'url': 'https://www.instagram.com/reel/CDUMkliABpa/',
+        'only_matching': True,
    }]

    @staticmethod
@@ -122,81 +125,92 @@ class InstagramIE(InfoExtractor):

        webpage = self._download_webpage(url, video_id)

-        (video_url, description, thumbnail, timestamp, uploader,
+        (media, video_url, description, thumbnail, timestamp, uploader,
         uploader_id, like_count, comment_count, comments, height,
-         width) = [None] * 11
-
-        shared_data = try_get(webpage,
-                              (lambda x: self._parse_json(
-                                  self._search_regex(
-                                      r'window\.__additionalDataLoaded\(\'/(?:p|tv)/(?:[^/?#&]+)/\',({.+?})\);',
-                                      x, 'additional data', default='{}'),
-                                  video_id, fatal=False),
-                               lambda x: self._parse_json(
-                                  self._search_regex(
-                                      r'window\._sharedData\s*=\s*({.+?});',
-                                      x, 'shared data', default='{}'),
-                                  video_id, fatal=False)['entry_data']['PostPage'][0]),
-                              None)
+         width) = [None] * 12
+
+        shared_data = self._parse_json(
+            self._search_regex(
+                r'window\._sharedData\s*=\s*({.+?});',
+                webpage, 'shared data', default='{}'),
+            video_id, fatal=False)
        if shared_data:
            media = try_get(
                shared_data,
-                (lambda x: x['graphql']['shortcode_media'],
-                 lambda x: x['media']),
+                (lambda x: x['entry_data']['PostPage'][0]['graphql']['shortcode_media'],
+                 lambda x: x['entry_data']['PostPage'][0]['media']),
                dict)
-            if media:
-                video_url = media.get('video_url')
-                height = int_or_none(media.get('dimensions', {}).get('height'))
-                width = int_or_none(media.get('dimensions', {}).get('width'))
-                description = try_get(
-                    media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
-                    compat_str) or media.get('caption')
-                thumbnail = media.get('display_src') or media.get('thumbnail_src')
-                timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
-                uploader = media.get('owner', {}).get('full_name')
-                uploader_id = media.get('owner', {}).get('username')
-
-                def get_count(key, kind):
-                    return int_or_none(try_get(
+        # _sharedData.entry_data.PostPage is empty when authenticated (see
+        # https://github.com/ytdl-org/youtube-dl/pull/22880)
+        if not media:
+            additional_data = self._parse_json(
+                self._search_regex(
+                    r'window\.__additionalDataLoaded\s*\(\s*[^,]+,\s*({.+?})\s*\)\s*;',
+                    webpage, 'additional data', default='{}'),
+                video_id, fatal=False)
+            if additional_data:
+                media = try_get(
+                    additional_data, lambda x: x['graphql']['shortcode_media'],
+                    dict)
+        if media:
+            video_url = media.get('video_url')
+            height = int_or_none(media.get('dimensions', {}).get('height'))
+            width = int_or_none(media.get('dimensions', {}).get('width'))
+            description = try_get(
+                media, lambda x: x['edge_media_to_caption']['edges'][0]['node']['text'],
+                compat_str) or media.get('caption')
+            thumbnail = media.get('display_src') or media.get('display_url')
+            timestamp = int_or_none(media.get('taken_at_timestamp') or media.get('date'))
+            uploader = media.get('owner', {}).get('full_name')
+            uploader_id = media.get('owner', {}).get('username')
+
+            def get_count(keys, kind):
+                if not isinstance(keys, (list, tuple)):
+                    keys = [keys]
+                for key in keys:
+                    count = int_or_none(try_get(
                        media, (lambda x: x['edge_media_%s' % key]['count'],
                                lambda x: x['%ss' % kind]['count'])))
-                like_count = get_count('preview_like', 'like')
-                comment_count = get_count('to_comment', 'comment')
-
-                comments = [{
-                    'author': comment.get('user', {}).get('username'),
-                    'author_id': comment.get('user', {}).get('id'),
-                    'id': comment.get('id'),
-                    'text': comment.get('text'),
-                    'timestamp': int_or_none(comment.get('created_at')),
-                } for comment in media.get(
-                    'comments', {}).get('nodes', []) if comment.get('text')]
-                if not video_url:
-                    edges = try_get(
-                        media, lambda x: x['edge_sidecar_to_children']['edges'],
-                        list) or []
-                    if edges:
-                        entries = []
-                        for edge_num, edge in enumerate(edges, start=1):
-                            node = try_get(edge, lambda x: x['node'], dict)
-                            if not node:
-                                continue
-                            node_video_url = url_or_none(node.get('video_url'))
-                            if not node_video_url:
-                                continue
-                            entries.append({
-                                'id': node.get('shortcode') or node['id'],
-                                'title': 'Video %d' % edge_num,
-                                'url': node_video_url,
-                                'thumbnail': node.get('display_url'),
-                                'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
-                                'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
-                                'view_count': int_or_none(node.get('video_view_count')),
-                            })
-                        return self.playlist_result(
-                            entries, video_id,
-                            'Post by %s' % uploader_id if uploader_id else None,
-                            description)
+                    if count is not None:
+                        return count
+            like_count = get_count('preview_like', 'like')
+            comment_count = get_count(
+                ('preview_comment', 'to_comment', 'to_parent_comment'), 'comment')
+
+            comments = [{
+                'author': comment.get('user', {}).get('username'),
+                'author_id': comment.get('user', {}).get('id'),
+                'id': comment.get('id'),
+                'text': comment.get('text'),
+                'timestamp': int_or_none(comment.get('created_at')),
+            } for comment in media.get(
+                'comments', {}).get('nodes', []) if comment.get('text')]
+            if not video_url:
+                edges = try_get(
+                    media, lambda x: x['edge_sidecar_to_children']['edges'],
+                    list) or []
+                if edges:
+                    entries = []
+                    for edge_num, edge in enumerate(edges, start=1):
+                        node = try_get(edge, lambda x: x['node'], dict)
+                        if not node:
+                            continue
+                        node_video_url = url_or_none(node.get('video_url'))
+                        if not node_video_url:
+                            continue
+                        entries.append({
+                            'id': node.get('shortcode') or node['id'],
+                            'title': 'Video %d' % edge_num,
+                            'url': node_video_url,
+                            'thumbnail': node.get('display_url'),
+                            'width': int_or_none(try_get(node, lambda x: x['dimensions']['width'])),
+                            'height': int_or_none(try_get(node, lambda x: x['dimensions']['height'])),
+                            'view_count': int_or_none(node.get('video_view_count')),
+                        })
+                    return self.playlist_result(
+                        entries, video_id,
+                        'Post by %s' % uploader_id if uploader_id else None,
+                        description)

        if not video_url:
            video_url = self._og_search_video_url(webpage, secure=False)
--- a/youtube_dlc/extractor/pornhub.py
+++ b/youtube_dlc/extractor/pornhub.py
@@ -288,14 +288,24 @@ class PornHubIE(PornHubBaseIE):
            video_urls.append((v_url, None))
            video_urls_set.add(v_url)

+        def parse_quality_items(quality_items):
+            q_items = self._parse_json(quality_items, video_id, fatal=False)
+            if not isinstance(q_items, list):
+                return
+            for item in q_items:
+                if isinstance(item, dict):
+                    add_video_url(item.get('url'))
+
        if not video_urls:
-            FORMAT_PREFIXES = ('media', 'quality')
+            FORMAT_PREFIXES = ('media', 'quality', 'qualityItems')
            js_vars = extract_js_vars(
                webpage, r'(var\s+(?:%s)_.+)' % '|'.join(FORMAT_PREFIXES),
                default=None)
            if js_vars:
                for key, format_url in js_vars.items():
-                    if any(key.startswith(p) for p in FORMAT_PREFIXES):
+                    if key.startswith(FORMAT_PREFIXES[-1]):
+                        parse_quality_items(format_url)
+                    elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]):
                        add_video_url(format_url)
            if not video_urls and re.search(
                    r'<[^>]+\bid=["\']lockedPlayer', webpage):
@@ -351,12 +361,16 @@ class PornHubIE(PornHubBaseIE):
            r'(?s)From:&nbsp;.+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<',
            webpage, 'uploader', default=None)

+        def extract_vote_count(kind, name):
+            return self._extract_count(
+                (r'<span[^>]+\bclass="votes%s"[^>]*>([\d,\.]+)</span>' % kind,
+                 r'<span[^>]+\bclass=["\']votes%s["\'][^>]*\bdata-rating=["\'](\d+)' % kind),
+                webpage, name)
+
        view_count = self._extract_count(
            r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view')
-        like_count = self._extract_count(
-            r'<span[^>]+class="votesUp"[^>]*>([\d,\.]+)</span>', webpage, 'like')
-        dislike_count = self._extract_count(
-            r'<span[^>]+class="votesDown"[^>]*>([\d,\.]+)</span>', webpage, 'dislike')
+        like_count = extract_vote_count('Up', 'like')
+        dislike_count = extract_vote_count('Down', 'dislike')
        comment_count = self._extract_count(
            r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment')

--- a/youtube_dlc/extractor/spankbang.py
+++ b/youtube_dlc/extractor/spankbang.py
@@ -7,17 +7,24 @@ from ..utils import (
    determine_ext,
    ExtractorError,
    merge_dicts,
-    orderedSet,
    parse_duration,
    parse_resolution,
    str_to_int,
    url_or_none,
    urlencode_postdata,
+    urljoin,
 )


 class SpankBangIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/(?:video|play|embed)\b'
+    _VALID_URL = r'''(?x)
+                    https?://
+                        (?:[^/]+\.)?spankbang\.com/
+                        (?:
+                            (?P<id>[\da-z]+)/(?:video|play|embed)\b|
+                            [\da-z]+-(?P<id_2>[\da-z]+)/playlist/[^/?#&]+
+                        )
+                    '''
    _TESTS = [{
        'url': 'http://spankbang.com/3vvn/video/fantasy+solo',
        'md5': '1cc433e1d6aa14bc376535b8679302f7',
@@ -57,10 +64,14 @@ class SpankBangIE(InfoExtractor):
    }, {
        'url': 'https://spankbang.com/2y3td/embed/',
        'only_matching': True,
+    }, {
+        'url': 'https://spankbang.com/2v7ik-7ecbgu/playlist/latina+booty',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
-        video_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id') or mobj.group('id_2')
        webpage = self._download_webpage(
            url.replace('/%s/embed' % video_id, '/%s/video' % video_id),
            video_id, headers={'Cookie': 'country=US'})
@@ -155,30 +166,33 @@ class SpankBangIE(InfoExtractor):


 class SpankBangPlaylistIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/[^/]+'
+    _VALID_URL = r'https?://(?:[^/]+\.)?spankbang\.com/(?P<id>[\da-z]+)/playlist/(?P<display_id>[^/]+)'
    _TEST = {
        'url': 'https://spankbang.com/ug0k/playlist/big+ass+titties',
        'info_dict': {
            'id': 'ug0k',
            'title': 'Big Ass Titties',
        },
-        'playlist_mincount': 50,
+        'playlist_mincount': 40,
    }

    def _real_extract(self, url):
-        playlist_id = self._match_id(url)
+        mobj = re.match(self._VALID_URL, url)
+        playlist_id = mobj.group('id')
+        display_id = mobj.group('display_id')

        webpage = self._download_webpage(
            url, playlist_id, headers={'Cookie': 'country=US; mobile=on'})

        entries = [self.url_result(
-            'https://spankbang.com/%s/video' % video_id,
-            ie=SpankBangIE.ie_key(), video_id=video_id)
-            for video_id in orderedSet(re.findall(
-                r'<a[^>]+\bhref=["\']/?([\da-z]+)/play/', webpage))]
+            urljoin(url, mobj.group('path')),
+            ie=SpankBangIE.ie_key(), video_id=mobj.group('id'))
+            for mobj in re.finditer(
+                r'<a[^>]+\bhref=(["\'])(?P<path>/?[\da-z]+-(?P<id>[\da-z]+)/playlist/%s(?:(?!\1).)*)\1'
+                % re.escape(display_id), webpage)]

        title = self._html_search_regex(
-            r'<h1>([^<]+)\s+playlist</h1>', webpage, 'playlist title',
+            r'<h1>([^<]+)\s+playlist\s*<', webpage, 'playlist title',
            fatal=False)

        return self.playlist_result(entries, playlist_id, title)
--- a/youtube_dlc/extractor/sprout.py
+++ b/youtube_dlc/extractor/sprout.py
@@ -3,50 +3,62 @@ from __future__ import unicode_literals

 from .adobepass import AdobePassIE
 from ..utils import (
-    extract_attributes,
-    update_url_query,
+    int_or_none,
    smuggle_url,
+    update_url_query,
 )


 class SproutIE(AdobePassIE):
-    _VALID_URL = r'https?://(?:www\.)?sproutonline\.com/watch/(?P<id>[^/?#]+)'
-    _TEST = {
-        'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
-        'md5': '74bf14128578d1e040c3ebc82088f45f',
+    _VALID_URL = r'https?://(?:www\.)?(?:sproutonline|universalkids)\.com/(?:watch|(?:[^/]+/)*videos)/(?P<id>[^/?#]+)'
+    _TESTS = [{
+        'url': 'https://www.universalkids.com/shows/remy-and-boo/season/1/videos/robot-bike-race',
        'info_dict': {
-            'id': '9dexnwtmh8_X',
+            'id': 'bm0foJFaTKqb',
            'ext': 'mp4',
-            'title': 'A Cowboy Adventure',
-            'description': 'Ruff-Ruff, Tweet and Dave get to be cowboys for the day at Six Cow Corral.',
-            'timestamp': 1437758640,
-            'upload_date': '20150724',
-            'uploader': 'NBCU-SPROUT-NEW',
-        }
-    }
+            'title': 'Robot Bike Race',
+            'description': 'md5:436b1d97117cc437f54c383f4debc66d',
+            'timestamp': 1606148940,
+            'upload_date': '20201123',
+            'uploader': 'NBCU-MPAT',
+        },
+        'params': {
+            'skip_download': True,
+        },
+    }, {
+        'url': 'http://www.sproutonline.com/watch/cowboy-adventure',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.universalkids.com/watch/robot-bike-race',
+        'only_matching': True,
+    }]
+    _GEO_COUNTRIES = ['US']

    def _real_extract(self, url):
-        video_id = self._match_id(url)
-        webpage = self._download_webpage(url, video_id)
-        video_component = self._search_regex(
-            r'(?s)(<div[^>]+data-component="video"[^>]*?>)',
-            webpage, 'video component', default=None)
-        if video_component:
-            options = self._parse_json(extract_attributes(
-                video_component)['data-options'], video_id)
-            theplatform_url = options['video']
-            query = {
-                'mbr': 'true',
-                'manifest': 'm3u',
-            }
-            if options.get('protected'):
-                query['auth'] = self._extract_mvpd_auth(url, options['pid'], 'sprout', 'sprout')
-            theplatform_url = smuggle_url(update_url_query(
-                theplatform_url, query), {'force_smil_url': True})
-        else:
-            iframe = self._search_regex(
-                r'(<iframe[^>]+id="sproutVideoIframe"[^>]*?>)',
-                webpage, 'iframe')
-            theplatform_url = extract_attributes(iframe)['src']
-
-        return self.url_result(theplatform_url, 'ThePlatform')
+        display_id = self._match_id(url)
+        mpx_metadata = self._download_json(
+            # http://nbcuunikidsprod.apps.nbcuni.com/networks/universalkids/content/videos/
+            'https://www.universalkids.com/_api/videos/' + display_id,
+            display_id)['mpxMetadata']
+        media_pid = mpx_metadata['mediaPid']
+        theplatform_url = 'https://link.theplatform.com/s/HNK2IC/' + media_pid
+        query = {
+            'mbr': 'true',
+            'manifest': 'm3u',
+        }
+        if mpx_metadata.get('entitlement') == 'auth':
+            query['auth'] = self._extract_mvpd_auth(url, media_pid, 'sprout', 'sprout')
+        theplatform_url = smuggle_url(
+            update_url_query(theplatform_url, query), {
+                'force_smil_url': True,
+                'geo_countries': self._GEO_COUNTRIES,
+            })
+        return {
+            '_type': 'url_transparent',
+            'id': media_pid,
+            'url': theplatform_url,
+            'series': mpx_metadata.get('seriesName'),
+            'season_number': int_or_none(mpx_metadata.get('seasonNumber')),
+            'episode_number': int_or_none(mpx_metadata.get('episodeNumber')),
+            'ie_key': 'ThePlatform',
+        }
--- a/youtube_dlc/extractor/theplatform.py
+++ b/youtube_dlc/extractor/theplatform.py
@ -234,6 +234,9 @@ class ThePlatformIE(ThePlatformBaseIE, AdobePassIE):

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})
+        self._initialize_geo_bypass({
+            'countries': smuggled_data.get('geo_countries'),
+        })

        mobj = re.match(self._VALID_URL, url)
        provider_id = mobj.group('provider_id')
--- a/youtube_dlc/extractor/theweatherchannel.py
+++ b/youtube_dlc/extractor/theweatherchannel.py
@ -1,18 +1,22 @@
 # coding: utf-8
 from __future__ import unicode_literals

+import json
+import re
+
 from .theplatform import ThePlatformIE
 from ..utils import (
    determine_ext,
    parse_duration,
+    parse_iso8601,
 )


 class TheWeatherChannelIE(ThePlatformIE):
-    _VALID_URL = r'https?://(?:www\.)?weather\.com/(?:[^/]+/)*video/(?P<id>[^/?#]+)'
+    _VALID_URL = r'https?://(?:www\.)?weather\.com(?P<asset_name>(?:/(?P<locale>[a-z]{2}-[A-Z]{2}))?/(?:[^/]+/)*video/(?P<id>[^/?#]+))'
    _TESTS = [{
        'url': 'https://weather.com/series/great-outdoors/video/ice-climber-is-in-for-a-shock',
-        'md5': 'ab924ac9574e79689c24c6b95e957def',
+        'md5': 'c4cbe74c9c17c5676b704b950b73dd92',
        'info_dict': {
            'id': 'cc82397e-cc3f-4d11-9390-a785add090e8',
            'ext': 'mp4',
@ -20,18 +24,33 @@ class TheWeatherChannelIE(ThePlatformIE):
            'description': 'md5:55606ce1378d4c72e6545e160c9d9695',
            'uploader': 'TWC - Digital (No Distro)',
            'uploader_id': '6ccd5455-16bb-46f2-9c57-ff858bb9f62c',
+            'upload_date': '20160720',
+            'timestamp': 1469018835,
        }
+    }, {
+        'url': 'https://weather.com/en-CA/international/videos/video/unidentified-object-falls-from-sky-in-india',
+        'only_matching': True,
    }]

    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        drupal_settings = self._parse_json(self._search_regex(
-            r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);',
-            webpage, 'drupal settings'), display_id)
-        video_id = drupal_settings['twc']['contexts']['node']['uuid']
-        video_data = self._download_json(
-            'https://dsx.weather.com/cms/v4/asset-collection/en_US/' + video_id, video_id)
+        asset_name, locale, display_id = re.match(self._VALID_URL, url).groups()
+        if not locale:
+            locale = 'en-US'
+        video_data = list(self._download_json(
+            'https://weather.com/api/v1/p/redux-dal', display_id, data=json.dumps([{
+                'name': 'getCMSAssetsUrlConfig',
+                'params': {
+                    'language': locale.replace('-', '_'),
+                    'query': {
+                        'assetName': {
+                            '$in': asset_name,
+                        },
+                    },
+                }
+            }]).encode(), headers={
+                'Content-Type': 'application/json',
+            })['dal']['getCMSAssetsUrlConfig'].values())[0]['data'][0]
+        video_id = video_data['id']
        seo_meta = video_data.get('seometa', {})
        title = video_data.get('title') or seo_meta['title']

@ -66,6 +85,8 @@ class TheWeatherChannelIE(ThePlatformIE):
                })
        self._sort_formats(formats)

+        cc_url = video_data.get('cc_url')
+
        return {
            'id': video_id,
            'display_id': display_id,
@ -74,6 +95,8 @@ class TheWeatherChannelIE(ThePlatformIE):
            'duration': parse_duration(video_data.get('duration')),
            'uploader': video_data.get('providername'),
            'uploader_id': video_data.get('providerid'),
+            'timestamp': parse_iso8601(video_data.get('publishdate')),
+            'subtitles': {locale[:2]: [{'url': cc_url}]} if cc_url else None,
            'thumbnails': thumbnails,
            'formats': formats,
        }
--- a/youtube_dlc/extractor/wistia.py
+++ b/youtube_dlc/extractor/wistia.py
@ -5,79 +5,34 @@ import re
 from .common import InfoExtractor
 from ..utils import (
    ExtractorError,
-    int_or_none,
    float_or_none,
+    int_or_none,
+    try_get,
    unescapeHTML,
 )


-class WistiaIE(InfoExtractor):
-    _VALID_URL = r'(?:wistia:|https?://(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/)(?P<id>[a-z0-9]{10})'
+class WistiaBaseIE(InfoExtractor):
+    _VALID_ID_REGEX = r'(?P<id>[a-z0-9]{10})'
+    _VALID_URL_BASE = r'https?://(?:fast\.)?wistia\.(?:net|com)/embed/'
    _EMBED_BASE_URL = 'http://fast.wistia.com/embed/'

-    _TESTS = [{
-        'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
-        'md5': 'cafeb56ec0c53c18c97405eecb3133df',
-        'info_dict': {
-            'id': 'sh7fpupwlt',
-            'ext': 'mov',
-            'title': 'Being Resourceful',
-            'description': 'a Clients From Hell Video Series video from worldwidewebhosting',
-            'upload_date': '20131204',
-            'timestamp': 1386185018,
-            'duration': 117,
-        },
-    }, {
-        'url': 'wistia:sh7fpupwlt',
-        'only_matching': True,
-    }, {
-        # with hls video
-        'url': 'wistia:807fafadvk',
-        'only_matching': True,
-    }, {
-        'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
-        'only_matching': True,
-    }, {
-        'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
-        'only_matching': True,
-    }]
-
-    # https://wistia.com/support/embed-and-share/video-on-your-website
-    @staticmethod
-    def _extract_url(webpage):
-        urls = WistiaIE._extract_urls(webpage)
-        return urls[0] if urls else None
-
-    @staticmethod
-    def _extract_urls(webpage):
-        urls = []
-        for match in re.finditer(
-                r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
-            urls.append(unescapeHTML(match.group('url')))
-        for match in re.finditer(
-                r'''(?sx)
-                    <div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
-                ''', webpage):
-            urls.append('wistia:%s' % match.group('id'))
-        for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
-            urls.append('wistia:%s' % match.group('id'))
-        return urls
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        data_json = self._download_json(
-            self._EMBED_BASE_URL + 'medias/%s.json' % video_id, video_id,
-            # Some videos require this.
-            headers={
-                'Referer': url if url.startswith('http') else self._EMBED_BASE_URL + 'iframe/' + video_id,
+    def _download_embed_config(self, config_type, config_id, referer):
+        # Download the JSON document located at
+        # _EMBED_BASE_URL + '<config_type>s/<config_id>.json' and return
+        # whatever _download_json yields for it.
+        # `referer` is sent as the Referer header when it starts with 'http';
+        # otherwise (e.g. a 'wistia:<id>' pseudo URL) the embed URL built
+        # here is sent instead.
+        # Raises ExtractorError (expected=True) when the response is a dict
+        # carrying a truthy 'error' value.
+        base_url = self._EMBED_BASE_URL + '%ss/%s' % (config_type, config_id)
+        embed_config = self._download_json(
+            base_url + '.json', config_id, headers={
+                'Referer': referer if referer.startswith('http') else base_url,  # Some videos require this.
            })

-        if data_json.get('error'):
+        # 'error' is only looked up when the response is a dict;
+        # WistiaPlaylistIE indexes its response like a sequence.
+        if isinstance(embed_config, dict) and embed_config.get('error'):
            raise ExtractorError(
                'Error while getting the playlist', expected=True)

-        data = data_json['media']
+        return embed_config
+
+    def _extract_media(self, embed_config):
+        data = embed_config['media']
+        video_id = data['hashedId']
        title = data['name']

        formats = []
@ -160,3 +115,85 @@ class WistiaIE(InfoExtractor):
            'timestamp': int_or_none(data.get('createdAt')),
            'subtitles': subtitles,
        }
+
+
+class WistiaIE(WistiaBaseIE):
+    # Extractor for a single Wistia media, addressed either with the
+    # 'wistia:<id>' pseudo scheme or with an embed URL of the form
+    # .../embed/iframe/<id> or .../embed/medias/<id>.
+    _VALID_URL = r'(?:wistia:|%s(?:iframe|medias)/)%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)
+
+    _TESTS = [{
+        # with hls video
+        'url': 'wistia:807fafadvk',
+        'md5': 'daff0f3687a41d9a71b40e0e8c2610fe',
+        'info_dict': {
+            'id': '807fafadvk',
+            'ext': 'mp4',
+            'title': 'Drip Brennan Dunn Workshop',
+            'description': 'a JV Webinars video',
+            'upload_date': '20160518',
+            'timestamp': 1463607249,
+            'duration': 4987.11,
+        },
+    }, {
+        'url': 'wistia:sh7fpupwlt',
+        'only_matching': True,
+    }, {
+        'url': 'http://fast.wistia.net/embed/iframe/sh7fpupwlt',
+        'only_matching': True,
+    }, {
+        'url': 'http://fast.wistia.com/embed/iframe/sh7fpupwlt',
+        'only_matching': True,
+    }, {
+        'url': 'http://fast.wistia.net/embed/medias/sh7fpupwlt.json',
+        'only_matching': True,
+    }]
+
+    # https://wistia.com/support/embed-and-share/video-on-your-website
+    @staticmethod
+    def _extract_url(webpage):
+        # Return the first URL found by _extract_urls, or None when the
+        # page yields none.
+        urls = WistiaIE._extract_urls(webpage)
+        return urls[0] if urls else None
+
+    @staticmethod
+    def _extract_urls(webpage):
+        # Collect Wistia references from `webpage` using three patterns, in
+        # this order; results are appended as found and not de-duplicated.
+        urls = []
+        # 1. Embed URLs in <meta content=...> or <iframe>/<script src=...>;
+        #    the matched URL is HTML-unescaped and kept as is.
+        for match in re.finditer(
+                r'<(?:meta[^>]+?content|(?:iframe|script)[^>]+?src)=["\'](?P<url>(?:https?:)?//(?:fast\.)?wistia\.(?:net|com)/embed/(?:iframe|medias)/[a-z0-9]{10})', webpage):
+            urls.append(unescapeHTML(match.group('url')))
+        # 2. <div> elements whose class attribute contains wistia_async_<id>;
+        #    reported as 'wistia:<id>'.
+        for match in re.finditer(
+                r'''(?sx)
+                    <div[^>]+class=(["'])(?:(?!\1).)*?\bwistia_async_(?P<id>[a-z0-9]{10})\b(?:(?!\1).)*?\1
+                ''', webpage):
+            urls.append('wistia:%s' % match.group('id'))
+        # 3. data-wistia-id / data-wistiaid attributes, Wistia.embed('<id>')
+        #    calls and id="wistia_<id>" attributes; reported as 'wistia:<id>'.
+        for match in re.finditer(r'(?:data-wistia-?id=["\']|Wistia\.embed\(["\']|id=["\']wistia_)(?P<id>[a-z0-9]{10})', webpage):
+            urls.append('wistia:%s' % match.group('id'))
+        return urls
+
+    def _real_extract(self, url):
+        # Fetch the 'media' embed config for the id in `url` (the URL itself
+        # is passed along as the referer) and hand it to _extract_media.
+        video_id = self._match_id(url)
+        embed_config = self._download_embed_config('media', video_id, url)
+        return self._extract_media(embed_config)
+
+
+class WistiaPlaylistIE(WistiaBaseIE):
+    # Extractor for Wistia playlist embeds (.../embed/playlists/<id>).
+    # The URL pieces are read from WistiaBaseIE, the class that defines
+    # them and that this extractor derives from, rather than through the
+    # sibling WistiaIE; the resulting pattern is the same.
+    _VALID_URL = r'%splaylists/%s' % (WistiaBaseIE._VALID_URL_BASE, WistiaBaseIE._VALID_ID_REGEX)
+
+    _TEST = {
+        'url': 'https://fast.wistia.net/embed/playlists/aodt9etokc',
+        'info_dict': {
+            'id': 'aodt9etokc',
+        },
+        'playlist_count': 3,
+    }
+
+    def _real_extract(self, url):
+        # Download the 'playlist' embed config for the id in `url` and build
+        # one entry per media via _extract_media; returns
+        # playlist_result(entries, playlist_id).
+        playlist_id = self._match_id(url)
+        playlist = self._download_embed_config('playlist', playlist_id, url)
+
+        entries = []
+        # Only the 'medias' of the first element of the response are used;
+        # `or []` yields no entries when that lookup gives nothing.
+        for media in (try_get(playlist, lambda x: x[0]['medias']) or []):
+            embed_config = media.get('embed_config')
+            # Medias without an embed config cannot be extracted; skip them.
+            if not embed_config:
+                continue
+            entries.append(self._extract_media(embed_config))
+
+        return self.playlist_result(entries, playlist_id)
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@ -64,9 +64,9 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
    _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'

    _RESERVED_NAMES = (
-        r'course|embed|channel|c|user|playlist|watch|w|results|storefront|oops|'
-        r'shared|index|account|reporthistory|t/terms|about|upload|signin|logout|'
-        r'feed/(watch_later|history|subscriptions|library|trending|recommended)')
+        r'embed|e|channel|c|user|playlist|watch|w|v|results|shared|'
+        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
+        r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')

    _NETRC_MACHINE = 'youtube'
    # If True it will raise an error if no login info is provided
@ -2544,7 +2544,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
                                feed/|
                                (?:playlist|watch)\?.*?\blist=
                            )|
-                            (?!(%s)([/#?]|$))  # Direct URLs
+                            (?!(?:%s)\b)  # Direct URLs
                        )
                        (?P<id>[^/?\#&]+)
                    ''' % YoutubeBaseInfoExtractor._RESERVED_NAMES
@ -2813,13 +2813,22 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
        # inline playlist with not always working continuations
        'url': 'https://www.youtube.com/watch?v=UC6u0Tct-Fo&list=PL36D642111D65BE7C',
        'only_matching': True,
-    }
-        # TODO
-        # {
-        #     'url': 'https://www.youtube.com/TheYoungTurks/live',
-        #     'only_matching': True,
-        # }
-    ]
+    }, {
+        'url': 'https://www.youtube.com/course?list=ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/course',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/zsecurity',
+        'only_matching': True,
+    }, {
+        'url': 'http://www.youtube.com/NASAgovVideo/videos',
+        'only_matching': True,
+    }, {
+        'url': 'https://www.youtube.com/TheYoungTurks/live',
+        'only_matching': True,
+    }]

    def _extract_channel_id(self, webpage):
        channel_id = self._html_search_meta(