Update to release 2020.12.14

Except: [hotstar] fix and improve extraction bb38a12157
5 years ago · c09b3b1318
parent b1ef860624
commit c09b3b1318
12 changed files with 292 additions and 98 deletions
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@ -98,6 +98,55 @@ class TestInfoExtractor(unittest.TestCase):
        self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
        self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
    def test_search_json_ld_realworld(self):
        # https://github.com/ytdl-org/youtube-dl/issues/23306
        expect_dict(
            self,
            self.ie._search_json_ld(r'''<script type="application/ld+json">
 {
 "@context": "http://schema.org/",
 "@type": "VideoObject",
 "name": "1 On 1 With Kleio",
 "url": "https://www.eporner.com/hd-porn/xN49A1cT3eB/1-On-1-With-Kleio/",
 "duration": "PT0H12M23S",
 "thumbnailUrl": ["https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", "https://imggen.eporner.com/780814/1920/1080/9.jpg"],
 "contentUrl": "https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4",
 "embedUrl": "https://www.eporner.com/embed/xN49A1cT3eB/1-On-1-With-Kleio/",
 "image": "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg",
 "width": "1920",
 "height": "1080",
 "encodingFormat": "mp4",
 "bitrate": "6617kbps",
 "isFamilyFriendly": "False",
 "description": "Kleio Valentien",
 "uploadDate": "2015-12-05T21:24:35+01:00",
 "interactionStatistic": {
 "@type": "InteractionCounter",
 "interactionType": { "@type": "http://schema.org/WatchAction" },
 "userInteractionCount": 1120958
 }, "aggregateRating": {
 "@type": "AggregateRating",
 "ratingValue": "88",
 "ratingCount": "630",
 "bestRating": "100",
 "worstRating": "0"
 }, "actor": [{
 "@type": "Person",
 "name": "Kleio Valentien",
 "url": "https://www.eporner.com/pornstar/kleio-valentien/"
 }]}
 </script>''', None),
            {
                'title': '1 On 1 With Kleio',
                'description': 'Kleio Valentien',
                'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4',
                'timestamp': 1449347075,
                'duration': 743.0,
                'view_count': 1120958,
                'width': 1920,
                'height': 1080,
            })
    def test_download_json(self):
        uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
        self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'})
--- a/youtube_dlc/downloader/hls.py
+++ b/youtube_dlc/downloader/hls.py
@ -42,11 +42,13 @@ class HlsFD(FragmentFD):
            # no segments will definitely be appended to the end of the playlist.
            # r'#EXT-X-PLAYLIST-TYPE:EVENT',  # media segments may be appended to the end of
            #                                 # event media playlists [4]
            r'#EXT-X-MAP:',  # media initialization [5]
            # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
            # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
            # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
            # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
            # 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
        )
        check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
        is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest
--- a/youtube_dlc/extractor/common.py
+++ b/youtube_dlc/extractor/common.py
@ -336,8 +336,8 @@ class InfoExtractor(object):
    object, each element of which is a valid dictionary by this specification.
    Additionally, playlists can have "id", "title", "description", "uploader",
-    "uploader_id", "uploader_url" attributes with the same semantics as videos
+    "uploader_id", "uploader_url", "duration" attributes with the same semantics
-    (see above).
+    as videos (see above).
    _type "multi_video" indicates that there are multiple videos that
@ -1237,8 +1237,16 @@ class InfoExtractor(object):
            'ViewAction': 'view',
        }
        def extract_interaction_type(e):
            """Return ``e['interactionType']`` passed through ``str_or_none``.

            When the value is a nested object (a dict) rather than a plain
            value, its ``@type`` entry is used instead.
            """
            raw = e.get('interactionType')
            # Unwrap the object form, e.g. {"@type": "http://schema.org/WatchAction"}
            return str_or_none(raw.get('@type') if isinstance(raw, dict) else raw)
        def extract_interaction_statistic(e):
            interaction_statistic = e.get('interactionStatistic')
            if isinstance(interaction_statistic, dict):
                interaction_statistic = [interaction_statistic]
            if not isinstance(interaction_statistic, list):
                return
            for is_e in interaction_statistic:
@ -1246,8 +1254,8 @@ class InfoExtractor(object):
                    continue
                if is_e.get('@type') != 'InteractionCounter':
                    continue
-                interaction_type = is_e.get('interactionType')
+                interaction_type = extract_interaction_type(is_e)
-                if not isinstance(interaction_type, compat_str):
+                if not interaction_type:
                    continue
                # For interaction count some sites provide string instead of
                # an integer (as per spec) with non digit characters (e.g. ",")
--- a/youtube_dlc/extractor/eporner.py
+++ b/youtube_dlc/extractor/eporner.py
@ -16,7 +16,7 @@ from ..utils import (
 class EpornerIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
+    _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:(?:hd-porn|embed)/|video-)(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
    _TESTS = [{
        'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
        'md5': '39d486f046212d8e1b911c52ab4691f8',
@ -43,7 +43,10 @@ class EpornerIE(InfoExtractor):
        'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
        'only_matching': True,
    }, {
-        'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
+        'url': 'http://www.eporner.com/embed/3YRUtzMcWn0',
        'only_matching': True,
    }, {
        'url': 'https://www.eporner.com/video-FJsA19J3Y3H/one-of-the-greats/',
        'only_matching': True,
    }]
@ -57,7 +60,7 @@ class EpornerIE(InfoExtractor):
        video_id = self._match_id(urlh.geturl())
        hash = self._search_regex(
-            r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash')
+            r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash')
        title = self._og_search_title(webpage, default=None) or self._html_search_regex(
            r'<title>(.+?) - EPORNER', webpage, 'title')
@ -115,8 +118,8 @@ class EpornerIE(InfoExtractor):
        duration = parse_duration(self._html_search_meta(
            'duration', webpage, default=None))
        view_count = str_to_int(self._search_regex(
-            r'id="cinemaviews">\s*([0-9,]+)\s*<small>views',
+            r'id=["\']cinemaviews1["\'][^>]*>\s*([0-9,]+)',
-            webpage, 'view count', fatal=False))
+            webpage, 'view count', default=None))
        return merge_dicts(json_ld, {
            'id': video_id,
--- a/youtube_dlc/extractor/itv.py
+++ b/youtube_dlc/extractor/itv.py
@ -7,6 +7,7 @@ import re
 from .common import InfoExtractor
 from .brightcove import BrightcoveNewIE
 from ..utils import (
    clean_html,
    determine_ext,
    extract_attributes,
    get_element_by_class,
@ -14,7 +15,6 @@ from ..utils import (
    merge_dicts,
    parse_duration,
    smuggle_url,
    strip_or_none,
    try_get,
    url_or_none,
 )
@ -147,7 +147,7 @@ class ITVIE(InfoExtractor):
            'formats': formats,
            'subtitles': subtitles,
            'duration': parse_duration(video_data.get('Duration')),
-            'description': strip_or_none(get_element_by_class('episode-info__synopsis', webpage)),
+            'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)),
        }, info)
--- a/youtube_dlc/extractor/linuxacademy.py
+++ b/youtube_dlc/extractor/linuxacademy.py
@ -8,11 +8,15 @@ from .common import InfoExtractor
 from ..compat import (
    compat_b64decode,
    compat_HTTPError,
    compat_str,
 )
 from ..utils import (
    clean_html,
    ExtractorError,
-    orderedSet,
+    js_to_json,
-    unescapeHTML,
+    parse_duration,
    try_get,
    unified_timestamp,
    urlencode_postdata,
    urljoin,
 )
@ -28,11 +32,15 @@ class LinuxAcademyIE(InfoExtractor):
                        )
                    '''
    _TESTS = [{
-        'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154',
+        'url': 'https://linuxacademy.com/cp/courses/lesson/course/7971/lesson/2/module/675',
        'info_dict': {
-            'id': '1498-2',
+            'id': '7971-2',
            'ext': 'mp4',
-            'title': "Introduction to the Practitioner's Brief",
+            'title': 'What Is Data Science',
            'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
            'timestamp': 1607387907,
            'upload_date': '20201208',
            'duration': 304,
        },
        'params': {
            'skip_download': True,
@ -46,7 +54,8 @@ class LinuxAcademyIE(InfoExtractor):
        'info_dict': {
            'id': '154',
            'title': 'AWS Certified Cloud Practitioner',
-            'description': 'md5:039db7e60e4aac9cf43630e0a75fa834',
+            'description': 'md5:a68a299ca9bb98d41cca5abc4d4ce22c',
            'duration': 28835,
        },
        'playlist_count': 41,
        'skip': 'Requires Linux Academy account credentials',
@ -74,6 +83,7 @@ class LinuxAcademyIE(InfoExtractor):
            self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
                'client_id': self._CLIENT_ID,
                'response_type': 'token id_token',
                'response_mode': 'web_message',
                'redirect_uri': self._ORIGIN_URL,
                'scope': 'openid email user_impersonation profile',
                'audience': self._ORIGIN_URL,
@ -129,7 +139,13 @@ class LinuxAcademyIE(InfoExtractor):
        access_token = self._search_regex(
            r'access_token=([^=&]+)', urlh.geturl(),
-            'access token')
+            'access token', default=None)
        if not access_token:
            access_token = self._parse_json(
                self._search_regex(
                    r'authorizationResponse\s*=\s*({.+?})\s*;', callback_page,
                    'authorization response'), None,
                transform_source=js_to_json)['response']['access_token']
        self._download_webpage(
            'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
@ -144,30 +160,84 @@ class LinuxAcademyIE(InfoExtractor):
        # course path
        if course_id:
-            entries = [
+            module = self._parse_json(
-                self.url_result(
+                self._search_regex(
-                    urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key())
+                    r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'),
-                for lesson_url in orderedSet(re.findall(
+                item_id)
-                    r'<a[^>]+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)',
+            entries = []
-                    webpage))]
+            chapter_number = None
-            title = unescapeHTML(self._html_search_regex(
+            chapter = None
-                (r'class=["\']course-title["\'][^>]*>(?P<value>[^<]+)',
+            chapter_id = None
-                 r'var\s+title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'),
+            for item in module['items']:
-                webpage, 'title', default=None, group='value'))
+                if not isinstance(item, dict):
-            description = unescapeHTML(self._html_search_regex(
+                    continue
-                r'var\s+description\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
+
-                webpage, 'description', default=None, group='value'))
+                def type_field(key):
-            return self.playlist_result(entries, course_id, title, description)
+                    return (try_get(item, lambda x: x['type'][key], compat_str) or '').lower()
                type_fields = (type_field('name'), type_field('slug'))
                # Move to next module section
                if 'section' in type_fields:
                    chapter = item.get('course_name')
                    chapter_id = item.get('course_module')
                    chapter_number = 1 if not chapter_number else chapter_number + 1
                    continue
                # Skip non-lessons
                if 'lesson' not in type_fields:
                    continue
                lesson_url = urljoin(url, item.get('url'))
                if not lesson_url:
                    continue
                title = item.get('title') or item.get('lesson_name')
                description = item.get('md_desc') or clean_html(item.get('description')) or clean_html(item.get('text'))
                entries.append({
                    '_type': 'url_transparent',
                    'url': lesson_url,
                    'ie_key': LinuxAcademyIE.ie_key(),
                    'title': title,
                    'description': description,
                    'timestamp': unified_timestamp(item.get('date')) or unified_timestamp(item.get('created_on')),
                    'duration': parse_duration(item.get('duration')),
                    'chapter': chapter,
                    'chapter_id': chapter_id,
                    'chapter_number': chapter_number,
                })
            return {
                '_type': 'playlist',
                'entries': entries,
                'id': course_id,
                'title': module.get('title'),
                'description': module.get('md_desc') or clean_html(module.get('desc')),
                'duration': parse_duration(module.get('duration')),
            }
        # single video path
-        info = self._extract_jwplayer_data(
+        m3u8_url = self._parse_json(
-            webpage, item_id, require_title=False, m3u8_id='hls',)
+            self._search_regex(
-        title = self._search_regex(
+                r'player\.playlist\s*=\s*(\[.+?\])\s*;', webpage, 'playlist'),
            item_id)[0]['file']
        formats = self._extract_m3u8_formats(
            m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')
        self._sort_formats(formats)
        info = {
            'id': item_id,
            'formats': formats,
        }
        lesson = self._parse_json(
            self._search_regex(
                (r'window\.lesson\s*=\s*({.+?})\s*;',
                 r'player\.lesson\s*=\s*({.+?})\s*;'),
                webpage, 'lesson', default='{}'), item_id, fatal=False)
        if lesson:
            info.update({
                'title': lesson.get('lesson_name'),
                'description': lesson.get('md_desc') or clean_html(lesson.get('desc')),
                'timestamp': unified_timestamp(lesson.get('date')) or unified_timestamp(lesson.get('created_on')),
                'duration': parse_duration(lesson.get('duration')),
            })
        if not info.get('title'):
            info['title'] = self._search_regex(
                (r'>Lecture\s*:\s*(?P<value>[^<]+)',
                 r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
                'title', group='value')
        info.update({
            'id': item_id,
            'title': title,
        })
        return info
--- a/youtube_dlc/extractor/mdr.py
+++ b/youtube_dlc/extractor/mdr.py
@ -2,12 +2,16 @@
 from __future__ import unicode_literals
 from .common import InfoExtractor
-from ..compat import compat_urlparse
+from ..compat import (
    compat_str,
    compat_urlparse,
 )
 from ..utils import (
    determine_ext,
    int_or_none,
    parse_duration,
    parse_iso8601,
    url_or_none,
    xpath_text,
 )
@ -16,6 +20,8 @@ class MDRIE(InfoExtractor):
    IE_DESC = 'MDR.DE and KiKA'
    _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
    _GEO_COUNTRIES = ['DE']
    _TESTS = [{
        # MDR regularly deletes its videos
        'url': 'http://www.mdr.de/fakt/video189002.html',
@ -66,6 +72,22 @@ class MDRIE(InfoExtractor):
            'duration': 3239,
            'uploader': 'MITTELDEUTSCHER RUNDFUNK',
        },
    }, {
        # empty bitrateVideo and bitrateAudio
        'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
        'info_dict': {
            'id': '128372',
            'ext': 'mp4',
            'title': 'Der kleine Wichtel kehrt zurück',
            'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
            'duration': 4876,
            'timestamp': 1607823300,
            'upload_date': '20201213',
            'uploader': 'ZDF',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
        'only_matching': True,
@ -91,10 +113,13 @@ class MDRIE(InfoExtractor):
        title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True)
        type_ = xpath_text(doc, './type', default=None)
        formats = []
        processed_urls = []
        for asset in doc.findall('./assets/asset'):
            for source in (
                    'download',
                    'progressiveDownload',
                    'dynamicHttpStreamingRedirector',
                    'adaptiveHttpStreamingRedirector'):
@ -102,63 +127,49 @@ class MDRIE(InfoExtractor):
                if url_el is None:
                    continue
-                video_url = url_el.text
+                video_url = url_or_none(url_el.text)
-                if video_url in processed_urls:
+                if not video_url or video_url in processed_urls:
                    continue
                processed_urls.append(video_url)
-                vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
+                ext = determine_ext(video_url)
                abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
                ext = determine_ext(url_el.text)
                if ext == 'm3u8':
-                    url_formats = self._extract_m3u8_formats(
+                    formats.extend(self._extract_m3u8_formats(
                        video_url, video_id, 'mp4', entry_protocol='m3u8_native',
-                        preference=0, m3u8_id='HLS', fatal=False)
+                        preference=0, m3u8_id='HLS', fatal=False))
                elif ext == 'f4m':
-                    url_formats = self._extract_f4m_formats(
+                    formats.extend(self._extract_f4m_formats(
                        video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
-                        preference=0, f4m_id='HDS', fatal=False)
+                        preference=0, f4m_id='HDS', fatal=False))
                else:
                    media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
                    vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
                    abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
                    filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))
                    format_id = [media_type]
                    if vbr or abr:
                        format_id.append(compat_str(vbr or abr))
                    f = {
                        'url': video_url,
-                        'format_id': '%s-%d' % (media_type, vbr or abr),
+                        'format_id': '-'.join(format_id),
                        'filesize': filesize,
                        'abr': abr,
-                        'preference': 1,
+                        'vbr': vbr,
                    }
                    if vbr:
                        width = int_or_none(xpath_text(asset, './frameWidth', 'width'))
                        height = int_or_none(xpath_text(asset, './frameHeight', 'height'))
                        f.update({
-                            'vbr': vbr,
+                            'width': int_or_none(xpath_text(asset, './frameWidth', 'width')),
-                            'width': width,
+                            'height': int_or_none(xpath_text(asset, './frameHeight', 'height')),
                            'height': height,
                        })
-                    url_formats = [f]
+                    if type_ == 'audio':
-
+                        f['vcodec'] = 'none'
                if not url_formats:
                    continue
                if not vbr:
                    for f in url_formats:
                        abr = f.get('tbr') or abr
                        if 'tbr' in f:
                            del f['tbr']
                        f.update({
                            'abr': abr,
                            'vcodec': 'none',
                        })
-                formats.extend(url_formats)
+                    formats.append(f)
        self._sort_formats(formats)
--- a/youtube_dlc/extractor/slideslive.py
+++ b/youtube_dlc/extractor/slideslive.py
@ -83,9 +83,10 @@ class SlidesLiveIE(InfoExtractor):
            else:
                formats = []
                _MANIFEST_PATTERN = 'https://01.cdn.yoda.slideslive.com/%s/master.%s'
                # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
                formats.extend(self._extract_m3u8_formats(
-                    _MANIFEST_PATTERN % (service_id, 'm3u8'), service_id, 'mp4',
+                    _MANIFEST_PATTERN % (service_id, 'm3u8'),
-                    entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))
+                    service_id, 'mp4', m3u8_id='hls', fatal=False))
                formats.extend(self._extract_mpd_formats(
                    _MANIFEST_PATTERN % (service_id, 'mpd'), service_id,
                    mpd_id='dash', fatal=False))
--- a/youtube_dlc/extractor/twitcasting.py
+++ b/youtube_dlc/extractor/twitcasting.py
@ -1,11 +1,20 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..utils import urlencode_postdata
 import re
 from .common import InfoExtractor
 from ..utils import (
    clean_html,
    float_or_none,
    get_element_by_class,
    get_element_by_id,
    parse_duration,
    str_to_int,
    unified_timestamp,
    urlencode_postdata,
 )
 class TwitCastingIE(InfoExtractor):
    _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)'
@ -17,8 +26,12 @@ class TwitCastingIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Live #2357609',
            'uploader_id': 'ivetesangalo',
-            'description': "Moi! I'm live on TwitCasting from my iPhone.",
+            'description': 'Twitter Oficial da cantora brasileira Ivete Sangalo.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20110822',
            'timestamp': 1314010824,
            'duration': 32,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
@ -30,8 +43,12 @@ class TwitCastingIE(InfoExtractor):
            'ext': 'mp4',
            'title': 'Live playing something #3689740',
            'uploader_id': 'mttbernardini',
-            'description': "I'm live on TwitCasting from my iPad. password: abc (Santa Marinella/Lazio, Italia)",
+            'description': 'Salve, io sono Matto (ma con la e). Questa è la mia presentazione, in quanto sono letteralmente matto (nel senso di strano), con qualcosa in più.',
            'thumbnail': r're:^https?://.*\.jpg$',
            'upload_date': '20120212',
            'timestamp': 1329028024,
            'duration': 681,
            'view_count': int,
        },
        'params': {
            'skip_download': True,
@ -40,9 +57,7 @@ class TwitCastingIE(InfoExtractor):
    }]
    def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
+        uploader_id, video_id = re.match(self._VALID_URL, url).groups()
        video_id = mobj.group('id')
        uploader_id = mobj.group('uploader_id')
        video_password = self._downloader.params.get('videopassword')
        request_data = None
@ -52,30 +67,45 @@ class TwitCastingIE(InfoExtractor):
            })
        webpage = self._download_webpage(url, video_id, data=request_data)
-        title = self._html_search_regex(
+        title = clean_html(get_element_by_id(
-            r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)</',
+            'movietitle', webpage)) or self._html_search_meta(
-            webpage, 'title', default=None) or self._html_search_meta(
+            ['og:title', 'twitter:title'], webpage, fatal=True)
            'twitter:title', webpage, fatal=True)
        video_js_data = {}
        m3u8_url = self._search_regex(
-            (r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
+            r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
-             r'(["\'])(?P<url>http.+?\.m3u8.*?)\1'),
+            webpage, 'm3u8 url', group='url', default=None)
-            webpage, 'm3u8 url', group='url')
+        if not m3u8_url:
            video_js_data = self._parse_json(self._search_regex(
                r"data-movie-playlist='(\[[^']+\])'",
                webpage, 'movie playlist'), video_id)[0]
            m3u8_url = video_js_data['source']['url']
        # use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
        formats = self._extract_m3u8_formats(
-            m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native',
+            m3u8_url, video_id, 'mp4', m3u8_id='hls')
            m3u8_id='hls')
-        thumbnail = self._og_search_thumbnail(webpage)
+        thumbnail = video_js_data.get('thumbnailUrl') or self._og_search_thumbnail(webpage)
-        description = self._og_search_description(
+        description = clean_html(get_element_by_id(
-            webpage, default=None) or self._html_search_meta(
+            'authorcomment', webpage)) or self._html_search_meta(
-            'twitter:description', webpage)
+            ['description', 'og:description', 'twitter:description'], webpage)
        duration = float_or_none(video_js_data.get(
            'duration'), 1000) or parse_duration(clean_html(
                get_element_by_class('tw-player-duration-time', webpage)))
        view_count = str_to_int(self._search_regex(
            r'Total\s*:\s*([\d,]+)\s*Views', webpage, 'views', None))
        timestamp = unified_timestamp(self._search_regex(
            r'data-toggle="true"[^>]+datetime="([^"]+)"',
            webpage, 'datetime', None))
        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'uploader_id': uploader_id,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
--- a/youtube_dlc/extractor/vlive.py
+++ b/youtube_dlc/extractor/vlive.py
@ -155,6 +155,7 @@ class VLiveIE(VLiveBaseIE):
                    'old/v3/live/%s/playInfo',
                    video_id)['result']['adaptiveStreamUrl']
                formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
                self._sort_formats(formats)
                info = get_common_fields()
                info.update({
                    'title': self._live_title(video['title']),
--- a/youtube_dlc/extractor/yandexmusic.py
+++ b/youtube_dlc/extractor/yandexmusic.py
@ -260,6 +260,14 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
        },
        'playlist_count': 33,
        # 'skip': 'Travis CI servers blocked by YandexMusic',
    }, {
        # empty artists
        'url': 'https://music.yandex.ru/album/9091882',
        'info_dict': {
            'id': '9091882',
            'title': 'ТЕД на русском',
        },
        'playlist_count': 187,
    }]
    def _real_extract(self, url):
@ -273,7 +281,10 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
        entries = self._build_playlist([track for volume in album['volumes'] for track in volume])
-        title = '%s - %s' % (album['artists'][0]['name'], album['title'])
+        title = album['title']
        artist = try_get(album, lambda x: x['artists'][0]['name'], compat_str)
        if artist:
            title = '%s - %s' % (artist, title)
        year = album.get('year')
        if year:
            title += ' (%s)' % year
--- a/youtube_dlc/extractor/youtube.py
+++ b/youtube_dlc/extractor/youtube.py
@ -343,10 +343,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                            (?:www\.)?invidious\.kabi\.tk/|
                            (?:www\.)?invidious\.13ad\.de/|
                            (?:www\.)?invidious\.mastodon\.host/|
                            (?:www\.)?invidious\.zapashcanon\.fr/|
                            (?:www\.)?invidious\.kavin\.rocks/|
                            (?:www\.)?invidious\.tube/|
                            (?:www\.)?invidiou\.site/|
                            (?:www\.)?invidious\.site/|
                            (?:www\.)?invidious\.xyz/|
                            (?:www\.)?invidious\.nixnet\.xyz/|
                            (?:www\.)?invidious\.drycat\.fr/|
                            (?:www\.)?tube\.poal\.co/|
                            (?:www\.)?tube\.connect\.cafe/|
                            (?:www\.)?vid\.wxzm\.sx/|
                            (?:www\.)?vid\.mint\.lgbt/|
                            (?:www\.)?yewtu\.be/|
                            (?:www\.)?yt\.elukerio\.org/|
                            (?:www\.)?yt\.lelux\.fi/|