Update to release 2020.12.14

Except: [hotstar] fix and improve extraction bb38a12157
pull/280/head
pukkandan 5 years ago
parent b1ef860624
commit c09b3b1318

@ -98,6 +98,55 @@ class TestInfoExtractor(unittest.TestCase):
self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
def test_search_json_ld_realworld(self):
    """Check _search_json_ld against a real-world JSON-LD VideoObject snippet.

    In this snippet 'interactionStatistic' is a single object (not a list)
    and its 'interactionType' is a nested object carrying '@type'.
    """
    # https://github.com/ytdl-org/youtube-dl/issues/23306
    json_ld_html = r'''<script type="application/ld+json">
{
"@context": "http://schema.org/",
"@type": "VideoObject",
"name": "1 On 1 With Kleio",
"url": "https://www.eporner.com/hd-porn/xN49A1cT3eB/1-On-1-With-Kleio/",
"duration": "PT0H12M23S",
"thumbnailUrl": ["https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", "https://imggen.eporner.com/780814/1920/1080/9.jpg"],
"contentUrl": "https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4",
"embedUrl": "https://www.eporner.com/embed/xN49A1cT3eB/1-On-1-With-Kleio/",
"image": "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg",
"width": "1920",
"height": "1080",
"encodingFormat": "mp4",
"bitrate": "6617kbps",
"isFamilyFriendly": "False",
"description": "Kleio Valentien",
"uploadDate": "2015-12-05T21:24:35+01:00",
"interactionStatistic": {
"@type": "InteractionCounter",
"interactionType": { "@type": "http://schema.org/WatchAction" },
"userInteractionCount": 1120958
}, "aggregateRating": {
"@type": "AggregateRating",
"ratingValue": "88",
"ratingCount": "630",
"bestRating": "100",
"worstRating": "0"
}, "actor": [{
"@type": "Person",
"name": "Kleio Valentien",
"url": "https://www.eporner.com/pornstar/kleio-valentien/"
}]}
</script>'''
    # Fields the extractor is expected to produce from the snippet above.
    expected_info = {
        'title': '1 On 1 With Kleio',
        'description': 'Kleio Valentien',
        'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4',
        'timestamp': 1449347075,
        'duration': 743.0,
        'view_count': 1120958,
        'width': 1920,
        'height': 1080,
    }
    extracted_info = self.ie._search_json_ld(json_ld_html, None)
    expect_dict(self, extracted_info, expected_info)
def test_download_json(self): def test_download_json(self):
uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'}) self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'})

@ -42,11 +42,13 @@ class HlsFD(FragmentFD):
# no segments will definitely be appended to the end of the playlist. # no segments will definitely be appended to the end of the playlist.
# r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of
# # event media playlists [4] # # event media playlists [4]
r'#EXT-X-MAP:', # media initialization [5]
# 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4
# 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2
# 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2
# 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5
# 5. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5
) )
check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES] check_results = [not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES]
is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest is_aes128_enc = '#EXT-X-KEY:METHOD=AES-128' in manifest

@ -336,8 +336,8 @@ class InfoExtractor(object):
object, each element of which is a valid dictionary by this specification. object, each element of which is a valid dictionary by this specification.
Additionally, playlists can have "id", "title", "description", "uploader", Additionally, playlists can have "id", "title", "description", "uploader",
"uploader_id", "uploader_url" attributes with the same semantics as videos "uploader_id", "uploader_url", "duration" attributes with the same semantics
(see above). as videos (see above).
_type "multi_video" indicates that there are multiple videos that _type "multi_video" indicates that there are multiple videos that
@ -1237,8 +1237,16 @@ class InfoExtractor(object):
'ViewAction': 'view', 'ViewAction': 'view',
} }
def extract_interaction_type(e):
    """Return the 'interactionType' of JSON-LD element *e* via str_or_none.

    The value is used as is unless it is a dict, in which case its
    '@type' entry is used instead.
    """
    raw_type = e.get('interactionType')
    # Unwrap the nested object form, e.g. {"interactionType": {"@type": ...}}
    type_name = raw_type.get('@type') if isinstance(raw_type, dict) else raw_type
    return str_or_none(type_name)
def extract_interaction_statistic(e): def extract_interaction_statistic(e):
interaction_statistic = e.get('interactionStatistic') interaction_statistic = e.get('interactionStatistic')
if isinstance(interaction_statistic, dict):
interaction_statistic = [interaction_statistic]
if not isinstance(interaction_statistic, list): if not isinstance(interaction_statistic, list):
return return
for is_e in interaction_statistic: for is_e in interaction_statistic:
@ -1246,8 +1254,8 @@ class InfoExtractor(object):
continue continue
if is_e.get('@type') != 'InteractionCounter': if is_e.get('@type') != 'InteractionCounter':
continue continue
interaction_type = is_e.get('interactionType') interaction_type = extract_interaction_type(is_e)
if not isinstance(interaction_type, compat_str): if not interaction_type:
continue continue
# For interaction count some sites provide string instead of # For interaction count some sites provide string instead of
# an integer (as per spec) with non digit characters (e.g. ",") # an integer (as per spec) with non digit characters (e.g. ",")

@ -16,7 +16,7 @@ from ..utils import (
class EpornerIE(InfoExtractor): class EpornerIE(InfoExtractor):
_VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:hd-porn|embed)/(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?' _VALID_URL = r'https?://(?:www\.)?eporner\.com/(?:(?:hd-porn|embed)/|video-)(?P<id>\w+)(?:/(?P<display_id>[\w-]+))?'
_TESTS = [{ _TESTS = [{
'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/', 'url': 'http://www.eporner.com/hd-porn/95008/Infamous-Tiffany-Teen-Strip-Tease-Video/',
'md5': '39d486f046212d8e1b911c52ab4691f8', 'md5': '39d486f046212d8e1b911c52ab4691f8',
@ -43,7 +43,10 @@ class EpornerIE(InfoExtractor):
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0', 'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0',
'only_matching': True, 'only_matching': True,
}, { }, {
'url': 'http://www.eporner.com/hd-porn/3YRUtzMcWn0', 'url': 'http://www.eporner.com/embed/3YRUtzMcWn0',
'only_matching': True,
}, {
'url': 'https://www.eporner.com/video-FJsA19J3Y3H/one-of-the-greats/',
'only_matching': True, 'only_matching': True,
}] }]
@ -57,7 +60,7 @@ class EpornerIE(InfoExtractor):
video_id = self._match_id(urlh.geturl()) video_id = self._match_id(urlh.geturl())
hash = self._search_regex( hash = self._search_regex(
r'hash\s*:\s*["\']([\da-f]{32})', webpage, 'hash') r'hash\s*[:=]\s*["\']([\da-f]{32})', webpage, 'hash')
title = self._og_search_title(webpage, default=None) or self._html_search_regex( title = self._og_search_title(webpage, default=None) or self._html_search_regex(
r'<title>(.+?) - EPORNER', webpage, 'title') r'<title>(.+?) - EPORNER', webpage, 'title')
@ -115,8 +118,8 @@ class EpornerIE(InfoExtractor):
duration = parse_duration(self._html_search_meta( duration = parse_duration(self._html_search_meta(
'duration', webpage, default=None)) 'duration', webpage, default=None))
view_count = str_to_int(self._search_regex( view_count = str_to_int(self._search_regex(
r'id="cinemaviews">\s*([0-9,]+)\s*<small>views', r'id=["\']cinemaviews1["\'][^>]*>\s*([0-9,]+)',
webpage, 'view count', fatal=False)) webpage, 'view count', default=None))
return merge_dicts(json_ld, { return merge_dicts(json_ld, {
'id': video_id, 'id': video_id,

@ -7,6 +7,7 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from .brightcove import BrightcoveNewIE from .brightcove import BrightcoveNewIE
from ..utils import ( from ..utils import (
clean_html,
determine_ext, determine_ext,
extract_attributes, extract_attributes,
get_element_by_class, get_element_by_class,
@ -14,7 +15,6 @@ from ..utils import (
merge_dicts, merge_dicts,
parse_duration, parse_duration,
smuggle_url, smuggle_url,
strip_or_none,
try_get, try_get,
url_or_none, url_or_none,
) )
@ -147,7 +147,7 @@ class ITVIE(InfoExtractor):
'formats': formats, 'formats': formats,
'subtitles': subtitles, 'subtitles': subtitles,
'duration': parse_duration(video_data.get('Duration')), 'duration': parse_duration(video_data.get('Duration')),
'description': strip_or_none(get_element_by_class('episode-info__synopsis', webpage)), 'description': clean_html(get_element_by_class('episode-info__synopsis', webpage)),
}, info) }, info)

@ -8,11 +8,15 @@ from .common import InfoExtractor
from ..compat import ( from ..compat import (
compat_b64decode, compat_b64decode,
compat_HTTPError, compat_HTTPError,
compat_str,
) )
from ..utils import ( from ..utils import (
clean_html,
ExtractorError, ExtractorError,
orderedSet, js_to_json,
unescapeHTML, parse_duration,
try_get,
unified_timestamp,
urlencode_postdata, urlencode_postdata,
urljoin, urljoin,
) )
@ -28,11 +32,15 @@ class LinuxAcademyIE(InfoExtractor):
) )
''' '''
_TESTS = [{ _TESTS = [{
'url': 'https://linuxacademy.com/cp/courses/lesson/course/1498/lesson/2/module/154', 'url': 'https://linuxacademy.com/cp/courses/lesson/course/7971/lesson/2/module/675',
'info_dict': { 'info_dict': {
'id': '1498-2', 'id': '7971-2',
'ext': 'mp4', 'ext': 'mp4',
'title': "Introduction to the Practitioner's Brief", 'title': 'What Is Data Science',
'description': 'md5:c574a3c20607144fb36cb65bdde76c99',
'timestamp': 1607387907,
'upload_date': '20201208',
'duration': 304,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -46,7 +54,8 @@ class LinuxAcademyIE(InfoExtractor):
'info_dict': { 'info_dict': {
'id': '154', 'id': '154',
'title': 'AWS Certified Cloud Practitioner', 'title': 'AWS Certified Cloud Practitioner',
'description': 'md5:039db7e60e4aac9cf43630e0a75fa834', 'description': 'md5:a68a299ca9bb98d41cca5abc4d4ce22c',
'duration': 28835,
}, },
'playlist_count': 41, 'playlist_count': 41,
'skip': 'Requires Linux Academy account credentials', 'skip': 'Requires Linux Academy account credentials',
@ -74,6 +83,7 @@ class LinuxAcademyIE(InfoExtractor):
self._AUTHORIZE_URL, None, 'Downloading authorize page', query={ self._AUTHORIZE_URL, None, 'Downloading authorize page', query={
'client_id': self._CLIENT_ID, 'client_id': self._CLIENT_ID,
'response_type': 'token id_token', 'response_type': 'token id_token',
'response_mode': 'web_message',
'redirect_uri': self._ORIGIN_URL, 'redirect_uri': self._ORIGIN_URL,
'scope': 'openid email user_impersonation profile', 'scope': 'openid email user_impersonation profile',
'audience': self._ORIGIN_URL, 'audience': self._ORIGIN_URL,
@ -129,7 +139,13 @@ class LinuxAcademyIE(InfoExtractor):
access_token = self._search_regex( access_token = self._search_regex(
r'access_token=([^=&]+)', urlh.geturl(), r'access_token=([^=&]+)', urlh.geturl(),
'access token') 'access token', default=None)
if not access_token:
access_token = self._parse_json(
self._search_regex(
r'authorizationResponse\s*=\s*({.+?})\s*;', callback_page,
'authorization response'), None,
transform_source=js_to_json)['response']['access_token']
self._download_webpage( self._download_webpage(
'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s' 'https://linuxacademy.com/cp/login/tokenValidateLogin/token/%s'
@ -144,30 +160,84 @@ class LinuxAcademyIE(InfoExtractor):
# course path # course path
if course_id: if course_id:
entries = [ module = self._parse_json(
self.url_result( self._search_regex(
urljoin(url, lesson_url), ie=LinuxAcademyIE.ie_key()) r'window\.module\s*=\s*({.+?})\s*;', webpage, 'module'),
for lesson_url in orderedSet(re.findall( item_id)
r'<a[^>]+\bhref=["\'](/cp/courses/lesson/course/\d+/lesson/\d+/module/\d+)', entries = []
webpage))] chapter_number = None
title = unescapeHTML(self._html_search_regex( chapter = None
(r'class=["\']course-title["\'][^>]*>(?P<value>[^<]+)', chapter_id = None
r'var\s+title\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), for item in module['items']:
webpage, 'title', default=None, group='value')) if not isinstance(item, dict):
description = unescapeHTML(self._html_search_regex( continue
r'var\s+description\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1',
webpage, 'description', default=None, group='value')) def type_field(key):
return self.playlist_result(entries, course_id, title, description) return (try_get(item, lambda x: x['type'][key], compat_str) or '').lower()
type_fields = (type_field('name'), type_field('slug'))
# Move to next module section
if 'section' in type_fields:
chapter = item.get('course_name')
chapter_id = item.get('course_module')
chapter_number = 1 if not chapter_number else chapter_number + 1
continue
# Skip non-lessons
if 'lesson' not in type_fields:
continue
lesson_url = urljoin(url, item.get('url'))
if not lesson_url:
continue
title = item.get('title') or item.get('lesson_name')
description = item.get('md_desc') or clean_html(item.get('description')) or clean_html(item.get('text'))
entries.append({
'_type': 'url_transparent',
'url': lesson_url,
'ie_key': LinuxAcademyIE.ie_key(),
'title': title,
'description': description,
'timestamp': unified_timestamp(item.get('date')) or unified_timestamp(item.get('created_on')),
'duration': parse_duration(item.get('duration')),
'chapter': chapter,
'chapter_id': chapter_id,
'chapter_number': chapter_number,
})
return {
'_type': 'playlist',
'entries': entries,
'id': course_id,
'title': module.get('title'),
'description': module.get('md_desc') or clean_html(module.get('desc')),
'duration': parse_duration(module.get('duration')),
}
# single video path # single video path
info = self._extract_jwplayer_data( m3u8_url = self._parse_json(
webpage, item_id, require_title=False, m3u8_id='hls',) self._search_regex(
title = self._search_regex( r'player\.playlist\s*=\s*(\[.+?\])\s*;', webpage, 'playlist'),
item_id)[0]['file']
formats = self._extract_m3u8_formats(
m3u8_url, item_id, 'mp4', entry_protocol='m3u8_native',
m3u8_id='hls')
self._sort_formats(formats)
info = {
'id': item_id,
'formats': formats,
}
lesson = self._parse_json(
self._search_regex(
(r'window\.lesson\s*=\s*({.+?})\s*;',
r'player\.lesson\s*=\s*({.+?})\s*;'),
webpage, 'lesson', default='{}'), item_id, fatal=False)
if lesson:
info.update({
'title': lesson.get('lesson_name'),
'description': lesson.get('md_desc') or clean_html(lesson.get('desc')),
'timestamp': unified_timestamp(lesson.get('date')) or unified_timestamp(lesson.get('created_on')),
'duration': parse_duration(lesson.get('duration')),
})
if not info.get('title'):
info['title'] = self._search_regex(
(r'>Lecture\s*:\s*(?P<value>[^<]+)', (r'>Lecture\s*:\s*(?P<value>[^<]+)',
r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage, r'lessonName\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1'), webpage,
'title', group='value') 'title', group='value')
info.update({
'id': item_id,
'title': title,
})
return info return info

@ -2,12 +2,16 @@
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_urlparse from ..compat import (
compat_str,
compat_urlparse,
)
from ..utils import ( from ..utils import (
determine_ext, determine_ext,
int_or_none, int_or_none,
parse_duration, parse_duration,
parse_iso8601, parse_iso8601,
url_or_none,
xpath_text, xpath_text,
) )
@ -16,6 +20,8 @@ class MDRIE(InfoExtractor):
IE_DESC = 'MDR.DE and KiKA' IE_DESC = 'MDR.DE and KiKA'
_VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html' _VALID_URL = r'https?://(?:www\.)?(?:mdr|kika)\.de/(?:.*)/[a-z-]+-?(?P<id>\d+)(?:_.+?)?\.html'
_GEO_COUNTRIES = ['DE']
_TESTS = [{ _TESTS = [{
# MDR regularly deletes its videos # MDR regularly deletes its videos
'url': 'http://www.mdr.de/fakt/video189002.html', 'url': 'http://www.mdr.de/fakt/video189002.html',
@ -66,6 +72,22 @@ class MDRIE(InfoExtractor):
'duration': 3239, 'duration': 3239,
'uploader': 'MITTELDEUTSCHER RUNDFUNK', 'uploader': 'MITTELDEUTSCHER RUNDFUNK',
}, },
}, {
# empty bitrateVideo and bitrateAudio
'url': 'https://www.kika.de/filme/sendung128372_zc-572e3f45_zs-1d9fb70e.html',
'info_dict': {
'id': '128372',
'ext': 'mp4',
'title': 'Der kleine Wichtel kehrt zurück',
'description': 'md5:f77fafdff90f7aa1e9dca14f662c052a',
'duration': 4876,
'timestamp': 1607823300,
'upload_date': '20201213',
'uploader': 'ZDF',
},
'params': {
'skip_download': True,
},
}, { }, {
'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html', 'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
'only_matching': True, 'only_matching': True,
@ -91,10 +113,13 @@ class MDRIE(InfoExtractor):
title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True) title = xpath_text(doc, ['./title', './broadcast/broadcastName'], 'title', fatal=True)
type_ = xpath_text(doc, './type', default=None)
formats = [] formats = []
processed_urls = [] processed_urls = []
for asset in doc.findall('./assets/asset'): for asset in doc.findall('./assets/asset'):
for source in ( for source in (
'download',
'progressiveDownload', 'progressiveDownload',
'dynamicHttpStreamingRedirector', 'dynamicHttpStreamingRedirector',
'adaptiveHttpStreamingRedirector'): 'adaptiveHttpStreamingRedirector'):
@ -102,63 +127,49 @@ class MDRIE(InfoExtractor):
if url_el is None: if url_el is None:
continue continue
video_url = url_el.text video_url = url_or_none(url_el.text)
if video_url in processed_urls: if not video_url or video_url in processed_urls:
continue continue
processed_urls.append(video_url) processed_urls.append(video_url)
vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000) ext = determine_ext(video_url)
abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
ext = determine_ext(url_el.text)
if ext == 'm3u8': if ext == 'm3u8':
url_formats = self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
video_url, video_id, 'mp4', entry_protocol='m3u8_native', video_url, video_id, 'mp4', entry_protocol='m3u8_native',
preference=0, m3u8_id='HLS', fatal=False) preference=0, m3u8_id='HLS', fatal=False))
elif ext == 'f4m': elif ext == 'f4m':
url_formats = self._extract_f4m_formats( formats.extend(self._extract_f4m_formats(
video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id, video_url + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
preference=0, f4m_id='HDS', fatal=False) preference=0, f4m_id='HDS', fatal=False))
else: else:
media_type = xpath_text(asset, './mediaType', 'media type', default='MP4') media_type = xpath_text(asset, './mediaType', 'media type', default='MP4')
vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000) vbr = int_or_none(xpath_text(asset, './bitrateVideo', 'vbr'), 1000)
abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000) abr = int_or_none(xpath_text(asset, './bitrateAudio', 'abr'), 1000)
filesize = int_or_none(xpath_text(asset, './fileSize', 'file size')) filesize = int_or_none(xpath_text(asset, './fileSize', 'file size'))
format_id = [media_type]
if vbr or abr:
format_id.append(compat_str(vbr or abr))
f = { f = {
'url': video_url, 'url': video_url,
'format_id': '%s-%d' % (media_type, vbr or abr), 'format_id': '-'.join(format_id),
'filesize': filesize, 'filesize': filesize,
'abr': abr, 'abr': abr,
'preference': 1, 'vbr': vbr,
} }
if vbr: if vbr:
width = int_or_none(xpath_text(asset, './frameWidth', 'width'))
height = int_or_none(xpath_text(asset, './frameHeight', 'height'))
f.update({ f.update({
'vbr': vbr, 'width': int_or_none(xpath_text(asset, './frameWidth', 'width')),
'width': width, 'height': int_or_none(xpath_text(asset, './frameHeight', 'height')),
'height': height,
}) })
url_formats = [f] if type_ == 'audio':
f['vcodec'] = 'none'
if not url_formats:
continue
if not vbr:
for f in url_formats:
abr = f.get('tbr') or abr
if 'tbr' in f:
del f['tbr']
f.update({
'abr': abr,
'vcodec': 'none',
})
formats.extend(url_formats) formats.append(f)
self._sort_formats(formats) self._sort_formats(formats)

@ -83,9 +83,10 @@ class SlidesLiveIE(InfoExtractor):
else: else:
formats = [] formats = []
_MANIFEST_PATTERN = 'https://01.cdn.yoda.slideslive.com/%s/master.%s' _MANIFEST_PATTERN = 'https://01.cdn.yoda.slideslive.com/%s/master.%s'
# use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
formats.extend(self._extract_m3u8_formats( formats.extend(self._extract_m3u8_formats(
_MANIFEST_PATTERN % (service_id, 'm3u8'), service_id, 'mp4', _MANIFEST_PATTERN % (service_id, 'm3u8'),
entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) service_id, 'mp4', m3u8_id='hls', fatal=False))
formats.extend(self._extract_mpd_formats( formats.extend(self._extract_mpd_formats(
_MANIFEST_PATTERN % (service_id, 'mpd'), service_id, _MANIFEST_PATTERN % (service_id, 'mpd'), service_id,
mpd_id='dash', fatal=False)) mpd_id='dash', fatal=False))

@ -1,11 +1,20 @@
# coding: utf-8 # coding: utf-8
from __future__ import unicode_literals from __future__ import unicode_literals
from .common import InfoExtractor
from ..utils import urlencode_postdata
import re import re
from .common import InfoExtractor
from ..utils import (
clean_html,
float_or_none,
get_element_by_class,
get_element_by_id,
parse_duration,
str_to_int,
unified_timestamp,
urlencode_postdata,
)
class TwitCastingIE(InfoExtractor): class TwitCastingIE(InfoExtractor):
_VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)' _VALID_URL = r'https?://(?:[^/]+\.)?twitcasting\.tv/(?P<uploader_id>[^/]+)/movie/(?P<id>\d+)'
@ -17,8 +26,12 @@ class TwitCastingIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Live #2357609', 'title': 'Live #2357609',
'uploader_id': 'ivetesangalo', 'uploader_id': 'ivetesangalo',
'description': "Moi! I'm live on TwitCasting from my iPhone.", 'description': 'Twitter Oficial da cantora brasileira Ivete Sangalo.',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20110822',
'timestamp': 1314010824,
'duration': 32,
'view_count': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -30,8 +43,12 @@ class TwitCastingIE(InfoExtractor):
'ext': 'mp4', 'ext': 'mp4',
'title': 'Live playing something #3689740', 'title': 'Live playing something #3689740',
'uploader_id': 'mttbernardini', 'uploader_id': 'mttbernardini',
'description': "I'm live on TwitCasting from my iPad. password: abc (Santa Marinella/Lazio, Italia)", 'description': 'Salve, io sono Matto (ma con la e). Questa è la mia presentazione, in quanto sono letteralmente matto (nel senso di strano), con qualcosa in più.',
'thumbnail': r're:^https?://.*\.jpg$', 'thumbnail': r're:^https?://.*\.jpg$',
'upload_date': '20120212',
'timestamp': 1329028024,
'duration': 681,
'view_count': int,
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
@ -40,9 +57,7 @@ class TwitCastingIE(InfoExtractor):
}] }]
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) uploader_id, video_id = re.match(self._VALID_URL, url).groups()
video_id = mobj.group('id')
uploader_id = mobj.group('uploader_id')
video_password = self._downloader.params.get('videopassword') video_password = self._downloader.params.get('videopassword')
request_data = None request_data = None
@ -52,30 +67,45 @@ class TwitCastingIE(InfoExtractor):
}) })
webpage = self._download_webpage(url, video_id, data=request_data) webpage = self._download_webpage(url, video_id, data=request_data)
title = self._html_search_regex( title = clean_html(get_element_by_id(
r'(?s)<[^>]+id=["\']movietitle[^>]+>(.+?)</', 'movietitle', webpage)) or self._html_search_meta(
webpage, 'title', default=None) or self._html_search_meta( ['og:title', 'twitter:title'], webpage, fatal=True)
'twitter:title', webpage, fatal=True)
video_js_data = {}
m3u8_url = self._search_regex( m3u8_url = self._search_regex(
(r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1', r'data-movie-url=(["\'])(?P<url>(?:(?!\1).)+)\1',
r'(["\'])(?P<url>http.+?\.m3u8.*?)\1'), webpage, 'm3u8 url', group='url', default=None)
webpage, 'm3u8 url', group='url') if not m3u8_url:
video_js_data = self._parse_json(self._search_regex(
r"data-movie-playlist='(\[[^']+\])'",
webpage, 'movie playlist'), video_id)[0]
m3u8_url = video_js_data['source']['url']
# use `m3u8` entry_protocol until EXT-X-MAP is properly supported by `m3u8_native` entry_protocol
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
m3u8_url, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_url, video_id, 'mp4', m3u8_id='hls')
m3u8_id='hls')
thumbnail = self._og_search_thumbnail(webpage) thumbnail = video_js_data.get('thumbnailUrl') or self._og_search_thumbnail(webpage)
description = self._og_search_description( description = clean_html(get_element_by_id(
webpage, default=None) or self._html_search_meta( 'authorcomment', webpage)) or self._html_search_meta(
'twitter:description', webpage) ['description', 'og:description', 'twitter:description'], webpage)
duration = float_or_none(video_js_data.get(
'duration'), 1000) or parse_duration(clean_html(
get_element_by_class('tw-player-duration-time', webpage)))
view_count = str_to_int(self._search_regex(
r'Total\s*:\s*([\d,]+)\s*Views', webpage, 'views', None))
timestamp = unified_timestamp(self._search_regex(
r'data-toggle="true"[^>]+datetime="([^"]+)"',
webpage, 'datetime', None))
return { return {
'id': video_id, 'id': video_id,
'title': title, 'title': title,
'description': description, 'description': description,
'thumbnail': thumbnail, 'thumbnail': thumbnail,
'timestamp': timestamp,
'uploader_id': uploader_id, 'uploader_id': uploader_id,
'duration': duration,
'view_count': view_count,
'formats': formats, 'formats': formats,
} }

@ -155,6 +155,7 @@ class VLiveIE(VLiveBaseIE):
'old/v3/live/%s/playInfo', 'old/v3/live/%s/playInfo',
video_id)['result']['adaptiveStreamUrl'] video_id)['result']['adaptiveStreamUrl']
formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4') formats = self._extract_m3u8_formats(stream_url, video_id, 'mp4')
self._sort_formats(formats)
info = get_common_fields() info = get_common_fields()
info.update({ info.update({
'title': self._live_title(video['title']), 'title': self._live_title(video['title']),

@ -260,6 +260,14 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
}, },
'playlist_count': 33, 'playlist_count': 33,
# 'skip': 'Travis CI servers blocked by YandexMusic', # 'skip': 'Travis CI servers blocked by YandexMusic',
}, {
# empty artists
'url': 'https://music.yandex.ru/album/9091882',
'info_dict': {
'id': '9091882',
'title': 'ТЕД на русском',
},
'playlist_count': 187,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
@ -273,7 +281,10 @@ class YandexMusicAlbumIE(YandexMusicPlaylistBaseIE):
entries = self._build_playlist([track for volume in album['volumes'] for track in volume]) entries = self._build_playlist([track for volume in album['volumes'] for track in volume])
title = '%s - %s' % (album['artists'][0]['name'], album['title']) title = album['title']
artist = try_get(album, lambda x: x['artists'][0]['name'], compat_str)
if artist:
title = '%s - %s' % (artist, title)
year = album.get('year') year = album.get('year')
if year: if year:
title += ' (%s)' % year title += ' (%s)' % year

@ -343,10 +343,18 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
(?:www\.)?invidious\.kabi\.tk/| (?:www\.)?invidious\.kabi\.tk/|
(?:www\.)?invidious\.13ad\.de/| (?:www\.)?invidious\.13ad\.de/|
(?:www\.)?invidious\.mastodon\.host/| (?:www\.)?invidious\.mastodon\.host/|
(?:www\.)?invidious\.zapashcanon\.fr/|
(?:www\.)?invidious\.kavin\.rocks/|
(?:www\.)?invidious\.tube/|
(?:www\.)?invidiou\.site/|
(?:www\.)?invidious\.site/|
(?:www\.)?invidious\.xyz/|
(?:www\.)?invidious\.nixnet\.xyz/| (?:www\.)?invidious\.nixnet\.xyz/|
(?:www\.)?invidious\.drycat\.fr/| (?:www\.)?invidious\.drycat\.fr/|
(?:www\.)?tube\.poal\.co/| (?:www\.)?tube\.poal\.co/|
(?:www\.)?tube\.connect\.cafe/|
(?:www\.)?vid\.wxzm\.sx/| (?:www\.)?vid\.wxzm\.sx/|
(?:www\.)?vid\.mint\.lgbt/|
(?:www\.)?yewtu\.be/| (?:www\.)?yewtu\.be/|
(?:www\.)?yt\.elukerio\.org/| (?:www\.)?yt\.elukerio\.org/|
(?:www\.)?yt\.lelux\.fi/| (?:www\.)?yt\.lelux\.fi/|

Loading…
Cancel
Save