Update to 2020.12.29

pull/280/head
pukkandan 5 years ago
parent 8bff4f84b5
commit 262db5f1ce

@ -322,7 +322,6 @@
- **Funk** - **Funk**
- **Fusion** - **Fusion**
- **Fux** - **Fux**
- **FXNetworks**
- **Gaia** - **Gaia**
- **GameInformer** - **GameInformer**
- **GameSpot** - **GameSpot**

@ -80,7 +80,8 @@ class AMCNetworksIE(ThePlatformIE):
title = theplatform_metadata['title'] title = theplatform_metadata['title']
rating = try_get( rating = try_get(
theplatform_metadata, lambda x: x['ratings'][0]['rating']) theplatform_metadata, lambda x: x['ratings'][0]['rating'])
if properties.get('videoCategory') == 'TVE-Auth': video_category = properties.get('videoCategory')
if video_category and video_category.endswith('-Auth'):
resource = self._get_mvpd_resource( resource = self._get_mvpd_resource(
requestor_id, title, video_id, rating) requestor_id, title, video_id, rating)
query['auth'] = self._extract_mvpd_auth( query['auth'] = self._extract_mvpd_auth(

@ -3,6 +3,7 @@ from __future__ import unicode_literals
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
get_element_by_id,
int_or_none, int_or_none,
merge_dicts, merge_dicts,
mimetype2ext, mimetype2ext,
@ -39,23 +40,15 @@ class AparatIE(InfoExtractor):
webpage = self._download_webpage(url, video_id, fatal=False) webpage = self._download_webpage(url, video_id, fatal=False)
if not webpage: if not webpage:
# Note: There is an easier-to-parse configuration at
# http://www.aparat.com/video/video/config/videohash/%video_id
# but the URL in there does not work
webpage = self._download_webpage( webpage = self._download_webpage(
'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id, 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id,
video_id) video_id)
options = self._parse_json( options = self._parse_json(self._search_regex(
self._search_regex( r'options\s*=\s*({.+?})\s*;', webpage, 'options'), video_id)
r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P<value>(?:(?!\1).)+)\1\s*\)',
webpage, 'options', group='value'),
video_id)
player = options['plugins']['sabaPlayerPlugin']
formats = [] formats = []
for sources in player['multiSRC']: for sources in (options.get('multiSRC') or []):
for item in sources: for item in sources:
if not isinstance(item, dict): if not isinstance(item, dict):
continue continue
@ -85,11 +78,12 @@ class AparatIE(InfoExtractor):
info = self._search_json_ld(webpage, video_id, default={}) info = self._search_json_ld(webpage, video_id, default={})
if not info.get('title'): if not info.get('title'):
info['title'] = player['title'] info['title'] = get_element_by_id('videoTitle', webpage) or \
self._html_search_meta(['og:title', 'twitter:title', 'DC.Title', 'title'], webpage, fatal=True)
return merge_dicts(info, { return merge_dicts(info, {
'id': video_id, 'id': video_id,
'thumbnail': url_or_none(options.get('poster')), 'thumbnail': url_or_none(options.get('poster')),
'duration': int_or_none(player.get('duration')), 'duration': int_or_none(options.get('duration')),
'formats': formats, 'formats': formats,
}) })

@ -543,15 +543,6 @@ class BrightcoveNewIE(AdobePassIE):
if sources_num == key_systems_present: if sources_num == key_systems_present:
raise ExtractorError('This video is DRM protected', expected=True) raise ExtractorError('This video is DRM protected', expected=True)
if not formats:
# for sonyliv.com DRM protected videos
s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl')
if s3_source_url:
formats.append({
'url': s3_source_url,
'format_id': 'source',
})
errors = json_data.get('errors') errors = json_data.get('errors')
if not formats and errors: if not formats and errors:
error = errors[0] error = errors[0]

@ -415,7 +415,6 @@ from .fujitv import FujiTVFODPlus7IE
from .funimation import FunimationIE from .funimation import FunimationIE
from .funk import FunkIE from .funk import FunkIE
from .fusion import FusionIE from .fusion import FusionIE
from .fxnetworks import FXNetworksIE
from .gaia import GaiaIE from .gaia import GaiaIE
from .gameinformer import GameInformerIE from .gameinformer import GameInformerIE
from .gamespot import GameSpotIE from .gamespot import GameSpotIE

@ -64,7 +64,10 @@ from .tube8 import Tube8IE
from .mofosex import MofosexEmbedIE from .mofosex import MofosexEmbedIE
from .spankwire import SpankwireIE from .spankwire import SpankwireIE
from .youporn import YouPornIE from .youporn import YouPornIE
from .vimeo import VimeoIE from .vimeo import (
VimeoIE,
VHXEmbedIE,
)
from .dailymotion import DailymotionIE from .dailymotion import DailymotionIE
from .dailymail import DailyMailIE from .dailymail import DailyMailIE
from .onionstudios import OnionStudiosIE from .onionstudios import OnionStudiosIE
@ -2191,7 +2194,18 @@ class GenericIE(InfoExtractor):
# 'params': { # 'params': {
# 'force_generic_extractor': True, # 'force_generic_extractor': True,
# }, # },
# } # },
{
# VHX Embed
'url': 'https://demo.vhx.tv/category-c/videos/file-example-mp4-480-1-5mg-copy',
'info_dict': {
'id': '858208',
'ext': 'mp4',
'title': 'Untitled',
'uploader_id': 'user80538407',
'uploader': 'OTT Videos',
},
},
] ]
def report_following_redirect(self, new_url): def report_following_redirect(self, new_url):
@ -2569,6 +2583,10 @@ class GenericIE(InfoExtractor):
if vimeo_urls: if vimeo_urls:
return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key()) return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key())
vhx_url = VHXEmbedIE._extract_url(webpage)
if vhx_url:
return self.url_result(vhx_url, VHXEmbedIE.ie_key())
vid_me_embed_url = self._search_regex( vid_me_embed_url = self._search_regex(
r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]', r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]',
webpage, 'vid.me embed', default=None) webpage, 'vid.me embed', default=None)

@ -38,13 +38,17 @@ class GoIE(AdobePassIE):
'disneynow': { 'disneynow': {
'brand': '011', 'brand': '011',
'resource_id': 'Disney', 'resource_id': 'Disney',
} },
'fxnow.fxnetworks': {
'brand': '025',
'requestor_id': 'dtci',
},
} }
_VALID_URL = r'''(?x) _VALID_URL = r'''(?x)
https?:// https?://
(?: (?:
(?:(?P<sub_domain>%s)\.)?go| (?:(?P<sub_domain>%s)\.)?go|
(?P<sub_domain_2>abc|freeform|disneynow) (?P<sub_domain_2>abc|freeform|disneynow|fxnow\.fxnetworks)
)\.com/ )\.com/
(?: (?:
(?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)| (?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
@ -99,6 +103,19 @@ class GoIE(AdobePassIE):
# m3u8 download # m3u8 download
'skip_download': True, 'skip_download': True,
}, },
}, {
'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841',
'info_dict': {
'id': 'VDKA12782841',
'ext': 'mp4',
'title': 'First Look: Better Things - Season 2',
'description': 'md5:fa73584a95761c605d9d54904e35b407',
},
'params': {
'geo_bypass_ip_block': '3.244.239.0/24',
# m3u8 download
'skip_download': True,
},
}, { }, {
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding', 'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
'only_matching': True, 'only_matching': True,

@ -2,15 +2,14 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import json import json
from .common import InfoExtractor from .telecinco import TelecincoIE
from ..utils import ( from ..utils import (
int_or_none, int_or_none,
parse_iso8601, parse_iso8601,
smuggle_url,
) )
class MiTeleIE(InfoExtractor): class MiTeleIE(TelecincoIE):
IE_DESC = 'mitele.es' IE_DESC = 'mitele.es'
_VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player' _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P<id>[^/]+)/player'
@ -53,7 +52,7 @@ class MiTeleIE(InfoExtractor):
}, },
'params': { 'params': {
'skip_download': True, 'skip_download': True,
} },
}, { }, {
'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player', 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player',
'only_matching': True, 'only_matching': True,
@ -69,13 +68,11 @@ class MiTeleIE(InfoExtractor):
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})', r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})',
webpage, 'Pre Player'), display_id)['prePlayer'] webpage, 'Pre Player'), display_id)['prePlayer']
title = pre_player['title'] title = pre_player['title']
video = pre_player['video'] video_info = self._parse_content(pre_player['video'], url)
video_id = video['dataMediaId']
content = pre_player.get('content') or {} content = pre_player.get('content') or {}
info = content.get('info') or {} info = content.get('info') or {}
info = { video_info.update({
'id': video_id,
'title': title, 'title': title,
'description': info.get('synopsis'), 'description': info.get('synopsis'),
'series': content.get('title'), 'series': content.get('title'),
@ -83,38 +80,7 @@ class MiTeleIE(InfoExtractor):
'episode': content.get('subtitle'), 'episode': content.get('subtitle'),
'episode_number': int_or_none(info.get('episode_number')), 'episode_number': int_or_none(info.get('episode_number')),
'duration': int_or_none(info.get('duration')), 'duration': int_or_none(info.get('duration')),
'thumbnail': video.get('dataPoster'),
'age_limit': int_or_none(info.get('rating')), 'age_limit': int_or_none(info.get('rating')),
'timestamp': parse_iso8601(pre_player.get('publishedTime')), 'timestamp': parse_iso8601(pre_player.get('publishedTime')),
} })
return video_info
if video.get('dataCmsId') == 'ooyala':
info.update({
'_type': 'url_transparent',
# for some reason only HLS is supported
'url': smuggle_url('ooyala:' + video_id, {'supportedformats': 'm3u8,dash'}),
})
else:
config = self._download_json(
video['dataConfig'], video_id, 'Downloading config JSON')
services = config['services']
gbx = self._download_json(
services['gbx'], video_id, 'Downloading gbx JSON')
caronte = self._download_json(
services['caronte'], video_id, 'Downloading caronte JSON')
cerbero = self._download_json(
caronte['cerbero'], video_id, 'Downloading cerbero JSON',
headers={
'Content-Type': 'application/json;charset=UTF-8',
'Origin': 'https://www.mitele.es'
},
data=json.dumps({
'bbx': caronte['bbx'],
'gbx': gbx['gbx']
}).encode('utf-8'))
formats = self._extract_m3u8_formats(
caronte['dls'][0]['stream'], video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
query=dict([cerbero['tokens']['1']['cdn'].split('=', 1)]))
info['formats'] = formats
return info

@ -90,7 +90,7 @@ class NhkVodIE(NhkBaseIE):
_TESTS = [{ _TESTS = [{
# video clip # video clip
'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/', 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/',
'md5': '256a1be14f48d960a7e61e2532d95ec3', 'md5': '7a90abcfe610ec22a6bfe15bd46b30ca',
'info_dict': { 'info_dict': {
'id': 'a95j5iza', 'id': 'a95j5iza',
'ext': 'mp4', 'ext': 'mp4',

@ -6,16 +6,33 @@ import re
from .common import InfoExtractor from .common import InfoExtractor
from ..compat import compat_str from ..compat import compat_str
from ..utils import ( from ..utils import (
ExtractorError,
dict_get, dict_get,
ExtractorError,
int_or_none, int_or_none,
unescapeHTML,
parse_iso8601, parse_iso8601,
try_get,
unescapeHTML,
) )
class PikselIE(InfoExtractor): class PikselIE(InfoExtractor):
_VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P<id>[a-z0-9_]+)' _VALID_URL = r'''(?x)https?://
(?:
(?:
player\.
(?:
olympusattelecom|
vibebyvista
)|
(?:api|player)\.multicastmedia|
(?:api-ovp|player)\.piksel
)\.com|
(?:
mz-edge\.stream\.co|
movie-s\.nhk\.or
)\.jp|
vidego\.baltimorecity\.gov
)/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)'''
_TESTS = [ _TESTS = [
{ {
'url': 'http://player.piksel.com/v/ums2867l', 'url': 'http://player.piksel.com/v/ums2867l',
@ -56,46 +73,41 @@ class PikselIE(InfoExtractor):
if mobj: if mobj:
return mobj.group('url') return mobj.group('url')
def _call_api(self, app_token, resource, display_id, query, fatal=True):
response = (self._download_json(
'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token),
display_id, query=query, fatal=fatal) or {}).get('response')
failure = try_get(response, lambda x: x['failure']['reason'])
if failure:
if fatal:
raise ExtractorError(failure, expected=True)
self.report_warning(failure)
return response
def _real_extract(self, url): def _real_extract(self, url):
display_id = self._match_id(url) ref_id, display_id = re.match(self._VALID_URL, url).groups()
webpage = self._download_webpage(url, display_id) webpage = self._download_webpage(url, display_id)
video_id = self._search_regex(
r'data-de-program-uuid=[\'"]([a-z0-9]+)',
webpage, 'program uuid', default=display_id)
app_token = self._search_regex([ app_token = self._search_regex([
r'clientAPI\s*:\s*"([^"]+)"', r'clientAPI\s*:\s*"([^"]+)"',
r'data-de-api-key\s*=\s*"([^"]+)"' r'data-de-api-key\s*=\s*"([^"]+)"'
], webpage, 'app token') ], webpage, 'app token')
response = self._download_json( query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id}
'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token, program = self._call_api(
video_id, query={ app_token, 'program', display_id, query)['WsProgramResponse']['program']
'v': video_id video_id = program['uuid']
})['response'] video_data = program['asset']
failure = response.get('failure')
if failure:
raise ExtractorError(response['failure']['reason'], expected=True)
video_data = response['WsProgramResponse']['program']['asset']
title = video_data['title'] title = video_data['title']
asset_type = dict_get(video_data, ['assetType', 'asset_type'])
formats = [] formats = []
m3u8_url = dict_get(video_data, [ def process_asset_file(asset_file):
'm3u8iPadURL', if not asset_file:
'ipadM3u8Url', return
'm3u8AndroidURL',
'm3u8iPhoneURL',
'iphoneM3u8Url'])
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
asset_type = dict_get(video_data, ['assetType', 'asset_type'])
for asset_file in video_data.get('assetFiles', []):
# TODO: extract rtmp formats # TODO: extract rtmp formats
http_url = asset_file.get('http_url') http_url = asset_file.get('http_url')
if not http_url: if not http_url:
continue return
tbr = None tbr = None
vbr = int_or_none(asset_file.get('videoBitrate'), 1024) vbr = int_or_none(asset_file.get('videoBitrate'), 1024)
abr = int_or_none(asset_file.get('audioBitrate'), 1024) abr = int_or_none(asset_file.get('audioBitrate'), 1024)
@ -118,6 +130,43 @@ class PikselIE(InfoExtractor):
'filesize': int_or_none(asset_file.get('filesize')), 'filesize': int_or_none(asset_file.get('filesize')),
'tbr': tbr, 'tbr': tbr,
}) })
def process_asset_files(asset_files):
for asset_file in (asset_files or []):
process_asset_file(asset_file)
process_asset_files(video_data.get('assetFiles'))
process_asset_file(video_data.get('referenceFile'))
if not formats:
asset_id = video_data.get('assetid') or program.get('assetid')
if asset_id:
process_asset_files(try_get(self._call_api(
app_token, 'asset_file', display_id, {
'assetid': asset_id,
}, False), lambda x: x['WsAssetFileResponse']['AssetFiles']))
m3u8_url = dict_get(video_data, [
'm3u8iPadURL',
'ipadM3u8Url',
'm3u8AndroidURL',
'm3u8iPhoneURL',
'iphoneM3u8Url'])
if m3u8_url:
formats.extend(self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil'])
if smil_url:
transform_source = None
if ref_id == 'nhkworld':
# TODO: figure out if this is something to be fixed in urljoin,
# _parse_smil_formats or keep it here
transform_source = lambda x: x.replace('src="/', 'src="').replace('/media"', '/media/"')
formats.extend(self._extract_smil_formats(
re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id,
transform_source=transform_source, fatal=False))
self._sort_formats(formats) self._sort_formats(formats)
subtitles = {} subtitles = {}

@ -140,7 +140,7 @@ class TeachableIE(TeachableBaseIE):
@staticmethod @staticmethod
def _is_teachable(webpage): def _is_teachable(webpage):
return 'teachableTracker.linker:autoLink' in webpage and re.search( return 'teachableTracker.linker:autoLink' in webpage and re.search(
r'<link[^>]+href=["\']https?://process\.fs\.teachablecdn\.com', r'<link[^>]+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com',
webpage) webpage)
@staticmethod @staticmethod

@ -5,14 +5,11 @@ import json
import re import re
from .common import InfoExtractor from .common import InfoExtractor
from .ooyala import OoyalaIE
from ..utils import ( from ..utils import (
clean_html, clean_html,
determine_ext,
int_or_none, int_or_none,
str_or_none, str_or_none,
try_get, try_get,
urljoin,
) )
@ -28,7 +25,7 @@ class TelecincoIE(InfoExtractor):
'description': 'md5:716caf5601e25c3c5ab6605b1ae71529', 'description': 'md5:716caf5601e25c3c5ab6605b1ae71529',
}, },
'playlist': [{ 'playlist': [{
'md5': 'adb28c37238b675dad0f042292f209a7', 'md5': '7ee56d665cfd241c0e6d80fd175068b0',
'info_dict': { 'info_dict': {
'id': 'JEA5ijCnF6p5W08A1rNKn7', 'id': 'JEA5ijCnF6p5W08A1rNKn7',
'ext': 'mp4', 'ext': 'mp4',
@ -38,7 +35,7 @@ class TelecincoIE(InfoExtractor):
}] }]
}, { }, {
'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html', 'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html',
'md5': '9468140ebc300fbb8b9d65dc6e5c4b43', 'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a',
'info_dict': { 'info_dict': {
'id': 'jn24Od1zGLG4XUZcnUnZB6', 'id': 'jn24Od1zGLG4XUZcnUnZB6',
'ext': 'mp4', 'ext': 'mp4',
@ -48,7 +45,7 @@ class TelecincoIE(InfoExtractor):
}, },
}, { }, {
'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html', 'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html',
'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6', 'md5': 'eddb50291df704ce23c74821b995bcac',
'info_dict': { 'info_dict': {
'id': 'aywerkD2Sv1vGNqq9b85Q2', 'id': 'aywerkD2Sv1vGNqq9b85Q2',
'ext': 'mp4', 'ext': 'mp4',
@ -90,58 +87,24 @@ class TelecincoIE(InfoExtractor):
def _parse_content(self, content, url): def _parse_content(self, content, url):
video_id = content['dataMediaId'] video_id = content['dataMediaId']
if content.get('dataCmsId') == 'ooyala':
return self.url_result(
'ooyala:%s' % video_id, OoyalaIE.ie_key(), video_id)
config_url = urljoin(url, content['dataConfig'])
config = self._download_json( config = self._download_json(
config_url, video_id, 'Downloading config JSON') content['dataConfig'], video_id, 'Downloading config JSON')
title = config['info']['title'] title = config['info']['title']
services = config['services']
def mmc_url(mmc_type): caronte = self._download_json(services['caronte'], video_id)
return re.sub( stream = caronte['dls'][0]['stream']
r'/(?:flash|html5)\.json', '/%s.json' % mmc_type, headers = self.geo_verification_headers()
config['services']['mmc']) headers.update({
'Content-Type': 'application/json;charset=UTF-8',
duration = None 'Origin': re.match(r'https?://[^/]+', url).group(0),
formats = [] })
for mmc_type in ('flash', 'html5'): cdn = self._download_json(
mmc = self._download_json( caronte['cerbero'], video_id, data=json.dumps({
mmc_url(mmc_type), video_id, 'bbx': caronte['bbx'],
'Downloading %s mmc JSON' % mmc_type, fatal=False) 'gbx': self._download_json(services['gbx'], video_id)['gbx'],
if not mmc: }).encode(), headers=headers)['tokens']['1']['cdn']
continue formats = self._extract_m3u8_formats(
if not duration: stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
duration = int_or_none(mmc.get('duration'))
for location in mmc['locations']:
gat = self._proto_relative_url(location.get('gat'), 'http:')
gcp = location.get('gcp')
ogn = location.get('ogn')
if None in (gat, gcp, ogn):
continue
token_data = {
'gcp': gcp,
'ogn': ogn,
'sta': 0,
}
media = self._download_json(
gat, video_id, data=json.dumps(token_data).encode('utf-8'),
headers={
'Content-Type': 'application/json;charset=utf-8',
'Referer': url,
}, fatal=False) or {}
stream = media.get('stream') or media.get('file')
if not stream:
continue
ext = determine_ext(stream)
if ext == 'f4m':
formats.extend(self._extract_f4m_formats(
stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
video_id, f4m_id='hds', fatal=False))
elif ext == 'm3u8':
formats.extend(self._extract_m3u8_formats(
stream, video_id, 'mp4', 'm3u8_native',
m3u8_id='hls', fatal=False))
self._sort_formats(formats) self._sort_formats(formats)
return { return {
@ -149,7 +112,7 @@ class TelecincoIE(InfoExtractor):
'title': title, 'title': title,
'formats': formats, 'formats': formats,
'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'), 'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'),
'duration': duration, 'duration': int_or_none(content.get('dataDuration')),
} }
def _real_extract(self, url): def _real_extract(self, url):

@ -200,7 +200,7 @@ class ToggleIE(InfoExtractor):
class MeWatchIE(InfoExtractor): class MeWatchIE(InfoExtractor):
IE_NAME = 'mewatch' IE_NAME = 'mewatch'
_VALID_URL = r'https?://(?:www\.)?mewatch\.sg/watch/[^/?#&]+-(?P<id>[0-9]+)' _VALID_URL = r'https?://(?:(?:www|live)\.)?mewatch\.sg/watch/[^/?#&]+-(?P<id>[0-9]+)'
_TESTS = [{ _TESTS = [{
'url': 'https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371', 'url': 'https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371',
'info_dict': { 'info_dict': {
@ -220,6 +220,9 @@ class MeWatchIE(InfoExtractor):
}, { }, {
'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-%E6%90%9C%E5%AF%86%E3%80%82%E6%89%93%E5%8D%A1%E3%80%82%E5%B0%8F%E7%BA%A2%E7%82%B9-S2-E1-176232', 'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-%E6%90%9C%E5%AF%86%E3%80%82%E6%89%93%E5%8D%A1%E3%80%82%E5%B0%8F%E7%BA%A2%E7%82%B9-S2-E1-176232',
'only_matching': True, 'only_matching': True,
}, {
'url': 'https://live.mewatch.sg/watch/Recipe-Of-Life-E41-189759',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):

@ -1119,6 +1119,12 @@ class VHXEmbedIE(VimeoBaseInfoExtractor):
IE_NAME = 'vhx:embed' IE_NAME = 'vhx:embed'
_VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)' _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P<id>\d+)'
@staticmethod
def _extract_url(webpage):
mobj = re.search(
r'<iframe[^>]+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage)
return unescapeHTML(mobj.group(1)) if mobj else None
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
webpage = self._download_webpage(url, video_id) webpage = self._download_webpage(url, video_id)
@ -1127,5 +1133,6 @@ class VHXEmbedIE(VimeoBaseInfoExtractor):
'ott data'), video_id, js_to_json)['config_url'] 'ott data'), video_id, js_to_json)['config_url']
config = self._download_json(config_url, video_id) config = self._download_json(config_url, video_id)
info = self._parse_config(config, video_id) info = self._parse_config(config, video_id)
info['id'] = video_id
self._vimeo_sort_formats(info['formats']) self._vimeo_sort_formats(info['formats'])
return info return info

@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter
from ..swfinterp import SWFInterpreter from ..swfinterp import SWFInterpreter
from ..compat import ( from ..compat import (
compat_chr, compat_chr,
compat_HTTPError,
compat_kwargs, compat_kwargs,
compat_parse_qs, compat_parse_qs,
compat_urllib_parse_unquote, compat_urllib_parse_unquote,
@ -64,7 +65,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
_TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}' _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
_RESERVED_NAMES = ( _RESERVED_NAMES = (
r'embed|e|channel|c|user|playlist|watch|w|v|results|shared|' r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|'
r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|' r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|'
r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)') r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)')
@ -303,6 +304,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;' _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;'
_YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;' _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;'
_YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|</script|\n)'
def _call_api(self, ep, query, video_id): def _call_api(self, ep, query, video_id):
data = self._DEFAULT_API_DATA.copy() data = self._DEFAULT_API_DATA.copy()
@ -320,7 +322,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
def _extract_yt_initial_data(self, video_id, webpage): def _extract_yt_initial_data(self, video_id, webpage):
return self._parse_json( return self._parse_json(
self._search_regex( self._search_regex(
(r'%s\s*\n' % self._YT_INITIAL_DATA_RE, (r'%s\s*%s' % (self._YT_INITIAL_DATA_RE, self._YT_INITIAL_BOUNDARY_RE),
self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'), self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
video_id) video_id)
@ -345,7 +347,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
# Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances # Invidious instances taken from https://github.com/omarroth/invidious/wiki/Invidious-Instances
(?:(?:www|dev)\.)?invidio\.us/| (?:(?:www|dev)\.)?invidio\.us/|
(?:(?:www|no)\.)?invidiou\.sh/| (?:(?:www|no)\.)?invidiou\.sh/|
(?:(?:www|fi|de)\.)?invidious\.snopyta\.org/| (?:(?:www|fi)\.)?invidious\.snopyta\.org/|
(?:www\.)?invidious\.kabi\.tk/| (?:www\.)?invidious\.kabi\.tk/|
(?:www\.)?invidious\.13ad\.de/| (?:www\.)?invidious\.13ad\.de/|
(?:www\.)?invidious\.mastodon\.host/| (?:www\.)?invidious\.mastodon\.host/|
@ -1120,6 +1122,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
'skip_download': True, 'skip_download': True,
}, },
}, },
{
# another example of '};' in ytInitialData
'url': 'https://www.youtube.com/watch?v=gVfgbahppCY',
'only_matching': True,
},
{
'url': 'https://www.youtube.com/watch_popup?v=63RmMXCd_bQ',
'only_matching': True,
},
] ]
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
@ -1779,7 +1790,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
if not video_info and not player_response: if not video_info and not player_response:
player_response = extract_player_response( player_response = extract_player_response(
self._search_regex( self._search_regex(
(r'%s\s*(?:var\s+meta|</script|\n)' % self._YT_INITIAL_PLAYER_RESPONSE_RE, (r'%s\s*%s' % (self._YT_INITIAL_PLAYER_RESPONSE_RE, self._YT_INITIAL_BOUNDARY_RE),
self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE), video_webpage,
'initial player response', default='{}'), 'initial player response', default='{}'),
video_id) video_id)
@ -2830,6 +2841,11 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
'only_matching': True, 'only_matching': True,
}] }]
@classmethod
def suitable(cls, url):
return False if YoutubeIE.suitable(url) else super(
YoutubeTabIE, cls).suitable(url)
def _extract_channel_id(self, webpage): def _extract_channel_id(self, webpage):
channel_id = self._html_search_meta( channel_id = self._html_search_meta(
'channelId', webpage, 'channel id', default=None) 'channelId', webpage, 'channel id', default=None)
@ -3143,10 +3159,24 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
for page_num in itertools.count(1): for page_num in itertools.count(1):
if not continuation: if not continuation:
break break
browse = self._download_json( count = 0
'https://www.youtube.com/browse_ajax', None, retries = 3
'Downloading page %d' % page_num, while count <= retries:
headers=headers, query=continuation, fatal=False) try:
# Downloading page may result in intermittent 5xx HTTP error
# that is usually worked around with a retry
browse = self._download_json(
'https://www.youtube.com/browse_ajax', None,
'Downloading page %d%s'
% (page_num, ' (retry #%d)' % count if count else ''),
headers=headers, query=continuation)
break
except ExtractorError as e:
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
count += 1
if count <= retries:
continue
raise
if not browse: if not browse:
break break
response = try_get(browse, lambda x: x[1]['response'], dict) response = try_get(browse, lambda x: x[1]['response'], dict)

@ -85,7 +85,13 @@ class ZypeIE(InfoExtractor):
else: else:
m3u8_url = self._search_regex( m3u8_url = self._search_regex(
r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1', r'(["\'])(?P<url>(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1',
body, 'm3u8 url', group='url') body, 'm3u8 url', group='url', default=None)
if not m3u8_url:
source = self._parse_json(self._search_regex(
r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body,
'source'), video_id, js_to_json)
if source.get('integration') == 'verizon-media':
m3u8_url = 'https://content.uplynk.com/%s.m3u8' % source['id']
formats = self._extract_m3u8_formats( formats = self._extract_m3u8_formats(
m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls')
text_tracks = self._search_regex( text_tracks = self._search_regex(

Loading…
Cancel
Save