From 262db5f1ce586119752e659b8f426564b57108ed Mon Sep 17 00:00:00 2001 From: pukkandan Date: Tue, 29 Dec 2020 11:37:57 +0530 Subject: [PATCH] Update to 2020.12.29 --- docs/supportedsites.md | 1 - youtube_dlc/extractor/amcnetworks.py | 3 +- youtube_dlc/extractor/aparat.py | 20 ++--- youtube_dlc/extractor/brightcove.py | 9 --- youtube_dlc/extractor/extractors.py | 1 - youtube_dlc/extractor/generic.py | 22 +++++- youtube_dlc/extractor/go.py | 21 +++++- youtube_dlc/extractor/mitele.py | 48 ++---------- youtube_dlc/extractor/nhk.py | 2 +- youtube_dlc/extractor/piksel.py | 109 +++++++++++++++++++-------- youtube_dlc/extractor/teachable.py | 2 +- youtube_dlc/extractor/telecinco.py | 77 +++++-------------- youtube_dlc/extractor/toggle.py | 5 +- youtube_dlc/extractor/vimeo.py | 7 ++ youtube_dlc/extractor/youtube.py | 46 +++++++++-- youtube_dlc/extractor/zype.py | 8 +- 16 files changed, 212 insertions(+), 169 deletions(-) diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 45ee65728..651ad0428 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -322,7 +322,6 @@ - **Funk** - **Fusion** - **Fux** - - **FXNetworks** - **Gaia** - **GameInformer** - **GameSpot** diff --git a/youtube_dlc/extractor/amcnetworks.py b/youtube_dlc/extractor/amcnetworks.py index 12b6de0bf..b8027bbca 100644 --- a/youtube_dlc/extractor/amcnetworks.py +++ b/youtube_dlc/extractor/amcnetworks.py @@ -80,7 +80,8 @@ class AMCNetworksIE(ThePlatformIE): title = theplatform_metadata['title'] rating = try_get( theplatform_metadata, lambda x: x['ratings'][0]['rating']) - if properties.get('videoCategory') == 'TVE-Auth': + video_category = properties.get('videoCategory') + if video_category and video_category.endswith('-Auth'): resource = self._get_mvpd_resource( requestor_id, title, video_id, rating) query['auth'] = self._extract_mvpd_auth( diff --git a/youtube_dlc/extractor/aparat.py b/youtube_dlc/extractor/aparat.py index 883dcee7a..a9527e785 100644 --- a/youtube_dlc/extractor/aparat.py +++ b/youtube_dlc/extractor/aparat.py @@ -3,6 +3,7 @@ from __future__ import unicode_literals from .common import InfoExtractor from ..utils import ( + get_element_by_id, int_or_none, merge_dicts, mimetype2ext, @@ -39,23 +40,15 @@ class AparatIE(InfoExtractor): webpage = self._download_webpage(url, video_id, fatal=False) if not webpage: - # Note: There is an easier-to-parse configuration at - # http://www.aparat.com/video/video/config/videohash/%video_id - # but the URL in there does not work webpage = self._download_webpage( 'http://www.aparat.com/video/video/embed/vt/frame/showvideo/yes/videohash/' + video_id, video_id) - options = self._parse_json( - self._search_regex( - r'options\s*=\s*JSON\.parse\(\s*(["\'])(?P(?:(?!\1).)+)\1\s*\)', - webpage, 'options', group='value'), - video_id) - - player = options['plugins']['sabaPlayerPlugin'] + options = self._parse_json(self._search_regex( + r'options\s*=\s*({.+?})\s*;', webpage, 'options'), video_id) formats = [] - for sources in player['multiSRC']: + for sources in (options.get('multiSRC') or []): for item in sources: if not isinstance(item, dict): continue @@ -85,11 +78,12 @@ class AparatIE(InfoExtractor): info = self._search_json_ld(webpage, video_id, default={}) if not info.get('title'): - info['title'] = player['title'] + info['title'] = get_element_by_id('videoTitle', webpage) or \ + self._html_search_meta(['og:title', 'twitter:title', 'DC.Title', 'title'], webpage, fatal=True) return merge_dicts(info, { 'id': video_id, 'thumbnail': url_or_none(options.get('poster')), - 'duration': int_or_none(player.get('duration')), + 'duration': int_or_none(options.get('duration')), 'formats': formats, }) diff --git a/youtube_dlc/extractor/brightcove.py b/youtube_dlc/extractor/brightcove.py index 615e77916..1a2e7c62a 100644 --- a/youtube_dlc/extractor/brightcove.py +++ b/youtube_dlc/extractor/brightcove.py @@ -543,15 +543,6 @@ class BrightcoveNewIE(AdobePassIE): if sources_num == key_systems_present: raise ExtractorError('This video is DRM protected', expected=True) - if not formats: - # for sonyliv.com DRM protected videos - s3_source_url = json_data.get('custom_fields', {}).get('s3sourceurl') - if s3_source_url: - formats.append({ - 'url': s3_source_url, - 'format_id': 'source', - }) - errors = json_data.get('errors') if not formats and errors: error = errors[0] diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py index 6e68f0960..135951e57 100644 --- a/youtube_dlc/extractor/extractors.py +++ b/youtube_dlc/extractor/extractors.py @@ -415,7 +415,6 @@ from .fujitv import FujiTVFODPlus7IE from .funimation import FunimationIE from .funk import FunkIE from .fusion import FusionIE -from .fxnetworks import FXNetworksIE from .gaia import GaiaIE from .gameinformer import GameInformerIE from .gamespot import GameSpotIE diff --git a/youtube_dlc/extractor/generic.py b/youtube_dlc/extractor/generic.py index 81c2ae650..dde76bd1e 100644 --- a/youtube_dlc/extractor/generic.py +++ b/youtube_dlc/extractor/generic.py @@ -64,7 +64,10 @@ from .tube8 import Tube8IE from .mofosex import MofosexEmbedIE from .spankwire import SpankwireIE from .youporn import YouPornIE -from .vimeo import VimeoIE +from .vimeo import ( + VimeoIE, + VHXEmbedIE, +) from .dailymotion import DailymotionIE from .dailymail import DailyMailIE from .onionstudios import OnionStudiosIE @@ -2191,7 +2194,18 @@ class GenericIE(InfoExtractor): # 'params': { # 'force_generic_extractor': True, # }, - # } + # }, + { + # VHX Embed + 'url': 'https://demo.vhx.tv/category-c/videos/file-example-mp4-480-1-5mg-copy', + 'info_dict': { + 'id': '858208', + 'ext': 'mp4', + 'title': 'Untitled', + 'uploader_id': 'user80538407', + 'uploader': 'OTT Videos', + }, + }, ] def report_following_redirect(self, new_url): @@ -2569,6 +2583,10 @@ class GenericIE(InfoExtractor): if vimeo_urls: return self.playlist_from_matches(vimeo_urls, video_id, video_title, ie=VimeoIE.ie_key()) + vhx_url = VHXEmbedIE._extract_url(webpage) + if vhx_url: + return self.url_result(vhx_url, VHXEmbedIE.ie_key()) + vid_me_embed_url = self._search_regex( r'src=[\'"](https?://vid\.me/[^\'"]+)[\'"]', webpage, 'vid.me embed', default=None) diff --git a/youtube_dlc/extractor/go.py b/youtube_dlc/extractor/go.py index 7a75dfa49..85dc561e2 100644 --- a/youtube_dlc/extractor/go.py +++ b/youtube_dlc/extractor/go.py @@ -38,13 +38,17 @@ class GoIE(AdobePassIE): 'disneynow': { 'brand': '011', 'resource_id': 'Disney', - } + }, + 'fxnow.fxnetworks': { + 'brand': '025', + 'requestor_id': 'dtci', + }, } _VALID_URL = r'''(?x) https?:// (?: (?:(?P%s)\.)?go| - (?Pabc|freeform|disneynow) + (?Pabc|freeform|disneynow|fxnow\.fxnetworks) )\.com/ (?: (?:[^/]+/)*(?P[Vv][Dd][Kk][Aa]\w+)| @@ -99,6 +103,19 @@ class GoIE(AdobePassIE): # m3u8 download 'skip_download': True, }, + }, { + 'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841', + 'info_dict': { + 'id': 'VDKA12782841', + 'ext': 'mp4', + 'title': 'First Look: Better Things - Season 2', + 'description': 'md5:fa73584a95761c605d9d54904e35b407', + }, + 'params': { + 'geo_bypass_ip_block': '3.244.239.0/24', + # m3u8 download + 'skip_download': True, + }, }, { 'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding', 'only_matching': True, diff --git a/youtube_dlc/extractor/mitele.py b/youtube_dlc/extractor/mitele.py index 7f5718e21..0b240d27f 100644 --- a/youtube_dlc/extractor/mitele.py +++ b/youtube_dlc/extractor/mitele.py @@ -2,15 +2,14 @@ from __future__ import unicode_literals import json -from .common import InfoExtractor +from .telecinco import TelecincoIE from ..utils import ( int_or_none, parse_iso8601, - smuggle_url, ) -class MiTeleIE(InfoExtractor): +class MiTeleIE(TelecincoIE): IE_DESC = 'mitele.es' _VALID_URL = r'https?://(?:www\.)?mitele\.es/(?:[^/]+/)+(?P[^/]+)/player' @@ -53,7 +52,7 @@ class MiTeleIE(InfoExtractor): }, 'params': { 'skip_download': True, - } + }, }, { 'url': 'http://www.mitele.es/series-online/la-que-se-avecina/57aac5c1c915da951a8b45ed/player', 'only_matching': True, @@ -69,13 +68,11 @@ class MiTeleIE(InfoExtractor): r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=\s*({.+})', webpage, 'Pre Player'), display_id)['prePlayer'] title = pre_player['title'] - video = pre_player['video'] - video_id = video['dataMediaId'] + video_info = self._parse_content(pre_player['video'], url) content = pre_player.get('content') or {} info = content.get('info') or {} - info = { - 'id': video_id, + video_info.update({ 'title': title, 'description': info.get('synopsis'), 'series': content.get('title'), @@ -83,38 +80,7 @@ class MiTeleIE(InfoExtractor): 'episode': content.get('subtitle'), 'episode_number': int_or_none(info.get('episode_number')), 'duration': int_or_none(info.get('duration')), - 'thumbnail': video.get('dataPoster'), 'age_limit': int_or_none(info.get('rating')), 'timestamp': parse_iso8601(pre_player.get('publishedTime')), - } - - if video.get('dataCmsId') == 'ooyala': - info.update({ - '_type': 'url_transparent', - # for some reason only HLS is supported - 'url': smuggle_url('ooyala:' + video_id, {'supportedformats': 'm3u8,dash'}), - }) - else: - config = self._download_json( - video['dataConfig'], video_id, 'Downloading config JSON') - services = config['services'] - gbx = self._download_json( - services['gbx'], video_id, 'Downloading gbx JSON') - caronte = self._download_json( - services['caronte'], video_id, 'Downloading caronte JSON') - cerbero = self._download_json( - caronte['cerbero'], video_id, 'Downloading cerbero JSON', - headers={ - 'Content-Type': 'application/json;charset=UTF-8', - 'Origin': 'https://www.mitele.es' - }, - data=json.dumps({ - 'bbx': caronte['bbx'], - 'gbx': gbx['gbx'] - }).encode('utf-8')) - formats = self._extract_m3u8_formats( - caronte['dls'][0]['stream'], video_id, 'mp4', 'm3u8_native', m3u8_id='hls', - query=dict([cerbero['tokens']['1']['cdn'].split('=', 1)])) - info['formats'] = formats - - return info + }) + return video_info diff --git a/youtube_dlc/extractor/nhk.py b/youtube_dlc/extractor/nhk.py index c5b406573..8a9331a79 100644 --- a/youtube_dlc/extractor/nhk.py +++ b/youtube_dlc/extractor/nhk.py @@ -90,7 +90,7 @@ class NhkVodIE(NhkBaseIE): _TESTS = [{ # video clip 'url': 'https://www3.nhk.or.jp/nhkworld/en/ondemand/video/9999011/', - 'md5': '256a1be14f48d960a7e61e2532d95ec3', + 'md5': '7a90abcfe610ec22a6bfe15bd46b30ca', 'info_dict': { 'id': 'a95j5iza', 'ext': 'mp4', diff --git a/youtube_dlc/extractor/piksel.py b/youtube_dlc/extractor/piksel.py index 88b6859b0..ecf56ff8f 100644 --- a/youtube_dlc/extractor/piksel.py +++ b/youtube_dlc/extractor/piksel.py @@ -6,16 +6,33 @@ import re from .common import InfoExtractor from ..compat import compat_str from ..utils import ( - ExtractorError, dict_get, + ExtractorError, int_or_none, - unescapeHTML, parse_iso8601, + try_get, + unescapeHTML, ) class PikselIE(InfoExtractor): - _VALID_URL = r'https?://player\.piksel\.com/v/(?:refid/[^/]+/prefid/)?(?P[a-z0-9_]+)' + _VALID_URL = r'''(?x)https?:// + (?: + (?: + player\. + (?: + olympusattelecom| + vibebyvista + )| + (?:api|player)\.multicastmedia| + (?:api-ovp|player)\.piksel + )\.com| + (?: + mz-edge\.stream\.co| + movie-s\.nhk\.or + )\.jp| + vidego\.baltimorecity\.gov + )/v/(?:refid/(?P[^/]+)/prefid/)?(?P[\w-]+)''' _TESTS = [ { 'url': 'http://player.piksel.com/v/ums2867l', @@ -56,46 +73,41 @@ class PikselIE(InfoExtractor): if mobj: return mobj.group('url') + def _call_api(self, app_token, resource, display_id, query, fatal=True): + response = (self._download_json( + 'http://player.piksel.com/ws/ws_%s/api/%s/mode/json/apiv/5' % (resource, app_token), + display_id, query=query, fatal=fatal) or {}).get('response') + failure = try_get(response, lambda x: x['failure']['reason']) + if failure: + if fatal: + raise ExtractorError(failure, expected=True) + self.report_warning(failure) + return response + def _real_extract(self, url): - display_id = self._match_id(url) + ref_id, display_id = re.match(self._VALID_URL, url).groups() webpage = self._download_webpage(url, display_id) - video_id = self._search_regex( - r'data-de-program-uuid=[\'"]([a-z0-9]+)', - webpage, 'program uuid', default=display_id) app_token = self._search_regex([ r'clientAPI\s*:\s*"([^"]+)"', r'data-de-api-key\s*=\s*"([^"]+)"' ], webpage, 'app token') - response = self._download_json( - 'http://player.piksel.com/ws/ws_program/api/%s/mode/json/apiv/5' % app_token, - video_id, query={ - 'v': video_id - })['response'] - failure = response.get('failure') - if failure: - raise ExtractorError(response['failure']['reason'], expected=True) - video_data = response['WsProgramResponse']['program']['asset'] + query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id} + program = self._call_api( + app_token, 'program', display_id, query)['WsProgramResponse']['program'] + video_id = program['uuid'] + video_data = program['asset'] title = video_data['title'] + asset_type = dict_get(video_data, ['assetType', 'asset_type']) formats = [] - m3u8_url = dict_get(video_data, [ - 'm3u8iPadURL', - 'ipadM3u8Url', - 'm3u8AndroidURL', - 'm3u8iPhoneURL', - 'iphoneM3u8Url']) - if m3u8_url: - formats.extend(self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - - asset_type = dict_get(video_data, ['assetType', 'asset_type']) - for asset_file in video_data.get('assetFiles', []): + def process_asset_file(asset_file): + if not asset_file: + return # TODO: extract rtmp formats http_url = asset_file.get('http_url') if not http_url: - continue + return tbr = None vbr = int_or_none(asset_file.get('videoBitrate'), 1024) abr = int_or_none(asset_file.get('audioBitrate'), 1024) @@ -118,6 +130,43 @@ class PikselIE(InfoExtractor): 'filesize': int_or_none(asset_file.get('filesize')), 'tbr': tbr, }) + + def process_asset_files(asset_files): + for asset_file in (asset_files or []): + process_asset_file(asset_file) + + process_asset_files(video_data.get('assetFiles')) + process_asset_file(video_data.get('referenceFile')) + if not formats: + asset_id = video_data.get('assetid') or program.get('assetid') + if asset_id: + process_asset_files(try_get(self._call_api( + app_token, 'asset_file', display_id, { + 'assetid': asset_id, + }, False), lambda x: x['WsAssetFileResponse']['AssetFiles'])) + + m3u8_url = dict_get(video_data, [ + 'm3u8iPadURL', + 'ipadM3u8Url', + 'm3u8AndroidURL', + 'm3u8iPhoneURL', + 'iphoneM3u8Url']) + if m3u8_url: + formats.extend(self._extract_m3u8_formats( + m3u8_url, video_id, 'mp4', 'm3u8_native', + m3u8_id='hls', fatal=False)) + + smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil']) + if smil_url: + transform_source = None + if ref_id == 'nhkworld': + # TODO: figure out if this is something to be fixed in urljoin, + # _parse_smil_formats or keep it here + transform_source = lambda x: x.replace('src="/', 'src="').replace('/media"', '/media/"') + formats.extend(self._extract_smil_formats( + re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id, + transform_source=transform_source, fatal=False)) + self._sort_formats(formats) subtitles = {} diff --git a/youtube_dlc/extractor/teachable.py b/youtube_dlc/extractor/teachable.py index 6f264bddc..2394f86d4 100644 --- a/youtube_dlc/extractor/teachable.py +++ b/youtube_dlc/extractor/teachable.py @@ -140,7 +140,7 @@ class TeachableIE(TeachableBaseIE): @staticmethod def _is_teachable(webpage): return 'teachableTracker.linker:autoLink' in webpage and re.search( - r']+href=["\']https?://process\.fs\.teachablecdn\.com', + r']+href=["\']https?://(?:process\.fs|assets)\.teachablecdn\.com', webpage) @staticmethod diff --git a/youtube_dlc/extractor/telecinco.py b/youtube_dlc/extractor/telecinco.py index 9ba3da341..eecd6a5c9 100644 --- a/youtube_dlc/extractor/telecinco.py +++ b/youtube_dlc/extractor/telecinco.py @@ -5,14 +5,11 @@ import json import re from .common import InfoExtractor -from .ooyala import OoyalaIE from ..utils import ( clean_html, - determine_ext, int_or_none, str_or_none, try_get, - urljoin, ) @@ -28,7 +25,7 @@ class TelecincoIE(InfoExtractor): 'description': 'md5:716caf5601e25c3c5ab6605b1ae71529', }, 'playlist': [{ - 'md5': 'adb28c37238b675dad0f042292f209a7', + 'md5': '7ee56d665cfd241c0e6d80fd175068b0', 'info_dict': { 'id': 'JEA5ijCnF6p5W08A1rNKn7', 'ext': 'mp4', @@ -38,7 +35,7 @@ class TelecincoIE(InfoExtractor): }] }, { 'url': 'http://www.cuatro.com/deportes/futbol/barcelona/Leo_Messi-Champions-Roma_2_2052780128.html', - 'md5': '9468140ebc300fbb8b9d65dc6e5c4b43', + 'md5': 'c86fe0d99e3bdb46b7950d38bf6ef12a', 'info_dict': { 'id': 'jn24Od1zGLG4XUZcnUnZB6', 'ext': 'mp4', @@ -48,7 +45,7 @@ class TelecincoIE(InfoExtractor): }, }, { 'url': 'http://www.mediaset.es/12meses/campanas/doylacara/conlatratanohaytrato/Ayudame-dar-cara-trata-trato_2_1986630220.html', - 'md5': 'ae2dc6b7b50b2392076a51c0f70e01f6', + 'md5': 'eddb50291df704ce23c74821b995bcac', 'info_dict': { 'id': 'aywerkD2Sv1vGNqq9b85Q2', 'ext': 'mp4', @@ -90,58 +87,24 @@ class TelecincoIE(InfoExtractor): def _parse_content(self, content, url): video_id = content['dataMediaId'] - if content.get('dataCmsId') == 'ooyala': - return self.url_result( - 'ooyala:%s' % video_id, OoyalaIE.ie_key(), video_id) - config_url = urljoin(url, content['dataConfig']) config = self._download_json( - config_url, video_id, 'Downloading config JSON') + content['dataConfig'], video_id, 'Downloading config JSON') title = config['info']['title'] - - def mmc_url(mmc_type): - return re.sub( - r'/(?:flash|html5)\.json', '/%s.json' % mmc_type, - config['services']['mmc']) - - duration = None - formats = [] - for mmc_type in ('flash', 'html5'): - mmc = self._download_json( - mmc_url(mmc_type), video_id, - 'Downloading %s mmc JSON' % mmc_type, fatal=False) - if not mmc: - continue - if not duration: - duration = int_or_none(mmc.get('duration')) - for location in mmc['locations']: - gat = self._proto_relative_url(location.get('gat'), 'http:') - gcp = location.get('gcp') - ogn = location.get('ogn') - if None in (gat, gcp, ogn): - continue - token_data = { - 'gcp': gcp, - 'ogn': ogn, - 'sta': 0, - } - media = self._download_json( - gat, video_id, data=json.dumps(token_data).encode('utf-8'), - headers={ - 'Content-Type': 'application/json;charset=utf-8', - 'Referer': url, - }, fatal=False) or {} - stream = media.get('stream') or media.get('file') - if not stream: - continue - ext = determine_ext(stream) - if ext == 'f4m': - formats.extend(self._extract_f4m_formats( - stream + '&hdcore=3.2.0&plugin=aasp-3.2.0.77.18', - video_id, f4m_id='hds', fatal=False)) - elif ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - stream, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) + services = config['services'] + caronte = self._download_json(services['caronte'], video_id) + stream = caronte['dls'][0]['stream'] + headers = self.geo_verification_headers() + headers.update({ + 'Content-Type': 'application/json;charset=UTF-8', + 'Origin': re.match(r'https?://[^/]+', url).group(0), + }) + cdn = self._download_json( + caronte['cerbero'], video_id, data=json.dumps({ + 'bbx': caronte['bbx'], + 'gbx': self._download_json(services['gbx'], video_id)['gbx'], + }).encode(), headers=headers)['tokens']['1']['cdn'] + formats = self._extract_m3u8_formats( + stream + '?' + cdn, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') self._sort_formats(formats) return { @@ -149,7 +112,7 @@ class TelecincoIE(InfoExtractor): 'title': title, 'formats': formats, 'thumbnail': content.get('dataPoster') or config.get('poster', {}).get('imageUrl'), - 'duration': duration, + 'duration': int_or_none(content.get('dataDuration')), } def _real_extract(self, url): diff --git a/youtube_dlc/extractor/toggle.py b/youtube_dlc/extractor/toggle.py index 3b9b54759..270c84daa 100644 --- a/youtube_dlc/extractor/toggle.py +++ b/youtube_dlc/extractor/toggle.py @@ -200,7 +200,7 @@ class ToggleIE(InfoExtractor): class MeWatchIE(InfoExtractor): IE_NAME = 'mewatch' - _VALID_URL = r'https?://(?:www\.)?mewatch\.sg/watch/[^/?#&]+-(?P[0-9]+)' + _VALID_URL = r'https?://(?:(?:www|live)\.)?mewatch\.sg/watch/[^/?#&]+-(?P[0-9]+)' _TESTS = [{ 'url': 'https://www.mewatch.sg/watch/Recipe-Of-Life-E1-179371', 'info_dict': { @@ -220,6 +220,9 @@ class MeWatchIE(InfoExtractor): }, { 'url': 'https://www.mewatch.sg/watch/Little-Red-Dot-Detectives-S2-%E6%90%9C%E5%AF%86%E3%80%82%E6%89%93%E5%8D%A1%E3%80%82%E5%B0%8F%E7%BA%A2%E7%82%B9-S2-E1-176232', 'only_matching': True, + }, { + 'url': 'https://live.mewatch.sg/watch/Recipe-Of-Life-E41-189759', + 'only_matching': True, }] def _real_extract(self, url): diff --git a/youtube_dlc/extractor/vimeo.py b/youtube_dlc/extractor/vimeo.py index 51a0ab2fa..a5ab0ce97 100644 --- a/youtube_dlc/extractor/vimeo.py +++ b/youtube_dlc/extractor/vimeo.py @@ -1119,6 +1119,12 @@ class VHXEmbedIE(VimeoBaseInfoExtractor): IE_NAME = 'vhx:embed' _VALID_URL = r'https?://embed\.vhx\.tv/videos/(?P\d+)' + @staticmethod + def _extract_url(webpage): + mobj = re.search( + r']+src="(https?://embed\.vhx\.tv/videos/\d+[^"]*)"', webpage) + return unescapeHTML(mobj.group(1)) if mobj else None + def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) @@ -1127,5 +1133,6 @@ class VHXEmbedIE(VimeoBaseInfoExtractor): 'ott data'), video_id, js_to_json)['config_url'] config = self._download_json(config_url, video_id) info = self._parse_config(config, video_id) + info['id'] = video_id self._vimeo_sort_formats(info['formats']) return info diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py index ad248a356..c67ecde04 100644 --- a/youtube_dlc/extractor/youtube.py +++ b/youtube_dlc/extractor/youtube.py @@ -16,6 +16,7 @@ from ..jsinterp import JSInterpreter from ..swfinterp import SWFInterpreter from ..compat import ( compat_chr, + compat_HTTPError, compat_kwargs, compat_parse_qs, compat_urllib_parse_unquote, @@ -64,7 +65,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}' _RESERVED_NAMES = ( - r'embed|e|channel|c|user|playlist|watch|w|v|results|shared|' + r'embed|e|watch_popup|channel|c|user|playlist|watch|w|v|movies|results|shared|' r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout|' r'feed/(?:watch_later|history|subscriptions|library|trending|recommended)') @@ -303,6 +304,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;' _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;' + _YT_INITIAL_BOUNDARY_RE = r'(?:var\s+meta|(?:(?!\1).)+\.m3u8(?:(?!\1).)*)\1', - body, 'm3u8 url', group='url') + body, 'm3u8 url', group='url', default=None) + if not m3u8_url: + source = self._parse_json(self._search_regex( + r'(?s)sources\s*:\s*\[\s*({.+?})\s*\]', body, + 'source'), video_id, js_to_json) + if source.get('integration') == 'verizon-media': + m3u8_url = 'https://content.uplynk.com/%s.m3u8' % source['id'] formats = self._extract_m3u8_formats( m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls') text_tracks = self._search_regex(