From e4edeb6226e2ba1433e562f52fa37b00e3da1a17 Mon Sep 17 00:00:00 2001 From: nixxo Date: Thu, 11 Mar 2021 08:58:51 +0100 Subject: [PATCH] [wimtv] Add extractor (#161) Added support for VODs, live and embeds Authored by: nixxo --- yt_dlp/extractor/extractors.py | 1 + yt_dlp/extractor/generic.py | 15 +++ yt_dlp/extractor/wimtv.py | 163 +++++++++++++++++++++++++++++++++ 3 files changed, 179 insertions(+) create mode 100644 yt_dlp/extractor/wimtv.py diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index d80eafaf9..460edf1dc 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1559,6 +1559,7 @@ from .weibo import ( WeiboMobileIE ) from .weiqitv import WeiqiTVIE +from .wimtv import WimTVIE from .wistia import ( WistiaIE, WistiaPlaylistIE, diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index a3a8f6b0d..cdd773477 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -134,6 +134,7 @@ from .rumble import RumbleEmbedIE from .arcpublishing import ArcPublishingIE from .medialaan import MedialaanIE from .simplecast import SimplecastIE +from .wimtv import WimTVIE class GenericIE(InfoExtractor): @@ -2250,6 +2251,15 @@ class GenericIE(InfoExtractor): }, 'playlist_mincount': 52, }, + { + # WimTv embed player + 'url': 'http://www.msmotor.tv/wearefmi-pt-2-2021/', + 'info_dict': { + 'id': 'wearefmi-pt-2-2021', + 'title': '#WEAREFMI – PT.2 – 2021 – MsMotorTV', + }, + 'playlist_count': 1, + }, ] def report_following_redirect(self, new_url): @@ -3350,6 +3360,11 @@ class GenericIE(InfoExtractor): return self.playlist_from_matches( rcs_urls, video_id, video_title, ie=RCSEmbedsIE.ie_key()) + wimtv_urls = WimTVIE._extract_urls(webpage) + if wimtv_urls: + return self.playlist_from_matches( + wimtv_urls, video_id, video_title, ie=WimTVIE.ie_key()) + bitchute_urls = BitChuteIE._extract_urls(webpage) if bitchute_urls: return self.playlist_from_matches( diff --git a/yt_dlp/extractor/wimtv.py b/yt_dlp/extractor/wimtv.py new file mode 100644 index 000000000..f32d0afc8 --- /dev/null +++ b/yt_dlp/extractor/wimtv.py @@ -0,0 +1,163 @@ +# coding: utf-8 +from __future__ import unicode_literals + +import re + +from .common import InfoExtractor +from ..utils import ( + determine_ext, + parse_duration, + urlencode_postdata, + ExtractorError, +) + + +class WimTVIE(InfoExtractor): + _player = None + _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' + _VALID_URL = r'''(?x) + https?://platform.wim.tv/ + (?: + (?:embed/)?\? + |\#/webtv/.+?/ + ) + (?Pvod|live|cast)[=/] + (?P%s).*?''' % _UUID_RE + _TESTS = [{ + # vod stream + 'url': 'https://platform.wim.tv/embed/?vod=db29fb32-bade-47b6-a3a6-cb69fe80267a', + 'md5': 'db29fb32-bade-47b6-a3a6-cb69fe80267a', + 'info_dict': { + 'id': 'db29fb32-bade-47b6-a3a6-cb69fe80267a', + 'ext': 'mp4', + 'title': 'AMA SUPERCROSS 2020 - R2 ST. LOUIS', + 'duration': 6481, + 'thumbnail': r're:https?://.+?/thumbnail/.+?/720$' + }, + 'params': { + 'skip_download': True, + }, + }, { + # live stream + 'url': 'https://platform.wim.tv/embed/?live=28e22c22-49db-40f3-8c37-8cbb0ff44556&autostart=true', + 'info_dict': { + 'id': '28e22c22-49db-40f3-8c37-8cbb0ff44556', + 'ext': 'mp4', + 'title': 'Streaming MSmotorTV', + 'is_live': True, + }, + 'params': { + 'skip_download': True, + }, + }, { + 'url': 'https://platform.wim.tv/#/webtv/automotornews/vod/422492b6-539e-474d-9c6b-68c9d5893365', + 'only_matching': True, + }, { + 'url': 'https://platform.wim.tv/#/webtv/renzoarborechannel/cast/f47e0d15-5b45-455e-bf0d-dba8ffa96365', + 'only_matching': True, + }] + + @staticmethod + def _extract_urls(webpage): + return [ + mobj.group('url') + for mobj in re.finditer( + r']+src=["\'](?P%s)' % WimTVIE._VALID_URL, + webpage)] + + def _real_initialize(self): + if not self._player: + self._get_player_data() + + def _get_player_data(self): + msg_id = 'Player data' + self._player = {} + + datas = [{ + 'url': 'https://platform.wim.tv/common/libs/player/wimtv/wim-rest.js', + 'vars': [{ + 'regex': r'appAuth = "(.+?)"', + 'variable': 'app_auth', + }] + }, { + 'url': 'https://platform.wim.tv/common/config/endpointconfig.js', + 'vars': [{ + 'regex': r'PRODUCTION_HOSTNAME_THUMB = "(.+?)"', + 'variable': 'thumb_server', + }, { + 'regex': r'PRODUCTION_HOSTNAME_THUMB\s*\+\s*"(.+?)"', + 'variable': 'thumb_server_path', + }] + }] + + for data in datas: + temp = self._download_webpage(data['url'], msg_id) + for var in data['vars']: + val = self._search_regex(var['regex'], temp, msg_id) + if not val: + raise ExtractorError('%s not found' % var['variable']) + self._player[var['variable']] = val + + def _generate_token(self): + json = self._download_json( + 'https://platform.wim.tv/wimtv-server/oauth/token', 'Token generation', + headers={'Authorization': 'Basic %s' % self._player['app_auth']}, + data=urlencode_postdata({'grant_type': 'client_credentials'})) + token = json.get('access_token') + if not token: + raise ExtractorError('access token not generated') + return token + + def _generate_thumbnail(self, thumb_id, width='720'): + if not thumb_id or not self._player.get('thumb_server'): + return None + if not self._player.get('thumb_server_path'): + self._player['thumb_server_path'] = '' + return '%s%s/asset/thumbnail/%s/%s' % ( + self._player['thumb_server'], + self._player['thumb_server_path'], + thumb_id, width) + + def _real_extract(self, url): + urlc = re.match(self._VALID_URL, url).groupdict() + video_id = urlc['id'] + stream_type = is_live = None + if urlc['type'] in {'live', 'cast'}: + stream_type = urlc['type'] + '/channel' + is_live = True + else: + stream_type = 'vod' + is_live = False + token = self._generate_token() + json = self._download_json( + 'https://platform.wim.tv/wimtv-server/api/public/%s/%s/play' % ( + stream_type, video_id), video_id, + headers={'Authorization': 'Bearer %s' % token, + 'Content-Type': 'application/json'}, + data=bytes('{}', 'utf-8')) + + formats = [] + for src in json.get('srcs') or []: + if src.get('mimeType') == 'application/x-mpegurl': + formats.extend( + self._extract_m3u8_formats( + src.get('uniqueStreamer'), video_id, 'mp4')) + if src.get('mimeType') == 'video/flash': + formats.append({ + 'format_id': 'rtmp', + 'url': src.get('uniqueStreamer'), + 'ext': determine_ext(src.get('uniqueStreamer'), 'flv'), + 'rtmp_live': is_live, + }) + json = json.get('resource') + thumb = self._generate_thumbnail(json.get('thumbnailId')) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': json.get('title') or json.get('name'), + 'duration': parse_duration(json.get('duration')), + 'formats': formats, + 'thumbnail': thumb, + 'is_live': is_live, + }