From 12300fa45a3d7574f68f7570afe4fcee4799bb5c Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Sun, 22 Nov 2020 13:01:56 +0100 Subject: [PATCH 1/2] [skyit] add support for multiple Sky Italia websites(closes #26629) --- youtube_dlc/extractor/extractors.py | 10 ++ youtube_dlc/extractor/skyit.py | 239 ++++++++++++++++++++++++++++ 2 files changed, 249 insertions(+) create mode 100644 youtube_dlc/extractor/skyit.py diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py index f5894504e..d51e87476 100644 --- a/youtube_dlc/extractor/extractors.py +++ b/youtube_dlc/extractor/extractors.py @@ -1052,6 +1052,16 @@ from .shared import ( from .showroomlive import ShowRoomLiveIE from .sina import SinaIE from .sixplay import SixPlayIE +from .skyit import ( + SkyItPlayerIE, + SkyItVideoIE, + SkyItVideoLiveIE, + SkyItIE, + SkyItAcademyIE, + SkyItArteIE, + CieloTVItIE, + TV8ItIE, +) from .skylinewebcams import SkylineWebcamsIE from .skynewsarabia import ( SkyNewsArabiaIE, diff --git a/youtube_dlc/extractor/skyit.py b/youtube_dlc/extractor/skyit.py new file mode 100644 index 000000000..14a4d8d4c --- /dev/null +++ b/youtube_dlc/extractor/skyit.py @@ -0,0 +1,239 @@ +# coding: utf-8 +from __future__ import unicode_literals + +from .common import InfoExtractor +from ..compat import ( + compat_str, + compat_parse_qs, + compat_urllib_parse_urlparse, +) +from ..utils import ( + dict_get, + int_or_none, + parse_duration, + unified_timestamp, +) + + +class SkyItPlayerIE(InfoExtractor): + IE_NAME = 'player.sky.it' + _VALID_URL = r'https?://player\.sky\.it/player/(?:external|social)\.html\?.*?\bid=(?P\d+)' + _GEO_BYPASS = False + _DOMAIN = 'sky' + _PLAYER_TMPL = 'https://player.sky.it/player/external.html?id=%s&domain=%s' + # http://static.sky.it/static/skyplayer/conf.json + _TOKEN_MAP = { + 'cielo': 'Hh9O7M8ks5yi6nSROL7bKYz933rdf3GhwZlTLMgvy4Q', + 'hotclub': 'kW020K2jq2lk2eKRJD2vWEg832ncx2EivZlTLQput2C', + 'mtv8': 'A5Nn9GGb326CI7vP5e27d7E4PIaQjota', + 'salesforce': 'C6D585FD1615272C98DE38235F38BD86', + 'sitocommerciale': 'VJwfFuSGnLKnd9Phe9y96WkXgYDCguPMJ2dLhGMb2RE', + 'sky': 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk', + 'skyacademy': 'A6LAn7EkO2Q26FRy0IAMBekX6jzDXYL3', + 'skyarte': 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd', + 'theupfront': 'PRSGmDMsg6QMGc04Obpoy7Vsbn7i2Whp', + } + + def _player_url_result(self, video_id): + return self.url_result( + self._PLAYER_TMPL % (video_id, self._DOMAIN), + SkyItPlayerIE.ie_key(), video_id) + + def _parse_video(self, video, video_id): + title = video['title'] + is_live = video.get('type') == 'live' + hls_url = video.get(('streaming' if is_live else 'hls') + '_url') + if not hls_url and video.get('geoblock' if is_live else 'geob'): + self.raise_geo_restricted(countries=['IT']) + + if is_live: + formats = self._extract_m3u8_formats(hls_url, video_id, 'mp4') + else: + formats = self._extract_akamai_formats( + hls_url, video_id, {'http': 'videoplatform.sky.it'}) + self._sort_formats(formats) + + return { + 'id': video_id, + 'title': self._live_title(title) if is_live else title, + 'formats': formats, + 'thumbnail': dict_get(video, ('video_still', 'video_still_medium', 'thumb')), + 'description': video.get('short_desc') or None, + 'timestamp': unified_timestamp(video.get('create_date')), + 'duration': int_or_none(video.get('duration_sec')) or parse_duration(video.get('duration')), + 'is_live': is_live, + } + + def _real_extract(self, url): + video_id = self._match_id(url) + domain = compat_parse_qs(compat_urllib_parse_urlparse( + url).query).get('domain', [None])[0] + token = dict_get(self._TOKEN_MAP, (domain, 'sky')) + video = self._download_json( + 'https://apid.sky.it/vdp/v1/getVideoData', + video_id, query={ + 'caller': 'sky', + 'id': video_id, + 'token': token + }, headers=self.geo_verification_headers()) + return self._parse_video(video, video_id) + + +class SkyItVideoIE(SkyItPlayerIE): + IE_NAME = 'video.sky.it' + _VALID_URL = r'https?://(?:masterchef|video|xfactor)\.sky\.it(?:/[^/]+)*/video/[0-9a-z-]+-(?P\d+)' + _TESTS = [{ + 'url': 'https://video.sky.it/news/mondo/video/uomo-ucciso-da-uno-squalo-in-australia-631227', + 'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd', + 'info_dict': { + 'id': '631227', + 'ext': 'mp4', + 'title': 'Uomo ucciso da uno squalo in Australia', + 'timestamp': 1606036192, + 'upload_date': '20201122', + } + }, { + 'url': 'https://xfactor.sky.it/video/x-factor-2020-replay-audizioni-1-615820', + 'only_matching': True, + }, { + 'url': 'https://masterchef.sky.it/video/masterchef-9-cosa-e-successo-nella-prima-puntata-562831', + 'only_matching': True, + }] + + def _real_extract(self, url): + video_id = self._match_id(url) + return self._player_url_result(video_id) + + +class SkyItVideoLiveIE(SkyItPlayerIE): + IE_NAME = 'video.sky.it:live' + _VALID_URL = r'https?://video\.sky\.it/diretta/(?P[^/?&#]+)' + _TEST = { + 'url': 'https://video.sky.it/diretta/tg24', + 'info_dict': { + 'id': '1', + 'ext': 'mp4', + 'title': r're:Diretta TG24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'description': 'Guarda la diretta streaming di SkyTg24, segui con Sky tutti gli appuntamenti e gli speciali di Tg24.', + }, + 'params': { + # m3u8 download + 'skip_download': True, + }, + } + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + asset_id = compat_str(self._parse_json(self._search_regex( + r']+id="__NEXT_DATA__"[^>]*>({.+?})', + webpage, 'next data'), display_id)['props']['initialState']['livePage']['content']['asset_id']) + livestream = self._download_json( + 'https://apid.sky.it/vdp/v1/getLivestream', + asset_id, query={'id': asset_id}) + return self._parse_video(livestream, asset_id) + + +class SkyItIE(SkyItPlayerIE): + IE_NAME = 'sky.it' + _VALID_URL = r'https?://(?:sport|tg24)\.sky\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P[^/?&#]+)' + _TESTS = [{ + 'url': 'https://sport.sky.it/calcio/serie-a/2020/11/21/juventus-cagliari-risultato-gol', + 'info_dict': { + 'id': '631201', + 'ext': 'mp4', + 'title': 'Un rosso alla violenza: in campo per i diritti delle donne', + 'upload_date': '20201121', + 'timestamp': 1605995753, + }, + 'expected_warnings': ['Unable to download f4m manifest'], + }, { + 'url': 'https://tg24.sky.it/mondo/2020/11/22/australia-squalo-uccide-uomo', + 'md5': 'fe5c91e59a84a3437eaa0bca6e134ccd', + 'info_dict': { + 'id': '631227', + 'ext': 'mp4', + 'title': 'Uomo ucciso da uno squalo in Australia', + 'timestamp': 1606036192, + 'upload_date': '20201122', + }, + }] + _VIDEO_ID_REGEX = r'data-videoid="(\d+)"' + + def _real_extract(self, url): + display_id = self._match_id(url) + webpage = self._download_webpage(url, display_id) + video_id = self._search_regex( + self._VIDEO_ID_REGEX, webpage, 'video id') + return self._player_url_result(video_id) + + +class SkyItAcademyIE(SkyItIE): + IE_NAME = 'skyacademy.it' + _VALID_URL = r'https?://(?:www\.)?skyacademy\.it(?:/[^/]+)*/\d{4}/\d{2}/\d{2}/(?P[^/?&#]+)' + _TESTS = [{ + 'url': 'https://www.skyacademy.it/eventi-speciali/2019/07/05/a-lezione-di-cinema-con-sky-academy-/', + 'md5': 'ced5c26638b7863190cbc44dd6f6ba08', + 'info_dict': { + 'id': '523458', + 'ext': 'mp4', + 'title': 'Sky Academy "The Best CineCamp 2019"', + 'timestamp': 1562843784, + 'upload_date': '20190711', + } + }] + _DOMAIN = 'skyacademy' + _VIDEO_ID_REGEX = r'id="news-videoId_(\d+)"' + + +class SkyItArteIE(SkyItIE): + IE_NAME = 'arte.sky.it' + _VALID_URL = r'https?://arte\.sky\.it/video/(?P[^/?&#]+)' + _TESTS = [{ + 'url': 'https://arte.sky.it/video/serie-musei-venezia-collezionismo-12-novembre/', + 'md5': '515aee97b87d7a018b6c80727d3e7e17', + 'info_dict': { + 'id': '627926', + 'ext': 'mp4', + 'title': "Musei Galleria Franchetti alla Ca' d'Oro Palazzo Grimani", + 'upload_date': '20201106', + 'timestamp': 1604664493, + } + }] + _DOMAIN = 'skyarte' + _VIDEO_ID_REGEX = r'(?s)]+src="(?:https:)?//player\.sky\.it/player/external\.html\?[^"]*\bid=(\d+)' + + +class CieloTVItIE(SkyItIE): + IE_NAME = 'cielotv.it' + _VALID_URL = r'https?://(?:www\.)?cielotv\.it/video/(?P[^.]+)\.html' + _TESTS = [{ + 'url': 'https://www.cielotv.it/video/Il-lunedi-e-sempre-un-dramma.html', + 'md5': 'c4deed77552ba901c2a0d9258320304b', + 'info_dict': { + 'id': '499240', + 'ext': 'mp4', + 'title': 'Il lunedì è sempre un dramma', + 'upload_date': '20190329', + 'timestamp': 1553862178, + } + }] + _DOMAIN = 'cielo' + _VIDEO_ID_REGEX = r'videoId\s*=\s*"(\d+)"' + + +class TV8ItIE(SkyItVideoIE): + IE_NAME = 'tv8.it' + _VALID_URL = r'https?://tv8\.it/showvideo/(?P\d+)' + _TESTS = [{ + 'url': 'https://tv8.it/showvideo/630529/ogni-mattina-ucciso-asino-di-andrea-lo-cicero/18-11-2020/', + 'md5': '9ab906a3f75ea342ed928442f9dabd21', + 'info_dict': { + 'id': '630529', + 'ext': 'mp4', + 'title': 'Ogni mattina - Ucciso asino di Andrea Lo Cicero', + 'timestamp': 1605721374, + 'upload_date': '20201118', + } + }] + _DOMAIN = 'mtv8' From 4f618e64f5359047367a427c6b2972fed1d7c870 Mon Sep 17 00:00:00 2001 From: nixxo Date: Tue, 1 Dec 2020 12:10:50 +0100 Subject: [PATCH 2/2] [skyit] removed old skyitalia extractor --- youtube_dlc/extractor/extractors.py | 4 - youtube_dlc/extractor/skyitalia.py | 123 ---------------------------- 2 files changed, 127 deletions(-) delete mode 100644 youtube_dlc/extractor/skyitalia.py diff --git a/youtube_dlc/extractor/extractors.py b/youtube_dlc/extractor/extractors.py index d51e87476..d74d4bbae 100644 --- a/youtube_dlc/extractor/extractors.py +++ b/youtube_dlc/extractor/extractors.py @@ -1071,10 +1071,6 @@ from .sky import ( SkyNewsIE, SkySportsIE, ) -from .skyitalia import ( - SkyArteItaliaIE, - SkyItaliaIE, -) from .slideshare import SlideshareIE from .slideslive import SlidesLiveIE from .slutload import SlutloadIE diff --git a/youtube_dlc/extractor/skyitalia.py b/youtube_dlc/extractor/skyitalia.py deleted file mode 100644 index 22a6be2be..000000000 --- a/youtube_dlc/extractor/skyitalia.py +++ /dev/null @@ -1,123 +0,0 @@ -# coding: utf-8 -from __future__ import unicode_literals - -from .common import InfoExtractor -from ..utils import ExtractorError - - -class SkyItaliaBaseIE(InfoExtractor): - _GET_VIDEO_DATA = 'https://apid.sky.it/vdp/v1/getVideoData?token={token}&caller=sky&rendition=web&id={id}' - _RES = { - 'low': [426, 240], - 'med': [640, 360], - 'high': [854, 480], - 'hd': [1280, 720] - } - _GEO_BYPASS = False - - def _extract_video_id(self, url): - webpage = self._download_webpage(url, 'skyitalia') - video_id = self._html_search_regex( - [r'data-videoid=\"(\d+)\"', - r'http://player\.sky\.it/social\?id=(\d+)\&'], - webpage, 'video_id') - if video_id: - return video_id - raise ExtractorError('Video ID not found.') - - def _get_formats(self, video_id, token): - data_url = self._GET_VIDEO_DATA.replace('{id}', video_id) - data_url = data_url.replace('{token}', token) - video_data = self._parse_json( - self._download_webpage(data_url, video_id), - video_id) - - formats = [] - for q, r in self._RES.items(): - key = 'web_%s_url' % q - if key not in video_data: - continue - formats.append({ - 'url': video_data.get(key), - 'format_id': q, - 'width': r[0], - 'height': r[1] - }) - - if not formats and video_data.get('geob') == 1: - self.raise_geo_restricted(countries=['IT']) - - self._sort_formats(formats) - title = video_data.get('title') - thumb = video_data.get('thumb') - - return { - 'id': video_id, - 'title': title, - 'thumbnail': thumb, - 'formats': formats - } - - def _real_extract(self, url): - video_id = self._match_id(url) - if video_id == 'None': - video_id = self._extract_video_id(url) - return self._get_formats(video_id, self._TOKEN) - - -class SkyItaliaIE(SkyItaliaBaseIE): - IE_NAME = 'sky.it' - _VALID_URL = r'''(?x)https?:// - (?Psport|tg24|video) - \.sky\.it/(?:.+?) - (?P[0-9]{6})? - (?:$|\?)''' - - _TESTS = [{ - 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162', - 'md5': '9c03b590b06e5952d8051f0e02b0feca', - 'info_dict': { - 'id': '616162', - 'ext': 'mp4', - 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere', - 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg', - } - }, { - 'url': 'https://sport.sky.it/motogp/2020/09/18/motogp-gp-emilia-romagna-misano-2020-prove-libere-diretta', - 'md5': '9c03b590b06e5952d8051f0e02b0feca', - 'info_dict': { - 'id': '616162', - 'ext': 'mp4', - 'title': 'MotoGP, GP Emilia Romagna: gli highlights delle prove libere', - 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/18/1600441214452_hl-libere-motogp-misano2_5602634_thumbnail_1.jpg', - } - }, { - 'url': 'https://tg24.sky.it/salute-e-benessere/2020/09/18/coronavirus-vaccino-ue-sanofi', - 'md5': 'caa25e62dadb529bc5e0b078da99f854', - 'info_dict': { - 'id': '615904', - 'ext': 'mp4', - 'title': 'Covid-19, al Buzzi di Milano tamponi drive-in per studenti', - 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/17/1600351405841_error-coronavirus-al-buzzi-di-milano-tamponi_thumbnail_1.jpg', - } - }, { - 'url': 'https://video.sky.it/sport/motogp/video/motogp-gp-emilia-romagna-highlights-prove-libere-616162?itm_source=parsely-api', - 'only_matching': True, - }] - _TOKEN = 'F96WlOd8yoFmLQgiqv6fNQRvHZcsWk5jDaYnDvhbiJk' - - -class SkyArteItaliaIE(SkyItaliaBaseIE): - IE_NAME = 'arte.sky.it' - _VALID_URL = r'https?://arte\.sky\.it/video/.+?(?P[0-9]{6})?$' - _TEST = { - 'url': 'https://arte.sky.it/video/federico-fellini-maestri-cinema/', - 'md5': '2f22513a89f45142f2746f878d690647', - 'info_dict': { - 'id': '612888', - 'ext': 'mp4', - 'title': 'I maestri del cinema Federico Felini', - 'thumbnail': 'https://videoplatform.sky.it/thumbnail/2020/09/03/1599146747305_i-maestri-del-cinema-federico-felini_thumbnail_1.jpg', - } - } - _TOKEN = 'LWk29hfiU39NNdq87ePeRach3nzTSV20o0lTv2001Cd'