From 1d9e0a4f40deaeb2f722cf964e6bf13b3835e617 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Fri, 5 May 2017 16:12:40 +0100 Subject: [PATCH] [vice] update tests and add support for ooyala embeds in article pages --- youtube_dl/extractor/vice.py | 81 +++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 34 deletions(-) diff --git a/youtube_dl/extractor/vice.py b/youtube_dl/extractor/vice.py index b2e95734b..54e207b39 100644 --- a/youtube_dl/extractor/vice.py +++ b/youtube_dl/extractor/vice.py @@ -32,7 +32,8 @@ class ViceBaseIE(AdobePassIE): resource = self._get_mvpd_resource( 'VICELAND', title, video_id, watch_hub_data.get('video-rating')) - query['tvetoken'] = self._extract_mvpd_auth(url, video_id, 'VICELAND', resource) + query['tvetoken'] = self._extract_mvpd_auth( + url, video_id, 'VICELAND', resource) # signature generation algorithm is reverse engineered from signatureGenerator in # webpack:///../shared/~/vice-player/dist/js/vice-player.js in @@ -45,11 +46,14 @@ class ViceBaseIE(AdobePassIE): try: host = 'www.viceland' if is_locked else self._PREPLAY_HOST - preplay = self._download_json('https://%s.com/%s/preplay/%s' % (host, locale, video_id), video_id, query=query) + preplay = self._download_json( + 'https://%s.com/%s/preplay/%s' % (host, locale, video_id), + video_id, query=query) except ExtractorError as e: if isinstance(e.cause, compat_HTTPError) and e.cause.code == 400: error = json.loads(e.cause.read().decode()) - raise ExtractorError('%s said: %s' % (self.IE_NAME, error['details']), expected=True) + raise ExtractorError('%s said: %s' % ( + self.IE_NAME, error['details']), expected=True) raise video_data = preplay['video'] @@ -88,16 +92,17 @@ class ViceBaseIE(AdobePassIE): class ViceIE(ViceBaseIE): - _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?P[^/]+)/(?:[^/]+/)?videos?/(?P[^/?#&]+)' + IE_NAME = 'vice' + _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:(?P[^/]+)/)?videos?/(?P[^/?#&]+)' _TESTS = [{ - 'url': 'http://www.vice.com/video/cowboy-capitalists-part-1', - 'md5': 'e9d77741f9e42ba583e683cd170660f7', + 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', + 'md5': '7d3ae2f9ba5f196cdd9f9efd43657ac2', 'info_dict': { - 'id': '43cW1mYzpia9IlestBjVpd23Yu3afAfp', + 'id': 'N2bzkydjraWDGwnt8jAttCF6Y0PDv4Zj', 'ext': 'flv', - 'title': 'VICE_COWBOYCAPITALISTS_PART01_v1_VICE_WM_1080p.mov', - 'duration': 725.983, + 'title': 'Monkey Labs of Holland', + 'description': 'md5:92b3c7dcbfe477f772dd4afa496c9149', }, 'add_ie': ['Ooyala'], }, { @@ -136,22 +141,13 @@ class ViceIE(ViceBaseIE): }, 'add_ie': ['UplynkPreplay'], }, { - 'url': 'https://news.vice.com/video/experimenting-on-animals-inside-the-monkey-lab', - 'only_matching': True, - }, { - 'url': 'http://www.vice.com/ru/video/big-night-out-ibiza-clive-martin-229', - 'only_matching': True, - }, { - 'url': 'https://munchies.vice.com/en/videos/watch-the-trailer-for-our-new-series-the-pizza-show', + 'url': 'https://video.vice.com/en_us/video/pizza-show-trailer/56d8c9a54d286ed92f7f30e4', 'only_matching': True, }] _PREPLAY_HOST = 'video.vice' def _real_extract(self, url): - mobj = re.match(self._VALID_URL, url) - video_id = mobj.group('id') - locale = mobj.group('locale') - video_id = self._match_id(url) + locale, video_id = re.match(self._VALID_URL, url).groups() webpage, urlh = self._download_webpage_handle(url, video_id) embed_code = self._search_regex( r'embedCode=([^&\'"]+)', webpage, @@ -166,6 +162,7 @@ class ViceIE(ViceBaseIE): class ViceShowIE(InfoExtractor): + IE_NAME = 'vice:show' _VALID_URL = r'https?://(?:.+?\.)?vice\.com/(?:[^/]+/)?show/(?P[^/?#&]+)' _TEST = { @@ -192,12 +189,14 @@ class ViceShowIE(InfoExtractor): r'(.+?)', webpage, 'title', default=None) if title: title = re.sub(r'(.+)\s*\|\s*.+$', r'\1', title).strip() - description = self._html_search_meta('description', webpage, 'description') + description = self._html_search_meta( + 'description', webpage, 'description') return self.playlist_result(entries, show_id, title, description) class ViceArticleIE(InfoExtractor): + IE_NAME = 'vice:article' _VALID_URL = r'https://www.vice.com/[^/]+/article/(?P[^?#]+)' _TESTS = [{ @@ -216,8 +215,9 @@ class ViceArticleIE(InfoExtractor): # AES-encrypted m3u8 'skip_download': True, }, + 'add_ie': ['UplynkPreplay'], }, { - 'url': 'http://www.vice.com/video/how-to-hack-a-car', + 'url': 'https://www.vice.com/en_us/article/how-to-hack-a-car', 'md5': 'a7ecf64ee4fa19b916c16f4b56184ae2', 'info_dict': { 'id': '3jstaBeXgAs', @@ -229,6 +229,12 @@ class ViceArticleIE(InfoExtractor): 'upload_date': '20140529', }, 'add_ie': ['Youtube'], + }, { + 'url': 'https://www.vice.com/en_us/article/cowboy-capitalists-part-1', + 'only_matching': True, + }, { + 'url': 'https://www.vice.com/ru/article/big-night-out-ibiza-clive-martin-229', + 'only_matching': True, }] def _real_extract(self, url): @@ -240,22 +246,29 @@ class ViceArticleIE(InfoExtractor): r'window\.__PREFETCH_DATA\s*=\s*({.*});', webpage, 'prefetch data'), display_id) body = prefetch_data['body'] - youtube_url = self._html_search_regex( - r']+src="(.*youtube\.com/.*)"', body, 'YouTube URL', default=None) - if youtube_url: + + def _url_res(video_url, ie_key): return { '_type': 'url_transparent', - 'url': youtube_url, + 'url': video_url, 'display_id': display_id, - 'ie_key': 'Youtube', + 'ie_key': ie_key, } + embed_code = self._search_regex( + r'embedCode=([^&\'"]+)', body, + 'ooyala embed code', default=None) + if embed_code: + return _url_res('ooyala:%s' % embed_code, 'Ooyala') + + youtube_url = self._html_search_regex( + r']+src="(.*youtube\.com/.*)"', + body, 'YouTube URL', default=None) + if youtube_url: + return _url_res(youtube_url, 'Youtube') + video_url = self._html_search_regex( - r'data-video-url="([^"]+)"', prefetch_data['embed_code'], 'video URL') + r'data-video-url="([^"]+)"', + prefetch_data['embed_code'], 'video URL') - return { - '_type': 'url_transparent', - 'url': video_url, - 'display_id': display_id, - 'ie_key': ViceIE.ie_key(), - } + return _url_res(video_url, ViceIE.ie_key())