From eb4b5818e2a297bd001eb1b4962d709b1245fd24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sergey=20M=E2=80=A4?= Date: Sun, 29 Oct 2017 04:16:07 +0700 Subject: [PATCH] [younow] Fix issues and improve extraction (closes #9255, closes #9432, closes #12436) --- youtube_dl/extractor/extractors.py | 2 +- youtube_dl/extractor/younow.py | 225 +++++++++++++++-------------- 2 files changed, 116 insertions(+), 111 deletions(-) diff --git a/youtube_dl/extractor/extractors.py b/youtube_dl/extractor/extractors.py index b6ad50ec7..2eed706f9 100644 --- a/youtube_dl/extractor/extractors.py +++ b/youtube_dl/extractor/extractors.py @@ -1336,7 +1336,7 @@ from .youku import ( YoukuShowIE, ) from .younow import ( - YouNowIE, + YouNowLiveIE, YouNowChannelIE, YouNowMomentIE, ) diff --git a/youtube_dl/extractor/younow.py b/youtube_dl/extractor/younow.py index 99abd66a8..04dbc87fc 100644 --- a/youtube_dl/extractor/younow.py +++ b/youtube_dl/extractor/younow.py @@ -1,17 +1,22 @@ # coding: utf-8 from __future__ import unicode_literals -from datetime import date, datetime + +import itertools from .common import InfoExtractor from ..compat import compat_str -from ..utils import int_or_none, UnsupportedError +from ..utils import ( + ExtractorError, + int_or_none, + try_get, +) -MOMENT_URL_FORMAT = 'https://cdn.younow.com/php/api/moment/fetch/id=%s' -STREAM_URL_FORMAT = 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8' +CDN_API_BASE = 'https://cdn.younow.com/php/api' +MOMENT_URL_FORMAT = '%s/moment/fetch/id=%%s' % CDN_API_BASE -class YouNowIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P[^/]+)' +class YouNowLiveIE(InfoExtractor): + _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P[^/?#&]+)' _TEST = { 'url': 'https://www.younow.com/AmandaPadeezy', 'info_dict': { @@ -19,179 +24,179 @@ class YouNowIE(InfoExtractor): 'ext': 'mp4', 'is_live': True, 'title': 'March 26, 2017', - 'description': 'YouNow is the best way to broadcast live and get an audience to watch you.', - 'thumbnail': 'https://ynassets.s3.amazonaws.com/broadcast/live/157869188/157869188.jpg', + 'thumbnail': r're:^https?://.*\.jpg$', 'tags': ['girls'], 'categories': ['girls'], 'uploader': 'AmandaPadeezy', 'uploader_id': '6716501', 'uploader_url': 'https://www.younow.com/AmandaPadeezy', 'creator': 'AmandaPadeezy', - 'formats': [{ - 'url': 'https://cdn.younow.com/php/api/broadcast/videoPath/hls=1/broadcastId=157869188/channelId=6716501', - 'ext': 'mp4', - 'protocol': 'm3u8', - }], - } + }, + 'skip': True, } + @classmethod + def suitable(cls, url): + return (False + if YouNowChannelIE.suitable(url) or YouNowMomentIE.suitable(url) + else super(YouNowLiveIE, cls).suitable(url)) + def _real_extract(self, url): username = self._match_id(url) - data = self._download_json('https://api.younow.com/php/api/broadcast/info/curId=0/user=%s' % (username), username) - - if data.get('media'): - stream_url = 'https://cdn.younow.com/php/api/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s' % ( - data.get('broadcastId'), - data.get('userId'), - ) - else: - raise UnsupportedError('Unsupported stream or user is not streaming at this time') - - webpage = self._download_webpage(url, username) - try: - uploader = data['user']['profileUrlString'] - except KeyError: - uploader = username - try: - title = data['title'] - except KeyError: - title = date.today().strftime('%B %d, %Y') + + data = self._download_json( + 'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s' + % username, username) + + if data.get('errorCode') != 0: + raise ExtractorError(data['errorMsg'], expected=True) + + uploader = try_get( + data, lambda x: x['user']['profileUrlString'], + compat_str) or username return { 'id': uploader, 'is_live': True, - 'title': title, - 'description': self._og_search_description(webpage), + 'title': self._live_title(uploader), 'thumbnail': data.get('awsUrl'), 'tags': data.get('tags'), 'categories': data.get('tags'), 'uploader': uploader, 'uploader_id': data.get('userId'), - 'uploader_url': 'https://www.younow.com/%s' % (data['user']['profileUrlString'],), + 'uploader_url': 'https://www.younow.com/%s' % username, 'creator': uploader, 'view_count': int_or_none(data.get('viewers')), 'like_count': int_or_none(data.get('likes')), 'formats': [{ - 'url': stream_url, + 'url': '%s/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s' + % (CDN_API_BASE, data['broadcastId'], data['userId']), 'ext': 'mp4', 'protocol': 'm3u8', }], } -def _moment_to_entry(item): +def _extract_moment(item, fatal=True): + moment_id = item.get('momentId') + if not moment_id: + if not fatal: + return + raise ExtractorError('Unable to extract moment id') + + moment_id = compat_str(moment_id) + title = item.get('text') - title_type = item.get('titleType') if not title: - if title_type: - title = 'YouNow %s' % item.get('titleType') - else: - title = 'YouNow moment' + title = 'YouNow %s' % ( + item.get('momentType') or item.get('titleType') or 'moment') + + uploader = try_get(item, lambda x: x['owner']['name'], compat_str) + uploader_id = try_get(item, lambda x: x['owner']['userId']) + uploader_url = 'https://www.younow.com/%s' % uploader if uploader else None entry = { - 'id': compat_str(item['momentId']), + 'extractor_key': 'YouNowMoment', + 'id': moment_id, 'title': title, 'view_count': int_or_none(item.get('views')), 'like_count': int_or_none(item.get('likes')), 'timestamp': int_or_none(item.get('created')), + 'creator': uploader, + 'uploader': uploader, + 'uploader_id': uploader_id, + 'uploader_url': uploader_url, 'formats': [{ - 'url': STREAM_URL_FORMAT % (item['momentId'], item['momentId']), + 'url': 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8' + % (moment_id, moment_id), 'ext': 'mp4', - 'protocol': 'm3u8', + 'protocol': 'm3u8_native', }], } - try: - entry['uploader'] = entry['creator'] = item['owner']['name'] - entry['uploader_url'] = 'https://www.younow.com/%s' % (item['owner']['name'],) - entry['uploader_id'] = item['owner']['userId'] - except KeyError: - pass - return entry class YouNowChannelIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P[^/]+)/channel' _TEST = { - 'url': 'https://www.younow.com/Kate_Swiz/channel', + 'url': 'https://www.younow.com/its_Kateee_/channel', 'info_dict': { - 'title': 'Kate_Swiz moments' + 'id': '14629760', + 'title': 'its_Kateee_ moments' }, - 'playlist_count': 6, + 'playlist_mincount': 8, } - MOMENTS_URL_FORMAT = 'https://cdn.younow.com/php/api/moment/profile/channelId=%s/createdBefore=%d/records=20' - - def _real_extract(self, url): - entries = [] - username = self._match_id(url) - user_info = self._download_json('https://api.younow.com/php/api/broadcast/info/curId=0/user=%s' % (username), username, note='Downloading user information') - channel_id = user_info['userId'] + def _entries(self, username, channel_id): created_before = 0 - moment_ids = [] - moment_ids_processed = [] - err = False - - while True: - if created_before: - cb = datetime.fromtimestamp(created_before) - else: - cb = datetime.now() - info = self._download_json(self.MOMENTS_URL_FORMAT % (channel_id, created_before), username, note='Downloading moments data (created before %s)' % (cb)) - - for item in info['items']: - if item['type'] == 'moment': - entry = _moment_to_entry(item) - moment_ids_processed.append(entry['id']) - entries.append(entry) - elif item['type'] == 'collection': - moment_ids += [compat_str(x) for x in item['momentsIds']] - - try: - created_before = int_or_none(item['created']) - except KeyError: - err = True - break - - if (err or - not info['hasMore'] or - 'items' not in info or - not info['items']): + for page_num in itertools.count(1): + if created_before is None: break + info = self._download_json( + '%s/moment/profile/channelId=%s/createdBefore=%d/records=20' + % (CDN_API_BASE, channel_id, created_before), username, + note='Downloading moments page %d' % page_num) + items = info.get('items') + if not items or not isinstance(items, list): + break + for item in items: + if not isinstance(item, dict): + continue + item_type = item.get('type') + if item_type == 'moment': + entry = _extract_moment(item, fatal=False) + if entry: + yield entry + elif item_type == 'collection': + moments = item.get('momentsIds') + if isinstance(moments, list): + for moment_id in moments: + m = self._download_json( + MOMENT_URL_FORMAT % moment_id, username, + note='Downloading %s moment JSON' % moment_id, + fatal=False) + if m and isinstance(m, dict) and m.get('item'): + entry = _extract_moment(m['item']) + if entry: + yield entry + created_before = int_or_none(item.get('created')) - for mid in set(moment_ids): - if mid in moment_ids_processed: - continue - item = self._download_json(MOMENT_URL_FORMAT % (mid), mid) - entries.append(_moment_to_entry(item['item'])) - - return self.playlist_result(entries, playlist_title='%s moments' % (username)) + def _real_extract(self, url): + username = self._match_id(url) + channel_id = compat_str(self._download_json( + 'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s' + % username, username, note='Downloading user information')['userId']) + return self.playlist_result( + self._entries(username, channel_id), channel_id, + '%s moments' % username) class YouNowMomentIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P[^/]+)/[^/]+' + _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P[^/?#&]+)' _TEST = { 'url': 'https://www.younow.com/GABO.../20712117/36319236/3b316doc/m', + 'md5': 'a30c70eadb9fb39a1aa3c8c0d22a0807', 'info_dict': { 'id': '20712117', 'ext': 'mp4', 'title': 'YouNow capture', - 'view_count': 19, - 'like_count': 0, + 'view_count': int, + 'like_count': int, 'timestamp': 1490432040, - 'formats': [{ - 'url': 'https://hls.younow.com/momentsplaylists/live/20712117/20712117.m3u8', - 'ext': 'mp4', - 'protocol': 'm3u8', - }], 'upload_date': '20170325', 'uploader': 'GABO...', 'uploader_id': 35917228, }, } + @classmethod + def suitable(cls, url): + return (False + if YouNowChannelIE.suitable(url) + else super(YouNowMomentIE, cls).suitable(url)) + def _real_extract(self, url): - mid = self._match_id(url) - item = self._download_json(MOMENT_URL_FORMAT % (mid), mid) - return _moment_to_entry(item['item']) + video_id = self._match_id(url) + item = self._download_json(MOMENT_URL_FORMAT % video_id, video_id) + return _extract_moment(item['item'])