From df086e74e20e8de6f027c6c551887f5c22b8c637 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sergey=20M=E2=80=A4?=
Date: Fri, 30 Dec 2016 00:12:35 +0700
Subject: [PATCH] [showroomlive] Improve (closes #11458)

---
Reviewer notes (text between the "---" separator and the diff is not part
of the commit message):

* The extractor class is renamed from ShowroomLiveIE to ShowRoomLiveIE.
  NOTE(review): this patch touches only showroomlive.py; any other
  reference to the old class name would need the same rename - confirm.
* Non-room site sections (onlive, timetable, event, campaign, news,
  ranking, room) are now rejected by a negative lookahead in _VALID_URL
  instead of a runtime membership check in _real_extract.
* The room id is looked up with two alternative patterns
  (SrGlobal.roomId first, then the profile/room query string).
* An offline room now raises ExtractorError with expected=True.
* The _extract_formats helper is folded into _real_extract.
* Streams lacking a url (or, for RTMP, a stream_name) are skipped.
* RTMP formats pass the stream name as play_path rather than appending
  it to the URL, and carry page_url, player_url and rtmp_live.
* The per-stream "quality" value is stored in each format's "quality"
  field (through int_or_none) instead of "preference".
* The result gains view_count, wraps the title with _live_title, falls
  back to broadcaster_id for the id, and always reports is_live as True.

 youtube_dl/extractor/showroomlive.py | 114 ++++++++++++++-------------
 1 file changed, 59 insertions(+), 55 deletions(-)

diff --git a/youtube_dl/extractor/showroomlive.py b/youtube_dl/extractor/showroomlive.py
index 8bfae510b..efd9d561f 100644
--- a/youtube_dl/extractor/showroomlive.py
+++ b/youtube_dl/extractor/showroomlive.py
@@ -2,79 +2,83 @@
 from __future__ import unicode_literals
 
 from .common import InfoExtractor
-from ..utils import ExtractorError, compat_urlparse
+from ..compat import compat_str
+from ..utils import (
+    ExtractorError,
+    int_or_none,
+    urljoin,
+)
 
 
-class ShowroomLiveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?showroom-live\.com/(?P<id>[0-9a-zA-Z_]+)'
+class ShowRoomLiveIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?showroom-live\.com/(?!onlive|timetable|event|campaign|news|ranking|room)(?P<id>[^/?#&]+)'
     _TEST = {
         'url': 'https://www.showroom-live.com/48_Nana_Okada',
-        'skip': 'Only live broadcasts, can\'t predict test case.',
-        'info_dict': {
-            'id': '48_Nana_Okada',
-            'ext': 'mp4',
-            'uploader_id': '48_Nana_Okada',
-        }
+        'only_matching': True,
     }
 
     def _real_extract(self, url):
         broadcaster_id = self._match_id(url)
 
-        # There is no showroom on these pages.
-        if broadcaster_id in ['onlive', 'timetable', 'event', 'campaign', 'news', 'ranking']:
-            raise ExtractorError('URL %s does not contain a showroom' % url)
-
-        # Retrieve the information we need
         webpage = self._download_webpage(url, broadcaster_id)
-        room_id = self._search_regex(r'profile\?room_id\=(\d+)', webpage, 'room_id')
-        room_url = compat_urlparse.urljoin(url, "/api/room/profile?room_id=%s") % room_id
-        room = self._download_json(room_url, broadcaster_id)
 
-        is_live = room.get('is_onlive')
-        if not is_live:
-            raise ExtractorError('%s their showroom is not live' % broadcaster_id)
+        room_id = self._search_regex(
+            (r'SrGlobal\.roomId\s*=\s*(\d+)',
+             r'(?:profile|room)\?room_id\=(\d+)'), webpage, 'room_id')
 
-        # Prepare and return the information
-        uploader = room.get('performer_name') or broadcaster_id  # performer_name can be an empty string.
-        title = room.get('room_name', room.get('main_name', "%s's Showroom" % uploader))
+        room = self._download_json(
+            urljoin(url, '/api/room/profile?room_id=%s' % room_id),
+            broadcaster_id)
 
-        return {
-            'is_live': is_live,
-            'id': str(room.get('live_id')),
-            'timestamp': room.get('current_live_started_at'),
-            'uploader': uploader,
-            'uploader_id': broadcaster_id,
-            'title': title,
-            'description': room.get('description'),
-            'formats': self._extract_formats(url, broadcaster_id, room_id)
-        }
+        is_live = room.get('is_onlive')
+        if is_live is not True:
+            raise ExtractorError('%s is offline' % broadcaster_id, expected=True)
 
-    def _extract_formats(self, url, broadcaster_id, room_id):
-        formats = []
+        uploader = room.get('performer_name') or broadcaster_id
+        title = room.get('room_name') or room.get('main_name') or uploader
 
-        stream_url = compat_urlparse.urljoin(url, "/api/live/streaming_url?room_id=%s") % room_id
-        streaming_url_list = self._download_json(stream_url, broadcaster_id).get('streaming_url_list', [])
+        streaming_url_list = self._download_json(
+            urljoin(url, '/api/live/streaming_url?room_id=%s' % room_id),
+            broadcaster_id)['streaming_url_list']
 
+        formats = []
         for stream in streaming_url_list:
-            if stream.get('type') == "hls":
-                formats.extend(self._extract_m3u8_formats(
-                    stream.get('url'),
-                    broadcaster_id,
-                    ext='mp4',
-                    m3u8_id='hls',
-                    preference=stream.get('quality', 100),
-                    live=True
-                ))
-            elif stream.get('type') == 'rtmp':
-                url = stream.get('url') + '/' + stream.get('stream_name')
+            stream_url = stream.get('url')
+            if not stream_url:
+                continue
+            stream_type = stream.get('type')
+            if stream_type == 'hls':
+                m3u8_formats = self._extract_m3u8_formats(
+                    stream_url, broadcaster_id, ext='mp4', m3u8_id='hls',
+                    live=True)
+                for f in m3u8_formats:
+                    f['quality'] = int_or_none(stream.get('quality', 100))
+                formats.extend(m3u8_formats)
+            elif stream_type == 'rtmp':
+                stream_name = stream.get('stream_name')
+                if not stream_name:
+                    continue
                 formats.append({
-                    'url': url,
-                    'format_id': 'rtmp',
-                    'protocol': 'rtmp',
+                    'url': stream_url,
+                    'play_path': stream_name,
+                    'page_url': url,
+                    'player_url': 'https://www.showroom-live.com/assets/swf/v3/ShowRoomLive.swf',
+                    'rtmp_live': True,
                     'ext': 'flv',
-                    'preference': stream.get('quality', 100),
-                    'format_note': stream.get('label')
+                    'format_id': 'rtmp',
+                    'format_note': stream.get('label'),
+                    'quality': int_or_none(stream.get('quality', 100)),
                 })
-
         self._sort_formats(formats)
-        return formats
+
+        return {
+            'id': compat_str(room.get('live_id') or broadcaster_id),
+            'title': self._live_title(title),
+            'description': room.get('description'),
+            'timestamp': int_or_none(room.get('current_live_started_at')),
+            'uploader': uploader,
+            'uploader_id': broadcaster_id,
+            'view_count': int_or_none(room.get('view_num')),
+            'formats': formats,
+            'is_live': True,
+        }