diff --git a/youtube_dlc/extractor/youtube.py b/youtube_dlc/extractor/youtube.py index 97cc793f9..b9feaa24b 100644 --- a/youtube_dlc/extractor/youtube.py +++ b/youtube_dlc/extractor/youtube.py @@ -2947,6 +2947,87 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): url = self._TEMPLATE_URL % playlist_id page = self._download_webpage(url, playlist_id) + yt_initial = self._get_yt_initial_data('', page) + if yt_initial: + playlist_items = try_get(yt_initial, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents'], list) + entries = [] + playlist_page = 1 + api_key = self._search_regex( + r'"INNERTUBE_API_KEY":"([^"]+)"', + page, 'api key', default="AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", fatal=False) + ytcfg_string = self._search_regex( + r'ytcfg\.set\(({.*?"INNERTUBE_CONTEXT".*?})\);', + page, 'client context') + api_client_context = self._parse_json(ytcfg_string, 'client context', transform_source=js_to_json)['INNERTUBE_CONTEXT'] + while playlist_items: + item = playlist_items.pop(0) + + item_video = try_get(item, lambda x: x['playlistVideoRenderer'], dict) + if item_video: + video_id = try_get(item_video, lambda x: x['videoId'], compat_str) + if not video_id: + continue + entry = { + '_type': 'url', + 'duration': int_or_none(try_get(item_video, lambda x: x['lengthSeconds'], compat_str)), + 'id': video_id, + 'ie_key': 'Youtube', + # 'thumbnails': try_get(item_video, lambda x: x['thumbnail']['thumbnails'], list), + 'title': try_get(item_video, lambda x: x['title']['runs'][0]['text'], compat_str), + 'url': video_id + } + entries.append(entry) + + item_continue = try_get(item, lambda x: x['continuationItemRenderer'], dict) + if item_continue: + playlist_page += 1 + continuation_token = try_get(item_continue, lambda x: x['continuationEndpoint']['continuationCommand']['token'], compat_str) + request_data = { + 'context': api_client_context, + 'continuation': continuation_token + } + response = self._download_json( + 'https://www.youtube.com/youtubei/v1/browse?key=%s' % api_key, + data=json.dumps(request_data).encode('utf8'), + errnote='Unable to download playlist page', fatal=False, + headers={'Content-Type': 'application/json'}, + note='Downloading page %s' % playlist_page, + video_id=playlist_id) + playlist_items_new = try_get(response, lambda x: x['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'], list) + if playlist_items_new: + playlist_items.extend(playlist_items_new) + + playlist_title = try_get(yt_initial, lambda x: x['microformat']['microformatDataRenderer']['title'], compat_str) + playlist_description = try_get(yt_initial, lambda x: x['microformat']['microformatDataRenderer']['description'], compat_str) + playlist = self.playlist_result( + entries, + playlist_id=playlist_id, + playlist_title=playlist_title, + playlist_description=playlist_description) + + uploader_info = try_get(yt_initial, lambda x: x['sidebar']['playlistSidebarRenderer']['items'][1]['playlistSidebarSecondaryInfoRenderer']['videoOwner']['videoOwnerRenderer'], dict) + if uploader_info: + playlist.update({ + 'uploader': try_get(uploader_info, lambda x: x['title']['runs'][0]['text'], compat_str), + 'uploader_id': try_get(uploader_info, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId']), + 'uploader_url': 'https://youtube.com{}'.format(try_get(uploader_info, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str)) + }) + if playlist_id.startswith(self._YTM_PLAYLIST_PREFIX): + playlist.update(self._YTM_CHANNEL_INFO) + + playlist_stats = try_get(yt_initial, lambda x: x['sidebar']['playlistSidebarRenderer']['items'][0]['playlistSidebarPrimaryInfoRenderer']['stats'], list) + if playlist_stats: + views_str = try_get(playlist_stats, lambda x: x[1]['simpleText'], compat_str) + if views_str.endswith('views'): + playlist['view_count'] = int_or_none("".join(re.findall(r'\d+', views_str))) + last_updated_str = try_get(playlist_stats, lambda x: x[2]['runs'][1]['text'], compat_str) + if last_updated_str: + playlist['updated_date'] = unified_strdate(last_updated_str) + + has_videos = bool(entries) + return has_videos, playlist + + # todo: fix this # the yt-alert-message now has tabindex attribute (see https://github.com/ytdl-org/youtube-dl/issues/11604) for match in re.findall(r'
', page): match = match.strip() @@ -2968,44 +3049,14 @@ class YoutubePlaylistIE(YoutubePlaylistBaseInfoExtractor): else: self.report_warning('Youtube gives an alert message: ' + match) - playlist_title = self._html_search_regex( - r'(?s)