From e16fefd8699c56d7a565e933ed1f55112ad399b4 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Fri, 19 Nov 2021 00:48:48 +0200 Subject: [PATCH] [Reddit] Add support for 1080p videos (#1682) Fixes: https://github.com/ytdl-org/youtube-dl/issues/29565 Authored by: xenova --- yt_dlp/extractor/extractors.py | 5 +-- yt_dlp/extractor/generic.py | 28 ++++++++++++ yt_dlp/extractor/reddit.py | 82 +++++++++++++++++----------------- 3 files changed, 71 insertions(+), 44 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index 6bad1f40c..d19c67243 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1206,10 +1206,7 @@ from .redbulltv import ( RedBullTVRrnContentIE, RedBullIE, ) -from .reddit import ( - RedditIE, - RedditRIE, -) +from .reddit import RedditIE from .redtube import RedTubeIE from .regiotv import RegioTVIE from .rentv import ( diff --git a/yt_dlp/extractor/generic.py b/yt_dlp/extractor/generic.py index d6631e2f3..9c7fa4a21 100644 --- a/yt_dlp/extractor/generic.py +++ b/yt_dlp/extractor/generic.py @@ -2344,6 +2344,34 @@ class GenericIE(InfoExtractor): 'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg', } }, + { + # Reddit-hosted video that will redirect and be processed by RedditIE + # Redirects to https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/ + 'url': 'https://v.redd.it/zv89llsvexdz', + 'md5': '87f5f02f6c1582654146f830f21f8662', + 'info_dict': { + 'id': 'zv89llsvexdz', + 'ext': 'mp4', + 'timestamp': 1501941939.0, + 'title': 'That small heart attack.', + 'upload_date': '20170805', + 'uploader': 'Antw87' + } + }, + { + # 1080p Reddit-hosted video that will redirect and be processed by RedditIE + 'url': 'https://v.redd.it/33hgok7dfbz71/', + 'md5': '7a1d587940242c9bb3bd6eb320b39258', + 'info_dict': { + 'id': '33hgok7dfbz71', + 'ext': 'mp4', + 'title': "The game Didn't want me to Knife that Guy I guess", + 'uploader': 
'paraf1ve', + 'timestamp': 1636788683.0, + 'upload_date': '20211113' + } + } + # ] def report_following_redirect(self, new_url): diff --git a/yt_dlp/extractor/reddit.py b/yt_dlp/extractor/reddit.py index 3ea750aeb..a042a59cc 100644 --- a/yt_dlp/extractor/reddit.py +++ b/yt_dlp/extractor/reddit.py @@ -8,43 +8,11 @@ from ..utils import ( try_get, unescapeHTML, url_or_none, + traverse_obj ) class RedditIE(InfoExtractor): - _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)' - _TEST = { - # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/ - 'url': 'https://v.redd.it/zv89llsvexdz', - 'md5': '0a070c53eba7ec4534d95a5a1259e253', - 'info_dict': { - 'id': 'zv89llsvexdz', - 'ext': 'mp4', - 'title': 'zv89llsvexdz', - }, - } - - def _real_extract(self, url): - video_id = self._match_id(url) - - formats = self._extract_m3u8_formats( - 'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id, - 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) - - formats.extend(self._extract_mpd_formats( - 'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id, - mpd_id='dash', fatal=False)) - - self._sort_formats(formats) - - return { - 'id': video_id, - 'title': video_id, - 'formats': formats, - } - - -class RedditRIE(InfoExtractor): _VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/r/(?P<slug>[^/]+/comments/(?P<id>[^/?#&]+))' _TESTS = [{ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', @@ -147,19 +115,53 @@ class RedditRIE(InfoExtractor): for resolution in resolutions: add_thumbnail(resolution) - return { - '_type': 'url_transparent', - 'url': video_url, + info = { 'title': data.get('title'), 'thumbnails': thumbnails, 'timestamp': float_or_none(data.get('created_utc')), 'uploader': data.get('author'), - 'duration': int_or_none(try_get( - data, - (lambda x: x['media']['reddit_video']['duration'], - lambda x: x['secure_media']['reddit_video']['duration']))), 'like_count': int_or_none(data.get('ups')), 
'dislike_count': int_or_none(data.get('downs')), 'comment_count': int_or_none(data.get('num_comments')), 'age_limit': age_limit, } + + # Check if media is hosted on reddit: + reddit_video = traverse_obj(data, (('media', 'secure_media'), 'reddit_video'), get_all=False) + if reddit_video: + playlist_urls = [ + try_get(reddit_video, lambda x: unescapeHTML(x[y])) + for y in ('dash_url', 'hls_url') + ] + + # Update video_id + display_id = video_id + video_id = self._search_regex( + r'https?://v\.redd\.it/(?P<id>[^/?#&]+)', reddit_video['fallback_url'], + 'video_id', default=display_id) + + dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd' + hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8' + + formats = self._extract_m3u8_formats( + hls_playlist_url, display_id, 'mp4', + entry_protocol='m3u8_native', m3u8_id='hls', fatal=False) + formats.extend(self._extract_mpd_formats( + dash_playlist_url, display_id, mpd_id='dash', fatal=False)) + self._sort_formats(formats) + + return { + **info, + 'id': video_id, + 'display_id': display_id, + 'formats': formats, + 'duration': int_or_none(reddit_video.get('duration')), + } + + # Not hosted on reddit, must continue extraction + return { + **info, + 'display_id': video_id, + '_type': 'url_transparent', + 'url': video_url, + }