[Reddit] Add support for 1080p videos (#1682)

Fixes: https://github.com/ytdl-org/youtube-dl/issues/29565
Authored by: xenova
4 years ago · e16fefd869
parent c6118ca2cc
commit e16fefd869
3 changed files with 71 additions and 44 deletions
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@@ -1206,10 +1206,7 @@ from .redbulltv import (
    RedBullTVRrnContentIE,
    RedBullIE,
 )
-from .reddit import (
-    RedditIE,
-    RedditRIE,
-)
+from .reddit import RedditIE
 from .redtube import RedTubeIE
 from .regiotv import RegioTVIE
 from .rentv import (
--- a/yt_dlp/extractor/generic.py
+++ b/yt_dlp/extractor/generic.py
@@ -2344,6 +2344,34 @@ class GenericIE(InfoExtractor):
                'thumbnail': 'https://bogmedia.org/contents/videos_screenshots/21000/21217/preview_480p.mp4.jpg',
            }
        },
+        {
+            # Reddit-hosted video that will redirect and be processed by RedditIE
+            # Redirects to https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
+            'url': 'https://v.redd.it/zv89llsvexdz',
+            'md5': '87f5f02f6c1582654146f830f21f8662',
+            'info_dict': {
+                'id': 'zv89llsvexdz',
+                'ext': 'mp4',
+                'timestamp': 1501941939.0,
+                'title': 'That small heart attack.',
+                'upload_date': '20170805',
+                'uploader': 'Antw87'
+            }
+        },
+        {
+            # 1080p Reddit-hosted video that will redirect and be processed by RedditIE
+            'url': 'https://v.redd.it/33hgok7dfbz71/',
+            'md5': '7a1d587940242c9bb3bd6eb320b39258',
+            'info_dict': {
+                'id': '33hgok7dfbz71',
+                'ext': 'mp4',
+                'title': "The game Didn't want me to Knife that Guy I guess",
+                'uploader': 'paraf1ve',
+                'timestamp': 1636788683.0,
+                'upload_date': '20211113'
+            }
+        }
+        #
    ]

    def report_following_redirect(self, new_url):
--- a/yt_dlp/extractor/reddit.py
+++ b/yt_dlp/extractor/reddit.py
@@ -8,43 +8,11 @@ from ..utils import (
    try_get,
    unescapeHTML,
    url_or_none,
+    traverse_obj
 )


 class RedditIE(InfoExtractor):
-    _VALID_URL = r'https?://v\.redd\.it/(?P<id>[^/?#&]+)'
-    _TEST = {
-        # from https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/
-        'url': 'https://v.redd.it/zv89llsvexdz',
-        'md5': '0a070c53eba7ec4534d95a5a1259e253',
-        'info_dict': {
-            'id': 'zv89llsvexdz',
-            'ext': 'mp4',
-            'title': 'zv89llsvexdz',
-        },
-    }
-
-    def _real_extract(self, url):
-        video_id = self._match_id(url)
-
-        formats = self._extract_m3u8_formats(
-            'https://v.redd.it/%s/HLSPlaylist.m3u8' % video_id, video_id,
-            'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
-
-        formats.extend(self._extract_mpd_formats(
-            'https://v.redd.it/%s/DASHPlaylist.mpd' % video_id, video_id,
-            mpd_id='dash', fatal=False))
-
-        self._sort_formats(formats)
-
-        return {
-            'id': video_id,
-            'title': video_id,
-            'formats': formats,
-        }
-
-
-class RedditRIE(InfoExtractor):
    _VALID_URL = r'https?://(?P<subdomain>[^/]+\.)?reddit(?:media)?\.com/r/(?P<slug>[^/]+/comments/(?P<id>[^/?#&]+))'
    _TESTS = [{
        'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/',
@@ -147,19 +115,53 @@ class RedditRIE(InfoExtractor):
                for resolution in resolutions:
                    add_thumbnail(resolution)

-        return {
-            '_type': 'url_transparent',
-            'url': video_url,
+        info = {
            'title': data.get('title'),
            'thumbnails': thumbnails,
            'timestamp': float_or_none(data.get('created_utc')),
            'uploader': data.get('author'),
-            'duration': int_or_none(try_get(
-                data,
-                (lambda x: x['media']['reddit_video']['duration'],
-                 lambda x: x['secure_media']['reddit_video']['duration']))),
            'like_count': int_or_none(data.get('ups')),
            'dislike_count': int_or_none(data.get('downs')),
            'comment_count': int_or_none(data.get('num_comments')),
            'age_limit': age_limit,
        }
+
+        # Check if media is hosted on reddit:
+        reddit_video = traverse_obj(data, (('media', 'secure_media'), 'reddit_video'), get_all=False)
+        if reddit_video:
+            playlist_urls = [
+                try_get(reddit_video, lambda x: unescapeHTML(x[y]))
+                for y in ('dash_url', 'hls_url')
+            ]
+
+            # Update video_id
+            display_id = video_id
+            video_id = self._search_regex(
+                r'https?://v\.redd\.it/(?P<id>[^/?#&]+)', reddit_video['fallback_url'],
+                'video_id', default=display_id)
+
+            dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd'
+            hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8'
+
+            formats = self._extract_m3u8_formats(
+                hls_playlist_url, display_id, 'mp4',
+                entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)
+            formats.extend(self._extract_mpd_formats(
+                dash_playlist_url, display_id, mpd_id='dash', fatal=False))
+            self._sort_formats(formats)
+
+            return {
+                **info,
+                'id': video_id,
+                'display_id': display_id,
+                'formats': formats,
+                'duration': int_or_none(reddit_video.get('duration')),
+            }
+
+        # Not hosted on reddit, must continue extraction
+        return {
+            **info,
+            'display_id': video_id,
+            '_type': 'url_transparent',
+            'url': video_url,
+        }