From bf57cfa8b7a64f5b8382fa17a74af8b9aa2ffa55 Mon Sep 17 00:00:00 2001 From: Deer-Spangle <60626596+Deer-Spangle@users.noreply.github.com> Date: Sun, 28 Nov 2021 05:04:06 +0000 Subject: [PATCH] [RedGifs] Add Search and User extractors (#1808) Authored by: Deer-Spangle --- yt_dlp/extractor/extractors.py | 6 +- yt_dlp/extractor/redgifs.py | 212 +++++++++++++++++++++++++++------ 2 files changed, 180 insertions(+), 38 deletions(-) diff --git a/yt_dlp/extractor/extractors.py b/yt_dlp/extractor/extractors.py index a4baad2da..45eec47c6 100644 --- a/yt_dlp/extractor/extractors.py +++ b/yt_dlp/extractor/extractors.py @@ -1216,7 +1216,11 @@ from .redbulltv import ( RedBullIE, ) from .reddit import RedditIE -from .redgifs import RedGifsIE +from .redgifs import ( + RedGifsIE, + RedGifsSearchIE, + RedGifsUserIE, +) from .redtube import RedTubeIE from .regiotv import RegioTVIE from .rentv import ( diff --git a/yt_dlp/extractor/redgifs.py b/yt_dlp/extractor/redgifs.py index 1257d1344..55196b768 100644 --- a/yt_dlp/extractor/redgifs.py +++ b/yt_dlp/extractor/redgifs.py @@ -1,21 +1,94 @@ # coding: utf-8 +import functools from .common import InfoExtractor +from ..compat import compat_parse_qs from ..utils import ( ExtractorError, int_or_none, qualities, try_get, + OnDemandPagedList, ) -class RedGifsIE(InfoExtractor): - _VALID_URL = r'https?://(?:(?:www|thumbs2?)\.)?redgifs\.com/(?:watch/)?(?P<id>[^-/?#\.]+)' +class RedGifsBaseInfoExtractor(InfoExtractor): _FORMATS = { 'gif': 250, 'sd': 480, 'hd': None, } + + def _parse_gif_data(self, gif_data): + video_id = gif_data.get('id') + quality = qualities(tuple(self._FORMATS.keys())) + + orig_height = int_or_none(gif_data.get('height')) + aspect_ratio = try_get(gif_data, lambda x: orig_height / x['width']) + + formats = [] + for format_id, height in self._FORMATS.items(): + video_url = gif_data['urls'].get(format_id) + if not video_url: + continue + height = min(orig_height, height or orig_height) + formats.append({ + 'url': video_url, 
'format_id': format_id, + 'width': height * aspect_ratio if aspect_ratio else None, + 'height': height, + 'quality': quality(format_id), + }) + self._sort_formats(formats) + + return { + 'id': video_id, + 'webpage_url': f'https://redgifs.com/watch/{video_id}', + 'ie_key': RedGifsIE.ie_key(), + 'extractor': 'RedGifs', + 'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs', + 'timestamp': int_or_none(gif_data.get('createDate')), + 'uploader': gif_data.get('userName'), + 'duration': int_or_none(gif_data.get('duration')), + 'view_count': int_or_none(gif_data.get('views')), + 'like_count': int_or_none(gif_data.get('likes')), + 'categories': gif_data.get('tags') or [], + 'tags': gif_data.get('tags'), + 'age_limit': 18, + 'formats': formats, + } + + def _call_api(self, ep, video_id, *args, **kwargs): + data = self._download_json( + f'https://api.redgifs.com/v2/{ep}', video_id, *args, **kwargs) + if 'error' in data: + raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id) + return data + + def _fetch_page(self, ep, video_id, query, page): + query['page'] = page + 1 + data = self._call_api( + ep, video_id, query=query, note=f'Downloading JSON metadata page {page + 1}') + + for entry in data['gifs']: + yield self._parse_gif_data(entry) + + def _prepare_api_query(self, query, fields): + api_query = [ + (field_name, query.get(field_name, (default,))[0]) + for field_name, default in fields.items()] + + return {key: val for key, val in api_query if val is not None} + + def _paged_entries(self, ep, item_id, query, fields): + page = int_or_none(query.get('page', (None,))[0]) + page_fetcher = functools.partial( + self._fetch_page, ep, item_id, self._prepare_api_query(query, fields)) + return page_fetcher(page) if page else OnDemandPagedList(page_fetcher, self._PAGE_SIZE) + + +class RedGifsIE(RedGifsBaseInfoExtractor): + _VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)' _TESTS = [{ 'url': 
'https://www.redgifs.com/watch/squeakyhelplesswisent', 'info_dict': { @@ -50,45 +123,110 @@ class RedGifsIE(InfoExtractor): def _real_extract(self, url): video_id = self._match_id(url).lower() + video_info = self._call_api( + f'gifs/{video_id}', video_id, note='Downloading video info') + return self._parse_gif_data(video_info['gif']) - video_info = self._download_json( - 'https://api.redgifs.com/v2/gifs/%s' % video_id, - video_id, 'Downloading video info') - if 'error' in video_info: - raise ExtractorError(f'RedGifs said: {video_info["error"]}', expected=True) - gif = video_info['gif'] - urls = gif['urls'] +class RedGifsSearchIE(RedGifsBaseInfoExtractor): + IE_DESC = 'Redgifs search' + _VALID_URL = r'https?://(?:www\.)?redgifs\.com/browse\?(?P<query>[^#]+)' + _PAGE_SIZE = 80 + _TESTS = [ + { + 'url': 'https://www.redgifs.com/browse?tags=Lesbian', + 'info_dict': { + 'id': 'tags=Lesbian', + 'title': 'Lesbian', + 'description': 'RedGifs search for Lesbian, ordered by trending' + }, + 'playlist_mincount': 100, + }, + { + 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian', + 'info_dict': { + 'id': 'type=g&order=latest&tags=Lesbian', + 'title': 'Lesbian', + 'description': 'RedGifs search for Lesbian, ordered by latest' + }, + 'playlist_mincount': 100, + }, + { + 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian&page=2', + 'info_dict': { + 'id': 'type=g&order=latest&tags=Lesbian&page=2', + 'title': 'Lesbian', + 'description': 'RedGifs search for Lesbian, ordered by latest' + }, + 'playlist_count': 80, + } + ] - quality = qualities(tuple(self._FORMATS.keys())) + def _real_extract(self, url): + query_str = self._match_valid_url(url).group('query') + query = compat_parse_qs(query_str) + if not query.get('tags'): + raise ExtractorError('Invalid query tags', expected=True) - orig_height = int_or_none(gif.get('height')) - aspect_ratio = try_get(gif, lambda x: orig_height / x['width']) + tags = query.get('tags')[0] + order = 
query.get('order', ('trending',))[0] - formats = [] - for format_id, height in self._FORMATS.items(): - video_url = urls.get(format_id) - if not video_url: - continue - height = min(orig_height, height or orig_height) - formats.append({ - 'url': video_url, - 'format_id': format_id, - 'width': height * aspect_ratio if aspect_ratio else None, - 'height': height, - 'quality': quality(format_id), - }) - self._sort_formats(formats) + query['search_text'] = [tags] + entries = self._paged_entries('gifs/search', query_str, query, { + 'search_text': None, + 'order': 'trending', + 'type': None, + }) - return { - 'id': video_id, - 'title': ' '.join(gif.get('tags') or []) or 'RedGifs', - 'timestamp': int_or_none(gif.get('createDate')), - 'uploader': gif.get('userName'), - 'duration': int_or_none(gif.get('duration')), - 'view_count': int_or_none(gif.get('views')), - 'like_count': int_or_none(gif.get('likes')), - 'categories': gif.get('tags') or [], - 'age_limit': 18, - 'formats': formats, + return self.playlist_result( + entries, query_str, tags, f'RedGifs search for {tags}, ordered by {order}') + + +class RedGifsUserIE(RedGifsBaseInfoExtractor): + IE_DESC = 'Redgifs user' + _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?' 
+ _PAGE_SIZE = 30 + _TESTS = [ + { + 'url': 'https://www.redgifs.com/users/lamsinka89', + 'info_dict': { + 'id': 'lamsinka89', + 'title': 'lamsinka89', + 'description': 'RedGifs user lamsinka89, ordered by recent' + }, + 'playlist_mincount': 100, + }, + { + 'url': 'https://www.redgifs.com/users/lamsinka89?page=3', + 'info_dict': { + 'id': 'lamsinka89?page=3', + 'title': 'lamsinka89', + 'description': 'RedGifs user lamsinka89, ordered by recent' + }, + 'playlist_count': 30, + }, + { + 'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g', + 'info_dict': { + 'id': 'lamsinka89?order=best&type=g', + 'title': 'lamsinka89', + 'description': 'RedGifs user lamsinka89, ordered by best' + }, + 'playlist_mincount': 100, } + ] + + def _real_extract(self, url): + username, query_str = self._match_valid_url(url).group('username', 'query') + playlist_id = f'{username}?{query_str}' if query_str else username + + query = compat_parse_qs(query_str) + order = query.get('order', ('recent',))[0] + + entries = self._paged_entries(f'users/{username}/search', playlist_id, query, { + 'order': 'recent', + 'type': None, + }) + + return self.playlist_result( + entries, playlist_id, username, f'RedGifs user {username}, ordered by {order}')