From 1e8fe57e5cd0f33f940df87430d75e1230ec5b7a Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 13 Jul 2022 15:03:05 +0530 Subject: [PATCH] [extractor] Support multiple archive ids for one video (#4307) Closes #4352 --- README.md | 2 +- yt_dlp/YoutubeDL.py | 8 +++----- yt_dlp/extractor/common.py | 1 + yt_dlp/extractor/funimation.py | 3 ++- yt_dlp/extractor/genericembeds.py | 3 +++ yt_dlp/extractor/twitch.py | 3 +++ 6 files changed, 13 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index f0c49eef9..a1c7287a9 100644 --- a/README.md +++ b/README.md @@ -138,7 +138,6 @@ Some of yt-dlp's default options are different from that of youtube-dl and youtu * Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this * `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior * The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this -* All *experiences* of a funimation episode are considered as a single video. This behavior breaks existing archives. Use `--compat-options seperate-video-versions` to extract information from only the default player * Live chats (if available) are considered as subtitles. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent any live chat/danmaku from downloading * Youtube channel URLs are automatically redirected to `/video`. Append a `/featured` to the URL to download only the videos in the home page. 
If the channel does not have a videos tab, we try to download the equivalent `UU` playlist instead. For all other tabs, if the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections * Unavailable videos are also listed for youtube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this @@ -2132,6 +2131,7 @@ These options may no longer work as intended --no-include-ads Default --write-annotations No supported site has annotations now --no-write-annotations Default + --compat-options seperate-video-versions No longer needed #### Removed These options were deprecated since 2014 and have now been entirely removed diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py index f6f97b8ec..14823a4c6 100644 --- a/yt_dlp/YoutubeDL.py +++ b/yt_dlp/YoutubeDL.py @@ -3455,11 +3455,9 @@ class YoutubeDL: if fn is None: return False - vid_id = self._make_archive_id(info_dict) - if not vid_id: - return False # Incomplete video information - - return vid_id in self.archive + vid_ids = [self._make_archive_id(info_dict)] + vid_ids.extend(info_dict.get('_old_archive_ids') or []) + return any(id_ in self.archive for id_ in vid_ids) def record_download_archive(self, info_dict): fn = self.params.get('download_archive') diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 317aa270e..c91260cb0 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -334,6 +334,7 @@ class InfoExtractor: 'private', 'premium_only', 'subscriber_only', 'needs_auth', 'unlisted' or 'public'. Use 'InfoExtractor._availability' to set it + _old_archive_ids: A list of old archive ids needed for backward compatibility __post_extractor: A function to be called just before the metadata is written to either disk, logger or console. 
The function must return a dict which will be added to the info_dict. diff --git a/yt_dlp/extractor/funimation.py b/yt_dlp/extractor/funimation.py index 12cacd3b4..5881f1687 100644 --- a/yt_dlp/extractor/funimation.py +++ b/yt_dlp/extractor/funimation.py @@ -249,7 +249,8 @@ class FunimationIE(FunimationBaseIE): self._sort_formats(formats, ('lang', 'source')) return { - 'id': initial_experience_id if only_initial_experience else episode_id, + 'id': episode_id, + '_old_archive_ids': [initial_experience_id], 'display_id': display_id, 'duration': duration, 'title': episode['episodeTitle'], diff --git a/yt_dlp/extractor/genericembeds.py b/yt_dlp/extractor/genericembeds.py index ec2673059..f3add4794 100644 --- a/yt_dlp/extractor/genericembeds.py +++ b/yt_dlp/extractor/genericembeds.py @@ -22,6 +22,9 @@ class HTML5MediaEmbedIE(InfoExtractor): entry.update({ 'id': f'{video_id}-{num}', 'title': f'{title} ({num})', + '_old_archive_ids': [ + f'Generic {f"{video_id}-{num}" if len(entries) > 1 else video_id}', + ], }) self._sort_formats(entry['formats']) yield entry diff --git a/yt_dlp/extractor/twitch.py b/yt_dlp/extractor/twitch.py index 028e7a1e8..7a798b912 100644 --- a/yt_dlp/extractor/twitch.py +++ b/yt_dlp/extractor/twitch.py @@ -1162,8 +1162,11 @@ class TwitchClipsIE(TwitchBaseIE): }) thumbnails.append(thumb) + old_id = self._search_regex(r'%7C(\d+)(?:-\d+)?.mp4', formats[-1]['url'], 'old id', default=None) + return { 'id': clip.get('id') or video_id, + '_old_archive_ids': [f'{self.ie_key()} {old_id}'] if old_id else None, 'display_id': video_id, 'title': clip.get('title') or video_id, 'formats': formats,