[youtube:feed] Check each 'load more' portion for unique video ids

10 years ago · 62c95fd5fc
parent 25f14e9f93
commit 62c95fd5fc
1 changed files with 8 additions and 2 deletions
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@ -1621,10 +1621,16 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
        # for the video ids doesn't contain an index
        ids = []
        more_widget_html = content_html = page
        for page_num in itertools.count(1):
            matches = re.findall(r'href="\s*/watch\?v=([0-9A-Za-z_-]{11})', content_html)
-            new_ids = orderedSet(matches)
+
            # 'recommended' feed has infinite 'load more' and each new portion spins
            # the same videos in (sometimes) slightly different order, so we'll check
            # for unicity and break when portion has no new videos
            new_ids = filter(lambda video_id: video_id not in ids, orderedSet(matches))
            if not new_ids:
                break
            ids.extend(new_ids)
            mobj = re.search(r'data-uix-load-more-href="/?(?P<more>[^"]+)"', more_widget_html)