[Rumble] Add RumbleChannelIE (#1088)

Authored by: Ashish0804
4 years ago · f1d42a83ab
parent 3cf4b91dc5
commit f1d42a83ab
2 changed files with 40 additions and 2 deletions
--- a/yt_dlp/extractor/extractors.py
+++ b/yt_dlp/extractor/extractors.py
@ -1205,7 +1205,10 @@ from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE, RTVELiveIE, RTVETe
 from .rtvnh import RTVNHIE
 from .rtvs import RTVSIE
 from .ruhd import RUHDIE
-from .rumble import RumbleEmbedIE
+from .rumble import (
    RumbleEmbedIE,
    RumbleChannelIE,
 )
 from .rutube import (
    RutubeIE,
    RutubeChannelIE,
--- a/yt_dlp/extractor/rumble.py
+++ b/yt_dlp/extractor/rumble.py
@ -1,15 +1,17 @@
 # coding: utf-8
 from __future__ import unicode_literals
 import itertools
 import re
 from .common import InfoExtractor
-from ..compat import compat_str
+from ..compat import compat_str, compat_HTTPError
 from ..utils import (
    determine_ext,
    int_or_none,
    parse_iso8601,
    try_get,
    ExtractorError,
 )
@ -75,3 +77,36 @@ class RumbleEmbedIE(InfoExtractor):
            'channel_url': author.get('url'),
            'duration': int_or_none(video.get('duration')),
        }
 class RumbleChannelIE(InfoExtractor):
    _VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c|user)/(?P<id>[^&?#$/]+))'
    _TESTS = [{
        'url': 'https://rumble.com/c/Styxhexenhammer666',
        'playlist_mincount': 1160,
        'info_dict': {
            'id': 'Styxhexenhammer666',
        },
    }, {
        'url': 'https://rumble.com/user/goldenpoodleharleyeuna',
        'playlist_count': 4,
        'info_dict': {
            'id': 'goldenpoodleharleyeuna',
        },
    }]
    def entries(self, url, playlist_id):
        for page in itertools.count(1):
            try:
                webpage = self._download_webpage(f'{url}?page={page}', playlist_id, note='Downloading page %d' % page)
            except ExtractorError as e:
                if isinstance(e.cause, compat_HTTPError) and e.cause.code == 404:
                    break
                raise
            for video_url in re.findall(r'class=video-item--a\s?href=([^>]+\.html)', webpage):
                yield self.url_result('https://rumble.com' + video_url)
    def _real_extract(self, url):
        url, playlist_id = self._match_valid_url(url).groups()
        return self.playlist_result(self.entries(url, playlist_id), playlist_id=playlist_id)