[vimeo] add an extractor for channels

12 years ago · caeefc29eb
parent a3c736def2
commit caeefc29eb
3 changed files with 39 additions and 2 deletions
--- a/test/test_playlists.py
+++ b/test/test_playlists.py
@ -8,7 +8,7 @@ import json
 import os
 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dl.extractor import DailymotionPlaylistIE
+from youtube_dl.extractor import DailymotionPlaylistIE, VimeoChannelIE
 from youtube_dl.utils import *
 from helper import FakeYDL
@ -26,5 +26,13 @@ class TestPlaylists(unittest.TestCase):
        self.assertEqual(result['title'], u'SPORT')
        self.assertTrue(len(result['entries']) > 20)
    def test_vimeo_channel(self):
        dl = FakeYDL()
        ie = VimeoChannelIE(dl)
        result = ie.extract('http://vimeo.com/channels/tributes')
        self.assertIsPlaylist(result)
        self.assertEqual(result['title'], u'Vimeo Tributes')
        self.assertTrue(len(result['entries']) > 24)
 if __name__ == '__main__':
    unittest.main()
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -71,7 +71,7 @@ from .ustream import UstreamIE
 from .vbox7 import Vbox7IE
 from .veoh import VeohIE
 from .vevo import VevoIE
-from .vimeo import VimeoIE
+from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
 from .c56 import C56IE
 from .wat import WatIE
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@ -1,5 +1,6 @@
 import json
 import re
 import itertools
 from .common import InfoExtractor
 from ..utils import (
@ -171,3 +172,31 @@ class VimeoIE(InfoExtractor):
            'thumbnail':    video_thumbnail,
            'description':  video_description,
        }]
 class VimeoChannelIE(InfoExtractor):
    IE_NAME = u'vimeo:channel'
    _VALID_URL = r'(?:https?://)?vimeo.\com/channels/(?P<id>[^/]+)'
    _MORE_PAGES_INDICATOR = r'<a.+?rel="next"'
    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        channel_id =  mobj.group('id')
        video_ids = []
        for pagenum in itertools.count(1):
            webpage = self._download_webpage('http://vimeo.com/channels/%s/videos/page:%d' % (channel_id, pagenum),
                                             channel_id, u'Downloading page %s' % pagenum)
            video_ids.extend(re.findall(r'id="clip_(\d+?)"', webpage))
            if re.search(self._MORE_PAGES_INDICATOR, webpage, re.DOTALL) is None:
                break
        entries = [self.url_result('http://vimeo.com/%s' % video_id, 'Vimeo')
                   for video_id in video_ids]
        channel_title = self._html_search_regex(r'<a href="/channels/%s">(.*?)</a>' % channel_id,
                                                webpage, u'channel title')
        return {'_type': 'playlist',
                'id': channel_id,
                'title': channel_title,
                'entries': entries,
                }