Merge remote-tracking branch 'dstftw/rutube-channel'

pull/8/head
Philipp Hagemeister 11 years ago
commit 4857beba3a

@ -33,6 +33,7 @@ from youtube_dl.extractor import (
ImdbListIE, ImdbListIE,
KhanAcademyIE, KhanAcademyIE,
EveryonesMixtapeIE, EveryonesMixtapeIE,
RutubeChannelIE,
) )
@ -220,6 +221,14 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], 'Driving') self.assertEqual(result['title'], 'Driving')
self.assertEqual(len(result['entries']), 24) self.assertEqual(len(result['entries']), 24)
def test_rutube_channel(self):
    """Check that a Rutube tag (channel) URL is extracted as a playlist.

    Runs RutubeChannelIE against a live channel URL and verifies the
    playlist id and a lower bound on the number of entries.
    """
    extractor = RutubeChannelIE(FakeYDL())
    playlist = extractor.extract('http://rutube.ru/tags/video/1409')
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['id'], '1409')
    # Lower bound only: the test expects the entry count to be at least 34.
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count >= 34)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

@ -161,7 +161,12 @@ from .ro220 import Ro220IE
from .rottentomatoes import RottenTomatoesIE from .rottentomatoes import RottenTomatoesIE
from .roxwel import RoxwelIE from .roxwel import RoxwelIE
from .rtlnow import RTLnowIE from .rtlnow import RTLnowIE
from .rutube import RutubeIE from .rutube import (
RutubeIE,
RutubeChannelIE,
RutubeMovieIE,
RutubePersonIE,
)
from .servingsys import ServingSysIE from .servingsys import ServingSysIE
from .sina import SinaIE from .sina import SinaIE
from .slashdot import SlashdotIE from .slashdot import SlashdotIE

@ -1,58 +1,119 @@
# encoding: utf-8 # encoding: utf-8
from __future__ import unicode_literals
import re import re
import json import json
import itertools
from .common import InfoExtractor from .common import InfoExtractor
from ..utils import ( from ..utils import (
compat_urlparse,
compat_str, compat_str,
unified_strdate,
ExtractorError, ExtractorError,
) )
class RutubeIE(InfoExtractor): class RutubeIE(InfoExtractor):
_VALID_URL = r'https?://rutube\.ru/video/(?P<long_id>\w+)' IE_NAME = 'rutube'
IE_DESC = 'Rutube videos'
_VALID_URL = r'https?://rutube\.ru/video/(?P<id>[\da-z]{32})'
_TEST = { _TEST = {
u'url': u'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/',
u'file': u'3eac3b4561676c17df9132a9a1e62e3e.mp4', 'file': '3eac3b4561676c17df9132a9a1e62e3e.mp4',
u'info_dict': { 'info_dict': {
u'title': u'Раненный кенгуру забежал в аптеку', 'title': 'Раненный кенгуру забежал в аптеку',
u'uploader': u'NTDRussian', 'uploader': 'NTDRussian',
u'uploader_id': u'29790', 'uploader_id': '29790',
}, },
u'params': { 'params': {
# It requires ffmpeg (m3u8 download) # It requires ffmpeg (m3u8 download)
u'skip_download': True, 'skip_download': True,
}, },
} }
def _get_api_response(self, short_id, subpath):
api_url = 'http://rutube.ru/api/play/%s/%s/?format=json' % (subpath, short_id)
response_json = self._download_webpage(api_url, short_id,
u'Downloading %s json' % subpath)
return json.loads(response_json)
def _real_extract(self, url): def _real_extract(self, url):
mobj = re.match(self._VALID_URL, url) mobj = re.match(self._VALID_URL, url)
long_id = mobj.group('long_id') video_id = mobj.group('id')
webpage = self._download_webpage(url, long_id)
og_video = self._og_search_video_url(webpage) api_response = self._download_webpage('http://rutube.ru/api/video/%s/?format=json' % video_id,
short_id = compat_urlparse.urlparse(og_video).path[1:] video_id, 'Downloading video JSON')
options = self._get_api_response(short_id, 'options') video = json.loads(api_response)
trackinfo = self._get_api_response(short_id, 'trackinfo')
api_response = self._download_webpage('http://rutube.ru/api/play/trackinfo/%s/?format=json' % video_id,
video_id, 'Downloading trackinfo JSON')
trackinfo = json.loads(api_response)
# Some videos don't have the author field # Some videos don't have the author field
author = trackinfo.get('author') or {} author = trackinfo.get('author') or {}
m3u8_url = trackinfo['video_balancer'].get('m3u8') m3u8_url = trackinfo['video_balancer'].get('m3u8')
if m3u8_url is None: if m3u8_url is None:
raise ExtractorError(u'Couldn\'t find m3u8 manifest url') raise ExtractorError('Couldn\'t find m3u8 manifest url')
return { return {
'id': trackinfo['id'], 'id': video['id'],
'title': trackinfo['title'], 'title': video['title'],
'description': video['description'],
'duration': video['duration'],
'view_count': video['hits'],
'url': m3u8_url, 'url': m3u8_url,
'ext': 'mp4', 'ext': 'mp4',
'thumbnail': options['thumbnail_url'], 'thumbnail': video['thumbnail_url'],
'uploader': author.get('name'), 'uploader': author.get('name'),
'uploader_id': compat_str(author['id']) if author else None, 'uploader_id': compat_str(author['id']) if author else None,
'upload_date': unified_strdate(video['created_ts']),
'age_limit': 18 if video['is_adult'] else 0,
} }
class RutubeChannelIE(InfoExtractor):
    """Extractor for Rutube tag ("channel") pages.

    Walks the paginated JSON listing for a channel and returns a playlist
    whose entries are URL results delegated to the 'Rutube' extractor.
    Subclasses reuse the pagination by overriding ``_VALID_URL`` and
    ``_PAGE_TEMPLATE``.
    """
    IE_NAME = 'rutube:channel'
    IE_DESC = 'Rutube channels'
    # Accept both schemes, matching the single-video extractor's pattern.
    _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)'
    # Filled with (channel id, 1-based page number).
    _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json'

    def _extract_videos(self, channel_id, channel_title=None):
        """Collect all videos of a listing into a playlist result.

        channel_id -- id substituted into ``_PAGE_TEMPLATE``; also used as
                      the playlist id.
        channel_title -- optional playlist title.

        Pages are requested starting from 1 until a page has no results or
        does not report a truthy ``has_next``.
        Raises KeyError if a page lacks ``results`` or a result lacks
        ``video_url``.
        """
        entries = []
        for pagenum in itertools.count(1):
            api_response = self._download_webpage(
                self._PAGE_TEMPLATE % (channel_id, pagenum),
                channel_id, 'Downloading page %s' % pagenum)
            page = json.loads(api_response)
            results = page['results']
            if not results:
                break
            entries.extend(
                self.url_result(result['video_url'], 'Rutube')
                for result in results)
            # Treat a missing or any falsy ``has_next`` (False, null, 0) as
            # the last page instead of relying on identity with False.
            if not page.get('has_next'):
                break
        return self.playlist_result(entries, channel_id, channel_title)

    def _real_extract(self, url):
        """Return the playlist for the channel id embedded in *url*."""
        mobj = re.match(self._VALID_URL, url)
        channel_id = mobj.group('id')
        return self._extract_videos(channel_id)
class RutubeMovieIE(RutubeChannelIE):
    """Extractor for Rutube movie/TV metainfo pages.

    Fetches the movie's JSON record to obtain its name, then reuses the
    inherited ``_extract_videos`` pagination with that name as the
    playlist title.
    """
    IE_NAME = 'rutube:movie'
    IE_DESC = 'Rutube movies'
    # Accept both schemes, matching the single-video extractor's pattern.
    _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)'
    # Filled with the movie id.
    _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json'
    # Filled with (movie id, page number).
    _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json'

    def _real_extract(self, url):
        """Return a playlist of the movie's videos, titled with its name.

        Raises KeyError if the movie JSON has no ``name`` field.
        """
        mobj = re.match(self._VALID_URL, url)
        movie_id = mobj.group('id')
        api_response = self._download_webpage(
            self._MOVIE_TEMPLATE % movie_id, movie_id,
            'Downloading movie JSON')
        movie = json.loads(api_response)
        movie_name = movie['name']
        return self._extract_videos(movie_id, movie_name)
class RutubePersonIE(RutubeChannelIE):
    """Extractor for the videos listed on a Rutube person page.

    Only the URL pattern and the page template differ from the parent
    class; extraction itself is inherited unchanged.
    """
    IE_NAME = 'rutube:person'
    IE_DESC = 'Rutube person videos'
    # Accept both schemes, matching the single-video extractor's pattern.
    _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)'
    # Filled with (person id, page number).
    _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json'
Loading…
Cancel
Save