Merge pull request #2041 from dstftw/imdb-list

[imdb] Add support for IMDb list (#2033)
pull/2/head
Jaime Marquínez Ferrándiz 11 years ago
commit 4fb757d1e0

@ -28,7 +28,8 @@ from youtube_dl.extractor import (
BandcampAlbumIE, BandcampAlbumIE,
SmotriCommunityIE, SmotriCommunityIE,
SmotriUserIE, SmotriUserIE,
IviCompilationIE IviCompilationIE,
ImdbListIE,
) )
@ -188,6 +189,15 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон') self.assertEqual(result['title'], u'Дежурный ангел (2010 - 2012) 2 сезон')
self.assertTrue(len(result['entries']) >= 20) self.assertTrue(len(result['entries']) >= 20)
def test_imdb_list(self):
    """Extract a known IMDb list and check the resulting playlist.

    Verifies that the result is a playlist, that its id and title match
    the expected values, and that it holds at least 48 entries.
    """
    extractor = ImdbListIE(FakeYDL())
    playlist = extractor.extract('http://www.imdb.com/list/sMjedvGDd8U')
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['id'], u'sMjedvGDd8U')
    self.assertEqual(playlist['title'], u'Animated and Family Films')
    # Lower bound only: the check passes for any count of 48 or more.
    entry_count = len(playlist['entries'])
    self.assertTrue(entry_count >= 48)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

@ -80,7 +80,10 @@ from .hotnewhiphop import HotNewHipHopIE
from .howcast import HowcastIE from .howcast import HowcastIE
from .hypem import HypemIE from .hypem import HypemIE
from .ign import IGNIE, OneUPIE from .ign import IGNIE, OneUPIE
from .imdb import ImdbIE from .imdb import (
ImdbIE,
ImdbListIE
)
from .ina import InaIE from .ina import InaIE
from .infoq import InfoQIE from .infoq import InfoQIE
from .instagram import InstagramIE from .instagram import InstagramIE

@ -55,3 +55,32 @@ class ImdbIE(InfoExtractor):
'description': descr, 'description': descr,
'thumbnail': format_info['slate'], 'thumbnail': format_info['slate'],
} }
class ImdbListIE(InfoExtractor):
    """Extractor that turns an IMDb list URL into a playlist of IMDb videos.

    The list title is read from the list's RSS feed and the list items are
    read from the list's CSV export. Only items whose id starts with 'vi'
    become playlist entries.
    """
    IE_NAME = u'imdb:list'
    IE_DESC = u'Internet Movie Database lists'
    _VALID_URL = r'http://www\.imdb\.com/list/(?P<id>[\da-zA-Z_-]{11})'

    def _real_extract(self, url):
        """Build the playlist result for the IMDb list at *url*.

        Returns the value of ``self.playlist_result`` called with the
        collected video entries, the list id and the list title.
        """
        list_id = re.match(self._VALID_URL, url).group('id')

        # The title is pulled out with a regex rather than an XML parser
        # because the RSS XML is sometimes malformed.
        rss_url = 'http://rss.imdb.com/list/%s' % list_id
        feed = self._download_webpage(rss_url, list_id, u'Downloading list RSS')
        list_title = self._html_search_regex(r'<title>(.*?)</title>', feed, u'list title')

        # The export does not depend on the real author_id, but it returns
        # 404 when no author_id is given at all; a dummy value seems to be
        # enough.
        export_url = 'http://www.imdb.com/list/export?list_id=%s&author_id=ur00000000' % list_id
        export = self._download_webpage(export_url, list_id, u'Downloading list CSV')

        # Skip the first line of the export, split every remaining line on
        # commas and ignore lines with fewer than two columns.
        rows = (line.split(',') for line in export.split('\n')[1:])
        # The item id is the second column with its first and last
        # characters dropped (presumably the surrounding quotes).
        item_ids = (columns[1][1:-1] for columns in rows if len(columns) >= 2)

        entries = [
            self.url_result('http://www.imdb.com/video/imdb/%s' % item_id, 'Imdb')
            for item_id in item_ids
            if item_id.startswith('vi')
        ]
        return self.playlist_result(entries, list_id, list_title)
Loading…
Cancel
Save