[generic] Add support for multiple brightcove URLs (Fixes #2283)

pull/2301/head
Philipp Hagemeister 11 years ago
parent b0268cb6ce
commit 99877772d0

@ -34,6 +34,7 @@ from youtube_dl.extractor import (
KhanAcademyIE, KhanAcademyIE,
EveryonesMixtapeIE, EveryonesMixtapeIE,
RutubeChannelIE, RutubeChannelIE,
GenericIE,
) )
@ -229,6 +230,16 @@ class TestPlaylists(unittest.TestCase):
self.assertEqual(result['id'], '1409') self.assertEqual(result['id'], '1409')
self.assertTrue(len(result['entries']) >= 34) self.assertTrue(len(result['entries']) >= 34)
def test_multiple_brightcove_videos(self):
    """Check that the generic extractor returns a three-entry playlist.

    Regression test for https://github.com/rg3/youtube-dl/issues/2283
    The page is fetched live, so this test needs network access.
    """
    page_url = 'http://www.newyorker.com/online/blogs/newsdesk/2014/01/always-never-nuclear-command-and-control.html'
    extractor = GenericIE(FakeYDL())
    playlist = extractor.extract(page_url)

    # The result must be a playlist rather than a single video.
    self.assertIsPlaylist(playlist)
    self.assertEqual(playlist['id'], 'always-never-nuclear-command-and-control')
    self.assertEqual(playlist['title'], 'Always/Never: A Little-Seen Movie About Nuclear Command and Control : The New Yorker')
    # Exactly three entries are expected for this page.
    self.assertEqual(len(playlist['entries']), 3)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

@ -127,25 +127,28 @@ class BrightcoveIE(InfoExtractor):
@classmethod @classmethod
def _extract_brightcove_url(cls, webpage): def _extract_brightcove_url(cls, webpage):
"""Try to extract the brightcove url from the wepbage, returns None """Try to extract the brightcove url from the webpage, returns None
if it can't be found if it can't be found
""" """
urls = cls._extract_brightcove_urls(webpage)
return urls[0] if urls else None
@classmethod
def _extract_brightcove_urls(cls, webpage):
"""Return a list of all Brightcove URLs from the webpage """
url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage) url_m = re.search(r'<meta\s+property="og:video"\s+content="(http://c.brightcove.com/[^"]+)"', webpage)
if url_m: if url_m:
return url_m.group(1) return [url_m.group(1)]
m_brightcove = re.search( matches = re.findall(
r'''(?sx)<object r'''(?sx)<object
(?: (?:
[^>]+?class=([\'"])[^>]*?BrightcoveExperience.*?\1 | [^>]+?class=[\'"][^>]*?BrightcoveExperience.*?[\'"] |
[^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/ [^>]*?>\s*<param\s+name="movie"\s+value="https?://[^/]*brightcove\.com/
).+?</object>''', ).+?</object>''',
webpage) webpage)
if m_brightcove is not None: return [cls._build_brighcove_url(m) for m in matches]
return cls._build_brighcove_url(m_brightcove.group())
else:
return None
def _real_extract(self, url): def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {}) url, smuggled_data = unsmuggle_url(url, {})

@ -234,11 +234,21 @@ class GenericIE(InfoExtractor):
r'^(?:https?://)?([^/]*)/.*', url, 'video uploader') r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')
# Look for BrightCove: # Look for BrightCove:
bc_url = BrightcoveIE._extract_brightcove_url(webpage) bc_urls = BrightcoveIE._extract_brightcove_urls(webpage)
if bc_url is not None: if bc_urls:
self.to_screen('Brightcove video detected.') self.to_screen('Brightcove video detected.')
surl = smuggle_url(bc_url, {'Referer': url}) entries = [{
return self.url_result(surl, 'Brightcove') '_type': 'url',
'url': smuggle_url(bc_url, {'Referer': url}),
'ie_key': 'Brightcove'
} for bc_url in bc_urls]
return {
'_type': 'playlist',
'title': video_title,
'id': video_id,
'entries': entries,
}
# Look for embedded (iframe) Vimeo player # Look for embedded (iframe) Vimeo player
mobj = re.search( mobj = re.search(

Loading…
Cancel
Save